Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RDISCROWD-7704: Update task with checksum value #1020

Merged
merged 2 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pybossa/model/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# along with PYBOSSA. If not, see <http://www.gnu.org/licenses/>.

from sqlalchemy import Integer, Boolean, Float, UnicodeText, Text, DateTime
from sqlalchemy import String
import sqlalchemy
from sqlalchemy.schema import Column, ForeignKey, Index
from sqlalchemy.orm import relationship, backref
Expand Down Expand Up @@ -65,6 +66,8 @@ class Task(db.Model, DomainObject):
gold_answers = Column(JSONB)
#: Task.expiration field determining when a task should no longer be scheduled; stored as a UTC timestamp without timezone
expiration = Column(DateTime, nullable=True)
#: Task.dup_checksum field containing the SHA-256 hex digest (64 characters) used for the duplicate task check
dup_checksum = Column(String(64), nullable=True)

task_runs = relationship(TaskRun, cascade='all, delete, delete-orphan', backref='task')

Expand Down
4 changes: 1 addition & 3 deletions pybossa/repositories/task_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,7 @@ def save(self, element, clean_project=True):
# set task default expiration
if element.__class__.__name__ == "Task":
element.expiration = get_task_expiration(element.expiration, make_timestamp())
checksum = generate_checksum(element)
current_app.logger.info("Project %d duplicate checksum %s", element.project_id, checksum)
# element.checksum = generate_checksum(element) TODO: upon task table updated
element.dup_checksum = generate_checksum(element)
self.db.session.add(element)
self.db.session.commit()
if clean_project:
Expand Down
4 changes: 3 additions & 1 deletion pybossa/task_creator_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,4 +231,6 @@ def generate_checksum(task):
current_app.logger.info("Project %d duplicate check fields %s", task.project_id, str(list(checksum_fields)))
checksum = hashlib.sha256()
checksum.update(json.dumps(checksum_payload, sort_keys=True).encode("utf-8"))
return checksum.hexdigest()
checksum_value = checksum.hexdigest()
current_app.logger.info("Project %d duplicate checksum %s", project.id, checksum_value)
return checksum_value
4 changes: 2 additions & 2 deletions test/test_exporter/test_task_csv_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def test_exporters_generates_zip(self, json_uploader, csv_uploader, dataframe ):

expected_headers = ['info', 'fav_user_ids', 'user_pref', 'n_answers', 'quorum', 'calibration',
'created', 'state', 'gold_answers_best_job', 'gold_answers_best_boss', 'exported',
'project_id', 'id', 'priority_0', 'expiration', 'worker_pref', 'worker_filter']
'project_id', 'id', 'priority_0', 'expiration', 'worker_pref', 'worker_filter', 'dup_checksum']
obj_keys = list(task1_data.keys())

self._compare_object_keys(obj_keys, expected_headers)
Expand All @@ -163,7 +163,7 @@ def test_exporters_generates_zip(self, json_uploader, csv_uploader, dataframe ):

expected_headers = ['info', 'fav_user_ids', 'user_pref', 'n_answers', 'quorum', 'calibration',
'created', 'state', 'gold_answers', 'exported', 'project_id', 'id', 'priority_0', 'expiration',
'worker_pref', 'worker_filter']
'worker_pref', 'worker_filter', 'dup_checksum']
obj_keys = list(task2_data.keys())

self._compare_object_keys(obj_keys, expected_headers)
Expand Down
8 changes: 8 additions & 0 deletions test/test_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -10386,9 +10386,17 @@ def test_generate_checksum_public_data(self):
project=project,
info={"a": 1, "b": 2, "c": 3}
)
# Confirm the created task was populated with the generated checksum.
# This must be an `assert`: a bare `==` expression is evaluated and
# discarded, so a mismatch would never fail the test.
assert task.dup_checksum == expected_checksum
checksum = generate_checksum(task)
assert checksum == expected_checksum

# A project without duplicate checksum configured gets
# tasks created with a null checksum value.
project2 = ProjectFactory.create(owner=subadmin, info={"x": 123}, short_name="xyz")
task2 = TaskFactory.create(project=project2, info={"a": 1, "b": 2, "c": 3})
# Identity comparison is the idiomatic check for None.
assert task2.dup_checksum is None

@with_context
@patch("pybossa.task_creator_helper.get_encryption_key")
@patch("pybossa.task_creator_helper.read_encrypted_file")
Expand Down
Loading