Skip to content

Commit

Permalink
update db w/ checksum value
Browse files Browse the repository at this point in the history
  • Loading branch information
dchhabda committed Dec 19, 2024
1 parent b193893 commit 36e26d1
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 6 deletions.
3 changes: 3 additions & 0 deletions pybossa/model/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# along with PYBOSSA. If not, see <http://www.gnu.org/licenses/>.

from sqlalchemy import Integer, Boolean, Float, UnicodeText, Text, DateTime
from sqlalchemy import String
import sqlalchemy
from sqlalchemy.schema import Column, ForeignKey, Index
from sqlalchemy.orm import relationship, backref
Expand Down Expand Up @@ -65,6 +66,8 @@ class Task(db.Model, DomainObject):
gold_answers = Column(JSONB)
#: Task.expiration field: time after which the task should no longer be scheduled, stored as a UTC timestamp without timezone.
expiration = Column(DateTime, nullable=True)
#: Task.dup_checksum field: checksum of the task used for duplicate-task detection (hex digest, up to 64 characters).
dup_checksum = Column(String(64), nullable=True)

task_runs = relationship(TaskRun, cascade='all, delete, delete-orphan', backref='task')

Expand Down
4 changes: 1 addition & 3 deletions pybossa/repositories/task_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,7 @@ def save(self, element, clean_project=True):
# set task default expiration
if element.__class__.__name__ == "Task":
element.expiration = get_task_expiration(element.expiration, make_timestamp())
checksum = generate_checksum(element)
current_app.logger.info("Project %d duplicate checksum %s", element.project_id, checksum)
# element.checksum = generate_checksum(element) TODO: upon task table updated
element.dup_checksum = generate_checksum(element)
self.db.session.add(element)
self.db.session.commit()
if clean_project:
Expand Down
4 changes: 3 additions & 1 deletion pybossa/task_creator_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,4 +231,6 @@ def generate_checksum(task):
current_app.logger.info("Project %d duplicate check fields %s", task.project_id, str(list(checksum_fields)))
checksum = hashlib.sha256()
checksum.update(json.dumps(checksum_payload, sort_keys=True).encode("utf-8"))
return checksum.hexdigest()
checksum_value = checksum.hexdigest()
current_app.logger.info("Project %d duplicate checksum %s", project.id, checksum_value)
return checksum_value
4 changes: 2 additions & 2 deletions test/test_exporter/test_task_csv_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def test_exporters_generates_zip(self, json_uploader, csv_uploader, dataframe ):

expected_headers = ['info', 'fav_user_ids', 'user_pref', 'n_answers', 'quorum', 'calibration',
'created', 'state', 'gold_answers_best_job', 'gold_answers_best_boss', 'exported',
'project_id', 'id', 'priority_0', 'expiration', 'worker_pref', 'worker_filter']
'project_id', 'id', 'priority_0', 'expiration', 'worker_pref', 'worker_filter', 'dup_checksum']
obj_keys = list(task1_data.keys())

self._compare_object_keys(obj_keys, expected_headers)
Expand All @@ -163,7 +163,7 @@ def test_exporters_generates_zip(self, json_uploader, csv_uploader, dataframe ):

expected_headers = ['info', 'fav_user_ids', 'user_pref', 'n_answers', 'quorum', 'calibration',
'created', 'state', 'gold_answers', 'exported', 'project_id', 'id', 'priority_0', 'expiration',
'worker_pref', 'worker_filter']
'worker_pref', 'worker_filter', 'dup_checksum']
obj_keys = list(task2_data.keys())

self._compare_object_keys(obj_keys, expected_headers)
Expand Down

0 comments on commit 36e26d1

Please sign in to comment.