From fadde460439716dfa2f6f17ebdd3556bad4337d1 Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Tue, 30 Apr 2024 11:10:46 -0500 Subject: [PATCH 1/4] adds interface for bulk inserting harvest_records; adds test --- app/interface.py | 17 +++++++++ app/models.py | 4 +-- tests/database/test_db.py | 73 ++++++++++++++++++++++++++++----------- 3 files changed, 72 insertions(+), 22 deletions(-) diff --git a/app/interface.py b/app/interface.py index a960d586..87e1c9f2 100644 --- a/app/interface.py +++ b/app/interface.py @@ -259,6 +259,23 @@ def add_harvest_record(self, record_data): self.db.rollback() return None + def add_harvest_records(self, records_data: list) -> bool: + """ + Add many records at once + + :param list records_data: List of records with unique UUIDs + :return: True on success; None if the insert or commit raises + (the exception is caught and printed, and the session is rolled back) + """ + try: + self.db.bulk_insert_mappings(HarvestRecord, records_data) + self.db.commit() + return True + except Exception as e: + print("Error:", e) + self.db.rollback() + return None + # for test, will remove later def get_all_harvest_records(self): harvest_records = self.db.query(HarvestRecord).all() diff --git a/app/models.py b/app/models.py index 0dac4ead..042dc7dc 100644 --- a/app/models.py +++ b/app/models.py @@ -83,10 +83,10 @@ class HarvestError(Base): reference = db.Column(db.String) -class HarvestRecord(db.Model): +class HarvestRecord(Base): __tablename__ = "harvest_record" - id = db.Column(db.String, primary_key=True) + identifier = db.Column(db.String()) harvest_job_id = db.Column( db.String(36), db.ForeignKey("harvest_job.id"), nullable=True ) diff --git a/tests/database/test_db.py b/tests/database/test_db.py index f037bbee..ca52c18c 100644 --- a/tests/database/test_db.py +++ b/tests/database/test_db.py @@ -1,4 +1,5 @@ import pytest +import uuid from sqlalchemy.orm import scoped_session, sessionmaker from app import create_app @@ -43,12 +44,36 @@ def org_data(): return {"name": "Test Org", 
"logo": "https://example.com/logo.png"} +@pytest.fixture +def source_data(organization): + return { + "name": "Test Source", + "notification_emails": "email@example.com", + "organization_id": organization.id, + "frequency": "daily", + "url": "http://example.com", + "schema_type": "type1", + "source_type": "typeA", + "status": "active", + } + + @pytest.fixture def organization(interface, org_data): org = interface.add_organization(org_data) return org +@pytest.fixture +def job_data(): + return {"status": "new"} + + +@pytest.fixture +def record_data(): + return {"identifier": "1234abcd", "source_hash": "1234abcd"} + + def test_add_organization(interface, org_data): org = interface.add_organization(org_data) assert org is not None @@ -74,20 +99,6 @@ def test_delete_organization(interface, organization): assert result == "Organization deleted successfully" -@pytest.fixture -def source_data(organization): - return { - "name": "Test Source", - "notification_emails": "email@example.com", - "organization_id": organization.id, - "frequency": "daily", - "url": "http://example.com", - "schema_type": "type1", - "source_type": "typeA", - "status": "active", - } - - def test_add_harvest_source(interface, source_data): source = interface.add_harvest_source(source_data) assert source is not None @@ -127,13 +138,7 @@ def test_delete_harvest_source(interface, source_data): assert deleted_source is None -@pytest.fixture -def job_data(source_data): - return {"status": "new"} - - def test_harvest_source_by_jobid(interface, source_data, job_data): - source = interface.add_harvest_source(source_data) job_data["harvest_source_id"] = source.id @@ -141,3 +146,31 @@ def test_harvest_source_by_jobid(interface, source_data, job_data): harvest_source = interface.get_source_by_jobid(harvest_job.id) assert source.id == harvest_source["id"] + + +def test_add_harvest_record(interface, source_data, job_data, record_data): + source = interface.add_harvest_source(source_data) + 
job_data["harvest_source_id"] = source.id + harvest_job = interface.add_harvest_job(job_data) + record_data["harvest_source_id"] = source.id + record_data["harvest_job_id"] = harvest_job.id + + record = interface.add_harvest_record(record_data) + + assert record.harvest_source_id == source.id + assert record.harvest_job_id == harvest_job.id + + +def test_add_harvest_records(interface, source_data, job_data, record_data): + source = interface.add_harvest_source(source_data) + job_data["harvest_source_id"] = source.id + harvest_job = interface.add_harvest_job(job_data) + record_data["harvest_source_id"] = source.id + record_data["harvest_job_id"] = harvest_job.id + + records_data = [record_data.copy() for i in range(10)] + for record in records_data: + record["identifier"] = str(uuid.uuid4()) + success = interface.add_harvest_records(records_data) + assert success is True + assert len(interface.get_all_harvest_records()) == 10 From 0621c5f393a58323f429144df6e55b6ec776b67c Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Tue, 30 Apr 2024 11:12:48 -0500 Subject: [PATCH 2/4] adds test comment, lint, and bumps version --- pyproject.toml | 2 +- tests/database/test_db.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1cca4817..37d59b38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datagov-harvesting-logic" -version = "0.3.9" +version = "0.3.10" description = "" # authors = [ # {name = "Jin Sun", email = "jin.sun@gsa.gov"}, diff --git a/tests/database/test_db.py b/tests/database/test_db.py index ca52c18c..b587e594 100644 --- a/tests/database/test_db.py +++ b/tests/database/test_db.py @@ -1,5 +1,6 @@ -import pytest import uuid + +import pytest from sqlalchemy.orm import scoped_session, sessionmaker from app import create_app @@ -169,7 +170,7 @@ def test_add_harvest_records(interface, source_data, job_data, record_data): record_data["harvest_job_id"] = harvest_job.id 
records_data = [record_data.copy() for i in range(10)] - for record in records_data: + for record in records_data: # add unique uuid to each record record["identifier"] = str(uuid.uuid4()) success = interface.add_harvest_records(records_data) assert success is True From 0d717228355c850dbf208028711c0b5afc1016de Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Tue, 30 Apr 2024 15:10:10 -0500 Subject: [PATCH 3/4] makes records_data a fixture --- tests/database/test_db.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/database/test_db.py b/tests/database/test_db.py index b587e594..4630da51 100644 --- a/tests/database/test_db.py +++ b/tests/database/test_db.py @@ -72,7 +72,11 @@ def job_data(): @pytest.fixture def record_data(): - return {"identifier": "1234abcd", "source_hash": "1234abcd"} + return {"identifier": str(uuid.uuid4()), "source_hash": str(uuid.uuid4())} + +@pytest.fixture +def records_data(): + return [{"identifier": str(uuid.uuid4()), "source_hash": str(uuid.uuid4())} for i in range(10)] def test_add_organization(interface, org_data): @@ -162,16 +166,15 @@ def test_add_harvest_record(interface, source_data, job_data, record_data): assert record.harvest_job_id == harvest_job.id -def test_add_harvest_records(interface, source_data, job_data, record_data): +def test_add_harvest_records(interface, source_data, job_data, records_data): source = interface.add_harvest_source(source_data) job_data["harvest_source_id"] = source.id harvest_job = interface.add_harvest_job(job_data) - record_data["harvest_source_id"] = source.id - record_data["harvest_job_id"] = harvest_job.id - records_data = [record_data.copy() for i in range(10)] - for record in records_data: # add unique uuid to each record - record["identifier"] = str(uuid.uuid4()) + for record in records_data: + record["harvest_source_id"] = source.id + record["harvest_job_id"] = harvest_job.id + success = interface.add_harvest_records(records_data) assert success 
is True assert len(interface.get_all_harvest_records()) == 10 From 25ce986335daf7f515fa52d4a8a50c05ac4d607b Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Tue, 30 Apr 2024 18:04:22 -0500 Subject: [PATCH 4/4] fix lint --- tests/database/test_db.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/database/test_db.py b/tests/database/test_db.py index 4630da51..47ffcf8b 100644 --- a/tests/database/test_db.py +++ b/tests/database/test_db.py @@ -74,9 +74,13 @@ def job_data(): def record_data(): return {"identifier": str(uuid.uuid4()), "source_hash": str(uuid.uuid4())} + @pytest.fixture def records_data(): - return [{"identifier": str(uuid.uuid4()), "source_hash": str(uuid.uuid4())} for i in range(10)] + return [ + {"identifier": str(uuid.uuid4()), "source_hash": str(uuid.uuid4())} + for i in range(10) + ] def test_add_organization(interface, org_data):