Skip to content

Commit

Permalink
Merge pull request #64 from GSA/refactor-bulk-add-harvest-records
Browse files Browse the repository at this point in the history
refactor bulk add records to follow ORM addall pattern
  • Loading branch information
rshewitt authored May 14, 2024
2 parents 77292fa + c5f52c6 commit 03f52c6
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 13 deletions.
15 changes: 11 additions & 4 deletions database/interface.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import uuid
from sqlalchemy import create_engine, inspect, or_
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import scoped_session, sessionmaker
Expand Down Expand Up @@ -223,18 +224,24 @@ def add_harvest_record(self, record_data):
self.db.rollback()
return None

def add_harvest_records(self, records_data: list) -> bool:
def add_harvest_records(self, records_data: list) -> dict:
"""
Add many records at once
:param list records_data: List of records with unique UUIDs
:return bool success of operation
:return dict id_lookup_table: identifiers -> ids
:raises Exception: if the records_data contains records with errors
"""
try:
self.db.bulk_insert_mappings(HarvestRecord, records_data)
id_lookup_table = {}
for i, record_data in enumerate(records_data):
new_record = HarvestRecord(id=str(uuid.uuid4()), **record_data)
id_lookup_table[new_record.identifier] = new_record.id
self.db.add(new_record)
if i % 1000 == 0:
self.db.flush()
self.db.commit()
return True
return id_lookup_table
except Exception as e:
print("Error:", e)
self.db.rollback()
Expand Down
15 changes: 11 additions & 4 deletions tests/unit/database/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,17 @@ def test_add_harvest_records(
interface.add_harvest_source(source_data_dcatus)
interface.add_harvest_job(job_data_dcatus)

records = [record_data_dcatus] * 10
success = interface.add_harvest_records(records)
assert success is True
assert len(interface.get_all_harvest_records()) == 10
records = []
for i in range(10):
new_record = record_data_dcatus.copy()
new_record["identifier"] = f"test-identifier-{i}"
records.append(new_record)

id_lookup_table = interface.add_harvest_records(records)
db_records = interface.get_all_harvest_records()
assert len(id_lookup_table) == 10
assert len(db_records) == 10
assert id_lookup_table[db_records[0]["identifier"]] == db_records[0]["id"]

def test_add_harvest_job_with_id(
self, interface, organization_data, source_data_dcatus, job_data_dcatus
Expand Down
5 changes: 0 additions & 5 deletions tests/unit/test_load_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ def mock_bad_cf_index(monkeypatch):
monkeypatch.setenv("CF_INSTANCE_INDEX", "1")


@pytest.fixture(autouse=True)
def mock_lm_config(monkeypatch):
monkeypatch.setenv("LM_RUNNER_APP_GUID", "f4ab7f86-bee0-44fd-8806-1dca7f8e215a")


class TestLoadManager:
@patch.object(HarvesterDBInterface, "update_harvest_job")
@patch.object(CFHandler, "start_task")
Expand Down

1 comment on commit 03f52c6

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
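| File | Stmts | Miss | Cover | Missing |
| --- | --- | --- | --- | --- |
| **harvester** | | | | |
|    __init__.py | 5 | 0 | 100% | |
|    ckan_utils.py | 113 | 6 | 95% | |
|    exceptions.py | 42 | 0 | 100% | |
|    harvest.py | 237 | 41 | 83% | |
|    logger_config.py | 1 | 0 | 100% | |
|    utils.py | 79 | 10 | 87% | |
| **TOTAL** | 477 | 57 | 88% | |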

| Tests | Skipped | Failures | Errors | Time |
| --- | --- | --- | --- | --- |
| 51 | 0 💤 | 0 ❌ | 0 🔥 | 3.629s ⏱️ |

Please sign in to comment.