Skip to content

Commit

Permalink
fix: drop data only if data exists
Browse files: browse the repository at this point in the history
  • Loading branch information
sylvanr committed Feb 7, 2024
1 parent 9d5f4c6 commit 0691c4b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
3 changes: 3 additions & 0 deletions direct_indexing/direct_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ def drop_removed_data():
url = f'{settings.SOLR_DATASET}/select?fl=name%2Cid%2Ciati_cloud_indexed&indent=true&q.op=OR&q=*%3A*&rows=10000000'
data = requests.get(url)
data = data.json()['response']['docs']
if len(data) == 0:
logging.info('drop_removed_data:: No data found in the dataset index, skipping drop')
return

# Get a list of dataset names from the dataset metadata file
with open(f'{settings.BASE_DIR}/direct_indexing/data_sources/datasets/dataset_metadata.json') as f:
Expand Down
10 changes: 9 additions & 1 deletion tests/direct_indexing/test_direct_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ def test_drop_removed_data(mocker, tmp_path, requests_mock, fixture_solr_respons
# Mock settings.SOLR_DATASET to https://test.com
mocker.patch('direct_indexing.direct_indexing.settings.SOLR_DATASET', 'https://test.com')
test_url = 'https://test.com/select?fl=name%2Cid%2Ciati_cloud_indexed&indent=true&q.op=OR&q=*%3A*&rows=10000000'
requests_mock.get(test_url, json=fixture_solr_response)





# mock settings.BASE_DIR to be tmp_path
mocker.patch('direct_indexing.direct_indexing.settings.BASE_DIR', tmp_path)
Expand All @@ -105,7 +108,12 @@ def test_drop_removed_data(mocker, tmp_path, requests_mock, fixture_solr_respons
mock_solr = mocker.patch(SOLR, return_value=solr_instance_mock)
# mock solr.search to return a list with a single (empty) element
solr_instance_mock.search.return_value = [{}]

requests_mock.get(test_url, json={'response': {'docs': []}})
drop_removed_data()
assert solr_instance_mock.delete.call_count == 0
# Run drop_removed_data again, this time with the populated Solr fixture response
requests_mock.get(test_url, json=fixture_solr_response)
drop_removed_data()

# assert mock_solr was called 5 times, once for each core including datasets
Expand Down

0 comments on commit 0691c4b

Please sign in to comment.