Skip to content

Commit

Permalink
saving progress.
Browse files Browse the repository at this point in the history
  • Loading branch information
rshewitt committed Jan 22, 2024
1 parent 436e9ce commit 7ff2069
Show file tree
Hide file tree
Showing 26 changed files with 58,085 additions and 99 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
228 changes: 147 additions & 81 deletions harvester/harvest.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def dcatus_config() -> dict:
return {
"_title": "test_harvest_source_name",
"_url": "http://localhost/dcatus/dcatus.json",
"_extract_type": "dcatus",
"_extract_type": "datajson",
}


Expand All @@ -21,7 +21,7 @@ def waf_config() -> dict:
return {
"_title": "test_harvest_source_name",
"_url": "http://localhost",
"_extract_type": "waf",
"_extract_type": "waf-collection",
"_waf_config": {"filters": ["../", "dcatus/"]},
}

Expand All @@ -32,7 +32,7 @@ def dcatus_compare_config() -> dict:
return {
"_title": "test_harvest_source_name",
"_url": "http://localhost/dcatus/dcatus_compare.json",
"_extract_type": "dcatus",
"_extract_type": "datajson",
}


Expand Down
1,252 changes: 1,252 additions & 0 deletions tests/harvest-sources/dcatus/all_daily_sources.json

Large diffs are not rendered by default.

6,995 changes: 6,995 additions & 0 deletions tests/harvest-sources/dcatus/all_harvest_sources/0.json

Large diffs are not rendered by default.

7,037 changes: 7,037 additions & 0 deletions tests/harvest-sources/dcatus/all_harvest_sources/1.json

Large diffs are not rendered by default.

7,049 changes: 7,049 additions & 0 deletions tests/harvest-sources/dcatus/all_harvest_sources/2.json

Large diffs are not rendered by default.

7,073 changes: 7,073 additions & 0 deletions tests/harvest-sources/dcatus/all_harvest_sources/3.json

Large diffs are not rendered by default.

4,100 changes: 4,100 additions & 0 deletions tests/harvest-sources/dcatus/all_harvest_sources/4.json

Large diffs are not rendered by default.

24,319 changes: 24,319 additions & 0 deletions tests/harvest-sources/dcatus/all_harvest_sources/exceptions.txt

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
hawaii json https://data.hawaii.gov/data.json?version=2
Traceback (most recent call last):
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/requests/models.py", line 971, in json
return complexjson.loads(self.text, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/simplejson/__init__.py", line 514, in loads
return _default_decoder.decode(s)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/simplejson/decoder.py", line 386, in decode
obj, end = self.raw_decode(s)
^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/simplejson/decoder.py", line 416, in raw_decode
return self.scan_once(s, idx=_w(s, idx).end())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
simplejson.errors.JSONDecodeError: Expecting value: line 3 column 1 (char 2)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/Users/reidhewitt/work/xentity/datagov/datagov-harvesting-logic/harvester/harvest.py", line 554, in <module>
harvest_source.get_record_changes()
File "/Users/reidhewitt/work/xentity/datagov/datagov-harvesting-logic/harvester/harvest.py", line 237, in get_record_changes
self.get_harvest_records_as_id_hash()
File "/Users/reidhewitt/work/xentity/datagov/datagov-harvesting-logic/harvester/harvest.py", line 219, in get_harvest_records_as_id_hash
download_res = self.download_dcatus()
^^^^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/work/xentity/datagov/datagov-harvesting-logic/harvester/harvest.py", line 115, in download_dcatus
return resp.json()
^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/requests/models.py", line 975, in json
raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
requests.exceptions.JSONDecodeError: Expecting value: line 3 column 1 (char 2)


Federal Laboratory Consortium Data.json https://federallabs.org/at-report.json
Traceback (most recent call last):
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/requests/models.py", line 971, in json
return complexjson.loads(self.text, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/simplejson/__init__.py", line 514, in loads
return _default_decoder.decode(s)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/simplejson/decoder.py", line 386, in decode
obj, end = self.raw_decode(s)
^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/simplejson/decoder.py", line 416, in raw_decode
return self.scan_once(s, idx=_w(s, idx).end())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
simplejson.errors.JSONDecodeError: Expecting value: line 2 column 1 (char 2)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/Users/reidhewitt/work/xentity/datagov/datagov-harvesting-logic/harvester/harvest.py", line 554, in <module>
harvest_source.get_record_changes()
File "/Users/reidhewitt/work/xentity/datagov/datagov-harvesting-logic/harvester/harvest.py", line 237, in get_record_changes
self.get_harvest_records_as_id_hash()
File "/Users/reidhewitt/work/xentity/datagov/datagov-harvesting-logic/harvester/harvest.py", line 219, in get_harvest_records_as_id_hash
download_res = self.download_dcatus()
^^^^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/work/xentity/datagov/datagov-harvesting-logic/harvester/harvest.py", line 115, in download_dcatus
return resp.json()
^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/requests/models.py", line 975, in json
raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
requests.exceptions.JSONDecodeError: Expecting value: line 2 column 1 (char 2)


City and County of Durham, North Carolina Data.json Harvest Source https://opendurham.nc.gov/data.json
Traceback (most recent call last):
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py", line 467, in _make_request
self._validate_conn(conn)
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py", line 1096, in _validate_conn
conn.connect()
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/urllib3/connection.py", line 642, in connect
sock_and_verified = _ssl_wrap_socket_and_match_hostname(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/urllib3/connection.py", line 782, in _ssl_wrap_socket_and_match_hostname
ssl_sock = ssl_wrap_socket(
^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 470, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/urllib3/util/ssl_.py", line 514, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/Cellar/[email protected]/3.11.4_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/ssl.py", line 517, in wrap_socket
return self.sslsocket_class._create(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/homebrew/Cellar/[email protected]/3.11.4_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/ssl.py", line 1075, in _create
self.do_handshake()
File "/opt/homebrew/Cellar/[email protected]/3.11.4_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/ssl.py", line 1346, in do_handshake
self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'opendurham.nc.gov'. (_ssl.c:1002)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py", line 790, in urlopen
response = self._make_request(
^^^^^^^^^^^^^^^^^^^
File "/Users/reidhewitt/Library/Caches/pypoetry/virtualenvs/datagov-harvesting-logic-sySHycX5-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py", line 491, in _make_request
raise new_e
urllib3.exceptions.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'opendurham.nc.gov'. (_ssl.c:1002)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
17 changes: 2 additions & 15 deletions tests/unit/load/test_ckan_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def test_ckanify_dcatus(self, dcatus_config):
"name": "commitment-of-traders",
"owner_org": "test",
"identifier": "cftc-dc1",
"author": None,
"author_email": None,
"maintainer": "Harold W. Hild",
"maintainer_email": "[email protected]",
"notes": "COT reports provide a breakdown of each Tuesday's open interest for futures and options on futures market in which 20 or more traders hold positions equal to or above the reporting levels established by CFTC",
Expand All @@ -59,29 +61,16 @@ def test_ckanify_dcatus(self, dcatus_config):
{"name": "open-interest"},
],
"extras": [
{
"key": "publisher_hierarchy",
"value": "U.S. Government > U.S. Commodity Futures Trading Commission",
},
{"key": "resource-type", "value": "Dataset"},
{
"key": "publisher",
"value": "U.S. Commodity Futures Trading Commission",
},
{"key": "accessLevel", "value": "public"},
{"key": "bureauCode", "value": "339:00"},
{"key": "identifier", "value": "cftc-dc1"},
{"key": "modified", "value": "R/P1W"},
{"key": "programCode", "value": "000:000"},
{
"key": "publisher",
"value": "U.S. Commodity Futures Trading Commission",
},
{
"key": "publisher_hierarchy",
"value": "U.S. Government > U.S. Commodity Futures Trading Commission",
},
{"key": "resource-type", "value": "Dataset"},
{
"key": "publisher",
"value": "U.S. Commodity Futures Trading Commission",
Expand All @@ -92,8 +81,6 @@ def test_ckanify_dcatus(self, dcatus_config):
},
{"key": "harvest_source_name", "value": "test_harvest_source_name"},
],
"author": None,
"author_email": None,
}

test_record = harvest_source.records["cftc-dc1"]
Expand Down

1 comment on commit 7ff2069

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
harvester
   __init__.py00100% 
   harvest.py2972929 90%
   utils.py100100% 
TOTAL3072991% 

Tests Skipped Failures Errors Time
7 0 💤 0 ❌ 0 🔥 0.967s ⏱️

Please sign in to comment.