From 2eea1fd4d69c24bd110130ed8bee344e958aeb71 Mon Sep 17 00:00:00 2001
From: Harris Tzovanakis <me@drjova.com>
Date: Fri, 7 Jul 2023 15:12:23 +0200
Subject: [PATCH 1/3] workflows: only use matcher on hep

* ref: cern-sis/issues-inspire#347
---
 inspirehep/modules/workflows/tasks/actions.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/inspirehep/modules/workflows/tasks/actions.py b/inspirehep/modules/workflows/tasks/actions.py
index 14f4b3f2ff..5c0563ed66 100644
--- a/inspirehep/modules/workflows/tasks/actions.py
+++ b/inspirehep/modules/workflows/tasks/actions.py
@@ -78,7 +78,6 @@
     extract_references_from_text,
     extract_references_from_text_data,
 )
-from inspirehep.modules.refextract.matcher import match_references
 from inspirehep.modules.workflows.utils import _get_headers_for_hep_root_table_request, create_error
 from inspirehep.modules.workflows.errors import BadGatewayError, MissingRecordControlNumber
 from inspirehep.modules.workflows.utils import (
@@ -474,10 +473,6 @@ def match_references_hep(references):
     create_error(response)
 
 
-def match_references_based_on_flag(references):
-    return match_references(references)
-
-
 @with_debug_logging
 def refextract(obj, eng):
     """Extract references from various sources and add them to the workflow.
@@ -497,7 +492,7 @@ def refextract(obj, eng):
     if 'references' in obj.data:
         extracted_raw_references = dedupe_list(extract_references_from_raw_refs(obj.data['references']))
         obj.log.info('Extracted %d references from raw refs.', len(extracted_raw_references))
-        obj.data['references'] = match_references_based_on_flag(extracted_raw_references)
+        obj.data['references'] = match_references_hep(extracted_raw_references)
         return
 
     matched_pdf_references, matched_text_references = [], []
@@ -511,12 +506,12 @@ def refextract(obj, eng):
                 url, source=source, custom_kbs_file=journal_kb_dict
             )
         )
-        matched_pdf_references = match_references_based_on_flag(pdf_references)
+        matched_pdf_references = match_references_hep(pdf_references)
     else:
         with get_document_in_workflow(obj) as tmp_document:
             if tmp_document:
                 pdf_references = dedupe_list(extract_references_from_pdf(tmp_document, source))
-                matched_pdf_references = match_references_based_on_flag(pdf_references)
+                matched_pdf_references = match_references_hep(pdf_references)
 
     text = get_value(obj.extra_data, 'formdata.references')
     if text and current_app.config.get("FEATURE_FLAG_ENABLE_REFEXTRACT_SERVICE"):
@@ -525,10 +520,10 @@ def refextract(obj, eng):
                 text, source=source, custom_kbs_file=journal_kb_dict
             )
         )
-        matched_text_references = match_references_based_on_flag(text_references)
+        matched_text_references = match_references_hep(text_references)
     elif text:
         text_references = dedupe_list(extract_references_from_text(text, source))
-        matched_text_references = match_references_based_on_flag(text_references)
+        matched_text_references = match_references_hep(text_references)
 
     if not matched_pdf_references and not matched_text_references:
         obj.log.info('No references extracted.')

From fc657d4657a6e8ce7d170257f2702dca5d0a0c39 Mon Sep 17 00:00:00 2001
From: Harris Tzovanakis <me@drjova.com>
Date: Mon, 10 Jul 2023 11:45:37 +0200
Subject: [PATCH 2/3] tests: fix linked references

---
 tests/integration/workflows/conftest.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/integration/workflows/conftest.py b/tests/integration/workflows/conftest.py
index f85095bc80..311d566b6f 100644
--- a/tests/integration/workflows/conftest.py
+++ b/tests/integration/workflows/conftest.py
@@ -289,6 +289,13 @@ def mocked_external_services(workflow_app):
             headers=_get_headers_for_hep_root_table_request(),
             status_code=200,
         )
+        requests_mocker.register_uri(
+            "POST",
+            "http://web:8000/api/matcher/linked_references/",
+            json={"references": []},
+            headers=_get_headers_for_hep_root_table_request(),
+            status_code=200,
+        )
         if "INSPIREHEP_URL" in workflow_app.config:
             # HEP record upload
             requests_mocker.register_uri(

From 196dfa709215ea8710dd95fd3e9fcb1f80fa6029 Mon Sep 17 00:00:00 2001
From: Harris Tzovanakis <me@drjova.com>
Date: Mon, 10 Jul 2023 14:42:52 +0000
Subject: [PATCH 3/3] tests: reformat action tests, drop pytest cov addopts, ignore bytecode in docker

---
 .dockerignore                                 |    3 +
 setup.cfg                                     |    2 -
 .../workflows/test_workflows_tasks_actions.py | 1074 ++++++++++-------
 3 files changed, 620 insertions(+), 459 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index a306647fe3..8e70a6e095 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -32,3 +32,6 @@
 Procfile*
 docker*
 gun
+
+**/__pycache__
+**/*.pyc
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index 884a649ed1..169d37fa83 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -46,8 +46,6 @@ omit =
   inspirehep/wsgi.py
   inspirehep/wsgi_with_coverage.py
 
-[tool:pytest]
-addopts = --cov=inspirehep --cov-report=term-missing:skip-covered
 
 [flake8]
 ignore = *.py E501 FI12 FI14 FI15 FI16 FI17 FI18 FI50 FI51 FI53 W504 W605 W743 W503
diff --git a/tests/unit/workflows/test_workflows_tasks_actions.py b/tests/unit/workflows/test_workflows_tasks_actions.py
index 6ade796b57..7cc5175d33 100644
--- a/tests/unit/workflows/test_workflows_tasks_actions.py
+++ b/tests/unit/workflows/test_workflows_tasks_actions.py
@@ -60,15 +60,15 @@
 
 def _get_auto_reject_obj(decision, has_core_keywords, fulltext_used):
     obj_params = {
-        'classifier_results': {
-            'complete_output': {
-                'core_keywords': ['something'] if has_core_keywords else [],
+        "classifier_results": {
+            "complete_output": {
+                "core_keywords": ["something"] if has_core_keywords else [],
             },
-            'fulltext_used': fulltext_used,
+            "fulltext_used": fulltext_used,
         },
-        'relevance_prediction': {
-            'max_score': '0.222113',
-            'decision': decision,
+        "relevance_prediction": {
+            "max_score": "0.222113",
+            "decision": decision,
         },
     }
 
@@ -78,109 +78,121 @@ def _get_auto_reject_obj(decision, has_core_keywords, fulltext_used):
 def test_download_documents():
     with requests_mock.Mocker() as requests_mocker:
         requests_mocker.register_uri(
-            'GET', 'http://export.arxiv.org/pdf/1605.03844',
+            "GET",
+            "http://export.arxiv.org/pdf/1605.03844",
             content=pkg_resources.resource_string(
-                __name__, os.path.join('fixtures', '1605.03844.pdf')),
+                __name__, os.path.join("fixtures", "1605.03844.pdf")
+            ),
         )
 
-        schema = load_schema('hep')
-        subschema = schema['properties']['documents']
+        schema = load_schema("hep")
+        subschema = schema["properties"]["documents"]
 
         data = {
-            'documents': [
+            "documents": [
                 {
-                    'key': '1605.03844.pdf',
-                    'url': 'http://export.arxiv.org/pdf/1605.03844'
+                    "key": "1605.03844.pdf",
+                    "url": "http://export.arxiv.org/pdf/1605.03844",
                 },
             ],
         }  # literature/1458302
         extra_data = {}
         files = MockFiles({})
-        assert validate(data['documents'], subschema) is None
+        assert validate(data["documents"], subschema) is None
 
         obj = MockObj(data, extra_data, files=files)
         eng = MockEng()
 
         assert download_documents(obj, eng) is None
 
-        documents = obj.data['documents']
-        expected_document_url = '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03844.pdf'
+        documents = obj.data["documents"]
+        expected_document_url = (
+            "/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03844.pdf"
+        )
 
         assert 1 == len(documents)
-        assert expected_document_url == documents[0]['url']
+        assert expected_document_url == documents[0]["url"]
 
 
 def test_download_documents_with_multiple_documents():
     with requests_mock.Mocker() as requests_mocker:
         requests_mocker.register_uri(
-            'GET', 'http://export.arxiv.org/pdf/1605.03844',
+            "GET",
+            "http://export.arxiv.org/pdf/1605.03844",
             content=pkg_resources.resource_string(
-                __name__, os.path.join('fixtures', '1605.03844.pdf')),
+                __name__, os.path.join("fixtures", "1605.03844.pdf")
+            ),
         )
         requests_mocker.register_uri(
-            'GET', 'http://export.arxiv.org/pdf/1605.03845',
+            "GET",
+            "http://export.arxiv.org/pdf/1605.03845",
             content=pkg_resources.resource_string(
-                __name__, os.path.join('fixtures', '1605.03844.pdf')),
+                __name__, os.path.join("fixtures", "1605.03844.pdf")
+            ),
         )
 
-        schema = load_schema('hep')
-        subschema = schema['properties']['documents']
+        schema = load_schema("hep")
+        subschema = schema["properties"]["documents"]
 
         data = {
-            'documents': [
+            "documents": [
                 {
-                    'key': '1605.03844.pdf',
-                    'url': 'http://export.arxiv.org/pdf/1605.03844'
+                    "key": "1605.03844.pdf",
+                    "url": "http://export.arxiv.org/pdf/1605.03844",
                 },
                 {
-                    'key': '1605.03845.pdf',
-                    'url': 'http://export.arxiv.org/pdf/1605.03845'
+                    "key": "1605.03845.pdf",
+                    "url": "http://export.arxiv.org/pdf/1605.03845",
                 },
             ],
         }  # literature/1458302
         extra_data = {}
         files = MockFiles({})
-        assert validate(data['documents'], subschema) is None
+        assert validate(data["documents"], subschema) is None
 
         obj = MockObj(data, extra_data, files=files)
         eng = MockEng()
 
         assert download_documents(obj, eng) is None
 
-        documents = obj.data['documents']
-        expected_document_url_1 = '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03844.pdf'
-        expected_document_url_2 = '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03845.pdf'
+        documents = obj.data["documents"]
+        expected_document_url_1 = (
+            "/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03844.pdf"
+        )
+        expected_document_url_2 = (
+            "/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/1605.03845.pdf"
+        )
 
         assert 2 == len(documents)
-        assert expected_document_url_1 == documents[0]['url']
-        assert expected_document_url_2 == documents[1]['url']
+        assert expected_document_url_1 == documents[0]["url"]
+        assert expected_document_url_2 == documents[1]["url"]
 
 
 def test_mark():
     obj = MockObj({}, {})
     eng = MockEng()
 
-    foobar_mark = mark('foo', 'bar')
+    foobar_mark = mark("foo", "bar")
 
-    assert foobar_mark(obj, eng) == {'foo': 'bar'}
-    assert obj.extra_data == {'foo': 'bar'}
+    assert foobar_mark(obj, eng) == {"foo": "bar"}
+    assert obj.extra_data == {"foo": "bar"}
 
 
 def test_mark_overwrites():
-    obj = MockObj({}, {'foo': 'bar'})
+    obj = MockObj({}, {"foo": "bar"})
     eng = MockEng()
 
-    foobaz_mark = mark('foo', 'baz')
+    foobaz_mark = mark("foo", "baz")
 
-    assert foobaz_mark(obj, eng) == {'foo': 'baz'}
-    assert obj.extra_data == {'foo': 'baz'}
+    assert foobaz_mark(obj, eng) == {"foo": "baz"}
+    assert obj.extra_data == {"foo": "baz"}
 
 
 def test_is_marked():
-    obj = MockObj({}, {'foo': 'bar'})
+    obj = MockObj({}, {"foo": "bar"})
     eng = MockEng()
 
-    is_foo_marked = is_marked('foo')
+    is_foo_marked = is_marked("foo")
 
     assert is_foo_marked(obj, eng)
 
@@ -189,22 +201,22 @@ def test_is_marked_returns_false_when_key_does_not_exist():
     obj = MockObj({}, {})
     eng = MockEng()
 
-    is_foo_marked = is_marked('foo')
+    is_foo_marked = is_marked("foo")
 
     assert not is_foo_marked(obj, eng)
 
 
 def test_is_marked_returns_false_when_value_is_falsy():
-    obj = MockObj({}, {'foo': False})
+    obj = MockObj({}, {"foo": False})
     eng = MockEng()
 
-    is_foo_marked = is_marked('foo')
+    is_foo_marked = is_marked("foo")
 
     assert not is_foo_marked(obj, eng)
 
 
 def test_is_record_accepted():
-    obj = MockObj({}, {'approved': True})
+    obj = MockObj({}, {"approved": True})
 
     assert is_record_accepted(obj)
 
@@ -216,13 +228,13 @@ def test_is_record_accepted_returns_false_when_key_does_not_exist():
 
 
 def test_is_record_accepted_returns_false_when_value_is_falsy():
-    obj = MockObj({}, {'approved': False})
+    obj = MockObj({}, {"approved": False})
 
     assert not is_record_accepted(obj)
 
 
 def test_shall_halt_workflow():
-    obj = MockObj({}, {'halt_workflow': True})
+    obj = MockObj({}, {"halt_workflow": True})
 
     assert shall_halt_workflow(obj)
 
@@ -234,13 +246,13 @@ def test_shall_halt_workflow_returns_false_when_key_does_not_exist():
 
 
 def test_shall_halt_workflow_returns_false_when_value_is_falsy():
-    obj = MockObj({}, {'halt_workflow': False})
+    obj = MockObj({}, {"halt_workflow": False})
 
     assert not shall_halt_workflow(obj)
 
 
 def test_in_production_mode():
-    config = {'PRODUCTION_MODE': True}
+    config = {"PRODUCTION_MODE": True}
 
     with patch.dict(current_app.config, config):
         assert in_production_mode()
@@ -254,26 +266,26 @@ def test_in_production_mode_returns_false_when_variable_does_not_exist():
 
 
 def test_in_production_mode_returns_false_when_variable_is_falsy():
-    config = {'PRODUCTION_MODE': False}
+    config = {"PRODUCTION_MODE": False}
 
     with patch.dict(current_app.config, config):
         assert not in_production_mode()
 
 
 def test_add_core_sets_core_to_true_if_extra_data_core_is_true():
-    obj = MockObj({}, {'core': True})
+    obj = MockObj({}, {"core": True})
     eng = MockEng()
 
     assert add_core(obj, eng) is None
-    assert obj.data == {'core': True}
+    assert obj.data == {"core": True}
 
 
 def test_add_core_sets_core_to_false_if_extra_data_core_is_false():
-    obj = MockObj({}, {'core': False})
+    obj = MockObj({}, {"core": False})
     eng = MockEng()
 
     assert add_core(obj, eng) is None
-    assert obj.data == {'core': False}
+    assert obj.data == {"core": False}
 
 
 def test_add_core_does_nothing_if_extra_data_has_no_core_key():
@@ -285,127 +297,180 @@ def test_add_core_does_nothing_if_extra_data_has_no_core_key():
 
 
 def test_add_core_overrides_core_if_extra_data_has_core_key():
-    obj = MockObj({'core': False}, {'core': True})
+    obj = MockObj({"core": False}, {"core": True})
     eng = MockEng()
 
     assert add_core(obj, eng) is None
-    assert obj.data == {'core': True}
+    assert obj.data == {"core": True}
 
 
 def test_halt_record():
-    obj = MockObj({}, {'halt_action': 'foo', 'halt_message': 'bar'})
+    obj = MockObj({}, {"halt_action": "foo", "halt_message": "bar"})
     eng = MockEng()
 
     default_halt_record = halt_record()
 
     assert default_halt_record(obj, eng) is None
-    assert eng.action == 'foo'
-    assert eng.msg == 'bar'
+    assert eng.action == "foo"
+    assert eng.msg == "bar"
 
 
 def test_halt_record_accepts_custom_action():
     obj = MockObj({}, {})
     eng = MockEng()
 
-    foo_action_halt_record = halt_record(action='foo')
+    foo_action_halt_record = halt_record(action="foo")
 
     assert foo_action_halt_record(obj, eng) is None
-    assert eng.action == 'foo'
+    assert eng.action == "foo"
 
 
 def test_halt_record_accepts_custom_msg():
     obj = MockObj({}, {})
     eng = MockEng()
 
-    bar_message_halt_record = halt_record(message='bar')
+    bar_message_halt_record = halt_record(message="bar")
 
     assert bar_message_halt_record(obj, eng) is None
-    assert eng.msg == 'bar'
+    assert eng.msg == "bar"
 
 
 def test_preserve_root():
-    config = {
-        'FEATURE_FLAG_ENABLE_MERGER': True
-    }
+    config = {"FEATURE_FLAG_ENABLE_MERGER": True}
 
     with patch.dict(current_app.config, config):
-        obj = MockObj({'foo': 'bar'}, {})
+        obj = MockObj({"foo": "bar"}, {})
         eng = MockEng()
 
         assert preserve_root(obj, eng) is None
-        assert obj.extra_data['merger_root'] == {'foo': 'bar'}
+        assert obj.extra_data["merger_root"] == {"foo": "bar"}
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.log_workflows_action')
+@patch("inspirehep.modules.workflows.tasks.actions.log_workflows_action")
 def test_reject_record(l_w_a):
-    obj = MockObj({}, {
-        'relevance_prediction': {
-            'max_score': '0.222113',
-            'decision': 'Rejected',
+    obj = MockObj(
+        {},
+        {
+            "relevance_prediction": {
+                "max_score": "0.222113",
+                "decision": "Rejected",
+            },
         },
-    })
+    )
 
-    foo_reject_record = reject_record('foo')
+    foo_reject_record = reject_record("foo")
 
     assert foo_reject_record(obj) is None
     assert obj.extra_data == {
-        'approved': False,
-        'reason': 'foo',
-        'relevance_prediction': {
-            'decision': 'Rejected',
-            'max_score': '0.222113',
+        "approved": False,
+        "reason": "foo",
+        "relevance_prediction": {
+            "decision": "Rejected",
+            "max_score": "0.222113",
         },
     }
-    assert obj.log._info.getvalue() == 'foo'
+    assert obj.log._info.getvalue() == "foo"
     l_w_a.assert_called_once_with(
-        action='reject_record',
+        action="reject_record",
         relevance_prediction={
-            'decision': 'Rejected',
-            'max_score': '0.222113',
+            "decision": "Rejected",
+            "max_score": "0.222113",
         },
         object_id=1,
         user_id=None,
-        source='workflow',
+        source="workflow",
     )
 
 
 @pytest.mark.parametrize(
-    'expected,obj',
+    "expected,obj",
     [
-        (False, _get_auto_reject_obj('Core', has_core_keywords=False, fulltext_used=True)),
-        (False, _get_auto_reject_obj('Non-Core', has_core_keywords=False, fulltext_used=True)),
-        (True, _get_auto_reject_obj('Rejected', has_core_keywords=False, fulltext_used=True)),
-        (False, _get_auto_reject_obj('Core', has_core_keywords=True, fulltext_used=True)),
-        (False, _get_auto_reject_obj('Non-Core', has_core_keywords=True, fulltext_used=True)),
-        (False, _get_auto_reject_obj('Rejected', has_core_keywords=True, fulltext_used=True)),
-        (False, _get_auto_reject_obj('Core', has_core_keywords=False, fulltext_used=False)),
-        (False, _get_auto_reject_obj('Non-Core', has_core_keywords=False, fulltext_used=False)),
-        (False, _get_auto_reject_obj('Rejected', has_core_keywords=False, fulltext_used=False)),
-        (False, _get_auto_reject_obj('Core', has_core_keywords=True, fulltext_used=False)),
-        (False, _get_auto_reject_obj('Non-Core', has_core_keywords=True, fulltext_used=False)),
-        (False, _get_auto_reject_obj('Rejected', has_core_keywords=True, fulltext_used=False)),
+        (
+            False,
+            _get_auto_reject_obj("Core", has_core_keywords=False, fulltext_used=True),
+        ),
+        (
+            False,
+            _get_auto_reject_obj(
+                "Non-Core", has_core_keywords=False, fulltext_used=True
+            ),
+        ),
+        (
+            True,
+            _get_auto_reject_obj(
+                "Rejected", has_core_keywords=False, fulltext_used=True
+            ),
+        ),
+        (
+            False,
+            _get_auto_reject_obj("Core", has_core_keywords=True, fulltext_used=True),
+        ),
+        (
+            False,
+            _get_auto_reject_obj(
+                "Non-Core", has_core_keywords=True, fulltext_used=True
+            ),
+        ),
+        (
+            False,
+            _get_auto_reject_obj(
+                "Rejected", has_core_keywords=True, fulltext_used=True
+            ),
+        ),
+        (
+            False,
+            _get_auto_reject_obj("Core", has_core_keywords=False, fulltext_used=False),
+        ),
+        (
+            False,
+            _get_auto_reject_obj(
+                "Non-Core", has_core_keywords=False, fulltext_used=False
+            ),
+        ),
+        (
+            False,
+            _get_auto_reject_obj(
+                "Rejected", has_core_keywords=False, fulltext_used=False
+            ),
+        ),
+        (
+            False,
+            _get_auto_reject_obj("Core", has_core_keywords=True, fulltext_used=False),
+        ),
+        (
+            False,
+            _get_auto_reject_obj(
+                "Non-Core", has_core_keywords=True, fulltext_used=False
+            ),
+        ),
+        (
+            False,
+            _get_auto_reject_obj(
+                "Rejected", has_core_keywords=True, fulltext_used=False
+            ),
+        ),
     ],
     ids=[
-        'Dont reject: No core keywords (from fulltext) with core decision',
-        'Dont reject: No core keywords (from fulltext) with non-core decision',
-        'Reject: No core keywords (from fulltext) with rejected decision',
-        'Dont reject: Core keywords (from fulltext) with core decision',
-        'Dont reject: Core keywords (from fulltext) with non-core decision',
-        'Dont reject: Core keywords (from fulltext) with rejected decision',
-        'Dont reject: No core keywords (not from fulltext) with core decision',
-        'Dont reject: No core keywords (not from fulltext) with non-core decision',
-        'Dont reject: No core keywords (not from fulltext) with rejected decision',
-        'Dont reject: Core keywords (not from fulltext) with core decision',
-        'Dont reject: Core keywords (not from fulltext) with non-core decision',
-        'Dont reject: Core keywords (not from fulltext) with rejected decision',
-    ]
+        "Dont reject: No core keywords (from fulltext) with core decision",
+        "Dont reject: No core keywords (from fulltext) with non-core decision",
+        "Reject: No core keywords (from fulltext) with rejected decision",
+        "Dont reject: Core keywords (from fulltext) with core decision",
+        "Dont reject: Core keywords (from fulltext) with non-core decision",
+        "Dont reject: Core keywords (from fulltext) with rejected decision",
+        "Dont reject: No core keywords (not from fulltext) with core decision",
+        "Dont reject: No core keywords (not from fulltext) with non-core decision",
+        "Dont reject: No core keywords (not from fulltext) with rejected decision",
+        "Dont reject: Core keywords (not from fulltext) with core decision",
+        "Dont reject: Core keywords (not from fulltext) with non-core decision",
+        "Dont reject: Core keywords (not from fulltext) with rejected decision",
+    ],
 )
 def test__is_auto_rejected(expected, obj):
     assert _is_auto_rejected(obj) is expected
 
 
 @pytest.mark.parametrize(
-    'expected, should_submission, should_auto_reject, should_auto_approve, full_journal_coverage',
+    "expected, should_submission, should_auto_reject, should_auto_approve, full_journal_coverage",
     [
         (True, True, True, False, False),
         (True, True, False, False, False),
@@ -417,20 +482,20 @@ def test__is_auto_rejected(expected, obj):
         (True, False, False, True, False),
     ],
     ids=[
-        'Relevant: non auto-approved is submission and autorejected',
-        'Relevant: non auto-approved is submission and not autorejected',
-        'Not relevant: non auto-approved is not submission and autorejected',
-        'Relevant: non auto-approved is not submission and not autorejected',
-        'Relevant: auto-approved is submission and autorejected',
-        'Relevant: auto-approved is submission and not autorejected',
-        'Relevant: auto-approved is not submission and autorejected',
-        'Relevant: auto-approved is not submission and not autorejected',
-    ]
+        "Relevant: non auto-approved is submission and autorejected",
+        "Relevant: non auto-approved is submission and not autorejected",
+        "Not relevant: non auto-approved is not submission and autorejected",
+        "Relevant: non auto-approved is not submission and not autorejected",
+        "Relevant: auto-approved is submission and autorejected",
+        "Relevant: auto-approved is submission and not autorejected",
+        "Relevant: auto-approved is not submission and autorejected",
+        "Relevant: auto-approved is not submission and not autorejected",
+    ],
 )
-@patch('inspirehep.modules.workflows.tasks.actions._is_journal_coverage_full')
-@patch('inspirehep.modules.workflows.tasks.actions.is_submission')
-@patch('inspirehep.modules.workflows.tasks.actions._is_auto_rejected')
-@patch('inspirehep.modules.workflows.tasks.actions._is_auto_approved')
+@patch("inspirehep.modules.workflows.tasks.actions._is_journal_coverage_full")
+@patch("inspirehep.modules.workflows.tasks.actions.is_submission")
+@patch("inspirehep.modules.workflows.tasks.actions._is_auto_rejected")
+@patch("inspirehep.modules.workflows.tasks.actions._is_auto_approved")
 def test_is_record_relevant(
     _is_auto_approved_mock,
     _is_auto_rejected_mock,
@@ -452,9 +517,9 @@ def test_is_record_relevant(
     assert is_record_relevant(obj, eng) is expected
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.is_submission')
-@patch('inspirehep.modules.workflows.tasks.actions._is_auto_rejected')
-@patch('inspirehep.modules.workflows.tasks.actions._is_auto_approved')
+@patch("inspirehep.modules.workflows.tasks.actions.is_submission")
+@patch("inspirehep.modules.workflows.tasks.actions._is_auto_rejected")
+@patch("inspirehep.modules.workflows.tasks.actions._is_auto_approved")
 def test_is_record_relevant_when_journal_coverage_full(
     _is_auto_approved_mock,
     _is_auto_rejected_mock,
@@ -464,16 +529,16 @@ def test_is_record_relevant_when_journal_coverage_full(
     _is_auto_rejected_mock.return_value = False
     is_submission_mock.return_value = False
     data = {
-        'arxiv_eprints': [
+        "arxiv_eprints": [
             {
-                'categories': [
-                    'hep-ex',
+                "categories": [
+                    "hep-ex",
                 ],
-                'value': 'hep-ex/0008040',
+                "value": "hep-ex/0008040",
             },
         ],
     }  # literature/532168
-    extra_data = {'journal_coverage': 'full'}
+    extra_data = {"journal_coverage": "full"}
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -482,21 +547,21 @@ def test_is_record_relevant_when_journal_coverage_full(
 
 
 def test_is_experimental_paper_returns_true_if_obj_has_an_experimental_arxiv_category():
-    schema = load_schema('hep')
-    subschema = schema['properties']['arxiv_eprints']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["arxiv_eprints"]
 
     data = {
-        'arxiv_eprints': [
+        "arxiv_eprints": [
             {
-                'categories': [
-                    'hep-ex',
+                "categories": [
+                    "hep-ex",
                 ],
-                'value': 'hep-ex/0008040',
+                "value": "hep-ex/0008040",
             },
         ],
     }  # literature/532168
     extra_data = {}
-    assert validate(data['arxiv_eprints'], subschema) is None
+    assert validate(data["arxiv_eprints"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -505,16 +570,16 @@ def test_is_experimental_paper_returns_true_if_obj_has_an_experimental_arxiv_cat
 
 
 def test_is_experimental_paper_returns_true_if_obj_has_an_experimental_inspire_category():
-    schema = load_schema('hep')
-    subschema = schema['properties']['inspire_categories']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["inspire_categories"]
 
     data = {
-        'inspire_categories': [
-            {'term': 'Experiment-HEP'},
+        "inspire_categories": [
+            {"term": "Experiment-HEP"},
         ],
     }  # literature/532168
     extra_data = {}
-    assert validate(data['inspire_categories'], subschema) is None
+    assert validate(data["inspire_categories"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -530,16 +595,16 @@ def test_is_experimental_paper_returns_false_otherwise():
 
 
 def test_is_experimental_paper_does_not_raise_if_obj_has_no_arxiv_category():
-    schema = load_schema('hep')
-    subschema = schema['properties']['arxiv_eprints']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["arxiv_eprints"]
 
     data = {
-        'arxiv_eprints': [
-            {'value': '1712.02280'},
+        "arxiv_eprints": [
+            {"value": "1712.02280"},
         ],
     }
     extra_data = {}
-    assert validate(data['arxiv_eprints'], subschema) is None
+    assert validate(data["arxiv_eprints"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -555,27 +620,27 @@ def test_is_arxiv_paper_returns_false_if_acquision_source_is_not_present():
 
 
 def test_is_arxiv_paper_returns_false_if_method_is_not_hepcrawl_or_arxiv():
-    schema = load_schema('hep')
-    acquisition_source_schema = schema['properties']['acquisition_source']
-    arxiv_eprints_schema = schema['properties']['arxiv_eprints']
+    schema = load_schema("hep")
+    acquisition_source_schema = schema["properties"]["acquisition_source"]
+    arxiv_eprints_schema = schema["properties"]["arxiv_eprints"]
 
     data = {
-        'acquisition_source': {
-            'method': 'batchuploader',
-            'source': 'arxiv',
+        "acquisition_source": {
+            "method": "batchuploader",
+            "source": "arxiv",
         },
-        'arxiv_eprints': [
+        "arxiv_eprints": [
             {
-                'categories': [
-                    'hep-th',
+                "categories": [
+                    "hep-th",
                 ],
-                'value': '0801.4782',
+                "value": "0801.4782",
             },
         ],
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], acquisition_source_schema) is None
-    assert validate(data['arxiv_eprints'], arxiv_eprints_schema) is None
+    assert validate(data["acquisition_source"], acquisition_source_schema) is None
+    assert validate(data["arxiv_eprints"], arxiv_eprints_schema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -584,26 +649,26 @@ def test_is_arxiv_paper_returns_false_if_method_is_not_hepcrawl_or_arxiv():
 
 
 def test_is_arxiv_paper_for_submission():
-    schema = load_schema('hep')
-    acquisition_source_schema = schema['properties']['acquisition_source']
-    arxiv_eprints_schema = schema['properties']['arxiv_eprints']
+    schema = load_schema("hep")
+    acquisition_source_schema = schema["properties"]["acquisition_source"]
+    arxiv_eprints_schema = schema["properties"]["arxiv_eprints"]
 
     data = {
-        'acquisition_source': {
-            'method': 'submitter',
+        "acquisition_source": {
+            "method": "submitter",
         },
-        'arxiv_eprints': [
+        "arxiv_eprints": [
             {
-                'categories': [
-                    'hep-th',
+                "categories": [
+                    "hep-th",
                 ],
-                'value': '0801.4782',
+                "value": "0801.4782",
             },
         ],
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], acquisition_source_schema) is None
-    assert validate(data['arxiv_eprints'], arxiv_eprints_schema) is None
+    assert validate(data["acquisition_source"], acquisition_source_schema) is None
+    assert validate(data["arxiv_eprints"], arxiv_eprints_schema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -612,16 +677,16 @@ def test_is_arxiv_paper_for_submission():
 
 
 def test_is_arxiv_paper_returns_false_when_arxiv_eprints_is_not_present_for_submission():
-    schema = load_schema('hep')
-    subschema = schema['properties']['acquisition_source']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["acquisition_source"]
 
     data = {
-        'acquisition_source': {
-            'method': 'submitter',
+        "acquisition_source": {
+            "method": "submitter",
         },
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], subschema) is None
+    assert validate(data["acquisition_source"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -630,17 +695,17 @@ def test_is_arxiv_paper_returns_false_when_arxiv_eprints_is_not_present_for_subm
 
 
 def test_is_arxiv_paper_for_hepcrawl():
-    schema = load_schema('hep')
-    subschema = schema['properties']['acquisition_source']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["acquisition_source"]
 
     data = {
-        'acquisition_source': {
-            'method': 'hepcrawl',
-            'source': 'arxiv',
+        "acquisition_source": {
+            "method": "hepcrawl",
+            "source": "arxiv",
         },
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], subschema) is None
+    assert validate(data["acquisition_source"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -649,17 +714,17 @@ def test_is_arxiv_paper_for_hepcrawl():
 
 
 def test_is_arxiv_paper_ignores_case_for_hepcrawl():
-    schema = load_schema('hep')
-    subschema = schema['properties']['acquisition_source']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["acquisition_source"]
 
     data = {
-        'acquisition_source': {
-            'method': 'hepcrawl',
-            'source': 'arXiv',
+        "acquisition_source": {
+            "method": "hepcrawl",
+            "source": "arXiv",
         },
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], subschema) is None
+    assert validate(data["acquisition_source"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -668,17 +733,17 @@ def test_is_arxiv_paper_ignores_case_for_hepcrawl():
 
 
 def test_is_arxiv_paper_returns_false_if_source_is_not_arxiv_for_hepcrawl():
-    schema = load_schema('hep')
-    subschema = schema['properties']['acquisition_source']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["acquisition_source"]
 
     data = {
-        'acquisition_source': {
-            'method': 'hepcrawl',
-            'source': 'something else',
+        "acquisition_source": {
+            "method": "hepcrawl",
+            "source": "something else",
         },
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], subschema) is None
+    assert validate(data["acquisition_source"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -687,16 +752,16 @@ def test_is_arxiv_paper_returns_false_if_source_is_not_arxiv_for_hepcrawl():
 
 
 def test_is_arxiv_paper_returns_false_if_source_is_not_present_for_hepcrawl():
-    schema = load_schema('hep')
-    subschema = schema['properties']['acquisition_source']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["acquisition_source"]
 
     data = {
-        'acquisition_source': {
-            'method': 'hepcrawl',
+        "acquisition_source": {
+            "method": "hepcrawl",
         },
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], subschema) is None
+    assert validate(data["acquisition_source"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -705,14 +770,14 @@ def test_is_arxiv_paper_returns_false_if_source_is_not_present_for_hepcrawl():
 
 
 def test_is_submission():
-    obj = MockObj({'acquisition_source': {'method': 'submitter'}}, {})
+    obj = MockObj({"acquisition_source": {"method": "submitter"}}, {})
     eng = MockEng()
 
     assert is_submission(obj, eng)
 
 
 def test_is_submission_returns_false_if_method_is_not_submission():
-    obj = MockObj({'acquisition_source': {'method': 'not-submission'}}, {})
+    obj = MockObj({"acquisition_source": {"method": "not-submission"}}, {})
     eng = MockEng()
 
     assert not is_submission(obj, eng)
@@ -733,17 +798,17 @@ def test_is_submission_returns_false_if_obj_has_falsy_acquisition_source():
 
 
 def test_fix_submission_number():
-    schema = load_schema('hep')
-    subschema = schema['properties']['acquisition_source']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["acquisition_source"]
 
     data = {
-        'acquisition_source': {
-            'method': 'hepcrawl',
-            'submission_number': '751e374a017311e896d6fa163ec92c6a',
+        "acquisition_source": {
+            "method": "hepcrawl",
+            "submission_number": "751e374a017311e896d6fa163ec92c6a",
         },
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], subschema) is None
+    assert validate(data["acquisition_source"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -751,27 +816,27 @@ def test_fix_submission_number():
     fix_submission_number(obj, eng)
 
     expected = {
-        'method': 'hepcrawl',
-        'submission_number': '1',
+        "method": "hepcrawl",
+        "submission_number": "1",
     }
-    result = obj.data['acquisition_source']
+    result = obj.data["acquisition_source"]
 
     assert validate(result, subschema) is None
     assert expected == result
 
 
 def fix_submission_number_does_nothing_if_method_is_not_hepcrawl():
-    schema = load_schema('hep')
-    subschema = schema['properties']['acquisition_source']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["acquisition_source"]
 
     data = {
-        'acquisition_source': {
-            'method': 'submitter',
-            'submission_number': '869215',
+        "acquisition_source": {
+            "method": "submitter",
+            "submission_number": "869215",
         },
     }
     extra_data = {}
-    assert validate(data['acquisition_source'], subschema) is None
+    assert validate(data["acquisition_source"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
@@ -779,81 +844,87 @@ def fix_submission_number_does_nothing_if_method_is_not_hepcrawl():
     fix_submission_number(obj, eng)
 
     expected = {
-        'method': 'submitter',
-        'submission_number': '869215',
+        "method": "submitter",
+        "submission_number": "869215",
     }
-    result = obj.data['acquisition_source']
+    result = obj.data["acquisition_source"]
 
     assert validate(result, subschema) is None
     assert expected == result
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.replace_refs')
-def test_populate_journal_coverage_writes_full_if_any_coverage_is_full(mock_replace_refs):
-    schema = load_schema('journals')
-    subschema = schema['properties']['_harvesting_info']
+@patch("inspirehep.modules.workflows.tasks.actions.replace_refs")
+def test_populate_journal_coverage_writes_full_if_any_coverage_is_full(
+    mock_replace_refs,
+):
+    schema = load_schema("journals")
+    subschema = schema["properties"]["_harvesting_info"]
 
-    journals = [{'_harvesting_info': {'coverage': 'full'}}]
-    assert validate(journals[0]['_harvesting_info'], subschema) is None
+    journals = [{"_harvesting_info": {"coverage": "full"}}]
+    assert validate(journals[0]["_harvesting_info"], subschema) is None
 
     mock_replace_refs.return_value = journals
 
-    schema = load_schema('hep')
-    subschema = schema['properties']['publication_info']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["publication_info"]
 
     data = {
-        'publication_info': [
-            {'journal_record': {'$ref': 'http://localhost:/api/journals/1213103'}},
+        "publication_info": [
+            {"journal_record": {"$ref": "http://localhost:/api/journals/1213103"}},
         ],
     }
     extra_data = {}
-    assert validate(data['publication_info'], subschema) is None
+    assert validate(data["publication_info"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
 
     assert populate_journal_coverage(obj, eng) is None
 
-    expected = 'full'
-    result = obj.extra_data['journal_coverage']
+    expected = "full"
+    result = obj.extra_data["journal_coverage"]
 
     assert expected == result
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.replace_refs')
-def test_populate_journal_coverage_writes_partial_if_all_coverages_are_partial(mock_replace_refs):
-    schema = load_schema('journals')
-    subschema = schema['properties']['_harvesting_info']
+@patch("inspirehep.modules.workflows.tasks.actions.replace_refs")
+def test_populate_journal_coverage_writes_partial_if_all_coverages_are_partial(
+    mock_replace_refs,
+):
+    schema = load_schema("journals")
+    subschema = schema["properties"]["_harvesting_info"]
 
-    journals = [{'_harvesting_info': {'coverage': 'partial'}}]
-    assert validate(journals[0]['_harvesting_info'], subschema) is None
+    journals = [{"_harvesting_info": {"coverage": "partial"}}]
+    assert validate(journals[0]["_harvesting_info"], subschema) is None
 
     mock_replace_refs.return_value = journals
 
-    schema = load_schema('hep')
-    subschema = schema['properties']['publication_info']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["publication_info"]
 
     data = {
-        'publication_info': [
-            {'journal_record': {'$ref': 'http://localhost:/api/journals/1212337'}},
+        "publication_info": [
+            {"journal_record": {"$ref": "http://localhost:/api/journals/1212337"}},
         ],
     }
     extra_data = {}
-    assert validate(data['publication_info'], subschema) is None
+    assert validate(data["publication_info"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
 
     assert populate_journal_coverage(obj, eng) is None
 
-    expected = 'partial'
-    result = obj.extra_data['journal_coverage']
+    expected = "partial"
+    result = obj.extra_data["journal_coverage"]
 
     assert expected == result
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.replace_refs')
-def test_populate_journal_coverage_does_nothing_if_no_journal_is_found(mock_replace_refs):
+@patch("inspirehep.modules.workflows.tasks.actions.replace_refs")
+def test_populate_journal_coverage_does_nothing_if_no_journal_is_found(
+    mock_replace_refs,
+):
     mock_replace_refs.return_value = []
 
     data = {}
@@ -863,36 +934,38 @@ def test_populate_journal_coverage_does_nothing_if_no_journal_is_found(mock_repl
     eng = MockEng()
 
     assert populate_journal_coverage(obj, eng) is None
-    assert 'journal_coverage' not in obj.extra_data
+    assert "journal_coverage" not in obj.extra_data
 
 
 def test_populate_submission_document():
     with requests_mock.Mocker() as requests_mocker:
         requests_mocker.register_uri(
-            'GET', 'http://export.arxiv.org/pdf/1605.03844',
+            "GET",
+            "http://export.arxiv.org/pdf/1605.03844",
             content=pkg_resources.resource_string(
-                __name__, os.path.join('fixtures', '1605.03844.pdf')),
+                __name__, os.path.join("fixtures", "1605.03844.pdf")
+            ),
         )
 
-        schema = load_schema('hep')
-        subschema = schema['properties']['acquisition_source']
+        schema = load_schema("hep")
+        subschema = schema["properties"]["acquisition_source"]
 
         data = {
-            'acquisition_source': {
-                'datetime': '2017-11-30T16:38:43.352370',
-                'email': 'david.caro@cern.ch',
-                'internal_uid': 54252,
-                'method': 'submitter',
-                'orcid': '0000-0002-2174-4493',
-                'source': 'submitter',
-                'submission_number': '1',
+            "acquisition_source": {
+                "datetime": "2017-11-30T16:38:43.352370",
+                "email": "david.caro@cern.ch",
+                "internal_uid": 54252,
+                "method": "submitter",
+                "orcid": "0000-0002-2174-4493",
+                "source": "submitter",
+                "submission_number": "1",
             },
         }
         extra_data = {
-            'submission_pdf': 'http://export.arxiv.org/pdf/1605.03844',
+            "submission_pdf": "http://export.arxiv.org/pdf/1605.03844",
         }
         files = MockFiles({})
-        assert validate(data['acquisition_source'], subschema) is None
+        assert validate(data["acquisition_source"], subschema) is None
 
         obj = MockObj(data, extra_data, files=files)
         eng = MockEng()
@@ -901,14 +974,14 @@ def test_populate_submission_document():
 
         expected = [
             {
-                'fulltext': True,
-                'key': 'fulltext.pdf',
-                'original_url': 'http://export.arxiv.org/pdf/1605.03844',
-                'source': 'submitter',
-                'url': 'http://export.arxiv.org/pdf/1605.03844',
+                "fulltext": True,
+                "key": "fulltext.pdf",
+                "original_url": "http://export.arxiv.org/pdf/1605.03844",
+                "source": "submitter",
+                "url": "http://export.arxiv.org/pdf/1605.03844",
             },
         ]
-        result = obj.data['documents']
+        result = obj.data["documents"]
 
         assert expected == result
 
@@ -916,30 +989,32 @@ def test_populate_submission_document():
 def test_populate_submission_document_does_not_duplicate_documents():
     with requests_mock.Mocker() as requests_mocker:
         requests_mocker.register_uri(
-            'GET', 'http://export.arxiv.org/pdf/1605.03844',
+            "GET",
+            "http://export.arxiv.org/pdf/1605.03844",
             content=pkg_resources.resource_string(
-                __name__, os.path.join('fixtures', '1605.03844.pdf')),
+                __name__, os.path.join("fixtures", "1605.03844.pdf")
+            ),
         )
 
-        schema = load_schema('hep')
-        subschema = schema['properties']['acquisition_source']
+        schema = load_schema("hep")
+        subschema = schema["properties"]["acquisition_source"]
 
         data = {
-            'acquisition_source': {
-                'datetime': '2017-11-30T16:38:43.352370',
-                'email': 'david.caro@cern.ch',
-                'internal_uid': 54252,
-                'method': 'submitter',
-                'orcid': '0000-0002-2174-4493',
-                'source': 'submitter',
-                'submission_number': '1',
+            "acquisition_source": {
+                "datetime": "2017-11-30T16:38:43.352370",
+                "email": "david.caro@cern.ch",
+                "internal_uid": 54252,
+                "method": "submitter",
+                "orcid": "0000-0002-2174-4493",
+                "source": "submitter",
+                "submission_number": "1",
             },
         }
         extra_data = {
-            'submission_pdf': 'http://export.arxiv.org/pdf/1605.03844',
+            "submission_pdf": "http://export.arxiv.org/pdf/1605.03844",
         }
         files = MockFiles({})
-        assert validate(data['acquisition_source'], subschema) is None
+        assert validate(data["acquisition_source"], subschema) is None
 
         obj = MockObj(data, extra_data, files=files)
         eng = MockEng()
@@ -949,14 +1024,14 @@ def test_populate_submission_document_does_not_duplicate_documents():
 
         expected = [
             {
-                'fulltext': True,
-                'key': 'fulltext.pdf',
-                'original_url': 'http://export.arxiv.org/pdf/1605.03844',
-                'source': 'submitter',
-                'url': 'http://export.arxiv.org/pdf/1605.03844',
+                "fulltext": True,
+                "key": "fulltext.pdf",
+                "original_url": "http://export.arxiv.org/pdf/1605.03844",
+                "source": "submitter",
+                "url": "http://export.arxiv.org/pdf/1605.03844",
             },
         ]
-        result = obj.data['documents']
+        result = obj.data["documents"]
 
         assert expected == result
 
@@ -964,51 +1039,53 @@ def test_populate_submission_document_does_not_duplicate_documents():
 def test_populate_submission_document_without_pdf():
     with requests_mock.Mocker() as requests_mocker:
         requests_mocker.register_uri(
-            'GET', 'http://export.arxiv.org/pdf/1707.02785',
+            "GET",
+            "http://export.arxiv.org/pdf/1707.02785",
             content=pkg_resources.resource_string(
-                __name__, os.path.join('fixtures', '1707.02785.html')),
+                __name__, os.path.join("fixtures", "1707.02785.html")
+            ),
         )
 
-        schema = load_schema('hep')
-        subschema = schema['properties']['acquisition_source']
+        schema = load_schema("hep")
+        subschema = schema["properties"]["acquisition_source"]
         data = {
-            'acquisition_source': {
-                'datetime': '2017-11-30T16:38:43.352370',
-                'email': 'david.caro@cern.ch',
-                'internal_uid': 54252,
-                'method': 'submitter',
-                'orcid': '0000-0002-2174-4493',
-                'source': 'submitter',
-                'submission_number': '1'
+            "acquisition_source": {
+                "datetime": "2017-11-30T16:38:43.352370",
+                "email": "david.caro@cern.ch",
+                "internal_uid": 54252,
+                "method": "submitter",
+                "orcid": "0000-0002-2174-4493",
+                "source": "submitter",
+                "submission_number": "1",
             }
         }
-        assert validate(data['acquisition_source'], subschema) is None
+        assert validate(data["acquisition_source"], subschema) is None
 
         extra_data = {
-            'submission_pdf': 'http://export.arxiv.org/pdf/1707.02785',
+            "submission_pdf": "http://export.arxiv.org/pdf/1707.02785",
         }
         files = MockFiles({})
         obj = MockObj(data, extra_data, files=files)
         eng = MockEng()
 
         assert populate_submission_document(obj, eng) is None
-        assert not obj.data.get('documents')
+        assert not obj.data.get("documents")
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.replace_refs')
+@patch("inspirehep.modules.workflows.tasks.actions.replace_refs")
 def test_set_refereed_and_fix_document_type(mock_replace_refs):
-    schema = load_schema('journals')
-    subschema = schema['properties']['refereed']
+    schema = load_schema("journals")
+    subschema = schema["properties"]["refereed"]
 
-    journals = [{'refereed': True}]
-    assert validate(journals[0]['refereed'], subschema) is None
+    journals = [{"refereed": True}]
+    assert validate(journals[0]["refereed"], subschema) is None
 
     mock_replace_refs.return_value = journals
 
-    schema = load_schema('hep')
-    subschema = schema['properties']['refereed']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["refereed"]
 
-    data = {'document_type': ['article']}
+    data = {"document_type": ["article"]}
     extra_data = {}
 
     obj = MockObj(data, extra_data)
@@ -1017,28 +1094,30 @@ def test_set_refereed_and_fix_document_type(mock_replace_refs):
     assert set_refereed_and_fix_document_type(obj, eng) is None
 
     expected = True
-    result = obj.data['refereed']
+    result = obj.data["refereed"]
 
     assert validate(result, subschema) is None
     assert expected == result
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.replace_refs')
-def test_set_refereed_and_fix_document_type_handles_journals_that_publish_mixed_content(mock_replace_refs):
-    schema = load_schema('journals')
-    proceedings_schema = schema['properties']['proceedings']
-    refereed_schema = schema['properties']['refereed']
+@patch("inspirehep.modules.workflows.tasks.actions.replace_refs")
+def test_set_refereed_and_fix_document_type_handles_journals_that_publish_mixed_content(
+    mock_replace_refs,
+):
+    schema = load_schema("journals")
+    proceedings_schema = schema["properties"]["proceedings"]
+    refereed_schema = schema["properties"]["refereed"]
 
-    journals = [{'proceedings': True, 'refereed': True}]
-    assert validate(journals[0]['proceedings'], proceedings_schema) is None
-    assert validate(journals[0]['refereed'], refereed_schema) is None
+    journals = [{"proceedings": True, "refereed": True}]
+    assert validate(journals[0]["proceedings"], proceedings_schema) is None
+    assert validate(journals[0]["refereed"], refereed_schema) is None
 
     mock_replace_refs.return_value = journals
 
-    schema = load_schema('hep')
-    subschema = schema['properties']['refereed']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["refereed"]
 
-    data = {'document_type': ['article']}
+    data = {"document_type": ["article"]}
     extra_data = {}
 
     obj = MockObj(data, extra_data)
@@ -1047,26 +1126,28 @@ def test_set_refereed_and_fix_document_type_handles_journals_that_publish_mixed_
     assert set_refereed_and_fix_document_type(obj, eng) is None
 
     expected = True
-    result = obj.data['refereed']
+    result = obj.data["refereed"]
 
     assert validate(result, subschema) is None
     assert expected == result
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.replace_refs')
-def test_set_refereed_and_fix_document_type_sets_refereed_to_false_if_all_journals_are_not_refereed(mock_replace_refs):
-    schema = load_schema('journals')
-    subschema = schema['properties']['refereed']
+@patch("inspirehep.modules.workflows.tasks.actions.replace_refs")
+def test_set_refereed_and_fix_document_type_sets_refereed_to_false_if_all_journals_are_not_refereed(
+    mock_replace_refs,
+):
+    schema = load_schema("journals")
+    subschema = schema["properties"]["refereed"]
 
-    journals = [{'refereed': False}]
-    assert validate(journals[0]['refereed'], subschema) is None
+    journals = [{"refereed": False}]
+    assert validate(journals[0]["refereed"], subschema) is None
 
     mock_replace_refs.return_value = journals
 
-    schema = load_schema('hep')
-    subschema = schema['properties']['refereed']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["refereed"]
 
-    data = {'document_type': ['article']}
+    data = {"document_type": ["article"]}
     extra_data = {}
 
     obj = MockObj(data, extra_data)
@@ -1075,26 +1156,28 @@ def test_set_refereed_and_fix_document_type_sets_refereed_to_false_if_all_journa
     assert set_refereed_and_fix_document_type(obj, eng) is None
 
     expected = False
-    result = obj.data['refereed']
+    result = obj.data["refereed"]
 
     assert validate(result, subschema) is None
     assert expected == result
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.replace_refs')
-def test_set_refereed_and_fix_document_type_replaces_article_with_conference_paper_if_needed(mock_replace_refs):
-    schema = load_schema('journals')
-    subschema = schema['properties']['proceedings']
+@patch("inspirehep.modules.workflows.tasks.actions.replace_refs")
+def test_set_refereed_and_fix_document_type_replaces_article_with_conference_paper_if_needed(
+    mock_replace_refs,
+):
+    schema = load_schema("journals")
+    subschema = schema["properties"]["proceedings"]
 
-    journals = [{'proceedings': True}]
-    assert validate(journals[0]['proceedings'], subschema) is None
+    journals = [{"proceedings": True}]
+    assert validate(journals[0]["proceedings"], subschema) is None
 
     mock_replace_refs.return_value = journals
 
-    schema = load_schema('hep')
-    subschema = schema['properties']['document_type']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["document_type"]
 
-    data = {'document_type': ['article']}
+    data = {"document_type": ["article"]}
     extra_data = {}
 
     obj = MockObj(data, extra_data)
@@ -1102,39 +1185,41 @@ def test_set_refereed_and_fix_document_type_replaces_article_with_conference_pap
 
     assert set_refereed_and_fix_document_type(obj, eng) is None
 
-    expected = ['conference paper']
-    result = obj.data['document_type']
+    expected = ["conference paper"]
+    result = obj.data["document_type"]
 
     assert validate(result, subschema) is None
     assert expected == result
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.replace_refs')
-def test_set_refereed_and_fix_document_type_does_nothing_if_no_journals_were_found(mock_replace_refs):
+@patch("inspirehep.modules.workflows.tasks.actions.replace_refs")
+def test_set_refereed_and_fix_document_type_does_nothing_if_no_journals_were_found(
+    mock_replace_refs,
+):
     mock_replace_refs.return_value = []
 
-    data = {'document_type': ['article']}
+    data = {"document_type": ["article"]}
     extra_data = {}
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
 
     assert set_refereed_and_fix_document_type(obj, eng) is None
-    assert 'refereed' not in obj.data
+    assert "refereed" not in obj.data
 
 
 def test_validate_record():
-    schema = load_schema('hep')
+    schema = load_schema("hep")
 
     data = {
-        '_collections': [
-            'Literature',
+        "_collections": [
+            "Literature",
         ],
-        'document_type': [
-            'article',
+        "document_type": [
+            "article",
         ],
-        'titles': [
-            {'title': 'Partial Symmetries of Weak Interactions'},
+        "titles": [
+            {"title": "Partial Symmetries of Weak Interactions"},
         ],
     }
     extra_data = {}
@@ -1143,20 +1228,20 @@ def test_validate_record():
     obj = MockObj(data, extra_data)
     eng = MockEng()
 
-    _validate_record = validate_record('hep')
+    _validate_record = validate_record("hep")
 
     assert _validate_record(obj, eng) is None
 
 
 def test_validate_record_raises_when_record_is_invalid():
-    schema = load_schema('hep')
+    schema = load_schema("hep")
 
     data = {
-        'document_type': [
-            'article',
+        "document_type": [
+            "article",
         ],
-        'titles': [
-            {'title': 'Partial Symmetries of Weak Interactions'},
+        "titles": [
+            {"title": "Partial Symmetries of Weak Interactions"},
         ],
     }
     extra_data = {}
@@ -1166,96 +1251,123 @@ def test_validate_record_raises_when_record_is_invalid():
     obj = MockObj(data, extra_data)
     eng = MockEng()
 
-    _validate_record = validate_record('hep')
+    _validate_record = validate_record("hep")
 
     with pytest.raises(ValidationError):
         _validate_record(obj, eng)
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.create_journal_kb_dict', return_value={})
-@patch('inspirehep.modules.workflows.tasks.actions.get_document_in_workflow')
 @patch(
-    'inspirehep.modules.refextract.matcher.match',
-    return_value=iter([])
+    "inspirehep.modules.workflows.tasks.actions.create_journal_kb_dict", return_value={}
 )
-def test_refextract_from_text(mock_match, mock_get_document_in_workflow, mock_create_journal_kb_dict):
+@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow")
+@patch("inspirehep.modules.refextract.matcher.match", return_value=iter([]))
+def test_refextract_from_text(
+    mock_match, mock_get_document_in_workflow, mock_create_journal_kb_dict
+):
     """TODO: Make this an integration test and also test reference matching."""
 
     mock_get_document_in_workflow.return_value.__enter__.return_value = None
     mock_get_document_in_workflow.return_value.__exit__.return_value = None
 
-    schema = load_schema('hep')
-    subschema = schema['properties']['acquisition_source']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["acquisition_source"]
 
-    data = {'acquisition_source': {'source': 'submitter'}}
+    data = {"acquisition_source": {"source": "submitter"}}
     extra_data = {
-        'formdata': {
-            'references': 'M.R. Douglas, G.W. Moore, D-branes, quivers, and ALE instantons, arXiv:hep-th/9603167',
+        "formdata": {
+            "references": "M.R. Douglas, G.W. Moore, D-branes, quivers, and ALE instantons, arXiv:hep-th/9603167",
         },
     }
-    assert validate(data['acquisition_source'], subschema) is None
+    assert validate(data["acquisition_source"], subschema) is None
 
     obj = MockObj(data, extra_data)
     eng = MockEng()
+    with requests_mock.Mocker() as requests_mocker:
+        requests_mocker.register_uri(
+            "POST",
+            "http://web:8000/api/matcher/linked_references/",
+            json={"references": [{"raw_refs": [{"source": "submitter"}]}]},
+            status_code=200,
+        )
 
-    assert refextract(obj, eng) is None
-    assert obj.data['references'][0]['raw_refs'][0]['source'] == 'submitter'
+        assert refextract(obj, eng) is None
+        assert obj.data["references"][0]["raw_refs"][0]["source"] == "submitter"
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.create_journal_kb_dict', return_value={})
 @patch(
-    'inspirehep.modules.refextract.matcher.match',
-    return_value=iter([])
+    "inspirehep.modules.workflows.tasks.actions.create_journal_kb_dict", return_value={}
 )
+@patch("inspirehep.modules.refextract.matcher.match", return_value=iter([]))
 def test_refextract_from_raw_refs(mock_create_journal_dict, mock_match):
     """TODO: Make this an integration test and also test reference matching."""
-    schema = load_schema('hep')
-    subschema = schema['properties']['references']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["references"]
 
     data = {
-        'references': [
+        "references": [
             {
-                'raw_refs': [
+                "raw_refs": [
                     {
-                        'schema': 'text',
-                        'source': 'arXiv',
-                        'value': '[37] M. Vallisneri, \u201cUse and abuse of the Fisher information matrix in the assessment of gravitational-wave parameter-estimation prospects,\u201d Phys. Rev. D 77, 042001 (2008) doi:10.1103/PhysRevD.77.042001 [gr-qc/0703086 [GR-QC]].'
+                        "schema": "text",
+                        "source": "arXiv",
+                        "value": "[37] M. Vallisneri, \u201cUse and abuse of the Fisher information matrix in the assessment of gravitational-wave parameter-estimation prospects,\u201d Phys. Rev. D 77, 042001 (2008) doi:10.1103/PhysRevD.77.042001 [gr-qc/0703086 [GR-QC]].",
                     },
                 ],
             },
         ],
     }
-    assert validate(data['references'], subschema) is None
+    assert validate(data["references"], subschema) is None
 
     obj = MockObj(data, {})
     eng = MockEng()
+    with requests_mock.Mocker() as requests_mocker:
+        requests_mocker.register_uri(
+            "POST",
+            "http://web:8000/api/matcher/linked_references/",
+            json={
+                "references": [
+                    {
+                        "reference": {
+                            "publication_info": {
+                                "artid": "045",
+                                "journal_title": "JHEP",
+                                "journal_volume": "06",
+                                "page_start": "045",
+                                "year": 2007,
+                            }
+                        }
+                    }
+                ]
+            },
+            status_code=200,
+        )
 
-    assert refextract(obj, eng) is None
-    assert 'reference' in obj.data['references'][0]
+        assert refextract(obj, eng) is None
+        assert "reference" in obj.data["references"][0]
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.create_journal_kb_dict', return_value={})
 @patch(
-    'inspirehep.modules.refextract.matcher.match',
-    return_value=iter([])
+    "inspirehep.modules.workflows.tasks.actions.create_journal_kb_dict", return_value={}
 )
+@patch("inspirehep.modules.refextract.matcher.match", return_value=iter([]))
 def test_refextract_valid_refs_from_raw_refs(mock_create_journal_dict, mock_match):
-    schema = load_schema('hep')
-    subschema = schema['properties']['references']
+    schema = load_schema("hep")
+    subschema = schema["properties"]["references"]
 
     data = {
-        'references': [
+        "references": [
             {
-                'raw_refs': [
+                "raw_refs": [
                     {
-                        'schema': 'text',
-                        'source': 'arXiv',
-                        'value': '[37] M. Vallisneri, \u201cUse and abuse of the Fisher information matrix in the assessment of gravitational-wave parameter-estimation prospects,\u201d Phys. Rev. D 77, 042001 (2008) doi:10.1103/PhysRevD.77.042001 [gr-qc/0703086 [GR-QC]].'
+                        "schema": "text",
+                        "source": "arXiv",
+                        "value": "[37] M. Vallisneri, \u201cUse and abuse of the Fisher information matrix in the assessment of gravitational-wave parameter-estimation prospects,\u201d Phys. Rev. D 77, 042001 (2008) doi:10.1103/PhysRevD.77.042001 [gr-qc/0703086 [GR-QC]].",
                     },
                     {
-                        'schema': 'text',
-                        'source': 'arXiv',
-                        'value': '[37] M. Vallisneri, \u201cUse and abuse of the Fisher information matrix in the assessment of gravitational-wave parameter-estimation prospects,\u201d Phys. Rev. D 77, 042001 (2008) doi:10.1103/PhysRevD.77.042001 [gr-qc/0703086 [GR-QC]].'
+                        "schema": "text",
+                        "source": "arXiv",
+                        "value": "[37] M. Vallisneri, \u201cUse and abuse of the Fisher information matrix in the assessment of gravitational-wave parameter-estimation prospects,\u201d Phys. Rev. D 77, 042001 (2008) doi:10.1103/PhysRevD.77.042001 [gr-qc/0703086 [GR-QC]].",
                     },
                 ],
             },
@@ -1264,104 +1376,152 @@ def test_refextract_valid_refs_from_raw_refs(mock_create_journal_dict, mock_matc
     obj = MockObj(data, {})
     eng = MockEng()
 
-    assert refextract(obj, eng) is None
-    assert len(obj.data['references']) == 1
-    assert validate(obj.data['references'], subschema) is None
+    with requests_mock.Mocker() as requests_mocker:
+        requests_mocker.register_uri(
+            "POST",
+            "http://web:8000/api/matcher/linked_references/",
+            json={
+                "references": [
+                    {
+                        "reference": {
+                            "publication_info": {
+                                "artid": "045",
+                                "journal_title": "JHEP",
+                                "journal_volume": "06",
+                                "page_start": "045",
+                                "year": 2007,
+                            }
+                        }
+                    }
+                ]
+            },
+            status_code=200,
+        )
+
+        assert refextract(obj, eng) is None
+        assert len(obj.data["references"]) == 1
+        assert validate(obj.data["references"], subschema) is None
 
 
-@patch('inspirehep.modules.workflows.tasks.actions.create_journal_kb_dict', return_value={})
-@patch('inspirehep.modules.workflows.tasks.actions.get_document_in_workflow')
 @patch(
-    'inspirehep.modules.refextract.matcher.match',
-    return_value=iter([])
+    "inspirehep.modules.workflows.tasks.actions.create_journal_kb_dict", return_value={}
 )
-def test_refextract_valid_refs_from_text(mock_match, mock_get_document_in_workflow, mock_create_journal_kb_dict):
+@patch("inspirehep.modules.workflows.tasks.actions.get_document_in_workflow")
+def test_refextract_valid_refs_from_text(
+    mock_get_document_in_workflow, mock_create_journal_kb_dict
+):
     """TODO: Make this an integration test and also test reference matching."""
 
     mock_get_document_in_workflow.return_value.__enter__.return_value = None
     mock_get_document_in_workflow.return_value.__exit__.return_value = None
+    with requests_mock.Mocker() as requests_mocker:
+        requests_mocker.register_uri(
+            "POST",
+            "http://web:8000/api/matcher/linked_references/",
+            json={
+                "references": [
+                    {
+                        "reference": {
+                            "publication_info": {
+                                "artid": "045",
+                                "journal_title": "JHEP",
+                                "journal_volume": "06",
+                                "page_start": "045",
+                                "year": 2007,
+                            }
+                        }
+                    }
+                ]
+            },
+            status_code=200,
+        )
+        schema = load_schema("hep")
+        refs_subschema = schema["properties"]["references"]
+        acquisition_source_subschema = schema["properties"]["acquisition_source"]
 
-    schema = load_schema('hep')
-    refs_subschema = schema['properties']['references']
-    acquisition_source_subschema = schema['properties']['acquisition_source']
-
-    data = {'acquisition_source': {'source': 'submitter'}}
-    extra_data = {
-        'formdata': {
-            'references': 'M.R. Douglas, G.W. Moore, D-branes, quivers, and ALE instantons, arXiv:hep-th/9603167\nM.R. Douglas, G.W. Moore, D-branes, quivers, and ALE instantons, arXiv:hep-th/9603167',
-        },
-    }
-    assert validate(data['acquisition_source'], acquisition_source_subschema) is None
+        data = {"acquisition_source": {"source": "submitter"}}
+        extra_data = {
+            "formdata": {
+                "references": "M.R. Douglas, G.W. Moore, D-branes, quivers, and ALE instantons, arXiv:hep-th/9603167\nM.R. Douglas, G.W. Moore, D-branes, quivers, and ALE instantons, arXiv:hep-th/9603167",
+            },
+        }
+        assert (
+            validate(data["acquisition_source"], acquisition_source_subschema) is None
+        )
 
-    obj = MockObj(data, extra_data)
-    eng = MockEng()
+        obj = MockObj(data, extra_data)
+        eng = MockEng()
 
-    assert refextract(obj, eng) is None
-    assert len(obj.data['references']) == 1
-    assert validate(obj.data['references'], refs_subschema) is None
+        assert refextract(obj, eng) is None
+        assert len(obj.data["references"]) == 1
+        assert validate(obj.data["references"], refs_subschema) is None
 
 
 def test_url_is_correctly_escaped():
     with requests_mock.Mocker() as requests_mocker:
         requests_mocker.register_uri(
-            'GET', 'http://inspirehep.net/api/files/f6b4bd83-52c7-43b7-b99d-24bffcb407ba/0375-9474%2876%2990288-8.xml',
+            "GET",
+            "http://inspirehep.net/api/files/f6b4bd83-52c7-43b7-b99d-24bffcb407ba/0375-9474%2876%2990288-8.xml",
             content=pkg_resources.resource_string(
-                __name__, os.path.join('fixtures', '0375-9474%2876%2990288-8.xml')),
+                __name__, os.path.join("fixtures", "0375-9474%2876%2990288-8.xml")
+            ),
         )
-        schema = load_schema('hep')
-        subschema = schema['properties']['documents']
+        schema = load_schema("hep")
+        subschema = schema["properties"]["documents"]
 
         data = {
-            'documents': [
+            "documents": [
                 {
-                    'key': '0375-9474%2876%2990288-8.xml',
-                    'url': 'http://inspirehep.net/api/files/f6b4bd83-52c7-43b7-b99d-24bffcb407ba/0375-9474%2876%2990288-8.xml'
+                    "key": "0375-9474%2876%2990288-8.xml",
+                    "url": "http://inspirehep.net/api/files/f6b4bd83-52c7-43b7-b99d-24bffcb407ba/0375-9474%2876%2990288-8.xml",
                 },
             ],
         }
         extra_data = {}
         files = MockFiles({})
-        assert validate(data['documents'], subschema) is None
+        assert validate(data["documents"], subschema) is None
 
         obj = MockObj(data, extra_data, files=files)
         eng = MockEng()
 
         assert download_documents(obj, eng) is None
 
-        documents = obj.data['documents']
-        expected_document_url = '/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/0375-9474%252876%252990288-8.xml'
+        documents = obj.data["documents"]
+        expected_document_url = "/api/files/0b9dd5d1-feae-4ba5-809d-3a029b0bc110/0375-9474%252876%252990288-8.xml"
 
         assert 1 == len(documents)
-        assert expected_document_url == documents[0]['url']
+        assert expected_document_url == documents[0]["url"]
 
 
 def test_populate_submission_document_without_documents():
     with requests_mock.Mocker() as requests_mocker:
         requests_mocker.register_uri(
-            'GET', 'http://export.arxiv.org/pdf/1605.03844',
+            "GET",
+            "http://export.arxiv.org/pdf/1605.03844",
             content=pkg_resources.resource_string(
-                __name__, os.path.join('fixtures', '1605.03844.pdf')),
+                __name__, os.path.join("fixtures", "1605.03844.pdf")
+            ),
         )
 
-        schema = load_schema('hep')
-        subschema = schema['properties']['acquisition_source']
+        schema = load_schema("hep")
+        subschema = schema["properties"]["acquisition_source"]
         data = {
-            'acquisition_source': {
-                'datetime': '2017-11-30T16:38:43.352370',
-                'email': 'david.caro@cern.ch',
-                'internal_uid': 54252,
-                'method': 'submitter',
-                'orcid': '0000-0002-2174-4493',
-                'source': 'submitter',
-                'submission_number': '1'
+            "acquisition_source": {
+                "datetime": "2017-11-30T16:38:43.352370",
+                "email": "david.caro@cern.ch",
+                "internal_uid": 54252,
+                "method": "submitter",
+                "orcid": "0000-0002-2174-4493",
+                "source": "submitter",
+                "submission_number": "1",
             },
-            'documents': []
+            "documents": [],
         }
-        assert validate(data['acquisition_source'], subschema) is None
+        assert validate(data["acquisition_source"], subschema) is None
         extra_data = {}
         files = MockFiles({})
         obj = MockObj(data, extra_data, files=files)
         eng = MockEng()
 
         assert populate_submission_document(obj, eng) is None
-        assert 'documents' not in obj.data
+        assert "documents" not in obj.data