From bdccd6ceb568e1e1d2192753db6562a6f04ac2e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Mon, 15 Jan 2024 10:26:42 +0100 Subject: [PATCH 1/9] TDD: add test case with missing required header and schema_sync=True detector option in validation of a table resource: test fails --- tests/validator/resource/test_schema.py | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/validator/resource/test_schema.py b/tests/validator/resource/test_schema.py index af8503c91b..ca2dc90398 100644 --- a/tests/validator/resource/test_schema.py +++ b/tests/validator/resource/test_schema.py @@ -1,6 +1,7 @@ import pytest from frictionless import Checklist, Detector, FrictionlessException, Schema, fields +import frictionless from frictionless.resources import TableResource # General @@ -304,3 +305,29 @@ def test_resource_validate_less_actual_fields_with_required_constraint_issue_950 [3, 3, "constraint-error"], [3, 3, "missing-cell"], ] + + +def test_resource_with_missing_required_header(): + schema = { + "$schema": "https://frictionlessdata.io/schemas/table-schema.json", + "fields": [ + { + "name": "A", + "title": "Field A", + "type": "string", + "constraints": {"required": True}, + }, + {"name": "B", "title": "Field B", "type": "string"}, + {"name": "C", "title": "Field C", "type": "string"}, + ], + } + source = [["B", "C"], ["b", "c"]] + schema = Schema.from_descriptor(schema) + resource = TableResource( + source, schema=schema, detector=Detector(schema_sync=True) + ) + report = frictionless.validate(resource) + print(report.flatten(["rowNumber", "fieldNumber", "type"])) + assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ + [None, 3, "missing-label"], + ] \ No newline at end of file From 49ddefd41d0ddd0c1013f64a3058701e4efb3843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Mon, 15 Jan 2024 10:29:50 +0100 Subject: [PATCH 2/9] TDD: fix test case by removing fields info related to missing labels in creation of row_stream: test passes --- frictionless/resources/table.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/frictionless/resources/table.py b/frictionless/resources/table.py index baf0d4b96c..991ce8f9be 100644 --- a/frictionless/resources/table.py +++ b/frictionless/resources/table.py @@ -387,7 +387,16 @@ def row_stream(): # Yield row yield row - # Crreate row stream + # Create row stream + # NB: missing required labels are not included in the + # field_info parameter used for row creation + if self.detector.schema_sync: + for field in self.schema.fields: + if field.name not in self.labels: + field_index = field_info["names"].index(field.name) + del field_info["names"][field_index] + del field_info["objects"][field_index] + del field_info["mapping"][field.name] self.__row_stream = row_stream() # Read From 2d1a6b009eff1ef73681f53d7799e2a9dd33aee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Mon, 15 Jan 2024 10:38:47 +0100 Subject: [PATCH 3/9] Add test case for validation TableResource with schema_sync=True detector option with two missing required header: test passes --- tests/validator/resource/test_schema.py | 28 ++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/validator/resource/test_schema.py b/tests/validator/resource/test_schema.py index ca2dc90398..e50fc005ab 100644 --- a/tests/validator/resource/test_schema.py +++ b/tests/validator/resource/test_schema.py @@ -330,4 +330,30 @@ def test_resource_with_missing_required_header(): print(report.flatten(["rowNumber", "fieldNumber", "type"])) assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ [None, 3, "missing-label"], - ] \ No newline at end of file + ] + + schema = { + "$schema": "https://frictionlessdata.io/schemas/table-schema.json", + "fields": [ + { + "name": "A", + "title": "Field A", + "type": "string", + "constraints": {"required": True}, + }, + {"name": "B", "title": "Field B", "type": "string"}, + {"name": "C", "title": "Field C", "type": "string", + "constraints": {"required": True}}, + ], + } + source = [["B"], ["b"]] + schema = Schema.from_descriptor(schema) + resource = TableResource( + source, schema=schema, detector=Detector(schema_sync=True) + ) + report = frictionless.validate(resource) + print(report.flatten(["rowNumber", "fieldNumber", "type"])) + assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ + [None, 2, "missing-label"], + [None, 3, "missing-label"], + ] From 243b3bd65741e0de20b03196950c74d5d5198fb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Mon, 15 Jan 2024 11:00:21 +0100 Subject: [PATCH 4/9] Refacto: factorize test cases and rename test function --- tests/validator/resource/test_schema.py | 67 ++++++++++++------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/tests/validator/resource/test_schema.py b/tests/validator/resource/test_schema.py index e50fc005ab..3b58c67dad 100644 --- a/tests/validator/resource/test_schema.py +++ b/tests/validator/resource/test_schema.py @@ -1,3 +1,4 @@ +from copy import deepcopy import pytest from frictionless import Checklist, Detector, FrictionlessException, Schema, fields @@ -307,8 +308,8 @@ def test_resource_validate_less_actual_fields_with_required_constraint_issue_950 ] -def test_resource_with_missing_required_header(): - schema = { +def test_resource_with_missing_required_header_with_schema_sync_is_true_issue_1611(): + schema_descriptor_1 = { "$schema": "https://frictionlessdata.io/schemas/table-schema.json", "fields": [ { @@ -321,39 +322,33 @@ def test_resource_with_missing_required_header(): {"name": "C", "title": "Field C", "type": "string"}, ], } - source = [["B", "C"], ["b", "c"]] - schema = Schema.from_descriptor(schema) - resource = TableResource( - source, schema=schema, detector=Detector(schema_sync=True) - ) - report = frictionless.validate(resource) - print(report.flatten(["rowNumber", "fieldNumber", "type"])) - assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ - [None, 3, "missing-label"], - ] - schema = { - "$schema": "https://frictionlessdata.io/schemas/table-schema.json", - "fields": [ - { - "name": "A", - "title": "Field A", - "type": "string", - "constraints": {"required": True}, - }, - {"name": "B", "title": "Field B", "type": "string"}, - {"name": "C", "title": "Field C", "type": "string", - "constraints": {"required": True}}, - ], - } - source = [["B"], ["b"]] - schema = Schema.from_descriptor(schema) - resource = TableResource( - source, schema=schema, detector=Detector(schema_sync=True) - ) - report = frictionless.validate(resource) - print(report.flatten(["rowNumber", "fieldNumber", "type"])) - assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ - [None, 2, "missing-label"], - [None, 3, "missing-label"], + schema_descriptor_2 = deepcopy(schema_descriptor_1) + # Add required constraint on "C" field + schema_descriptor_2["fields"][2]["constraints"] = {"required": True} + + test_cases = [ + { + "schema": schema_descriptor_1, + "source": [["B", "C"], ["b", "c"]], + "expected_flattened_report": [ + [None, 3, "missing-label"], + ] + }, + { + "schema": schema_descriptor_2, + "source": [["B"], ["b"]], + "expected_flattened_report": [ + [None, 2, "missing-label"], + [None, 3, "missing-label"], + ], + } ] + for tc in test_cases: + schema = Schema.from_descriptor(tc["schema"]) + resource = TableResource( + tc["source"], schema=schema, detector=Detector(schema_sync=True) + ) + report = frictionless.validate(resource) + print(report.flatten(["rowNumber", "fieldNumber", "type"])) + assert report.flatten(["rowNumber", "fieldNumber", "type"]) == tc["expected_flattened_report"] From bf2bb00f7ea41b6d1d7c4d75fa193f64e91ea2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Mon, 15 Jan 2024 11:28:50 +0100 Subject: [PATCH 5/9] Refacto: update field_info directly in row_stream local function just before processing Row creation --- frictionless/resources/table.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/frictionless/resources/table.py b/frictionless/resources/table.py index 991ce8f9be..3d94ea10dd 100644 --- a/frictionless/resources/table.py +++ b/frictionless/resources/table.py @@ -310,7 +310,16 @@ def row_stream(): for row_number, cells in enumerated_content_stream: self.stats.rows += 1 - # Create row + # NB: missing required labels are not included in the + # field_info parameter used for row creation + if self.detector.schema_sync: + for field in self.schema.fields: + if (field.name not in self.labels and + field.name in field_info["names"]): + field_index = field_info["names"].index(field.name) + del field_info["names"][field_index] + del field_info["objects"][field_index] + del field_info["mapping"][field.name] row = Row( cells, field_info=field_info, @@ -388,15 +397,6 @@ def row_stream(): yield row # Create row stream - # NB: missing required labels are not included in the - # field_info parameter used for row creation - if self.detector.schema_sync: - for field in self.schema.fields: - if field.name not in self.labels: - field_index = field_info["names"].index(field.name) - del field_info["names"][field_index] - del field_info["objects"][field_index] - del field_info["mapping"][field.name] self.__row_stream = row_stream() # Read From 23e4231df7c59225164ccca6f38efa2aa5559995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Mon, 15 Jan 2024 14:48:54 +0100 Subject: [PATCH 6/9] Sort imports --- tests/validator/resource/test_schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/validator/resource/test_schema.py b/tests/validator/resource/test_schema.py index 3b58c67dad..b63b47c8a8 100644 --- a/tests/validator/resource/test_schema.py +++ b/tests/validator/resource/test_schema.py @@ -1,8 +1,9 @@ from copy import deepcopy + import pytest -from frictionless import Checklist, Detector, FrictionlessException, Schema, fields import frictionless +from frictionless import Checklist, Detector, FrictionlessException, Schema, fields from frictionless.resources import TableResource # General From 4dc62749636bdcab5d352d63a698e5d4a16a32a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Thu, 25 Jan 2024 09:58:31 +0100 Subject: [PATCH 7/9] Linting: lint and format files --- frictionless/resources/table.py | 6 ++++-- tests/validator/resource/test_schema.py | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/frictionless/resources/table.py b/frictionless/resources/table.py index 3d94ea10dd..fc9dfe3af3 100644 --- a/frictionless/resources/table.py +++ b/frictionless/resources/table.py @@ -314,8 +314,10 @@ def row_stream(): # field_info parameter used for row creation if self.detector.schema_sync: for field in self.schema.fields: - if (field.name not in self.labels and - field.name in field_info["names"]): + if ( + field.name not in self.labels + and field.name in field_info["names"] + ): field_index = field_info["names"].index(field.name) del field_info["names"][field_index] del field_info["objects"][field_index] diff --git a/tests/validator/resource/test_schema.py b/tests/validator/resource/test_schema.py index b63b47c8a8..e743115ef5 100644 --- a/tests/validator/resource/test_schema.py +++ b/tests/validator/resource/test_schema.py @@ -323,18 +323,18 @@ def test_resource_with_missing_required_header_with_schema_sync_is_true_issue_16 {"name": "C", "title": "Field C", "type": "string"}, ], } - + schema_descriptor_2 = deepcopy(schema_descriptor_1) # Add required constraint on "C" field schema_descriptor_2["fields"][2]["constraints"] = {"required": True} - + test_cases = [ { "schema": schema_descriptor_1, "source": [["B", "C"], ["b", "c"]], "expected_flattened_report": [ [None, 3, "missing-label"], - ] + ], }, { "schema": schema_descriptor_2, @@ -343,7 +343,7 @@ def test_resource_with_missing_required_header_with_schema_sync_is_true_issue_16 [None, 2, "missing-label"], [None, 3, "missing-label"], ], - } + }, ] for tc in test_cases: schema = Schema.from_descriptor(tc["schema"]) @@ -352,4 +352,7 @@ def test_resource_with_missing_required_header_with_schema_sync_is_true_issue_16 ) report = frictionless.validate(resource) print(report.flatten(["rowNumber", "fieldNumber", "type"])) - assert report.flatten(["rowNumber", "fieldNumber", "type"]) == tc["expected_flattened_report"] + assert ( + report.flatten(["rowNumber", "fieldNumber", "type"]) + == tc["expected_flattened_report"] + ) From 42872cca1f87344f9e19ba0bf217c8ff808e624d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Mon, 29 Jan 2024 10:58:37 +0100 Subject: [PATCH 8/9] Refacto: Move added block dealing with missing required labels in __open_run_stream() TableResource method --- frictionless/resources/table.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/frictionless/resources/table.py b/frictionless/resources/table.py index fc9dfe3af3..9f886501a3 100644 --- a/frictionless/resources/table.py +++ b/frictionless/resources/table.py @@ -310,18 +310,6 @@ def row_stream(): for row_number, cells in enumerated_content_stream: self.stats.rows += 1 - # NB: missing required labels are not included in the - # field_info parameter used for row creation - if self.detector.schema_sync: - for field in self.schema.fields: - if ( - field.name not in self.labels - and field.name in field_info["names"] - ): - field_index = field_info["names"].index(field.name) - del field_info["names"][field_index] - del field_info["objects"][field_index] - del field_info["mapping"][field.name] row = Row( cells, field_info=field_info, @@ -398,7 +386,16 @@ def row_stream(): # Yield row yield row - # Create row stream + # NB: missing required labels are not included in the + # field_info parameter used for row creation + if self.detector.schema_sync: + for field in self.schema.fields: + if field.name not in self.labels and field.name in field_info["names"]: + field_index = field_info["names"].index(field.name) + del field_info["names"][field_index] + del field_info["objects"][field_index] + del field_info["mapping"][field.name] + # # Create row stream self.__row_stream = row_stream() # Read From e2a910e3bef65434e81aeefb13237ad7fef458a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= Date: Mon, 29 Jan 2024 11:37:03 +0100 Subject: [PATCH 9/9] Add test cases --- tests/validator/resource/test_schema.py | 27 ++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tests/validator/resource/test_schema.py b/tests/validator/resource/test_schema.py index e743115ef5..616f0a2728 100644 --- a/tests/validator/resource/test_schema.py +++ b/tests/validator/resource/test_schema.py @@ -333,15 +333,32 @@ def test_resource_with_missing_required_header_with_schema_sync_is_true_issue_16 "schema": schema_descriptor_1, "source": [["B", "C"], ["b", "c"]], "expected_flattened_report": [ - [None, 3, "missing-label"], + [None, 3, "A", "missing-label"], ], }, { "schema": schema_descriptor_2, "source": [["B"], ["b"]], "expected_flattened_report": [ - [None, 2, "missing-label"], - [None, 3, "missing-label"], + [None, 2, "A", "missing-label"], + [None, 3, "C", "missing-label"], + ], + }, + { + "schema": schema_descriptor_2, + "source": [ + ["A", "B"], + ["a", "b"], + ["a1"], + ["a2", "b2"], + [], + ["a3", "b3", "c3"], + ], + "expected_flattened_report": [ + [None, 3, "C", "missing-label"], + [3, 2, "B", "missing-cell"], + [5, None, None, "blank-row"], + [6, 3, "", "extra-cell"], ], }, ] @@ -351,8 +368,8 @@ def test_resource_with_missing_required_header_with_schema_sync_is_true_issue_16 tc["source"], schema=schema, detector=Detector(schema_sync=True) ) report = frictionless.validate(resource) - print(report.flatten(["rowNumber", "fieldNumber", "type"])) + print(report.flatten(["rowNumber", "fieldNumber", "fieldName", "type"])) assert ( - report.flatten(["rowNumber", "fieldNumber", "type"]) + report.flatten(["rowNumber", "fieldNumber", "fieldName", "type"]) == tc["expected_flattened_report"] )