From 0d0dbea12d1ba5c7a0984b45898add15e33ad4fe Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 7 May 2024 14:14:18 +0000
Subject: [PATCH 01/16] feat(logic): strip white space;

- Strip white space from cell values when loading into DataStore.
---
 ckanext/xloader/loader.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 85be3f34..997ba631 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -169,11 +169,17 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         try:
             with UnknownEncodingStream(csv_filepath, file_format, decoding_result,
                                        skip_rows=skip_rows) as stream:
-                stream.save(**save_args)
+                for row in stream:
+                    for _index, _cell in enumerate(row):
+                        row[_index] = str(_cell).strip()  # strip white space around cell values
+                    stream.save(**save_args)  # have to save inside of the tabulator stream iterator
         except (EncodingError, UnicodeDecodeError):
             with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
                         skip_rows=skip_rows) as stream:
-                stream.save(**save_args)
+                for row in stream:
+                    for _index, _cell in enumerate(row):
+                        row[_index] = str(_cell).strip()  # strip white space around cell values
+                    stream.save(**save_args)  # have to save inside of the tabulator stream iterator
         csv_filepath = f_write.name
 
         # datastore db connection
@@ -441,6 +447,7 @@ def row_iterator():
             logger.info('Saving chunk %s', i)
             for row in records:
                 for column_index, column_name in enumerate(row):
+                    row[column_name] = str(row[column_name]).strip()  # strip white space around cell values
                     if headers_dicts[column_index]['type'] in non_empty_types and row[column_name] == '':
                         row[column_name] = None
             send_resource_to_datastore(resource_id, headers_dicts, records)

From 616c948ce261955ddeb3c0ad56ba87efa8d13070 Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 7 May 2024 14:35:57 +0000
Subject: [PATCH 02/16] fix(logic): strip white space for load table;

- Fixed white space stripping for `load_table`: moved it out of `row_iterator` in `loader.py` and into `convert_types` in `parser.py`.
---
 ckanext/xloader/loader.py | 1 -
 ckanext/xloader/parser.py | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 997ba631..3f8df083 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -447,7 +447,6 @@ def row_iterator():
             logger.info('Saving chunk %s', i)
             for row in records:
                 for column_index, column_name in enumerate(row):
-                    row[column_name] = str(row[column_name]).strip()  # strip white space around cell values
                     if headers_dicts[column_index]['type'] in non_empty_types and row[column_name] == '':
                         row[column_name] = None
             send_resource_to_datastore(resource_id, headers_dicts, records)
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index 11e756cd..ee9d05eb 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -31,6 +31,8 @@ def convert_types(self, extended_rows):
             for cell_index, cell_value in enumerate(row):
                 if cell_value is None:
                     row[cell_index] = ''
+                cell_value = cell_value.strip()  # strip white space around cell values
+                row[cell_index] = cell_value.strip()  # strip white space around cell values
                 if not cell_value:
                     continue
                 cell_type = self.types[cell_index] if self.types else None

From 860ca9eb8a82e86a857d70ea62382a707418cfae Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 7 May 2024 14:46:53 +0000
Subject: [PATCH 03/16] fix(logic): strip white space for load table;

- Fixed white space stripping for `load_table`: wrap cell values in `str()` before calling `.strip()` in `convert_types`.
---
 ckanext/xloader/parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index ee9d05eb..b3ce1772 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -31,8 +31,8 @@ def convert_types(self, extended_rows):
             for cell_index, cell_value in enumerate(row):
                 if cell_value is None:
                     row[cell_index] = ''
-                cell_value = cell_value.strip()  # strip white space around cell values
-                row[cell_index] = cell_value.strip()  # strip white space around cell values
+                cell_value = str(cell_value).strip()  # strip white space around cell values
+                row[cell_index] = str(cell_value).strip()  # strip white space around cell values
                 if not cell_value:
                     continue
                 cell_type = self.types[cell_index] if self.types else None

From 88f96a866761aaeb90a03cdfa75655fc4bfc4376 Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Wed, 8 May 2024 13:53:38 +0000
Subject: [PATCH 04/16] fix(logic): strip white space;

- Only strip white space from cell values that are `str` instances.
---
 ckanext/xloader/loader.py | 10 ++++++++--
 ckanext/xloader/parser.py |  7 +++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 3f8df083..a38eb107 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -171,14 +171,20 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
                                        skip_rows=skip_rows) as stream:
                 for row in stream:
                     for _index, _cell in enumerate(row):
-                        row[_index] = str(_cell).strip()  # strip white space around cell values
+                        if isinstance(_cell, str):
+                            # strip white space around cell values
+                            #TODO: condition behind DataDictionary option??
+                            row[_index] = _cell.strip()
                     stream.save(**save_args)  # have to save inside of the tabulator stream iterator
         except (EncodingError, UnicodeDecodeError):
             with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
                         skip_rows=skip_rows) as stream:
                 for row in stream:
                     for _index, _cell in enumerate(row):
-                        row[_index] = str(_cell).strip()  # strip white space around cell values
+                        if isinstance(_cell, str):
+                            # strip white space around cell values
+                            #TODO: condition behind DataDictionary option??
+                            row[_index] = _cell.strip()
                     stream.save(**save_args)  # have to save inside of the tabulator stream iterator
         csv_filepath = f_write.name
 
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index b3ce1772..890e8776 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -31,8 +31,11 @@ def convert_types(self, extended_rows):
             for cell_index, cell_value in enumerate(row):
                 if cell_value is None:
                     row[cell_index] = ''
-                cell_value = str(cell_value).strip()  # strip white space around cell values
-                row[cell_index] = str(cell_value).strip()  # strip white space around cell values
+                if isinstance(cell_value, str):
+                    # strip white space around cell values
+                    #TODO: condition behind DataDictionary option??
+                    cell_value = cell_value.strip()
+                    row[cell_index] = cell_value.strip()
                 if not cell_value:
                     continue
                 cell_type = self.types[cell_index] if self.types else None

From 21a1eced97557c7ea68e994a64662617357160aa Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Wed, 8 May 2024 19:00:22 +0000
Subject: [PATCH 05/16] fix(tests): new code for tests;

- Align test with new strip code.
- Write headers to stream for `load_csv`.
---
 ckanext/xloader/loader.py            |  2 ++
 ckanext/xloader/tests/test_loader.py | 10 +++++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index daddc534..f5291b14 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -174,6 +174,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         try:
             with UnknownEncodingStream(csv_filepath, file_format, decoding_result,
                                        skip_rows=skip_rows) as stream:
+                stream.save(**save_args)  # have to save headers
                 for row in stream:
                     for _index, _cell in enumerate(row):
                         if isinstance(_cell, str):
@@ -184,6 +185,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         except (EncodingError, UnicodeDecodeError):
             with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
                         skip_rows=skip_rows) as stream:
+                stream.save(**save_args)  # have to save headers
                 for row in stream:
                     for _index, _cell in enumerate(row):
                         if isinstance(_cell, str):
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index a8b23b53..3eacd5a0 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -1049,7 +1049,7 @@ def test_boston_311(self, Session):
                 u"",
                 u"ONTIME",
                 u"Open",
-                u" ",
+                u"",  # " " transforms to ""
                 u"Street Light Outages",
                 u"Public Works Department",
                 u"Street Lights",
@@ -1081,14 +1081,14 @@ def test_boston_311(self, Session):
                 u"",
                 u"ONTIME",
                 u"Open",
-                u" ",
+                u"",  # " " transforms to ""
                 u"Graffiti Removal",
                 u"Property Management",
                 u"Graffiti",
                 u"Graffiti Removal",
                 u"PROP_GRAF_GraffitiRemoval",
                 u"PROP",
-                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",
+                u"https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",   # strip white spaces
                 u"",
                 u"522 Saratoga St  East Boston  MA  02128",
                 Decimal("1"),
@@ -1113,14 +1113,14 @@ def test_boston_311(self, Session):
                 u"",
                 u"ONTIME",
                 u"Open",
-                u" ",
+                u"",  # " " transforms to ""
                 u"Graffiti Removal",
                 u"Property Management",
                 u"Graffiti",
                 u"Graffiti Removal",
                 u"PROP_GRAF_GraffitiRemoval",
                 u"PROP",
-                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",
+                u"https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",  # strip white spaces
                 u"",
                 u"965 Bennington St  East Boston  MA  02128",
                 Decimal("1"),

From a6ab0a045083cfa430d6542f2397f2b1d78bd731 Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Sun, 12 May 2024 12:46:06 +0000
Subject: [PATCH 06/16] fix(logic): load csv white space;

- Extended tabulator stream iterator to strip white space from cell values.
---
 ckanext/xloader/loader.py | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index f5291b14..a4c99cc0 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -174,25 +174,27 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         try:
             with UnknownEncodingStream(csv_filepath, file_format, decoding_result,
                                        skip_rows=skip_rows) as stream:
-                stream.save(**save_args)  # have to save headers
-                for row in stream:
-                    for _index, _cell in enumerate(row):
-                        if isinstance(_cell, str):
-                            # strip white space around cell values
-                            #TODO: condition behind DataDictionary option??
-                            row[_index] = _cell.strip()
-                    stream.save(**save_args)  # have to save inside of the tabulator stream iterator
+                super_iter = stream.iter
+                def strip_white_space_iter():
+                    for row in super_iter():
+                        for _index, _cell in enumerate(row):
+                            if isinstance(_cell, str):
+                                row[_index] = _cell.strip()
+                        yield row
+                stream.iter = strip_white_space_iter
+                stream.save(**save_args)
         except (EncodingError, UnicodeDecodeError):
             with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
                         skip_rows=skip_rows) as stream:
-                stream.save(**save_args)  # have to save headers
-                for row in stream:
-                    for _index, _cell in enumerate(row):
-                        if isinstance(_cell, str):
-                            # strip white space around cell values
-                            #TODO: condition behind DataDictionary option??
-                            row[_index] = _cell.strip()
-                    stream.save(**save_args)  # have to save inside of the tabulator stream iterator
+                super_iter = stream.iter
+                def strip_white_space_iter():
+                    for row in super_iter():
+                        for _index, _cell in enumerate(row):
+                            if isinstance(_cell, str):
+                                row[_index] = _cell.strip()
+                        yield row
+                stream.iter = strip_white_space_iter
+                stream.save(**save_args)
         csv_filepath = f_write.name
 
         # datastore db connection

From 54f87e0c7c1a0eca2031cb20922936329483c030 Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 14 May 2024 15:49:42 +0000
Subject: [PATCH 07/16] feat(logic): added `strip_extra_white` info field;

- Added `strip_extra_white` info field and form fields.
- Added validator for `strip_extra_white`.
- Used `strip_extra_white` to control stripping white space.
---
 ckanext/xloader/loader.py                     | 64 ++++++++++---------
 ckanext/xloader/parser.py                     | 13 ++--
 ckanext/xloader/plugin.py                     | 17 ++++-
 .../datastore/snippets/dictionary_form.html   | 11 ++++
 ckanext/xloader/tests/test_loader.py          | 12 ++--
 ckanext/xloader/validators.py                 | 12 ++++
 6 files changed, 87 insertions(+), 42 deletions(-)
 create mode 100644 ckanext/xloader/templates/datastore/snippets/dictionary_form.html
 create mode 100644 ckanext/xloader/validators.py

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index a4c99cc0..abe03f41 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -170,33 +170,6 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
     logger.info('Ensuring character coding is UTF8')
     f_write = tempfile.NamedTemporaryFile(suffix=file_format, delete=False)
     try:
-        save_args = {'target': f_write.name, 'format': 'csv', 'encoding': 'utf-8', 'delimiter': delimiter}
-        try:
-            with UnknownEncodingStream(csv_filepath, file_format, decoding_result,
-                                       skip_rows=skip_rows) as stream:
-                super_iter = stream.iter
-                def strip_white_space_iter():
-                    for row in super_iter():
-                        for _index, _cell in enumerate(row):
-                            if isinstance(_cell, str):
-                                row[_index] = _cell.strip()
-                        yield row
-                stream.iter = strip_white_space_iter
-                stream.save(**save_args)
-        except (EncodingError, UnicodeDecodeError):
-            with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
-                        skip_rows=skip_rows) as stream:
-                super_iter = stream.iter
-                def strip_white_space_iter():
-                    for row in super_iter():
-                        for _index, _cell in enumerate(row):
-                            if isinstance(_cell, str):
-                                row[_index] = _cell.strip()
-                        yield row
-                stream.iter = strip_white_space_iter
-                stream.save(**save_args)
-        csv_filepath = f_write.name
-
         # datastore db connection
         engine = get_write_engine()
 
@@ -238,11 +211,40 @@ def strip_white_space_iter():
         else:
             fields = [
                 {'id': header_name,
-                 'type': 'text'}
+                 'type': 'text',}
                 for header_name in headers]
 
         logger.info('Fields: %s', fields)
 
+        save_args = {'target': f_write.name, 'format': 'csv', 'encoding': 'utf-8', 'delimiter': delimiter}
+        try:
+            with UnknownEncodingStream(csv_filepath, file_format, decoding_result,
+                                       skip_rows=skip_rows) as stream:
+                super_iter = stream.iter
+                def strip_white_space_iter():
+                    for row in super_iter():
+                        for _index, _cell in enumerate(row):
+                            # only strip white space if strip_extra_white is True
+                            if fields[_index].get('info', {}).get('strip_extra_white', True) and isinstance(_cell, str):
+                                row[_index] = _cell.strip()
+                        yield row
+                stream.iter = strip_white_space_iter
+                stream.save(**save_args)
+        except (EncodingError, UnicodeDecodeError):
+            with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
+                        skip_rows=skip_rows) as stream:
+                super_iter = stream.iter
+                def strip_white_space_iter():
+                    for row in super_iter():
+                        for _index, _cell in enumerate(row):
+                            # only strip white space if strip_extra_white is True
+                            if fields[_index].get('info', {}).get('strip_extra_white', True) and isinstance(_cell, str):
+                                row[_index] = _cell.strip()
+                        yield row
+                stream.iter = strip_white_space_iter
+                stream.save(**save_args)
+        csv_filepath = f_write.name
+
         # Create table
         from ckan import model
         context = {'model': model, 'ignore_auth': True}
@@ -401,6 +403,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
 
     TYPES, TYPE_MAPPING = get_types()
     types = type_guess(stream.sample[1:], types=TYPES, strict=True)
+    info = []
 
     # override with types user requested
     if existing_info:
@@ -411,9 +414,12 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
                 'timestamp': datetime.datetime,
             }.get(existing_info.get(h, {}).get('type_override'), t)
             for t, h in zip(types, headers)]
+        for h in headers:
+            info.append(existing_info.get(h, {}))
+
 
     headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
-    type_converter = TypeConverter(types=types)
+    type_converter = TypeConverter(types=types, info=info)
 
     with UnknownEncodingStream(table_filepath, file_format, decoding_result,
                                skip_rows=skip_rows,
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index 890e8776..d27cd0ce 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -18,8 +18,9 @@ class TypeConverter:
     as desired.
     """
 
-    def __init__(self, types=None):
+    def __init__(self, types=None, info=None):
         self.types = types
+        self.info = info
 
     def convert_types(self, extended_rows):
         """ Try converting cells to numbers or timestamps if applicable.
@@ -31,11 +32,11 @@ def convert_types(self, extended_rows):
             for cell_index, cell_value in enumerate(row):
                 if cell_value is None:
                     row[cell_index] = ''
-                if isinstance(cell_value, str):
-                    # strip white space around cell values
-                    #TODO: condition behind DataDictionary option??
-                    cell_value = cell_value.strip()
-                    row[cell_index] = cell_value.strip()
+                if self.info:
+                    # only strip white space if strip_extra_white is True
+                    if self.info[cell_index].get('strip_extra_white', True) and isinstance(cell_value, str):
+                        cell_value = cell_value.strip()
+                        row[cell_index] = cell_value.strip()
                 if not cell_value:
                     continue
                 cell_type = self.types[cell_index] if self.types else None
diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py
index e0ce027e..6b22d8d8 100644
--- a/ckanext/xloader/plugin.py
+++ b/ckanext/xloader/plugin.py
@@ -4,12 +4,13 @@
 
 from ckan import plugins
 from ckan.plugins import toolkit
+from ckanext.datastore.interfaces import IDataDictionaryForm
 
 from ckan.model.domain_object import DomainObjectOperation
 from ckan.model.resource import Resource
 from ckan.model.package import Package
 
-from . import action, auth, helpers as xloader_helpers, utils
+from . import action, auth, helpers as xloader_helpers, utils, validators
 from ckanext.xloader.utils import XLoaderFormats
 
 try:
@@ -34,6 +35,8 @@ class xloaderPlugin(plugins.SingletonPlugin):
     plugins.implements(plugins.IResourceController, inherit=True)
     plugins.implements(plugins.IClick)
     plugins.implements(plugins.IBlueprint)
+    plugins.implements(plugins.IValidators)
+    plugins.implements(IDataDictionaryForm, inherit=True)
 
     # IClick
     def get_commands(self):
@@ -207,6 +210,18 @@ def get_helpers(self):
             "is_resource_supported_by_xloader": xloader_helpers.is_resource_supported_by_xloader,
         }
 
+    # IValidators
+
+    def get_validators(self):
+        return {'xloader_datastore_fields_validator': validators.datastore_fields_validator}
+
+    # IDataDictionaryForm
+
+    def update_datastore_create_schema(self, schema):
+        info_validator = toolkit.get_validator('xloader_datastore_fields_validator')
+        schema['fields']['info'] = [info_validator] + schema['fields']['info']
+        return schema
+
 
 def _should_remove_unsupported_resource_from_datastore(res_dict):
     if not toolkit.asbool(toolkit.config.get('ckanext.xloader.clean_datastore_tables', False)):
diff --git a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
new file mode 100644
index 00000000..1a91b00f
--- /dev/null
+++ b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
@@ -0,0 +1,11 @@
+{% ckan_extends %}
+{% import 'macros/form.html' as form %}
+
+{% block additional_fields %}
+  {{ super() }}
+  {{ form.select('info__' ~ position ~ '__strip_extra_white',
+    label=_('Strip Extra Leading and Trailing White Space'), options=[
+    {'text': 'Yes', 'value': true},
+    {'text': 'No', 'value': false},
+    ], selected=field.get('info', {}).get('strip_extra_white')) }}
+{% endblock %}
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 3eacd5a0..51334543 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -227,7 +227,7 @@ def test_boston_311(self, Session):
                 None,
                 u"ONTIME",
                 u"Open",
-                u" ",
+                None,  # " " transforms to None
                 u"Street Light Outages",
                 u"Public Works Department",
                 u"Street Lights",
@@ -259,14 +259,14 @@ def test_boston_311(self, Session):
                 None,
                 u"ONTIME",
                 u"Open",
-                u" ",
+                None,  # " " transforms to None
                 u"Graffiti Removal",
                 u"Property Management",
                 u"Graffiti",
                 u"Graffiti Removal",
                 u"PROP_GRAF_GraffitiRemoval",
                 u"PROP",
-                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",
+                u"https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",  # strip white spaces
                 None,
                 u"522 Saratoga St  East Boston  MA  02128",
                 u"1",
@@ -291,14 +291,14 @@ def test_boston_311(self, Session):
                 None,
                 u"ONTIME",
                 u"Open",
-                u" ",
+                None,  # " " transforms to None
                 u"Graffiti Removal",
                 u"Property Management",
                 u"Graffiti",
                 u"Graffiti Removal",
                 u"PROP_GRAF_GraffitiRemoval",
                 u"PROP",
-                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",
+                u"https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",  # strip white spaces
                 None,
                 u"965 Bennington St  East Boston  MA  02128",
                 u"1",
@@ -1088,7 +1088,7 @@ def test_boston_311(self, Session):
                 u"Graffiti Removal",
                 u"PROP_GRAF_GraffitiRemoval",
                 u"PROP",
-                u"https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",   # strip white spaces
+                u"https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",  # strip white spaces
                 u"",
                 u"522 Saratoga St  East Boston  MA  02128",
                 Decimal("1"),
diff --git a/ckanext/xloader/validators.py b/ckanext/xloader/validators.py
new file mode 100644
index 00000000..a14f71f3
--- /dev/null
+++ b/ckanext/xloader/validators.py
@@ -0,0 +1,12 @@
+from ckan.plugins.toolkit import asbool
+
+
+def datastore_fields_validator(value, context):
+    if 'strip_extra_white' not in value:
+        # default to True
+        value['strip_extra_white'] = True
+
+    # bool value for strip_extra_white
+    value['strip_extra_white'] = asbool(value['strip_extra_white'])
+
+    return value

From 50080ea14d725385bd25cbb2ff395b249f6599d0 Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 14 May 2024 16:52:23 +0000
Subject: [PATCH 08/16] feat(logic): added `strip_extra_white` field;

- Added `strip_extra_white` field and form fields.
- Used `strip_extra_white` to control stripping white space.
---
 ckanext/xloader/loader.py                     | 26 ++++++++++++-------
 ckanext/xloader/parser.py                     |  8 +++---
 ckanext/xloader/plugin.py                     | 19 +++++++-------
 .../datastore/snippets/dictionary_form.html   |  4 +--
 ckanext/xloader/validators.py                 | 12 ---------
 5 files changed, 33 insertions(+), 36 deletions(-)
 delete mode 100644 ckanext/xloader/validators.py

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index abe03f41..817b55c1 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -177,10 +177,13 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         existing = datastore_resource_exists(resource_id)
         existing_info = {}
         if existing:
-            existing_fields = existing.get('fields', [])
+            ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id})
+            existing_fields = ds_info.get('fields', [])
             existing_info = dict((f['id'], f['info'])
                                  for f in existing_fields
                                  if 'info' in f)
+            existing_fields_by_headers = dict((f['id'], f)
+                                              for f in existing_fields)
 
             # Column types are either set (overridden) in the Data Dictionary page
             # or default to text type (which is robust)
@@ -195,6 +198,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
             for f in fields:
                 if f['id'] in existing_info:
                     f['info'] = existing_info[f['id']]
+                    f['strip_extra_white'] = existing_fields_by_headers[f['id']].get('strip_extra_white', True)
 
             '''
             Delete or truncate existing datastore table before proceeding,
@@ -211,7 +215,8 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         else:
             fields = [
                 {'id': header_name,
-                 'type': 'text',}
+                 'type': 'text',
+                 'strip_extra_white': True,}
                 for header_name in headers]
 
         logger.info('Fields: %s', fields)
@@ -225,7 +230,7 @@ def strip_white_space_iter():
                     for row in super_iter():
                         for _index, _cell in enumerate(row):
                             # only strip white space if strip_extra_white is True
-                            if fields[_index].get('info', {}).get('strip_extra_white', True) and isinstance(_cell, str):
+                            if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
                                 row[_index] = _cell.strip()
                         yield row
                 stream.iter = strip_white_space_iter
@@ -238,7 +243,7 @@ def strip_white_space_iter():
                     for row in super_iter():
                         for _index, _cell in enumerate(row):
                             # only strip white space if strip_extra_white is True
-                            if fields[_index].get('info', {}).get('strip_extra_white', True) and isinstance(_cell, str):
+                            if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
                                 row[_index] = _cell.strip()
                         yield row
                 stream.iter = strip_white_space_iter
@@ -388,10 +393,13 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
     existing = datastore_resource_exists(resource_id)
     existing_info = None
     if existing:
-        existing_fields = existing.get('fields', [])
+        ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id})
+        existing_fields = ds_info.get('fields', [])
         existing_info = dict(
             (f['id'], f['info'])
             for f in existing_fields if 'info' in f)
+        existing_fields_by_headers = dict((f['id'], f)
+                                          for f in existing_fields)
 
     # Some headers might have been converted from strings to floats and such.
     headers = encode_headers(headers)
@@ -403,7 +411,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
 
     TYPES, TYPE_MAPPING = get_types()
     types = type_guess(stream.sample[1:], types=TYPES, strict=True)
-    info = []
+    fields = []
 
     # override with types user requested
     if existing_info:
@@ -415,11 +423,10 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
             }.get(existing_info.get(h, {}).get('type_override'), t)
             for t, h in zip(types, headers)]
         for h in headers:
-            info.append(existing_info.get(h, {}))
-
+            fields.append(existing_fields_by_headers.get(h, {}))
 
     headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
-    type_converter = TypeConverter(types=types, info=info)
+    type_converter = TypeConverter(types=types, fields=fields)
 
     with UnknownEncodingStream(table_filepath, file_format, decoding_result,
                                skip_rows=skip_rows,
@@ -440,6 +447,7 @@ def row_iterator():
             for h in headers_dicts:
                 if h['id'] in existing_info:
                     h['info'] = existing_info[h['id']]
+                    h['strip_extra_white'] = existing_fields_by_headers[h['id']].get('strip_extra_white', True)
                     # create columns with types user requested
                     type_override = existing_info[h['id']].get('type_override')
                     if type_override in list(_TYPE_MAPPING.values()):
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index d27cd0ce..c587f187 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -18,9 +18,9 @@ class TypeConverter:
     as desired.
     """
 
-    def __init__(self, types=None, info=None):
+    def __init__(self, types=None, fields=None):
         self.types = types
-        self.info = info
+        self.fields = fields
 
     def convert_types(self, extended_rows):
         """ Try converting cells to numbers or timestamps if applicable.
@@ -32,9 +32,9 @@ def convert_types(self, extended_rows):
             for cell_index, cell_value in enumerate(row):
                 if cell_value is None:
                     row[cell_index] = ''
-                if self.info:
+                if self.fields:
                     # only strip white space if strip_extra_white is True
-                    if self.info[cell_index].get('strip_extra_white', True) and isinstance(cell_value, str):
+                    if self.fields[cell_index].get('strip_extra_white', True) and isinstance(cell_value, str):
                         cell_value = cell_value.strip()
                         row[cell_index] = cell_value.strip()
                 if not cell_value:
diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py
index 6b22d8d8..051185e6 100644
--- a/ckanext/xloader/plugin.py
+++ b/ckanext/xloader/plugin.py
@@ -10,7 +10,7 @@
 from ckan.model.resource import Resource
 from ckan.model.package import Package
 
-from . import action, auth, helpers as xloader_helpers, utils, validators
+from . import action, auth, helpers as xloader_helpers, utils
 from ckanext.xloader.utils import XLoaderFormats
 
 try:
@@ -35,7 +35,6 @@ class xloaderPlugin(plugins.SingletonPlugin):
     plugins.implements(plugins.IResourceController, inherit=True)
     plugins.implements(plugins.IClick)
     plugins.implements(plugins.IBlueprint)
-    plugins.implements(plugins.IValidators)
     plugins.implements(IDataDictionaryForm, inherit=True)
 
     # IClick
@@ -210,18 +209,20 @@ def get_helpers(self):
             "is_resource_supported_by_xloader": xloader_helpers.is_resource_supported_by_xloader,
         }
 
-    # IValidators
-
-    def get_validators(self):
-        return {'xloader_datastore_fields_validator': validators.datastore_fields_validator}
-
     # IDataDictionaryForm
 
     def update_datastore_create_schema(self, schema):
-        info_validator = toolkit.get_validator('xloader_datastore_fields_validator')
-        schema['fields']['info'] = [info_validator] + schema['fields']['info']
+        default = toolkit.get_validator('default')
+        boolean_validator = toolkit.get_validator('boolean_validator')
+        to_datastore_plugin_data = toolkit.get_validator('to_datastore_plugin_data')
+        schema['fields']['strip_extra_white'] = [default(True), boolean_validator, to_datastore_plugin_data('xloader')]
         return schema
 
+    def update_datastore_info_field(self, field, plugin_data):
+        # expose all our non-secret plugin data in the field
+        field.update(plugin_data.get('xloader', {}))
+        return field
+
 
 def _should_remove_unsupported_resource_from_datastore(res_dict):
     if not toolkit.asbool(toolkit.config.get('ckanext.xloader.clean_datastore_tables', False)):
diff --git a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
index 1a91b00f..afdf80ff 100644
--- a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
+++ b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
@@ -3,9 +3,9 @@
 
 {% block additional_fields %}
   {{ super() }}
-  {{ form.select('info__' ~ position ~ '__strip_extra_white',
+  {{ form.select('fields__' ~ position ~ '__strip_extra_white',
     label=_('Strip Extra Leading and Trailing White Space'), options=[
     {'text': 'Yes', 'value': true},
     {'text': 'No', 'value': false},
-    ], selected=field.get('info', {}).get('strip_extra_white')) }}
+    ], selected=field.get('strip_extra_white')) }}
 {% endblock %}
diff --git a/ckanext/xloader/validators.py b/ckanext/xloader/validators.py
deleted file mode 100644
index a14f71f3..00000000
--- a/ckanext/xloader/validators.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from ckan.plugins.toolkit import asbool
-
-
-def datastore_fields_validator(value, context):
-    if 'strip_extra_white' not in value:
-        # default to True
-        value['strip_extra_white'] = True
-
-    # bool value for strip_extra_white
-    value['strip_extra_white'] = asbool(value['strip_extra_white'])
-
-    return value

From 116c29fa92897f0b190c763e4bbee3e41f6bf6ef Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 14 May 2024 17:20:43 +0000
Subject: [PATCH 09/16] fix(logic): minor logic fixes;

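- Minor logic fixes for the new `strip_extra_white` field: skip stripping when `fields` is empty, and default `strip_extra_white` to True when no existing field info is available.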
---
 ckanext/xloader/loader.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 817b55c1..30940828 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -230,7 +230,7 @@ def strip_white_space_iter():
                     for row in super_iter():
                         for _index, _cell in enumerate(row):
                             # only strip white space if strip_extra_white is True
-                            if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
+                            if fields and fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
                                 row[_index] = _cell.strip()
                         yield row
                 stream.iter = strip_white_space_iter
@@ -243,7 +243,7 @@ def strip_white_space_iter():
                     for row in super_iter():
                         for _index, _cell in enumerate(row):
                             # only strip white space if strip_extra_white is True
-                            if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
+                            if fields and fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
                                 row[_index] = _cell.strip()
                         yield row
                 stream.iter = strip_white_space_iter
@@ -424,6 +424,10 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
             for t, h in zip(types, headers)]
         for h in headers:
             fields.append(existing_fields_by_headers.get(h, {}))
+    else:
+        # default strip_extra_white
+        for h in headers:
+            fields.append({'strip_extra_white': True})
 
     headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
     type_converter = TypeConverter(types=types, fields=fields)
@@ -452,6 +456,10 @@ def row_iterator():
                     type_override = existing_info[h['id']].get('type_override')
                     if type_override in list(_TYPE_MAPPING.values()):
                         h['type'] = type_override
+        else:
+            # default strip_extra_white
+            for h in headers_dicts:
+                h['strip_extra_white'] = True
 
         logger.info('Determined headers and types: %s', headers_dicts)
 

From 43b9f94cc9284ae0bab2276b7f6deb4cc5c9ca8f Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Mon, 15 Jul 2024 15:34:40 +0000
Subject: [PATCH 10/16] feat(tests,i18n): updated tests;

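- Updated loader tests: column name/type assertions now run before the record assertions, and the Boston 311 sample gains trailing white space to cover trimming.
- Wrapped the Yes/No option labels in the data dictionary form in gettext.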
---
 .../datastore/snippets/dictionary_form.html   |   4 +-
 .../tests/samples/boston_311_sample.csv       |   8 +-
 ckanext/xloader/tests/test_loader.py          | 507 +++++++++---------
 3 files changed, 260 insertions(+), 259 deletions(-)

diff --git a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
index afdf80ff..02919354 100644
--- a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
+++ b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
@@ -5,7 +5,7 @@
   {{ super() }}
   {{ form.select('fields__' ~ position ~ '__strip_extra_white',
     label=_('Strip Extra Leading and Trailing White Space'), options=[
-    {'text': 'Yes', 'value': true},
-    {'text': 'No', 'value': false},
+    {'text': _('Yes'), 'value': true},
+    {'text': _('No'), 'value': false},
     ], selected=field.get('strip_extra_white')) }}
 {% endblock %}
diff --git a/ckanext/xloader/tests/samples/boston_311_sample.csv b/ckanext/xloader/tests/samples/boston_311_sample.csv
index 83e0d5f2..e3a7e5be 100644
--- a/ckanext/xloader/tests/samples/boston_311_sample.csv
+++ b/ckanext/xloader/tests/samples/boston_311_sample.csv
@@ -1,4 +1,4 @@
-CASE_ENQUIRY_ID,open_dt,target_dt,closed_dt,OnTime_Status,CASE_STATUS,CLOSURE_REASON,CASE_TITLE,SUBJECT,REASON,TYPE,QUEUE,Department,SubmittedPhoto,ClosedPhoto,Location,Fire_district,pwd_district,city_council_district,police_district,neighborhood,neighborhood_services_district,ward,precinct,LOCATION_STREET_NAME,LOCATION_ZIPCODE,Latitude,Longitude,Source
-101002153891,2017-07-06 23:38:43,2017-07-21 08:30:00,,ONTIME,Open, ,Street Light Outages,Public Works Department,Street Lights,Street Light Outages,PWDx_Street Light Outages,PWDx,,,480 Harvard St  Dorchester  MA  02124,8,07,4,B3,Greater Mattapan,9,Ward 14,1411,480 Harvard St,02124,42.288,-71.0927,Citizens Connect App
-101002153890,2017-07-06 23:29:13,2017-09-11 08:30:00,,ONTIME,Open, ,Graffiti Removal,Property Management,Graffiti,Graffiti Removal,PROP_GRAF_GraffitiRemoval,PROP, https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg,,522 Saratoga St  East Boston  MA  02128,1,09,1,A7,East Boston,1,Ward 1,0110,522 Saratoga St,02128,42.3807,-71.0259,Citizens Connect App
-101002153889,2017-07-06 23:24:20,2017-09-11 08:30:00,,ONTIME,Open, ,Graffiti Removal,Property Management,Graffiti,Graffiti Removal,PROP_GRAF_GraffitiRemoval,PROP, https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg,,965 Bennington St  East Boston  MA  02128,1,09,1,A7,East Boston,1,Ward 1,0112,965 Bennington St,02128,42.386,-71.008,Citizens Connect App
+CASE_ENQUIRY_ID,open_dt,target_dt,closed_dt,OnTime_Status,CASE_STATUS,CLOSURE_REASON,CASE_TITLE,SUBJECT,REASON,TYPE,QUEUE,Department,SubmittedPhoto,ClosedPhoto,Location,Fire_district,pwd_district,city_council_district,police_district,neighborhood,neighborhood_services_district,ward,precinct,LOCATION_STREET_NAME,LOCATION_ZIPCODE,Latitude,Longitude,Source
+101002153891,2017-07-06 23:38:43,2017-07-21 08:30:00,,ONTIME,Open, ,Street Light Outages,Public Works Department   ,Street Lights,Street Light Outages,PWDx_Street Light Outages,PWDx,,,480 Harvard St  Dorchester  MA  02124,8,07,4,B3,Greater Mattapan,9,Ward 14,1411,480 Harvard St,02124,42.288,-71.0927,Citizens Connect App
+101002153890,2017-07-06 23:29:13,2017-09-11 08:30:00,,ONTIME,Open, ,Graffiti Removal,Property Management,Graffiti,Graffiti Removal,PROP_GRAF_GraffitiRemoval,PROP, https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg,,522 Saratoga St  East Boston  MA  02128,1,09,1,A7,East Boston,1,Ward 1,0110,522 Saratoga St,02128,42.3807,-71.0259,Citizens Connect App
+101002153889,2017-07-06 23:24:20,2017-09-11 08:30:00,,ONTIME,Open, ,Graffiti Removal,Property Management,Graffiti,Graffiti Removal,PROP_GRAF_GraffitiRemoval,PROP, https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg,,965 Bennington St  East Boston  MA  02128,1,09,1,A7,East Boston,1,Ward 1,0112,965 Bennington St,02128,42.386,-71.008,Citizens Connect App
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 51334543..ec0ee8e4 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -102,6 +102,20 @@ def test_simple(self, Session):
             logger=logger,
         )
 
+        records = self._get_records(Session, resource_id)
+        print(self._get_column_names(Session, resource_id))
+        assert self._get_column_names(Session, resource_id) == [
+            u"_id",
+            u"_full_text",
+            u"date",
+            u"temperature",
+            u"place",
+        ]
+        print(self._get_column_types(Session, resource_id))
+        assert self._get_column_types(Session, resource_id) == [
+            u"int4",
+            u"tsvector",
+        ] + [u"text"] * (len(records[0]) - 1)
         assert self._get_records(
             Session, resource_id, limit=1, exclude_full_text_column=False
         ) == [
@@ -113,7 +127,8 @@ def test_simple(self, Session):
                 u"Galway",
             )
         ]
-        assert self._get_records(Session, resource_id) == [
+        print(records)
+        assert records == [
             (1, u"2011-01-01", u"1", u"Galway"),
             (2, u"2011-01-02", u"-1", u"Galway"),
             (3, u"2011-01-03", u"0", u"Galway"),
@@ -121,20 +136,6 @@ def test_simple(self, Session):
             (5, None, None, u"Berkeley"),
             (6, u"2011-01-03", u"5", None),
         ]
-        assert self._get_column_names(Session, resource_id) == [
-            u"_id",
-            u"_full_text",
-            u"date",
-            u"temperature",
-            u"place",
-        ]
-        assert self._get_column_types(Session, resource_id) == [
-            u"int4",
-            u"tsvector",
-            u"text",
-            u"text",
-            u"text",
-        ]
 
     def test_simple_with_indexing(self, Session):
         csv_filepath = get_sample_filepath("simple.csv")
@@ -217,6 +218,45 @@ def test_boston_311(self, Session):
         )
 
         records = self._get_records(Session, resource_id)
+        print(self._get_column_names(Session, resource_id))
+        assert self._get_column_names(Session, resource_id) == [
+            u"_id",
+            u"_full_text",
+            u"CASE_ENQUIRY_ID",
+            u"open_dt",
+            u"target_dt",
+            u"closed_dt",
+            u"OnTime_Status",
+            u"CASE_STATUS",
+            u"CLOSURE_REASON",
+            u"CASE_TITLE",
+            u"SUBJECT",
+            u"REASON",
+            u"TYPE",
+            u"QUEUE",
+            u"Department",
+            u"SubmittedPhoto",
+            u"ClosedPhoto",
+            u"Location",
+            u"Fire_district",
+            u"pwd_district",
+            u"city_council_district",
+            u"police_district",
+            u"neighborhood",
+            u"neighborhood_services_district",
+            u"ward",
+            u"precinct",
+            u"LOCATION_STREET_NAME",
+            u"LOCATION_ZIPCODE",
+            u"Latitude",
+            u"Longitude",
+            u"Source",
+        ]  # noqa
+        print(self._get_column_types(Session, resource_id))
+        assert self._get_column_types(Session, resource_id) == [
+            u"int4",
+            u"tsvector",
+        ] + [u"text"] * (len(records[0]) - 1)
         print(records)
         assert records == [
             (
@@ -229,7 +269,7 @@ def test_boston_311(self, Session):
                 u"Open",
                 None,  # " " transforms to None
                 u"Street Light Outages",
-                u"Public Works Department",
+                u"Public Works Department",  # "   " trailing whitespace gets trimmed
                 u"Street Lights",
                 u"Street Light Outages",
                 u"PWDx_Street Light Outages",
@@ -316,45 +356,6 @@ def test_boston_311(self, Session):
                 u"Citizens Connect App",
             ),
         ]  # noqa
-        print(self._get_column_names(Session, resource_id))
-        assert self._get_column_names(Session, resource_id) == [
-            u"_id",
-            u"_full_text",
-            u"CASE_ENQUIRY_ID",
-            u"open_dt",
-            u"target_dt",
-            u"closed_dt",
-            u"OnTime_Status",
-            u"CASE_STATUS",
-            u"CLOSURE_REASON",
-            u"CASE_TITLE",
-            u"SUBJECT",
-            u"REASON",
-            u"TYPE",
-            u"QUEUE",
-            u"Department",
-            u"SubmittedPhoto",
-            u"ClosedPhoto",
-            u"Location",
-            u"Fire_district",
-            u"pwd_district",
-            u"city_council_district",
-            u"police_district",
-            u"neighborhood",
-            u"neighborhood_services_district",
-            u"ward",
-            u"precinct",
-            u"LOCATION_STREET_NAME",
-            u"LOCATION_ZIPCODE",
-            u"Latitude",
-            u"Longitude",
-            u"Source",
-        ]  # noqa
-        print(self._get_column_types(Session, resource_id))
-        assert self._get_column_types(Session, resource_id) == [
-            u"int4",
-            u"tsvector",
-        ] + [u"text"] * (len(records[0]) - 1)
 
     def test_brazilian(self, Session):
         csv_filepath = get_sample_filepath("brazilian_sample.csv")
@@ -368,105 +369,6 @@ def test_brazilian(self, Session):
         )
 
         records = self._get_records(Session, resource_id)
-        print(records)
-        assert records[0] == (
-            1,
-            u"01/01/1996 12:00:00 AM",
-            u"1100015",
-            u"ALTA FLORESTA D'OESTE",
-            u"RO",
-            None,
-            u"128",
-            u"0",
-            u"8",
-            u"119",
-            u"1",
-            u"0",
-            u"3613",
-            u"3051",
-            u"130",
-            u"7",
-            u"121",
-            u"3716",
-            u"3078",
-            u"127",
-            u"7",
-            None,
-            None,
-            None,
-            None,
-            u"6794",
-            u"5036",
-            u"1758",
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            u"337",
-            u"0.26112759",
-            u"0.17210683",
-            u"0.43323442",
-            u"0.13353115",
-            u"24.833692447908199",
-            None,
-            None,
-            u"22.704964",
-            u"67.080006197818605",
-            u"65.144188573097907",
-            u"74.672390253375497",
-            u"16.7913561569619",
-            u"19.4894563570641",
-            u"8.649237411458509",
-            u"7.60165422117368",
-            u"11.1540090366186",
-            u"17.263407056738099",
-            u"8.5269823",
-            u"9.2213373",
-            u"5.3085136",
-            u"52.472769803217503",
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            u"25.0011414302354",
-            u"22.830887000000001",
-            u"66.8150490097632",
-            u"64.893674212235595",
-            u"74.288246611754104",
-            u"17.0725384713319",
-            u"19.8404105332814",
-            u"8.856561911292371",
-            u"7.74275834336647",
-            u"11.357671741889",
-            u"17.9410577459881",
-            u"8.3696527",
-            u"8.9979973",
-            u"5.0570836",
-            u"53.286314230720798",
-            None,
-            None,
-            None,
-            None,
-            None,
-            u"122988",
-            None,
-            u"10.155015000000001",
-            u"14.826086999999999",
-            u"11.671533",
-            u"9.072917",
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-        )  # noqa
         print(self._get_column_names(Session, resource_id))
         assert self._get_column_names(Session, resource_id) == [
             u"_id",
@@ -572,6 +474,105 @@ def test_brazilian(self, Session):
             u"int4",
             u"tsvector",
         ] + [u"text"] * (len(records[0]) - 1)
+        print(records)
+        assert records[0] == (
+            1,
+            u"01/01/1996 12:00:00 AM",
+            u"1100015",
+            u"ALTA FLORESTA D'OESTE",
+            u"RO",
+            None,
+            u"128",
+            u"0",
+            u"8",
+            u"119",
+            u"1",
+            u"0",
+            u"3613",
+            u"3051",
+            u"130",
+            u"7",
+            u"121",
+            u"3716",
+            u"3078",
+            u"127",
+            u"7",
+            None,
+            None,
+            None,
+            None,
+            u"6794",
+            u"5036",
+            u"1758",
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            u"337",
+            u"0.26112759",
+            u"0.17210683",
+            u"0.43323442",
+            u"0.13353115",
+            u"24.833692447908199",
+            None,
+            None,
+            u"22.704964",
+            u"67.080006197818605",
+            u"65.144188573097907",
+            u"74.672390253375497",
+            u"16.7913561569619",
+            u"19.4894563570641",
+            u"8.649237411458509",
+            u"7.60165422117368",
+            u"11.1540090366186",
+            u"17.263407056738099",
+            u"8.5269823",
+            u"9.2213373",
+            u"5.3085136",
+            u"52.472769803217503",
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            u"25.0011414302354",
+            u"22.830887000000001",
+            u"66.8150490097632",
+            u"64.893674212235595",
+            u"74.288246611754104",
+            u"17.0725384713319",
+            u"19.8404105332814",
+            u"8.856561911292371",
+            u"7.74275834336647",
+            u"11.357671741889",
+            u"17.9410577459881",
+            u"8.3696527",
+            u"8.9979973",
+            u"5.0570836",
+            u"53.286314230720798",
+            None,
+            None,
+            None,
+            None,
+            None,
+            u"122988",
+            None,
+            u"10.155015000000001",
+            u"14.826086999999999",
+            u"11.671533",
+            u"9.072917",
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+        )  # noqa
 
     def test_german(self, Session):
         csv_filepath = get_sample_filepath("german_sample.csv")
@@ -585,20 +586,6 @@ def test_german(self, Session):
         )
 
         records = self._get_records(Session, resource_id)
-        print(records)
-        assert records[0] == (
-            1,
-            u"Zürich",
-            u"68260",
-            u"65444",
-            u"62646",
-            u"6503",
-            u"28800",
-            u"1173",
-            u"6891",
-            u"24221",
-            u"672",
-        )
         print(self._get_column_names(Session, resource_id))
         assert self._get_column_names(Session, resource_id) == [
             u"_id",
@@ -619,6 +606,20 @@ def test_german(self, Session):
             u"int4",
             u"tsvector",
         ] + [u"text"] * (len(records[0]) - 1)
+        print(records)
+        assert records[0] == (
+            1,
+            u"Zürich",
+            u"68260",
+            u"65444",
+            u"62646",
+            u"6503",
+            u"28800",
+            u"1173",
+            u"6891",
+            u"24221",
+            u"672",
+        )
 
     def test_with_blanks(self, Session):
         csv_filepath = get_sample_filepath("sample_with_blanks.csv")
@@ -699,7 +700,6 @@ def test_reload(self, Session):
             logger=logger,
         )
 
-        assert len(self._get_records(Session, resource_id)) == 6
         assert self._get_column_names(Session, resource_id) == [
             u"_id",
             u"_full_text",
@@ -714,6 +714,7 @@ def test_reload(self, Session):
             u"text",
             u"text",
         ]
+        assert len(self._get_records(Session, resource_id)) == 6
 
     @pytest.mark.skipif(
         not p.toolkit.check_ckan_version(min_version="2.7"),
@@ -752,7 +753,6 @@ def test_reload_with_overridden_types(self, Session):
             fields=fields, resource_id=resource_id, logger=logger
         )
 
-        assert len(self._get_records(Session, resource_id)) == 6
         assert self._get_column_names(Session, resource_id) == [
             u"_id",
             u"_full_text",
@@ -767,6 +767,7 @@ def test_reload_with_overridden_types(self, Session):
             u"numeric",
             u"text",
         ]
+        assert len(self._get_records(Session, resource_id)) == 6
 
         # check that rows with nulls are indexed correctly
         records = self._get_records(
@@ -919,6 +920,20 @@ def test_simple(self, Session):
         #   "'-01':4,5 '00':6,7,8 '1':1 '2011':3 'galway':2"
         #   "'-01':2,3 '00':5,6 '1':7 '2011':1 'galway':8 't00':4"
 
+        assert self._get_column_names(Session, resource_id) == [
+            u"_id",
+            u"_full_text",
+            u"date",
+            u"temperature",
+            u"place",
+        ]
+        assert self._get_column_types(Session, resource_id) == [
+            u"int4",
+            u"tsvector",
+            u"timestamp",
+            u"numeric",
+            u"text",
+        ]
         assert self._get_records(Session, resource_id) == [
             (1, datetime.datetime(2011, 1, 1, 0, 0), Decimal("1"), u"Galway",),
             (
@@ -947,20 +962,6 @@ def test_simple(self, Session):
                 u"Berkeley",
             ),
         ]
-        assert self._get_column_names(Session, resource_id) == [
-            u"_id",
-            u"_full_text",
-            u"date",
-            u"temperature",
-            u"place",
-        ]
-        assert self._get_column_types(Session, resource_id) == [
-            u"int4",
-            u"tsvector",
-            u"timestamp",
-            u"numeric",
-            u"text",
-        ]
 
     def test_simple_large_file(self, Session):
         csv_filepath = get_sample_filepath("simple-large.csv")
@@ -1039,6 +1040,74 @@ def test_boston_311(self, Session):
         )
 
         records = self._get_records(Session, resource_id)
+        print(self._get_column_names(Session, resource_id))
+        assert self._get_column_names(Session, resource_id) == [
+            u"_id",                                 # int4
+            u"_full_text",                          # tsvector
+            u"CASE_ENQUIRY_ID",                     # numeric
+            u"open_dt",                             # timestamp
+            u"target_dt",                           # timestamp
+            u"closed_dt",                           # text
+            u"OnTime_Status",                       # text
+            u"CASE_STATUS",                         # text
+            u"CLOSURE_REASON",                      # text
+            u"CASE_TITLE",                          # text
+            u"SUBJECT",                             # text
+            u"REASON",                              # text
+            u"TYPE",                                # text
+            u"QUEUE",                               # text
+            u"Department",                          # text
+            u"SubmittedPhoto",                      # text
+            u"ClosedPhoto",                         # text
+            u"Location",                            # text
+            u"Fire_district",                       # numeric
+            u"pwd_district",                        # numeric
+            u"city_council_district",               # numeric
+            u"police_district",                     # text
+            u"neighborhood",                        # text
+            u"neighborhood_services_district",      # numeric
+            u"ward",                                # text
+            u"precinct",                            # numeric
+            u"LOCATION_STREET_NAME",                # text
+            u"LOCATION_ZIPCODE",                    # numeric
+            u"Latitude",                            # numeric
+            u"Longitude",                           # numeric
+            u"Source",                              # text
+        ]  # noqa
+        print(self._get_column_types(Session, resource_id))
+        assert self._get_column_types(Session, resource_id) == [
+            u"int4",            # _id
+            u"tsvector",        # _full_text
+            u"numeric",         # CASE_ENQUIRY_ID
+            u"timestamp",       # open_dt
+            u"timestamp",       # target_dt
+            u"text",            # closed_dt
+            u"text",            # OnTime_Status
+            u"text",            # CASE_STATUS
+            u"text",            # CLOSURE_REASON
+            u"text",            # CASE_TITLE
+            u"text",            # SUBJECT
+            u"text",            # REASON
+            u"text",            # TYPE
+            u"text",            # QUEUE
+            u"text",            # Department
+            u"text",            # SubmittedPhoto
+            u"text",            # ClosedPhoto
+            u"text",            # Location
+            u"numeric",         # Fire_district
+            u"numeric",         # pwd_district
+            u"numeric",         # city_council_district
+            u"text",            # police_district
+            u"text",            # neighborhood
+            u"numeric",         # neighborhood_services_district
+            u"text",            # ward
+            u"numeric",         # precinct
+            u"text",            # LOCATION_STREET_NAME
+            u"numeric",         # LOCATION_ZIPCODE
+            u"numeric",         # Latitude
+            u"numeric",         # Longitude
+            u"text",            # Source
+        ]  # noqa
         print(records)
         assert records == [
             (
@@ -1051,7 +1120,7 @@ def test_boston_311(self, Session):
                 u"Open",
                 u"",  # " " transforms to ""
                 u"Street Light Outages",
-                u"Public Works Department",
+                u"Public Works Department",  # "   " trailing whitespace gets trimmed
                 u"Street Lights",
                 u"Street Light Outages",
                 u"PWDx_Street Light Outages",
@@ -1138,74 +1207,6 @@ def test_boston_311(self, Session):
                 u"Citizens Connect App",
             ),
         ]  # noqa
-        print(self._get_column_names(Session, resource_id))
-        assert self._get_column_names(Session, resource_id) == [
-            u"_id",
-            u"_full_text",
-            u"CASE_ENQUIRY_ID",
-            u"open_dt",
-            u"target_dt",
-            u"closed_dt",
-            u"OnTime_Status",
-            u"CASE_STATUS",
-            u"CLOSURE_REASON",
-            u"CASE_TITLE",
-            u"SUBJECT",
-            u"REASON",
-            u"TYPE",
-            u"QUEUE",
-            u"Department",
-            u"SubmittedPhoto",
-            u"ClosedPhoto",
-            u"Location",
-            u"Fire_district",
-            u"pwd_district",
-            u"city_council_district",
-            u"police_district",
-            u"neighborhood",
-            u"neighborhood_services_district",
-            u"ward",
-            u"precinct",
-            u"LOCATION_STREET_NAME",
-            u"LOCATION_ZIPCODE",
-            u"Latitude",
-            u"Longitude",
-            u"Source",
-        ]  # noqa
-        print(self._get_column_types(Session, resource_id))
-        assert self._get_column_types(Session, resource_id) == [
-            u"int4",
-            u"tsvector",
-            u"numeric",
-            u"timestamp",
-            u"timestamp",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"text",
-            u"numeric",
-            u"numeric",
-            u"numeric",
-            u"text",
-            u"text",
-            u"numeric",
-            u"text",
-            u"numeric",
-            u"text",
-            u"numeric",
-            u"numeric",
-            u"numeric",
-            u"text",
-        ]  # noqa
 
     def test_no_entries(self):
         csv_filepath = get_sample_filepath("no_entries.csv")

From c00fb5ae3c90187e27fc42b67df365572cf261b5 Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Mon, 15 Jul 2024 19:09:30 +0000
Subject: [PATCH 11/16] fix(tests,logic): misc fixes;

- Updated post parser to set empty string type cells to `None` for parity with `load_csv`.
- Updated some tests for new code.
- Added check for ckan version `2.11` for the data dictionary form.
- Updated conditions in `load_csv`.
---
 ckanext/xloader/loader.py                     | 18 +++++++++--------
 ckanext/xloader/parser.py                     |  6 +++++-
 ckanext/xloader/plugin.py                     | 10 ++++++++--
 .../datastore/snippets/dictionary_form.html   | 12 ++++++-----
 ckanext/xloader/tests/test_loader.py          | 20 +++++++++----------
 requirements.txt                              |  4 ++--
 6 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index f8454904..8528a657 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -229,10 +229,11 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
                 super_iter = stream.iter
                 def strip_white_space_iter():
                     for row in super_iter():
-                        for _index, _cell in enumerate(row):
-                            # only strip white space if strip_extra_white is True
-                            if fields and fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
-                                row[_index] = _cell.strip()
+                        if len(row) == len(fields):
+                            for _index, _cell in enumerate(row):
+                                # only strip white space if strip_extra_white is True
+                                if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
+                                    row[_index] = _cell.strip()
                         yield row
                 stream.iter = strip_white_space_iter
                 stream.save(**save_args)
@@ -242,10 +243,11 @@ def strip_white_space_iter():
                 super_iter = stream.iter
                 def strip_white_space_iter():
                     for row in super_iter():
-                        for _index, _cell in enumerate(row):
-                            # only strip white space if strip_extra_white is True
-                            if fields and fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
-                                row[_index] = _cell.strip()
+                        if len(row) == len(fields):
+                            for _index, _cell in enumerate(row):
+                                # only strip white space if strip_extra_white is True
+                                if fields[_index].get('strip_extra_white', True) and isinstance(_cell, str):
+                                    row[_index] = _cell.strip()
                         yield row
                 stream.iter = strip_white_space_iter
                 stream.save(**save_args)
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index c587f187..26193203 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -34,10 +34,14 @@ def convert_types(self, extended_rows):
                     row[cell_index] = ''
                 if self.fields:
                     # only strip white space if strip_extra_white is True
-                    if self.fields[cell_index].get('strip_extra_white', True) and isinstance(cell_value, str):
+                    if self.fields[cell_index].get('strip_extra_white', True) and isinstance(cell_value, six.text_type):
                         cell_value = cell_value.strip()
                         row[cell_index] = cell_value.strip()
                 if not cell_value:
+                    # load_csv parity: empty cells of string type should be None
+                    if self.types and self.types[cell_index] == six.text_type:
+                        cell_value = None
+                        row[cell_index] = None
                     continue
                 cell_type = self.types[cell_index] if self.types else None
                 if cell_type in [Decimal, None]:
diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py
index ba74119e..fc4ed2e3 100644
--- a/ckanext/xloader/plugin.py
+++ b/ckanext/xloader/plugin.py
@@ -4,7 +4,6 @@
 
 from ckan import plugins
 from ckan.plugins import toolkit
-from ckanext.datastore.interfaces import IDataDictionaryForm
 
 from ckan.model.domain_object import DomainObjectOperation
 from ckan.model.resource import Resource
@@ -21,6 +20,12 @@
     def config_declarations(cls):
         return cls
 
+if toolkit.check_ckan_version(min_version='2.11'):
+    from ckanext.datastore.interfaces import IDataDictionaryForm
+    has_idata_dictionary_form = True
+else:
+    has_idata_dictionary_form = False
+
 log = logging.getLogger(__name__)
 
 
@@ -35,7 +40,8 @@ class xloaderPlugin(plugins.SingletonPlugin):
     plugins.implements(plugins.IResourceController, inherit=True)
     plugins.implements(plugins.IClick)
     plugins.implements(plugins.IBlueprint)
-    plugins.implements(IDataDictionaryForm, inherit=True)
+    if has_idata_dictionary_form:
+        plugins.implements(IDataDictionaryForm, inherit=True)
 
     # IClick
     def get_commands(self):
diff --git a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
index 02919354..f5c6d06f 100644
--- a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
+++ b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
@@ -3,9 +3,11 @@
 
 {% block additional_fields %}
   {{ super() }}
-  {{ form.select('fields__' ~ position ~ '__strip_extra_white',
-    label=_('Strip Extra Leading and Trailing White Space'), options=[
-    {'text': _('Yes'), 'value': true},
-    {'text': _('No'), 'value': false},
-    ], selected=field.get('strip_extra_white')) }}
+  {% if h.check_ckan_version(min_version='2.11') %}
+    {{ form.select('fields__' ~ position ~ '__strip_extra_white',
+      label=_('Strip Extra Leading and Trailing White Space'), options=[
+      {'text': _('Yes'), 'value': true},
+      {'text': _('No'), 'value': false},
+      ], selected=field.get('strip_extra_white')) }}
+  {% endif %}
 {% endblock %}
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 0e225a06..2fae544e 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -1136,18 +1136,18 @@ def test_boston_311(self, Session):
                 Decimal("101002153891"),
                 datetime.datetime(2017, 7, 6, 23, 38, 43),
                 datetime.datetime(2017, 7, 21, 8, 30),
-                u"",
+                None,
                 u"ONTIME",
                 u"Open",
-                u"",  # " " transforms to ""
+                None,  # " " transforms to None
                 u"Street Light Outages",
                 u"Public Works Department",  # "   " trailing whitespace gets trimmed
                 u"Street Lights",
                 u"Street Light Outages",
                 u"PWDx_Street Light Outages",
                 u"PWDx",
-                u"",
-                u"",
+                None,
+                None,
                 u"480 Harvard St  Dorchester  MA  02124",
                 Decimal("8"),
                 Decimal("7"),
@@ -1168,10 +1168,10 @@ def test_boston_311(self, Session):
                 Decimal("101002153890"),
                 datetime.datetime(2017, 7, 6, 23, 29, 13),
                 datetime.datetime(2017, 9, 11, 8, 30),
-                u"",
+                None,
                 u"ONTIME",
                 u"Open",
-                u"",  # " " transforms to ""
+                None,  # " " transforms to None
                 u"Graffiti Removal",
                 u"Property Management",
                 u"Graffiti",
@@ -1179,7 +1179,7 @@ def test_boston_311(self, Session):
                 u"PROP_GRAF_GraffitiRemoval",
                 u"PROP",
                 u"https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",  # strip white spaces
-                u"",
+                None,
                 u"522 Saratoga St  East Boston  MA  02128",
                 Decimal("1"),
                 Decimal("9"),
@@ -1200,10 +1200,10 @@ def test_boston_311(self, Session):
                 Decimal("101002153889"),
                 datetime.datetime(2017, 7, 6, 23, 24, 20),
                 datetime.datetime(2017, 9, 11, 8, 30),
-                u"",
+                None,
                 u"ONTIME",
                 u"Open",
-                u"",  # " " transforms to ""
+                None,  # " " transforms to None
                 u"Graffiti Removal",
                 u"Property Management",
                 u"Graffiti",
@@ -1211,7 +1211,7 @@ def test_boston_311(self, Session):
                 u"PROP_GRAF_GraffitiRemoval",
                 u"PROP",
                 u"https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",  # strip white spaces
-                u"",
+                None,
                 u"965 Bennington St  East Boston  MA  02128",
                 Decimal("1"),
                 Decimal("9"),
diff --git a/requirements.txt b/requirements.txt
index fe92b6d7..ce7cd03e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
-ckantoolkit
+ckantoolkit>=0.0.4
 requests[security]>=2.11.1
 six>=1.12.0
 tabulator==1.53.5
 Unidecode==1.0.22
 python-dateutil>=2.8.2
-chardet==5.2.0
\ No newline at end of file
+chardet==5.2.0

From 669930eb59c68135ed4f22a1f29b4f2aa22a1528 Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Mon, 15 Jul 2024 20:04:09 +0000
Subject: [PATCH 12/16] fix(tests,logic): new output and parity;

- Fixed test for new output with `strip_extra_white`.
- Fixed CKAN version parity of `info` vs `_info` in `update_datastore_info_field` (upstream issue).
---
 ckanext/xloader/plugin.py          | 3 +++
 ckanext/xloader/tests/test_jobs.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py
index fc4ed2e3..e8268776 100644
--- a/ckanext/xloader/plugin.py
+++ b/ckanext/xloader/plugin.py
@@ -228,6 +228,9 @@ def update_datastore_create_schema(self, schema):
     def update_datastore_info_field(self, field, plugin_data):
         # expose all our non-secret plugin data in the field
         field.update(plugin_data.get('xloader', {}))
+        # CKAN version parity
+        if '_info' in plugin_data:
+            field.update({'info': plugin_data['_info']})
         return field
 
 
diff --git a/ckanext/xloader/tests/test_jobs.py b/ckanext/xloader/tests/test_jobs.py
index e819dad9..62ae7174 100644
--- a/ckanext/xloader/tests/test_jobs.py
+++ b/ckanext/xloader/tests/test_jobs.py
@@ -81,7 +81,7 @@ def test_xloader_data_into_datastore(self, cli, data):
         with mock.patch("ckanext.xloader.jobs.get_response", get_response):
             stdout = cli.invoke(ckan, ["jobs", "worker", "--burst"]).output
             assert "File hash: d44fa65eda3675e11710682fdb5f1648" in stdout
-            assert "Fields: [{'id': 'x', 'type': 'text'}, {'id': 'y', 'type': 'text'}]" in stdout
+            assert "Fields: [{'id': 'x', 'type': 'text', 'strip_extra_white': True}, {'id': 'y', 'type': 'text', 'strip_extra_white': True}]" in stdout
             assert "Copying to database..." in stdout
             assert "Creating search index..." in stdout
             assert "Express Load completed" in stdout

From 3263bababcb0bcb0c20744cf360d119fd607ac0a Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 16 Jul 2024 13:57:23 +0000
Subject: [PATCH 13/16] fix(logic): ckan versioning;

- DS fields for ckan versions.
---
 ckanext/xloader/loader.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 8528a657..da0edb1d 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -178,8 +178,11 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         existing = datastore_resource_exists(resource_id)
         existing_info = {}
         if existing:
-            ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id})
-            existing_fields = ds_info.get('fields', [])
+            if p.toolkit.check_ckan_version(max_version='2.9'):
+                existing_fields = existing.get('fields', [])
+            else:
+                ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id})
+                existing_fields = ds_info.get('fields', [])
             existing_info = dict((f['id'], f['info'])
                                  for f in existing_fields
                                  if 'info' in f)

From 7cb6a84d682a5c691ab3d40eba5c31df3c61512c Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 16 Jul 2024 14:15:00 +0000
Subject: [PATCH 14/16] fix(logic): ckan versioning;

- DS fields for ckan versions.
---
 ckanext/xloader/loader.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index da0edb1d..1da79fee 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -178,11 +178,11 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
         existing = datastore_resource_exists(resource_id)
         existing_info = {}
         if existing:
-            if p.toolkit.check_ckan_version(max_version='2.9'):
-                existing_fields = existing.get('fields', [])
-            else:
+            if p.toolkit.check_ckan_version(min_version='2.10'):
                 ds_info = p.toolkit.get_action('datastore_info')({'ignore_auth': True}, {'id': resource_id})
                 existing_fields = ds_info.get('fields', [])
+            else:
+                existing_fields = existing.get('fields', [])
             existing_info = dict((f['id'], f['info'])
                                  for f in existing_fields
                                  if 'info' in f)

From bf2e9396e541f36f71833cbb11124a30dca32240 Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Tue, 16 Jul 2024 14:36:52 +0000
Subject: [PATCH 15/16] feat(templates,logic): pre-datadictionary implement;

- Support current versions of CKAN for the DataDictionary form override for `strip_extra_white`.
- Support current versions of CKAN for existing info before existing data dictionary custom fields.
---
 ckanext/xloader/loader.py                          |  6 ++++--
 .../datastore/snippets/dictionary_form.html        | 14 +++++++++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 1da79fee..54ab026b 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -202,7 +202,8 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
             for f in fields:
                 if f['id'] in existing_info:
                     f['info'] = existing_info[f['id']]
-                    f['strip_extra_white'] = existing_fields_by_headers[f['id']].get('strip_extra_white', True)
+                    f['strip_extra_white'] = existing_info[f['id']].get('strip_extra_white') if 'strip_extra_white' in existing_info[f['id']] \
+                         else existing_fields_by_headers[f['id']].get('strip_extra_white', True)
 
             '''
             Delete or truncate existing datastore table before proceeding,
@@ -459,7 +460,8 @@ def row_iterator():
             for h in headers_dicts:
                 if h['id'] in existing_info:
                     h['info'] = existing_info[h['id']]
-                    h['strip_extra_white'] = existing_fields_by_headers[h['id']].get('strip_extra_white', True)
+                    h['strip_extra_white'] = existing_info[h['id']].get('strip_extra_white') if 'strip_extra_white' in existing_info[h['id']] \
+                        else existing_fields_by_headers[h['id']].get('strip_extra_white', True)
                     # create columns with types user requested
                     type_override = existing_info[h['id']].get('type_override')
                     if type_override in list(_TYPE_MAPPING.values()):
diff --git a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
index f5c6d06f..808aa764 100644
--- a/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
+++ b/ckanext/xloader/templates/datastore/snippets/dictionary_form.html
@@ -4,10 +4,14 @@
 {% block additional_fields %}
   {{ super() }}
   {% if h.check_ckan_version(min_version='2.11') %}
-    {{ form.select('fields__' ~ position ~ '__strip_extra_white',
-      label=_('Strip Extra Leading and Trailing White Space'), options=[
-      {'text': _('Yes'), 'value': true},
-      {'text': _('No'), 'value': false},
-      ], selected=field.get('strip_extra_white')) }}
+    {% set field_prefix = 'fields__' %}
+  {% else %}
+    {% set field_prefix = 'info__' %}
   {% endif %}
+  {% set is_selected = field.get('info', {}).get('strip_extra_white', field.get('strip_extra_white')) != 'False' %}
+  {{ form.select(field_prefix ~ position ~ '__strip_extra_white',
+    label=_('Strip Extra Leading and Trailing White Space'), options=[
+    {'text': _('Yes'), 'value': true},
+    {'text': _('No'), 'value': false},
+    ], selected=is_selected) }}
 {% endblock %}

From d6de1b18bcfcd22062dd7d038c2d9220869375aa Mon Sep 17 00:00:00 2001
From: Jesse Vickery <jesse.vickery@tbs-sct.gc.ca>
Date: Mon, 22 Jul 2024 19:45:24 +0000
Subject: [PATCH 16/16] feat(tests): add coverage;

- Added test coverage for loading with `strip_extra_white` disabled.
---
 ckanext/xloader/tests/test_loader.py | 379 +++++++++++++++++++++++++++
 1 file changed, 379 insertions(+)

diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 2fae544e..ba1b9288 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -817,6 +817,181 @@ def test_column_names(self, Session):
             u"Galway",
         )
 
+    def test_load_with_no_strip_white(self, Session):
+        csv_filepath = get_sample_filepath("boston_311_sample.csv")
+        resource = factories.Resource()
+        resource_id = resource['id']
+        loader.load_csv(
+            csv_filepath,
+            resource_id=resource_id,
+            mimetype="text/csv",
+            logger=logger,
+        )
+
+        # Change strip_extra_white, as it would be done by Data Dictionary
+        rec = p.toolkit.get_action("datastore_search")(
+            None, {"resource_id": resource_id, "limit": 0}
+        )
+        fields = [f for f in rec["fields"] if not f["id"].startswith("_")]
+        for field in fields:
+            field["info"] = {"strip_extra_white": False}  # <=2.10
+            field["strip_extra_white"] = False  # >=2.11
+        p.toolkit.get_action("datastore_create")(
+            {"ignore_auth": True},
+            {"resource_id": resource_id, "force": True, "fields": fields},
+        )
+
+        # Load it again with new strip_extra_white
+        fields = loader.load_csv(
+            csv_filepath,
+            resource_id=resource_id,
+            mimetype="text/csv",
+            logger=logger,
+        )
+        loader.create_column_indexes(
+            fields=fields, resource_id=resource_id, logger=logger
+        )
+
+        records = self._get_records(Session, resource_id)
+        print(self._get_column_names(Session, resource_id))
+        assert self._get_column_names(Session, resource_id) == [
+            u"_id",
+            u"_full_text",
+            u"CASE_ENQUIRY_ID",
+            u"open_dt",
+            u"target_dt",
+            u"closed_dt",
+            u"OnTime_Status",
+            u"CASE_STATUS",
+            u"CLOSURE_REASON",
+            u"CASE_TITLE",
+            u"SUBJECT",
+            u"REASON",
+            u"TYPE",
+            u"QUEUE",
+            u"Department",
+            u"SubmittedPhoto",
+            u"ClosedPhoto",
+            u"Location",
+            u"Fire_district",
+            u"pwd_district",
+            u"city_council_district",
+            u"police_district",
+            u"neighborhood",
+            u"neighborhood_services_district",
+            u"ward",
+            u"precinct",
+            u"LOCATION_STREET_NAME",
+            u"LOCATION_ZIPCODE",
+            u"Latitude",
+            u"Longitude",
+            u"Source",
+        ]  # noqa
+        print(self._get_column_types(Session, resource_id))
+        assert self._get_column_types(Session, resource_id) == [
+            u"int4",
+            u"tsvector",
+        ] + [u"text"] * (len(records[0]) - 1)
+        print(records)
+        assert records == [
+            (
+                4,  # ds auto increment
+                u"101002153891",
+                u"2017-07-06 23:38:43",
+                u"2017-07-21 08:30:00",
+                None,
+                u"ONTIME",
+                u"Open",
+                u" ",  # no strip_extra_white
+                u"Street Light Outages",
+                u"Public Works Department   ",  # no strip_extra_white
+                u"Street Lights",
+                u"Street Light Outages",
+                u"PWDx_Street Light Outages",
+                u"PWDx",
+                None,
+                None,
+                u"480 Harvard St  Dorchester  MA  02124",
+                u"8",
+                u"07",
+                u"4",
+                u"B3",
+                u"Greater Mattapan",
+                u"9",
+                u"Ward 14",
+                u"1411",
+                u"480 Harvard St",
+                u"02124",
+                u"42.288",
+                u"-71.0927",
+                u"Citizens Connect App",
+            ),  # noqa
+            (
+                5,  # ds auto increment
+                u"101002153890",
+                u"2017-07-06 23:29:13",
+                u"2017-09-11 08:30:00",
+                None,
+                u"ONTIME",
+                u"Open",
+                u" ",  # no strip_extra_white
+                u"Graffiti Removal",
+                u"Property Management",
+                u"Graffiti",
+                u"Graffiti Removal",
+                u"PROP_GRAF_GraffitiRemoval",
+                u"PROP",
+                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",  # no strip_extra_white
+                None,
+                u"522 Saratoga St  East Boston  MA  02128",
+                u"1",
+                u"09",
+                u"1",
+                u"A7",
+                u"East Boston",
+                u"1",
+                u"Ward 1",
+                u"0110",
+                u"522 Saratoga St",
+                u"02128",
+                u"42.3807",
+                u"-71.0259",
+                u"Citizens Connect App",
+            ),  # noqa
+            (
+                6,  # ds auto increment
+                u"101002153889",
+                u"2017-07-06 23:24:20",
+                u"2017-09-11 08:30:00",
+                None,
+                u"ONTIME",
+                u"Open",
+                u" ",  # no strip_extra_white
+                u"Graffiti Removal",
+                u"Property Management",
+                u"Graffiti",
+                u"Graffiti Removal",
+                u"PROP_GRAF_GraffitiRemoval",
+                u"PROP",
+                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",  # no strip_extra_white
+                None,
+                u"965 Bennington St  East Boston  MA  02128",
+                u"1",
+                u"09",
+                u"1",
+                u"A7",
+                u"East Boston",
+                u"1",
+                u"Ward 1",
+                u"0112",
+                u"965 Bennington St",
+                u"02128",
+                u"42.386",
+                u"-71.008",
+                u"Citizens Connect App",
+            ),
+        ]  # noqa
+
 
 class TestLoadUnhandledTypes(TestLoadBase):
     def test_kml(self):
@@ -1299,3 +1474,207 @@ def test_preserving_time_ranges(self, Session):
             (3, "Barcaldine", 4725, Decimal("-23.55327901"), Decimal("145.289156"),
              "9:00-12:30", "13:30-16:30", datetime.datetime(2018, 7, 20))
         ]
+
+    def test_load_with_no_strip_white(self, Session):  # reload with strip_extra_white off; whitespace must be kept
+        csv_filepath = get_sample_filepath("boston_311_sample.csv")
+        resource = factories.Resource()
+        resource_id = resource['id']
+        loader.load_table(
+            csv_filepath,
+            resource_id=resource_id,
+            mimetype="csv",
+            logger=logger,
+        )
+
+        # Turn strip_extra_white off for every non-internal field, as the Data Dictionary would
+        rec = p.toolkit.get_action("datastore_search")(
+            None, {"resource_id": resource_id, "limit": 0}  # limit 0: only rec["fields"] is used below
+        )
+        fields = [f for f in rec["fields"] if not f["id"].startswith("_")]
+        for field in fields:
+            field["info"] = {"strip_extra_white": False}  # <=2.10: flag nested under "info"
+            field["strip_extra_white"] = False  # >=2.11: flag set directly on the field
+        p.toolkit.get_action("datastore_create")(
+            {"ignore_auth": True},
+            {"resource_id": resource_id, "force": True, "fields": fields},
+        )
+
+        # Load the same file again now that strip_extra_white is disabled
+        fields = loader.load_table(
+            csv_filepath,
+            resource_id=resource_id,
+            mimetype="csv",
+            logger=logger,
+        )
+        loader.create_column_indexes(
+            fields=fields, resource_id=resource_id, logger=logger
+        )
+
+        records = self._get_records(Session, resource_id)
+        print(self._get_column_names(Session, resource_id))
+        assert self._get_column_names(Session, resource_id) == [
+            u"_id",                                 # int4
+            u"_full_text",                          # tsvector
+            u"CASE_ENQUIRY_ID",                     # numeric
+            u"open_dt",                             # timestamp
+            u"target_dt",                           # timestamp
+            u"closed_dt",                           # text
+            u"OnTime_Status",                       # text
+            u"CASE_STATUS",                         # text
+            u"CLOSURE_REASON",                      # text
+            u"CASE_TITLE",                          # text
+            u"SUBJECT",                             # text
+            u"REASON",                              # text
+            u"TYPE",                                # text
+            u"QUEUE",                               # text
+            u"Department",                          # text
+            u"SubmittedPhoto",                      # text
+            u"ClosedPhoto",                         # text
+            u"Location",                            # text
+            u"Fire_district",                       # numeric
+            u"pwd_district",                        # numeric
+            u"city_council_district",               # numeric
+            u"police_district",                     # text
+            u"neighborhood",                        # text
+            u"neighborhood_services_district",      # numeric
+            u"ward",                                # text
+            u"precinct",                            # numeric
+            u"LOCATION_STREET_NAME",                # text
+            u"LOCATION_ZIPCODE",                    # numeric
+            u"Latitude",                            # numeric
+            u"Longitude",                           # numeric
+            u"Source",                              # text
+        ]  # noqa
+        print(self._get_column_types(Session, resource_id))
+        assert self._get_column_types(Session, resource_id) == [
+            u"int4",            # _id
+            u"tsvector",        # _full_text
+            u"numeric",         # CASE_ENQUIRY_ID
+            u"timestamp",       # open_dt
+            u"timestamp",       # target_dt
+            u"text",            # closed_dt
+            u"text",            # OnTime_Status
+            u"text",            # CASE_STATUS
+            u"text",            # CLOSURE_REASON
+            u"text",            # CASE_TITLE
+            u"text",            # SUBJECT
+            u"text",            # REASON
+            u"text",            # TYPE
+            u"text",            # QUEUE
+            u"text",            # Department
+            u"text",            # SubmittedPhoto
+            u"text",            # ClosedPhoto
+            u"text",            # Location
+            u"numeric",         # Fire_district
+            u"numeric",         # pwd_district
+            u"numeric",         # city_council_district
+            u"text",            # police_district
+            u"text",            # neighborhood
+            u"numeric",         # neighborhood_services_district
+            u"text",            # ward
+            u"numeric",         # precinct
+            u"text",            # LOCATION_STREET_NAME
+            u"numeric",         # LOCATION_ZIPCODE
+            u"numeric",         # Latitude
+            u"numeric",         # Longitude
+            u"text",            # Source
+        ]  # noqa
+        print(records)
+        assert records == [
+            (
+                4,  # ds auto increment; presumably 4-6 because the first load used ids 1-3 (TODO confirm)
+                Decimal("101002153891"),
+                datetime.datetime(2017, 7, 6, 23, 38, 43),
+                datetime.datetime(2017, 7, 21, 8, 30),
+                None,
+                u"ONTIME",
+                u"Open",
+                u" ",  # no strip_extra_white
+                u"Street Light Outages",
+                u"Public Works Department   ",  # no strip_extra_white
+                u"Street Lights",
+                u"Street Light Outages",
+                u"PWDx_Street Light Outages",
+                u"PWDx",
+                None,
+                None,
+                u"480 Harvard St  Dorchester  MA  02124",
+                Decimal("8"),
+                Decimal("7"),
+                Decimal("4"),
+                u"B3",
+                u"Greater Mattapan",
+                Decimal("9"),
+                u"Ward 14",
+                Decimal("1411"),
+                u"480 Harvard St",
+                Decimal("2124"),
+                Decimal("42.288"),
+                Decimal("-71.0927"),
+                u"Citizens Connect App",
+            ),  # noqa
+            (
+                5,  # ds auto increment
+                Decimal("101002153890"),
+                datetime.datetime(2017, 7, 6, 23, 29, 13),
+                datetime.datetime(2017, 9, 11, 8, 30),
+                None,
+                u"ONTIME",
+                u"Open",
+                u" ",  # no strip_extra_white
+                u"Graffiti Removal",
+                u"Property Management",
+                u"Graffiti",
+                u"Graffiti Removal",
+                u"PROP_GRAF_GraffitiRemoval",
+                u"PROP",
+                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595f0000048560f46d94b9fa/report.jpg",  # no strip_extra_white
+                None,
+                u"522 Saratoga St  East Boston  MA  02128",
+                Decimal("1"),
+                Decimal("9"),
+                Decimal("1"),
+                u"A7",
+                u"East Boston",
+                Decimal("1"),
+                u"Ward 1",
+                Decimal("110"),
+                u"522 Saratoga St",
+                Decimal("2128"),
+                Decimal("42.3807"),
+                Decimal("-71.0259"),
+                u"Citizens Connect App",
+            ),  # noqa
+            (
+                6,  # ds auto increment
+                Decimal("101002153889"),
+                datetime.datetime(2017, 7, 6, 23, 24, 20),
+                datetime.datetime(2017, 9, 11, 8, 30),
+                None,
+                u"ONTIME",
+                u"Open",
+                u" ",  # no strip_extra_white
+                u"Graffiti Removal",
+                u"Property Management",
+                u"Graffiti",
+                u"Graffiti Removal",
+                u"PROP_GRAF_GraffitiRemoval",
+                u"PROP",
+                u" https://mayors24.cityofboston.gov/media/boston/report/photos/595efedb048560f46d94b9ef/report.jpg",  # no strip_extra_white
+                None,
+                u"965 Bennington St  East Boston  MA  02128",
+                Decimal("1"),
+                Decimal("9"),
+                Decimal("1"),
+                u"A7",
+                u"East Boston",
+                Decimal("1"),
+                u"Ward 1",
+                Decimal("112"),
+                u"965 Bennington St",
+                Decimal("2128"),
+                Decimal("42.386"),
+                Decimal("-71.008"),
+                u"Citizens Connect App",
+            ),
+        ]  # noqa