From 702cc9ebfd80c4bcbce8fa534b43173e289b2c25 Mon Sep 17 00:00:00 2001 From: Brian Park Date: Mon, 17 Jun 2019 08:08:08 -0700 Subject: [PATCH 01/13] README.md: Fix date typo, should be 2019-06-17 not 2019-06-19 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6689888..4932607 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ $ generate-schema < file.data.json > file.schema.json $ generate-schema --input_format csv < file.data.csv > file.schema.json ``` -Version: 0.5.1 (2019-06-19) +Version: 0.5.1 (2019-06-17) ## Background From fc3293b50d6dfd06cdb12605c6247debb206a75d Mon Sep 17 00:00:00 2001 From: Brian Park Date: Mon, 9 Sep 2019 13:56:24 -0700 Subject: [PATCH 02/13] README.md: Discourage running pip3 as root when installing (#38) --- README.md | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 4932607..1a2a624 100644 --- a/README.md +++ b/README.md @@ -44,18 +44,33 @@ the input dataset. ## Installation -Install from [PyPI](https://pypi.python.org/pypi) repository using `pip3`. -If you want to install the package for your entire system globally, use +Install from [PyPI](https://pypi.python.org/pypi) repository using `pip3`. There +are too many ways to install packages in Python. 
The following are in order +highest to lowest recommendation: + +1) If you are using a virtual environment (such as +[venv](https://docs.python.org/3/library/venv.html)), then use: ``` -$ sudo -H pip3 install bigquery_schema_generator +$ pip3 install bigquery_schema_generator ``` -If you are using a virtual environment (such as -[venv](https://docs.python.org/3/library/venv.html)), then you don't need -the `sudo` coommand, and you can just type: + +2) If you aren't using a virtual environment you can install into +your local Python directory: + ``` -$ pip3 install bigquery_schema_generator +$ pip3 install --user bigquery_schema_generator ``` +3) If you want to install the package for your entire system globally, use +``` +$ sudo -H pip3 install bigquery_schema_generator +``` +but realize that you will be running code from PyPI as `root` so this has +security implications. + +Sometimes, your Python environment gets into a complete mess and the `pip3` +command won't work. Try typing `python3 -m pip` instead. + A successful install should print out something like the following (the version number may be different): ``` From 8e29d06e054820e888e7f834ced081bbebc4910c Mon Sep 17 00:00:00 2001 From: "Riccardo M. Cefala" Date: Sat, 4 Apr 2020 14:48:48 +0200 Subject: [PATCH 03/13] fix recursive call in flatten_schema_map and add relative test --- bigquery_schema_generator/generate_schema.py | 2 +- tests/testdata.txt | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py index 8f1b0c9..593b069 100755 --- a/bigquery_schema_generator/generate_schema.py +++ b/bigquery_schema_generator/generate_schema.py @@ -679,7 +679,7 @@ def flatten_schema_map(schema_map, else: # Recursively flatten the sub-fields of a RECORD entry. 
new_value = flatten_schema_map( - value, keep_nulls, sorted_schema, sanitize_names) + value, keep_nulls, sorted_schema, infer_mode, sanitize_names) elif key == 'type' and value in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']: new_value = value[1:] elif key == 'mode': diff --git a/tests/testdata.txt b/tests/testdata.txt index 8053bc3..45d5551 100644 --- a/tests/testdata.txt +++ b/tests/testdata.txt @@ -873,6 +873,26 @@ SCHEMA ] END +# Sanitize the names to comply with BigQuery recursively. +DATA sanitize_names +{ "r" : { "a-name": [1, 2] } } +SCHEMA +[ + { + "fields": [ + { + "mode": "REPEATED", + "name": "a_name", + "type": "INTEGER" + } + ], + "mode": "NULLABLE", + "name": "r", + "type": "RECORD" + } +] +END + # Sanitize the names to comply with BigQuery. DATA csv infer_mode sanitize_names name,surname,age_in_#years,eighteencharacterseighteencharacterseighteencharacterseighteencharacterseighteencharacterseighteencharacterseighteencharacterseighteencharacters From 1ab0329a0727959da92907b0f3d6f07df2e5440e Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 09:45:12 -0700 Subject: [PATCH 04/13] generate_schema.py: use named parameters to avoid bug #43 in the future --- bigquery_schema_generator/generate_schema.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py index 593b069..f76301d 100755 --- a/bigquery_schema_generator/generate_schema.py +++ b/bigquery_schema_generator/generate_schema.py @@ -618,11 +618,13 @@ def is_string_type(thetype): ] -def flatten_schema_map(schema_map, - keep_nulls=False, - sorted_schema=True, - infer_mode=False, - sanitize_names=False): +def flatten_schema_map( + schema_map, + keep_nulls=False, + sorted_schema=True, + infer_mode=False, + sanitize_names=False, +): """Converts the 'schema_map' into a more flatten version which is compatible with BigQuery schema. 
@@ -679,7 +681,12 @@ def flatten_schema_map(schema_map, else: # Recursively flatten the sub-fields of a RECORD entry. new_value = flatten_schema_map( - value, keep_nulls, sorted_schema, infer_mode, sanitize_names) + schema_map=value, + keep_nulls=keep_nulls, + sorted_schema=sorted_schema, + infer_mode=infer_mode, + sanitize_names=sanitize_names, + ) elif key == 'type' and value in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']: new_value = value[1:] elif key == 'mode': From 6385840c979bf7e1199bdb5f7fa50269191907a5 Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 09:58:15 -0700 Subject: [PATCH 05/13] generate_schema.py: prettify some complex OrderedDict() lines --- bigquery_schema_generator/generate_schema.py | 90 +++++++++++--------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py index f76301d..b5114ce 100755 --- a/bigquery_schema_generator/generate_schema.py +++ b/bigquery_schema_generator/generate_schema.py @@ -114,8 +114,8 @@ def __init__(self, # This option generally wants to be turned on as any inferred schema # will not be accepted by `bq load` when it contains illegal characters. - # Characters such as #, / or -. Neither will it be accepted if the column name - # in the schema is larger than 128 characters. + # Characters such as #, / or -. Neither will it be accepted if the + # column name in the schema is larger than 128 characters. 
self.sanitize_names = sanitize_names def log_error(self, msg): @@ -323,7 +323,6 @@ def get_schema_entry(self, key, value): if not value_mode or not value_type: return None - # yapf: disable if value_type == 'RECORD': # recursively figure out the RECORD fields = OrderedDict() @@ -332,39 +331,48 @@ def get_schema_entry(self, key, value): else: for val in value: self.deduce_schema_for_line(val, fields) - schema_entry = OrderedDict([('status', 'hard'), - ('filled', True), - ('info', OrderedDict([ - ('fields', fields), - ('mode', value_mode), - ('name', key), - ('type', value_type), - ]))]) + # yapf: disable + schema_entry = OrderedDict([ + ('status', 'hard'), + ('filled', True), + ('info', OrderedDict([ + ('fields', fields), + ('mode', value_mode), + ('name', key), + ('type', value_type), + ])), + ]) elif value_type == '__null__': - schema_entry = OrderedDict([('status', 'soft'), - ('filled', False), - ('info', OrderedDict([ - ('mode', 'NULLABLE'), - ('name', key), - ('type', 'STRING'), - ]))]) + schema_entry = OrderedDict([ + ('status', 'soft'), + ('filled', False), + ('info', OrderedDict([ + ('mode', 'NULLABLE'), + ('name', key), + ('type', 'STRING'), + ])), + ]) elif value_type == '__empty_array__': - schema_entry = OrderedDict([('status', 'soft'), - ('filled', False), - ('info', OrderedDict([ - ('mode', 'REPEATED'), - ('name', key), - ('type', 'STRING'), - ]))]) + schema_entry = OrderedDict([ + ('status', 'soft'), + ('filled', False), + ('info', OrderedDict([ + ('mode', 'REPEATED'), + ('name', key), + ('type', 'STRING'), + ])), + ]) elif value_type == '__empty_record__': - schema_entry = OrderedDict([('status', 'soft'), - ('filled', False), - ('info', OrderedDict([ - ('fields', OrderedDict()), - ('mode', value_mode), - ('name', key), - ('type', 'RECORD'), - ]))]) + schema_entry = OrderedDict( + [('status', 'soft'), + ('filled', False), + ('info', OrderedDict([ + ('fields', OrderedDict()), + ('mode', value_mode), + ('name', key), + ('type', 'RECORD'), + ])), + ]) 
else: # Empty fields are returned as empty strings, and must be treated as # a (soft String) to allow clobbering by subsquent non-empty fields. @@ -374,13 +382,15 @@ def get_schema_entry(self, key, value): else: status = 'hard' filled = True - schema_entry = OrderedDict([('status', status), - ('filled', filled), - ('info', OrderedDict([ - ('mode', value_mode), - ('name', key), - ('type', value_type), - ]))]) + schema_entry = OrderedDict([ + ('status', status), + ('filled', filled), + ('info', OrderedDict([ + ('mode', value_mode), + ('name', key), + ('type', value_type), + ])), + ]) # yapf: enable return schema_entry From 4207d0f97234550e8c7cd3e2e76f9df4d272e976 Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 10:08:29 -0700 Subject: [PATCH 06/13] generate_schema.py: Precompile the regexp which sanitizes the field name --- bigquery_schema_generator/generate_schema.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py index b5114ce..92d45b9 100755 --- a/bigquery_schema_generator/generate_schema.py +++ b/bigquery_schema_generator/generate_schema.py @@ -73,6 +73,9 @@ class SchemaGenerator: # Detect floats inside quotes. FLOAT_MATCHER = re.compile(r'^[-]?\d+\.\d+$') + # Valid field name characters of BigQuery + FIELD_NAME_MATCHER = re.compile(r'[^a-zA-Z0-9_]') + def __init__(self, input_format='json', infer_mode=False, @@ -698,6 +701,7 @@ def flatten_schema_map( sanitize_names=sanitize_names, ) elif key == 'type' and value in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']: + # Convert QINTEGER -> INTEGER, similarly for QFLOAT and QBOOLEAN. 
new_value = value[1:] elif key == 'mode': if infer_mode and value == 'NULLABLE' and filled: @@ -705,7 +709,9 @@ def flatten_schema_map( else: new_value = value elif key == 'name' and sanitize_names: - new_value = re.sub('[^a-zA-Z0-9_]', '_', value)[0:127] + new_value = SchemaGenerator.FIELD_NAME_MATCHER.sub( + '_', value, + )[0:127] else: new_value = value new_info[key] = new_value From ca6f48b4fb39bc3570d3454719e30094b5ffe3fc Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 10:32:43 -0700 Subject: [PATCH 07/13] Initial pythonpackage.yml Add flake8 and python -m unittest. --- .github/workflows/pythonpackage.yml | 42 +++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/pythonpackage.yml diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml new file mode 100644 index 0000000..57aa260 --- /dev/null +++ b/.github/workflows/pythonpackage.yml @@ -0,0 +1,42 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: BigQuery Schema Generator CI + +on: + push: + branches: [ develop ] + pull_request: + branches: [ develop ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.5, 3.6, 3.7, 3.8] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + # pip install -r requirements.txt + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build for most python errors + # W503 and W504 are contradictory, so we have to suppress one of them. + # E501 complains that 80 > 79 columns, but 80 is the default line wrap in vim. + flake8 . 
--count --ignore E501,W503 --show-source --statistics + # exit-zero treats all errors as warnings. Vim editor defaults to 80 + # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics + - name: Test with unittest + run: | + cd tests + python -m unittest From 257d9c6d347aad51ad4e1bee109fb110a168c3f1 Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 10:50:59 -0700 Subject: [PATCH 08/13] flake8: Fix errors and warnings from GitHub Action flake8 --- bigquery_schema_generator/generate_schema.py | 11 +++---- setup.py | 31 ++++++++++---------- tests/test_generate_schema.py | 22 +++++++++----- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py index 92d45b9..2752d59 100755 --- a/bigquery_schema_generator/generate_schema.py +++ b/bigquery_schema_generator/generate_schema.py @@ -366,8 +366,8 @@ def get_schema_entry(self, key, value): ])), ]) elif value_type == '__empty_record__': - schema_entry = OrderedDict( - [('status', 'soft'), + schema_entry = OrderedDict([ + ('status', 'soft'), ('filled', False), ('info', OrderedDict([ ('fields', OrderedDict()), @@ -448,8 +448,8 @@ def infer_value_type(self, value): # Implement the same type inference algorithm as 'bq load' for # quoted values that look like ints, floats or bools. 
if self.INTEGER_MATCHER.match(value): - if int(value) < self.INTEGER_MIN_VALUE or \ - self.INTEGER_MAX_VALUE < int(value): + if (int(value) < self.INTEGER_MIN_VALUE + or self.INTEGER_MAX_VALUE < int(value)): return 'QFLOAT' # quoted float else: return 'QINTEGER' # quoted integer @@ -662,7 +662,8 @@ def flatten_schema_map( else schema_map.items() for name, meta in map_items: # Skip over fields which have been explicitly removed - if not meta: continue + if not meta: + continue status = meta['status'] filled = meta['filled'] diff --git a/setup.py b/setup.py index c59e42c..7daae07 100644 --- a/setup.py +++ b/setup.py @@ -4,28 +4,29 @@ try: import pypandoc long_description = pypandoc.convert('README.md', 'rst', format='md') -except: +except: # noqa: E722 # If unable to convert, try inserting the raw README.md file. try: with open('README.md', encoding="utf-8") as f: long_description = f.read() - except: + except: # noqa: E722 # If all else fails, use some reasonable string. long_description = 'BigQuery schema generator.' -setup(name='bigquery-schema-generator', - version='0.5.1', - description='BigQuery schema generator from JSON or CSV data', - long_description=long_description, - url='https://github.com/bxparks/bigquery-schema-generator', - author='Brian T. Park', - author_email='brian@xparks.net', - license='Apache 2.0', - packages=['bigquery_schema_generator'], - python_requires='~=3.5', - entry_points={ - 'console_scripts': [ +setup( + name='bigquery-schema-generator', + version='0.5.1', + description='BigQuery schema generator from JSON or CSV data', + long_description=long_description, + url='https://github.com/bxparks/bigquery-schema-generator', + author='Brian T. 
Park', + author_email='brian@xparks.net', + license='Apache 2.0', + packages=['bigquery_schema_generator'], + python_requires='~=3.5', + entry_points={ + 'console_scripts': [ 'generate-schema = bigquery_schema_generator.generate_schema:main' ] - } + }, ) diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py index 894da67..0b078e0 100755 --- a/tests/test_generate_schema.py +++ b/tests/test_generate_schema.py @@ -194,7 +194,7 @@ def test_infer_bigquery_type(self): generator.infer_bigquery_type(2.0)) # yapf: disable self.assertEqual(('NULLABLE', 'RECORD'), - generator.infer_bigquery_type({ 'a': 1, 'b': 2 })) + generator.infer_bigquery_type({'a': 1, 'b': 2})) # yapf: enable self.assertEqual(('NULLABLE', '__null__'), generator.infer_bigquery_type(None)) @@ -209,9 +209,12 @@ def test_infer_bigquery_type(self): self.assertEqual( ('REPEATED', 'DATE'), generator.infer_bigquery_type(['2018-02-08', '2018-02-09'])) - self.assertEqual(('REPEATED', 'TIMESTAMP'), - generator.infer_bigquery_type( - ['2018-02-08T12:34:56', '2018-02-08T12:34:56'])) + self.assertEqual( + ('REPEATED', 'TIMESTAMP'), + generator.infer_bigquery_type( + ['2018-02-08T12:34:56', '2018-02-08T12:34:56'], + ) + ) self.assertEqual(('REPEATED', 'STRING'), generator.infer_bigquery_type(['a', 'b', 'c'])) self.assertEqual(('REPEATED', 'BOOLEAN'), @@ -221,10 +224,13 @@ def test_infer_bigquery_type(self): self.assertEqual(('REPEATED', 'FLOAT'), generator.infer_bigquery_type([1.0, 2.0])) # yapf: disable - self.assertEqual(('REPEATED', 'RECORD'), - generator.infer_bigquery_type([ - { 'a': 1, 'b': 2 }, - { 'c': 3 }])) + self.assertEqual( + ('REPEATED', 'RECORD'), + generator.infer_bigquery_type([ + {'a': 1, 'b': 2}, + {'c': 3}, + ]) + ) # yapf: enable self.assertEqual(('REPEATED', '__empty_record__'), generator.infer_bigquery_type([{}])) From 2609dbe13ae5781475a6cf9df16bc044573fee7c Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 12:03:09 -0700 Subject: [PATCH 09/13] tests: Add 
relative import for data_reader; remove +x bit on test_*.py files; make tests runnable only from the top-level package --- .github/workflows/pythonpackage.yml | 17 +++++++++++------ Makefile | 4 ++++ tests/__init__.py | 0 tests/test_anonymize.py | 0 tests/test_generate_schema.py | 2 +- 5 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 Makefile create mode 100644 tests/__init__.py mode change 100755 => 100644 tests/test_anonymize.py mode change 100755 => 100644 tests/test_generate_schema.py diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 57aa260..459be7f 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -30,13 +30,18 @@ jobs: - name: Lint with flake8 run: | pip install flake8 - # stop the build for most python errors - # W503 and W504 are contradictory, so we have to suppress one of them. - # E501 complains that 80 > 79 columns, but 80 is the default line wrap in vim. + # Stop the build for most python errors. + # W503 and W504 are both enabled by default and contradictory, so we + # have to suppress one of them. + # E501 complains that 80 > 79 columns, but 80 is the default line wrap + # in vim. flake8 . --count --ignore E501,W503 --show-source --statistics - # exit-zero treats all errors as warnings. Vim editor defaults to 80 - # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics + + # Exit-zero treats all errors as warnings. Vim editor defaults to 80. + # The complexity warning is not useful... in fact the whole thing is + # not useful, so turn it off. + # flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=80 + # --statistics - name: Test with unittest run: | - cd tests python -m unittest diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7d8eff1 --- /dev/null +++ b/Makefile @@ -0,0 +1,4 @@ +.PHONY: tests + +tests: + python3 -m unittest diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_anonymize.py b/tests/test_anonymize.py old mode 100755 new mode 100644 diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py old mode 100755 new mode 100644 index 0b078e0..76add10 --- a/tests/test_generate_schema.py +++ b/tests/test_generate_schema.py @@ -22,7 +22,7 @@ from bigquery_schema_generator.generate_schema import SchemaGenerator from bigquery_schema_generator.generate_schema import is_string_type from bigquery_schema_generator.generate_schema import convert_type -from data_reader import DataReader +from .data_reader import DataReader class TestSchemaGenerator(unittest.TestCase): From 5a9668ae8bad9a33c61535e719426e3f92387418 Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 12:06:02 -0700 Subject: [PATCH 10/13] github actions: Remove Python 3.5 from the test matrix; it does not support f-strings --- .github/workflows/pythonpackage.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 459be7f..efd3710 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -15,7 +15,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7, 3.8] + # 3.5 does not support f-strings + python-version: [3.6, 3.7, 3.8] steps: - uses: actions/checkout@v2 From c5147bb1b9abb7bf32aea347a04d63a2052f5415 Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 12:10:00 -0700 Subject: [PATCH 11/13] tests/README.md: Update instructions for running tests; got all 
mixed up with Python's confusing rules re: relative imports --- tests/README.md | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/tests/README.md b/tests/README.md index c453aef..a128825 100644 --- a/tests/README.md +++ b/tests/README.md @@ -9,10 +9,25 @@ file which is parsed by the unit test program. This has two advantages: * we can more easily update the input and output data records, and * the `testdata.txt` data can be reused for versions written in other languages -The output of `test_generate_schema.py` should look something like this: +## Running the Tests + +The tests should be run from the top-level package: + +``` +$ cd .../bigquery-schema-generator + +$ make tests + +# OR + +$ python3 -m unittest +``` + +## Test Output + +The output of `test_generate_schema.py` will look something like this: ``` -$ ./test_generate_schema.py ---------------------------------------------------------------------- Ran 4 tests in 0.002s @@ -31,24 +46,3 @@ Test chunk 11: First record: { "i": [1, 2] } Test chunk 12: First record: { "r" : { "i": 3 } } Test chunk 13: First record: { "r" : [{ "i": 4 }] } ``` - -## Unit Test for anonymize.py - -The unit test for `anonymize.py` should look like this: -``` -$ ./test_anonymize.py -. 
----------------------------------------------------------------------- -Ran 1 test in 0.000s - -OK -``` - -## Running All Tests - -Use the -[discovery mode](https://docs.python.org/3/library/unittest.html) -for `unittest` which runs all tests with the `test_` prefix: -``` -$ python3 -m unittest -``` From fc3203227252939f26c6b1b3454479a18cdecdd9 Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 12:22:26 -0700 Subject: [PATCH 12/13] README.md, CHANGELOG.md: Update Python version to >= 3.6; update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ README.md | 12 ++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cbcbba..f499ba8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Changelog * Unreleased + * Fix `--sanitize_names` for recursive RECORD fields (Thanks riccardomc, + see #43). + * Clean up how unit tests are run, trying my best to figure out + Python's convoluted package importing mechanism. + * Add GitHub Actions continuous integration pipelines with flake8 checks and + automated unit testing. * 0.5.1 (2019-06-17) * Add `--sanitize_names` to convert invalid characters in column names and to shorten them if too long. (See #33; thanks @jonwarghed). diff --git a/README.md b/README.md index 1a2a624..f51cd85 100644 --- a/README.md +++ b/README.md @@ -659,16 +659,20 @@ took 67s on a Dell Precision M4700 laptop with an Intel Core i7-3840QM CPU @ ## System Requirements -This project was initially developed on Ubuntu 17.04 using Python 3.5.3. I have -tested it on: +This project was initially developed on Ubuntu 17.04 using Python 3.5.3, but it +now requires Python 3.6 or higher, I think mostly due to the use of f-strings. 
+I have tested it on: + +* Ubuntu 18.04, Python 3.7.7 * Ubuntu 18.04, Python 3.6.7 * Ubuntu 17.10, Python 3.6.3 -* Ubuntu 17.04, Python 3.5.3 -* Ubuntu 16.04, Python 3.5.2 * MacOS 10.14.2, [Python 3.6.4](https://www.python.org/downloads/release/python-364/) * MacOS 10.13.2, [Python 3.6.4](https://www.python.org/downloads/release/python-364/) +The GitHub Actions continuous integration pipeline validates on Python 3.6, 3.7 +and 3.8. + ## Changelog See [CHANGELOG.md](CHANGELOG.md). From 3bf559dbe9e7c55dee94ef6fbbc2845409326a64 Mon Sep 17 00:00:00 2001 From: Brian Park Date: Sat, 4 Apr 2020 12:26:41 -0700 Subject: [PATCH 13/13] Bump version to 1.0 --- CHANGELOG.md | 5 +++-- README.md | 2 +- setup.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f499ba8..5e49473 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ # Changelog * Unreleased +* 1.0 (2020-04-04) - * Fix `--sanitize_names` for recursive RECORD fields (Thanks riccardomc, + * Fix `--sanitize_names` for recursive RECORD fields (Thanks riccardomc@, see #43). * Clean up how unit tests are run, trying my best to figure out Python's convoluted package importing mechanism. @@ -9,7 +10,7 @@ automated unit testing. * 0.5.1 (2019-06-17) * Add `--sanitize_names` to convert invalid characters in column names and - to shorten them if too long. (See #33; thanks @jonwarghed). + to shorten them if too long. (See #33; thanks jonwarghed@). * 0.5 (2019-06-06) * Add input and output parameters to run() to allow the client code using `SchemaGenerator` to redirect the input and output files. (See #30). 
diff --git a/README.md b/README.md index f51cd85..774cefb 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ $ generate-schema < file.data.json > file.schema.json $ generate-schema --input_format csv < file.data.csv > file.schema.json ``` -Version: 0.5.1 (2019-06-17) +Version: 1.0 (2020-04-04) ## Background diff --git a/setup.py b/setup.py index 7daae07..d600d0c 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ setup( name='bigquery-schema-generator', - version='0.5.1', + version='1.0', description='BigQuery schema generator from JSON or CSV data', long_description=long_description, url='https://github.com/bxparks/bigquery-schema-generator', @@ -23,7 +23,7 @@ author_email='brian@xparks.net', license='Apache 2.0', packages=['bigquery_schema_generator'], - python_requires='~=3.5', + python_requires='~=3.6', entry_points={ 'console_scripts': [ 'generate-schema = bigquery_schema_generator.generate_schema:main'