From 702cc9ebfd80c4bcbce8fa534b43173e289b2c25 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Mon, 17 Jun 2019 08:08:08 -0700
Subject: [PATCH 01/13] README.md: Fix date typo, should be 2019-06-17 not
 2017-06-19

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6689888..4932607 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ $ generate-schema < file.data.json > file.schema.json
 $ generate-schema --input_format csv < file.data.csv > file.schema.json
 ```
 
-Version: 0.5.1 (2019-06-19)
+Version: 0.5.1 (2019-06-17)
 
 ## Background
 

From fc3293b50d6dfd06cdb12605c6247debb206a75d Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Mon, 9 Sep 2019 13:56:24 -0700
Subject: [PATCH 02/13] README.md: Discourage running pip3 as root when
 installing (#38)

---
 README.md | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 4932607..1a2a624 100644
--- a/README.md
+++ b/README.md
@@ -44,18 +44,33 @@ the input dataset.
 
 ## Installation
 
-Install from [PyPI](https://pypi.python.org/pypi) repository using `pip3`.
-If you want to install the package for your entire system globally, use
+Install from [PyPI](https://pypi.python.org/pypi) repository using `pip3`. There
+are too many ways to install packages in Python. The following are in order
+from highest to lowest recommendation:
+
+1) If you are using a virtual environment (such as
+[venv](https://docs.python.org/3/library/venv.html)), then use:
 ```
-$ sudo -H pip3 install bigquery_schema_generator
+$ pip3 install bigquery_schema_generator
 ```
-If you are using a virtual environment (such as
-[venv](https://docs.python.org/3/library/venv.html)), then you don't need
-the `sudo` coommand, and you can just type:
+
+2) If you aren't using a virtual environment you can install into
+your local Python directory:
+
 ```
-$ pip3 install bigquery_schema_generator
+$ pip3 install --user bigquery_schema_generator
 ```
 
+3) If you want to install the package for your entire system globally, use
+```
+$ sudo -H pip3 install bigquery_schema_generator
+```
+but realize that you will be running code from PyPI as `root` so this has
+security implications.
+
+Sometimes, your Python environment gets into a complete mess and the `pip3`
+command won't work. Try typing `python3 -m pip` instead.
+
 A successful install should print out something like the following (the version
 number may be different):
 ```

From 8e29d06e054820e888e7f834ced081bbebc4910c Mon Sep 17 00:00:00 2001
From: "Riccardo M. Cefala" <riccardo.cefala@container-solutions.com>
Date: Sat, 4 Apr 2020 14:48:48 +0200
Subject: [PATCH 03/13] fix recursive call in flatten_schema_map and add
 relative test

---
 bigquery_schema_generator/generate_schema.py |  2 +-
 tests/testdata.txt                           | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index 8f1b0c9..593b069 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -679,7 +679,7 @@ def flatten_schema_map(schema_map,
                 else:
                     # Recursively flatten the sub-fields of a RECORD entry.
                     new_value = flatten_schema_map(
-                        value, keep_nulls, sorted_schema, sanitize_names)
+                        value, keep_nulls, sorted_schema, infer_mode, sanitize_names)
             elif key == 'type' and value in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']:
                 new_value = value[1:]
             elif key == 'mode':
diff --git a/tests/testdata.txt b/tests/testdata.txt
index 8053bc3..45d5551 100644
--- a/tests/testdata.txt
+++ b/tests/testdata.txt
@@ -873,6 +873,26 @@ SCHEMA
 ]
 END
 
+# Sanitize the names to comply with BigQuery recursively.
+DATA sanitize_names
+{ "r" : { "a-name": [1, 2] } }
+SCHEMA
+[
+  {
+    "fields": [
+      {
+        "mode": "REPEATED",
+        "name": "a_name",
+        "type": "INTEGER"
+      }
+    ],
+    "mode": "NULLABLE",
+    "name": "r",
+    "type": "RECORD"
+  }
+]
+END
+
 # Sanitize the names to comply with BigQuery.
 DATA csv infer_mode sanitize_names
 name,surname,age_in_#years,eighteencharacterseighteencharacterseighteencharacterseighteencharacterseighteencharacterseighteencharacterseighteencharacterseighteencharacters

From 1ab0329a0727959da92907b0f3d6f07df2e5440e Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 09:45:12 -0700
Subject: [PATCH 04/13] generate_schema.py: use named parameters to avoid bug
 #43 in the future

---
 bigquery_schema_generator/generate_schema.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index 593b069..f76301d 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -618,11 +618,13 @@ def is_string_type(thetype):
     ]
 
 
-def flatten_schema_map(schema_map,
-                       keep_nulls=False,
-                       sorted_schema=True,
-                       infer_mode=False,
-                       sanitize_names=False):
+def flatten_schema_map(
+        schema_map,
+        keep_nulls=False,
+        sorted_schema=True,
+        infer_mode=False,
+        sanitize_names=False,
+):
     """Converts the 'schema_map' into a more flatten version which is
     compatible with BigQuery schema.
 
@@ -679,7 +681,12 @@ def flatten_schema_map(schema_map,
                 else:
                     # Recursively flatten the sub-fields of a RECORD entry.
                     new_value = flatten_schema_map(
-                        value, keep_nulls, sorted_schema, infer_mode, sanitize_names)
+                        schema_map=value,
+                        keep_nulls=keep_nulls,
+                        sorted_schema=sorted_schema,
+                        infer_mode=infer_mode,
+                        sanitize_names=sanitize_names,
+                    )
             elif key == 'type' and value in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']:
                 new_value = value[1:]
             elif key == 'mode':

From 6385840c979bf7e1199bdb5f7fa50269191907a5 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 09:58:15 -0700
Subject: [PATCH 05/13] generate_schema.py: prettify some complex OrderedDict()
 lines

---
 bigquery_schema_generator/generate_schema.py | 90 +++++++++++---------
 1 file changed, 50 insertions(+), 40 deletions(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index f76301d..b5114ce 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -114,8 +114,8 @@ def __init__(self,
 
         # This option generally wants to be turned on as any inferred schema
         # will not be accepted by `bq load` when it contains illegal characters.
-        # Characters such as #, / or -. Neither will it be accepted if the column name
-        # in the schema is larger than 128 characters.
+        # Characters such as #, / or -. Neither will it be accepted if the
+        # column name in the schema is larger than 128 characters.
         self.sanitize_names = sanitize_names
 
     def log_error(self, msg):
@@ -323,7 +323,6 @@ def get_schema_entry(self, key, value):
         if not value_mode or not value_type:
             return None
 
-        # yapf: disable
         if value_type == 'RECORD':
             # recursively figure out the RECORD
             fields = OrderedDict()
@@ -332,39 +331,48 @@ def get_schema_entry(self, key, value):
             else:
                 for val in value:
                     self.deduce_schema_for_line(val, fields)
-            schema_entry = OrderedDict([('status', 'hard'),
-                                        ('filled', True),
-                                        ('info', OrderedDict([
-                                            ('fields', fields),
-                                            ('mode', value_mode),
-                                            ('name', key),
-                                            ('type', value_type),
-                                        ]))])
+            # yapf: disable
+            schema_entry = OrderedDict([
+                ('status', 'hard'),
+                ('filled', True),
+                ('info', OrderedDict([
+                    ('fields', fields),
+                    ('mode', value_mode),
+                    ('name', key),
+                    ('type', value_type),
+                ])),
+            ])
         elif value_type == '__null__':
-            schema_entry = OrderedDict([('status', 'soft'),
-                                        ('filled', False),
-                                        ('info', OrderedDict([
-                                            ('mode', 'NULLABLE'),
-                                            ('name', key),
-                                            ('type', 'STRING'),
-                                        ]))])
+            schema_entry = OrderedDict([
+                ('status', 'soft'),
+                ('filled', False),
+                ('info', OrderedDict([
+                    ('mode', 'NULLABLE'),
+                    ('name', key),
+                    ('type', 'STRING'),
+                ])),
+            ])
         elif value_type == '__empty_array__':
-            schema_entry = OrderedDict([('status', 'soft'),
-                                        ('filled', False),
-                                        ('info', OrderedDict([
-                                            ('mode', 'REPEATED'),
-                                            ('name', key),
-                                            ('type', 'STRING'),
-                                        ]))])
+            schema_entry = OrderedDict([
+                ('status', 'soft'),
+                ('filled', False),
+                ('info', OrderedDict([
+                    ('mode', 'REPEATED'),
+                    ('name', key),
+                    ('type', 'STRING'),
+                ])),
+            ])
         elif value_type == '__empty_record__':
-            schema_entry = OrderedDict([('status', 'soft'),
-                                        ('filled', False),
-                                        ('info', OrderedDict([
-                                            ('fields', OrderedDict()),
-                                            ('mode', value_mode),
-                                            ('name', key),
-                                            ('type', 'RECORD'),
-                                        ]))])
+            schema_entry = OrderedDict(
+                [('status', 'soft'),
+                ('filled', False),
+                ('info', OrderedDict([
+                    ('fields', OrderedDict()),
+                    ('mode', value_mode),
+                    ('name', key),
+                    ('type', 'RECORD'),
+                ])),
+            ])
         else:
             # Empty fields are returned as empty strings, and must be treated as
             # a (soft String) to allow clobbering by subsquent non-empty fields.
@@ -374,13 +382,15 @@ def get_schema_entry(self, key, value):
             else:
                 status = 'hard'
                 filled = True
-            schema_entry = OrderedDict([('status', status),
-                                        ('filled', filled),
-                                        ('info', OrderedDict([
-                                            ('mode', value_mode),
-                                            ('name', key),
-                                            ('type', value_type),
-                                        ]))])
+            schema_entry = OrderedDict([
+                ('status', status),
+                ('filled', filled),
+                ('info', OrderedDict([
+                    ('mode', value_mode),
+                    ('name', key),
+                    ('type', value_type),
+                ])),
+            ])
         # yapf: enable
         return schema_entry
 

From 4207d0f97234550e8c7cd3e2e76f9df4d272e976 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 10:08:29 -0700
Subject: [PATCH 06/13] generate_schema.py: Precompile the regexp which
 sanitizes the field name

---
 bigquery_schema_generator/generate_schema.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index b5114ce..92d45b9 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -73,6 +73,9 @@ class SchemaGenerator:
     # Detect floats inside quotes.
     FLOAT_MATCHER = re.compile(r'^[-]?\d+\.\d+$')
 
+    # Matches any character that is not valid in a BigQuery field name.
+    FIELD_NAME_MATCHER = re.compile(r'[^a-zA-Z0-9_]')
+
     def __init__(self,
                  input_format='json',
                  infer_mode=False,
@@ -698,6 +701,7 @@ def flatten_schema_map(
                         sanitize_names=sanitize_names,
                     )
             elif key == 'type' and value in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']:
+                # Convert QINTEGER -> INTEGER; likewise for QFLOAT and QBOOLEAN.
                 new_value = value[1:]
             elif key == 'mode':
                 if infer_mode and value == 'NULLABLE' and filled:
@@ -705,7 +709,9 @@ def flatten_schema_map(
                 else:
                     new_value = value
             elif key == 'name' and sanitize_names:
-                new_value = re.sub('[^a-zA-Z0-9_]', '_', value)[0:127]
+                new_value = SchemaGenerator.FIELD_NAME_MATCHER.sub(
+                    '_', value,
+                )[0:127]
             else:
                 new_value = value
             new_info[key] = new_value

From ca6f48b4fb39bc3570d3454719e30094b5ffe3fc Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 10:32:43 -0700
Subject: [PATCH 07/13] Initial pythonpackage.yml

Add flake8 and python -m unittest.
---
 .github/workflows/pythonpackage.yml | 42 +++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 .github/workflows/pythonpackage.yml

diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
new file mode 100644
index 0000000..57aa260
--- /dev/null
+++ b/.github/workflows/pythonpackage.yml
@@ -0,0 +1,42 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: BigQuery Schema Generator CI 
+
+on:
+  push:
+    branches: [ develop ]
+  pull_request:
+    branches: [ develop ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy: 
+      matrix:
+        python-version: [3.5, 3.6, 3.7, 3.8]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        # pip install -r requirements.txt
+    - name: Lint with flake8
+      run: |
+        pip install flake8
+        # stop the build for most python errors
+        # W503 and W504 are contradictory, so we have to suppress one of them.
+        # E501 complains that 80 > 79 columns, but 80 is the default line wrap in vim.
+        flake8 . --count --ignore E501,W503 --show-source --statistics
+        # exit-zero treats all errors as warnings. Vim editor defaults to 80
+        # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics
+    - name: Test with unittest
+      run: |
+        cd tests
+        python -m unittest

From 257d9c6d347aad51ad4e1bee109fb110a168c3f1 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 10:50:59 -0700
Subject: [PATCH 08/13] flake8: Fix errors and warnings from GitHub Action
 flake8

---
 bigquery_schema_generator/generate_schema.py | 11 +++----
 setup.py                                     | 31 ++++++++++----------
 tests/test_generate_schema.py                | 22 +++++++++-----
 3 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index 92d45b9..2752d59 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -366,8 +366,8 @@ def get_schema_entry(self, key, value):
                 ])),
             ])
         elif value_type == '__empty_record__':
-            schema_entry = OrderedDict(
-                [('status', 'soft'),
+            schema_entry = OrderedDict([
+                ('status', 'soft'),
                 ('filled', False),
                 ('info', OrderedDict([
                     ('fields', OrderedDict()),
@@ -448,8 +448,8 @@ def infer_value_type(self, value):
                 # Implement the same type inference algorithm as 'bq load' for
                 # quoted values that look like ints, floats or bools.
                 if self.INTEGER_MATCHER.match(value):
-                    if int(value) < self.INTEGER_MIN_VALUE or \
-                        self.INTEGER_MAX_VALUE < int(value):
+                    if (int(value) < self.INTEGER_MIN_VALUE
+                            or self.INTEGER_MAX_VALUE < int(value)):
                         return 'QFLOAT'  # quoted float
                     else:
                         return 'QINTEGER'  # quoted integer
@@ -662,7 +662,8 @@ def flatten_schema_map(
         else schema_map.items()
     for name, meta in map_items:
         # Skip over fields which have been explicitly removed
-        if not meta: continue
+        if not meta:
+            continue
 
         status = meta['status']
         filled = meta['filled']
diff --git a/setup.py b/setup.py
index c59e42c..7daae07 100644
--- a/setup.py
+++ b/setup.py
@@ -4,28 +4,29 @@
 try:
     import pypandoc
     long_description = pypandoc.convert('README.md', 'rst', format='md')
-except:
+except:  # noqa: E722
     # If unable to convert, try inserting the raw README.md file.
     try:
         with open('README.md', encoding="utf-8") as f:
             long_description = f.read()
-    except:
+    except:  # noqa: E722
         # If all else fails, use some reasonable string.
         long_description = 'BigQuery schema generator.'
 
-setup(name='bigquery-schema-generator',
-      version='0.5.1',
-      description='BigQuery schema generator from JSON or CSV data',
-      long_description=long_description,
-      url='https://github.com/bxparks/bigquery-schema-generator',
-      author='Brian T. Park',
-      author_email='brian@xparks.net',
-      license='Apache 2.0',
-      packages=['bigquery_schema_generator'],
-      python_requires='~=3.5',
-      entry_points={
-          'console_scripts': [
+setup(
+    name='bigquery-schema-generator',
+    version='0.5.1',
+    description='BigQuery schema generator from JSON or CSV data',
+    long_description=long_description,
+    url='https://github.com/bxparks/bigquery-schema-generator',
+    author='Brian T. Park',
+    author_email='brian@xparks.net',
+    license='Apache 2.0',
+    packages=['bigquery_schema_generator'],
+    python_requires='~=3.5',
+    entry_points={
+        'console_scripts': [
             'generate-schema = bigquery_schema_generator.generate_schema:main'
         ]
-      }
+    },
 )
diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py
index 894da67..0b078e0 100755
--- a/tests/test_generate_schema.py
+++ b/tests/test_generate_schema.py
@@ -194,7 +194,7 @@ def test_infer_bigquery_type(self):
                          generator.infer_bigquery_type(2.0))
         # yapf: disable
         self.assertEqual(('NULLABLE', 'RECORD'),
-                         generator.infer_bigquery_type({ 'a': 1, 'b': 2 }))
+                         generator.infer_bigquery_type({'a': 1, 'b': 2}))
         # yapf: enable
         self.assertEqual(('NULLABLE', '__null__'),
                          generator.infer_bigquery_type(None))
@@ -209,9 +209,12 @@ def test_infer_bigquery_type(self):
         self.assertEqual(
             ('REPEATED', 'DATE'),
             generator.infer_bigquery_type(['2018-02-08', '2018-02-09']))
-        self.assertEqual(('REPEATED', 'TIMESTAMP'),
-                         generator.infer_bigquery_type(
-                             ['2018-02-08T12:34:56', '2018-02-08T12:34:56']))
+        self.assertEqual(
+            ('REPEATED', 'TIMESTAMP'),
+            generator.infer_bigquery_type(
+                ['2018-02-08T12:34:56', '2018-02-08T12:34:56'],
+            )
+        )
         self.assertEqual(('REPEATED', 'STRING'),
                          generator.infer_bigquery_type(['a', 'b', 'c']))
         self.assertEqual(('REPEATED', 'BOOLEAN'),
@@ -221,10 +224,13 @@ def test_infer_bigquery_type(self):
         self.assertEqual(('REPEATED', 'FLOAT'),
                          generator.infer_bigquery_type([1.0, 2.0]))
         # yapf: disable
-        self.assertEqual(('REPEATED', 'RECORD'),
-                         generator.infer_bigquery_type([
-                            { 'a': 1, 'b': 2 },
-                            { 'c': 3 }]))
+        self.assertEqual(
+            ('REPEATED', 'RECORD'),
+            generator.infer_bigquery_type([
+                {'a': 1, 'b': 2},
+                {'c': 3},
+            ])
+        )
         # yapf: enable
         self.assertEqual(('REPEATED', '__empty_record__'),
                          generator.infer_bigquery_type([{}]))

From 2609dbe13ae5781475a6cf9df16bc044573fee7c Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 12:03:09 -0700
Subject: [PATCH 09/13] tests: Add relative import for data_reader; remove +x
 bit on test_*.py files; make tests runnable only from the top-level package

---
 .github/workflows/pythonpackage.yml | 17 +++++++++++------
 Makefile                            |  4 ++++
 tests/__init__.py                   |  0
 tests/test_anonymize.py             |  0
 tests/test_generate_schema.py       |  2 +-
 5 files changed, 16 insertions(+), 7 deletions(-)
 create mode 100644 Makefile
 create mode 100644 tests/__init__.py
 mode change 100755 => 100644 tests/test_anonymize.py
 mode change 100755 => 100644 tests/test_generate_schema.py

diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 57aa260..459be7f 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -30,13 +30,18 @@ jobs:
     - name: Lint with flake8
       run: |
         pip install flake8
-        # stop the build for most python errors
-        # W503 and W504 are contradictory, so we have to suppress one of them.
-        # E501 complains that 80 > 79 columns, but 80 is the default line wrap in vim.
+        # Stop the build for most python errors.
+        # W503 and W504 are both enabled by default and contradictory, so we
+        # have to suppress one of them.
+        # E501 complains that 80 > 79 columns, but 80 is the default line wrap
+        # in vim.
         flake8 . --count --ignore E501,W503 --show-source --statistics
-        # exit-zero treats all errors as warnings. Vim editor defaults to 80
-        # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics
+
+        # Exit-zero treats all errors as warnings. Vim editor defaults to 80.
+        # The complexity warning is not useful... in fact the whole thing is
+        # not useful, so turn it off.
+        # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80
+        # --statistics
     - name: Test with unittest
       run: |
-        cd tests
         python -m unittest
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7d8eff1
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,4 @@
+.PHONY: tests
+
+tests:
+	python3 -m unittest
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_anonymize.py b/tests/test_anonymize.py
old mode 100755
new mode 100644
diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py
old mode 100755
new mode 100644
index 0b078e0..76add10
--- a/tests/test_generate_schema.py
+++ b/tests/test_generate_schema.py
@@ -22,7 +22,7 @@
 from bigquery_schema_generator.generate_schema import SchemaGenerator
 from bigquery_schema_generator.generate_schema import is_string_type
 from bigquery_schema_generator.generate_schema import convert_type
-from data_reader import DataReader
+from .data_reader import DataReader
 
 
 class TestSchemaGenerator(unittest.TestCase):

From 5a9668ae8bad9a33c61535e719426e3f92387418 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 12:06:02 -0700
Subject: [PATCH 10/13] github actions: Remove Python 3.5 from the test matrix;
 it does not support f-strings

---
 .github/workflows/pythonpackage.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 459be7f..efd3710 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -15,7 +15,8 @@ jobs:
     runs-on: ubuntu-latest
     strategy: 
       matrix:
-        python-version: [3.5, 3.6, 3.7, 3.8]
+        # 3.5 does not support f-strings
+        python-version: [3.6, 3.7, 3.8]
 
     steps:
     - uses: actions/checkout@v2

From c5147bb1b9abb7bf32aea347a04d63a2052f5415 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 12:10:00 -0700
Subject: [PATCH 11/13] tests/README.md: Update instructions for running tests;
 got all mixed up with Python's confusing rules re: relative imports

---
 tests/README.md | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/tests/README.md b/tests/README.md
index c453aef..a128825 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -9,10 +9,25 @@ file which is parsed by the unit test program.  This has two advantages:
 * we can more easily update the input and output data records, and 
 * the `testdata.txt` data can be reused for versions written in other languages
 
-The output of `test_generate_schema.py` should look something like this:
+## Running the Tests
+
+The tests should be run from the top-level package:
+
+```
+$ cd .../bigquery-schema-generator
+
+$ make tests
+
+# OR
+
+$ python3 -m unittest
+```
+
+## Test Output
+
+The output of `test_generate_schema.py` will look something like this:
 
 ```
-$ ./test_generate_schema.py
 ----------------------------------------------------------------------
 Ran 4 tests in 0.002s
 
@@ -31,24 +46,3 @@ Test chunk 11: First record: { "i": [1, 2] }
 Test chunk 12: First record: { "r" : { "i": 3 } }
 Test chunk 13: First record: { "r" : [{ "i": 4 }] }
 ```
-
-## Unit Test for anonymize.py
-
-The unit test for `anonymize.py` should look like this:
-```
-$ ./test_anonymize.py
-.
-----------------------------------------------------------------------
-Ran 1 test in 0.000s
-
-OK
-```
-
-## Running All Tests
-
-Use the
-[discovery mode](https://docs.python.org/3/library/unittest.html)
-for `unittest` which runs all tests with the `test_` prefix:
-```
-$ python3 -m unittest
-```

From fc3203227252939f26c6b1b3454479a18cdecdd9 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 12:22:26 -0700
Subject: [PATCH 12/13] README.md, CHANGELOG.md: Update Python version to >=
 3.6; update CHANGELOG.md

---
 CHANGELOG.md |  6 ++++++
 README.md    | 12 ++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0cbcbba..f499ba8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Changelog
 
 * Unreleased
+    * Fix `--sanitize_names` for recursive RECORD fields (Thanks riccardomc,
+      see #43).
+    * Clean up how unit tests are run, trying my best to figure out
+      Python's convolution package importing mechanism.
+    * Add GitHub Actions continuous integration pipelines with flake8 checks and
+      automated unit testing.
 * 0.5.1 (2019-06-17)
     * Add `--sanitize_names` to convert invalid characters in column names and
       to shorten them if too long. (See #33; thanks @jonwarghed).
diff --git a/README.md b/README.md
index 1a2a624..f51cd85 100644
--- a/README.md
+++ b/README.md
@@ -659,16 +659,20 @@ took 67s on a Dell Precision M4700 laptop with an Intel Core i7-3840QM CPU @
 
 ## System Requirements
 
-This project was initially developed on Ubuntu 17.04 using Python 3.5.3. I have
-tested it on:
+This project was initially developed on Ubuntu 17.04 using Python 3.5.3, but it
+now requires Python 3.6 or higher, I think mostly due to the use of f-strings.
 
+I have tested it on:
+
+* Ubuntu 18.04, Python 3.7.7
 * Ubuntu 18.04, Python 3.6.7
 * Ubuntu 17.10, Python 3.6.3
-* Ubuntu 17.04, Python 3.5.3
-* Ubuntu 16.04, Python 3.5.2
 * MacOS 10.14.2, [Python 3.6.4](https://www.python.org/downloads/release/python-364/)
 * MacOS 10.13.2, [Python 3.6.4](https://www.python.org/downloads/release/python-364/)
 
+The GitHub Actions continuous integration pipeline validates on Python 3.6, 3.7
+and 3.8.
+
 ## Changelog
 
 See [CHANGELOG.md](CHANGELOG.md).

From 3bf559dbe9e7c55dee94ef6fbbc2845409326a64 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Sat, 4 Apr 2020 12:26:41 -0700
Subject: [PATCH 13/13] Bump version to 1.0

---
 CHANGELOG.md | 5 +++--
 README.md    | 2 +-
 setup.py     | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f499ba8..5e49473 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,8 @@
 # Changelog
 
 * Unreleased
-    * Fix `--sanitize_names` for recursive RECORD fields (Thanks riccardomc,
+* 1.0 (2020-04-04)
+    * Fix `--sanitize_names` for recursive RECORD fields (Thanks riccardomc@,
       see #43).
     * Clean up how unit tests are run, trying my best to figure out
       Python's convolution package importing mechanism.
@@ -9,7 +10,7 @@
       automated unit testing.
 * 0.5.1 (2019-06-17)
     * Add `--sanitize_names` to convert invalid characters in column names and
-      to shorten them if too long. (See #33; thanks @jonwarghed).
+      to shorten them if too long. (See #33; thanks jonwarghed@).
 * 0.5 (2019-06-06)
     * Add input and output parameters to run() to allow the client code using
       `SchemaGenerator` to redirect the input and output files. (See #30).
diff --git a/README.md b/README.md
index f51cd85..774cefb 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ $ generate-schema < file.data.json > file.schema.json
 $ generate-schema --input_format csv < file.data.csv > file.schema.json
 ```
 
-Version: 0.5.1 (2019-06-17)
+Version: 1.0 (2020-04-04)
 
 ## Background
 
diff --git a/setup.py b/setup.py
index 7daae07..d600d0c 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 
 setup(
     name='bigquery-schema-generator',
-    version='0.5.1',
+    version='1.0',
     description='BigQuery schema generator from JSON or CSV data',
     long_description=long_description,
     url='https://github.com/bxparks/bigquery-schema-generator',
@@ -23,7 +23,7 @@
     author_email='brian@xparks.net',
     license='Apache 2.0',
     packages=['bigquery_schema_generator'],
-    python_requires='~=3.5',
+    python_requires='~=3.6',
     entry_points={
         'console_scripts': [
             'generate-schema = bigquery_schema_generator.generate_schema:main'