From 93bb5e183c49764001358451156b53bf05ca1324 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:19:35 -0400 Subject: [PATCH 1/7] [edgetest] automated change (#308) Co-authored-by: fdosani --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ab77965..1f9f505 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }, ] license = {text = "Apache Software License"} -dependencies = ["boto3<=1.35.43,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=0.25.2", "numpy<=2.0.2,>=1.22.0", "polars>=0.20.0"] +dependencies = ["boto3<=1.35.48,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=0.25.2", "numpy<=2.0.2,>=1.22.0", "polars>=0.20.0"] requires-python = ">=3.9.0" classifiers = [ From 3aaf448e8afb89269afb8835286b98d71668ac3c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 12:03:19 -0600 Subject: [PATCH 2/7] [edgetest] automated change (#311) Co-authored-by: fdosani --- pyproject.toml | 2 +- requirements.txt | 25 ++++++++++--------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f9f505..0f7e292 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }, ] license = {text = "Apache Software License"} -dependencies = ["boto3<=1.35.48,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=0.25.2", "numpy<=2.0.2,>=1.22.0", "polars>=0.20.0"] +dependencies = ["boto3<=1.35.53,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=0.25.2", "numpy<=2.0.2,>=1.22.0", "polars>=0.20.0"] requires-python = ">=3.9.0" classifiers = [ diff --git a/requirements.txt b/requirements.txt index 3a26156..db34d19 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,8 @@ -# -# This file is autogenerated by pip-compile with Python 3.11 -# by the following command: -# -# pip-compile --output-file=requirements.txt pyproject.toml -# - -boto3==1.34.126 +# This file was autogenerated by uv via the following command: +# uv pip compile --output-file=requirements.txt pyproject.toml +boto3==1.35.53 # via locopy (pyproject.toml) -botocore==1.34.130 +botocore==1.35.53 # via # boto3 # s3transfer @@ -15,27 +10,27 @@ jmespath==1.0.1 # via # boto3 # botocore -numpy==1.26.4 +numpy==2.0.2 # via # locopy (pyproject.toml) # pandas -pandas==2.2.2 +pandas==2.2.3 # via locopy (pyproject.toml) -polars==1.6.0 +polars==1.12.0 # via locopy (pyproject.toml) python-dateutil==2.9.0.post0 # via # botocore # pandas -pytz==2024.1 +pytz==2024.2 # via pandas pyyaml==6.0.1 # via locopy (pyproject.toml) -s3transfer==0.10.1 +s3transfer==0.10.3 # via boto3 six==1.16.0 # via python-dateutil -tzdata==2024.1 +tzdata==2024.2 # via pandas urllib3==1.26.20 # via botocore From ee7f51b3c1f0870a078df3764c1d78219f515e14 Mon Sep 17 00:00:00 2001 From: Jacob Dawang Date: Fri, 1 Nov 2024 15:20:14 -0600 Subject: [PATCH 3/7] Update edgetest python version (#309) * Update edgetest python version * Add workflow_dispatch * Test action * Python version in right place * Update permissions * Revert PR trigger * Update edgetest.yml --- .github/workflows/edgetest.yml | 6 +++++- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/edgetest.yml b/.github/workflows/edgetest.yml index a690653..b8b4d94 100644 --- a/.github/workflows/edgetest.yml +++ b/.github/workflows/edgetest.yml @@ -5,10 +5,14 @@ name: Run edgetest on: schedule: - cron: '35 17 * * 5' + workflow_dispatch: jobs: edgetest: runs-on: ubuntu-latest name: running edgetest + permissions: + contents: write + pull-requests: write steps: - uses: actions/checkout@v4 with: @@ -19,7 +23,7 @@ jobs: cp tests/data/.locopyrc ~/.locopyrc cp tests/data/.locopy-sfrc ~/.locopy-sfrc - id: run-edgetest - uses: fdosani/run-edgetest-action@v1.3 + uses: edgetest-dev/run-edgetest-action@v1.5 with: edgetest-flags: '-c pyproject.toml --export' base-branch: 'develop' diff --git a/pyproject.toml b/pyproject.toml index 0f7e292..91bf317 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,7 @@ ban-relative-imports = "all" convention = "numpy" [edgetest.envs.core] -python_version = "3.9" +python_version = "3.10" extras = [ "tests", "psycopg2", From 6b0f1f89bff43123b7efdf2b95d5b580274d346f Mon Sep 17 00:00:00 2001 From: Gladys Teh <97971054+gladysteh99@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:05:38 -0500 Subject: [PATCH 4/7] add pyarrow support in find_column_type for pandas dataframes (#313) * add pyarrow support in find_column_type for pandas dataframes * update pandas lower pin * change default to varchar --- locopy/utility.py | 53 +++++++++++++++++++++++++++++-------------- pyproject.toml | 2 +- tests/test_utility.py | 42 ++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 18 deletions(-) diff --git a/locopy/utility.py b/locopy/utility.py index 7b61f31..22cd6c0 100644 --- a/locopy/utility.py +++ b/locopy/utility.py @@ -30,6 +30,7 @@ import pandas as pd import polars as pl +import pyarrow as pa import yaml from locopy.errors import ( @@ -317,6 +318,20 @@ def validate_float_object(column): except (ValueError, TypeError): return None + def check_column_type_pyarrow(pa_dtype): + if pa.types.is_temporal(pa_dtype): + return "timestamp" + elif pa.types.is_boolean(pa_dtype): + return "boolean" + elif pa.types.is_integer(pa_dtype): + return "int" + elif pa.types.is_floating(pa_dtype): + return "float" + elif pa.types.is_string(pa_dtype): + return "varchar" + else: + return "varchar" + if warehouse_type.lower() not in ["snowflake", "redshift"]: raise ValueError( 'warehouse_type argument must be either "snowflake" or "redshift"' @@ -328,24 +343,28 @@ def validate_float_object(column): data = dataframe[column].dropna().reset_index(drop=True) if data.size == 0: column_type.append("varchar") - elif (data.dtype in ["datetime64[ns]", "M8[ns]"]) or ( - re.match(r"(datetime64\[ns\,\W)([a-zA-Z]+)(\])", str(data.dtype)) - ): - column_type.append("timestamp") - elif str(data.dtype).lower().startswith("bool"): - column_type.append("boolean") - elif str(data.dtype).startswith("object"): - data_type = validate_float_object(data) or validate_date_object(data) - if not data_type: - column_type.append("varchar") - else: - column_type.append(data_type) - elif str(data.dtype).lower().startswith("int"): - column_type.append("int") - elif str(data.dtype).lower().startswith("float"): - column_type.append("float") + elif isinstance(data.dtype, pd.ArrowDtype): + datatype = check_column_type_pyarrow(data.dtype.pyarrow_dtype) + column_type.append(datatype) else: - column_type.append("varchar") + if (data.dtype in ["datetime64[ns]", "M8[ns]"]) or ( + re.match(r"(datetime64\[ns\,\W)([a-zA-Z]+)(\])", str(data.dtype)) + ): + column_type.append("timestamp") + elif str(data.dtype).lower().startswith("bool"): + column_type.append("boolean") + elif str(data.dtype).startswith("object"): + data_type = validate_float_object(data) or validate_date_object(data) + if not data_type: + column_type.append("varchar") + else: + column_type.append(data_type) + elif str(data.dtype).lower().startswith("int"): + column_type.append("int") + elif str(data.dtype).lower().startswith("float"): + column_type.append("float") + else: + column_type.append("varchar") logger.info("Parsing column %s to %s", column, column_type[-1]) return OrderedDict(zip(list(dataframe.columns), column_type)) diff --git a/pyproject.toml b/pyproject.toml index 91bf317..43d6bf7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }, ] license = {text = "Apache Software License"} -dependencies = ["boto3<=1.35.53,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=0.25.2", "numpy<=2.0.2,>=1.22.0", "polars>=0.20.0"] +dependencies = ["boto3<=1.35.53,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=1.5.0", "numpy<=2.0.2,>=1.22.0", "polars>=0.20.0", "pyarrow>=10.0.1"] requires-python = ">=3.9.0" classifiers = [ diff --git a/tests/test_utility.py b/tests/test_utility.py index ec6f8f2..a7991f8 100644 --- a/tests/test_utility.py +++ b/tests/test_utility.py @@ -29,6 +29,7 @@ from unittest import mock import locopy.utility as util +import pyarrow as pa import pytest from locopy.errors import ( CompressionError, @@ -388,7 +389,48 @@ def test_find_column_type_new(): "d": "varchar", "e": "boolean", } + assert find_column_type(input_text, "snowflake") == output_text_snowflake + assert find_column_type(input_text, "redshift") == output_text_redshift + + +def test_find_column_type_pyarrow(): + import pandas as pd + + input_text = pd.DataFrame.from_dict( + { + "a": [1], + "b": [pd.Timestamp("2017-01-01T12+0")], + "c": [1.2], + "d": ["a"], + "e": [True], + } + ) + input_text = input_text.astype( + dtype={ + "a": "int64[pyarrow]", + "b": "timestamp[ns, tz=UTC][pyarrow]", + "c": "float64[pyarrow]", + "d": pd.ArrowDtype(pa.string()), + "e": "bool[pyarrow]", + } + ) + + output_text_snowflake = { + "a": "int", + "b": "timestamp", + "c": "float", + "d": "varchar", + "e": "boolean", + } + + output_text_redshift = { + "a": "int", + "b": "timestamp", + "c": "float", + "d": "varchar", + "e": "boolean", + } assert find_column_type(input_text, "snowflake") == output_text_snowflake assert find_column_type(input_text, "redshift") == output_text_redshift From 902d4a69d477c714cc774d812cd73bc0820700c0 Mon Sep 17 00:00:00 2001 From: Faisal Date: Wed, 20 Nov 2024 14:32:48 -0400 Subject: [PATCH 5/7] edgetest bump versions (#312) * Update requirements.txt * Update pyproject.toml * Recompile reqs * Bump lower pandas --------- Co-authored-by: JacobDawang --- pyproject.toml | 2 +- requirements.txt | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 43d6bf7..d233483 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }, ] license = {text = "Apache Software License"} -dependencies = ["boto3<=1.35.53,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=1.5.0", "numpy<=2.0.2,>=1.22.0", "polars>=0.20.0", "pyarrow>=10.0.1"] +dependencies = ["boto3<=1.35.63,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=1.5.0", "numpy<=2.1.3,>=1.22.0", "polars>=0.20.0", "pyarrow>=10.0.1"] requires-python = ">=3.9.0" classifiers = [ diff --git a/requirements.txt b/requirements.txt index db34d19..91e85cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # This file was autogenerated by uv via the following command: -# uv pip compile --output-file=requirements.txt pyproject.toml -boto3==1.35.53 +# uv pip compile pyproject.toml -o requirements.txt +boto3==1.35.63 # via locopy (pyproject.toml) -botocore==1.35.53 +botocore==1.35.64 # via # boto3 # s3transfer @@ -10,13 +10,15 @@ jmespath==1.0.1 # via # boto3 # botocore -numpy==2.0.2 +numpy==2.1.3 # via # locopy (pyproject.toml) # pandas pandas==2.2.3 # via locopy (pyproject.toml) -polars==1.12.0 +polars==1.14.0 + # via locopy (pyproject.toml) +pyarrow==18.0.0 # via locopy (pyproject.toml) python-dateutil==2.9.0.post0 # via @@ -32,5 +34,5 @@ six==1.16.0 # via python-dateutil tzdata==2024.2 # via pandas -urllib3==1.26.20 +urllib3==2.2.3 # via botocore From ce532077255d0a61963172e27a65b57009d6ae10 Mon Sep 17 00:00:00 2001 From: Gladys Teh <97971054+gladysteh99@users.noreply.github.com> Date: Wed, 20 Nov 2024 13:40:27 -0500 Subject: [PATCH 6/7] fix test to be compatible with pandas 1.5.0 (lowest) (#314) --- tests/test_utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utility.py b/tests/test_utility.py index a7991f8..2ccf772 100644 --- a/tests/test_utility.py +++ b/tests/test_utility.py @@ -409,7 +409,7 @@ def test_find_column_type_pyarrow(): input_text = input_text.astype( dtype={ "a": "int64[pyarrow]", - "b": "timestamp[ns, tz=UTC][pyarrow]", + "b": pd.ArrowDtype(pa.timestamp("ns", tz="UTC")), "c": "float64[pyarrow]", "d": pd.ArrowDtype(pa.string()), "e": "bool[pyarrow]", From 3966732395ec261d08e0a1d65c24bfb3500b6881 Mon Sep 17 00:00:00 2001 From: Faisal Date: Tue, 17 Dec 2024 13:20:06 -0400 Subject: [PATCH 7/7] edgetest bump versions (#315) * bump versions * Update requirements.txt * Update _version.py --- locopy/_version.py | 2 +- pyproject.toml | 2 +- requirements.txt | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/locopy/_version.py b/locopy/_version.py index dc3ce4d..9a5b5d9 100644 --- a/locopy/_version.py +++ b/locopy/_version.py @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.6.2" +__version__ = "0.6.3" diff --git a/pyproject.toml b/pyproject.toml index d233483..1f4065c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }, ] license = {text = "Apache Software License"} -dependencies = ["boto3<=1.35.63,>=1.9.92", "PyYAML<=6.0.1,>=5.1", "pandas<=2.2.3,>=1.5.0", "numpy<=2.1.3,>=1.22.0", "polars>=0.20.0", "pyarrow>=10.0.1"] +dependencies = ["boto3<=1.35.80,>=1.9.92", "PyYAML<=6.0.2,>=5.1", "pandas<=2.2.3,>=1.5.0", "numpy<=2.2.0,>=1.22.0", "polars>=0.20.0", "pyarrow>=10.0.1"] requires-python = ">=3.9.0" classifiers = [ diff --git a/requirements.txt b/requirements.txt index 91e85cc..7e9be3e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # This file was autogenerated by uv via the following command: # uv pip compile pyproject.toml -o requirements.txt -boto3==1.35.63 +boto3==1.35.80 # via locopy (pyproject.toml) -botocore==1.35.64 +botocore==1.35.80 # via # boto3 # s3transfer @@ -10,15 +10,15 @@ jmespath==1.0.1 # via # boto3 # botocore -numpy==2.1.3 +numpy==2.2.0 # via # locopy (pyproject.toml) # pandas pandas==2.2.3 # via locopy (pyproject.toml) -polars==1.14.0 +polars==1.17.1 # via locopy (pyproject.toml) -pyarrow==18.0.0 +pyarrow==18.1.0 # via locopy (pyproject.toml) python-dateutil==2.9.0.post0 # via @@ -26,11 +26,11 @@ python-dateutil==2.9.0.post0 # pandas pytz==2024.2 # via pandas -pyyaml==6.0.1 +pyyaml==6.0.2 # via locopy (pyproject.toml) -s3transfer==0.10.3 +s3transfer==0.10.4 # via boto3 -six==1.16.0 +six==1.17.0 # via python-dateutil tzdata==2024.2 # via pandas