diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 969d27f6714a..a21d89c3872b 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -295,6 +295,9 @@ def visit_Date(self, op, *, arg): def visit_RegexReplace(self, op, *, arg, pattern, replacement): return self.f.regexp_replace(arg, pattern, replacement, dialect=self.dialect) + def visit_RegexExtract(self, op, *, arg, pattern, index): + return self.f.anon.regexp_extract(arg, pattern, index) + def visit_Round(self, op, *, arg, digits): rounded = self.f.round(*filter(None, (arg, digits))) diff --git a/ibis/backends/sql/compilers/snowflake.py b/ibis/backends/sql/compilers/snowflake.py index 207422a12b6f..ad94da56433f 100644 --- a/ibis/backends/sql/compilers/snowflake.py +++ b/ibis/backends/sql/compilers/snowflake.py @@ -424,13 +424,7 @@ def visit_ArrayPosition(self, op, *, arg, other): def visit_RegexExtract(self, op, *, arg, pattern, index): # https://docs.snowflake.com/en/sql-reference/functions/regexp_substr - return sge.RegexpExtract( - this=arg, - expression=pattern, - position=sge.convert(1), - group=index, - parameters=sge.convert("ce"), - ) + return self.f.anon.regexp_substr(arg, pattern, 1, 1, "ce", index) def visit_ArrayZip(self, op, *, arg): return self.if_( diff --git a/ibis/backends/sql/dialects.py b/ibis/backends/sql/dialects.py index 6cc8967b6c8e..f7a2eb38dca9 100644 --- a/ibis/backends/sql/dialects.py +++ b/ibis/backends/sql/dialects.py @@ -205,6 +205,7 @@ def new_name(names: set[str], name: str) -> str: class Flink(Hive): UNESCAPED_SEQUENCES = {"\\\\d": "\\d"} + REGEXP_EXTRACT_DEFAULT_GROUP = 0 class Generator(Hive.Generator): UNNEST_WITH_ORDINALITY = False @@ -308,6 +309,7 @@ class Tokenizer(Hive.Tokenizer): class Impala(Hive): NULL_ORDERING = "nulls_are_large" + REGEXP_EXTRACT_DEFAULT_GROUP = 0 class Generator(Hive.Generator): TRANSFORMS = Hive.Generator.TRANSFORMS.copy() | { diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 2ab995d3931e..7f7c545dbed6 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1257,11 +1257,6 @@ def string_temp_table(backend, con): raises=AssertionError, reason="Spark SQL LTRIM doesn't accept characters to trim", ), - pytest.mark.notimpl( - ["bigquery", "snowflake"], - raises=AssertionError, - reason="does a full `strip` instead", - ), ], ), param( @@ -1274,11 +1269,6 @@ def string_temp_table(backend, con): raises=AssertionError, reason="Spark SQL RTRIM doesn't accept characters to trim", ), - pytest.mark.notimpl( - ["bigquery", "snowflake"], - raises=AssertionError, - reason="does a full `strip` instead", - ), ], ), param( diff --git a/poetry.lock b/poetry.lock index 124ad410d3a4..785f7f77f7e5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6932,18 +6932,18 @@ sqlcipher = ["sqlcipher3_binary"] [[package]] name = "sqlglot" -version = "25.19.0" +version = "25.20.1" description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.7" files = [ - {file = "sqlglot-25.19.0-py3-none-any.whl", hash = "sha256:07f3f7c90b909cc324e2f659e6d551adf52293afd317141db8a4a7cfe01468a2"}, - {file = "sqlglot-25.19.0.tar.gz", hash = "sha256:1cf4c4bf5a52a0e943fb182df9f6ae386470806b5ec5d57f59a07d4a6c639322"}, + {file = "sqlglot-25.20.1-py3-none-any.whl", hash = "sha256:ea8c957ed22cc825d7714c46e165b66da33921492124f4d6b7cc742a1a960ec4"}, + {file = "sqlglot-25.20.1.tar.gz", hash = "sha256:495afc1aa26dabedfe2faf9c655779eaf6e2401686a20920b742786d26a26cb0"}, ] [package.extras] dev = ["duckdb (>=0.6)", "maturin (>=1.4,<2.0)", "mypy", "pandas", "pandas-stubs", "pdoc", "pre-commit", "python-dateutil", "pytz", "ruff (==0.4.3)", "types-python-dateutil", "types-pytz", "typing-extensions"] -rs = ["sqlglotrs (==0.2.10)"] +rs = ["sqlglotrs (==0.2.12)"] [[package]] name = "stack-data" @@ -7941,4 +7941,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "4004a68834cc4a029c319c7d13237802281d5ed16e43c33cffed1b1330bf94c3" +content-hash = "eef90ccc5c638e17fff405ae65db499f53dda72ff68ecfd08c17a63621c7e268" diff --git a/pyproject.toml b/pyproject.toml index f4010a69fcbc..cc94bc90d06e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ atpublic = ">=2.3,<6" parsy = ">=2,<3" python-dateutil = ">=2.8.2,<3" pytz = ">=2022.7" -sqlglot = ">=23.4,<25.20" +sqlglot = ">=23.4,<25.21" toolz = ">=0.11,<1" typing-extensions = ">=4.3.0,<5" numpy = { version = ">=1.23.2,<3", optional = true } diff --git a/requirements-dev.txt b/requirements-dev.txt index 342a58f09554..ad517f4f58a2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -257,7 +257,7 @@ sortedcontainers==2.4.0 ; python_version >= "3.10" and python_version < "4.0" soupsieve==2.6 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1.1 ; python_version >= "3.10" and python_version < "3.13" sqlalchemy==2.0.32 ; python_version >= "3.10" and python_version < "3.13" -sqlglot==25.19.0 ; python_version >= "3.10" and python_version < "4.0" +sqlglot==25.20.1 ; python_version >= "3.10" and python_version < "4.0" stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4.0" statsmodels==0.14.2 ; python_version >= "3.10" and python_version < "3.13" tabulate==0.9.0 ; python_version >= "3.10" and python_version < "3.13"