diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py
index 664b27048a75..06b344e014a8 100644
--- a/ibis/backends/clickhouse/__init__.py
+++ b/ibis/backends/clickhouse/__init__.py
@@ -773,3 +773,24 @@ def create_view(
         with self._safe_raw_sql(src, external_tables=external_tables):
             pass
         return self.table(name, database=database)
+
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether `DESCRIBE <name>` succeeds on the current connection."""
+        name = sg.table(name, quoted=self.compiler.quoted).sql(self.dialect)
+        try:
+            # DESCRIBE TABLE $TABLE FORMAT NULL is the fastest way to check
+            # table existence in clickhouse; FORMAT NULL produces no data which
+            # is ideal since we don't care about the output for existence
+            # checking
+            #
+            # Other methods compared were
+            # 1. SELECT 1 FROM $TABLE LIMIT 0
+            # 2. SHOW TABLES LIKE $TABLE LIMIT 1
+            #
+            # if the table exists nothing is returned and there's no error
+            # otherwise there's an error
+            self.con.raw_query(f"DESCRIBE {name} FORMAT NULL")
+        except cc.driver.exceptions.DatabaseError:
+            return False
+        else:
+            return True
diff --git a/ibis/backends/exasol/__init__.py b/ibis/backends/exasol/__init__.py
index 05456db385f5..84e4fbd2b005 100644
--- a/ibis/backends/exasol/__init__.py
+++ b/ibis/backends/exasol/__init__.py
@@ -243,6 +243,10 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:
         finally:
             self.con.execute(drop_view)
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether the connection metadata reports a table named `name`."""
+        return self.con.meta.table_exists(name)
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         schema = op.schema
         if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
diff --git a/ibis/backends/mssql/__init__.py b/ibis/backends/mssql/__init__.py
index e20624e1cd07..737175f95750 100644
--- a/ibis/backends/mssql/__init__.py
+++ b/ibis/backends/mssql/__init__.py
@@ -703,6 +703,17 @@ def create_table(
             namespace=ops.Namespace(catalog=catalog, database=db),
         ).to_expr()
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether `OBJECT_ID(name, 'U')` yields a non-NULL object id."""
+        # The single character U here means user-defined table
+        # see https://learn.microsoft.com/en-us/sql/relational-databases/system-catalog-views/sys-objects-transact-sql?view=sql-server-ver16
+        sql = sg.select(sg.func("object_id", sge.convert(name), sge.convert("U"))).sql(
+            self.dialect
+        )
+        with self.begin() as cur:
+            [(result,)] = cur.execute(sql).fetchall()
+        return result is not None
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         schema = op.schema
         if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py
index eec4df9ac634..4281c81831d0 100644
--- a/ibis/backends/mysql/__init__.py
+++ b/ibis/backends/mysql/__init__.py
@@ -13,6 +13,7 @@
 import pymysql
 import sqlglot as sg
 import sqlglot.expressions as sge
+from pymysql.constants import ER
 
 import ibis
 import ibis.backends.sql.compilers as sc
@@ -465,6 +466,30 @@ def create_table(
             name, schema=schema, source=self, namespace=ops.Namespace(database=database)
         ).to_expr()
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether `SHOW COLUMNS FROM <name>` succeeds.
+
+        Only the "no such table" error (`ER.NO_SUCH_TABLE`) is translated to
+        `False`; any other `ProgrammingError` is re-raised.
+        """
+        name = sg.to_identifier(name, quoted=self.compiler.quoted).sql(self.dialect)
+        # just return the single field with column names; no need to bring back
+        # everything if the command succeeds
+        sql = f"SHOW COLUMNS FROM {name} LIKE 'Field'"
+        try:
+            with self.begin() as cur:
+                cur.execute(sql)
+                cur.fetchall()
+        except pymysql.err.ProgrammingError as e:
+            # index instead of unpacking exactly two values so an unexpected
+            # `args` shape cannot mask the original error with a ValueError
+            err_code = e.args[0] if e.args else None
+            if err_code == ER.NO_SUCH_TABLE:
+                return False
+            raise
+        else:
+            return True
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         schema = op.schema
         if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py
index 51d9427a2a6c..2481175c600b 100644
--- a/ibis/backends/oracle/__init__.py
+++ b/ibis/backends/oracle/__init__.py
@@ -24,7 +24,7 @@
 from ibis import util
 from ibis.backends import CanListDatabase, CanListSchema
 from ibis.backends.sql import SQLBackend
-from ibis.backends.sql.compilers.base import STAR, C
+from ibis.backends.sql.compilers.base import NULL, STAR, C
 
 if TYPE_CHECKING:
     from urllib.parse import ParseResult
@@ -495,6 +495,22 @@ def drop_table(
 
         super().drop_table(name, database=(catalog, db), force=force)
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether `USER_OBJECTS` lists a `TABLE` object named `name`."""
+        sql = (
+            sg.select(NULL)
+            .from_(sg.to_identifier("USER_OBJECTS", quoted=self.compiler.quoted))
+            .where(
+                C.OBJECT_TYPE.eq(sge.convert("TABLE")),
+                C.OBJECT_NAME.eq(sge.convert(name)),
+            )
+            .limit(sge.convert(1))
+            .sql(self.dialect)
+        )
+        with self.begin() as cur:
+            results = cur.execute(sql).fetchall()
+        return bool(results)
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         schema = op.schema
 
diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py
index fcfa517a8aff..777b28e00b25 100644
--- a/ibis/backends/postgres/__init__.py
+++ b/ibis/backends/postgres/__init__.py
@@ -89,6 +89,22 @@ def _from_url(self, url: ParseResult, **kwargs):
 
         return self.connect(**kwargs)
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether `name` can be selected from without `UndefinedTable`."""
+        import psycopg2.errors
+
+        ident = sg.to_identifier(name, quoted=self.compiler.quoted)
+        sql = sg.select(sge.convert(1)).from_(ident).limit(0).sql(self.dialect)
+
+        try:
+            with self.begin() as cur:
+                cur.execute(sql)
+                cur.fetchall()
+        except psycopg2.errors.UndefinedTable:
+            return False
+        else:
+            return True
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         from psycopg2.extras import execute_batch
 
diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py
index def05da78f82..c19958e3db38 100644
--- a/ibis/backends/pyspark/__init__.py
+++ b/ibis/backends/pyspark/__init__.py
@@ -411,11 +411,22 @@ def _register_udfs(self, expr: ir.Expr) -> None:
             self._session.udf.register(f"unwrap_json_{typ.__name__}", unwrap_json(typ))
         self._session.udf.register("unwrap_json_float", unwrap_json_float)
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether `SHOW TABLES ... LIKE '<name>'` yields at least one row."""
+        # NOTE(review): `name` is interpolated verbatim into a SQL string
+        # literal, so a name containing a quote would break this statement
+        sql = f"SHOW TABLES IN {self.current_database} LIKE '{name}'"
+        return bool(self._session.sql(sql).count())
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         schema = PySparkSchema.from_ibis(op.schema)
         df = self._session.createDataFrame(data=op.data.to_frame(), schema=schema)
         df.createTempView(op.name)
 
+    def _finalize_memtable(self, name: str) -> None:
+        """Drop the temporary view named `name` from the session catalog."""
+        self._session.catalog.dropTempView(name)
+
     @contextlib.contextmanager
     def _safe_raw_sql(self, query: str) -> Any:
         yield self.raw_sql(query)
diff --git a/ibis/backends/risingwave/__init__.py b/ibis/backends/risingwave/__init__.py
index e824d93d93a3..27ae76cf9385 100644
--- a/ibis/backends/risingwave/__init__.py
+++ b/ibis/backends/risingwave/__init__.py
@@ -260,6 +260,24 @@ def create_table(
             name, schema=schema, source=self, namespace=ops.Namespace(database=database)
         ).to_expr()
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether `name` can be selected from without `InternalError`."""
+        import psycopg2.errors
+
+        ident = sg.to_identifier(name, quoted=self.compiler.quoted)
+        sql = sg.select(sge.convert(1)).from_(ident).limit(0).sql(self.dialect)
+
+        try:
+            with self.begin() as cur:
+                cur.execute(sql)
+                cur.fetchall()
+        except psycopg2.errors.InternalError:
+            # NOTE(review): any InternalError is treated as "table is missing";
+            # confirm that no unrelated failure surfaces as this class
+            return False
+        else:
+            return True
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         schema = op.schema
         if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py
index 5eb378186414..93522f9b2113 100644
--- a/ibis/backends/snowflake/__init__.py
+++ b/ibis/backends/snowflake/__init__.py
@@ -645,9 +645,24 @@ def list_tables(
         return self._filter_with_like(tables + views, like=like)
 
     def _in_memory_table_exists(self, name: str) -> bool:
-        with self.con.cursor() as con:
-            result = con.execute(f"SHOW TABLES LIKE '{name}'").fetchone()
-            return bool(result)
+        """Return whether selecting from `name` succeeds (SQLSTATE 42S02 -> False)."""
+        import snowflake.connector
+
+        ident = sg.to_identifier(name, quoted=self.compiler.quoted)
+        sql = sg.select(sge.convert(1)).from_(ident).limit(0).sql(self.dialect)
+
+        try:
+            with self.con.cursor() as cur:
+                cur.execute(sql).fetchall()
+        except snowflake.connector.errors.ProgrammingError as e:
+            # this cryptic SQLSTATE code is the only generic and reliable way
+            # to tell if the error means "table not found for any reason"
+            # otherwise, we need to reraise the exception
+            if e.sqlstate == "42S02":
+                return False
+            raise
+        else:
+            return True
 
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         import pyarrow.parquet as pq
diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py
index c52c654486ee..d59a1fbc39dc 100644
--- a/ibis/backends/sqlite/__init__.py
+++ b/ibis/backends/sqlite/__init__.py
@@ -338,6 +338,21 @@ def _generate_create_table(self, table: sge.Table, schema: sch.Schema):
 
         return sge.Create(kind="TABLE", this=target)
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether `name` can be selected from without `OperationalError`."""
+        ident = sg.to_identifier(name, quoted=self.compiler.quoted)
+        query = sg.select(sge.convert(1)).from_(ident).limit(0).sql(self.dialect)
+        try:
+            with self.begin() as cur:
+                cur.execute(query)
+                cur.fetchall()
+        except sqlite3.OperationalError:
+            # NOTE(review): any OperationalError is treated as "table is
+            # missing"; confirm that is intended for e.g. a locked database
+            return False
+        else:
+            return True
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         table = sg.table(op.name, quoted=self.compiler.quoted, catalog="temp")
         create_stmt = self._generate_create_table(table, op.schema).sql(self.name)
diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py
index 7c4ab32ec77b..0c182f7c0c9a 100644
--- a/ibis/backends/trino/__init__.py
+++ b/ibis/backends/trino/__init__.py
@@ -552,6 +552,22 @@ def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:
         df = TrinoPandasData.convert_table(df, schema)
         return df
 
+    def _in_memory_table_exists(self, name: str) -> bool:
+        """Return whether selecting from `name` succeeds (TABLE_NOT_FOUND -> False)."""
+        ident = sg.to_identifier(name, quoted=self.compiler.quoted)
+        sql = sg.select(sge.convert(1)).from_(ident).limit(0).sql(self.dialect)
+
+        try:
+            with self.begin() as cur:
+                cur.execute(sql)
+                cur.fetchall()
+        except trino.exceptions.TrinoUserError as e:
+            if e.error_name == "TABLE_NOT_FOUND":
+                return False
+            raise
+        else:
+            return True
+
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         schema = op.schema
         if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]: