From 1ef9226641ca10de326d2375c2b4cc4fcf05fafa Mon Sep 17 00:00:00 2001 From: Jing Wang Date: Fri, 1 Sep 2017 10:44:41 -0700 Subject: [PATCH] Fix dotted column names in Hive This has the side effect of changing the return value of ResultProxy.keys() and RowProxy.keys(), unless using hive_raw_colnames. After this diff, those methods will return undotted names. --- pyhive/sqlalchemy_hive.py | 24 ++++++++++++++++ pyhive/tests/test_hive.py | 32 ++++++++++----------- pyhive/tests/test_sqlalchemy_hive.py | 24 ++++++++++++++++ scripts/travis-conf/hive/hive-site-ldap.xml | 9 ------ scripts/travis-conf/hive/hive-site.xml | 9 ------ 5 files changed, 64 insertions(+), 34 deletions(-) diff --git a/pyhive/sqlalchemy_hive.py b/pyhive/sqlalchemy_hive.py index 38d084c5..7d7dbc33 100644 --- a/pyhive/sqlalchemy_hive.py +++ b/pyhive/sqlalchemy_hive.py @@ -154,9 +154,33 @@ def visit_DATETIME(self, type_): return 'TIMESTAMP' +class HiveExecutionContext(default.DefaultExecutionContext): + """This is pretty much the same as SQLiteExecutionContext to work around the same issue. + + http://docs.sqlalchemy.org/en/latest/dialects/sqlite.html#dotted-column-names + + engine = create_engine('hive://...', execution_options={'hive_raw_colnames': True}) + """ + + @util.memoized_property + def _preserve_raw_colnames(self): + # Ideally, this would also gate on hive.resultset.use.unique.column.names + return self.execution_options.get('hive_raw_colnames', False) + + def _translate_colname(self, colname): + # Adjust for dotted column names. + # When hive.resultset.use.unique.column.names is true (the default), Hive returns column + # names as "tablename.colname" in cursor.description. + if not self._preserve_raw_colnames and '.' in colname: + return colname.split('.')[-1], colname + else: + return colname, None + + class HiveDialect(default.DefaultDialect): name = b'hive' driver = b'thrift' + execution_ctx_cls = HiveExecutionContext preparer = HiveIdentifierPreparer statement_compiler = HiveCompiler supports_views = True diff --git a/pyhive/tests/test_hive.py b/pyhive/tests/test_hive.py index c766f4bf..1cf0750a 100644 --- a/pyhive/tests/test_hive.py +++ b/pyhive/tests/test_hive.py @@ -38,28 +38,28 @@ def connect(self): def test_description(self, cursor): cursor.execute('SELECT * FROM one_row') - desc = [('number_of_rows', 'INT_TYPE', None, None, None, None, True)] + desc = [('one_row.number_of_rows', 'INT_TYPE', None, None, None, None, True)] self.assertEqual(cursor.description, desc) @with_cursor def test_complex(self, cursor): cursor.execute('SELECT * FROM one_row_complex') self.assertEqual(cursor.description, [ - ('boolean', 'BOOLEAN_TYPE', None, None, None, None, True), - ('tinyint', 'TINYINT_TYPE', None, None, None, None, True), - ('smallint', 'SMALLINT_TYPE', None, None, None, None, True), - ('int', 'INT_TYPE', None, None, None, None, True), - ('bigint', 'BIGINT_TYPE', None, None, None, None, True), - ('float', 'FLOAT_TYPE', None, None, None, None, True), - ('double', 'DOUBLE_TYPE', None, None, None, None, True), - ('string', 'STRING_TYPE', None, None, None, None, True), - ('timestamp', 'TIMESTAMP_TYPE', None, None, None, None, True), - ('binary', 'BINARY_TYPE', None, None, None, None, True), - ('array', 'ARRAY_TYPE', None, None, None, None, True), - ('map', 'MAP_TYPE', None, None, None, None, True), - ('struct', 'STRUCT_TYPE', None, None, None, None, True), - ('union', 'UNION_TYPE', None, None, None, None, True), - ('decimal', 'DECIMAL_TYPE', None, None, None, None, True), + ('one_row_complex.boolean', 'BOOLEAN_TYPE', None, None, None, None, True), + ('one_row_complex.tinyint', 'TINYINT_TYPE', None, None, None, None, True), + ('one_row_complex.smallint', 'SMALLINT_TYPE', None, None, None, None, True), + ('one_row_complex.int', 'INT_TYPE', None, None, None, None, True), + ('one_row_complex.bigint', 'BIGINT_TYPE', None, None, None, None, True), + ('one_row_complex.float', 'FLOAT_TYPE', None, None, None, None, True), + ('one_row_complex.double', 'DOUBLE_TYPE', None, None, None, None, True), + ('one_row_complex.string', 'STRING_TYPE', None, None, None, None, True), + ('one_row_complex.timestamp', 'TIMESTAMP_TYPE', None, None, None, None, True), + ('one_row_complex.binary', 'BINARY_TYPE', None, None, None, None, True), + ('one_row_complex.array', 'ARRAY_TYPE', None, None, None, None, True), + ('one_row_complex.map', 'MAP_TYPE', None, None, None, None, True), + ('one_row_complex.struct', 'STRUCT_TYPE', None, None, None, None, True), + ('one_row_complex.union', 'UNION_TYPE', None, None, None, None, True), + ('one_row_complex.decimal', 'DECIMAL_TYPE', None, None, None, None, True), ]) rows = cursor.fetchall() expected = [( diff --git a/pyhive/tests/test_sqlalchemy_hive.py b/pyhive/tests/test_sqlalchemy_hive.py index f730d04e..821934df 100644 --- a/pyhive/tests/test_sqlalchemy_hive.py +++ b/pyhive/tests/test_sqlalchemy_hive.py @@ -41,6 +41,30 @@ class TestSqlAlchemyHive(unittest.TestCase, SqlAlchemyTestCase): def create_engine(self): return create_engine('hive://localhost:10000/default') + @with_engine_connection + def test_dotted_column_names(self, engine, connection): + """When Hive returns a dotted column name, both the non-dotted version should be available + as an attribute, and the dotted version should remain available as a key. + """ + row = connection.execute('SELECT * FROM one_row').fetchone() + assert row.keys() == ['number_of_rows'] + assert 'number_of_rows' in row + assert row.number_of_rows == 1 + assert row['number_of_rows'] == 1 + assert getattr(row, 'one_row.number_of_rows') == 1 + assert row['one_row.number_of_rows'] == 1 + + @with_engine_connection + def test_dotted_column_names_raw(self, engine, connection): + """When Hive returns a dotted column name, and raw mode is on, nothing should be modified. + """ + row = connection.execution_options(hive_raw_colnames=True)\ + .execute('SELECT * FROM one_row').fetchone() + assert row.keys() == ['one_row.number_of_rows'] + assert 'number_of_rows' not in row + assert getattr(row, 'one_row.number_of_rows') == 1 + assert row['one_row.number_of_rows'] == 1 + @with_engine_connection def test_reflect_select(self, engine, connection): """reflecttable should be able to fill in a table from the name""" diff --git a/scripts/travis-conf/hive/hive-site-ldap.xml b/scripts/travis-conf/hive/hive-site-ldap.xml index a75cde00..83453782 100644 --- a/scripts/travis-conf/hive/hive-site-ldap.xml +++ b/scripts/travis-conf/hive/hive-site-ldap.xml @@ -13,15 +13,6 @@ fs.defaultFS file:/// - - - hive.resultset.use.unique.column.names - false - hive.server2.authentication LDAP diff --git a/scripts/travis-conf/hive/hive-site.xml b/scripts/travis-conf/hive/hive-site.xml index ed0272da..3ec65b80 100644 --- a/scripts/travis-conf/hive/hive-site.xml +++ b/scripts/travis-conf/hive/hive-site.xml @@ -13,13 +13,4 @@ fs.defaultFS file:/// - - - hive.resultset.use.unique.column.names - false -