Skip to content

Commit

Permalink
Dasc 1121 (#600)
Browse files Browse the repository at this point in the history
* Bump version: 2.54.12 → 2.54.13

* adding instructions to run single test case

* adding instructions to get BQ service account

* adding dependency of google-cloud-bigquery-storage for python3.6+

* adding testcases for BQ query without pandas. + docstrings
  • Loading branch information
darolt authored Mar 7, 2023
1 parent 92566cb commit 7dd403a
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.54.12
current_version = 2.54.13
tag_name = {new_version}
commit = True
tag = True
Expand Down
5 changes: 5 additions & 0 deletions DEV.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ Most unit tests are located at ./test . To run unit tests please use:
coverage run -m pytest
```

To run a specific test:
```bash
pytest ./test/<test file name>
```

In order to check the results either use the textual output:
```bash
coverage report
Expand Down
12 changes: 12 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,18 @@ data transformation is needed.
results = bq.query(query_str)
In case one needs a service account with access to BigQuery, the following code can be
used:

.. code:: python
from pycarol import Carol
from pycarol.bigquery import TokenManager
tm = TokenManager(Carol())
service_account = tm.get_token().service_account
PyCarol also provides access to the BigQuery Storage API. It allows for much faster reading
times, but with limited querying capabilities. For instance, only tables are readable,
so 'ingestion_stg_model_deep_audit' is ok, but 'stg_model_deep_audit' is not (it is a
Expand Down
1 change: 1 addition & 0 deletions doc/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ gcsfs>=0.3.0,<0.7
google-auth
google-auth-httplib2
google-cloud-bigquery>=2.26.0
google-cloud-bigquery-storage
google-cloud-core>=1.4.1
google-cloud-storage
python-dotenv
Expand Down
2 changes: 1 addition & 1 deletion pycarol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import tempfile

__version__ = "2.54.12"
__version__ = "2.54.13"

__TEMP_STORAGE__ = os.path.join(tempfile.gettempdir(), "carolina/cache")

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"google-auth",
"google-auth-httplib2",
"google-cloud-bigquery>=2.26.0",
"google-cloud-bigquery-storage",
"google-cloud-core>=1.4.1",
"google-cloud-storage",
"python-dotenv",
Expand Down
111 changes: 95 additions & 16 deletions test/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from unittest import mock
import typing as T

import pandas as pd
import pycarol


def test_token_init() -> None:
"""Test the initialization of the Token class in the pycarol.bigquery module."""
token_mock = mock.MagicMock()
service_account = {"expiration_time": "expiration_time"}
env: T.Dict = {}
Expand All @@ -19,6 +19,7 @@ def test_token_init() -> None:


def test_token_to_dict() -> None:
"""Test the to_dict() method of the Token class in the pycarol.bigquery module."""
token_mock = mock.MagicMock()
token_mock.service_account = {}
token_mock._env = {}
Expand All @@ -28,6 +29,7 @@ def test_token_to_dict() -> None:


def test_token_expired() -> None:
"""Test the expired() method of the Token class in the pycarol.bigquery module."""
token_mock = mock.MagicMock()
dt_format = "%Y-%m-%dT%H:%M:%S.%fZ"
pass_date = datetime.utcnow() - timedelta(1)
Expand All @@ -38,7 +40,7 @@ def test_token_expired() -> None:

@mock.patch("pycarol.bigquery.Storage")
def test_token_manager_init(storage_mock) -> None:
"""TokenManager __init__ with defaults."""
"""Test the initialization of the pycarol.bigquery.TokenManager class."""
manager_mock = mock.MagicMock()
carol_mock = mock.MagicMock()
carol_mock.get_current.return_value = {"env_id": 5}
Expand All @@ -54,6 +56,7 @@ def test_token_manager_init(storage_mock) -> None:


def test_token_manager_issue_new_key() -> None:
"""Test the _issue_new_key() method of the pycarol.bigquery.TokenManager class."""
manager_mock = mock.MagicMock()
manager_mock._carol = mock.MagicMock()
manager_mock._carol.call_api.return_value = {}
Expand All @@ -62,6 +65,7 @@ def test_token_manager_issue_new_key() -> None:


def test_token_manager_save_token_file() -> None:
"""Test the _save_token_file() method of the pycarol.bigquery.TokenManager class."""
manager_mock = mock.MagicMock()
manager_mock._tmp_filepath = Path("/tmp/pycarol_test/test_sa.env")
token_mock = mock.MagicMock()
Expand All @@ -75,6 +79,7 @@ def test_token_manager_save_token_file() -> None:


def test_token_manager_save_token_cloud() -> None:
"""Test the _save_token_cloud() method of the pycarol.bigquery.TokenManager."""
manager_mock = mock.MagicMock()
Path("/tmp/pycarol_test/test_sa.env").touch()
pycarol.bigquery.TokenManager._save_token_cloud(manager_mock)
Expand All @@ -84,6 +89,7 @@ def test_token_manager_save_token_cloud() -> None:

@mock.patch("pycarol.bigquery.Token")
def test_token_manager_load_token_file(token_mock) -> None:
"""Test the _load_token_file() method of the pycarol.bigquery.TokenManager class."""
sa = {"service_account": "test", "env": "test"}
test_path = Path("/tmp/pycarol_test/test_sa.env")
with open(test_path, "w", encoding="utf-8") as file:
Expand All @@ -97,11 +103,13 @@ def test_token_manager_load_token_file(token_mock) -> None:


def test_token_manager_load_token_cloud() -> None:
    """Check that _load_token_cloud() returns the result of _load_token_file().

    The manager is a MagicMock whose storage reports the token as present and
    whose load() yields the path of a placeholder file touched beforehand.
    """
    sa_file = Path("/tmp/pycarol_test/test_sa.env")
    sa_file.touch()

    fake_manager = mock.MagicMock()
    fake_manager._storage.exists.return_value = True
    fake_manager._storage.load.return_value = "/tmp/pycarol_test/test_sa.env"
    fake_manager._tmp_filepath = Path("/tmp/pycarol_test/test_sa2.env")

    loaded = pycarol.bigquery.TokenManager._load_token_cloud(fake_manager)
    assert loaded == fake_manager._load_token_file.return_value

Expand All @@ -115,13 +123,15 @@ def test_token_manager_get_forced_token(token_mock) -> None:

@mock.patch("pycarol.bigquery.Token")
def test_token_manager_get_token(token_mock) -> None:
    """Check that get_token() yields what get_forced_token() returns on a mocked manager.

    ``token_mock`` is injected by the patch decorator and is not used directly.
    """
    fake_manager = mock.MagicMock()
    obtained = pycarol.bigquery.TokenManager.get_token(fake_manager)
    assert obtained == fake_manager.get_forced_token.return_value


@mock.patch("pycarol.bigquery.TokenManager")
def test_bq_init(manager_mock) -> None:
"""Test the initialization of the BQ class in the pycarol.bigquery module."""
bq_mock = mock.MagicMock()
carol_mock = mock.MagicMock()
carol_mock.get_current.return_value = {"env_id": "5"}
Expand All @@ -135,13 +145,33 @@ def test_bq_init(manager_mock) -> None:
@mock.patch("pycarol.bigquery.Credentials")
@mock.patch("pycarol.bigquery.bigquery")
def test_bq_generate_client(bigquery_mock, credentials_mock) -> None:
    """Check that BQ._generate_client() returns the patched bigquery.Client instance.

    Both the ``bigquery`` module and ``Credentials`` are patched; the service
    account passed in carries only an empty ``project_id``.
    """
    service_account = {"project_id": ""}
    built = pycarol.bigquery.BQ._generate_client(service_account)
    assert built == bigquery_mock.Client.return_value


@mock.patch("pycarol.bigquery.bigquery.QueryJobConfig")
def test_bq_query_pd(query_job_mock) -> None:
    """Check that BQ.query() returns a pandas DataFrame when return_dataframe=True.

    The client produced by ``_generate_client`` is a mock whose ``query()``
    returns two identical rows; the result must equal a DataFrame built from
    those same rows. ``query_job_mock`` is injected by the patch decorator.
    """
    # Imported locally so the rest of the module does not require pandas.
    import pandas as pd

    rows = [
        {"col1": "val1", "col2": "val2"},
        {"col1": "val1", "col2": "val2"},
    ]
    fake_client = mock.MagicMock()
    fake_client.query.return_value = rows

    fake_bq = mock.MagicMock()
    fake_bq._generate_client.return_value = fake_client

    result = pycarol.bigquery.BQ.query(fake_bq, "", return_dataframe=True)
    expected = pd.DataFrame(rows)
    assert result.equals(expected)  # type: ignore


@mock.patch("pycarol.bigquery.bigquery.QueryJobConfig")
def test_bq_query(query_job_mock) -> None:
"""Test the query() method of the pycarol.bigquery.BQ class."""
bq_mock = mock.MagicMock()
query_ret = [
{"col1": "val1", "col2": "val2"},
Expand All @@ -151,12 +181,13 @@ def test_bq_query(query_job_mock) -> None:
client_mock.query.return_value = query_ret
bq_mock._generate_client.return_value = client_mock
query = ""
ret = pycarol.bigquery.BQ.query(bq_mock, query)
assert ret.equals(pd.DataFrame(query_ret))
ret = pycarol.bigquery.BQ.query(bq_mock, query, return_dataframe=False)
assert ret == query_ret


@mock.patch("pycarol.bigquery.TokenManager")
def test_storage_init(manager_mock) -> None:
"""Test the initialization of the BQStorage class in the pycarol.bigquery module."""
storage_mock = mock.MagicMock()
carol_mock = mock.MagicMock()
carol_mock.get_current.return_value = {"env_id": "5"}
Expand All @@ -170,13 +201,15 @@ def test_storage_init(manager_mock) -> None:
@mock.patch("pycarol.bigquery.Credentials")
@mock.patch("pycarol.bigquery.bigquery_storage")
def test_storage_generate_client(bigquery_mock, credentials_mock) -> None:
    """Check that BQStorage._generate_client() returns the patched BigQueryReadClient.

    ``bigquery_mock`` stands in for the ``bigquery_storage`` module and
    ``credentials_mock`` for ``Credentials``; the service account passed in
    carries only an empty ``project_id``.
    """
    service_account = {"project_id": ""}
    built = pycarol.bigquery.BQStorage._generate_client(service_account)
    assert built == bigquery_mock.BigQueryReadClient.return_value


@mock.patch("pycarol.bigquery.types")
def test_storage_get_read_session(types_mock) -> None:
"""Test the _get_read_session() method of the pycarol.bigquery.BQStorage class."""
storage_mock = mock.MagicMock()
client_mock = mock.MagicMock()
session = pycarol.bigquery.BQStorage._get_read_session(
Expand All @@ -185,18 +218,25 @@ def test_storage_get_read_session(types_mock) -> None:
assert session == client_mock.create_read_session.return_value


def test_storage_query() -> None:
def test_storage_query_pd() -> None:
"""Test the query() method of the pycarol.bigquery.BQStorage class."""
import pandas as pd

pages = mock.MagicMock()
page1 = mock.MagicMock()
page1.to_dataframe.return_value = pd.DataFrame([
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
])
page1.to_dataframe.return_value = pd.DataFrame(
[
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
]
)
page2 = mock.MagicMock()
page2.to_dataframe.return_value = pd.DataFrame([
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
])
page2.to_dataframe.return_value = pd.DataFrame(
[
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
]
)
pages.pages = [page1, page2]
reader_mock = mock.MagicMock()
reader_mock.rows.return_value = pages
Expand All @@ -208,10 +248,49 @@ def test_storage_query() -> None:
storage_mock._generate_client.return_value = client_mock
ret = pycarol.bigquery.BQStorage.query(storage_mock, "table")

ret_expected = pd.DataFrame([
ret_expected = pd.DataFrame(
[
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
]
)
assert ret_expected.equals(ret)


def test_storage_query() -> None:
"""Test the query() method of the pycarol.bigquery.BQStorage class."""
pages = mock.MagicMock()
page1 = [
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
]
page2 = [
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
])
assert ret_expected.equals(ret)
]
pages.pages = [page1, page2]
reader_mock = mock.MagicMock()
reader_mock.rows.return_value = pages

client_mock = mock.MagicMock()
client_mock.read_rows.return_value = reader_mock

storage_mock = mock.MagicMock()
storage_mock._generate_client.return_value = client_mock
ret = pycarol.bigquery.BQStorage.query(
storage_mock, "table", return_dataframe=False
)

ret_expected = [
[
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
],
[
{"col1": "name1", "col2": "name2"},
{"col1": "name1", "col2": "name2"},
],
]
assert ret_expected == ret

0 comments on commit 7dd403a

Please sign in to comment.