-
Notifications
You must be signed in to change notification settings - Fork 20
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Generator for paged-queries #75
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from nisystemlink.clients.dataframe import DataFrameClient | ||
from nisystemlink.clients.dataframe.models import QueryTablesRequest | ||
|
||
client = DataFrameClient() | ||
|
||
# List all tables with more than 100k rows | ||
query = QueryTablesRequest( | ||
filter="rowCount > 100000", order_by="CREATED_AT", order_by_descending=True | ||
) | ||
|
||
# Print the query results | ||
for table in client.query_tables_generator(query): | ||
print(f"{table.created_at} \t {table.name} \t Rows={table.row_count}") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
"""Implementation of DataFrameClient.""" | ||
|
||
from typing import List, Optional | ||
from typing import Generator, List, Optional | ||
|
||
from nisystemlink.clients import core | ||
from nisystemlink.clients.core._uplink._base_client import BaseClient | ||
|
@@ -15,6 +15,7 @@ | |
from requests.models import Response | ||
from uplink import Body, Field, Path, Query | ||
|
||
|
||
from . import models | ||
|
||
|
||
|
@@ -119,6 +120,31 @@ def query_tables(self, query: models.QueryTablesRequest) -> models.PagedTables: | |
""" | ||
... | ||
|
||
def query_tables_generator( | ||
self, query: models.QueryTablesRequest | ||
) -> Generator[models.TableMetadata, None, None]: | ||
"""Queries available tables on the SystemLink DataFrame service and returns their metadata. | ||
|
||
Args: | ||
query: The request to query tables. `continuation_token` is ignored. | ||
|
||
Yields: | ||
`models.TableMetadata` Table Metadata | ||
""" | ||
_query = query.copy() | ||
_query.continuation_token = None | ||
while True: | ||
paged_tables = self.query_tables(query=_query) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we include exponential backoff on this so that we protect users from running into rate limiting when they use this style for large queries? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implementing the rate limiter at the client class level will automatically cover all methods; hence, additional handling is not required within the generator. |
||
cont_token = paged_tables.continuation_token | ||
|
||
for table in paged_tables.tables: | ||
yield table | ||
|
||
if cont_token is None: | ||
break | ||
else: | ||
_query.continuation_token = cont_token | ||
|
||
@get("tables/{id}") | ||
def get_table_metadata(self, id: str) -> models.TableMetadata: | ||
"""Retrieves the metadata and column information for a single table identified by its ID. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's weird to me that we're using the term
_generator
in the function name here. Shouldn't this just bequery_tables
and returnGenerator
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, this is not the final name, this PR was raised to discuss the aligned way to proceed