Skip to content

Commit

Permalink
[PLT-1392] [PLT-1350] Remove SDK methods for exports v1 (#1800)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomislav-peharda authored Sep 11, 2024
1 parent cc9d6af commit 0e9f43f
Show file tree
Hide file tree
Showing 14 changed files with 0 additions and 1,210 deletions.
53 changes: 0 additions & 53 deletions libs/labelbox/src/labelbox/schema/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,59 +87,6 @@ def remove_queued_data_rows(self) -> None:
},
experimental=True)

def export_data_rows(self,
                     timeout_seconds=120,
                     include_metadata: bool = False) -> Generator:
    """ Returns a generator that produces all data rows that are currently
    in this batch.

    The export mutation is re-sent every 2 seconds until the server reports
    a ``COMPLETE`` or ``FAILED`` status. Only the time spent sleeping between
    polls is charged against ``timeout_seconds``.

    Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear
    until the end of the cache period.

    Args:
        timeout_seconds (float): Max waiting time, in seconds.
        include_metadata (bool): True to return related DataRow metadata
    Returns:
        Generator that yields DataRow objects belonging to this batch.
    Raises:
        LabelboxError: if the export fails or is unable to download within the specified time.
        Whatever ``response.raise_for_status()`` raises if the download of
        the finished export returns an HTTP error status.
    """
    # stacklevel=2 attributes the warning to the caller's line; without it
    # the warning points at this module and Python's default filters hide it.
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning,
        stacklevel=2)

    id_param = "batchId"
    metadata_param = "includeMetadataInput"
    query_str = """mutation GetBatchDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!)
{exportBatchDataRows(data:{batchId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status}}
""" % (id_param, metadata_param, id_param, metadata_param)
    sleep_time = 2
    # Count down on a copy so the timeout error below can still report the
    # budget the caller asked for (the old code printed the exhausted
    # counter, i.e. "within 0 seconds").
    remaining_seconds = timeout_seconds
    while True:
        res = self.client.execute(query_str, {
            id_param: self.uid,
            metadata_param: include_metadata
        })
        res = res["exportBatchDataRows"]
        if res["status"] == "COMPLETE":
            download_url = res["downloadUrl"]
            response = requests.get(download_url)
            response.raise_for_status()
            reader = parser.reader(StringIO(response.text))
            # Lazy: DataRow objects are built only as the caller iterates.
            return (
                Entity.DataRow(self.client, result) for result in reader)
        elif res["status"] == "FAILED":
            raise LabelboxError("Data row export failed.")

        # Give up before sleeping again once the budget is spent.
        remaining_seconds -= sleep_time
        if remaining_seconds <= 0:
            raise LabelboxError(
                f"Unable to export data rows within {timeout_seconds} seconds."
            )

        logger.debug("Batch '%s' data row export, waiting for server...",
                     self.uid)
        time.sleep(sleep_time)

def delete(self) -> None:
""" Deletes the given batch.
Expand Down
52 changes: 0 additions & 52 deletions libs/labelbox/src/labelbox/schema/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,58 +337,6 @@ def data_row_for_external_id(self, external_id) -> "DataRow":
external_id)
return data_rows[0]

def export_data_rows(self,
                     timeout_seconds=120,
                     include_metadata: bool = False) -> Generator:
    """ Returns a generator that produces all data rows that are currently
    attached to this dataset.

    The export mutation is re-sent every 2 seconds until the server reports
    a ``COMPLETE`` or ``FAILED`` status. Only the time spent sleeping between
    polls is charged against ``timeout_seconds``.

    Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear
    until the end of the cache period.

    Args:
        timeout_seconds (float): Max waiting time, in seconds.
        include_metadata (bool): True to return related DataRow metadata
    Returns:
        Generator that yields DataRow objects belonging to this dataset.
    Raises:
        LabelboxError: if the export fails or is unable to download within the specified time.
        Whatever ``response.raise_for_status()`` raises if the download of
        the finished export returns an HTTP error status.
    """
    # stacklevel=2 attributes the warning to the caller's line; without it
    # the warning points at this module and Python's default filters hide it.
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning,
        stacklevel=2)
    id_param = "datasetId"
    metadata_param = "includeMetadataInput"
    query_str = """mutation GetDatasetDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!)
{exportDatasetDataRows(data:{datasetId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status}}
""" % (id_param, metadata_param, id_param, metadata_param)
    sleep_time = 2
    # Count down on a copy so the timeout error below can still report the
    # budget the caller asked for (the old code printed the exhausted
    # counter, i.e. "within 0 seconds").
    remaining_seconds = timeout_seconds
    while True:
        res = self.client.execute(query_str, {
            id_param: self.uid,
            metadata_param: include_metadata
        })
        res = res["exportDatasetDataRows"]
        if res["status"] == "COMPLETE":
            download_url = res["downloadUrl"]
            response = requests.get(download_url)
            response.raise_for_status()
            reader = parser.reader(StringIO(response.text))
            # Lazy: DataRow objects are built only as the caller iterates.
            return (
                Entity.DataRow(self.client, result) for result in reader)
        elif res["status"] == "FAILED":
            raise LabelboxError("Data row export failed.")

        # Give up before sleeping again once the budget is spent.
        remaining_seconds -= sleep_time
        if remaining_seconds <= 0:
            raise LabelboxError(
                f"Unable to export data rows within {timeout_seconds} seconds."
            )

        logger.debug("Dataset '%s' data row export, waiting for server...",
                     self.uid)
        time.sleep(sleep_time)

def export(
self,
task_name: Optional[str] = None,
Expand Down
51 changes: 0 additions & 51 deletions libs/labelbox/src/labelbox/schema/model_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,57 +461,6 @@ def get_config(self) -> Dict[str, Any]:
experimental=True)
return res["modelRun"]["trainingMetadata"]

@experimental
def export_labels(
    self,
    download: bool = False,
    timeout_seconds: int = 600
) -> Optional[Union[str, List[Dict[Any, Any]]]]:
    """
    Experimental. To use, make sure client has enable_experimental=True.

    Requests the label export for this ModelRun and polls every 2 seconds
    until the server hands back a download URL.

    Args:
        download (bool): When False, the URL itself is returned. When True,
            the file behind the URL is fetched and its parsed content is
            returned instead.
        timeout_seconds (int): Polling budget in seconds; it is reduced by
            the sleep interval after every unsuccessful poll.
    Returns:
        URL of the data file with this ModelRun's labels.
        If download=True, this instead returns the contents as NDJSON format.
        If the server didn't generate during the `timeout_seconds` period,
        None is returned.
    """
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning)
    poll_interval = 2
    mutation = """mutation exportModelRunAnnotationsPyApi($modelRunId: ID!) {
exportModelRunAnnotations(data: {modelRunId: $modelRunId}) {
downloadUrl createdAt status
}
}
"""

    while True:
        payload = self.client.execute(mutation, {'modelRunId': self.uid},
                                      experimental=True)
        download_url = payload['exportModelRunAnnotations']['downloadUrl']

        if download_url:
            if download:
                # Caller wants the data itself, not just the link.
                response = requests.get(download_url)
                response.raise_for_status()
                return parser.loads(response.content)
            return download_url

        # No URL yet: charge one sleep interval and stop once the budget
        # is used up.
        timeout_seconds -= poll_interval
        if timeout_seconds <= 0:
            return None

        logger.debug("ModelRun '%s' label export, waiting for server...",
                     self.uid)
        time.sleep(poll_interval)

def export(self,
task_name: Optional[str] = None,
params: Optional[ModelRunExportParams] = None) -> ExportTask:
Expand Down
152 changes: 0 additions & 152 deletions libs/labelbox/src/labelbox/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,157 +313,6 @@ def labels(self, datasets=None, order_by=None) -> PaginatedCollection:
return PaginatedCollection(self.client, query_str, {id_param: self.uid},
["project", "labels"], Label)

def export_queued_data_rows(
        self,
        timeout_seconds=120,
        include_metadata: bool = False) -> List[Dict[str, str]]:
    """ Returns all data rows that are currently enqueued for this project.

    The export mutation is re-sent every 2 seconds until the server reports
    a ``COMPLETE`` or ``FAILED`` status, or until more than
    ``timeout_seconds`` have elapsed since the first request.

    Args:
        timeout_seconds (float): Max waiting time, in seconds.
        include_metadata (bool): True to return related DataRow metadata
    Returns:
        Data row fields for all data rows in the queue as json
    Raises:
        LabelboxError: if the export fails or is unable to download within the specified time.
        Whatever ``response.raise_for_status()`` raises if the download of
        the finished export returns an HTTP error status.
    """
    # stacklevel=2 attributes the warning to the caller's line; without it
    # the warning points at this module and Python's default filters hide it.
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning,
        stacklevel=2)
    id_param = "projectId"
    metadata_param = "includeMetadataInput"
    query_str = """mutation GetQueuedDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!)
{exportQueuedDataRows(data:{projectId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status} }
""" % (id_param, metadata_param, id_param, metadata_param)
    sleep_time = 2
    # Monotonic clock: elapsed-time checks must not be affected by system
    # clock adjustments while we are polling.
    start_time = time.monotonic()
    while True:
        res = self.client.execute(query_str, {
            id_param: self.uid,
            metadata_param: include_metadata
        })
        res = res["exportQueuedDataRows"]
        if res["status"] == "COMPLETE":
            download_url = res["downloadUrl"]
            response = requests.get(download_url)
            response.raise_for_status()
            return parser.loads(response.text)
        elif res["status"] == "FAILED":
            raise LabelboxError("Data row export failed.")

        if time.monotonic() - start_time > timeout_seconds:
            raise LabelboxError(
                f"Unable to export data rows within {timeout_seconds} seconds."
            )

        logger.debug(
            "Project '%s' queued data row export, waiting for server...",
            self.uid)
        time.sleep(sleep_time)

def export_labels(self,
                  download=False,
                  timeout_seconds=1800,
                  **kwargs) -> Optional[Union[str, List[Dict[Any, Any]]]]:
    """ Calls the server-side Label exporting that generates a JSON
    payload, and returns the URL to that payload.

    Will only generate a new URL at a max frequency of 30 min.

    Args:
        download (bool): Returns the url if False. If True, the payload is
            downloaded and its decoded JSON content is returned instead.
        timeout_seconds (float): Max waiting time, in seconds.
        start (str): Earliest date for labels, formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
        end (str): Latest date for labels, formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
        last_activity_start (str): Will include all labels that have had any updates to
            data rows, issues, comments, metadata, or reviews since this timestamp.
            formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
        last_activity_end (str): Will include all labels that do not have any updates to
            data rows, issues, comments, metadata, or reviews after this timestamp.
            formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
    Returns:
        URL of the data file with this Project's labels, or the decoded JSON
        content of that file when `download` is True. If the server didn't
        generate during the `timeout_seconds` period, None is returned.
    """
    # stacklevel=2 attributes the warning to the caller's line; without it
    # the warning points at this module and Python's default filters hide it.
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning,
        stacklevel=2)

    def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str:
        """Returns a concatenated string of the dictionary's keys and values

        The string will be formatted as {key}: 'value' for each key. Value will be inclusive of
        quotations while key will not. This can be toggled with `value_with_quotes`.
        Entries whose value is falsy are left out."""

        quote = "\"" if value_with_quotes else ""
        return ",".join(f"{key}: {quote}{value}{quote}"
                        for key, value in dictionary.items()
                        if value)

    sleep_time = 2
    id_param = "projectId"
    filter_param = ""
    filter_param_dict = {}

    # NOTE(review): the filter values below are interpolated into the query
    # text, so this relies on validate_datetime rejecting anything that is
    # not a plain date string -- confirm against its implementation.
    if "start" in kwargs or "end" in kwargs:
        created_at_dict = {
            "start": kwargs.get("start", ""),
            "end": kwargs.get("end", "")
        }
        for date in created_at_dict.values():
            validate_datetime(date)
        filter_param_dict["labelCreatedAt"] = "{%s}" % _string_from_dict(
            created_at_dict, value_with_quotes=True)

    if "last_activity_start" in kwargs or "last_activity_end" in kwargs:
        last_activity_start = kwargs.get('last_activity_start')
        last_activity_end = kwargs.get('last_activity_end')

        if last_activity_start:
            validate_datetime(str(last_activity_start))
        if last_activity_end:
            validate_datetime(str(last_activity_end))

        filter_param_dict["lastActivityAt"] = "{%s}" % _string_from_dict(
            {
                "start": last_activity_start,
                "end": last_activity_end
            },
            value_with_quotes=True)

    if filter_param_dict:
        # The per-filter values are already brace-wrapped objects, so they
        # must not be quoted again here.
        filter_param = """, filters: {%s }""" % (_string_from_dict(
            filter_param_dict, value_with_quotes=False))

    query_str = """mutation GetLabelExportUrlPyApi($%s: ID!)
{exportLabels(data:{projectId: $%s%s}) {downloadUrl createdAt shouldPoll} }
""" % (id_param, id_param, filter_param)

    # Monotonic clock: elapsed-time checks must not be affected by system
    # clock adjustments while we are polling.
    start_time = time.monotonic()

    while True:
        res = self.client.execute(query_str, {id_param: self.uid})
        res = res["exportLabels"]
        if not res["shouldPoll"] and res["downloadUrl"] is not None:
            url = res['downloadUrl']
            if not download:
                return url
            response = requests.get(url)
            response.raise_for_status()
            return response.json()

        if time.monotonic() - start_time > timeout_seconds:
            return None

        logger.debug("Project '%s' label export, waiting for server...",
                     self.uid)
        time.sleep(sleep_time)

def export(
self,
task_name: Optional[str] = None,
Expand Down Expand Up @@ -1944,4 +1793,3 @@ class LabelingParameterOverride(DbObject):
"consensus average_benchmark_agreement last_activity_time")
LabelerPerformance.__doc__ = (
"Named tuple containing info about a labeler's performance.")

4 changes: 0 additions & 4 deletions libs/labelbox/tests/data/annotation_types/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,6 @@ def create_data_rows(self, args):
def wait_till_done(self):
    # Intentional no-op: the body is only `pass`, so a call returns None
    # immediately without waiting on anything.
    pass

def export_data_rows(self):
    """Yield every item of ``self.exports`` in order, one at a time."""
    yield from self.exports


def test_generator(list_of_labels):
generator = LabelGenerator([list_of_labels[0]])
Expand Down
19 changes: 0 additions & 19 deletions libs/labelbox/tests/data/export/legacy/test_export_catalog.py

This file was deleted.

Loading

0 comments on commit 0e9f43f

Please sign in to comment.