From 5e2f9a1877e224add58478cfb0e724a54a33a928 Mon Sep 17 00:00:00 2001
From: makostadima
Date: Mon, 30 Sep 2024 15:55:46 +0000
Subject: [PATCH] added helper functions from the kumo_model_apply_operator

---
Reviewer notes (text between the "---" line and the diff is not part of the
commit message):
- Line breaks, indentation and blank context lines were rebuilt from a
  whitespace-collapsed copy of this patch. Verify every hunk against the
  tree (for example with `git apply --check`) before applying.
- The added helpers in helper_functions.py were revised during review, so
  the commit id above and the `index` line of that file no longer describe
  this content. The hunk header and the diffstat were recounted.
- `read_in_chunks`, `get`, `where`, `utl` and `pd` are used by the added
  helpers but are not introduced by any hunk here; confirm that they exist
  at module level in helper_functions.py.

 tercen/util/export.py           |  14 ++--
 tercen/util/helper_functions.py | 159 ++++++++++++++++++++++++++++++--
 2 files changed, 164 insertions(+), 9 deletions(-)

diff --git a/tercen/util/export.py b/tercen/util/export.py
index e5a370f..bfe9543 100644
--- a/tercen/util/export.py
+++ b/tercen/util/export.py
@@ -47,6 +47,7 @@ def export_to_project_as_csv(context, df, fname, projectId, folderId, user, work
 def export_obj_pickle_to_project(context, data, fname, \
         projectId, folderId, user, compression=1, fileExt="gz",\
         inplace=True):
+
     fname = "{}.{}".format(fname, fileExt)
 
 
@@ -54,7 +55,8 @@ def export_obj_pickle_to_project(context, data, fname, \
 
     with gzip.open(fname, 'wb', compresslevel=compression) as f:
         pickle.dump(data, f)
-
+    del data
+    data = []
 
     file = FileDocument()
     file.name = fname.split("/")[-1]
@@ -64,12 +66,14 @@ def export_obj_pickle_to_project(context, data, fname, \
     file.folderId = folderId
     file.metadata.contentEncoding = "gzip"
 
+    with open(fname, 'rb') as f:
+        bytes_data = f.read()
+
     context.log("Exporting {}: Uploading".format(fname))
-    context.client.fileService.uploadFromFile(file, fname)
+    context.client.fileService.uploadFromFile(file, bytes_data)
 
     if inplace == True:
-        del data
-        data = []
         return data
     else:
-        return None
\ No newline at end of file
+        return None
+
diff --git a/tercen/util/helper_functions.py b/tercen/util/helper_functions.py
index 763753b..1719d4b 100644
--- a/tercen/util/helper_functions.py
+++ b/tercen/util/helper_functions.py
@@ -9,15 +9,14 @@ import pytson as ptson
 
 
 
-import uuid, os, hashlib, base64
+import uuid, os, hashlib, base64, time
 
 from tercen.model.impl import Table, Column, InMemoryRelation, Relation, \
     SimpleRelation, Schema, \
-    CompositeRelation, JoinOperator, ColumnPair
+    CompositeRelation, JoinOperator, ColumnPair, Pair, \
+    RenameRelation, FileDocument, CubeQueryTask
 
 from http.client import IncompleteRead
-# import tercen.util.pytmp as ptmp
-import time
 
 
 def dataframe_to_table(df, values_as_list=False) -> Table:
@@ -469,7 +468,159 @@ def download_to_file(client, fileDoc, fname, maxTries=10, interval=5, isGzip=Fal
 #     return pickle.loads(data)
 
 
+def download_filedocs(fileDocs, context, ext=""):
+    """Download every file document into a fresh temporary directory.
+
+    Args:
+        fileDocs: Iterable of file documents; only ``id`` is read.
+        context: Object exposing ``context.client.fileService.download``.
+        ext: Suffix appended to each generated file name.
+
+    Returns:
+        Local file paths, one per entry of ``fileDocs``, in input order.
+    """
+    import tempfile
+
+    # mkdtemp/mkstemp create their names atomically, so two calls can never
+    # share a directory or overwrite each other's files.
+    baseDir = tempfile.mkdtemp()
+    savedFilePaths = []
+
+    for fd in fileDocs:
+        resp = context.context.client.fileService.download(fd.id)
+        handle, fname = tempfile.mkstemp(suffix=ext, dir=baseDir)
+        with os.fdopen(handle, "wb") as file:
+            for chunk in read_in_chunks(resp):
+                file.write(chunk)
+        savedFilePaths.append(fname)
+
+    return savedFilePaths
+
+
+def filter_by_type(vec, type):
+    """Return the items of ``vec`` that are instances of ``type``, in order."""
+    # The parameter shadows the builtin ``type``; the name is kept so that
+    # existing keyword callers continue to work.
+    return [o for o in vec if isinstance(o, type)]
+
+
+def get_inmemory_relations(relation):
+    """Collect the InMemoryRelation objects reachable from ``relation``.
+
+    A CompositeRelation is searched through ``mainRelation`` and then the
+    ``rightRelation`` of each join operator. A SimpleRelation contributes
+    nothing. Any other relation is searched through its ``relation``
+    attribute.
+
+    Returns:
+        List of InMemoryRelation objects in depth-first order.
+    """
+    if isinstance(relation, CompositeRelation):
+        relations = get_inmemory_relations(relation.mainRelation)
+        for jo in relation.joinOperators:
+            relations.extend(get_inmemory_relations(jo.rightRelation))
+        return relations
+
+    if isinstance(relation, InMemoryRelation):
+        return [relation]
+
+    if isinstance(relation, SimpleRelation):
+        return []
+
+    # Every remaining type is treated as a wrapper around ``relation``.
+    return get_inmemory_relations(relation.relation)
+
+
+def get_document_id(queryRelation, aliasId, colName):
+    """Look up the ``.documentId`` value paired with ``aliasId``.
+
+    Each in-memory table under ``queryRelation`` is checked for a
+    ``.documentId`` column and a column named ``colName``. The first index
+    that ``where`` reports for ``aliasId`` in the latter selects the result.
+
+    Returns:
+        The matching ``.documentId`` value, or None when nothing matches.
+    """
+    for rel in get_inmemory_relations(queryRelation):
+        columns = rel.inMemoryTable.columns
+        documentIds = get(columns, where([c.name == ".documentId" for c in columns]))
+        documentAliasIds = get(columns, where([c.name == colName for c in columns]))
+        if documentIds is None or documentAliasIds is None:
+            continue
+
+        # ``value`` rather than ``id`` so the builtin is not shadowed.
+        idx = where([value == aliasId for value in documentAliasIds[0].values])
+        if idx is not None and len(idx) > 0:
+            return documentIds[0].values[idx[0]]
+
+    return None
+
+
+def get_data(context, fileDoc, is_data=True):
+    """Download a gzip-compressed pickle and return the unpickled object.
+
+    The download is attempted up to ten times, sleeping five seconds
+    between attempts that end in IncompleteRead.
+
+    Args:
+        context: Object exposing ``context.client.fileService.download``.
+        fileDoc: File document; ``id`` and ``name`` are read.
+        is_data: Unused; kept for backward compatibility.
+
+    Raises:
+        RuntimeError: If every attempt ends in IncompleteRead.
+    """
+    import gzip
+    import pickle
+
+    maxTries = 10
+    data = None
+
+    # Inclusive upper bound so that exactly maxTries attempts are made.
+    for downloadTry in range(1, maxTries + 1):
+        try:
+            print("Downloading {} [Try {}]".format(fileDoc.name, downloadTry))
+            resp = context.context.client.fileService.download(fileDoc.id)
+            with gzip.open(resp, 'rb') as gFile:
+                data = gFile.read()
+            break
+        except IncompleteRead:
+            if downloadTry < maxTries:
+                print("Download failed. Trying again in 5 seconds.")
+                time.sleep(5)
+
+    if data is None:
+        raise RuntimeError("tercen.util.helper_functions.get_data: Failed to download or extract {}".format(fileDoc.name))
+
+    # NOTE(review): pickle.loads executes code embedded in the payload; only
+    # call this for files produced by a trusted operator.
+    return pickle.loads(data)
+
+
+def append_img_to_df(df, imagePath, pltCi=0):
+    """Append the rows for one image file to ``df``.
+
+    The image is converted with ``utl.image_file_to_df`` and tagged with a
+    leading ``.ri`` column holding ``int(pltCi)``.
+
+    Args:
+        df: Existing frame, or None to start a new one.
+        imagePath: Path handed to ``utl.image_file_to_df``.
+        pltCi: Value stored in the ``.ri`` column.
+
+    Returns:
+        The new frame when ``df`` is None, otherwise ``df`` with the new
+        rows concatenated below it.
+    """
+    # NOTE(review): ``utl`` is not imported by any hunk of this patch;
+    # confirm the alias exists inside helper_functions.py.
+    imgDf = utl.image_file_to_df(imagePath)
+    imgDf.insert(0, ".ri", int(pltCi))
+    if df is None:
+        return imgDf
+    return pd.concat([df, imgDf])
+
 
 
 # def random_string(size=6, chars=string.ascii_uppercase + string.digits):
 #     return ''.join(random.choice(chars) for _ in range(size))