From 5e2f9a1877e224add58478cfb0e724a54a33a928 Mon Sep 17 00:00:00 2001
From: makostadima <kostadima@gmail.com>
Date: Mon, 30 Sep 2024 15:55:46 +0000
Subject: [PATCH] added helper functions from the kumo_model_apply_operator

---
 tercen/util/export.py           |  14 ++--
 tercen/util/helper_functions.py | 110 ++++++++++++++++++++++++++++++--
 2 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/tercen/util/export.py b/tercen/util/export.py
index e5a370f..bfe9543 100644
--- a/tercen/util/export.py
+++ b/tercen/util/export.py
@@ -47,6 +47,7 @@ def export_to_project_as_csv(context, df, fname, projectId, folderId, user, work
 def export_obj_pickle_to_project(context, data, fname, \
         projectId, folderId, user, compression=1, fileExt="gz",\
         inplace=True):
+    
     fname = "{}.{}".format(fname, fileExt)
 
 
@@ -54,7 +55,8 @@ def export_obj_pickle_to_project(context, data, fname, \
     with gzip.open(fname, 'wb', compresslevel=compression) as f:
         pickle.dump(data, f)
     
-
+    del data
+    data = []
 
     file = FileDocument()
     file.name = fname.split("/")[-1]
@@ -64,12 +66,14 @@ def export_obj_pickle_to_project(context, data, fname, \
     file.folderId = folderId
     file.metadata.contentEncoding = "gzip"
 
+    with open(fname, 'rb') as f:
+            bytes_data =  f.read()
+
     context.log("Exporting {}: Uploading".format(fname))
-    context.client.fileService.uploadFromFile(file, fname)
+    context.client.fileService.uploadFromFile(file, bytes_data)
     
     if inplace == True:
-        del data
-        data = []
         return data
     else:
-        return None
\ No newline at end of file
+        return None
+
diff --git a/tercen/util/helper_functions.py b/tercen/util/helper_functions.py
index 763753b..1719d4b 100644
--- a/tercen/util/helper_functions.py
+++ b/tercen/util/helper_functions.py
@@ -9,15 +9,14 @@
 
 import pytson as ptson
 
-import uuid, os, hashlib, base64
+import uuid, os, hashlib, base64, time
 from tercen.model.impl import Table, Column, InMemoryRelation, Relation, \
                         SimpleRelation, Schema, \
-                        CompositeRelation, JoinOperator, ColumnPair
+                        CompositeRelation, JoinOperator, ColumnPair, Pair, \
+                        RenameRelation, FileDocument, CubeQueryTask
 
 from http.client import IncompleteRead
-# import tercen.util.pytmp as ptmp
 
-import time
 
 def dataframe_to_table(df, values_as_list=False) -> Table:
 
@@ -469,7 +468,110 @@ def download_to_file(client, fileDoc, fname, maxTries=10, interval=5, isGzip=Fal
 
     # return pickle.loads(data)
 
+def download_filedocs(fileDocs, context, ext="" ):
+    """Download each file document into a temporary sub-directory.
+
+    Every entry of ``fileDocs`` is fetched by its ``id`` through
+    ``context.context.client.fileService`` and written to a randomly named
+    file whose name ends with ``ext``.
+
+    Returns the local file paths, in the same order as ``fileDocs``.
+    """
+    alphabet = string.ascii_uppercase + string.digits
+    baseDir = tempfile.gettempdir() + '/' + ''.join(random.choices(alphabet, k=4))
+    # exist_ok closes the check-then-create race of exists() + mkdir().
+    os.makedirs(baseDir, exist_ok=True)
+
+    savedFilePaths = []
+    for fd in fileDocs:
+        fname = baseDir + '/' + ''.join(random.choices(alphabet, k=8)) + ext
+        resp = context.context.client.fileService.download(fd.id)
+        # "wb" creates/truncates the file itself; no separate touch needed.
+        with open(fname, "wb") as file:
+            for chunk in read_in_chunks(resp):
+                file.write(chunk)
+        savedFilePaths.append(fname)
+
+    return savedFilePaths
+
+def filter_by_type(vec, type):
+    """Return the elements of ``vec`` that are instances of ``type``.
+
+    The result is a new list; the relative order of ``vec`` is kept.
+    """
+    # NOTE: ``type`` shadows the builtin; kept for caller compatibility.
+    return [item for item in vec if isinstance(item, type)]
+
+def get_inmemory_relations(relation):
+    """Collect every InMemoryRelation reachable from ``relation``.
+
+    A CompositeRelation is searched through its main relation and then the
+    right relation of each join operator; any other non-SimpleRelation
+    object is searched through its ``relation`` attribute.
+    """
+    if isinstance(relation, CompositeRelation):
+        found = get_inmemory_relations(relation.mainRelation)
+        for joinOp in relation.joinOperators:
+            found.extend(get_inmemory_relations(joinOp.rightRelation))
+        return found
+    if isinstance(relation, InMemoryRelation):
+        return [relation]
+    if isinstance(relation, SimpleRelation):
+        return []
+    # Every remaining kind is expected to expose one inner ``relation``.
+    return get_inmemory_relations(relation.relation)
+
+def get_document_id(queryRelation, aliasId, colName):
+    """Look up the ``.documentId`` paired with ``aliasId`` in ``colName``.
+
+    Scans the in-memory tables of ``queryRelation``; None if nothing matches.
+    """
+    for rel in get_inmemory_relations(queryRelation):
+        cols = rel.inMemoryTable.columns
+        documentIds = get(cols, where([c.name == ".documentId" for c in cols]))
+        documentAliasIds = get(cols, where([c.name == colName for c in cols]))
+        if documentIds is None or documentAliasIds is None:
+            continue
+        idx = where([val == aliasId for val in documentAliasIds[0].values])
+        if idx is not None and len(idx) > 0:
+            return documentIds[0].values[idx[0]]
 
+def get_data(context, fileDoc, is_data=True):
+    """Download a gzip-compressed pickle via the file service and load it.
+
+    The download is attempted up to 10 times, sleeping 5 seconds after
+    each IncompleteRead. ``is_data`` is accepted but currently unused.
+
+    Raises RuntimeError when no attempt yields the file content.
+    SECURITY: the payload goes through pickle.loads; trusted files only.
+    """
+    maxTries = 10
+    data = None
+    # range() is inclusive of maxTries so all 10 attempts are really made.
+    for downloadTry in range(1, maxTries + 1):
+        try:
+            print("Downloading {} [Try {}]".format(fileDoc.name, downloadTry))
+            resp = context.context.client.fileService.download(fileDoc.id)
+            with gzip.open(resp, 'rb') as gFile:
+                data = gFile.read()
+            break
+        except IncompleteRead:
+            print("Download failed. Trying again in 5 seconds.")
+            time.sleep(5)
+    if data is None:
+        raise RuntimeError("tercen.util.helper_functions.get_data: Failed to download or extract {}".format(fileDoc.name))
 
+    return pickle.loads(data)
+
+def append_img_to_df(df, imagePath, pltCi=0):
+    """Add the frame built from ``imagePath`` to ``df``, tagged by ``.ri``.
+
+    When ``df`` is None the newly built frame itself is returned.
+    """
+    imgDf = utl.image_file_to_df(imagePath)
+    imgDf.insert(0, ".ri", int(pltCi))
+    # The first frame starts the result; later ones are concatenated to it.
+    return imgDf if df is None else pd.concat([df, imgDf])
+    
 # def random_string(size=6, chars=string.ascii_uppercase + string.digits):
 # return ''.join(random.choice(chars) for _ in range(size))