
Commit

added helper functions from the kumo_model_apply_operator
makostadima committed Sep 30, 2024
1 parent 25d1a79 commit 5e2f9a1
Showing 2 changed files with 115 additions and 9 deletions.
14 changes: 9 additions & 5 deletions tercen/util/export.py
@@ -47,14 +47,16 @@ def export_to_project_as_csv(context, df, fname, projectId, folderId, user, work
def export_obj_pickle_to_project(context, data, fname, \
                                 projectId, folderId, user, compression=1, fileExt="gz",\
                                 inplace=True):

    fname = "{}.{}".format(fname, fileExt)


    context.log("Exporting {}: Writing temp file".format(fname))
    with gzip.open(fname, 'wb', compresslevel=compression) as f:
        pickle.dump(data, f)


    del data
    data = []

    file = FileDocument()
    file.name = fname.split("/")[-1]
@@ -64,12 +66,14 @@ def export_obj_pickle_to_project(context, data, fname, \
    file.folderId = folderId
    file.metadata.contentEncoding = "gzip"

    with open(fname, 'rb') as f:
        bytes_data = f.read()

    context.log("Exporting {}: Uploading".format(fname))
    context.client.fileService.uploadFromFile(file, fname)
    context.client.fileService.uploadFromFile(file, bytes_data)

    if inplace == True:
        del data
        data = []
        return data
    else:
        return None
    return None
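
A minimal usage sketch for the updated export_obj_pickle_to_project (not part of the commit): context is assumed to be an existing Tercen operator context, the project, folder and user identifiers are placeholders, and the fileExt value (".gz") is appended to fname inside the function.

    # Hypothetical usage, not part of this commit.
    model = {"weights": [0.1, 0.2, 0.3]}   # any picklable object
    export_obj_pickle_to_project(
        context, model, "model_artifacts/model",
        projectId="<projectId>", folderId="<folderId>", user="<user>",
        compression=1, fileExt="gz")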

110 changes: 106 additions & 4 deletions tercen/util/helper_functions.py
@@ -9,15 +9,14 @@

import pytson as ptson

import uuid, os, hashlib, base64
import uuid, os, hashlib, base64, time
from tercen.model.impl import Table, Column, InMemoryRelation, Relation, \
    SimpleRelation, Schema, \
    CompositeRelation, JoinOperator, ColumnPair
    CompositeRelation, JoinOperator, ColumnPair, Pair, \
    RenameRelation, FileDocument, CubeQueryTask

from http.client import IncompleteRead
# import tercen.util.pytmp as ptmp

import time

def dataframe_to_table(df, values_as_list=False) -> Table:

@@ -469,7 +468,110 @@ def download_to_file(client, fileDoc, fname, maxTries=10, interval=5, isGzip=Fal

# return pickle.loads(data)

def download_filedocs(fileDocs, context, ext=""):
    # Download each FileDocument into a randomly named temp directory and
    # return the list of local file paths.
    savedFilePaths = []
    baseDir = tempfile.gettempdir() + \
        '/' + \
        ''.join(random.choices(string.ascii_uppercase + string.digits, k=4))

    if not os.path.exists(baseDir):
        os.mkdir(baseDir)

    for fd in fileDocs:
        fname = baseDir + '/' + \
            ''.join(random.choices(string.ascii_uppercase + string.digits, k=8)) + \
            ext
        resp = context.context.client.fileService.download(fd.id)
        # touch the file so it exists before chunks are appended
        f = open(fname, "wb")
        f.close()

        with open(fname, "ab") as file:
            for chunk in read_in_chunks(resp):
                file.write(chunk)

        savedFilePaths.append(fname)

    return savedFilePaths
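
A hedged usage sketch for download_filedocs (not part of the commit): fileDocs is assumed to be a list of FileDocument objects obtained elsewhere through the Tercen client, and the extension is assumed to match how the files were uploaded.

    # Hypothetical usage, not part of this commit.
    paths = download_filedocs(fileDocs, context, ext=".gz")
    for p in paths:
        print("downloaded", p)   # each path lives under a random temp sub-directory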

def filter_by_type(vec, type):
    # Return only the elements of vec that are instances of the given type.
    outVec = []
    for o in vec:
        if isinstance(o, type):
            outVec.append(o)

    return outVec

def get_inmemory_relations(relation):
    # Recursively collect every InMemoryRelation reachable from relation.
    relations = []

    if isinstance(relation, CompositeRelation):
        rels = get_inmemory_relations(relation.mainRelation)
        relations.extend(rels)

        for jo in relation.joinOperators:
            rels = get_inmemory_relations(jo.rightRelation)
            relations.extend(rels)

    elif isinstance(relation, InMemoryRelation):
        relations.append(relation)
    elif not isinstance(relation, SimpleRelation):
        rels = get_inmemory_relations(relation.relation)
        relations.extend(rels)

    return relations
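
A small hedged illustration of the traversal (not part of the commit), following the no-argument constructor plus attribute-assignment style used elsewhere in this file (for example FileDocument above); the constructor defaults are an assumption.

    # Hypothetical illustration, not part of this commit.
    inner = InMemoryRelation()                 # leaf carrying an in-memory table
    join = JoinOperator()
    join.rightRelation = inner

    composite = CompositeRelation()
    composite.mainRelation = SimpleRelation()  # SimpleRelation leaves are skipped
    composite.joinOperators = [join]

    assert get_inmemory_relations(composite) == [inner]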

def get_document_id(queryRelation, aliasId, colName):
    # Look up the ".documentId" value whose alias column (colName) matches aliasId.
    inMemRels = get_inmemory_relations(queryRelation)

    for rel in inMemRels:
        tbl = rel.inMemoryTable

        documentIds = get(tbl.columns, where([c.name == ".documentId" for c in tbl.columns]))
        # documentAliasIds = get(tbl.columns, where([c.name == "documentId" for c in tbl.columns]))
        documentAliasIds = get(tbl.columns, where([c.name == colName for c in tbl.columns]))

        if documentIds is not None and documentAliasIds is not None:
            idx = where([id == aliasId for id in documentAliasIds[0].values])
            if idx is not None and len(idx) > 0:
                return documentIds[0].values[idx[0]]
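
A hedged usage sketch for get_document_id (not part of the commit): queryRelation and aliasId are assumed to come from the operator's cube query, and "documentId" is assumed to be the alias column under which the file reference was projected.

    # Hypothetical usage, not part of this commit.
    docId = get_document_id(queryRelation, aliasId, "documentId")
    if docId is None:
        raise ValueError("no .documentId entry found for alias {}".format(aliasId))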

def get_data(context, fileDoc, is_data=True):
    # Download a gzip-compressed pickle from the file service, retrying on
    # IncompleteRead, and return the unpickled object.
    maxTries = 10
    downloadTry = 1
    downloadSuccessful = False

    data = None
    while downloadTry < maxTries:
        try:
            print("Downloading {} [Try {}]".format(fileDoc.name, downloadTry))
            resp = context.context.client.fileService.download(fileDoc.id)

            with gzip.open(resp, 'rb') as gFile:
                data = gFile.read()
            downloadSuccessful = True
            break
        except IncompleteRead:
            print("Download failed. Trying again in 5 seconds.")
            downloadTry += 1
            time.sleep(5)

    if not downloadSuccessful or data is None:
        raise RuntimeError("tercen.util.helper_functions.get_data: Failed to download or extract {}".format(fileDoc.name))

    return pickle.loads(data)
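
A hedged usage sketch for get_data (not part of the commit): fileDoc is assumed to be a FileDocument pointing at a gzip-compressed pickle, for example one uploaded with export_obj_pickle_to_project above. Note that with maxTries = 10 and downloadTry starting at 1, the loop attempts the download at most nine times.

    # Hypothetical usage, not part of this commit.
    obj = get_data(context, fileDoc)   # returns the unpickled Python object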

def append_img_to_df(df, imagePath, pltCi=0):
    # Convert an image file to a dataframe block, tag it with the row index
    # ".ri", and append it to df (creating df on the first call).
    if df is None:
        df = utl.image_file_to_df(imagePath)
        df.insert(0, ".ri", int(pltCi))
    else:
        tmpDf = utl.image_file_to_df(imagePath)
        tmpDf.insert(0, ".ri", int(pltCi))
        df = pd.concat([df, tmpDf])
    return df
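
A hedged usage sketch for append_img_to_df (not part of the commit): the image paths and row indices are placeholders supplied by the calling operator, one image per crosstab row.

    # Hypothetical usage, not part of this commit.
    imgDf = None
    for rowIdx, path in enumerate(["/tmp/plot_0.png", "/tmp/plot_1.png"]):
        imgDf = append_img_to_df(imgDf, path, pltCi=rowIdx)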

# def random_string(size=6, chars=string.ascii_uppercase + string.digits):
# return ''.join(random.choice(chars) for _ in range(size))
