diff --git a/oda_api/api.py b/oda_api/api.py
index 338dcfa7..3f7a340c 100644
--- a/oda_api/api.py
+++ b/oda_api/api.py
@@ -7,6 +7,7 @@ import pathlib
 
 import rdflib
+from json.decoder import JSONDecodeError
 
 # NOTE gw is optional for now
 try:
@@ -16,7 +17,14 @@ except ModuleNotFoundError:
     pass
 
-from .data_products import NumpyDataProduct, BinaryData, ApiCatalog, GWContoursDataProduct, PictureProduct
+from .data_products import (NumpyDataProduct,
+                            BinaryData,
+                            BinaryProduct,
+                            ApiCatalog,
+                            GWContoursDataProduct,
+                            PictureProduct,
+                            ODAAstropyTable,
+                            TextLikeProduct)
 from oda_api.token import TokenLocation
 
 from builtins import (bytes, str, open, super, range,
@@ -1158,9 +1166,9 @@ def __init__(self, data_list, add_meta_to_name=['src_name', 'product'], instrume
         name = ''
         if hasattr(data, 'name'):
-            name = data.name
+            name = data.name
 
-        if name.strip() == '':
+        if name is None or name.strip() == '':
             if product is not None:
                 name = '%s' % product
             elif instrument is not None:
@@ -1250,19 +1258,23 @@ def from_response_json(cls, res_json, instrument, product):
                          for d in res_json['products']['numpy_data_product_list']])
 
         if 'binary_data_product_list' in res_json['products'].keys():
-            data.extend([BinaryData().decode(d)
-                        for d in res_json['products']['binary_data_product_list']])
+            try:
+                data.extend([BinaryProduct.decode(d)
+                            for d in res_json['products']['binary_data_product_list']])
+            except:
+                data.extend([BinaryData().decode(d)
+                            for d in res_json['products']['binary_data_product_list']])
 
         if 'catalog' in res_json['products'].keys():
             data.append(ApiCatalog(
                 res_json['products']['catalog'], name='dispatcher_catalog'))
 
-        if 'astropy_table_product_ascii_list' in res_json['products'].keys():
-            data.extend([ascii.read(table_text['ascii'])
+        if 'astropy_table_product_ascii_list' in res_json['products'].keys():
+            data.extend([ODAAstropyTable.decode(table_text, use_binary=False)
                          for table_text in res_json['products']['astropy_table_product_ascii_list']])
 
         if 'astropy_table_product_binary_list' in res_json['products'].keys():
-            data.extend([ascii.read(table_binary)
+            data.extend([ODAAstropyTable.decode(table_binary, use_binary=True)
                          for table_binary in res_json['products']['astropy_table_product_binary_list']])
 
         if 'binary_image_product_list' in res_json['products'].keys():
@@ -1270,8 +1282,12 @@ def from_response_json(cls, res_json, instrument, product):
                          for bin_image_data in res_json['products']['binary_image_product_list']])
 
         if 'text_product_list' in res_json['products'].keys():
-            data.extend([text_data
-                        for text_data in res_json['products']['text_product_list']])
+            try:
+                data.extend([TextLikeProduct.decode(text_data)
+                            for text_data in res_json['products']['text_product_list']])
+            except (JSONDecodeError, KeyError):
+                data.extend([text_data
+                            for text_data in res_json['products']['text_product_list']])
 
         if 'gw_strain_product_list' in res_json['products'].keys():
             data.extend([TimeSeries(strain_data['value'],
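For orientation, the `from_response_json` hunks above all follow the same decode-then-fall-back pattern: try the new product class first and fall back to the legacy path if the payload is in the old format. A minimal sketch of that pattern (illustrative only, not code from this diff; the helper name is invented):

    from oda_api.data_products import BinaryProduct, BinaryData

    def decode_binary_entry(entry):
        # New-style payloads are dicts (or JSON strings) carrying 'name', 'data' and 'md5';
        # anything else is handed to the legacy BinaryData decoder.
        try:
            return BinaryProduct.decode(entry)
        except Exception:  # the diff itself uses a bare except here
            return BinaryData().decode(entry)
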
diff --git a/oda_api/data_products.py b/oda_api/data_products.py
index 6c7d4c93..ec434c73 100644
--- a/oda_api/data_products.py
+++ b/oda_api/data_products.py
@@ -85,10 +85,9 @@ def _chekc_enc_data(data):
     return _l
 
 
-# not used?
 class ODAAstropyTable(object):
-    def __init__(self,table_object,name='astropy table', meta_data={}):
+    def __init__(self,table_object,name=None, meta_data={}):
         self.name=name
         self.meta_data=meta_data
         self._table=table_object
@@ -119,7 +118,7 @@ def from_file(cls, file_path, name=None, delimiter=None, format=None):
 
         if hasattr(table, 'meta'):
             meta = table.meta
 
-        return cls(table, meta_data=meta)
+        return cls(table, meta_data=meta, name=name)
 
     def encode(self,use_binary=False,to_json = False):
@@ -179,14 +178,51 @@ def encode(self,file_path=None):
         _file_b64_md5 = hashlib.md5(_file_binary).hexdigest()
         return _file_b64,_file_b64_md5
-    
+
     def decode(self,encoded_obj):
         return base64.urlsafe_b64decode(encoded_obj.encode('ascii', 'ignore'))
-    
+
+
+class BinaryProduct:
+    # New implementation of binary data product.
+    # The meaning of the methods is more in-line with the rest of the products
+    def __init__(self, bin_data, name=None):
+        self.bin_data = bin_data
+        if name == 'None': name = None
+        self.name = name
+
+    def encode(self):
+        return {
+            'name': self.name,
+            'data': base64.urlsafe_b64encode(self.bin_data).decode(),
+            'md5': hashlib.md5(self.bin_data).hexdigest()
+        }
+
+    @classmethod
+    def decode(cls, encoded_obj):
+        if not isinstance(encoded_obj, dict):
+            encoded_obj = json.loads(encoded_obj)
+
+        name = encoded_obj['name']
+        bin_data = base64.urlsafe_b64decode(encoded_obj['data'].encode('ascii', 'ignore'))
+        decoded_md5 = hashlib.md5(bin_data).hexdigest()
+        assert decoded_md5 == encoded_obj['md5']
+
+        return cls(bin_data, name)
+
+    def write_file(self, file_path):
+        with open(file_path, 'wb') as fd:
+            fd.write(self.bin_data)
+
+    @classmethod
+    def from_file(cls, file_path, name=None):
+        with open(file_path, 'rb') as fd:
+            bin_data = fd.read()
+        return cls(bin_data, name)
 
 class NumpyDataUnit(object):
 
-    def __init__(self, data, data_header={}, meta_data={}, hdu_type=None, name='table', units_dict=None):
+    def __init__(self, data, data_header={}, meta_data={}, hdu_type=None, name=None, units_dict=None):
 
         self._hdu_type_list_ = ['primary', 'image', 'table', 'bintable']
         self.name=name
@@ -240,9 +276,9 @@ def _check_dict(self,_kw):
 
 
     @classmethod
-    def from_fits_hdu(cls,hdu,name=''):
+    def from_fits_hdu(cls,hdu,name=None):
 
-        if name=='':
+        if name is None or name == '':
             name=hdu.name
 
         return cls(data=hdu.data,
@@ -433,7 +469,7 @@ def decode(cls,encoded_obj,use_gzip=False,from_json=False):
     @classmethod
     def from_pandas(cls,
                     pandas_dataframe,
-                    name = 'table',
+                    name = None,
                     column_names=[],
                     units_dict={},
                     meta_data = {},
@@ -455,7 +491,7 @@ def from_pandas(cls,
 
 class NumpyDataProduct(object):
 
-    def __init__(self, data_unit, name='', meta_data={}):
+    def __init__(self, data_unit, name=None, meta_data={}):
 
         self.name=name
 
@@ -621,7 +657,7 @@ def decode(cls, encoded_obj: typing.Union[str, dict], from_json=False):
 
 class ApiCatalog(object):
 
-    def __init__(self,cat_dict,name='catalog'):
+    def __init__(self,cat_dict,name=None):
         self.name=name
         _skip_list=['meta_ID']
 
         meta = {}
@@ -703,7 +739,7 @@ from_arrays(cls,
                     errors = None,
                     units_spec = {}, # TODO: not used yet
                     time_format = None,
-                    name = 'lightcurve'):
+                    name = None):
 
         data_header = {}
         meta_data = {} # meta data could be attached to both NumpyDataUnit and NumpyDataProduct. Decide on this
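The `BinaryProduct` class added above is exercised end-to-end by the new test at the bottom of this diff; a hedged usage sketch (file names are placeholders):

    from oda_api.data_products import BinaryProduct

    prod = BinaryProduct.from_file('some_file.bin', name='my_binary')
    payload = prod.encode()                   # dict with 'name', base64 'data' and 'md5'
    restored = BinaryProduct.decode(payload)  # re-checks the md5 of the decoded bytes
    restored.write_file('copy_of_some_file.bin')
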
@@ -767,9 +803,10 @@ from_arrays(cls,
                    name = name)
 
 class PictureProduct:
-    def __init__(self, binary_data, metadata={}, file_path=None, write_on_creation = False):
+    def __init__(self, binary_data, name=None, metadata={}, file_path=None, write_on_creation = False):
         self.binary_data = binary_data
         self.metadata = metadata
+        self.name = name
         if file_path is not None and os.path.isfile(file_path):
             self.file_path = file_path
             logger.info(f'Image file {file_path} already exist. No automatical rewriting.')
@@ -784,10 +821,10 @@ def __init__(self, binary_data, metadata={}, file_path=None, write_on_creation =
             self.img_type = tp
 
     @classmethod
-    def from_file(cls, file_path):
+    def from_file(cls, file_path, name=None):
         with open(file_path, 'rb') as fd:
             binary_data = fd.read()
-        return cls(binary_data, file_path=file_path)
+        return cls(binary_data, name=name, file_path=file_path)
 
     def write_file(self, file_path):
         logger.info(f'Creating image file {file_path}.')
@@ -801,6 +838,7 @@ def encode(self):
         output_dict['img_type'] = self.img_type
         output_dict['b64data'] = b64data.decode()
         output_dict['metadata'] = self.metadata
+        output_dict['name'] = self.name
         if self.file_path:
             output_dict['filename'] = os.path.basename(self.file_path)
         return output_dict
@@ -815,6 +853,7 @@ def decode(cls, encoded_data, write_on_creation = False):
         return cls(binary_data,
                    metadata = _encoded_data['metadata'],
                    file_path = _encoded_data.get('filename'),
+                   name = _encoded_data.get('name'),
                    write_on_creation = write_on_creation)
 
     def show(self):
@@ -826,8 +865,8 @@ def show(self):
 class ImageDataProduct(NumpyDataProduct):
 
     @classmethod
-    def from_fits_file(cls,filename,ext=None,hdu_name=None,meta_data={},name=''):
-        npdp = super().from_fits_file(filename,ext=None,hdu_name=None,meta_data={},name='')
+    def from_fits_file(cls,filename,ext=None,hdu_name=None,meta_data={},name=None):
+        npdp = super().from_fits_file(filename,ext=ext,hdu_name=hdu_name,meta_data=meta_data,name=name)
 
         contains_image = cls.check_contains_image(npdp)
         if contains_image:
@@ -845,4 +884,25 @@ def check_contains_image(numpy_data_prod):
             except:
                 pass
         return False
-    
\ No newline at end of file
+
+class TextLikeProduct:
+    def __init__(self, value, name=None, meta_data={}):
+        self.value = value
+        self.name = name
+        self.meta_data = meta_data
+
+    def encode(self):
+        return {'name': self.name,
+                'value': self.value,
+                'meta_data': self.meta_data}
+
+    @classmethod
+    def decode(cls, encoded):
+        if not isinstance(encoded, dict):
+            encoded = json.loads(encoded)
+        return cls(name=encoded.get('name'),
+                   value=encoded['value'],
+                   meta_data=encoded.get('meta_data', {}))
+
+    def __repr__(self):
+        return self.encode().__repr__()
diff --git a/oda_api/json.py b/oda_api/json.py
index 4719ef9a..b045b96f 100644
--- a/oda_api/json.py
+++ b/oda_api/json.py
@@ -2,7 +2,7 @@ import logging
 
 import numpy as np
-from .data_products import NumpyDataProduct, ODAAstropyTable, PictureProduct, BinaryData
+from .data_products import NumpyDataProduct, ODAAstropyTable, PictureProduct, BinaryData, BinaryProduct
 
 from astropy.io.fits.card import Undefined as astropyUndefined
 
@@ -15,7 +15,7 @@ def default(self, obj):
         if isinstance(obj, astropyUndefined):
             return "UNDEFINED"
 
-        if isinstance(obj, (NumpyDataProduct, ODAAstropyTable, PictureProduct)):
+        if isinstance(obj, (NumpyDataProduct, ODAAstropyTable, PictureProduct, BinaryProduct)):
             return obj.encode()
 
         if isinstance(obj, BinaryData):
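With `CustomJSONEncoder` extended above, a `BinaryProduct` serialises through its own `encode()`, like the other products; a small sketch, assuming the module paths used in this diff and an invented payload:

    import json
    from oda_api.json import CustomJSONEncoder
    from oda_api.data_products import BinaryProduct

    prod = BinaryProduct(b'\x00\x01\x02', name='blob')
    serialized = json.dumps(prod, cls=CustomJSONEncoder)     # encoder delegates to BinaryProduct.encode()
    restored = BinaryProduct.decode(json.loads(serialized))  # round-trip back to a BinaryProduct
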
diff --git a/requirements.txt b/requirements.txt
index 2b7f5d06..d0339578 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ matplotlib
 numpy
 jsonschema
 astroquery
--e git+https://github.com/oda-hub/dispatcher-app.git@list-products-from-given-source#egg=cdci_data_analysis[test]
+-e git+https://github.com/oda-hub/dispatcher-app.git#egg=cdci_data_analysis[test]
 simplejson
 sentry_sdk
 rdflib
diff --git a/tests/test_data_products.py b/tests/test_data_products.py
index f56e172a..2d164ff0 100644
--- a/tests/test_data_products.py
+++ b/tests/test_data_products.py
@@ -9,8 +9,13 @@ import os
 import typing
 from oda_api.json import CustomJSONEncoder
+import filecmp
 
-from oda_api.data_products import LightCurveDataProduct, NumpyDataProduct, ODAAstropyTable, PictureProduct
+from oda_api.data_products import (LightCurveDataProduct,
+                                   NumpyDataProduct,
+                                   ODAAstropyTable,
+                                   PictureProduct,
+                                   BinaryProduct)
 from astropy import time as atime
 from astropy import units as u
 from astropy.table import Table
@@ -165,4 +170,15 @@ def test_lightcurve_product_from_arrays(times, values, time_format, expected_uni
     lc = LightCurveDataProduct.from_arrays(times = times, fluxes = values, errors = errors, time_format=time_format)
     assert lc.data_unit[1].units_dict in expected_units_dict_variants
     assert all(lc.data_unit[1].data['TIME'].astype('int') == 59630)
+
+def test_new_binary_product():
+    infile = 'tests/test_data/lc.fits'
+    bin_prod = BinaryProduct.from_file(infile, name='binprd')
+    encoded = bin_prod.encode()
+    assert encoded['name'] == 'binprd'
+    decoded = BinaryProduct.decode(encoded)
+    assert decoded.name == 'binprd'
+    decoded.write_file('decbinprd.foo')
+    assert filecmp.cmp(infile, 'decbinprd.foo')
+    os.remove('decbinprd.foo')
\ No newline at end of file
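`TextLikeProduct` is not covered by the new test; a possible round-trip check in the same spirit (a sketch only, not part of the diff):

    from oda_api.data_products import TextLikeProduct

    def test_text_like_product_roundtrip():
        prod = TextLikeProduct('some text payload', name='txtprd')
        encoded = prod.encode()
        assert encoded == {'name': 'txtprd', 'value': 'some text payload', 'meta_data': {}}
        decoded = TextLikeProduct.decode(encoded)
        assert decoded.name == 'txtprd' and decoded.value == 'some text payload'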