From 946370b89662642fa69e35527caf1f8f82e5ead0 Mon Sep 17 00:00:00 2001 From: Johannes Sahlmann Date: Fri, 18 Oct 2024 11:05:41 +0200 Subject: [PATCH 1/5] add test remove unused code and dependencies --- pystrometry/utils/archives.py | 155 ++--------------------- pystrometry/utils/tests/test_archives.py | 22 ++++ 2 files changed, 31 insertions(+), 146 deletions(-) create mode 100644 pystrometry/utils/tests/test_archives.py diff --git a/pystrometry/utils/archives.py b/pystrometry/utils/archives.py index cc36961..2abc6e8 100644 --- a/pystrometry/utils/archives.py +++ b/pystrometry/utils/archives.py @@ -8,15 +8,12 @@ import logging import os from astropy.table import Table -import astropy.units as u -from astroquery.gaia import Gaia, TapPlus -from astropy.time import Time +from astroquery.gaia import Gaia import pandas as pd -def get_gaiadr_data(analysis_dataset_name, data_dir, source_id_array=None, gaia_data_release='dr3int5', - overwrite_query=False, gaia_table_name='gaia_source', shared_user_name=None, - gacs_connection=None): +def get_gaiadr_data(analysis_dataset_name, data_dir, source_id_array=None, gaia_data_release='gaiadr3', + overwrite_query=False, gaia_table_name='gaia_source', shared_user_name=None): """Query a Gaia archive table by source_id. Only data corresponding to source_id_array are returned. Parameters @@ -33,30 +30,22 @@ def get_gaiadr_data(analysis_dataset_name, data_dir, source_id_array=None, gaia_ """ + if os.path.exists(data_dir) is False: + os.makedirs(data_dir) + # retrieve Gaia DR data by submitting list of source_id to GACS output_file = os.path.join(data_dir, '{}_{}_sources.parquet'.format(gaia_data_release, analysis_dataset_name)) if (not os.path.isfile(output_file)) or (overwrite_query): - if 'int' in gaia_data_release: - if gacs_connection is None: - gaia = TapPlus(url="http://geapre.esac.esa.int/tap-server/tap") - else: - gaia = gacs_connection + gaia = Gaia + if shared_user_name is not None: if getattr(gaia, '_TapPlus__isLoggedIn') is False: gaia.login() - if shared_user_name is None: - shared_user_name = gaia_data_release table_name = 'user_{}'.format(shared_user_name) else: - gaia = Gaia - if shared_user_name is not None: - if getattr(gaia, '_TapPlus__isLoggedIn') is False: - gaia.login() - table_name = 'user_{}'.format(shared_user_name) - else: - table_name = '{}'.format(gaia_data_release) + table_name = '{}'.format(gaia_data_release) if source_id_array is not None: input_table_name = '{}_source_id'.format(analysis_dataset_name) @@ -89,129 +78,3 @@ def get_gaiadr_data(analysis_dataset_name, data_dir, source_id_array=None, gaia_ print('Retrieved {} rows from {}.{}'.format(len(df), gaia_data_release, gaia_table_name)) return df - - -def query_dpcg(connection, out_dir, tag='dpcgdata', query=None, reference_time=None, - selected_source_id_string=None, overwrite=False): - - out_file = os.path.join(out_dir, f'dpcg_{tag}.parquet') - - if overwrite or (os.path.isfile(out_file) is False): - assert connection.closed == 0 - - - # reference_time = Time(nss_all['ref_epoch'][0], format='jyear') - ref_epoch_jd = reference_time.jd - # selected_source_id_string = ','.join(selected_source_id_array.astype(str)) - - - - if query is None: - query = f""" - select - sourceid as source_id, - -- DR3 position reference epoch: 2016-01-01T12:00:00.000000000 (TCB) = JD 2457389.0 (update at 3 placed below when changed) - ((t).obstime - {ref_epoch_jd})/365.25 as t_min_t0_yr, - cos( (t).scanposangle) as cpsi_obs, - sin( (t).scanposangle) as spsi_obs, - (t).varpifactoral as ppfact_obs, - ((t).obstime - {ref_epoch_jd})/365.25*cos( (t).scanposangle) as tcpsi_obs, - ((t).obstime - {ref_epoch_jd})/365.25*sin( (t).scanposangle) as tspsi_obs, - (t).centroidposal as da_mas_obs, - (t).centroidposerroral as errda_mas_obs, - -- remove last 4 bits as these were added by CU4 to encode ccd number - (t).transitid/16 as transitid, - -- convert last 4 bits of CU4 transitid into number - ( (t).transitid - (t).transitid/16*16 ) as ccdnumber, - array_length(transits,1) as num_obs_for_src - - -- from mdb_gaia_starobject_088 - -- join lateral unnest(transits) t on true - from mdb_gaia_starobject_088 so - join dr3_ops_cs36_mv.dgdreq58_rejected_cu4transitids_astro transrej using (sourceid) - join lateral unnest(filter_transits(transits,rejected_cu4transitids_astro)) as t on true - -- provide source id list (comma separated) - where sourceid in ({selected_source_id_string}) - """ - - dpcg_df = pd.read_sql(query, connection) - dpcg_df.to_parquet(out_file) - logging.info(f'Wrote {len(dpcg_df)} rows to {out_file}') - else: - dpcg_df = pd.read_parquet(out_file) - logging.info(f'Read {len(dpcg_df)} rows from {out_file}') - return dpcg_df - - -def query_dpcg_epochastrometry(connection, out_dir, tag='dpcgdata', query=None, reference_time=None, - selected_source_id_string=None, overwrite=False): - """This query already applies the CU4 DU432 pre-processing filter. - - :param connection: - :param out_dir: - :param tag: - :param query: - :param reference_time: - :param selected_source_id_string: - :param overwrite: - :return: - """ - - out_file = os.path.join(out_dir, f'dpcg_{tag}.parquet') - - if overwrite or (os.path.isfile(out_file) is False): - assert connection.closed == 0 - - # reference_time = Time(nss_all['ref_epoch'][0], format='jyear') - ref_epoch_jd = reference_time.jd - # selected_source_id_string = ','.join(selected_source_id_array.astype(str)) - - tcb_ref_epoch_jd = Time(2010.0, format='jyear', scale='tcb').jd - - if query is None: - query = f""" - select - sourceid as source_id, - -- DR3 position reference epoch: 2016-01-01T12:00:00.000000000 (TCB) = JD 2457389.0 (update at 3 placed below when changed) - ((t).obstime - {ref_epoch_jd})/365.25 as t_min_t0_yr, - ((t).obstime - {tcb_ref_epoch_jd})*{u.day.to(u.nanosecond)} as obsTimeTcb, - ((t).scanposangle) as scanPosAngle, - cos( (t).scanposangle) as cpsi_obs, - sin( (t).scanposangle) as spsi_obs, - (t).varpifactoral as parallaxFactorAl, - ((t).obstime - {ref_epoch_jd})/365.25*cos( (t).scanposangle) as tcpsi_obs, - ((t).obstime - {ref_epoch_jd})/365.25*sin( (t).scanposangle) as tspsi_obs, - (t).centroidposal as centroidPosAl, - (t).centroidposerroral as centroidPosErrorAl, - -- remove last 4 bits as these were added by CU4 to encode ccd number - (t).transitid/16 as transitid, - -- convert last 4 bits of CU4 transitid into number - ( (t).transitid - (t).transitid/16*16 ) as ccdnumber, - (astromsource).alpha as ra0, - (astromsource).delta as dec0, - array_length(transits,1) as num_obs_for_src - - -- from mdb_gaia_starobject_088 - -- join lateral unnest(transits) t on true - from mdb_gaia_starobject_088 so - join dr3_ops_cs36_mv.dgdreq58_rejected_cu4transitids_astro transrej using (sourceid) - join lateral unnest(filter_transits(transits,rejected_cu4transitids_astro)) as t on true - """ - - if selected_source_id_string != None: - query += f"""-- provide source id list (comma separated) - where sourceid in ({selected_source_id_string})""" - else: - query += """ where sourceid != 0""" - - dpcg_df = pd.read_sql(query, connection) - dpcg_df.to_parquet(out_file) - logging.info(f'Wrote {len(dpcg_df)} rows to {out_file}') - else: - dpcg_df = pd.read_parquet(out_file) - logging.info(f'Read {len(dpcg_df)} rows from {out_file}') - return dpcg_df - - - - diff --git a/pystrometry/utils/tests/test_archives.py b/pystrometry/utils/tests/test_archives.py new file mode 100644 index 0000000..cf1be97 --- /dev/null +++ b/pystrometry/utils/tests/test_archives.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Tests for the utils.archives module. + +Authors +------- + Johannes Sahlmann + +""" +import os +import numpy as np + +from ..archives import get_gaiadr_data + +def test_basic_query(): + analysis_dataset_name = 'testing' + data_dir = os.path.join(os.getcwd(), 'tmp') + source_id_array = np.array([3482973840016015744]) + gaia_data_release = 'gaiadr3' + gaia_table_name = 'gaia_source' + + df = get_gaiadr_data(analysis_dataset_name, data_dir, source_id_array, gaia_data_release, gaia_table_name) + assert len(df) == 1 From 7c6638d0539d0e9206962c3ff5ca0a764e890d0e Mon Sep 17 00:00:00 2001 From: Johannes Sahlmann Date: Fri, 18 Oct 2024 11:15:08 +0200 Subject: [PATCH 2/5] bump version run utils tests --- .github/workflows/python-package-conda.yml | 1 + setup.cfg | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index bbda8ee..23d37b3 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -60,6 +60,7 @@ jobs: conda config --set solver classic conda install pytest pytest pystrometry/tests + pytest pystrometry/utils/tests build-linux-python-3p9: runs-on: ubuntu-latest diff --git a/setup.cfg b/setup.cfg index 02363a1..ffbd8ab 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,7 +43,7 @@ github_project = https://github.com/Johannes-Sahlmann/pystrometry install_requires = astropy, linearfit>=1.0.2, matplotlib, scipy, astroquery, sympy # version should be PEP440 compatible (https://www.python.org/dev/peps/pep-0440/) -version = 0.6.0 +version = 0.6.1 # Note: you will also need to change this in your package's __init__.py minimum_python_version = 3.7 From 20c4da6ad5562c81ebbc2d40b7416e7db233f994 Mon Sep 17 00:00:00 2001 From: Johannes Sahlmann Date: Fri, 18 Oct 2024 11:21:21 +0200 Subject: [PATCH 3/5] add pyarrow --- environment-3.10-pinned.yml | 1 + environment-3.10.yml | 1 + environment-3.11.yml | 1 + environment-3.9-pinned.yml | 1 + environment-3.9.yml | 1 + 5 files changed, 5 insertions(+) diff --git a/environment-3.10-pinned.yml b/environment-3.10-pinned.yml index 0b71367..77962ec 100644 --- a/environment-3.10-pinned.yml +++ b/environment-3.10-pinned.yml @@ -7,6 +7,7 @@ dependencies: - pip == 23.1.2 - kepmodel == 1.0.6 - pandas == 2.0.2 + - pyarrow - matplotlib == 3.7.1 - astroquery == 0.4.6 - astropy == 5.0.6 diff --git a/environment-3.10.yml b/environment-3.10.yml index 039af28..37584bf 100644 --- a/environment-3.10.yml +++ b/environment-3.10.yml @@ -7,6 +7,7 @@ dependencies: - pip - kepmodel - pandas + - pyarrow - matplotlib - astroquery - astropy diff --git a/environment-3.11.yml b/environment-3.11.yml index 29ee0b7..24543d2 100644 --- a/environment-3.11.yml +++ b/environment-3.11.yml @@ -7,6 +7,7 @@ dependencies: - pip - kepmodel - pandas + - pyarrow - matplotlib - astroquery - astropy diff --git a/environment-3.9-pinned.yml b/environment-3.9-pinned.yml index d256a1a..c7a0f96 100644 --- a/environment-3.9-pinned.yml +++ b/environment-3.9-pinned.yml @@ -7,6 +7,7 @@ dependencies: - pip == 23.1.2 - kepmodel == 1.0.6 - pandas == 2.0.2 + - pyarrow - matplotlib == 3.7.1 - astroquery == 0.4.6 - astropy == 5.0.6 diff --git a/environment-3.9.yml b/environment-3.9.yml index 69bbeff..9cbe4cd 100644 --- a/environment-3.9.yml +++ b/environment-3.9.yml @@ -7,6 +7,7 @@ dependencies: - pip - kepmodel - pandas + - pyarrow - matplotlib - astroquery - astropy From d63c0d6279bc182b57105232056b9d56bfff7349 Mon Sep 17 00:00:00 2001 From: Johannes Sahlmann Date: Fri, 18 Oct 2024 11:21:57 +0200 Subject: [PATCH 4/5] run tests --- .github/workflows/python-package-conda.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index 23d37b3..5d0df30 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -31,6 +31,7 @@ jobs: conda config --set solver classic conda install pytest pytest pystrometry/tests + pytest pystrometry/utils/tests build-linux-python-3p10-pinned: runs-on: ubuntu-latest @@ -89,4 +90,5 @@ jobs: run: | conda install pytest pytest pystrometry/tests + pytest pystrometry/utils/tests From 50b72560666d93fc60454c8a44d04e13e6db81dd Mon Sep 17 00:00:00 2001 From: Johannes Sahlmann Date: Fri, 18 Oct 2024 11:29:18 +0200 Subject: [PATCH 5/5] add dependency --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ffbd8ab..59fa309 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,7 +40,7 @@ github_project = https://github.com/Johannes-Sahlmann/pystrometry # install_requires should be formatted as a comma-separated list, e.g.: # install_requires = astropy, scipy, matplotlib -install_requires = astropy, linearfit>=1.0.2, matplotlib, scipy, astroquery, sympy +install_requires = astropy, linearfit>=1.0.2, matplotlib, scipy, astroquery, sympy, pandas, pyarrow # version should be PEP440 compatible (https://www.python.org/dev/peps/pep-0440/) version = 0.6.1