diff --git a/astroquery/gaia/core.py b/astroquery/gaia/core.py index 7170f72214..d8f373fcc8 100644 --- a/astroquery/gaia/core.py +++ b/astroquery/gaia/core.py @@ -14,8 +14,11 @@ """ +from warnings import warn + from requests import HTTPError +from astroquery.exceptions import MaxResultsWarning from astroquery.utils.tap import TapPlus from astroquery.utils import commons from astroquery import log @@ -41,7 +44,7 @@ class GaiaClass(TapPlus): MAIN_GAIA_TABLE = None MAIN_GAIA_TABLE_RA = conf.MAIN_GAIA_TABLE_RA MAIN_GAIA_TABLE_DEC = conf.MAIN_GAIA_TABLE_DEC - ROW_LIMIT = conf.ROW_LIMIT + ROW_LIMIT = None VALID_DATALINK_RETRIEVAL_TYPES = conf.VALID_DATALINK_RETRIEVAL_TYPES def __init__(self, tap_plus_conn_handler=None, @@ -356,7 +359,7 @@ def get_datalinks(self, ids, verbose=False): return self.__gaiadata.get_datalinks(ids=ids, verbose=verbose) def __query_object(self, coordinate, radius=None, width=None, height=None, - async_job=False, verbose=False, columns=[]): + async_job=False, verbose=False, columns=[], row_limit=None): """Launches a job TAP & TAP+ @@ -378,6 +381,10 @@ def __query_object(self, coordinate, radius=None, width=None, height=None, flag to display information about the process columns: list, optional, default [] if empty, all columns will be selected + row_limit : int, optional + The maximum number of retrieved rows. Will default to the value + provided by the configuration system if not set explicitly. The + value -1 removes the limit entirely. Returns ------- @@ -395,7 +402,7 @@ def __query_object(self, coordinate, radius=None, width=None, height=None, heightQuantity = self.__getQuantityInput(height, "height") widthDeg = widthQuantity.to(units.deg) heightDeg = heightQuantity.to(units.deg) - + row_limit = row_limit or self.ROW_LIMIT or conf.ROW_LIMIT if columns: columns = ','.join(map(str, columns)) else: @@ -424,7 +431,7 @@ def __query_object(self, coordinate, radius=None, width=None, height=None, ) ORDER BY dist ASC - """.format(**{'row_limit': "TOP {0}".format(self.ROW_LIMIT) if self.ROW_LIMIT > 0 else "", + """.format(**{'row_limit': "TOP {0}".format(row_limit) if row_limit > 0 else "", 'ra_column': self.MAIN_GAIA_TABLE_RA, 'dec_column': self.MAIN_GAIA_TABLE_DEC, 'columns': columns, 'table_name': self.MAIN_GAIA_TABLE or conf.MAIN_GAIA_TABLE, 'ra': ra, 'dec': dec, 'width': widthDeg.value, 'height': heightDeg.value}) @@ -432,10 +439,14 @@ def __query_object(self, coordinate, radius=None, width=None, height=None, job = self.launch_job_async(query, verbose=verbose) else: job = self.launch_job(query, verbose=verbose) - return job.get_results() + table = job.get_results() + if verbose and len(table) == row_limit: + warn(f'The number of rows in the result matches the current row limit of {row_limit}. ' + f'You might wish to specify a different "row_limit" value.', MaxResultsWarning) + return table def query_object(self, coordinate, radius=None, width=None, height=None, - verbose=False, columns=[]): + verbose=False, columns=[], row_limit=None): """Launches a job TAP & TAP+ @@ -453,15 +464,21 @@ def query_object(self, coordinate, radius=None, width=None, height=None, flag to display information about the process columns: list, optional, default [] if empty, all columns will be selected + row_limit : int, optional + The maximum number of retrieved rows. Will default to the value + provided by the configuration system if not set explicitly. The + value -1 removes the limit entirely. Returns ------- The job results (astropy.table). """ - return self.__query_object(coordinate, radius, width, height, async_job=False, verbose=verbose, columns=columns) + return self.__query_object(coordinate, radius, width, height, + async_job=False, verbose=verbose, + columns=columns, row_limit=row_limit) def query_object_async(self, coordinate, radius=None, width=None, - height=None, verbose=False, columns=[]): + height=None, verbose=False, columns=[], row_limit=None): """Launches a job (async) TAP & TAP+ @@ -479,12 +496,17 @@ def query_object_async(self, coordinate, radius=None, width=None, flag to display information about the process columns: list, optional, default [] if empty, all columns will be selected + row_limit : int, optional + The maximum number of retrieved rows. Will default to the value + provided by the configuration system if not set explicitly. The + value -1 removes the limit entirely. Returns ------- The job results (astropy.table). """ - return self.__query_object(coordinate, radius, width, height, async_job=True, verbose=verbose, columns=columns) + return self.__query_object(coordinate, radius, width, height, async_job=True, + verbose=verbose, columns=columns, row_limit=row_limit) def __cone_search(self, coordinate, radius, table_name=None, ra_column_name=MAIN_GAIA_TABLE_RA, @@ -493,7 +515,7 @@ def __cone_search(self, coordinate, radius, table_name=None, background=False, output_file=None, output_format="votable", verbose=False, dump_to_file=False, - columns=[]): + columns=[], row_limit=None): """Cone search sorted by distance TAP & TAP+ @@ -526,6 +548,10 @@ def __cone_search(self, coordinate, radius, table_name=None, if True, the results are saved in a file instead of using memory columns: list, optional, default [] if empty, all columns will be selected + row_limit : int, optional + The maximum number of retrieved rows. Will default to the value + provided by the configuration system if not set explicitly. The + value -1 removes the limit entirely. Returns ------- @@ -542,6 +568,7 @@ def __cone_search(self, coordinate, radius, table_name=None, columns = ','.join(map(str, columns)) else: columns = "*" + row_limit = row_limit or self.ROW_LIMIT or conf.ROW_LIMIT query = """ SELECT @@ -561,23 +588,22 @@ def __cone_search(self, coordinate, radius, table_name=None, ORDER BY dist ASC """.format(**{'ra_column': ra_column_name, - 'row_limit': "TOP {0}".format(self.ROW_LIMIT) if self.ROW_LIMIT > 0 else "", + 'row_limit': "TOP {0}".format(row_limit) if row_limit > 0 else "", 'dec_column': dec_column_name, 'columns': columns, 'ra': ra, 'dec': dec, 'radius': radiusDeg, 'table_name': table_name or self.MAIN_GAIA_TABLE or conf.MAIN_GAIA_TABLE}) if async_job: - return self.launch_job_async(query=query, - output_file=output_file, - output_format=output_format, - verbose=verbose, - dump_to_file=dump_to_file, - background=background) + result = self.launch_job_async(query=query, output_file=output_file, + output_format=output_format, verbose=verbose, + dump_to_file=dump_to_file, background=background) else: - return self.launch_job(query=query, - output_file=output_file, - output_format=output_format, - verbose=verbose, - dump_to_file=dump_to_file) + result = self.launch_job(query=query, output_file=output_file, + output_format=output_format, verbose=verbose, + dump_to_file=dump_to_file) + if verbose and len(result.get_data()) == row_limit: + warn(f'The number of rows in the result matches the current row limit of {row_limit}. ' + f'You might wish to specify a different "row_limit" value.', MaxResultsWarning) + return result def cone_search(self, coordinate, radius=None, table_name=None, @@ -586,7 +612,7 @@ def cone_search(self, coordinate, radius=None, output_file=None, output_format="votable", verbose=False, dump_to_file=False, - columns=[]): + columns=[], row_limit=None): """Cone search sorted by distance (sync.) TAP & TAP+ @@ -613,6 +639,10 @@ def cone_search(self, coordinate, radius=None, if True, the results are saved in a file instead of using memory columns: list, optional, default [] if empty, all columns will be selected + row_limit : int, optional + The maximum number of retrieved rows. Will default to the value + provided by the configuration system if not set explicitly. The + value -1 removes the limit entirely. Returns ------- @@ -628,7 +658,7 @@ def cone_search(self, coordinate, radius=None, output_file=output_file, output_format=output_format, verbose=verbose, - dump_to_file=dump_to_file, columns=columns) + dump_to_file=dump_to_file, columns=columns, row_limit=row_limit) def cone_search_async(self, coordinate, radius=None, table_name=None, @@ -636,7 +666,7 @@ def cone_search_async(self, coordinate, radius=None, dec_column_name=MAIN_GAIA_TABLE_DEC, background=False, output_file=None, output_format="votable", - verbose=False, dump_to_file=False, columns=[]): + verbose=False, dump_to_file=False, columns=[], row_limit=None): """Cone search sorted by distance (async) TAP & TAP+ @@ -665,6 +695,10 @@ def cone_search_async(self, coordinate, radius=None, flag to display information about the process dump_to_file : bool, optional, default 'False' if True, the results are saved in a file instead of using memory + row_limit : int, optional + The maximum number of retrieved rows. Will default to the value + provided by the configuration system if not set explicitly. The + value -1 removes the limit entirely. Returns ------- @@ -680,7 +714,7 @@ def cone_search_async(self, coordinate, radius=None, output_file=output_file, output_format=output_format, verbose=verbose, - dump_to_file=dump_to_file, columns=columns) + dump_to_file=dump_to_file, columns=columns, row_limit=row_limit) def __checkQuantityInput(self, value, msg): if not (isinstance(value, str) or isinstance(value, units.Quantity)): diff --git a/astroquery/gaia/tests/test_gaia_remote.py b/astroquery/gaia/tests/test_gaia_remote.py index 98f34f100f..aa3ad4f0d4 100644 --- a/astroquery/gaia/tests/test_gaia_remote.py +++ b/astroquery/gaia/tests/test_gaia_remote.py @@ -2,6 +2,8 @@ import pytest from astropy.coordinates import SkyCoord +from astroquery.exceptions import MaxResultsWarning +from astroquery.gaia import conf from .. import GaiaClass @@ -13,15 +15,17 @@ def test_query_object_row_limit(): height = u.Quantity(0.1, u.deg) r = Gaia.query_object_async(coordinate=coord, width=width, height=height) - assert len(r) == Gaia.ROW_LIMIT + assert len(r) == conf.ROW_LIMIT Gaia.ROW_LIMIT = 10 - r = Gaia.query_object_async(coordinate=coord, width=width, height=height) + msg = ('The number of rows in the result matches the current row limit of ' + '10. You might wish to specify a different "row_limit" value.') + with pytest.warns(MaxResultsWarning, match=msg): + r = Gaia.query_object_async(coordinate=coord, width=width, height=height, verbose=True) assert len(r) == 10 == Gaia.ROW_LIMIT - Gaia.ROW_LIMIT = -1 - r = Gaia.query_object_async(coordinate=coord, width=width, height=height) + r = Gaia.query_object_async(coordinate=coord, width=width, height=height, row_limit=-1) assert len(r) == 176 @@ -34,16 +38,18 @@ def test_cone_search_row_limit(): j = Gaia.cone_search_async(coord, radius) r = j.get_results() - assert len(r) == Gaia.ROW_LIMIT + assert len(r) == conf.ROW_LIMIT Gaia.ROW_LIMIT = 10 - j = Gaia.cone_search_async(coord, radius) + msg = ('The number of rows in the result matches the current row limit of 10. You might wish ' + 'to specify a different "row_limit" value.') + with pytest.warns(MaxResultsWarning, match=msg): + j = Gaia.cone_search_async(coord, radius, verbose=True) r = j.get_results() assert len(r) == 10 == Gaia.ROW_LIMIT - Gaia.ROW_LIMIT = -1 - j = Gaia.cone_search_async(coord, radius) + j = Gaia.cone_search_async(coord, radius, row_limit=-1) r = j.get_results() assert len(r) == 1188 diff --git a/astroquery/gaia/tests/test_gaiatap.py b/astroquery/gaia/tests/test_gaiatap.py index 77abccb2ee..a6080e2488 100644 --- a/astroquery/gaia/tests/test_gaiatap.py +++ b/astroquery/gaia/tests/test_gaiatap.py @@ -18,6 +18,7 @@ import os import pytest +from astroquery.exceptions import MaxResultsWarning from astroquery.gaia import conf from astroquery.gaia.core import GaiaClass from astroquery.gaia.tests.DummyTapHandler import DummyTapHandler @@ -214,6 +215,12 @@ def test_query_object_async(self): 'table1_oid', None, np.int32) + # No warning without verbose=True + job = tap.query_object_async(sc, radius, row_limit=3) + msg = ('The number of rows in the result matches the current row limit of 3. You might ' + 'wish to specify a different "row_limit" value.') + with pytest.warns(MaxResultsWarning, match=msg): + job = tap.query_object_async(sc, radius, row_limit=3, verbose=True) def test_cone_search_sync(self): connHandler = DummyConnHandler() @@ -360,6 +367,29 @@ def test_cone_search_async(self): # Cleanup. conf.reset('MAIN_GAIA_TABLE') + # Test the default value from conf. + assert 'TOP 50' in job.parameters['query'] + # Test changing the row limit through conf at runtime. + with conf.set_temp('ROW_LIMIT', 42): + job = tap.cone_search_async(sc, radius) + assert 'TOP 42' in job.parameters['query'] + # Changing the value through the class should overrule conf. + tap.ROW_LIMIT = 17 + job = tap.cone_search_async(sc, radius) + assert 'TOP 17' in job.parameters['query'] + # Function argument has highest priority. + job = tap.cone_search_async(sc, radius, row_limit=9) + assert 'TOP 9' in job.parameters['query'] + # No row limit + job = tap.cone_search_async(sc, radius, row_limit=-1) + assert 'TOP' not in job.parameters['query'] + # No warning without verbose=True + job = tap.cone_search_async(sc, radius, row_limit=3) + msg = ('The number of rows in the result matches the current row limit of 3. You might ' + 'wish to specify a different "row_limit" value.') + with pytest.warns(MaxResultsWarning, match=msg): + job = tap.cone_search_async(sc, radius, row_limit=3, verbose=True) + def __check_results_column(self, results, columnName, description, unit, dataType): c = results[columnName] diff --git a/docs/gaia/gaia.rst b/docs/gaia/gaia.rst index 48a81f61e7..6690adc504 100644 --- a/docs/gaia/gaia.rst +++ b/docs/gaia/gaia.rst @@ -125,12 +125,19 @@ degrees around an specific point in RA/Dec coordinates. 0.021615117161838747 1635721458409799680 ... Length = 50 rows -Queries return a limited number of rows controlled by ``Gaia.ROW_LIMIT``. To change the default behaviour set this appropriately. +By default the number of rows returned by a query is limited by the +``astroquery.gaia.conf.ROW_LIMIT`` value. This value can be overruled in a +single function call with the ``row_limit`` argument. Alternatively, if the +class attribute ``Gaia.ROW_LIMIT`` is set then it will take precedence over +``conf.ROW_LIMIT``. If you call the function with ``verbose=True`` then you +will be warned if the length of the query result is equal to the row limit. .. code-block:: python - >>> Gaia.ROW_LIMIT = 8 - >>> r = Gaia.query_object_async(coordinate=coord, width=width, height=height) + >>> from astroquery.gaia import conf + >>> conf.ROW_LIMIT = 8 # Or Gaia.ROW_LIMIT = 8 + >>> r = Gaia.query_object_async(coordinate=coord, width=width, height=height, + verbose=True) >>> r.pprint() dist solution_id ... epoch_photometry_url @@ -144,13 +151,15 @@ Queries return a limited number of rows controlled by ``Gaia.ROW_LIMIT``. To cha 0.006209042666371929 1635721458409799680 ... 0.007469463683838576 1635721458409799680 ... 0.008202004514524316 1635721458409799680 ... + MaxResultsWarning: The number of rows in the result matches the current row + limit of 8. You might wish to specify a different "row_limit" value. -To return an unlimited number of rows set ``Gaia.ROW_LIMIT`` to -1. +To return an unlimited number of rows set the row limit to ``-1``. .. code-block:: python - >>> Gaia.ROW_LIMIT = -1 - >>> r = Gaia.query_object_async(coordinate=coord, width=width, height=height) + >>> r = Gaia.query_object_async(coordinate=coord, width=width, height=height, + ... row_limit=-1) >>> r.pprint() dist solution_id ... epoch_photometry_url @@ -184,7 +193,7 @@ To return an unlimited number of rows set ``Gaia.ROW_LIMIT`` to -1. ~~~~~~~~~~~~~~~~ This query performs a cone search centered at the specified RA/Dec coordinates with the provided -radius argument. +radius argument. The number of rows is limited just like in object queries. .. code-block:: python