From d070bdf0bf71900b4403afac41e620d403fc0cf2 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 13 Aug 2023 16:37:01 -0400 Subject: [PATCH 01/12] refactor wfau to retrieve metadata about images and reject deprecated ones --- astroquery/ukidss/tests/test_ukidss_remote.py | 11 +++ astroquery/wfau/core.py | 90 ++++++++++++------- 2 files changed, 69 insertions(+), 32 deletions(-) diff --git a/astroquery/ukidss/tests/test_ukidss_remote.py b/astroquery/ukidss/tests/test_ukidss_remote.py index 42966fc650..7a49e2d48a 100644 --- a/astroquery/ukidss/tests/test_ukidss_remote.py +++ b/astroquery/ukidss/tests/test_ukidss_remote.py @@ -58,3 +58,14 @@ def test_query_region_constraints(self): assert isinstance(table_constraint, Table) assert len(table_noconstraint) >= len(table_constraint) + + def test_deprecated_image_list(self): + """ + Regression test for Issue 2808 + """ + crd = SkyCoord(ra=211.3194905, dec=54.413845, unit=(u.deg,u.deg)) + ukidss = ukidss.core.UkidssClass() + ukidss.database = 'UHSDR2' + result = ukidss.get_image_list(crd, waveband='all', ignore_deprecated=True) + assert "http://wsa.roe.ac.uk/cgi-bin/getImage.cgi?file=/disk73/wsa/ingest/fits/20190614_v5/w20190614_00626_st.fit&mfid=11076607&extNo=4&lx=1276&hx=1426&ly=187&hy=337&rf=0&flip=1&uniq=834_579_14_86394_6&xpos=75.9&ypos=75.7&band=K&ra=211.3194905&dec=54.413845" in result + assert "http://wsa.roe.ac.uk/cgi-bin/getImage.cgi?file=/disk53/wsa/ingest/fits/20150129_v5/w20150129_02901_st.fit&mfid=8278383&extNo=4&lx=1274&hx=1425&ly=195&hy=345&rf=0&flip=1&uniq=834_579_14_86394_5&xpos=76.6&ypos=75.9&band=J&ra=211.3194905&dec=54.413845" not in result diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py index 7df464b142..3a7b0e4cec 100644 --- a/astroquery/wfau/core.py +++ b/astroquery/wfau/core.py @@ -12,6 +12,7 @@ import astropy.units as u import astropy.coordinates as coord import astropy.io.votable as votable +from astropy.io import ascii from ..query import QueryWithLogin from 
..exceptions import InvalidQueryError, TimeoutError, NoResultsWarning @@ -290,10 +291,46 @@ def get_images_async(self, coordinates, *, waveband='all', frame_type='stack', show_progress=show_progress) for url in image_urls] - def get_image_list(self, coordinates, *, waveband='all', frame_type='stack', - image_width=1 * u.arcmin, image_height=None, - radius=None, database=None, - programme_id=None, get_query_payload=False): + + def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True, **kwargs): + """ + See `get_image_table` for a full list of options. + + This method will return _only_ the URLs requested as a list of URLs. + + Parameters + ---------- + ignore_deprecated : bool + If set (default: True), only images with the ``deprecated`` flag + set to zero will be included + + Returns + ------- + url_list : list of image urls + + """ + image_table = self.get_image_table(coordinates, radius=radius, **kwargs) + + if ignore_deprecated: + image_urls = image_table[image_table['deprecated'] == 0]['Link'] + else: + image_urls = image_table['Link'] + + # different links for radius queries and simple ones + if radius is not None: + image_urls = [link for link in image_urls if + ('fits_download' in link and '_cat.fits' + not in link and '_two.fit' not in link)] + else: + image_urls = [link.replace("getImage", "getFImage") + for link in image_urls] + + return image_urls + + def get_image_table(self, coordinates, *, waveband='all', frame_type='stack', + image_width=1 * u.arcmin, image_height=None, + radius=None, database=None, + programme_id=None, get_query_payload=False): """ Function that returns a list of urls from which to download the FITS images. @@ -337,7 +374,9 @@ def get_image_list(self, coordinates, *, waveband='all', frame_type='stack', Returns ------- - url_list : list of image urls + table : Table + An astropy table containing the metadata table, including URLs, of + the requested files. 
""" @@ -399,39 +438,26 @@ def get_image_list(self, coordinates, *, waveband='all', frame_type='stack', return request_payload response = self._wfau_send_request(query_url, request_payload) + self._penultimate_response = response response = self._check_page(response.url, "row") + self._last_response = response - image_urls = self.extract_urls(response.text) - # different links for radius queries and simple ones - if radius is not None: - image_urls = [link for link in image_urls if - ('fits_download' in link and '_cat.fits' - not in link and '_two.fit' not in link)] - else: - image_urls = [link.replace("getImage", "getFImage") - for link in image_urls] - - return image_urls + return self.parse_imagequery_page(response.text) - def extract_urls(self, html_in): + def parse_imagequery_page(self, html_in): """ - Helper function that uses regexps to extract the image urls from the - given HTML. - - Parameters - ---------- - html_in : str - source from which the urls are to be extracted. - - Returns - ------- - links : list - The list of URLS extracted from the input. 
+ Parse the image metadata page """ - # Parse html input for links ahref = re.compile(r'href="([a-zA-Z0-9_\.&\?=%/:-]+)"') - links = ahref.findall(html_in) - return links + + html = "\n".join([ + # for ascii.read: th -> header + row.replace("td", "th") if row.startswith("show<", ">{}<".format(ahref.search(row).groups()[0])) if ">show<" in row else + row + for row in html_in.split("\n")]) + return ascii.read(html, format='html') def query_region(self, coordinates, *, radius=1 * u.arcmin, programme_id=None, database=None, From 2fa50506c89e101ad57cc311743e70334960c6b4 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 13 Aug 2023 16:40:43 -0400 Subject: [PATCH 02/12] codestyle --- astroquery/ukidss/tests/test_ukidss_remote.py | 15 +++++++++------ astroquery/wfau/core.py | 1 - 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/astroquery/ukidss/tests/test_ukidss_remote.py b/astroquery/ukidss/tests/test_ukidss_remote.py index 7a49e2d48a..cb88dac017 100644 --- a/astroquery/ukidss/tests/test_ukidss_remote.py +++ b/astroquery/ukidss/tests/test_ukidss_remote.py @@ -63,9 +63,12 @@ def test_deprecated_image_list(self): """ Regression test for Issue 2808 """ - crd = SkyCoord(ra=211.3194905, dec=54.413845, unit=(u.deg,u.deg)) - ukidss = ukidss.core.UkidssClass() - ukidss.database = 'UHSDR2' - result = ukidss.get_image_list(crd, waveband='all', ignore_deprecated=True) - assert "http://wsa.roe.ac.uk/cgi-bin/getImage.cgi?file=/disk73/wsa/ingest/fits/20190614_v5/w20190614_00626_st.fit&mfid=11076607&extNo=4&lx=1276&hx=1426&ly=187&hy=337&rf=0&flip=1&uniq=834_579_14_86394_6&xpos=75.9&ypos=75.7&band=K&ra=211.3194905&dec=54.413845" in result - assert "http://wsa.roe.ac.uk/cgi-bin/getImage.cgi?file=/disk53/wsa/ingest/fits/20150129_v5/w20150129_02901_st.fit&mfid=8278383&extNo=4&lx=1274&hx=1425&ly=195&hy=345&rf=0&flip=1&uniq=834_579_14_86394_5&xpos=76.6&ypos=75.9&band=J&ra=211.3194905&dec=54.413845" not in result + crd = SkyCoord(ra=211.3194905, dec=54.413845, 
unit=(u.deg, u.deg)) + uk = ukidss.core.UkidssClass() + uk.database = 'UHSDR2' + result = uk.get_image_list(crd, waveband='all', ignore_deprecated=True) + + # this image is not deprecated (deprecated==0) + assert "http://wsa.roe.ac.uk/cgi-bin/getImage.cgi?file=/disk73/wsa/ingest/fits/20190614_v5/w20190614_00626_st.fit&mfid=11076607&extNo=4&lx=1276&hx=1426&ly=187&hy=337&rf=0&flip=1&uniq=834_579_14_86394_6&xpos=75.9&ypos=75.7&band=K&ra=211.3194905&dec=54.413845" in result # noqa: E501 + # this image is deprecated (deprecated==80) + assert "http://wsa.roe.ac.uk/cgi-bin/getImage.cgi?file=/disk53/wsa/ingest/fits/20150129_v5/w20150129_02901_st.fit&mfid=8278383&extNo=4&lx=1274&hx=1425&ly=195&hy=345&rf=0&flip=1&uniq=834_579_14_86394_5&xpos=76.6&ypos=75.9&band=J&ra=211.3194905&dec=54.413845" not in result # noqa: E501 diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py index 3a7b0e4cec..731865a936 100644 --- a/astroquery/wfau/core.py +++ b/astroquery/wfau/core.py @@ -291,7 +291,6 @@ def get_images_async(self, coordinates, *, waveband='all', frame_type='stack', show_progress=show_progress) for url in image_urls] - def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True, **kwargs): """ See `get_image_table` for a full list of options. 
From 6d16328cb02b1151c7f6afc9245042bb24c1e287 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 13 Aug 2023 16:49:36 -0400 Subject: [PATCH 03/12] WIP: trying to fix radius=not None queries --- astroquery/wfau/core.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py index 731865a936..03b392aea1 100644 --- a/astroquery/wfau/core.py +++ b/astroquery/wfau/core.py @@ -310,7 +310,7 @@ def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True, ** """ image_table = self.get_image_table(coordinates, radius=radius, **kwargs) - if ignore_deprecated: + if ignore_deprecated and radius is None: image_urls = image_table[image_table['deprecated'] == 0]['Link'] else: image_urls = image_table['Link'] @@ -441,9 +441,9 @@ def get_image_table(self, coordinates, *, waveband='all', frame_type='stack', response = self._check_page(response.url, "row") self._last_response = response - return self.parse_imagequery_page(response.text) + return self.parse_imagequery_page(response.text, radius=radius) - def parse_imagequery_page(self, html_in): + def parse_imagequery_page(self, html_in, radius=None): """ Parse the image metadata page """ @@ -454,9 +454,14 @@ def parse_imagequery_page(self, html_in): row.replace("td", "th") if row.startswith("
show<", ">{}<".format(ahref.search(row).groups()[0])) if ">show<" in row else + # for radius searches, "FITS" needs to be s/FITS/url/ + row.replace(">FITS<", ">{}<".format(ahref.search(row).groups()[0])) if ">FITS<" in row else row for row in html_in.split("\n")]) - return ascii.read(html, format='html') + if radius is None: + return ascii.read(html, format='html') + else: + return ascii.read(html, format='html', htmldict={'table_id': 3}) def query_region(self, coordinates, *, radius=1 * u.arcmin, programme_id=None, database=None, From bfd412eecf0d158bfbca110a36da633861bb62cd Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 13 Aug 2023 17:24:39 -0400 Subject: [PATCH 04/12] update reader for radius!=None searches, and correspondingly update the image request test data --- .../ukidss/tests/data/image_results.html | 169 +++++++++++++++--- astroquery/wfau/core.py | 42 +++-- 2 files changed, 176 insertions(+), 35 deletions(-) diff --git a/astroquery/ukidss/tests/data/image_results.html b/astroquery/ukidss/tests/data/image_results.html index 138a48c8eb..fc34523455 100644 --- a/astroquery/ukidss/tests/data/image_results.html +++ b/astroquery/ukidss/tests/data/image_results.html @@ -1,22 +1,147 @@ -  -

GetImage cut-out results

-
J2000 coords: RA: 83.6330757 Dec:22.014436 -
Programme: All UKIDSS surveys -
Filter: all -
Processing ... -
Connecting to database: UKIDSSDR7PLUS

-

- - - - - - - - - -
LinkmultiframeIDframetypeobstypefilteridshortnamedateObsextNum
show1737581leavstackOBJECT5K2007-10-11 13:12:05.55
-1 rows returned. + + + + + + + +WSA ImageList + + + + + + +
WSA ImageList   

+Not logged in: links will only be returned for frames that are publicly accessible

+Archive Listing

Searching...
+Survey: UKIDSS Galactic Clusters Survey, GCS
+Waveband: K
+Minimum RA: 5.551333333333333 hours Maximum RA: 5.599333333333333 hours
+Minimum Dec: 21.68116666666667 degrees Maximum Dec: 22.347833333333334 degrees
+ +
Using database: UKIDSSDR11PLUS +

+ + +
View column linkshows jpeg images of multiframe in a new window plus links to download file(s)
Img column linkdownload the RICE compressed FITS image file. Use View column link to retrieve uncompressed images.
Cat column linkdownload the FITS catalogue file.
+
begin row 1
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ViewImgCatmultiframeIDframeTypeobstyperaBasedecBaseshortnameexptimedateObsprojectnumDetectorsukirtRunNo
viewFITS 1737551leavOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:08:30.0U/UKIDSS/GCS2141802
viewFITSFITS1737553leavstackOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:08:30.0U/UKIDSS/GCS2141802
viewFITS 1737559leavOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:09:18.6U/UKIDSS/GCS2141806
viewFITS 1737579leavOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:05.5U/UKIDSS/GCS2141818
viewFITSFITS1737581leavstackOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:05.5U/UKIDSS/GCS2141818
viewFITS 1737587leavOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:53.8U/UKIDSS/GCS2141822

row(s) 1 to 6 displayed.
+

Back to form (uses Javascript)
+ 
diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py
index 03b392aea1..d0a971311c 100644
--- a/astroquery/wfau/core.py
+++ b/astroquery/wfau/core.py
@@ -6,7 +6,7 @@
 import time
 from math import cos, radians
 import requests
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
 from io import BytesIO, StringIO
 
 import astropy.units as u
@@ -312,6 +312,8 @@ def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True, **
 
         if ignore_deprecated and radius is None:
             image_urls = image_table[image_table['deprecated'] == 0]['Link']
+        elif radius is not None:
+            image_urls = image_table['Img']
         else:
             image_urls = image_table['Link']
 
@@ -321,6 +323,8 @@ def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True, **
                           ('fits_download' in link and '_cat.fits'
                            not in link and '_two.fit' not in link)]
         else:
+            # Not sure this is necessary any more (as of #2809), but it seems
+            # harmless and I'm not removing it until I'm sure
             image_urls = [link.replace("getImage", "getFImage")
                           for link in image_urls]
 
@@ -449,19 +453,31 @@ def parse_imagequery_page(self, html_in, radius=None):
         """
         ahref = re.compile(r'href="([a-zA-Z0-9_\.&\?=%/:-]+)"')
 
-        html = "\n".join([
-            # for ascii.read: th -> header
-            row.replace("td", "th") if row.startswith("show<", ">{}<".format(ahref.search(row).groups()[0])) if ">show<" in row else
-            # for radius searches, "FITS" needs to be s/FITS/url/
-            row.replace(">FITS<", ">{}<".format(ahref.search(row).groups()[0])) if ">FITS<" in row else
-            row
-            for row in html_in.split("\n")])
-        if radius is None:
-            return ascii.read(html, format='html')
+        if radius is not None:
+            html = "\n".join([
+                # for radius searches, "FITS" needs to be s/FITS/url/
+                row.replace(">FITS<", ">{}<".format(ahref.search(row).groups()[0])) if ">FITS<" in row else
+                row
+                for row in html_in.split("\n")])
+            with warnings.catch_warnings():
+                # this is really html; the xml parser doesn't work
+                warnings.simplefilter(action="ignore", category=XMLParsedAsHTMLWarning)
+                soup = BeautifulSoup(html, features='html5')
+            httb = soup.findAll('table')[2]
+            firstrow = httb.findAll('tr')[0]
+            for td in firstrow.findAll('td'):
+                td.name = 'th'
+            return ascii.read(str(httb), format='html')
+
         else:
-            return ascii.read(html, format='html', htmldict={'table_id': 3})
+            html = "\n".join([
+                # for ascii.read: th -> header
+                row.replace("td", "th") if row.startswith("
show<", ">{}<".format(ahref.search(row).groups()[0])) if ">show<" in row else + row + for row in html_in.split("\n")]) + return ascii.read(html, format='html') def query_region(self, coordinates, *, radius=1 * u.arcmin, programme_id=None, database=None, From e3e4905eb514b61d72ab0cbe056488fb81a5193a Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Mon, 14 Aug 2023 21:27:10 -0400 Subject: [PATCH 05/12] restore extract_urls - used by other things --- astroquery/wfau/core.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py index d0a971311c..f558aeca3b 100644 --- a/astroquery/wfau/core.py +++ b/astroquery/wfau/core.py @@ -479,6 +479,26 @@ def parse_imagequery_page(self, html_in, radius=None): for row in html_in.split("\n")]) return ascii.read(html, format='html') + def extract_urls(self, html_in): + """ + Helper function that uses regexps to extract the image urls from the + given HTML. + + Parameters + ---------- + html_in : str + source from which the urls are to be extracted. + + Returns + ------- + links : list + The list of URLS extracted from the input. 
+ """ + # Parse html input for links + ahref = re.compile(r'href="([a-zA-Z0-9_\.&\?=%/:-]+)"') + links = ahref.findall(html_in) + return links + def query_region(self, coordinates, *, radius=1 * u.arcmin, programme_id=None, database=None, verbose=False, get_query_payload=False, system='J2000', From 3a7b298522a2b328fc482b050e8eb2eae88faa0d Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Mon, 14 Aug 2023 21:44:18 -0400 Subject: [PATCH 06/12] tests of different image result queries --- ...sults.html => image_results_noradius.html} | 0 .../tests/data/image_results_radius.html | 22 +++++++++++++++++++ astroquery/ukidss/tests/test_ukidss.py | 14 +++++++++--- astroquery/wfau/core.py | 17 ++++++++++---- 4 files changed, 46 insertions(+), 7 deletions(-) rename astroquery/ukidss/tests/data/{image_results.html => image_results_noradius.html} (100%) create mode 100644 astroquery/ukidss/tests/data/image_results_radius.html diff --git a/astroquery/ukidss/tests/data/image_results.html b/astroquery/ukidss/tests/data/image_results_noradius.html similarity index 100% rename from astroquery/ukidss/tests/data/image_results.html rename to astroquery/ukidss/tests/data/image_results_noradius.html diff --git a/astroquery/ukidss/tests/data/image_results_radius.html b/astroquery/ukidss/tests/data/image_results_radius.html new file mode 100644 index 0000000000..138a48c8eb --- /dev/null +++ b/astroquery/ukidss/tests/data/image_results_radius.html @@ -0,0 +1,22 @@ +  +

GetImage cut-out results

+
J2000 coords: RA: 83.6330757 Dec:22.014436 +
Programme: All UKIDSS surveys +
Filter: all +
Processing ... +
Connecting to database: UKIDSSDR7PLUS

+

+ + + + + + + + + +
LinkmultiframeIDframetypeobstypefilteridshortnamedateObsextNum
show1737581leavstackOBJECT5K2007-10-11 13:12:05.55
+1 rows returned. diff --git a/astroquery/ukidss/tests/test_ukidss.py b/astroquery/ukidss/tests/test_ukidss.py index d95e6268fb..c5235001da 100644 --- a/astroquery/ukidss/tests/test_ukidss.py +++ b/astroquery/ukidss/tests/test_ukidss.py @@ -14,7 +14,8 @@ from ...exceptions import InvalidQueryError DATA_FILES = {"vo_results": "vo_results.html", - "image_results": "image_results.html", + "image_results_noradius": "image_results_noradius.html", + "image_results_radius": "image_results_radius.html", "image": "image.fits", "votable": "votable.xml", "error": "error.html" @@ -74,8 +75,11 @@ def parse_coordinates_mock_return(c): def get_mockreturn(method='GET', url='default_url', params=None, timeout=10, **kwargs): - if "Image" in url: - filename = DATA_FILES["image_results"] + if "GetImage" in url: + filename = DATA_FILES["image_results_noradius"] + url = "Image_URL" + elif "ImageList" in url: + filename = DATA_FILES["image_results_radius"] url = "Image_URL" elif "SQL" in url: filename = DATA_FILES["vo_results"] @@ -114,6 +118,10 @@ def test_get_images_async_1(): def test_get_images_async_2(patch_get, patch_get_readable_fileobj): + # debug: get the table first + tbl = ukidss.core.Ukidss.get_image_table(icrs_skycoord, programme_id="GPS") + assert "deprecated" in tbl.colnames + image_urls = ukidss.core.Ukidss.get_images_async(icrs_skycoord, programme_id="GPS") assert len(image_urls) == 1 diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py index f558aeca3b..3b635fe83c 100644 --- a/astroquery/wfau/core.py +++ b/astroquery/wfau/core.py @@ -291,7 +291,8 @@ def get_images_async(self, coordinates, *, waveband='all', frame_type='stack', show_progress=show_progress) for url in image_urls] - def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True, **kwargs): + def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True, + get_query_payload=False, **kwargs): """ See `get_image_table` for a full list of options. 
@@ -308,7 +309,12 @@ def get_image_list(self, coordinates, *, radius=None, ignore_deprecated=True, ** url_list : list of image urls """ - image_table = self.get_image_table(coordinates, radius=radius, **kwargs) + image_table = self.get_image_table(coordinates, radius=radius, + get_query_payload=get_query_payload, + **kwargs) + if get_query_payload: + # actually a payload, not a table + return image_table if ignore_deprecated and radius is None: image_urls = image_table[image_table['deprecated'] == 0]['Link'] @@ -462,7 +468,7 @@ def parse_imagequery_page(self, html_in, radius=None): with warnings.catch_warnings(): # this is really html; the xml parser doesn't work warnings.simplefilter(action="ignore", category=XMLParsedAsHTMLWarning) - soup = BeautifulSoup(html, features='html5') + soup = BeautifulSoup(html, features='html5lib') httb = soup.findAll('table')[2] firstrow = httb.findAll('tr')[0] for td in firstrow.findAll('td'): @@ -477,7 +483,10 @@ def parse_imagequery_page(self, html_in, radius=None): row.replace(">show<", ">{}<".format(ahref.search(row).groups()[0])) if ">show<" in row else row for row in html_in.split("\n")]) - return ascii.read(html, format='html') + with warnings.catch_warnings(): + # ascii.read uses bs4, result is html, not xml, despite xml tag + warnings.simplefilter(action="ignore", category=XMLParsedAsHTMLWarning) + return ascii.read(html, format='html') def extract_urls(self, html_in): """ From c4db9fb81da465e041df8be5d37cf30efd59e6aa Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Mon, 14 Aug 2023 21:50:27 -0400 Subject: [PATCH 07/12] update test data --- .../tests/data/image_results_noradius.html | 184 ++++-------------- .../tests/data/image_results_radius.html | 169 +++++++++++++--- astroquery/ukidss/tests/test_ukidss.py | 4 +- 3 files changed, 189 insertions(+), 168 deletions(-) diff --git a/astroquery/ukidss/tests/data/image_results_noradius.html b/astroquery/ukidss/tests/data/image_results_noradius.html index
fc34523455..c66ee27fb4 100644 --- a/astroquery/ukidss/tests/data/image_results_noradius.html +++ b/astroquery/ukidss/tests/data/image_results_noradius.html @@ -1,147 +1,43 @@ - - - - - - - -WSA ImageList - - - - - - -
WSA ImageList   

-Not logged in: links will only be returned for frames that are publicly accessible

-Archive Listing

Searching...
-Survey: UKIDSS Galactic Clusters Survey, GCS
-Waveband: K
-Minimum RA: 5.551333333333333 hours Maximum RA: 5.599333333333333 hours
-Minimum Dec: 21.68116666666667 degrees Maximum Dec: 22.347833333333334 degrees
- -
Using database: UKIDSSDR11PLUS -

- - -
View column linkshows jpeg images of multiframe in a new window plus links to download file(s)
Img column linkdownload the RICE compressed FITS image file. Use View column link to retrieve uncompressed images.
Cat column linkdownload the FITS catalogue file.
-
begin row 1
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +  +

GetImage cut-out results

+
J2000 coords: RA: 83.633083 Dec:22.0145 +
Programme: UKIDSS Galactic Clusters Survey, GCS +
Filter: K +
Processing ... +
Connecting to database: UKIDSSDR11PLUS

+

ViewImgCatmultiframeIDframeTypeobstyperaBasedecBaseshortnameexptimedateObsprojectnumDetectorsukirtRunNo
viewFITS 1737551leavOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:08:30.0U/UKIDSS/GCS2141802
viewFITSFITS1737553leavstackOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:08:30.0U/UKIDSS/GCS2141802
- - - - - - - - - - - - - - - + + + + + + + + + - - - - - - - - - - - - - - - + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LinkmultiframeIDframetypeobstypefilteridshortnamedateObsextNumdeprecated
viewFITS 1737559leavOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:09:18.6U/UKIDSS/GCS2141806
show1737581leavstackOBJECT5K2007-10-11 13:12:05.550
viewFITS 1737579leavOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:05.5U/UKIDSS/GCS2141818
show1737579leavOBJECT5K2007-10-11 13:12:05.550
viewFITSFITS1737581leavstackOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:05.5U/UKIDSS/GCS2141818
viewFITS 1737587leavOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:53.8U/UKIDSS/GCS2141822

row(s) 1 to 6 displayed.
-

Back to form (uses Javascript)
- 
+show
+1737587
+leav
+OBJECT
+5
+K
+2007-10-11 13:12:53.8
+5
+0
+
+3 rows returned. 
diff --git a/astroquery/ukidss/tests/data/image_results_radius.html b/astroquery/ukidss/tests/data/image_results_radius.html
index 138a48c8eb..0a8503ad79 100644
--- a/astroquery/ukidss/tests/data/image_results_radius.html
+++ b/astroquery/ukidss/tests/data/image_results_radius.html
@@ -1,22 +1,147 @@
- 
-

GetImage cut-out results

-
J2000 coords: RA: 83.6330757 Dec:22.014436 -
Programme: All UKIDSS surveys -
Filter: all -
Processing ... -
Connecting to database: UKIDSSDR7PLUS

- - - - - - - - - - -
LinkmultiframeIDframetypeobstypefilteridshortnamedateObsextNum
show1737581leavstackOBJECT5K2007-10-11 13:12:05.55
-1 rows returned. + + + + + + + +WSA ImageList + + + + + + +
WSA ImageList   

+Not logged in: links will only be returned for frames that are publicly accessible

+Archive Listing

Searching...
+Survey: UKIDSS Galactic Clusters Survey, GCS
+Waveband: K
+Minimum RA: 5.551333333333333 hours Maximum RA: 5.599333333333333 hours
+Minimum Dec: 21.68116666666667 degrees Maximum Dec: 22.347833333333334 degrees
+ +
Using database: UKIDSSDR11PLUS +

+ + +
View column linkshows jpeg images of multiframe in a new window plus links to download file(s)
Img column linkdownload the RICE compressed FITS image file. Use View column link to retrieve uncompressed images.
Cat column linkdownload the FITS catalogue file.
+
begin row 1
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ViewImgCatmultiframeIDframeTypeobstyperaBasedecBaseshortnameexptimedateObsprojectnumDetectorsukirtRunNo
viewFITSFITS1737553leavstackOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:08:30.0U/UKIDSS/GCS2141802
viewFITS 1737551leavOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:08:30.0U/UKIDSS/GCS2141802
viewFITS 1737559leavOBJECT+5.5777306+21.7913333K+10.0000002007-10-11 13:09:18.6U/UKIDSS/GCS2141806
viewFITSFITS1737581leavstackOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:05.5U/UKIDSS/GCS2141818
viewFITS 1737579leavOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:05.5U/UKIDSS/GCS2141818
viewFITS 1737587leavOBJECT+5.5935528+21.7913333K+10.0000002007-10-11 13:12:53.8U/UKIDSS/GCS2141822

row(s) 1 to 6 displayed.
+

Back to form (uses Javascript)
+ 
diff --git a/astroquery/ukidss/tests/test_ukidss.py b/astroquery/ukidss/tests/test_ukidss.py
index c5235001da..621df3be7f 100644
--- a/astroquery/ukidss/tests/test_ukidss.py
+++ b/astroquery/ukidss/tests/test_ukidss.py
@@ -77,10 +77,10 @@ def get_mockreturn(method='GET', url='default_url',
                    params=None, timeout=10, **kwargs):
     if "GetImage" in url:
         filename = DATA_FILES["image_results_noradius"]
-        url = "Image_URL"
+        url = "GetImage"
     elif "ImageList" in url:
         filename = DATA_FILES["image_results_radius"]
-        url = "Image_URL"
+        url = "ImageList"
     elif "SQL" in url:
         filename = DATA_FILES["vo_results"]
         url = "SQL_URL"

From 79fd8803176f226ccc12495e04c2b3f75f9ae22a Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" 
Date: Mon, 14 Aug 2023 21:53:23 -0400
Subject: [PATCH 08/12] update tests to match new data

---
 astroquery/ukidss/tests/test_ukidss.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/astroquery/ukidss/tests/test_ukidss.py b/astroquery/ukidss/tests/test_ukidss.py
index 621df3be7f..ec280a56b2 100644
--- a/astroquery/ukidss/tests/test_ukidss.py
+++ b/astroquery/ukidss/tests/test_ukidss.py
@@ -118,27 +118,27 @@ def test_get_images_async_1():
 
 def test_get_images_async_2(patch_get, patch_get_readable_fileobj):
 
-    # debug: get the table first
+    # debug check: get the table first & make sure it has the 'deprecated' column as expected
     tbl = ukidss.core.Ukidss.get_image_table(icrs_skycoord, programme_id="GPS")
     assert "deprecated" in tbl.colnames
 
     image_urls = ukidss.core.Ukidss.get_images_async(icrs_skycoord, programme_id="GPS")
 
-    assert len(image_urls) == 1
+    assert len(image_urls) == 3
 
 
 def test_get_image_list(patch_get, patch_get_readable_fileobj):
     urls = ukidss.core.Ukidss.get_image_list(
         icrs_skycoord, frame_type="all", waveband="all", programme_id="GPS")
     print(urls)
-    assert len(urls) == 1
+    assert len(urls) == 3
 
 
 def test_extract_urls():
-    with open(data_path(DATA_FILES["image_results"]), 'r') as infile:
+    with open(data_path(DATA_FILES["image_results_radius"]), 'r') as infile:
         html_in = infile.read()
     urls = ukidss.core.Ukidss.extract_urls(html_in)
-    assert len(urls) == 1
+    assert len(urls) == 14
 
 
 def test_query_region(patch_get, patch_get_readable_fileobj):

From 75e4726e2c03cf480534e40126ed05c3ca9fd6b2 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" 
Date: Mon, 14 Aug 2023 21:59:48 -0400
Subject: [PATCH 09/12] change remote test

---
 astroquery/ukidss/tests/test_ukidss_remote.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/astroquery/ukidss/tests/test_ukidss_remote.py b/astroquery/ukidss/tests/test_ukidss_remote.py
index cb88dac017..a819d9bd45 100644
--- a/astroquery/ukidss/tests/test_ukidss_remote.py
+++ b/astroquery/ukidss/tests/test_ukidss_remote.py
@@ -69,6 +69,9 @@ def test_deprecated_image_list(self):
         result = uk.get_image_list(crd, waveband='all', ignore_deprecated=True)
 
         # this image is not deprecated (deprecated==0)
-        assert "http://wsa.roe.ac.uk/cgi-bin/getImage.cgi?file=/disk73/wsa/ingest/fits/20190614_v5/w20190614_00626_st.fit&mfid=11076607&extNo=4&lx=1276&hx=1426&ly=187&hy=337&rf=0&flip=1&uniq=834_579_14_86394_6&xpos=75.9&ypos=75.7&band=K&ra=211.3194905&dec=54.413845" in result  # noqa: E501
+        # can't check for exact URL match because URLs include generated 'uniq' strings
+        assert any("file=/disk73/wsa/ingest/fits/20190614_v5/w20190614_00626_st.fit"
+                   in x for x in result)
         # this image is deprecated (deprecated==80)
-        assert "http://wsa.roe.ac.uk/cgi-bin/getImage.cgi?file=/disk53/wsa/ingest/fits/20150129_v5/w20150129_02901_st.fit&mfid=8278383&extNo=4&lx=1274&hx=1425&ly=195&hy=345&rf=0&flip=1&uniq=834_579_14_86394_5&xpos=76.6&ypos=75.9&band=J&ra=211.3194905&dec=54.413845" not in result  # noqa: E501
+        assert not any("file=/disk53/wsa/ingest/fits/20150129_v5/w20150129_02901_st.fit"
+                       in x for x in result)

From 5ac89b09ae84add2ef70b22a1216dbb69897fb21 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" 
Date: Mon, 14 Aug 2023 22:04:49 -0400
Subject: [PATCH 10/12] avoid reusing variable

---
 astroquery/wfau/core.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py
index 3b635fe83c..194b0f317c 100644
--- a/astroquery/wfau/core.py
+++ b/astroquery/wfau/core.py
@@ -446,9 +446,9 @@ def get_image_table(self, coordinates, *, waveband='all', frame_type='stack',
         if get_query_payload:
             return request_payload
 
-        response = self._wfau_send_request(query_url, request_payload)
-        self._penultimate_response = response
-        response = self._check_page(response.url, "row")
+        initial_response = self._wfau_send_request(query_url, request_payload)
+        self._penultimate_response = initial_response
+        response = self._check_page(initial_response.url, "row")
         self._last_response = response
 
         return self.parse_imagequery_page(response.text, radius=radius)

From ed94efe2f94004ff48725ca9cbb02b10996e6e36 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" 
Date: Mon, 14 Aug 2023 22:07:37 -0400
Subject: [PATCH 11/12] add changelog entry

---
 CHANGES.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index aa3d78d28c..060ce935b7 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -242,6 +242,12 @@ xmatch
 - Minor internal change to use VOTable as the response format that include
   units, too. [#1375]
 
+wfau
+^^^^
+
+- Minor enhancement to enable getting tables of images to download instead
+  of just raw URLs.  Tables include metadata about deprecation. [#2809]
+
 
 Infrastructure, Utility and Other Changes and Additions
 -------------------------------------------------------

From 8733870a2a10565569b721b3c903ebf9f54b1a7d Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)" 
Date: Tue, 15 Aug 2023 08:34:47 -0400
Subject: [PATCH 12/12] privatize _extract_urls & try to fix a broken old test

---
 astroquery/ukidss/tests/test_ukidss.py |  2 +-
 astroquery/wfau/core.py                | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/astroquery/ukidss/tests/test_ukidss.py b/astroquery/ukidss/tests/test_ukidss.py
index ec280a56b2..d16afb154a 100644
--- a/astroquery/ukidss/tests/test_ukidss.py
+++ b/astroquery/ukidss/tests/test_ukidss.py
@@ -137,7 +137,7 @@ def test_get_image_list(patch_get, patch_get_readable_fileobj):
 def test_extract_urls():
     with open(data_path(DATA_FILES["image_results_radius"]), 'r') as infile:
         html_in = infile.read()
-    urls = ukidss.core.Ukidss.extract_urls(html_in)
+    urls = ukidss.core.Ukidss._extract_urls(html_in)
     assert len(urls) == 14
 
 
diff --git a/astroquery/wfau/core.py b/astroquery/wfau/core.py
index 194b0f317c..9dc946810c 100644
--- a/astroquery/wfau/core.py
+++ b/astroquery/wfau/core.py
@@ -6,7 +6,11 @@
 import time
 from math import cos, radians
 import requests
-from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
+try:
+    from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning
+except ImportError:
+    # workaround: older versions of bs4, which we still support, didn't have this warning
+    XMLParsedAsHTMLWarning = object
 from io import BytesIO, StringIO
 
 import astropy.units as u
@@ -488,7 +492,7 @@ def parse_imagequery_page(self, html_in, radius=None):
                 warnings.simplefilter(action="ignore", category=XMLParsedAsHTMLWarning)
                 return ascii.read(html, format='html')
 
-    def extract_urls(self, html_in):
+    def _extract_urls(self, html_in):
         """
         Helper function that uses regexps to extract the image urls from the
         given HTML.
@@ -676,7 +680,7 @@ def _parse_result(self, response, *, verbose=False):
         -------
         table : `~astropy.table.Table`
         """
-        table_links = self.extract_urls(response.text)
+        table_links = self._extract_urls(response.text)
         # keep only one link that is not a webstart
         if len(table_links) == 0:
             raise Exception("No VOTable found on returned webpage!")