Random bits of cleanup (#37)

kgaughan · Aug 18, 2024 · 7513f44
1 parent 82c918a
commit 7513f44
Show file tree

Hide file tree

Showing 9 changed files with 81 additions and 93 deletions.
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,5 @@ __pycache__/
 # General detritus
 *~
 .*.sw?
+
+/src/uwhoisd/_version.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,7 @@ dev-dependencies = [
     "pytest>=8.3.2",
     "pytest-cov>=5.0.0",
     "types-beautifulsoup4>=4.12.0.20240511",
+    "types-requests>=2.32.0.20240712",
 ]
 universal = true
 generate-hashes = true
@@ -97,6 +98,7 @@ ignore = [
   "TID252",
   "UP006",
   "UP035",
+  "EM101",
 ]
 
 [tool.ruff.lint.isort]

diff --git a/requirements-dev.lock b/requirements-dev.lock
@@ -285,6 +285,9 @@ types-html5lib==1.1.11.20240806 \
     --hash=sha256:575c4fd84ba8eeeaa8520c7e4c7042b7791f5ec3e9c0a5d5c418124c42d9e7e4 \
     --hash=sha256:8060dc98baf63d6796a765bbbc809fff9f7a383f6e3a9add526f814c086545ef
     # via types-beautifulsoup4
+types-requests==2.32.0.20240712 \
+    --hash=sha256:90c079ff05e549f6bf50e02e910210b98b8ff1ebdd18e19c873cd237737c1358 \
+    --hash=sha256:f754283e152c752e46e70942fa2a146b5bc70393522257bb85bd1ef7e019dcc3
 typing-extensions==4.12.2 \
     --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
     --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
@@ -293,3 +296,4 @@ urllib3==2.2.2 \
     --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \
     --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168
     # via requests
+    # via types-requests
diff --git a/src/uwhoisd/net.py b/src/uwhoisd/net.py
@@ -16,7 +16,7 @@
 logger = logging.getLogger("uwhoisd")
 
 
-def handle_signal(sig, frame):
+def handle_signal(_sig, _frame):
     """
     Stop the main loop on signal.
     """
@@ -46,7 +46,7 @@ def __enter__(self):
         self.sock.settimeout(10)
         return self
 
-    def __exit__(self, type, value, traceback):
+    def __exit__(self, _type, value, traceback):
         """
         Terminate a `with` statement.
         """

diff --git a/src/uwhoisd/scraper.py b/src/uwhoisd/scraper.py
@@ -8,7 +8,7 @@
 import sys
 import typing as t
 from urllib.parse import urljoin
-import xml.etree.ElementTree as etree
+import xml.etree.ElementTree as etree  # noqa: N813
 
 from bs4 import BeautifulSoup
 import requests
@@ -28,15 +28,16 @@ def fetch_ipv4_assignments(url: str):
     Fetch WHOIS server list for the IPv4 /8 assignments from IANA.
     """
     res = requests.get(url, stream=False, timeout=10)
-    root = etree.fromstring(res.text)
+    root = etree.fromstring(res.text)  # noqa: S314
     for record in root.findall("assignments:record", NSS):
-        status = record.find("assignments:status", NSS).text
+        status = record.findtext("assignments:status", default="", namespaces=NSS)
         if status not in ("ALLOCATED", "LEGACY"):
             continue
-        prefix = record.find("assignments:prefix", NSS).text
+        prefix = record.findtext("assignments:prefix", default="", namespaces=NSS)
         prefix, _ = prefix.lstrip("0").split("/", 1)
-        whois = record.find("assignments:whois", NSS).text
-        yield prefix, whois
+        whois = record.findtext("assignments:whois", default="", namespaces=NSS)
+        if prefix != "" and whois != "":
+            yield prefix, whois
 
 
 def fetch(session: requests.Session, url: str):
@@ -64,7 +65,7 @@ def scrape_whois_from_iana(root_zone_db_url: str, existing: t.Mapping[str, str])
     body = fetch(session, root_zone_db_url)
 
     for link in body.select("#tld-table .tld a"):
-        if "href" not in link.attrs:
+        if "href" not in link.attrs or link.string is None:
             continue
 
         zone = munge_zone(link.string)

diff --git a/src/uwhoisd/utils.py b/src/uwhoisd/utils.py
@@ -8,47 +8,38 @@
 from importlib import resources
 import os.path
 import re
+import typing as t
 
 # We only accept ASCII or ACE-encoded domain names. IDNs must be converted
 # to ACE first.
 FQDN_PATTERN = re.compile(r"^([-a-z0-9]{1,63})(\.[-a-z0-9]{1,63}){1,}$")
 
 
 class ConfigParser(configparser.ConfigParser):
-    """
-    Enhanced configuration parser.
-    """
+    """Enhanced configuration parser."""
 
-    def get_bool(self, section, option):
-        """
-        Get a configuration option as a boolean.
-        """
+    def get_bool(self, section: str, option: str) -> bool:
+        """Get a configuration option as a boolean."""
         return self.get(section, option).lower() in ("1", "true", "yes", "on")
 
-    def get_list(self, section, option):
-        """
-        Split the lines of a configuration option value into a list.
-        """
+    def get_list(self, section: str, option: str) -> t.List[str]:
+        """Split the lines of a configuration option value into a list."""
         lines = []
         for line in self.get(section, option).split("\n"):
             line = line.strip()
             if line != "":
                 lines.append(line)
         return lines
 
-    def get_section_dict(self, section):
-        """
-        Pull a section out of the config as a dictionary safely.
-        """
+    def get_section_dict(self, section: str) -> t.Dict[str, str]:
+        """Pull a section out of the config as a dictionary safely."""
         if self.has_section(section):
             return {key: decode_value(value) for key, value in self.items(section)}
         return {}
 
 
-def make_config_parser(config_path=None):
-    """
-    Create a config parser.
-    """
+def make_config_parser(config_path: t.Optional[str] = None) -> ConfigParser:
+    """Create a config parser."""
     parser = ConfigParser()
 
     with resources.open_text("uwhoisd", "defaults.ini", encoding="utf-8") as fh:
@@ -63,75 +54,21 @@ def make_config_parser(config_path=None):
     return parser
 
 
-def is_well_formed_fqdn(fqdn):
-    """
-    Check if a string looks like a well formed FQDN without a trailing dot.
-
-    >>> is_well_formed_fqdn('stereochro.me')
-    True
-    >>> is_well_formed_fqdn('stereochro.me.')
-    False
-    >>> is_well_formed_fqdn('stereochrome')
-    False
-    >>> is_well_formed_fqdn('stereochrome.')
-    False
-    >>> is_well_formed_fqdn('keithgaughan.co.uk')
-    True
-    >>> is_well_formed_fqdn('')
-    False
-    >>> is_well_formed_fqdn('.')
-    False
-    >>> is_well_formed_fqdn('x' * 64 + '.foo')
-    False
-    >>> is_well_formed_fqdn('foo.' + 'x' * 64)
-    False
-    """
+def is_well_formed_fqdn(fqdn: str) -> bool:
+    """Check if a string looks like a well formed FQDN without a trailing dot."""
     return FQDN_PATTERN.match(fqdn) is not None
 
 
-def split_fqdn(fqdn):
-    """
-    Split an FQDN into the domain name and zone.
-
-    >>> split_fqdn('stereochro.me')
-    ['stereochro', 'me']
-    >>> split_fqdn('stereochro.me.')
-    ['stereochro', 'me']
-    >>> split_fqdn('stereochrome')
-    ['stereochrome']
-    >>> split_fqdn('keithgaughan.co.uk')
-    ['keithgaughan', 'co.uk']
-    """
-    return fqdn.rstrip(".").split(".", 1)
+def split_fqdn(fqdn: str) -> t.List[str]:
+    """Split an FQDN into the domain name and zone."""
+    return fqdn.rstrip(".").split(".", 1) if fqdn else []
 
 
-def decode_value(s):
-    r"""
-    Decode a quoted string.
+def decode_value(s: str) -> str:
+    """Decode a quoted string.
 
     If a string is quoted, it's parsed like a python string, otherwise it's
     passed straight through as-is.
-
-    >>> decode_value('foo')
-    'foo'
-    >>> decode_value('"foo"')
-    'foo'
-    >>> decode_value('"foo\\nbar"')
-    'foo\nbar'
-    >>> decode_value('foo\\nbar')
-    'foo\\nbar'
-    >>> decode_value('"foo')
-    Traceback (most recent call last):
-        ...
-    ValueError: The trailing quote be present and match the leading quote.
-    >>> decode_value("'foo")
-    Traceback (most recent call last):
-        ...
-    ValueError: The trailing quote be present and match the leading quote.
-    >>> decode_value("\"foo\'")
-    Traceback (most recent call last):
-        ...
-    ValueError: The trailing quote be present and match the leading quote.
     """
     if len(s) > 1 and s[0] in ('"', "'"):
         if s[0] != s[-1]:

diff --git a/tests/test_bucket.py b/tests/test_bucket.py
@@ -66,7 +66,7 @@ def test_comparison():
 def test_pickle():
     original = rl.TokenBucket(5, 20)
     original.consume(1)
-    unpickled = pickle.loads(pickle.dumps(original))
+    unpickled = pickle.loads(pickle.dumps(original))  # noqa: S301
     assert unpickled is not original
     assert original.clock is unpickled.clock
     assert original.ts == unpickled.ts

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -0,0 +1,42 @@
+import pytest
+
+from uwhoisd import utils
+
+
+def test_is_well_formed_fqdn():
+    assert utils.is_well_formed_fqdn("stereochro.me")
+    assert utils.is_well_formed_fqdn("bbc.co.uk")
+    assert utils.is_well_formed_fqdn("x" * 63 + ".com")
+
+
+def test_malformed_domains():
+    assert not utils.is_well_formed_fqdn("stereochrome"), "Must have more than one label"
+    assert not utils.is_well_formed_fqdn("stereochr.me."), "No trailing dot allowed"
+    assert not utils.is_well_formed_fqdn(".stereochr.me"), "No leading dot allowed"
+    assert not utils.is_well_formed_fqdn("stereochrome."), "Sigh..."
+    assert not utils.is_well_formed_fqdn("invalid domain.com"), "No spaces allowed"
+    assert not utils.is_well_formed_fqdn(""), "Must not be an empty string"
+    assert not utils.is_well_formed_fqdn("."), "Must have at least one label"
+    assert not utils.is_well_formed_fqdn("x" * 64 + ".foo"), "Labels should not exceed 63 characters (1)"
+    assert not utils.is_well_formed_fqdn("foo." + "x" * 64), "Labels should not exceed 63 characters (2)"
+
+
+def test_split_fqdn():
+    assert utils.split_fqdn("stereochro.me") == ["stereochro", "me"]
+    assert utils.split_fqdn("stereochro.me.") == ["stereochro", "me"]
+    assert utils.split_fqdn("stereochrome") == ["stereochrome"]
+    assert utils.split_fqdn("bbc.co.uk") == ["bbc", "co.uk"]
+    assert utils.split_fqdn("") == []
+
+
+def test_decode_value():
+    assert utils.decode_value("foo") == "foo"
+    assert utils.decode_value('"foo"') == "foo"
+    assert utils.decode_value('"foo\nbar"') == "foo\nbar"
+    assert utils.decode_value("foo\nbar") == "foo\nbar"
+    assert utils.decode_value('""') == ""
+    assert utils.decode_value("''") == ""
+
+    for bad_value in ['"foo', "'foo", "\"foo'"]:
+        with pytest.raises(ValueError, match="The trailing quote be present and match the leading quote."):
+            utils.decode_value(bad_value)
diff --git a/tests/utils.py b/tests/utils.py
@@ -15,15 +15,15 @@ class Clock:
     A fake clock.
     """
 
-    def __init__(self, initial=0):
+    def __init__(self, initial: int = 0):
         super().__init__()
         self.ticks = initial
 
     def __call__(self):
         return self.ticks
 
 
-def create_uwhois():
+def create_uwhois() -> uwhoisd.UWhois:
     """Prepare a UWhois object for testing."""
     config = path.join(HERE, "..", "extra", "uwhoisd.ini")
     parser = make_config_parser(config)
@@ -32,7 +32,7 @@ def create_uwhois():
     return uwhois
 
 
-def read_transcript(name):
+def read_transcript(name: str) -> str:
     """Read a WHOIS transcript file."""
     with open(path.join(HERE, "transcripts", name)) as fh:
         return fh.read()
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,3 +13,5 @@ __pycache__/ @@
     # General detritus
     *~
     .*.sw?
+    /src/uwhoisd/_version.py