Skip to content

Commit

Permalink
Random bits of cleanup (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
kgaughan authored Aug 18, 2024
1 parent 82c918a commit 7513f44
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 93 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ __pycache__/
# General detritus
*~
.*.sw?

/src/uwhoisd/_version.py
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ dev-dependencies = [
"pytest>=8.3.2",
"pytest-cov>=5.0.0",
"types-beautifulsoup4>=4.12.0.20240511",
"types-requests>=2.32.0.20240712",
]
universal = true
generate-hashes = true
Expand Down Expand Up @@ -97,6 +98,7 @@ ignore = [
"TID252",
"UP006",
"UP035",
"EM101",
]

[tool.ruff.lint.isort]
Expand Down
4 changes: 4 additions & 0 deletions requirements-dev.lock
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,9 @@ types-html5lib==1.1.11.20240806 \
--hash=sha256:575c4fd84ba8eeeaa8520c7e4c7042b7791f5ec3e9c0a5d5c418124c42d9e7e4 \
--hash=sha256:8060dc98baf63d6796a765bbbc809fff9f7a383f6e3a9add526f814c086545ef
# via types-beautifulsoup4
types-requests==2.32.0.20240712 \
--hash=sha256:90c079ff05e549f6bf50e02e910210b98b8ff1ebdd18e19c873cd237737c1358 \
--hash=sha256:f754283e152c752e46e70942fa2a146b5bc70393522257bb85bd1ef7e019dcc3
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
Expand All @@ -293,3 +296,4 @@ urllib3==2.2.2 \
--hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \
--hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168
# via requests
# via types-requests
4 changes: 2 additions & 2 deletions src/uwhoisd/net.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
logger = logging.getLogger("uwhoisd")


def handle_signal(sig, frame):
def handle_signal(_sig, _frame):
"""
Stop the main loop on signal.
"""
Expand Down Expand Up @@ -46,7 +46,7 @@ def __enter__(self):
self.sock.settimeout(10)
return self

def __exit__(self, type, value, traceback):
def __exit__(self, _type, value, traceback):
"""
Terminate a `with` statement.
"""
Expand Down
15 changes: 8 additions & 7 deletions src/uwhoisd/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import sys
import typing as t
from urllib.parse import urljoin
import xml.etree.ElementTree as etree
import xml.etree.ElementTree as etree # noqa: N813

from bs4 import BeautifulSoup
import requests
Expand All @@ -28,15 +28,16 @@ def fetch_ipv4_assignments(url: str):
Fetch WHOIS server list for the IPv4 /8 assignments from IANA.
"""
res = requests.get(url, stream=False, timeout=10)
root = etree.fromstring(res.text)
root = etree.fromstring(res.text) # noqa: S314
for record in root.findall("assignments:record", NSS):
status = record.find("assignments:status", NSS).text
status = record.findtext("assignments:status", default="", namespaces=NSS)
if status not in ("ALLOCATED", "LEGACY"):
continue
prefix = record.find("assignments:prefix", NSS).text
prefix = record.findtext("assignments:prefix", default="", namespaces=NSS)
prefix, _ = prefix.lstrip("0").split("/", 1)
whois = record.find("assignments:whois", NSS).text
yield prefix, whois
whois = record.findtext("assignments:whois", default="", namespaces=NSS)
if prefix != "" and whois != "":
yield prefix, whois


def fetch(session: requests.Session, url: str):
Expand Down Expand Up @@ -64,7 +65,7 @@ def scrape_whois_from_iana(root_zone_db_url: str, existing: t.Mapping[str, str])
body = fetch(session, root_zone_db_url)

for link in body.select("#tld-table .tld a"):
if "href" not in link.attrs:
if "href" not in link.attrs or link.string is None:
continue

zone = munge_zone(link.string)
Expand Down
97 changes: 17 additions & 80 deletions src/uwhoisd/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,47 +8,38 @@
from importlib import resources
import os.path
import re
import typing as t

# We only accept ASCII or ACE-encoded domain names. IDNs must be converted
# to ACE first.
FQDN_PATTERN = re.compile(r"^([-a-z0-9]{1,63})(\.[-a-z0-9]{1,63}){1,}$")


class ConfigParser(configparser.ConfigParser):
"""
Enhanced configuration parser.
"""
"""Enhanced configuration parser."""

def get_bool(self, section, option):
"""
Get a configuration option as a boolean.
"""
def get_bool(self, section: str, option: str) -> bool:
"""Get a configuration option as a boolean."""
return self.get(section, option).lower() in ("1", "true", "yes", "on")

def get_list(self, section, option):
"""
Split the lines of a configuration option value into a list.
"""
def get_list(self, section: str, option: str) -> t.List[str]:
"""Split the lines of a configuration option value into a list."""
lines = []
for line in self.get(section, option).split("\n"):
line = line.strip()
if line != "":
lines.append(line)
return lines

def get_section_dict(self, section):
"""
Pull a section out of the config as a dictionary safely.
"""
def get_section_dict(self, section: str) -> t.Dict[str, str]:
"""Pull a section out of the config as a dictionary safely."""
if self.has_section(section):
return {key: decode_value(value) for key, value in self.items(section)}
return {}


def make_config_parser(config_path=None):
"""
Create a config parser.
"""
def make_config_parser(config_path: t.Optional[str] = None) -> ConfigParser:
"""Create a config parser."""
parser = ConfigParser()

with resources.open_text("uwhoisd", "defaults.ini", encoding="utf-8") as fh:
Expand All @@ -63,75 +54,21 @@ def make_config_parser(config_path=None):
return parser


def is_well_formed_fqdn(fqdn):
"""
Check if a string looks like a well formed FQDN without a trailing dot.
>>> is_well_formed_fqdn('stereochro.me')
True
>>> is_well_formed_fqdn('stereochro.me.')
False
>>> is_well_formed_fqdn('stereochrome')
False
>>> is_well_formed_fqdn('stereochrome.')
False
>>> is_well_formed_fqdn('keithgaughan.co.uk')
True
>>> is_well_formed_fqdn('')
False
>>> is_well_formed_fqdn('.')
False
>>> is_well_formed_fqdn('x' * 64 + '.foo')
False
>>> is_well_formed_fqdn('foo.' + 'x' * 64)
False
"""
def is_well_formed_fqdn(fqdn: str) -> bool:
"""Check if a string looks like a well formed FQDN without a trailing dot."""
return FQDN_PATTERN.match(fqdn) is not None


def split_fqdn(fqdn):
"""
Split an FQDN into the domain name and zone.
>>> split_fqdn('stereochro.me')
['stereochro', 'me']
>>> split_fqdn('stereochro.me.')
['stereochro', 'me']
>>> split_fqdn('stereochrome')
['stereochrome']
>>> split_fqdn('keithgaughan.co.uk')
['keithgaughan', 'co.uk']
"""
return fqdn.rstrip(".").split(".", 1)
def split_fqdn(fqdn: str) -> t.List[str]:
"""Split an FQDN into the domain name and zone."""
return fqdn.rstrip(".").split(".", 1) if fqdn else []


def decode_value(s):
r"""
Decode a quoted string.
def decode_value(s: str) -> str:
"""Decode a quoted string.
If a string is quoted, it's parsed like a python string, otherwise it's
passed straight through as-is.
>>> decode_value('foo')
'foo'
>>> decode_value('"foo"')
'foo'
>>> decode_value('"foo\\nbar"')
'foo\nbar'
>>> decode_value('foo\\nbar')
'foo\\nbar'
>>> decode_value('"foo')
Traceback (most recent call last):
...
ValueError: The trailing quote be present and match the leading quote.
>>> decode_value("'foo")
Traceback (most recent call last):
...
ValueError: The trailing quote be present and match the leading quote.
>>> decode_value("\"foo\'")
Traceback (most recent call last):
...
ValueError: The trailing quote be present and match the leading quote.
"""
if len(s) > 1 and s[0] in ('"', "'"):
if s[0] != s[-1]:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_comparison():
def test_pickle():
original = rl.TokenBucket(5, 20)
original.consume(1)
unpickled = pickle.loads(pickle.dumps(original))
unpickled = pickle.loads(pickle.dumps(original)) # noqa: S301
assert unpickled is not original
assert original.clock is unpickled.clock
assert original.ts == unpickled.ts
Expand Down
42 changes: 42 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pytest

from uwhoisd import utils


def test_is_well_formed_fqdn():
    """Accept well-formed names, including one whose first label is 63 characters long."""
    longest_label = "x" * 63
    accepted = ("stereochro.me", "bbc.co.uk", longest_label + ".com")
    for fqdn in accepted:
        assert utils.is_well_formed_fqdn(fqdn)


def test_malformed_domains():
    """Reject malformed names, reporting the offending input and the rule it breaks."""
    # Each case pairs an invalid name with the reason it must be rejected, so
    # a failure identifies both the input and the rule that was violated.
    cases = [
        ("stereochrome", "Must have more than one label"),
        ("stereochr.me.", "No trailing dot allowed"),
        (".stereochr.me", "No leading dot allowed"),
        ("stereochrome.", "A single label with a trailing dot is not an FQDN"),
        ("invalid domain.com", "No spaces allowed"),
        ("", "Must not be an empty string"),
        (".", "Must have at least one label"),
        ("x" * 64 + ".foo", "Labels should not exceed 63 characters (1)"),
        ("foo." + "x" * 64, "Labels should not exceed 63 characters (2)"),
    ]
    for fqdn, reason in cases:
        assert not utils.is_well_formed_fqdn(fqdn), f"{fqdn!r}: {reason}"


def test_split_fqdn():
    """Check the name/zone split for dotted, trailing-dot, single-label and empty inputs."""
    expected_parts = {
        "stereochro.me": ["stereochro", "me"],
        "stereochro.me.": ["stereochro", "me"],
        "stereochrome": ["stereochrome"],
        "bbc.co.uk": ["bbc", "co.uk"],
        "": [],
    }
    for fqdn, parts in expected_parts.items():
        assert utils.split_fqdn(fqdn) == parts


def test_decode_value():
    """Exercise quote stripping, escape decoding and quote-mismatch errors in ``decode_value``."""
    # Unquoted values pass through untouched; quoted values lose their quotes.
    assert utils.decode_value("foo") == "foo"
    assert utils.decode_value('"foo"') == "foo"
    assert utils.decode_value('"foo\nbar"') == "foo\nbar"
    assert utils.decode_value("foo\nbar") == "foo\nbar"
    assert utils.decode_value('""') == ""
    assert utils.decode_value("''") == ""

    # Backslash escapes are decoded only inside quotes. These two cases were
    # covered by the doctests this test replaces and use a literal backslash
    # followed by "n", not a newline character.
    assert utils.decode_value('"foo\\nbar"') == "foo\nbar"
    assert utils.decode_value("foo\\nbar") == "foo\\nbar"

    # `match` is a regular expression, so the final full stop is escaped to
    # require a literal "." rather than any character.
    for bad_value in ['"foo', "'foo", "\"foo'"]:
        with pytest.raises(ValueError, match=r"The trailing quote be present and match the leading quote\."):
            utils.decode_value(bad_value)
6 changes: 3 additions & 3 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ class Clock:
A fake clock.
"""

def __init__(self, initial=0):
def __init__(self, initial: int = 0):
super().__init__()
self.ticks = initial

def __call__(self):
return self.ticks


def create_uwhois():
def create_uwhois() -> uwhoisd.UWhois:
"""Prepare a UWhois object for testing."""
config = path.join(HERE, "..", "extra", "uwhoisd.ini")
parser = make_config_parser(config)
Expand All @@ -32,7 +32,7 @@ def create_uwhois():
return uwhois


def read_transcript(name):
def read_transcript(name: str) -> str:
"""Read a WHOIS transcript file."""
with open(path.join(HERE, "transcripts", name)) as fh:
return fh.read()

0 comments on commit 7513f44

Please sign in to comment.