Skip to content

Commit

Permalink
change logic to urlparse & update doctests
Browse files Browse the repository at this point in the history
Signed-off-by: mathioud <[email protected]>
  • Loading branch information
gmathiou4 committed Jul 31, 2023
1 parent 7d065b0 commit 6b3b3a5
Showing 1 changed file with 20 additions and 14 deletions.
34 changes: 20 additions & 14 deletions src/python_inspector/utils_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from typing import List
from typing import NamedTuple
from urllib.parse import quote_plus
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.parse import urlunparse

import attr
import packageurl
Expand Down Expand Up @@ -1595,29 +1596,34 @@ def fetch_links(
url, _, _sha256 = anchor_tag["href"].partition("#sha256=")
if "data-requires-python" in anchor_tag.attrs:
python_requires = anchor_tag.attrs["data-requires-python"]
url = resolve_relative_url(package_url, url) # Resolve relative URL
# Resolve relative URL
url = resolve_relative_url(package_url, url)
links.append(Link(url=url, python_requires=python_requires))
# TODO: keep sha256
return links


def resolve_relative_url(package_url, url):
    """
    Return the resolved `url` URL string given a `package_url` base URL string
    of a package.

    `package_url` is treated as a directory: its path is given a trailing
    slash if it does not have one, and any query string or fragment it carries
    is ignored. `url` is then resolved against it following the standard
    relative reference rules (any number of leading "../" segments, bare file
    names, absolute paths, protocol-relative "//host/path" references).

    A `url` that already starts with "http://" or "https://" is returned
    unchanged. An empty `url` returns `package_url` unchanged.

    For example:
    >>> resolve_relative_url("https://example.com/package", "../path/file.txt")
    'https://example.com/path/file.txt'
    >>> resolve_relative_url("https://example.com/simple/foo/", "../../packages/foo.whl")
    'https://example.com/packages/foo.whl'
    >>> resolve_relative_url("https://example.com/simple/foo", "foo.whl")
    'https://example.com/simple/foo/foo.whl'
    >>> resolve_relative_url("https://example.com/simple/foo", "https://cdn.example.org/foo.whl")
    'https://cdn.example.org/foo.whl'
    """
    # Imported locally so this function does not depend on which of these
    # names the module happens to import at the top of the file.
    from urllib.parse import urljoin
    from urllib.parse import urlparse
    from urllib.parse import urlunparse

    # Absolute http(s) URLs are passed through verbatim, with no normalization.
    if url.startswith(("http://", "https://")):
        return url

    # An empty reference designates the base document itself.
    if not url:
        return package_url

    base_parts = urlparse(package_url)
    base_path = base_parts.path
    # urljoin() drops the last path segment of a base that does not end with
    # a slash; the package URL designates a directory, so make that explicit.
    if not base_path.endswith("/"):
        base_path += "/"

    # The base query string and fragment must never leak in the resolved URL.
    base_url = urlunparse(
        base_parts._replace(path=base_path, params="", query="", fragment="")
    )

    # urljoin() handles multiple "..", ".", absolute paths, protocol-relative
    # references and preserves the query and fragment of `url`.
    return urljoin(base_url, url)


Expand Down

0 comments on commit 6b3b3a5

Please sign in to comment.