Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically resolve latest/current URLs for Debian/Ubuntu #647

Merged
merged 10 commits into from
Sep 8, 2023
280 changes: 152 additions & 128 deletions openstack_image_manager/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,101 @@
import requests
import ruamel.yaml
import typer
import yaml

app = typer.Typer()
DEBUBU_REGEX = r'<a href="([^"]+)/">(?:release-)?([0-9]+)(\-[0-9]+)?/</a>'

IMAGES = ["almalinux", "centos", "debian", "rockylinux", "ubuntu"]

def get_latest_default(shortname, latest_checksum_url, latest_url, checksum_type="sha256"):
    """Resolve checksum and download URL of the latest image from a checksum file.

    The checksum file at ``latest_checksum_url`` is downloaded and parsed
    line by line; the accepted line layout depends on ``shortname``.

    For the CentOS short names the file name in ``latest_url`` is treated
    as a template: the literal markers ``HEREBE`` and ``DRAGONS`` are
    removed and the remainder is used as a regular expression to select
    candidate file names. The naturally-sorted last candidate becomes the
    latest file, and ``latest_url`` is rewritten to point at it.

    Args:
        shortname: Short name of the image (e.g. ``"rocky-9"``); selects
            the parsing rules.
        latest_checksum_url: URL of the checksum file.
        latest_url: Download URL (or URL template, see above) of the image.
        checksum_type: Prefix put in front of the checksum value.

    Returns:
        A tuple ``(checksum, url, version)`` where ``checksum`` is
        ``"<checksum_type>:<hex digest>"``, ``url`` is the (possibly
        rewritten) download URL and ``version`` is always ``None``.

    Raises:
        requests.HTTPError: If the checksum file cannot be fetched.
        RuntimeError: If the checksum file has no entry for the image.
    """
    # Bound the wait so a stalled mirror cannot hang the whole update run.
    result = requests.get(latest_checksum_url, timeout=30)
    result.raise_for_status()

    latest_filename = os.path.basename(urlparse(latest_url).path)
    filename_pattern = None
    if shortname in ["centos-stream-8", "centos-stream-9", "centos-7"]:
        filename_pattern = latest_filename.replace("HEREBE", "")
        filename_pattern = filename_pattern.replace("DRAGONS", "")

    checksums = {}
    for line in result.text.split("\n"):
        cs = re.split(r"\s+", line)
        if shortname in ["rocky-8", "rocky-9"]:
            # Four-field lines starting with "SHA256"; the digest is the
            # last field and is always stored under the requested name.
            if len(cs) == 4 and cs[0] == "SHA256":
                checksums[latest_filename] = cs[3]
        elif shortname in ["centos-7"]:
            # "<digest> <filename>" lines, filtered by the name pattern.
            if len(cs) == 2 and re.search(filename_pattern, cs[1]):
                checksums[cs[1]] = cs[0]
        elif shortname in ["centos-stream-8", "centos-stream-9"]:
            # Four-field "SHA256" lines; the first and last character of
            # the second field are dropped (presumably the parentheses
            # around the file name -- confirm against a real CHECKSUM file).
            if len(cs) == 4 and cs[0] == "SHA256" and re.search(
                filename_pattern, cs[1][1:-1]
            ):
                checksums[cs[1][1:-1]] = cs[3]
        else:
            # Default layout: "<digest> <filename>".
            if len(cs) == 2:
                checksums[cs[1]] = cs[0]

    if filename_pattern:
        if not checksums:
            raise RuntimeError(
                f"{latest_checksum_url} does not contain a file matching {filename_pattern}"
            )
        new_latest_filename = natsorted(checksums.keys())[-1]
        new_latest_url = latest_url.replace(latest_filename, new_latest_filename)

        logger.info(f"Latest URL is now {new_latest_url}")
        logger.info(f"Latest filename is now {new_latest_filename}")

        latest_filename = new_latest_filename
        latest_url = new_latest_url

    if latest_filename not in checksums:
        raise RuntimeError(f"{latest_checksum_url} does not contain {latest_filename}")

    current_checksum = f"{checksum_type}:{checksums[latest_filename]}"
    return current_checksum, latest_url, None


def resolve_debubu(base_url, rex=re.compile(DEBUBU_REGEX)):
    """Find the newest dated build folder in a Debian/Ubuntu directory listing.

    Args:
        base_url: URL of the HTML directory index to scan.
        rex: Compiled pattern whose three groups are the folder name, the
            numeric date and an optional ``-<build>`` suffix, as produced
            by ``DEBUBU_REGEX``.

    Returns:
        A tuple ``(folder, date, build)`` of strings for the newest entry;
        ``build`` is the empty string when the folder has no build suffix.

    Raises:
        requests.HTTPError: If the listing cannot be fetched.
        RuntimeError: If the listing contains no matching folder.
    """
    # Bound the wait so a stalled mirror cannot hang the whole update run.
    result = requests.get(base_url, timeout=30)
    result.raise_for_status()

    candidates = rex.findall(result.text)
    if not candidates:
        raise RuntimeError(f"{base_url} does not list any dated build folder")

    def newest_first_key(candidate):
        folder, date, build = candidate
        # Compare date and build as numbers: a plain string sort would
        # rank build "-999" above "-1000" and let the folder prefix
        # outweigh the date. The folder name only breaks exact ties.
        return int(date), int(build.lstrip("-") or 0), folder

    latest_folder, latest_date, latest_build = max(candidates, key=newest_first_key)
    return latest_folder, latest_date, latest_build


def get_latest_debubu(shortname, latest_checksum_url, latest_url, checksum_type=None):
    """Resolve checksum, URL and version of the newest Debian/Ubuntu image.

    ``latest_url`` is split into ``<base>/<alias>/<filename>``; the alias
    component is discarded and replaced by the newest dated folder found
    by ``resolve_debubu(base)``. The checksum file with the same name as
    the last path component of ``latest_checksum_url`` is then read from
    that dated folder.

    Args:
        shortname: Short name of the image. Not used by this getter; it is
            accepted so both getters share one signature.
        latest_checksum_url: URL of the checksum file; only its file name
            is used.
        latest_url: Download URL of the image below the alias folder.
        checksum_type: Checksum prefix. When ``None`` it is guessed from
            the digest length (64 characters means sha256, anything else
            is taken to be sha512).

    Returns:
        A tuple ``(checksum, url, version)`` where ``checksum`` is
        ``"<type>:<hex digest>"``, ``url`` points into the dated folder
        and ``version`` is the date string of that folder.

    Raises:
        requests.HTTPError: If the listing or checksum file cannot be
            fetched.
        RuntimeError: If the checksum file has no entry for the image.
    """
    base_url, _, filename = latest_url.rsplit("/", 2)
    latest_folder, latest_date, latest_build = resolve_debubu(base_url)

    current_base_url = f"{base_url}/{latest_folder}"
    checksum_filename = latest_checksum_url.rsplit("/", 1)[-1]
    current_checksum_url = f"{current_base_url}/{checksum_filename}"

    # Bound the wait so a stalled mirror cannot hang the whole update run.
    result = requests.get(current_checksum_url, timeout=30)
    result.raise_for_status()

    current_filename = filename
    if latest_build:  # Debian includes date-build in file name
        fn_pre, fn_suf = filename.rsplit(".", 1)
        current_filename = f"{fn_pre}-{latest_date}{latest_build}.{fn_suf}"

    current_checksum = None
    for line in result.text.splitlines():
        cs = line.split()
        if len(cs) != 2:
            continue
        digest, listed_name = cs
        if listed_name.startswith("*"):  # Ubuntu has the asterisk in front of the name
            listed_name = listed_name[1:]
        if listed_name != current_filename:
            continue
        if checksum_type is None:  # use heuristics to distinguish sha256/sha512
            checksum_type = "sha256" if len(digest) == 64 else "sha512"
        current_checksum = f"{checksum_type}:{digest}"
        break

    if current_checksum is None:
        raise RuntimeError(f"{current_checksum_url} does not contain {current_filename}")

    current_url = f"{current_base_url}/{current_filename}"
    return current_checksum, current_url, latest_date


# Dispatch table: image definition name -> getter function that resolves the
# checksum and download URL of the newest upstream image. Debian and Ubuntu
# share the dated-folder getter; the other distributions use the generic one.
IMAGES = {
"almalinux": get_latest_default,
"centos": get_latest_default,
"debian": get_latest_debubu,
"rockylinux": get_latest_default,
"ubuntu": get_latest_debubu,
}


def mirror_image(
image, latest_url, minio_server, minio_bucket, minio_access_key, minio_secret_key
image, minio_server, minio_bucket, minio_access_key, minio_secret_key
):
client = Minio(
minio_server,
Expand All @@ -35,9 +121,8 @@ def mirror_image(
)

version = image["versions"][0]
version["source"] = latest_url

path = urlparse(version["source"])
path = urlparse(version["url"])
dirname = image["shortname"]
filename, fileextension = os.path.splitext(os.path.basename(path.path))

Expand All @@ -54,8 +139,8 @@ def mirror_image(
logger.info("'%s' available in '%s'" % (new_filename, dirname))
except S3Error:
logger.info("'%s' not yet available in '%s'" % (new_filename, dirname))
logger.info("Downloading '%s'" % version["source"])
response = requests.get(version["source"], stream=True)
logger.info("Downloading '%s'" % version["url"])
response = requests.get(version["url"], stream=True)
with open(os.path.basename(path.path), "wb") as fp:
shutil.copyfileobj(response.raw, fp)
del response
Expand All @@ -73,85 +158,22 @@ def mirror_image(
os.remove(filename)


def update_image(image, minio_server, minio_bucket, minio_access_key, minio_secret_key):
def update_image(image, getter, minio_server, minio_bucket, minio_access_key, minio_secret_key):
name = image["name"]
logger.info(f"Checking image {name}")

latest_url = image["latest_url"]
logger.info(f"Latest download URL is {latest_url}")

parsed_url = urlparse(latest_url)
latest_filename = os.path.basename(parsed_url.path)

latest_checksum_url = image["latest_checksum_url"]
logger.info(f"Getting checksums from {latest_checksum_url}")

shortname = image["shortname"]
current_checksum, current_url, current_version = getter(shortname, latest_checksum_url, latest_url)

result = requests.get(latest_checksum_url)
checksums = {}

checksum_type = "sha256"
filename_pattern = None

if image["shortname"] in ["centos-stream-8", "centos-stream-9", "centos-7"]:
filename_pattern = latest_filename.replace("HEREBE", "")
filename_pattern = filename_pattern.replace("DRAGONS", "")
elif image["shortname"] in ["debian-10", "debian-11", "debian-12"]:
checksum_type = "sha512"

for line in result.text.split("\n"):
if image["shortname"] in ["rocky-8", "rocky-9"]:
splitted_line = re.split("\s+", line) # noqa W605
if splitted_line[0] == "SHA256":
checksums[latest_filename] = splitted_line[3]
elif image["shortname"] in [
"ubuntu-14.04",
"ubuntu-16.04",
"ubuntu-16.04-minimal",
"ubuntu-18.04",
"ubuntu-18.04-minimal",
"ubuntu-20.04",
"ubuntu-20.04-minimal",
"ubuntu-22.04",
"ubuntu-22.04-minimal",
]:
splitted_line = re.split("\s+", line) # noqa W605
if len(splitted_line) == 2:
checksums[splitted_line[1][1:]] = splitted_line[0]
elif image["shortname"] in ["centos-7"]:
splitted_line = re.split("\s+", line) # noqa W605
if len(splitted_line) == 2:
if re.search(filename_pattern, splitted_line[1]):
checksums[splitted_line[1]] = splitted_line[0]
elif image["shortname"] in ["centos-stream-8", "centos-stream-9"]:
splitted_line = re.split("\s+", line) # noqa W605
if splitted_line[0] == "SHA256" and re.search(
filename_pattern, splitted_line[1][1:-1]
):
checksums[splitted_line[1][1:-1]] = splitted_line[3]
else:
splitted_line = re.split("\s+", line) # noqa W605
if len(splitted_line) == 2:
checksums[splitted_line[1]] = splitted_line[0]

if filename_pattern:
new_latest_filename = natsorted(checksums.keys())[-1]
new_latest_url = latest_url.replace(latest_filename, new_latest_filename)

logger.info(f"Latest URL is now {new_latest_url}")
logger.info(f"Latest filename is now {new_latest_filename}")

latest_filename = new_latest_filename
latest_url = new_latest_url
logger.info(f"Checksum of current {current_url.rsplit('/', 1)[-1]} is {current_checksum}")

current_checksum = f"{checksum_type}:{checksums[latest_filename]}"
logger.info(f"Checksum of current {latest_filename} is {current_checksum}")

try:
latest_version = image["versions"][0]
latest_checksum = latest_version["checksum"]
logger.info(f"Our checksum is {latest_checksum}")
except IndexError:
latest_checksum = None
if not image["versions"]:
logger.info("No image available so far")
image["versions"].append(
{
Expand All @@ -162,50 +184,48 @@ def update_image(image, minio_server, minio_bucket, minio_access_key, minio_secr
}
)

if latest_checksum != current_checksum:
logger.info(f"Checking {latest_url}")
latest_checksum = image["versions"][0]["checksum"]
logger.info(f"Our checksum is {latest_checksum}")

if latest_checksum == current_checksum:
logger.info(f"Image {name} is up-to-date, nothing to do")
return 0

if current_version is None:
logger.info(f"Checking {current_url}")

conn = urlopen(latest_url, timeout=30)
struct = time.strptime(
conn = urlopen(current_url, timeout=30)
dt = datetime.strptime(
conn.headers["last-modified"], "%a, %d %b %Y %H:%M:%S %Z"
)
dt = datetime.fromtimestamp(time.mktime(struct))

new_version = dt.strftime("%Y%m%d")
logger.info(f"New version is {new_version}")
image["versions"][0]["version"] = new_version

new_build_date = dt.strftime("%Y-%m-%d")
logger.info(f"New build date is {new_build_date}")
image["versions"][0]["build_date"] = dt.date()

logger.info(f"New checksum is {current_checksum}")
image["versions"][0]["checksum"] = current_checksum

shortname = image["shortname"]
format = image["format"]

minio_server = str(minio_server)
minio_bucket = str(minio_bucket)
new_url = f"https://{minio_server}/{minio_bucket}/{shortname}/{new_version}-{shortname}.{format}"
logger.info(f"New URL is {new_url}")
image["versions"][0]["mirror_url"] = new_url
image["versions"][0]["url"] = latest_url

mirror_image(
image,
latest_url,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)
del image["versions"][0]["source"]
current_version = dt.strftime("%Y%m%d")

else:
logger.info(f"Image {name} is up-to-date, nothing to do")
new_values = {
"version": current_version,
"build_date": datetime.strptime(current_version, "%Y%m%d").date(),
"checksum": current_checksum,
"url": current_url,
}
logger.info(f"New values are {new_values}")
image["versions"][0].update(new_values)

return image
shortname = image["shortname"]
format = image["format"]

minio_server = str(minio_server)
minio_bucket = str(minio_bucket)
new_url = f"https://{minio_server}/{minio_bucket}/{shortname}/{current_version}-{shortname}.{format}"
logger.info(f"New URL is {new_url}")
image["versions"][0]["mirror_url"] = new_url

mirror_image(
image,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)
return 1


@app.command()
Expand Down Expand Up @@ -235,25 +255,29 @@ def main(
)
logger.add(sys.stderr, format=log_fmt, level=level, colorize=True)

for image in IMAGES:
for image, getter in IMAGES.items():
p = f"etc/images/{image}.yml"

ryaml = ruamel.yaml.YAML()
with open(p) as fp:
data = yaml.safe_load(fp)
data = ryaml.load(fp)

updates = 0
for index, image in enumerate(data["images"]):
if "latest_url" in image:
updated_image = update_image(
image,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)
data["images"][index] = updated_image

if "latest_url" not in image:
continue
updates += update_image(
image,
getter,
minio_server,
minio_bucket,
minio_access_key,
minio_secret_key,
)

if not updates:
continue
with open(p, "w+") as fp:
ryaml = ruamel.yaml.YAML()
ryaml.explicit_start = True
ryaml.indent(sequence=4, offset=2)
ryaml.dump(data, fp)
Expand Down