From 3473dc27d08705fa308dd3f87dcd2583e579d76a Mon Sep 17 00:00:00 2001
From: Constantin Pape
Date: Fri, 18 Oct 2024 21:57:12 +0200
Subject: [PATCH] More fixes to embedding download

---
 workshops/i2k_2024/download_embeddings.py | 53 ++++++++++++-----------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/workshops/i2k_2024/download_embeddings.py b/workshops/i2k_2024/download_embeddings.py
index 7f08e96e..80074727 100644
--- a/workshops/i2k_2024/download_embeddings.py
+++ b/workshops/i2k_2024/download_embeddings.py
@@ -4,33 +4,33 @@
 
 
 URLS = {
-    "lucchi": [
-        "https://owncloud.gwdg.de/index.php/s/kQMA1B8L9LOvYrl/download",  # vit_b
-        "https://owncloud.gwdg.de/index.php/s/U8xs6moRg0cQhkS/download",  # vit_b_em_organelles
-    ],
-    "embedseg": [
-        "https://owncloud.gwdg.de/index.php/s/EF9ZdMzYjDjl8fd/download",  # vit_b
-        "https://owncloud.gwdg.de/index.php/s/7IVekm8K7ln7yQ6/download",  # vit_b_lm
-    ],
-    "platynereis": [
-        "https://owncloud.gwdg.de/index.php/s/1OgOEeMIK9Ok2Kj/download",  # vit_b
-        "https://owncloud.gwdg.de/index.php/s/i9DrXe6YFL8jvgP/download",  # vit_b_em_organelles
-    ],
+    "lucchi": {
+        "vit_b": "https://owncloud.gwdg.de/index.php/s/kQMA1B8L9LOvYrl/download",
+        "vit_b_em_organelles": "https://owncloud.gwdg.de/index.php/s/U8xs6moRg0cQhkS/download",
+    },
+    "embedseg": {
+        "vit_b": "https://owncloud.gwdg.de/index.php/s/EF9ZdMzYjDjl8fd/download",
+        "vit_b_lm": "https://owncloud.gwdg.de/index.php/s/7IVekm8K7ln7yQ6/download",
+    },
+    "platynereis": {
+        "vit_b": "https://owncloud.gwdg.de/index.php/s/1OgOEeMIK9Ok2Kj/download",
+        "vit_b_em_organelles": "https://owncloud.gwdg.de/index.php/s/i9DrXe6YFL8jvgP/download",
+    },
 }
 
 CHECKSUMS = {
-    "lucchi": [
-        "e0d064765f1758a1a0823b2c02d399caa5cae0d8ac5a1e2ed96548a647717433",  # vit_b
-        "e0b5ab781c42e6f68b746fc056c918d56559ccaeedb4e4f2848b1e5e8f1bec58",  # vit_b_em_organelles
-    ],
-    "embedseg": [
-        "82f5351486e484dda5a3a327381458515c89da5dda8a48a0b1ab96ef10d23f02",  # vit_b
-        "80fd701c01b81bbfb32beed6e2ece8c5706625dbc451776d8ba1c22253f097b9",  # vit_b_lm
-    ],
-    "platynereis": [
-        "95c5e31c5e55e94780568f3fb8a3fdf33f8586a4c6a375d28dccba6567f37a47",  # vit_b
-        "3d8d91313656fde271a48ea0a3552762f2536955a357ffb43e7c43b5b27e0627",  # vit_b_em_organelles
-    ],
+    "lucchi": {
+        "vit_b": "e0d064765f1758a1a0823b2c02d399caa5cae0d8ac5a1e2ed96548a647717433",
+        "vit_b_em_organelles": "e0b5ab781c42e6f68b746fc056c918d56559ccaeedb4e4f2848b1e5e8f1bec58",
+    },
+    "embedseg": {
+        "vit_b": "82f5351486e484dda5a3a327381458515c89da5dda8a48a0b1ab96ef10d23f02",
+        "vit_b_lm": "80fd701c01b81bbfb32beed6e2ece8c5706625dbc451776d8ba1c22253f097b9",
+    },
+    "platynereis": {
+        "vit_b": "95c5e31c5e55e94780568f3fb8a3fdf33f8586a4c6a375d28dccba6567f37a47",
+        "vit_b_em_organelles": "3d8d91313656fde271a48ea0a3552762f2536955a357ffb43e7c43b5b27e0627",
+    },
 }
 
 
@@ -53,11 +53,12 @@ def _download_embeddings(embedding_dir, dataset_name):
     os.makedirs(data_embedding_dir, exist_ok=True)
 
     # Download the precomputed embeddings as zipfiles and unzip the embeddings per model.
-    for url, checksum in zip(urls, checksums):
+    for name, url in urls.items():
         fnames = os.listdir(data_embedding_dir)
-        if fnames and all([p.startswith("vit_b") for p in fnames]):
+        if name in fnames:
             continue
 
+        checksum = checksums[name]
         zip_path = os.path.join(data_embedding_dir, "embeddings.zip")
         download_source(path=zip_path, url=url, download=True, checksum=checksum)
         unzip(zip_path=zip_path, dst=data_embedding_dir)