Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Vincent-Maladiere committed Jan 20, 2025
1 parent 3d2da9a commit aff5112
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions skrub/datasets/_utils.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
import json
import hashlib
import pandas as pd
import os
import shutil
import time
import tarfile
import warnings
import requests
from collections import namedtuple
from pathlib import Path
from tempfile import NamedTemporaryFile
from urllib.error import URLError
from urllib.request import urlretrieve
from sklearn.utils import Bunch

-ARCHIVE_METADATA = {
+DATASET_INFO = {
"medical_charge": {
-"urls": ["https://figshare.com/ndownloader/files/51807752"],
+"urls": [
+"https://osf.io/download/pu2hq/",
+"https://figshare.com/ndownloader/files/51807752",
+],
"sha256": "d10a9d7c0862a8bebe9292ed948df9e6e02cdf4415a8e66306b12578f5f56754",
},
"employee_salaries": {
"urls": [
"https://osf.io/download/bszkv/",
"https://figshare.com/ndownloader/files/51807500",
],
"sha256": "1a73268a1a5ce0d376e493737a5fcf0d3f8ffb4cafeca20c7b39381bbc943292",
},
}


Expand Down Expand Up @@ -117,7 +121,7 @@ def extract_archive(dataset_name, data_home):

def download_archive(dataset_name, data_home, retry=3, delay=1, timeout=30):

-metadata = ARCHIVE_METADATA[dataset_name]
+metadata = DATASET_INFO[dataset_name]
error_flag = False

while True:
Expand Down

0 comments on commit aff5112

Please sign in to comment.