Skip to content

Commit

Permalink
Add Python docstrings and reformat (#24)
Browse files: browse the repository at this point in the history.
Co-authored-by: UltralyticsAssistant <[email protected]>
  • Loading branch information
glenn-jocher and UltralyticsAssistant authored Apr 28, 2024
1 parent fbfd673 commit d8c596c
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 8 deletions.
6 changes: 5 additions & 1 deletion flickr_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@


def get_urls(search="honeybees on flowers", n=10, download=False):
"""Fetch Flickr URLs for `search` term images, optionally downloading them; supports up to `n` images."""
t = time.time()
flickr = FlickrAPI(key, secret)
license = () # https://www.flickr.com/services/api/explore/?method=flickr.photos.licenses.getInfo
Expand All @@ -36,7 +37,10 @@ def get_urls(search="honeybees on flowers", n=10, download=False):
# construct url https://www.flickr.com/services/api/misc.urls.html
url = photo.get("url_o") # original size
if url is None:
url = f"https://farm{photo.get('farm')}.staticflickr.com/{photo.get('server')}/{photo.get('id')}_{photo.get('secret')}_b.jpg"
url = (
f"https://farm{photo.get('farm')}.staticflickr.com/{photo.get('server')}/"
f"{photo.get('id')}_{photo.get('secret')}_b.jpg"
)

# download
if download:
Expand Down
16 changes: 10 additions & 6 deletions utils/clean_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@


def scan(files, max_wh=1920, remove=False, multi_thread=True, tojpg=False, quality=95, workers=8):
# Args:
# files: list of image files
# max_wh: maximum image wh (larger images will be reduced in size)
# remove: delete corrupted/duplicate images
# tojpg: replace current image with jpg for smaller size / faster loading
# quality: PIL JPG saving quality (0-100)
"""
Scans and processes images by resizing, converting to jpg, and removing duplicates or corrupt files.
Args:
files: list of image files
max_wh: maximum image wh (larger images will be reduced in size)
remove: delete corrupted/duplicate images
tojpg: replace current image with jpg for smaller size / faster loading
quality: PIL JPG saving quality (0-100)
"""
img_formats = [".bmp", ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".dng"] # valid image formats from YOLOv5

def scan_one_file(f):
Expand Down
2 changes: 1 addition & 1 deletion utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def download_uri(uri, dir="./"):
# Download a file from a given URI, including minimal checks
"""Downloads file from URI, performing checks and renaming; supports timeout and image format suffix addition."""

# Download
f = dir + os.path.basename(uri) # filename
Expand Down
1 change: 1 addition & 0 deletions utils/multithread_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@


def fetch_url(url):
"""Downloads a file from a URL to a local directory, returning the URL and any error encountered."""
try:
f = dir + url.split("/")[-1]
request.urlretrieve(url, f)
Expand Down

0 comments on commit d8c596c

Please sign in to comment.