-
Notifications
You must be signed in to change notification settings - Fork 3
/
check_images.py
60 lines (48 loc) · 2.14 KB
/
check_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
import argparse
from concurrent.futures import ThreadPoolExecutor
from PIL import Image, UnidentifiedImageError, ImageFile
from tqdm import tqdm
def check_image(source_path):
    """Check that ``source_path`` is a fully decodable image in an accepted format.

    The file is opened with Pillow and its pixel data is decoded completely.
    Possible outcomes:

    * decodes and its format is accepted: returns True.
    * decodes but its format is not accepted: prints a message and returns
      False; the file is kept.
    * Pillow cannot identify the file: prints a message and returns False;
      the file is kept.
    * any other exception while opening/decoding: prints the error, tries to
      delete the file (reporting whether that worked) and returns False.

    Args:
        source_path: Path of the file to validate.

    Returns:
        True when the image passed every check, False otherwise.
    """
    # Keep Pillow's tolerance for truncated files switched off so that, per
    # Pillow's documented behaviour, incomplete images fail during load()
    # instead of being accepted. Note this is a module-wide Pillow setting.
    ImageFile.LOAD_TRUNCATED_IMAGES = False

    allowed_formats = {'JPEG', 'PNG', 'BMP', 'GIF', 'TIFF', 'WEBP'}

    try:
        with Image.open(source_path) as picture:
            # open() alone is lazy; load() forces the pixel data to be decoded.
            picture.load()
            detected_format = picture.format

        if detected_format in allowed_formats:
            # Readable, decodable and in an accepted format.
            return True

        print(f"Unacceptable image format ({detected_format}) for file: {source_path}")
        return False
    except UnidentifiedImageError:
        print(f"File is not an image or unrecognized format: {source_path}")
        return False
    except Exception as e:
        # NOTE(review): every other failure is treated as a damaged file and
        # the file is removed from disk -- confirm this is intended for
        # non-corruption errors too (e.g. permission problems).
        print(f"Error processing file {source_path}: {e}")
        try:
            os.remove(source_path)
        except Exception as delete_error:
            print(f"Failed to delete {source_path}: {delete_error}")
        else:
            print(f"File {source_path} successfully deleted.")
        return False
def get_all_images(source_folder):
    """Recursively collect paths of files under ``source_folder`` that look like images.

    A file qualifies when its extension, compared case-insensitively, is one
    of the known image extensions. Only the file name is inspected; the file
    contents are not read here.

    Args:
        source_folder: Root directory to walk.

    Returns:
        A list of paths (directory joined with file name) in the order that
        ``os.walk`` yields them.
    """
    wanted_suffixes = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.tif', '.webp'}
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(source_folder)
        for name in names
        if os.path.splitext(name)[1].lower() in wanted_suffixes
    ]
def main():
    """Command-line entry point: validate every image under ``--source-folder``.

    Parses the command line, gathers candidate image paths with
    ``get_all_images`` and runs ``check_image`` on each of them in a pool of
    20 worker threads while a tqdm progress bar tracks completion.

    Exits through ``parser.error`` (usage message, exit status 2) when the
    given source folder is not an existing directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--source-folder', required=True, help='The source directory')
    args = parser.parse_args()

    # os.walk() ignores directory-listing errors by default, so a mistyped or
    # missing path would otherwise look like an empty folder and the script
    # would finish "successfully" without checking anything.
    if not os.path.isdir(args.source_folder):
        parser.error(
            f"--source-folder is not an existing directory: {args.source_folder}"
        )

    all_images = get_all_images(args.source_folder)

    with ThreadPoolExecutor(max_workers=20) as executor:
        # executor.map is lazy on the consumer side; list() drains it so the
        # tqdm wrapper advances once per finished check.
        list(tqdm(executor.map(check_image, all_images), total=len(all_images)))


# Run the checker only when executed as a script, not when imported.
if __name__ == '__main__':
    main()