-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
109 lines (82 loc) · 3.44 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import sys
from pathlib import Path
import fitz
from iiif import IIIFStatic
from iiif_prezi.factory import ManifestFactory
# Output locations, all relative to the current working directory.
manifest_path = Path("./output/iiif")  # base directory handed to the manifest factory
image_path = Path("./output/iiif/images")  # destination of the static IIIF tiles
extract_path = Path("./output/images")  # PNG files extracted from the PDF
def ensure_dirs():
    """Create every output directory, including any missing parents.

    Directories that already exist are left as they are.
    """
    for directory in (extract_path, image_path, manifest_path):
        directory.mkdir(parents=True, exist_ok=True)
def _write_png(pix, target):
    """Save the pixmap *pix* to *target* as a PNG, converting CMYK to RGB first."""
    if pix.n - pix.alpha >= 4:  # CMYK: convert to RGB first
        pix = fitz.Pixmap(fitz.csRGB, pix)
    # Newer PyMuPDF releases name the writer ``save``; fall back to the legacy
    # ``writePNG`` when ``save`` is not available on this installation.
    writer = getattr(pix, "save", None) or pix.writePNG
    # Pass a plain string; older PyMuPDF releases may not accept Path objects.
    writer(str(target))


def extract_images_from_pdf(pdf_path):
    """Extract every image referenced by the pages of a PDF as PNG files.

    Images are written to ``extract_path`` and named with a zero-padded
    running counter (``00.png``, ``01.png``, ...) that continues across
    pages. An image listed on several pages is written once per listing.

    Args:
        pdf_path: Path of the PDF document, passed to ``fitz.open``.

    Returns:
        A list of ``Path`` objects for the written PNG files, in extraction
        order.
    """
    images = []
    doc = fitz.open(pdf_path)
    try:
        # Prefer the snake_case PyMuPDF name, fall back to the legacy alias.
        list_page_images = (
            getattr(doc, "get_page_images", None) or doc.getPageImageList
        )
        for page_index in range(len(doc)):
            print(f"extracting images from page {page_index + 1}..")
            for img in list_page_images(page_index):
                xref = img[0]  # first entry of each item is the image xref
                # len(images) is the number of images written so far, so it
                # doubles as the running file counter.
                target_out = Path(extract_path, f"{len(images):02}.png")
                _write_png(fitz.Pixmap(doc, xref), target_out)
                images.append(target_out)
    finally:
        # Release the document even if extraction fails part-way.
        doc.close()
    print(f"finished extracting {len(images)} images")
    return images
def generate_iiif(images, pdf, base_uri="http://localhost:8000"):
    """Generate IIIF 2.0 static image-service and manifest.

    For each image a canvas with one image annotation is added to a single
    sequence, and a static tile pyramid is generated under ``image_path``.
    The manifest is finally written via ``manifest.toFile`` using
    ``manifest_path`` as the base directory.

    Args:
        images: Iterable of ``pathlib.Path`` objects pointing at the image
            files; each file's stem is used as its IIIF identifier.
        pdf: Name/path of the source PDF, stored in the manifest metadata
            under "Generated from".
        base_uri: Base URI the generated files will be served from. Images
            are expected below ``<base_uri>/images``.
    """
    base_uri = base_uri.rstrip("/")
    image_base_uri = f"{base_uri}/images"

    # configure manifest factory
    manifest_factory = ManifestFactory()
    manifest_factory.set_base_prezi_dir(str(manifest_path))
    manifest_factory.set_base_prezi_uri(base_uri)
    manifest_factory.set_base_image_uri(image_base_uri)
    manifest_factory.set_iiif_image_info(2.0, 1)

    manifest = manifest_factory.manifest(label="Example Manifest from PDF")
    manifest.description = "Sample P2 manifest with images from PDF"
    manifest.set_metadata({"Generated from": pdf})

    # configure tile generator for static assets
    tile_generator = IIIFStatic(
        dst=str(image_path),
        prefix=image_base_uri,
        tilesize=512,
        api_version="2.1",
        extras=[
            '/full/90,/0/default.jpg',
            '/full/200,/0/default.jpg',  # thumbnail for UV
        ],
    )

    seq = manifest.sequence()
    for idx, image in enumerate(images):
        print(f"processing image {idx}")
        image_id = image.stem

        # create a canvas, an annotation on it, and an image in the annotation
        canvas = seq.canvas(ident=image_id, label=f"Canvas {idx}")
        annotation = canvas.annotation(ident=f"page-{idx}")
        img = annotation.image(image_id, iiif=True)
        # Override the service profile with the level0 profile URI.
        img.service.profile = 'http://iiif.io/api/image/2/level0.json'

        # set image + canvas hw
        img.set_hw_from_file(str(image))
        canvas.height = img.height
        canvas.width = img.width

        # generate image-pyramid
        tile_generator.generate(src=image, identifier=image_id)

    manifest.toFile(compact=False)
if __name__ == '__main__':
    # sys.argv[0] is always the script name, so the list is never empty;
    # an argument was supplied only when there is more than one entry.
    if len(sys.argv) > 1:
        pdf = sys.argv[-1]  # the last command-line argument is the PDF path
        ensure_dirs()
        images = extract_images_from_pdf(pdf)
        generate_iiif(images, pdf)
        print("finished")
    else:
        print("no arg supplied")