# crop.py - rotate PDF pages based on detected near-horizontal lines.
import cv2
from pdf2image import convert_from_path
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
class Line:
    """A 2-D line segment whose endpoints are ordered left to right.

    Attributes:
        x1, y1: Coordinates of the endpoint with the smaller x value.
        x2, y2: Coordinates of the other endpoint.
        angle: Direction of the segment in degrees, from ``arctan2(dy, dx)``.
        length: Euclidean length of the segment.
    """

    def __init__(self, line):
        """Build the segment from a 4-item sequence ``(x1, y1, x2, y2)``."""
        xa, ya, xb, yb = line
        # Normalise the endpoint order so that x1 <= x2.
        if xa > xb:
            xa, ya, xb, yb = xb, yb, xa, ya
        self.x1, self.y1, self.x2, self.y2 = xa, ya, xb, yb

        dx = self.x2 - self.x1
        dy = self.y2 - self.y1
        self.angle = np.arctan2(dy, dx) * 180 / np.pi
        self.length = np.sqrt(dx ** 2 + dy ** 2)

    def p1(self):
        """Return the first endpoint as a tuple of ints (truncated)."""
        return tuple(int(coord) for coord in (self.x1, self.y1))

    def p2(self):
        """Return the second endpoint as a tuple of ints (truncated)."""
        return tuple(int(coord) for coord in (self.x2, self.y2))
# Number of CPUs available for the PDF conversion threads.
# os.sched_getaffinity only exists on some Unix platforms; elsewhere (e.g.
# macOS, Windows) fall back to the total CPU count, or 1 when it is unknown.
try:
    CORE_COUNT = len(os.sched_getaffinity(0))
except AttributeError:
    CORE_COUNT = os.cpu_count() or 1
def get_args():
    """Parse the command line and return the resulting namespace.

    Options:
        pdf: positional, input pdf file path.
        --angle-threshold: float, defaults to 10.
        --debug: flag, off by default.
        --output: output pdf file path, defaults to ``out.pdf``.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Rotate pdf pages based on horizontal lines"
    )

    # (flags, keyword settings) for every argument, registered in order.
    option_specs = (
        (("pdf",), {"type": str, "help": "input pdf file path"}),
        (
            ("--angle-threshold",),
            {
                "type": float,
                "default": 10,
                "help": "angle threshold to consider a line as horizontal",
            },
        ),
        # debug flag to show images
        (
            ("--debug",),
            {
                "action": "store_true",
                "required": False,
                "help": "show each page with detected lines and rotation angle",
            },
        ),
        (
            ("--output",),
            {
                "type": str,
                "default": "out.pdf",
                "required": False,
                "help": "output pdf file path",
            },
        ),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
def _select_rotation_lines(segments, angle_threshold):
    """Return the longer near-horizontal segments used to estimate the skew.

    Args:
        segments: First element of the line-segment detector result; each
            entry wraps one ``(x1, y1, x2, y2)`` row. May be ``None`` or empty
            when nothing was detected.
        angle_threshold: Segments whose angle lies strictly between
            ``-angle_threshold`` and ``+angle_threshold`` degrees are kept.

    Returns:
        A list of ``Line`` objects; empty when no segment qualifies.
    """
    if segments is None or len(segments) == 0:
        return []

    horizontal_lines: list[Line] = [
        candidate
        for candidate in (Line(segment[0]) for segment in segments)
        if -angle_threshold < candidate.angle < angle_threshold
    ]
    if not horizontal_lines:
        return []

    # Keep only segments longer than the 0.6 length quantile.
    length_threshold = np.quantile([h.length for h in horizontal_lines], 0.6)
    rotation_lines = [h for h in horizontal_lines if h.length > length_threshold]
    # When all lengths tie, the strict comparison rejects every segment;
    # use all near-horizontal segments instead of failing on an empty list.
    return rotation_lines or horizontal_lines


def _show_debug(page_index, img, rotation_lines, avg_angle):
    """Print statistics for one page and display its detected lines.

    Args:
        page_index: Zero-based page number, printed for reference.
        img: The downscaled page as an RGB array.
        rotation_lines: Non-empty list of ``Line`` objects used for the angle.
        avg_angle: Estimated rotation angle in degrees.
    """
    all_y = [line.y1 for line in rotation_lines] + [
        line.y2 for line in rotation_lines
    ]
    std_dev_y = np.std(all_y)
    quantile_upper = np.quantile(all_y, 0.9)
    quantile_lower = np.quantile(all_y, 0.1)
    # NOTE: the bounds are only reported; they are not used to filter lines.
    upper_bound = quantile_upper + std_dev_y
    lower_bound = quantile_lower - std_dev_y

    most_left = int(min(line.x1 for line in rotation_lines))
    most_right = int(max(line.x2 for line in rotation_lines))
    most_top = int(min(line.y1 for line in rotation_lines))
    most_bottom = int(max(line.y2 for line in rotation_lines))

    print("page", page_index)
    print("avg angle:", avg_angle)
    print("quantile_upper:", quantile_upper)
    print("quantile_lower:", quantile_lower)
    print("std_dev_y:", std_dev_y)
    print("upper_bound:", upper_bound)
    print("lower_bound:", lower_bound)

    # cv2.imshow expects BGR, so convert the RGB page before drawing on it.
    debug_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    for line in rotation_lines:
        cv2.line(debug_img, line.p1(), line.p2(), (255, 0, 0), 1)

    # Draw a line through the vertical centre with the average angle.
    height, width = img.shape[:2]
    cv2.line(
        debug_img,
        (0, int(height / 2)),
        (width, int(height / 2 + width * np.tan(avg_angle * np.pi / 180))),
        (0, 255, 0),
        1,
    )
    # Draw the bounding box of the selected lines.
    cv2.rectangle(
        debug_img,
        (most_left, most_top),
        (most_right, most_bottom),
        (0, 0, 255),
        1,
    )
    cv2.imshow("debug", debug_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Plot a histogram of the lines over the y axis.
    plt.hist([line.y1 for line in rotation_lines], bins=50)
    plt.savefig("hist.png")
    # Clear the figure so later pages do not draw on top of this histogram.
    plt.clf()
    with Image.open("hist.png") as hist_img:
        hist_img.show()


def _rotate_page(img_orig, angle):
    """Rotate ``img_orig`` about its centre by ``angle`` degrees.

    The output keeps the input size; uncovered areas are filled with white.
    """
    rows, cols = img_orig.shape[:2]
    matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    return cv2.warpAffine(
        img_orig,
        matrix,
        (cols, rows),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )


def main():
    """Load the PDF, rotate every page by its estimated skew, save the result.

    Pages on which no near-horizontal line is detected are copied unchanged.

    Raises:
        SystemExit: If the input PDF yields no pages.
    """
    args = get_args()
    print("loading pdf file", args.pdf)
    images = convert_from_path(args.pdf, thread_count=CORE_COUNT)
    print("loaded", len(images), "pages")
    if not images:
        raise SystemExit(f"no pages found in {args.pdf}")

    # One detector instance is reused for every page.
    detector = cv2.createLineSegmentDetector()
    pil_images = []
    print("start processing pages")
    for i, image in tqdm(
        enumerate(images), desc="Processing pages", total=len(images)
    ):
        img_orig = np.array(image)
        # Detect lines on a half-size copy; rotate the full-size original.
        img = cv2.resize(img_orig, None, fx=0.5, fy=0.5)
        # Arrays built from PIL images are in RGB channel order.
        img_grey = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        segments = detector.detect(img_grey)[0]

        rotation_lines = _select_rotation_lines(segments, args.angle_threshold)
        if not rotation_lines:
            # Nothing to estimate the skew from: keep the page as it is.
            if args.debug:
                print("page", i)
                print("no near-horizontal lines found; page left unrotated")
            pil_images.append(image)
            continue

        # Length-weighted average angle of the selected lines.
        avg_angle = float(
            np.average(
                [h.angle for h in rotation_lines],
                weights=[h.length for h in rotation_lines],
            )
        )
        if args.debug:
            _show_debug(i, img, rotation_lines, avg_angle)

        pil_images.append(Image.fromarray(_rotate_page(img_orig, avg_angle)))

    pil_images[0].save(
        args.output, "PDF", save_all=True, append_images=pil_images[1:]
    )


if __name__ == "__main__":
    main()