forked from NJUOCR/synthetic_scanning_text
-
Notifications
You must be signed in to change notification settings - Fork 0
/
interference.py
316 lines (258 loc) · 9.95 KB
/
interference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
import random as rd
import cv2 as cv
import numpy as np
class Interference:
def interfere(self, img):
"""
:param img:
:return: A tuple (out_img, random_val)
"""
raise Exception("interfere function not implement")
@staticmethod
def get_bounds(img):
"""
Get the minimum rectangle box containing the text. This method assumes that:
1. there is no noise in the given image
2. background color is white(255)
:param img: input image
:return: top, left, bottom, right
> FYI: image_height = bottom - top, image_width = right - left
"""
h, w = img.shape
black_vertical = np.zeros((h, ), np.uint8)
black_horizontal = np.zeros((w, ), np.uint8)
for left in range(w):
if not (img[:, left] == black_vertical).all():
break
else:
left = None
for right in range(w - 1, -1, -1):
if not (img[:, right] == black_vertical).all():
break
else:
right = None
for top in range(h):
if not (img[top, :] == black_horizontal).all():
break
else:
top = None
for bottom in range(h - 1, -1, -1):
if not (img[bottom, :] == black_horizontal).all():
break
else:
bottom = None
return top, left, bottom, right
@staticmethod
def make_grid(img, x_num: tuple, y_num: tuple) -> list:
"""
***Ignore***
:param img: input image
:param x_num: (min_num, max_num) along width, border contained
:param y_num: (min_num, max_num) along height, border contained
:return: a list containing `x_num` x `y_num` cells
"""
# 获取图片的高和宽
height, width = img.shape
x = rd.randint(*x_num)
y = rd.randint(*y_num)
dx = width // x
dy = height // y
grids = []
for i in range(y):
for j in range(x):
grids.append(img[i * dy:(i + 1) * dy, j * dx:(j + 1) * dx])
return grids
class Inversion(Interference):
def __init__(self):
"""
Inverse the input image
"""
def interfere(self, img):
"""
:param img: the input image, of which the mode should be `gray-scale`. An auto conversion will be
done otherwise.
:return: A gray-scale image
"""
channel = img.shape[2] if len(img.shape) == 3 else 1
# 颜色空间转换函数
out = img
if channel == 3:
out = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
return 255 - out, None
class RandomGaussianBlur(Interference):
def __init__(self, min_r, max_r, min_sigma, max_sigma):
"""
Gaussian blur with random radius [min_r, max_r] and random sigma [min_sigma, max_sigma]
:param min_r: minimum radius
:param max_r: maximum radius
:param min_sigma: minimum sigma
:param max_sigma: maximum sigma
"""
self.r_seeds = []
for r in range(min_r, max_r+1, 2):
self.r_seeds.append(r)
self.sigma_range = max_sigma - min_sigma
self.sigma_bias = min_sigma
def interfere(self, img):
r = self.r_seeds[rd.randint(0, len(self.r_seeds)-1)]
sigma = rd.random() * self.sigma_range + self.sigma_bias
return cv.GaussianBlur(img, (r, r), sigma), None
class RandomTranslation(Interference):
def __init__(self):
"""
Randomly move the content(text) of the image, along both x and y axises.
**Keep text in image boundary**
"""
def interfere(self, img):
# 获取图片大小
img_input = img
height, width = img.shape
top, left, bottom, right = Interference.get_bounds(img)
if None in (top, left, bottom, right):
return img_input, (0, 0)
offset_x = rd.randint(-left, width - right)
offset_y = rd.randint(-top, height - bottom)
# 仿射矩阵,移位矩阵
mat_translation = np.float32([[1, 0, offset_x],
[0, 1, offset_y]])
# 调用的一个仿射方法
img_output = cv.warpAffine(img_input, mat_translation, (img_input.shape[1], img_input.shape[0]))
return img_output, (offset_x, offset_y)
class RandomNoise(Interference):
def __init__(self, rate, min_val, max_val):
"""
Add noise to image, every pixel in the image can be a noise pixel under the possibility `p`,
the val of the noise pixel is between `min_val` and `max_val`
> Background noise or foreground noise is decided by the val
:param rate: The possibility that one pixel in image is a noise pixel
:param min_val: the minimum val of a noise
:param max_val: the maximum val of a noise
"""
self.rate = rate
self.min_val = min_val
self.max_val = max_val
def interfere(self, img):
# todo 增加噪点
# white_noise
w_rate = self.rate
w_range = (self.max_val, self.max_val)
# np.nditer: numpy array自带的迭代器 参考网址:https://www.jianshu.com/p/f2bd63766204
# 按顺序遍历会出现噪点扎堆的请看
for x in np.nditer(img, op_flags=['readwrite']):
if rd.random() < w_rate:
x[...] = rd.randint(*w_range)
return img, None
class RandomResize(Interference):
def __init__(self, min_scale, max_scale):
"""
Resize image with a scale between `min_scale` and `max_scale`
:param min_scale: should be larger than 0.0
:param max_scale: should be smaller than or equal to 1.0
"""
self.min_scale = min_scale
self.max_scale = max_scale
def interfere(self, img):
# 生成min_scale至max_scale之间的随机浮点数
scale = rd.uniform(self.min_scale, self.max_scale)
height, width = img.shape
# CV_INTER_LINEAR :雙線性插補(預設)
interpolation = cv.INTER_LINEAR
# interpolation:內插方式
img = cv.resize(img, (int(width * scale), int(height * scale)), interpolation=interpolation)
return img, scale
class Padding(Interference):
def __init__(self, width, height, val):
"""
use `val` to pad the image to size of `width` x `height`
:param width:
:param height:
:param val:
"""
# 新的图片的宽度,高度
self.width = width
self.height = height
self.val = val
def interfere(self, img):
# todo 边缘补齐
new_height, new_width = self.height, self.width
top = left = bottom = right = 0
# 获取当前图片的宽度,高度
cur_height, cur_width = img.shape
if new_height > cur_height:
# 上下填充
top = (new_height - cur_height) // 2
bottom = new_height - cur_height - top
if new_width > cur_width:
# 左右填充
left = (new_width - cur_width) // 2
right = new_width - cur_width - left
if top == 0 and bottom == 0 and left == 0 and right == 0:
return img, None
out = cv.copyMakeBorder(img, top, bottom, left, right, cv.BORDER_CONSTANT, value=self.val)
return out, None
class RandomRotation(Interference):
def __init__(self, min_angle, max_angle):
"""
Rotate the image with a random angle between `min_angle` and `max_angle`
:param min_angle: better use a value smaller than 0 to make a clockwise rotation
:param max_angle: better use a value larger than 0 to make a anti-clockwise rotation
"""
self.min_angle = min_angle
self.max_angle = max_angle
def interfere(self, img):
height, width = img.shape
angle = rd.random() * (self.max_angle - self.min_angle) + self.min_angle
mat = cv.getRotationMatrix2D((width / 2, height / 2), angle, 0.8)
output_img = cv.warpAffine(img, mat, (width, height))
return output_img, angle
class RandomDilution(Interference):
def __init__(self, min_ratio, max_ratio):
"""
Dilute the whole image by a random ratio between `min_ratio` and `max_ratio`
:param min_ratio: <= 100
:param max_ratio: <= 100
"""
self.min_ratio = min_ratio
self.max_ratio = max_ratio
def interfere(self, img):
# todo 淡化
ratio = rd.randint(self.min_ratio, self.max_ratio) / 100
img = img * ratio
return img, ratio
class RandomStroke(Interference):
def __init__(self, bolder: float, plain: float, kernel_size: int):
self.kernel_size = kernel_size
self.bolder = bolder
self.plain = plain
def interfere(self, img):
# todo 笔画
# 定义一个2*2的十字形结构
kernel = cv.getStructuringElement(cv.MORPH_RECT, (self.kernel_size, self.kernel_size))
if rd.random() < self.bolder:
output_img = cv.dilate(img, kernel)
elif self.bolder < rd.random() < self.bolder + self.plain:
output_img = cv.erode(img, kernel)
else:
output_img = np.copy(img)
return output_img, None
class AutoBin(Interference):
def __init__(self, block):
self.block = block
def interfere(self, img):
"""
:param img:
:return:
"""
# 读取图像,并转为灰度图
# img_grey = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# 自适应二值化
img = img.astype(np.uint8)
img_at_mean = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C,
cv.THRESH_BINARY, self.block, 0)
return img_at_mean, None
class Threshold(Interference):
def __init__(self, thresh):
self.thresh = thresh
def interfere(self, img):
_, output_img = cv.threshold(img, self.thresh, 255, cv.THRESH_BINARY)
return output_img, None