-
Notifications
You must be signed in to change notification settings - Fork 0
/
parseq_text_reader.py
69 lines (47 loc) · 1.85 KB
/
parseq_text_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import cv2
from ultralytics import YOLO
import numpy as np
from PIL import Image
import os
import matplotlib.pyplot as plt
import tempfile
import sys
sys.path.append('/Users/juliajorstad/Sinhala-ParSeq-main')
from pretrained_model import pretrained
# Load the YOLO detector once, at import time, from the "yolov8s.pt" weights.
# prediction() reads this module-level instance directly; crop_text() takes
# the model as an explicit argument instead.
model = YOLO("yolov8s.pt")
def prediction(img_path):
    """Run the module-level YOLO ``model`` on one image and display the result.

    Args:
        img_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        None. Every annotated result is opened in an image viewer through
        ``Image.show()``.
    """
    image = Image.open(img_path)

    # With stream=True, predict() hands back results lazily, one per source
    # image, so inference actually runs while the loop below is iterating.
    results = model.predict(image, save=False, stream=True)

    for r in results:
        im_array = r.plot()  # annotated image as a BGR numpy array
        im = Image.fromarray(im_array[..., ::-1])  # reverse channels -> RGB PIL image
        im.show()
def crop_text(img_path, model):
    """Crop each detected box out of an image and pass the crop to ``pretrained``.

    For every bounding box the detector returns, the region is cut out of the
    source image, written to a temporary PNG file, and that file's path is
    passed to ``pretrained`` (imported from ``pretrained_model``). The return
    value of ``pretrained`` is discarded. The temporary file is removed once
    ``pretrained`` has returned or raised.

    A matplotlib figure is also created for every crop; the figures are
    neither shown nor closed by this function.

    Args:
        img_path: Path (or file object) accepted by ``PIL.Image.open``.
        model: Detector exposing ``predict(image, save=..., stream=...)``,
            e.g. an ultralytics ``YOLO`` instance.

    Returns:
        None.
    """
    # Load the image
    image = Image.open(img_path)

    # Get model predictions (lazily, because stream=True)
    results = model.predict(image, save=False, stream=True)

    # Iterate through detection results
    for r in results:
        r_array = r.numpy()

        # NOTE(review): this only tests that the class-name entry for id 4 is
        # truthy; it does NOT restrict the boxes below to class 4, and it will
        # raise if the model has no entry for 4. Presumably the intent was to
        # keep only "text" detections -- confirm against the trained model's
        # class list before changing it.
        if not r_array.names[4]:
            continue

        for bbox in r_array.boxes.xyxy:
            x_min, y_min, x_max, y_max = map(int, bbox[:4])
            cropped_img = image.crop((x_min, y_min, x_max, y_max))

            # pretrained() takes a file path, so the crop goes through a
            # temporary PNG. delete=False keeps the file alive after the handle
            # is closed; the finally clause below removes it explicitly.
            tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
            tmp_path = tmp.name
            try:
                # Write through the open handle and close it before the file
                # is read again by name (reopening an open temporary file by
                # name is not portable across platforms).
                with tmp:
                    cropped_img.save(tmp, format='PNG')

                # Use the temporary PNG file with the pretrained function
                pretrained(tmp_path)
            finally:
                os.remove(tmp_path)

            # The figure is left open and is not displayed here; a caller can
            # run plt.show() afterwards to view the crops.
            plt.figure(figsize=(5, 5))
            plt.imshow(cropped_img)
            plt.axis('off')  # Hide axes ticks
# Sample input image (path relative to the working directory) for manual runs.
img= "data/test/images/Kalkveien_21_U_page_1.jpg"
# Uncomment one of the calls below to run detection or cropping on the sample image.
#prediction(img)
#crop_text(img,model)