lw_test.py
import numpy as np
import PIL.Image
import supervision as sv
import torch
from mmengine.config import Config
from mmengine.dataset import Compose
from mmengine.runner import Runner
from mmengine.runner.amp import autocast
from torchvision.ops import nms
# Annotators for drawing bounding boxes and centered labels on the output image.
bounding_box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)

# Comma-separated COCO class names used as the open-vocabulary text prompts.
class_names = ("person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, "
               "traffic light, fire hydrant, stop sign, parking meter, bench, bird, "
               "cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, "
               "backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, "
               "sports ball, kite, baseball bat, baseball glove, skateboard, "
               "surfboard, tennis racket, bottle, wine glass, cup, fork, knife, "
               "spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, "
               "hot dog, pizza, donut, cake, chair, couch, potted plant, bed, "
               "dining table, toilet, tv, laptop, mouse, remote, keyboard, "
               "cell phone, microwave, oven, toaster, sink, refrigerator, book, "
               "clock, vase, scissors, teddy bear, hair drier, toothbrush")

# Alternate, custom prompt set.
class_names2 = "dog, eye, tongue, ear, leash"
def run_image(
    runner,
    input_image,
    max_num_boxes=100,
    score_thr=0.05,
    nms_thr=0.5,
    output_image="output.png",
):
    # Build the text prompts: one entry per class name, plus a padding entry.
    texts = [[t.strip()] for t in class_names.split(",")] + [[" "]]
    data_info = runner.pipeline(dict(img_id=0, img_path=input_image,
                                     texts=texts))
    data_batch = dict(
        inputs=data_info["inputs"].unsqueeze(0),
        data_samples=[data_info["data_samples"]],
    )

    # Run inference with autocast disabled and no gradients.
    with autocast(enabled=False), torch.no_grad():
        output = runner.model.test_step(data_batch)[0]
    runner.model.class_names = texts
    pred_instances = output.pred_instances

    # Post-process: NMS, score threshold, then cap the number of boxes.
    keep_idxs = nms(pred_instances.bboxes, pred_instances.scores,
                    iou_threshold=nms_thr)
    pred_instances = pred_instances[keep_idxs]
    pred_instances = pred_instances[pred_instances.scores.float() > score_thr]
    if len(pred_instances.scores) > max_num_boxes:
        indices = pred_instances.scores.float().topk(max_num_boxes)[1]
        pred_instances = pred_instances[indices]
    output.pred_instances = pred_instances

    # Convert predictions to a supervision Detections object for annotation.
    pred_instances = pred_instances.cpu().numpy()
    detections = sv.Detections(
        xyxy=pred_instances['bboxes'],
        class_id=pred_instances['labels'],
        confidence=pred_instances['scores']
    )
    # Label each box with its numeric class id and confidence score.
    labels = [
        f"{class_id} {confidence:0.2f}"
        for class_id, confidence
        in zip(detections.class_id, detections.confidence)
    ]

    # Draw the annotations and return the image with channels reversed
    # (RGB -> BGR). Note: the output_image parameter is currently unused;
    # the annotated array is returned instead of being saved.
    image = PIL.Image.open(input_image)
    svimage = np.array(image)
    svimage = bounding_box_annotator.annotate(svimage, detections)
    svimage = label_annotator.annotate(svimage, detections, labels)
    return svimage[:, :, ::-1]
# Load the YOLO-World config and point it at the pretrained checkpoint.
cfg = Config.fromfile(
    "./configs/pretrain/yolo_world_l_t2i_bn_2e-4_100e_4x8gpus_obj365v1_goldg_train_lvis_minival.py"
)
cfg.work_dir = "."
cfg.load_from = "yolow-v8_l_clipv2_frozen_t2iv2_bn_o365_goldg_pretrain.pth"

# Build the runner, load the weights, and attach the test pipeline.
runner = Runner.from_cfg(cfg)
runner.call_hook("before_run")
runner.load_or_resume()
pipeline = cfg.test_dataloader.dataset.pipeline
runner.pipeline = Compose(pipeline)
runner.model.eval()

# Run detection on a sample image and display the annotated result.
img = run_image(runner, "dog.jpeg")
sv.plot_image(img)
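
# A minimal sketch of persisting the annotated result; the filename
# "output.png" mirrors run_image's unused output_image default and is an
# assumption. run_image returns a BGR array, so flip the channels back to RGB
# and make the array contiguous before handing it to PIL.
PIL.Image.fromarray(np.ascontiguousarray(img[:, :, ::-1])).save("output.png")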