Openvino models #14283

Merged · 3 commits · Oct 11, 2024
9 changes: 5 additions & 4 deletions frigate/comms/dispatcher.py
@@ -142,10 +142,11 @@ def handle_update_event_description():
             )
 
         def handle_update_model_state():
-            model = payload["model"]
-            state = payload["state"]
-            self.model_state[model] = ModelStatusTypesEnum[state]
-            self.publish("model_state", json.dumps(self.model_state))
+            if payload:
+                model = payload["model"]
+                state = payload["state"]
+                self.model_state[model] = ModelStatusTypesEnum[state]
+                self.publish("model_state", json.dumps(self.model_state))
 
         def handle_model_state():
             self.publish("model_state", json.dumps(self.model_state.copy()))
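Reviewer note: the guard added above means an empty or None payload is now skipped instead of raising a KeyError. A minimal sketch of the payload shape this handler expects; the model name is illustrative, and the state string is assumed to be a valid ModelStatusTypesEnum member:

# Illustrative payload for handle_update_model_state (names are hypothetical)
payload = {"model": "vision_model.onnx", "state": "downloading"}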
7 changes: 5 additions & 2 deletions frigate/comms/inter_process.py
@@ -65,8 +65,11 @@ def __init__(self) -> None:
 
     def send_data(self, topic: str, data: any) -> any:
         """Sends data and then waits for reply."""
-        self.socket.send_json((topic, data))
-        return self.socket.recv_json()
+        try:
+            self.socket.send_json((topic, data))
+            return self.socket.recv_json()
+        except zmq.ZMQError:
+            return ""
 
     def stop(self) -> None:
         self.socket.close()
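Worth noting for callers: send_data() now swallows zmq.ZMQError (for example, a send on a socket already closed during shutdown) and returns an empty string instead of raising. A hedged caller-side sketch; `requestor` stands in for whichever requestor instance the caller already holds:

# Sketch of handling the new empty-string sentinel (requestor is assumed
# to be an instance of the requestor class defined in this file)
reply = requestor.send_data("model_state", None)
if reply == "":
    # ZMQ request failed (socket likely closed); skip this update
    pass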
3 changes: 3 additions & 0 deletions frigate/detectors/plugins/openvino.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import openvino as ov
+import openvino.properties as props
 from pydantic import Field
 from typing_extensions import Literal
 
@@ -34,6 +35,8 @@ def __init__(self, detector_config: OvDetectorConfig):
             logger.error(f"OpenVino model file {detector_config.model.path} not found.")
             raise FileNotFoundError
 
+        os.makedirs("/config/model_cache/openvino", exist_ok=True)
+        self.ov_core.set_property({props.cache_dir: "/config/model_cache/openvino"})
         self.interpreter = self.ov_core.compile_model(
             model=detector_config.model.path, device_name=detector_config.device
         )
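The two added lines enable OpenVINO's compiled-model cache, so subsequent startups load a serialized blob instead of recompiling the network. A standalone sketch of the same pattern, with an illustrative cache path and model file:

import os

import openvino as ov
import openvino.properties as props

core = ov.Core()
os.makedirs("/tmp/ov_cache", exist_ok=True)
core.set_property({props.cache_dir: "/tmp/ov_cache"})  # compiled blobs are written here
compiled = core.compile_model(model="model.onnx", device_name="AUTO")  # later runs hit the cache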
35 changes: 12 additions & 23 deletions frigate/embeddings/functions/onnx.py
@@ -5,7 +5,6 @@
 from typing import Callable, Dict, List, Optional, Union
 
 import numpy as np
-import onnxruntime as ort
 import requests
 from PIL import Image
 
@@ -19,7 +18,7 @@
 from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
-from frigate.util.model import get_ort_providers
+from frigate.util.model import ONNXModelRunner
 
 warnings.filterwarnings(
     "ignore",
@@ -54,16 +53,12 @@ def __init__(
         self.download_urls = download_urls
         self.embedding_function = embedding_function
         self.model_type = model_type  # 'text' or 'vision'
-        self.providers, self.provider_options = get_ort_providers(
-            force_cpu=device == "CPU",
-            requires_fp16=model_size == "large" or self.model_type == "text",
-            openvino_device=device,
-        )
-
+        self.model_size = model_size
+        self.device = device
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
         self.tokenizer = None
         self.feature_extractor = None
-        self.session = None
+        self.runner = None
         files_names = list(self.download_urls.keys()) + (
             [self.tokenizer_file] if self.tokenizer_file else []
         )
@@ -124,15 +119,17 @@ def _download_model(self, path: str):
         )
 
     def _load_model_and_tokenizer(self):
-        if self.session is None:
+        if self.runner is None:
             if self.downloader:
                 self.downloader.wait_for_download()
             if self.model_type == "text":
                 self.tokenizer = self._load_tokenizer()
             else:
                 self.feature_extractor = self._load_feature_extractor()
-            self.session = self._load_model(
-                os.path.join(self.download_path, self.model_file)
+            self.runner = ONNXModelRunner(
+                os.path.join(self.download_path, self.model_file),
+                self.device,
+                self.model_size,
             )
 
     def _load_tokenizer(self):
@@ -149,14 +146,6 @@ def _load_feature_extractor(self):
             f"{MODEL_CACHE_DIR}/{self.model_name}",
         )
 
-    def _load_model(self, path: str) -> Optional[ort.InferenceSession]:
-        if os.path.exists(path):
-            return ort.InferenceSession(
-                path, providers=self.providers, provider_options=self.provider_options
-            )
-        else:
-            return None
-
     def _process_image(self, image):
         if isinstance(image, str):
             if image.startswith("http"):
@@ -170,7 +159,7 @@ def __call__(
     ) -> List[np.ndarray]:
         self._load_model_and_tokenizer()
 
-        if self.session is None or (
+        if self.runner is None or (
             self.tokenizer is None and self.feature_extractor is None
         ):
             logger.error(
@@ -188,14 +177,14 @@ def __call__(
                 images=processed_images, return_tensors="np"
             )
 
-        input_names = [input.name for input in self.session.get_inputs()]
+        input_names = self.runner.get_input_names()
         onnx_inputs = {
             name: processed_inputs[name]
             for name in input_names
             if name in processed_inputs
         }
 
-        outputs = self.session.run(None, onnx_inputs)
+        outputs = self.runner.run(onnx_inputs)
         embeddings = self.embedding_function(outputs)
 
         return [embedding for embedding in embeddings]
60 changes: 60 additions & 0 deletions frigate/util/model.py
@@ -1,9 +1,16 @@
 """Model Utils"""
 
 import os
+from typing import Any
 
 import onnxruntime as ort
 
+try:
+    import openvino as ov
+except ImportError:
+    # openvino is not included
+    pass
+
 
 def get_ort_providers(
     force_cpu: bool = False, openvino_device: str = "AUTO", requires_fp16: bool = False
@@ -42,3 +49,56 @@ def get_ort_providers(
             options.append({})
 
     return (providers, options)
+
+
+class ONNXModelRunner:
+    """Run onnx models optimally based on available hardware."""
+
+    def __init__(self, model_path: str, device: str, requires_fp16: bool = False):
+        self.model_path = model_path
+        self.ort: ort.InferenceSession = None
+        self.ov: ov.Core = None
+        providers, options = get_ort_providers(device == "CPU", device, requires_fp16)
+
+        if "OpenVINOExecutionProvider" in providers:
+            # use OpenVINO directly
+            self.type = "ov"
+            self.ov = ov.Core()
+            self.ov.set_property(
+                {ov.properties.cache_dir: "/config/model_cache/openvino"}
+            )
+            self.interpreter = self.ov.compile_model(
+                model=model_path, device_name=device
+            )
+        else:
+            # Use ONNXRuntime
+            self.type = "ort"
+            self.ort = ort.InferenceSession(
+                model_path, providers=providers, provider_options=options
+            )
+
+    def get_input_names(self) -> list[str]:
+        if self.type == "ov":
+            input_names = []
+
+            for input in self.interpreter.inputs:
+                input_names.extend(input.names)
+
+            return input_names
+        elif self.type == "ort":
+            return [input.name for input in self.ort.get_inputs()]
+
+    def run(self, input: dict[str, Any]) -> Any:
+        if self.type == "ov":
+            infer_request = self.interpreter.create_infer_request()
+            input_tensor = list(input.values())
+
+            if len(input_tensor) == 1:
+                input_tensor = ov.Tensor(array=input_tensor[0])
+            else:
+                input_tensor = ov.Tensor(array=input_tensor)
+
+            infer_request.infer(input_tensor)
+            return [infer_request.get_output_tensor().data]
+        elif self.type == "ort":
+            return self.ort.run(None, input)
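A minimal usage sketch of the new runner for reviewers; the model path, device, and input shape are illustrative. The class picks OpenVINO whenever get_ort_providers() selects OpenVINOExecutionProvider and falls back to onnxruntime otherwise, behind a single interface:

import numpy as np

# Hypothetical model path and device; any ONNX model with image-shaped inputs works
runner = ONNXModelRunner("/config/model_cache/vision_model.onnx", device="GPU")
inputs = {
    name: np.zeros((1, 3, 224, 224), dtype=np.float32)  # dummy input per input name
    for name in runner.get_input_names()
}
outputs = runner.run(inputs)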