feat(huggingface): Load local artefacts in HuggingFace runtime (#1319)
Adds the ability to load local artefacts in the HuggingFace runtime.
  • Loading branch information
vtaskow authored Aug 18, 2023
1 parent 10aa233 commit 50b51fe
Showing 4 changed files with 76 additions and 6 deletions.
11 changes: 11 additions & 0 deletions runtimes/huggingface/README.md
@@ -55,6 +55,17 @@ MLSERVER_MODEL_HUGGINGFACE_OPTIMUM_MODEL=true
 ```
 ````
 
+### Loading models
+#### Local models
+It is possible to load a local model into a HuggingFace pipeline by specifying the path to the model artefact folder in `parameters.uri` in `model-settings.json`.
+
+#### HuggingFace models
+Models from the HuggingFace Hub can be loaded by specifying their name in `parameters.extra.pretrained_model` in `model-settings.json`.
+
+````{note}
+If `parameters.extra.pretrained_model` is specified, it takes precedence over `parameters.uri`.
+````
+
 ### Reference
 
 You can find the full reference of the accepted extra settings for the
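To illustrate the configuration the new README section describes, here is a minimal sketch of a `model-settings.json` that loads a local artefact. The model name, task, and folder path are hypothetical; the `implementation` string is the runtime's standard entry point:

```json
{
  "name": "my-local-model",
  "implementation": "mlserver_huggingface.HuggingFaceRuntime",
  "parameters": {
    "uri": "/mnt/models/model-artefacts",
    "extra": {
      "task": "question-answering"
    }
  }
}
```

Per the note above, adding `parameters.extra.pretrained_model` to this file would make the runtime ignore the local `uri` path.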
9 changes: 5 additions & 4 deletions runtimes/huggingface/mlserver_huggingface/common.py
@@ -23,14 +23,15 @@
 def load_pipeline_from_settings(
     hf_settings: HuggingFaceSettings, settings: ModelSettings
 ) -> Pipeline:
-    # TODO: Support URI for locally downloaded artifacts
-    # uri = model_parameters.uri
     pipeline = _get_pipeline_class(hf_settings)
 
     batch_size = 1
     if settings.max_batch_size:
         batch_size = settings.max_batch_size
 
+    model = hf_settings.pretrained_model
+    if not model:
+        model = settings.parameters.uri  # type: ignore
     tokenizer = hf_settings.pretrained_tokenizer
     if not tokenizer:
         tokenizer = hf_settings.pretrained_model
@@ -51,7 +52,7 @@ def load_pipeline_from_settings(

     hf_pipeline = pipeline(
         hf_settings.task_name,
-        model=hf_settings.pretrained_model,
+        model=model,
         tokenizer=tokenizer,
         device=hf_settings.device,
         batch_size=batch_size,
@@ -61,7 +62,7 @@ def load_pipeline_from_settings(
     # If max_batch_size > 0 we need to ensure tokens are padded
     if settings.max_batch_size:
         model = hf_pipeline.model
-        eos_token_id = model.config.eos_token_id
+        eos_token_id = model.config.eos_token_id  # type: ignore
         hf_pipeline.tokenizer.pad_token_id = [str(eos_token_id)]  # type: ignore
 
     return hf_pipeline
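The heart of the change is the fallback above: a set `hf_settings.pretrained_model` wins, otherwise the pipeline is pointed at the local path from `parameters.uri`. A standalone sketch of that precedence rule (simplified names, not the runtime's API):

```python
from typing import Optional

def resolve_model_source(pretrained_model: Optional[str], uri: Optional[str]) -> Optional[str]:
    """Precedence rule from the diff: a truthy pretrained_model (Hub name
    or path) takes priority; empty or None falls back to the local uri."""
    if pretrained_model:
        return pretrained_model
    return uri

# Mirrors the parametrised test expectations below:
assert resolve_model_source("some-model", "/some/folder/model-artefacts") == "some-model"
assert resolve_model_source(None, "/some/folder/model-artefacts") == "/some/folder/model-artefacts"
assert resolve_model_source("", "") == ""
```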
3 changes: 2 additions & 1 deletion runtimes/huggingface/mlserver_huggingface/settings.py
@@ -2,7 +2,7 @@
 import orjson
 
 from typing import Optional, Dict, Union, NewType
-from pydantic import BaseSettings
+from pydantic import BaseSettings, Extra
 from distutils.util import strtobool
 from transformers.pipelines import SUPPORTED_TASKS

@@ -37,6 +37,7 @@ class HuggingFaceSettings(BaseSettings):

     class Config:
         env_prefix = ENV_PREFIX_HUGGINGFACE_SETTINGS
+        extra = Extra.ignore
 
     # TODO: Document fields
     task: str = ""
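`Extra.ignore` is what lets `HuggingFaceSettings` be built from a `parameters.extra` dict that may carry keys the class does not declare; pydantic v1's `BaseSettings` forbids unknown fields by default, so such keys would otherwise raise a `ValidationError`. A minimal sketch of the behaviour with a toy class (not the runtime's actual settings):

```python
from pydantic import BaseSettings, Extra  # pydantic v1 API, as in the diff

class ToySettings(BaseSettings):
    class Config:
        extra = Extra.ignore  # silently drop undeclared fields

    task: str = ""

# The undeclared "uri" key is discarded instead of raising a ValidationError:
settings = ToySettings(**{"task": "question-answering", "uri": "/some/folder"})
print(settings.task)             # question-answering
print(hasattr(settings, "uri"))  # False
```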
59 changes: 58 additions & 1 deletion runtimes/huggingface/tests/test_common.py
@@ -1,6 +1,8 @@
+from unittest.mock import MagicMock, patch
+
 import pytest
 
-from typing import Dict
+from typing import Dict, Optional
 from optimum.onnxruntime.modeling_ort import ORTModelForQuestionAnswering
 from transformers.models.distilbert.modeling_distilbert import (
     DistilBertForQuestionAnswering,
@@ -45,3 +47,58 @@ def test_load_pipeline(optimum_model: bool, expected):
     pipeline = load_pipeline_from_settings(hf_settings, model_settings)
 
     assert isinstance(pipeline.model, expected)
+
+
+@pytest.mark.parametrize(
+    "pretrained_model, parameters_uri, expected",
+    [
+        (None, None, None),
+        (None, "", ""),
+        (None, "/some/folder/model-artefacts", "/some/folder/model-artefacts"),
+        ("", None, None),
+        ("", "", ""),
+        ("", "/some/folder/model-artefacts", "/some/folder/model-artefacts"),
+        ("some-model", None, "some-model"),
+        ("some-model", "", "some-model"),
+        ("some-model", "/some/folder/model-artefacts", "some-model"),
+        (
+            "/some/other/folder/model-artefacts",
+            None,
+            "/some/other/folder/model-artefacts",
+        ),
+        (
+            "/some/other/folder/model-artefacts",
+            "",
+            "/some/other/folder/model-artefacts",
+        ),
+        (
+            "/some/other/folder/model-artefacts",
+            "/some/folder/model-artefacts",
+            "/some/other/folder/model-artefacts",
+        ),
+    ],
+)
+@patch("mlserver_huggingface.common._get_pipeline_class")
+def test_pipeline_is_initialised_with_correct_model_param(
+    mock_pipeline_factory,
+    pretrained_model: Optional[str],
+    parameters_uri: Optional[str],
+    expected: Optional[str],
+):
+    mock_pipeline_factory.return_value = MagicMock()
+
+    hf_settings = HuggingFaceSettings(pretrained_model=pretrained_model)
+    model_params = ModelParameters(uri=parameters_uri)
+
+    model_settings = ModelSettings(
+        name="foo",
+        implementation=HuggingFaceRuntime,
+        parameters=model_params,
+    )
+
+    _ = load_pipeline_from_settings(hf_settings, model_settings)
+
+    mock_pipeline_factory.return_value.assert_called_once()
+    pipeline_call_args = mock_pipeline_factory.return_value.call_args
+
+    assert pipeline_call_args.kwargs["model"] == expected
