diff --git a/requirements-dev.txt b/requirements-dev.txt
index b663c725..49239d6e 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -7,8 +7,10 @@ mypy>=0.990,<1.1.0; platform_machine != "aarch64" and python_version >= "3.7"
 black==22.3.0
 types-requests==2.28.11.16
 # Prompting libraries needed for testing
-langchain==0.0.302; python_version>="3.9"
-openai>=0.27; python_version>="3.9"
+langchain==0.0.331; python_version>="3.9"
+# Workaround for LangChain bug: pin OpenAI version. To be removed after LangChain has been fixed - see
+# https://github.com/langchain-ai/langchain/issues/12967.
+openai>=0.27,<=0.28.1; python_version>="3.9"
 
 # Necessary for running all local models on GPU.
 transformers[sentencepiece]>=4.0.0
diff --git a/setup.cfg b/setup.cfg
index e6646203..e79f6f1a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-version = 0.6.1
+version = 0.6.3
 description = Integrating LLMs into structured NLP pipelines
 author = Explosion
 author_email = contact@explosion.ai
@@ -44,7 +44,7 @@ spacy_misc =
 
 [options.extras_require]
 langchain =
-    langchain==0.0.249
+    langchain==0.0.335
 transformers =
     torch>=1.13.1,<2.0
     transformers>=4.28.1,<5.0
diff --git a/spacy_llm/models/hf/base.py b/spacy_llm/models/hf/base.py
index 71fdc074..87209118 100644
--- a/spacy_llm/models/hf/base.py
+++ b/spacy_llm/models/hf/base.py
@@ -27,12 +27,46 @@ def __init__(
         inference_config (Dict[Any, Any]): HF config for model run.
         """
         self._name = name if self.hf_account in name else f"{self.hf_account}/{name}"
-        self._config_init, self._config_run = self.compile_default_configs()
+        default_cfg_init, default_cfg_run = self.compile_default_configs()
+        self._config_init, self._config_run = default_cfg_init, default_cfg_run
+
         if config_init:
             self._config_init = {**self._config_init, **config_init}
         if config_run:
             self._config_run = {**self._config_run, **config_run}
 
+        # `device` and `device_map` are conflicting arguments - ensure they aren't both set.
+        if config_init:
+            # Case 1: both device and device_map explicitly set by user.
+            if "device" in config_init and "device_map" in config_init:
+                warnings.warn(
+                    "`device` and `device_map` are conflicting arguments - don't set both. Dropping argument "
+                    "`device`."
+                )
+                self._config_init.pop("device")
+            # Case 2: we have a CUDA GPU (and hence device="cuda:0" by default), but device_map is set by user.
+            elif "device" in default_cfg_init and "device_map" in config_init:
+                self._config_init.pop("device")
+            # Case 3: we don't have a CUDA GPU (and hence "device_map=auto" by default), but device is set by user.
+            elif "device_map" in default_cfg_init and "device" in config_init:
+                self._config_init.pop("device_map")
+
+        # Fetch proper torch.dtype, if specified.
+        if (
+            has_torch
+            and "torch_dtype" in self._config_init
+            and self._config_init["torch_dtype"] != "auto"
+        ):
+            try:
+                self._config_init["torch_dtype"] = getattr(
+                    torch, self._config_init["torch_dtype"]
+                )
+            except AttributeError as ex:
+                raise ValueError(
+                    f"Invalid value {self._config_init['torch_dtype']} was specified for `torch_dtype`. "
+                    f"Double-check you specified a valid dtype."
+                ) from ex
+
         # Init HF model.
         HuggingFace.check_installation()
         self._check_model()
 
@@ -89,7 +123,7 @@ compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
         default_cfg_run: Dict[str, Any] = {}
 
         if has_torch:
-            default_cfg_init["torch_dtype"] = torch.bfloat16
+            default_cfg_init["torch_dtype"] = "bfloat16"
             if has_torch_cuda_gpu:
                 # this ensures it fails explicitely when GPU is not enabled or sufficient
                 default_cfg_init["device"] = "cuda:0"
@@ -106,6 +140,7 @@ compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
                     "Install CUDA to load and run the LLM on the GPU, or install 'accelerate' to dynamically "
                     "distribute the LLM on the CPU or even the hard disk. The latter may be slow."
                 )
+
         return default_cfg_init, default_cfg_run
 
     @abc.abstractmethod
diff --git a/spacy_llm/models/hf/falcon.py b/spacy_llm/models/hf/falcon.py
index 76d4e9e2..2e18ac9d 100644
--- a/spacy_llm/models/hf/falcon.py
+++ b/spacy_llm/models/hf/falcon.py
@@ -19,7 +19,6 @@ def __init__(
         config_run: Optional[Dict[str, Any]],
     ):
         self._tokenizer: Optional["transformers.AutoTokenizer"] = None
-        self._device: Optional[str] = None
         super().__init__(name=name, config_init=config_init, config_run=config_run)
 
         assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)
diff --git a/spacy_llm/models/hf/mistral.py b/spacy_llm/models/hf/mistral.py
index 6fe78c78..56ae7be3 100644
--- a/spacy_llm/models/hf/mistral.py
+++ b/spacy_llm/models/hf/mistral.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Iterable, Optional, Tuple
+from typing import Any, Callable, Dict, Iterable, Optional
 
 from confection import SimpleFrozenDict
 
@@ -17,7 +17,6 @@ def __init__(
         config_run: Optional[Dict[str, Any]],
     ):
         self._tokenizer: Optional["transformers.AutoTokenizer"] = None
-        self._device: Optional[str] = None
         self._is_instruct = "instruct" in name
         super().__init__(name=name, config_init=config_init, config_run=config_run)
 
@@ -33,14 +32,15 @@ def __init__(
     def init_model(self) -> Any:
         self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
         init_cfg = self._config_init
+        device: Optional[str] = None
         if "device" in init_cfg:
-            self._device = init_cfg.pop("device")
+            device = init_cfg.pop("device")
 
         model = transformers.AutoModelForCausalLM.from_pretrained(
             self._name, **init_cfg, resume_download=True
         )
-        if self._device:
-            model.to(self._device)
+        if device:
+            model.to(device)
 
         return model
 
@@ -61,8 +61,7 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[ove
             ).input_ids
             for prompt in prompts
         ]
-        if self._device:
-            tokenized_input_ids = [tp.to(self._device) for tp in tokenized_input_ids]
+        tokenized_input_ids = [tp.to(self._model.device) for tp in tokenized_input_ids]
 
         return [
             self._tokenizer.decode(
@@ -74,14 +73,6 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[ove
             for tok_ii in tokenized_input_ids
         ]
 
-    @staticmethod
-    def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
-        default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
-        return (
-            default_cfg_init,
-            default_cfg_run,
-        )
-
 
 @registry.llm_models("spacy.Mistral.v1")
 def mistral_hf(
diff --git a/spacy_llm/models/hf/openllama.py b/spacy_llm/models/hf/openllama.py
index 4cf2f4cf..8ceb5bbc 100644
--- a/spacy_llm/models/hf/openllama.py
+++ b/spacy_llm/models/hf/openllama.py
@@ -2,7 +2,7 @@
 
 from confection import SimpleFrozenDict
 
-from ...compat import Literal, torch, transformers
+from ...compat import Literal, transformers
 from ...registry.util import registry
 from .base import HuggingFace
 
@@ -22,7 +22,6 @@ def __init__(
         config_run: Optional[Dict[str, Any]],
     ):
         self._tokenizer: Optional["transformers.AutoTokenizer"] = None
-        self._device: Optional[str] = None
         super().__init__(name=name, config_init=config_init, config_run=config_run)
 
     def init_model(self) -> "transformers.AutoModelForCausalLM":
@@ -32,14 +31,15 @@ def init_model(self) -> "transformers.AutoModelForCausalLM":
         # Initialize tokenizer and model.
         self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
         init_cfg = self._config_init
+        device: Optional[str] = None
         if "device" in init_cfg:
-            self._device = init_cfg.pop("device")
+            device = init_cfg.pop("device")
+
         model = transformers.AutoModelForCausalLM.from_pretrained(
             self._name, **init_cfg
         )
-
-        if self._device:
-            model.to(self._device)
+        if device:
+            model.to(device)
 
         return model
 
@@ -48,8 +48,9 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[ove
         tokenized_input_ids = [
             self._tokenizer(prompt, return_tensors="pt").input_ids for prompt in prompts
         ]
-        if self._device:
-            tokenized_input_ids = [tii.to(self._device) for tii in tokenized_input_ids]
+        tokenized_input_ids = [
+            tii.to(self._model.device) for tii in tokenized_input_ids
+        ]
 
         assert hasattr(self._model, "generate")
         return [
@@ -71,7 +72,7 @@ def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
         return (
             {
                 **default_cfg_init,
-                "torch_dtype": torch.float16,
+                "torch_dtype": "float16",
             },
             {**default_cfg_run, "max_new_tokens": 32},
         )
diff --git a/spacy_llm/models/hf/stablelm.py b/spacy_llm/models/hf/stablelm.py
index 4711d69f..34698e0e 100644
--- a/spacy_llm/models/hf/stablelm.py
+++ b/spacy_llm/models/hf/stablelm.py
@@ -42,7 +42,6 @@ def __init__(
     ):
         self._tokenizer: Optional["transformers.AutoTokenizer"] = None
         self._is_tuned = "tuned" in name
-        self._device: Optional[str] = None
         super().__init__(name=name, config_init=config_init, config_run=config_run)
 
     def init_model(self) -> "transformers.AutoModelForCausalLM":
@@ -51,14 +50,15 @@ def init_model(self) -> "transformers.AutoModelForCausalLM":
         """
         self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
         init_cfg = self._config_init
+        device: Optional[str] = None
         if "device" in init_cfg:
-            self._device = init_cfg.pop("device")
+            device = init_cfg.pop("device")
+
         model = transformers.AutoModelForCausalLM.from_pretrained(
             self._name, **init_cfg
         )
-
-        if self._device:
-            model.half().to(self._device)
+        if device:
+            model.half().to(device)
 
         return model
 
@@ -80,8 +80,7 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[ove
                 ]
             )
         ]
-        if self._device:
-            tokenized_input_ids = [tp.to(self._device) for tp in tokenized_input_ids]
+        tokenized_input_ids = [tp.to(self._model.device) for tp in tokenized_input_ids]
 
         assert hasattr(self._model, "generate")
         return [
diff --git a/spacy_llm/models/langchain/model.py b/spacy_llm/models/langchain/model.py
index 6c491fd3..03657cdf 100644
--- a/spacy_llm/models/langchain/model.py
+++ b/spacy_llm/models/langchain/model.py
@@ -43,7 +43,7 @@ def get_type_to_cls_dict() -> Dict[
         """Returns langchain.llms.type_to_cls_dict.
         RETURNS (Dict[str, Type[langchain.base_language.BaseLanguageModel]]): langchain.llms.type_to_cls_dict.
         """
-        return langchain.llms.type_to_cls_dict
+        return getattr(langchain.llms, "type_to_cls_dict")
 
     def __call__(self, prompts: Iterable[Any]) -> Iterable[Any]:
         """Executes prompts on specified API.
diff --git a/spacy_llm/models/rest/anthropic/model.py b/spacy_llm/models/rest/anthropic/model.py
index 4c2567f2..774d0e83 100644
--- a/spacy_llm/models/rest/anthropic/model.py
+++ b/spacy_llm/models/rest/anthropic/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -54,7 +54,7 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
         headers = {
             **self._credentials,
             "model": self._name,
-            "anthropic_version": self._config.get("anthropic_version", "2023-06-01"),
+            "anthropic-version": self._config.get("anthropic-version", "2023-06-01"),
             "Content-Type": "application/json",
         }
 
@@ -107,26 +107,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
 
         assert len(api_responses) == len(prompts)
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return (
-            # claude-2
-            "claude-2",
-            "claude-2-100k",
-            # claude-1
-            "claude-1",
-            "claude-1-100k",
-            # claude-instant-1
-            "claude-instant-1",
-            "claude-instant-1-100k",
-            # claude-instant-1.1
-            "claude-instant-1.1",
-            "claude-instant-1.1-100k",
-            # claude-1.3
-            "claude-1.3",
-            "claude-1.3-100k",
-            # others
-            "claude-1.0",
-            "claude-1.2",
-        )
diff --git a/spacy_llm/models/rest/azure/model.py b/spacy_llm/models/rest/azure/model.py
index a30173ff..d8f433d6 100644
--- a/spacy_llm/models/rest/azure/model.py
+++ b/spacy_llm/models/rest/azure/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -48,7 +48,8 @@ def endpoint(self) -> str:
         return (
             self._endpoint
             + ("" if self._endpoint.endswith("/") else "/")
-            + f"openai/deployments/{self._name}/{self._model_type.value}"
+            + f"openai/deployments/{self._name}/{'' if self._model_type == ModelType.COMPLETION else 'chat/'}"
+            f"completions"
         )
 
     @property
@@ -146,12 +147,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
             api_responses.append(response.get("text", srsly.json_dumps(response)))
 
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        # We treat the deployment name as "model name", hence it can be arbitrary.
-        return ("",)
-
-    def _check_model(self) -> None:
-        # We treat the deployment name as "model name", hence it can be arbitrary.
-        pass
diff --git a/spacy_llm/models/rest/base.py b/spacy_llm/models/rest/base.py
index b7dccca3..f54f90ac 100644
--- a/spacy_llm/models/rest/base.py
+++ b/spacy_llm/models/rest/base.py
@@ -1,7 +1,7 @@
 import abc
 import time
 from enum import Enum
-from typing import Any, Callable, Dict, Iterable, Optional, Tuple
+from typing import Any, Callable, Dict, Iterable, Optional
 
 import requests  # type: ignore
 from requests import ConnectTimeout, ReadTimeout
@@ -61,16 +61,8 @@ def __init__(
         assert self._interval > 0
         assert self._max_request_time > 0
 
-        self._check_model()
         self._verify_auth()
 
-    def _check_model(self) -> None:
-        """Checks whether model is supported. Raises if it isn't."""
-        if self._name not in self.get_model_names():
-            raise ValueError(
-                f"Model '{self._name}' is not supported - select one of {self.get_model_names()} instead"
-            )
-
     @abc.abstractmethod
     def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
         """Executes prompts on specified API.
@@ -78,13 +70,6 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
         RETURNS (Iterable[str]): API responses.
         """
 
-    @classmethod
-    @abc.abstractmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        """Names of supported models.
-        RETURNS (Tuple[str]): Names of supported models.
-        """
-
     @property
     @abc.abstractmethod
     def credentials(self) -> Dict[str, str]:
diff --git a/spacy_llm/models/rest/cohere/model.py b/spacy_llm/models/rest/cohere/model.py
index 293ed92b..58ba3231 100644
--- a/spacy_llm/models/rest/cohere/model.py
+++ b/spacy_llm/models/rest/cohere/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -54,7 +54,7 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
                 call_method=requests.post,
                 url=self._endpoint,
                 headers=headers,
-                json={**json_data, **self._config},
+                json={**json_data, **self._config, "model": self._name},
                 timeout=self._max_request_time,
             )
             try:
@@ -111,7 +111,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
             else:
                 api_responses.append(srsly.json_dumps(response))
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return "command", "command-light", "command-light-nightly", "command-nightly"
diff --git a/spacy_llm/models/rest/noop/model.py b/spacy_llm/models/rest/noop/model.py
index 0e3e0398..31d830b8 100644
--- a/spacy_llm/models/rest/noop/model.py
+++ b/spacy_llm/models/rest/noop/model.py
@@ -1,5 +1,5 @@
 import time
-from typing import Dict, Iterable, Tuple
+from typing import Dict, Iterable
 
 from ..base import REST
 
@@ -33,7 +33,3 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
         # Assume time penalty for API calls.
         time.sleep(NoOpModel._CALL_TIMEOUT)
         return [_NOOP_RESPONSE] * len(list(prompts))
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return ("NoOp",)
diff --git a/spacy_llm/models/rest/openai/model.py b/spacy_llm/models/rest/openai/model.py
index a712e082..8fa9dc20 100644
--- a/spacy_llm/models/rest/openai/model.py
+++ b/spacy_llm/models/rest/openai/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -140,31 +140,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
                 api_responses.append(srsly.json_dumps(response))
 
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return (
-            # gpt-4
-            "gpt-4",
-            "gpt-4-0314",
-            "gpt-4-32k",
-            "gpt-4-32k-0314",
-            # gpt-3.5
-            "gpt-3.5-turbo",
-            "gpt-3.5-turbo-16k",
-            "gpt-3.5-turbo-0613",
-            "gpt-3.5-turbo-0613-16k",
-            "gpt-3.5-turbo-instruct",
-            # text-davinci
-            "text-davinci-002",
-            "text-davinci-003",
-            # others
-            "code-davinci-002",
-            "text-curie-001",
-            "text-babbage-001",
-            "text-ada-001",
-            "davinci",
-            "curie",
-            "babbage",
-            "ada",
-        )
diff --git a/spacy_llm/models/rest/openai/registry.py b/spacy_llm/models/rest/openai/registry.py
index 1a4c4fd7..82436e4a 100644
--- a/spacy_llm/models/rest/openai/registry.py
+++ b/spacy_llm/models/rest/openai/registry.py
@@ -21,6 +21,35 @@
 """
 
 
+@registry.llm_models("spacy.GPT-4.v3")
+def openai_gpt_4_v3(
+    config: Dict[Any, Any] = SimpleFrozenDict(temperature=_DEFAULT_TEMPERATURE),
+    name: str = "gpt-4",  # noqa: F722
+    strict: bool = OpenAI.DEFAULT_STRICT,
+    max_tries: int = OpenAI.DEFAULT_MAX_TRIES,
+    interval: float = OpenAI.DEFAULT_INTERVAL,
+    max_request_time: float = OpenAI.DEFAULT_MAX_REQUEST_TIME,
+) -> Callable[[Iterable[str]], Iterable[str]]:
+    """Returns OpenAI instance for 'gpt-4' model using REST to prompt API.
+
+    config (Dict[Any, Any]): LLM config passed on to the model's initialization.
+    name (str): Model name to use. Can be any model name supported by the OpenAI API - e. g. 'gpt-4',
+        "gpt-4-1106-preview", ....
+    RETURNS (Callable[[Iterable[str]], Iterable[str]]]): OpenAI instance for 'gpt-4' model
+
+    DOCS: https://spacy.io/api/large-language-models#models
+    """
+    return OpenAI(
+        name=name,
+        endpoint=Endpoints.CHAT.value,
+        config=config,
+        strict=strict,
+        max_tries=max_tries,
+        interval=interval,
+        max_request_time=max_request_time,
+    )
+
+
 @registry.llm_models("spacy.GPT-4.v2")
 def openai_gpt_4_v2(
     config: Dict[Any, Any] = SimpleFrozenDict(temperature=_DEFAULT_TEMPERATURE),
@@ -35,7 +64,7 @@ def openai_gpt_4_v2(
     """Returns OpenAI instance for 'gpt-4' model using REST to prompt API.
 
     config (Dict[Any, Any]): LLM config passed on to the model's initialization.
-    name (Optional[Literal["0314", "32k", "32k-0314"]]): Model to use. Base 'gpt-4' model by default.
+    name (Literal["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]): Model to use. Base 'gpt-4' model by default.
     RETURNS (Callable[[Iterable[str]], Iterable[str]]]): OpenAI instance for 'gpt-4' model
 
     DOCS: https://spacy.io/api/large-language-models#models
@@ -65,7 +94,8 @@ def openai_gpt_4(
     """Returns OpenAI instance for 'gpt-4' model using REST to prompt API.
 
     config (Dict[Any, Any]): LLM config passed on to the model's initialization.
-    name (Optional[Literal["0314", "32k", "32k-0314"]]): Model to use. Base 'gpt-4' model by default.
+    name (Literal["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]): Model to use. Base 'gpt-4' model by
+        default.
     RETURNS (Callable[[Iterable[str]], Iterable[str]]]): OpenAI instance for 'gpt-4' model
 
     DOCS: https://spacy.io/api/large-language-models#models
@@ -81,6 +111,37 @@ def openai_gpt_4(
     )
 
 
+@registry.llm_models("spacy.GPT-3-5.v3")
+def openai_gpt_3_5_v3(
+    config: Dict[Any, Any] = SimpleFrozenDict(temperature=_DEFAULT_TEMPERATURE),
+    name: str = "gpt-3.5-turbo",
+    strict: bool = OpenAI.DEFAULT_STRICT,
+    max_tries: int = OpenAI.DEFAULT_MAX_TRIES,
+    interval: float = OpenAI.DEFAULT_INTERVAL,
+    max_request_time: float = OpenAI.DEFAULT_MAX_REQUEST_TIME,
+) -> Callable[[Iterable[str]], Iterable[str]]:
+    """Returns OpenAI instance for 'gpt-3.5' model using REST to prompt API.
+
+    config (Dict[Any, Any]): LLM config passed on to the model's initialization.
+    name (str): Name of model to use. Can be any model name supported by the OpenAI API - e. g. 'gpt-3.5',
+        "gpt-3.5-turbo", ....
+    RETURNS (Callable[[Iterable[str]], Iterable[str]]]): OpenAI instance for 'gpt-3.5' model
+
+    DOCS: https://spacy.io/api/large-language-models#models
+    """
+    return OpenAI(
+        name=name,
+        endpoint=Endpoints.CHAT.value
+        # gpt-3.5-turbo-instruct runs on the non-chat endpoint, so we use that one by default to allow batching.
+        if name != "gpt-3.5-turbo-instruct" else Endpoints.NON_CHAT.value,
+        config=config,
+        strict=strict,
+        max_tries=max_tries,
+        interval=interval,
+        max_request_time=max_request_time,
+    )
+
+
 @registry.llm_models("spacy.GPT-3-5.v2")
 def openai_gpt_3_5_v2(
     config: Dict[Any, Any] = SimpleFrozenDict(temperature=_DEFAULT_TEMPERATURE),
diff --git a/spacy_llm/models/rest/palm/model.py b/spacy_llm/models/rest/palm/model.py
index 1e9b10b1..1a488000 100644
--- a/spacy_llm/models/rest/palm/model.py
+++ b/spacy_llm/models/rest/palm/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -107,7 +107,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
             api_responses.append(srsly.json_dumps(response))
 
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return "text-bison-001", "chat-bison-001"
diff --git a/spacy_llm/pipeline/llm.py b/spacy_llm/pipeline/llm.py
index 1c3365d4..70003a8b 100644
--- a/spacy_llm/pipeline/llm.py
+++ b/spacy_llm/pipeline/llm.py
@@ -138,10 +138,15 @@ def labels(self) -> Tuple[str, ...]:
             labels = self._task.labels
         return labels
 
-    def add_label(self, label: str) -> int:
+    def add_label(self, label: str, label_definition: Optional[str] = None) -> int:
        if not isinstance(self._task, LabeledTask):
             raise ValueError("The task of this LLM component does not have labels.")
-        return self._task.add_label(label)
+        return self._task.add_label(label, label_definition)
+
+    def clear(self) -> None:
+        if not isinstance(self._task, LabeledTask):
+            raise ValueError("The task of this LLM component does not have labels.")
+        return self._task.clear()
 
     @property
     def task(self) -> LLMTask:
diff --git a/spacy_llm/tasks/builtin_task.py b/spacy_llm/tasks/builtin_task.py
index f959c7c0..e879c52d 100644
--- a/spacy_llm/tasks/builtin_task.py
+++ b/spacy_llm/tasks/builtin_task.py
@@ -329,14 +329,25 @@ def _extract_labels_from_example(self, example: Example) -> List[str]:
     def labels(self) -> Tuple[str, ...]:
         return tuple(self._label_dict.values())
 
-    def add_label(self, label: str) -> int:
+    def add_label(self, label: str, label_definition: Optional[str] = None) -> int:
+        """Add a label to the task"""
         if not isinstance(label, str):
             raise ValueError(Errors.E187)
         if label in self.labels:
             return 0
         self._label_dict[self._normalizer(label)] = label
+        if label_definition is None:
+            return 1
+        if self._label_definitions is None:
+            self._label_definitions = {}
+        self._label_definitions[label] = label_definition
         return 1
 
+    def clear(self) -> None:
+        """Reset all labels."""
+        self._label_dict = {}
+        self._label_definitions = None
+
     @property
     def normalizer(self) -> Callable[[str], str]:
         return self._normalizer
diff --git a/spacy_llm/tasks/rel/task.py b/spacy_llm/tasks/rel/task.py
index 83accb81..81fd8917 100644
--- a/spacy_llm/tasks/rel/task.py
+++ b/spacy_llm/tasks/rel/task.py
@@ -8,7 +8,7 @@
 from ...ty import FewshotExample, TaskResponseParser
 from ..builtin_task import BuiltinTaskWithLabels
 from ..templates import read_template
-from .util import EntityItem, RelationItem
+from .util import RelationItem
 
 DEFAULT_REL_TEMPLATE: str = read_template("rel.v1")
 
@@ -77,9 +77,7 @@ def _preannotate(doc: Union[Doc, FewshotExample]) -> str:
         for i, ent in enumerate(doc.ents):
             end = ent.end_char
             before, after = text[: end + offset], text[end + offset :]
-            annotation = (
-                f"[ENT{i}:{ent.label if isinstance(ent, EntityItem) else ent.label_}]"
-            )
+            annotation = f"[ENT{i}:{ent.label_ if isinstance(doc, Doc) else ent.label}]"
             offset += len(annotation)
             text = f"{before}{annotation}{after}"
 
diff --git a/spacy_llm/tasks/sentiment/registry.py b/spacy_llm/tasks/sentiment/registry.py
index 6dd51606..ab15f151 100644
--- a/spacy_llm/tasks/sentiment/registry.py
+++ b/spacy_llm/tasks/sentiment/registry.py
@@ -1,10 +1,10 @@
 from typing import Optional, Type
 
 from ...registry import registry
-from ...ty import ExamplesConfigType, FewshotExample, TaskResponseParser
+from ...ty import ExamplesConfigType, FewshotExample, Scorer, TaskResponseParser
 from .parser import parse_responses_v1
 from .task import DEFAULT_SENTIMENT_TEMPLATE_V1, SentimentTask
-from .util import SentimentExample
+from .util import SentimentExample, score
 
 
 @registry.llm_tasks("spacy.Sentiment.v1")
@@ -14,6 +14,7 @@ def make_sentiment_task(
     prompt_example_type: Optional[Type[FewshotExample]] = None,
     examples: ExamplesConfigType = None,
     field: str = "sentiment",
+    scorer: Optional[Scorer] = None,
 ):
     """Sentiment.v1 task factory.
 
@@ -24,6 +25,7 @@ def make_sentiment_task(
     examples (ExamplesConfigType): Optional callable that reads a file containing task examples for
         few-shot learning. If None is passed, then zero-shot learning will be used.
     field (str): The name of the doc extension in which to store the summary.
+    scorer (Optional[Scorer]): Scorer function.
""" raw_examples = examples() if callable(examples) else examples example_type = prompt_example_type or SentimentExample @@ -37,4 +39,5 @@ def make_sentiment_task( prompt_example_type=example_type, prompt_examples=sentiment_examples, field=field, + scorer=scorer or score, ) diff --git a/spacy_llm/tasks/sentiment/task.py b/spacy_llm/tasks/sentiment/task.py index ab34b4dd..57015ca8 100644 --- a/spacy_llm/tasks/sentiment/task.py +++ b/spacy_llm/tasks/sentiment/task.py @@ -1,10 +1,10 @@ -from typing import Callable, Iterable, List, Optional, Type +from typing import Any, Callable, Dict, Iterable, List, Optional, Type from spacy.language import Language from spacy.tokens import Doc from spacy.training import Example -from ...ty import FewshotExample, Self, TaskResponseParser +from ...ty import FewshotExample, Scorer, Self, TaskResponseParser from ..builtin_task import BuiltinTask from ..templates import read_template @@ -19,6 +19,7 @@ def __init__( prompt_example_type: Type[FewshotExample[Self]], field: str, prompt_examples: Optional[List[FewshotExample[Self]]], + scorer: Scorer, ): """Sentiment analysis task. @@ -35,6 +36,7 @@ def __init__( prompt_examples=prompt_examples, ) self._field = field + self._scorer = scorer self._check_doc_extension() def _check_doc_extension(self): @@ -75,6 +77,9 @@ def parse_responses( yield doc + def scorer(self, examples: Iterable[Example]) -> Dict[str, Any]: + return self._scorer(examples, field=self._field) + @property def _cfg_keys(self) -> List[str]: return ["_template"] diff --git a/spacy_llm/tasks/sentiment/util.py b/spacy_llm/tasks/sentiment/util.py index 72adbedb..f53bae32 100644 --- a/spacy_llm/tasks/sentiment/util.py +++ b/spacy_llm/tasks/sentiment/util.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Any, Dict, Iterable, Optional from spacy.training import Example @@ -17,3 +17,19 @@ def generate(cls, example: Example, task: SentimentTask) -> Optional[Self]: text=example.reference.text, score=getattr(example.reference._, task.field), ) + + +def score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: + """Score sentiment accuracy in examples. + examples (Iterable[Example]): Examples to score. + RETURNS (Dict[str, Any]): Dict with metric name -> score. 
+ """ + score_diffs = [ + abs( + getattr(example.predicted._, kwargs["field"]) + - getattr(example.reference._, kwargs["field"]) + ) + for example in examples + ] + + return {"acc_sentiment": 1 - (sum(score_diffs) / len(score_diffs))} diff --git a/spacy_llm/tests/models/test_anthropic.py b/spacy_llm/tests/models/test_anthropic.py index 61f1da77..eb366205 100644 --- a/spacy_llm/tests/models/test_anthropic.py +++ b/spacy_llm/tests/models/test_anthropic.py @@ -56,7 +56,10 @@ def test_anthropic_error_unsupported_model(): """Ensure graceful handling of error when model is not supported""" incorrect_model = "x-gpt-3.5-turbo" with pytest.raises( - ValueError, match=re.escape("Model 'x-gpt-3.5-turbo' is not supported") + ValueError, + match=re.escape( + "Ensure that the selected model (x-gpt-3.5-turbo) is supported by the API" + ), ): Anthropic( name=incorrect_model, diff --git a/spacy_llm/tests/models/test_cohere.py b/spacy_llm/tests/models/test_cohere.py index 4b555aa4..5d1db35f 100644 --- a/spacy_llm/tests/models/test_cohere.py +++ b/spacy_llm/tests/models/test_cohere.py @@ -77,7 +77,7 @@ def test_cohere_api_response_when_error(): def test_cohere_error_unsupported_model(): """Ensure graceful handling of error when model is not supported""" incorrect_model = "x-gpt-3.5-turbo" - with pytest.raises(ValueError, match="Model 'x-gpt-3.5-turbo' is not supported"): + with pytest.raises(ValueError, match="model not found"): Cohere( name=incorrect_model, config={}, diff --git a/spacy_llm/tests/models/test_hf.py b/spacy_llm/tests/models/test_hf.py new file mode 100644 index 00000000..3058035c --- /dev/null +++ b/spacy_llm/tests/models/test_hf.py @@ -0,0 +1,78 @@ +from typing import Tuple + +import pytest +import spacy +from thinc.compat import has_torch_cuda_gpu + +from spacy_llm.compat import has_accelerate, torch + +_PIPE_CFG = { + "model": { + "@llm_models": "", + "name": "", + }, + "task": {"@llm_tasks": "spacy.NoOp.v1"}, + "save_io": True, +} + + +@pytest.mark.gpu +@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") +@pytest.mark.parametrize( + "model", (("spacy.Dolly.v1", "dolly-v2-3b"), ("spacy.Llama2.v1", "Llama-2-7b-hf")) +) +def test_device_config_conflict(model: Tuple[str, str]): + """Test device configuration.""" + nlp = spacy.blank("en") + model, name = model + cfg = {**_PIPE_CFG, **{"model": {"@llm_models": model, "name": name}}} + + # Set device only. + cfg["model"]["config_init"] = {"device": "cpu"} # type: ignore[index] + nlp.add_pipe("llm", name="llm1", config=cfg) + + # Set device_map only. + cfg["model"]["config_init"] = {"device_map": "auto"} # type: ignore[index] + if has_accelerate: + nlp.add_pipe("llm", name="llm2", config=cfg) + else: + with pytest.raises(ImportError, match="requires Accelerate"): + nlp.add_pipe("llm", name="llm2", config=cfg) + + # Set device_map and device. + cfg["model"]["config_init"] = {"device_map": "auto", "device": "cpu"} # type: ignore[index] + with pytest.warns(UserWarning, match="conflicting arguments"): + if has_accelerate: + nlp.add_pipe("llm", name="llm3", config=cfg) + else: + with pytest.raises(ImportError, match="requires Accelerate"): + nlp.add_pipe("llm", name="llm3", config=cfg) + + torch.cuda.empty_cache() + + +@pytest.mark.gpu +@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") +def test_torch_dtype(): + """Test torch_dtype setting.""" + nlp = spacy.blank("en") + cfg = { + **_PIPE_CFG, + **{"model": {"@llm_models": "spacy.Dolly.v1", "name": "dolly-v2-3b"}}, + } + + # Should be converted to torch.float16. 
+    cfg["model"]["config_init"] = {"torch_dtype": "float16"}  # type: ignore[index]
+    llm = nlp.add_pipe("llm", name="llm1", config=cfg)
+    assert llm._model._config_init["torch_dtype"] == torch.float16
+
+    # Should remain "auto".
+    cfg["model"]["config_init"] = {"torch_dtype": "auto"}  # type: ignore[index]
+    nlp.add_pipe("llm", name="llm2", config=cfg)
+
+    # Should fail - nonexistent dtype.
+    cfg["model"]["config_init"] = {"torch_dtype": "float999"}  # type: ignore[index]
+    with pytest.raises(ValueError, match="Invalid value float999"):
+        nlp.add_pipe("llm", name="llm3", config=cfg)
+
+    torch.cuda.empty_cache()
diff --git a/spacy_llm/tests/models/test_mistral.py b/spacy_llm/tests/models/test_mistral.py
index 5dde49f0..548d4d29 100644
--- a/spacy_llm/tests/models/test_mistral.py
+++ b/spacy_llm/tests/models/test_mistral.py
@@ -48,6 +48,7 @@ def test_init():
 
 
 @pytest.mark.gpu
+@pytest.mark.skip(reason="CI runner needs more GPU memory")
 @pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
 def test_init_from_config():
     orig_config = Config().from_str(_NLP_CONFIG)
diff --git a/spacy_llm/tests/tasks/test_ner.py b/spacy_llm/tests/tasks/test_ner.py
index 7d7c8c82..3c180048 100644
--- a/spacy_llm/tests/tasks/test_ner.py
+++ b/spacy_llm/tests/tasks/test_ner.py
@@ -992,7 +992,42 @@ def test_add_label():
     doc = nlp(text)
     assert len(doc.ents) == 0
 
+    for label, definition in [
+        ("PERSON", "Every person with the name Jack"),
+        ("LOCATION", "A geographical location, like a country or a city"),
+        ("COMPANY", None),
+    ]:
+        llm.add_label(label, definition)
+    doc = nlp(text)
+    assert len(doc.ents) > 1
+
+
+@pytest.mark.external
+@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available")
+def test_clear_label():
+    nlp = spacy.blank("en")
+    llm = nlp.add_pipe(
+        "llm",
+        config={
+            "task": {
+                "@llm_tasks": "spacy.NER.v3",
+            },
+            "model": {
+                "@llm_models": "spacy.GPT-3-5.v1",
+            },
+        },
+    )
+
+    nlp.initialize()
+    text = "Jack and Jill visited France."
+    doc = nlp(text)
+
     for label in ["PERSON", "LOCATION"]:
         llm.add_label(label)
     doc = nlp(text)
     assert len(doc.ents) == 3
+
+    llm.clear()
+
+    doc = nlp(text)
+    assert len(doc.ents) == 0
diff --git a/spacy_llm/tests/tasks/test_rel.py b/spacy_llm/tests/tasks/test_rel.py
index ba75d7c2..3650114e 100644
--- a/spacy_llm/tests/tasks/test_rel.py
+++ b/spacy_llm/tests/tasks/test_rel.py
@@ -266,3 +266,23 @@ def test_incorrect_indexing():
         )
         == 0
     )
+
+
+@pytest.mark.external
+@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available")
+def test_labels_in_prompt(request: FixtureRequest):
+    """See https://github.com/explosion/spacy-llm/issues/366."""
+    config = Config().from_str(request.getfixturevalue("zeroshot_cfg_string"))
+    config["components"].pop("ner")
+    config.pop("initialize")
+    config["nlp"]["pipeline"] = ["llm"]
+    config["components"]["llm"]["task"]["labels"] = ["A", "B", "C"]
+    nlp = assemble_from_config(config)
+
+    doc = Doc(get_lang_class("en")().vocab, words=["Well", "hello", "there"])
+    doc.ents = [Span(doc, 0, 1, "A"), Span(doc, 1, 2, "B"), Span(doc, 2, 3, "C")]
+
+    assert (
+        "Well[ENT0:A] hello[ENT1:B] there[ENT2:C]"
+        in list(nlp.get_pipe("llm")._task.generate_prompts([doc]))[0]
+    )
diff --git a/spacy_llm/tests/tasks/test_sentiment.py b/spacy_llm/tests/tasks/test_sentiment.py
index 4b2bd63b..f61f875a 100644
--- a/spacy_llm/tests/tasks/test_sentiment.py
+++ b/spacy_llm/tests/tasks/test_sentiment.py
@@ -1,8 +1,10 @@
 from pathlib import Path
 
+import numpy
 import pytest
 import spacy
 from confection import Config
+from spacy.training import Example
 from spacy.util import make_tempdir
 
 from spacy_llm.registry import fewshot_reader, file_reader
@@ -263,3 +265,20 @@ def test_external_template_actually_loads():
 Sentiment:
 """.strip()
     )
+
+
+@pytest.mark.external
+@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available")
+def test_sentiment_score(request):
+    """Test scoring mechanism."""
+    cfg = request.getfixturevalue("zeroshot_cfg_string")
+    orig_config = Config().from_str(cfg)
+    nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True)
+
+    sent_diff = 0.2
+    doc1 = nlp("This works well.")
+    doc2 = doc1.copy()
+    doc2._.sentiment -= sent_diff
+    assert numpy.isclose(
+        nlp.get_pipe("llm").score([Example(doc1, doc2)])["acc_sentiment"], 1 - sent_diff
+    )
diff --git a/spacy_llm/tests/test_combinations.py b/spacy_llm/tests/test_combinations.py
index 0cd986c5..d36e72ac 100644
--- a/spacy_llm/tests/test_combinations.py
+++ b/spacy_llm/tests/test_combinations.py
@@ -12,7 +12,7 @@
 @pytest.mark.skipif(has_langchain is False, reason="LangChain is not installed")
 @pytest.mark.parametrize(
     "model",
-    ["langchain.OpenAI.v1", "spacy.GPT-3-5.v1", "spacy.GPT-3-5.v2"],
+    ["langchain.OpenAI.v1", "spacy.GPT-3-5.v3", "spacy.GPT-4.v3"],
     ids=["langchain", "rest-openai", "rest-openai"],
 )
 @pytest.mark.parametrize(
diff --git a/spacy_llm/ty.py b/spacy_llm/ty.py
index 7052c197..69e73a0c 100644
--- a/spacy_llm/ty.py
+++ b/spacy_llm/ty.py
@@ -138,7 +138,10 @@ class LabeledTask(Protocol):
     def labels(self) -> Tuple[str, ...]:
         ...
 
-    def add_label(self, label: str) -> int:
+    def add_label(self, label: str, label_definition: Optional[str] = None) -> int:
+        ...
+
+    def clear(self) -> None:
         ...
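For reviewers, a minimal usage sketch of the `add_label(label, definition)` / `clear()` API introduced above, distilled from the new `test_add_label` / `test_clear_label` tests. It assumes an OpenAI API key is available in the environment; the task and model names simply mirror the test config and are not the only valid choices.

```python
import spacy

# Sketch based on the tests in this patch (assumes OPENAI_API_KEY is set).
nlp = spacy.blank("en")
llm = nlp.add_pipe(
    "llm",
    config={
        "task": {"@llm_tasks": "spacy.NER.v3"},
        "model": {"@llm_models": "spacy.GPT-3-5.v1"},
    },
)
nlp.initialize()

# Labels may now carry an optional natural-language definition.
llm.add_label("PERSON", "Every person with the name Jack")
llm.add_label("LOCATION", "A geographical location, like a country or a city")

doc = nlp("Jack and Jill visited France.")
print([(ent.text, ent.label_) for ent in doc.ents])

# clear() resets all labels (and their definitions) on the task.
llm.clear()
assert len(nlp("Jack and Jill visited France.").ents) == 0
```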