diff --git a/requirements-dev.txt b/requirements-dev.txt
index b663c725..49239d6e 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -7,8 +7,10 @@ mypy>=0.990,<1.1.0; platform_machine != "aarch64" and python_version >= "3.7"
 black==22.3.0
 types-requests==2.28.11.16
 # Prompting libraries needed for testing
-langchain==0.0.302; python_version>="3.9"
-openai>=0.27; python_version>="3.9"
+langchain==0.0.331; python_version>="3.9"
+# Workaround for LangChain bug: pin OpenAI version. To be removed after LangChain has been fixed - see
+# https://github.com/langchain-ai/langchain/issues/12967.
+openai>=0.27,<=0.28.1; python_version>="3.9"
 
 # Necessary for running all local models on GPU.
 transformers[sentencepiece]>=4.0.0
diff --git a/setup.cfg b/setup.cfg
index e6646203..e79f6f1a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-version = 0.6.1
+version = 0.6.3
 description = Integrating LLMs into structured NLP pipelines
 author = Explosion
 author_email = contact@explosion.ai
@@ -44,7 +44,7 @@ spacy_misc =
 
 [options.extras_require]
 langchain =
-    langchain==0.0.249
+    langchain==0.0.335
 transformers =
     torch>=1.13.1,<2.0
     transformers>=4.28.1,<5.0
diff --git a/spacy_llm/models/hf/base.py b/spacy_llm/models/hf/base.py
index 71fdc074..87209118 100644
--- a/spacy_llm/models/hf/base.py
+++ b/spacy_llm/models/hf/base.py
@@ -27,12 +27,46 @@ def __init__(
         inference_config (Dict[Any, Any]): HF config for model run.
         """
         self._name = name if self.hf_account in name else f"{self.hf_account}/{name}"
-        self._config_init, self._config_run = self.compile_default_configs()
+        default_cfg_init, default_cfg_run = self.compile_default_configs()
+        self._config_init, self._config_run = default_cfg_init, default_cfg_run
+
         if config_init:
             self._config_init = {**self._config_init, **config_init}
         if config_run:
             self._config_run = {**self._config_run, **config_run}
 
+        # `device` and `device_map` are conflicting arguments - ensure they aren't both set.
+        if config_init:
+            # Case 1: both device and device_map explicitly set by user.
+            if "device" in config_init and "device_map" in config_init:
+                warnings.warn(
+                    "`device` and `device_map` are conflicting arguments - don't set both. Dropping argument "
+                    "`device`."
+                )
+                self._config_init.pop("device")
+            # Case 2: we have a CUDA GPU (and hence device="cuda:0" by default), but device_map is set by user.
+            elif "device" in default_cfg_init and "device_map" in config_init:
+                self._config_init.pop("device")
+            # Case 3: we don't have a CUDA GPU (and hence "device_map=auto" by default), but device is set by user.
+            elif "device_map" in default_cfg_init and "device" in config_init:
+                self._config_init.pop("device_map")
+
+        # Fetch proper torch.dtype, if specified.
+        if (
+            has_torch
+            and "torch_dtype" in self._config_init
+            and self._config_init["torch_dtype"] != "auto"
+        ):
+            try:
+                self._config_init["torch_dtype"] = getattr(
+                    torch, self._config_init["torch_dtype"]
+                )
+            except AttributeError as ex:
+                raise ValueError(
+                    f"Invalid value {self._config_init['torch_dtype']} was specified for `torch_dtype`. "
+                    f"Double-check you specified a valid dtype."
+                ) from ex
+
         # Init HF model.
         HuggingFace.check_installation()
         self._check_model()
 
@@ -89,7 +123,7 @@ compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
         default_cfg_run: Dict[str, Any] = {}
 
         if has_torch:
-            default_cfg_init["torch_dtype"] = torch.bfloat16
+            default_cfg_init["torch_dtype"] = "bfloat16"
             if has_torch_cuda_gpu:
                 # this ensures it fails explicitely when GPU is not enabled or sufficient
                 default_cfg_init["device"] = "cuda:0"
@@ -106,6 +140,7 @@ compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
                     "Install CUDA to load and run the LLM on the GPU, or install 'accelerate' to dynamically "
                     "distribute the LLM on the CPU or even the hard disk. The latter may be slow."
                 )
+
         return default_cfg_init, default_cfg_run
 
     @abc.abstractmethod
diff --git a/spacy_llm/models/hf/falcon.py b/spacy_llm/models/hf/falcon.py
index 76d4e9e2..2e18ac9d 100644
--- a/spacy_llm/models/hf/falcon.py
+++ b/spacy_llm/models/hf/falcon.py
@@ -19,7 +19,6 @@ def __init__(
         config_run: Optional[Dict[str, Any]],
     ):
         self._tokenizer: Optional["transformers.AutoTokenizer"] = None
-        self._device: Optional[str] = None
         super().__init__(name=name, config_init=config_init, config_run=config_run)
 
         assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)
diff --git a/spacy_llm/models/hf/mistral.py b/spacy_llm/models/hf/mistral.py
index 6fe78c78..56ae7be3 100644
--- a/spacy_llm/models/hf/mistral.py
+++ b/spacy_llm/models/hf/mistral.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Iterable, Optional, Tuple
+from typing import Any, Callable, Dict, Iterable, Optional
 
 from confection import SimpleFrozenDict
 
@@ -17,7 +17,6 @@ def __init__(
         config_run: Optional[Dict[str, Any]],
     ):
         self._tokenizer: Optional["transformers.AutoTokenizer"] = None
-        self._device: Optional[str] = None
         self._is_instruct = "instruct" in name
         super().__init__(name=name, config_init=config_init, config_run=config_run)
 
@@ -33,14 +32,15 @@ def __init__(
     def init_model(self) -> Any:
         self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
         init_cfg = self._config_init
+        device: Optional[str] = None
         if "device" in init_cfg:
-            self._device = init_cfg.pop("device")
+            device = init_cfg.pop("device")
 
         model = transformers.AutoModelForCausalLM.from_pretrained(
             self._name, **init_cfg, resume_download=True
         )
-        if self._device:
-            model.to(self._device)
+        if device:
+            model.to(device)
 
         return model
 
@@ -61,8 +61,7 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[ove
             ).input_ids
             for prompt in prompts
         ]
-        if self._device:
-            tokenized_input_ids = [tp.to(self._device) for tp in tokenized_input_ids]
+        tokenized_input_ids = [tp.to(self._model.device) for tp in tokenized_input_ids]
 
         return [
             self._tokenizer.decode(
@@ -74,14 +73,6 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[ove
             for tok_ii in tokenized_input_ids
         ]
 
-    @staticmethod
-    def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
-        default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
-        return (
-            default_cfg_init,
-            default_cfg_run,
-        )
-
 
 @registry.llm_models("spacy.Mistral.v1")
 def mistral_hf(
diff --git a/spacy_llm/models/hf/openllama.py b/spacy_llm/models/hf/openllama.py
index 4cf2f4cf..8ceb5bbc 100644
--- a/spacy_llm/models/hf/openllama.py
+++ b/spacy_llm/models/hf/openllama.py
@@ -2,7 +2,7 @@
 
 from confection import SimpleFrozenDict
 
-from ...compat import Literal, torch, transformers
+from ...compat import Literal, transformers
 from ...registry.util import registry
 from .base import HuggingFace
 
@@ -22,7 +22,6 @@ def __init__(
         config_run: Optional[Dict[str, Any]],
     ):
         self._tokenizer: Optional["transformers.AutoTokenizer"] = None
-        self._device: Optional[str] = None
         super().__init__(name=name, config_init=config_init, config_run=config_run)
 
     def init_model(self) -> "transformers.AutoModelForCausalLM":
@@ -32,14 +31,15 @@ def init_model(self) -> "transformers.AutoModelForCausalLM":
         # Initialize tokenizer and model.
         self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
         init_cfg = self._config_init
+        device: Optional[str] = None
         if "device" in init_cfg:
-            self._device = init_cfg.pop("device")
+            device = init_cfg.pop("device")
+
         model = transformers.AutoModelForCausalLM.from_pretrained(
             self._name, **init_cfg
         )
-
-        if self._device:
-            model.to(self._device)
+        if device:
+            model.to(device)
 
         return model
 
@@ -48,8 +48,9 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[ove
         tokenized_input_ids = [
             self._tokenizer(prompt, return_tensors="pt").input_ids for prompt in prompts
         ]
-        if self._device:
-            tokenized_input_ids = [tii.to(self._device) for tii in tokenized_input_ids]
+        tokenized_input_ids = [
+            tii.to(self._model.device) for tii in tokenized_input_ids
+        ]
 
         assert hasattr(self._model, "generate")
         return [
@@ -71,7 +72,7 @@ def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
         return (
             {
                 **default_cfg_init,
-                "torch_dtype": torch.float16,
+                "torch_dtype": "float16",
             },
             {**default_cfg_run, "max_new_tokens": 32},
         )
diff --git a/spacy_llm/models/hf/stablelm.py b/spacy_llm/models/hf/stablelm.py
index 4711d69f..34698e0e 100644
--- a/spacy_llm/models/hf/stablelm.py
+++ b/spacy_llm/models/hf/stablelm.py
@@ -42,7 +42,6 @@ def __init__(
     ):
         self._tokenizer: Optional["transformers.AutoTokenizer"] = None
         self._is_tuned = "tuned" in name
-        self._device: Optional[str] = None
         super().__init__(name=name, config_init=config_init, config_run=config_run)
 
     def init_model(self) -> "transformers.AutoModelForCausalLM":
@@ -51,14 +50,15 @@ def init_model(self) -> "transformers.AutoModelForCausalLM":
         """
         self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
         init_cfg = self._config_init
+        device: Optional[str] = None
         if "device" in init_cfg:
-            self._device = init_cfg.pop("device")
+            device = init_cfg.pop("device")
+
         model = transformers.AutoModelForCausalLM.from_pretrained(
             self._name, **init_cfg
         )
-
-        if self._device:
-            model.half().to(self._device)
+        if device:
+            model.half().to(device)
 
         return model
 
@@ -80,8 +80,7 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[ove
                 ]
             )
         ]
-        if self._device:
-            tokenized_input_ids = [tp.to(self._device) for tp in tokenized_input_ids]
+        tokenized_input_ids = [tp.to(self._model.device) for tp in tokenized_input_ids]
 
         assert hasattr(self._model, "generate")
         return [
diff --git a/spacy_llm/models/langchain/model.py b/spacy_llm/models/langchain/model.py
index 6c491fd3..03657cdf 100644
--- a/spacy_llm/models/langchain/model.py
+++ b/spacy_llm/models/langchain/model.py
@@ -43,7 +43,7 @@ def get_type_to_cls_dict() -> Dict[
         """Returns langchain.llms.type_to_cls_dict.
         RETURNS (Dict[str, Type[langchain.base_language.BaseLanguageModel]]): langchain.llms.type_to_cls_dict.
         """
-        return langchain.llms.type_to_cls_dict
+        return getattr(langchain.llms, "type_to_cls_dict")
 
     def __call__(self, prompts: Iterable[Any]) -> Iterable[Any]:
         """Executes prompts on specified API.
diff --git a/spacy_llm/models/rest/anthropic/model.py b/spacy_llm/models/rest/anthropic/model.py
index 4c2567f2..774d0e83 100644
--- a/spacy_llm/models/rest/anthropic/model.py
+++ b/spacy_llm/models/rest/anthropic/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -54,7 +54,7 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
         headers = {
             **self._credentials,
             "model": self._name,
-            "anthropic_version": self._config.get("anthropic_version", "2023-06-01"),
+            "anthropic-version": self._config.get("anthropic-version", "2023-06-01"),
             "Content-Type": "application/json",
         }
 
@@ -107,26 +107,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
 
         assert len(api_responses) == len(prompts)
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return (
-            # claude-2
-            "claude-2",
-            "claude-2-100k",
-            # claude-1
-            "claude-1",
-            "claude-1-100k",
-            # claude-instant-1
-            "claude-instant-1",
-            "claude-instant-1-100k",
-            # claude-instant-1.1
-            "claude-instant-1.1",
-            "claude-instant-1.1-100k",
-            # claude-1.3
-            "claude-1.3",
-            "claude-1.3-100k",
-            # others
-            "claude-1.0",
-            "claude-1.2",
-        )
diff --git a/spacy_llm/models/rest/azure/model.py b/spacy_llm/models/rest/azure/model.py
index a30173ff..d8f433d6 100644
--- a/spacy_llm/models/rest/azure/model.py
+++ b/spacy_llm/models/rest/azure/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -48,7 +48,8 @@ def endpoint(self) -> str:
         return (
             self._endpoint
             + ("" if self._endpoint.endswith("/") else "/")
-            + f"openai/deployments/{self._name}/{self._model_type.value}"
+            + f"openai/deployments/{self._name}/{'' if self._model_type == ModelType.COMPLETION else 'chat/'}"
+            f"completions"
         )
 
     @property
@@ -146,12 +147,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
             api_responses.append(response.get("text", srsly.json_dumps(response)))
 
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        # We treat the deployment name as "model name", hence it can be arbitrary.
-        return ("",)
-
-    def _check_model(self) -> None:
-        # We treat the deployment name as "model name", hence it can be arbitrary.
-        pass
diff --git a/spacy_llm/models/rest/base.py b/spacy_llm/models/rest/base.py
index b7dccca3..f54f90ac 100644
--- a/spacy_llm/models/rest/base.py
+++ b/spacy_llm/models/rest/base.py
@@ -1,7 +1,7 @@
 import abc
 import time
 from enum import Enum
-from typing import Any, Callable, Dict, Iterable, Optional, Tuple
+from typing import Any, Callable, Dict, Iterable, Optional
 
 import requests  # type: ignore
 from requests import ConnectTimeout, ReadTimeout
@@ -61,16 +61,8 @@ def __init__(
         assert self._interval > 0
         assert self._max_request_time > 0
 
-        self._check_model()
         self._verify_auth()
 
-    def _check_model(self) -> None:
-        """Checks whether model is supported. Raises if it isn't."""
-        if self._name not in self.get_model_names():
-            raise ValueError(
-                f"Model '{self._name}' is not supported - select one of {self.get_model_names()} instead"
-            )
-
     @abc.abstractmethod
     def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
         """Executes prompts on specified API.
@@ -78,13 +70,6 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
         RETURNS (Iterable[str]): API responses.
         """
 
-    @classmethod
-    @abc.abstractmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        """Names of supported models.
-        RETURNS (Tuple[str]): Names of supported models.
-        """
-
     @property
     @abc.abstractmethod
     def credentials(self) -> Dict[str, str]:
diff --git a/spacy_llm/models/rest/cohere/model.py b/spacy_llm/models/rest/cohere/model.py
index 293ed92b..58ba3231 100644
--- a/spacy_llm/models/rest/cohere/model.py
+++ b/spacy_llm/models/rest/cohere/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -54,7 +54,7 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
                 call_method=requests.post,
                 url=self._endpoint,
                 headers=headers,
-                json={**json_data, **self._config},
+                json={**json_data, **self._config, "model": self._name},
                 timeout=self._max_request_time,
             )
             try:
@@ -111,7 +111,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
             else:
                 api_responses.append(srsly.json_dumps(response))
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return "command", "command-light", "command-light-nightly", "command-nightly"
diff --git a/spacy_llm/models/rest/noop/model.py b/spacy_llm/models/rest/noop/model.py
index 0e3e0398..31d830b8 100644
--- a/spacy_llm/models/rest/noop/model.py
+++ b/spacy_llm/models/rest/noop/model.py
@@ -1,5 +1,5 @@
 import time
-from typing import Dict, Iterable, Tuple
+from typing import Dict, Iterable
 
 from ..base import REST
 
@@ -33,7 +33,3 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]:
         # Assume time penalty for API calls.
         time.sleep(NoOpModel._CALL_TIMEOUT)
         return [_NOOP_RESPONSE] * len(list(prompts))
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return ("NoOp",)
diff --git a/spacy_llm/models/rest/openai/model.py b/spacy_llm/models/rest/openai/model.py
index a712e082..8fa9dc20 100644
--- a/spacy_llm/models/rest/openai/model.py
+++ b/spacy_llm/models/rest/openai/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -140,31 +140,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
                 api_responses.append(srsly.json_dumps(response))
 
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return (
-            # gpt-4
-            "gpt-4",
-            "gpt-4-0314",
-            "gpt-4-32k",
-            "gpt-4-32k-0314",
-            # gpt-3.5
-            "gpt-3.5-turbo",
-            "gpt-3.5-turbo-16k",
-            "gpt-3.5-turbo-0613",
-            "gpt-3.5-turbo-0613-16k",
-            "gpt-3.5-turbo-instruct",
-            # text-davinci
-            "text-davinci-002",
-            "text-davinci-003",
-            # others
-            "code-davinci-002",
-            "text-curie-001",
-            "text-babbage-001",
-            "text-ada-001",
-            "davinci",
-            "curie",
-            "babbage",
-            "ada",
-        )
diff --git a/spacy_llm/models/rest/openai/registry.py b/spacy_llm/models/rest/openai/registry.py
index 1a4c4fd7..82436e4a 100644
--- a/spacy_llm/models/rest/openai/registry.py
+++ b/spacy_llm/models/rest/openai/registry.py
@@ -21,6 +21,35 @@
 """
 
 
+@registry.llm_models("spacy.GPT-4.v3")
+def openai_gpt_4_v3(
+    config: Dict[Any, Any] = SimpleFrozenDict(temperature=_DEFAULT_TEMPERATURE),
+    name: str = "gpt-4",  # noqa: F722
+    strict: bool = OpenAI.DEFAULT_STRICT,
+    max_tries: int = OpenAI.DEFAULT_MAX_TRIES,
+    interval: float = OpenAI.DEFAULT_INTERVAL,
+    max_request_time: float = OpenAI.DEFAULT_MAX_REQUEST_TIME,
+) -> Callable[[Iterable[str]], Iterable[str]]:
+    """Returns OpenAI instance for 'gpt-4' model using REST to prompt API.
+
+    config (Dict[Any, Any]): LLM config passed on to the model's initialization.
+    name (str): Model name to use. Can be any model name supported by the OpenAI API - e. g. 'gpt-4',
+        "gpt-4-1106-preview", ....
+    RETURNS (Callable[[Iterable[str]], Iterable[str]]]): OpenAI instance for 'gpt-4' model
+
+    DOCS: https://spacy.io/api/large-language-models#models
+    """
+    return OpenAI(
+        name=name,
+        endpoint=Endpoints.CHAT.value,
+        config=config,
+        strict=strict,
+        max_tries=max_tries,
+        interval=interval,
+        max_request_time=max_request_time,
+    )
+
+
 @registry.llm_models("spacy.GPT-4.v2")
 def openai_gpt_4_v2(
     config: Dict[Any, Any] = SimpleFrozenDict(temperature=_DEFAULT_TEMPERATURE),
@@ -35,7 +64,7 @@ def openai_gpt_4_v2(
     """Returns OpenAI instance for 'gpt-4' model using REST to prompt API.
 
     config (Dict[Any, Any]): LLM config passed on to the model's initialization.
-    name (Optional[Literal["0314", "32k", "32k-0314"]]): Model to use. Base 'gpt-4' model by default.
+    name (Literal["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]): Model to use. Base 'gpt-4' model by default.
     RETURNS (Callable[[Iterable[str]], Iterable[str]]]): OpenAI instance for 'gpt-4' model
 
     DOCS: https://spacy.io/api/large-language-models#models
@@ -65,7 +94,8 @@ def openai_gpt_4(
     """Returns OpenAI instance for 'gpt-4' model using REST to prompt API.
 
     config (Dict[Any, Any]): LLM config passed on to the model's initialization.
-    name (Optional[Literal["0314", "32k", "32k-0314"]]): Model to use. Base 'gpt-4' model by default.
+    name (Literal["gpt-4", "gpt-4-0314", "gpt-4-32k", "gpt-4-32k-0314"]): Model to use. Base 'gpt-4' model by
+        default.
     RETURNS (Callable[[Iterable[str]], Iterable[str]]]): OpenAI instance for 'gpt-4' model
 
     DOCS: https://spacy.io/api/large-language-models#models
@@ -81,6 +111,37 @@ def openai_gpt_4(
     )
 
 
+@registry.llm_models("spacy.GPT-3-5.v3")
+def openai_gpt_3_5_v3(
+    config: Dict[Any, Any] = SimpleFrozenDict(temperature=_DEFAULT_TEMPERATURE),
+    name: str = "gpt-3.5-turbo",
+    strict: bool = OpenAI.DEFAULT_STRICT,
+    max_tries: int = OpenAI.DEFAULT_MAX_TRIES,
+    interval: float = OpenAI.DEFAULT_INTERVAL,
+    max_request_time: float = OpenAI.DEFAULT_MAX_REQUEST_TIME,
+) -> Callable[[Iterable[str]], Iterable[str]]:
+    """Returns OpenAI instance for 'gpt-3.5' model using REST to prompt API.
+
+    config (Dict[Any, Any]): LLM config passed on to the model's initialization.
+    name (str): Name of model to use. Can be any model name supported by the OpenAI API - e. g. 'gpt-3.5',
+        "gpt-3.5-turbo", ....
+    RETURNS (Callable[[Iterable[str]], Iterable[str]]]): OpenAI instance for 'gpt-3.5' model
+
+    DOCS: https://spacy.io/api/large-language-models#models
+    """
+    return OpenAI(
+        name=name,
+        endpoint=Endpoints.CHAT.value
+        # gpt-3.5-turbo-instruct runs on the non-chat endpoint, so we use that one by default to allow batching.
+        if name != "gpt-3.5-turbo-instruct" else Endpoints.NON_CHAT.value,
+        config=config,
+        strict=strict,
+        max_tries=max_tries,
+        interval=interval,
+        max_request_time=max_request_time,
+    )
+
+
 @registry.llm_models("spacy.GPT-3-5.v2")
 def openai_gpt_3_5_v2(
     config: Dict[Any, Any] = SimpleFrozenDict(temperature=_DEFAULT_TEMPERATURE),
diff --git a/spacy_llm/models/rest/palm/model.py b/spacy_llm/models/rest/palm/model.py
index 1e9b10b1..1a488000 100644
--- a/spacy_llm/models/rest/palm/model.py
+++ b/spacy_llm/models/rest/palm/model.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Sized, Tuple
+from typing import Any, Dict, Iterable, List, Sized
 
 import requests  # type: ignore[import]
 import srsly  # type: ignore[import]
@@ -107,7 +107,3 @@ def _request(json_data: Dict[str, Any]) -> Dict[str, Any]:
             api_responses.append(srsly.json_dumps(response))
 
         return api_responses
-
-    @classmethod
-    def get_model_names(cls) -> Tuple[str, ...]:
-        return "text-bison-001", "chat-bison-001"
diff --git a/spacy_llm/pipeline/llm.py b/spacy_llm/pipeline/llm.py
index 1c3365d4..70003a8b 100644
--- a/spacy_llm/pipeline/llm.py
+++ b/spacy_llm/pipeline/llm.py
@@ -138,10 +138,15 @@ def labels(self) -> Tuple[str, ...]:
             labels = self._task.labels
         return labels
 
-    def add_label(self, label: str) -> int:
+    def add_label(self, label: str, label_definition: Optional[str] = None) -> int:
        if not isinstance(self._task, LabeledTask):
             raise ValueError("The task of this LLM component does not have labels.")
-        return self._task.add_label(label)
+        return self._task.add_label(label, label_definition)
+
+    def clear(self) -> None:
+        if not isinstance(self._task, LabeledTask):
+            raise ValueError("The task of this LLM component does not have labels.")
+        return self._task.clear()
 
     @property
     def task(self) -> LLMTask:
diff --git a/spacy_llm/tasks/builtin_task.py b/spacy_llm/tasks/builtin_task.py
index f959c7c0..e879c52d 100644
--- a/spacy_llm/tasks/builtin_task.py
+++ b/spacy_llm/tasks/builtin_task.py
@@ -329,14 +329,25 @@ def _extract_labels_from_example(self, example: Example) -> List[str]:
     def labels(self) -> Tuple[str, ...]:
         return tuple(self._label_dict.values())
 
-    def add_label(self, label: str) -> int:
+    def add_label(self, label: str, label_definition: Optional[str] = None) -> int:
+        """Add a label to the task"""
         if not isinstance(label, str):
             raise ValueError(Errors.E187)
         if label in self.labels:
             return 0
         self._label_dict[self._normalizer(label)] = label
+        if label_definition is None:
+            return 1
+        if self._label_definitions is None:
+            self._label_definitions = {}
+        self._label_definitions[label] = label_definition
         return 1
 
+    def clear(self) -> None:
+        """Reset all labels."""
+        self._label_dict = {}
+        self._label_definitions = None
+
     @property
     def normalizer(self) -> Callable[[str], str]:
         return self._normalizer
diff --git a/spacy_llm/tasks/rel/task.py b/spacy_llm/tasks/rel/task.py
index 83accb81..81fd8917 100644
--- a/spacy_llm/tasks/rel/task.py
+++ b/spacy_llm/tasks/rel/task.py
@@ -8,7 +8,7 @@
 from ...ty import FewshotExample, TaskResponseParser
 from ..builtin_task import BuiltinTaskWithLabels
 from ..templates import read_template
-from .util import EntityItem, RelationItem
+from .util import RelationItem
 
 DEFAULT_REL_TEMPLATE: str = read_template("rel.v1")
 
@@ -77,9 +77,7 @@ def _preannotate(doc: Union[Doc, FewshotExample]) -> str:
         for i, ent in enumerate(doc.ents):
             end = ent.end_char
             before, after = text[: end + offset], text[end + offset :]
-            annotation = (
-                f"[ENT{i}:{ent.label if isinstance(ent, EntityItem) else ent.label_}]"
-            )
+            annotation = f"[ENT{i}:{ent.label_ if isinstance(doc, Doc) else ent.label}]"
             offset += len(annotation)
             text = f"{before}{annotation}{after}"
 
diff --git a/spacy_llm/tasks/sentiment/registry.py b/spacy_llm/tasks/sentiment/registry.py
index 6dd51606..ab15f151 100644
--- a/spacy_llm/tasks/sentiment/registry.py
+++ b/spacy_llm/tasks/sentiment/registry.py
@@ -1,10 +1,10 @@
 from typing import Optional, Type
 
 from ...registry import registry
-from ...ty import ExamplesConfigType, FewshotExample, TaskResponseParser
+from ...ty import ExamplesConfigType, FewshotExample, Scorer, TaskResponseParser
 from .parser import parse_responses_v1
 from .task import DEFAULT_SENTIMENT_TEMPLATE_V1, SentimentTask
-from .util import SentimentExample
+from .util import SentimentExample, score
 
 
 @registry.llm_tasks("spacy.Sentiment.v1")
@@ -14,6 +14,7 @@ def make_sentiment_task(
     prompt_example_type: Optional[Type[FewshotExample]] = None,
     examples: ExamplesConfigType = None,
     field: str = "sentiment",
+    scorer: Optional[Scorer] = None,
 ):
     """Sentiment.v1 task factory.
 
@@ -24,6 +25,7 @@ def make_sentiment_task(
     examples (ExamplesConfigType): Optional callable that reads a file containing task examples for
         few-shot learning. If None is passed, then zero-shot learning will be used.
     field (str): The name of the doc extension in which to store the summary.
+    scorer (Optional[Scorer]): Scorer function.
""" raw_examples = examples() if callable(examples) else examples example_type = prompt_example_type or SentimentExample @@ -37,4 +39,5 @@ def make_sentiment_task( prompt_example_type=example_type, prompt_examples=sentiment_examples, field=field, + scorer=scorer or score, ) diff --git a/spacy_llm/tasks/sentiment/task.py b/spacy_llm/tasks/sentiment/task.py index ab34b4dd..57015ca8 100644 --- a/spacy_llm/tasks/sentiment/task.py +++ b/spacy_llm/tasks/sentiment/task.py @@ -1,10 +1,10 @@ -from typing import Callable, Iterable, List, Optional, Type +from typing import Any, Callable, Dict, Iterable, List, Optional, Type from spacy.language import Language from spacy.tokens import Doc from spacy.training import Example -from ...ty import FewshotExample, Self, TaskResponseParser +from ...ty import FewshotExample, Scorer, Self, TaskResponseParser from ..builtin_task import BuiltinTask from ..templates import read_template @@ -19,6 +19,7 @@ def __init__( prompt_example_type: Type[FewshotExample[Self]], field: str, prompt_examples: Optional[List[FewshotExample[Self]]], + scorer: Scorer, ): """Sentiment analysis task. @@ -35,6 +36,7 @@ def __init__( prompt_examples=prompt_examples, ) self._field = field + self._scorer = scorer self._check_doc_extension() def _check_doc_extension(self): @@ -75,6 +77,9 @@ def parse_responses( yield doc + def scorer(self, examples: Iterable[Example]) -> Dict[str, Any]: + return self._scorer(examples, field=self._field) + @property def _cfg_keys(self) -> List[str]: return ["_template"] diff --git a/spacy_llm/tasks/sentiment/util.py b/spacy_llm/tasks/sentiment/util.py index 72adbedb..f53bae32 100644 --- a/spacy_llm/tasks/sentiment/util.py +++ b/spacy_llm/tasks/sentiment/util.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Any, Dict, Iterable, Optional from spacy.training import Example @@ -17,3 +17,19 @@ def generate(cls, example: Example, task: SentimentTask) -> Optional[Self]: text=example.reference.text, score=getattr(example.reference._, task.field), ) + + +def score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: + """Score sentiment accuracy in examples. + examples (Iterable[Example]): Examples to score. + RETURNS (Dict[str, Any]): Dict with metric name -> score. 
+ """ + score_diffs = [ + abs( + getattr(example.predicted._, kwargs["field"]) + - getattr(example.reference._, kwargs["field"]) + ) + for example in examples + ] + + return {"acc_sentiment": 1 - (sum(score_diffs) / len(score_diffs))} diff --git a/spacy_llm/tests/models/test_anthropic.py b/spacy_llm/tests/models/test_anthropic.py index 61f1da77..eb366205 100644 --- a/spacy_llm/tests/models/test_anthropic.py +++ b/spacy_llm/tests/models/test_anthropic.py @@ -56,7 +56,10 @@ def test_anthropic_error_unsupported_model(): """Ensure graceful handling of error when model is not supported""" incorrect_model = "x-gpt-3.5-turbo" with pytest.raises( - ValueError, match=re.escape("Model 'x-gpt-3.5-turbo' is not supported") + ValueError, + match=re.escape( + "Ensure that the selected model (x-gpt-3.5-turbo) is supported by the API" + ), ): Anthropic( name=incorrect_model, diff --git a/spacy_llm/tests/models/test_cohere.py b/spacy_llm/tests/models/test_cohere.py index 4b555aa4..5d1db35f 100644 --- a/spacy_llm/tests/models/test_cohere.py +++ b/spacy_llm/tests/models/test_cohere.py @@ -77,7 +77,7 @@ def test_cohere_api_response_when_error(): def test_cohere_error_unsupported_model(): """Ensure graceful handling of error when model is not supported""" incorrect_model = "x-gpt-3.5-turbo" - with pytest.raises(ValueError, match="Model 'x-gpt-3.5-turbo' is not supported"): + with pytest.raises(ValueError, match="model not found"): Cohere( name=incorrect_model, config={}, diff --git a/spacy_llm/tests/models/test_hf.py b/spacy_llm/tests/models/test_hf.py new file mode 100644 index 00000000..3058035c --- /dev/null +++ b/spacy_llm/tests/models/test_hf.py @@ -0,0 +1,78 @@ +from typing import Tuple + +import pytest +import spacy +from thinc.compat import has_torch_cuda_gpu + +from spacy_llm.compat import has_accelerate, torch + +_PIPE_CFG = { + "model": { + "@llm_models": "", + "name": "", + }, + "task": {"@llm_tasks": "spacy.NoOp.v1"}, + "save_io": True, +} + + +@pytest.mark.gpu +@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") +@pytest.mark.parametrize( + "model", (("spacy.Dolly.v1", "dolly-v2-3b"), ("spacy.Llama2.v1", "Llama-2-7b-hf")) +) +def test_device_config_conflict(model: Tuple[str, str]): + """Test device configuration.""" + nlp = spacy.blank("en") + model, name = model + cfg = {**_PIPE_CFG, **{"model": {"@llm_models": model, "name": name}}} + + # Set device only. + cfg["model"]["config_init"] = {"device": "cpu"} # type: ignore[index] + nlp.add_pipe("llm", name="llm1", config=cfg) + + # Set device_map only. + cfg["model"]["config_init"] = {"device_map": "auto"} # type: ignore[index] + if has_accelerate: + nlp.add_pipe("llm", name="llm2", config=cfg) + else: + with pytest.raises(ImportError, match="requires Accelerate"): + nlp.add_pipe("llm", name="llm2", config=cfg) + + # Set device_map and device. + cfg["model"]["config_init"] = {"device_map": "auto", "device": "cpu"} # type: ignore[index] + with pytest.warns(UserWarning, match="conflicting arguments"): + if has_accelerate: + nlp.add_pipe("llm", name="llm3", config=cfg) + else: + with pytest.raises(ImportError, match="requires Accelerate"): + nlp.add_pipe("llm", name="llm3", config=cfg) + + torch.cuda.empty_cache() + + +@pytest.mark.gpu +@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA") +def test_torch_dtype(): + """Test torch_dtype setting.""" + nlp = spacy.blank("en") + cfg = { + **_PIPE_CFG, + **{"model": {"@llm_models": "spacy.Dolly.v1", "name": "dolly-v2-3b"}}, + } + + # Should be converted to torch.float16. 
+    cfg["model"]["config_init"] = {"torch_dtype": "float16"}  # type: ignore[index]
+    llm = nlp.add_pipe("llm", name="llm1", config=cfg)
+    assert llm._model._config_init["torch_dtype"] == torch.float16
+
+    # Should remain "auto".
+    cfg["model"]["config_init"] = {"torch_dtype": "auto"}  # type: ignore[index]
+    nlp.add_pipe("llm", name="llm2", config=cfg)
+
+    # Should fail - nonexistent dtype.
+    cfg["model"]["config_init"] = {"torch_dtype": "float999"}  # type: ignore[index]
+    with pytest.raises(ValueError, match="Invalid value float999"):
+        nlp.add_pipe("llm", name="llm3", config=cfg)
+
+    torch.cuda.empty_cache()
diff --git a/spacy_llm/tests/models/test_mistral.py b/spacy_llm/tests/models/test_mistral.py
index 5dde49f0..548d4d29 100644
--- a/spacy_llm/tests/models/test_mistral.py
+++ b/spacy_llm/tests/models/test_mistral.py
@@ -48,6 +48,7 @@ def test_init():
 
 
 @pytest.mark.gpu
+@pytest.mark.skip(reason="CI runner needs more GPU memory")
 @pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
 def test_init_from_config():
     orig_config = Config().from_str(_NLP_CONFIG)
diff --git a/spacy_llm/tests/tasks/test_ner.py b/spacy_llm/tests/tasks/test_ner.py
index 7d7c8c82..3c180048 100644
--- a/spacy_llm/tests/tasks/test_ner.py
+++ b/spacy_llm/tests/tasks/test_ner.py
@@ -992,7 +992,42 @@ def test_add_label():
     doc = nlp(text)
     assert len(doc.ents) == 0
 
+    for label, definition in [
+        ("PERSON", "Every person with the name Jack"),
+        ("LOCATION", "A geographical location, like a country or a city"),
+        ("COMPANY", None),
+    ]:
+        llm.add_label(label, definition)
+    doc = nlp(text)
+    assert len(doc.ents) > 1
+
+
+@pytest.mark.external
+@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available")
+def test_clear_label():
+    nlp = spacy.blank("en")
+    llm = nlp.add_pipe(
+        "llm",
+        config={
+            "task": {
+                "@llm_tasks": "spacy.NER.v3",
+            },
+            "model": {
+                "@llm_models": "spacy.GPT-3-5.v1",
+            },
+        },
+    )
+
+    nlp.initialize()
+    text = "Jack and Jill visited France."
+    doc = nlp(text)
+
     for label in ["PERSON", "LOCATION"]:
         llm.add_label(label)
     doc = nlp(text)
     assert len(doc.ents) == 3
+
+    llm.clear()
+
+    doc = nlp(text)
+    assert len(doc.ents) == 0
diff --git a/spacy_llm/tests/tasks/test_rel.py b/spacy_llm/tests/tasks/test_rel.py
index ba75d7c2..3650114e 100644
--- a/spacy_llm/tests/tasks/test_rel.py
+++ b/spacy_llm/tests/tasks/test_rel.py
@@ -266,3 +266,23 @@ def test_incorrect_indexing():
         )
         == 0
     )
+
+
+@pytest.mark.external
+@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available")
+def test_labels_in_prompt(request: FixtureRequest):
+    """See https://github.com/explosion/spacy-llm/issues/366."""
+    config = Config().from_str(request.getfixturevalue("zeroshot_cfg_string"))
+    config["components"].pop("ner")
+    config.pop("initialize")
+    config["nlp"]["pipeline"] = ["llm"]
+    config["components"]["llm"]["task"]["labels"] = ["A", "B", "C"]
+    nlp = assemble_from_config(config)
+
+    doc = Doc(get_lang_class("en")().vocab, words=["Well", "hello", "there"])
+    doc.ents = [Span(doc, 0, 1, "A"), Span(doc, 1, 2, "B"), Span(doc, 2, 3, "C")]
+
+    assert (
+        "Well[ENT0:A] hello[ENT1:B] there[ENT2:C]"
+        in list(nlp.get_pipe("llm")._task.generate_prompts([doc]))[0]
+    )
diff --git a/spacy_llm/tests/tasks/test_sentiment.py b/spacy_llm/tests/tasks/test_sentiment.py
index 4b2bd63b..f61f875a 100644
--- a/spacy_llm/tests/tasks/test_sentiment.py
+++ b/spacy_llm/tests/tasks/test_sentiment.py
@@ -1,8 +1,10 @@
 from pathlib import Path
 
+import numpy
 import pytest
 import spacy
 from confection import Config
+from spacy.training import Example
 from spacy.util import make_tempdir
 
 from spacy_llm.registry import fewshot_reader, file_reader
@@ -263,3 +265,20 @@ def test_external_template_actually_loads():
 Sentiment:
 """.strip()
     )
+
+
+@pytest.mark.external
+@pytest.mark.skipif(has_openai_key is False, reason="OpenAI API key not available")
+def test_sentiment_score(request):
+    """Test scoring mechanism."""
+    cfg = request.getfixturevalue("zeroshot_cfg_string")
+    orig_config = Config().from_str(cfg)
+    nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True)
+
+    sent_diff = 0.2
+    doc1 = nlp("This works well.")
+    doc2 = doc1.copy()
+    doc2._.sentiment -= sent_diff
+    assert numpy.isclose(
+        nlp.get_pipe("llm").score([Example(doc1, doc2)])["acc_sentiment"], 1 - sent_diff
+    )
diff --git a/spacy_llm/tests/test_combinations.py b/spacy_llm/tests/test_combinations.py
index 0cd986c5..d36e72ac 100644
--- a/spacy_llm/tests/test_combinations.py
+++ b/spacy_llm/tests/test_combinations.py
@@ -12,7 +12,7 @@
 @pytest.mark.skipif(has_langchain is False, reason="LangChain is not installed")
 @pytest.mark.parametrize(
     "model",
-    ["langchain.OpenAI.v1", "spacy.GPT-3-5.v1", "spacy.GPT-3-5.v2"],
+    ["langchain.OpenAI.v1", "spacy.GPT-3-5.v3", "spacy.GPT-4.v3"],
     ids=["langchain", "rest-openai", "rest-openai"],
 )
 @pytest.mark.parametrize(
diff --git a/spacy_llm/ty.py b/spacy_llm/ty.py
index 7052c197..69e73a0c 100644
--- a/spacy_llm/ty.py
+++ b/spacy_llm/ty.py
@@ -138,7 +138,10 @@ class LabeledTask(Protocol):
     def labels(self) -> Tuple[str, ...]:
         ...
 
-    def add_label(self, label: str) -> int:
+    def add_label(self, label: str, label_definition: Optional[str] = None) -> int:
+        ...
+
+    def clear(self) -> None:
         ...
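For reviewers, a minimal usage sketch of the `add_label(label, definition)` / `clear()` API introduced above, distilled from the new `test_add_label` / `test_clear_label` tests. It assumes an OpenAI API key is available in the environment; the task and model names simply mirror the test config and are not the only valid choices.

```python
import spacy

# Sketch based on the tests in this patch (assumes OPENAI_API_KEY is set).
nlp = spacy.blank("en")
llm = nlp.add_pipe(
    "llm",
    config={
        "task": {"@llm_tasks": "spacy.NER.v3"},
        "model": {"@llm_models": "spacy.GPT-3-5.v1"},
    },
)
nlp.initialize()

# Labels may now carry an optional natural-language definition.
llm.add_label("PERSON", "Every person with the name Jack")
llm.add_label("LOCATION", "A geographical location, like a country or a city")

doc = nlp("Jack and Jill visited France.")
print([(ent.text, ent.label_) for ent in doc.ents])

# clear() resets all labels (and their definitions) on the task.
llm.clear()
assert len(nlp("Jack and Jill visited France.").ents) == 0
```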