diff --git a/spacy_llm/models/hf/dolly.py b/spacy_llm/models/hf/dolly.py
index db6599b4..849f34bd 100644
--- a/spacy_llm/models/hf/dolly.py
+++ b/spacy_llm/models/hf/dolly.py
@@ -14,7 +14,9 @@ def init_model(self) -> Any:
         """Sets up HF model and needed utilities.
         RETURNS (Any): HF model.
         """
-        return transformers.pipeline(model=self._name, **self._config_init)
+        return transformers.pipeline(
+            model=self._name, return_full_text=False, **self._config_init
+        )
 
     def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[override]
         """Queries Dolly HF model.
diff --git a/spacy_llm/models/hf/falcon.py b/spacy_llm/models/hf/falcon.py
index 00fcff9f..76d4e9e2 100644
--- a/spacy_llm/models/hf/falcon.py
+++ b/spacy_llm/models/hf/falcon.py
@@ -38,6 +38,7 @@ def init_model(self) -> Any:
             "text-generation",
             model=self._name,
             tokenizer=self._tokenizer,
+            return_full_text=False,
             **self._config_init,
         )
 
diff --git a/spacy_llm/models/hf/llama2.py b/spacy_llm/models/hf/llama2.py
index 6adadc9b..d8d1248b 100644
--- a/spacy_llm/models/hf/llama2.py
+++ b/spacy_llm/models/hf/llama2.py
@@ -32,6 +32,7 @@ def init_model(self) -> Any:
             "text-generation",
             model=self._name,
             use_auth_token=True,
+            return_full_text=False,
             **self._config_init,
         )
 
diff --git a/spacy_llm/models/hf/openllama.py b/spacy_llm/models/hf/openllama.py
index 6f10204c..4cf2f4cf 100644
--- a/spacy_llm/models/hf/openllama.py
+++ b/spacy_llm/models/hf/openllama.py
@@ -54,7 +54,9 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[ove
         assert hasattr(self._model, "generate")
         return [
             self._tokenizer.decode(
-                self._model.generate(input_ids=tii, **self._config_run)[0],
+                self._model.generate(input_ids=tii, **self._config_run)[
+                    :, tii.shape[1] :
+                ][0],
             )
             for tii in tokenized_input_ids
         ]
diff --git a/spacy_llm/models/hf/stablelm.py b/spacy_llm/models/hf/stablelm.py
index 5aca0f52..4711d69f 100644
--- a/spacy_llm/models/hf/stablelm.py
+++ b/spacy_llm/models/hf/stablelm.py
@@ -68,8 +68,8 @@ def hf_account(self) -> str:
 
     def __call__(self, prompts: Iterable[str]) -> Iterable[str]:  # type: ignore[override]
         assert callable(self._tokenizer)
-        tokenized_prompts = [
-            self._tokenizer(prompt, return_tensors="pt")
+        tokenized_input_ids = [
+            self._tokenizer(prompt, return_tensors="pt").input_ids
             for prompt in (
                 # Add prompt formatting for tuned model.
                 prompts
@@ -81,15 +81,17 @@ def __call__(self, prompts: Iterable[str]) -> Iterable[str]: # type: ignore[ove
             )
         ]
         if self._device:
-            tokenized_prompts = [tp.to(self._device) for tp in tokenized_prompts]
+            tokenized_input_ids = [tp.to(self._device) for tp in tokenized_input_ids]
 
         assert hasattr(self._model, "generate")
         return [
             self._tokenizer.decode(
-                self._model.generate(**prompt, **self._config_run)[0],
+                self._model.generate(input_ids=tii, **self._config_run)[
+                    :, tii.shape[1] :
+                ][0],
                 skip_special_tokens=True,
             )
-            for prompt in tokenized_prompts
+            for tii in tokenized_input_ids
         ]
 
     @staticmethod
diff --git a/spacy_llm/tests/models/test_dolly.py b/spacy_llm/tests/models/test_dolly.py
index 615d6a0c..4b70179d 100644
--- a/spacy_llm/tests/models/test_dolly.py
+++ b/spacy_llm/tests/models/test_dolly.py
@@ -13,6 +13,7 @@
         "name": "dolly-v2-3b",
     },
     "task": {"@llm_tasks": "spacy.NoOp.v1"},
+    "save_io": True,
 }
 
 _NLP_CONFIG = """
@@ -26,6 +27,7 @@
 
 [components.llm]
 factory = "llm"
+save_io = True
 
 [components.llm.task]
 @llm_tasks = "spacy.NoOp.v1"
@@ -41,12 +43,13 @@ def test_init():
     """Test initialization and simple run."""
     nlp = spacy.blank("en")
-    cfg = copy.deepcopy(_PIPE_CFG)
-    cfg["model"]["@llm_models"] = "spacy.Dolly.v1"
-    nlp.add_pipe("llm", config=cfg)
-    nlp("This is a test.")
+    nlp.add_pipe("llm", config=_PIPE_CFG)
+    doc = nlp("This is a test.")
     nlp.get_pipe("llm")._model.get_model_names()
     torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"]
+    )
 
 
 @pytest.mark.gpu
diff --git a/spacy_llm/tests/models/test_falcon.py b/spacy_llm/tests/models/test_falcon.py
index eb886922..6638975b 100644
--- a/spacy_llm/tests/models/test_falcon.py
+++ b/spacy_llm/tests/models/test_falcon.py
@@ -13,6 +13,7 @@
         "name": "falcon-rw-1b",
     },
     "task": {"@llm_tasks": "spacy.NoOp.v1"},
+    "save_io": True,
 }
 
 _NLP_CONFIG = """
@@ -43,8 +44,11 @@ def test_init():
     nlp = spacy.blank("en")
     cfg = copy.deepcopy(_PIPE_CFG)
     nlp.add_pipe("llm", config=cfg)
-    nlp("This is a test.")
+    doc = nlp("This is a test.")
     torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"]
+    )
 
 
 @pytest.mark.gpu
diff --git a/spacy_llm/tests/models/test_llama2.py b/spacy_llm/tests/models/test_llama2.py
index 0dd4623d..6896269b 100644
--- a/spacy_llm/tests/models/test_llama2.py
+++ b/spacy_llm/tests/models/test_llama2.py
@@ -13,6 +13,7 @@
         "name": "Llama-2-7b-hf",
     },
     "task": {"@llm_tasks": "spacy.NoOp.v1"},
+    "save_io": True,
 }
 
 _NLP_CONFIG = """
@@ -44,8 +45,11 @@ def test_init():
     nlp = spacy.blank("en")
     cfg = copy.deepcopy(_PIPE_CFG)
     nlp.add_pipe("llm", config=cfg)
-    nlp("This is a test.")
+    doc = nlp("This is a test.")
     torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"]
+    )
 
 
 @pytest.mark.skip(reason="CI runner needs more GPU memory")
diff --git a/spacy_llm/tests/models/test_openllama.py b/spacy_llm/tests/models/test_openllama.py
index 5cb8a497..efb1c2d3 100644
--- a/spacy_llm/tests/models/test_openllama.py
+++ b/spacy_llm/tests/models/test_openllama.py
@@ -13,6 +13,7 @@
         "name": "open_llama_3b",
     },
     "task": {"@llm_tasks": "spacy.NoOp.v1"},
+    "save_io": True,
 }
 
 _NLP_CONFIG = """
@@ -25,6 +26,7 @@
 
 [components.llm]
 factory = "llm"
+save_io = True
 
 [components.llm.task]
 @llm_tasks = "spacy.NoOp.v1"
@@ -41,8 +43,11 @@ def test_init():
     """Test initialization and simple run."""
     nlp = spacy.blank("en")
     nlp.add_pipe("llm", config=_PIPE_CFG)
-    nlp("This is a test.")
+    doc = nlp("This is a test.")
     torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"]
+    )
 
 
 @pytest.mark.gpu
@@ -53,8 +58,11 @@ def test_init_with_set_config():
     cfg = copy.deepcopy(_PIPE_CFG)
     cfg["model"]["config_run"] = {"max_new_tokens": 32}
     nlp.add_pipe("llm", config=cfg)
-    nlp("This is a test.")
+    doc = nlp("This is a test.")
     torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"]
+    )
 
 
 @pytest.mark.gpu
diff --git a/spacy_llm/tests/models/test_stablelm.py b/spacy_llm/tests/models/test_stablelm.py
index 36ba1249..b3b09830 100644
--- a/spacy_llm/tests/models/test_stablelm.py
+++ b/spacy_llm/tests/models/test_stablelm.py
@@ -13,6 +13,7 @@
         "name": "stablelm-base-alpha-3b",
     },
     "task": {"@llm_tasks": "spacy.NoOp.v1"},
+    "save_io": True,
 }
 
 _NLP_CONFIG = """
@@ -44,10 +45,13 @@ def test_init(name: str):
     """
     nlp = spacy.blank("en")
     cfg = copy.deepcopy(_PIPE_CFG)
-    cfg["model"]["name"] = name
+    cfg["model"]["name"] = name  # type: ignore[index]
     nlp.add_pipe("llm", config=cfg)
-    nlp("This is a test.")
+    doc = nlp("This is a test.")
    torch.cuda.empty_cache()
+    assert not doc.user_data["llm_io"]["llm"]["response"].startswith(
+        doc.user_data["llm_io"]["llm"]["prompt"]
+    )
 
 
 @pytest.mark.gpu
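For context, the two prompt-stripping strategies applied in this patch can be sketched outside spaCy with plain transformers. This is an illustrative example only: the model name ("gpt2"), prompt text, and generation settings are stand-ins and not part of the patch; the API calls mirror what the patched code does.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

prompt = "This is a test."

# Pipeline-backed models (Dolly, Falcon, Llama 2): passing return_full_text=False
# when building the text-generation pipeline makes it return only the completion,
# instead of prompt + completion concatenated.
generator = pipeline("text-generation", model="gpt2", return_full_text=False)
completion = generator(prompt, max_new_tokens=8)[0]["generated_text"]
assert not completion.startswith(prompt)

# generate()-backed models (OpenLLaMA, StableLM): generate() returns the prompt
# tokens followed by the newly generated tokens, so the first input_ids.shape[1]
# positions are sliced off before decoding.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
output_ids = model.generate(input_ids=input_ids, max_new_tokens=8)
completion = tokenizer.decode(
    output_ids[:, input_ids.shape[1] :][0], skip_special_tokens=True
)
assert not completion.startswith(prompt)
```

The asserts mirror the check added to the tests above: with `save_io = True`, the stored response should no longer begin with the stored prompt.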