Skip to content

Commit

Permalink
Add Dolly NER.v3 usage example. (#302)
Browse files Browse the repository at this point in the history
  • Loading branch information
rmitsch authored Oct 2, 2023
1 parent 8c26a53 commit 63cdee9
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 18 deletions.
3 changes: 3 additions & 0 deletions usage_examples/ner_dolly/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ python run_pipeline.py \
./examples.yml
```

By default this uses v3 of the NER recipe, which leverages a chain-of-thought prompt. If you want to run this with v2,
use `fewshot_v2.cfg`, `zeroshot_v2.cfg` and `examples_v2.yml` instead.

By default, the pipeline assigns `PERSON`, `ORGANISATION` or `LOCATION` labels
for each entity. You can change these labels by updating the
`zeroshot.cfg` configuration file.
Expand Down
31 changes: 17 additions & 14 deletions usage_examples/ner_dolly/examples.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
- text: Jack and Jill went up the hill.
entities:
PERSON:
- Jack
- Jill
LOCATION:
- hill
- text: Jack fell down and broke his crown.
entities:
PERSON:
- Jack
- text: Jill came tumbling after.
entities:
PERSON:
- Jill
spans:
- text: Jack
is_entity: true
label: PERSON
reason: is the name of a person
- text: Jill
is_entity: true
label: PERSON
reason: is the name of a person
- text: went up
is_entity: false
label: ==NONE==
reason: is a verb
- text: hill
is_entity: true
label: LOCATION
reason: is a location
15 changes: 15 additions & 0 deletions usage_examples/ner_dolly/examples_v2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
- text: Jack and Jill went up the hill.
entities:
PERSON:
- Jack
- Jill
LOCATION:
- hill
- text: Jack fell down and broke his crown.
entities:
PERSON:
- Jack
- text: Jill came tumbling after.
entities:
PERSON:
- Jill
2 changes: 1 addition & 1 deletion usage_examples/ner_dolly/fewshot.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ factory = "llm"
name = "dolly-v2-3b"

[components.llm.task]
@llm_tasks = "spacy.NER.v2"
@llm_tasks = "spacy.NER.v3"
labels = PERSON,ORGANISATION,LOCATION

[components.llm.task.examples]
Expand Down
27 changes: 27 additions & 0 deletions usage_examples/ner_dolly/fewshot_v2.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[paths]
examples = null

[nlp]
lang = "en"
pipeline = ["llm"]
batch_size = 128

[components]

[components.llm]
factory = "llm"

[components.llm.model]
@llm_models = "spacy.Dolly.v1"
name = "dolly-v2-3b"

[components.llm.task]
@llm_tasks = "spacy.NER.v2"
labels = PERSON,ORGANISATION,LOCATION

[components.llm.task.examples]
@misc = "spacy.FewShotReader.v1"
path = ${paths.examples}

[components.llm.task.normalizer]
@misc = "spacy.LowercaseNormalizer.v1"
2 changes: 1 addition & 1 deletion usage_examples/ner_dolly/zeroshot.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ factory = "llm"
name = "dolly-v2-3b"

[components.llm.task]
@llm_tasks = "spacy.NER.v2"
@llm_tasks = "spacy.NER.v3"
labels = PERSON,ORGANISATION,LOCATION
examples = null

Expand Down
21 changes: 21 additions & 0 deletions usage_examples/ner_dolly/zeroshot_v2.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
[nlp]
lang = "en"
pipeline = ["llm"]
batch_size = 128

[components]

[components.llm]
factory = "llm"

[components.llm.model]
@llm_models = "spacy.Dolly.v1"
name = "dolly-v2-3b"

[components.llm.task]
@llm_tasks = "spacy.NER.v2"
labels = PERSON,ORGANISATION,LOCATION
examples = null

[components.llm.task.normalizer]
@misc = "spacy.LowercaseNormalizer.v1"
20 changes: 18 additions & 2 deletions usage_examples/tests/test_usage_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

@pytest.mark.gpu
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
@pytest.mark.parametrize("config_name", ("fewshot.cfg", "zeroshot.cfg"))
@pytest.mark.parametrize("config_name", ("fewshot_v2.cfg", "zeroshot_v2.cfg"))
def test_ner_dolly(config_name: str):
"""Test NER Dolly usage example.
config_name (str): Name of config file to use.
Expand All @@ -22,7 +22,23 @@ def test_ner_dolly(config_name: str):
ner_dolly.run_pipeline(
text="text",
config_path=path / config_name,
examples_path=None if config_name == "zeroshot.cfg" else path / "examples.yml",
examples_path=None if "zeroshot" in config_name else path / "examples_v2.yml",
verbose=False,
)


@pytest.mark.gpu
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
@pytest.mark.parametrize("config_name", ("fewshot.cfg", "zeroshot.cfg"))
def test_ner_v3_dolly(config_name: str):
    """Run the Dolly NER usage example with the default (NER v3) configs.

    Configs whose name contains "zeroshot" are run without an examples file;
    any other config is given ``examples.yml`` from the example directory.

    config_name (str): Name of config file to use.
    """
    example_dir = _USAGE_EXAMPLE_PATH / "ner_dolly"

    # Zero-shot runs must not receive few-shot examples.
    if "zeroshot" in config_name:
        examples_path = None
    else:
        examples_path = example_dir / "examples.yml"

    ner_dolly.run_pipeline(
        text="text",
        config_path=example_dir / config_name,
        examples_path=examples_path,
        verbose=False,
    )

Expand Down

0 comments on commit 63cdee9

Please sign in to comment.