forked from explosion/projects
-
Notifications
You must be signed in to change notification settings - Fork 0
/
project.yml
106 lines (90 loc) · 4.03 KB
/
project.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
title: 'Hugging Face Hub integration'
# Markdown description of the project. A literal block scalar (`|`) is used so
# that the paragraph breaks and each bullet of the list stay on separate lines
# instead of being folded into one long line.
description: |
  With [Hugging Face Hub](https://huggingface.co/), you can easily share any trained pipeline with the community. The Hugging Face Hub offers:

  - Free model hosting.
  - Built-in file versioning, even with very large files, thanks to a git-based approach.
  - In-browser widgets to play with the uploaded models.

  This uses [`spacy-huggingface-hub`](https://github.com/explosion/spacy-huggingface-hub) to push a packaged pipeline to the Hugging Face Hub, including the `whl` file. This enables `pip install`ing a pipeline directly from the Hugging Face Hub.
# Project-wide variables. Reference them anywhere in this file as
# ${vars.var_name}.
vars:
  # File name of the training config, looked up under configs/.
  config: 'config.cfg'
  # Name and version handed to `spacy package`.
  name: 'ner_fashion'
  version: '0.0.0'
  # Base names (without extension) of the training and development data.
  train: 'fashion_brands_training'
  dev: 'fashion_brands_eval'
# Directories the project relies on. The project CLI makes sure that each of
# them always exists.
directories:
- 'assets'
- 'training'
- 'configs'
- 'scripts'
- 'corpus'
- 'packages'
- 'hub'
# Data files expected in the project directory. They ship with the project, so
# nothing has to be downloaded, but the 'project assets' command can still
# verify that the checksums below match.
assets:
- dest: 'assets/${vars.train}.jsonl'
  checksum: '63373dd656daa1fd3043ce166a59474c'
  description: >-
    JSONL-formatted training data exported from Prodigy, annotated with
    `FASHION_BRAND` entities (1235 examples)
- dest: 'assets/${vars.dev}.jsonl'
  checksum: '5113dc04e03f079525edd8df3f4f39e3'
  description: >-
    JSONL-formatted development data exported from Prodigy, annotated with
    `FASHION_BRAND` entities (500 examples)
# A workflow is a sequence of commands (defined under `commands`) that are
# executed in order. Run one via "spacy project run [workflow]". A command
# whose inputs/outputs haven't changed is not re-run.
workflows:
  all:
  - 'preprocess'
  - 'train'
  - 'evaluate'
  - 'package'
  - 'push_to_hub'
commands:
# Setup step. It is not listed in the `all` workflow.
- name: 'install'
  # NOTE(review): the help text mentions downloading a model, but neither
  # script step below does so explicitly. Presumably requirements.txt covers
  # it; confirm.
  help: 'Install dependencies, log in to Hugging Face and download a model'
  script:
  - 'pip install -r requirements.txt'
  - 'huggingface-cli login'
# Replace this with any code that trains a pipeline.
# Runs scripts/preprocess.py once per split, turning each JSONL asset into a
# .spacy file under corpus/.
- name: 'preprocess'
  help: "Convert the data to spaCy's binary format"
  script:
  - >-
    python scripts/preprocess.py
    assets/${vars.train}.jsonl
    corpus/${vars.train}.spacy
  - >-
    python scripts/preprocess.py
    assets/${vars.dev}.jsonl
    corpus/${vars.dev}.spacy
  deps:
  - 'assets/${vars.train}.jsonl'
  - 'assets/${vars.dev}.jsonl'
  - 'scripts/preprocess.py'
  outputs:
  - 'corpus/${vars.train}.spacy'
  - 'corpus/${vars.dev}.spacy'
# Runs `spacy train` with the project config on the converted corpora and
# writes its results under training/.
- name: 'train'
  help: 'Train a named entity recognition model'
  script:
  - >-
    python -m spacy train configs/${vars.config}
    --output training/
    --paths.train corpus/${vars.train}.spacy
    --paths.dev corpus/${vars.dev}.spacy
  deps:
  # The config file is read by the script, so it is an input too: listing it
  # makes the command re-run when the training settings change.
  - 'configs/${vars.config}'
  - 'corpus/${vars.train}.spacy'
  - 'corpus/${vars.dev}.spacy'
  outputs:
  - 'training/model-best'
# Runs `spacy evaluate` for training/model-best on the development corpus and
# writes the metrics to a JSON file.
- name: 'evaluate'
  help: 'Evaluate the model and export metrics'
  script:
  - >-
    python -m spacy evaluate training/model-best
    corpus/${vars.dev}.spacy
    --output training/metrics.json
  deps:
  - 'corpus/${vars.dev}.spacy'
  - 'training/model-best'
  outputs:
  - 'training/metrics.json'
# Build the package from training/model-best, using --build wheel so that a
# wheel file is produced.
- name: 'package'
  help: 'Package the trained model so it can be installed'
  script:
  - >-
    python -m spacy package training/model-best packages
    --name ${vars.name}
    --version ${vars.version}
    --force
    --build wheel
  deps:
  - 'training/model-best'
  outputs_no_cache:
  - 'packages/en_${vars.name}-${vars.version}/dist/en_${vars.name}-${vars.version}-py3-none-any.whl'
# Push the packaged wheel to the Hub.
- name: 'push_to_hub'
  help: 'Push the model to the Hub'
  script:
  - >-
    python -m spacy huggingface-hub push
    packages/en_${vars.name}-${vars.version}/dist/en_${vars.name}-${vars.version}-py3-none-any.whl
  deps:
  # Depend on the wheel itself (the file the script uploads and the path the
  # `package` command declares as its output) rather than on its parent
  # directory, which can exist even when no wheel was built.
  - 'packages/en_${vars.name}-${vars.version}/dist/en_${vars.name}-${vars.version}-py3-none-any.whl'