# pyproject.toml for docetl (forked from ucbepic/docetl).
# Core package metadata that Poetry uses to build and publish the project.
[tool.poetry]
name = "docetl"
version = "0.2"
description = "ETL with LLM operations."
license = "MIT"
readme = "README.md"
# NOTE(review): the address below looks like an e-mail-obfuscation placeholder
# rather than a real mailbox — confirm and restore the intended address.
authors = ["Shreya Shankar <[email protected]>"]
# Package the `docetl` directory; files matching `exclude` are left out of the build.
packages = [{ include = "docetl" }]
exclude = ["website/**/*"]
# Console scripts: installs a `docetl` command that invokes the `app` object
# exported by the `docetl.cli` module.
[tool.poetry.scripts]
docetl = "docetl.cli:app"
# Runtime requirements. Entries marked `optional = true` are installed only
# when an extra from [tool.poetry.extras] that lists them is requested.
[tool.poetry.dependencies]
python = "^3.10"

# Always installed.
asteval = "^1.0.4"
diskcache = "^5.6.3"
frozendict = "^2.4.4"
jsonschema = "^4.23.0"
litellm = "^1.51.0"
lzstring = "^1.0.4"
pydantic = "^2.9.2"
pyrate-limiter = "^3.7.0"
python-multipart = "^0.0.17"
rapidfuzz = "^3.10.0"
rich = "^13.7.1"
scikit-learn = "^1.5.2"
tqdm = "^4.66.4"
typer = "^0.12.5"
websockets = "^13.1"

# Optional; each one is referenced by at least one extra.
azure-ai-documentintelligence = { version = "^1.0.0b4", optional = true }
azure-ai-formrecognizer = { version = "^3.3.3", optional = true }
docling = { version = "^2.5.2", optional = true }
fastapi = { version = "^0.115.0", optional = true }
openpyxl = { version = "^3.1.5", optional = true }
paddlepaddle = { version = "^2.6.2", optional = true }
pydub = { version = "^0.25.1", optional = true }
pymupdf = { version = "^1.24.10", optional = true }
python-docx = { version = "^1.1.2", optional = true }
python-pptx = { version = "^1.0.2", optional = true }
uvicorn = { version = "^0.31.0", optional = true }
# Optional feature sets, selected at install time (e.g. `docetl[parsing]`).
# Each name must match a dependency declared with `optional = true`.
[tool.poetry.extras]
parsing = [
    "python-docx",
    "openpyxl",
    "pydub",
    "python-pptx",
    "azure-ai-documentintelligence",
    "paddlepaddle",
    "pymupdf",
]
server = [
    "fastapi",
    "uvicorn",
    "docling",
    "azure-ai-formrecognizer",
    "azure-ai-documentintelligence",
]
# Development-only dependency group (tests, linting, type checking, docs).
# Kept in alphabetical order.
[tool.poetry.group.dev.dependencies]
linkchecker = "^10.5.0"
mkdocs = "^1.6.1"
mkdocs-glightbox = "^0.4.0"
mkdocs-material = "^9.5.34"
mkdocstrings = "^0.26.1"
mkdocstrings-python = "^1.11.1"
mypy = "^1.11.1"
pre-commit = "^3.8.0"
pytest = "^8.3.2"
pytest-sugar = "^1.0.0"
pytest-xdist = "^3.6.1"
python-dotenv = "^1.0.1"
pytkdocs = "^0.16.2"
ruff = "^0.6.1"
# pytest configuration.
[tool.pytest.ini_options]
# Pins pytest's temporary-directory root to a fixed location.
# NOTE(review): the path is POSIX-specific and shared by every run on the
# machine — confirm that tests never run on Windows or concurrently.
addopts = "--basetemp=/tmp/pytest"
testpaths = ["tests"]
# Silences these three warning categories for the whole test run.
filterwarnings = [
    "ignore::DeprecationWarning",
    "ignore::UserWarning",
    "ignore::RuntimeWarning",
]
# mypy static type-checking configuration.
[tool.mypy]
# What to check and where to look for modules.
files = "docetl"
mypy_path = "docetl"
# Entries are regular expressions matched against paths, not shell globs.
exclude = ["docetl/tests*"]
# Strictness settings.
disallow_untyped_defs = true
warn_return_any = true
warn_unused_configs = true
# Imports that cannot be resolved are not reported as errors.
ignore_missing_imports = true
# Append the error code to every reported message.
show_error_codes = true
# PEP 517 build configuration: build front-ends install `requires` and then
# call the backend named in `build-backend`.
[build-system]
# A version floor (the one Poetry's documentation uses) so that pre-1.0
# releases of poetry-core are never selected as the build backend.
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
# Entry points published under the "docetl.operation" group. Each key is the
# entry-point name and each value is the `module:object` it resolves to.
# NOTE(review): the keys are presumably the operation type names used in
# pipeline definitions — confirm against the code that loads this group.
[tool.poetry.plugins."docetl.operation"]
map = "docetl.operations.map:MapOperation"
parallel_map = "docetl.operations.map:ParallelMapOperation"
filter = "docetl.operations.filter:FilterOperation"
unnest = "docetl.operations.unnest:UnnestOperation"
equijoin = "docetl.operations.equijoin:EquijoinOperation"
split = "docetl.operations.split:SplitOperation"
reduce = "docetl.operations.reduce:ReduceOperation"
resolve = "docetl.operations.resolve:ResolveOperation"
gather = "docetl.operations.gather:GatherOperation"
cluster = "docetl.operations.cluster:ClusterOperation"
sample = "docetl.operations.sample:SampleOperation"
link_resolve = "docetl.operations.link_resolve:LinkResolveOperation"
# The three code_* operations all come from one module, `code_operations`.
code_map = "docetl.operations.code_operations:CodeMapOperation"
code_reduce = "docetl.operations.code_operations:CodeReduceOperation"
code_filter = "docetl.operations.code_operations:CodeFilterOperation"
# Entry points published under the "docetl.parser" group. Every parser is a
# callable exported by the `docetl.parsing_tools` module.
# NOTE(review): no llama-index, whisper, or paddleocr package is declared in
# [tool.poetry.dependencies]; if the matching parsers import them, they need
# to be added to the dependencies/extras — confirm against docetl.parsing_tools.
[tool.poetry.plugins."docetl.parser"]
llama_index_simple_directory_reader = "docetl.parsing_tools:llama_index_simple_directory_reader"
llama_index_wikipedia_reader = "docetl.parsing_tools:llama_index_wikipedia_reader"
whisper_speech_to_text = "docetl.parsing_tools:whisper_speech_to_text"
xlsx_to_string = "docetl.parsing_tools:xlsx_to_string"
txt_to_string = "docetl.parsing_tools:txt_to_string"
docx_to_string = "docetl.parsing_tools:docx_to_string"
pptx_to_string = "docetl.parsing_tools:pptx_to_string"
azure_di_read = "docetl.parsing_tools:azure_di_read"
paddleocr_pdf_to_string = "docetl.parsing_tools:paddleocr_pdf_to_string"