# Dockerfile-GPU-minimal
# Forked from deepset-ai/haystack
# Base image: NVIDIA CUDA 11.3.1 runtime on Ubuntu 20.04
FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04

# Working directory for every following COPY / RUN instruction
WORKDIR /home/user

# UTF-8 locale settings, plus a variable naming this image flavour
# (presumably read by Haystack at runtime -- confirm against the package)
ENV LC_ALL=C.UTF-8 \
    LANG=C.UTF-8 \
    HAYSTACK_DOCKER_CONTAINER="HAYSTACK_MINIMAL_GPU_CONTAINER"
# Install software dependencies.
# Everything happens in one RUN so that the apt package lists and the
# downloaded xpdf archive are removed in the same layer that created them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common && \
    # Register the deadsnakes PPA before installing the Python packages;
    # -y keeps the command non-interactive regardless of how the build is run
    add-apt-repository -y ppa:deadsnakes/ppa && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        git \
        poppler-utils \
        python3-pip \
        python3.8 \
        python3.8-distutils \
        swig \
        tesseract-ocr && \
    # Cleanup apt cache
    rm -rf /var/lib/apt/lists/* && \
    # Install PDF converter (xpdf pdftotext).
    # The archive is downloaded to a file instead of being piped into tar:
    # the default /bin/sh has no pipefail, so a failed download in a pipe
    # would not reliably abort the build. -f makes curl fail on HTTP errors.
    curl -fsSL -o /tmp/xpdf-tools.tar.gz \
        https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && \
    tar -xzf /tmp/xpdf-tools.tar.gz -C /usr/local/bin --strip-components=2 \
        xpdf-tools-linux-4.04/bin64/pdftotext && \
    rm -f /tmp/xpdf-tools.tar.gz
# Bring in only the packaging metadata (no source code yet), so the
# dependency-install layer below is reused when just the source changes
COPY LICENSE \
     README.md \
     VERSION.txt \
     pyproject.toml \
     /home/user/
# Install all the dependencies, including ocr component.
# At this point only the package metadata has been copied, so this step
# resolves and installs the dependencies declared in pyproject.toml.
RUN pip3 install --upgrade --no-cache-dir pip && \
    # ".[ocr]" is quoted: unquoted, the shell treats [ocr] as a glob
    # character class and could expand it to a file named .o, .c or .r
    pip3 install --no-cache-dir ".[ocr]" && \
    # Install PyTorch with CUDA 11 (cu113 build, matching the CUDA 11.3 base image)
    pip3 install --no-cache-dir torch==1.10.2+cu113 \
        -f https://download.pytorch.org/whl/torch_stable.html
# Add the Haystack sources next to the packaging metadata
# (destination is relative to WORKDIR, i.e. /home/user/haystack/)
COPY haystack ./haystack/
# Install Haystack itself; --no-deps because the dependencies were already
# installed by the earlier pip step.
# ".[ocr]" is quoted so the shell cannot glob-expand the [ocr] character class.
RUN pip3 install --no-cache-dir --no-deps ".[ocr]" && \
    # Cleanup copied files after installation is completed.
    # NOTE: this empties /home/user in the final filesystem only; the files
    # still exist in the earlier COPY layers, so image size is not reduced.
    rm -rf /home/user/*