-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from dlcs/feature/handle_pdf_err
Handle 404 when fetching PDF + bump dependencies
- Loading branch information
Showing 7 changed files with 34 additions and 24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,19 @@ | ||
FROM debian:bullseye as build | ||
|
||
# avoid issue with packages requiring interaction (e.g. tzdata) | ||
ARG DEBIAN_FRONTEND=noninteractive | ||
RUN apt-get update && apt-get install -y wget cmake clang git autoconf pkg-config | ||
|
||
# Change submodule to https as we're cloning only. Avoids issues with ssh | ||
# 8bb209c0c21476ee904a is 0.4 with some bugfixes | ||
RUN mkdir /home/pdfalto && cd /home/pdfalto \ | ||
&& git clone https://github.com/kermitt2/pdfalto.git && cd pdfalto && git checkout 8bb209c0c21476ee904a && ./install_deps.sh \ | ||
&& git submodule set-url xpdf-4.03 https://github.com/kermitt2/xpdf-4.03.git && git submodule update --init --recursive \ | ||
&& cmake ./ && make | ||
|
||
FROM python:3.9-slim | ||
FROM python:3.11-slim | ||
|
||
LABEL maintainer="Donald Gray <[email protected]>" | ||
LABEL org.opencontainers.image.source=https://github.com/dlcs/pdf-to-alto | ||
LABEL org.opencontainers.image.description="Extract ALTO from PDF" | ||
|
||
COPY --from=build /home/pdfalto/pdfalto/pdfalto /usr/bin/pdfalto | ||
COPY /deps/pdfalto /usr/bin/pdfalto | ||
|
||
COPY requirements.txt /opt/app/requirements.txt | ||
|
||
WORKDIR /opt/app | ||
RUN pip install --no-cache-dir -r requirements.txt | ||
COPY . /opt/app | ||
|
||
COPY app /opt/app/app | ||
COPY monitor.py /opt/app/monitor.py | ||
COPY wait-for-localstack.sh /opt/app/wait-for-localstack.sh | ||
|
||
RUN chmod +x wait-for-localstack.sh | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
FROM localstack/localstack | ||
COPY seed-resources.sh /docker-entrypoint-initaws.d/ | ||
FROM localstack/localstack:2.2.0 | ||
COPY seed-resources.sh /etc/localstack/init/ready.d/ |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Dependencies | ||
|
||
pdfalto is a required binary from https://github.com/kermitt2/pdfalto.git. Built using: | ||
|
||
```dockerfile | ||
FROM debian:bullseye as build | ||
|
||
# avoid issue with packages requiring interaction (e.g. tzdata) | ||
ARG DEBIAN_FRONTEND=noninteractive | ||
RUN apt-get update && apt-get install -y wget cmake clang git autoconf pkg-config | ||
|
||
# Change submodule to https as we're cloning only. Avoids issues with ssh | ||
# 8bb209c0c21476ee904a is 0.4 with some bugfixes | ||
RUN mkdir /home/pdfalto && cd /home/pdfalto \ | ||
&& git clone https://github.com/kermitt2/pdfalto.git && cd pdfalto && git checkout 8bb209c0c21476ee904a && ./install_deps.sh \ | ||
&& git submodule set-url xpdf-4.03 https://github.com/kermitt2/xpdf-4.03.git && git submodule update --init --recursive \ | ||
&& cmake ./ && make | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters