Skip to content

Commit

Permalink
Merge pull request #890 from SuffolkLITLab/background-assembly
Browse files Browse the repository at this point in the history
Basic background assembly implementation
  • Loading branch information
nonprofittechy authored Oct 30, 2024
2 parents 91faf27 + 55a886d commit 11772f1
Show file tree
Hide file tree
Showing 4 changed files with 311 additions and 56 deletions.
224 changes: 169 additions & 55 deletions docassemble/AssemblyLine/al_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import subprocess
from collections import ChainMap
import pikepdf
from typing import Tuple

__all__ = [
"ALAddendumField",
Expand Down Expand Up @@ -1822,6 +1823,107 @@ def as_editable_list(
editable.append(doc)
return editable

def get_cacheable_documents(
self,
key: str = "final",
pdf: bool = True,
docx: bool = False,
original: bool = False,
refresh: bool = True,
pdfa: bool = False,
include_zip: bool = True,
include_full_pdf: bool = False,
append_matching_suffix: bool = True,
) -> Tuple[List[Dict[str, DAFile]], Optional[DAFile], Optional[DAFile]]:
"""
Generates a cache of all enabled documents in the bundle, and returns it in a structure that can be cached
and returned for use in a background process.
The result looks like this:
[
{"title": "Document 1", "pdf": DAFile, "docx": DAFile, download_filename: str},
],
DAFile, # Zip of whole bundle
DAFile # PDF of whole bundle
Args:
key (str): Identifier for the document version, default is "final".
pdf (bool): Flag to include a PDF version of each file, default is True.
docx (bool): Flag to include a DOCX version of each file, default is False.
original (bool): Flag to include the original version of each file, default is False.
refresh (bool): Flag to reconsider the 'enabled' attribute, default is True.
pdfa (bool): Flag to return documents in PDF/A format, default is False.
include_zip (bool): Flag to include a zip option, default is True.
include_full_pdf (bool): Flag to include a PDF version of the whole bundle, default is False.
append_matching_suffix (bool): Flag to determine if matching suffix should be appended to file name, default is True.
Returns:
Tuple[List[Dict[str, DAFile]], Optional[DAFile], Optional[DAFile]]: A list of dictionaries containing the enabled documents, a zip file of the whole bundle, and a PDF of the whole
"""
# reduce idempotency delays
enabled_docs = self.enabled_documents(refresh=refresh)
for doc in enabled_docs:
doc.title

results = []

for doc in enabled_docs:
result = {"title": doc.title}
filename_root = os.path.splitext(str(doc.filename))[0]
if pdf:
result["pdf"] = doc.as_pdf(
key=key,
refresh=refresh,
pdfa=pdfa,
append_matching_suffix=append_matching_suffix,
)
result["download_filename"] = filename_root + ".pdf"
if docx and doc._is_docx(key=key):
result["docx"] = doc.as_docx(
key=key,
refresh=refresh,
append_matching_suffix=append_matching_suffix,
)
result["download_filename"] = filename_root + ".docx"
if original:
result["original"] = doc[key]
result["download_filename"] = doc.filename

try:
# If it's possible, set the file extension to the actual filetype
# This is mostly necessary if people omit the file extension in attachment block
# for filetype="original"
if original:
download_doc = result["original"]
else:
download_doc = result["docx"]
ext = next(
iter(
mimetypes.guess_all_extensions(
download_doc.mimetype, strict=True
)
)
)
result["download_filename"] = filename_root + ext
except:
pass
results.append(result)

if len(enabled_docs) > 1 and include_zip:
bundled_zip = self.as_zip(
key=key, format="original" if original else "docx" if docx else "pdf"
)
else:
bundled_zip = None

if len(enabled_docs) > 1 and include_full_pdf:
bundled_pdf = self.as_pdf(key=key, pdfa=pdfa)
else:
bundled_pdf = None

return results, bundled_zip, bundled_pdf

def download_list_html(
self,
key: str = "final",
Expand All @@ -1838,6 +1940,9 @@ def download_list_html(
zip_icon: str = "file-archive",
append_matching_suffix: bool = True,
include_email: bool = False,
use_previously_cached_files: bool = False,
include_full_pdf: bool = False,
full_pdf_label: Optional[str] = None,
) -> str:
"""
Constructs an HTML table displaying a list of documents with 'view' and 'download' buttons.
Expand All @@ -1857,59 +1962,50 @@ def download_list_html(
zip_icon (str): Icon for the zip option, default is "file-archive".
append_matching_suffix (bool): Flag to determine if matching suffix should be appended to file name, default is True.
include_email (bool): Flag to include an email option, default is False.
use_previously_cached_files (bool): Flag to use previously cached files (e.g., made in background) if defined. default is False.
include_full_pdf (bool): Flag to include a full PDF option, default is False.
full_pdf_label (Optional[str]): Label for the full PDF option. If not provided, uses the generic template for `self.full_pdf_label` ("Download all").
Returns:
str: HTML representation of a table with documents and their associated actions.
"""
if not hasattr(self, "_cached_zip_label"):
self._cached_zip_label = str(self.zip_label)

# Trigger some variables up top to avoid idempotency issues
enabled_docs = self.enabled_documents(refresh=refresh)
for doc in enabled_docs:
doc.title
if format == "pdf":
doc.as_pdf(
key=key,
refresh=refresh,
pdfa=pdfa,
append_matching_suffix=append_matching_suffix,
) # Generate cached file for this session
if not hasattr(self, "_cached_full_pdf_label"):
self._cached_full_pdf_label = str(self.full_pdf_label)

if use_previously_cached_files and hasattr(self, "_downloadable_files"):
downloadable_files, bundled_zip, bundled_pdf = self._downloadable_files
else:
downloadable_files, bundled_zip, bundled_pdf = self.get_cacheable_documents(
key=key,
pdf=(format == "pdf" or view == True),
docx=format == "docx",
original=format == "original",
refresh=refresh,
pdfa=pdfa,
include_zip=include_zip,
include_full_pdf=include_full_pdf,
append_matching_suffix=append_matching_suffix,
)

html = f'<div class="container al_table al_doc_table" id="{ html_safe_str(self.instanceName) }">'

for doc in enabled_docs:
filename_root = os.path.splitext(str(doc.filename))[0]
# Do our best to use the provided filename + the extension from requested filetype
if format == "original":
download_doc = doc[key]
download_filename = doc.filename
if format == "docx" and doc._is_docx(key=key):
download_doc = doc.as_docx(
key=key, append_matching_suffix=append_matching_suffix
)
download_filename = filename_root + ".docx"
else:
download_doc = doc.as_pdf(
key=key, append_matching_suffix=append_matching_suffix
)
download_filename = filename_root + ".pdf"
for result in downloadable_files:
title = result["title"]
download_filename = result.get("download_filename", "document")

try:
# If it's possible, set the file extension to the actual filetype
# This is mostly necessary if people omit the file extension in attachment block
# for filetype="original"
ext = next(
iter(
mimetypes.guess_all_extensions(
download_doc.mimetype, strict=True
)
)
)
download_filename = filename_root + ext
except:
pass
if format == "original" and "original" in result:
download_doc = result["original"]
elif format == "docx" and "docx" in result:
download_doc = result["docx"]
elif "pdf" in result:
download_doc = result["pdf"]
else:
continue # Skip if the desired format is not available

# Construct the download button
doc_download_button = action_button_html(
download_doc.url_for(
attachment=True, display_filename=download_filename
Expand All @@ -1920,12 +2016,14 @@ def download_list_html(
size="md",
classname="al_download al_button",
)
if view and doc.as_pdf().url_for().endswith(".pdf"):

# Construct the view button if needed
if view and "pdf" in result and result["pdf"].url_for().endswith(".pdf"):
# Use .pdf as the filename extension
view_filename = os.path.splitext(download_filename)[0] + ".pdf"
doc_view_button = action_button_html(
doc.as_pdf(key=key).url_for(
attachment=False,
display_filename=filename_root + ".pdf",
append_matching_suffix=append_matching_suffix,
result["pdf"].url_for(
attachment=False, display_filename=view_filename
),
label=view_label,
icon=view_icon,
Expand All @@ -1936,31 +2034,47 @@ def download_list_html(
buttons = [doc_view_button, doc_download_button]
else:
buttons = [doc_download_button]
html += table_row(doc.title, buttons)

# Add a zip file row if there's more than one doc
filename_root = os.path.splitext(str(self.filename))[0]
if len(enabled_docs) > 1 and include_zip:
html += table_row(title, buttons)

# Add a zip file row if included
if include_zip and bundled_zip:
if not zip_label:
zip_label = self._cached_zip_label
# Zip file will match the format of the download table
zip = self.as_zip(key=key, format=format, include_pdf=view)
filename_root = os.path.splitext(str(self.filename))[0]
zip_button = action_button_html(
zip.url_for(attachment=False, display_filename=filename_root + ".zip"),
bundled_zip.url_for(
attachment=False, display_filename=filename_root + ".zip"
),
label=zip_label,
icon=zip_icon,
color="primary",
size="md",
classname="al_zip al_button",
)
html += table_row(zip.title, zip_button)
html += table_row(zip_label, zip_button)

if include_full_pdf and bundled_pdf:
if not full_pdf_label:
full_pdf_label = self._cached_full_pdf_label
filename_root = os.path.splitext(str(self.filename))[0]
full_pdf_button = action_button_html(
bundled_pdf.url_for(
attachment=False, display_filename=filename_root + ".pdf"
),
label=full_pdf_label,
icon="file-pdf",
color="primary",
size="md",
classname="al_full_pdf al_button",
)
html += table_row(full_pdf_label, full_pdf_button)

if include_email:
html += self.send_email_table_row(key=key)

html += "\n</div>"

# Discuss: Do we want a table with the ability to have a merged pdf row?
return html

def download_html(
Expand Down
78 changes: 77 additions & 1 deletion docassemble/AssemblyLine/data/questions/al_document.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,85 @@ template: x.zip_label
content: |
Download all
---
generic object: ALDocumentBundle
template: x.full_pdf_label
content: |
Download as one PDF
---
id: al exhibit ocr pages bg
event: al_exhibit_ocr_pages
code: |
to_pdf = action_argument('to_pdf')
from_file = action_argument('from_file')
background_response(ocrmypdf_task(from_file, to_pdf))
background_response(ocrmypdf_task(from_file, to_pdf))
---
############## Background document assembly #################
---
generic object: ALDocumentBundle
code: |
x.generate_downloads_task = background_action(x.attr_name('create_downloads'))
---
generic object: ALDocumentBundle
event: x.create_downloads
code: |
download_response = x.get_cacheable_documents(key="final", pdf=True, include_full_pdf=True)
background_response_action(x.attr_name('save_downloads'), download_response=download_response)
---
generic object: ALDocumentBundle
code: |
x.generate_downloads_with_docx_task = background_action(x.attr_name('create_downloads_with_docx'))
---
generic object: ALDocumentBundle
event: x.create_downloads_with_docx
code: |
download_response = x.get_cacheable_documents(key="final", pdf=True, docx=True, include_full_pdf=True)
background_response_action(x.attr_name('save_downloads'), download_response=download_response)
---
generic object: ALDocumentBundle
event: x.save_downloads
code: |
x._downloadable_files = action_argument('download_response')
background_response()
---
id: waiting screen
question: |
Please wait while we make your documents
subquestion: |
This can take a few minutes.
<div class="spinner-border text-primary d-flex justify-content-center" role="status">
<span class="sr-only">Making documents...</span>
</div>
event: al_download_waiting_screen
reload: True
---
########## Background preview assembly ################
---
generic object: ALDocumentBundle
code: |
x.generate_preview_task = background_action(x.attr_name('generate_preview_event'))
---
generic object: ALDocumentBundle
event: x.generate_preview_event
code: |
preview_response = x.as_pdf(key="preview")
background_response_action(x.attr_name('save_preview'), preview_response=preview_response)
---
generic object: ALDocumentBundle
event: x.save_preview
code: |
x._preview_file = action_argument('preview_response')
background_response()
---
id: preview waiting screen
question: |
Please wait while we generate a preview of your document
subquestion: |
This can take a few minutes.
<div class="spinner-border text-primary d-flex justify-content-center" role="status">
<span class="sr-only">Generating preview...</span>
</div>
event: al_preview_waiting_screen
reload: True
Loading

0 comments on commit 11772f1

Please sign in to comment.