Skip to content

Commit

Permalink
MVP fix #788 - basic background processing of assembly with examples
Browse files Browse the repository at this point in the history
  • Loading branch information
nonprofittechy committed Oct 28, 2024
1 parent 8e9aa14 commit 7a6b187
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 39 deletions.
114 changes: 76 additions & 38 deletions docassemble/AssemblyLine/al_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import subprocess
from collections import ChainMap
import pikepdf
from typing import Tuple

__all__ = [
"ALAddendumField",
Expand Down Expand Up @@ -1823,17 +1824,17 @@ def as_editable_list(
return editable

def get_cacheable_documents(
self,
key: str = "final",
pdf: bool = True,
docx: bool=False,
original: bool = False,
refresh: bool = True,
pdfa: bool = False,
include_zip: bool = True,
include_pdf: bool = False,
append_matching_suffix: bool = True,
) -> Tuple[List[Dict[str, DAFile]], Optional[DAFile], Optional[DAFile]]:
self,
key: str = "final",
pdf: bool = True,
docx: bool = False,
original: bool = False,
refresh: bool = True,
pdfa: bool = False,
include_zip: bool = True,
include_full_pdf: bool = False,
append_matching_suffix: bool = True,
) -> Tuple[List[Dict[str, DAFile]], Optional[DAFile], Optional[DAFile]]:
"""
Generates a cache of all enabled documents in the bundle, and returns it in a structure that can be cached
and returned for use in a background process.
Expand All @@ -1854,29 +1855,38 @@ def get_cacheable_documents(
refresh (bool): Flag to reconsider the 'enabled' attribute, default is True.
pdfa (bool): Flag to return documents in PDF/A format, default is False.
include_zip (bool): Flag to include a zip option, default is True.
include_pdf (bool): Flag to include a PDF version of the whole bundle, default is False.
include_full_pdf (bool): Flag to include a PDF version of the whole bundle, default is False.
append_matching_suffix (bool): Flag to determine if matching suffix should be appended to file name, default is True.
"""
# reduce idempotency delays
enabled_docs = self.enabled_documents(refresh=refresh)
for doc in enabled_docs:
doc.title

results = []

for doc in enabled_docs:
result = {"title": doc.title}
filename_root = os.path.splitext(str(doc.filename))[0]
if pdf:
result["pdf"] = doc.as_pdf(key=key, refresh=refresh, pdfa=pdfa, append_matching_suffix=append_matching_suffix)
result["pdf"] = doc.as_pdf(
key=key,
refresh=refresh,
pdfa=pdfa,
append_matching_suffix=append_matching_suffix,
)
result["download_filename"] = filename_root + ".pdf"
if docx and doc._is_docx(key=key):
result["docx"] = doc.as_docx(key=key, refresh=refresh, append_matching_suffix=append_matching_suffix)
result["docx"] = doc.as_docx(
key=key,
refresh=refresh,
append_matching_suffix=append_matching_suffix,
)
result["download_filename"] = filename_root + ".docx"
if original:
result["original"] = doc[key]
result["download_filename"] = doc.filename

try:
# If it's possible, set the file extension to the actual filetype
# This is mostly necessary if people omit the file extension in attachment block
Expand All @@ -1896,20 +1906,21 @@ def get_cacheable_documents(
except:
pass
results.append(result)

if len(enabled_docs) > 1 and include_zip:
bundled_zip = self.as_zip(key=key, format="original" if original else "docx" if docx else "pdf")
bundled_zip = self.as_zip(
key=key, format="original" if original else "docx" if docx else "pdf"
)
else:
bundled_zip = None
if len(enabled_docs) > 1 and include_pdf:

if len(enabled_docs) > 1 and include_full_pdf:
bundled_pdf = self.as_pdf(key=key, pdfa=pdfa)
else:
bundled_pdf = None

return results, bundled_zip, bundled_pdf


def download_list_html(
self,
key: str = "final",
Expand All @@ -1927,6 +1938,8 @@ def download_list_html(
append_matching_suffix: bool = True,
include_email: bool = False,
use_previously_cached_files: bool = False,
include_full_pdf: bool = False,
full_pdf_label: Optional[str] = None,
) -> str:
"""
Constructs an HTML table displaying a list of documents with 'view' and 'download' buttons.
Expand All @@ -1946,40 +1959,46 @@ def download_list_html(
zip_icon (str): Icon for the zip option, default is "file-archive".
append_matching_suffix (bool): Flag to determine if matching suffix should be appended to file name, default is True.
include_email (bool): Flag to include an email option, default is False.
use_previously_cached_files (bool): Flag to use previously cached files (e.g., made in a background task) when they exist, default is False.
include_full_pdf (bool): Flag to include a full PDF option, default is False.
full_pdf_label (Optional[str]): Label for the full PDF option. If not provided, uses the generic template for `self.full_pdf_label` ("Download as one PDF").
Returns:
str: HTML representation of a table with documents and their associated actions.
"""
if not hasattr(self, "_cached_zip_label"):
self._cached_zip_label = str(self.zip_label)

if not hasattr(self, "_cached_full_pdf_label"):
self._cached_full_pdf_label = str(self.full_pdf_label)

if use_previously_cached_files and hasattr(self, "_downloadable_files"):
downloadable_files, bundled_zip, bundled_pdf = self._downloadable_files
else:
downloadable_files, bundled_zip, bundled_pdf = self.get_cacheable_documents(
key=key,
pdf=format == "pdf",
pdf=(format == "pdf" or view == True),
docx=format == "docx",
original=format == "original",
refresh=refresh,
pdfa=pdfa,
include_zip=include_zip,
include_pdf=view,
include_full_pdf=include_full_pdf,
append_matching_suffix=append_matching_suffix,
)

html = f'<div class="container al_table al_doc_table" id="{ html_safe_str(self.instanceName) }">'

for result in downloadable_files:
title = result['title']
download_filename = result.get('download_filename', 'document')

if format == 'original' and 'original' in result:
download_doc = result['original']
elif format == 'docx' and 'docx' in result:
download_doc = result['docx']
elif 'pdf' in result:
download_doc = result['pdf']
title = result["title"]
download_filename = result.get("download_filename", "document")

if format == "original" and "original" in result:
download_doc = result["original"]
elif format == "docx" and "docx" in result:
download_doc = result["docx"]
elif "pdf" in result:
download_doc = result["pdf"]
else:
continue # Skip if the desired format is not available

Expand All @@ -1996,11 +2015,12 @@ def download_list_html(
)

# Construct the view button if needed
if view and 'pdf' in result and result['pdf'].url_for().endswith(".pdf"):
if view and "pdf" in result and result["pdf"].url_for().endswith(".pdf"):
# Use .pdf as the filename extension
view_filename = os.path.splitext(download_filename)[0] + ".pdf"
doc_view_button = action_button_html(
result['pdf'].url_for(
attachment=False,
display_filename=download_filename
result["pdf"].url_for(
attachment=False, display_filename=view_filename
),
label=view_label,
icon=view_icon,
Expand All @@ -2020,7 +2040,9 @@ def download_list_html(
zip_label = self._cached_zip_label
filename_root = os.path.splitext(str(self.filename))[0]
zip_button = action_button_html(
bundled_zip.url_for(attachment=False, display_filename=filename_root + ".zip"),
bundled_zip.url_for(
attachment=False, display_filename=filename_root + ".zip"
),
label=zip_label,
icon=zip_icon,
color="primary",
Expand All @@ -2029,6 +2051,22 @@ def download_list_html(
)
html += table_row(zip_label, zip_button)

if include_full_pdf and bundled_pdf:
if not full_pdf_label:
full_pdf_label = self._cached_full_pdf_label
filename_root = os.path.splitext(str(self.filename))[0]
full_pdf_button = action_button_html(
bundled_pdf.url_for(
attachment=False, display_filename=filename_root + ".pdf"
),
label=full_pdf_label,
icon="file-pdf",
color="primary",
size="md",
classname="al_full_pdf al_button",
)
html += table_row(full_pdf_label, full_pdf_button)

if include_email:
html += self.send_email_table_row(key=key)

Expand Down
46 changes: 45 additions & 1 deletion docassemble/AssemblyLine/data/questions/al_document.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,53 @@ template: x.zip_label
content: |
Download all
---
generic object: ALDocumentBundle
template: x.full_pdf_label
content: |
Download as one PDF
---
id: al exhibit ocr pages bg
event: al_exhibit_ocr_pages
code: |
to_pdf = action_argument('to_pdf')
from_file = action_argument('from_file')
background_response(ocrmypdf_task(from_file, to_pdf))
background_response(ocrmypdf_task(from_file, to_pdf))
---
generic object: ALDocumentBundle
code: |
x.generate_downloads_task = background_action(x.attr_name('create_downloads'))
---
generic object: ALDocumentBundle
event: x.create_downloads
code: |
download_response = x.get_cacheable_documents(key="final", pdf=True, include_full_pdf=True)
background_response_action(x.attr_name('save_downloads'), download_response=download_response)
---
generic object: ALDocumentBundle
code: |
x.generate_downloads_with_docx_task = background_action(x.attr_name('create_downloads_with_docx'))
---
generic object: ALDocumentBundle
event: x.create_downloads_with_docx
code: |
download_response = x.get_cacheable_documents(key="final", pdf=True, docx=True, include_full_pdf=True)
background_response_action(x.attr_name('save_downloads'), download_response=download_response)
---
generic object: ALDocumentBundle
event: x.save_downloads
code: |
x._downloadable_files = action_argument('download_response')
background_response()
---
id: waiting screen
question: |
Please wait while we make your documents
subquestion: |
This can take a few minutes.
<div class="spinner-border text-primary d-flex justify-content-center" role="status">
<span class="sr-only">Making documents...</span>
</div>
event: al_download_waiting_screen
reload: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
include:
- assembly_line.yml
---
metadata:
title: ALDocument Background Processing
---
mandatory: True
code: |
intro_screen
# if not al_user_bundle.generate_downloads_task.ready(): # Without DOCX
if not al_user_bundle.generate_downloads_with_docx_task.ready(): # With DOCX
al_download_waiting_screen
download_screen
---
continue button field: intro_screen
question: |
Intro screen
---
id: download
event: download_screen
question: |
Download the documents
subquestion: |
Your documents are ready for download.
${ al_user_bundle.download_list_html(use_previously_cached_files=True, include_full_pdf=True) }
---
objects:
- al_user_bundle: ALDocumentBundle.using(
elements = [pdf_1, docx_1],
title = "Background processed bundle",
enabled = True,
filename = "background_processed_bundle",
)
---
objects:
- pdf_1: ALDocument.using( title="PDF 1 with a lot of text for the title of this document", filename="pdf_doc_1", enabled=True, has_addendum=False )
- docx_1: ALDocument.using( title="Docx 1 also with a lot of text for the title of this document", filename="docx_doc_1", enabled=True, has_addendum=False )
---
attachment:
variable name: pdf_1[i]
pdf template file: test_aldocument_pdf_1.pdf
filename: pdf_1
fields:
- "sample_field": "Sample input"
---
attachment:
variable name: docx_1[i]
docx template file: test_aldocument_docx_1.docx
filename: docx_1

0 comments on commit 7a6b187

Please sign in to comment.