From 7a6b187e0140e16258f3dcaf5f56f102886762e0 Mon Sep 17 00:00:00 2001 From: Quinten Steenhuis Date: Sun, 27 Oct 2024 23:10:59 -0400 Subject: [PATCH] MVP fix #788 - basic background processing of assembly with examples --- docassemble/AssemblyLine/al_document.py | 114 ++++++++++++------ .../data/questions/al_document.yml | 46 ++++++- .../test_aldocument_background_assembly.yml | 51 ++++++++ 3 files changed, 172 insertions(+), 39 deletions(-) create mode 100644 docassemble/AssemblyLine/data/questions/test_aldocument_background_assembly.yml diff --git a/docassemble/AssemblyLine/al_document.py b/docassemble/AssemblyLine/al_document.py index b4385bac..4c23ae8c 100644 --- a/docassemble/AssemblyLine/al_document.py +++ b/docassemble/AssemblyLine/al_document.py @@ -35,6 +35,7 @@ import subprocess from collections import ChainMap import pikepdf +from typing import Tuple __all__ = [ "ALAddendumField", @@ -1823,17 +1824,17 @@ def as_editable_list( return editable def get_cacheable_documents( - self, - key: str = "final", - pdf: bool = True, - docx: bool=False, - original: bool = False, - refresh: bool = True, - pdfa: bool = False, - include_zip: bool = True, - include_pdf: bool = False, - append_matching_suffix: bool = True, - ) -> Tuple[List[Dict[str, DAFile]], Optional[DAFile], Optional[DAFile]]: + self, + key: str = "final", + pdf: bool = True, + docx: bool = False, + original: bool = False, + refresh: bool = True, + pdfa: bool = False, + include_zip: bool = True, + include_full_pdf: bool = False, + append_matching_suffix: bool = True, + ) -> Tuple[List[Dict[str, DAFile]], Optional[DAFile], Optional[DAFile]]: """ Generates a cache of all enabled documents in the bundle, and returns it in a structure that can be cached and returned for use in a background process. @@ -1854,29 +1855,38 @@ def get_cacheable_documents( refresh (bool): Flag to reconsider the 'enabled' attribute, default is True. pdfa (bool): Flag to return documents in PDF/A format, default is False. 
include_zip (bool): Flag to include a zip option, default is True. - include_pdf (bool): Flag to include a PDF version of the whole bundle, default is False. + include_full_pdf (bool): Flag to include a PDF version of the whole bundle, default is False. append_matching_suffix (bool): Flag to determine if matching suffix should be appended to file name, default is True. """ # reduce idempotency delays enabled_docs = self.enabled_documents(refresh=refresh) for doc in enabled_docs: doc.title - + results = [] - + for doc in enabled_docs: result = {"title": doc.title} filename_root = os.path.splitext(str(doc.filename))[0] if pdf: - result["pdf"] = doc.as_pdf(key=key, refresh=refresh, pdfa=pdfa, append_matching_suffix=append_matching_suffix) + result["pdf"] = doc.as_pdf( + key=key, + refresh=refresh, + pdfa=pdfa, + append_matching_suffix=append_matching_suffix, + ) result["download_filename"] = filename_root + ".pdf" if docx and doc._is_docx(key=key): - result["docx"] = doc.as_docx(key=key, refresh=refresh, append_matching_suffix=append_matching_suffix) + result["docx"] = doc.as_docx( + key=key, + refresh=refresh, + append_matching_suffix=append_matching_suffix, + ) result["download_filename"] = filename_root + ".docx" if original: result["original"] = doc[key] result["download_filename"] = doc.filename - + try: # If it's possible, set the file extension to the actual filetype # This is mostly necessary if people omit the file extension in attachment block @@ -1896,20 +1906,21 @@ def get_cacheable_documents( except: pass results.append(result) - + if len(enabled_docs) > 1 and include_zip: - bundled_zip = self.as_zip(key=key, format="original" if original else "docx" if docx else "pdf") + bundled_zip = self.as_zip( + key=key, format="original" if original else "docx" if docx else "pdf" + ) else: bundled_zip = None - - if len(enabled_docs) > 1 and include_pdf: + + if len(enabled_docs) > 1 and include_full_pdf: bundled_pdf = self.as_pdf(key=key, pdfa=pdfa) else: bundled_pdf 
= None return results, bundled_zip, bundled_pdf - def download_list_html( self, key: str = "final", @@ -1927,6 +1938,8 @@ def download_list_html( append_matching_suffix: bool = True, include_email: bool = False, use_previously_cached_files: bool = False, + include_full_pdf: bool = False, + full_pdf_label: Optional[str] = None, ) -> str: """ Constructs an HTML table displaying a list of documents with 'view' and 'download' buttons. @@ -1946,6 +1959,9 @@ def download_list_html( zip_icon (str): Icon for the zip option, default is "file-archive". append_matching_suffix (bool): Flag to determine if matching suffix should be appended to file name, default is True. include_email (bool): Flag to include an email option, default is False. + use_previously_cached_files (bool): Flag to use previously cached files (e.g., made in background) if defined. Default is False. + include_full_pdf (bool): Flag to include a full PDF option, default is False. + full_pdf_label (Optional[str]): Label for the full PDF option. If not provided, uses the generic template for `self.full_pdf_label` ("Download as one PDF"). Returns: str: HTML representation of a table with documents and their associated actions. @@ -1953,33 +1969,36 @@ def download_list_html( if not hasattr(self, "_cached_zip_label"): self._cached_zip_label = str(self.zip_label) + if not hasattr(self, "_cached_full_pdf_label"): + self._cached_full_pdf_label = str(self.full_pdf_label) + if use_previously_cached_files and hasattr(self, "_downloadable_files"): downloadable_files, bundled_zip, bundled_pdf = self._downloadable_files else: downloadable_files, bundled_zip, bundled_pdf = self.get_cacheable_documents( key=key, - pdf=format == "pdf", + pdf=(format == "pdf" or view == True), docx=format == "docx", original=format == "original", refresh=refresh, pdfa=pdfa, include_zip=include_zip, - include_pdf=view, + include_full_pdf=include_full_pdf, append_matching_suffix=append_matching_suffix, ) html = f'
' for result in downloadable_files: - title = result['title'] - download_filename = result.get('download_filename', 'document') - - if format == 'original' and 'original' in result: - download_doc = result['original'] - elif format == 'docx' and 'docx' in result: - download_doc = result['docx'] - elif 'pdf' in result: - download_doc = result['pdf'] + title = result["title"] + download_filename = result.get("download_filename", "document") + + if format == "original" and "original" in result: + download_doc = result["original"] + elif format == "docx" and "docx" in result: + download_doc = result["docx"] + elif "pdf" in result: + download_doc = result["pdf"] else: continue # Skip if the desired format is not available @@ -1996,11 +2015,12 @@ def download_list_html( ) # Construct the view button if needed - if view and 'pdf' in result and result['pdf'].url_for().endswith(".pdf"): + if view and "pdf" in result and result["pdf"].url_for().endswith(".pdf"): + # Use .pdf as the filename extension + view_filename = os.path.splitext(download_filename)[0] + ".pdf" doc_view_button = action_button_html( - result['pdf'].url_for( - attachment=False, - display_filename=download_filename + result["pdf"].url_for( + attachment=False, display_filename=view_filename ), label=view_label, icon=view_icon, @@ -2020,7 +2040,9 @@ def download_list_html( zip_label = self._cached_zip_label filename_root = os.path.splitext(str(self.filename))[0] zip_button = action_button_html( - bundled_zip.url_for(attachment=False, display_filename=filename_root + ".zip"), + bundled_zip.url_for( + attachment=False, display_filename=filename_root + ".zip" + ), label=zip_label, icon=zip_icon, color="primary", @@ -2029,6 +2051,22 @@ def download_list_html( ) html += table_row(zip_label, zip_button) + if include_full_pdf and bundled_pdf: + if not full_pdf_label: + full_pdf_label = self._cached_full_pdf_label + filename_root = os.path.splitext(str(self.filename))[0] + full_pdf_button = action_button_html( + 
bundled_pdf.url_for( + attachment=False, display_filename=filename_root + ".pdf" + ), + label=full_pdf_label, + icon="file-pdf", + color="primary", + size="md", + classname="al_full_pdf al_button", + ) + html += table_row(full_pdf_label, full_pdf_button) + if include_email: html += self.send_email_table_row(key=key) diff --git a/docassemble/AssemblyLine/data/questions/al_document.yml b/docassemble/AssemblyLine/data/questions/al_document.yml index a6dae390..45f8cc25 100644 --- a/docassemble/AssemblyLine/data/questions/al_document.yml +++ b/docassemble/AssemblyLine/data/questions/al_document.yml @@ -88,9 +88,53 @@ template: x.zip_label content: | Download all --- +generic object: ALDocumentBundle +template: x.full_pdf_label +content: | + Download as one PDF +--- id: al exhibit ocr pages bg event: al_exhibit_ocr_pages code: | to_pdf = action_argument('to_pdf') from_file = action_argument('from_file') - background_response(ocrmypdf_task(from_file, to_pdf)) \ No newline at end of file + background_response(ocrmypdf_task(from_file, to_pdf)) +--- +generic object: ALDocumentBundle +code: | + x.generate_downloads_task = background_action(x.attr_name('create_downloads')) +--- +generic object: ALDocumentBundle +event: x.create_downloads +code: | + download_response = x.get_cacheable_documents(key="final", pdf=True, include_full_pdf=True) + background_response_action(x.attr_name('save_downloads'), download_response=download_response) +--- +generic object: ALDocumentBundle +code: | + x.generate_downloads_with_docx_task = background_action(x.attr_name('create_downloads_with_docx')) +--- +generic object: ALDocumentBundle +event: x.create_downloads_with_docx +code: | + download_response = x.get_cacheable_documents(key="final", pdf=True, docx=True, include_full_pdf=True) + background_response_action(x.attr_name('save_downloads'), download_response=download_response) +--- +generic object: ALDocumentBundle +event: x.save_downloads +code: | + x._downloadable_files = 
action_argument('download_response') + + background_response() +--- +id: waiting screen +question: | + Please wait while we make your documents +subquestion: | + This can take a few minutes. + +
+ Making documents... +
+event: al_download_waiting_screen +reload: True \ No newline at end of file diff --git a/docassemble/AssemblyLine/data/questions/test_aldocument_background_assembly.yml b/docassemble/AssemblyLine/data/questions/test_aldocument_background_assembly.yml new file mode 100644 index 00000000..cd5898ea --- /dev/null +++ b/docassemble/AssemblyLine/data/questions/test_aldocument_background_assembly.yml @@ -0,0 +1,51 @@ +--- +include: + - assembly_line.yml +--- +metadata: + title: ALDocument Background Processing +--- +mandatory: True +code: | + intro_screen + # if not al_user_bundle.generate_downloads_task.ready(): # Without DOCX + if not al_user_bundle.generate_downloads_with_docx_task.ready(): # With DOCX + al_download_waiting_screen + download_screen +--- +continue button field: intro_screen +question: | + Intro screen +--- +id: download +event: download_screen +question: | + Download the documents +subquestion: | + Your documents are ready for download. + + ${ al_user_bundle.download_list_html(use_previously_cached_files=True, include_full_pdf=True) } +--- +objects: + - al_user_bundle: ALDocumentBundle.using( + elements = [pdf_1, docx_1], + title = "Background processed bundle", + enabled = True, + filename = "background_processed_bundle", + ) +--- +objects: + - pdf_1: ALDocument.using( title="PDF 1 with a lot of text for the title of this document", filename="pdf_doc_1", enabled=True, has_addendum=False ) + - docx_1: ALDocument.using( title="Docx 1 also with a lot of text for the title of this document", filename="docx_doc_1", enabled=True, has_addendum=False ) +--- +attachment: + variable name: pdf_1[i] + pdf template file: test_aldocument_pdf_1.pdf + filename: pdf_1 + fields: + - "sample_field": "Sample input" +--- +attachment: + variable name: docx_1[i] + docx template file: test_aldocument_docx_1.docx + filename: docx_1