diff --git a/Dockerfile b/Dockerfile
index 325d61452..1993d9913 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,6 +46,11 @@ RUN wget https://github.com/ImageMagick/ImageMagick/archive/refs/tags/7.1.0-57.t
     && rm -rf ImageMagick* \
     && rm -rf /var/cache/apk/*
 
+# Install "best" training data for Tesseract (URL is quoted so the shell never globs its "?")
+RUN echo "📚 Installing Tesseract Best (training data)!" && \
+    cd /usr/share/tessdata/ && \
+    wget "https://github.com/tesseract-ocr/tessdata_best/blob/main/eng.traineddata?raw=true" -O eng_best.traineddata
+
 ARG VIPS_VERSION=8.11.3
 
 RUN set -x -o pipefail \
diff --git a/Gemfile.lock b/Gemfile.lock
index 5f296efdf..30ac12d91 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -219,7 +219,7 @@ GIT
 
 GIT
   remote: https://github.com/scientist-softserv/iiif_print.git
-  revision: 122e86634f17c8f40d9ae4d59d2d7deca00f1694
+  revision: cee97520f95e90a9e75d4dbcf159db337676e3c6
   branch: main
   specs:
     iiif_print (1.0.0)
diff --git a/app/services/iiif_print/tenant_config.rb b/app/services/iiif_print/tenant_config.rb
index 14ef3642b..4ec49e382 100644
--- a/app/services/iiif_print/tenant_config.rb
+++ b/app/services/iiif_print/tenant_config.rb
@@ -100,6 +100,12 @@ module PdfSplitter
       mattr_accessor :iiif_print_splitter
       self.iiif_print_splitter = ::IiifPrint::SplitPdfs::PagesToJpgsSplitter
 
+      ##
+      # @return [Boolean] the negation of TenantConfig.use_iiif_print?
+      def self.never_split_pdfs?
+        !TenantConfig.use_iiif_print?
+      end
+
       ##
       # @api public
       def self.call(*args)
@@ -147,6 +153,12 @@ def service
     # In Hyrax::WorkShowPresenter we're only looking at the underlying file_sets. But IiifPrint
     # needs to look at multiple places.
     module WorkShowPresenterDecorator
+      ##
+      # @return [Boolean] Identifies whether IiifPrint PDF splitting is active for this work's tenant
+      def split_pdfs?
+        TenantConfig.use_iiif_print?
+      end
+
       ##
       # @return [Array] predicate methods (e.g. ending in "?") that reflect the types
       # of files we want to consider for showing in the IIIF Viewer.
diff --git a/config/initializers/iiif_print.rb b/config/initializers/iiif_print.rb
index e83325c64..15c567a70 100644
--- a/config/initializers/iiif_print.rb
+++ b/config/initializers/iiif_print.rb
@@ -22,4 +22,6 @@
   # config.sort_iiif_manifest_canvases_by = :date_published
   config.default_iiif_manifest_version = 3
   config.persistence_adapter = IiifPrint::PersistenceLayer::ValkyrieAdapter
+  # Select the eng_best.traineddata model that the Dockerfile downloads into /usr/share/tessdata/.
+  config.additional_tesseract_options = "-l eng_best"
 end
diff --git a/db/migrate/20240214005253_add_model_details_to_iiif_print_pending_relationships.iiif_print.rb b/db/migrate/20240214005253_add_model_details_to_iiif_print_pending_relationships.iiif_print.rb
index 234377154..280e3e067 100644
--- a/db/migrate/20240214005253_add_model_details_to_iiif_print_pending_relationships.iiif_print.rb
+++ b/db/migrate/20240214005253_add_model_details_to_iiif_print_pending_relationships.iiif_print.rb
@@ -1,8 +1,9 @@
 # This migration comes from iiif_print (originally 20231110163052)
 class AddModelDetailsToIiifPrintPendingRelationships < ActiveRecord::Migration[5.2]
   def change
-    add_column :iiif_print_pending_relationships, :parent_model, :string
-    add_column :iiif_print_pending_relationships, :child_model, :string
-    add_column :iiif_print_pending_relationships, :file_id, :string
+    # Add each column only when it is not already present; the guard must name the same table as add_column.
+    add_column :iiif_print_pending_relationships, :parent_model, :string unless column_exists?(:iiif_print_pending_relationships, :parent_model)
+    add_column :iiif_print_pending_relationships, :child_model, :string unless column_exists?(:iiif_print_pending_relationships, :child_model)
+    add_column :iiif_print_pending_relationships, :file_id, :string unless column_exists?(:iiif_print_pending_relationships, :file_id)
   end
 end