Merge branch 'release/1.2.1'

CentreForDigitalHumanities · Oct 14, 2019 · e9b768b
2 parents 9cd3194 + 5dea44f
commit e9b768b
Show file tree

Hide file tree

Showing 7 changed files with 54 additions and 23 deletions.
diff --git a/backend/addcorpus/corpus.py b/backend/addcorpus/corpus.py
@@ -427,6 +427,11 @@ def metadata_from_xml(self, filename, tags):
                 if 'attribute' in tag:
                     right_tag = next((candidate for candidate in candidates if
                                       candidate.attrs == tag['attribute']), None)
+                elif 'list' in tag:
+                    if 'subtag' in tag:
+                        right_tag = [candidate.find(tag['subtag']) for candidate in candidates]
+                    else:
+                        right_tag = candidates
                 elif 'subtag' in tag:
                     right_tag = next((candidate.find(tag['subtag']) for candidate in candidates if 
                                       candidate.find(tag['subtag'])), None)
@@ -436,9 +441,13 @@ def metadata_from_xml(self, filename, tags):
                 if not right_tag:
                     continue
                 if 'save_as' in tag:
-                    out_dict[tag['save_as']] = right_tag.text
+                    out_tag = tag['save_as']
                 else:
-                    out_dict[tag['tag']] = right_tag.text
+                    out_tag = tag['tag']
+                if 'list' in tag:
+                    out_dict[out_tag] = [t.text for t in right_tag]
+                else:
+                    out_dict[out_tag] = right_tag.text
         return out_dict
 
 

diff --git a/backend/corpora/ecco/ecco.py b/backend/corpora/ecco/ecco.py
@@ -73,9 +73,10 @@ def sources(self, start=min_date, end=max_date):
                         meta_tags = [
                             'collation',
                             {'tag': 'author', 'subtag': 'composed'},
+                            {'tag': 'holdings', 'subtag': 'libraryName', 'list': True},
                             'fullTitle',
                             'imprintFull',
-                            'libraryName',
+                            {'tag': 'sourceLibrary', 'subtag': 'libraryName'},
                             'ocr',
                             'pubDateStart',
                             'publicationPlaceComposed',
@@ -214,9 +215,15 @@ def fields(self):
             ),
             Field(
                 name='library',
-                display_name='Holding library',
-                description='The main holding library of the book',
-                extractor=Metadata('libraryName')
+                display_name='Source library',
+                description='The source library of the book',
+                extractor=Metadata('sourceLibrary')
+            ),
+            Field(
+                name='holdings',
+                display_name='Holding libraries',
+                description='Libraries holding a copy of the book',
+                extractor=Metadata('holdings')
             ),
             Field(
                 name='volume',

diff --git a/backend/corpora/ecco/ecco_image_convert.py b/backend/corpora/ecco/ecco_image_convert.py
@@ -1,6 +1,7 @@
 import os
-from os.path import split, join, splitext
+from os.path import exists, split, join, splitext
 import subprocess
+import glob
 
 def convert_tif_to_pdf(data_dir):
     lib_call = ['export', 'DYLD_LIBRARY_PATH="$MAGICK_HOME/lib/"']
@@ -20,12 +21,25 @@ def convert_tif_to_pdf(data_dir):
         book_id = split(directory)[1]
         pdf_name = join(directory, '{}.pdf'.format(book_id))
         print(pdf_name)
-        if splitext(filenames[0])[1]=='.tif':
-            magick_call = ['magick', '-quiet', '*.tif', pdf_name]
-        elif splitext(filenames[0])[1]=='.TIF':
-            magick_call = ['magick', '-quiet', '*.TIF', pdf_name]
-        else:
-            print(splitext(filenames[0]))
+        if exists(pdf_name):
+            print("exists, skipping")
             continue
         os.chdir(directory)
-        subprocess.check_call(magick_call)
+        for filename in filenames:
+            name, ext = splitext(filename)
+            if ext=='.tif' or ext=='.TIF': 
+                magick_call = ['convert', filename, '-quiet', name+'.pdf']
+                subprocess.check_call(magick_call)
+        with open("files.txt", "w") as f:
+            f.write(' '.join(sorted(glob.glob("*.pdf"))))
+        # combine all pdfs in one file
+        ghostscript_call = ['gs', '-sDEVICE=pdfwrite', '-dBATCH', '-dNOPAUSE',
+                '-sOutputFile='+pdf_name, '@files.txt']
+        subprocess.check_call(ghostscript_call)
+        # remove temporary files
+        remove_call = ['find', '.', '-type', 'f', '-name', book_id+'0*.pdf', '-delete']
+        subprocess.check_call(remove_call)
+        remove_call = ['rm', 'files.txt']
+        subprocess.check_call(remove_call)
+
+
diff --git a/backend/es/es_update.py b/backend/es/es_update.py
@@ -28,7 +28,8 @@ def update_index(corpus, corpus_definition, query_model):
     hits = len(results['hits']['hits'])
     total_hits = results['hits']['total']
     for doc in results['hits']['hits']:
-        update_document(client, corpus, corpus_definition, doc_type, doc)
+        update_body = corpus_definition.update_body(doc)
+        update_document(corpus, doc_type, doc, update_body, client)
     while hits<total_hits:       
         scroll_id = results['_scroll_id']
         for doc in results['hits']['hits']:
@@ -43,7 +44,7 @@ def update_index(corpus, corpus_definition, query_model):
 def update_document(corpus, doc_type, doc, update_body, client=None):
     if not client:
         client = get_client(corpus)
-    doc_id = doc['id']
+    doc_id = doc['_id']
     client.update(index=corpus, doc_type=doc_type, id=doc_id, body=update_body)
 
 

diff --git a/frontend/package-lock.json b/frontend/package-lock.json
diff --git a/frontend/package.json b/frontend/package.json
@@ -1,6 +1,6 @@
 {
   "name": "frontend",
-  "version": "1.2.0",
+  "version": "1.2.1",
   "license": "MIT",
   "scripts": {
     "ng": "ng",
@@ -43,7 +43,7 @@
     "marked": "^0.7.0",
     "material-icons": "^0.3.1",
     "moment": "^2.24.0",
-    "ng2-image-viewer": "^3.0.4",
+    "ng2-image-viewer": "^3.0.5",
     "ng2-pdf-viewer": "^5.2.4",
     "ngx-cookie-service": "^2.2.0",
     "ngx-md": "^6.0.9",

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
     "name": "i-analyzer",
-    "version": "1.2.0",
+    "version": "1.2.1",
     "license": "MIT",
     "scripts": {
         "postinstall": "npm run install-backend && npm run install-frontend",