Skip to content

Commit

Permalink
Merge branch 'release/1.2.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
BeritJanssen committed Oct 14, 2019
2 parents 9cd3194 + 5dea44f commit e9b768b
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 23 deletions.
13 changes: 11 additions & 2 deletions backend/addcorpus/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,11 @@ def metadata_from_xml(self, filename, tags):
if 'attribute' in tag:
right_tag = next((candidate for candidate in candidates if
candidate.attrs == tag['attribute']), None)
elif 'list' in tag:
if 'subtag' in tag:
right_tag = [candidate.find(tag['subtag']) for candidate in candidates]
else:
right_tag = candidates
elif 'subtag' in tag:
right_tag = next((candidate.find(tag['subtag']) for candidate in candidates if
candidate.find(tag['subtag'])), None)
Expand All @@ -436,9 +441,13 @@ def metadata_from_xml(self, filename, tags):
if not right_tag:
continue
if 'save_as' in tag:
out_dict[tag['save_as']] = right_tag.text
out_tag = tag['save_as']
else:
out_dict[tag['tag']] = right_tag.text
out_tag = tag['tag']
if 'list' in tag:
out_dict[out_tag] = [t.text for t in right_tag]
else:
out_dict[out_tag] = right_tag.text
return out_dict


Expand Down
15 changes: 11 additions & 4 deletions backend/corpora/ecco/ecco.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@ def sources(self, start=min_date, end=max_date):
meta_tags = [
'collation',
{'tag': 'author', 'subtag': 'composed'},
{'tag': 'holdings', 'subtag': 'libraryName', 'list': True},
'fullTitle',
'imprintFull',
'libraryName',
{'tag': 'sourceLibrary', 'subtag': 'libraryName'},
'ocr',
'pubDateStart',
'publicationPlaceComposed',
Expand Down Expand Up @@ -214,9 +215,15 @@ def fields(self):
),
Field(
name='library',
display_name='Holding library',
description='The main holding library of the book',
extractor=Metadata('libraryName')
display_name='Source library',
description='The source library of the book',
extractor=Metadata('sourceLibrary')
),
Field(
name='holdings',
display_name='Holding libraries',
description='Libraries holding a copy of the book',
extractor=Metadata('holdings')
),
Field(
name='volume',
Expand Down
30 changes: 22 additions & 8 deletions backend/corpora/ecco/ecco_image_convert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from os.path import split, join, splitext
from os.path import exists, split, join, splitext
import subprocess
import glob

def convert_tif_to_pdf(data_dir):
lib_call = ['export', 'DYLD_LIBRARY_PATH="$MAGICK_HOME/lib/"']
Expand All @@ -20,12 +21,25 @@ def convert_tif_to_pdf(data_dir):
book_id = split(directory)[1]
pdf_name = join(directory, '{}.pdf'.format(book_id))
print(pdf_name)
if splitext(filenames[0])[1]=='.tif':
magick_call = ['magick', '-quiet', '*.tif', pdf_name]
elif splitext(filenames[0])[1]=='.TIF':
magick_call = ['magick', '-quiet', '*.TIF', pdf_name]
else:
print(splitext(filenames[0]))
if exists(pdf_name):
print("exists, skipping")
continue
os.chdir(directory)
subprocess.check_call(magick_call)
for filename in filenames:
name, ext = splitext(filename)
if ext=='.tif' or ext=='.TIF':
magick_call = ['convert', filename, '-quiet', name+'.pdf']
subprocess.check_call(magick_call)
with open("files.txt", "w") as f:
f.write(' '.join(sorted(glob.glob("*.pdf"))))
# combine all pdfs in one file
ghostscript_call = ['gs', '-sDEVICE=pdfwrite', '-dBATCH', '-dNOPAUSE',
'-sOutputFile='+pdf_name, '@files.txt']
subprocess.check_call(ghostscript_call)
# remove temporary files
remove_call = ['find', '.', '-type', 'f', '-name', book_id+'0*.pdf', '-delete']
subprocess.check_call(remove_call)
remove_call = ['rm', 'files.txt']
subprocess.check_call(remove_call)


5 changes: 3 additions & 2 deletions backend/es/es_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def update_index(corpus, corpus_definition, query_model):
hits = len(results['hits']['hits'])
total_hits = results['hits']['total']
for doc in results['hits']['hits']:
update_document(client, corpus, corpus_definition, doc_type, doc)
update_body = corpus_definition.update_body(doc)
update_document(corpus, doc_type, doc, update_body, client)
while hits<total_hits:
scroll_id = results['_scroll_id']
for doc in results['hits']['hits']:
Expand All @@ -43,7 +44,7 @@ def update_index(corpus, corpus_definition, query_model):
def update_document(corpus, doc_type, doc, update_body, client=None):
    """Apply ``update_body`` to a single document of the ``corpus`` index.

    Parameters:
        corpus: name of the index to update; also passed to ``get_client``
            when no client is supplied.
        doc_type: document type forwarded to ``client.update``.
        doc: mapping describing the document; its ``'_id'`` key is used as
            the id of the document to update.
        update_body: body forwarded unchanged to ``client.update``.
        client: optional client object exposing ``update``; when omitted
            (or falsy) one is obtained through ``get_client(corpus)``.

    Raises:
        KeyError: if ``doc`` has no ``'_id'`` key.
    """
    if not client:
        client = get_client(corpus)
    # Use the '_id' metadata key, not a (possibly absent) 'id' key: looking
    # up 'id' fails with KeyError on documents that only carry '_id'.
    doc_id = doc['_id']
    client.update(index=corpus, doc_type=doc_type, id=doc_id, body=update_body)


Expand Down
8 changes: 4 additions & 4 deletions frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions frontend/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "frontend",
"version": "1.2.0",
"version": "1.2.1",
"license": "MIT",
"scripts": {
"ng": "ng",
Expand Down Expand Up @@ -43,7 +43,7 @@
"marked": "^0.7.0",
"material-icons": "^0.3.1",
"moment": "^2.24.0",
"ng2-image-viewer": "^3.0.4",
"ng2-image-viewer": "^3.0.5",
"ng2-pdf-viewer": "^5.2.4",
"ngx-cookie-service": "^2.2.0",
"ngx-md": "^6.0.9",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "i-analyzer",
"version": "1.2.0",
"version": "1.2.1",
"license": "MIT",
"scripts": {
"postinstall": "npm run install-backend && npm run install-frontend",
Expand Down

0 comments on commit e9b768b

Please sign in to comment.