From 60a8d71689ca769b80709340894ecaa386e56d10 Mon Sep 17 00:00:00 2001
From: Thorsten Schwander
Date: Tue, 7 May 2019 11:12:38 -0600
Subject: [PATCH] DocExtract: fix find_doi in linker

* strip "doi:" or "hdl:" prefix if any in search query

Signed-off-by: Thorsten Schwander
---
 modules/docextract/lib/refextract_linker.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modules/docextract/lib/refextract_linker.py b/modules/docextract/lib/refextract_linker.py
index 971a321015..0cbf4c3dee 100644
--- a/modules/docextract/lib/refextract_linker.py
+++ b/modules/docextract/lib/refextract_linker.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
-## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
+## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2019 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
@@ -73,6 +73,8 @@ def find_reportnumber(citation_element):
 
 def find_doi(citation_element):
     doi_string = citation_element['doi_string']
+    if doi_string.lower().startswith(('doi:', 'hdl:')) and len(doi_string) > 4:
+        doi_string = doi_string[4:]
     recids = get_recids_matching_query(doi_string, 'doi')
     return recids if len(recids) == 1 else []
 