Skip to content

Commit

Permalink
Merge pull request #106 from sneakers-the-rat/handle-10_5555-prefix
Browse files Browse the repository at this point in the history
Special case `10.5555` DOIs (and fix ambiguous DOI checker results)
  • Loading branch information
xuanxu authored Aug 20, 2024
2 parents c695650 + fe6b0c3 commit 42fcd98
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 19 deletions.
62 changes: 47 additions & 15 deletions app/lib/doi_checker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,52 @@ def initialize(entries=[])
end

# Classifies every entry in @entries and collects one message per entry.
#
# Each entry is routed through the first matching step:
#   1. a special case reported by handle_special_case,
#   2. validation of the DOI when the entry has a non-empty 'doi' field,
#   3. a lookup for a candidate DOI when only a 'title' field is present,
#   4. otherwise the entry is reported as missing both DOI and title.
#
# Every step yields a hash of the form {validity: <key>, msg: <String>};
# the message is appended to the summary list named by :validity.
#
# Returns a Hash with the keys :ok, :skip, :missing and :invalid, each
# mapping to an Array of message strings (empty when nothing was filed there).
def check_dois
  doi_summary = {ok: [], skip: [], missing: [], invalid: []}

  if @entries.any?
    @entries.each do |entry|
      # handle special cases first
      special_case = handle_special_case(entry)
      if special_case
        doi_validity = special_case
      elsif entry.has_field?('doi') && !entry.doi.empty?
        # Validate entries with DOIs
        doi_validity = validate_doi(entry.doi.value)
      elsif entry.has_field?('title')
        # Try and find candidate entries if doi absent, but title present
        doi_validity = handle_missing_doi(entry)
      else
        doi_validity = {validity: :missing, msg: "Entry without DOI or title found"}
      end

      # Single push point: every branch above produced a validity hash.
      doi_summary[doi_validity[:validity]].push(doi_validity[:msg])
    end
  end

  doi_summary
end

# any special case should return false if not applicable, and an object like
# {:validity => :ok, :msg => "whatever"} otherwise.
# Add additional special cases as private methods and chain in a tidy sequence plz <3
# Runs the special-case detectors against +entry+ and returns the first
# truthy result; currently the only detector is the 10.5555 prefix check.
# Returns false when no special case applies.
def handle_special_case(entry)
  prefix_result = acm_105555_prefix(entry)
  return prefix_result if prefix_result

  false
end


# If there's no DOI present, check Crossref to see if we can find a candidate DOI for this entry.
# Builds the validity hash for an entry that has a title but no usable DOI.
#
# The title is passed to crossref_lookup; the message quotes at most the
# first 50 characters of the title, followed by "..." when it was cut.
#
# Returns {validity:, msg:} where validity is
#   :missing - the lookup returned "CROSSREF-ERROR" or a candidate DOI,
#   :skip    - the lookup returned nothing.
def handle_missing_doi(entry)
  suggestion = crossref_lookup(entry.title.value)

  full_title = entry.title.to_s
  shown_title = full_title[0, 50]
  shown_title = "#{shown_title}..." if shown_title.length < full_title.length

  if suggestion == "CROSSREF-ERROR"
    return { validity: :missing, msg: "Errored finding suggestions for \"#{shown_title}\", please try later" }
  end

  unless suggestion
    return { validity: :skip, msg: "No DOI given, and none found for title: #{shown_title}" }
  end

  { validity: :missing, msg: "#{suggestion} may be a valid DOI for title: #{shown_title}" }
end

def validate_doi(doi_string)
Expand Down Expand Up @@ -112,4 +132,16 @@ def levenshtein_distance(s, t)
# True when the Levenshtein distance between the two strings is below 3.
def similar?(string_1, string_2)
  edit_distance = levenshtein_distance(string_1, string_2)
  edit_distance < 3
end

private

# Special case for the 10.5555 DOI prefix, which the messages below describe
# as a known broken prefix whose identifiers belong in the url field as
# https://dl.acm.org/doi/... links.
#
# Returns
#   {validity: :invalid, msg:} when the 'doi' field contains "10.5555/",
#   {validity: :skip, msg:}    when the 'url' field holds a dl.acm.org link
#                              for a 10.5555/ identifier,
#   false                      when neither applies.
#
# Both checks require the "/" after the prefix so that a longer prefix
# (e.g. 10.55556) is not mistaken for 10.5555.
def acm_105555_prefix(entry)
  if entry.has_field?('doi') && entry.doi.include?("10.5555/")
    { validity: :invalid, msg: "#{entry.doi} is INVALID - 10.5555 is a known broken prefix, replace with https://dl.acm.org/doi/{doi} in the {url} field" }
  elsif entry.has_field?('url') && entry.url.include?("https://dl.acm.org/doi/10.5555/")
    { validity: :skip, msg: "#{entry.url} - correctly put 10.5555 prefixed doi in the url field, editor should ensure this resolves" }
  else
    false
  end
end
end
10 changes: 7 additions & 3 deletions app/responses/doi_checks.erb
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
```
Reference check summary (note 'MISSING' DOIs are suggestions that need verification):
<% doi_summary.each do |type, messages| -%>

<%= type.to_s.upcase %> DOIs

<% if type.to_s === "ok" %>
✅ - <%= type.to_s.upcase %> DOIs
<% elsif type.to_s === "skip" %>
❔ - <%= type.to_s.upcase %> DOIs
<% else %>
❌ - <%= type.to_s.upcase %> DOIs
<% end %>
<% if messages.empty? -%>
- None
<% else -%>
Expand Down
29 changes: 28 additions & 1 deletion spec/doi_checker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@

expect(doi_summary[:ok]).to be_empty
expect(doi_summary[:invalid]).to be_empty
expect(doi_summary[:missing][0]).to eq("No DOI given, and none found for title: #{title}")
expect(doi_summary[:skip][0]).to eq("No DOI given, and none found for title: #{title}")
end

it "should report entries with no DOI or title as missing both" do
Expand All @@ -107,6 +107,33 @@
end
end

# Specs for the special-case handling of 10.5555-prefixed identifiers.
# NOTE(review): `subject` is defined outside this hunk — presumably a DOIChecker instance; confirm.
describe "#handle_special_case" do
# A 10.5555 identifier placed in the doi field is reported as invalid,
# with a message pointing at the dl.acm.org URL form.
it "should treat DOIs with a 10.5555 prefix as invalid" do
entry = BibTeX::Entry.new(doi: "10.5555/xxxxxxx.yyyyyyyyy")
validity = subject.handle_special_case(entry)
expect(validity[:validity]).to eq(:invalid)
expect(validity[:msg]).to include("replace with https://dl.acm.org/doi")
end

# The same identifier placed in the url field is filed under :skip,
# and the full message text is pinned here.
it "should treat URLs with a 10.5555 prefix as a skip" do
entry = BibTeX::Entry.new(url: "https://dl.acm.org/doi/10.5555/2827719.2827740")
validity = subject.handle_special_case(entry)
expect(validity[:validity]).to eq(:skip)
expect(validity[:msg]).to eq("https://dl.acm.org/doi/10.5555/2827719.2827740 - correctly put 10.5555 prefixed doi in the url field, editor should ensure this resolves")
end

# End-to-end through check_dois: the special case must be the only
# classification recorded for the entry (all other lists stay empty).
it "should handle special cases separately from normal DOI checking" do
entry = BibTeX::Entry.new(doi: "10.5555/xxxxxxx.yyyyyyyyy")
doi_checker = DOIChecker.new([entry])

doi_summary = doi_checker.check_dois
expect(doi_summary[:ok]).to be_empty
expect(doi_summary[:missing]).to be_empty
expect(doi_summary[:skip]).to be_empty
expect(doi_summary[:invalid][0]).to include("is INVALID - 10.5555 is a known broken prefix, replace with https://dl.acm.org/doi/")
end
end

describe "#validate_doi" do

it "should invalidate empty doi strings" do
Expand Down

0 comments on commit 42fcd98

Please sign in to comment.