From ebb15bf8f3740cab29f4784f4de57dd8b10e064c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Sun, 30 Jun 2024 13:52:35 +0200 Subject: [PATCH 01/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 32f6b86d..91575853 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ A service to provide a bot helping scientific journals and communities manage submission reviews. -Buffy automates common editorial tasks like those needed by [The Journal of Open Source Software](https://joss.theoj.org/) or [rOpenSci](https://ropensci.org/). +Buffy automates common editorial tasks like those needed by [The Journal of Open Source Software](https://joss.theoj.org/), [rOpenSci](https://ropensci.org/), [JuliaCon](https://proceedings.juliacon.org/) or [pyOpenSci](https://www.pyopensci.org/). [![Build Status](https://github.com/openjournals/buffy/actions/workflows/tests.yml/badge.svg)](https://github.com/openjournals/buffy/actions/workflows/tests.yml) [![Documentation Status](https://readthedocs.org/projects/buffy/badge/?version=latest)](https://buffy.readthedocs.io/en/latest/?badge=latest) From 8d1e38c10d253ac3fe53ffc5e429fe7ef38a961d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Mon, 1 Jul 2024 12:51:18 +0200 Subject: [PATCH 02/11] Update tests.yml --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 17e8fcfa..1b320d68 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,7 @@ jobs: continue-on-error: ${{ matrix.failure-allowed }} strategy: matrix: - ruby-version: ['3.0.7', '3.1.5', '3.2.4', '3.3.3'] + ruby-version: ['3.0.7', '3.1.6', '3.2.4', '3.3.3'] failure-allowed: [false] include: - ruby-version: 'truffleruby' From 10cf374e0b80306234da80b1469d04bfea156562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Wed, 10 Jul 2024 10:23:04 +0200 Subject: [PATCH 03/11] Update Rack [sec], Faraday & Sideqik --- Gemfile.lock | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index d7a2b6f2..9937ed4b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -18,8 +18,9 @@ GEM rexml diff-lcs (1.5.1) dotenv (3.1.2) - faraday (2.9.2) + faraday (2.10.0) faraday-net_http (>= 2.0, < 3.2) + logger faraday-follow_redirects (0.1.0) faraday (>= 2, < 3) faraday-net_http (3.1.0) @@ -42,6 +43,7 @@ GEM reverse_markdown (>= 1, < 3) rugged (>= 0.24, < 2.0) thor (>= 0.19, < 2.0) + logger (1.6.0) mini_mime (1.1.5) mini_portile2 (2.8.7) multi_json (1.15.0) @@ -61,7 +63,7 @@ GEM puma (6.4.2) nio4r (~> 2.0) racc (1.8.0) - rack (3.1.4) + rack (3.1.6) rack-protection (4.0.0) base64 (>= 0.1.0) rack (>= 3.0.0, < 4) @@ -99,11 +101,12 @@ GEM multi_json (~> 1.15) rexml (~> 3.2, >= 3.2.5) thor (~> 1.2, >= 1.2.1) - sidekiq (7.2.4) + sidekiq (7.3.0) concurrent-ruby (< 2) connection_pool (>= 2.3.0) + logger rack (>= 2.2.4) - redis-client (>= 0.19.0) + redis-client (>= 0.22.2) sinatra (4.0.0) mustermann (~> 3.0) rack (>= 3.0.0, < 4) @@ -118,7 +121,7 @@ GEM tilt (~> 2.0) strscan (3.1.0) thor (1.3.1) - tilt (2.3.0) + tilt (2.4.0) uri (0.13.0) webmock (3.23.1) addressable (>= 2.8.0) From 6dde3cacde36e7e6f5cd50d6c4c8fd7d16354cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Fri, 12 Jul 2024 12:00:25 +0200 Subject: [PATCH 04/11] Update Gemfile.lock --- Gemfile.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 9937ed4b..a74375db 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -9,7 +9,7 @@ GEM racc (~> 1.7) bigdecimal (3.1.8) cgi (0.4.1) - charlock_holmes (0.7.7) + charlock_holmes (0.7.9) chronic (0.10.2) concurrent-ruby (1.3.3) connection_pool (2.4.1) @@ -63,7 +63,7 @@ GEM puma (6.4.2) nio4r (~> 2.0) racc (1.8.0) - rack (3.1.6) + rack (3.1.7) rack-protection (4.0.0) base64 (>= 0.1.0) rack (>= 3.0.0, < 4) From 62dd5547bb47a9798388ab39a9daac8162789188 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Wed, 24 Jul 2024 12:14:07 +0200 Subject: [PATCH 05/11] Update Rexml [sec] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index a74375db..b8f1d3d9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -75,7 +75,7 @@ GEM connection_pool reverse_markdown (2.1.1) nokogiri - rexml (3.3.1) + rexml (3.3.2) strscan rspec (3.13.0) rspec-core (~> 3.13.0) From 743856518ffb0d65777fc907722059fa1160d48f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Sun, 28 Jul 2024 14:09:55 +0200 Subject: [PATCH 06/11] Update tests.yml --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1b320d68..5db78030 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,7 @@ jobs: continue-on-error: ${{ matrix.failure-allowed }} strategy: matrix: - ruby-version: ['3.0.7', '3.1.6', '3.2.4', '3.3.3'] + ruby-version: ['3.0.7', '3.1.6', '3.2.5', '3.3.3'] failure-allowed: [false] include: - ruby-version: 'truffleruby' From d1db30ec04b13674cd34fde6d5879d6180fca197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Wed, 7 Aug 2024 09:58:00 +0200 Subject: [PATCH 07/11] Update REXML [sec] https://www.ruby-lang.org/en/news/2024/08/01/dos-rexml-cve-2024-41946/ --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index b8f1d3d9..cb11d48f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -75,7 +75,7 @@ GEM connection_pool reverse_markdown (2.1.1) nokogiri - rexml (3.3.2) + rexml (3.3.4) strscan rspec (3.13.0) rspec-core (~> 3.13.0) From c6956500bb14ecdc0b255e0833a9adec08c88aeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Fri, 9 Aug 2024 10:21:18 +0200 Subject: [PATCH 08/11] Update tests.yml --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5db78030..22c9ecd4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,7 @@ jobs: continue-on-error: ${{ matrix.failure-allowed }} strategy: matrix: - ruby-version: ['3.0.7', '3.1.6', '3.2.5', '3.3.3'] + ruby-version: ['3.0.7', '3.1.6', '3.2.5', '3.3.4'] failure-allowed: [false] include: - ruby-version: 'truffleruby' From fe6b0c324d2701019fef67b9bd699ac3be2e24dc Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Thu, 15 Aug 2024 19:49:20 -0700 Subject: [PATCH 09/11] add special case handling, fix ambiguous "skip" result from prior fix --- app/lib/doi_checker.rb | 62 +++++++++++++++++++++++++++--------- app/responses/doi_checks.erb | 10 ++++-- spec/doi_checker_spec.rb | 29 ++++++++++++++++- 3 files changed, 82 insertions(+), 19 deletions(-) diff --git a/app/lib/doi_checker.rb b/app/lib/doi_checker.rb index cadcca60..ecdcb9cc 100644 --- a/app/lib/doi_checker.rb +++ b/app/lib/doi_checker.rb @@ -9,32 +9,52 @@ def initialize(entries=[]) end def check_dois - doi_summary = {ok: [], missing: [], invalid: []} + doi_summary = {ok: [], skip: [], missing: [], invalid: []} if @entries.any? @entries.each do |entry| - if entry.has_field?('doi') && !entry.doi.empty? + # handle special cases first + special_case = self.handle_special_case(entry) + if special_case + doi_validity = special_case + elsif entry.has_field?('doi') && !entry.doi.empty? + # Validate entries with DOIs doi_validity = validate_doi(entry.doi.value) - doi_summary[doi_validity[:validity]].push(doi_validity[:msg]) - # If there's no DOI present, check Crossref to see if we can find a candidate DOI for this entry. elsif entry.has_field?('title') - candidate_doi = crossref_lookup(entry.title.value) - truncated_title = entry.title.to_s[0,50] - truncated_title += "..." if truncated_title.length < entry.title.to_s.length - if candidate_doi == "CROSSREF-ERROR" - doi_summary[:missing].push("Errored finding suggestions for \"#{truncated_title}\", please try later") - elsif candidate_doi - doi_summary[:missing].push("#{candidate_doi} may be a valid DOI for title: #{truncated_title}") - else - doi_summary[:missing].push("No DOI given, and none found for title: #{truncated_title}") - end + # Try and find candidate entries if doi absent, but title present + doi_validity = handle_missing_doi(entry) else - doi_summary[:missing].push("Entry without DOI or title found") + doi_validity = {validity: :missing, msg: "Entry without DOI or title found"} end + + doi_summary[doi_validity[:validity]].push(doi_validity[:msg]) end end doi_summary + end + + # any special case should return false if not applicable, and an object like + # {:validity => :ok, :msg => "whatever"} otherwise. + # Add additional special cases as private methods and chain in a tidy sequence plz <3 + def handle_special_case(entry) + validity = acm_105555_prefix(entry) and return validity + false + end + + + # If there's no DOI present, check Crossref to see if we can find a candidate DOI for this entry. + def handle_missing_doi(entry) + candidate_doi = crossref_lookup(entry.title.value) + truncated_title = entry.title.to_s[0,50] + truncated_title += "..." if truncated_title.length < entry.title.to_s.length + if candidate_doi == "CROSSREF-ERROR" + { validity: :missing, msg: "Errored finding suggestions for \"#{truncated_title}\", please try later" } + elsif candidate_doi + { validity: :missing, msg: "#{candidate_doi} may be a valid DOI for title: #{truncated_title}" } + else + { validity: :skip, msg: "No DOI given, and none found for title: #{truncated_title}" } + end end def validate_doi(doi_string) @@ -112,4 +132,16 @@ def levenshtein_distance(s, t) def similar?(string_1, string_2) levenshtein_distance(string_1, string_2) < 3 end + + private + + def acm_105555_prefix(entry) + if entry.has_field?('doi') && entry.doi.include?("10.5555/") + { validity: :invalid, msg: "#{entry.doi} is INVALID - 10.5555 is a known broken prefix, replace with https://dl.acm.org/doi/{doi} in the {url} field" } + elsif entry.has_field?('url') && entry.url.include?("https://dl.acm.org/doi/10.5555") + { validity: :skip, msg: "#{entry.url} - correctly put 10.5555 prefixed doi in the url field, editor should ensure this resolves" } + else + false + end + end end diff --git a/app/responses/doi_checks.erb b/app/responses/doi_checks.erb index cd18696b..d5d92c2f 100644 --- a/app/responses/doi_checks.erb +++ b/app/responses/doi_checks.erb @@ -1,9 +1,13 @@ ``` Reference check summary (note 'MISSING' DOIs are suggestions that need verification): <% doi_summary.each do |type, messages| -%> - -<%= type.to_s.upcase %> DOIs - +<% if type.to_s === "ok" %> +✅ - <%= type.to_s.upcase %> DOIs +<% elsif type.to_s === "skip" %> +❔ - <%= type.to_s.upcase %> DOIs +<% else %> +❌ - <%= type.to_s.upcase %> DOIs +<% end %> <% if messages.empty? -%> - None <% else -%> diff --git a/spec/doi_checker_spec.rb b/spec/doi_checker_spec.rb index d45b049a..cbacede3 100644 --- a/spec/doi_checker_spec.rb +++ b/spec/doi_checker_spec.rb @@ -93,7 +93,7 @@ expect(doi_summary[:ok]).to be_empty expect(doi_summary[:invalid]).to be_empty - expect(doi_summary[:missing][0]).to eq("No DOI given, and none found for title: #{title}") + expect(doi_summary[:skip][0]).to eq("No DOI given, and none found for title: #{title}") end it "should report entries with no DOI or title as missing both" do @@ -107,6 +107,33 @@ end end + describe "#handle_special_case" do + it "should treat DOIs with a 10.5555 prefix as invalid" do + entry = BibTeX::Entry.new(doi: "10.5555/xxxxxxx.yyyyyyyyy") + validity = subject.handle_special_case(entry) + expect(validity[:validity]).to eq(:invalid) + expect(validity[:msg]).to include("replace with https://dl.acm.org/doi") + end + + it "should treat URLs with a 10.5555 prefix as a skip" do + entry = BibTeX::Entry.new(url: "https://dl.acm.org/doi/10.5555/2827719.2827740") + validity = subject.handle_special_case(entry) + expect(validity[:validity]).to eq(:skip) + expect(validity[:msg]).to eq("https://dl.acm.org/doi/10.5555/2827719.2827740 - correctly put 10.5555 prefixed doi in the url field, editor should ensure this resolves") + end + + it "should handle special cases separately from normal DOI checking" do + entry = BibTeX::Entry.new(doi: "10.5555/xxxxxxx.yyyyyyyyy") + doi_checker = DOIChecker.new([entry]) + + doi_summary = doi_checker.check_dois + expect(doi_summary[:ok]).to be_empty + expect(doi_summary[:missing]).to be_empty + expect(doi_summary[:skip]).to be_empty + expect(doi_summary[:invalid][0]).to include("is INVALID - 10.5555 is a known broken prefix, replace with https://dl.acm.org/doi/") + end + end + describe "#validate_doi" do it "should invalidate empty doi strings" do From 6c373ce87d947497aad3b9cde3a66a041e252a71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Tue, 20 Aug 2024 11:35:07 +0200 Subject: [PATCH 10/11] Minor changes to DOI checker code --- app/lib/doi_checker.rb | 6 ++---- app/responses/doi_checks.erb | 10 +++++----- spec/doi_checker_spec.rb | 9 +++++++++ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/app/lib/doi_checker.rb b/app/lib/doi_checker.rb index ecdcb9cc..0810c348 100644 --- a/app/lib/doi_checker.rb +++ b/app/lib/doi_checker.rb @@ -14,8 +14,7 @@ def check_dois if @entries.any? @entries.each do |entry| # handle special cases first - special_case = self.handle_special_case(entry) - if special_case + if special_case = handle_special_case(entry) doi_validity = special_case elsif entry.has_field?('doi') && !entry.doi.empty? # Validate entries with DOIs @@ -38,8 +37,7 @@ def check_dois # {:validity => :ok, :msg => "whatever"} otherwise. # Add additional special cases as private methods and chain in a tidy sequence plz <3 def handle_special_case(entry) - validity = acm_105555_prefix(entry) and return validity - false + acm_105555_prefix(entry) || false end diff --git a/app/responses/doi_checks.erb b/app/responses/doi_checks.erb index d5d92c2f..7e4019d9 100644 --- a/app/responses/doi_checks.erb +++ b/app/responses/doi_checks.erb @@ -2,17 +2,17 @@ Reference check summary (note 'MISSING' DOIs are suggestions that need verification): <% doi_summary.each do |type, messages| -%> <% if type.to_s === "ok" %> -✅ - <%= type.to_s.upcase %> DOIs +✅ <%= type.to_s.upcase %> DOIs <% elsif type.to_s === "skip" %> -❔ - <%= type.to_s.upcase %> DOIs +❔ <%= type.to_s.upcase %> DOIs <% else %> -❌ - <%= type.to_s.upcase %> DOIs +❌ <%= type.to_s.upcase %> DOIs <% end %> <% if messages.empty? -%> - None <% else -%> -<% messages.each do |message| -%> -- <%= message%> +<% messages.each do |message| -%> +- <%= message %> <% end -%> <% end -%> <% end -%> diff --git a/spec/doi_checker_spec.rb b/spec/doi_checker_spec.rb index cbacede3..e6c9a75b 100644 --- a/spec/doi_checker_spec.rb +++ b/spec/doi_checker_spec.rb @@ -11,6 +11,7 @@ expect(doi_summary[:ok]).to be_empty expect(doi_summary[:invalid]).to be_empty expect(doi_summary[:missing]).to be_empty + expect(doi_summary[:skip]).to be_empty end it "should classify as invalid entries with invalid DOI" do @@ -25,6 +26,7 @@ expect(doi_summary[:invalid].size).to eq(1) expect(doi_summary[:invalid].first).to eq(validity[:msg]) expect(doi_summary[:missing]).to be_empty + expect(doi_summary[:skip]).to be_empty end it "should classify as ok entries with valid DOI" do @@ -39,6 +41,7 @@ expect(doi_summary[:ok].first).to eq(validity[:msg]) expect(doi_summary[:invalid]).to be_empty expect(doi_summary[:missing]).to be_empty + expect(doi_summary[:skip]).to be_empty end it "should classify as missing entries without DOI but with a candidate crossref entry" do @@ -50,6 +53,7 @@ doi_summary = doi_checker.check_dois expect(doi_summary[:ok]).to be_empty expect(doi_summary[:invalid]).to be_empty + expect(doi_summary[:skip]).to be_empty expect(doi_summary[:missing].size).to eq(1) expect(doi_summary[:missing][0]).to eq("10.maybe/doi may be a valid DOI for title: No DOI") end @@ -63,6 +67,7 @@ doi_summary = doi_checker.check_dois expect(doi_summary[:ok]).to be_empty expect(doi_summary[:invalid]).to be_empty + expect(doi_summary[:skip]).to be_empty expect(doi_summary[:missing].size).to eq(1) expect(doi_summary[:missing][0]).to eq('Errored finding suggestions for "No DOI", please try later') end @@ -78,6 +83,7 @@ doi_summary = doi_checker.check_dois expect(doi_summary[:ok]).to be_empty expect(doi_summary[:invalid]).to be_empty + expect(doi_summary[:skip]).to be_empty expect(doi_summary[:missing].size).to eq(1) expect(doi_summary[:missing][0]).to eq("Errored finding suggestions for \"#{expected_title}\", please try later") end @@ -93,6 +99,8 @@ expect(doi_summary[:ok]).to be_empty expect(doi_summary[:invalid]).to be_empty + expect(doi_summary[:missing]).to be_empty + expect(doi_summary[:skip].size).to eq(1) expect(doi_summary[:skip][0]).to eq("No DOI given, and none found for title: #{title}") end @@ -103,6 +111,7 @@ doi_summary = doi_checker.check_dois expect(doi_summary[:ok]).to be_empty expect(doi_summary[:invalid]).to be_empty + expect(doi_summary[:skip]).to be_empty expect(doi_summary[:missing][0]).to eq("Entry without DOI or title found") end end From 37bbbab41f51e0660c00d4b6ead5134e7a90ba2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juanjo=20Baz=C3=A1n?= Date: Tue, 20 Aug 2024 11:54:46 +0200 Subject: [PATCH 11/11] Update symbol for the "Skip DOIs" section --- app/lib/doi_checker.rb | 2 +- app/responses/doi_checks.erb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/lib/doi_checker.rb b/app/lib/doi_checker.rb index 0810c348..42bc958d 100644 --- a/app/lib/doi_checker.rb +++ b/app/lib/doi_checker.rb @@ -31,7 +31,7 @@ def check_dois end doi_summary - end + end # any special case should return false if not applicable, and an object like # {:validity => :ok, :msg => "whatever"} otherwise. diff --git a/app/responses/doi_checks.erb b/app/responses/doi_checks.erb index 7e4019d9..529ea2f0 100644 --- a/app/responses/doi_checks.erb +++ b/app/responses/doi_checks.erb @@ -4,7 +4,7 @@ Reference check summary (note 'MISSING' DOIs are suggestions that need verificat <% if type.to_s === "ok" %> ✅ <%= type.to_s.upcase %> DOIs <% elsif type.to_s === "skip" %> -❔ <%= type.to_s.upcase %> DOIs +🟡 <%= type.to_s.upcase %> DOIs <% else %> ❌ <%= type.to_s.upcase %> DOIs <% end %>