From 2c9a57396fe83886b7681db82c3753c9832c7dee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josef=20=C5=A0im=C3=A1nek?= Date: Tue, 24 Sep 2024 23:25:29 +0200 Subject: [PATCH] Use long for downloads count in OpenSearch index. (#5052) * Use long for downloads count in OpenSearch index. * Scale the suggestion weight using log --------- Co-authored-by: Colby Swandale <996377+colby-swandale@users.noreply.github.com> --- app/models/concerns/rubygem_searchable.rb | 8 +++- .../concerns/rubygem_searchable_test.rb | 42 +++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/app/models/concerns/rubygem_searchable.rb b/app/models/concerns/rubygem_searchable.rb index d0d5c4737c4..27eee6ad34f 100644 --- a/app/models/concerns/rubygem_searchable.rb +++ b/app/models/concerns/rubygem_searchable.rb @@ -24,7 +24,7 @@ module RubygemSearchable description: { type: "text", analyzer: "english", fields: { raw: { analyzer: "simple", type: "text" } } }, suggest: { type: "completion", contexts: { name: "yanked", type: "category" } }, yanked: { type: "boolean" }, - downloads: { type: "integer" }, + downloads: { type: "long" }, updated: { type: "date" } } } @@ -88,12 +88,16 @@ def suggest_json { suggest: { input: name, - weight: downloads, + weight: suggest_weight_scale(downloads), contexts: { yanked: versions.none?(&:indexed?) } } } end + + def suggest_weight_scale(downloads) + Math.log10(downloads + 1).to_i + end end end diff --git a/test/models/concerns/rubygem_searchable_test.rb b/test/models/concerns/rubygem_searchable_test.rb index c72eeaf79de..d7089196e78 100644 --- a/test/models/concerns/rubygem_searchable_test.rb +++ b/test/models/concerns/rubygem_searchable_test.rb @@ -79,6 +79,48 @@ class RubygemSearchableTest < ActiveSupport::TestCase assert_equal v, json[k], "value doesn't match for key: #{k}" end end + + should "set the suggest json" do + json = @rubygem.search_data + + assert_equal "example_gem", json[:suggest][:input] + end + + should "calculate the suggestion weight based on the number of downloads" do + weights = [ + [0, 0], + [10, 1], + [100, 2], + [1_000, 3], + [10_000, 4], + [100_000, 5], + [1_000_000, 6], + [10_000_000, 7], + [100_000_000, 8], + [1_000_000_000, 9] + ] + + weights.each do |downloads, weight| + @rubygem.gem_download.update(count: downloads) + json = @rubygem.search_data + + assert_equal weight, json[:suggest][:weight] + end + end + + context "when the number of downloads exceeds a 32 bit integer" do + setup do + @rubygem = create(:rubygem, name: "large_downloads_example_gem", downloads: 10_000_000_000) # 10 Billion downloads + @version = create(:version, number: "1.0.0", rubygem: @rubygem) + import_and_refresh + end + + should "allow the number of downloads to be stored as a 64 bit integer" do + json = @rubygem.search_data + + assert_equal 10_000_000_000, json[:downloads] + end + end end context "rubygems analyzer" do