Skip to content

Commit

Permalink
Merge pull request #1521 from sul-dlss/aardvarkDateHierarchy
Browse files Browse the repository at this point in the history
geo_aardvark_config.rb: add_date_hierarchy_sm field
  • Loading branch information
thatbudakguy authored Sep 6, 2024
2 parents b11d09c + 11f518a commit d5e0a2c
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 0 deletions.
2 changes: 2 additions & 0 deletions lib/traject/config/geo_aardvark_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ def log_skip(context)
# - used to power the year facet in the UI
to_field 'gbl_indexYear_im', use_field('dct_temporal_sm'), extract_years, minmax, transform(->(years) { (years.first.to_i..years.last.to_i).to_a if years.any? }), flatten

# Expand every index year into century, century:decade and century:decade:year strings
# (e.g. "1900-1999", "1900-1999:1990-1999", "1900-1999:1990-1999:1995") so the years
# can be presented as a hierarchical facet. Must come after gbl_indexYear_im, whose
# already-computed output values it reads via use_field.
to_field 'date_hierarchy_sm', use_field('gbl_indexYear_im'), hierarchicalize_year_list

# https://opengeometadata.org/ogm-aardvark/#provider
to_field 'schema_provider_s', literal('Stanford')

Expand Down
25 changes: 25 additions & 0 deletions lib/traject/macros/extras.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,31 @@ def use_field(field)
accumulator.concat Array.wrap(context.output_hash[field]) if context.output_hash[field].present?
end
end

# Given a list of years, return a list with the unique centuries and decades covered by those years, and prefixes on the
# decade and year strings for easy parsing of century and decade when using the Solr results for hierarchical facet display.
# E.g.,
# * given: [1701, 1980, 1991, 1995]
# * return: ["1700-1799", "1900-1999",
# "1700-1799:1700-1709", "1900-1999:1980-1989", "1900-1999:1990-1999",
# "1700-1799:1700-1709:1701", "1900-1999:1980-1989:1980", "1900-1999:1990-1999:1991", "1900-1999:1990-1999:1995"]
# The standalone century and decade ranges make those ranges facetable/searchable, and including the ranges on the year strings
# themselves makes it faster for consumers to parse out century/decade info, without duplicating the logic defined in this module.
def hierarchicalize_year_list
  # Traject macro: the returned lambda rewrites the accumulator in place.
  # @param accumulator [Array] calendar years collected so far for this field
  # @return [Array<String>] the same accumulator, now holding the unique centuries, then the unique
  #   "century:decade" strings, then one "century:decade:year" string per input year
  lambda do |_record, accumulator, _context|
    # Sets drop repeated centuries/decades while keeping first-seen order.
    century_facets = Set.new
    decade_facets = Set.new
    year_facets = []

    accumulator.each do |year|
      century, decade = Utils.centimate_and_decimate(year)
      century_facets.add(century)
      decade_facets.add("#{century}:#{decade}")
      year_facets.push("#{century}:#{decade}:#{year}")
    end

    accumulator.replace([*century_facets, *decade_facets, *year_facets])
  end
end
end
end
end
34 changes: 34 additions & 0 deletions lib/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,40 @@ def self.longest_common_call_number_prefix(*strs)
substr.sub(/(\s|[[:punct:]])+\z/, '')
end

# Extract century and decade from year.
# "Obsolete. to take a tenth of or from." - https://www.dictionary.com/browse/decimate
#
# Base-10 numeric strings (e.g. "1995") are coerced to integers first, so years may be passed
# either as ints or as strings. Input that cannot be read as a year (nil, non-numeric strings,
# other non-numeric objects) yields the "unknown" placeholders instead of raising.
#
# @param maybe_year [Integer, String] an int (or numeric string) that (hopefully) represents a year
# @return [Array<String>] a two-element array of the century range and the decade range that
#   contain the year (e.g. ["1900-1999", "1990-1999"]), or ["unknown_century", "unknown_decade"]
def self.centimate_and_decimate(maybe_year)
  # Force base 10 so zero-padded strings such as "0950" are not treated as octal.
  year = maybe_year.is_a?(String) ? Integer(maybe_year, 10, exception: false) : maybe_year
  # Date.new raises TypeError (not Date::Error) for non-numeric input, so screen that out here.
  return %w[unknown_century unknown_decade] unless year.is_a?(Numeric)

  parsed_date = Date.new(year)
  [century_from_date(parsed_date), decade_from_date(parsed_date)]
rescue Date::Error
  %w[unknown_century unknown_decade] # guess not
end

# Given a Date, return a String for the century that contains it.
#
# This uses the colloquial grouping of centuries, because it's more intuitive at a glance, and the code is easier:
# https://en.wikipedia.org/wiki/Century#Start_and_end_of_centuries
#
# @param date [Date] a Date object on which we can call strftime
# @return [String] a String representing the century in which the date belongs (e.g. 1500-1599)
def self.century_from_date(date)
  # %C is the year divided by 100 (e.g. "19" for 1995); 00 and 99 are appended for the range bounds.
  century_prefix = date.strftime('%C')
  "#{century_prefix}00-#{century_prefix}99"
end

# Given a Date, return a String for the decade that contains it.
#
# This uses the more colloquial/popular decade boundary, because it's easier to code and more intuitive for users.
# https://en.wikipedia.org/wiki/Decade#0-to-9_decade
#
# @param date [Date] a Date object on which we can call strftime
# @return [String] a String representing the decade in which the date belongs (e.g. 1990-1999)
def self.decade_from_date(date)
  # Integer division drops the final digit of the year; 0 and 9 are appended for the range bounds.
  tens = date.year / 10
  format('%<tens>d0-%<tens>d9', tens: tens)
end

def self.version
@version ||= begin
file = File.expand_path('../REVISION', __dir__)
Expand Down
27 changes: 27 additions & 0 deletions spec/integration/geo_aardvark_config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@
expect(result['gbl_indexYear_im']).to eq (1978..2005).to_a
end

it 'hierarchicalizes the index years and maps that to a separate field' do
# Total entry count: one per year, one per distinct decade, one per distinct century.
expect(result['date_hierarchy_sm'].length).to eq 34 # 28 years, 4 decades, 2 centuries
# Spot-check all centuries and decades, plus a sample year from each century, rather than
# listing every year entry; `include` does not depend on ordering.
expect(result['date_hierarchy_sm']).to include('1900-1999', '2000-2099',
'1900-1999:1970-1979', '1900-1999:1980-1989', '1900-1999:1990-1999',
'2000-2099:2000-2009',
'1900-1999:1980-1989:1980', '2000-2099:2000-2009:2004')
end

it 'maps the provider as stanford' do
expect(result['schema_provider_s']).to eq 'Stanford'
end
Expand Down Expand Up @@ -347,6 +355,12 @@
expect(result['gbl_indexYear_im']).to eq [2014]
end

it 'hierarchicalizes the index years and maps that to a separate field' do
# A single year (2014) should yield exactly one century, one century:decade and one
# century:decade:year entry. Both sides are sorted so ordering does not matter.
expect(result['date_hierarchy_sm'].sort).to eq(%w[2000-2099
2000-2099:2010-2019
2000-2099:2010-2019:2014].sort)
end

it 'maps the date range' do
expect(result['gbl_dateRange_drsim']).to eq ['[2014 TO 2014]']
end
Expand Down Expand Up @@ -395,6 +409,13 @@
expect(result['gbl_indexYear_im']).to eq (1938..1940).to_a
end

it 'hierarchicalizes the index years and maps that to a separate field' do
# Years 1938-1940 straddle a decade boundary: expect one century, two decades and three
# year entries. Both sides are sorted so ordering does not matter.
expect(result['date_hierarchy_sm'].sort).to eq(%w[1900-1999
1900-1999:1930-1939 1900-1999:1940-1949
1900-1999:1930-1939:1938 1900-1999:1930-1939:1939
1900-1999:1940-1949:1940].sort)
end

it 'maps the date range' do
expect(result['gbl_dateRange_drsim']).to eq ['[1938 TO 1940]']
end
Expand Down Expand Up @@ -463,6 +484,12 @@
expect(result['gbl_indexYear_im']).to eq [2002]
end

it 'hierarchicalizes the index years and maps that to a separate field' do
# A single year (2002) should yield exactly one century, one century:decade and one
# century:decade:year entry. Both sides are sorted so ordering does not matter.
expect(result['date_hierarchy_sm'].sort).to eq(%w[2000-2099
2000-2099:2000-2009
2000-2099:2000-2009:2002].sort)
end

it 'maps the date range' do
expect(result['gbl_dateRange_drsim']).to eq ['[2002 TO 2002]']
end
Expand Down

0 comments on commit d5e0a2c

Please sign in to comment.