Skip to content

Commit

Permalink
Base call number for Dewey classifications. (#353)
Browse files Browse the repository at this point in the history
Using the Lcsort gem, creates shelf keys for Dewey call numbers so that
they can be sorted in forward and reverse directions.
  • Loading branch information
Banu Kutlu committed Oct 4, 2021
1 parent bd668f1 commit dd09213
Show file tree
Hide file tree
Showing 15 changed files with 387 additions and 218 deletions.
13 changes: 13 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ inherit_from:
AllCops:
DisplayCopNames: true
NewCops: enable
Exclude:
- 'config/ignorethis_*.rb'

Lint/ConstantDefinitionInBlock:
Exclude:
Expand All @@ -35,7 +37,18 @@ Lint/MissingSuper:
Lint/MixedRegexpCaptureTypes:
Exclude:
- 'lib/psulib_traject/processors/call_number/lc.rb'
- 'lib/psulib_traject/processors/call_number/dewey.rb'

RSpec/ImplicitSubject:
Exclude:
- 'spec/lib/psulib_traject/macros/subjects_spec.rb'

RSpec/NestedGroups:
Exclude:
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'
- 'spec/lib/psulib_traject/processors/call_number/dewey_spec.rb'

RSpec/ExpectActual:
Exclude:
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'
- 'spec/lib/psulib_traject/processors/call_number/dewey_spec.rb'
75 changes: 17 additions & 58 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,42 +1,31 @@
# This configuration was generated by
# `rubocop --auto-gen-config --auto-gen-only-exclude`
# on 2021-09-07 19:44:42 UTC using RuboCop version 0.93.1.
# `rubocop --auto-gen-config`
# on 2021-09-10 20:26:43 UTC using RuboCop version 0.93.1.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
# versions of RuboCop, may require this file to be generated again.

# Offense count: 4
# Configuration parameters: CountComments, Max, CountAsOne, ExcludedMethods.
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods.
# ExcludedMethods: refine
Metrics/BlockLength:
Exclude:
- 'spec/**/*'
- 'lib/tasks/docker.rake'
- 'lib/tasks/incrementals.rake'
Max: 75

# Offense count: 1
# Configuration parameters: CountComments, Max, CountAsOne.
# Offense count: 2
# Configuration parameters: CountComments, CountAsOne.
Metrics/ClassLength:
Exclude:
- 'lib/psulib_traject/processors/media_type.rb'
Max: 138

# Offense count: 4
# Configuration parameters: IgnoredMethods, Max.
# Configuration parameters: IgnoredMethods.
Metrics/CyclomaticComplexity:
Exclude:
- 'lib/psulib_traject/holdings.rb'
- 'lib/psulib_traject/macros/subjects.rb'
- 'lib/psulib_traject/processors/media_type.rb'
- 'lib/psulib_traject/processors/preferred_format.rb'
Max: 10

# Offense count: 3
# Configuration parameters: IgnoredMethods, Max.
# Configuration parameters: IgnoredMethods.
Metrics/PerceivedComplexity:
Exclude:
- 'lib/psulib_traject/holdings.rb'
- 'lib/psulib_traject/macros/subjects.rb'
- 'lib/psulib_traject/marc_combining_reader.rb'
Max: 10

# Offense count: 2
# Configuration parameters: MinSize.
Expand All @@ -60,49 +49,19 @@ RSpec/DescribeClass:
- 'spec/integration/access_facet_spec.rb'
- 'spec/integration/bound_with_spec.rb'

# Offense count: 56
# Cop supports --auto-correct.
RSpec/ExpectActual:
Exclude:
- 'spec/routing/**/*'
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'

# Offense count: 20
# Configuration parameters: AllowSubject, Max.
# Offense count: 23
# Configuration parameters: AllowSubject.
RSpec/MultipleMemoizedHelpers:
Exclude:
- 'spec/integration/macros_spec.rb'
- 'spec/integration/title_spec.rb'
- 'spec/lib/psulib_traject/holdings_spec.rb'

# Offense count: 2
# Configuration parameters: Max.
RSpec/NestedGroups:
Exclude:
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'
Max: 9

# Offense count: 4
Style/MixinUsage:
Exclude:
- 'config/traject.rb'

# Offense count: 59
# Offense count: 72
# Cop supports --auto-correct.
# Configuration parameters: AutoCorrect, Max, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
# Configuration parameters: AutoCorrect, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
# URISchemes: http, https
Layout/LineLength:
Exclude:
- 'config/schedule.rb'
- 'config/traject.rb'
- 'lib/psulib_traject/hathi_overlap_reducer.rb'
- 'lib/psulib_traject/macros.rb'
- 'lib/psulib_traject/processors/call_number/base.rb'
- 'lib/psulib_traject/processors/media_type.rb'
- 'lib/psulib_traject/processors/pub_date.rb'
- 'spec/integration/bound_with_spec.rb'
- 'spec/integration/config_spec.rb'
- 'spec/integration/macros_spec.rb'
- 'spec/integration/title_spec.rb'
- 'spec/lib/psulib_traject/holdings_spec.rb'
- 'spec/lib/psulib_traject/marc_combining_reader_spec.rb'
- 'spec/lib/psulib_traject/processors/pub_date_spec.rb'
Max: 231
1 change: 1 addition & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ GEM
yell (2.2.2)

PLATFORMS
universal-java-10
universal-java-11
universal-java-14
x86_64-darwin-20
Expand Down
15 changes: 10 additions & 5 deletions config/traject.rb
Original file line number Diff line number Diff line change
Expand Up @@ -293,14 +293,19 @@
accumulator.replace [lc_rest]
end

# Determines a base call number from the record's holdings and creates forward and reverse shelfkeys
# Call Number Browse
#
## Determines a base call number from the record's holdings and creates forward and reverse shelfkeys for LC, LCPER and DEWEY
each_record do |record, context|
call_numbers = PsulibTraject::Holdings.call(record: record, context: context, classification: ['LC', 'LCPER'])
call_numbers = PsulibTraject::Holdings.call(record: record, context: context)
next if call_numbers.empty?

context.add_output('call_number_lc_ssm', *call_numbers.map(&:value))
context.add_output('forward_lc_shelfkey', *call_numbers.map(&:forward_shelfkey))
context.add_output('reverse_lc_shelfkey', *call_numbers.map(&:reverse_shelfkey))
call_numbers.each do |call_number|
context.add_output(call_number.solr_field, call_number.value)
context.add_output(call_number.forward_shelfkey_field, call_number.forward_shelfkey)
context.add_output(call_number.reverse_shelfkey_field, call_number.reverse_shelfkey)
end

context.add_output('keymap_struct', *call_numbers.map(&:keymap).to_json)
end

Expand Down
52 changes: 47 additions & 5 deletions lib/psulib_traject/call_number.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class CallNumber
BNDSER-HY
).freeze

DEWEY_KLASS_PREFIX = 'AAA'

attr_reader :value, :classification, :location, :item_type, :leader

def initialize(value: '', classification: '', location: '', item_type: '', leader: '')
Expand All @@ -32,11 +34,18 @@ def reverse_shelfkey
shelf_key.reverse
end

# Whether this call number should be left out of call number browse.
#
# A call number is not browsable when its classification is neither LC/LCPER
# nor DEWEY, or when both its forward and reverse shelf keys are nil.
#
# @return [Boolean]
def not_browsable?
  !(lc? || dewey?) || (forward_shelfkey.nil? && reverse_shelfkey.nil?)
end

def keymap
{
'call_number' => value,
'forward_key' => forward_shelfkey,
'reverse_key' => reverse_shelfkey
call_number: value,
classification: classification,
forward_key: forward_shelfkey,
reverse_key: reverse_shelfkey
}
end

Expand Down Expand Up @@ -65,21 +74,54 @@ def serial?
item_type == 'MICROFORM' && %w(ab as).include?(leader[6..7])
end

# Name of the output field that holds this call number's value, built from the
# classification's field token (e.g. "call_number_lc_ssm").
#
# @return [String]
def solr_field
  format('call_number_%<field>s_ssm', field: classification_to_field)
end

# Name of the output field that holds this call number's forward shelf key
# (e.g. "forward_lc_shelfkey").
#
# @return [String]
def forward_shelfkey_field
  format('forward_%<field>s_shelfkey', field: classification_to_field)
end

# Name of the output field that holds this call number's reverse shelf key
# (e.g. "reverse_lc_shelfkey").
#
# @return [String]
def reverse_shelfkey_field
  format('reverse_%<field>s_shelfkey', field: classification_to_field)
end

private

def base_value
case classification
when 'LC', 'LCPER'
PsulibTraject::Processors::CallNumber::LC.new(value, serial: serial?).reduce
when 'DEWEY'
PsulibTraject::Processors::CallNumber::Dewey.new(value).reduce
PsulibTraject::Processors::CallNumber::Dewey.new(value, serial: serial?).reduce
else
PsulibTraject::Processors::CallNumber::Other.new(value).reduce
end
end

# True when the classification is one of the Library of Congress codes,
# 'LC' or 'LCPER'.
#
# @return [Boolean]
def lc?
  case classification
  when 'LC', 'LCPER' then true
  else false
  end
end

# True when the classification is exactly 'DEWEY'.
#
# @return [Boolean]
def dewey?
classification == 'DEWEY'
end

# Token used inside the generated field names: 'lc' for both LC and LCPER,
# otherwise the classification in lower case.
#
# @return [String]
def classification_to_field
  lc? ? 'lc' : classification.downcase
end

# Prefix handed to ShelfKey when building this call number's shelf keys.
#
# @note Dewey call numbers are given a prefix so that lcsort can be used to
#   create their shelf keys. Every other classification gets an empty prefix.
# @return [String]
def prefix
  dewey? ? DEWEY_KLASS_PREFIX : ''
end

def shelf_key
@shelf_key ||= ShelfKey.new(value)
@shelf_key ||= ShelfKey.new(value, prefix: prefix)
end
end
end
22 changes: 7 additions & 15 deletions lib/psulib_traject/holdings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,29 @@ module PsulibTraject
class Holdings
# @param record [Marc::Record]
# @param context [Traject::Indexer::Context]
# @param classification [Array<String>] Returns call numbers only for given classifications, example: 'LC', 'LCPER',
# 'DEWEY'. Defaults to [], which return any classification.
# @return [Array<CallNumber>]
def self.call(record:, context:, classification: [])
def self.call(record:, context:)
new(
record: record,
context: context,
classification: Array(classification)
context: context
).resolve_base
end

attr_reader :record, :context, :holdings, :classification
attr_reader :record, :context, :holdings

def initialize(record:, context:, classification:)
def initialize(record:, context:)
@record = record
@context = context
@holdings = extract_holdings
@classification = classification
freeze
end

def resolve_base
return [] if online? || holdings.empty?

holdings.reject! { |call_number| call_number.exclude? || classification_not_requested?(call_number) }
holdings.reject! do |call_number|
call_number.exclude? || call_number.not_browsable?
end

if holdings.one?
holdings
Expand All @@ -45,12 +43,6 @@ def online?
context.output_hash['access_facet']&.include?('Online')
end

def classification_not_requested?(call_number)
return false if classification.empty?

!classification.include?(call_number.classification)
end

# assuming each 949 has only one subfield a, w and l
def extract_holdings
Traject::MarcExtractor.cached('949').collect_matching_lines(record) do |field, _spec, _extractor|
Expand Down
59 changes: 54 additions & 5 deletions lib/psulib_traject/processors/call_number/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Base
shanah
tbd
).join('|')
.concat("[^a-z]t|v|vols?|vyp|k\\.|h\\.|ḥ\\.|#{ORDINALS}")
.concat("[^a-z]t|v|vols?|vyp|k\\.|h\\.|ḥ\\.|t\\.|#{ORDINALS}")

ADDL_VOL_PARTS = %w(
bklet
Expand Down Expand Up @@ -60,15 +60,25 @@ class Base

ADDL_VOL_PATTERN = /[:\/]?(#{ADDL_VOL_PARTS.join('|')}).*/i.freeze
VOL_PARTS_ALL = "((index|ind)\s)?(#{VOL_PARTS}|#{MONTHS})"
VOL_PATTERN = /([.:\/(])?(n\.s\.?,? ?)?[:\/]?#{VOL_PARTS_ALL}[. -\/]?\d+([\/-]\d+)?( \d{4}([\/-]\d{4})?)?( ?suppl\.?)?/i.freeze
VOL_PATTERN = /([.:\/(])?(n\.s\.?,? ?)?[:\/]?#{VOL_PARTS_ALL}[. \-\/]?\d+([\/-]\d+)?( \d{4}([\/-]\d{4})?)?( ?suppl\.?)?/i.freeze
VOL_PATTERN_LOOSER = /([.:\/(])?(n\.s\.?,? ?)?[:\/]?#{VOL_PARTS_ALL}[. -]?\d+.*/i.freeze
VOL_PATTERN_LETTERS = /([.:\/(])?(n\.s\.?,? ?)?[:\/]?#{VOL_PARTS_ALL}[\/. -]?[A-Z]?([\/-][A-Z]+)?.*/i.freeze
FOUR_DIGIT_YEAR_REGEX = /\W *(20|19|18|17|16|15|14)\d{2}\D?$?/.freeze
LOOSE_MONTHS_REGEX = /([.:\/(])? *#{MONTHS}/i.freeze

def reduce
raise NotImplementedError
end
attr_reader :call_number,
:cutter1,
:cutter2,
:cutter3,
:doon1,
:doon2,
:doon3,
:klass,
:klass_decimal,
:klass_number,
:removeables,
:rest,
:serial

class << self
def remove_years(value)
Expand All @@ -80,5 +90,44 @@ def remove_years(value)
shortest_value
end
end

# Reduces the call number to its base form.
#
# Steps: trim volume-style designations via remove_by_regex, drop everything
# from the first LOOSE_MONTHS_REGEX match onward, then either hand the result to
# the class-level remove_years (serials) or strip surrounding whitespace.
#
# @return [String]
def reduce
  trimmed = remove_by_regex
  cutoff = trimmed.index(LOOSE_MONTHS_REGEX) || trimmed.length
  trimmed = trimmed[0...cutoff] # remove loose months
  return self.class.remove_years(trimmed) if serial

  trimmed.strip
end

private

# Trims the call number at the first volume-style designation found in
# `removeables`. Patterns are tried in a fixed order and only the first one that
# matches is applied; when none match, the call number is returned unchanged.
#
# @note These are the original regex patterns from Stanford. VOL_PATTERN_LOOSER
#   does not currently apply to any of our test data, so it is deliberately left
#   out of the procedure. If reinstated, it belongs directly after VOL_PATTERN.
def remove_by_regex
  case removeables
  when VOL_PATTERN then call_number_before(VOL_PATTERN)
  when /Blu-ray|DVD/ then bluray_or_dvd
  when VOL_PATTERN_LETTERS then call_number_before(VOL_PATTERN_LETTERS)
  when ADDL_VOL_PATTERN then call_number_before(ADDL_VOL_PATTERN)
  else call_number
  end
end

# Portion of the call number that precedes the text `pattern` matched in
# `removeables`, with surrounding whitespace stripped.
def call_number_before(pattern)
  call_number.slice(0...call_number.index(removeables[pattern])).strip
end

# Handles call numbers whose `removeables` mention Blu-ray or DVD.
#
# Looks for ADDL_VOL_PATTERN in `removeables`. When nothing matches, the call
# number is returned unchanged; otherwise the call number is cut at the point
# where the matched text begins and surrounding whitespace is stripped.
#
# @return [String]
def bluray_or_dvd
  element = removeables[ADDL_VOL_PATTERN]
  return call_number unless element

  # Reuse the match captured above instead of running the regex a second time.
  call_number.slice(0...call_number.index(element)).strip
end
end
end
Loading

0 comments on commit dd09213

Please sign in to comment.