Skip to content

Commit

Permalink
Building off of Stanford's traject repository, determines a base call
Browse files Browse the repository at this point in the history
number for Dewey classifications.

Using the Lcsort gem, creates shelf keys for Dewey numbers to sort
in forward and reverse directions.
  • Loading branch information
Banu Kutlu committed Sep 9, 2021
1 parent bc0546c commit 15de925
Show file tree
Hide file tree
Showing 14 changed files with 362 additions and 120 deletions.
13 changes: 13 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ inherit_from:
AllCops:
DisplayCopNames: true
NewCops: enable
Exclude:
- 'config/ignorethis_*.rb'

Lint/ConstantDefinitionInBlock:
Exclude:
Expand All @@ -35,7 +37,18 @@ Lint/MissingSuper:
Lint/MixedRegexpCaptureTypes:
Exclude:
- 'lib/psulib_traject/processors/call_number/lc.rb'
- 'lib/psulib_traject/processors/call_number/dewey.rb'

RSpec/ImplicitSubject:
Exclude:
- 'spec/lib/psulib_traject/macros/subjects_spec.rb'

RSpec/NestedGroups:
Exclude:
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'
- 'spec/lib/psulib_traject/processors/call_number/dewey_spec.rb'

RSpec/ExpectActual:
Exclude:
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'
- 'spec/lib/psulib_traject/processors/call_number/dewey_spec.rb'
75 changes: 20 additions & 55 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,40 +1,31 @@
# This configuration was generated by
# `rubocop --auto-gen-config --auto-gen-only-exclude`
# on 2021-08-04 20:53:36 UTC using RuboCop version 0.93.1.
# `rubocop --auto-gen-config`
# on 2021-09-02 20:53:31 UTC using RuboCop version 0.93.1.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
# versions of RuboCop, may require this file to be generated again.

# Offense count: 4
# Configuration parameters: CountComments, Max, CountAsOne, ExcludedMethods.
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods.
# ExcludedMethods: refine
Metrics/BlockLength:
Exclude:
- 'spec/**/*'
- 'lib/tasks/docker.rake'
- 'lib/tasks/incrementals.rake'
Max: 75

# Offense count: 1
# Configuration parameters: CountComments, Max, CountAsOne.
# Configuration parameters: CountComments, CountAsOne.
Metrics/ClassLength:
Exclude:
- 'lib/psulib_traject/processors/media_type.rb'
Max: 138

# Offense count: 3
# Configuration parameters: IgnoredMethods, Max.
# Configuration parameters: IgnoredMethods.
Metrics/CyclomaticComplexity:
Exclude:
- 'lib/psulib_traject/holdings.rb'
- 'lib/psulib_traject/processors/media_type.rb'
- 'lib/psulib_traject/processors/preferred_format.rb'
Max: 10

# Offense count: 2
# Configuration parameters: IgnoredMethods, Max.
# Configuration parameters: IgnoredMethods.
Metrics/PerceivedComplexity:
Exclude:
- 'lib/psulib_traject/holdings.rb'
- 'lib/psulib_traject/marc_combining_reader.rb'
Max: 10

# Offense count: 2
# Configuration parameters: MinSize.
Expand All @@ -58,50 +49,24 @@ RSpec/DescribeClass:
- 'spec/integration/access_facet_spec.rb'
- 'spec/integration/bound_with_spec.rb'

# Offense count: 56
# Cop supports --auto-correct.
RSpec/ExpectActual:
Exclude:
- 'spec/routing/**/*'
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'

# Offense count: 20
# Configuration parameters: AllowSubject, Max.
# Offense count: 23
# Configuration parameters: AllowSubject.
RSpec/MultipleMemoizedHelpers:
Exclude:
- 'spec/integration/macros_spec.rb'
- 'spec/integration/title_spec.rb'
- 'spec/lib/psulib_traject/holdings_spec.rb'
Max: 9

# Offense count: 2
# Configuration parameters: Max.
RSpec/NestedGroups:
# Offense count: 1
Style/CombinableLoops:
Exclude:
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'
- 'config/traject.rb'

# Offense count: 3
# Offense count: 4
Style/MixinUsage:
Exclude:
- 'config/traject.rb'

# Offense count: 64
# Offense count: 71
# Cop supports --auto-correct.
# Configuration parameters: AutoCorrect, Max, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
# Configuration parameters: AutoCorrect, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
# URISchemes: http, https
Layout/LineLength:
Exclude:
- 'config/schedule.rb'
- 'config/traject.rb'
- 'lib/psulib_traject/hathi_overlap_reducer.rb'
- 'lib/psulib_traject/macros.rb'
- 'lib/psulib_traject/processors/call_number/base.rb'
- 'lib/psulib_traject/processors/media_type.rb'
- 'lib/psulib_traject/processors/pub_date.rb'
- 'spec/integration/bound_with_spec.rb'
- 'spec/integration/config_spec.rb'
- 'spec/integration/macros_spec.rb'
- 'spec/integration/subjects_spec.rb'
- 'spec/integration/title_spec.rb'
- 'spec/lib/psulib_traject/holdings_spec.rb'
- 'spec/lib/psulib_traject/marc_combining_reader_spec.rb'
- 'spec/lib/psulib_traject/processors/pub_date_spec.rb'
Max: 231
1 change: 1 addition & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ GEM
yell (2.2.2)

PLATFORMS
universal-java-10
universal-java-11
universal-java-14
x86_64-darwin-20
Expand Down
17 changes: 15 additions & 2 deletions config/traject.rb
Original file line number Diff line number Diff line change
Expand Up @@ -293,15 +293,28 @@
accumulator.replace [lc_rest]
end

# Determines a base call number from the record's holdings and creates forward and reverse shelfkeys
# Call Number Browse
#
## Determines a base call number from the record's holdings and creates forward and reverse shelfkeys for LC
# Asks PsulibTraject::Holdings for the record's call numbers in the LC and LCPER classifications and, when
# any are returned, writes their values, forward/reverse shelf keys, and a JSON-serialized list of keymaps
# to the output context. Records with no such call numbers are skipped.
each_record do |record, context|
call_numbers = PsulibTraject::Holdings.call(record: record, context: context, classification: ['LC', 'LCPER'])
next if call_numbers.empty?

context.add_output('call_number_lc_ssm', *call_numbers.map(&:value))
context.add_output('forward_lc_shelfkey', *call_numbers.map(&:forward_shelfkey))
context.add_output('reverse_lc_shelfkey', *call_numbers.map(&:reverse_shelfkey))
# NOTE(review): the next two lines look like the before/after pair of a field rename
# ('keymap_struct' -> 'keymap_lc_struct') shown by the diff view — confirm only the second should remain.
context.add_output('keymap_struct', *call_numbers.map(&:keymap).to_json)
context.add_output('keymap_lc_struct', *call_numbers.map(&:keymap).to_json)
end

## Dewey call number browse: asks PsulibTraject::Holdings for the record's DEWEY call numbers and, when any
## are returned, writes their values, forward/reverse shelf keys, and a JSON-serialized list of keymaps to
## the output context. Records with no such call numbers produce no Dewey output.
each_record do |record, context|
  dewey_numbers = PsulibTraject::Holdings.call(record: record, context: context, classification: ['DEWEY'])

  unless dewey_numbers.empty?
    context.add_output('call_number_dewey_ssm', *dewey_numbers.map(&:value))
    context.add_output('forward_dewey_shelfkey', *dewey_numbers.map(&:forward_shelfkey))
    context.add_output('reverse_dewey_shelfkey', *dewey_numbers.map(&:reverse_shelfkey))
    context.add_output('keymap_dewey_struct', *dewey_numbers.map(&:keymap).to_json)
  end
end

# Material Characteristics
Expand Down
35 changes: 30 additions & 5 deletions lib/psulib_traject/call_number.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ class CallNumber
BNDSER-HY
).freeze

DEWEY_KLASS_PREFIX = 'AAA'

attr_reader :value, :classification, :location, :item_type, :leader

def initialize(value: '', classification: '', location: '', item_type: '', leader: '')
Expand All @@ -32,11 +34,17 @@ def reverse_shelfkey
shelf_key.reverse
end

# Whether this call number cannot take part in call number browse.
#
# Only LC and Dewey call numbers are ever reported as not browsable, and only when neither a forward
# nor a reverse shelf key is available for them.
#
# @return [Boolean] true for an LC/Dewey call number whose forward and reverse shelf keys are both nil
def not_browsable?
  if lc? || dewey?
    forward_shelfkey.nil? && reverse_shelfkey.nil?
  else
    false
  end
end

def keymap
{
'call_number' => value,
'forward_key' => forward_shelfkey,
'reverse_key' => reverse_shelfkey
call_number: value,
forward_key: forward_shelfkey,
reverse_key: reverse_shelfkey
}
end

Expand Down Expand Up @@ -72,14 +80,31 @@ def base_value
when 'LC', 'LCPER'
PsulibTraject::Processors::CallNumber::LC.new(value, serial: serial?).reduce
when 'DEWEY'
PsulibTraject::Processors::CallNumber::Dewey.new(value).reduce
PsulibTraject::Processors::CallNumber::Dewey.new(value, serial: serial?).reduce
else
PsulibTraject::Processors::CallNumber::Other.new(value).reduce
end
end

# @return [Boolean] true when the classification is 'LC' or 'LCPER'
def lc?
  case classification
  when 'LC', 'LCPER' then true
  else false
  end
end

# @return [Boolean] true when the classification is exactly 'DEWEY'
def dewey?
  scheme = classification
  scheme == 'DEWEY'
end

# Prefix prepended to the call number before a shelf key is built from it.
#
# @note Dewey call numbers are given a constant prefix so that lcsort can be used to create their shelf keys.
# @return [String] DEWEY_KLASS_PREFIX for Dewey call numbers, otherwise an empty string
def prefix
  dewey? ? DEWEY_KLASS_PREFIX : ''
end

# Memoized ShelfKey built from this call number's value.
#
# NOTE(review): the two assignments below look like the before/after lines of a single change as rendered
# by the diff view. Taken literally, the first `||=` already assigns @shelf_key, so the second one (the
# only one passing `prefix:`) would never take effect — confirm that only the second line should remain.
def shelf_key
@shelf_key ||= ShelfKey.new(value)
@shelf_key ||= ShelfKey.new(value, prefix: prefix)
end
end
end
4 changes: 3 additions & 1 deletion lib/psulib_traject/holdings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ def initialize(record:, context:, classification:)
def resolve_base
return [] if online? || holdings.empty?

holdings.reject! { |call_number| call_number.exclude? || classification_not_requested?(call_number) }
holdings.reject! do |call_number|
call_number.exclude? || call_number.not_browsable? || classification_not_requested?(call_number)
end

if holdings.one?
holdings
Expand Down
4 changes: 2 additions & 2 deletions lib/psulib_traject/processors/call_number/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Base
shanah
tbd
).join('|')
.concat("[^a-z]t|v|vols?|vyp|k\\.|h\\.|ḥ\\.|#{ORDINALS}")
.concat("[^a-z]t|v|vols?|vyp|k\\.|h\\.|ḥ\\.|t\\.|#{ORDINALS}")

ADDL_VOL_PARTS = %w(
bklet
Expand Down Expand Up @@ -60,7 +60,7 @@ class Base

ADDL_VOL_PATTERN = /[:\/]?(#{ADDL_VOL_PARTS.join('|')}).*/i.freeze
VOL_PARTS_ALL = "((index|ind)\s)?(#{VOL_PARTS}|#{MONTHS})"
VOL_PATTERN = /([.:\/(])?(n\.s\.?,? ?)?[:\/]?#{VOL_PARTS_ALL}[. -\/]?\d+([\/-]\d+)?( \d{4}([\/-]\d{4})?)?( ?suppl\.?)?/i.freeze
VOL_PATTERN = /([.:\/(])?(n\.s\.?,? ?)?[:\/]?#{VOL_PARTS_ALL}[. \-\/]?\d+([\/-]\d+)?( \d{4}([\/-]\d{4})?)?( ?suppl\.?)?/i.freeze
VOL_PATTERN_LOOSER = /([.:\/(])?(n\.s\.?,? ?)?[:\/]?#{VOL_PARTS_ALL}[. -]?\d+.*/i.freeze
VOL_PATTERN_LETTERS = /([.:\/(])?(n\.s\.?,? ?)?[:\/]?#{VOL_PARTS_ALL}[\/. -]?[A-Z]?([\/-][A-Z]+)?.*/i.freeze
FOUR_DIGIT_YEAR_REGEX = /\W *(20|19|18|17|16|15|14)\d{2}\D?$?/.freeze
Expand Down
60 changes: 59 additions & 1 deletion lib/psulib_traject/processors/call_number/dewey.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,42 @@
module PsulibTraject::Processors::CallNumber
class Dewey < Base
attr_reader :call_number,
:cutter1,
:cutter2,
:cutter3,
:doon1,
:doon2,
:doon3,
:klass_decimal,
:klass_number,
:removables,
:rest,
:serial

# Splits a Dewey call number into its named parts and stores each part in an instance variable.
#
# The pattern is not anchored, so matching begins at the first run of digits found. Parts, in order:
# * klass_number  - one to three digits
# * klass_decimal - optional: an optional "." followed by digits
# * doon1..doon3  - optional: one to four digits, an optional upper-case ST/ND/RD/TH/D suffix, then
#                   required whitespace (presumably "date or other number", following the Stanford
#                   naming — TODO confirm)
# * cutter1..3    - optional: an optional leading "." or "/", letters, digits, then trailing letters that
#                   are not followed by a digit
# * rest          - whatever follows on the same line
# * removables    - the whole span from doon2 through rest (everything after cutter1)
#
# When the pattern does not match at all, every part is nil.
#
# @param call_number [String] the call number to parse; kept unchanged in @call_number
# @param serial [Boolean] stored in @serial; #reduce reads it to decide whether years are removed
def initialize(call_number, serial: false)
match_data = /
(?<klass_number>\d{1,3})(?<klass_decimal>\.?\d+)?\s*
(?<doon1>(\d{1,4})(?:ST|ND|RD|TH|D)?\s+)?\s*
(?<cutter1>[.\/]?[a-zA-Z]+\d+([a-zA-Z]*(?![0-9])))?\s*
(?<removables>(?<doon2>(\d{1,4})(?:ST|ND|RD|TH|D)?\s+)?\s*
(?<cutter2>[.\/]?[a-zA-Z]+\d+([a-zA-Z]*(?![0-9])))?\s*
(?<doon3>(\d{1,4})(?:ST|ND|RD|TH|D)?\s+)?\s*
(?<cutter3>[.\/]?[a-zA-Z]+\d+([a-zA-Z]*(?![0-9])))?\s*
(?<rest>.*))
/x.match(call_number)

@call_number = call_number
# An empty Hash stands in for a failed match so that every lookup below yields nil.
match_data ||= {}
@klass_number = match_data[:klass_number]
@klass_decimal = match_data[:klass_decimal]
@doon1 = match_data[:doon1]
@cutter1 = match_data[:cutter1]
@doon2 = match_data[:doon2]
@doon3 = match_data[:doon3]
@cutter2 = match_data[:cutter2]
@cutter3 = match_data[:cutter3]
@rest = match_data[:rest]
@removables = match_data[:removables]
@serial = serial
end

Expand All @@ -15,7 +47,33 @@ def scheme
end

# Reduces the call number to its base form.
#
# The result of #remove_by_regex is cut off at the first LOOSE_MONTHS_REGEX match (kept whole when there
# is no match). For serials that value is then handed to the class-level remove_years; otherwise it is
# returned with surrounding whitespace stripped.
#
# @return [String] the reduced call number (NOTE(review): remove_years is defined outside this view;
#   presumably it also returns a String — confirm)
def reduce
# NOTE(review): this bare `call_number` line looks like the removed pre-change method body as rendered by
# the diff view; as a statement its value is discarded — confirm it should be dropped.
call_number
value = remove_by_regex
value = value[0...(value.index(LOOSE_MONTHS_REGEX) || value.length)] # remove loose months

if serial
self.class.remove_years(value)
else
value.strip
end
end

private

# Cuts the call number off where its volume-like trailing text begins.
#
# The first of VOL_PATTERN, VOL_PATTERN_LETTERS and ADDL_VOL_PATTERN (tried in that order) that matches
# `removables` is selected; the call number is then truncated at the first place the matched text occurs
# in it and stripped. When none matches (including when `removables` is nil) the call number is returned
# unchanged.
#
# @note The patterns are the original ones from Stanford. VOL_PATTERN_LOOSER is intentionally not part of
#   the lookup because it does not currently apply to any of our data.
# @return [String]
def remove_by_regex
  pattern = [VOL_PATTERN, VOL_PATTERN_LETTERS, ADDL_VOL_PATTERN].find { |candidate| candidate === removables }
  return call_number if pattern.nil?

  cutoff = call_number.index(removables[pattern])
  call_number.slice(0...cutoff).strip
end
end
end
6 changes: 3 additions & 3 deletions lib/psulib_traject/shelf_key.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class NullKey < NullObject; end
attr_reader :call_number

# @param [String] call_number
def initialize(call_number)
@call_number = call_number
def initialize(call_number, prefix: '')
@call_number = prefix + call_number
freeze
end

Expand All @@ -31,7 +31,7 @@ def reverse
.append('~')
.join
end

private

def normalize
Expand Down
Loading

0 comments on commit 15de925

Please sign in to comment.