diff --git a/api/src/services/itis-service.ts b/api/src/services/itis-service.ts
index 11614ce3..9407d867 100644
--- a/api/src/services/itis-service.ts
+++ b/api/src/services/itis-service.ts
@@ -1,5 +1,5 @@
 import axios from 'axios';
-import { sortExactMatches } from '../utils/itis-sort';
+import { sortTaxonSearchResults } from '../utils/itis-sort';
 import { getLogger } from '../utils/logger';
 import { TaxonSearchResult } from './taxonomy-service';
 
@@ -47,7 +47,7 @@ export class ItisService {
     const sanitizedResponse = this._sanitizeItisData(response.data.response.docs);
 
     // Sort the results to place exact matches at the top
-    const sortedResponse = sortExactMatches(sanitizedResponse, searchTerms);
+    const sortedResponse = sortTaxonSearchResults(sanitizedResponse, searchTerms);
 
     // Return only a subset of the records
     // More records than are returned here are requested from ITIS to help find and prioritize exact matches
diff --git a/api/src/utils/itis-sort.test.ts b/api/src/utils/itis-sort.test.ts
new file mode 100644
index 00000000..74e9e070
--- /dev/null
+++ b/api/src/utils/itis-sort.test.ts
@@ -0,0 +1,102 @@
+import { expect } from 'chai';
+import { describe } from 'mocha';
+import { TaxonSearchResult } from '../services/taxonomy-service';
+import { sortTaxonSearchResults } from './itis-sort';
+
+describe('itis-sort', () => {
+  describe('sortTaxonSearchResults', () => {
+    it('Sorts the list when there is only 1 item', () => {
+      const data: TaxonSearchResult[] = [
+        {
+          tsn: 1,
+          commonNames: ['Moose', 'moose'],
+          scientificName: 'Alces alces'
+        }
+      ];
+      const searchTerms = ['Moose'];
+
+      const result = sortTaxonSearchResults(data, searchTerms);
+
+      expect(result.length).to.equal(data.length);
+      expect(result[0].tsn).to.equal(1);
+    });
+
+    it('Sorts the list when there are exact matches', () => {
+      const data: TaxonSearchResult[] = [
+        {
+          tsn: 1,
+          commonNames: ['Goose', 'goose'],
+          scientificName: 'Goose goose'
+        },
+        {
+          tsn: 2,
+          commonNames: ['Moose', 'moose'],
+          scientificName: 'Moose moose'
+        },
+        {
+          tsn: 3,
+          commonNames: ['House'],
+          scientificName: 'House'
+        }
+      ];
+      const searchTerms = ['Moose'];
+
+      const result = sortTaxonSearchResults(data, searchTerms);
+
+      expect(result.length).to.equal(data.length);
+      expect(result[0].tsn).to.equal(2);
+      expect(result[1].tsn).to.equal(1);
+      expect(result[2].tsn).to.equal(3);
+    });
+
+    it('Sorts the list when there are no exact matches', () => {
+      const data: TaxonSearchResult[] = [
+        {
+          tsn: 1,
+          commonNames: ['Goose', 'goose'],
+          scientificName: 'Goose goose'
+        },
+        {
+          tsn: 2,
+          commonNames: ['Moose', 'moose'],
+          scientificName: 'Moose moose'
+        },
+        {
+          tsn: 3,
+          commonNames: ['House'],
+          scientificName: 'House'
+        }
+      ];
+      const searchTerms = ['oose'];
+
+      const result = sortTaxonSearchResults(data, searchTerms);
+
+      expect(result.length).to.equal(data.length);
+      expect(result[0].tsn).to.equal(1);
+      expect(result[1].tsn).to.equal(2);
+      expect(result[2].tsn).to.equal(3);
+    });
+
+    it('Treats names with surrounding whitespace as exact matches', () => {
+      const data: TaxonSearchResult[] = [
+        {
+          tsn: 1,
+          commonNames: ['Forest Wolf'],
+          scientificName: 'Canis lupus forest'
+        },
+        {
+          tsn: 2,
+          commonNames: [' Wolf '],
+          scientificName: 'Canis lupus'
+        }
+      ];
+      const searchTerms = ['Wolf'];
+
+      const result = sortTaxonSearchResults(data, searchTerms);
+
+      expect(result.length).to.equal(data.length);
+      expect(result[0].tsn).to.equal(2);
+      expect(result[1].tsn).to.equal(1);
+    });
+  });
+});
diff --git a/api/src/utils/itis-sort.ts b/api/src/utils/itis-sort.ts
index bd3c59ed..5e0d6038 100644
--- a/api/src/utils/itis-sort.ts
+++ b/api/src/utils/itis-sort.ts
@@ -3,103 +3,88 @@ import { TaxonSearchResult } from '../services/taxonomy-service';
 /**
  * Sorts the ITIS response by how strongly records match the search terms
  *
- * @param {ItisSolrSearchResponse[]} data
+ * Every record is given a cumulative score and records are ordered from the highest score to the lowest:
+ * - 1 point if any word of the scientific name or a common name equals ANY of the search terms
+ * - 2 points if the scientific name or a common name CONTAINS the search terms joined by a space
+ * - 3 points if the scientific name or a common name is EXACTLY EQUAL to the search terms joined by a space
+ *
+ * Names are lowercased and trimmed before matching; search terms are lowercased. The array is sorted in place.
+ *
+ * @param {TaxonSearchResult[]} data
  * @param {string[]} searchTerms
- * @memberof ItisService
+ * @return {*} {TaxonSearchResult[]} The same array instance, ordered by descending score
  */
-export const sortExactMatches = (data: TaxonSearchResult[], searchTerms: string[]): TaxonSearchResult[] => {
+export const sortTaxonSearchResults = (data: TaxonSearchResult[], searchTerms: string[]): TaxonSearchResult[] => {
   const searchTermsLower = searchTerms.map((item) => item.toLowerCase());
-  const taxonNames = data.map((item) => {
-    item.scientificName = item.scientificName.toLowerCase().trim();
-    item.commonNames = item.commonNames.map((name) => name.toLowerCase().trim());
-    return item;
-  });
+  const searchTermJoined = searchTermsLower.join(' ');
 
-  // Prioritize taxa where any word in the scientific or common name matches ANY of the search terms
-  // eg. ['Black', 'bear'] -> "Black" matches on "Black widow"
-  const containsAnyMatch = customSortContainsAnyMatchingSearchTerm(taxonNames, searchTermsLower);
+  // Caches the scientific name data
+  const scientificNameDataMap = new Map();
+  // Caches the common name data
+  const commonNamesDataMap = new Map();
 
-  // Prioritize taxa where either the scientific name or any common name CONTAINS the search terms joined
-  // eg. ['Black', 'bear'] -> "Black bear" matches on "American black bear"
-  const containsAnyMatchJoined = customSortContainsSearchTermsJoinedExact(containsAnyMatch, searchTermsLower);
+  // Returns the scientific name data, adding it to the cache if it doesn't exist
+  const getScientificNameData = (scientificName: string) => {
+    if (!scientificNameDataMap.has(scientificName)) {
+      const lowercased = scientificName.toLowerCase().trim();
+      scientificNameDataMap.set(scientificName, { words: lowercased.split(' '), lowercased });
+    }
 
-  // Prioritize taxa where either the scientific name or any common name is EXACTLY EQUAL to the search terms joined
-  // eg. ['Wolf'] -> "Wolf" is prioritized over "Forest Wolf"
-  const exactlyEquals = customSortEqualsSearchTermsExact(containsAnyMatchJoined, searchTermsLower);
+    return scientificNameDataMap.get(scientificName) as { words: string[]; lowercased: string };
+  };
 
-  return exactlyEquals;
-};
+  // Returns the common names data, adding it to the cache if it doesn't exist
+  const getCommonNamesData = (commonNames: string[]) => {
+    return commonNames.map((name) => {
+      if (!commonNamesDataMap.has(name)) {
+        const lowercased = name.toLowerCase().trim();
+        commonNamesDataMap.set(name, { words: lowercased.split(' '), lowercased });
+      }
 
-/**
- * Sorts the ITIS response to prioritize records where any word in the scientific or
- * common name matches ANY of the search terms
- *
- * @param {ItisSolrSearchResponse[]} data
- * @param {string[]} searchTerms
- * @memberof ItisService
- */
-export const customSortContainsAnyMatchingSearchTerm = (
-  data: TaxonSearchResult[],
-  searchTerms: string[]
-): TaxonSearchResult[] =>
-  data.sort((a, b) => {
-    const checkForMatch = (item: TaxonSearchResult) =>
-      searchTerms.some(
-        (searchTerm) =>
-          item.scientificName.split(' ').includes(searchTerm) ||
-          item.commonNames?.flatMap((name) => name.split(' ')).includes(searchTerm)
-      );
+      return commonNamesDataMap.get(name) as { words: string[]; lowercased: string };
+    });
+  };
 
-    const aInReference = checkForMatch(a);
-    const bInReference = checkForMatch(b);
+  /**
+   * Custom scoring function to determine how well a record matches the search terms
+   *
+   * @param {TaxonSearchResult} item
+   * @return {number} The cumulative match score, from 0 (no match) to 6 (exact match)
+   */
+  const calculateScore = (item: TaxonSearchResult): number => {
+    let score = 0;
 
-    return aInReference && !bInReference ? -1 : !aInReference && bInReference ? 1 : 0;
-  });
+    const scientificNameData = getScientificNameData(item.scientificName);
+    const commonNamesData = getCommonNamesData(item.commonNames);
 
-/**
- * Sorts the ITIS response to prioritize records where either the scientific name or
- * any common name CONTAINS the search terms joined
- *
- * @param {ItisSolrSearchResponse[]} data
- * @param {string[]} searchTerms
- * @memberof ItisService
- */
-export const customSortContainsSearchTermsJoinedExact = (
-  data: TaxonSearchResult[],
-  searchTerms: string[]
-): TaxonSearchResult[] =>
-  data.sort((a, b) => {
-    const checkForMatch = (item: TaxonSearchResult) => {
-      return (
-        item.commonNames.some((name) => name.includes(searchTerms.join(' '))) ||
-        item.scientificName === searchTerms.join(' ')
-      );
-    };
+    // Check if any word in the scientific or common name matches ANY of the search terms
+    if (
+      searchTermsLower.some(
+        (term) => scientificNameData.words.includes(term) || commonNamesData.some((data) => data.words.includes(term))
+      )
+    ) {
+      score += 1;
+    }
 
-    const aInReference = checkForMatch(a);
-    const bInReference = checkForMatch(b);
+    // Check if either the scientific name or any common name CONTAINS the search terms joined
+    if (
+      scientificNameData.lowercased.includes(searchTermJoined) ||
+      commonNamesData.some((data) => data.lowercased.includes(searchTermJoined))
+    ) {
+      score += 2;
+    }
 
-    return aInReference && !bInReference ? -1 : 0;
-  });
+    // Check if either the scientific name or any common name is EXACTLY EQUAL to the search terms joined
+    if (
+      scientificNameData.lowercased === searchTermJoined ||
+      commonNamesData.some((data) => data.lowercased === searchTermJoined)
+    ) {
+      score += 3;
+    }
 
-/**
- * Sorts the ITIS response to prioritize taxa where either the scientific name or
- * any common name is EXACTLY EQUAL to the search terms joined
- *
- * @param {ItisSolrSearchResponse[]} data
- * @param {string[]} searchTerms
- * @memberof ItisService
- */
-export const customSortEqualsSearchTermsExact = (
-  data: TaxonSearchResult[],
-  searchTerms: string[]
-): TaxonSearchResult[] =>
-  data.sort((a, b) => {
-    const checkForMatch = (item: TaxonSearchResult) =>
-      item.scientificName === searchTerms.join(' ') || item.commonNames.includes(searchTerms.join(' '));
-
-    const aInReference = checkForMatch(a);
-    const bInReference = checkForMatch(b);
+    return score;
+  };
 
-    return aInReference && !bInReference ? -1 : 0;
-  });
+  // Sort the data by the score
+  return data.sort((a, b) => calculateScore(b) - calculateScore(a));
+};