From a38f3134f5013f4671b3e1b42b391284998e2abf Mon Sep 17 00:00:00 2001 From: cristianpb Date: Sun, 25 Apr 2021 02:20:35 -0500 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=8E=A8=20Restructure=20score=20functi?= =?UTF-8?q?on=20to=20include=20explain=20attribute?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/src/models/entities.ts | 27 +++++++++++++++- backend/src/score.spec.ts | 8 ++--- backend/src/score.ts | 58 ++++++++++++++++------------------ 3 files changed, 58 insertions(+), 35 deletions(-) diff --git a/backend/src/models/entities.ts b/backend/src/models/entities.ts index a077e01e..91a82a3b 100644 --- a/backend/src/models/entities.ts +++ b/backend/src/models/entities.ts @@ -131,6 +131,31 @@ export interface Modification { review?: Review; }; +export interface Score { + score?: number; + birthDate?: number + deathDate?: number + name?: number; + sex?: number; + birthLocation?: number; + deathLocation?: number; +} + +export interface Explain { + name: { + particles: boolean; + nameSwap: boolean; + first: { + levenshteinScore: number; + soundexScore: number; + } + last: { + levenshteinScore: number; + soundexScore: number; + } + } +} + export interface Person { score: number; source: string; @@ -138,7 +163,7 @@ export interface Person { id: string; name: Name; sex: 'M'|'F'; - scores: ScoreResult; + scores: Score; birth: { date: string; location: Location; diff --git a/backend/src/score.spec.ts b/backend/src/score.spec.ts index 379d5aea..bc9a1050 100644 --- a/backend/src/score.spec.ts +++ b/backend/src/score.spec.ts @@ -47,8 +47,8 @@ describe('score.ts - Score function', () => { } } }); - expect(score).to.contain.all.keys(['score', 'birthDate', 'birthLocation', 'name']) - expect(score.score).to.equal(0.73); + expect(score.scores).to.contain.all.keys(['score', 'birthDate', 'birthLocation', 'name']) + expect(score.scores.score).to.equal(0.73); }); @@ -96,8 +96,8 @@ describe('score.ts - Score function', () => { } } }); 
- expect(score).to.contain.all.keys(['score', 'birthLocation', 'name']) - expect(score.birthLocation).to.contain.all.keys(['score', 'city', 'code']) + expect(score.scores).to.contain.all.keys(['score', 'birthLocation', 'name']) + expect(score.scores.birthLocation).to.contain.all.keys(['score', 'city', 'code']) }); diff --git a/backend/src/score.ts b/backend/src/score.ts index eb225e3d..20c70778 100644 --- a/backend/src/score.ts +++ b/backend/src/score.ts @@ -1,5 +1,5 @@ import { RequestBody } from './models/requestInput'; -import { Person, Location, Name, RequestField, ScoreParams } from './models/entities'; +import { Person, Location, Name, RequestField, ScoreParams, Score, Explain } from './models/entities'; import { distance } from 'fastest-levenshtein'; import damlev from 'damlev'; import fuzz from 'fuzzball'; @@ -631,48 +631,44 @@ scoreSex = timer(scoreSex, 'scoreSex',100); scoreDate = timer(scoreDate, 'scoreDate',1000); export class ScoreResult { - score: number; - birthDate?: number - deathDate?: number - name?: number; - sex?: number; - birthLocation?: number; - deathLocation?: number; + scores: Score; + explain: Explain; constructor(request: RequestBody, result: Person, params: ScoreParams = {}) { + this.scores = {} const pruneScore = params.pruneScore !== undefined ? 
params.pruneScore : defaultPruneScore if (request.birthDate) { - this.birthDate = scoreDate(request.birthDate, result.birth.date, params.dateFormat, + this.scores.birthDate = scoreDate(request.birthDate, result.birth.date, params.dateFormat, result.birth && result.birth.location && result.birth.location.countryCode && (result.birth.location.countryCode !== 'FRA') ); } if (request.firstName || request.lastName) { - if ((pruneScore < scoreReduce(this, true)) || !this.birthDate) { + if ((pruneScore < scoreReduce(this.scores, true)) || !this.scores.birthDate) { if (result.sex && result.sex === 'F') { if (request.legalName) { - this.name = scoreName({first: request.firstName, last: [request.lastName, request.legalName]}, result.name, 'F'); + this.scores.name = scoreName({first: request.firstName, last: [request.lastName, request.legalName]}, result.name, 'F'); } else { - this.name = scoreName({first: request.firstName, last: request.lastName}, result.name, 'F'); + this.scores.name = scoreName({first: request.firstName, last: request.lastName}, result.name, 'F'); } } else { - this.name = scoreName({first: request.firstName, last: request.lastName}, result.name, 'M'); + this.scores.name = scoreName({first: request.firstName, last: request.lastName}, result.name, 'M'); } } else { - this.score = 0 + this.scores.score = 0 } } if (request.sex) { - if (pruneScore < scoreReduce(this, true)) { - this.sex = scoreSex(request.sex, result.sex); + if (pruneScore < scoreReduce(this.scores, true)) { + this.scores.sex = scoreSex(request.sex, result.sex); } else { - this.score = 0 + this.scores.score = 0 } } else if (request.firstName && firstNameSexMismatch(request.firstName, result.name.first as string)) { - this.sex = firstNameSexPenalty; + this.scores.sex = firstNameSexPenalty; } // birthLocation - if (pruneScore < scoreReduce(this, true)) { - this.birthLocation = scoreLocation({ + if (pruneScore < scoreReduce(this.scores, true)) { + this.scores.birthLocation = scoreLocation({ city: 
request.birthCity, code: request.birthLocationCode, departmentCode: request.birthDepartment, @@ -681,20 +677,20 @@ export class ScoreResult { longitude: request.birthGeoPoint?.longitude }, result.birth.location); } else { - this.score = 0 + this.scores.score = 0 } if (request.deathDate || request.lastSeenAliveDate) { - if (pruneScore < scoreReduce(this, true)) { - this.deathDate = scoreDate(request.deathDate || `>${request.lastSeenAliveDate}`, result.death.date, params.dateFormat, + if (pruneScore < scoreReduce(this.scores, true)) { + this.scores.deathDate = scoreDate(request.deathDate || `>${request.lastSeenAliveDate}`, result.death.date, params.dateFormat, result.death && result.death.location && result.death.location.countryCode && (result.death.location.countryCode !== 'FRA') ); } else { - this.score = 0 + this.scores.score = 0 } } if ((request.deathCity || request.deathLocationCode || request.deathCountry || request.deathDepartment || request.deathGeoPoint)) { - if (pruneScore < scoreReduce(this, true)) { - this.deathLocation = scoreLocation({ + if (pruneScore < scoreReduce(this.scores, true)) { + this.scores.deathLocation = scoreLocation({ city: request.deathLocation, code: request.deathLocationCode, departmentCode: request.deathDepartment, @@ -703,11 +699,11 @@ export class ScoreResult { longitude: request.deathGeoPoint?.longitude }, result.death.location); } else { - this.score = 0 + this.scores.score = 0 } } - if (!this.score) { - this.score = scoreReduce(this, true) + if (!this.scores.score) { + this.scores.score = scoreReduce(this.scores, true) } } } @@ -728,7 +724,9 @@ export const scoreResults = (request: RequestBody, results: Person[], params: Sc .filter((result:any) => result.score > 0) .map((result:any) => { try { - result.scores = new ScoreResult(request, result, params); + const scoreResult = new ScoreResult(request, result, params); + result.scores = scoreResult.scores + result.explain = scoreResult.explain const perfectScores = 
((result.scores.name && result.scores.name.score >= perfectScoreThreshold) ? 1 : 0) + ((result.scores.birtDate && result.scores.birthDate.score === 1) ? 1 : 0) + From 194c0d9b6f1df74ebc6d95591341fbfbb35ca17c Mon Sep 17 00:00:00 2001 From: cristianpb Date: Sun, 25 Apr 2021 04:47:21 -0500 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9C=A8=20Add=20levenshtein=20score=20to?= =?UTF-8?q?=20explain=20for=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/src/models/entities.ts | 2 +- backend/src/score.ts | 38 ++++++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/backend/src/models/entities.ts b/backend/src/models/entities.ts index 91a82a3b..d44f7ac7 100644 --- a/backend/src/models/entities.ts +++ b/backend/src/models/entities.ts @@ -142,7 +142,7 @@ export interface Score { } export interface Explain { - name: { + name?: { particles: boolean; nameSwap: boolean; first: { diff --git a/backend/src/score.ts b/backend/src/score.ts index 20c70778..c14d129d 100644 --- a/backend/src/score.ts +++ b/backend/src/score.ts @@ -182,9 +182,10 @@ const firstNameSexMismatch = (firstNameA: string, firstNameB: string): boolean = return /^.?(e|a)$/.test(firstA.replace(firstB, '')) || /^.?(e|a)$/.test(firstB.replace(firstA, '')); } -let scoreName = (nameA: Name, nameB: Name, sex: string): any => { +let scoreName = (nameA: Name, nameB: Name, sex: string, explainScore: boolean): any => { if ((!nameA.first && !nameA.last) || (!nameB.first && !nameB.last)) { return blindNameScore } let score:any; + let explain:any; const firstA = firstNameNorm(nameA.first as string|string[]); const lastA = lastNameNorm(nameA.last as string|string[]); const firstB = firstNameNorm(nameB.first as string|string[]); @@ -197,6 +198,14 @@ let scoreName = (nameA: Name, nameB: Name, sex: string): any => { let firstFirstA; let firstFirstB; let scoreFirstALastB; let fuzzScore; const scoreFirst = round(scoreToken(firstA, firstB)); 
const scoreLast = round(scoreToken(lastA, lastB)); + explain = { + first: { + levenshtein: scoreFirst + }, + last: { + levenshtein: scoreLast + } + } score = round(Math.max( scoreFirst * (scoreLast ** thisLastNamePenalty), Math.max( @@ -261,7 +270,11 @@ let scoreName = (nameA: Name, nameB: Name, sex: string): any => { score.particleScore = particleScore; } } - return score; + if (explainScore) { + return {score, explain}; + } else { + return {score}; + } } const scoreToken = (tokenA: string|string[], tokenB: string|string[], option?: any): number => { @@ -637,6 +650,10 @@ export class ScoreResult { constructor(request: RequestBody, result: Person, params: ScoreParams = {}) { this.scores = {} const pruneScore = params.pruneScore !== undefined ? params.pruneScore : defaultPruneScore + // TODO: use input parameter + // const explainScore = params.explainScore !== undefined ? params.explainScore : false; + const explainScore = true; + if (explainScore) this.explain = {} if (request.birthDate) { this.scores.birthDate = scoreDate(request.birthDate, result.birth.date, params.dateFormat, result.birth && result.birth.location && result.birth.location.countryCode && (result.birth.location.countryCode !== 'FRA') @@ -646,12 +663,18 @@ export class ScoreResult { if ((pruneScore < scoreReduce(this.scores, true)) || !this.scores.birthDate) { if (result.sex && result.sex === 'F') { if (request.legalName) { - this.scores.name = scoreName({first: request.firstName, last: [request.lastName, request.legalName]}, result.name, 'F'); + const scoreNameResult = scoreName({first: request.firstName, last: [request.lastName, request.legalName]}, result.name, 'F', explainScore); + this.scores.name = scoreNameResult.score + if (explainScore) this.explain.name = scoreNameResult.explain } else { - this.scores.name = scoreName({first: request.firstName, last: request.lastName}, result.name, 'F'); + const scoreNameResult = scoreName({first: request.firstName, last: request.lastName}, result.name, 
'F', explainScore); + this.scores.name = scoreNameResult.score + if (explainScore) this.explain.name = scoreNameResult.explain } } else { - this.scores.name = scoreName({first: request.firstName, last: request.lastName}, result.name, 'M'); + const scoreNameResult = scoreName({first: request.firstName, last: request.lastName}, result.name, 'M', explainScore); + this.scores.name = scoreNameResult.score + if (explainScore) this.explain.name = scoreNameResult.explain } } else { this.scores.score = 0 @@ -710,6 +733,9 @@ export class ScoreResult { export const scoreResults = (request: RequestBody, results: Person[], params: ScoreParams): Person[] => { const pruneScore = params.pruneScore !== undefined ? params.pruneScore : defaultPruneScore + // TODO: use input parameter + // const explainScore = params.explainScore !== undefined ? params.explainScore : false; + const explainScore = true; const candidateNumber = params.candidateNumber || 1; let maxScore = 0; let perfectScoreNumber = 0; @@ -726,7 +752,7 @@ export const scoreResults = (request: RequestBody, results: Person[], params: Sc try { const scoreResult = new ScoreResult(request, result, params); result.scores = scoreResult.scores - result.explain = scoreResult.explain + if (explainScore) result.explain = scoreResult.explain const perfectScores = ((result.scores.name && result.scores.name.score >= perfectScoreThreshold) ? 1 : 0) + ((result.scores.birtDate && result.scores.birthDate.score === 1) ? 1 : 0) +