Skip to content
This repository has been archived by the owner on Oct 4, 2022. It is now read-only.

LIN-623 Fix keyphrase in slug researcher for compound keyphrases with hyphens #19

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ const expectedResults = {
},
urlKeyword: {
isApplicable: true,
score: 6,
resultText: "<a href='https://yoa.st/33o' target='_blank'>Keyphrase in slug</a>: (Part of) your keyphrase does not appear in the slug. <a href='https://yoa.st/33p' target='_blank'>Change that</a>!",
score: 9,
resultText: "<a href='https://yoa.st/33o' target='_blank'>Keyphrase in slug</a>: Great work!",
},
urlLength: {
isApplicable: true,
Expand Down
16 changes: 15 additions & 1 deletion packages/yoastseo/spec/researches/keywordCountInUrlSpec.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,27 @@ describe( "test to check url for keyword", function() {
expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 2, percentWordMatches: 100 } );
} );

it( "returns no matches for dashed words", function() {
it( "returns no matches for differently dashed words", function() {
const paper = new Paper( "", { url: "url-with-key-word", keyword: "keyword" } );
const researcher = new Researcher( paper );
researcher.addResearchData( "morphology", morphologyData );
expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 1, percentWordMatches: 0 } );
} );

it( "returns matches for equally dashed words", function() {
// The keyphrase "key-word" is hyphenated the same way as the "key-word" part of the slug.
// The expectation below counts the compound as two keyphrase words, both of which are found.
const paper = new Paper( "", { url: "url-with-key-word", keyword: "key-word" } );
const researcher = new Researcher( paper );
researcher.addResearchData( "morphology", morphologyData );
expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 2, percentWordMatches: 100 } );
} );

it( "returns matches for equally dashed words with more words around", function() {
// The expected keyphrase length is 4: "exciting", "key", "word" and "exciting" again, i.e. the
// hyphenated compound counts as two words. Only "key" and "word" occur in the slug, hence 50%.
const paper = new Paper( "", { url: "url-with-key-word", keyword: "exciting key-word exciting" } );
const researcher = new Researcher( paper );
researcher.addResearchData( "morphology", morphologyData );
expect( urlKeyword( paper, researcher ) ).toEqual( { keyphraseLength: 4, percentWordMatches: 50 } );
} );

it( "returns matches with diacritics", function() {
const paper = new Paper( "", { url: "url-with-key-word", keyword: "këyword" } );
const researcher = new Researcher( paper );
Expand Down
47 changes: 46 additions & 1 deletion packages/yoastseo/src/researches/keywordCountInUrl.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,51 @@

import { findTopicFormsInString } from "./findKeywordFormsInString.js";

/**
 * Splits hyphenated compound words in the keyphrase forms into two separate keyphrase entries.
 *
 * E.g., for a keyphrase "modern pop-art" the morphological research generates the forms
 * [ [ modern, moderner, ... ], [ pop-art, pop-arts, ... ] ]. The keyphrase-in-slug research replaces dashes
 * in the slug with spaces, so a compound has to be looked up as two separate words (`pop` and `art`).
 * This function therefore turns the forms above into
 * [ [ modern, moderner, ... ], [ pop ], [ art, arts, ... ] ].
 *
 * Only the first hyphen of a compound is used as the split point: the part before it becomes an entry
 * with a single form, and the remainders of all forms that start with that part plus a hyphen become
 * the second entry. Only `keyphraseForms` is processed; all other properties are copied over untouched.
 *
 * The passed object is not modified, so the (possibly shared) morphology research result stays intact
 * for other consumers.
 *
 * @param {Object}  topicForms                The result of the morphology research.
 * @param {Array[]} topicForms.keyphraseForms One array of word forms per keyphrase word.
 *
 * @returns {Object} A shallow copy of topicForms in which keyphraseForms has its compounds split.
 */
function dehyphenateKeyphraseForms( topicForms ) {
	const dehyphenatedKeyphraseForms = [];

	topicForms.keyphraseForms.forEach( function( lemma ) {
		const firstWord = lemma[ 0 ];

		// Entries without any form, or whose base form has no hyphen, are kept as they are.
		if ( typeof firstWord !== "string" || firstWord.indexOf( "-" ) === -1 ) {
			dehyphenatedKeyphraseForms.push( lemma );
			return;
		}

		const unchangedPart = firstWord.split( "-" )[ 0 ];
		// Require the hyphen as part of the prefix, so that e.g. "popular" is not mistaken for a "pop-" compound.
		const compoundPrefix = unchangedPart + "-";

		const dehyphenatedLemma = [];
		lemma.forEach( function( wordInLemma ) {
			if ( ! wordInLemma.startsWith( compoundPrefix ) ) {
				return;
			}

			const trimmedWordInLemma = wordInLemma.slice( compoundPrefix.length );
			// A form that ends in the hyphen leaves nothing to search for.
			if ( trimmedWordInLemma !== "" ) {
				dehyphenatedLemma.push( trimmedWordInLemma );
			}
		} );

		// A degenerate "compound" (e.g. a lone "-") yields nothing usable; keep the original entry in that case.
		if ( unchangedPart === "" && dehyphenatedLemma.length === 0 ) {
			dehyphenatedKeyphraseForms.push( lemma );
			return;
		}

		// Never emit empty forms or empty entries: they can not be matched in the slug.
		if ( unchangedPart !== "" ) {
			dehyphenatedKeyphraseForms.push( [ unchangedPart ] );
		}
		if ( dehyphenatedLemma.length > 0 ) {
			dehyphenatedKeyphraseForms.push( dehyphenatedLemma );
		}
	} );

	// Return a copy instead of overwriting the property on the caller's object.
	return Object.assign( {}, topicForms, { keyphraseForms: dehyphenatedKeyphraseForms } );
}


/**
* Matches the keyword in the URL. Replaces dashes and underscores with whitespace and uses whitespace as the word boundary.
*
Expand All @@ -11,7 +56,7 @@ import { findTopicFormsInString } from "./findKeywordFormsInString.js";
* @returns {int} Number of times the keyword is found.
*/
export default function( paper, researcher ) {
const topicForms = researcher.getResearch( "morphology" );
const topicForms = dehyphenateKeyphraseForms( researcher.getResearch( "morphology" ) );
const slug = paper.getUrl().replace( /[-_]/ig, " " );

const keyphraseInSlug = findTopicFormsInString( topicForms, slug, false, paper.getLocale() );
Expand Down