Skip to content

Commit

Permalink
Add removeDocument to TFIDF (#749)
Browse files Browse the repository at this point in the history
* Add removeDocument to TFIDF

* Bug

* Typo
  • Loading branch information
Hugo-ter-Doest authored Jul 6, 2024
1 parent ed002eb commit bb2569e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
19 changes: 19 additions & 0 deletions lib/natural/tfidf/tfidf.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,25 @@ class TfIdf {
}
}

// Remove a document from the corpus
// Returns true if the document was found
// Returns false if the document was not found
removeDocument (key) {
// Find the document
const index = this.documents.findIndex(function (document) {
return document.__key === key
})
// If found, remove it
if (index > -1) {
this.documents.splice(index, 1)
// Invalidate the cache
this._idfCache = Object.create(null)
return true
}

return false
}

// If restoreCache is set to true, all terms idf scores currently cached will be recomputed.
// Otherwise, the cache will just be wiped clean
addFileSync (path, encoding, key, restoreCache) {
Expand Down
14 changes: 14 additions & 0 deletions spec/tfidf_spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -283,4 +283,18 @@ describe('tfidf', function () {
expect(tfidf.setStopwords(stopwords)).toEqual(false)
})
})

// Covers removeDocument: removing a key succeeds the first time and
// reports false once that key is no longer present.
describe('Remove documents', function () {
it('should remove a document', function () {
tfidf = new TfIdf()

// Two documents are added; the second argument (0 and 1) is the value
// later passed to removeDocument to identify the document
tfidf.addDocument('this document is about node.', 0)
tfidf.addDocument('this document isn\'t about node.', 1)

// First removal of key 0 is expected to return true
const result1 = tfidf.removeDocument(0)

Check failure on line 294 in spec/tfidf_spec.ts

View workflow job for this annotation

GitHub Actions / build (16.x)

Property 'removeDocument' does not exist on type 'TfIdf'.

Check failure on line 294 in spec/tfidf_spec.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

Property 'removeDocument' does not exist on type 'TfIdf'.

Check failure on line 294 in spec/tfidf_spec.ts

View workflow job for this annotation

GitHub Actions / build (20.x)

Property 'removeDocument' does not exist on type 'TfIdf'.
expect(result1).toEqual(true)
// Removing the same key a second time is expected to return false
const result2 = tfidf.removeDocument(0)

Check failure on line 296 in spec/tfidf_spec.ts

View workflow job for this annotation

GitHub Actions / build (16.x)

Property 'removeDocument' does not exist on type 'TfIdf'.

Check failure on line 296 in spec/tfidf_spec.ts

View workflow job for this annotation

GitHub Actions / build (18.x)

Property 'removeDocument' does not exist on type 'TfIdf'.

Check failure on line 296 in spec/tfidf_spec.ts

View workflow job for this annotation

GitHub Actions / build (20.x)

Property 'removeDocument' does not exist on type 'TfIdf'.
expect(result2).toEqual(false)
})
})
})

0 comments on commit bb2569e

Please sign in to comment.