Skip to content

Commit

Permalink
updated removeStopwords
Browse files Browse the repository at this point in the history
  • Loading branch information
fergiemcdowall committed Dec 12, 2023
1 parent d705ed4 commit a12eb44
Show file tree
Hide file tree
Showing 9 changed files with 2,768 additions and 419 deletions.
12 changes: 4 additions & 8 deletions dist/stopword.cjs.js
Original file line number Diff line number Diff line change
Expand Up @@ -14273,16 +14273,12 @@ const zul = [
'ngelinye'
];

// NOTE(review): this diff hunk is rendered without +/- markers, so the
// pre-commit and post-commit versions of removeStopwords are interleaved
// below and the text is not runnable as shown. Going by the "4 additions &
// 8 deletions" header, the `function (tokens, stopwords)` form (together with
// the defaultStopwords alias) appears to be the removed side and the arrow
// function the added side.
const defaultStopwords = eng;

// Older form: any falsy `stopwords` falls back to defaultStopwords, and the
// guard only tests `typeof ... !== 'object'`, which null and plain objects
// also pass.
const removeStopwords = function (tokens, stopwords) {
stopwords = stopwords || defaultStopwords;
if (typeof tokens !== 'object' || typeof stopwords !== 'object') {
// default to english stopword list
// Newer form: the default parameter supplies `eng` only when `stopwords` is
// undefined, and both arguments must be real arrays or an Error is thrown.
const removeStopwords = (tokens, stopwords = eng) => {
if (!Array.isArray(tokens) || !Array.isArray(stopwords)) {
throw new Error('expected Arrays try: removeStopwords(Array[, Array])')
}
// Older filter body: keep a token when its lowercased form is not found in
// stopwords.
return tokens.filter(function (value) {
return stopwords.indexOf(value.toLowerCase()) === -1
})
// Newer filter body: membership test written with includes(). Only the token
// is lowercased, not the stopword entries, and a token without a toLowerCase
// method makes this line throw a TypeError.
return tokens.filter(x => !stopwords.includes(x.toLowerCase()))
};

exports._123 = _123;
Expand Down
2 changes: 1 addition & 1 deletion dist/stopword.cjs.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/stopword.esm.min.mjs

Large diffs are not rendered by default.

12 changes: 4 additions & 8 deletions dist/stopword.esm.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -14269,16 +14269,12 @@ const zul = [
'ngelinye'
];

// NOTE(review): ESM build hunk, shown without +/- markers; old and new lines
// of removeStopwords are interleaved, so this is not valid code as rendered.
// The "4 additions & 8 deletions" header suggests the `function` expression
// and the defaultStopwords alias are the deleted lines and the arrow function
// is the added one.
const defaultStopwords = eng;

// Older form: replaces a falsy `stopwords` with defaultStopwords, then rejects
// only arguments whose typeof is not 'object' (null and plain objects get
// through this check).
const removeStopwords = function (tokens, stopwords) {
stopwords = stopwords || defaultStopwords;
if (typeof tokens !== 'object' || typeof stopwords !== 'object') {
// default to english stopword list
// Newer form: `eng` is used only when `stopwords` is undefined; anything that
// is not an array, for either argument, results in the Error below.
const removeStopwords = (tokens, stopwords = eng) => {
if (!Array.isArray(tokens) || !Array.isArray(stopwords)) {
throw new Error('expected Arrays try: removeStopwords(Array[, Array])')
}
// Older filter body: a token survives when indexOf cannot find its lowercased
// form in stopwords.
return tokens.filter(function (value) {
return stopwords.indexOf(value.toLowerCase()) === -1
})
// Newer filter body: returns a new array of the tokens whose lowercased form
// is not in stopwords. Stopword entries are compared as-is, and a token that
// has no toLowerCase method causes a TypeError here.
return tokens.filter(x => !stopwords.includes(x.toLowerCase()))
};

export { _123, afr, ara, ben, bre, bul, cat, ces, dan, deu, ell, eng, epo, est, eus, fas, fin, fra, gle, glg, guj, hau, heb, hin, hrv, hun, hye, ind, ita, jpn, kor, kur, lat, lav, lgg, lggNd, lit, mar, msa, mya, nld, nob, panGu, pol, por, porBr, removeStopwords, ron, rus, slk, slv, som, sot, spa, swa, swe, tgl, tha, tur, ukr, urd, vie, yor, zho, zul };
12 changes: 4 additions & 8 deletions dist/stopword.umd.js
Original file line number Diff line number Diff line change
Expand Up @@ -14275,16 +14275,12 @@
'ngelinye'
];

// NOTE(review): UMD build hunk. The page drops the +/- diff markers, so the
// two versions of removeStopwords below overlap and cannot run as written.
// Based on the "4 additions & 8 deletions" header, the defaultStopwords alias
// and the `function` expression look like the removed lines, and the arrow
// function like the added lines.
const defaultStopwords = eng;

// Older form: `stopwords || defaultStopwords` substitutes the default for any
// falsy value; the typeof guard accepts every object, including null.
const removeStopwords = function (tokens, stopwords) {
stopwords = stopwords || defaultStopwords;
if (typeof tokens !== 'object' || typeof stopwords !== 'object') {
// default to english stopword list
// Newer form: default parameter value is `eng` (applied for undefined only);
// Array.isArray is required to hold for both arguments, otherwise it throws.
const removeStopwords = (tokens, stopwords = eng) => {
if (!Array.isArray(tokens) || !Array.isArray(stopwords)) {
throw new Error('expected Arrays try: removeStopwords(Array[, Array])')
}
// Older filter body: drops tokens whose lowercased form has an index in
// stopwords.
return tokens.filter(function (value) {
return stopwords.indexOf(value.toLowerCase()) === -1
})
// Newer filter body: keeps tokens whose lowercased form is not included in
// stopwords. The stopword entries themselves are not lowercased, and calling
// toLowerCase on a token that lacks it throws a TypeError.
return tokens.filter(x => !stopwords.includes(x.toLowerCase()))
};

exports._123 = _123;
Expand Down
2 changes: 1 addition & 1 deletion dist/stopword.umd.min.js

Large diffs are not rendered by default.

80 changes: 71 additions & 9 deletions src/stopword.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,78 @@ import { vie } from './stopwords_vie.js'
import { yor } from './stopwords_yor.js'
import { zul } from './stopwords_zul.js'

// NOTE(review): src/stopword.js hunk rendered without +/- markers; the
// previous and the updated removeStopwords are interleaved and would not
// parse together. The defaultStopwords alias and the `function` expression
// appear to be the lines this commit deletes; the arrow function appears to
// be what it adds.
const defaultStopwords = eng

// Older form: a falsy `stopwords` is replaced by defaultStopwords, and the
// guard is a typeof check that null and non-array objects also satisfy.
const removeStopwords = function (tokens, stopwords) {
stopwords = stopwords || defaultStopwords
if (typeof tokens !== 'object' || typeof stopwords !== 'object') {
// default to english stopword list
// Newer form: `stopwords` defaults to `eng` when undefined, and both
// arguments must pass Array.isArray or the Error below is thrown.
const removeStopwords = (tokens, stopwords = eng) => {
if (!Array.isArray(tokens) || !Array.isArray(stopwords)) {
throw new Error('expected Arrays try: removeStopwords(Array[, Array])')
}
// Older filter body: keep a token when indexOf reports its lowercased form
// is absent from stopwords.
return tokens.filter(function (value) {
return stopwords.indexOf(value.toLowerCase()) === -1
})
// Newer filter body: returns the tokens whose lowercased form is not in
// stopwords. Only the token side is lowercased, and a token without a
// toLowerCase method raises a TypeError on this line.
return tokens.filter(x => !stopwords.includes(x.toLowerCase()))
}

// NOTE(review): the diff is shown without +/- markers. The single-line export
// directly below appears to be the removed form; the multi-line export after
// it lists the same 65 names, one per line, with the stopword lists sorted
// alphabetically after removeStopwords and _123.
export { removeStopwords, _123, afr, ara, hye, eus, ben, bre, bul, cat, zho, hrv, ces, dan, nld, eng, epo, est, fin, fra, glg, deu, ell, guj, hau, heb, hin, hun, ind, gle, ita, jpn, kor, kur, lat, lav, lit, lgg, lggNd, msa, mar, mya, nob, fas, pol, por, porBr, panGu, ron, rus, slk, slv, som, sot, spa, swa, swe, tha, tgl, tur, urd, ukr, vie, yor, zul }
// Reformatted export list: the filter function first, then the stopword
// arrays imported at the top of this module.
export {
removeStopwords,
_123,
afr,
ara,
ben,
bre,
bul,
cat,
ces,
dan,
deu,
ell,
eng,
epo,
est,
eus,
fas,
fin,
fra,
gle,
glg,
guj,
hau,
heb,
hin,
hrv,
hun,
hye,
ind,
ita,
jpn,
kor,
kur,
lat,
lav,
lgg,
lggNd,
lit,
mar,
msa,
mya,
nld,
nob,
panGu,
pol,
por,
porBr,
ron,
rus,
slk,
slv,
som,
sot,
spa,
swa,
swe,
tgl,
tha,
tur,
ukr,
urd,
vie,
yor,
zho,
zul
}
1 change: 0 additions & 1 deletion src/stopwords__123.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

// Digit-string lists; the rest of this file is collapsed in the diff view, so
// how they are combined is not visible here.
// ASCII digits 0-9.
const num123 = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
// Extended Arabic-Indic (Persian) digits, listed 1-9 and then 0.
const numFas = ['۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹', '۰']
// NOTE(review): as rendered, these are the same ASCII digits as num123;
// confirm against the repository whether other (e.g. full-width) digit
// characters were intended and lost in this rendering.
const numKor = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
Expand Down
Loading

0 comments on commit a12eb44

Please sign in to comment.