diff --git a/plugins/arSolrPlugin/config/arSolrPluginConfiguration.class.php b/plugins/arSolrPlugin/config/arSolrPluginConfiguration.class.php index b169412dc2..db4e2bd767 100644 --- a/plugins/arSolrPlugin/config/arSolrPluginConfiguration.class.php +++ b/plugins/arSolrPlugin/config/arSolrPluginConfiguration.class.php @@ -38,5 +38,23 @@ public function initialize() $enabledModules = sfConfig::get('sf_enabled_modules'); $enabledModules[] = 'arSolrPlugin'; sfConfig::set('sf_enabled_modules', $enabledModules); + + if ($this->configuration instanceof sfApplicationConfiguration) { + // Use config cache in application context + $configCache = $this->configuration->getConfigCache(); + $configCache->registerConfigHandler(self::$configPath, 'arSolrConfigHandler'); + + self::$config = include $configCache->checkConfig(self::$configPath); + } else { + // Live parsing (task context) + self::reloadConfig($this->configuration); + } + } + + public static function reloadConfig($configuration) + { + $configPaths = $configuration->getConfigPaths(self::$configPath); + + self::$config = arSolrConfigHandler::getConfiguration($configPaths); } } diff --git a/plugins/arSolrPlugin/config/search.yml b/plugins/arSolrPlugin/config/search.yml index 2bfb71882c..86ab9a8c29 100644 --- a/plugins/arSolrPlugin/config/search.yml +++ b/plugins/arSolrPlugin/config/search.yml @@ -25,203 +25,234 @@ all: analyzer: default: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, preserved_asciifolding] # This is a special analyzer for autocomplete searches. It's used only # in some fields as it can make the index very big. 
autocomplete: - tokenizer: whitespace + tokenizer: solr.WhitespaceTokenizerFactory filter: [lowercase, engram, preserved_asciifolding] + # language stopwords arabic: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, arabic_stop, preserved_asciifolding] armenian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, armenian_stop, preserved_asciifolding] basque: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, basque_stop, preserved_asciifolding] brazilian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, brazilian_stop, preserved_asciifolding] bulgarian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, bulgarian_stop, preserved_asciifolding] catalan: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, catalan_stop, preserved_asciifolding] czech: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, czech_stop, preserved_asciifolding] danish: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, danish_stop, preserved_asciifolding] dutch: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, dutch_stop, preserved_asciifolding] english: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, english_stop, preserved_asciifolding] finnish: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, finnish_stop, preserved_asciifolding] french: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, french_stop, preserved_asciifolding, french_elision] galician: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, galician_stop, preserved_asciifolding] german: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, german_stop,
preserved_asciifolding] greek: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, greek_stop, preserved_asciifolding] hindi: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, hindi_stop, preserved_asciifolding] hungarian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, hungarian_stop, preserved_asciifolding] indonesian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, indonesian_stop, preserved_asciifolding] italian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, italian_stop, preserved_asciifolding] norwegian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, norwegian_stop, preserved_asciifolding] persian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, persian_stop, preserved_asciifolding] portuguese: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, portuguese_stop, preserved_asciifolding] romanian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, romanian_stop, preserved_asciifolding] russian: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, russian_stop, preserved_asciifolding] spanish: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, spanish_stop, preserved_asciifolding] swedish: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, swedish_stop, preserved_asciifolding] turkish: - tokenizer: standard + tokenizer: solr.StandardTokenizerFactory filter: [lowercase, turkish_stop, preserved_asciifolding] + # TODO: normalizer, look into solr.PatternReplaceCharFilterFactory normalizer: # Custom normalizer that lowercases text, removes punctation, and # does ascii folding for more natural alphabetic sorting alphasort: - type: custom - filter: 
[lowercase, preserved_asciifolding] - char_filter: [punctuation_filter] + class: custom + filter: [lowercase, preserved_asciifolding, punctuation_filter] filter: + # TODO: add engram to filter lists engram: - type: edgeNGram - min_gram: 3 - max_gram: 10 + class: solr.EdgeNGramFilterFactory + minGramSize: 3 + maxGramSize: 10 + # TODO: french_elision french_elision: - type: elision + class: elision articles: [l, m, t, qu, n, s, j, d, c, jusqu, quoiqu, lorsqu, puisqu] preserved_asciifolding: - type: asciifolding - preserve_original: true + class: solr.ASCIIFoldingFilterFactory + preserveOriginal: true + lowercase: + class: solr.LowerCaseFilterFactory # To make 'stopwords' works with other token filters the analyzers can't have # standard type and the 'stopwords' needs to be added as a token filter too arabic_stop: - type: stop - stopwords: _arabic_ + class: solr.StopFilterFactory + stopwords: stopwords/arabic.txt + ignoreCase: true armenian_stop: - type: stop - stopwords: _armenian_ + class: solr.StopFilterFactory + stopwords: stopwords/armenian.txt + ignoreCase: true basque_stop: - type: stop - stopwords: _basque_ + class: solr.StopFilterFactory + stopwords: stopwords/basque.txt + ignoreCase: true brazilian_stop: - type: stop - stopwords: _brazilian_ + class: solr.StopFilterFactory + stopwords: stopwords/brazilian.txt + ignoreCase: true bulgarian_stop: - type: stop - stopwords: _bulgarian_ + class: solr.StopFilterFactory + stopwords: stopwords/bulgarian.txt + ignoreCase: true catalan_stop: - type: stop - stopwords: _catalan_ + class: solr.StopFilterFactory + stopwords: stopwords/catalan.txt + ignoreCase: true czech_stop: - type: stop - stopwords: _czech_ + class: solr.StopFilterFactory + stopwords: stopwords/czech.txt + ignoreCase: true danish_stop: - type: stop - stopwords: _danish_ + class: solr.StopFilterFactory + stopwords: stopwords/danish.txt + ignoreCase: true dutch_stop: - type: stop - stopwords: _dutch_ + class: solr.StopFilterFactory + stopwords: 
stopwords/dutch.txt + ignoreCase: true english_stop: - type: stop - stopwords: _english_ + class: solr.StopFilterFactory + stopwords: stopwords/english.txt + ignoreCase: true finnish_stop: - type: stop - stopwords: _finnish_ + class: solr.StopFilterFactory + stopwords: stopwords/finnish.txt + ignoreCase: true french_stop: - type: stop - stopwords: _french_ + class: solr.StopFilterFactory + stopwords: stopwords/french.txt + ignoreCase: true galician_stop: - type: stop - stopwords: _galician_ + class: solr.StopFilterFactory + stopwords: stopwords/galician.txt + ignoreCase: true german_stop: - type: stop - stopwords: _german_ + class: solr.StopFilterFactory + stopwords: stopwords/german.txt + ignoreCase: true greek_stop: - type: stop - stopwords: _greek_ + class: solr.StopFilterFactory + stopwords: stopwords/greek.txt + ignoreCase: true hindi_stop: - type: stop - stopwords: _hindi_ + class: solr.StopFilterFactory + stopwords: stopwords/hindi.txt + ignoreCase: true hungarian_stop: - type: stop - stopwords: _hungarian_ + class: solr.StopFilterFactory + stopwords: stopwords/hungarian.txt + ignoreCase: true indonesian_stop: - type: stop - stopwords: _indonesian_ + class: solr.StopFilterFactory + stopwords: stopwords/indonesian.txt + ignoreCase: true italian_stop: - type: stop - stopwords: _italian_ + class: solr.StopFilterFactory + stopwords: stopwords/italian.txt + ignoreCase: true norwegian_stop: - type: stop - stopwords: _norwegian_ + class: solr.StopFilterFactory + stopwords: stopwords/norwegian.txt + ignoreCase: true persian_stop: - type: stop - stopwords: _persian_ + class: solr.StopFilterFactory + stopwords: stopwords/persian.txt + ignoreCase: true portuguese_stop: - type: stop - stopwords: _portuguese_ + class: solr.StopFilterFactory + stopwords: stopwords/portuguese.txt + ignoreCase: true romanian_stop: - type: stop - stopwords: _romanian_ + class: solr.StopFilterFactory + stopwords: stopwords/romanian.txt + ignoreCase: true russian_stop: - type: stop - 
stopwords: _russian_ + class: solr.StopFilterFactory + stopwords: stopwords/russian.txt + ignoreCase: true spanish_stop: - type: stop - stopwords: _spanish_ + class: solr.StopFilterFactory + stopwords: stopwords/spanish.txt + ignoreCase: true swedish_stop: - type: stop - stopwords: _swedish_ + class: solr.StopFilterFactory + stopwords: stopwords/swedish.txt + ignoreCase: true turkish_stop: - type: stop - stopwords: _turkish_ + class: solr.StopFilterFactory + stopwords: stopwords/turkish.txt + ignoreCase: true char_filter: - # This char_filter is added to all analyzers when the index # is created in arElasticSearchPlugin initialize when the # app_markdown_enabled setting is set to true. Ideally, the @@ -231,13 +262,13 @@ all: # we're replacing the following punctuation chars by spaces: # *_#![]()->`+\~:|^= strip_md: - type: pattern_replace + class: solr.PatternReplaceFilterFactory pattern: '[\*_#!\[\]\(\)\->`\+\\~:\|\^=]' replacement: ' ' # Strip punctation from a string punctuation_filter: - type: pattern_replace + class: solr.PatternReplaceFilterFactory pattern: '["''_\-\?!\.\(\)\[\]#\*`:;]' replacement: '' diff --git a/plugins/arSolrPlugin/config/stopwords/arabic.txt b/plugins/arSolrPlugin/config/stopwords/arabic.txt new file mode 100644 index 0000000000..c55e4dc3d9 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/arabic.txt @@ -0,0 +1,123 @@ +# Arabic Stopwords +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/plugins/arSolrPlugin/config/stopwords/armenian.txt b/plugins/arSolrPlugin/config/stopwords/armenian.txt new file mode 100644 index 0000000000..62897d62fa --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/armenian.txt @@ -0,0 +1,46 @@ +# Armenian Stopwords +այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/plugins/arSolrPlugin/config/stopwords/basque.txt b/plugins/arSolrPlugin/config/stopwords/basque.txt new file mode 100644 index 0000000000..7672d6b40b --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/basque.txt @@ -0,0 +1,99 @@ +# Basque Stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor 
+nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/plugins/arSolrPlugin/config/stopwords/brazilian.txt b/plugins/arSolrPlugin/config/stopwords/brazilian.txt new file mode 100644 index 0000000000..d137affcae --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/brazilian.txt @@ -0,0 +1,129 @@ +# Brazilian Stopwords +a +ainda +alem +ambas +ambos +antes +ao +aonde +aos +apos +aquele +aqueles +as +assim +com +como +contra +contudo +cuja +cujas +cujo +cujos +da +das +de +dela +dele +deles +demais +depois +desde +desta +deste +dispoe +dispoem +diversa +diversas +diversos +do +dos +durante +e +ela +elas +ele +eles +em +entao +entre +essa +essas +esse +esses +esta +estas +este +estes +ha +isso +isto +logo +mais +mas +mediante +menos +mesma +mesmas +mesmo +mesmos +na +nas +nao +nas +nem +nesse +neste +nos +o +os +ou +outra +outras +outro +outros +pelas +pelas +pelo +pelos +perante +pois +por +porque +portanto +proprio +propios +quais +qual +qualquer +quando +quanto +que +quem +quer +se +seja +sem +sendo +seu +seus +sob +sobre +sua +suas +tal +tambem +teu +teus +toda +todas +todo +todos +tua +tuas +tudo +um +uma +umas +uns diff --git a/plugins/arSolrPlugin/config/stopwords/bulgarian.txt b/plugins/arSolrPlugin/config/stopwords/bulgarian.txt new file mode 100644 index 0000000000..f6d015ca6b --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/bulgarian.txt @@ -0,0 +1,194 @@ +# Bulgarian Stopwords +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/plugins/arSolrPlugin/config/stopwords/catalan.txt b/plugins/arSolrPlugin/config/stopwords/catalan.txt new file mode 100644 index 0000000000..9aecace985 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/catalan.txt @@ -0,0 +1,220 @@ +# Catalan Stopwords +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora 
+gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/plugins/arSolrPlugin/config/stopwords/czech.txt b/plugins/arSolrPlugin/config/stopwords/czech.txt new file mode 100644 index 0000000000..078c7c476d --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/czech.txt @@ -0,0 +1,173 @@ +# Czech Stopwords +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne 
+jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/plugins/arSolrPlugin/config/stopwords/danish.txt b/plugins/arSolrPlugin/config/stopwords/danish.txt new file mode 100644 index 0000000000..77d6e63160 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/danish.txt @@ -0,0 +1,95 @@ +# Danish Stopwords +og +i +jeg +det +at +en +den +til +er +som +på +de +med +han +af +for +ikke +der +var +mig +sig +men +et +har +om +vi +min +havde +ham +hun +nu +over +da +fra +du +ud +sin +dem +os +op +man +hans +hvor +eller +hvad +skal +selv +her +alle +vil +blev +kunne +ind +når +være +dog +noget +ville +jo +deres +efter +ned +skulle +denne +end +dette +mit +også +under +have +dig +anden +hende +mine +alt +meget +sit +sine +vor +mod +disse +hvis +din +nogle +hos +blive +mange +ad +bliver +hendes +været +thi +jer +sådan diff --git a/plugins/arSolrPlugin/config/stopwords/dutch.txt b/plugins/arSolrPlugin/config/stopwords/dutch.txt new file mode 100644 index 0000000000..aabab0a734 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/dutch.txt @@ -0,0 +1,105 @@ +# Dutch Stopwords +# This file is distributed under the BSD License. 
+# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +de +en +van +ik +te +dat +die +in +een +hij +het +niet +zijn +is +was +op +aan +met +als +voor +had +er +maar +om +hem +dan +zou +of +wat +mijn +men +dit +zo +door +over +ze +zich +bij +ook +tot +je +mij +uit +der +daar +haar +naar +heb +hoe +heeft +hebben +deze +u +want +nog +zal +me +zij +nu +ge +geen +omdat +iets +worden +toch +al +waren +veel +meer +doen +toen +moet +ben +zonder +kan +hun +dus +alles +onder +ja +eens +hier +wie +werd +altijd +doch +wordt +wezen +kunnen +ons +zelf +tegen +na +reeds +wil +kon +niets +uw +iemand +geweest +andere diff --git a/plugins/arSolrPlugin/config/stopwords/english.txt b/plugins/arSolrPlugin/config/stopwords/english.txt new file mode 100644 index 0000000000..8550f17203 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/english.txt @@ -0,0 +1,178 @@ +# English Stopwords +# This file is distributed under the BSD License. +# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +i +me +my +myself +we +our +ours +ourselves +you +your +yours +yourself +yourselves +he +him +his +himself +she +her +hers +herself +it +its +itself +they +them +their +theirs +themselves +what +which +who +whom +this +that +these +those +am +is +are +was +were +be +been +being +have +has +had +having +do +does +did +doing +would +should +could +ought +i'm +you're +he's +she's +it's +we're +they're +i've +you've +we've +they've +i'd +you'd +he'd +she'd +we'd +they'd +i'll +you'll +he'll +she'll +we'll +they'll +isn't +aren't +wasn't +weren't +hasn't +haven't +hadn't +doesn't +don't +didn't +won't +wouldn't +shan't +shouldn't +can't +cannot +couldn't +mustn't +let's +that's +who's +what's +here's +there's +when's +where's +why's +how's +a +an +the +and +but +if +or +because +as +until +while +of +at +by +for +with +about +against +between +into +through +during +before +after +above +below
+to +from +up +down +in +out +on +off +over +under +again +further +then +once +here +there +when +where +why +how +all +any +both +each +few +more +most +other +some +such +no +nor +not +only +own +same +so +than +too +very diff --git a/plugins/arSolrPlugin/config/stopwords/finnish.txt b/plugins/arSolrPlugin/config/stopwords/finnish.txt new file mode 100644 index 0000000000..d571b6efde --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/finnish.txt @@ -0,0 +1,71 @@ +# Finnish Stopwords +# This file is distributed under the BSD License. +# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +olla +olen +olet +on +olemme +olette +ovat +ole +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet +en +et +ei +emme +ette +eivät +minä +sinä +hän +me +te +he +tämä +tuo +se +nämä +nuo +ne +kuka +ketkä +mikä +mitkä +joka +jotka +että +ja +jos +koska +kuin +mutta +niin +sekä +sillä +tai +vaan +vai +vaikka +kanssa +mukaan +noin +poikki +yli +kun +nyt +itse diff --git a/plugins/arSolrPlugin/config/stopwords/french.txt b/plugins/arSolrPlugin/config/stopwords/french.txt new file mode 100644 index 0000000000..a10fdc623b --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/french.txt @@ -0,0 +1,158 @@ +# French Stopwords +# This file is distributed under the BSD License. 
+# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +au +aux +avec +ce +ces +dans +de +des +du +elle +en +et +eux +il +je +la +le +leur +lui +ma +mais +me +même +mes +moi +mon +ne +nos +notre +nous +on +ou +par +pas +pour +qu +que +qui +sa +se +ses +sur +ta +te +tes +toi +ton +tu +un +une +vos +votre +vous +c +d +j +l +à +m +n +s +t +y +étée +étées +étant +suis +es +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fussions +fussiez +fussent +ayant +eu +eue +eues +eus +ai +avons +avez +ont +aurai +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent +ceci +cela +celà +cet +cette +ici +ils +les +leurs +quel +quels +quelle +quelles +sans +soi diff --git a/plugins/arSolrPlugin/config/stopwords/galician.txt b/plugins/arSolrPlugin/config/stopwords/galician.txt new file mode 100644 index 0000000000..1898110e83 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/galician.txt @@ -0,0 +1,161 @@ +# Galician Stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois
+pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/plugins/arSolrPlugin/config/stopwords/german.txt b/plugins/arSolrPlugin/config/stopwords/german.txt new file mode 100644 index 0000000000..0566fad032 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/german.txt @@ -0,0 +1,235 @@ +# German Stopwords +# This file is distributed under the BSD License. +# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +aber +alle +allem +allen +aller +alles +als +also +am +an +ander +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders +auch +auf +aus +bei +bin +bis +bist +da +damit +dann +der +den +des +dem +die +das +daß +derselbe +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe +dazu +dein +deine +deinem +deinen +deiner +deines +denn +derer +dessen +dich +dir +du +dies +diese +diesem +diesen +dieser +dieses +doch +dort +durch +ein +eine +einem +einen +einer +eines +einig +einige +einigem +einigen +einiger +einiges +einmal +er +ihn +ihm +es +etwas +euer +eure +eurem +euren +eurer +eures +für +gegen +gewesen +hab +habe +haben +hat +hatte +hatten +hier +hin +hinter +ich +mich +mir +ihr +ihre +ihrem +ihren +ihrer +ihres +euch +im +in +indem +ins +ist +jede +jedem +jeden +jeder +jedes +jene +jenem +jenen +jener +jenes +jetzt +kann +kein +keine +keinem +keinen +keiner +keines +können +könnte +machen +man +manche +manchem +manchen +mancher +manches +mein +meine +meinem +meinen +meiner +meines +mit +muss +musste +nach +nicht +nichts +noch +nun +nur +ob +oder +ohne +sehr +sein +seine +seinem +seinen +seiner +seines +selbst +sich +sie +ihnen +sind +so +solche +solchem +solchen +solcher +solches +soll +sollte +sondern +sonst +über +um +und +uns +unse +unsem +unsen +unser +unses +unter +viel 
+vom +von +vor +während +war +waren +warst +was +weg +weil +weiter +welche +welchem +welchen +welcher +welches +wenn +werde +werden +wie +wieder +will +wir +wird +wirst +wo +wollen +wollte +würde +würden +zu +zum +zur +zwar +zwischen diff --git a/plugins/arSolrPlugin/config/stopwords/greek.txt b/plugins/arSolrPlugin/config/stopwords/greek.txt new file mode 100644 index 0000000000..5f6a98fb5c --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/greek.txt @@ -0,0 +1,76 @@ +# Greek Stopwords +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/plugins/arSolrPlugin/config/stopwords/hindi.txt b/plugins/arSolrPlugin/config/stopwords/hindi.txt new file mode 100644 index 0000000000..e429ebbeb8 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/hindi.txt @@ -0,0 +1,231 @@ +# Hindi Stopwords +# This file is distributed under the BSD License. 
+# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/plugins/arSolrPlugin/config/stopwords/hungarian.txt b/plugins/arSolrPlugin/config/stopwords/hungarian.txt new file mode 100644 index 0000000000..70253740b3 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/hungarian.txt @@ -0,0 +1,203 @@ +# Hungarian Stopwords +# This file is distributed under the BSD License. 
+# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/plugins/arSolrPlugin/config/stopwords/indonesian.txt b/plugins/arSolrPlugin/config/stopwords/indonesian.txt new file mode 100644 index 0000000000..02d43dae4a --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/indonesian.txt @@ -0,0 +1,358 @@ +# Indonesia Stopwords +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai 
+bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah 
+saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/plugins/arSolrPlugin/config/stopwords/italian.txt b/plugins/arSolrPlugin/config/stopwords/italian.txt new file mode 100644 index 0000000000..5bea18560d --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/italian.txt @@ -0,0 +1,283 @@ +# Italian Stopwords +# This file is distributed under the BSD License. 
+# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +ad +al +allo +ai +agli +all +agl +alla +alle +con +col +coi +da +dal +dallo +dai +dagli +dall +dagl +dalla +dalle +di +del +dello +dei +degli +dell +degl +della +delle +in +nel +nello +nei +negli +nell +negl +nella +nelle +su +sul +sullo +sui +sugli +sull +sugl +sulla +sulle +per +tra +contro +io +tu +lui +lei +noi +voi +loro +mio +mia +miei +mie +tuo +tua +tuoi +tue +suo +sua +suoi +sue +nostro +nostra +nostri +nostre +vostro +vostra +vostri +vostre +mi +ti +ci +vi +lo +la +li +le +gli +ne +il +un +uno +una +ma +ed +se +perché +anche +come +dov +dove +che +chi +cui +non +più +quale +quanto +quanti +quanta +quante +quello +quelli +quella +quelle +questo +questi +questa +queste +si +tutto +tutti +a +c +e +i +l +o +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse 
+stessimo +stessero +stando diff --git a/plugins/arSolrPlugin/config/stopwords/norwegian.txt b/plugins/arSolrPlugin/config/stopwords/norwegian.txt new file mode 100644 index 0000000000..fa27a0741b --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/norwegian.txt @@ -0,0 +1,176 @@ +# Norwegian Stopwords +# This file is distributed under the BSD License. +# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +og +i +jeg +det +at +en +et +den +til +er +som +på +de +med +han +av +ikke +ikkje +der +så +var +meg +seg +men +ett +har +om +vi +min +mitt +ha +hadde +hun +nå +over +da +ved +fra +du +ut +sin +dem +oss +opp +man +kan +hans +hvor +eller +hva +skal +selv +sjøl +her +alle +vil +bli +ble +blei +blitt +kunne +inn +når +være +kom +noen +noe +ville +dere +deres +kun +ja +etter +ned +skulle +denne +for +deg +si +sine +sitt +mot +å +meget +hvorfor +dette +disse +uten +hvordan +ingen +din +ditt +blir +samme +hvilken +hvilke +sånn +inni +mellom +vår +hver +hvem +vors +hvis +både +bare +enn +fordi +før +mange +også +slik +vært +båe +begge +siden +dykk +dykkar +dei +deira +deires +deim +di +då +eg +ein +eit +eitt +elles +honom +hjå +ho +hoe +henne +hennar +hennes +hoss +hossen +ingi +inkje +korleis +korso +kva +kvar +kvarhelst +kven +kvi +kvifor +me +medan +mi +mine +mykje +no +nokon +noka +nokor +noko +nokre +sia +sidan +so +somt +somme +um +upp +vere +vore +verte +vort +varte +vart diff --git a/plugins/arSolrPlugin/config/stopwords/persian.txt b/plugins/arSolrPlugin/config/stopwords/persian.txt new file mode 100644 index 0000000000..e4ef1e87c9 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/persian.txt @@ -0,0 +1,312 @@ +# Persian Stopwords +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# Also see http://www.opensource.org/licenses/bsd-license.html +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/plugins/arSolrPlugin/config/stopwords/portuguese.txt b/plugins/arSolrPlugin/config/stopwords/portuguese.txt new file mode 
100644 index 0000000000..9a504e0fbb --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/portuguese.txt @@ -0,0 +1,207 @@ +# Portuguese Stopwords +# This file is distributed under the BSD License. +# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +de +a +o +que +e +do +da +em +um +para +com +não +uma +os +no +se +na +por +mais +as +dos +como +mas +ao +ele +das +à +seu +sua +ou +quando +muito +nos +já +eu +também +só +pelo +pela +até +isso +ela +entre +depois +sem +mesmo +aos +seus +quem +nas +me +esse +eles +você +essa +num +nem +suas +meu +às +minha +numa +pelos +elas +qual +nós +lhe +deles +essas +esses +pelas +este +dele +tu +te +vocês +vos +lhes +meus +minhas +teu +tua +teus +tuas +nosso +nossa +nossos +nossas +dela +delas +esta +estes +estas +aquele +aquela +aqueles +aquelas +isto +aquilo +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/plugins/arSolrPlugin/config/stopwords/romanian.txt b/plugins/arSolrPlugin/config/stopwords/romanian.txt new file mode 100644 index 0000000000..73aa47567c --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/romanian.txt @@ -0,0 +1,258 @@ 
+# Romanian Stopwords +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +acești +aceşti +aceștia +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aș +aş +așadar +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăștia +ăştia +asupra +ați +aţi +au +avea +avem +aveți +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câți +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîți +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deși +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +ești +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiți +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îți +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulți +mulţi +ne +nicăieri +nici +nimeni +niște +nişte +noastră +noastre +noi +noștri +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +și +şi +sînt +sîntem +sînteți +sînteţi +spre +sub +sunt +suntem +sunteți +sunteţi +ta +tăi +tale +tău +te +ți +ţi +ție +ţie +tine +toată +toate +tot +toți +toţi +totuși +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voștri +voştri +vostru +vouă +vreo +vreun diff --git a/plugins/arSolrPlugin/config/stopwords/russian.txt b/plugins/arSolrPlugin/config/stopwords/russian.txt new file mode 100644 index 
0000000000..998825837b --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/russian.txt @@ -0,0 +1,163 @@ +# Russian Stopwords +# This file is distributed under the BSD License. +# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +и +в +во +не +что +он +на +я +с +со +как +а +то +все +она +так +его +но +да +ты +к +у +же +вы +за +бы +по +только +ее +мне +было +вот +от +меня +еще +нет +о +из +ему +теперь +когда +даже +ну +вдруг +ли +если +уже +или +ни +быть +был +него +до +вас +нибудь +опять +уж +вам +сказал +ведь +там +потом +себя +ничего +ей +может +они +тут +где +есть +надо +ней +для +мы +тебя +их +чем +была +сам +чтоб +без +будто +человек +чего +раз +тоже +себе +под +жизнь +будет +ж +тогда +кто +этот +говорил +того +потому +этого +какой +совсем +ним +здесь +этом +один +почти +мой +тем +чтобы +нее +кажется +сейчас +были +куда +зачем +сказать +всех +никогда +сегодня +можно +при +наконец +два +об +другой +хоть +после +над +больше +тот +через +эти +нас +про +всего +них +какая +много +разве +сказала +три +эту +моя +впрочем +хорошо +свою +этой +перед +иногда +лучше +чуть +том +нельзя +такой +им +более +всегда +конечно +всю +между diff --git a/plugins/arSolrPlugin/config/stopwords/spanish.txt b/plugins/arSolrPlugin/config/stopwords/spanish.txt new file mode 100644 index 0000000000..6ca3c53e15 --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/spanish.txt @@ -0,0 +1,312 @@ +# Spanish Stopwords +# This file is distributed under the BSD License. 
+# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +de +la +que +el +en +y +a +los +del +se +las +por +un +para +con +no +una +su +al +lo +como +más +pero +sus +le +ya +o +este +sí +porque +esta +entre +cuando +muy +sin +sobre +también +me +hasta +hay +donde +quien +desde +todo +nos +durante +todos +uno +les +ni +contra +otros +ese +eso +ante +ellos +e +esto +mí +antes +algunos +qué +unos +yo +otro +otras +otra +él +tanto +esa +estos +mucho +quienes +nada +muchos +cual +poco +ella +estar +estas +algunas +algo +nosotros +mi +mis +tú +te +ti +tu +tus +ellas +nosotras +vosotros +vosotras +os +mío +mía +míos +mías +tuyo +tuya +tuyos +tuyas +suyo +suya +suyos +suyas +nuestro +nuestra +nuestros +nuestras +vuestro +vuestra +vuestros +vuestras +esos +esas +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería +serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis 
+fuesen +siendo +sido +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened diff --git a/plugins/arSolrPlugin/config/stopwords/swedish.txt b/plugins/arSolrPlugin/config/stopwords/swedish.txt new file mode 100644 index 0000000000..1f0318f40e --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/swedish.txt @@ -0,0 +1,118 @@ +# Swedish Stopwords +# This file is distributed under the BSD License. +# See https://snowballstem.org/license.html +# Also see https://opensource.org/licenses/bsd-license.html +och +det +att +i +en +jag +hon +som +han +på +den +med +var +sig +för +så +till +är +men +ett +om +hade +de +av +icke +mig +du +henne +då +sin +nu +har +inte +hans +honom +skulle +hennes +där +min +man +ej +vid +kunde +något +från +ut +när +efter +upp +vi +dem +vara +vad +över +än +dig +kan +sina +här +ha +mot +alla +under +någon +eller +allt +mycket +sedan +ju +denna +själv +detta +åt +utan +varit +hur +ingen +mitt +ni +bli +blev +oss +din +dessa +några +deras +blir +mina +samma +vilken +er +sådan +vår +blivit +dess +inom +mellan +sådant +varför +varje +vilka +ditt +vem +vilket +sitt +sådana +vart +dina +vars +vårt +våra +ert +era +vilkas diff --git a/plugins/arSolrPlugin/config/stopwords/turkish.txt b/plugins/arSolrPlugin/config/stopwords/turkish.txt new file mode 100644 index 0000000000..fec07a605b --- /dev/null +++ b/plugins/arSolrPlugin/config/stopwords/turkish.txt @@ -0,0 +1,210 @@ +# Turkish Stopwords +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi 
+biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/plugins/arSolrPlugin/lib/arSolrPlugin.class.php b/plugins/arSolrPlugin/lib/arSolrPlugin.class.php index 417f2630e4..fd5eb2937b 100644 --- a/plugins/arSolrPlugin/lib/arSolrPlugin.class.php +++ b/plugins/arSolrPlugin/lib/arSolrPlugin.class.php @@ -63,6 +63,8 @@ public function __construct(array $options = []) { parent::__construct(); + $this->config = arSolrPluginConfiguration::$config; + $SOLR_COLLECTION = 'atom'; $this->solrClientOptions = [ 'hostname' => 'solr1', @@ -323,17 +325,34 @@ protected function initialize() && isset($this->config['index']['configuration']['analysis']['char_filter']['strip_md']) ) { foreach 
($this->config['index']['configuration']['analysis']['analyzer'] as $key => $analyzer) { - $filters = ['strip_md']; + if (sfConfig::get('app_diacritics')) { + // TODO: create diacritics_lowercase class + $filters = array_push($filters, ['class' => 'diacritics_lowercase']); + } - if ($this->config['index']['configuration']['analysis']['analyzer'][$key]['char_filter']) { - $filters = array_merge($filters, $this->config['index']['configuration']['analysis']['analyzer'][$key]['char_filter']); + $charFilters = []; + foreach ($this->config['index']['configuration']['analysis']['char_filter'] as $filter) { + $charFilter = array_diff_key($filter, array_flip(['type'])); + array_push($charFilters, $charFilter); } - if (sfConfig::get('app_diacritics')) { - $filters = array_merge($filters, ['diacritics_lowercase']); + $filters = []; + foreach ($analyzer['filter'] as $f) { + $filter = array_diff_key($this->config['index']['configuration']['analysis']['filter'][$f], array_flip(['type', 'preserve_original'])); + array_push($filters, $filter); } - $this->config['index']['configuration']['analysis']['analyzer'][$key]['char_filter'] = $filters; + $query = ['add-field-type' => [ + 'name' => $key, + 'class' => 'solr.TextField', + 'analyzer' => [ + 'tokenizer' => ['class' => $analyzer['tokenizer']], + 'charFilters' => $charFilters, + 'filters' => $filters, + ] + ]]; + $url = $this->solrBaseUrl.'/solr/'.$this->solrClientOptions['collection'].'/schema/'; + arSolrPlugin::makeHttpRequest($url, 'POST', $query); } }