diff --git a/.env b/.env index 5afd327..105d50b 100644 --- a/.env +++ b/.env @@ -92,28 +92,25 @@ APP_TORRENT_WANTED_FTP_FOLDER=/yggtracker APP_TORRENT_WANTED_FTP_APPROVED_ONLY=1 # Enable search index for torrent name -APP_INDEX_TORRENT_NAME=1 +APP_INDEX_TORRENT_NAME_ENABLED=1 # Enable search index for torrent info hash v1 -APP_INDEX_TORRENT_HASH_V1=1 +APP_INDEX_TORRENT_HASH_V1_ENABLED=1 # Enable search index for torrent info hash v2 -APP_INDEX_TORRENT_HASH_V2=1 +APP_INDEX_TORRENT_HASH_V2_ENABLED=1 # Enable search index for torrent filenames -APP_INDEX_TORRENT_FILENAMES=1 +APP_INDEX_TORRENT_FILENAMES_ENABLED=1 # Enable search index for torrent source -APP_INDEX_TORRENT_SOURCE=1 +APP_INDEX_TORRENT_SOURCE_ENABLED=1 # Enable search index for torrent comment -APP_INDEX_TORRENT_COMMENT=1 +APP_INDEX_TORRENT_COMMENT_ENABLED=1 # Enable search index for words length greater than N chars APP_INDEX_WORD_LENGTH_MIN=3 # Enable search index for words length not greater than N chars -APP_INDEX_WORD_LENGTH_MAX=255 - -# Enable search index transliteration @TODO -APP_INDEX_TRANSLITERATION=1 \ No newline at end of file +APP_INDEX_WORD_LENGTH_MAX=255 \ No newline at end of file diff --git a/README.md b/README.md index 6d5d9a7..0d6d65e 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,7 @@ git checkout -b my-pr-branch-name * [SVG icons](https://icons.getbootstrap.com) * [Scrapper](https://github.com/medariox/scrapeer) / [Composer Edition](https://github.com/YGGverse/scrapeer) * [Bencode Library](https://github.com/Rhilip/Bencode) +* [Transliteration Library](https://github.com/ashtokalo/php-translit) * [Identicons](https://github.com/dmester/jdenticon-php) #### Support diff --git a/composer.json b/composer.json index c0d3e3c..a91504c 100644 --- a/composer.json +++ b/composer.json @@ -9,6 +9,7 @@ "php": ">=8.1", "ext-ctype": "*", "ext-iconv": "*", + "ashtokalo/php-translit": "^0.2.0", "doctrine/annotations": "^2.0", "doctrine/doctrine-bundle": "^2.10", "doctrine/doctrine-migrations-bundle": "^3.2", diff --git a/composer.lock b/composer.lock index e3eddf2..babbb48 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,51 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "3770ffcd80695bc10a22f8ece4f68d1f", + "content-hash": "4d930a43cf9a80e1622029c4a4048a6b", "packages": [ + { + "name": "ashtokalo/php-translit", + "version": "0.2.0", + "source": { + "type": "git", + "url": "https://github.com/ashtokalo/php-translit.git", + "reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ashtokalo/php-translit/zipball/8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3", + "reference": "8ced36cbcd0ed8befb1388ef51935eb53bcfe5b3", + "shasum": "" + }, + "require": { + "php": ">=7.0" + }, + "require-dev": { + "phpunit/phpunit": "~7.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "ashtokalo\\translit\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "PHP library to convert text from one script to another.", + "keywords": [ + "latinization", + "romanization", + "translit", + "transliteration" + ], + "support": { + "issues": "https://github.com/ashtokalo/php-translit/issues", + "source": "https://github.com/ashtokalo/php-translit/tree/0.2.0" + }, + "time": "2022-09-26T09:05:24+00:00" + }, { "name": "doctrine/annotations", "version": "2.0.1", diff --git a/config/services.yaml b/config/services.yaml index a63214a..bb2762b 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -21,15 +21,14 @@ parameters: app.torrent.wanted.ftp.enabled: '%env(APP_TORRENT_WANTED_FTP_ENABLED)%' app.torrent.wanted.ftp.folder: '%env(APP_TORRENT_WANTED_FTP_FOLDER)%' app.torrent.wanted.ftp.approved: '%env(APP_TORRENT_WANTED_FTP_APPROVED_ONLY)%' - app.index.torrent.name: '%env(APP_INDEX_TORRENT_NAME)%' - app.index.torrent.filenames: '%env(APP_INDEX_TORRENT_FILENAMES)%' - app.index.torrent.hash.v1: '%env(APP_INDEX_TORRENT_HASH_V1)%' - app.index.torrent.hash.v2: '%env(APP_INDEX_TORRENT_HASH_V2)%' - app.index.torrent.source: '%env(APP_INDEX_TORRENT_SOURCE)%' - app.index.torrent.comment: '%env(APP_INDEX_TORRENT_COMMENT)%' + app.index.torrent.name.enabled: '%env(APP_INDEX_TORRENT_NAME_ENABLED)%' + app.index.torrent.filenames.enabled: '%env(APP_INDEX_TORRENT_FILENAMES_ENABLED)%' + app.index.torrent.hash.v1.enabled: '%env(APP_INDEX_TORRENT_HASH_V1_ENABLED)%' + app.index.torrent.hash.v2.enabled: '%env(APP_INDEX_TORRENT_HASH_V2_ENABLED)%' + app.index.torrent.source.enabled: '%env(APP_INDEX_TORRENT_SOURCE_ENABLED)%' + app.index.torrent.comment.enabled: '%env(APP_INDEX_TORRENT_COMMENT_ENABLED)%' app.index.word.length.min: '%env(APP_INDEX_WORD_LENGTH_MIN)%' app.index.word.length.max: '%env(APP_INDEX_WORD_LENGTH_MAX)%' - app.index.transliteration: '%env(APP_INDEX_TRANSLITERATION)%' services: # default configuration for services in *this* file diff --git a/src/Controller/TorrentController.php b/src/Controller/TorrentController.php index 713f020..0191315 100644 --- a/src/Controller/TorrentController.php +++ b/src/Controller/TorrentController.php @@ -229,8 +229,6 @@ public function search( $activityService ); - // - // Init request $query = $request->get('query') ? explode(' ', urldecode($request->get('query'))) : []; $page = $request->get('page') ? (int) $request->get('page') : 1; @@ -883,13 +881,12 @@ public function submit( $file->getPathName(), - (bool) $this->getParameter('app.index.torrent.name'), - (bool) $this->getParameter('app.index.torrent.filenames'), - (bool) $this->getParameter('app.index.torrent.hash.v1'), - (bool) $this->getParameter('app.index.torrent.hash.v2'), - (bool) $this->getParameter('app.index.torrent.source'), - (bool) $this->getParameter('app.index.torrent.comment'), - (bool) $this->getParameter('app.index.transliteration'), + (bool) $this->getParameter('app.index.torrent.name.enabled'), + (bool) $this->getParameter('app.index.torrent.filenames.enabled'), + (bool) $this->getParameter('app.index.torrent.hash.v1.enabled'), + (bool) $this->getParameter('app.index.torrent.hash.v2.enabled'), + (bool) $this->getParameter('app.index.torrent.source.enabled'), + (bool) $this->getParameter('app.index.torrent.comment.enabled'), (int) $this->getParameter('app.index.word.length.min'), (int) $this->getParameter('app.index.word.length.max'), @@ -2453,13 +2450,12 @@ public function reindex( { // Reindex keywords $torrentService->reindexTorrentKeywordsAll( - (bool) $this->getParameter('app.index.torrent.name'), - (bool) $this->getParameter('app.index.torrent.filenames'), - (bool) $this->getParameter('app.index.torrent.hash.v1'), - (bool) $this->getParameter('app.index.torrent.hash.v2'), - (bool) $this->getParameter('app.index.torrent.source'), - (bool) $this->getParameter('app.index.torrent.comment'), - (bool) $this->getParameter('app.index.transliteration'), + (bool) $this->getParameter('app.index.torrent.name.enabled'), + (bool) $this->getParameter('app.index.torrent.filenames.enabled'), + (bool) $this->getParameter('app.index.torrent.hash.v1.enabled'), + (bool) $this->getParameter('app.index.torrent.hash.v2.enabled'), + (bool) $this->getParameter('app.index.torrent.source.enabled'), + (bool) $this->getParameter('app.index.torrent.comment.enabled'), (int) $this->getParameter('app.index.word.length.min'), (int) $this->getParameter('app.index.word.length.max') ); diff --git a/src/Repository/TorrentRepository.php b/src/Repository/TorrentRepository.php index f960e90..1ccd425 100644 --- a/src/Repository/TorrentRepository.php +++ b/src/Repository/TorrentRepository.php @@ -73,27 +73,36 @@ private function getTorrentsQueryByFilter( int $userId, array $keywords, array $locales, - ?bool $sensitive = null, - ?bool $approved = null, - ?bool $status = null, + ?bool $sensitive = null, + ?bool $approved = null, + ?bool $status = null ): \Doctrine\ORM\QueryBuilder { $query = $this->createQueryBuilder('t'); if ($keywords) { - $andKeywords = $query->expr()->andX(); - foreach ($keywords as $i => $keyword) { - $keyword = mb_strtolower($keyword); // all keywords stored in lowercase + // Make query to the index case insensitive + $keyword = mb_strtolower($keyword); - $andKeywords->add("t.keywords LIKE :keyword{$i}"); + // Init OR condition for each word form + $orKeywords = $query->expr()->orX(); + $orKeywords->add("t.keywords LIKE :keyword{$i}"); $query->setParameter(":keyword{$i}", "%{$keyword}%"); - } - $query->andWhere($andKeywords); + // Generate word forms for each transliteration locale #33 + foreach ($this->generateWordForms($keyword) as $j => $wordForm) + { + $orKeywords->add("t.keywords LIKE :keyword{$i}{$j}"); + $query->setParameter(":keyword{$i}{$j}", "%{$wordForm}%"); + } + + // Append AND condition + $query->andWhere($orKeywords); + } } if ($locales) @@ -153,4 +162,59 @@ private function getTorrentsQueryByFilter( return $query; } + + // Word forms generator to improve search results + // e.g. transliteration rules for latin filenames + private function generateWordForms( + string $keyword, + // #33 supported locales: + // https://github.com/ashtokalo/php-translit + array $transliteration = [ + 'be', + 'bg', + 'el', + 'hy', + 'kk', + 'mk', + 'ru', + 'ka', + 'uk' + ], + // Additional char forms + array $charForms = + [ + 'c' => 'k', + 'k' => 'c', + ] + ): array + { + $wordForms = []; + + // Apply transliteration + foreach ($transliteration as $locale) + { + $wordForms[] = \ashtokalo\translit\Translit::object()->convert( + $keyword, + $locale + ); + } + + // Apply char forms + foreach ($wordForms as $wordForm) + { + foreach ($charForms as $from => $to) + { + $wordForms[] = str_replace( + $from, + $to, + $wordForm + ); + } + } + + // Remove duplicates + return array_unique( + $wordForms + ); + } } diff --git a/src/Service/TorrentService.php b/src/Service/TorrentService.php index 7546e1b..62cd0c5 100644 --- a/src/Service/TorrentService.php +++ b/src/Service/TorrentService.php @@ -64,7 +64,6 @@ public function readTorrentFileByTorrentId( public function generateTorrentKeywordsByString( string $string, - bool $transliteration, int $wordLengthMin, int $wordLengthMax, ): array @@ -97,11 +96,6 @@ public function generateTorrentKeywordsByString( { // Apply case insensitive search conversion $words[$key] = mb_strtolower($value); - - if ($transliteration) - { - // @TODO - } } } @@ -129,7 +123,6 @@ public function generateTorrentKeywordsByTorrentFilepath( bool $extractSource, bool $extractComment, - bool $wordTransliteration, int $wordLengthMin, int $wordLengthMax @@ -147,7 +140,6 @@ public function generateTorrentKeywordsByTorrentFilepath( $keywords, $this->generateTorrentKeywordsByString( $name, - $wordTransliteration, $wordLengthMin, $wordLengthMax ) @@ -163,7 +155,6 @@ public function generateTorrentKeywordsByTorrentFilepath( $keywords, $this->generateTorrentKeywordsByString( $list['path'], - $wordTransliteration, $wordLengthMin, $wordLengthMax ) @@ -179,7 +170,6 @@ public function generateTorrentKeywordsByTorrentFilepath( $keywords, $this->generateTorrentKeywordsByString( $source, - $wordTransliteration, $wordLengthMin, $wordLengthMax ) @@ -195,7 +185,6 @@ public function generateTorrentKeywordsByTorrentFilepath( $keywords, $this->generateTorrentKeywordsByString( $comment, - $wordTransliteration, $wordLengthMin, $wordLengthMax ) @@ -301,7 +290,6 @@ public function add( bool $extractSource, bool $extractComment, - bool $wordTransliteration, int $wordLengthMin, int $wordLengthMax, @@ -326,7 +314,6 @@ public function add( $extractInfoHashV2, $extractSource, $extractComment, - $wordTransliteration, $wordLengthMin, $wordLengthMax ), @@ -623,7 +610,6 @@ public function reindexTorrentKeywordsAll( bool $extractInfoHashV2, bool $extractSource, bool $extractComment, - bool $wordTransliteration, int $wordLengthMin, int $wordLengthMax ): void @@ -643,7 +629,6 @@ public function reindexTorrentKeywordsAll( $extractInfoHashV2, $extractSource, $extractComment, - $wordTransliteration, $wordLengthMin, $wordLengthMax )