Skip to content

Commit

Permalink
Merge "REST: Proof of concept REST API search"
Browse files Browse the repository at this point in the history
  • Loading branch information
jenkins-bot authored and Gerrit Code Review committed Dec 11, 2024
2 parents 95a1d86 + 657ec6c commit d07259e
Show file tree
Hide file tree
Showing 3 changed files with 290 additions and 1 deletion.
13 changes: 12 additions & 1 deletion repo/rest-api/routes.dev.json
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@
[]
[
{
"path": "/wikibase/v0/search/{entity_type}",
"method": "GET",
"factory": "Wikibase\\Repo\\RestApi\\RouteHandlers\\SearchEntitiesRouteHandler::factory"
},
{
"path": "/wikibase/v0/suggest/{entity_type}",
"method": "GET",
"factory": "Wikibase\\Repo\\RestApi\\RouteHandlers\\SuggestEntitiesRouteHandler::factory"
}
]
165 changes: 165 additions & 0 deletions repo/rest-api/src/RouteHandlers/SearchEntitiesRouteHandler.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
<?php declare( strict_types=1 );

namespace Wikibase\Repo\RestApi\RouteHandlers;

use ISearchResultSet;
use MediaWiki\Languages\LanguageFactory;
use MediaWiki\MediaWikiServices;
use MediaWiki\Registration\ExtensionRegistry;
use MediaWiki\Rest\Handler;
use MediaWiki\Rest\HttpException;
use MediaWiki\Rest\Response;
use MediaWiki\Rest\SimpleHandler;
use MediaWiki\Rest\StringStream;
use SearchResult;
use Wikibase\DataModel\Entity\EntityIdParser;
use Wikibase\DataModel\Entity\Item;
use Wikibase\DataModel\Entity\Property;
use Wikibase\DataModel\Term\TermTypes;
use Wikibase\Lib\Store\EntityNamespaceLookup;
use Wikibase\Lib\Store\FallbackLabelDescriptionLookupFactory;
use Wikibase\Repo\WikibaseRepo;
use Wikibase\Search\Elastic\EntityResult;
use Wikimedia\ParamValidator\ParamValidator;

/**
 * Proof-of-concept REST handler for GET /wikibase/v0/search/{entity_type}.
 *
 * Runs a full-text search restricted to the namespace of the requested entity type and responds with
 * a JSON object of the form { "results": [ { "id": ..., "label": ..., "description": ... }, ... ] }.
 * The endpoint refuses to run unless the WikibaseCirrusSearch extension is loaded.
 *
 * @license GPL-2.0-or-later
 */
class SearchEntitiesRouteHandler extends SimpleHandler {

	private const ENTITY_TYPE_PATH_PARAM = 'entity_type';
	private const SEARCH_QUERY_PARAM = 'search';
	// "uselang" is read in fullTextSearch() to choose the language for looking up labels and descriptions of
	// results that do not carry them. MediaWiki also picks up "uselang" on its own.
	private const LANGUAGE_QUERY_PARAM = 'uselang';

	// Maps the plural path segment accepted by the route to the Wikibase entity type name.
	private const ENTITY_TYPE_MAP = [
		'items' => Item::ENTITY_TYPE,
		'properties' => Property::ENTITY_TYPE,
	];
	// Maximum number of results requested from the search engine.
	private const RESULTS_LIMIT = 5;

	private \SearchEngineFactory $searchEngineFactory;
	private EntityNamespaceLookup $entityNamespaceLookup;
	private FallbackLabelDescriptionLookupFactory $labelDescriptionLookupFactory;
	private LanguageFactory $languageFactory;
	private EntityIdParser $entityIdParser;

	public function __construct(
		\SearchEngineFactory $searchEngineFactory,
		EntityNamespaceLookup $entityNamespaceLookup,
		FallbackLabelDescriptionLookupFactory $labelDescriptionLookupFactory,
		LanguageFactory $languageFactory,
		EntityIdParser $entityIdParser
	) {
		$this->searchEngineFactory = $searchEngineFactory;
		$this->entityNamespaceLookup = $entityNamespaceLookup;
		$this->labelDescriptionLookupFactory = $labelDescriptionLookupFactory;
		$this->languageFactory = $languageFactory;
		$this->entityIdParser = $entityIdParser;
	}

	/**
	 * Factory referenced from the route definition; wires the handler with the global services.
	 */
	public static function factory(): Handler {
		$mediaWikiServices = MediaWikiServices::getInstance();

		return new self(
			$mediaWikiServices->getSearchEngineFactory(),
			WikibaseRepo::getEntityNamespaceLookup(),
			WikibaseRepo::getFallbackLabelDescriptionLookupFactory(),
			$mediaWikiServices->getLanguageFactory(),
			WikibaseRepo::getEntityIdParser()
		);
	}

	/**
	 * Declares the path parameter (restricted to the keys of ENTITY_TYPE_MAP) and the two required
	 * query parameters ("search" and "uselang").
	 */
	public function getParamSettings(): array {
		return [
			self::ENTITY_TYPE_PATH_PARAM => [
				self::PARAM_SOURCE => 'path',
				ParamValidator::PARAM_TYPE => array_keys( self::ENTITY_TYPE_MAP ),
				ParamValidator::PARAM_REQUIRED => true,
				ParamValidator::PARAM_ISMULTI => false,
			],
			self::SEARCH_QUERY_PARAM => [
				self::PARAM_SOURCE => 'query',
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
				ParamValidator::PARAM_ISMULTI => false,
			],
			self::LANGUAGE_QUERY_PARAM => [
				self::PARAM_SOURCE => 'query',
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
				ParamValidator::PARAM_ISMULTI => false,
			],
		];
	}

	/**
	 * Handles the request: searches for entities of the given type and returns the results as JSON.
	 *
	 * @param string $entityType validated path parameter; one of the keys of ENTITY_TYPE_MAP
	 *
	 * @throws HttpException if the WikibaseCirrusSearch extension is not loaded
	 */
	public function run( string $entityType ): Response {
		if ( !ExtensionRegistry::getInstance()->isLoaded( 'WikibaseCirrusSearch' ) ) {
			throw new HttpException( 'This endpoint does not work because WikibaseCirrusSearch is not installed.' );
		}

		$searchTerm = $this->getValidatedParams()[self::SEARCH_QUERY_PARAM];
		$results = $this->fullTextSearch( $entityType, $searchTerm );

		$httpResponse = $this->getResponseFactory()->create();
		$httpResponse->setHeader( 'Content-Type', 'application/json' );
		$httpResponse->setBody( new StringStream(
			json_encode( [ 'results' => $results ], JSON_UNESCAPED_SLASHES )
		) );

		return $httpResponse;
	}

	/**
	 * Runs the full-text search and converts every hit into an id/label/description array.
	 *
	 * @param string $entityType one of the keys of ENTITY_TYPE_MAP
	 * @param string $searchTerm
	 *
	 * @return array[] one [ 'id' => ..., 'label' => ..., 'description' => ... ] entry per search result;
	 *  empty if the search did not produce a result set
	 */
	private function fullTextSearch( string $entityType, string $searchTerm ): array {
		$searchEngine = $this->searchEngineFactory->create();
		$searchEngine->setNamespaces( [ $this->entityNamespaceLookup->getEntityNamespace( self::ENTITY_TYPE_MAP[$entityType] ) ] );
		$searchEngine->setLimitOffset( self::RESULTS_LIMIT );

		// SearchEngine::searchText() is documented as returning ISearchResultSet|Status|null, so only unwrap
		// the value when a status object was actually returned instead of calling getValue() blindly.
		$searchOutcome = $searchEngine->searchText( $searchTerm );
		$resultSet = $searchOutcome instanceof \StatusValue ? $searchOutcome->getValue() : $searchOutcome;
		if ( !( $resultSet instanceof ISearchResultSet ) ) {
			return [];
		}

		$searchResults = $resultSet->extractResults();

		// Not all search results are EntityResult instances, e.g. entities matched using haswbstatement are ArrayCirrusSearchResult objects
		// which don't contain the label and description, so we need to look them up.
		$labelDescriptionLookup = $this->labelDescriptionLookupFactory->newLabelDescriptionLookup(
			$this->languageFactory->getLanguage( $this->getValidatedParams()[self::LANGUAGE_QUERY_PARAM] ),
			array_map(
				fn( SearchResult $result ) => $this->entityIdParser->parse( $result->getTitle()->getText() ),
				array_filter(
					$searchResults,
					// @phan-suppress-next-line PhanUndeclaredClassInstanceof - phan does not know about WikibaseCirrusSearch
					fn( SearchResult $result ) => !( $result instanceof EntityResult )
				)
			),
			[ TermTypes::TYPE_LABEL, TermTypes::TYPE_DESCRIPTION ]
		);

		return array_map(
			function ( SearchResult $result ) use ( $labelDescriptionLookup ) {
				// @phan-suppress-next-line PhanUndeclaredClassInstanceof - phan does not know about WikibaseCirrusSearch
				if ( $result instanceof EntityResult ) {
					return [
						// @phan-suppress-next-line PhanUndeclaredClassMethod - phan does not know about WikibaseCirrusSearch
						'id' => $result->getTitle()->getText(),
						// @phan-suppress-next-line PhanUndeclaredClassMethod - phan does not know about WikibaseCirrusSearch
						'label' => self::termTextOrNull( $result->getLabelData() ),
						// @phan-suppress-next-line PhanUndeclaredClassMethod - phan does not know about WikibaseCirrusSearch
						'description' => self::termTextOrNull( $result->getDescriptionData() ),
					];
				}

				$id = $this->entityIdParser->parse( $result->getTitle()->getText() );
				$label = $labelDescriptionLookup->getLabel( $id );
				$description = $labelDescriptionLookup->getDescription( $id );

				return [
					'id' => "$id",
					'label' => $label ? $label->getText() : null,
					'description' => $description ? $description->getText() : null,
				];
			},
			$searchResults
		);
	}

	/**
	 * Extracts the 'value' entry of a term data array, treating a missing entry and an empty string alike
	 * as "no term". A strict comparison is used rather than a truthiness check so that the text "0" is kept.
	 *
	 * @param array|null $termData presumably of the form [ 'language' => ..., 'value' => ... ] - confirm against EntityResult
	 *
	 * @return string|null
	 */
	private static function termTextOrNull( $termData ): ?string {
		$text = $termData['value'] ?? null;

		return ( $text === null || $text === '' ) ? null : (string)$text;
	}

}
113 changes: 113 additions & 0 deletions repo/rest-api/src/RouteHandlers/SuggestEntitiesRouteHandler.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<?php declare( strict_types=1 );

namespace Wikibase\Repo\RestApi\RouteHandlers;

use MediaWiki\Registration\ExtensionRegistry;
use MediaWiki\Rest\Handler;
use MediaWiki\Rest\HttpException;
use MediaWiki\Rest\Response;
use MediaWiki\Rest\SimpleHandler;
use MediaWiki\Rest\StringStream;
use Wikibase\DataModel\Entity\Item;
use Wikibase\DataModel\Entity\Property;
use Wikibase\Lib\Interactors\TermSearchResult;
use Wikibase\Repo\Api\EntitySearchHelper;
use Wikibase\Repo\WikibaseRepo;
use Wikimedia\ParamValidator\ParamValidator;

/**
 * Proof-of-concept REST handler for GET /wikibase/v0/suggest/{entity_type}.
 *
 * Delegates to EntitySearchHelper::getRankedSearchResults() and responds with a JSON object of the form
 * { "results": [ { "id": ..., "label": ..., "description": ..., "match": { "type", "language", "text" } }, ... ] }.
 * The endpoint refuses to run unless the WikibaseCirrusSearch extension is loaded.
 *
 * @license GPL-2.0-or-later
 */
class SuggestEntitiesRouteHandler extends SimpleHandler {

	private const ENTITY_TYPE_PATH_PARAM = 'entity_type';
	private const SEARCH_QUERY_PARAM = 'search';
	// We only actively use "uselang" in this class to set the language to search in, but MediaWiki picks it up automatically to set the
	// result language. If we used a different query param name, the results would not automatically be shown in the language that was
	// searched in.
	private const LANGUAGE_QUERY_PARAM = 'uselang';

	// Maps the plural path segment accepted by the route to the Wikibase entity type name.
	private const ENTITY_TYPE_MAP = [
		'items' => Item::ENTITY_TYPE,
		'properties' => Property::ENTITY_TYPE,
	];
	// Maximum number of results requested from the search helper.
	private const RESULTS_LIMIT = 5;

	private EntitySearchHelper $entitySearch;

	public function __construct(
		EntitySearchHelper $entitySearch
	) {
		$this->entitySearch = $entitySearch;
	}

	/**
	 * Factory referenced from the route definition; wires the handler with the global search helper.
	 */
	public static function factory(): Handler {
		return new self(
			WikibaseRepo::getEntitySearchHelper(),
		);
	}

	/**
	 * Declares the path parameter (restricted to the keys of ENTITY_TYPE_MAP) and the two required
	 * query parameters ("search" and "uselang").
	 */
	public function getParamSettings(): array {
		return [
			self::ENTITY_TYPE_PATH_PARAM => [
				self::PARAM_SOURCE => 'path',
				ParamValidator::PARAM_TYPE => array_keys( self::ENTITY_TYPE_MAP ),
				ParamValidator::PARAM_REQUIRED => true,
				ParamValidator::PARAM_ISMULTI => false,
			],
			self::SEARCH_QUERY_PARAM => [
				self::PARAM_SOURCE => 'query',
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
				ParamValidator::PARAM_ISMULTI => false,
			],
			self::LANGUAGE_QUERY_PARAM => [
				self::PARAM_SOURCE => 'query',
				ParamValidator::PARAM_TYPE => 'string',
				ParamValidator::PARAM_REQUIRED => true,
				ParamValidator::PARAM_ISMULTI => false,
			],
		];
	}

	/**
	 * Handles the request: runs the search for the given entity type and returns the results as JSON.
	 *
	 * @param string $entityType validated path parameter; one of the keys of ENTITY_TYPE_MAP
	 *
	 * @throws HttpException if the WikibaseCirrusSearch extension is not loaded
	 */
	public function run( string $entityType ): Response {
		if ( !ExtensionRegistry::getInstance()->isLoaded( 'WikibaseCirrusSearch' ) ) {
			throw new HttpException( 'This endpoint does not work because WikibaseCirrusSearch is not installed.' );
		}

		$params = $this->getValidatedParams();
		$results = $this->prefixSearch(
			$entityType,
			$params[self::SEARCH_QUERY_PARAM],
			$params[self::LANGUAGE_QUERY_PARAM]
		);

		$httpResponse = $this->getResponseFactory()->create();
		$httpResponse->setHeader( 'Content-Type', 'application/json' );
		$httpResponse->setBody( new StringStream(
			json_encode( [ 'results' => $results ], JSON_UNESCAPED_SLASHES )
		) );

		return $httpResponse;
	}

	/**
	 * Fetches ranked search results and converts each one into a plain array for the JSON response.
	 *
	 * @param string $entityType one of the keys of ENTITY_TYPE_MAP
	 * @param string $searchTerm
	 * @param string $language language code to search in
	 *
	 * @return array[] one entry per result with the keys 'id', 'label', 'description' and 'match';
	 *  'label' and 'description' are null when the result has no display label / description
	 */
	private function prefixSearch( string $entityType, string $searchTerm, string $language ): array {
		$searchResults = $this->entitySearch->getRankedSearchResults(
			$searchTerm,
			$language,
			self::ENTITY_TYPE_MAP[$entityType],
			self::RESULTS_LIMIT,
			true,
			null
		);

		return array_map(
			static function ( TermSearchResult $searchResult ): array {
				// The display label is guarded against null in the same way as the display description,
				// so a result without a label no longer causes a fatal error.
				$displayLabel = $searchResult->getDisplayLabel();
				$displayDescription = $searchResult->getDisplayDescription();
				$matchedTerm = $searchResult->getMatchedTerm();

				return [
					'id' => $searchResult->getEntityId()->getSerialization(),
					'label' => $displayLabel ? $displayLabel->getText() : null,
					'description' => $displayDescription ? $displayDescription->getText() : null,
					'match' => [
						'type' => $searchResult->getMatchedTermType(),
						'language' => $matchedTerm->getLanguageCode(),
						'text' => $matchedTerm->getText(),
					],
				];
			},
			// Re-index so that the results are always encoded as a JSON list.
			array_values( $searchResults )
		);
	}
}

0 comments on commit d07259e

Please sign in to comment.