diff --git a/src/Examine.Core/IndexOptions.cs b/src/Examine.Core/IndexOptions.cs index c773f2f3d..83bea1133 100644 --- a/src/Examine.Core/IndexOptions.cs +++ b/src/Examine.Core/IndexOptions.cs @@ -2,9 +2,14 @@ namespace Examine { public class IndexOptions { - public IndexOptions() => FieldDefinitions = new FieldDefinitionCollection(); + public IndexOptions() { + FieldDefinitions = new FieldDefinitionCollection(); + RelevanceScorerDefinitions = new RelevanceScorerDefinitionCollection(); + } public FieldDefinitionCollection FieldDefinitions { get; set; } public IValueSetValidator Validator { get; set; } + + public RelevanceScorerDefinitionCollection RelevanceScorerDefinitions { get; set; } } } diff --git a/src/Examine.Core/ReadOnlyRelevanceScorerDefinitionCollection.cs b/src/Examine.Core/ReadOnlyRelevanceScorerDefinitionCollection.cs new file mode 100644 index 000000000..f70bb152f --- /dev/null +++ b/src/Examine.Core/ReadOnlyRelevanceScorerDefinitionCollection.cs @@ -0,0 +1,61 @@ +using System; +using System.Collections.Concurrent; +using System.Collections; +using System.Collections.Generic; +using System.Linq; + +namespace Examine +{ + public class ReadOnlyRelevanceScorerDefinitionCollection : IEnumerable + { + public ReadOnlyRelevanceScorerDefinitionCollection() + : this(Enumerable.Empty()) + { + } + + public ReadOnlyRelevanceScorerDefinitionCollection(params RelevanceScorerDefinition[] definitions) + : this((IEnumerable)definitions) + { + + } + + public ReadOnlyRelevanceScorerDefinitionCollection(IEnumerable definitions) + { + if (definitions == null) + { + return; + } + + foreach (var s in definitions.GroupBy(x => x.Name)) + { + var suggester = s.FirstOrDefault(); + if (suggester != default) + { + Definitions.TryAdd(s.Key, suggester); + } + } + } + + /// + /// Tries to get a by name + /// + /// + /// + /// + /// returns true if one was found otherwise false + /// + /// + /// Marked as virtual so developers can inherit this class and override this method in case + /// relevance definitions are dynamic. + /// + public virtual bool TryGetValue(string relevanceScorerName, out RelevanceScorerDefinition relevanceScorerDefinition) => Definitions.TryGetValue(relevanceScorerName, out relevanceScorerDefinition); + + public int Count => Definitions.Count; + + protected ConcurrentDictionary Definitions { get; } = new ConcurrentDictionary(StringComparer.InvariantCultureIgnoreCase); + + public IEnumerator GetEnumerator() => Definitions.Values.GetEnumerator(); + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + } +} diff --git a/src/Examine.Core/RelevanceScorerDefinition.cs b/src/Examine.Core/RelevanceScorerDefinition.cs new file mode 100644 index 000000000..7c0679864 --- /dev/null +++ b/src/Examine.Core/RelevanceScorerDefinition.cs @@ -0,0 +1,27 @@ +using System.Collections.Generic; + +namespace Examine +{ + /// + /// Defines how to score a document to affect it's relevance + /// + public class RelevanceScorerDefinition + { + public RelevanceScorerDefinition(string name, + IEnumerable functionScorerDefintions) + { + Name = name; + FunctionScorerDefintions = functionScorerDefintions; + } + + /// + /// Name + /// + public string Name { get; } + + /// + /// Field Boosting Function Defintions + /// + public IEnumerable FunctionScorerDefintions { get; } + } +} diff --git a/src/Examine.Core/RelevanceScorerDefinitionCollection.cs b/src/Examine.Core/RelevanceScorerDefinitionCollection.cs new file mode 100644 index 000000000..f411617ae --- /dev/null +++ b/src/Examine.Core/RelevanceScorerDefinitionCollection.cs @@ -0,0 +1,25 @@ +using System; + +namespace Examine +{ + public class RelevanceScorerDefinitionCollection : ReadOnlyRelevanceScorerDefinitionCollection + { + public RelevanceScorerDefinitionCollection(params RelevanceScorerDefinition[] definitions) : base(definitions) + { + } + + public RelevanceScorerDefinitionCollection() + { + } + + public RelevanceScorerDefinition GetOrAdd(string fieldName, Func add) => Definitions.GetOrAdd(fieldName, add); + + /// + /// Replace any definition with the specified one, if one doesn't exist then it is added + /// + /// + public void AddOrUpdate(RelevanceScorerDefinition definition) => Definitions.AddOrUpdate(definition.Name, definition, (s, factory) => definition); + + public bool TryAdd(RelevanceScorerDefinition definition) => Definitions.TryAdd(definition.Name, definition); + } +} diff --git a/src/Examine.Core/RelevanceScorerFunctionBaseDefintion.cs b/src/Examine.Core/RelevanceScorerFunctionBaseDefintion.cs new file mode 100644 index 000000000..b97b6817f --- /dev/null +++ b/src/Examine.Core/RelevanceScorerFunctionBaseDefintion.cs @@ -0,0 +1,29 @@ +namespace Examine +{ + /// + /// Base for Relevance Scorer Functions + /// + public abstract class RelevanceScorerFunctionBaseDefintion + { + /// + /// Constructor + /// + /// Name of the field for the function + /// Boost for the function + public RelevanceScorerFunctionBaseDefintion(string fieldName, float boost) + { + FieldName = fieldName; + Boost = boost; + } + + /// + /// Name of the field for the function + /// + public string FieldName { get; } + + /// + /// Boost for the function + /// + public float Boost { get; } + } +} diff --git a/src/Examine.Core/Scoring/TimeRelevanceScorerFunctionDefintion.cs b/src/Examine.Core/Scoring/TimeRelevanceScorerFunctionDefintion.cs new file mode 100644 index 000000000..f2cef0dcc --- /dev/null +++ b/src/Examine.Core/Scoring/TimeRelevanceScorerFunctionDefintion.cs @@ -0,0 +1,26 @@ +using System; + +namespace Examine.Scoring +{ + /// + /// Boosts relevance based on time recency + /// + public class TimeRelevanceScorerFunctionDefintion : RelevanceScorerFunctionBaseDefintion + { + /// + /// Constructor + /// + /// Name of the field + /// Boost + /// Duration from current time to boost from + public TimeRelevanceScorerFunctionDefintion(string fieldName, float boost, TimeSpan boostTimeRange) : base(fieldName, boost) + { + BoostTimeRange = boostTimeRange; + } + + /// + /// Time range to boost from + /// + public TimeSpan BoostTimeRange { get; } + } +} diff --git a/src/Examine.Core/Search/IBooleanOperation.cs b/src/Examine.Core/Search/IBooleanOperation.cs index 8ac3dd664..799ffc96c 100644 --- a/src/Examine.Core/Search/IBooleanOperation.cs +++ b/src/Examine.Core/Search/IBooleanOperation.cs @@ -6,7 +6,7 @@ namespace Examine.Search /// /// Defines the supported operation for addition of additional clauses in the fluent API /// - public interface IBooleanOperation : IOrdering + public interface IBooleanOperation : IScoreQuery { /// /// Sets the next operation to be AND diff --git a/src/Examine.Core/Search/IScoreQuery.cs b/src/Examine.Core/Search/IScoreQuery.cs new file mode 100644 index 000000000..2828ef1ad --- /dev/null +++ b/src/Examine.Core/Search/IScoreQuery.cs @@ -0,0 +1,12 @@ +using System; + +namespace Examine.Search +{ + /// + /// Defines the supported operation for addition of additional clauses in the fluent API + /// + public interface IScoreQuery : IOrdering + { + IScoreQuery ScoreWith(params string[] scorers); + } +} diff --git a/src/Examine.Lucene/LuceneIndexOptions.cs b/src/Examine.Lucene/LuceneIndexOptions.cs index 733a87e71..2f42bc50a 100644 --- a/src/Examine.Lucene/LuceneIndexOptions.cs +++ b/src/Examine.Lucene/LuceneIndexOptions.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using Examine.Lucene.Scoring; using Lucene.Net.Analysis; using Lucene.Net.Analysis.Standard; using Lucene.Net.Index; diff --git a/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs b/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs index 0c4a2c29c..92613190d 100644 --- a/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs +++ b/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs @@ -3,6 +3,8 @@ using Lucene.Net.Search; using Examine.Lucene.Search; using Examine.Search; +using System.Collections.Generic; +using Examine.Lucene.Scoring; namespace Examine.Lucene.Providers { @@ -60,7 +62,7 @@ public override ISearchResults Search(string searchText, QueryOptions options = ///// ///// This is NOT used! however I'm leaving this here as example code - ///// + ///// ///// This is used to recursively set any query type that supports parameters for rewriting ///// before the search executes. ///// @@ -70,10 +72,10 @@ public override ISearchResults Search(string searchText, QueryOptions options = ///// that would need to be set eagerly before any query parsing takes place but if we want to do it lazily here's how. ///// So we need to manually update any query within the outer boolean query with the correct rewrite method, then the underlying LuceneSearcher will call rewrite ///// to update everything. - ///// + ///// ///// see https://github.com/Shazwazza/Examine/pull/89 ///// see https://lists.gt.net/lucene/java-user/92194 - ///// + ///// ///// //private void SetScoringBooleanQueryRewriteMethod(Query query) //{ diff --git a/src/Examine.Lucene/Providers/LuceneIndex.cs b/src/Examine.Lucene/Providers/LuceneIndex.cs index 6ac9124eb..bc669622a 100644 --- a/src/Examine.Lucene/Providers/LuceneIndex.cs +++ b/src/Examine.Lucene/Providers/LuceneIndex.cs @@ -44,6 +44,8 @@ private LuceneIndex( //initialize the field types _fieldValueTypeCollection = new Lazy(() => CreateFieldValueTypes(_options.IndexValueTypesFactory)); + _relevanceScorerDefinitionCollection = new Lazy(() => _options.RelevanceScorerDefinitions); + _searcher = new Lazy(CreateSearcher); _cancellationTokenSource = new CancellationTokenSource(); _cancellationToken = _cancellationTokenSource.Token; @@ -61,13 +63,13 @@ public LuceneIndex( : this(loggerFactory, name, (IOptionsMonitor)indexOptions) { LuceneDirectoryIndexOptions directoryOptions = indexOptions.GetNamedOptions(name); - + if (directoryOptions.DirectoryFactory == null) { throw new InvalidOperationException($"No {typeof(IDirectoryFactory)} assigned"); } - _directory = new Lazy(() => directoryOptions.DirectoryFactory.CreateDirectory(this, directoryOptions.UnlockIndex)); + _directory = new Lazy(() => directoryOptions.DirectoryFactory.CreateDirectory(this, directoryOptions.UnlockIndex)); } //TODO: The problem with this is that the writer would already need to be configured with a PerFieldAnalyzerWrapper @@ -137,6 +139,8 @@ internal LuceneIndex( private readonly Lazy _fieldValueTypeCollection; + private readonly Lazy _relevanceScorerDefinitionCollection; + // tracks the latest Generation value of what has been indexed.This can be used to force update a searcher to this generation. private long? _latestGen; @@ -147,6 +151,8 @@ internal LuceneIndex( /// public FieldValueTypeCollection FieldValueTypeCollection => _fieldValueTypeCollection.Value; + public RelevanceScorerDefinitionCollection RelevanceScorerDefinitionCollection => _relevanceScorerDefinitionCollection.Value; + /// /// The default analyzer to use when indexing content, by default, this is set to StandardAnalyzer /// @@ -310,7 +316,7 @@ public void EnsureIndex(bool forceOverwrite) var indexExists = IndexExists(); if (!indexExists || forceOverwrite) { - //if we can't acquire the lock exit - this will happen if this method is called multiple times but we don't want this + //if we can't acquire the lock exit - this will happen if this method is called multiple times but we don't want this // logic to actually execute multiple times if (Monitor.TryEnter(_writerLocker)) { @@ -341,12 +347,12 @@ public void EnsureIndex(bool forceOverwrite) //This will happen if the writer hasn't been created/initialized yet which // might occur if a rebuild is triggered before any indexing has been triggered. //In this case we need to initialize a writer and continue as normal. - //Since we are already inside the writer lock and it is null, we are allowed to + //Since we are already inside the writer lock and it is null, we are allowed to // make this call with out using GetIndexWriter() to do the initialization. _writer = CreateIndexWriterInternal(); } - //We're forcing an overwrite, + //We're forcing an overwrite, // this means that we need to cancel all operations currently in place, // clear the queue and delete all of the data in the index. @@ -441,10 +447,10 @@ public override void CreateIndex() } /// - /// Deletes a node from the index. + /// Deletes a node from the index. /// /// - /// When a content node is deleted, we also need to delete it's children from the index so we need to perform a + /// When a content node is deleted, we also need to delete it's children from the index so we need to perform a /// custom Lucene search to find all decendents and create Delete item queues for them too. /// /// ID of the node to delete @@ -675,7 +681,7 @@ private bool DeleteFromIndex(Term indexTerm, bool performCommit = true) return false; } } - + /// /// Collects the data for the fields and adds the document which is then committed into Lucene.Net's index /// @@ -823,7 +829,7 @@ public void ScheduleCommit() // and less than the delay DateTime.Now - _timestamp < TimeSpan.FromMilliseconds(WaitMilliseconds)) { - //Delay + //Delay _timer.Change(WaitMilliseconds, 0); } else @@ -903,7 +909,7 @@ private TrackingIndexWriter CreateIndexWriterInternal() { Directory dir = GetLuceneDirectory(); - // Unfortunatley if the appdomain is taken down this will remain locked, so we can + // Unfortunatley if the appdomain is taken down this will remain locked, so we can // ensure that it's unlocked here in that case. try { @@ -958,8 +964,8 @@ protected virtual IndexWriter CreateIndexWriter(Directory d) { System.IO.Directory.CreateDirectory(LuceneIndexFolder.FullName); _logOutput = new FileStream(Path.Combine(LuceneIndexFolder.FullName, DateTime.UtcNow.ToString("yyyy-MM-dd") + ".log"), FileMode.Append); - - + + } catch (Exception ex) { @@ -1048,7 +1054,7 @@ private LuceneSearcher CreateSearcher() // wait for most recent changes when first creating the searcher WaitForChanges(); - return new LuceneSearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection); + return new LuceneSearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection, RelevanceScorerDefinitionCollection); } /// diff --git a/src/Examine.Lucene/Providers/LuceneSearcher.cs b/src/Examine.Lucene/Providers/LuceneSearcher.cs index c20194a15..b27ca7060 100644 --- a/src/Examine.Lucene/Providers/LuceneSearcher.cs +++ b/src/Examine.Lucene/Providers/LuceneSearcher.cs @@ -2,7 +2,8 @@ using Examine.Lucene.Search; using Lucene.Net.Search; using Lucene.Net.Analysis; - +using System.Collections.Generic; +using Examine.Lucene.Scoring; namespace Examine.Lucene.Providers { @@ -14,6 +15,7 @@ public class LuceneSearcher : BaseLuceneSearcher, IDisposable { private readonly SearcherManager _searcherManager; private readonly FieldValueTypeCollection _fieldValueTypeCollection; + private readonly RelevanceScorerDefinitionCollection _relevanceScorerDefinitionCollection; private bool _disposedValue; /// @@ -23,15 +25,16 @@ public class LuceneSearcher : BaseLuceneSearcher, IDisposable /// /// /// - public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection) + public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, RelevanceScorerDefinitionCollection relevanceScorerDefinitionCollection) : base(name, analyzer) { _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection; + _relevanceScorerDefinitionCollection = relevanceScorerDefinitionCollection; } public override ISearchContext GetSearchContext() - => new SearchContext(_searcherManager, _fieldValueTypeCollection); + => new SearchContext(_searcherManager, _fieldValueTypeCollection, _relevanceScorerDefinitionCollection); protected virtual void Dispose(bool disposing) { diff --git a/src/Examine.Lucene/Scoring/ILuceneRelevanceScorerFunctionDefintion.cs b/src/Examine.Lucene/Scoring/ILuceneRelevanceScorerFunctionDefintion.cs new file mode 100644 index 000000000..d1b1d29cb --- /dev/null +++ b/src/Examine.Lucene/Scoring/ILuceneRelevanceScorerFunctionDefintion.cs @@ -0,0 +1,9 @@ +using Lucene.Net.Search; + +namespace Examine.Lucene.Scoring +{ + public interface ILuceneRelevanceScorerFunctionDefintion + { + Query GetScoreQuery(Query inner); + } +} diff --git a/src/Examine.Lucene/Scoring/LuceneTimeRelevanceScorerFunctionDefintion.cs b/src/Examine.Lucene/Scoring/LuceneTimeRelevanceScorerFunctionDefintion.cs new file mode 100644 index 000000000..7ef31f894 --- /dev/null +++ b/src/Examine.Lucene/Scoring/LuceneTimeRelevanceScorerFunctionDefintion.cs @@ -0,0 +1,86 @@ +using System; +using Examine.Lucene.Scoring; +using Lucene.Net.Documents; +using Lucene.Net.Index; +using Lucene.Net.Queries; +using Lucene.Net.Search; + +namespace Examine.Scoring { + public class LuceneTimeRelevanceScorerFunctionDefintion : TimeRelevanceScorerFunctionDefintion, ILuceneRelevanceScorerFunctionDefintion + { + public LuceneTimeRelevanceScorerFunctionDefintion(string fieldName, float boost, TimeSpan boostTimeRange) : base(fieldName, boost, boostTimeRange) + { + } + + public Query GetScoreQuery(Query inner) => new FreshnessScoreQuery(inner, FieldName, BoostTimeRange, Boost); + } + + public class FreshnessScoreQuery : CustomScoreQuery + { + private readonly string _fieldName; + private readonly TimeSpan _duration; + private readonly float _boost; + + public FreshnessScoreQuery(Query subQuery, string fieldName, TimeSpan duration, float boost) : base(subQuery) + { + _fieldName = fieldName; + _duration = duration; + _boost = boost; + } + + protected override CustomScoreProvider GetCustomScoreProvider(AtomicReaderContext context) => new FreshnessScoreProvider(context, _fieldName, _duration, _boost); + + private class FreshnessScoreProvider : CustomScoreProvider + { + private readonly string _fieldName; + private readonly TimeSpan _duration; + private readonly float _boost; + + public FreshnessScoreProvider(AtomicReaderContext context, string fieldName, TimeSpan duration, float boost) : base(context) + { + _fieldName = fieldName; + _duration = duration; + _boost = boost; + } + + public override float CustomScore(int doc, float subQueryScore, float valSrcScore) + { + var date = GetDocumentDate(doc); + + var score = subQueryScore; + + if (date != null) + { + var end = DateTime.Now; + var start = end.Subtract(_duration); + + if ((date > start && date < end) || (date < start && date > end)) + { + score *= _boost; + } + } + + return score; + } + + private DateTime? GetDocumentDate(int doc) + { + var document = m_context.Reader.Document(doc); + + var field = document.GetField(_fieldName); + + if (field != null && field.NumericType == NumericFieldType.INT64) + { + var timestamp = field.GetInt64Value() ?? 0; + + var date = new DateTime(timestamp); + + return date; + } + + return null; + } + + } + } +} diff --git a/src/Examine.Lucene/Search/ISearchContext.cs b/src/Examine.Lucene/Search/ISearchContext.cs index 205a843dc..8308f1f28 100644 --- a/src/Examine.Lucene/Search/ISearchContext.cs +++ b/src/Examine.Lucene/Search/ISearchContext.cs @@ -9,5 +9,7 @@ public interface ISearchContext string[] SearchableFields { get; } IIndexFieldValueType GetFieldValueType(string fieldName); + + RelevanceScorerDefinition GetRelevanceScorer(string scorerName); } } diff --git a/src/Examine.Lucene/Search/LuceneBooleanOperation.cs b/src/Examine.Lucene/Search/LuceneBooleanOperation.cs index b38dd8422..18f39e971 100644 --- a/src/Examine.Lucene/Search/LuceneBooleanOperation.cs +++ b/src/Examine.Lucene/Search/LuceneBooleanOperation.cs @@ -13,7 +13,7 @@ namespace Examine.Lucene.Search public class LuceneBooleanOperation : LuceneBooleanOperationBase, IQueryExecutor { private readonly LuceneSearchQuery _search; - + public LuceneBooleanOperation(LuceneSearchQuery search) : base(search) { @@ -65,5 +65,6 @@ public LuceneBooleanOperation(LuceneSearchQuery search) #endregion public override string ToString() => _search.ToString(); + public override IScoreQuery ScoreWith(params string[] scorers) => _search.ScoreWith(scorers); } } diff --git a/src/Examine.Lucene/Search/LuceneBooleanOperationBase.cs b/src/Examine.Lucene/Search/LuceneBooleanOperationBase.cs index 2769479ad..918c79a75 100644 --- a/src/Examine.Lucene/Search/LuceneBooleanOperationBase.cs +++ b/src/Examine.Lucene/Search/LuceneBooleanOperationBase.cs @@ -18,13 +18,13 @@ protected LuceneBooleanOperationBase(LuceneSearchQueryBase search) public abstract IQuery Or(); public abstract IQuery Not(); - public IBooleanOperation And(Func inner, BooleanOperation defaultOp = BooleanOperation.And) + public IBooleanOperation And(Func inner, BooleanOperation defaultOp = BooleanOperation.And) => Op(inner, BooleanOperation.And, defaultOp); - public IBooleanOperation Or(Func inner, BooleanOperation defaultOp = BooleanOperation.And) + public IBooleanOperation Or(Func inner, BooleanOperation defaultOp = BooleanOperation.And) => Op(inner, BooleanOperation.Or, defaultOp); - public IBooleanOperation AndNot(Func inner, BooleanOperation defaultOp = BooleanOperation.And) + public IBooleanOperation AndNot(Func inner, BooleanOperation defaultOp = BooleanOperation.And) => Op(inner, BooleanOperation.Not, defaultOp); protected abstract INestedQuery AndNested(); @@ -35,7 +35,7 @@ public IBooleanOperation AndNot(Func inne INestedQuery INestedBooleanOperation.Or() => OrNested(); INestedQuery INestedBooleanOperation.Not() => NotNested(); - INestedBooleanOperation INestedBooleanOperation.And(Func inner, BooleanOperation defaultOp) + INestedBooleanOperation INestedBooleanOperation.And(Func inner, BooleanOperation defaultOp) => Op(inner, BooleanOperation.And, defaultOp); INestedBooleanOperation INestedBooleanOperation.Or(Func inner, BooleanOperation defaultOp) @@ -77,5 +77,6 @@ protected internal LuceneBooleanOperationBase Op( public abstract IOrdering SelectFields(ISet fieldNames); public abstract IOrdering SelectField(string fieldName); public abstract IOrdering SelectAllFields(); + public abstract IScoreQuery ScoreWith(params string[] scorers); } } diff --git a/src/Examine.Lucene/Search/LuceneSearchExecutor.cs b/src/Examine.Lucene/Search/LuceneSearchExecutor.cs index 374afa856..fb296ea43 100644 --- a/src/Examine.Lucene/Search/LuceneSearchExecutor.cs +++ b/src/Examine.Lucene/Search/LuceneSearchExecutor.cs @@ -4,6 +4,7 @@ using Examine.Search; using Lucene.Net.Documents; using Lucene.Net.Index; +using Lucene.Net.Queries; using Lucene.Net.Search; namespace Examine.Lucene.Search @@ -195,7 +196,7 @@ private static SearchAfterOptions GetSearchAfterOptions(TopDocs topDocs) private LuceneSearchResult GetSearchResult(int index, TopDocs topDocs, IndexSearcher luceneSearcher) { // I have seen IndexOutOfRangeException here which is strange as this is only called in one place - // and from that one place "i" is always less than the size of this collection. + // and from that one place "i" is always less than the size of this collection. // but we'll error check here anyways if (topDocs?.ScoreDocs.Length < index) { @@ -289,6 +290,11 @@ private bool CheckQueryForExtractTerms(Query query) return CheckQueryForExtractTerms(lbq.Wrapped); } + if(query is CustomScoreQuery csq) + { + return CheckQueryForExtractTerms(csq.SubQuery); + } + Type queryType = query.GetType(); if (typeof(TermRangeQuery).IsAssignableFrom(queryType) @@ -296,7 +302,7 @@ private bool CheckQueryForExtractTerms(Query query) || typeof(FuzzyQuery).IsAssignableFrom(queryType) || (queryType.IsGenericType && queryType.GetGenericTypeDefinition().IsAssignableFrom(typeof(NumericRangeQuery<>)))) { - return false; //ExtractTerms() not supported by TermRangeQuery, WildcardQuery,FuzzyQuery and will throw NotSupportedException + return false; //ExtractTerms() not supported by TermRangeQuery, WildcardQuery,FuzzyQuery and will throw NotSupportedException } return true; diff --git a/src/Examine.Lucene/Search/LuceneSearchExtensions.cs b/src/Examine.Lucene/Search/LuceneSearchExtensions.cs index 0661975a5..8f82294dd 100644 --- a/src/Examine.Lucene/Search/LuceneSearchExtensions.cs +++ b/src/Examine.Lucene/Search/LuceneSearchExtensions.cs @@ -1,4 +1,7 @@ using System; +using System.Runtime.CompilerServices; +using Examine.Lucene.Providers; +using Examine.Lucene.Scoring; using Examine.Search; using Lucene.Net.Search; @@ -14,7 +17,7 @@ public static class LuceneSearchExtensions /// Converts an Examine boolean operation to a Lucene representation /// /// The operation. - /// The translated Boolean operation + /// The translated Boolean operation public static Occur ToLuceneOccurrence(this BooleanOperation o) { switch (o) diff --git a/src/Examine.Lucene/Search/LuceneSearchQuery.cs b/src/Examine.Lucene/Search/LuceneSearchQuery.cs index 57f3cf484..417964ad4 100644 --- a/src/Examine.Lucene/Search/LuceneSearchQuery.cs +++ b/src/Examine.Lucene/Search/LuceneSearchQuery.cs @@ -3,6 +3,7 @@ using System.Diagnostics; using System.Linq; using Examine.Lucene.Indexing; +using Examine.Lucene.Scoring; using Examine.Search; using Lucene.Net.Analysis; using Lucene.Net.Search; @@ -22,7 +23,7 @@ public LuceneSearchQuery( ISearchContext searchContext, string category, Analyzer analyzer, LuceneSearchOptions searchOptions, BooleanOperation occurance) : base(CreateQueryParser(searchContext, analyzer, searchOptions), category, searchOptions, occurance) - { + { _searchContext = searchContext; } @@ -81,6 +82,8 @@ private static CustomMultiFieldQueryParser CreateQueryParser(ISearchContext sear public virtual IBooleanOperation OrderByDescending(params SortableField[] fields) => OrderByInternal(true, fields); + public virtual IScoreQuery ScoreWith(params string[] scorers) => ScoreWithInternal(scorers); + public override IBooleanOperation Field(string fieldName, T fieldValue) => RangeQueryInternal(new[] { fieldName }, fieldValue, fieldValue, true, true, Occurrence); @@ -108,7 +111,7 @@ internal LuceneBooleanOperationBase ManagedQueryInternal(string query, string[] var types = fields.Select(f => _searchContext.GetFieldValueType(f)).Where(t => t != null); - //Strangely we need an inner and outer query. If we don't do this then the lucene syntax returned is incorrect + //Strangely we need an inner and outer query. If we don't do this then the lucene syntax returned is incorrect //since it doesn't wrap in parenthesis properly. I'm unsure if this is a lucene issue (assume so) since that is what //is producing the resulting lucene string syntax. It might not be needed internally within Lucene since it's an object //so it might be the ToString() that is the issue. @@ -139,7 +142,7 @@ internal LuceneBooleanOperationBase RangeQueryInternal(string[] fields, T? mi { Query.Add(new LateBoundQuery(() => { - //Strangely we need an inner and outer query. If we don't do this then the lucene syntax returned is incorrect + //Strangely we need an inner and outer query. If we don't do this then the lucene syntax returned is incorrect //since it doesn't wrap in parenthesis properly. I'm unsure if this is a lucene issue (assume so) since that is what //is producing the resulting lucene string syntax. It might not be needed internally within Lucene since it's an object //so it might be the ToString() that is the issue. @@ -227,12 +230,25 @@ private ISearchResults Search(QueryOptions options) } } - var executor = new LuceneSearchExecutor(options, query, SortFields, _searchContext, _fieldsToLoad); + Query scoredQuery = query; + + foreach(var scorerDefinition in RelevanceScores) + { + foreach(var scoreFunction in scorerDefinition.FunctionScorerDefintions) + { + if(scoreFunction is ILuceneRelevanceScorerFunctionDefintion luceneScoreFunction) + { + scoredQuery = luceneScoreFunction.GetScoreQuery(scoredQuery); + } + } + } + + var executor = new LuceneSearchExecutor(options, scoredQuery, SortFields, _searchContext, _fieldsToLoad); var pagesResults = executor.Execute(); return pagesResults; - } + } /// /// Internal operation for adding the ordered results @@ -272,7 +288,7 @@ private LuceneBooleanOperationBase OrderByInternal(bool descending, params Sorta break; case SortType.Double: defaultSort = SortFieldType.DOUBLE; - break; + break; default: throw new ArgumentOutOfRangeException(); } @@ -289,6 +305,18 @@ private LuceneBooleanOperationBase OrderByInternal(bool descending, params Sorta return CreateOp(); } + internal LuceneBooleanOperationBase ScoreWithInternal(params string[] scorers) + { + foreach(var scorer in scorers) + { + var scorerDefinition = _searchContext.GetRelevanceScorer(scorer); + + RelevanceScores.Add(scorerDefinition); + } + + return CreateOp(); + } + internal IBooleanOperation SelectFieldsInternal(ISet loadedFieldNames) { _fieldsToLoad = loadedFieldNames; diff --git a/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs b/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs index 043b6df7c..88f526a18 100644 --- a/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs +++ b/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs @@ -18,6 +18,8 @@ public abstract class LuceneSearchQueryBase : IQuery, INestedQuery public IList SortFields { get; } = new List(); + public IList RelevanceScores { get; } = new List(); + protected Occur Occurrence { get; set; } private BooleanOperation _boolOp; @@ -71,7 +73,7 @@ public IBooleanOperation NativeQuery(string query) } /// - /// Adds a true Lucene Query + /// Adds a true Lucene Query /// /// /// @@ -229,12 +231,12 @@ protected internal LuceneBooleanOperationBase GroupedNotInternal(string[] fields //(!field1:query !field2:query !field3:query) //but Lucene will bork if you provide an array of length 1 (which is != to the field length) - // NOTE: This is important because we cannot prefix a + to a group of NOT's, that doesn't work. - // for example, it cannot be: +(-id:1 -id:2 -id:3) + // NOTE: This is important because we cannot prefix a + to a group of NOT's, that doesn't work. + // for example, it cannot be: +(-id:1 -id:2 -id:3) // and it cannot be: (-id:1 -id:2 -id:3) - this will be an optional list of must not's so really nothing is filtered // It needs to be: -id:1 -id:2 -id:3 - // So we get all clauses + // So we get all clauses var subQueries = GetMultiFieldQuery(fields, fieldVals, Occur.MUST_NOT, true); // then add each individual one directly to the query @@ -328,7 +330,7 @@ protected virtual Query GetFieldInternalQuery(string fieldName, IExamineValue fi { queryToAdd = _queryParser.GetFieldQueryInternal(fieldName, fieldValue.Value); if (queryToAdd != null) - { + { queryToAdd.Boost = fieldValue.Level; } } @@ -340,7 +342,7 @@ protected virtual Query GetFieldInternalQuery(string fieldName, IExamineValue fi } break; case Examineness.Proximity: - int proximity = Convert.ToInt32(fieldValue.Level); + int proximity = Convert.ToInt32(fieldValue.Level); if (useQueryParser) { queryToAdd = _queryParser.GetProximityQueryInternal(fieldName, fieldValue.Value, proximity); @@ -371,7 +373,7 @@ protected virtual Query GetFieldInternalQuery(string fieldName, IExamineValue fi } else { - //standard query + //standard query var proxQuery = fieldName + ":" + fieldValue.Value; queryToAdd = ParseRawQuery(proxQuery); } @@ -402,7 +404,7 @@ private Query ParseRawQuery(string rawQuery) /// /// /// The result of this seems to be better than the above since it does not include results that contain part of the phrase. - /// For example, 'codegarden 090' would be matched against the search term 'codegarden 09' with the above, whereas when using the + /// For example, 'codegarden 090' would be matched against the search term 'codegarden 09' with the above, whereas when using the /// PhraseQuery this is not the case /// private static Query CreatePhraseQuery(string field, string txt) @@ -426,32 +428,32 @@ private static Query CreatePhraseQuery(string field, string txt) /// /// /// docs about this are here: https://github.com/Shazwazza/Examine/wiki/Grouped-Operations - /// + /// /// if matchAllCombinations == false then... /// this will create a query that matches the field index to the value index if the value length is >= to the field length /// otherwise we will have to match all combinations. - /// + /// /// For example if we have these fields: /// bodyText, pageTitle /// and these values: /// "hello", "world" - /// + /// /// then the query output will be: - /// + /// /// bodyText: "hello" pageTitle: "world" - /// + /// /// if matchAllCombinations == true then... - /// This will create a query for all combinations of fields and values. + /// This will create a query for all combinations of fields and values. /// For example if we have these fields: /// bodyText, pageTitle /// and these values: /// "hello", "world" - /// + /// /// then the query output will be: - /// + /// /// bodyText: "hello" bodyText: "world" pageTitle: "hello" pageTitle: "world" - /// - /// + /// + /// private BooleanQuery GetMultiFieldQuery( IReadOnlyList fields, IReadOnlyList fieldVals, @@ -480,7 +482,7 @@ private BooleanQuery GetMultiFieldQuery( return qry; } - //This will align the key value pairs: + //This will align the key value pairs: for (int i = 0; i < fields.Count; i++) { var queryVal = fieldVals[i]; diff --git a/src/Examine.Lucene/Search/MultiSearchContext.cs b/src/Examine.Lucene/Search/MultiSearchContext.cs index 877661c35..06dbc7f64 100644 --- a/src/Examine.Lucene/Search/MultiSearchContext.cs +++ b/src/Examine.Lucene/Search/MultiSearchContext.cs @@ -8,9 +8,9 @@ namespace Examine.Lucene.Search public class MultiSearchContext : ISearchContext { private readonly ISearchContext[] _inner; - + private string[] _fields; - + public MultiSearchContext(ISearchContext[] inner) => _inner = inner; public ISearcherReference GetSearcher() @@ -21,5 +21,6 @@ public ISearcherReference GetSearcher() public IIndexFieldValueType GetFieldValueType(string fieldName) => _inner.Select(cc => cc.GetFieldValueType(fieldName)).FirstOrDefault(type => type != null); + public RelevanceScorerDefinition GetRelevanceScorer(string scorerName) => throw new System.NotImplementedException(); } } diff --git a/src/Examine.Lucene/Search/SearchContext.cs b/src/Examine.Lucene/Search/SearchContext.cs index 406a6941f..0c53a9c3d 100644 --- a/src/Examine.Lucene/Search/SearchContext.cs +++ b/src/Examine.Lucene/Search/SearchContext.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Linq; using Examine.Lucene.Indexing; +using Examine.Lucene.Scoring; using Lucene.Net.Index; using Lucene.Net.Search; @@ -12,12 +13,14 @@ public class SearchContext : ISearchContext { private readonly SearcherManager _searcherManager; private readonly FieldValueTypeCollection _fieldValueTypeCollection; + private readonly RelevanceScorerDefinitionCollection _relevanceScorerDefinitionCollection; private string[] _searchableFields; - public SearchContext(SearcherManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection) + public SearchContext(SearcherManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection, RelevanceScorerDefinitionCollection relevanceScorerDefinitionCollection) { - _searcherManager = searcherManager; + _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection ?? throw new ArgumentNullException(nameof(fieldValueTypeCollection)); + _relevanceScorerDefinitionCollection = relevanceScorerDefinitionCollection ?? throw new ArgumentNullException(nameof(relevanceScorerDefinitionCollection)); } public ISearcherReference GetSearcher() => new SearcherReference(_searcherManager); @@ -33,7 +36,7 @@ public string[] SearchableFields // performing a 'search'. We must ensure that the underlying reader has the correct reference counts. IndexSearcher searcher = _searcherManager.Acquire(); try - { + { var fields = MultiFields.GetMergedFieldInfos(searcher.IndexReader) .Select(x => x.Name) .ToList(); @@ -57,8 +60,18 @@ public IIndexFieldValueType GetFieldValueType(string fieldName) { //Get the value type for the field, or use the default if not defined return _fieldValueTypeCollection.GetValueType( - fieldName, + fieldName, _fieldValueTypeCollection.ValueTypeFactories.GetRequiredFactory(FieldDefinitionTypes.FullText)); } + + public RelevanceScorerDefinition GetRelevanceScorer(string scorerName) + { + if (!_relevanceScorerDefinitionCollection.TryGetValue(scorerName, out var scorer)) + { + throw new InvalidOperationException($"No {nameof(RelevanceScorerDefinition)} was found for scorer name {scorerName}"); + } + + return scorer; + } } } diff --git a/src/Examine.Test/Examine.Lucene/Search/RelevanceScorerTests.cs b/src/Examine.Test/Examine.Lucene/Search/RelevanceScorerTests.cs new file mode 100644 index 000000000..c9b302b0f --- /dev/null +++ b/src/Examine.Test/Examine.Lucene/Search/RelevanceScorerTests.cs @@ -0,0 +1,231 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Examine.Lucene.Search; +using Examine.Scoring; +using Lucene.Net.Analysis.Standard; +using NUnit.Framework; + +namespace Examine.Test.Examine.Lucene.Search +{ + [TestFixture] + public class ScoringProfileTests : ExamineBaseTest + { + [Test] + public void Score_No_Profiles() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")))) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = new DateTime(2000, 01, 02), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_Out_Of_Range() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(1, 0, 0, 0)) })))) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddDays(-2), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_In_Range() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(1, 0, 0, 0)) })))) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddHours(-5), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.287674516f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_Future_Date() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(1, 0, 0, 0)) })))) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddHours(5), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_Future_Date_With_Future_Duration() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(-1, 0, 0, 0)) })))) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddHours(5), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.287674516f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_Future_Duration() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(-1, 0, 0, 0)) })))) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddDays(-2), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + } +} diff --git a/src/Examine.Test/ExamineBaseTest.cs b/src/Examine.Test/ExamineBaseTest.cs index 2d7b72618..fd7c70dbb 100644 --- a/src/Examine.Test/ExamineBaseTest.cs +++ b/src/Examine.Test/ExamineBaseTest.cs @@ -8,6 +8,7 @@ using Moq; using Examine.Lucene.Directories; using System.Collections.Generic; +using Examine.Lucene.Scoring; namespace Examine.Test { @@ -25,7 +26,7 @@ public virtual void Setup() [TearDown] public virtual void TearDown() => _loggerFactory.Dispose(); - public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCollection fieldDefinitions = null, IndexDeletionPolicy indexDeletionPolicy = null, IReadOnlyDictionary indexValueTypesFactory = null) + public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCollection fieldDefinitions = null, IndexDeletionPolicy indexDeletionPolicy = null, IReadOnlyDictionary indexValueTypesFactory = null, RelevanceScorerDefinitionCollection relevanceScorerDefinitions = null) => new TestIndex( _loggerFactory, Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions @@ -34,7 +35,8 @@ public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCol DirectoryFactory = new GenericDirectoryFactory(_ => d), Analyzer = analyzer, IndexDeletionPolicy = indexDeletionPolicy, - IndexValueTypesFactory = indexValueTypesFactory + IndexValueTypesFactory = indexValueTypesFactory, + RelevanceScorerDefinitions = relevanceScorerDefinitions ?? new RelevanceScorerDefinitionCollection() })); public TestIndex GetTestIndex(IndexWriter writer)