From 913c9ba207b0c512a4568b537ec40c6cd4435a88 Mon Sep 17 00:00:00 2001 From: Jon Whitter Date: Thu, 29 Jun 2023 00:04:34 +0100 Subject: [PATCH 1/2] added first pass of adding custom score queries --- src/Examine.Lucene/LuceneIndexOptions.cs | 3 + .../Providers/BaseLuceneSearcher.cs | 10 +- src/Examine.Lucene/Providers/LuceneIndex.cs | 2 +- .../Providers/LuceneSearcher.cs | 7 +- src/Examine.Lucene/Scoring/IScoringProfile.cs | 14 + .../Search/LuceneSearchExtensions.cs | 15 + .../Search/LuceneSearchQuery.cs | 21 +- .../Search/Scoing/FreshnessScoringProfile.cs | 94 +++++++ .../Search/ScoringProfileTests.cs | 257 ++++++++++++++++++ src/Examine.Test/ExamineBaseTest.cs | 6 +- 10 files changed, 418 insertions(+), 11 deletions(-) create mode 100644 src/Examine.Lucene/Scoring/IScoringProfile.cs create mode 100644 src/Examine.Test/Examine.Lucene/Search/Scoing/FreshnessScoringProfile.cs create mode 100644 src/Examine.Test/Examine.Lucene/Search/ScoringProfileTests.cs diff --git a/src/Examine.Lucene/LuceneIndexOptions.cs b/src/Examine.Lucene/LuceneIndexOptions.cs index 733a87e71..822cb6feb 100644 --- a/src/Examine.Lucene/LuceneIndexOptions.cs +++ b/src/Examine.Lucene/LuceneIndexOptions.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Text; +using Examine.Lucene.Scoring; using Lucene.Net.Analysis; using Lucene.Net.Analysis.Standard; using Lucene.Net.Index; @@ -20,5 +21,7 @@ public class LuceneIndexOptions : IndexOptions /// This is generally used to initialize any custom value types for your indexer since the value type collection cannot be modified at runtime. /// public IReadOnlyDictionary IndexValueTypesFactory { get; set; } + + public IList ScoreProfiles { get; set; } } } diff --git a/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs b/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs index 0c4a2c29c..11228139c 100644 --- a/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs +++ b/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs @@ -3,6 +3,8 @@ using Lucene.Net.Search; using Examine.Lucene.Search; using Examine.Search; +using System.Collections.Generic; +using Examine.Lucene.Scoring; namespace Examine.Lucene.Providers { @@ -11,17 +13,21 @@ namespace Examine.Lucene.Providers /// public abstract class BaseLuceneSearcher : BaseSearchProvider { + public IList ScoringProfiles { get; } + /// /// Constructor to allow for creating an indexer at runtime /// /// /// - protected BaseLuceneSearcher(string name, Analyzer analyzer) + protected BaseLuceneSearcher(string name, Analyzer analyzer, IList scoringProfiles = null) : base(name) { if (string.IsNullOrWhiteSpace(name)) throw new ArgumentException("Value cannot be null or whitespace.", nameof(name)); LuceneAnalyzer = analyzer; + + ScoringProfiles = scoringProfiles ?? new List(); } /// @@ -48,7 +54,7 @@ public IQuery CreateQuery(string category, BooleanOperation defaultOperation, An if (luceneAnalyzer == null) throw new ArgumentNullException(nameof(luceneAnalyzer)); - return new LuceneSearchQuery(GetSearchContext(), category, luceneAnalyzer, searchOptions, defaultOperation); + return new LuceneSearchQuery(GetSearchContext(), category, luceneAnalyzer, searchOptions, defaultOperation, ScoringProfiles); } /// diff --git a/src/Examine.Lucene/Providers/LuceneIndex.cs b/src/Examine.Lucene/Providers/LuceneIndex.cs index 6ac9124eb..1827ca839 100644 --- a/src/Examine.Lucene/Providers/LuceneIndex.cs +++ b/src/Examine.Lucene/Providers/LuceneIndex.cs @@ -1048,7 +1048,7 @@ private LuceneSearcher CreateSearcher() // wait for most recent changes when first creating the searcher WaitForChanges(); - return new LuceneSearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection); + return new LuceneSearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection, _options.ScoreProfiles); } /// diff --git a/src/Examine.Lucene/Providers/LuceneSearcher.cs b/src/Examine.Lucene/Providers/LuceneSearcher.cs index c20194a15..05aa18622 100644 --- a/src/Examine.Lucene/Providers/LuceneSearcher.cs +++ b/src/Examine.Lucene/Providers/LuceneSearcher.cs @@ -2,7 +2,8 @@ using Examine.Lucene.Search; using Lucene.Net.Search; using Lucene.Net.Analysis; - +using System.Collections.Generic; +using Examine.Lucene.Scoring; namespace Examine.Lucene.Providers { @@ -23,8 +24,8 @@ public class LuceneSearcher : BaseLuceneSearcher, IDisposable /// /// /// - public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection) - : base(name, analyzer) + public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, IList scoringProfiles = null) + : base(name, analyzer, scoringProfiles) { _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection; diff --git a/src/Examine.Lucene/Scoring/IScoringProfile.cs b/src/Examine.Lucene/Scoring/IScoringProfile.cs new file mode 100644 index 000000000..7c12334e9 --- /dev/null +++ b/src/Examine.Lucene/Scoring/IScoringProfile.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Lucene.Net.Search; + +namespace Examine.Lucene.Scoring +{ + public interface IScoringProfile + { + Query GetScoreQuery(Query inner); + } +} diff --git a/src/Examine.Lucene/Search/LuceneSearchExtensions.cs b/src/Examine.Lucene/Search/LuceneSearchExtensions.cs index 0661975a5..b1bf204f8 100644 --- a/src/Examine.Lucene/Search/LuceneSearchExtensions.cs +++ b/src/Examine.Lucene/Search/LuceneSearchExtensions.cs @@ -1,4 +1,7 @@ using System; +using System.Runtime.CompilerServices; +using Examine.Lucene.Providers; +using Examine.Lucene.Scoring; using Examine.Search; using Lucene.Net.Search; @@ -66,5 +69,17 @@ public static ILuceneSearchResults ExecuteWithLucene(this IQueryExecutor queryEx } throw new NotSupportedException("QueryExecutor is not Lucene.NET"); } + + public static void AddScoringProfile(this ISearcher searcher, IScoringProfile scoringProfile) + { + if(searcher is LuceneSearcher luceneSearcher) + { + luceneSearcher.ScoringProfiles.Add(scoringProfile); + + return; + } + + throw new NotSupportedException("Searcher is not Lucene.NET"); + } } } diff --git a/src/Examine.Lucene/Search/LuceneSearchQuery.cs b/src/Examine.Lucene/Search/LuceneSearchQuery.cs index 57f3cf484..db88ce3d4 100644 --- a/src/Examine.Lucene/Search/LuceneSearchQuery.cs +++ b/src/Examine.Lucene/Search/LuceneSearchQuery.cs @@ -3,6 +3,7 @@ using System.Diagnostics; using System.Linq; using Examine.Lucene.Indexing; +using Examine.Lucene.Scoring; using Examine.Search; using Lucene.Net.Analysis; using Lucene.Net.Search; @@ -16,14 +17,16 @@ namespace Examine.Lucene.Search public class LuceneSearchQuery : LuceneSearchQueryBase, IQueryExecutor { private readonly ISearchContext _searchContext; + private readonly IList _scoringProfiles; private ISet _fieldsToLoad = null; public LuceneSearchQuery( ISearchContext searchContext, - string category, Analyzer analyzer, LuceneSearchOptions searchOptions, BooleanOperation occurance) + string category, Analyzer analyzer, LuceneSearchOptions searchOptions, BooleanOperation occurance, IList scoringProfiles) : base(CreateQueryParser(searchContext, analyzer, searchOptions), category, searchOptions, occurance) { _searchContext = searchContext; + _scoringProfiles = scoringProfiles; } private static CustomMultiFieldQueryParser CreateQueryParser(ISearchContext searchContext, Analyzer analyzer, LuceneSearchOptions searchOptions) @@ -227,12 +230,24 @@ private ISearchResults Search(QueryOptions options) } } - var executor = new LuceneSearchExecutor(options, query, SortFields, _searchContext, _fieldsToLoad); + var scoredQuery = ApplyScoringProfiles(query); + + var executor = new LuceneSearchExecutor(options, scoredQuery, SortFields, _searchContext, _fieldsToLoad); var pagesResults = executor.Execute(); return pagesResults; - } + } + + protected Query ApplyScoringProfiles(Query query) + { + foreach (var profile in _scoringProfiles) + { + query = profile.GetScoreQuery(query); + } + + return query; + } /// /// Internal operation for adding the ordered results diff --git a/src/Examine.Test/Examine.Lucene/Search/Scoing/FreshnessScoringProfile.cs b/src/Examine.Test/Examine.Lucene/Search/Scoing/FreshnessScoringProfile.cs new file mode 100644 index 000000000..56305672e --- /dev/null +++ b/src/Examine.Test/Examine.Lucene/Search/Scoing/FreshnessScoringProfile.cs @@ -0,0 +1,94 @@ +using System; +using Examine.Lucene.Scoring; +using Lucene.Net.Documents; +using Lucene.Net.Index; +using Lucene.Net.Queries; +using Lucene.Net.Search; + +namespace Examine.Test.Examine.Lucene.Search.Scoring +{ + public class FreshnessScoringProfile : IScoringProfile + { + private readonly string _fieldName; + private readonly TimeSpan _duration; + private readonly float _boost; + + public FreshnessScoringProfile(string fieldName, TimeSpan duration, float boost) + { + _fieldName = fieldName; + _duration = duration; + _boost = boost; + } + + public Query GetScoreQuery(Query inner) => new FreshnessScoreQuery(inner, _fieldName, _duration, _boost); + } + + public class FreshnessScoreQuery : CustomScoreQuery + { + private readonly string _fieldName; + private readonly TimeSpan _duration; + private readonly float _boost; + + public FreshnessScoreQuery(Query subQuery, string fieldName, TimeSpan duration, float boost) : base(subQuery) + { + _fieldName = fieldName; + _duration = duration; + _boost = boost; + } + + protected override CustomScoreProvider GetCustomScoreProvider(AtomicReaderContext context) => new FreshnessScoreProvider(context, _fieldName, _duration, _boost); + + private class FreshnessScoreProvider : CustomScoreProvider + { + private readonly string _fieldName; + private readonly TimeSpan _duration; + private readonly float _boost; + + public FreshnessScoreProvider(AtomicReaderContext context, string fieldName, TimeSpan duration, float boost) : base(context) + { + _fieldName = fieldName; + _duration = duration; + _boost = boost; + } + + public override float CustomScore(int doc, float subQueryScore, float valSrcScore) + { + var date = GetDocumentDate(doc); + + var score = subQueryScore; + + if (date != null) + { + var end = DateTime.Now; + var start = end.Subtract(_duration); + + if (date > start && date < end || date < start && date > end) + { + score *= _boost; + } + } + + return score; + } + + private DateTime? GetDocumentDate(int doc) + { + var document = m_context.Reader.Document(doc); + + var field = document.GetField(_fieldName); + + if (field != null && field.NumericType == NumericFieldType.INT64) + { + var timestamp = field.GetInt64Value() ?? 0; + + var date = new DateTime(timestamp); + + return date; + } + + return null; + } + + } + } +} diff --git a/src/Examine.Test/Examine.Lucene/Search/ScoringProfileTests.cs b/src/Examine.Test/Examine.Lucene/Search/ScoringProfileTests.cs new file mode 100644 index 000000000..15b7bc4d3 --- /dev/null +++ b/src/Examine.Test/Examine.Lucene/Search/ScoringProfileTests.cs @@ -0,0 +1,257 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Examine.Lucene.Scoring; +using Examine.Lucene.Search; +using Examine.Test.Examine.Lucene.Search.Scoring; +using Lucene.Net.Analysis.Standard; +using NUnit.Framework; + +namespace Examine.Test.Examine.Lucene.Search +{ + [TestFixture] + public class ScoringProfileTests : ExamineBaseTest + { + [Test] + public void Score_No_Profiles() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")))) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = new DateTime(2000, 01, 02), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_Out_Of_Range() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(1, 0, 0, 0), 1.5f) })) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddDays(-2), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_In_Range() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(1, 0, 0, 0), 1.5f) })) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddHours(-5), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.287674516f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_Future_Date() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(1, 0, 0, 0), 1.5f) })) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddHours(5), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_Future_Date_With_Future_Duration() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(-1, 0, 0, 0), 1.5f) })) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddHours(5), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.287674516f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Profile_Future_Duration() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), + scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(-1, 0, 0, 0), 1.5f) })) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddDays(-2), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + var searcher = indexer.Searcher; + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + + [Test] + public void Score_Freshness_Add_On_Searcher() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex( + luceneDir, + analyzer, + new FieldDefinitionCollection(new FieldDefinition("created", "datetime")))) + { + indexer.IndexItems(new[] + { + ValueSet.FromObject(123.ToString(), "content", + new + { + created = DateTime.Now.AddDays(-2), + bodyText = "lorem ipsum", + nodeTypeAlias = "CWS_Home" + }) + }); + + var searcher = indexer.Searcher; + + searcher.AddScoringProfile(new FreshnessScoringProfile("created", new TimeSpan(-1, 0, 0, 0), 1.5f)); + + var numberSortedCriteria = searcher.CreateQuery() + .Field("bodyText", "ipsum"); + + var numberSortedResult = numberSortedCriteria + .Execute(); + + Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); + } + } + } +} diff --git a/src/Examine.Test/ExamineBaseTest.cs b/src/Examine.Test/ExamineBaseTest.cs index 2d7b72618..9d02ba222 100644 --- a/src/Examine.Test/ExamineBaseTest.cs +++ b/src/Examine.Test/ExamineBaseTest.cs @@ -8,6 +8,7 @@ using Moq; using Examine.Lucene.Directories; using System.Collections.Generic; +using Examine.Lucene.Scoring; namespace Examine.Test { @@ -25,7 +26,7 @@ public virtual void Setup() [TearDown] public virtual void TearDown() => _loggerFactory.Dispose(); - public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCollection fieldDefinitions = null, IndexDeletionPolicy indexDeletionPolicy = null, IReadOnlyDictionary indexValueTypesFactory = null) + public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCollection fieldDefinitions = null, IndexDeletionPolicy indexDeletionPolicy = null, IReadOnlyDictionary indexValueTypesFactory = null, IList scoringProfiles = null) => new TestIndex( _loggerFactory, Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions @@ -34,7 +35,8 @@ public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCol DirectoryFactory = new GenericDirectoryFactory(_ => d), Analyzer = analyzer, IndexDeletionPolicy = indexDeletionPolicy, - IndexValueTypesFactory = indexValueTypesFactory + IndexValueTypesFactory = indexValueTypesFactory, + ScoreProfiles = scoringProfiles })); public TestIndex GetTestIndex(IndexWriter writer) From f8338244d9047f56ab9c9dfbd69f8f828ce95c39 Mon Sep 17 00:00:00 2001 From: Jon Whitter Date: Fri, 14 Jul 2023 15:59:41 +0100 Subject: [PATCH 2/2] added custom score Iquery extension --- src/Examine.Core/IndexOptions.cs | 7 +- ...OnlyRelevanceScorerDefinitionCollection.cs | 61 +++++++++++++++++ src/Examine.Core/RelevanceScorerDefinition.cs | 27 ++++++++ .../RelevanceScorerDefinitionCollection.cs | 25 +++++++ .../RelevanceScorerFunctionBaseDefintion.cs | 29 ++++++++ .../TimeRelevanceScorerFunctionDefintion.cs | 26 +++++++ src/Examine.Core/Search/IBooleanOperation.cs | 2 +- src/Examine.Core/Search/IScoreQuery.cs | 12 ++++ src/Examine.Lucene/LuceneIndexOptions.cs | 2 - .../Providers/BaseLuceneSearcher.cs | 14 ++-- src/Examine.Lucene/Providers/LuceneIndex.cs | 32 +++++---- .../Providers/LuceneSearcher.cs | 8 ++- ...ILuceneRelevanceScorerFunctionDefintion.cs | 9 +++ src/Examine.Lucene/Scoring/IScoringProfile.cs | 14 ---- ...neTimeRelevanceScorerFunctionDefintion.cs} | 18 ++--- src/Examine.Lucene/Search/ISearchContext.cs | 2 + .../Search/LuceneBooleanOperation.cs | 3 +- .../Search/LuceneBooleanOperationBase.cs | 9 +-- .../Search/LuceneSearchExecutor.cs | 10 ++- .../Search/LuceneSearchExtensions.cs | 14 +--- .../Search/LuceneSearchQuery.cs | 49 ++++++++----- .../Search/LuceneSearchQueryBase.cs | 40 +++++------ .../Search/MultiSearchContext.cs | 5 +- src/Examine.Lucene/Search/SearchContext.cs | 21 ++++-- ...rofileTests.cs => RelevanceScorerTests.cs} | 68 ++++++------------- src/Examine.Test/ExamineBaseTest.cs | 4 +- 26 files changed, 343 insertions(+), 168 deletions(-) create mode 100644 src/Examine.Core/ReadOnlyRelevanceScorerDefinitionCollection.cs create mode 100644 src/Examine.Core/RelevanceScorerDefinition.cs create mode 100644 src/Examine.Core/RelevanceScorerDefinitionCollection.cs create mode 100644 src/Examine.Core/RelevanceScorerFunctionBaseDefintion.cs create mode 100644 src/Examine.Core/Scoring/TimeRelevanceScorerFunctionDefintion.cs create mode 100644 src/Examine.Core/Search/IScoreQuery.cs create mode 100644 src/Examine.Lucene/Scoring/ILuceneRelevanceScorerFunctionDefintion.cs delete mode 100644 src/Examine.Lucene/Scoring/IScoringProfile.cs rename src/{Examine.Test/Examine.Lucene/Search/Scoing/FreshnessScoringProfile.cs => Examine.Lucene/Scoring/LuceneTimeRelevanceScorerFunctionDefintion.cs} (80%) rename src/Examine.Test/Examine.Lucene/Search/{ScoringProfileTests.cs => RelevanceScorerTests.cs} (75%) diff --git a/src/Examine.Core/IndexOptions.cs b/src/Examine.Core/IndexOptions.cs index c773f2f3d..83bea1133 100644 --- a/src/Examine.Core/IndexOptions.cs +++ b/src/Examine.Core/IndexOptions.cs @@ -2,9 +2,14 @@ namespace Examine { public class IndexOptions { - public IndexOptions() => FieldDefinitions = new FieldDefinitionCollection(); + public IndexOptions() { + FieldDefinitions = new FieldDefinitionCollection(); + RelevanceScorerDefinitions = new RelevanceScorerDefinitionCollection(); + } public FieldDefinitionCollection FieldDefinitions { get; set; } public IValueSetValidator Validator { get; set; } + + public RelevanceScorerDefinitionCollection RelevanceScorerDefinitions { get; set; } } } diff --git a/src/Examine.Core/ReadOnlyRelevanceScorerDefinitionCollection.cs b/src/Examine.Core/ReadOnlyRelevanceScorerDefinitionCollection.cs new file mode 100644 index 000000000..f70bb152f --- /dev/null +++ b/src/Examine.Core/ReadOnlyRelevanceScorerDefinitionCollection.cs @@ -0,0 +1,61 @@ +using System; +using System.Collections.Concurrent; +using System.Collections; +using System.Collections.Generic; +using System.Linq; + +namespace Examine +{ + public class ReadOnlyRelevanceScorerDefinitionCollection : IEnumerable + { + public ReadOnlyRelevanceScorerDefinitionCollection() + : this(Enumerable.Empty()) + { + } + + public ReadOnlyRelevanceScorerDefinitionCollection(params RelevanceScorerDefinition[] definitions) + : this((IEnumerable)definitions) + { + + } + + public ReadOnlyRelevanceScorerDefinitionCollection(IEnumerable definitions) + { + if (definitions == null) + { + return; + } + + foreach (var s in definitions.GroupBy(x => x.Name)) + { + var suggester = s.FirstOrDefault(); + if (suggester != default) + { + Definitions.TryAdd(s.Key, suggester); + } + } + } + + /// + /// Tries to get a by name + /// + /// + /// + /// + /// returns true if one was found otherwise false + /// + /// + /// Marked as virtual so developers can inherit this class and override this method in case + /// relevance definitions are dynamic. + /// + public virtual bool TryGetValue(string relevanceScorerName, out RelevanceScorerDefinition relevanceScorerDefinition) => Definitions.TryGetValue(relevanceScorerName, out relevanceScorerDefinition); + + public int Count => Definitions.Count; + + protected ConcurrentDictionary Definitions { get; } = new ConcurrentDictionary(StringComparer.InvariantCultureIgnoreCase); + + public IEnumerator GetEnumerator() => Definitions.Values.GetEnumerator(); + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + } +} diff --git a/src/Examine.Core/RelevanceScorerDefinition.cs b/src/Examine.Core/RelevanceScorerDefinition.cs new file mode 100644 index 000000000..7c0679864 --- /dev/null +++ b/src/Examine.Core/RelevanceScorerDefinition.cs @@ -0,0 +1,27 @@ +using System.Collections.Generic; + +namespace Examine +{ + /// + /// Defines how to score a document to affect it's relevance + /// + public class RelevanceScorerDefinition + { + public RelevanceScorerDefinition(string name, + IEnumerable functionScorerDefintions) + { + Name = name; + FunctionScorerDefintions = functionScorerDefintions; + } + + /// + /// Name + /// + public string Name { get; } + + /// + /// Field Boosting Function Defintions + /// + public IEnumerable FunctionScorerDefintions { get; } + } +} diff --git a/src/Examine.Core/RelevanceScorerDefinitionCollection.cs b/src/Examine.Core/RelevanceScorerDefinitionCollection.cs new file mode 100644 index 000000000..f411617ae --- /dev/null +++ b/src/Examine.Core/RelevanceScorerDefinitionCollection.cs @@ -0,0 +1,25 @@ +using System; + +namespace Examine +{ + public class RelevanceScorerDefinitionCollection : ReadOnlyRelevanceScorerDefinitionCollection + { + public RelevanceScorerDefinitionCollection(params RelevanceScorerDefinition[] definitions) : base(definitions) + { + } + + public RelevanceScorerDefinitionCollection() + { + } + + public RelevanceScorerDefinition GetOrAdd(string fieldName, Func add) => Definitions.GetOrAdd(fieldName, add); + + /// + /// Replace any definition with the specified one, if one doesn't exist then it is added + /// + /// + public void AddOrUpdate(RelevanceScorerDefinition definition) => Definitions.AddOrUpdate(definition.Name, definition, (s, factory) => definition); + + public bool TryAdd(RelevanceScorerDefinition definition) => Definitions.TryAdd(definition.Name, definition); + } +} diff --git a/src/Examine.Core/RelevanceScorerFunctionBaseDefintion.cs b/src/Examine.Core/RelevanceScorerFunctionBaseDefintion.cs new file mode 100644 index 000000000..b97b6817f --- /dev/null +++ b/src/Examine.Core/RelevanceScorerFunctionBaseDefintion.cs @@ -0,0 +1,29 @@ +namespace Examine +{ + /// + /// Base for Relevance Scorer Functions + /// + public abstract class RelevanceScorerFunctionBaseDefintion + { + /// + /// Constructor + /// + /// Name of the field for the function + /// Boost for the function + public RelevanceScorerFunctionBaseDefintion(string fieldName, float boost) + { + FieldName = fieldName; + Boost = boost; + } + + /// + /// Name of the field for the function + /// + public string FieldName { get; } + + /// + /// Boost for the function + /// + public float Boost { get; } + } +} diff --git a/src/Examine.Core/Scoring/TimeRelevanceScorerFunctionDefintion.cs b/src/Examine.Core/Scoring/TimeRelevanceScorerFunctionDefintion.cs new file mode 100644 index 000000000..f2cef0dcc --- /dev/null +++ b/src/Examine.Core/Scoring/TimeRelevanceScorerFunctionDefintion.cs @@ -0,0 +1,26 @@ +using System; + +namespace Examine.Scoring +{ + /// + /// Boosts relevance based on time recency + /// + public class TimeRelevanceScorerFunctionDefintion : RelevanceScorerFunctionBaseDefintion + { + /// + /// Constructor + /// + /// Name of the field + /// Boost + /// Duration from current time to boost from + public TimeRelevanceScorerFunctionDefintion(string fieldName, float boost, TimeSpan boostTimeRange) : base(fieldName, boost) + { + BoostTimeRange = boostTimeRange; + } + + /// + /// Time range to boost from + /// + public TimeSpan BoostTimeRange { get; } + } +} diff --git a/src/Examine.Core/Search/IBooleanOperation.cs b/src/Examine.Core/Search/IBooleanOperation.cs index 8ac3dd664..799ffc96c 100644 --- a/src/Examine.Core/Search/IBooleanOperation.cs +++ b/src/Examine.Core/Search/IBooleanOperation.cs @@ -6,7 +6,7 @@ namespace Examine.Search /// /// Defines the supported operation for addition of additional clauses in the fluent API /// - public interface IBooleanOperation : IOrdering + public interface IBooleanOperation : IScoreQuery { /// /// Sets the next operation to be AND diff --git a/src/Examine.Core/Search/IScoreQuery.cs b/src/Examine.Core/Search/IScoreQuery.cs new file mode 100644 index 000000000..2828ef1ad --- /dev/null +++ b/src/Examine.Core/Search/IScoreQuery.cs @@ -0,0 +1,12 @@ +using System; + +namespace Examine.Search +{ + /// + /// Defines the supported operation for addition of additional clauses in the fluent API + /// + public interface IScoreQuery : IOrdering + { + IScoreQuery ScoreWith(params string[] scorers); + } +} diff --git a/src/Examine.Lucene/LuceneIndexOptions.cs b/src/Examine.Lucene/LuceneIndexOptions.cs index 822cb6feb..2f42bc50a 100644 --- a/src/Examine.Lucene/LuceneIndexOptions.cs +++ b/src/Examine.Lucene/LuceneIndexOptions.cs @@ -21,7 +21,5 @@ public class LuceneIndexOptions : IndexOptions /// This is generally used to initialize any custom value types for your indexer since the value type collection cannot be modified at runtime. /// public IReadOnlyDictionary IndexValueTypesFactory { get; set; } - - public IList ScoreProfiles { get; set; } } } diff --git a/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs b/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs index 11228139c..92613190d 100644 --- a/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs +++ b/src/Examine.Lucene/Providers/BaseLuceneSearcher.cs @@ -13,21 +13,17 @@ namespace Examine.Lucene.Providers /// public abstract class BaseLuceneSearcher : BaseSearchProvider { - public IList ScoringProfiles { get; } - /// /// Constructor to allow for creating an indexer at runtime /// /// /// - protected BaseLuceneSearcher(string name, Analyzer analyzer, IList scoringProfiles = null) + protected BaseLuceneSearcher(string name, Analyzer analyzer) : base(name) { if (string.IsNullOrWhiteSpace(name)) throw new ArgumentException("Value cannot be null or whitespace.", nameof(name)); LuceneAnalyzer = analyzer; - - ScoringProfiles = scoringProfiles ?? new List(); } /// @@ -54,7 +50,7 @@ public IQuery CreateQuery(string category, BooleanOperation defaultOperation, An if (luceneAnalyzer == null) throw new ArgumentNullException(nameof(luceneAnalyzer)); - return new LuceneSearchQuery(GetSearchContext(), category, luceneAnalyzer, searchOptions, defaultOperation, ScoringProfiles); + return new LuceneSearchQuery(GetSearchContext(), category, luceneAnalyzer, searchOptions, defaultOperation); } /// @@ -66,7 +62,7 @@ public override ISearchResults Search(string searchText, QueryOptions options = ///// ///// This is NOT used! however I'm leaving this here as example code - ///// + ///// ///// This is used to recursively set any query type that supports parameters for rewriting ///// before the search executes. ///// @@ -76,10 +72,10 @@ public override ISearchResults Search(string searchText, QueryOptions options = ///// that would need to be set eagerly before any query parsing takes place but if we want to do it lazily here's how. ///// So we need to manually update any query within the outer boolean query with the correct rewrite method, then the underlying LuceneSearcher will call rewrite ///// to update everything. - ///// + ///// ///// see https://github.com/Shazwazza/Examine/pull/89 ///// see https://lists.gt.net/lucene/java-user/92194 - ///// + ///// ///// //private void SetScoringBooleanQueryRewriteMethod(Query query) //{ diff --git a/src/Examine.Lucene/Providers/LuceneIndex.cs b/src/Examine.Lucene/Providers/LuceneIndex.cs index 1827ca839..bc669622a 100644 --- a/src/Examine.Lucene/Providers/LuceneIndex.cs +++ b/src/Examine.Lucene/Providers/LuceneIndex.cs @@ -44,6 +44,8 @@ private LuceneIndex( //initialize the field types _fieldValueTypeCollection = new Lazy(() => CreateFieldValueTypes(_options.IndexValueTypesFactory)); + _relevanceScorerDefinitionCollection = new Lazy(() => _options.RelevanceScorerDefinitions); + _searcher = new Lazy(CreateSearcher); _cancellationTokenSource = new CancellationTokenSource(); _cancellationToken = _cancellationTokenSource.Token; @@ -61,13 +63,13 @@ public LuceneIndex( : this(loggerFactory, name, (IOptionsMonitor)indexOptions) { LuceneDirectoryIndexOptions directoryOptions = indexOptions.GetNamedOptions(name); - + if (directoryOptions.DirectoryFactory == null) { throw new InvalidOperationException($"No {typeof(IDirectoryFactory)} assigned"); } - _directory = new Lazy(() => directoryOptions.DirectoryFactory.CreateDirectory(this, directoryOptions.UnlockIndex)); + _directory = new Lazy(() => directoryOptions.DirectoryFactory.CreateDirectory(this, directoryOptions.UnlockIndex)); } //TODO: The problem with this is that the writer would already need to be configured with a PerFieldAnalyzerWrapper @@ -137,6 +139,8 @@ internal LuceneIndex( private readonly Lazy _fieldValueTypeCollection; + private readonly Lazy _relevanceScorerDefinitionCollection; + // tracks the latest Generation value of what has been indexed.This can be used to force update a searcher to this generation. private long? _latestGen; @@ -147,6 +151,8 @@ internal LuceneIndex( /// public FieldValueTypeCollection FieldValueTypeCollection => _fieldValueTypeCollection.Value; + public RelevanceScorerDefinitionCollection RelevanceScorerDefinitionCollection => _relevanceScorerDefinitionCollection.Value; + /// /// The default analyzer to use when indexing content, by default, this is set to StandardAnalyzer /// @@ -310,7 +316,7 @@ public void EnsureIndex(bool forceOverwrite) var indexExists = IndexExists(); if (!indexExists || forceOverwrite) { - //if we can't acquire the lock exit - this will happen if this method is called multiple times but we don't want this + //if we can't acquire the lock exit - this will happen if this method is called multiple times but we don't want this // logic to actually execute multiple times if (Monitor.TryEnter(_writerLocker)) { @@ -341,12 +347,12 @@ public void EnsureIndex(bool forceOverwrite) //This will happen if the writer hasn't been created/initialized yet which // might occur if a rebuild is triggered before any indexing has been triggered. //In this case we need to initialize a writer and continue as normal. - //Since we are already inside the writer lock and it is null, we are allowed to + //Since we are already inside the writer lock and it is null, we are allowed to // make this call with out using GetIndexWriter() to do the initialization. _writer = CreateIndexWriterInternal(); } - //We're forcing an overwrite, + //We're forcing an overwrite, // this means that we need to cancel all operations currently in place, // clear the queue and delete all of the data in the index. @@ -441,10 +447,10 @@ public override void CreateIndex() } /// - /// Deletes a node from the index. + /// Deletes a node from the index. /// /// - /// When a content node is deleted, we also need to delete it's children from the index so we need to perform a + /// When a content node is deleted, we also need to delete it's children from the index so we need to perform a /// custom Lucene search to find all decendents and create Delete item queues for them too. /// /// ID of the node to delete @@ -675,7 +681,7 @@ private bool DeleteFromIndex(Term indexTerm, bool performCommit = true) return false; } } - + /// /// Collects the data for the fields and adds the document which is then committed into Lucene.Net's index /// @@ -823,7 +829,7 @@ public void ScheduleCommit() // and less than the delay DateTime.Now - _timestamp < TimeSpan.FromMilliseconds(WaitMilliseconds)) { - //Delay + //Delay _timer.Change(WaitMilliseconds, 0); } else @@ -903,7 +909,7 @@ private TrackingIndexWriter CreateIndexWriterInternal() { Directory dir = GetLuceneDirectory(); - // Unfortunatley if the appdomain is taken down this will remain locked, so we can + // Unfortunatley if the appdomain is taken down this will remain locked, so we can // ensure that it's unlocked here in that case. try { @@ -958,8 +964,8 @@ protected virtual IndexWriter CreateIndexWriter(Directory d) { System.IO.Directory.CreateDirectory(LuceneIndexFolder.FullName); _logOutput = new FileStream(Path.Combine(LuceneIndexFolder.FullName, DateTime.UtcNow.ToString("yyyy-MM-dd") + ".log"), FileMode.Append); - - + + } catch (Exception ex) { @@ -1048,7 +1054,7 @@ private LuceneSearcher CreateSearcher() // wait for most recent changes when first creating the searcher WaitForChanges(); - return new LuceneSearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection, _options.ScoreProfiles); + return new LuceneSearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection, RelevanceScorerDefinitionCollection); } /// diff --git a/src/Examine.Lucene/Providers/LuceneSearcher.cs b/src/Examine.Lucene/Providers/LuceneSearcher.cs index 05aa18622..b27ca7060 100644 --- a/src/Examine.Lucene/Providers/LuceneSearcher.cs +++ b/src/Examine.Lucene/Providers/LuceneSearcher.cs @@ -15,6 +15,7 @@ public class LuceneSearcher : BaseLuceneSearcher, IDisposable { private readonly SearcherManager _searcherManager; private readonly FieldValueTypeCollection _fieldValueTypeCollection; + private readonly RelevanceScorerDefinitionCollection _relevanceScorerDefinitionCollection; private bool _disposedValue; /// @@ -24,15 +25,16 @@ public class LuceneSearcher : BaseLuceneSearcher, IDisposable /// /// /// - public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, IList scoringProfiles = null) - : base(name, analyzer, scoringProfiles) + public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, RelevanceScorerDefinitionCollection relevanceScorerDefinitionCollection) + : base(name, analyzer) { _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection; + _relevanceScorerDefinitionCollection = relevanceScorerDefinitionCollection; } public override ISearchContext GetSearchContext() - => new SearchContext(_searcherManager, _fieldValueTypeCollection); + => new SearchContext(_searcherManager, _fieldValueTypeCollection, _relevanceScorerDefinitionCollection); protected virtual void Dispose(bool disposing) { diff --git a/src/Examine.Lucene/Scoring/ILuceneRelevanceScorerFunctionDefintion.cs b/src/Examine.Lucene/Scoring/ILuceneRelevanceScorerFunctionDefintion.cs new file mode 100644 index 000000000..d1b1d29cb --- /dev/null +++ b/src/Examine.Lucene/Scoring/ILuceneRelevanceScorerFunctionDefintion.cs @@ -0,0 +1,9 @@ +using Lucene.Net.Search; + +namespace Examine.Lucene.Scoring +{ + public interface ILuceneRelevanceScorerFunctionDefintion + { + Query GetScoreQuery(Query inner); + } +} diff --git a/src/Examine.Lucene/Scoring/IScoringProfile.cs b/src/Examine.Lucene/Scoring/IScoringProfile.cs deleted file mode 100644 index 7c12334e9..000000000 --- a/src/Examine.Lucene/Scoring/IScoringProfile.cs +++ /dev/null @@ -1,14 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Lucene.Net.Search; - -namespace Examine.Lucene.Scoring -{ - public interface IScoringProfile - { - Query GetScoreQuery(Query inner); - } -} diff --git a/src/Examine.Test/Examine.Lucene/Search/Scoing/FreshnessScoringProfile.cs b/src/Examine.Lucene/Scoring/LuceneTimeRelevanceScorerFunctionDefintion.cs similarity index 80% rename from src/Examine.Test/Examine.Lucene/Search/Scoing/FreshnessScoringProfile.cs rename to src/Examine.Lucene/Scoring/LuceneTimeRelevanceScorerFunctionDefintion.cs index 56305672e..7ef31f894 100644 --- a/src/Examine.Test/Examine.Lucene/Search/Scoing/FreshnessScoringProfile.cs +++ b/src/Examine.Lucene/Scoring/LuceneTimeRelevanceScorerFunctionDefintion.cs @@ -5,22 +5,14 @@ using Lucene.Net.Queries; using Lucene.Net.Search; -namespace Examine.Test.Examine.Lucene.Search.Scoring -{ - public class FreshnessScoringProfile : IScoringProfile +namespace Examine.Scoring { + public class LuceneTimeRelevanceScorerFunctionDefintion : TimeRelevanceScorerFunctionDefintion, ILuceneRelevanceScorerFunctionDefintion { - private readonly string _fieldName; - private readonly TimeSpan _duration; - private readonly float _boost; - - public FreshnessScoringProfile(string fieldName, TimeSpan duration, float boost) + public LuceneTimeRelevanceScorerFunctionDefintion(string fieldName, float boost, TimeSpan boostTimeRange) : base(fieldName, boost, boostTimeRange) { - _fieldName = fieldName; - _duration = duration; - _boost = boost; } - public Query GetScoreQuery(Query inner) => new FreshnessScoreQuery(inner, _fieldName, _duration, _boost); + public Query GetScoreQuery(Query inner) => new FreshnessScoreQuery(inner, FieldName, BoostTimeRange, Boost); } public class FreshnessScoreQuery : CustomScoreQuery @@ -62,7 +54,7 @@ public override float CustomScore(int doc, float subQueryScore, float valSrcScor var end = DateTime.Now; var start = end.Subtract(_duration); - if (date > start && date < end || date < start && date > end) + if ((date > start && date < end) || (date < start && date > end)) { score *= _boost; } diff --git a/src/Examine.Lucene/Search/ISearchContext.cs b/src/Examine.Lucene/Search/ISearchContext.cs index 205a843dc..8308f1f28 100644 --- a/src/Examine.Lucene/Search/ISearchContext.cs +++ b/src/Examine.Lucene/Search/ISearchContext.cs @@ -9,5 +9,7 @@ public interface ISearchContext string[] SearchableFields { get; } IIndexFieldValueType GetFieldValueType(string fieldName); + + RelevanceScorerDefinition GetRelevanceScorer(string scorerName); } } diff --git a/src/Examine.Lucene/Search/LuceneBooleanOperation.cs b/src/Examine.Lucene/Search/LuceneBooleanOperation.cs index b38dd8422..18f39e971 100644 --- a/src/Examine.Lucene/Search/LuceneBooleanOperation.cs +++ b/src/Examine.Lucene/Search/LuceneBooleanOperation.cs @@ -13,7 +13,7 @@ namespace Examine.Lucene.Search public class LuceneBooleanOperation : LuceneBooleanOperationBase, IQueryExecutor { private readonly LuceneSearchQuery _search; - + public LuceneBooleanOperation(LuceneSearchQuery search) : base(search) { @@ -65,5 +65,6 @@ public LuceneBooleanOperation(LuceneSearchQuery search) #endregion public override string ToString() => _search.ToString(); + public override IScoreQuery ScoreWith(params string[] scorers) => _search.ScoreWith(scorers); } } diff --git a/src/Examine.Lucene/Search/LuceneBooleanOperationBase.cs b/src/Examine.Lucene/Search/LuceneBooleanOperationBase.cs index 2769479ad..918c79a75 100644 --- a/src/Examine.Lucene/Search/LuceneBooleanOperationBase.cs +++ b/src/Examine.Lucene/Search/LuceneBooleanOperationBase.cs @@ -18,13 +18,13 @@ protected LuceneBooleanOperationBase(LuceneSearchQueryBase search) public abstract IQuery Or(); public abstract IQuery Not(); - public IBooleanOperation And(Func inner, BooleanOperation defaultOp = BooleanOperation.And) + public IBooleanOperation And(Func inner, BooleanOperation defaultOp = BooleanOperation.And) => Op(inner, BooleanOperation.And, defaultOp); - public IBooleanOperation Or(Func inner, BooleanOperation defaultOp = BooleanOperation.And) + public IBooleanOperation Or(Func inner, BooleanOperation defaultOp = BooleanOperation.And) => Op(inner, BooleanOperation.Or, defaultOp); - public IBooleanOperation AndNot(Func inner, BooleanOperation defaultOp = BooleanOperation.And) + public IBooleanOperation AndNot(Func inner, BooleanOperation defaultOp = BooleanOperation.And) => Op(inner, BooleanOperation.Not, defaultOp); protected abstract INestedQuery AndNested(); @@ -35,7 +35,7 @@ public IBooleanOperation AndNot(Func inne INestedQuery INestedBooleanOperation.Or() => OrNested(); INestedQuery INestedBooleanOperation.Not() => NotNested(); - INestedBooleanOperation INestedBooleanOperation.And(Func inner, BooleanOperation defaultOp) + INestedBooleanOperation INestedBooleanOperation.And(Func inner, BooleanOperation defaultOp) => Op(inner, BooleanOperation.And, defaultOp); INestedBooleanOperation INestedBooleanOperation.Or(Func inner, BooleanOperation defaultOp) @@ -77,5 +77,6 @@ protected internal LuceneBooleanOperationBase Op( public abstract IOrdering SelectFields(ISet fieldNames); public abstract IOrdering SelectField(string fieldName); public abstract IOrdering SelectAllFields(); + public abstract IScoreQuery ScoreWith(params string[] scorers); } } diff --git a/src/Examine.Lucene/Search/LuceneSearchExecutor.cs b/src/Examine.Lucene/Search/LuceneSearchExecutor.cs index 374afa856..fb296ea43 100644 --- a/src/Examine.Lucene/Search/LuceneSearchExecutor.cs +++ b/src/Examine.Lucene/Search/LuceneSearchExecutor.cs @@ -4,6 +4,7 @@ using Examine.Search; using Lucene.Net.Documents; using Lucene.Net.Index; +using Lucene.Net.Queries; using Lucene.Net.Search; namespace Examine.Lucene.Search @@ -195,7 +196,7 @@ private static SearchAfterOptions GetSearchAfterOptions(TopDocs topDocs) private LuceneSearchResult GetSearchResult(int index, TopDocs topDocs, IndexSearcher luceneSearcher) { // I have seen IndexOutOfRangeException here which is strange as this is only called in one place - // and from that one place "i" is always less than the size of this collection. + // and from that one place "i" is always less than the size of this collection. // but we'll error check here anyways if (topDocs?.ScoreDocs.Length < index) { @@ -289,6 +290,11 @@ private bool CheckQueryForExtractTerms(Query query) return CheckQueryForExtractTerms(lbq.Wrapped); } + if(query is CustomScoreQuery csq) + { + return CheckQueryForExtractTerms(csq.SubQuery); + } + Type queryType = query.GetType(); if (typeof(TermRangeQuery).IsAssignableFrom(queryType) @@ -296,7 +302,7 @@ private bool CheckQueryForExtractTerms(Query query) || typeof(FuzzyQuery).IsAssignableFrom(queryType) || (queryType.IsGenericType && queryType.GetGenericTypeDefinition().IsAssignableFrom(typeof(NumericRangeQuery<>)))) { - return false; //ExtractTerms() not supported by TermRangeQuery, WildcardQuery,FuzzyQuery and will throw NotSupportedException + return false; //ExtractTerms() not supported by TermRangeQuery, WildcardQuery,FuzzyQuery and will throw NotSupportedException } return true; diff --git a/src/Examine.Lucene/Search/LuceneSearchExtensions.cs b/src/Examine.Lucene/Search/LuceneSearchExtensions.cs index b1bf204f8..8f82294dd 100644 --- a/src/Examine.Lucene/Search/LuceneSearchExtensions.cs +++ b/src/Examine.Lucene/Search/LuceneSearchExtensions.cs @@ -17,7 +17,7 @@ public static class LuceneSearchExtensions /// Converts an Examine boolean operation to a Lucene representation /// /// The operation. - /// The translated Boolean operation + /// The translated Boolean operation public static Occur ToLuceneOccurrence(this BooleanOperation o) { switch (o) @@ -69,17 +69,5 @@ public static ILuceneSearchResults ExecuteWithLucene(this IQueryExecutor queryEx } throw new NotSupportedException("QueryExecutor is not Lucene.NET"); } - - public static void AddScoringProfile(this ISearcher searcher, IScoringProfile scoringProfile) - { - if(searcher is LuceneSearcher luceneSearcher) - { - luceneSearcher.ScoringProfiles.Add(scoringProfile); - - return; - } - - throw new NotSupportedException("Searcher is not Lucene.NET"); - } } } diff --git a/src/Examine.Lucene/Search/LuceneSearchQuery.cs b/src/Examine.Lucene/Search/LuceneSearchQuery.cs index db88ce3d4..417964ad4 100644 --- a/src/Examine.Lucene/Search/LuceneSearchQuery.cs +++ b/src/Examine.Lucene/Search/LuceneSearchQuery.cs @@ -17,16 +17,14 @@ namespace Examine.Lucene.Search public class LuceneSearchQuery : LuceneSearchQueryBase, IQueryExecutor { private readonly ISearchContext _searchContext; - private readonly IList _scoringProfiles; private ISet _fieldsToLoad = null; public LuceneSearchQuery( ISearchContext searchContext, - string category, Analyzer analyzer, LuceneSearchOptions searchOptions, BooleanOperation occurance, IList scoringProfiles) + string category, Analyzer analyzer, LuceneSearchOptions searchOptions, BooleanOperation occurance) : base(CreateQueryParser(searchContext, analyzer, searchOptions), category, searchOptions, occurance) - { + { _searchContext = searchContext; - _scoringProfiles = scoringProfiles; } private static CustomMultiFieldQueryParser CreateQueryParser(ISearchContext searchContext, Analyzer analyzer, LuceneSearchOptions searchOptions) @@ -84,6 +82,8 @@ private static CustomMultiFieldQueryParser CreateQueryParser(ISearchContext sear public virtual IBooleanOperation OrderByDescending(params SortableField[] fields) => OrderByInternal(true, fields); + public virtual IScoreQuery ScoreWith(params string[] scorers) => ScoreWithInternal(scorers); + public override IBooleanOperation Field(string fieldName, T fieldValue) => RangeQueryInternal(new[] { fieldName }, fieldValue, fieldValue, true, true, Occurrence); @@ -111,7 +111,7 @@ internal LuceneBooleanOperationBase ManagedQueryInternal(string query, string[] var types = fields.Select(f => _searchContext.GetFieldValueType(f)).Where(t => t != null); - //Strangely we need an inner and outer query. If we don't do this then the lucene syntax returned is incorrect + //Strangely we need an inner and outer query. If we don't do this then the lucene syntax returned is incorrect //since it doesn't wrap in parenthesis properly. I'm unsure if this is a lucene issue (assume so) since that is what //is producing the resulting lucene string syntax. It might not be needed internally within Lucene since it's an object //so it might be the ToString() that is the issue. @@ -142,7 +142,7 @@ internal LuceneBooleanOperationBase RangeQueryInternal(string[] fields, T? mi { Query.Add(new LateBoundQuery(() => { - //Strangely we need an inner and outer query. If we don't do this then the lucene syntax returned is incorrect + //Strangely we need an inner and outer query. If we don't do this then the lucene syntax returned is incorrect //since it doesn't wrap in parenthesis properly. I'm unsure if this is a lucene issue (assume so) since that is what //is producing the resulting lucene string syntax. It might not be needed internally within Lucene since it's an object //so it might be the ToString() that is the issue. @@ -230,7 +230,18 @@ private ISearchResults Search(QueryOptions options) } } - var scoredQuery = ApplyScoringProfiles(query); + Query scoredQuery = query; + + foreach(var scorerDefinition in RelevanceScores) + { + foreach(var scoreFunction in scorerDefinition.FunctionScorerDefintions) + { + if(scoreFunction is ILuceneRelevanceScorerFunctionDefintion luceneScoreFunction) + { + scoredQuery = luceneScoreFunction.GetScoreQuery(scoredQuery); + } + } + } var executor = new LuceneSearchExecutor(options, scoredQuery, SortFields, _searchContext, _fieldsToLoad); @@ -239,16 +250,6 @@ private ISearchResults Search(QueryOptions options) return pagesResults; } - protected Query ApplyScoringProfiles(Query query) - { - foreach (var profile in _scoringProfiles) - { - query = profile.GetScoreQuery(query); - } - - return query; - } - /// /// Internal operation for adding the ordered results /// @@ -287,7 +288,7 @@ private LuceneBooleanOperationBase OrderByInternal(bool descending, params Sorta break; case SortType.Double: defaultSort = SortFieldType.DOUBLE; - break; + break; default: throw new ArgumentOutOfRangeException(); } @@ -304,6 +305,18 @@ private LuceneBooleanOperationBase OrderByInternal(bool descending, params Sorta return CreateOp(); } + internal LuceneBooleanOperationBase ScoreWithInternal(params string[] scorers) + { + foreach(var scorer in scorers) + { + var scorerDefinition = _searchContext.GetRelevanceScorer(scorer); + + RelevanceScores.Add(scorerDefinition); + } + + return CreateOp(); + } + internal IBooleanOperation SelectFieldsInternal(ISet loadedFieldNames) { _fieldsToLoad = loadedFieldNames; diff --git a/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs b/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs index 043b6df7c..88f526a18 100644 --- a/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs +++ b/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs @@ -18,6 +18,8 @@ public abstract class LuceneSearchQueryBase : IQuery, INestedQuery public IList SortFields { get; } = new List(); + public IList RelevanceScores { get; } = new List(); + protected Occur Occurrence { get; set; } private BooleanOperation _boolOp; @@ -71,7 +73,7 @@ public IBooleanOperation NativeQuery(string query) } /// - /// Adds a true Lucene Query + /// Adds a true Lucene Query /// /// /// @@ -229,12 +231,12 @@ protected internal LuceneBooleanOperationBase GroupedNotInternal(string[] fields //(!field1:query !field2:query !field3:query) //but Lucene will bork if you provide an array of length 1 (which is != to the field length) - // NOTE: This is important because we cannot prefix a + to a group of NOT's, that doesn't work. - // for example, it cannot be: +(-id:1 -id:2 -id:3) + // NOTE: This is important because we cannot prefix a + to a group of NOT's, that doesn't work. + // for example, it cannot be: +(-id:1 -id:2 -id:3) // and it cannot be: (-id:1 -id:2 -id:3) - this will be an optional list of must not's so really nothing is filtered // It needs to be: -id:1 -id:2 -id:3 - // So we get all clauses + // So we get all clauses var subQueries = GetMultiFieldQuery(fields, fieldVals, Occur.MUST_NOT, true); // then add each individual one directly to the query @@ -328,7 +330,7 @@ protected virtual Query GetFieldInternalQuery(string fieldName, IExamineValue fi { queryToAdd = _queryParser.GetFieldQueryInternal(fieldName, fieldValue.Value); if (queryToAdd != null) - { + { queryToAdd.Boost = fieldValue.Level; } } @@ -340,7 +342,7 @@ protected virtual Query GetFieldInternalQuery(string fieldName, IExamineValue fi } break; case Examineness.Proximity: - int proximity = Convert.ToInt32(fieldValue.Level); + int proximity = Convert.ToInt32(fieldValue.Level); if (useQueryParser) { queryToAdd = _queryParser.GetProximityQueryInternal(fieldName, fieldValue.Value, proximity); @@ -371,7 +373,7 @@ protected virtual Query GetFieldInternalQuery(string fieldName, IExamineValue fi } else { - //standard query + //standard query var proxQuery = fieldName + ":" + fieldValue.Value; queryToAdd = ParseRawQuery(proxQuery); } @@ -402,7 +404,7 @@ private Query ParseRawQuery(string rawQuery) /// /// /// The result of this seems to be better than the above since it does not include results that contain part of the phrase. - /// For example, 'codegarden 090' would be matched against the search term 'codegarden 09' with the above, whereas when using the + /// For example, 'codegarden 090' would be matched against the search term 'codegarden 09' with the above, whereas when using the /// PhraseQuery this is not the case /// private static Query CreatePhraseQuery(string field, string txt) @@ -426,32 +428,32 @@ private static Query CreatePhraseQuery(string field, string txt) /// /// /// docs about this are here: https://github.com/Shazwazza/Examine/wiki/Grouped-Operations - /// + /// /// if matchAllCombinations == false then... /// this will create a query that matches the field index to the value index if the value length is >= to the field length /// otherwise we will have to match all combinations. - /// + /// /// For example if we have these fields: /// bodyText, pageTitle /// and these values: /// "hello", "world" - /// + /// /// then the query output will be: - /// + /// /// bodyText: "hello" pageTitle: "world" - /// + /// /// if matchAllCombinations == true then... - /// This will create a query for all combinations of fields and values. + /// This will create a query for all combinations of fields and values. /// For example if we have these fields: /// bodyText, pageTitle /// and these values: /// "hello", "world" - /// + /// /// then the query output will be: - /// + /// /// bodyText: "hello" bodyText: "world" pageTitle: "hello" pageTitle: "world" - /// - /// + /// + /// private BooleanQuery GetMultiFieldQuery( IReadOnlyList fields, IReadOnlyList fieldVals, @@ -480,7 +482,7 @@ private BooleanQuery GetMultiFieldQuery( return qry; } - //This will align the key value pairs: + //This will align the key value pairs: for (int i = 0; i < fields.Count; i++) { var queryVal = fieldVals[i]; diff --git a/src/Examine.Lucene/Search/MultiSearchContext.cs b/src/Examine.Lucene/Search/MultiSearchContext.cs index 877661c35..06dbc7f64 100644 --- a/src/Examine.Lucene/Search/MultiSearchContext.cs +++ b/src/Examine.Lucene/Search/MultiSearchContext.cs @@ -8,9 +8,9 @@ namespace Examine.Lucene.Search public class MultiSearchContext : ISearchContext { private readonly ISearchContext[] _inner; - + private string[] _fields; - + public MultiSearchContext(ISearchContext[] inner) => _inner = inner; public ISearcherReference GetSearcher() @@ -21,5 +21,6 @@ public ISearcherReference GetSearcher() public IIndexFieldValueType GetFieldValueType(string fieldName) => _inner.Select(cc => cc.GetFieldValueType(fieldName)).FirstOrDefault(type => type != null); + public RelevanceScorerDefinition GetRelevanceScorer(string scorerName) => throw new System.NotImplementedException(); } } diff --git a/src/Examine.Lucene/Search/SearchContext.cs b/src/Examine.Lucene/Search/SearchContext.cs index 406a6941f..0c53a9c3d 100644 --- a/src/Examine.Lucene/Search/SearchContext.cs +++ b/src/Examine.Lucene/Search/SearchContext.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Linq; using Examine.Lucene.Indexing; +using Examine.Lucene.Scoring; using Lucene.Net.Index; using Lucene.Net.Search; @@ -12,12 +13,14 @@ public class SearchContext : ISearchContext { private readonly SearcherManager _searcherManager; private readonly FieldValueTypeCollection _fieldValueTypeCollection; + private readonly RelevanceScorerDefinitionCollection _relevanceScorerDefinitionCollection; private string[] _searchableFields; - public SearchContext(SearcherManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection) + public SearchContext(SearcherManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection, RelevanceScorerDefinitionCollection relevanceScorerDefinitionCollection) { - _searcherManager = searcherManager; + _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection ?? throw new ArgumentNullException(nameof(fieldValueTypeCollection)); + _relevanceScorerDefinitionCollection = relevanceScorerDefinitionCollection ?? throw new ArgumentNullException(nameof(relevanceScorerDefinitionCollection)); } public ISearcherReference GetSearcher() => new SearcherReference(_searcherManager); @@ -33,7 +36,7 @@ public string[] SearchableFields // performing a 'search'. We must ensure that the underlying reader has the correct reference counts. IndexSearcher searcher = _searcherManager.Acquire(); try - { + { var fields = MultiFields.GetMergedFieldInfos(searcher.IndexReader) .Select(x => x.Name) .ToList(); @@ -57,8 +60,18 @@ public IIndexFieldValueType GetFieldValueType(string fieldName) { //Get the value type for the field, or use the default if not defined return _fieldValueTypeCollection.GetValueType( - fieldName, + fieldName, _fieldValueTypeCollection.ValueTypeFactories.GetRequiredFactory(FieldDefinitionTypes.FullText)); } + + public RelevanceScorerDefinition GetRelevanceScorer(string scorerName) + { + if (!_relevanceScorerDefinitionCollection.TryGetValue(scorerName, out var scorer)) + { + throw new InvalidOperationException($"No {nameof(RelevanceScorerDefinition)} was found for scorer name {scorerName}"); + } + + return scorer; + } } } diff --git a/src/Examine.Test/Examine.Lucene/Search/ScoringProfileTests.cs b/src/Examine.Test/Examine.Lucene/Search/RelevanceScorerTests.cs similarity index 75% rename from src/Examine.Test/Examine.Lucene/Search/ScoringProfileTests.cs rename to src/Examine.Test/Examine.Lucene/Search/RelevanceScorerTests.cs index 15b7bc4d3..c9b302b0f 100644 --- a/src/Examine.Test/Examine.Lucene/Search/ScoringProfileTests.cs +++ b/src/Examine.Test/Examine.Lucene/Search/RelevanceScorerTests.cs @@ -1,9 +1,8 @@ using System; using System.Collections.Generic; using System.Linq; -using Examine.Lucene.Scoring; using Examine.Lucene.Search; -using Examine.Test.Examine.Lucene.Search.Scoring; +using Examine.Scoring; using Lucene.Net.Analysis.Standard; using NUnit.Framework; @@ -55,7 +54,8 @@ public void Score_Freshness_Profile_Out_Of_Range() luceneDir, analyzer, new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), - scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(1, 0, 0, 0), 1.5f) })) + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(1, 0, 0, 0)) })))) { indexer.IndexItems(new[] { @@ -72,7 +72,8 @@ public void Score_Freshness_Profile_Out_Of_Range() var searcher = indexer.Searcher; var numberSortedCriteria = searcher.CreateQuery() - .Field("bodyText", "ipsum"); + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); var numberSortedResult = numberSortedCriteria .Execute(); @@ -90,7 +91,8 @@ public void Score_Freshness_Profile_In_Range() luceneDir, analyzer, new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), - scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(1, 0, 0, 0), 1.5f) })) + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(1, 0, 0, 0)) })))) { indexer.IndexItems(new[] { @@ -107,7 +109,8 @@ public void Score_Freshness_Profile_In_Range() var searcher = indexer.Searcher; var numberSortedCriteria = searcher.CreateQuery() - .Field("bodyText", "ipsum"); + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); var numberSortedResult = numberSortedCriteria .Execute(); @@ -125,7 +128,8 @@ public void Score_Freshness_Profile_Future_Date() luceneDir, analyzer, new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), - scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(1, 0, 0, 0), 1.5f) })) + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(1, 0, 0, 0)) })))) { indexer.IndexItems(new[] { @@ -142,7 +146,8 @@ public void Score_Freshness_Profile_Future_Date() var searcher = indexer.Searcher; var numberSortedCriteria = searcher.CreateQuery() - .Field("bodyText", "ipsum"); + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); var numberSortedResult = numberSortedCriteria .Execute(); @@ -160,7 +165,8 @@ public void Score_Freshness_Profile_Future_Date_With_Future_Duration() luceneDir, analyzer, new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), - scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(-1, 0, 0, 0), 1.5f) })) + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(-1, 0, 0, 0)) })))) { indexer.IndexItems(new[] { @@ -176,7 +182,8 @@ public void Score_Freshness_Profile_Future_Date_With_Future_Duration() var searcher = indexer.Searcher; var numberSortedCriteria = searcher.CreateQuery() - .Field("bodyText", "ipsum"); + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); var numberSortedResult = numberSortedCriteria .Execute(); @@ -194,7 +201,8 @@ public void Score_Freshness_Profile_Future_Duration() luceneDir, analyzer, new FieldDefinitionCollection(new FieldDefinition("created", "datetime")), - scoringProfiles: new List { new FreshnessScoringProfile("created", new TimeSpan(-1, 0, 0, 0), 1.5f) })) + relevanceScorerDefinitions: new RelevanceScorerDefinitionCollection( + new RelevanceScorerDefinition("freshness", new[] { new LuceneTimeRelevanceScorerFunctionDefintion("created", 1.5f, new TimeSpan(-1, 0, 0, 0)) })))) { indexer.IndexItems(new[] { @@ -210,42 +218,8 @@ public void Score_Freshness_Profile_Future_Duration() var searcher = indexer.Searcher; var numberSortedCriteria = searcher.CreateQuery() - .Field("bodyText", "ipsum"); - - var numberSortedResult = numberSortedCriteria - .Execute(); - - Assert.AreEqual(0.191783011f, numberSortedResult.First().Score); - } - } - - [Test] - public void Score_Freshness_Add_On_Searcher() - { - var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); - using (var luceneDir = new RandomIdRAMDirectory()) - using (var indexer = GetTestIndex( - luceneDir, - analyzer, - new FieldDefinitionCollection(new FieldDefinition("created", "datetime")))) - { - indexer.IndexItems(new[] - { - ValueSet.FromObject(123.ToString(), "content", - new - { - created = DateTime.Now.AddDays(-2), - bodyText = "lorem ipsum", - nodeTypeAlias = "CWS_Home" - }) - }); - - var searcher = indexer.Searcher; - - searcher.AddScoringProfile(new FreshnessScoringProfile("created", new TimeSpan(-1, 0, 0, 0), 1.5f)); - - var numberSortedCriteria = searcher.CreateQuery() - .Field("bodyText", "ipsum"); + .Field("bodyText", "ipsum") + .ScoreWith("freshness"); var numberSortedResult = numberSortedCriteria .Execute(); diff --git a/src/Examine.Test/ExamineBaseTest.cs b/src/Examine.Test/ExamineBaseTest.cs index 9d02ba222..fd7c70dbb 100644 --- a/src/Examine.Test/ExamineBaseTest.cs +++ b/src/Examine.Test/ExamineBaseTest.cs @@ -26,7 +26,7 @@ public virtual void Setup() [TearDown] public virtual void TearDown() => _loggerFactory.Dispose(); - public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCollection fieldDefinitions = null, IndexDeletionPolicy indexDeletionPolicy = null, IReadOnlyDictionary indexValueTypesFactory = null, IList scoringProfiles = null) + public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCollection fieldDefinitions = null, IndexDeletionPolicy indexDeletionPolicy = null, IReadOnlyDictionary indexValueTypesFactory = null, RelevanceScorerDefinitionCollection relevanceScorerDefinitions = null) => new TestIndex( _loggerFactory, Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions @@ -36,7 +36,7 @@ public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCol Analyzer = analyzer, IndexDeletionPolicy = indexDeletionPolicy, IndexValueTypesFactory = indexValueTypesFactory, - ScoreProfiles = scoringProfiles + RelevanceScorerDefinitions = relevanceScorerDefinitions ?? new RelevanceScorerDefinitionCollection() })); public TestIndex GetTestIndex(IndexWriter writer)