-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch '0.3.9' of https://github.com/openturing/turing into 0.3.9
- Loading branch information
Showing
6 changed files
with
430 additions
and
101 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
175 changes: 175 additions & 0 deletions
175
turing-app/src/main/java/com/viglet/turing/sn/ac/SuggestionAutomaton.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
package com.viglet.turing.sn.ac; | ||
|
||
import lombok.extern.slf4j.Slf4j; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.jetbrains.annotations.NotNull; | ||
|
||
import java.util.*; | ||
|
||
/** | ||
* The SuggestionAutomaton class represents a finite state machine used to | ||
* validate suggestions. | ||
* <p> | ||
* Methods: | ||
* </p> | ||
* <ul> | ||
* <li>{@link #SuggestionAutomaton()}: Constructor that builds the | ||
* automaton.</li> | ||
* <li>{@link #run(String, int, List)}: Runs the automaton on a given | ||
* suggestion, number of words from query, and stop words list.</li> | ||
* <li>{@link #getTokenType(List, String, boolean)}: Determines the token type | ||
* of a given token.</li> | ||
* </ul> | ||
* | ||
* </pre> | ||
* | ||
* @author Gabriel F. Gomazako | ||
* @since 0.3.9 | ||
*/ | ||
@Slf4j | ||
public class SuggestionAutomaton { | ||
private State initialState; | ||
|
||
public SuggestionAutomaton() { | ||
buildAutomaton(); | ||
} | ||
|
||
/** | ||
* This constructs the finite state machine structure and binds to <code>initialState</code> parameter. | ||
*/ | ||
private void buildAutomaton() { | ||
State n0 = new State("N0"); | ||
State n1 = new State("N1"); | ||
State n2 = new State("N2"); | ||
State n3 = new State("N3"); | ||
State accept = new State("Accept", State.StateType.ACCEPT); | ||
State reject = new State("Error", State.StateType.REJECT); | ||
|
||
n0.transitions.put(TokenType.WORD, n1); | ||
n0.transitions.put(TokenType.STOP_WORD, reject); | ||
n0.transitions.put(TokenType.EMPTY, reject); | ||
n0.transitions.put(TokenType.SPECIAL_STOP_WORD, n2); | ||
|
||
n1.transitions.put(TokenType.WORD, reject); | ||
n1.transitions.put(TokenType.EMPTY, accept); | ||
n1.transitions.put(TokenType.STOP_WORD, reject); | ||
n1.transitions.put(TokenType.SPECIAL_STOP_WORD, n2); | ||
|
||
n2.transitions.put(TokenType.WORD, n3); | ||
n2.transitions.put(TokenType.STOP_WORD, n2); | ||
n2.transitions.put(TokenType.EMPTY, reject); | ||
n2.transitions.put(TokenType.SPECIAL_STOP_WORD, n2); | ||
|
||
n3.transitions.put(TokenType.WORD, reject); | ||
n3.transitions.put(TokenType.STOP_WORD, reject); | ||
n3.transitions.put(TokenType.EMPTY, accept); | ||
n3.transitions.put(TokenType.SPECIAL_STOP_WORD, reject); | ||
|
||
this.initialState = n0; | ||
} | ||
|
||
/** | ||
* Runs the suggestion automaton to determine if a given suggestion is valid. | ||
* | ||
* @param suggestion the suggestion string to be evaluated. | ||
* @param numberOfWordsFromQuery the number of words from current query. It will be used to know how many words the suggestion should have. | ||
* @param stopWords a list of stop words. | ||
* @return {@code true} if the suggestion is valid according to the automaton rules, {@code false} otherwise. | ||
*/ | ||
public boolean run(String suggestion, int numberOfWordsFromQuery, List<String> stopWords) { | ||
// TOP -> [ "Hello", "World" ] | ||
Deque<String> tokensDeque = new ArrayDeque<>(List.of(suggestion.split(" "))); | ||
|
||
// Suggestions should not start with a stop word when is the first term of the query. | ||
if (stopWords.contains(tokensDeque.peek()) && numberOfWordsFromQuery == 1) { | ||
return false; | ||
} | ||
|
||
// The suggestions will always include the query, so we need to ignore it. | ||
int wordsToRemove = numberOfWordsFromQuery - 1; | ||
// Query: "Hello my friend" -> numberOfWordsFromQuery = 3 | ||
// Query: "Hello my friend " -> numberOfWordsFromQuery = 4 | ||
while (wordsToRemove > 0 && !tokensDeque.isEmpty()) { | ||
tokensDeque.pop(); | ||
wordsToRemove--; | ||
} | ||
|
||
// Checks if now it starts with a stop word | ||
boolean firstTokenIsStopWord = stopWords.contains(tokensDeque.peek()); | ||
|
||
if (tokensDeque.isEmpty()) { | ||
log.warn("Suggestion is empty."); | ||
return false; | ||
} | ||
|
||
TokenType currentTokenType = null; | ||
State currentState = this.initialState; | ||
log.info("Testing suggestion: {}", suggestion); | ||
String currentToken = null; | ||
while (true) { | ||
if (currentState.stateType == State.StateType.REJECT) { | ||
return false; | ||
} else if (currentState.stateType == State.StateType.ACCEPT) { | ||
return true; | ||
} | ||
|
||
currentToken = tokensDeque.poll(); | ||
currentTokenType = getTokenType(stopWords, currentToken, firstTokenIsStopWord); | ||
|
||
log.info("Current token: {} - Type: {}", currentToken, currentTokenType); | ||
log.info("Current state: {}", currentState.name); | ||
|
||
currentState = currentState.getNextState(currentTokenType); | ||
} | ||
} | ||
|
||
@NotNull | ||
private TokenType getTokenType(List<String> stopWords, String currentToken, boolean firstTokenIsStopWord) { | ||
TokenType tokenType; | ||
if (StringUtils.isEmpty(currentToken)) { | ||
return TokenType.EMPTY; | ||
} | ||
if (stopWords.contains(currentToken)) { | ||
tokenType = TokenType.STOP_WORD; | ||
if (firstTokenIsStopWord) | ||
tokenType = TokenType.SPECIAL_STOP_WORD; | ||
} else { | ||
tokenType = TokenType.WORD; | ||
} | ||
return tokenType; | ||
} | ||
|
||
private enum TokenType { | ||
WORD, | ||
STOP_WORD, | ||
SPECIAL_STOP_WORD, | ||
EMPTY | ||
} | ||
|
||
private class State { | ||
private Map<TokenType, State> transitions = new EnumMap<>(TokenType.class); | ||
private StateType stateType; | ||
private String name; | ||
|
||
private State(String name) { | ||
this.stateType = StateType.NORMAL; | ||
this.name = name; | ||
} | ||
|
||
private State(String name, StateType stateType) { | ||
this.stateType = stateType; | ||
this.name = name; | ||
} | ||
|
||
private State getNextState(TokenType tokenType) { | ||
return this.transitions.get(tokenType); | ||
} | ||
|
||
private enum StateType { | ||
ACCEPT, | ||
REJECT, | ||
NORMAL | ||
} | ||
} | ||
|
||
} |
171 changes: 171 additions & 0 deletions
171
turing-app/src/main/java/com/viglet/turing/sn/ac/SuggestionFilter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
package com.viglet.turing.sn.ac; | ||
|
||
import lombok.extern.slf4j.Slf4j; | ||
|
||
import java.util.*; | ||
|
||
/** | ||
* The {@code SuggestionFilter} class is responsible for filtering a list of | ||
* suggestions | ||
* based on a specified strategy and a list of stop words. It currently supports | ||
* two | ||
* strategies: {@code DEFAULT} and {@code AUTOMATON}. | ||
* <p> | ||
* <ul> | ||
* <li>The {@code DEFAULT} strategy follows the original filter implemented in | ||
* "TurSNAutoComplete".</li> | ||
* <li>The {@code AUTOMATON} strategy uses a finite state machine to filter | ||
* suggestions. The behaviour of this strategy filters | ||
* suggestion for a single word, if the query has an space at the end, it will | ||
* consider the query as a two-word query and will suggests | ||
* a next word for the query. It will consider a stop word followed by a | ||
* non-stop as a valid suggestion.</li> | ||
* </ul> | ||
* | ||
* @author Gabriel F. Gomazako | ||
* @since 0.3.9 | ||
*/ | ||
@Slf4j | ||
public class SuggestionFilter { | ||
|
||
private static final String SPACE_CHAR = " "; | ||
private final List<String> stopWords; | ||
private int numberOfWordsFromQuery = 0; | ||
private SuggestionFilterStrategy strategy; | ||
private boolean useTermsQueryEqualsAutoComplete = true; | ||
|
||
public SuggestionFilter(List<String> stopWords) { | ||
this.stopWords = stopWords; | ||
} | ||
|
||
/** | ||
* Configures the suggestion filter to use the default strategy - Legacy | ||
* strategy. | ||
* | ||
* @param numberOfWordsFromQuery the number of words from the query to be | ||
* considered in the suggestion filter. | ||
*/ | ||
public void defaultStrategyConfig(int numberOfWordsFromQuery) { | ||
this.strategy = SuggestionFilterStrategy.DEFAULT; | ||
this.numberOfWordsFromQuery = numberOfWordsFromQuery; | ||
} | ||
|
||
/** | ||
* Configures the default strategy for suggestions - Legacy strategy. | ||
* | ||
* @param numberOfWordsFromQuery the number of words to consider from | ||
* the query. | ||
* @param useTermsQueryEqualsAutoComplete flag indicating whether to use terms | ||
* query equals auto-complete. | ||
*/ | ||
public void defaultStrategyConfig(int numberOfWordsFromQuery, boolean useTermsQueryEqualsAutoComplete) { | ||
this.defaultStrategyConfig(numberOfWordsFromQuery); | ||
this.useTermsQueryEqualsAutoComplete = useTermsQueryEqualsAutoComplete; | ||
} | ||
|
||
/** | ||
* Configures the suggestion filter to use the automaton strategy - New | ||
* strategy. | ||
* | ||
* @param numberOfWordsFromQuery the number of words from the query to be used | ||
* in the automaton strategy | ||
*/ | ||
public void automatonStrategyConfig(int numberOfWordsFromQuery) { | ||
this.strategy = SuggestionFilterStrategy.AUTOMATON; | ||
this.numberOfWordsFromQuery = numberOfWordsFromQuery; | ||
} | ||
|
||
/** | ||
* Filters a list of suggestions based on the defined strategy. | ||
* | ||
* @param suggestions the list of suggestions to be filtered | ||
* @return a list of filtered suggestions based on the strategy | ||
* @throws IllegalArgumentException if the suggestions list is null | ||
*/ | ||
public List<String> filter(List<String> suggestions) { | ||
if (suggestions == null) { | ||
throw new IllegalArgumentException("Suggestions list is null."); | ||
} | ||
|
||
List<String> suggestionsFiltered = new ArrayList<>(); | ||
switch (this.strategy) { | ||
case DEFAULT: | ||
for (String suggestion : suggestions) { | ||
if (defaultStrategy(suggestion)) { | ||
suggestionsFiltered.add(suggestion); | ||
} | ||
} | ||
break; | ||
case AUTOMATON: | ||
SuggestionAutomaton automaton = new SuggestionAutomaton(); | ||
for (String suggestion : suggestions) { | ||
if (automaton.run(suggestion, numberOfWordsFromQuery, stopWords)) { | ||
suggestionsFiltered.add(suggestion); | ||
} | ||
} | ||
break; | ||
default: | ||
log.warn("No strategy defined. Returning empty list."); | ||
return Collections.emptyList(); | ||
} | ||
|
||
return suggestionsFiltered; | ||
} | ||
|
||
public SuggestionFilterStrategy getStrategy() { | ||
return this.strategy; | ||
} | ||
|
||
public void setStrategy(SuggestionFilterStrategy strategy) { | ||
this.strategy = strategy; | ||
} | ||
|
||
private boolean defaultStrategy(String suggestion) { | ||
validateDefaultStrategyConfig(); | ||
|
||
// Example: Query: "Hello" suggestion: "Hello World" suggestion.split = | ||
// ["Hello", "World"].length = 2 | ||
String[] suggestionTokens = suggestion.split(SPACE_CHAR); | ||
int numberOfWordsFromAutoCompleteItem = suggestionTokens.length; | ||
|
||
// Case: Autocompletes the current word being typed. | ||
// Example: Query: "Hel" suggestions = ["Hello", "Hello world", "help"] Filtered | ||
// suggestions = ["Hello", "help"] | ||
// if query ends with space, number of words from query will have an extra word. | ||
// so it will suggest the next word. | ||
boolean numberOfWordsIsEqual = (numberOfWordsFromQuery == numberOfWordsFromAutoCompleteItem); | ||
|
||
// Case: If the first word from the suggestion is a stop word, it will not be | ||
// added to the list. | ||
// Example: Query: "The_" suggestions = ["The World", "The office"] Filtered | ||
// suggestions = [] | ||
boolean firstWordIsStopWord = stopWords.contains(suggestionTokens[0]); | ||
|
||
// Case: If the last word from the suggestion is a stop word, it will not be | ||
// added to the list. | ||
// Example: Query: "Hello_" suggestions = ["Hello my", "Hello world"] Filtered | ||
// suggestions = ["Hello world"] | ||
boolean lastWordIsStopWord = stopWords.contains(suggestionTokens[suggestionTokens.length - 1]); | ||
|
||
// Disable the use of terms query equals auto complete | ||
// Example: Query: "Hell" suggestions = ["Hello", "Hello world", "Help"] | ||
// Filtered suggestions = ["Hello", "Hello world", "Help"] | ||
numberOfWordsIsEqual = numberOfWordsIsEqual || !this.useTermsQueryEqualsAutoComplete; | ||
|
||
return (numberOfWordsIsEqual && !firstWordIsStopWord && !lastWordIsStopWord); | ||
} | ||
|
||
private void validateDefaultStrategyConfig() { | ||
if (this.stopWords == null) { | ||
throw new IllegalArgumentException("Stop words list is not defined."); | ||
} | ||
if (this.numberOfWordsFromQuery == 0) { | ||
throw new IllegalArgumentException("Number of words from query is not defined."); | ||
} | ||
} | ||
|
||
private enum SuggestionFilterStrategy { | ||
DEFAULT, | ||
AUTOMATON | ||
} | ||
} |
Oops, something went wrong.