Skip to content

Commit

Permalink
Merge pull request #42 from midas-isg/imprv_text_search
Browse files: browse the repository at this point in the history
added fuzzyMatch feature to text_search
  • Loading branch information
espinoj authored Sep 22, 2016
2 parents ec3c034 + 6f3df9c commit 8facf6c
Show file tree
Hide file tree
Showing 10 changed files with 135 additions and 52 deletions.
4 changes: 2 additions & 2 deletions app/controllers/LocationServices.java
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ public Result filterByTerm(
@ApiImplicitParam(
value = findExBody,
required = true,
dataType = "[model.Request]",
dataType = "models.Request",
paramType = "body"
)
})
Expand Down Expand Up @@ -282,7 +282,7 @@ private ArrayNode toArrayNode(JsonNode asJson) {
@ApiImplicitParam(
value = findBulkExBody,
required = true,
dataType = "List[JsonNode]",
dataType = "Array[models.Request]",
paramType = "body"
)
} )
Expand Down
5 changes: 5 additions & 0 deletions app/dao/LocationDao.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package dao;

import static interactors.Util.isTrue;

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
Expand Down Expand Up @@ -119,6 +121,9 @@ public Long delete(Location location) {

public List<?> findByTerm(Request req) {
EntityManager em = JPA.em();
if(isTrue(req.isFuzzyMatch()) && req.getFuzzyMatchThreshold() != null)
em.createNativeQuery("SELECT set_limit(" + req.getFuzzyMatchThreshold() + ")"
).getSingleResult();
String q = new SearchSql().toQuerySqlString(req);
Query query = em.createNativeQuery(q);
query = setQueryParameters(req, query);
Expand Down
119 changes: 70 additions & 49 deletions app/dao/SearchSql.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import gateways.database.sql.SQLSanitizer;

import static interactors.Util.isTrue;

import java.sql.Date;
import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -30,6 +32,7 @@ private String toSqlQuery(Request req) {
codeTempTable);
q += unionTempTablesSql(req, qt, nameTempTable, otherNameTempTable,
codeTempTable);

return q;
}

Expand All @@ -43,33 +46,17 @@ private String toTempTablesSql(Request req, String qt,
searchSqls.add(names);

String otherNames = toOtherNameSearchSql(req, qt);
otherNames = (otherNames.isEmpty()) ? otherNames : otherNameTempTable + " AS ( " + otherNames;
if (!otherNames.isEmpty()){
if(!names.isEmpty())
otherNames += " AND " + exclude(nameTempTable);
otherNames += " ) ";
}
otherNames = (otherNames.isEmpty()) ? otherNames : otherNameTempTable + " AS ( " + otherNames + " ) ";
searchSqls.add(otherNames);

String codes = toCodeSearchSql(req, qt);
codes = (codes.isEmpty()) ? codes : codeTempTable + " AS ( " + codes;
if (!codes.isEmpty()){
if(!names.isEmpty())
codes += " AND " + exclude(nameTempTable);
if(!otherNames.isEmpty())
codes += " AND " + exclude(otherNameTempTable);
codes += " ) ";
}
codes = (codes.isEmpty()) ? codes : codeTempTable + " AS ( " + codes + " ) ";
searchSqls.add(codes);

String q = toWithStatementSql(searchSqls);
return q;
}

/**
 * Builds a SQL predicate that rejects rows whose gid already appears in the
 * given temp table.
 *
 * @param nameTempTable name of the temp table whose gids are to be excluded
 * @return the {@code gid NOT IN (SELECT gid FROM ...)} fragment, padded with a
 *         space on each side
 */
private String exclude(String nameTempTable) {
    StringBuilder predicate = new StringBuilder(" gid NOT IN (SELECT gid FROM ");
    predicate.append(nameTempTable);
    predicate.append(" ) ");
    return predicate.toString();
}

private String toWithStatementSql(List<String> searchSqls) {
String q = joinStringList(searchSqls, " , ");
if (q.isEmpty())
Expand All @@ -87,15 +74,12 @@ private String joinStringList(List<String> list, String delimiter) {
return q;
}

/**
 * Wraps a column in a {@code to_tsvector('simple', ...)} call, first passing it
 * through {@code unaccent_immutable(...)} when the request asks to ignore accents.
 *
 * @param req        search request; only its ignoreAccent flag is read
 * @param columnName column (or SQL expression) to convert
 * @return the SQL expression producing the text-search vector
 */
private String toTsVector(Request req, String columnName) {
    String source = columnName;
    if (isTrue(req.isIgnoreAccent())) {
        source = "unaccent_immutable(" + columnName + ")";
    }
    return "to_tsvector('simple', " + source + ")";
}

private String toQueryTerm(Request req) {
String queryText = toQueryText(req.getQueryTerm());
String queryText = req.getQueryTerm();
if(isTrue(req.isFuzzyMatch()))
return (isTrue(req.isIgnoreAccent())) ? "unaccent_immutable("
+ "'" + queryText + "'" + ")" : "'" + queryText + "'";
queryText = toQueryText(req.getQueryTerm());
String qt = (isTrue(req.isIgnoreAccent())) ? "unaccent_immutable("
+ "'" + queryText + "'" + ")\\:\\:tsquery" : "'" + queryText
+ "'";
Expand All @@ -114,26 +98,31 @@ private String unionTempTablesSql(Request req, String qt, String nameTempTable,
String q = joinStringList(sqlQueries, " UNION ");
if (q.isEmpty())
return q;
q = " SELECT * FROM ( " + q + " ) AS foo ";
q = " SELECT * FROM ( "
+ " SELECT DISTINCT ON (gid) gid, headline, rank, name "
+ " FROM ( " + q + " ) AS foo"
+ " ORDER BY gid, rank DESC "
+ " ) AS foo ";
q += " ORDER BY rank DESC, name ";
return q;
}

private String toCodeSearchSql(Request req, String qt) {
String q = "";
String codeTsVector = toTsVector(req, "code");
String codeCol = isTrue(req.isIgnoreAccent()) ? "unaccent_immutable(code)"
: "code";
String actualTerm = toActualTerm(req, "code");
if (isTrue(req.isSearchCodes())) {
String rankingStatement = toRankingStatement(req, qt, actualTerm);
String comparisonStatement = toComparisonStatement(req, qt, actualTerm);
String headlineStatement = toHeadlineStatement(req, qt, "code");
q += " SELECT DISTINCT ON(gid) gid, code AS name, "
+ "ts_rank_cd(" + codeTsVector + ", " + qt + ", 8) AS rank, "
+ " ts_headline('simple', " + codeCol + ", " + qt + " ) headline "
+ rankingStatement + " AS rank, "
+ headlineStatement + " AS headline "
+ " FROM ("
+ " SELECT gid, code FROM location WHERE code_type_id != 2 ";
if (containsFilters(req))
q += " AND " + toQueryFiltersSql(req);
q += " UNION select gid, code FROM alt_code) AS foo"
+ " WHERE " + codeTsVector + " @@ " + qt;
+ " WHERE " + comparisonStatement;
if (containsFilters(req))
q += " AND gid IN ( SELECT gid FROM location WHERE "
+ toQueryFiltersSql(req) + " ) ";
Expand All @@ -143,13 +132,15 @@ private String toCodeSearchSql(Request req, String qt) {

private String toOtherNameSearchSql(Request req, String qt) {
String q = "";
String nameTsVector = toTsVector(req, "name");
String nameCol = isTrue(req.isIgnoreAccent()) ? "unaccent_immutable(name)" : "name";
String actualTerm = toActualTerm(req, "name");
if (isTrue(req.isSearchOtherNames())) {
q += " SELECT DISTINCT ON(gid) gid, name, ts_rank_cd(" + nameTsVector + ", " + qt + ", 8) AS rank, "
+ " ts_headline('simple', " + nameCol + ", " + qt + " ) headline "
String rankingStatement = toRankingStatement(req, qt, actualTerm);
String comparisonStatement = toComparisonStatement(req, qt, actualTerm);
String headlineStatement = toHeadlineStatement(req, qt, "name");
q += " SELECT DISTINCT ON(gid) gid, name, " + rankingStatement + " AS rank, "
+ headlineStatement + " AS headline "
+ " FROM alt_name "
+ " WHERE " + nameTsVector + " @@ " + qt;
+ " WHERE " + comparisonStatement;
if (containsFilters(req))
q += " AND gid IN ( SELECT gid FROM location WHERE "
+ toQueryFiltersSql(req) + " ) ";
Expand All @@ -159,20 +150,54 @@ private String toOtherNameSearchSql(Request req, String qt) {

private String toNameSearchSql(Request req, String qt) {
String q = "";
String nameTsVector = toTsVector(req, "name");
String nameCol = isTrue(req.isIgnoreAccent()) ? "unaccent_immutable(name)"
: "name";
String actualTerm = toActualTerm(req, "name");
if (isTrue(req.isSearchNames())) {
q += " SELECT gid, name, ts_rank_cd(" + nameTsVector + ", " + qt + ", 8) AS rank, "
+ " ts_headline('simple', " + nameCol + ", " + qt + " ) headline "
String rankingStatement = toRankingStatement(req, qt, actualTerm);
String comparisonStatement = toComparisonStatement(req, qt, actualTerm);
String headlineStatement = toHeadlineStatement(req, qt, "name");
q += " SELECT gid, name, " + rankingStatement + " AS rank, "
+ headlineStatement + " AS headline "
+ " FROM location "
+ " WHERE " + nameTsVector + " @@ " + qt;
+ " WHERE " + comparisonStatement;
if (containsFilters(req))
q += " AND " + toQueryFiltersSql(req);
}
return q;
}

/**
 * Builds the {@code ts_headline('simple', ...)} SQL expression for a column.
 *
 * <p>For fuzzy-match requests the supplied {@code qt} is ignored and a fresh
 * {@code to_tsquery(...)} expression is derived from the request's query term
 * instead; otherwise {@code qt} is used unchanged.
 *
 * @param req        search request (ignoreAccent, fuzzyMatch and queryTerm are read)
 * @param qt         query-term SQL expression used for non-fuzzy searches
 * @param columnName column to produce the headline from
 * @return the headline SQL expression, padded with a space on each side
 */
private String toHeadlineStatement(Request req, String qt, String columnName) {
    final boolean ignoreAccent = isTrue(req.isIgnoreAccent());

    String document = columnName;
    if (ignoreAccent) {
        document = "unaccent_immutable(" + columnName + ")";
    }

    String query = qt;
    if (isTrue(req.isFuzzyMatch())) {
        String literal = "'" + toQueryText(req.getQueryTerm()) + "'";
        if (ignoreAccent) {
            literal = "unaccent_immutable(" + literal + ")";
        }
        // NOTE(review): to_tsquery is called without an explicit config while
        // ts_headline uses 'simple' -- confirm the default config is intended.
        query = " to_tsquery(" + literal + ") ";
    }

    return " ts_headline('simple', " + document + ", " + query + " ) ";
}

/**
 * Builds the WHERE-clause comparison between the searched term and the query.
 *
 * @param req        search request; only its fuzzyMatch flag is read
 * @param qt         query-term SQL expression (right-hand operand)
 * @param actualTerm column-side SQL expression (left-hand operand)
 * @return {@code actualTerm % qt} for fuzzy-match requests, otherwise
 *         {@code actualTerm @@ qt}
 */
private String toComparisonStatement(Request req, String qt, String actualTerm) {
    String operator = isTrue(req.isFuzzyMatch()) ? " % " : " @@ ";
    return actualTerm + operator + qt;
}

/**
 * Builds the column-side SQL expression that the query term is compared against.
 *
 * <p>The column is wrapped in {@code unaccent_immutable(...)} when accents are
 * ignored. For non-fuzzy requests the result is additionally wrapped in
 * {@code to_tsvector('simple', ...)}; fuzzy requests use the plain expression.
 *
 * @param req        search request (ignoreAccent and fuzzyMatch are read)
 * @param columnName column to search
 * @return the SQL expression for the searched term
 */
private String toActualTerm(Request req, String columnName) {
    String expression = columnName;
    if (isTrue(req.isIgnoreAccent())) {
        expression = "unaccent_immutable(" + columnName + ")";
    }
    if (!isTrue(req.isFuzzyMatch())) {
        expression = "to_tsvector('simple', " + expression + ")";
    }
    return expression;
}

/**
 * Builds the SQL expression used as the {@code rank} of a match.
 *
 * @param req        search request; only its fuzzyMatch flag is read
 * @param qt         query-term SQL expression
 * @param actualTerm column-side SQL expression
 * @return {@code similarity(actualTerm, qt)} for fuzzy-match requests, otherwise
 *         {@code ts_rank_cd(actualTerm, qt, 8)}; padded with a space on each side
 */
private String toRankingStatement(Request req, String qt, String actualTerm) {
    String rankCall;
    if (isTrue(req.isFuzzyMatch())) {
        rankCall = "similarity(" + actualTerm + ", " + qt + ")";
    } else {
        rankCall = "ts_rank_cd(" + actualTerm + ", " + qt + ", 8)";
    }
    return " " + rankCall + " ";
}

private boolean containsFilters(Request req) {
List<Integer> typeIds = req.getLocationTypeIds();
Date start = req.getStartDate();
Expand Down Expand Up @@ -210,8 +235,7 @@ private String toSelectStatementSql(String column, String qt, String tempTable)
String q = "";
if (tempTable == null)
return q;
q = " SELECT gid, ts_headline('simple', " + column + ", " + qt
+ " ) headline, rank, name " + " FROM " + tempTable + " ";
q = " SELECT gid, headline, rank, name " + " FROM " + tempTable + " ";
return q;
}

Expand Down Expand Up @@ -264,7 +288,4 @@ private void verifyQueryTerm(String value) {
}
}

/**
 * Null-safe truth check for a boxed flag.
 *
 * @param param flag to test; may be {@code null}
 * @return {@code false} when {@code param} is {@code null}, otherwise its value
 */
private Boolean isTrue(Boolean param) {
    if (param == null) {
        return false;
    }
    return param.booleanValue();
}
}
7 changes: 7 additions & 0 deletions app/interactors/GeoJsonRule.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ private static Map<String, Object> toProperties(Request req, int resultSize) {
putAsStringIfNotNull(properties, "includeOnly",
listToString(req.getIncludeOnly()));
putAsStringIfNotNull(properties, "resultSize", resultSize);
putAsStringIfNotNull(properties, "fuzzyMatch", req.isFuzzyMatch());
if (isTrue(req.isFuzzyMatch()))
putAsStringIfNotNull(properties, "fuzzyMatchThreshold", req.getFuzzyMatchThreshold());
return properties;
}

Expand Down Expand Up @@ -429,6 +432,10 @@ private static void setOtherParams(JsonNode node, Request req) {
JsonNode includeOnly = node.get("includeOnly");
if(includeOnly != null)
req.setIncludeOnly(interactors.Util.toListOfString(includeOnly));
value = returnDefaultIfKeyNotExists(node, "fuzzyMatch", false);
req.setFuzzyMatch(value);
if (containsKey(node, "fuzzyMatchThreshold"))
req.setFuzzyMatchThreshold((float) node.get("fuzzyMatchThreshold").asDouble());
}

private static void setEndDate(JsonNode node, Request req, String endDate) {
Expand Down
4 changes: 4 additions & 0 deletions app/interactors/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,8 @@ public static boolean contains(List<String> list, String item) {
return false;
return list.contains(item);
}

/**
 * Null-safe truth check for a boxed flag.
 *
 * @param param flag to test; may be {@code null}
 * @return {@code true} only when {@code param} is non-null and true;
 *         {@code false} for {@code null} or false
 */
public static Boolean isTrue(Boolean param) {
    return Boolean.TRUE.equals(param);
}
}
18 changes: 18 additions & 0 deletions app/models/Request.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ public class Request {
private Long rootALC;
private List<String> includeOnly;
private List<String> exclude;
private Boolean fuzzyMatch;
private Float fuzzyMatchThreshold = 0.3F;

public String getQueryTerm() {
return queryTerm;
Expand Down Expand Up @@ -131,4 +133,20 @@ public List<String> getExclude() {
public void setExclude(List<String> exclude) {
this.exclude = exclude;
}

/**
 * Returns the fuzzy-match flag of this request.
 *
 * @return the stored flag; {@code null} if it was never set
 */
public Boolean isFuzzyMatch() {
return fuzzyMatch;
}

/**
 * Sets the fuzzy-match flag of this request.
 *
 * @param fuzzyMatch new flag value; stored as given, without validation
 */
public void setFuzzyMatch(Boolean fuzzyMatch) {
this.fuzzyMatch = fuzzyMatch;
}

/**
 * Returns the threshold used for fuzzy matching.
 *
 * @return the stored threshold; the field is initialised to {@code 0.3F}
 *         and may be {@code null} if a caller set it so
 */
public Float getFuzzyMatchThreshold() {
return fuzzyMatchThreshold;
}

/**
 * Sets the threshold used for fuzzy matching.
 *
 * @param fuzzyMatchThreshold new threshold; stored as given, without range checks
 */
public void setFuzzyMatchThreshold(Float fuzzyMatchThreshold) {
this.fuzzyMatchThreshold = fuzzyMatchThreshold;
}
}
4 changes: 3 additions & 1 deletion public/examples/api/find-bulk.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
"endDate": 1985,
"locationTypeIds":[16, 6],
"verbose":true,
"includeOnly":[]
"includeOnly":[],
"fuzzyMatch":true,
"fuzzyMatchThreshold":0.3
},
{
"queryTerm": "NEW JERSEY",
Expand Down
2 changes: 2 additions & 0 deletions public/examples/api/find-by-term.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
"searchCodes":true,
"ignoreAccent":true,
"includeOnly":[],
"fuzzyMatch":false,
"fuzzyMatchThreshold":0.3,
"limit":10,
"offset":0,
"verbose":true
Expand Down
15 changes: 15 additions & 0 deletions test/integrations/server/TestFindLocation.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public class TestFindLocation {
private final String findByTermRequestFile1 = "test/test-find-by-term-request-1.json";
private final String findByTermRequestFile2 = "test/test-find-by-term-request-2.json";
private final String findByTermRequestFile3 = "test/test-find-by-term-request-3.json";
private final String fuzzyMatchRequest1 = "test/fuzzy-match-request-1.json";
private final String findBulkRequestFile1 = "test/test-find-bulk-request-1.json";

public static Runnable test() {
Expand All @@ -63,6 +64,7 @@ public void testFindLocation() {
unsafeFindBulkTest();
unsafeFindByNameTest();
findByTypeId();
fuzzyMatchTest();
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
Expand All @@ -72,6 +74,19 @@ public void testFindLocation() {

}

/**
 * Posts the fuzzy-match request fixture to the find-by-term endpoint and checks
 * the response: status OK, four top-level entries, exactly one feature with the
 * expected rank and matched term, and the threshold echoed in the properties.
 */
private void fuzzyMatchTest() {
    String requestBody = KmlRule.getStringFromFile(fuzzyMatchRequest1);
    WSResponse response = post(Server.makeTestUrl(findByTermPath), requestBody, jsonContentType);
    assertStatus(response, OK);

    JsonNode root = response.asJson();
    assertAreEqual(root.size(), 4);

    JsonNode features = root.get("features");
    assertAreEqual(features.size(), 1);

    // Looked up only after the size check so a missing feature fails the same way as before.
    JsonNode firstProperties = features.get(0).get("properties");
    // NOTE(review): exact double comparison; confirm assertAreEqual's semantics for doubles.
    assertAreEqual(firstProperties.get("rank").asDouble(), 0.33333334);
    assertAreEqual(firstProperties.get("matchedTerm").asText(), "ñámé wíth áccéñt");

    JsonNode responseProperties = root.get("properties");
    assertAreEqual(responseProperties.get("fuzzyMatchThreshold").asDouble(), 0.32);
}

private void findByTypeId() {
String url = Server.makeTestUrl(basePath + "/find-by-type/1");
WSResponse response = get(url);
Expand Down
9 changes: 9 additions & 0 deletions test/resources/test/fuzzy-match-request-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{"queryTerm":"namewit aksent",
"searchNames":true,
"searchOtherNames":true,
"searchCodes":true,
"ignoreAccent":true,
"fuzzyMatch":true,
"fuzzyMatchThreshold":0.32,
"limit":10
}

0 comments on commit 8facf6c

Please sign in to comment.