Skip to content

Commit

Permalink
fix: to_tsvector not applying explicitly to the full-text search filt…
Browse files Browse the repository at this point in the history
…ered column

It only applies when the existing column in the table is not of tsvector type.
  • Loading branch information
laurenceisla committed Jan 17, 2025
1 parent ce27425 commit 5e61a05
Show file tree
Hide file tree
Showing 10 changed files with 400 additions and 123 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- #3795, Clarify `Accept: vnd.pgrst.object` error message - @steve-chavez
- #3779, Always log the schema cache load time - @steve-chavez
- #3706, Fix insert with `missing=default` uses default value of domain instead of column - @taimoorzaeem
- #2255, Fix `to_tsvector()` not applying explicitly to the full-text search filtered column (on non `tsvector` types) - @laurenceisla

### Changed

Expand Down
48 changes: 27 additions & 21 deletions src/PostgREST/Plan.hs
Original file line number Diff line number Diff line change
Expand Up @@ -265,27 +265,29 @@ data ResolverContext = ResolverContext
, outputType :: Text -- ^ The output type for the response payload; e.g. "csv", "json", "binary".
}

resolveColumnField :: Column -> CoercibleField
resolveColumnField col = CoercibleField (colName col) mempty False (colNominalType col) Nothing (colDefault col) False
resolveColumnField :: Column -> ToTsVector -> CoercibleField
resolveColumnField col toTsV = CoercibleField (colName col) mempty False toTsV (colNominalType col) Nothing (colDefault col) False

resolveTableFieldName :: Table -> FieldName -> CoercibleField
resolveTableFieldName table fieldName =
resolveTableFieldName :: Table -> FieldName -> ToTsVector -> CoercibleField
resolveTableFieldName table fieldName toTsV=
fromMaybe (unknownField fieldName []) $ HMI.lookup fieldName (tableColumns table) >>=
Just . resolveColumnField
Just . flip resolveColumnField toTsV

-- | Resolve a type within the context based on the given field name and JSON path. Although there are situations where failure to resolve a field is considered an error (see `resolveOrError`), there are also situations where we allow it (RPC calls). If it should be an error and `resolveOrError` doesn't fit, ensure to check the `cfIRType` isn't empty.
resolveTypeOrUnknown :: ResolverContext -> Field -> CoercibleField
resolveTypeOrUnknown ResolverContext{..} (fn, jp) =
resolveTypeOrUnknown :: ResolverContext -> Field -> ToTsVector -> CoercibleField
resolveTypeOrUnknown ResolverContext{..} (fn, jp) toTsV =
case res of
-- types that are already json/jsonb don't need to be converted with `to_jsonb` for using arrow operators `data->attr`
-- this prevents indexes not applying https://github.com/PostgREST/postgrest/issues/2594
cf@CoercibleField{cfIRType="json"} -> cf{cfJsonPath=jp, cfToJson=False}
cf@CoercibleField{cfIRType="jsonb"} -> cf{cfJsonPath=jp, cfToJson=False}
cf@CoercibleField{cfIRType="json"} -> cf{cfJsonPath=jp, cfToJson=False}
cf@CoercibleField{cfIRType="jsonb"} -> cf{cfJsonPath=jp, cfToJson=False}
-- Do not apply to_tsvector to tsvector types
cf@CoercibleField{cfIRType="tsvector"} -> cf{cfJsonPath=jp, cfToJson=True, cfToTsVector=Nothing}
-- other types will get converted `to_jsonb(col)->attr`, even unknown types
cf -> cf{cfJsonPath=jp, cfToJson=True}
cf -> cf{cfJsonPath=jp, cfToJson=True}
where
res = fromMaybe (unknownField fn jp) $ HM.lookup qi tables >>=
Just . flip resolveTableFieldName fn
Just . (\t -> resolveTableFieldName t fn toTsV)

-- | Install any pre-defined data representation from source to target to coerce this reference.
--
Expand All @@ -311,11 +313,15 @@ withJsonParse ctx field@CoercibleField{cfIRType} = withTransformer ctx "json" cf

-- | Map the intermediate representation type to the output type defined by the resolver context (normally json), if available.
resolveOutputField :: ResolverContext -> Field -> CoercibleField
resolveOutputField ctx field = withOutputFormat ctx $ resolveTypeOrUnknown ctx field
resolveOutputField ctx field = withOutputFormat ctx $ resolveTypeOrUnknown ctx field Nothing

-- | Map the query string format of a value (text) into the intermediate representation type, if available.
resolveQueryInputField :: ResolverContext -> Field -> CoercibleField
resolveQueryInputField ctx field = withTextParse ctx $ resolveTypeOrUnknown ctx field
resolveQueryInputField :: ResolverContext -> Field -> OpExpr -> CoercibleField
resolveQueryInputField ctx field opExpr = withTextParse ctx $ resolveTypeOrUnknown ctx field toTsVector
where
toTsVector = case opExpr of
OpExpr _ (Fts _ lang _) -> Just lang
_ -> Nothing

-- | Builds the ReadPlan tree on a number of stages.
-- | Adds filters, order, limits on its respective nodes.
Expand Down Expand Up @@ -452,7 +458,7 @@ expandStarsForTable ctx@ResolverContext{representations, outputType} hasAgg rp@R

expandStarSelectField :: Bool -> [Column] -> CoercibleSelectField -> [CoercibleSelectField]
expandStarSelectField _ columns sel@CoercibleSelectField{csField=CoercibleField{cfName="*", cfJsonPath=[]}, csAggFunction=Nothing} =
map (\col -> sel { csField = withOutputFormat ctx $ resolveColumnField col }) columns
map (\col -> sel { csField = withOutputFormat ctx $ resolveColumnField col Nothing }) columns
expandStarSelectField True _ sel@CoercibleSelectField{csField=fld@CoercibleField{cfName="*", cfJsonPath=[]}, csAggFunction=Just Count} =
[sel { csField = fld { cfFullRow = True } }]
expandStarSelectField _ _ selectField = [selectField]
Expand Down Expand Up @@ -766,7 +772,7 @@ addOrders ctx ApiRequest{..} rReq =

resolveOrder :: ResolverContext -> OrderTerm -> CoercibleOrderTerm
resolveOrder _ (OrderRelationTerm a b c d) = CoercibleOrderRelationTerm a b c d
resolveOrder ctx (OrderTerm fld dir nulls) = CoercibleOrderTerm (resolveTypeOrUnknown ctx fld) dir nulls
resolveOrder ctx (OrderTerm fld dir nulls) = CoercibleOrderTerm (resolveTypeOrUnknown ctx fld Nothing) dir nulls

-- Validates that the related resource on the order is an embedded resource,
-- e.g. if `clients` is inside the `select` in /projects?order=clients(id)&select=*,clients(*),
Expand Down Expand Up @@ -831,7 +837,7 @@ addRelatedOrders (Node rp@ReadPlan{order,from} forest) = do
-- where_ = [
-- CoercibleStmnt (
-- CoercibleFilter {
-- field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson=False, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False},
-- field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson=False, cfToTsVector = Nothing, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False},
-- opExpr = op
-- }
-- )
Expand All @@ -847,7 +853,7 @@ addRelatedOrders (Node rp@ReadPlan{order,from} forest) = do
-- Don't do anything to the filter if there's no embedding (a subtree) on projects. Assume it's a normal filter.
--
-- >>> ReadPlan.where_ . rootLabel <$> addNullEmbedFilters (readPlanTree nullOp [])
-- Right [CoercibleStmnt (CoercibleFilter {field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson = False, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False}, opExpr = OpExpr True (Is IsNull)})]
-- Right [CoercibleStmnt (CoercibleFilter {field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson = False, cfToTsVector = Nothing, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False}, opExpr = OpExpr True (Is IsNull)})]
--
-- If there's an embedding on projects, then change the filter to use the internal aggregate name (`clients_projects_1`) so the filter can succeed later.
--
Expand All @@ -866,7 +872,7 @@ addNullEmbedFilters (Node rp@ReadPlan{where_=curLogic} forest) = do
newNullFilters rPlans = \case
(CoercibleExpr b lOp trees) ->
CoercibleExpr b lOp <$> (newNullFilters rPlans `traverse` trees)
flt@(CoercibleStmnt (CoercibleFilter (CoercibleField fld [] _ _ _ _ _) opExpr)) ->
flt@(CoercibleStmnt (CoercibleFilter (CoercibleField fld [] _ _ _ _ _ _) opExpr)) ->
let foundRP = find (\ReadPlan{relName, relAlias} -> fld == fromMaybe relName relAlias) rPlans in
case (foundRP, opExpr) of
(Just ReadPlan{relAggAlias}, OpExpr b (Is IsNull)) -> Right $ CoercibleStmnt $ CoercibleFilterNullEmbed b relAggAlias
Expand Down Expand Up @@ -900,7 +906,7 @@ resolveLogicTree ctx (Stmnt flt) = CoercibleStmnt $ resolveFilter ctx flt
resolveLogicTree ctx (Expr b op lts) = CoercibleExpr b op (map (resolveLogicTree ctx) lts)

resolveFilter :: ResolverContext -> Filter -> CoercibleFilter
resolveFilter ctx (Filter fld opExpr) = CoercibleFilter{field=resolveQueryInputField ctx fld, opExpr=opExpr}
resolveFilter ctx (Filter fld opExpr) = CoercibleFilter{field=resolveQueryInputField ctx fld opExpr, opExpr=opExpr}

-- Validates that spread embeds are only done on to-one relationships
validateSpreadEmbeds :: ReadPlanTree -> Either ApiRequestError ReadPlanTree
Expand Down Expand Up @@ -963,7 +969,7 @@ mutatePlan mutation qi ApiRequest{iPreferences=Preferences{..}, ..} SchemaCache{
resolveOrError :: ResolverContext -> Maybe Table -> FieldName -> Either ApiRequestError CoercibleField
resolveOrError _ Nothing _ = Left NotFound
resolveOrError ctx (Just table) field =
case resolveTableFieldName table field of
case resolveTableFieldName table field Nothing of
CoercibleField{cfIRType=""} -> Left $ ColumnNotFound (tableName table) field
cf -> Right $ withJsonParse ctx cf

Expand Down
19 changes: 11 additions & 8 deletions src/PostgREST/Plan/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module PostgREST.Plan.Types
, CoercibleLogicTree(..)
, CoercibleFilter(..)
, TransformerProc
, ToTsVector
, CoercibleOrderTerm(..)
, RelSelectField(..)
, RelJsonEmbedMode(..)
Expand All @@ -20,6 +21,7 @@ import PostgREST.SchemaCache.Identifiers (FieldName)
import Protolude

type TransformerProc = Text
type ToTsVector = Maybe (Maybe Text)

-- | A CoercibleField pairs the name of a query element with any type coercion information we need for some specific use case.
-- |
Expand All @@ -33,17 +35,18 @@ type TransformerProc = Text
-- |
-- | The type value is allowed to be the empty string. The analog here is soft type checking in programming languages: sometimes we don't need a variable to have a specified type and things will work anyhow. So the empty type variant is valid when we don't know and *don't need to know* about the specific type in some context. Note that this variation should not be used if it guarantees failure: in that case you should instead raise an error at the planning stage and bail out. For example, we can't parse JSON with `json_to_recordset` without knowing the types of each recipient field, and so error out. Using the empty string for the type would be incorrect and futile. On the other hand we use the empty type for RPC calls since type resolution isn't implemented for RPC, but it's fine because the query still works with Postgres' implicit coercion. In the future, hopefully we will support data representations across the board and then the empty type may be permanently retired.
data CoercibleField = CoercibleField
{ cfName :: FieldName
, cfJsonPath :: JsonPath
, cfToJson :: Bool
, cfIRType :: Text -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
, cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
, cfDefault :: Maybe Text
, cfFullRow :: Bool -- ^ True if the field represents the whole selected row. Used in spread rels: instead of COUNT(*), it does a COUNT(<row>) in order to not mix with other spreaded resources.
{ cfName :: FieldName
, cfJsonPath :: JsonPath
, cfToJson :: Bool
, cfToTsVector:: ToTsVector -- ^ If the field should be converted using to_tsvector(<language>, <field>)
, cfIRType :: Text -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
, cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
, cfDefault :: Maybe Text
, cfFullRow :: Bool -- ^ True if the field represents the whole selected row. Used in spread rels: instead of COUNT(*), it does a COUNT(<row>) in order to not mix with other spreaded resources.
} deriving (Eq, Show)

unknownField :: FieldName -> JsonPath -> CoercibleField
unknownField name path = CoercibleField name path False "" Nothing Nothing False
unknownField name path = CoercibleField name path False Nothing "" Nothing Nothing False

-- | Like an API request LogicTree, but with coercible field information.
data CoercibleLogicTree
Expand Down
2 changes: 1 addition & 1 deletion src/PostgREST/Query/QueryBuilder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ callPlanToQuery (FunctionCall qi params arguments returnsScalar returnsSetOfScal
KeyParams [] -> "FROM " <> callIt mempty
KeyParams prms -> case arguments of
DirectArgs args -> "FROM " <> callIt (fmtArgs prms args)
JsonArgs json -> fromJsonBodyF json ((\p -> CoercibleField (ppName p) mempty False (ppTypeMaxLength p) Nothing Nothing False) <$> prms) False True False <> ", " <>
JsonArgs json -> fromJsonBodyF json ((\p -> CoercibleField (ppName p) mempty False Nothing (ppTypeMaxLength p) Nothing Nothing False) <$> prms) False True False <> ", " <>
"LATERAL " <> callIt (fmtParams prms)

callIt :: SQL.Snippet -> SQL.Snippet
Expand Down
19 changes: 13 additions & 6 deletions src/PostgREST/Query/SqlFragment.hs
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,15 @@ pgFmtCallUnary :: Text -> SQL.Snippet -> SQL.Snippet
pgFmtCallUnary f x = SQL.sql (encodeUtf8 f) <> "(" <> x <> ")"

pgFmtField :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
pgFmtField table CoercibleField{cfFullRow=True} = fromQi table
pgFmtField table CoercibleField{cfName=fn, cfJsonPath=[]} = pgFmtColumn table fn
pgFmtField table CoercibleField{cfName=fn, cfToJson=doToJson, cfJsonPath=jp} | doToJson = "to_jsonb(" <> pgFmtColumn table fn <> ")" <> pgFmtJsonPath jp
| otherwise = pgFmtColumn table fn <> pgFmtJsonPath jp
pgFmtField table cf = case cfToTsVector cf of
Just lang -> "to_tsvector(" <> pgFmtFtsLang lang <> fmtFld <> ")"
_ -> fmtFld
where
fmtFld = case cf of
CoercibleField{cfFullRow=True} -> fromQi table
CoercibleField{cfName=fn, cfJsonPath=[]} -> pgFmtColumn table fn
CoercibleField{cfName=fn, cfToJson=doToJson, cfJsonPath=jp} | doToJson -> "to_jsonb(" <> pgFmtColumn table fn <> ")" <> pgFmtJsonPath jp
| otherwise -> pgFmtColumn table fn <> pgFmtJsonPath jp

-- Select the value of a named element from a table, applying its optional coercion mapping if any.
pgFmtTableCoerce :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
Expand Down Expand Up @@ -399,16 +404,18 @@ pgFmtFilter table (CoercibleFilter fld (OpExpr hasNot oper)) = notOp <> " " <> p
[""] -> "= ANY('{}') "
_ -> "= ANY (" <> pgFmtArrayLiteralForField vals fld <> ") "

Fts op lang val -> " " <> ftsOperator op <> "(" <> ftsLang lang <> unknownLiteral val <> ") "
Fts op lang val -> " " <> ftsOperator op <> "(" <> pgFmtFtsLang lang <> unknownLiteral val <> ") "
where
ftsLang = maybe mempty (\l -> unknownLiteral l <> ", ")
notOp = if hasNot then "NOT" else mempty
star c = if c == '*' then '%' else c
fmtQuant q val = case q of
Just QuantAny -> "ANY(" <> val <> ")"
Just QuantAll -> "ALL(" <> val <> ")"
Nothing -> val

pgFmtFtsLang :: Maybe Text -> SQL.Snippet
pgFmtFtsLang = maybe mempty (\l -> unknownLiteral l <> ", ")

pgFmtJoinCondition :: JoinCondition -> SQL.Snippet
pgFmtJoinCondition (JoinCondition (qi1, col1) (qi2, col2)) =
pgFmtColumn qi1 col1 <> " = " <> pgFmtColumn qi2 col2
Expand Down
13 changes: 12 additions & 1 deletion test/spec/Feature/Query/AndOrParamsSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ spec =
it "can handle is" $
get "/entities?and=(name.is.null,arr.is.null)&select=id" `shouldRespondWith`
[json|[{ "id": 4 }]|] { matchHeaders = [matchContentTypeJson] }
it "can handle fts" $ do
it "can handle fts on tsvector columns" $ do
get "/entities?or=(text_search_vector.fts.bar,text_search_vector.fts.baz)&select=id" `shouldRespondWith`
[json|[{ "id": 1 }, { "id": 2 }]|] { matchHeaders = [matchContentTypeJson] }
get "/tsearch?or=(text_search_vector.plfts(german).Art%20Spass, text_search_vector.plfts(french).amusant%20impossible, text_search_vector.fts(english).impossible)" `shouldRespondWith`
Expand All @@ -90,6 +90,17 @@ spec =
{"text_search_vector": "'amus':5 'fair':7 'impossibl':9 'peu':4" },
{"text_search_vector": "'art':4 'spass':5 'unmog':7"}
]|] { matchHeaders = [matchContentTypeJson] }
it "can handle fts on text and json columns" $ do
get "/grandchild_entities?or=(jsonb_col.fts.bar,jsonb_col.fts.foo)&select=jsonb_col" `shouldRespondWith`
[json|[
{ "jsonb_col": {"a": {"b":"foo"}} },
{ "jsonb_col": {"b":"bar"} }]
|] { matchHeaders = [matchContentTypeJson] }
get "/tsearch_to_tsvector?and=(text_search.not.plfts(german).Art%20Spass, text_search.not.plfts(french).amusant%20impossible, text_search.not.fts(english).impossible)&select=text_search" `shouldRespondWith`
[json|[
{ "text_search": "But also fun to do what is possible" },
{ "text_search": "Fat cats ate rats" }]
|] { matchHeaders = [matchContentTypeJson] }
it "can handle isdistinct" $
get "/entities?and=(id.gte.2,arr.isdistinct.{1,2})&select=id" `shouldRespondWith`
[json|[{ "id": 3 }, { "id": 4 }]|] { matchHeaders = [matchContentTypeJson] }
Expand Down
Loading

0 comments on commit 5e61a05

Please sign in to comment.