Skip to content

Commit

Permalink
feat: data representations allow custom parsing and formatting of API…
Browse files Browse the repository at this point in the history
… fields.

See PR PostgREST#2523. Most notable code changes:

- Load data representation casts into schema cache.
- Data representations for reads, filters, inserts, updates, views, over joins.
- `CoercibleField` represents name references in queries where coercion may be needed.
- `ResolverContext` help facilitate field resolution during planning.
- Planner 'resolves' names in the API query and pairs them with any implicit conversions to be used in the query builder stage.
- Tests for all of the above.
  • Loading branch information
aljungberg committed Jan 23, 2023
1 parent 9065ed6 commit 1d4b9fa
Show file tree
Hide file tree
Showing 17 changed files with 941 additions and 148 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

![Logo](static/bigger-logo.png "Logo")

[![Donate](https://img.shields.io/badge/Donate-Patreon-orange.svg?colorB=F96854)](https://www.patreon.com/postgrest)
Expand Down
1 change: 1 addition & 0 deletions postgrest.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ library
PostgREST.SchemaCache.Identifiers
PostgREST.SchemaCache.Proc
PostgREST.SchemaCache.Relationship
PostgREST.SchemaCache.Representations
PostgREST.SchemaCache.Table
PostgREST.Error
PostgREST.Logger
Expand Down
253 changes: 188 additions & 65 deletions src/PostgREST/Plan.hs

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions src/PostgREST/Plan/MutatePlan.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ where
import qualified Data.ByteString.Lazy as LBS

import PostgREST.ApiRequest.Preferences (PreferResolution)
import PostgREST.ApiRequest.Types (LogicTree, OrderTerm)
import PostgREST.Plan.Types (TypedField)
import PostgREST.ApiRequest.Types (OrderTerm)
import PostgREST.Plan.Types (CoercibleField,
TypedLogicTree)
import PostgREST.RangeQuery (NonnegRange)
import PostgREST.SchemaCache.Identifiers (FieldName,
QualifiedIdentifier)
Expand All @@ -18,25 +19,25 @@ import Protolude
data MutatePlan
= Insert
{ in_ :: QualifiedIdentifier
, insCols :: [TypedField]
, insCols :: [CoercibleField]
, insBody :: Maybe LBS.ByteString
, onConflict :: Maybe (PreferResolution, [FieldName])
, where_ :: [LogicTree]
, where_ :: [TypedLogicTree]
, returning :: [FieldName]
, insPkCols :: [FieldName]
}
| Update
{ in_ :: QualifiedIdentifier
, updCols :: [TypedField]
, updCols :: [CoercibleField]
, updBody :: Maybe LBS.ByteString
, where_ :: [LogicTree]
, where_ :: [TypedLogicTree]
, mutRange :: NonnegRange
, mutOrder :: [OrderTerm]
, returning :: [FieldName]
}
| Delete
{ in_ :: QualifiedIdentifier
, where_ :: [LogicTree]
, where_ :: [TypedLogicTree]
, mutRange :: NonnegRange
, mutOrder :: [OrderTerm]
, returning :: [FieldName]
Expand Down
12 changes: 7 additions & 5 deletions src/PostgREST/Plan/ReadPlan.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ module PostgREST.Plan.ReadPlan

import Data.Tree (Tree (..))

import PostgREST.ApiRequest.Types (Alias, Cast, Depth, Field,
Hint, JoinType, LogicTree,
NodeName, OrderTerm)
import PostgREST.ApiRequest.Types (Alias, Cast, Depth, Hint,
JoinType, NodeName,
OrderTerm)
import PostgREST.Plan.Types (CoercibleField (..),
TypedLogicTree)
import PostgREST.RangeQuery (NonnegRange)
import PostgREST.SchemaCache.Identifiers (FieldName,
QualifiedIdentifier)
Expand All @@ -26,10 +28,10 @@ data JoinCondition =
deriving (Eq)

data ReadPlan = ReadPlan
{ select :: [(Field, Maybe Cast, Maybe Alias)]
{ select :: [(CoercibleField, Maybe Cast, Maybe Alias)]
, from :: QualifiedIdentifier
, fromAlias :: Maybe Alias
, where_ :: [LogicTree]
, where_ :: [TypedLogicTree]
, order :: [OrderTerm]
, range_ :: NonnegRange
, relName :: NodeName
Expand Down
57 changes: 41 additions & 16 deletions src/PostgREST/Plan/Types.hs
Original file line number Diff line number Diff line change
@@ -1,24 +1,49 @@
module PostgREST.Plan.Types
( TypedField(..)
, resolveTableField

( CoercibleField(..)
, unknownField
, TypedLogicTree(..)
, TypedFilter(..)
, TransformerProc
) where

import qualified Data.HashMap.Strict.InsOrd as HMI
import PostgREST.ApiRequest.Types (JsonPath, LogicOperator, OpExpr)

import PostgREST.SchemaCache.Identifiers (FieldName)
import PostgREST.SchemaCache.Table (Column (..), Table (..))

import Protolude

-- | A TypedField is a field with sufficient information to be read from JSON with `json_to_recordset`.
data TypedField = TypedField
{ tfName :: FieldName
, tfIRType :: Text -- ^ The initial type of the field, before any casting.
} deriving (Eq)

resolveTableField :: Table -> FieldName -> Maybe TypedField
resolveTableField table fieldName =
case HMI.lookup fieldName (tableColumns table) of
Just column -> Just $ TypedField (colName column) (colNominalType column)
Nothing -> Nothing
type TransformerProc = Text

-- | A CoercibleField pairs the name of a query element with any type coercion information we need for some specific use case.
-- |
-- | As suggested by the name, it's often a reference to a field in a table but really it can be any nameable element (function parameter, calculation with an alias, etc) with a knowable type.
-- |
-- | In the simplest case, it allows us to parse JSON payloads with `json_to_recordset`, for which we need to know both the name and the type of each thing we'd like to extract. At a higher level, CoercibleField generalises to reflect that any value we work with in a query may need type specific handling.
-- |
-- | CoercibleField is the foundation for the Data Representations feature. This feature allow user-definable mappings between database types so that the same data can be presented or interpreted in various ways as needed. Sometimes the way Postgres coerces data implicitly isn't right for the job. Different mappings might be appropriate for different situations: parsing a filter from a query string requires one function (text -> field type) while parsing a payload from JSON takes another (json -> field type). And the reverse, outputting a field as JSON, requires yet a third (field type -> json). CoercibleField is that "job specific" reference to an element paired with the type we desire for that particular purpose and the function we'll use to get there, if any.
-- |
-- | In the planning phase, we "resolve" generic named elements into these specialised CoercibleFields. Again this is context specific: two different CoercibleFields both representing the exact same table column in the database, even in the same query, might have two different target types and mapping functions. For example, one might represent a column in a filter, and another the very same column in an output role to be sent in the response body.
-- |
-- | The type value is allowed to be the empty string. The analog here is soft type checking in programming languages: sometimes we don't need a variable to have a specified type and things will work anyhow. So the empty type variant is valid when we don't know and *don't need to know* about the specific type in some context. Note that this variation should not be used if it guarantees failure: in that case you should instead raise an error at the planning stage and bail out. For example, we can't parse JSON with `json_to_recordset` without knowing the types of each recipient field, and so error out. Using the empty string for the type would be incorrect and futile. On the other hand we use the empty type for RPC calls since type resolution isn't implemented for RPC, but it's fine because the query still works with Postgres' implicit coercion. In the future, hopefully we will support data representations across the board and then the empty type may be permanently retired.
data CoercibleField = CoercibleField
{ tfName :: FieldName
, tfJsonPath :: JsonPath
, tfIRType :: Text -- ^ The native Postgres type of the field, the type before mapping.
, tfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
} deriving (Eq)

unknownField :: FieldName -> JsonPath -> CoercibleField
unknownField name path = CoercibleField name path "" Nothing

-- | Like a regular LogicTree but with field type information.
data TypedLogicTree
= TypedExpr Bool LogicOperator [TypedLogicTree]
| TypedStmnt TypedFilter
deriving (Eq)

data TypedFilter = TypedFilter
{ typedField :: CoercibleField
, typedOpExpr :: OpExpr
}
| TypedFilterNullEmbed Bool FieldName
deriving (Eq)
2 changes: 1 addition & 1 deletion src/PostgREST/Query/QueryBuilder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ readPlanToQuery (Node ReadPlan{select,from=mainQi,fromAlias,where_=logicForest,o
where
fromFrag = fromF relToParent mainQi fromAlias
qi = getQualifiedIdentifier relToParent mainQi fromAlias
defSelect = [(("*", []), Nothing, Nothing)] -- gets all the columns in case of an empty select, ignoring/obtaining these columns is done at the aggregation stage
defSelect = [(unknownField "*" [], Nothing, Nothing)] -- gets all the columns in case of an empty select, ignoring/obtaining these columns is done at the aggregation stage
(selects, joins) = foldr getSelectsJoins ([],[]) forest

getSelectsJoins :: ReadPlanTree -> ([SQL.Snippet], [SQL.Snippet]) -> ([SQL.Snippet], [SQL.Snippet])
Expand Down
75 changes: 50 additions & 25 deletions src/PostgREST/Query/SqlFragment.hs
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,13 @@ import Control.Arrow ((***))
import Data.Foldable (foldr1)
import Text.InterpolatedString.Perl6 (qc)

import PostgREST.ApiRequest.Types (Alias, Cast, Field,
Filter (..),
import PostgREST.ApiRequest.Types (Alias, Cast,
FtsOperator (..),
JsonOperand (..),
JsonOperation (..),
JsonPath,
LogicOperator (..),
LogicTree (..), OpExpr (..),
Operation (..),
OpExpr (..), Operation (..),
OrderDirection (..),
OrderNulls (..),
OrderTerm (..),
Expand All @@ -75,7 +73,10 @@ import PostgREST.ApiRequest.Types (Alias, Cast, Field,
import PostgREST.MediaType (MTPlanFormat (..),
MTPlanOption (..))
import PostgREST.Plan.ReadPlan (JoinCondition (..))
import PostgREST.Plan.Types (TypedField (..))
import PostgREST.Plan.Types (CoercibleField (..),
TypedFilter (..),
TypedLogicTree (..),
unknownField)
import PostgREST.RangeQuery (NonnegRange, allRange,
rangeLimit, rangeOffset)
import PostgREST.SchemaCache.Identifiers (FieldName,
Expand Down Expand Up @@ -227,24 +228,37 @@ fromQi t = (if T.null s then mempty else pgFmtIdent s <> ".") <> pgFmtIdent n
n = qiName t
s = qiSchema t

pgFmtCallUnary :: Text -> SQL.Snippet -> SQL.Snippet
pgFmtCallUnary f x = SQL.sql (encodeUtf8 f) <> "(" <> x <> ")"

pgFmtColumn :: QualifiedIdentifier -> Text -> SqlFragment
pgFmtColumn table "*" = fromQi table <> ".*"
pgFmtColumn table c = fromQi table <> "." <> pgFmtIdent c

pgFmtField :: QualifiedIdentifier -> Field -> SQL.Snippet
pgFmtField table (c, []) = SQL.sql (pgFmtColumn table c)
pgFmtField :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
pgFmtField table CoercibleField{tfName=fn, tfJsonPath=[]} = SQL.sql (pgFmtColumn table fn)
-- Using to_jsonb instead of to_json to avoid missing operator errors when filtering:
-- "operator does not exist: json = unknown"
pgFmtField table (c, jp) = SQL.sql ("to_jsonb(" <> pgFmtColumn table c <> ")") <> pgFmtJsonPath jp
pgFmtField table CoercibleField{tfName=fn, tfJsonPath=jp} = SQL.sql ("to_jsonb(" <> pgFmtColumn table fn <> ")") <> pgFmtJsonPath jp

-- Select the value of a named element from a table, applying its optional coercion mapping if any.
pgFmtTableCoerce :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
pgFmtTableCoerce table fld@(CoercibleField{tfTransform=(Just formatterProc)}) = pgFmtCallUnary formatterProc (pgFmtField table fld)
pgFmtTableCoerce table f = pgFmtField table f

pgFmtSelectItem :: QualifiedIdentifier -> (Field, Maybe Cast, Maybe Alias) -> SQL.Snippet
pgFmtSelectItem table (f@(fName, jp), Nothing, alias) = pgFmtField table f <> SQL.sql (pgFmtAs fName jp alias)
-- | Like the previous but now we just have a name so no namespace or JSON paths.
pgFmtCoerceNamed :: CoercibleField -> SQL.Snippet
pgFmtCoerceNamed CoercibleField{tfName=fn, tfTransform=(Just formatterProc)} = pgFmtCallUnary formatterProc (SQL.sql (pgFmtIdent fn)) <> " AS " <> SQL.sql (pgFmtIdent fn)
pgFmtCoerceNamed CoercibleField{tfName=fn} = SQL.sql (pgFmtIdent fn)

pgFmtSelectItem :: QualifiedIdentifier -> (CoercibleField, Maybe Cast, Maybe Alias) -> SQL.Snippet
pgFmtSelectItem table (fld, Nothing, alias) = pgFmtTableCoerce table fld <> SQL.sql (pgFmtAs (tfName fld) (tfJsonPath fld) alias)
-- Ideally we'd quote the cast with "pgFmtIdent cast". However, that would invalidate common casts such as "int", "bigint", etc.
-- Try doing: `select 1::"bigint"` - it'll err, using "int8" will work though. There's some parser magic that pg does that's invalidated when quoting.
-- Not quoting should be fine, we validate the input on Parsers.
pgFmtSelectItem table (f@(fName, jp), Just cast, alias) = "CAST (" <> pgFmtField table f <> " AS " <> SQL.sql (encodeUtf8 cast) <> " )" <> SQL.sql (pgFmtAs fName jp alias)
pgFmtSelectItem table (fld, Just cast, alias) = "CAST (" <> pgFmtTableCoerce table fld <> " AS " <> SQL.sql (encodeUtf8 cast) <> " )" <> SQL.sql (pgFmtAs (tfName fld) (tfJsonPath fld) alias)

pgFmtSelectFromJson :: [TypedField] -> SQL.Snippet
pgFmtSelectFromJson :: [CoercibleField] -> SQL.Snippet
pgFmtSelectFromJson fields =
SQL.sql "SELECT " <> parsedCols <> " " <>
(if null fields
Expand All @@ -255,7 +269,7 @@ pgFmtSelectFromJson fields =
else SQL.sql ("FROM json_to_recordset (" <> selectBody <> ") AS _ " <> "(" <> typedCols <> ") ")
)
where
parsedCols = SQL.sql $ BS.intercalate ", " $ pgFmtIdent . tfName <$> fields
parsedCols = intercalateSnippet ", " $ pgFmtCoerceNamed <$> fields
typedCols = BS.intercalate ", " $ pgFmtIdent . tfName <> const " " <> encodeUtf8 . tfIRType <$> fields

pgFmtOrderTerm :: QualifiedIdentifier -> OrderTerm -> SQL.Snippet
Expand All @@ -266,24 +280,35 @@ pgFmtOrderTerm qi ot =
maybe mempty nullOrder $ otNullOrder ot])
where
fmtOTerm = \case
OrderTerm{otTerm} -> pgFmtField qi otTerm
OrderRelationTerm{otRelation, otRelTerm} -> pgFmtField (QualifiedIdentifier mempty otRelation) otRelTerm
OrderTerm{otTerm=(fn, jp)} -> pgFmtField qi (unknownField fn jp)
OrderRelationTerm{otRelation, otRelTerm=(fn, jp)} -> pgFmtField (QualifiedIdentifier mempty otRelation) (unknownField fn jp)

direction OrderAsc = "ASC"
direction OrderDesc = "DESC"

nullOrder OrderNullsFirst = "NULLS FIRST"
nullOrder OrderNullsLast = "NULLS LAST"


pgFmtFilter :: QualifiedIdentifier -> Filter -> SQL.Snippet
pgFmtFilter _ (FilterNullEmbed hasNot fld) = SQL.sql (pgFmtIdent fld) <> " IS " <> (if hasNot then "NOT" else mempty) <> " NULL"
pgFmtFilter _ (Filter _ (NoOpExpr _)) = mempty -- TODO unreachable because NoOpExpr is filtered on QueryParams
pgFmtFilter table (Filter fld (OpExpr hasNot oper)) = notOp <> " " <> case oper of
-- | Interpret a literal in the way the planner indicated through the CoercibleField.
pgFmtUnknownLiteralForField :: SQL.Snippet -> CoercibleField -> SQL.Snippet
pgFmtUnknownLiteralForField value CoercibleField{tfTransform=(Just parserProc)} = pgFmtCallUnary parserProc value
-- But when no transform is requested, we just use the literal as-is.
pgFmtUnknownLiteralForField value _ = value

-- | Array version of the above, used by ANY().
pgFmtArrayLiteralForField :: [Text] -> CoercibleField -> SQL.Snippet
pgFmtArrayLiteralForField values CoercibleField{tfTransform=(Just parserProc)} = SQL.sql "ARRAY[" <> intercalateSnippet ", " (pgFmtCallUnary parserProc . unknownLiteral <$> values) <> "]"
-- When no transformation is requested, use an array literal which should be simpler, maybe faster.
pgFmtArrayLiteralForField values _ = unknownLiteral (pgBuildArrayLiteral values)

pgFmtFilter :: QualifiedIdentifier -> TypedFilter -> SQL.Snippet
pgFmtFilter _ (TypedFilterNullEmbed hasNot fld) = SQL.sql (pgFmtIdent fld) <> " IS " <> (if hasNot then "NOT" else mempty) <> " NULL"
pgFmtFilter _ (TypedFilter _ (NoOpExpr _)) = mempty -- TODO unreachable because NoOpExpr is filtered on QueryParams
pgFmtFilter table (TypedFilter fld (OpExpr hasNot oper)) = notOp <> " " <> case oper of
Op op val -> pgFmtFieldOp op <> " " <> case op of
OpLike -> unknownLiteral (T.map star val)
OpILike -> unknownLiteral (T.map star val)
_ -> unknownLiteral val
_ -> pgFmtUnknownLiteralForField (unknownLiteral val) fld

-- IS cannot be prepared. `PREPARE boolplan AS SELECT * FROM projects where id IS $1` will give a syntax error.
-- The above can be fixed by using `PREPARE boolplan AS SELECT * FROM projects where id IS NOT DISTINCT FROM $1;`
Expand All @@ -300,7 +325,7 @@ pgFmtFilter table (Filter fld (OpExpr hasNot oper)) = notOp <> " " <> case oper
-- + Can invalidate prepared statements: multiple parameters on an IN($1, $2, $3) will lead to using different prepared statements and not take advantage of caching.
In vals -> pgFmtField table fld <> " " <> case vals of
[""] -> "= ANY('{}') "
_ -> "= ANY (" <> unknownLiteral (pgBuildArrayLiteral vals) <> ") "
_ -> "= ANY (" <> pgFmtArrayLiteralForField vals fld <> ") "

Fts op lang val ->
pgFmtFieldFts op <> "(" <> ftsLang lang <> unknownLiteral val <> ") "
Expand All @@ -315,14 +340,14 @@ pgFmtJoinCondition :: JoinCondition -> SQL.Snippet
pgFmtJoinCondition (JoinCondition (qi1, col1) (qi2, col2)) =
SQL.sql $ pgFmtColumn qi1 col1 <> " = " <> pgFmtColumn qi2 col2

pgFmtLogicTree :: QualifiedIdentifier -> LogicTree -> SQL.Snippet
pgFmtLogicTree qi (Expr hasNot op forest) = SQL.sql notOp <> " (" <> intercalateSnippet (opSql op) (pgFmtLogicTree qi <$> forest) <> ")"
pgFmtLogicTree :: QualifiedIdentifier -> TypedLogicTree -> SQL.Snippet
pgFmtLogicTree qi (TypedExpr hasNot op forest) = SQL.sql notOp <> " (" <> intercalateSnippet (opSql op) (pgFmtLogicTree qi <$> forest) <> ")"
where
notOp = if hasNot then "NOT" else mempty

opSql And = " AND "
opSql Or = " OR "
pgFmtLogicTree qi (Stmnt flt) = pgFmtFilter qi flt
pgFmtLogicTree qi (TypedStmnt flt) = pgFmtFilter qi flt

pgFmtJsonPath :: JsonPath -> SQL.Snippet
pgFmtJsonPath = \case
Expand Down
Loading

0 comments on commit 1d4b9fa

Please sign in to comment.