feat: data representations allow custom parsing and formatting of API…

… fields. See PR PostgREST#2523. Most notable code changes: - Load data representation casts into schema cache. - Data representations for reads, filters, inserts, updates, views, over joins. - `CoercibleField` represents name references in queries where coercion may be needed. - `ResolverContext` help facilitate field resolution during planning. - Planner 'resolves' names in the API query and pairs them with any implicit conversions to be used in the query builder stage. - Tests for all of the above.
Screenly · Jan 23, 2023 · 1d4b9fa · 1d4b9fa
1 parent 9065ed6
commit 1d4b9fa
Show file tree

Hide file tree

Showing 17 changed files with 941 additions and 148 deletions.
diff --git a/README.md b/README.md
@@ -1,3 +1,4 @@
+
 ![Logo](static/bigger-logo.png "Logo")
 
 [![Donate](https://img.shields.io/badge/Donate-Patreon-orange.svg?colorB=F96854)](https://www.patreon.com/postgrest)

diff --git a/postgrest.cabal b/postgrest.cabal
@@ -48,6 +48,7 @@ library
                       PostgREST.SchemaCache.Identifiers
                       PostgREST.SchemaCache.Proc
                       PostgREST.SchemaCache.Relationship
+                      PostgREST.SchemaCache.Representations
                       PostgREST.SchemaCache.Table
                       PostgREST.Error
                       PostgREST.Logger

diff --git a/src/PostgREST/Plan.hs b/src/PostgREST/Plan.hs
diff --git a/src/PostgREST/Plan/MutatePlan.hs b/src/PostgREST/Plan/MutatePlan.hs
@@ -6,8 +6,9 @@ where
 import qualified Data.ByteString.Lazy as LBS
 
 import PostgREST.ApiRequest.Preferences  (PreferResolution)
-import PostgREST.ApiRequest.Types        (LogicTree, OrderTerm)
-import PostgREST.Plan.Types              (TypedField)
+import PostgREST.ApiRequest.Types        (OrderTerm)
+import PostgREST.Plan.Types              (CoercibleField,
+                                          TypedLogicTree)
 import PostgREST.RangeQuery              (NonnegRange)
 import PostgREST.SchemaCache.Identifiers (FieldName,
                                           QualifiedIdentifier)
@@ -18,25 +19,25 @@ import Protolude
 data MutatePlan
   = Insert
       { in_        :: QualifiedIdentifier
-      , insCols    :: [TypedField]
+      , insCols    :: [CoercibleField]
       , insBody    :: Maybe LBS.ByteString
       , onConflict :: Maybe (PreferResolution, [FieldName])
-      , where_     :: [LogicTree]
+      , where_     :: [TypedLogicTree]
       , returning  :: [FieldName]
       , insPkCols  :: [FieldName]
       }
   | Update
       { in_       :: QualifiedIdentifier
-      , updCols   :: [TypedField]
+      , updCols   :: [CoercibleField]
       , updBody   :: Maybe LBS.ByteString
-      , where_    :: [LogicTree]
+      , where_    :: [TypedLogicTree]
       , mutRange  :: NonnegRange
       , mutOrder  :: [OrderTerm]
       , returning :: [FieldName]
       }
   | Delete
       { in_       :: QualifiedIdentifier
-      , where_    :: [LogicTree]
+      , where_    :: [TypedLogicTree]
       , mutRange  :: NonnegRange
       , mutOrder  :: [OrderTerm]
       , returning :: [FieldName]

diff --git a/src/PostgREST/Plan/ReadPlan.hs b/src/PostgREST/Plan/ReadPlan.hs
@@ -6,9 +6,11 @@ module PostgREST.Plan.ReadPlan
 
 import Data.Tree (Tree (..))
 
-import PostgREST.ApiRequest.Types         (Alias, Cast, Depth, Field,
-                                           Hint, JoinType, LogicTree,
-                                           NodeName, OrderTerm)
+import PostgREST.ApiRequest.Types         (Alias, Cast, Depth, Hint,
+                                           JoinType, NodeName,
+                                           OrderTerm)
+import PostgREST.Plan.Types               (CoercibleField (..),
+                                           TypedLogicTree)
 import PostgREST.RangeQuery               (NonnegRange)
 import PostgREST.SchemaCache.Identifiers  (FieldName,
                                            QualifiedIdentifier)
@@ -26,10 +28,10 @@ data JoinCondition =
   deriving (Eq)
 
 data ReadPlan = ReadPlan
-  { select       :: [(Field, Maybe Cast, Maybe Alias)]
+  { select       :: [(CoercibleField, Maybe Cast, Maybe Alias)]
   , from         :: QualifiedIdentifier
   , fromAlias    :: Maybe Alias
-  , where_       :: [LogicTree]
+  , where_       :: [TypedLogicTree]
   , order        :: [OrderTerm]
   , range_       :: NonnegRange
   , relName      :: NodeName

diff --git a/src/PostgREST/Plan/Types.hs b/src/PostgREST/Plan/Types.hs
@@ -1,24 +1,49 @@
 module PostgREST.Plan.Types
-  ( TypedField(..)
-  , resolveTableField
-
+  ( CoercibleField(..)
+  , unknownField
+  , TypedLogicTree(..)
+  , TypedFilter(..)
+  , TransformerProc
   ) where
 
-import qualified Data.HashMap.Strict.InsOrd as HMI
+import PostgREST.ApiRequest.Types (JsonPath, LogicOperator, OpExpr)
 
 import PostgREST.SchemaCache.Identifiers (FieldName)
-import PostgREST.SchemaCache.Table       (Column (..), Table (..))
 
 import Protolude
 
--- | A TypedField is a field with sufficient information to be read from JSON with `json_to_recordset`.
-data TypedField = TypedField
-   { tfName   :: FieldName
-   , tfIRType :: Text -- ^ The initial type of the field, before any casting.
-   } deriving (Eq)
-
-resolveTableField :: Table -> FieldName -> Maybe TypedField
-resolveTableField table fieldName =
-  case HMI.lookup fieldName (tableColumns table) of
-    Just column -> Just $ TypedField (colName column) (colNominalType column)
-    Nothing     -> Nothing
+type TransformerProc = Text
+
+-- | A CoercibleField pairs the name of a query element with any type coercion information we need for some specific use case.
+-- |
+-- | As suggested by the name, it's often a reference to a field in a table but really it can be any nameable element (function parameter, calculation with an alias, etc) with a knowable type.
+-- |
+-- | In the simplest case, it allows us to parse JSON payloads with `json_to_recordset`, for which we need to know both the name and the type of each thing we'd like to extract. At a higher level, CoercibleField generalises to reflect that any value we work with in a query may need type specific handling.
+-- |
+-- | CoercibleField is the foundation for the Data Representations feature. This feature allow user-definable mappings between database types so that the same data can be presented or interpreted in various ways as needed. Sometimes the way Postgres coerces data implicitly isn't right for the job. Different mappings might be appropriate for different situations: parsing a filter from a query string requires one function (text -> field type) while parsing a payload from JSON takes another (json -> field type). And the reverse, outputting a field as JSON, requires yet a third (field type -> json). CoercibleField is that "job specific" reference to an element paired with the type we desire for that particular purpose and the function we'll use to get there, if any.
+-- |
+-- | In the planning phase, we "resolve" generic named elements into these specialised CoercibleFields. Again this is context specific: two different CoercibleFields both representing the exact same table column in the database, even in the same query, might have two different target types and mapping functions. For example, one might represent a column in a filter, and another the very same column in an output role to be sent in the response body.
+-- |
+-- | The type value is allowed to be the empty string. The analog here is soft type checking in programming languages: sometimes we don't need a variable to have a specified type and things will work anyhow. So the empty type variant is valid when we don't know and *don't need to know* about the specific type in some context. Note that this variation should not be used if it guarantees failure: in that case you should instead raise an error at the planning stage and bail out. For example, we can't parse JSON with `json_to_recordset` without knowing the types of each recipient field, and so error out. Using the empty string for the type would be incorrect and futile. On the other hand we use the empty type for RPC calls since type resolution isn't implemented for RPC, but it's fine because the query still works with Postgres' implicit coercion. In the future, hopefully we will support data representations across the board and then the empty type may be permanently retired.
+data CoercibleField = CoercibleField
+  { tfName      :: FieldName
+  , tfJsonPath  :: JsonPath
+  , tfIRType    :: Text -- ^ The native Postgres type of the field, the type before mapping.
+  , tfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
+  } deriving (Eq)
+
+unknownField :: FieldName -> JsonPath -> CoercibleField
+unknownField name path = CoercibleField name path "" Nothing
+
+-- | Like a regular LogicTree but with field type information.
+data TypedLogicTree
+  = TypedExpr Bool LogicOperator [TypedLogicTree]
+  | TypedStmnt TypedFilter
+  deriving (Eq)
+
+data TypedFilter = TypedFilter
+  { typedField  :: CoercibleField
+  , typedOpExpr :: OpExpr
+  }
+  | TypedFilterNullEmbed Bool FieldName
+  deriving (Eq)
diff --git a/src/PostgREST/Query/QueryBuilder.hs b/src/PostgREST/Query/QueryBuilder.hs
@@ -53,7 +53,7 @@ readPlanToQuery (Node ReadPlan{select,from=mainQi,fromAlias,where_=logicForest,o
   where
     fromFrag = fromF relToParent mainQi fromAlias
     qi = getQualifiedIdentifier relToParent mainQi fromAlias
-    defSelect = [(("*", []), Nothing, Nothing)] -- gets all the columns in case of an empty select, ignoring/obtaining these columns is done at the aggregation stage
+    defSelect = [(unknownField "*" [], Nothing, Nothing)] -- gets all the columns in case of an empty select, ignoring/obtaining these columns is done at the aggregation stage
     (selects, joins) = foldr getSelectsJoins ([],[]) forest
 
 getSelectsJoins :: ReadPlanTree -> ([SQL.Snippet], [SQL.Snippet]) -> ([SQL.Snippet], [SQL.Snippet])

diff --git a/src/PostgREST/Query/SqlFragment.hs b/src/PostgREST/Query/SqlFragment.hs
@@ -58,15 +58,13 @@ import Control.Arrow ((***))
 import Data.Foldable                 (foldr1)
 import Text.InterpolatedString.Perl6 (qc)
 
-import PostgREST.ApiRequest.Types        (Alias, Cast, Field,
-                                          Filter (..),
+import PostgREST.ApiRequest.Types        (Alias, Cast,
                                           FtsOperator (..),
                                           JsonOperand (..),
                                           JsonOperation (..),
                                           JsonPath,
                                           LogicOperator (..),
-                                          LogicTree (..), OpExpr (..),
-                                          Operation (..),
+                                          OpExpr (..), Operation (..),
                                           OrderDirection (..),
                                           OrderNulls (..),
                                           OrderTerm (..),
@@ -75,7 +73,10 @@ import PostgREST.ApiRequest.Types        (Alias, Cast, Field,
 import PostgREST.MediaType               (MTPlanFormat (..),
                                           MTPlanOption (..))
 import PostgREST.Plan.ReadPlan           (JoinCondition (..))
-import PostgREST.Plan.Types              (TypedField (..))
+import PostgREST.Plan.Types              (CoercibleField (..),
+                                          TypedFilter (..),
+                                          TypedLogicTree (..),
+                                          unknownField)
 import PostgREST.RangeQuery              (NonnegRange, allRange,
                                           rangeLimit, rangeOffset)
 import PostgREST.SchemaCache.Identifiers (FieldName,
@@ -227,24 +228,37 @@ fromQi t = (if T.null s then mempty else pgFmtIdent s <> ".") <> pgFmtIdent n
     n = qiName t
     s = qiSchema t
 
+pgFmtCallUnary :: Text -> SQL.Snippet -> SQL.Snippet
+pgFmtCallUnary f x = SQL.sql (encodeUtf8 f) <> "(" <> x <> ")"
+
 pgFmtColumn :: QualifiedIdentifier -> Text -> SqlFragment
 pgFmtColumn table "*" = fromQi table <> ".*"
 pgFmtColumn table c   = fromQi table <> "." <> pgFmtIdent c
 
-pgFmtField :: QualifiedIdentifier -> Field -> SQL.Snippet
-pgFmtField table (c, []) = SQL.sql (pgFmtColumn table c)
+pgFmtField :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
+pgFmtField table CoercibleField{tfName=fn, tfJsonPath=[]} = SQL.sql (pgFmtColumn table fn)
 -- Using to_jsonb instead of to_json to avoid missing operator errors when filtering:
 -- "operator does not exist: json = unknown"
-pgFmtField table (c, jp) = SQL.sql ("to_jsonb(" <> pgFmtColumn table c <> ")") <> pgFmtJsonPath jp
+pgFmtField table CoercibleField{tfName=fn, tfJsonPath=jp} = SQL.sql ("to_jsonb(" <> pgFmtColumn table fn <> ")") <> pgFmtJsonPath jp
+
+-- Select the value of a named element from a table, applying its optional coercion mapping if any.
+pgFmtTableCoerce :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
+pgFmtTableCoerce table fld@(CoercibleField{tfTransform=(Just formatterProc)}) = pgFmtCallUnary formatterProc (pgFmtField table fld)
+pgFmtTableCoerce table f = pgFmtField table f
 
-pgFmtSelectItem :: QualifiedIdentifier -> (Field, Maybe Cast, Maybe Alias) -> SQL.Snippet
-pgFmtSelectItem table (f@(fName, jp), Nothing, alias) = pgFmtField table f <> SQL.sql (pgFmtAs fName jp alias)
+-- | Like the previous but now we just have a name so no namespace or JSON paths.
+pgFmtCoerceNamed :: CoercibleField -> SQL.Snippet
+pgFmtCoerceNamed CoercibleField{tfName=fn, tfTransform=(Just formatterProc)} = pgFmtCallUnary formatterProc (SQL.sql (pgFmtIdent fn)) <> " AS " <> SQL.sql (pgFmtIdent fn)
+pgFmtCoerceNamed CoercibleField{tfName=fn} = SQL.sql (pgFmtIdent fn)
+
+pgFmtSelectItem :: QualifiedIdentifier -> (CoercibleField, Maybe Cast, Maybe Alias) -> SQL.Snippet
+pgFmtSelectItem table (fld, Nothing, alias) = pgFmtTableCoerce table fld <> SQL.sql (pgFmtAs (tfName fld) (tfJsonPath fld) alias)
 -- Ideally we'd quote the cast with "pgFmtIdent cast". However, that would invalidate common casts such as "int", "bigint", etc.
 -- Try doing: `select 1::"bigint"` - it'll err, using "int8" will work though. There's some parser magic that pg does that's invalidated when quoting.
 -- Not quoting should be fine, we validate the input on Parsers.
-pgFmtSelectItem table (f@(fName, jp), Just cast, alias) = "CAST (" <> pgFmtField table f <> " AS " <> SQL.sql (encodeUtf8 cast) <> " )" <> SQL.sql (pgFmtAs fName jp alias)
+pgFmtSelectItem table (fld, Just cast, alias) = "CAST (" <> pgFmtTableCoerce table fld <> " AS " <> SQL.sql (encodeUtf8 cast) <> " )" <> SQL.sql (pgFmtAs (tfName fld) (tfJsonPath fld) alias)
 
-pgFmtSelectFromJson :: [TypedField] -> SQL.Snippet
+pgFmtSelectFromJson :: [CoercibleField] -> SQL.Snippet
 pgFmtSelectFromJson fields =
   SQL.sql "SELECT " <> parsedCols <> " " <>
   (if null fields
@@ -255,7 +269,7 @@ pgFmtSelectFromJson fields =
     else SQL.sql ("FROM json_to_recordset (" <> selectBody <> ") AS _ " <> "(" <> typedCols <> ") ")
   )
   where
-    parsedCols = SQL.sql $ BS.intercalate ", " $ pgFmtIdent . tfName <$> fields
+    parsedCols = intercalateSnippet ", " $ pgFmtCoerceNamed <$> fields
     typedCols = BS.intercalate ", " $ pgFmtIdent . tfName <> const " " <> encodeUtf8 . tfIRType <$> fields
 
 pgFmtOrderTerm :: QualifiedIdentifier -> OrderTerm -> SQL.Snippet
@@ -266,24 +280,35 @@ pgFmtOrderTerm qi ot =
     maybe mempty nullOrder $ otNullOrder ot])
   where
     fmtOTerm = \case
-      OrderTerm{otTerm}                        -> pgFmtField qi otTerm
-      OrderRelationTerm{otRelation, otRelTerm} -> pgFmtField (QualifiedIdentifier mempty otRelation) otRelTerm
+      OrderTerm{otTerm=(fn, jp)}                        -> pgFmtField qi (unknownField fn jp)
+      OrderRelationTerm{otRelation, otRelTerm=(fn, jp)} -> pgFmtField (QualifiedIdentifier mempty otRelation) (unknownField fn jp)
 
     direction OrderAsc  = "ASC"
     direction OrderDesc = "DESC"
 
     nullOrder OrderNullsFirst = "NULLS FIRST"
     nullOrder OrderNullsLast  = "NULLS LAST"
 
-
-pgFmtFilter :: QualifiedIdentifier -> Filter -> SQL.Snippet
-pgFmtFilter _ (FilterNullEmbed hasNot fld) = SQL.sql (pgFmtIdent fld) <> " IS " <> (if hasNot then "NOT" else mempty) <> " NULL"
-pgFmtFilter _ (Filter _ (NoOpExpr _)) = mempty -- TODO unreachable because NoOpExpr is filtered on QueryParams
-pgFmtFilter table (Filter fld (OpExpr hasNot oper)) = notOp <> " " <> case oper of
+-- | Interpret a literal in the way the planner indicated through the CoercibleField.
+pgFmtUnknownLiteralForField :: SQL.Snippet -> CoercibleField -> SQL.Snippet
+pgFmtUnknownLiteralForField value CoercibleField{tfTransform=(Just parserProc)} = pgFmtCallUnary parserProc value
+-- But when no transform is requested, we just use the literal as-is.
+pgFmtUnknownLiteralForField value _ = value
+
+-- | Array version of the above, used by ANY().
+pgFmtArrayLiteralForField :: [Text] -> CoercibleField -> SQL.Snippet
+pgFmtArrayLiteralForField values CoercibleField{tfTransform=(Just parserProc)} = SQL.sql "ARRAY[" <> intercalateSnippet ", " (pgFmtCallUnary parserProc . unknownLiteral <$> values) <> "]"
+-- When no transformation is requested, use an array literal which should be simpler, maybe faster.
+pgFmtArrayLiteralForField values _ = unknownLiteral (pgBuildArrayLiteral values)
+
+pgFmtFilter :: QualifiedIdentifier -> TypedFilter -> SQL.Snippet
+pgFmtFilter _ (TypedFilterNullEmbed hasNot fld) = SQL.sql (pgFmtIdent fld) <> " IS " <> (if hasNot then "NOT" else mempty) <> " NULL"
+pgFmtFilter _ (TypedFilter _ (NoOpExpr _)) = mempty -- TODO unreachable because NoOpExpr is filtered on QueryParams
+pgFmtFilter table (TypedFilter fld (OpExpr hasNot oper)) = notOp <> " " <> case oper of
    Op op val  -> pgFmtFieldOp op <> " " <> case op of
      OpLike  -> unknownLiteral (T.map star val)
      OpILike -> unknownLiteral (T.map star val)
-     _       -> unknownLiteral val
+     _       -> pgFmtUnknownLiteralForField (unknownLiteral val) fld
 
    -- IS cannot be prepared. `PREPARE boolplan AS SELECT * FROM projects where id IS $1` will give a syntax error.
    -- The above can be fixed by using `PREPARE boolplan AS SELECT * FROM projects where id IS NOT DISTINCT FROM $1;`
@@ -300,7 +325,7 @@ pgFmtFilter table (Filter fld (OpExpr hasNot oper)) = notOp <> " " <> case oper
    -- + Can invalidate prepared statements: multiple parameters on an IN($1, $2, $3) will lead to using different prepared statements and not take advantage of caching.
    In vals -> pgFmtField table fld <> " " <> case vals of
       [""] -> "= ANY('{}') "
-      _    -> "= ANY (" <> unknownLiteral (pgBuildArrayLiteral vals) <> ") "
+      _    -> "= ANY (" <> pgFmtArrayLiteralForField vals fld <> ") "
 
    Fts op lang val ->
      pgFmtFieldFts op <> "(" <> ftsLang lang <> unknownLiteral val <> ") "
@@ -315,14 +340,14 @@ pgFmtJoinCondition :: JoinCondition -> SQL.Snippet
 pgFmtJoinCondition (JoinCondition (qi1, col1) (qi2, col2)) =
   SQL.sql $ pgFmtColumn qi1 col1 <> " = " <> pgFmtColumn qi2 col2
 
-pgFmtLogicTree :: QualifiedIdentifier -> LogicTree -> SQL.Snippet
-pgFmtLogicTree qi (Expr hasNot op forest) = SQL.sql notOp <> " (" <> intercalateSnippet (opSql op) (pgFmtLogicTree qi <$> forest) <> ")"
+pgFmtLogicTree :: QualifiedIdentifier -> TypedLogicTree -> SQL.Snippet
+pgFmtLogicTree qi (TypedExpr hasNot op forest) = SQL.sql notOp <> " (" <> intercalateSnippet (opSql op) (pgFmtLogicTree qi <$> forest) <> ")"
   where
     notOp =  if hasNot then "NOT" else mempty
 
     opSql And = " AND "
     opSql Or  = " OR "
-pgFmtLogicTree qi (Stmnt flt) = pgFmtFilter qi flt
+pgFmtLogicTree qi (TypedStmnt flt) = pgFmtFilter qi flt
 
 pgFmtJsonPath :: JsonPath -> SQL.Snippet
 pgFmtJsonPath = \case
Original file line number	Diff line number	Diff line change
		@@ -1,3 +1,4 @@

		![Logo](static/bigger-logo.png "Logo")

		[![Donate](https://img.shields.io/badge/Donate-Patreon-orange.svg?colorB=F96854)](https://www.patreon.com/postgrest)
Expand Down