
Data Representations #2523

Closed
wants to merge 11 commits
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,20 @@ This project adheres to [Semantic Versioning](http://semver.org/).

## Unreleased

- #2523, Data representations - @aljungberg
+ Allows flexible API output formatting and input parsing on a per-column-type basis, using regular SQL functions configured in the database
+ Enables greater flexibility in the form and shape of your APIs, both for output and input, making PostgREST a more versatile general-purpose API server
+ Examples include base64 encoding/decoding binary data (like a `bytea` column containing an image), presenting a timestamp column as seconds since the Unix epoch or as an ISO 8601 string, or representing fixed-precision decimals as strings rather than doubles to preserve precision
+ ...and accepting the same formats in `POST`/`PUT`/`PATCH` by configuring the reverse transformation(s)
+ Other use cases include custom representations of enums, arrays, nested objects, CSS hex colour strings, gzip-compressed fields, metric-to-imperial conversions, and much more
+ Works with the `select` parameter when selecting only a subset of columns, with embedding through complex joins, with renamed fields, and with views and computed columns
+ Works when filtering on a formatted column, without extra indexes, by parsing the filter value to the canonical representation
+ Works for `RETURNING` operations, such as requesting the full body of a `POST`/`PUT`/`PATCH` with `Prefer: return=representation`
+ Works for batch updates and inserts
+ Completely optional: define the functions in the database and they will be used automatically everywhere (see the sketch after this list)
+ Preserves the ability to write to the original column and requires no extra storage or complex triggers (compared to using `GENERATED ALWAYS` columns)
+ Note: data representations require Postgres 10 (Postgres 11 if using `IN` predicates) and are not implemented for RPC
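
As a sketch of what this looks like in practice (illustrative only, not part of this diff): a representation is declared with ordinary SQL functions and casts on a domain. The `color` domain, `palette` table, and function bodies below are hypothetical; the `json` casts handle output and input, and the `text` cast handles filter parsing.

```sql
-- Hypothetical domain: colours stored as integers, shown as CSS hex strings.
CREATE DOMAIN color AS integer;

CREATE TABLE palette (
  id  serial PRIMARY KEY,
  rgb color
);

-- Output (field type -> json): formats the stored integer for responses.
CREATE FUNCTION json(color) RETURNS json AS $$
  SELECT to_json('#' || lpad(to_hex($1), 6, '0'));
$$ LANGUAGE sql IMMUTABLE;
CREATE CAST (color AS json) WITH FUNCTION json(color) AS IMPLICIT;

-- Input (json -> field type): parses "#rrggbb" payload values on writes.
CREATE FUNCTION color(json) RETURNS color AS $$
  SELECT (('x' || lpad(ltrim($1 #>> '{}', '#'), 8, '0'))::bit(32)::int)::color;
$$ LANGUAGE sql IMMUTABLE;
CREATE CAST (json AS color) WITH FUNCTION color(json) AS IMPLICIT;

-- Filters (text -> field type): parses query-string values like rgb=eq.%23ff0000.
CREATE FUNCTION color(text) RETURNS color AS $$
  SELECT (('x' || lpad(ltrim($1, '#'), 8, '0'))::bit(32)::int)::color;
$$ LANGUAGE sql IMMUTABLE;
CREATE CAST (text AS color) WITH FUNCTION color(text) AS IMPLICIT;
```

With these in place, `GET /palette` would return `rgb` as a string like `"#ff0000"`, and a `POST` or a filter would supply it in the same form, while the table keeps storing plain integers.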

### Added

- #1414, Add related orders - @steve-chavez
1 change: 1 addition & 0 deletions README.md
@@ -1,3 +1,4 @@

![Logo](static/bigger-logo.png "Logo")

[![Donate](https://img.shields.io/badge/Donate-Patreon-orange.svg?colorB=F96854)](https://www.patreon.com/postgrest)
1 change: 1 addition & 0 deletions postgrest.cabal
@@ -48,6 +48,7 @@ library
PostgREST.SchemaCache.Identifiers
PostgREST.SchemaCache.Proc
PostgREST.SchemaCache.Relationship
PostgREST.SchemaCache.Representations
PostgREST.SchemaCache.Table
PostgREST.Error
PostgREST.Logger
255 changes: 189 additions & 66 deletions src/PostgREST/Plan.hs

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions src/PostgREST/Plan/MutatePlan.hs
@@ -6,8 +6,9 @@ where
import qualified Data.ByteString.Lazy as LBS

import PostgREST.ApiRequest.Preferences (PreferResolution)
import PostgREST.ApiRequest.Types (LogicTree, OrderTerm)
import PostgREST.Plan.Types (TypedField)
import PostgREST.ApiRequest.Types (OrderTerm)
import PostgREST.Plan.Types (CoercibleField,
CoercibleLogicTree)
import PostgREST.RangeQuery (NonnegRange)
import PostgREST.SchemaCache.Identifiers (FieldName,
QualifiedIdentifier)
@@ -18,25 +19,25 @@ import Protolude
data MutatePlan
= Insert
{ in_ :: QualifiedIdentifier
, insCols :: [TypedField]
, insCols :: [CoercibleField]
, insBody :: Maybe LBS.ByteString
, onConflict :: Maybe (PreferResolution, [FieldName])
, where_ :: [LogicTree]
, where_ :: [CoercibleLogicTree]
, returning :: [FieldName]
, insPkCols :: [FieldName]
}
| Update
{ in_ :: QualifiedIdentifier
, updCols :: [TypedField]
, updCols :: [CoercibleField]
, updBody :: Maybe LBS.ByteString
, where_ :: [LogicTree]
, where_ :: [CoercibleLogicTree]
, mutRange :: NonnegRange
, mutOrder :: [OrderTerm]
, returning :: [FieldName]
}
| Delete
{ in_ :: QualifiedIdentifier
, where_ :: [LogicTree]
, where_ :: [CoercibleLogicTree]
, mutRange :: NonnegRange
, mutOrder :: [OrderTerm]
, returning :: [FieldName]
12 changes: 7 additions & 5 deletions src/PostgREST/Plan/ReadPlan.hs
@@ -6,9 +6,11 @@ module PostgREST.Plan.ReadPlan

import Data.Tree (Tree (..))

import PostgREST.ApiRequest.Types (Alias, Cast, Depth, Field,
Hint, JoinType, LogicTree,
NodeName, OrderTerm)
import PostgREST.ApiRequest.Types (Alias, Cast, Depth, Hint,
JoinType, NodeName,
OrderTerm)
import PostgREST.Plan.Types (CoercibleField (..),
CoercibleLogicTree)
import PostgREST.RangeQuery (NonnegRange)
import PostgREST.SchemaCache.Identifiers (FieldName,
QualifiedIdentifier)
@@ -26,10 +28,10 @@ data JoinCondition =
deriving (Eq)

data ReadPlan = ReadPlan
{ select :: [(Field, Maybe Cast, Maybe Alias)]
{ select :: [(CoercibleField, Maybe Cast, Maybe Alias)]
, from :: QualifiedIdentifier
, fromAlias :: Maybe Alias
, where_ :: [LogicTree]
, where_ :: [CoercibleLogicTree]
, order :: [OrderTerm]
, range_ :: NonnegRange
, relName :: NodeName
57 changes: 41 additions & 16 deletions src/PostgREST/Plan/Types.hs
@@ -1,24 +1,49 @@
module PostgREST.Plan.Types
( TypedField(..)
, resolveTableField

( CoercibleField(..)
, unknownField
, CoercibleLogicTree(..)
, CoercibleFilter(..)
, TransformerProc
) where

import qualified Data.HashMap.Strict.InsOrd as HMI
import PostgREST.ApiRequest.Types (JsonPath, LogicOperator, OpExpr)

import PostgREST.SchemaCache.Identifiers (FieldName)
import PostgREST.SchemaCache.Table (Column (..), Table (..))

import Protolude

-- | A TypedField is a field with sufficient information to be read from JSON with `json_to_recordset`.
data TypedField = TypedField
{ tfName :: FieldName
, tfIRType :: Text -- ^ The initial type of the field, before any casting.
} deriving (Eq)

resolveTableField :: Table -> FieldName -> Maybe TypedField
resolveTableField table fieldName =
case HMI.lookup fieldName (tableColumns table) of
Just column -> Just $ TypedField (colName column) (colNominalType column)
Nothing -> Nothing
type TransformerProc = Text

-- | A CoercibleField pairs the name of a query element with any type coercion information we need for some specific use case.
-- |
-- | As suggested by the name, it's often a reference to a field in a table but really it can be any nameable element (function parameter, calculation with an alias, etc) with a knowable type.
-- |
-- | In the simplest case, it allows us to parse JSON payloads with `json_to_recordset`, for which we need to know both the name and the type of each thing we'd like to extract. At a higher level, CoercibleField generalises to reflect that any value we work with in a query may need type specific handling.
-- |
-- | CoercibleField is the foundation for the Data Representations feature. This feature allows user-definable mappings between database types so that the same data can be presented or interpreted in various ways as needed. Sometimes the way Postgres coerces data implicitly isn't right for the job. Different mappings might be appropriate for different situations: parsing a filter from a query string requires one function (text -> field type) while parsing a payload from JSON takes another (json -> field type). And the reverse, outputting a field as JSON, requires yet a third (field type -> json). CoercibleField is that "job specific" reference to an element paired with the type we desire for that particular purpose and the function we'll use to get there, if any.
-- |
-- | In the planning phase, we "resolve" generic named elements into these specialised CoercibleFields. Again this is context specific: two different CoercibleFields both representing the exact same table column in the database, even in the same query, might have two different target types and mapping functions. For example, one might represent a column in a filter, and another the very same column in an output role to be sent in the response body.
-- |
-- | The type value is allowed to be the empty string. The analog here is soft type checking in programming languages: sometimes we don't need a variable to have a specified type and things will work anyhow. So the empty type variant is valid when we don't know and *don't need to know* about the specific type in some context. Note that this variation should not be used if it guarantees failure: in that case you should instead raise an error at the planning stage and bail out. For example, we can't parse JSON with `json_to_recordset` without knowing the types of each recipient field, and so error out. Using the empty string for the type would be incorrect and futile. On the other hand we use the empty type for RPC calls since type resolution isn't implemented for RPC, but it's fine because the query still works with Postgres' implicit coercion. In the future, hopefully we will support data representations across the board and then the empty type may be permanently retired.
data CoercibleField = CoercibleField
{ cfName :: FieldName
, cfJsonPath :: JsonPath
, cfIRType :: Text -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
, cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
} deriving (Eq)

unknownField :: FieldName -> JsonPath -> CoercibleField
unknownField name path = CoercibleField name path "" Nothing

-- | Like an API request LogicTree, but with coercible field information.
data CoercibleLogicTree
= CoercibleExpr Bool LogicOperator [CoercibleLogicTree]
| CoercibleStmnt CoercibleFilter
deriving (Eq)

data CoercibleFilter = CoercibleFilter
{ field :: CoercibleField
, opExpr :: OpExpr
}
| CoercibleFilterNullEmbed Bool FieldName
deriving (Eq)
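
To ground the doc comment above: a rough, hypothetical sketch of where the resolved transforms land in a generated read query, reusing the illustrative `color` domain and `palette` table from the CHANGELOG sketch (simplified, not the planner's literal output):

```sql
-- For roughly GET /palette?select=id,rgb&rgb=eq.%23ff0000:
SELECT
  "palette"."id",
  json("palette"."rgb") AS "rgb"           -- output role: cfTransform is color -> json
FROM "palette"
WHERE "palette"."rgb" = color('#ff0000');  -- filter role: text -> color is applied to the
                                           -- value, keeping the column index-friendly
```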
10 changes: 5 additions & 5 deletions src/PostgREST/Query/QueryBuilder.hs
@@ -53,7 +53,7 @@ readPlanToQuery (Node ReadPlan{select,from=mainQi,fromAlias,where_=logicForest,o
where
fromFrag = fromF relToParent mainQi fromAlias
qi = getQualifiedIdentifier relToParent mainQi fromAlias
defSelect = [(("*", []), Nothing, Nothing)] -- gets all the columns in case of an empty select, ignoring/obtaining these columns is done at the aggregation stage
defSelect = [(unknownField "*" [], Nothing, Nothing)] -- gets all the columns in case of an empty select, ignoring/obtaining these columns is done at the aggregation stage
(selects, joins) = foldr getSelectsJoins ([],[]) forest

getSelectsJoins :: ReadPlanTree -> ([SQL.Snippet], [SQL.Snippet]) -> ([SQL.Snippet], [SQL.Snippet])
@@ -98,12 +98,12 @@ mutatePlanToQuery (Insert mainQi iCols body onConflct putConditions returnings _
MergeDuplicates ->
if null iCols
then "DO NOTHING"
else "DO UPDATE SET " <> BS.intercalate ", " ((pgFmtIdent . tfName) <> const " = EXCLUDED." <> (pgFmtIdent . tfName) <$> iCols)
else "DO UPDATE SET " <> BS.intercalate ", " ((pgFmtIdent . cfName) <> const " = EXCLUDED." <> (pgFmtIdent . cfName) <$> iCols)
) onConflct,
returningF mainQi returnings
])
where
cols = BS.intercalate ", " $ pgFmtIdent . tfName <$> iCols
cols = BS.intercalate ", " $ pgFmtIdent . cfName <$> iCols

-- An update without a limit is always filtered with a WHERE
mutatePlanToQuery (Update mainQi uCols body logicForest range ordts returnings)
@@ -138,8 +138,8 @@ mutatePlanToQuery (Update mainQi uCols body logicForest range ordts returnings)
whereLogic = if null logicForest then mempty else " WHERE " <> intercalateSnippet " AND " (pgFmtLogicTree mainQi <$> logicForest)
mainTbl = SQL.sql (fromQi mainQi)
emptyBodyReturnedColumns = if null returnings then "NULL" else BS.intercalate ", " (pgFmtColumn (QualifiedIdentifier mempty $ qiName mainQi) <$> returnings)
nonRangeCols = BS.intercalate ", " (pgFmtIdent . tfName <> const " = _." <> pgFmtIdent . tfName <$> uCols)
rangeCols = BS.intercalate ", " ((\col -> pgFmtIdent (tfName col) <> " = (SELECT " <> pgFmtIdent (tfName col) <> " FROM pgrst_update_body) ") <$> uCols)
nonRangeCols = BS.intercalate ", " (pgFmtIdent . cfName <> const " = _." <> pgFmtIdent . cfName <$> uCols)
rangeCols = BS.intercalate ", " ((\col -> pgFmtIdent (cfName col) <> " = (SELECT " <> pgFmtIdent (cfName col) <> " FROM pgrst_update_body) ") <$> uCols)
(whereRangeIdF, rangeIdF) = mutRangeF mainQi (fst . otTerm <$> ordts)
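
For orientation, a hypothetical, simplified sketch of the two `SET` shapes assembled by `nonRangeCols` and `rangeCols` above (assuming an illustrative `items(id int, name text)` table; the real builder routes payload values through the coercible fields' transforms):

```sql
-- nonRangeCols shape: each column is assigned from the parsed payload rows,
-- brought in via FROM and aliased "_".
UPDATE items
SET name = _.name
FROM (
  SELECT * FROM json_to_recordset('[{"id": 1, "name": "new"}]') AS t(id int, name text)
) _
WHERE items.id = _.id;

-- rangeCols shape (limited/ordered updates): each column reads its single
-- value from the pgrst_update_body CTE.
WITH pgrst_update_body AS (
  SELECT 'new'::text AS name
)
UPDATE items
SET name = (SELECT name FROM pgrst_update_body)
WHERE items.id = 1;
```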

mutatePlanToQuery (Delete mainQi logicForest range ordts returnings)