
Data Representations #2523

Closed
wants to merge 11 commits
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,20 @@ This project adheres to [Semantic Versioning](http://semver.org/).

## Unreleased

- #2523, Data representations - @aljungberg
+ Allows flexible API output formatting and input parsing on a per-column-type basis, using regular SQL functions configured in the database
+ Enables greater flexibility in the form and shape of your APIs, both for output and input, making PostgREST a more versatile general-purpose API server
+ Examples include base64 encoding/decoding binary data (like a `bytea` column containing an image), presenting a timestamp column as seconds since the Unix epoch or as an ISO 8601 string, or representing fixed-precision decimals as strings rather than doubles to preserve precision
+ ...and accepting the same formats in `POST`/`PUT`/`PATCH` by configuring the reverse transformation(s)
+ Other use cases include custom representations of enums, arrays, nested objects, CSS hex colour strings, gzip-compressed fields, metric-to-imperial conversions, and much more
+ Works with the `select` parameter when selecting only a subset of columns, with embedding through complex joins, with renamed fields, and with views and computed columns
+ Works when filtering on a formatted column, without extra indexes, by parsing the filter value to the canonical representation
+ Works for `RETURNING` operations, such as requesting the full body of a `POST`/`PUT`/`PATCH` with `Prefer: return=representation`
+ Works for batch updates and inserts
+ Completely optional: define the functions in the database and they will be used automatically everywhere (see the sketch after this list)
+ Preserves the ability to write to the original column and requires no extra storage or complex triggers (compared to using `GENERATED ALWAYS` columns)
+ Note: data representations require Postgres 10 (Postgres 11 if using `IN` predicates) and are not implemented for RPC
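
As a sketch of what this looks like in practice (illustrative only, not part of this diff): a representation is declared with ordinary SQL functions and casts on a domain. The `color` domain, `palette` table, and function bodies below are hypothetical; the `json` casts handle output and input, and the `text` cast handles filter parsing.

```sql
-- Hypothetical domain: colours stored as integers, shown as CSS hex strings.
CREATE DOMAIN color AS integer;

CREATE TABLE palette (
  id  serial PRIMARY KEY,
  rgb color
);

-- Output (field type -> json): formats the stored integer for responses.
CREATE FUNCTION json(color) RETURNS json AS $$
  SELECT to_json('#' || lpad(to_hex($1), 6, '0'));
$$ LANGUAGE sql IMMUTABLE;
CREATE CAST (color AS json) WITH FUNCTION json(color) AS IMPLICIT;

-- Input (json -> field type): parses "#rrggbb" payload values on writes.
CREATE FUNCTION color(json) RETURNS color AS $$
  SELECT (('x' || lpad(ltrim($1 #>> '{}', '#'), 8, '0'))::bit(32)::int)::color;
$$ LANGUAGE sql IMMUTABLE;
CREATE CAST (json AS color) WITH FUNCTION color(json) AS IMPLICIT;

-- Filters (text -> field type): parses query-string values like rgb=eq.%23ff0000.
CREATE FUNCTION color(text) RETURNS color AS $$
  SELECT (('x' || lpad(ltrim($1, '#'), 8, '0'))::bit(32)::int)::color;
$$ LANGUAGE sql IMMUTABLE;
CREATE CAST (text AS color) WITH FUNCTION color(text) AS IMPLICIT;
```

With these in place, `GET /palette` would return `rgb` as a string like `"#ff0000"`, and a `POST` or a filter would supply it in the same form, while the table keeps storing plain integers.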

### Added

- #1414, Add related orders - @steve-chavez
1 change: 1 addition & 0 deletions README.md
@@ -1,3 +1,4 @@

![Logo](static/bigger-logo.png "Logo")

[![Donate](https://img.shields.io/badge/Donate-Patreon-orange.svg?colorB=F96854)](https://www.patreon.com/postgrest)
1 change: 1 addition & 0 deletions postgrest.cabal
@@ -48,6 +48,7 @@ library
PostgREST.SchemaCache.Identifiers
PostgREST.SchemaCache.Proc
PostgREST.SchemaCache.Relationship
PostgREST.SchemaCache.Representations
PostgREST.SchemaCache.Table
PostgREST.Error
PostgREST.Logger
255 changes: 189 additions & 66 deletions src/PostgREST/Plan.hs

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions src/PostgREST/Plan/MutatePlan.hs
@@ -6,8 +6,9 @@ where
import qualified Data.ByteString.Lazy as LBS

import PostgREST.ApiRequest.Preferences (PreferResolution)
import PostgREST.ApiRequest.Types (LogicTree, OrderTerm)
import PostgREST.Plan.Types (TypedField)
import PostgREST.ApiRequest.Types (OrderTerm)
import PostgREST.Plan.Types (CoercibleField,
CoercibleLogicTree)
import PostgREST.RangeQuery (NonnegRange)
import PostgREST.SchemaCache.Identifiers (FieldName,
QualifiedIdentifier)
@@ -18,25 +19,25 @@ import Protolude
data MutatePlan
= Insert
{ in_ :: QualifiedIdentifier
, insCols :: [TypedField]
, insCols :: [CoercibleField]
, insBody :: Maybe LBS.ByteString
, onConflict :: Maybe (PreferResolution, [FieldName])
, where_ :: [LogicTree]
, where_ :: [CoercibleLogicTree]
, returning :: [FieldName]
, insPkCols :: [FieldName]
}
| Update
{ in_ :: QualifiedIdentifier
, updCols :: [TypedField]
, updCols :: [CoercibleField]
, updBody :: Maybe LBS.ByteString
, where_ :: [LogicTree]
, where_ :: [CoercibleLogicTree]
, mutRange :: NonnegRange
, mutOrder :: [OrderTerm]
, returning :: [FieldName]
}
| Delete
{ in_ :: QualifiedIdentifier
, where_ :: [LogicTree]
, where_ :: [CoercibleLogicTree]
, mutRange :: NonnegRange
, mutOrder :: [OrderTerm]
, returning :: [FieldName]
12 changes: 7 additions & 5 deletions src/PostgREST/Plan/ReadPlan.hs
@@ -6,9 +6,11 @@ module PostgREST.Plan.ReadPlan

import Data.Tree (Tree (..))

import PostgREST.ApiRequest.Types (Alias, Cast, Depth, Field,
Hint, JoinType, LogicTree,
NodeName, OrderTerm)
import PostgREST.ApiRequest.Types (Alias, Cast, Depth, Hint,
JoinType, NodeName,
OrderTerm)
import PostgREST.Plan.Types (CoercibleField (..),
CoercibleLogicTree)
import PostgREST.RangeQuery (NonnegRange)
import PostgREST.SchemaCache.Identifiers (FieldName,
QualifiedIdentifier)
@@ -26,10 +28,10 @@ data JoinCondition =
deriving (Eq)

data ReadPlan = ReadPlan
{ select :: [(Field, Maybe Cast, Maybe Alias)]
{ select :: [(CoercibleField, Maybe Cast, Maybe Alias)]
, from :: QualifiedIdentifier
, fromAlias :: Maybe Alias
, where_ :: [LogicTree]
, where_ :: [CoercibleLogicTree]
, order :: [OrderTerm]
, range_ :: NonnegRange
, relName :: NodeName
57 changes: 41 additions & 16 deletions src/PostgREST/Plan/Types.hs
@@ -1,24 +1,49 @@
module PostgREST.Plan.Types
( TypedField(..)
, resolveTableField

( CoercibleField(..)
, unknownField
, CoercibleLogicTree(..)
, CoercibleFilter(..)
, TransformerProc
) where

import qualified Data.HashMap.Strict.InsOrd as HMI
import PostgREST.ApiRequest.Types (JsonPath, LogicOperator, OpExpr)

import PostgREST.SchemaCache.Identifiers (FieldName)
import PostgREST.SchemaCache.Table (Column (..), Table (..))

import Protolude

-- | A TypedField is a field with sufficient information to be read from JSON with `json_to_recordset`.
data TypedField = TypedField
{ tfName :: FieldName
, tfIRType :: Text -- ^ The initial type of the field, before any casting.
} deriving (Eq)

resolveTableField :: Table -> FieldName -> Maybe TypedField
resolveTableField table fieldName =
case HMI.lookup fieldName (tableColumns table) of
Just column -> Just $ TypedField (colName column) (colNominalType column)
Nothing -> Nothing
type TransformerProc = Text

-- | A CoercibleField pairs the name of a query element with any type coercion information we need for some specific use case.
-- |
-- | As suggested by the name, it's often a reference to a field in a table but really it can be any nameable element (function parameter, calculation with an alias, etc) with a knowable type.
-- |
-- | In the simplest case, it allows us to parse JSON payloads with `json_to_recordset`, for which we need to know both the name and the type of each thing we'd like to extract. At a higher level, CoercibleField generalises to reflect that any value we work with in a query may need type specific handling.
-- |
-- | CoercibleField is the foundation for the Data Representations feature. This feature allows user-definable mappings between database types so that the same data can be presented or interpreted in various ways as needed. Sometimes the way Postgres coerces data implicitly isn't right for the job. Different mappings might be appropriate for different situations: parsing a filter from a query string requires one function (text -> field type) while parsing a payload from JSON takes another (json -> field type). And the reverse, outputting a field as JSON, requires yet a third (field type -> json). CoercibleField is that "job specific" reference to an element paired with the type we desire for that particular purpose and the function we'll use to get there, if any.
-- |
-- | In the planning phase, we "resolve" generic named elements into these specialised CoercibleFields. Again this is context specific: two different CoercibleFields both representing the exact same table column in the database, even in the same query, might have two different target types and mapping functions. For example, one might represent a column in a filter, and another the very same column in an output role to be sent in the response body.
-- |
-- | The type value is allowed to be the empty string. The analog here is soft type checking in programming languages: sometimes we don't need a variable to have a specified type and things will work anyhow. So the empty type variant is valid when we don't know and *don't need to know* about the specific type in some context. Note that this variation should not be used if it guarantees failure: in that case you should instead raise an error at the planning stage and bail out. For example, we can't parse JSON with `json_to_recordset` without knowing the types of each recipient field, and so error out. Using the empty string for the type would be incorrect and futile. On the other hand we use the empty type for RPC calls since type resolution isn't implemented for RPC, but it's fine because the query still works with Postgres' implicit coercion. In the future, hopefully we will support data representations across the board and then the empty type may be permanently retired.
data CoercibleField = CoercibleField
{ cfName :: FieldName
, cfJsonPath :: JsonPath
, cfIRType :: Text -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
, cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
} deriving (Eq)

unknownField :: FieldName -> JsonPath -> CoercibleField
unknownField name path = CoercibleField name path "" Nothing

-- | Like an API request LogicTree, but with coercible field information.
data CoercibleLogicTree
= CoercibleExpr Bool LogicOperator [CoercibleLogicTree]
| CoercibleStmnt CoercibleFilter
deriving (Eq)

data CoercibleFilter = CoercibleFilter
{ field :: CoercibleField
, opExpr :: OpExpr
}
| CoercibleFilterNullEmbed Bool FieldName
deriving (Eq)
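
To ground the doc comment above: a rough, hypothetical sketch of where the resolved transforms land in a generated read query, reusing the illustrative `color` domain and `palette` table from the CHANGELOG sketch (simplified, not the planner's literal output):

```sql
-- For roughly GET /palette?select=id,rgb&rgb=eq.%23ff0000:
SELECT
  "palette"."id",
  json("palette"."rgb") AS "rgb"           -- output role: cfTransform is color -> json
FROM "palette"
WHERE "palette"."rgb" = color('#ff0000');  -- filter role: text -> color is applied to the
                                           -- value, keeping the column index-friendly
```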
10 changes: 5 additions & 5 deletions src/PostgREST/Query/QueryBuilder.hs
@@ -53,7 +53,7 @@ readPlanToQuery (Node ReadPlan{select,from=mainQi,fromAlias,where_=logicForest,o
where
fromFrag = fromF relToParent mainQi fromAlias
qi = getQualifiedIdentifier relToParent mainQi fromAlias
defSelect = [(("*", []), Nothing, Nothing)] -- gets all the columns in case of an empty select, ignoring/obtaining these columns is done at the aggregation stage
defSelect = [(unknownField "*" [], Nothing, Nothing)] -- gets all the columns in case of an empty select, ignoring/obtaining these columns is done at the aggregation stage
(selects, joins) = foldr getSelectsJoins ([],[]) forest

getSelectsJoins :: ReadPlanTree -> ([SQL.Snippet], [SQL.Snippet]) -> ([SQL.Snippet], [SQL.Snippet])
@@ -98,12 +98,12 @@ mutatePlanToQuery (Insert mainQi iCols body onConflct putConditions returnings _
MergeDuplicates ->
if null iCols
then "DO NOTHING"
else "DO UPDATE SET " <> BS.intercalate ", " ((pgFmtIdent . tfName) <> const " = EXCLUDED." <> (pgFmtIdent . tfName) <$> iCols)
else "DO UPDATE SET " <> BS.intercalate ", " ((pgFmtIdent . cfName) <> const " = EXCLUDED." <> (pgFmtIdent . cfName) <$> iCols)
) onConflct,
returningF mainQi returnings
])
where
cols = BS.intercalate ", " $ pgFmtIdent . tfName <$> iCols
cols = BS.intercalate ", " $ pgFmtIdent . cfName <$> iCols

-- An update without a limit is always filtered with a WHERE
mutatePlanToQuery (Update mainQi uCols body logicForest range ordts returnings)
@@ -138,8 +138,8 @@ mutatePlanToQuery (Update mainQi uCols body logicForest range ordts returnings)
whereLogic = if null logicForest then mempty else " WHERE " <> intercalateSnippet " AND " (pgFmtLogicTree mainQi <$> logicForest)
mainTbl = SQL.sql (fromQi mainQi)
emptyBodyReturnedColumns = if null returnings then "NULL" else BS.intercalate ", " (pgFmtColumn (QualifiedIdentifier mempty $ qiName mainQi) <$> returnings)
nonRangeCols = BS.intercalate ", " (pgFmtIdent . tfName <> const " = _." <> pgFmtIdent . tfName <$> uCols)
rangeCols = BS.intercalate ", " ((\col -> pgFmtIdent (tfName col) <> " = (SELECT " <> pgFmtIdent (tfName col) <> " FROM pgrst_update_body) ") <$> uCols)
nonRangeCols = BS.intercalate ", " (pgFmtIdent . cfName <> const " = _." <> pgFmtIdent . cfName <$> uCols)
rangeCols = BS.intercalate ", " ((\col -> pgFmtIdent (cfName col) <> " = (SELECT " <> pgFmtIdent (cfName col) <> " FROM pgrst_update_body) ") <$> uCols)
(whereRangeIdF, rangeIdF) = mutRangeF mainQi (fst . otTerm <$> ordts)
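
For orientation, a hypothetical, simplified sketch of the two `SET` shapes assembled by `nonRangeCols` and `rangeCols` above (assuming an illustrative `items(id int, name text)` table; the real builder routes payload values through the coercible fields' transforms):

```sql
-- nonRangeCols shape: each column is assigned from the parsed payload rows,
-- brought in via FROM and aliased "_".
UPDATE items
SET name = _.name
FROM (
  SELECT * FROM json_to_recordset('[{"id": 1, "name": "new"}]') AS t(id int, name text)
) _
WHERE items.id = _.id;

-- rangeCols shape (limited/ordered updates): each column reads its single
-- value from the pgrst_update_body CTE.
WITH pgrst_update_body AS (
  SELECT 'new'::text AS name
)
UPDATE items
SET name = (SELECT name FROM pgrst_update_body)
WHERE items.id = 1;
```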

mutatePlanToQuery (Delete mainQi logicForest range ordts returnings)