From 86fcf7dfcc350085208ce3018a3bfd5d44b144ea Mon Sep 17 00:00:00 2001 From: szareiangm Date: Wed, 27 Jun 2018 20:52:37 -0400 Subject: [PATCH] Schema DDL: separate redshift-specific code from standard SQL (Closes #372) --- .../redshift/ColumnAttribute.scala | 17 +- .../iglu.schemaddl/redshift/DataType.scala | 54 +--- .../redshift/TableAttribute.scala | 5 +- .../redshift/generators/DdlGenerator.scala | 293 ++++++------------ .../generators/EncodeSuggestions.scala | 13 +- .../generators/MigrationGenerator.scala | 79 +---- .../redshift/generators/TypeSuggestions.scala | 189 +---------- .../{redshift => sql}/AlterTable.scala | 5 +- .../{redshift => sql}/Begin.scala | 2 +- .../{redshift => sql}/Column.scala | 2 +- .../iglu.schemaddl/sql/ColumnAttribute.scala | 27 ++ .../{redshift => sql}/ColumnConstraint.scala | 2 +- .../{redshift => sql}/CommentBlock.scala | 2 +- .../{redshift => sql}/CommentOn.scala | 2 +- .../{redshift => sql}/CreateSchema.scala | 2 +- .../{redshift => sql}/CreateTable.scala | 2 +- .../iglu.schemaddl/sql/DataType.scala | 64 ++++ .../{redshift => sql}/Ddl.scala | 2 +- .../{redshift => sql}/Empty.scala | 2 +- .../{redshift => sql}/End.scala | 2 +- .../{redshift => sql}/RefTable.scala | 2 +- .../{redshift => sql}/Statement.scala | 3 +- .../iglu.schemaddl/sql/TableAttribute.scala | 21 ++ .../{redshift => sql}/TableConstraint.scala | 2 +- .../generators/DdlFile.scala | 2 +- .../sql/generators/SqlDdlGenerator.scala | 203 ++++++++++++ .../generators/SqlJsonPathGenerator.scala} | 10 +- .../generators/SqlMigrationGenerator.scala | 92 ++++++ .../sql/generators/SqlTypeSuggestions.scala | 208 +++++++++++++ .../iglu/schemaddl/MigrationSpec.scala | 1 - .../jsonschema/SanityLinterSpec.scala | 2 +- .../redshift/generators/DdlFileSpec.scala | 15 +- .../generators/DdlGeneratorSpec.scala | 12 +- .../generators/MigrationGeneratorSpec.scala | 2 + .../generators/TypeSuggestionsSpec.scala | 19 +- 35 files changed, 783 insertions(+), 577 deletions(-) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/AlterTable.scala (97%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/Begin.scala (96%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/Column.scala (97%) create mode 100644 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/ColumnAttribute.scala rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/ColumnConstraint.scala (96%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/CommentBlock.scala (96%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/CommentOn.scala (95%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/CreateSchema.scala (93%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/CreateTable.scala (97%) create mode 100644 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/DataType.scala rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/Ddl.scala (98%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/Empty.scala (93%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/End.scala (93%) rename 
0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/RefTable.scala (95%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/Statement.scala (96%) create mode 100644 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/TableAttribute.scala rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/TableConstraint.scala (96%) rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift => sql}/generators/DdlFile.scala (98%) create mode 100644 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlDdlGenerator.scala rename 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/{redshift/generators/JsonPathGenerator.scala => sql/generators/SqlJsonPathGenerator.scala} (92%) create mode 100644 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlMigrationGenerator.scala create mode 100644 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlTypeSuggestions.scala diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnAttribute.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnAttribute.scala index b916f597..f1f7d34b 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnAttribute.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnAttribute.scala @@ -10,21 +10,10 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl +package redshift -/** - * column_attributes are: - * [ DEFAULT default_expr ] - * [ IDENTITY ( seed, step ) ] - * [ ENCODE encoding ] - * [ DISTKEY ] - * [ SORTKEY ] - */ -sealed trait ColumnAttribute extends Ddl - -case class Default(value: String) extends ColumnAttribute { - def toDdl = s"DEFAULT $value" -} +import sql.{ColumnAttribute, Ddl} case class Identity(seed: Int, step: Int) extends ColumnAttribute { def toDdl = s"IDENTITY ($seed, $step)" diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/DataType.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/DataType.scala index 8f6a7732..3f694c51 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/DataType.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/DataType.scala @@ -10,63 +10,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl +package redshift + +import sql.DataType /** * Data types * http://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html */ -sealed trait DataType extends Ddl - -case object RedshiftTimestamp extends DataType { - def toDdl = "TIMESTAMP" -} - -case object RedshiftDate extends DataType { - def toDdl = "DATE" -} - -case object RedshiftSmallInt extends DataType { - def toDdl = "SMALLINT" -} - -case object RedshiftInteger extends DataType { - def toDdl = "INT" -} - -case object RedshiftBigInt extends DataType { - def toDdl = "BIGINT" -} - -case object RedshiftReal extends DataType { - def toDdl = "REAL" -} - -case object RedshiftDouble extends DataType { - def toDdl = "DOUBLE PRECISION" -} - -case class RedshiftDecimal(precision: Option[Int], scale: Option[Int]) extends DataType { - def toDdl = (precision, scale) match { - case (Some(p), Some(s)) => s"DECIMAL ($p, $s)" - case _ => "DECIMAL" - } -} - -case object RedshiftBoolean extends DataType { - def toDdl = "BOOLEAN" -} - -case class RedshiftVarchar(size: Int) extends DataType { - def toDdl = s"VARCHAR($size)" -} - -case class RedshiftChar(size: Int) extends DataType { - def toDdl = s"CHAR($size)" -} // CUSTOM - /** * These predefined data types assembles into usual Redshift data types, but * can store additional information such as warnings. diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableAttribute.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableAttribute.scala index 35fb946f..db95fd2c 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableAttribute.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableAttribute.scala @@ -15,13 +15,16 @@ package com.snowplowanalytics.iglu.schemaddl.redshift // Scalaz import scalaz.NonEmptyList +// This project +import com.snowplowanalytics.iglu.schemaddl.sql.Ddl +import com.snowplowanalytics.iglu.schemaddl.sql.TableAttribute + /** * table_attributes are: * [ DISTSTYLE { EVEN | KEY | ALL } ] * [ DISTKEY ( column_name ) ] * [ [COMPOUND | INTERLEAVED ] SORTKEY ( column_name [, ...] 
) ] */ -sealed trait TableAttribute extends Ddl sealed trait DiststyleValue extends Ddl case object Even extends DiststyleValue { def toDdl = "EVEN" } diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlGenerator.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlGenerator.scala index 585f5e98..3ae6db67 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlGenerator.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlGenerator.scala @@ -14,111 +14,42 @@ package com.snowplowanalytics.iglu.schemaddl package redshift package generators -// Scalaz -import scalaz._ - // Scala import scala.annotation.tailrec -// Iglu core -import com.snowplowanalytics.iglu.core.SchemaMap +// Scalaz +import scalaz._ // This project +import sql._ import EncodeSuggestions._ -import TypeSuggestions._ +import sql.generators.SqlDdlGenerator +import redshift.generators.EncodeSuggestions.EncodingSuggestion +import sql.generators.SqlTypeSuggestions.DataTypeSuggestion /** * Generates a Redshift DDL File from a Flattened JsonSchema */ -object DdlGenerator { - - /** - * Make a DDL header from the self-describing info - * - * @param schemaMap self-describing info - * @param schemaName optional schema name - * @return SQL comment - */ - def getTableComment(tableName: String, schemaName: Option[String], schemaMap: SchemaMap): CommentOn = { - val schema = schemaName.map(_ + ".").getOrElse("") - CommentOn(schema + tableName, schemaMap.toSchemaUri) - } +object DdlGenerator extends SqlDdlGenerator{ /** - * Make a DDL header from the file name - * - * @param tableName table name - * @param schemaName optional DB schema name - * @param fileName JSON Schema file name - * @return SQL comment - */ - def getTableComment(tableName: String, schemaName: Option[String], fileName: String): CommentOn = { - val schema = schemaName.map(_ + ".").getOrElse("") - CommentOn(schema + tableName, "Source: " + fileName) - } - - /** - * Generates Redshift CreateTable object with all columns, attributes and constraints - * - * @param flatSchema flat schema produced by the Schema flattening process - * @param name table name - * @param dbSchema optional redshift schema name - * @param rawMode do not produce any Snowplow specific columns (like root_id) - * @param size default length for VARCHAR - * @return CreateTable object with all data about table creation - */ - def generateTableDdl( - flatSchema: FlatSchema, - name: String, - dbSchema: Option[String], - size: Int, - rawMode: Boolean = false) - : CreateTable = { - - val columns = getColumnsDdl(flatSchema.elems, flatSchema.required, size) - .toList - .sortBy(c => (-c.columnConstraints.size, c.columnName)) - - if (rawMode) getRawTableDdl(dbSchema, name, columns) - else getAtomicTableDdl(dbSchema, name, columns) - } - - // Columns with data taken from self-describing schema - private[redshift] val selfDescSchemaColumns = List( - Column("schema_vendor", RedshiftVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("schema_name", RedshiftVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("schema_format", RedshiftVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("schema_version", RedshiftVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))) - ) - - // 
Snowplow-specific columns - private[redshift] val parentageColumns = List( - Column("root_id", RedshiftChar(36), Set(CompressionEncoding(RawEncoding)), Set(Nullability(NotNull))), - Column("root_tstamp", RedshiftTimestamp, Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("ref_root", RedshiftVarchar(255), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("ref_tree", RedshiftVarchar(1500), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), - Column("ref_parent", RedshiftVarchar(255), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))) - ) - - - /** - * Generate DDL for atomic (with Snowplow-specific columns and attributes) table - * - * @param dbSchema optional redshift schema name - * @param name table name - * @param columns list of generated DDLs for columns - * @return full CREATE TABLE statement ready to be rendered - */ - private def getAtomicTableDdl(dbSchema: Option[String], name: String, columns: List[Column]): CreateTable = { + * Generate DDL for atomic (with Snowplow-specific columns and attributes) table + * + * @param dbSchema optional Redshift schema name + * @param name table name + * @param columns list of generated DDLs for columns + * @return full CREATE TABLE statement ready to be rendered + */ + protected def getAtomicTableDdl(dbSchema: Option[String], name: String, columns: List[Column]): CreateTable = { val schema = dbSchema.getOrElse("atomic") val fullTableName = schema + "." + name - val tableConstraints = Set[TableConstraint](RedshiftDdlDefaultForeignKey(schema)) + val tableConstraints = Set[TableConstraint](DdlDefaultForeignKey(schema)) val tableAttributes = Set[TableAttribute]( // Snowplow-specific attributes Diststyle(Key), DistKeyTable("root_id"), SortKeyTable(None, NonEmptyList("root_tstamp")) ) - + CreateTable( fullTableName, selfDescSchemaColumns ++ parentageColumns ++ columns, @@ -128,43 +59,68 @@ object DdlGenerator { } /** - * Generate DDL forraw (without Snowplow-specific columns and attributes) table - * - * @param dbSchema optional redshift schema name - * @param name table name - * @param columns list of generated DDLs for columns - * @return full CREATE TABLE statement ready to be rendered - */ - private def getRawTableDdl(dbSchema: Option[String], name: String, columns: List[Column]): CreateTable = { - val fullTableName = dbSchema.map(_ + "." + name).getOrElse(name) - CreateTable(fullTableName, columns) + * Takes each suggestion out of ``compressionEncodingSuggestions`` and + * decides whether the current properties satisfy it, then returns the compression + * encoding.
+ * If nothing is suggested, ZSTD encoding is returned as the default + * + * @param properties JSON Schema properties for the current column + * @param dataType Redshift data type for current column + * @param columnName to produce warning + * @param suggestions list of functions that can recognize the encoding type + * @return the suggested compression encoding, falling back to ZSTD + */ + @tailrec protected[schemaddl] def getEncoding( + properties: Map[String, String], + dataType: DataType, + columnName: String, + suggestions: List[EncodingSuggestion] = encodingSuggestions) + : CompressionEncoding = { + + suggestions match { + case Nil => CompressionEncoding(ZstdEncoding) // ZSTD is default for user-generated + case suggestion :: tail => suggestion(properties, dataType, columnName) match { + case Some(encoding) => CompressionEncoding(encoding) + case None => getEncoding(properties, dataType, columnName, tail) + } + } } /** - * Get DDL for Foreign Key for specified schema - * - * @param schemaName Redshift's schema - * @return ForeignKey constraint - */ - private def RedshiftDdlDefaultForeignKey(schemaName: String) = { - val reftable = RefTable(schemaName + ".events", Some("event_id")) - ForeignKeyTable(NonEmptyList("root_id"), reftable) + * Takes each suggestion out of ``dataTypeSuggestions`` and decides whether + * the current properties satisfy it, then returns the data type. + * If nothing is suggested, VARCHAR with ``varcharSize`` is returned as the default + * + * @param properties JSON Schema properties for the current column + * @param varcharSize default size for unhandled properties and strings + * without any data about length + * @param columnName to produce warning + * @param suggestions list of functions that can recognize the data type + * @return the suggested data type, falling back to VARCHAR + */ + override final protected[schemaddl] def getDataType( + properties: Map[String, String], + varcharSize: Int, + columnName: String, + suggestions: List[DataTypeSuggestion] = SqlDdlGenerator.dataTypeSuggestions :+ TypeSuggestions.productSuggestion) + : DataType = { + super.getDataType(properties, varcharSize, columnName, suggestions) } /** - * Processes the Map of Data elements pulled from the JsonSchema and - * generates DDL object for it with it's name, constrains, attributes - * data type, etc - * - * @param flatDataElems The Map of Schema keys -> attributes which need to - * be processed - * @param required required fields to decide which columns are nullable - * @return a list of Column DDLs - */ + * Processes the Map of Data elements pulled from the JsonSchema and + * generates a DDL object for it with its name, constraints, attributes, + * data type, etc. + * + * @param flatDataElems The Map of Schema keys -> attributes which need to + * be processed + * @param required required fields to decide which columns are nullable + * @return a list of Column DDLs + */ private[schemaddl] def getColumnsDdl( - flatDataElems: PropertyList, - required: Set[String], - varcharSize: Int) + flatDataElems: PropertyList, + required: Set[String], + varcharSize: Int) : Iterable[Column] = { // Process each key pair in the map @@ -176,101 +132,28 @@ object DdlGenerator { val constraints = // only "NOT NULL" now if (checkNullability(properties, required.contains(columnName))) Set.empty[ColumnConstraint] else Set[ColumnConstraint](Nullability(NotNull)) - Column(columnName, dataType, columnAttributes = Set(encoding), columnConstraints = constraints) + Column(columnName, dataType, Set(encoding), constraints) } } - // List of data type suggestions - val dataTypeSuggestions: List[DataTypeSuggestion] = 
List( - complexEnumSuggestion, - productSuggestion, - timestampSuggestion, - dateSuggestion, - arraySuggestion, - integerSuggestion, - numberSuggestion, - booleanSuggestion, - charSuggestion, - uuidSuggestion, - varcharSuggestion + + // Columns with data taken from self-describing schema + protected[schemaddl] val selfDescSchemaColumns = List( + Column("schema_vendor", SqlVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), + Column("schema_name", SqlVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), + Column("schema_format", SqlVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), + Column("schema_version", SqlVarchar(128), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))) ) + // Snowplow-specific columns + protected[schemaddl] val parentageColumns = List( + Column("root_id", SqlChar(36), Set(CompressionEncoding(RawEncoding)), Set(Nullability(NotNull))), + Column("root_tstamp", SqlTimestamp, Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), + Column("ref_root", SqlVarchar(255), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), + Column("ref_tree", SqlVarchar(1500), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))), + Column("ref_parent", SqlVarchar(255), Set(CompressionEncoding(ZstdEncoding)), Set(Nullability(NotNull))) + ) // List of compression encoding suggestions val encodingSuggestions: List[EncodingSuggestion] = List(lzoSuggestion, zstdSuggestion) - - /** - * Takes each suggestion out of ``dataTypeSuggesions`` and decide whether - * current properties satisfy it, then return the data type - * If nothing suggested VARCHAR with ``varcharSize`` returned as default - * - * @param properties is a string we need to recognize - * @param varcharSize default size for unhandled properties and strings - * without any data about length - * @param columnName to produce warning - * @param suggestions list of functions can recognize encode type - * @return some format or none if nothing suites - */ - @tailrec private[schemaddl] def getDataType( - properties: Map[String, String], - varcharSize: Int, - columnName: String, - suggestions: List[DataTypeSuggestion] = dataTypeSuggestions) - : DataType = { - - suggestions match { - case Nil => RedshiftVarchar(varcharSize) // Generic - case suggestion :: tail => suggestion(properties, columnName) match { - case Some(format) => format - case None => getDataType(properties, varcharSize, columnName, tail) - } - } - } - - /** - * Takes each suggestion out of ``compressionEncodingSuggestions`` and - * decide whether current properties satisfy it, then return the compression - * encoding. 
- * If nothing suggested ZSTD Encoding returned as default - * - * @param properties is a string we need to recognize - * @param dataType redshift data type for current column - * @param columnName to produce warning - * @param suggestions list of functions can recognize encode type - * @return some format or none if nothing suites - */ - @tailrec private[schemaddl] def getEncoding( - properties: Map[String, String], - dataType: DataType, - columnName: String, - suggestions: List[EncodingSuggestion] = encodingSuggestions) - : CompressionEncoding = { - - suggestions match { - case Nil => CompressionEncoding(ZstdEncoding) // ZSTD is default for user-generated - case suggestion :: tail => suggestion(properties, dataType, columnName) match { - case Some(encoding) => CompressionEncoding(encoding) - case None => getEncoding(properties, dataType, columnName, tail) - } - } - } - - /** - * Check whether field can be null. - * Priority of factors: - * - "null" in type - * - null in enum - * - property is in required array - * - * @param properties hash map of JSON Schema properties for primitive type - * @param required whether this field listed in required array - * @return nullable or not - */ - private[schemaddl] def checkNullability(properties: Map[String, String], required: Boolean): Boolean = { - (properties.get("type"), properties.get("enum")) match { - case (Some(types), _) if types.contains("null") => true - case (_, Some(enum)) if enum.split(",").toList.contains("null") => true - case _ => !required - } - } } diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/EncodeSuggestions.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/EncodeSuggestions.scala index d19892ec..d8d41ab9 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/EncodeSuggestions.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/EncodeSuggestions.scala @@ -10,9 +10,12 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl +package redshift package generators +import sql._ + object EncodeSuggestions { /** * Type alias for function suggesting an compression encoding based on map of @@ -23,15 +26,15 @@ object EncodeSuggestions { // Suggest LZO Encoding for boolean, double precision and real val lzoSuggestion: EncodingSuggestion = (properties, dataType, columnName) => dataType match { - case RedshiftBoolean => Some(RunLengthEncoding) - case RedshiftDouble => Some(RawEncoding) - case RedshiftReal => Some(RawEncoding) + case SqlBoolean => Some(RunLengthEncoding) + case SqlDouble => Some(RawEncoding) + case SqlReal => Some(RawEncoding) case _ => None } val zstdSuggestion: EncodingSuggestion = (properties, dataType, columnName) => dataType match { - case RedshiftVarchar(_) => Some(ZstdEncoding) + case SqlVarchar(_) => Some(ZstdEncoding) case _ => None } } diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/MigrationGenerator.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/MigrationGenerator.scala index f2499af2..a357b7f5 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/MigrationGenerator.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/MigrationGenerator.scala @@ -10,93 +10,26 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql package generators -// Iglu Core -import com.snowplowanalytics.iglu.core._ - // This library -import com.snowplowanalytics.iglu.schemaddl.Migration import com.snowplowanalytics.iglu.schemaddl.StringUtils._ - -// This library -import DdlGenerator._ - +import com.snowplowanalytics.iglu.schemaddl.redshift.generators.DdlGenerator /** * Module containing all logic to generate DDL files with information required * to migration from one version of Schema to another */ -object MigrationGenerator { - - /** - * Generate full ready to be rendered DDL file containing all migration - * statements and additional data like previous version of table - * - * @param migration common JSON Schema migration object with - * path (from-to) and diff - * @param varcharSize size VARCHARs by default - * @param tableSchema DB schema for table (atomic by default) - * @return DDL file containing list of statements ready to be printed - */ - def generateMigration( - migration: Migration, - varcharSize: Int = 4096, - tableSchema: Option[String] = Some("atomic")) - : DdlFile = { - - val schemaKey = SchemaMap(migration.vendor, migration.name, "jsonschema", migration.to) - val oldSchemaUri = SchemaMap(migration.vendor, migration.name, "jsonschema", migration.from).toSchemaUri - val tableName = getTableName(schemaKey) // e.g. com_acme_event_1 - val tableNameFull = tableSchema.map(_ + ".").getOrElse("") + tableName // e.g. 
atomic.com_acme_event_1 - - val transaction = - if (migration.diff.added.nonEmpty) { - migration.diff.added.map(buildAlterTable(tableNameFull, varcharSize)) - } else { - List(CommentBlock("NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION", 3)) - } - - val header = getHeader(tableName, oldSchemaUri) - val comment = CommentOn(tableNameFull, schemaKey.toSchemaUri) - DdlFile(List(header, Empty, Begin(None, None), Empty) ++ transaction :+ Empty :+ comment :+ Empty :+ End) - } - - /** - * Generate comment block for for migration file with information about - * previous version of table - * - * @param tableName name of migrating table - * @param oldSchemaUri Schema URI extracted from internal database store - * @return DDL statement with header - */ - def getHeader(tableName: String, oldSchemaUri: String): CommentBlock = - CommentBlock(Vector( - "WARNING: only apply this file to your database if the following SQL returns the expected:", - "", - s"SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = '$tableName';", - " obj_description", - "-----------------", - s" $oldSchemaUri", - " (1 row)")) +object MigrationGenerator extends SqlMigrationGenerator { - /** - * Generate single ALTER TABLE statement for some new property - * - * @param tableName name of migrating table - * @param varcharSize default size for VARCHAR - * @param pair pair of property name and its Schema properties like - * length, maximum, etc - * @return DDL statement altering single column in table - */ def buildAlterTable(tableName: String, varcharSize: Int) (pair: (String, Map[String, String])): AlterTable = pair match { case (columnName, properties) => - val dataType = getDataType(properties, varcharSize, columnName) - val encoding = getEncoding(properties, dataType, columnName) + val dataType = DdlGenerator.getDataType(properties, varcharSize, columnName) + val encoding = DdlGenerator.getEncoding(properties, dataType, columnName) val nullable = - if (checkNullability(properties, required = false)) None + if (DdlGenerator.checkNullability(properties, required = false)) None else Some(Nullability(NotNull)) AlterTable(tableName, AddColumn(toSnakeCase(columnName), dataType, None, Some(encoding), nullable)) } diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/TypeSuggestions.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/TypeSuggestions.scala index 253bb77c..af3878a6 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/TypeSuggestions.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/TypeSuggestions.scala @@ -13,200 +13,19 @@ package com.snowplowanalytics.iglu.schemaddl.redshift package generators -// Scalaz -import scalaz._ -import Scalaz._ - -// This project -import com.snowplowanalytics.iglu.schemaddl.StringUtils._ +import com.snowplowanalytics.iglu.schemaddl.sql._ +import generators.SqlTypeSuggestions +import generators.SqlTypeSuggestions.DataTypeSuggestion /** * Module containing functions for data type suggestions */ object TypeSuggestions { - /** - * Type alias for function suggesting an encode type based on map of - * JSON Schema properties - */ - type DataTypeSuggestion = (Map[String, String], String) => Option[DataType] - - // For complex enums Suggest VARCHAR with length of longest element - val complexEnumSuggestion: DataTypeSuggestion = (properties, columnName) => - 
properties.get("enum") match { - case Some(enums) if isComplexEnum(enums) => - val longest = excludeNull(enums).map(_.length).max - Some(RedshiftVarchar(longest)) - case _ => None - } - // Suggest VARCHAR(4096) for all product types. Should be in the beginning val productSuggestion: DataTypeSuggestion = (properties, columnName) => properties.get("type") match { - case (Some(types)) if excludeNull(types).size > 1 => + case (Some(types)) if SqlTypeSuggestions.excludeNull(types).size > 1 => Some(ProductType(List(s"Product type $types encountered in $columnName"))) case _ => None } - - val timestampSuggestion: DataTypeSuggestion = (properties, columnName) => - (properties.get("type"), properties.get("format")) match { - case (Some(types), Some("date-time")) if types.contains("string") => - Some(RedshiftTimestamp) - case _ => None - } - - val dateSuggestion: DataTypeSuggestion = (properties, columnName) => - (properties.get("type"), properties.get("format")) match { - case(Some(types), Some("date")) if types.contains("string") => - Some(RedshiftDate) - case _ => None - } - - val arraySuggestion: DataTypeSuggestion = (properties, columnName) => - properties.get("type") match { - case Some(types) if types.contains("array") => - Some(RedshiftVarchar(5000)) - case _ => None - } - - val numberSuggestion: DataTypeSuggestion = (properties, columnName) => - (properties.get("type"), properties.get("multipleOf")) match { - case (Some(types), Some(multipleOf)) if types.contains("number") && multipleOf == "0.01" => - Some(RedshiftDecimal(Some(36), Some(2))) - case (Some(types), _) if types.contains("number") => - Some(RedshiftDouble) - case _ => None - } - - val integerSuggestion: DataTypeSuggestion = (properties, columnName) => { - (properties.get("type"), properties.get("maximum"), properties.get("enum"), properties.get("multipleOf")) match { - case (Some(types), Some(maximum), _, _) if excludeNull(types) == Set("integer") => - getIntSize(maximum) - // Contains only enum - case (types, _, Some(enum), _) if (types.isEmpty || excludeNull(types.get) == Set("integer")) && isIntegerList(enum) => - val max = enum.split(",").toList.map(el => try Some(el.toLong) catch { case e: NumberFormatException => None } ) - val maxLong = max.sequence.getOrElse(Nil).maximum - maxLong.flatMap(m => getIntSize(m)) // This will short-circuit integer suggestions on any non-integer enum - case (Some(types), _, _, _) if excludeNull(types) == Set("integer") => - Some(RedshiftBigInt) - case (Some(types), max, _, Some(multipleOf)) if types.contains("number") && multipleOf == "1" => - max.flatMap(m => getIntSize(m)).orElse(Some(RedshiftInteger)) - case _ => None - } - } - - val charSuggestion: DataTypeSuggestion = (properties, columnName) => { - (properties.get("type"), properties.get("minLength"), properties.get("maxLength")) match { - case (Some(types), Some(IntegerAsString(minLength)), Some(IntegerAsString(maxLength))) - if minLength == maxLength && excludeNull(types) == Set("string") => - Some(RedshiftChar(maxLength)) - case _ => None - } - } - - val booleanSuggestion: DataTypeSuggestion = (properties, columnName) => { - properties.get("type") match { - case Some(types) if excludeNull(types) == Set("boolean") => Some(RedshiftBoolean) - case _ => None - } - } - - val uuidSuggestion: DataTypeSuggestion = (properties, columnName) => { - (properties.get("type"), properties.get("format")) match { - case (Some(types), Some("uuid")) if types.contains("string") => - Some(RedshiftChar(36)) - case _ => None - } - } - - val 
varcharSuggestion: DataTypeSuggestion = (properties, columnName) => { - (properties.get("type"), properties.get("maxLength"), properties.get("enum"), properties.get("format")) match { - case (Some(types), _, _, Some("ipv6")) if types.contains("string") => - Some(RedshiftVarchar(39)) - case (Some(types), _, _, Some("ipv4")) if types.contains("string") => - Some(RedshiftVarchar(15)) - case (Some(types), _, _, Some("email")) if types.contains("string") => - Some(RedshiftVarchar(255)) - case (Some(types), Some(IntegerAsString(maxLength)), _, _) if types.contains("string") => - Some(RedshiftVarchar(maxLength)) - case (_, _, Some(enum), _) => { - val enumItems = enum.split(",") - val maxLength = enumItems.toList.reduceLeft((a, b) => if (a.length > b.length) a else b).length - if (enumItems.length == 1) { - Some(RedshiftChar(maxLength)) - } else { - Some(RedshiftVarchar(maxLength)) - } - } - case _ => None - } - } - - /** - * Get set of types or enum as string excluding null - * - * @param types comma-separated types - * @return set of strings - */ - private def excludeNull(types: String): Set[String] = types.split(",").toSet - "null" - - /** - * Helper function to get size of Integer - * - * @param max upper bound extracted from properties as string - * @return Long representing biggest possible value or None if it's not Int - */ - private def getIntSize(max: => String): Option[DataType] = - try { - val maxLong = max.toLong - getIntSize(maxLong) - } catch { - case e: NumberFormatException => None - } - - /** - * Helper function to get size of Integer - * - * @param max upper bound - * @return Long representing biggest possible value or None if it's not Int - */ - private def getIntSize(max: Long): Option[DataType] = - if (max <= Short.MaxValue) Some(RedshiftSmallInt) - else if (max <= Int.MaxValue) Some(RedshiftInteger) - else if (max <= Long.MaxValue) Some(RedshiftBigInt) - else None - - /** - * Check enum contains some different types - * (string and number or number and boolean) - */ - private def isComplexEnum(enum: String) = { - // Predicates - def isNumeric(s: String) = try { - s.toDouble - true - } catch { - case e: NumberFormatException => false - } - def isNonNumeric(s: String) = !isNumeric(s) - def isBoolean(s: String) = s == "true" || s == "false" - - val nonNullEnum = excludeNull(enum) - somePredicates(nonNullEnum, List(isNumeric _, isNonNumeric _, isBoolean _), 2) - } - - /** - * Check at least some `quantity` of `predicates` are true on `instances` - * - * @param instances list of instances to check on - * @param predicates list of predicates to check - * @param quantity required quantity - */ - private def somePredicates(instances: Set[String], predicates: List[String => Boolean], quantity: Int): Boolean = { - if (quantity == 0) true - else predicates match { - case Nil => false - case h :: tail if instances.exists(h) => somePredicates(instances, tail, quantity - 1) - case _ :: tail => somePredicates(instances, tail, quantity) - } - } } diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/AlterTable.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/AlterTable.scala similarity index 97% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/AlterTable.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/AlterTable.scala index 58701471..c4688a46 100644 --- 
a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/AlterTable.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/AlterTable.scala @@ -10,10 +10,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql /** * Class holding data to alter some table with single [[AlterTableStatement]] + * * @see http://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE.html * * ALTER TABLE table_name @@ -78,7 +79,7 @@ case class AddColumn( columnName: String, columnType: DataType, default: Option[Default], - encode: Option[CompressionEncoding], + encode: Option[ColumnAttribute], nullability: Option[Nullability] ) extends AlterTableStatement { def toDdl = { diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Begin.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Begin.scala similarity index 96% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Begin.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Begin.scala index ccd09ea0..864b4e2b 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Begin.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Begin.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql case class Begin(isolationLevel: Option[IsolationLevel.type], permission: Option[Permission]) extends Statement { def toDdl = { diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Column.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Column.scala similarity index 97% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Column.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Column.scala index a5f1594c..f2f56706 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Column.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Column.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql /** * Class holding all information about Redshift's column diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/ColumnAttribute.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/ColumnAttribute.scala new file mode 100644 index 00000000..372ad0ce --- /dev/null +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/ColumnAttribute.scala @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2012-2016 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.iglu.schemaddl.sql + +/** + * column_attributes are: + * [ DEFAULT default_expr ] + * [ IDENTITY ( seed, step ) ] + * [ ENCODE encoding ] + * [ DISTKEY ] + * [ SORTKEY ] + */ +trait ColumnAttribute extends Ddl + +case class Default(value: String) extends ColumnAttribute { + def toDdl = s"DEFAULT $value" +} \ No newline at end of file diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnConstraint.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/ColumnConstraint.scala similarity index 96% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnConstraint.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/ColumnConstraint.scala index 7080e038..92201930 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/ColumnConstraint.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/ColumnConstraint.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql /** * column_constraints are: diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentBlock.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CommentBlock.scala similarity index 96% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentBlock.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CommentBlock.scala index b657a0b4..bb5bde90 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentBlock.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CommentBlock.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql /** * Class representing comment block in Ddl file diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentOn.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CommentOn.scala similarity index 95% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentOn.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CommentOn.scala index 64dadd1b..89c31ae9 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CommentOn.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CommentOn.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql /** * COMMENT ON diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateSchema.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CreateSchema.scala similarity index 93% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateSchema.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CreateSchema.scala index bdc05a4a..dda5acf9 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateSchema.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CreateSchema.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql case class CreateSchema(schemaName: String) extends Statement { def toDdl = s"CREATE SCHEMA IF NOT EXISTS $schemaName" diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateTable.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CreateTable.scala similarity index 97% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateTable.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CreateTable.scala index de69b7d6..d046e437 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/CreateTable.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/CreateTable.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql /** * Class holding all information about Redshift's table diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/DataType.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/DataType.scala new file mode 100644 index 00000000..f861a515 --- /dev/null +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/DataType.scala @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2014-2016 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.iglu.schemaddl +package sql + +trait DataType extends Ddl + +case object SqlTimestamp extends DataType { + def toDdl = "TIMESTAMP" +} + +case object SqlDate extends DataType { + def toDdl = "DATE" +} + +case object SqlSmallInt extends DataType { + def toDdl = "SMALLINT" +} + +case object SqlInteger extends DataType { + def toDdl = "INT" +} + +case object SqlBigInt extends DataType { + def toDdl = "BIGINT" +} + +case object SqlReal extends DataType { + def toDdl = "REAL" +} + +case object SqlDouble extends DataType { + def toDdl = "DOUBLE PRECISION" +} + +case class SqlDecimal(precision: Option[Int], scale: Option[Int]) extends DataType { + def toDdl = (precision, scale) match { + case (Some(p), Some(s)) => s"DECIMAL ($p, $s)" + case _ => "DECIMAL" + } +} + +case object SqlBoolean extends DataType { + def toDdl = "BOOLEAN" +} + +case class SqlVarchar(size: Int) extends DataType { + def toDdl = s"VARCHAR($size)" +} + +case class SqlChar(size: Int) extends DataType { + def toDdl = s"CHAR($size)" +} + diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Ddl.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Ddl.scala similarity index 98% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Ddl.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Ddl.scala index b5c16851..f226f1ff 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Ddl.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Ddl.scala @@ -11,7 +11,7 @@ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ package com.snowplowanalytics.iglu.schemaddl -package redshift +package sql /** * Base class for everything that can be represented as Redshift DDL diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Empty.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Empty.scala similarity index 93% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Empty.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Empty.scala index bc81694c..833c475e 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Empty.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Empty.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql case object Empty extends Statement { override val separator = "" diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/End.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/End.scala similarity index 93% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/End.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/End.scala index 50baa2a7..d5b472c8 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/End.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/End.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql case object End extends Statement { def toDdl = "END TRANSACTION" diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/RefTable.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/RefTable.scala similarity index 95% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/RefTable.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/RefTable.scala index 170f79dc..67734b19 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/RefTable.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/RefTable.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql /** * Reference table. 
Used in foreign key and table constraint diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Statement.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Statement.scala similarity index 96% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Statement.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Statement.scala index 6568e0d4..472fac65 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/Statement.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/Statement.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql /** * Trait for *independent* SQL DDL statements. @@ -18,6 +18,7 @@ package com.snowplowanalytics.iglu.schemaddl.redshift * commands and be content of file. * We're always using semicolon in the end of statements */ + trait Statement extends Ddl with Product with Serializable { /** * Symbol used to separate statement from other. diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/TableAttribute.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/TableAttribute.scala new file mode 100644 index 00000000..40bf3428 --- /dev/null +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/TableAttribute.scala @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2014-2016 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.iglu.schemaddl.sql + +/** + * table_attributes are: + * [ DISTSTYLE { EVEN | KEY | ALL } ] + * [ DISTKEY ( column_name ) ] + * [ [COMPOUND | INTERLEAVED ] SORTKEY ( column_name [, ...] ) ] + */ +trait TableAttribute extends Ddl diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableConstraint.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/TableConstraint.scala similarity index 96% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableConstraint.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/TableConstraint.scala index 344ed988..9d5c56d0 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/TableConstraint.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/TableConstraint.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql // Scalaz import scalaz.NonEmptyList diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlFile.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/DdlFile.scala similarity index 98% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlFile.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/DdlFile.scala index 7be0acd7..a1c03cd2 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/DdlFile.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/DdlFile.scala @@ -10,7 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql package generators /** diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlDdlGenerator.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlDdlGenerator.scala new file mode 100644 index 00000000..73518b6f --- /dev/null +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlDdlGenerator.scala @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2014-2016 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.iglu.schemaddl +package sql +package generators + +// Scalaz +import scalaz._ + +// Iglu core +import com.snowplowanalytics.iglu.core.SchemaMap + +// This project +import sql.generators.SqlTypeSuggestions.DataTypeSuggestion + + +/** + * Generates a SQL DDL file from a flattened JSON Schema + */ +abstract class SqlDdlGenerator { + /** + * Make a DDL header from the self-describing info + * + * @param tableName table name + * @param schemaName optional DB schema name + * @param schemaMap self-describing info + * @return SQL comment + */ + def getTableComment(tableName: String, schemaName: Option[String], schemaMap: SchemaMap): CommentOn = { + val schema = schemaName.map(_ + ".").getOrElse("") + CommentOn(schema + tableName, schemaMap.toSchemaUri) + } + + /** + * Make a DDL header from the file name + * + * @param tableName table name + * @param schemaName optional DB schema name + * @param fileName JSON Schema file name + * @return SQL comment + */ + def getTableComment(tableName: String, schemaName: Option[String], fileName: String): CommentOn = { + val schema = schemaName.map(_ + ".").getOrElse("") + CommentOn(schema + tableName, "Source: " + fileName) + } + + /** + * Generates a CreateTable object with all columns, attributes and constraints + * + * @param flatSchema flat schema produced by the Schema flattening process + * @param name table name + * @param dbSchema optional DB schema name + * @param size default length for VARCHAR + * @param rawMode do not produce any Snowplow-specific columns (like root_id) + * @return CreateTable object with all data about table creation + */ + def generateTableDdl( + flatSchema: FlatSchema, + name: String, + dbSchema: Option[String], + size: Int, + rawMode: Boolean = false) + : CreateTable = { + + val columns = getColumnsDdl(flatSchema.elems, flatSchema.required, size) + .toList + .sortBy(c => (-c.columnConstraints.size, c.columnName)) + + if (rawMode) getRawTableDdl(dbSchema, name, columns) + else getAtomicTableDdl(dbSchema, name, columns) + } + + /** + * Generate DDL for atomic (with Snowplow-specific columns and attributes) table + * + * @param dbSchema optional DB schema name + * @param name table name + * @param columns list of generated DDLs for columns + * @return full CREATE TABLE statement ready to be rendered + */ + protected def getAtomicTableDdl(dbSchema: Option[String], name: String, columns: List[Column]): CreateTable + + /** + * Generate DDL for raw (without Snowplow-specific columns and attributes) table + * + * @param dbSchema optional DB schema name + * @param name table name + * @param columns list of generated DDLs for columns + * @return full CREATE TABLE statement ready to be rendered + */ + private def getRawTableDdl(dbSchema: Option[String], name: String, columns: List[Column]): CreateTable = { + val fullTableName = dbSchema.map(_ + "."
+ name).getOrElse(name) + CreateTable(fullTableName, columns) + } + + /** + * Get DDL for Foreign Key for specified schema + * + * @param schemaName DB schema name + * @return ForeignKey constraint + */ + protected def DdlDefaultForeignKey(schemaName: String) = { + val reftable = RefTable(schemaName + ".events", Some("event_id")) + ForeignKeyTable(NonEmptyList("root_id"), reftable) + } + + /** + * Takes each suggestion out of ``dataTypeSuggestions`` and decides whether + * the current properties satisfy it, then returns the data type. + * If nothing is suggested, VARCHAR with ``varcharSize`` is returned as default + * + * @param properties map of JSON Schema properties we need to recognize + * @param varcharSize default size for unhandled properties and strings + * without any data about length + * @param columnName column name used to produce warnings + * @param suggestions list of functions that can recognize the data type + * @return some format or none if nothing suits + */ + protected def getDataType( + properties: Map[String, String], + varcharSize: Int, + columnName: String, + suggestions: List[DataTypeSuggestion] = SqlDdlGenerator.dataTypeSuggestions) + : DataType = { + + suggestions match { + case Nil => SqlVarchar(varcharSize) // Generic + case suggestion :: tail => suggestion(properties, columnName) match { + case Some(format) => format + case None => getDataType(properties, varcharSize, columnName, tail) + } + } + } + + /** + * Check whether field can be null. + * Priority of factors: + * - "null" in type + * - null in enum + * - property is in required array + * + * @param properties hash map of JSON Schema properties for primitive type + * @param required whether this field is listed in the required array + * @return nullable or not + */ + private[schemaddl] def checkNullability(properties: Map[String, String], required: Boolean): Boolean = { + (properties.get("type"), properties.get("enum")) match { + case (Some(types), _) if types.contains("null") => true + case (_, Some(enum)) if enum.split(",").toList.contains("null") => true + case _ => !required + } + } + + private[schemaddl] def getColumnsDdl( + flatDataElems: PropertyList, + required: Set[String], + varcharSize: Int + ): Iterable[Column] + +} + +object SqlDdlGenerator { + // Columns with data taken from self-describing schema + val selfDescSchemaColumns = List( + Column("schema_vendor", SqlVarchar(128), Set(), Set(Nullability(NotNull))), + Column("schema_name", SqlVarchar(128), Set(), Set(Nullability(NotNull))), + Column("schema_format", SqlVarchar(128), Set(), Set(Nullability(NotNull))), + Column("schema_version", SqlVarchar(128), Set(), Set(Nullability(NotNull))) + ) + + // Snowplow-specific columns + val parentageColumns = List( + Column("root_id", SqlChar(36), Set(), Set(Nullability(NotNull))), + Column("root_tstamp", SqlTimestamp, Set(), Set(Nullability(NotNull))), + Column("ref_root", SqlVarchar(255), Set(), Set(Nullability(NotNull))), + Column("ref_tree", SqlVarchar(1500), Set(), Set(Nullability(NotNull))), + Column("ref_parent", SqlVarchar(255), Set(), Set(Nullability(NotNull))) + ) + + import SqlTypeSuggestions._ + // List of data type suggestions + lazy val dataTypeSuggestions: List[DataTypeSuggestion] = List( + complexEnumSuggestion, + timestampSuggestion, + dateSuggestion, + arraySuggestion, + integerSuggestion, + numberSuggestion, + booleanSuggestion, + charSuggestion, + uuidSuggestion, + varcharSuggestion + ) +}
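The suggestion chain above resolves left to right: the first function returning ``Some`` wins. A minimal, hypothetical sketch of that resolution (not part of the patch; the property maps and column names are invented):

    import com.snowplowanalytics.iglu.schemaddl.sql._
    import com.snowplowanalytics.iglu.schemaddl.sql.generators.SqlDdlGenerator

    // Resolve a type the same way getDataType does: try each suggestion
    // in order and keep the first Some
    def resolve(props: Map[String, String], column: String): Option[DataType] =
      SqlDdlGenerator.dataTypeSuggestions
        .foldLeft(Option.empty[DataType])((acc, s) => acc.orElse(s(props, column)))

    // uuidSuggestion fires: a string with format "uuid" becomes CHAR(36)
    resolve(Map("type" -> "string", "format" -> "uuid"), "user_id")   // Some(SqlChar(36))

    // complexEnumSuggestion fires first for mixed-type enums: VARCHAR
    // sized to the longest element, here "hello" (5 characters)
    resolve(Map("enum" -> "1,true,hello"), "status")                  // Some(SqlVarchar(5))

    // When every suggestion returns None, getDataType itself falls back
    // to SqlVarchar(varcharSize)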
diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/JsonPathGenerator.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlJsonPathGenerator.scala similarity index 92% rename from 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/JsonPathGenerator.scala rename to 0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlJsonPathGenerator.scala index 37a9bffc..aa6a4942 100644 --- a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/redshift/generators/JsonPathGenerator.scala +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlJsonPathGenerator.scala @@ -10,16 +10,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.iglu.schemaddl.redshift +package com.snowplowanalytics.iglu.schemaddl.sql package generators -// This project -import DdlGenerator._ /** * Converts lists of keys into a JsonPath file. */ -object JsonPathGenerator { +trait SqlJsonPathGenerator { private object JsonPathPrefix { val Schema = "$.schema." @@ -65,8 +63,8 @@ object JsonPathGenerator { val columnNames: List[String] = if (rawMode) { columns.map(JsonPathPrefix.Data + _.columnName) } // everything is data in raw mode else { // add schema and hierarchy otherwise - val dataColumns = columns.filterNot(selfDescSchemaColumns.contains(_)) - .filterNot(parentageColumns.contains(_)) + val dataColumns = columns.filterNot(SqlDdlGenerator.selfDescSchemaColumns.contains(_)) + .filterNot(SqlDdlGenerator.parentageColumns.contains(_)) .map(_.columnName) val schemaFieldList = JsonPathSchemaFields.map(JsonPathPrefix.Schema + _) diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlMigrationGenerator.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlMigrationGenerator.scala new file mode 100644 index 00000000..95b79836 --- /dev/null +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlMigrationGenerator.scala @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2014-2016 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */ +package com.snowplowanalytics.iglu.schemaddl.sql +package generators + +// Iglu Core +import com.snowplowanalytics.iglu.core._ + +// This library +import com.snowplowanalytics.iglu.schemaddl.Migration +import com.snowplowanalytics.iglu.schemaddl.StringUtils._ + + +/** + * Module containing all logic to generate DDL files with information required + * to migrate from one version of a Schema to another + */ +trait SqlMigrationGenerator { + + /** + * Generate a full, ready-to-render DDL file containing all migration + * statements and additional data like the previous version of the table + * + * @param migration common JSON Schema migration object with + * path (from-to) and diff + * @param varcharSize default size for VARCHARs + * @param tableSchema DB schema for table (atomic by default) + * @return DDL file containing list of statements ready to be printed + */ + def generateMigration( + migration: Migration, + varcharSize: Int = 4096, + tableSchema: Option[String] = Some("atomic")) + : DdlFile = { + + val schemaKey = SchemaMap(migration.vendor, migration.name, "jsonschema", migration.to) + val oldSchemaUri = SchemaMap(migration.vendor, migration.name, "jsonschema", migration.from).toSchemaUri + val tableName = getTableName(schemaKey) // e.g. com_acme_event_1 + val tableNameFull = tableSchema.map(_ + ".").getOrElse("") + tableName // e.g. atomic.com_acme_event_1 + + val transaction = + if (migration.diff.added.nonEmpty) { + migration.diff.added.map(buildAlterTable(tableNameFull, varcharSize)) + } else { + List(CommentBlock("NO ADDED COLUMNS CAN BE EXPRESSED IN SQL MIGRATION", 3)) + } + + val header = getHeader(tableName, oldSchemaUri) + val comment = CommentOn(tableNameFull, schemaKey.toSchemaUri) + DdlFile(List(header, Empty, Begin(None, None), Empty) ++ transaction :+ Empty :+ comment :+ Empty :+ End) + } + + /** + * Generate comment block for migration file with information about + * the previous version of the table + * + * @param tableName name of migrating table + * @param oldSchemaUri Schema URI extracted from internal database store + * @return DDL statement with header + */ + def getHeader(tableName: String, oldSchemaUri: String): CommentBlock = + CommentBlock(Vector( + "WARNING: only apply this file to your database if the following SQL returns the expected:", + "", + s"SELECT pg_catalog.obj_description(c.oid) FROM pg_catalog.pg_class c WHERE c.relname = '$tableName';", + " obj_description", + "-----------------", + s" $oldSchemaUri", + " (1 row)")) + + /** + * Generate single ALTER TABLE statement for some new property + * + * @param tableName name of migrating table + * @param varcharSize default size for VARCHAR + * @param pair pair of property name and its Schema properties like + * length, maximum, etc + * @return DDL statement altering single column in table + */ + def buildAlterTable(tableName: String, varcharSize: Int) + (pair: (String, Map[String, String])): AlterTable +} \ No newline at end of file
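For orientation, a hedged sketch of the file a concrete implementation of this trait would render for a schema version that gained one column. The table, column and type below are invented, and the exact ALTER TABLE text comes from the dialect-specific ``buildAlterTable``:

    // Hypothetical rendered migration (shape only, not verbatim output)
    val renderedMigrationSketch: String =
      """-- WARNING: only apply this file to your database if the following SQL returns the expected:
        |-- ...
        |
        |BEGIN TRANSACTION;
        |
        |  ALTER TABLE atomic.com_acme_event_1
        |    ADD COLUMN "status" VARCHAR(4096);
        |
        |COMMENT ON TABLE atomic.com_acme_event_1 IS 'iglu:com.acme/event/jsonschema/1-0-1';
        |
        |END TRANSACTION;""".stripMargin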
diff --git a/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlTypeSuggestions.scala b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlTypeSuggestions.scala new file mode 100644 index 00000000..6d5b44e6 --- /dev/null +++ b/0-common/schema-ddl/src/main/scala/com.snowplowanalytics/iglu.schemaddl/sql/generators/SqlTypeSuggestions.scala @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2014-2016 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.iglu.schemaddl +package sql +package generators + +// Scalaz +import scalaz._ +import Scalaz._ + +// This project +import StringUtils._ + +/** + * Object containing functions for data type suggestions + */ +object SqlTypeSuggestions { + /** + * Type alias for a function suggesting a data type based on a map of + * JSON Schema properties + */ + type DataTypeSuggestion = (Map[String, String], String) => Option[DataType] + + // For complex enums, suggest VARCHAR with the length of the longest element + val complexEnumSuggestion: DataTypeSuggestion = (properties, columnName) => + properties.get("enum") match { + case Some(enums) if isComplexEnum(enums) => + val longest = excludeNull(enums).map(_.length).max + Some(SqlVarchar(longest)) + case _ => None + } + + val timestampSuggestion: DataTypeSuggestion = (properties, columnName) => + (properties.get("type"), properties.get("format")) match { + case (Some(types), Some("date-time")) if types.contains("string") => + Some(SqlTimestamp) + case _ => None + } + + val dateSuggestion: DataTypeSuggestion = (properties, columnName) => + (properties.get("type"), properties.get("format")) match { + case (Some(types), Some("date")) if types.contains("string") => + Some(SqlDate) + case _ => None + } + + val arraySuggestion: DataTypeSuggestion = (properties, columnName) => + properties.get("type") match { + case Some(types) if types.contains("array") => + Some(SqlVarchar(5000)) + case _ => None + } + + val numberSuggestion: DataTypeSuggestion = (properties, columnName) => + (properties.get("type"), properties.get("multipleOf")) match { + case (Some(types), Some(multipleOf)) if types.contains("number") && multipleOf == "0.01" => + Some(SqlDecimal(Some(36), Some(2))) + case (Some(types), _) if types.contains("number") => + Some(SqlDouble) + case _ => None + } + + val integerSuggestion: DataTypeSuggestion = (properties, columnName) => { + (properties.get("type"), properties.get("maximum"), properties.get("enum"), properties.get("multipleOf")) match { + case (Some(types), Some(maximum), _, _) if excludeNull(types) == Set("integer") => + getIntSize(maximum) + // Contains only enum + case (types, _, Some(enum), _) if (types.isEmpty || excludeNull(types.get) == Set("integer")) && isIntegerList(enum) => + val max = enum.split(",").toList.map(el => try Some(el.toLong) catch { + case e: NumberFormatException => None + }) + val maxLong = max.sequence.getOrElse(Nil).maximum + maxLong.flatMap(m => getIntSize(m)) // This will short-circuit integer suggestions on any non-integer enum + case (Some(types), _, _, _) if excludeNull(types) == Set("integer") => + Some(SqlBigInt) + case (Some(types), max, _, Some(multipleOf)) if types.contains("number") && multipleOf == "1" => + max.flatMap(m => getIntSize(m)).orElse(Some(SqlInteger)) + case _ => None + } + } + + val charSuggestion: DataTypeSuggestion = (properties, columnName) => {
+ (properties.get("type"), properties.get("minLength"), properties.get("maxLength")) match { + case (Some(types), Some(IntegerAsString(minLength)), Some(IntegerAsString(maxLength))) + if minLength == maxLength && excludeNull(types) == Set("string") => + Some(SqlChar(maxLength)) + case _ => None + } + } + + val booleanSuggestion: DataTypeSuggestion = (properties, columnName) => { + properties.get("type") match { + case Some(types) if excludeNull(types) == Set("boolean") => Some(SqlBoolean) + case _ => None + } + } + + val uuidSuggestion: DataTypeSuggestion = (properties, columnName) => { + (properties.get("type"), properties.get("format")) match { + case (Some(types), Some("uuid")) if types.contains("string") => + Some(SqlChar(36)) + case _ => None + } + } + + val varcharSuggestion: DataTypeSuggestion = (properties, columnName) => { + (properties.get("type"), properties.get("maxLength"), properties.get("enum"), properties.get("format")) match { + case (Some(types), _, _, Some("ipv6")) if types.contains("string") => + Some(SqlVarchar(39)) + case (Some(types), _, _, Some("ipv4")) if types.contains("string") => + Some(SqlVarchar(15)) + case (Some(types), _, _, Some("email")) if types.contains("string") => + Some(SqlVarchar(255)) + case (Some(types), Some(IntegerAsString(maxLength)), _, _) if types.contains("string") => + Some(SqlVarchar(maxLength)) + case (_, _, Some(enum), _) => { + val enumItems = enum.split(",") + val maxLength = enumItems.toList.reduceLeft((a, b) => if (a.length > b.length) a else b).length + if (enumItems.length == 1) { + Some(SqlChar(maxLength)) + } else { + Some(SqlVarchar(maxLength)) + } + } + case _ => None + } + } + + /** + * Get set of types or enum as string excluding null + * + * @param types comma-separated types + * @return set of strings + */ + private[schemaddl] def excludeNull(types: String): Set[String] = types.split(",").toSet - "null" + + /** + * Helper function to get size of Integer + * + * @param max upper bound extracted from properties as string + * @return smallest integer data type that fits the bound, or None if it's not an Int + */ + private def getIntSize(max: => String): Option[DataType] = + try { + val maxLong = max.toLong + getIntSize(maxLong) + } catch { + case e: NumberFormatException => None + } + + /** + * Helper function to get size of Integer + * + * @param max upper bound + * @return smallest integer data type that fits the bound, or None if it's not an Int + */ + private def getIntSize(max: Long): Option[DataType] = + if (max <= Short.MaxValue) Some(SqlSmallInt) + else if (max <= Int.MaxValue) Some(SqlInteger) + else if (max <= Long.MaxValue) Some(SqlBigInt) + else None + + /** + * Check if enum contains several different types + * (e.g. string and number, or number and boolean) + */ + private def isComplexEnum(enum: String) = { + // Predicates + def isNumeric(s: String) = try { + s.toDouble + true + } catch { + case e: NumberFormatException => false + } + def isNonNumeric(s: String) = !isNumeric(s) + def isBoolean(s: String) = s == "true" || s == "false" + + val nonNullEnum = excludeNull(enum) + somePredicates(nonNullEnum, List(isNumeric _, isNonNumeric _, isBoolean _), 2) + } + + /** + * Check that at least `quantity` of `predicates` hold on `instances` + * + * @param instances list of instances to check on + * @param predicates list of predicates to check + * @param quantity required quantity + */ + private def somePredicates(instances: Set[String], predicates: List[String => Boolean], quantity: Int): Boolean = { + if (quantity == 0) true + else predicates
match { + case Nil => false + case h :: tail if instances.exists(h) => somePredicates(instances, tail, quantity - 1) + case _ :: tail => somePredicates(instances, tail, quantity) + } + } +} + diff --git a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/MigrationSpec.scala b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/MigrationSpec.scala index d4eee8b2..b07799a3 100644 --- a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/MigrationSpec.scala +++ b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/MigrationSpec.scala @@ -17,7 +17,6 @@ import scala.collection.immutable.ListMap // scalaz import scalaz._ -import Scalaz._ // Iglu Core import com.snowplowanalytics.iglu.core.{ SchemaMap, SchemaVer } diff --git a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/jsonschema/SanityLinterSpec.scala b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/jsonschema/SanityLinterSpec.scala index e37a5244..4acccb7d 100644 --- a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/jsonschema/SanityLinterSpec.scala +++ b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/jsonschema/SanityLinterSpec.scala @@ -12,7 +12,7 @@ */ package com.snowplowanalytics.iglu.schemaddl.jsonschema -import scalaz.{Failure, Success, NonEmptyList} +import scalaz.{Failure, NonEmptyList} // json4s import org.json4s._ diff --git a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlFileSpec.scala b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlFileSpec.scala index 89a6c514..761d5262 100644 --- a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlFileSpec.scala +++ b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlFileSpec.scala @@ -16,6 +16,9 @@ package generators // specs2 import org.specs2.Specification +import com.snowplowanalytics.iglu.schemaddl.sql._ +import com.snowplowanalytics.iglu.schemaddl.sql.generators.DdlFile + class DdlFileSpec extends Specification { def is = s2""" Check DDL File specification render correct table definition $e1 @@ -32,12 +35,12 @@ class DdlFileSpec extends Specification { def is = s2""" val createTable = CreateTable( "launch_missles_1", List( - Column("status", RedshiftVarchar(64), Set(DistKey), Set(Nullability(NotNull))), - Column("missionName", RedshiftVarchar(128), Set(), Set(Nullability(NotNull))), - Column("geo_longitude", RedshiftDouble, Set(), Set()), - Column("geo_latitude", RedshiftDouble, Set(), Set()), - Column("rocket.model", RedshiftInteger, Set(), Set(Nullability(NotNull))), - Column("rocket.series", RedshiftInteger, Set(), Set(Nullability(Null))) + Column("status", SqlVarchar(64), Set(DistKey), Set(Nullability(NotNull))), + Column("missionName", SqlVarchar(128), Set(), Set(Nullability(NotNull))), + Column("geo_longitude", SqlDouble, Set(), Set()), + Column("geo_latitude", SqlDouble, Set(), Set()), + Column("rocket.model", SqlInteger, Set(), Set(Nullability(NotNull))), + Column("rocket.series", SqlInteger, Set(), Set(Nullability(Null))) ) ) val commentOn = DdlGenerator.getTableComment(
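As a reading aid, the ``launch_missles_1`` table defined in the spec above should render to roughly the following DDL; this is a sketch of the expected shape, not the spec's exact assertion string:

    // Approximate rendering of the CreateTable above (layout is illustrative)
    val createTableSketch: String =
      """CREATE TABLE IF NOT EXISTS launch_missles_1 (
        |    "status"        VARCHAR(64)  DISTKEY NOT NULL,
        |    "missionName"   VARCHAR(128) NOT NULL,
        |    "geo_longitude" DOUBLE PRECISION,
        |    "geo_latitude"  DOUBLE PRECISION,
        |    "rocket.model"  INT          NOT NULL,
        |    "rocket.series" INT          NULL
        |)""".stripMargin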
diff --git a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlGeneratorSpec.scala b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlGeneratorSpec.scala index 2db50910..5ab84b94 100644 --- a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlGeneratorSpec.scala +++ b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/DdlGeneratorSpec.scala @@ -24,7 +24,7 @@ import scalaz.NonEmptyList // This library import com.snowplowanalytics.iglu.schemaddl.FlatSchema - +import com.snowplowanalytics.iglu.schemaddl.sql._ class DdlGeneratorSpec extends Specification { def is = s2""" Check DDL generation specification @@ -46,8 +46,8 @@ class DdlGeneratorSpec extends Specification { def is = s2""" DdlGenerator.selfDescSchemaColumns ++ DdlGenerator.parentageColumns ++ List( - Column("foo",RedshiftVarchar(30),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), - Column("bar",RedshiftVarchar(5),Set(CompressionEncoding(ZstdEncoding)),Set()) + Column("foo",SqlVarchar(30),Set(CompressionEncoding(ZstdEncoding)),Set(Nullability(NotNull))), + Column("bar",SqlVarchar(5),Set(CompressionEncoding(ZstdEncoding)),Set()) ), Set(ForeignKeyTable(NonEmptyList("root_id"),RefTable("atomic.events",Some("event_id")))), Set(Diststyle(Key), DistKeyTable("root_id"),SortKeyTable(None,NonEmptyList("root_tstamp"))) @@ -73,9 +73,9 @@ class DdlGeneratorSpec extends Specification { def is = s2""" DdlGenerator.selfDescSchemaColumns ++ DdlGenerator.parentageColumns ++ List( - Column("foo",RedshiftBoolean,Set(CompressionEncoding(RunLengthEncoding)),Set(Nullability(NotNull))), - Column("bar",RedshiftVarchar(5),Set(CompressionEncoding(ZstdEncoding)),Set()), - Column("baz",RedshiftBoolean,Set(CompressionEncoding(RunLengthEncoding)),Set()) + Column("foo",SqlBoolean,Set(CompressionEncoding(RunLengthEncoding)),Set(Nullability(NotNull))), + Column("bar",SqlVarchar(5),Set(CompressionEncoding(ZstdEncoding)),Set()), + Column("baz",SqlBoolean,Set(CompressionEncoding(RunLengthEncoding)),Set()) ), Set(ForeignKeyTable(NonEmptyList("root_id"),RefTable("atomic.events",Some("event_id")))), Set(Diststyle(Key), DistKeyTable("root_id"),SortKeyTable(None,NonEmptyList("root_tstamp"))) diff --git a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/MigrationGeneratorSpec.scala b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/MigrationGeneratorSpec.scala index c9659f0e..9adf1c65 100644 --- a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/MigrationGeneratorSpec.scala +++ b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/MigrationGeneratorSpec.scala @@ -24,6 +24,8 @@ import com.snowplowanalytics.iglu.core.SchemaVer // This library import com.snowplowanalytics.iglu.schemaddl.Migration +import com.snowplowanalytics.iglu.schemaddl.sql.generators.SqlMigrationGenerator + class MigrationGeneratorSpec extends Specification { def is = s2""" Check Redshift migrations generation diff --git a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/TypeSuggestionsSpec.scala b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/TypeSuggestionsSpec.scala index 58c9b4d1..b14b0f27 100644 --- a/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/TypeSuggestionsSpec.scala +++ b/0-common/schema-ddl/src/test/scala/com/snowplowanalytics/iglu/schemaddl/redshift/generators/TypeSuggestionsSpec.scala @@ -16,6 +16,9 @@ package generators // specs2 import
org.specs2.Specification +// This project +import com.snowplowanalytics.iglu.schemaddl.sql._ + class TypeSuggestionsSpec extends Specification { def is = s2""" Check type suggestions suggest decimal for multipleOf == 0.01 $e1 @@ -30,41 +33,41 @@ class TypeSuggestionsSpec extends Specification { def is = s2""" def e1 = { val props = Map("type" -> "number", "multipleOf" -> "0.01") - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftDecimal(Some(36), Some(2))) + DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlDecimal(Some(36), Some(2))) } def e2 = { val props = Map("type" -> "number", "multipleOf" -> "1") - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftInteger) + DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlInteger) } def e3 = { val props = Map("type" -> "integer", "multipleOf" -> "1", "enum" -> "2,3,5,\"hello\",32") - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftVarchar(7)) + DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlVarchar(7)) } def e4 = { val props = Map("type" -> "string,null", "minLength" -> "12", "maxLength" -> "12") - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftChar(12)) + DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlChar(12)) } def e5 = { val props = Map("type" -> "number,null") - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftDouble) + DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlDouble) } def e6 = { val props = Map("type" -> "integer,null") - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftBigInt) + DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlBigInt) } def e7 = { val props = Map("type" -> "string", "format" -> "date-time") - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftTimestamp) + DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlTimestamp) } def e8 = { val props = Map("type" -> "string", "format" -> "date") - DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(RedshiftDate) + DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlDate) } }
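One further illustrative case, not part of the patch: when no suggestion matches at all, ``getDataType`` falls back to a VARCHAR of the requested default size:

    // Hypothetical extra example in the style of the spec above: a plain
    // string with no format, length bounds or enum matches no suggestion,
    // so the default VARCHAR(varcharSize) is suggested
    def e9 = {
      val props = Map("type" -> "string")
      DdlGenerator.getDataType(props, 16, "somecolumn") must beEqualTo(SqlVarchar(16))
    }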