Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Kafka Sink Headers Attribute #569

Merged
merged 1 commit into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/resources/sink_kafka.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ resource "materialize_sink_kafka" "example_sink_kafka" {
- `database_name` (String) The identifier for the sink database in Materialize. Defaults to `MZ_DATABASE` environment variable if set or `materialize` if environment variable is not set.
- `envelope` (Block List, Max: 1) How to interpret records (e.g. Debezium, Upsert). (see [below for nested schema](#nestedblock--envelope))
- `format` (Block List, Max: 1) How to decode raw bytes from different formats into data structures it can understand at runtime. (see [below for nested schema](#nestedblock--format))
- `headers` (String) The name of a column containing additional headers to add to each message emitted by the sink. The column must be of type map[text => text] or map[text => bytea].
- `key` (List of String) An optional list of columns to use for the Kafka key. If unspecified, the Kafka key is left unset.
- `key_not_enforced` (Boolean) Disable Materialize's validation of the key's uniqueness.
- `ownership_role` (String) The ownership role of the object.
Expand Down
29 changes: 29 additions & 0 deletions integration/sink.tf
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,35 @@ resource "materialize_sink_kafka" "sink_kafka_cluster" {
}
}


# Kafka sink exercising the `headers` attribute: messages emitted by the sink
# carry additional headers read from the `kafka_header` column of the source
# table `simple_table_sink`.
resource "materialize_sink_kafka" "sink_kafka_headers" {
name = "sink_kafka_headers"
schema_name = materialize_schema.schema.name
database_name = materialize_database.database.name
cluster_name = materialize_cluster.cluster_sink.name
topic = "topic1"
# Kafka message key is taken from the table's `key_column`.
key = ["key_column"]
# Disable Materialize's validation of the key's uniqueness.
key_not_enforced = true
snapshot = true
# Name of the column holding the headers; it must be of type
# map[text => text] or map[text => bytea].
headers = "kafka_header"
# Source relation whose rows are written to the topic.
from {
name = materialize_table.simple_table_sink.name
database_name = materialize_table.simple_table_sink.database_name
schema_name = materialize_table.simple_table_sink.schema_name
}
# Kafka connection the sink writes through.
kafka_connection {
name = materialize_connection_kafka.kafka_connection.name
database_name = materialize_connection_kafka.kafka_connection.database_name
schema_name = materialize_connection_kafka.kafka_connection.schema_name
}
format {
json = true
}
envelope {
upsert = true
}
}

# Exposes the `qualified_sql_name` attribute of the `sink_kafka` sink resource.
output "qualified_sink_kafka" {
value = materialize_sink_kafka.sink_kafka.qualified_sql_name
}
Expand Down
19 changes: 19 additions & 0 deletions integration/table.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,25 @@ resource "materialize_table" "simple_table" {
}
}

# Table used as the source of the `sink_kafka_headers` sink. It provides a text
# key column and a map column that supplies the Kafka message headers.
resource "materialize_table" "simple_table_sink" {
name = "simple_table_sink"
schema_name = materialize_schema.schema.name
database_name = materialize_database.database.name
comment = "table sink comment"

# Used as the Kafka key by the headers sink.
column {
name = "key_column"
type = "text"
}
# Used as the sink's `headers` column; map[text => text] is one of the two
# column types the `headers` attribute accepts.
column {
name = "kafka_header"
type = "map[text => text]"
}
# Terraform ignores differences in the `column` blocks after creation.
# NOTE(review): the reason is not shown here — confirm why column drift is expected.
lifecycle {
ignore_changes = [column]
}
}

resource "materialize_table_grant" "table_grant_select" {
role_name = materialize_role.role_1.name
privilege = "SELECT"
Expand Down
10 changes: 10 additions & 0 deletions pkg/materialize/sink_kafka.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type SinkKafkaBuilder struct {
format SinkFormatSpecStruct
envelope KafkaSinkEnvelopeStruct
snapshot bool
headers string
keyNotEnforced bool
}

Expand Down Expand Up @@ -95,6 +96,11 @@ func (b *SinkKafkaBuilder) Snapshot(s bool) *SinkKafkaBuilder {
return b
}

// Headers sets the name of the column whose contents are added as headers to
// each message emitted by the sink, and returns the builder so calls can be
// chained.
//
// Create appends a HEADERS clause only when this value is non-empty, and it
// writes the name into the statement exactly as given (it is not quoted).
func (b *SinkKafkaBuilder) Headers(h string) *SinkKafkaBuilder {
b.headers = h
return b
}

func (b *SinkKafkaBuilder) KeyNotEnforced(s bool) *SinkKafkaBuilder {
b.keyNotEnforced = true
return b
Expand Down Expand Up @@ -132,6 +138,10 @@ func (b *SinkKafkaBuilder) Create() error {
q.WriteString(` NOT ENFORCED`)
}

if b.headers != "" {
q.WriteString(fmt.Sprintf(` HEADERS %s`, b.headers))
}

if b.format.Json {
q.WriteString(` FORMAT JSON`)
}
Expand Down
36 changes: 36 additions & 0 deletions pkg/materialize/sink_kafka_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,3 +356,39 @@ func TestSinkKafkaAvroDocsCreate(t *testing.T) {
}
})
}

// TestSinkKafkaHeadersCreate verifies that a sink built with Headers set
// produces a CREATE SINK statement containing a HEADERS clause, placed after
// the KAFKA CONNECTION options and before the FORMAT clause.
func TestSinkKafkaHeadersCreate(t *testing.T) {
testhelpers.WithMockDb(t, func(db *sqlx.DB, mock sqlmock.Sqlmock) {
// Expected statement; parentheses are escaped because the expectation is
// written as a pattern.
mock.ExpectExec(
`CREATE SINK "database"."schema"."sink"
FROM "database"."schema"."src"
INTO KAFKA CONNECTION "database"."schema"."kafka_conn" \(TOPIC 'testdrive-snk1-seed'\)
HEADERS headers
FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION "materialize"."public"."csr_conn"
ENVELOPE DEBEZIUM;`,
).WillReturnResult(sqlmock.NewResult(1, 1))

// Configure the builder with the source, connection, topic and Avro format.
o := MaterializeObject{Name: "sink", SchemaName: "schema", DatabaseName: "database"}
b := NewSinkKafkaBuilder(db, o)
b.From(IdentifierSchemaStruct{Name: "src", SchemaName: "schema", DatabaseName: "database"})
b.KafkaConnection(IdentifierSchemaStruct{Name: "kafka_conn", SchemaName: "schema", DatabaseName: "database"})
b.Topic("testdrive-snk1-seed")
b.Format(
SinkFormatSpecStruct{
Avro: &SinkAvroFormatSpec{
SchemaRegistryConnection: IdentifierSchemaStruct{
Name: "csr_conn",
DatabaseName: "materialize",
SchemaName: "public",
},
},
},
)
// The column name passed here is expected to appear unquoted after HEADERS.
b.Headers("headers")
b.Envelope(KafkaSinkEnvelopeStruct{Debezium: true})

// Create must succeed against the mocked expectation above.
if err := b.Create(); err != nil {
t.Fatal(err)
}
})
}
46 changes: 46 additions & 0 deletions pkg/provider/acceptance_sink_kafka_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ func TestAccSinkKafka_basic(t *testing.T) {
resource.TestCheckResourceAttr("materialize_sink_kafka.test_role", "name", sink2Name),
resource.TestCheckResourceAttr("materialize_sink_kafka.test_role", "ownership_role", roleName),
resource.TestCheckResourceAttr("materialize_sink_kafka.test_role", "comment", "Comment"),
testAccCheckSinkKafkaExists("materialize_sink_kafka.sink_kafka_headers"),
resource.TestCheckResourceAttr("materialize_sink_kafka.sink_kafka_headers", "name", sinkName+"_sink_headers"),
resource.TestCheckResourceAttr("materialize_sink_kafka.sink_kafka_headers", "topic", "topic1"),
resource.TestCheckResourceAttr("materialize_sink_kafka.sink_kafka_headers", "key.0", "column_1"),
resource.TestCheckResourceAttr("materialize_sink_kafka.sink_kafka_headers", "key_not_enforced", "true"),
resource.TestCheckResourceAttr("materialize_sink_kafka.sink_kafka_headers", "snapshot", "true"),
resource.TestCheckResourceAttr("materialize_sink_kafka.sink_kafka_headers", "headers", "column_1"),
resource.TestCheckResourceAttr("materialize_sink_kafka.sink_kafka_headers", "envelope.0.upsert", "true"),
),
},
{
Expand Down Expand Up @@ -182,6 +190,18 @@ func testAccSinkKafkaResource(roleName, connName, tableName, sinkName, sink2Name
size = "3xsmall"
}

resource "materialize_table" "simple_table" {
name = "%[4]s_simple_table"

column {
name = "column_1"
type = "map[text => text]"
}
lifecycle {
ignore_changes = [column]
}
}

resource "materialize_connection_kafka" "test" {
name = "%[2]s"
kafka_broker {
Expand Down Expand Up @@ -248,6 +268,32 @@ func testAccSinkKafkaResource(roleName, connName, tableName, sinkName, sink2Name

depends_on = [materialize_role.test]
}

resource "materialize_sink_kafka" "sink_kafka_headers" {
name = "%[4]s_sink_headers"
cluster_name = materialize_cluster.test.name
topic = "topic1"
key = ["column_1"]
key_not_enforced = true
snapshot = true
headers = "column_1"
from {
name = materialize_table.simple_table.name
database_name = materialize_table.simple_table.database_name
schema_name = materialize_table.simple_table.schema_name
}
kafka_connection {
name = materialize_connection_kafka.test.name
database_name = materialize_connection_kafka.test.database_name
schema_name = materialize_connection_kafka.test.schema_name
}
format {
json = true
}
envelope {
upsert = true
}
}
`, roleName, connName, tableName, sinkName, sink2Name, sinkOwner, comment)
}

Expand Down
10 changes: 10 additions & 0 deletions pkg/resources/resource_sink_kafka.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ var sinkKafkaSchema = map[string]*schema.Schema{
ForceNew: true,
Default: false,
},
"headers": {
Description: "The name of a column containing additional headers to add to each message emitted by the sink. The column must be of type map[text => text] or map[text => bytea].",
Type: schema.TypeString,
Optional: true,
ForceNew: true,
},
"region": RegionSchema(),
}

Expand Down Expand Up @@ -173,6 +179,10 @@ func sinkKafkaCreate(ctx context.Context, d *schema.ResourceData, meta any) diag
b.Snapshot(v.(bool))
}

if v, ok := d.GetOk("headers"); ok {
b.Headers(v.(string))
}

// create resource
if err := b.Create(); err != nil {
return diag.FromErr(err)
Expand Down
3 changes: 2 additions & 1 deletion pkg/resources/resource_sink_kafka_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ var inSinkKafka = map[string]interface{}{
"compression_type": "gzip",
"key": []interface{}{"key_1", "key_2"},
"key_not_enforced": true,
"headers": "headers",
"format": []interface{}{
map[string]interface{}{
"avro": []interface{}{
Expand Down Expand Up @@ -100,7 +101,7 @@ func TestResourceSinkKafkaCreate(t *testing.T) {
IN CLUSTER "cluster" FROM "database"."public"."item"
INTO KAFKA CONNECTION "materialize"."public"."kafka_conn"
\(TOPIC 'topic', COMPRESSION TYPE = gzip\) KEY \(key_1, key_2\)
NOT ENFORCED FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION "database"."schema"."csr_conn"
NOT ENFORCED HEADERS headers FORMAT AVRO USING CONFLUENT SCHEMA REGISTRY CONNECTION "database"."schema"."csr_conn"
\(AVRO KEY FULLNAME 'avro_key_fullname' AVRO VALUE FULLNAME 'avro_value_fullname',
DOC ON TYPE "database"."public"."item" = 'top-level comment',
KEY DOC ON COLUMN "database"."public"."item"."c1" = 'comment on column only in key schema',
Expand Down
Loading