diff --git a/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_common_config.md b/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_common_config.md
index c0aa98aeab..aaac27b793 100644
--- a/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_common_config.md
+++ b/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_common_config.md
@@ -50,3 +50,25 @@ import Link from '@docusaurus/Link';
telemetry.userProvidedId
Optional. See here for more information.
+
+
inMemBatchBytes
+
Optional. Default value 25600000. Controls the maximum size in bytes of events buffered in memory before saving the batch to local disk. The default value works well for most reasonably sized VMs.
+
+
+
cpuParallelismFactor
+
+ Optional. Default value 0.75.
+ Controls how the app splits the workload into concurrent batches which can be run in parallel.
+ E.g. if there are 4 available processors, and cpuParallelismFactor = 0.75, then we process 3 batches concurrently.
+ The default value works well for most workloads.
+
+
+
+
numEagerWindows
+
+ Optional. Default value 1.
+ Controls how eagerly the loader starts processing the next timed window even when the previous timed window is still finalizing (committing into the lake).
+ By default, we start processing a timed window if the previous 1 window is still finalizing, but we do not start processing a timed window if any older windows are still finalizing.
+ The default value works well for most workloads.
+
+
diff --git a/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_delta_config.md b/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_delta_config.md
index 905aeec54d..8c58ea874f 100644
--- a/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_delta_config.md
+++ b/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_delta_config.md
@@ -1,3 +1,7 @@
+```mdx-code-block
+import Link from '@docusaurus/Link';
+```
+
output.good.location
Required, e.g. gs://mybucket/events. URI of the bucket location to which to write Snowplow enriched events in Delta format. The URI should start with the following prefix:
@@ -9,6 +13,9 @@
-
output.good.dataSkippingColumns
-
Optional. A list of column names which will be brought to the "left-hand-side" of the events table, to enable Delta's data skipping feature. Defaults to the important Snowplow timestamp columns: load_tstamp, collector_tstamp, derived_tstamp, dvce_created_tstamp.
+
output.good.deltaTableProperties.*
+
+ Optional. A map of key/value strings corresponding to Delta's table properties.
+ These can be anything from the Delta table properties documentation.
+ The default properties include configuring Delta's data skipping feature for the important Snowplow timestamp columns: load_tstamp, collector_tstamp, derived_tstamp, dvce_created_tstamp.
Optional. A map of key/value strings corresponding to Hudi's configuration options for writing into a table. The default options configure `load_tstamp` as the table's partition field.
+
Optional. A map of key/value strings corresponding to Hudi's configuration options for writing into a table. The default options configure load_tstamp as the table's partition field.
-
output.good.hudiTableOptions.*
-
Optional. A map of key/value strings corresponding to Hudi's configuration options for creating a table. The default options configure `load_tstamp` as the table's partition field.
+
output.good.hudiTableProperties.*
+
Optional. A map of key/value strings corresponding to Hudi's configuration options for creating a table. The default options configure load_tstamp as the table's partition field.
+```mdx-code-block
+import Link from '@docusaurus/Link';
+```
+
output.good.location
Required, e.g. gs://mybucket/. URI of the bucket location to which to write Snowplow enriched events in Iceberg format. The URI should start with gs://.
@@ -18,6 +14,14 @@
output.good.table
Required. The name of the table in the BigLake database
+
+
output.good.icebergTableProperties.*
+
+ Optional. A map of key/value strings corresponding to Iceberg's table properties.
+ These can be anything from the Iceberg table properties documentation.
+ The default properties include configuring Iceberg's column-level statistics for the important Snowplow timestamp columns: load_tstamp, collector_tstamp, derived_tstamp, dvce_created_tstamp.
+
+
output.good.catalog.project
Required. The GCP project owning the BigLake catalog
diff --git a/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_iceberg_glue_config.md b/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_iceberg_glue_config.md
index f9b20fccda..2b6f522a0e 100644
--- a/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_iceberg_glue_config.md
+++ b/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_iceberg_glue_config.md
@@ -22,6 +22,14 @@ import Link from '@docusaurus/Link';
output.good.table
Required. The name of the table in the Glue database
+
+
output.good.icebergTableProperties.*
+
+ Optional. A map of key/value strings corresponding to Iceberg's table properties.
+ These can be anything from the Iceberg table properties documentation.
+ The default properties include configuring Iceberg's column-level statistics for the important Snowplow timestamp columns: load_tstamp, collector_tstamp, derived_tstamp, dvce_created_tstamp.
+
+
output.good.catalog.options.*
diff --git a/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_kafka_config.md b/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_kafka_config.md
index c48c622428..4ecf6a981d 100644
--- a/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_kafka_config.md
+++ b/docs/pipeline-components-and-applications/loaders-storage-targets/lake-loader/configuration-reference/_kafka_config.md
@@ -1,3 +1,7 @@
+```mdx-code-block
+import Link from '@docusaurus/Link';
+```
+
input.topicName
Required. Name of the Kafka topic for the source of enriched events.
Optional. Default value 1. The number of batches of events which are pre-fetched from kinesis. The default value is known to work well.
+
+
input.workerIdentifier
+
Optional. Defaults to the HOSTNAME environment variable. The name of this KCL worker used in the DynamoDB lease table.
+
+
+
input.leaseDuration
+
Optional. Default value 10 seconds. The duration of shard leases. KCL workers must periodically refresh leases in the DynamoDB table before this duration expires.
+
output.bad.streamName
Required. Name of the Kinesis stream that will receive failed events.