From b9ee14ac7fcdd512e5169077d8068d69ccbc4f53 Mon Sep 17 00:00:00 2001 From: Phil Date: Tue, 10 Oct 2023 18:56:05 -0400 Subject: [PATCH] sources: remove newlines from inferredSchemaIsNotAvailable The sentinel schema that's used when the inferred schema is not available contained newline characters, which make their way into the connector protocols. This breaks some connectors, which expect each message to be on its own line, since the schema value in, for example, a `Validate` request could contain newlines. This changes the handling of that sentinel schema to use plain `serde_json::Value`s instead of `RawValue`s, so that re-encoding it as JSON will cause the newlines to be removed. --- crates/models/src/schemas.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/models/src/schemas.rs b/crates/models/src/schemas.rs index bfa309e49d..8b02871be4 100644 --- a/crates/models/src/schemas.rs +++ b/crates/models/src/schemas.rs @@ -137,12 +137,17 @@ impl Schema { } "###, ); - let mut inferred_schema: Skim = serde_json::from_str(inferred_bundle).unwrap(); + // We don't use `Skim` here because we want the serde round trip to + // transform the sentinel schema from pretty-printed to dense. This + // is important because newlines in the schema could otherwise break + // connectors using the airbyte protocol. + let mut inferred_schema: BTreeMap<String, Value> = + serde_json::from_str(inferred_bundle).unwrap(); // Set $id to "flow://inferred-schema". _ = inferred_schema.insert( KEYWORD_ID.to_string(), - RawValue::from_value(&Value::String(Self::REF_INFERRED_SCHEMA_URL.to_string())), + Value::String(Self::REF_INFERRED_SCHEMA_URL.to_string()), ); // Add as a definition within the read schema. read_defs.insert(