diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 450cda4db4..77d419efbf 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(96, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(97, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(97, "lookup-region-snapshot-by-region-id"), KnownVersion::new(96, "inv-dataset"), KnownVersion::new(95, "turn-boot-on-fault-into-auto-restart"), KnownVersion::new(94, "put-back-creating-vmm-state"), diff --git a/nexus/db-queries/src/db/queries/volume.rs b/nexus/db-queries/src/db/queries/volume.rs index 2c1a9af19b..200702ecdf 100644 --- a/nexus/db-queries/src/db/queries/volume.rs +++ b/nexus/db-queries/src/db/queries/volume.rs @@ -179,12 +179,24 @@ impl ValidGrouping /// ```sql /// json_build_object('V3', /// json_build_object( -/// 'regions', (select json_agg(id) from region join t2 on region.id = t2.region_id where (t2.volume_references = 0 or t2.volume_references is null) and region.volume_id = ''), +/// 'regions', (select json_agg(id) from region left join region_snapshot on region.id = region_snapshot.region_id where (region_snapshot.volume_references = 0 or region_snapshot.volume_references is null) and region.volume_id = ''), /// 'region_snapshots', (select json_agg(json_build_object('dataset', dataset_id, 'region', region_id, 'snapshot', snapshot_id)) from t2 where t2.volume_references = 0) /// ) /// ) /// ``` /// /// Note 
that the query that populates the `region_snapshots` field is intended +/// to use the output of `ConditionallyDecreaseReferences`, which only returns +/// `region_snapshot` rows modified by the UPDATE statement in that query +/// fragment. If the query that populates the `regions` field were to also use +/// only the modified rows, it would never match the actual regions of the +/// volume: the modified rows would be for parts of the read-only parent, where +/// the regions are in the sub-volumes. This author can't imagine a volume where +/// the read-only parent had region snapshots of its own regions... +/// +/// The query that populates the `regions` field uses a LEFT JOIN in the case +/// that there are no matching region snapshot rows. +/// /// Note if json_agg is executing over zero rows, then the output is `null`, not /// `[]`. For example, if the sub-query meant to return regions to clean up /// returned zero rows, the output of json_build_object would look like: @@ -254,21 +266,21 @@ impl QueryFragment for BuildJsonResourcesToCleanUp { out.push_identifier(dsl::id::NAME)?; out.push_sql(") FROM "); region_dsl::region.walk_ast(out.reborrow())?; - out.push_sql(" JOIN "); - out.push_sql(self.table); + out.push_sql(" LEFT JOIN "); + out.push_sql("region_snapshot"); out.push_sql(" ON "); out.push_identifier(region_dsl::id::NAME)?; out.push_sql(" = "); - out.push_sql(self.table); + out.push_sql("region_snapshot"); out.push_sql("."); out.push_identifier(region_snapshot_dsl::region_id::NAME)?; // table's schema is equivalent to region_snapshot out.push_sql(" WHERE ( "); - out.push_sql(self.table); + out.push_sql("region_snapshot"); out.push_sql("."); out.push_identifier(region_snapshot_dsl::volume_references::NAME)?; out.push_sql(" = 0 OR "); - out.push_sql(self.table); + out.push_sql("region_snapshot"); out.push_sql("."); out.push_identifier(region_snapshot_dsl::volume_references::NAME)?; out.push_sql(" IS NULL"); diff --git 
a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 692ea3ef1e..bdcecef04d 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -9,7 +9,9 @@ use chrono::Utc; use dropshot::test_util::ClientTestContext; use http::method::Method; use http::StatusCode; +use nexus_db_queries::context::OpContext; use nexus_db_queries::db; +use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::DataStore; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; @@ -3443,3 +3445,78 @@ async fn test_upstairs_notify_downstairs_client_stops( .await .unwrap(); } + +/// Assert the `decrease_crucible_resource_count_and_soft_delete_volume` CTE +/// returns the regions associated with the volume. +#[nexus_test] +async fn test_cte_returns_regions(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + let _disk_test = DiskTest::new(&cptestctx).await; + create_project_and_pool(client).await; + let disks_url = get_disks_url(); + + let disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: "disk".parse().unwrap(), + description: String::from("disk"), + }, + disk_source: params::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + size: ByteCount::from_gibibytes_u32(2), + }; + + let disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + let apictx = &cptestctx.server.server_context(); + let nexus = &apictx.nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + let disk_id = disk.identity.id; + + let (.., db_disk) = LookupPath::new(&opctx, 
&datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + + assert_eq!(allocated_regions.len(), 3); + + let resources_to_clean_up = datastore + .decrease_crucible_resource_count_and_soft_delete_volume( + db_disk.volume_id, + ) + .await + .unwrap(); + + let datasets_and_regions_to_clean = + datastore.regions_to_delete(&resources_to_clean_up).await.unwrap(); + + assert_eq!(datasets_and_regions_to_clean.len(), 3); + + assert_eq!( + datasets_and_regions_to_clean + .into_iter() + .map(|(_, region)| region.id()) + .collect::<Vec<_>>(), + allocated_regions + .into_iter() + .map(|(_, region)| region.id()) + .collect::<Vec<_>>(), + ); +} diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 49cf268010..9089c0032a 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -644,11 +644,15 @@ CREATE TABLE IF NOT EXISTS omicron.public.region_snapshot ( PRIMARY KEY (dataset_id, region_id, snapshot_id) ); -/* Index for use during join with region table */ +/* Indexes for use during join with region table */ CREATE INDEX IF NOT EXISTS lookup_region_by_dataset on omicron.public.region_snapshot ( dataset_id, region_id ); +CREATE INDEX IF NOT EXISTS lookup_region_snapshot_by_region_id on omicron.public.region_snapshot ( + region_id +); + /* * Index on volume_references and snapshot_addr for crucible * resource accounting lookup @@ -4284,7 +4288,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '96.0.0', NULL) + (TRUE, NOW(), NOW(), '97.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/lookup-region-snapshot-by-region-id/up.sql b/schema/crdb/lookup-region-snapshot-by-region-id/up.sql new file mode 100644 index 0000000000..ab916069d6 --- /dev/null +++ b/schema/crdb/lookup-region-snapshot-by-region-id/up.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT 
EXISTS lookup_region_snapshot_by_region_id on omicron.public.region_snapshot ( + region_id +);