From 6e094f45e186745facb7d2f61ec04faf05e9aa8c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 2 Jun 2026 16:41:45 -0700 Subject: [PATCH 01/30] [fm] Add disk diagnoser with typed fact tables --- dev-tools/omdb/tests/successes.out | 50 +- dev-tools/omdb/tests/test_all_output.rs | 15 + nexus/db-model/src/fm.rs | 2 + nexus/db-model/src/fm/case.rs | 1 + nexus/db-model/src/fm/diagnosis_engine.rs | 7 + nexus/db-model/src/fm/fact_physical_disk.rs | 152 ++++ nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/fm.rs | 251 ++++-- nexus/db-schema/src/enums.rs | 1 + nexus/db-schema/src/schema.rs | 18 + nexus/fm/src/analysis_input.rs | 33 +- nexus/fm/src/builder/case.rs | 59 ++ nexus/fm/src/builder/rng.rs | 18 +- .../fm/src/{diagnosis.rs => diagnosis/mod.rs} | 24 +- nexus/fm/src/diagnosis/physical_disk.rs | 810 ++++++++++++++++++ nexus/src/app/background/tasks/fm_analysis.rs | 48 +- .../src/app/background/tasks/fm_rendezvous.rs | 7 + .../app/background/tasks/fm_sitrep_load.rs | 4 +- nexus/src/app/mod.rs | 9 +- nexus/src/lib.rs | 6 + nexus/test-interface/src/lib.rs | 4 + nexus/test-utils/src/nexus_test.rs | 32 + .../output/analysis_input_report_empty.out | 2 + .../output/analysis_input_report_same_inv.out | 2 + .../analysis_input_report_with_cases.out | 2 + nexus/types/src/fm.rs | 7 + nexus/types/src/fm/analysis_reports.rs | 26 +- nexus/types/src/fm/case.rs | 134 ++- nexus/types/src/fm/fact.rs | 77 ++ nexus/types/src/in_service_disk.rs | 41 + nexus/types/src/inventory.rs | 2 +- nexus/types/src/lib.rs | 1 + schema/crdb/dbinit.sql | 62 +- schema/crdb/fm-disk-de-and-facts/up1.sql | 1 + schema/crdb/fm-disk-de-and-facts/up2.sql | 3 + schema/crdb/fm-disk-de-and-facts/up3.sql | 44 + schema/crdb/fm-disk-de-and-facts/up4.sql | 1 + .../crdb/fm-disk-de-and-facts/up4.verify.sql | 2 + test-utils/src/dev/test_cmds.rs | 13 + uuid-kinds/src/lib.rs | 1 + 40 files changed, 1837 insertions(+), 138 deletions(-) create mode 100644 
nexus/db-model/src/fm/fact_physical_disk.rs rename nexus/fm/src/{diagnosis.rs => diagnosis/mod.rs} (64%) create mode 100644 nexus/fm/src/diagnosis/physical_disk.rs create mode 100644 nexus/types/src/fm/fact.rs create mode 100644 nexus/types/src/in_service_disk.rs create mode 100644 schema/crdb/fm-disk-de-and-facts/up1.sql create mode 100644 schema/crdb/fm-disk-de-and-facts/up2.sql create mode 100644 schema/crdb/fm-disk-de-and-facts/up3.sql create mode 100644 schema/crdb/fm-disk-de-and-facts/up4.sql create mode 100644 schema/crdb/fm-disk-de-and-facts/up4.verify.sql diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index d6d6f89b40b..4aec2bb7e41 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -700,20 +700,23 @@ task: "fm_analysis" configured period: every m last completed activation: , triggered by started at (s ago) and ran for ms - parent sitrep ID: None + parent sitrep ID: Some(..................... (sitrep)) current inventory collection ID: Some(..................... (collection)) ereport classes consumed: (none) FAULT MANAGEMENT ANALYSIS SUMMARY ================================= -/!\ analysis failed: FM analysis is not yet implemented + no changes from the current situation report (Some(..................... (sitrep))) fault management analysis inputs -------------------------------- - parent sitrep: + parent sitrep: ..................... inventory collection: ..................... + no new ereports since the parent sitrep no cases copied forward + no in-service control plane disks + fault management analysis report -------------------------------- sitrep ID: ..................... @@ -724,28 +727,28 @@ task: "fm_rendezvous" configured period: every m last completed activation: , triggered by started at (s ago) and ran for ms -(i) no FM situation report loaded, so rendezvous was not performed + current sitrep: ..................... 
creating requested alerts: -(i) note: this operation was not executed + started at (s ago) and ran for ms alerts requested: 0 requested in this sitrep: 0 created in this activation: 0 already created: 0 errors: 0 creating requested support bundles: -(i) note: this operation was not executed + started at (s ago) and ran for ms support bundles requested: 0 requested in this sitrep: 0 created in this activation: 0 already created: 0 errors: 0 marking ereports as seen: -(i) note: this operation was not executed + started at (s ago) and ran for ms total ereports in sitrep: 0 not marked when the sitrep was loaded: 0 marked seen by this activation: 0 already marked seen: 0 - batch size: 0 + batch size: 1000 batches: 0 errors: 0 @@ -762,6 +765,8 @@ task: "fm_sitrep_gc" batches: 1 orphaned fm_ereport_in_case rows deleted: 0 batches: 1 + orphaned fm_fact_physical_disk rows deleted: 0 + batches: 1 orphaned fm_support_bundle_request rows deleted: 0 batches: 1 orphaned fm_support_bundle_request_data_selection_ereports rows deleted: 0 @@ -775,7 +780,9 @@ task: "fm_sitrep_loader" configured period: every s last completed activation: , triggered by started at (s ago) and ran for ms - no FM situation report available to load + loaded latest FM situation report as of : + sitrep ..................... (sitrep) (v1) + made current at: task: "instance_reincarnation" configured period: every m @@ -1391,20 +1398,23 @@ task: "fm_analysis" configured period: every m last completed activation: , triggered by started at (s ago) and ran for ms - parent sitrep ID: None + parent sitrep ID: Some(..................... (sitrep)) current inventory collection ID: Some(..................... (collection)) ereport classes consumed: (none) FAULT MANAGEMENT ANALYSIS SUMMARY ================================= -/!\ analysis failed: FM analysis is not yet implemented + no changes from the current situation report (Some(..................... 
(sitrep))) fault management analysis inputs -------------------------------- - parent sitrep: + parent sitrep: ..................... inventory collection: ..................... + no new ereports since the parent sitrep no cases copied forward + no in-service control plane disks + fault management analysis report -------------------------------- sitrep ID: ..................... @@ -1415,28 +1425,28 @@ task: "fm_rendezvous" configured period: every m last completed activation: , triggered by started at (s ago) and ran for ms -(i) no FM situation report loaded, so rendezvous was not performed + current sitrep: ..................... creating requested alerts: -(i) note: this operation was not executed + started at (s ago) and ran for ms alerts requested: 0 requested in this sitrep: 0 created in this activation: 0 already created: 0 errors: 0 creating requested support bundles: -(i) note: this operation was not executed + started at (s ago) and ran for ms support bundles requested: 0 requested in this sitrep: 0 created in this activation: 0 already created: 0 errors: 0 marking ereports as seen: -(i) note: this operation was not executed + started at (s ago) and ran for ms total ereports in sitrep: 0 not marked when the sitrep was loaded: 0 marked seen by this activation: 0 already marked seen: 0 - batch size: 0 + batch size: 1000 batches: 0 errors: 0 @@ -1453,6 +1463,8 @@ task: "fm_sitrep_gc" batches: 1 orphaned fm_ereport_in_case rows deleted: 0 batches: 1 + orphaned fm_fact_physical_disk rows deleted: 0 + batches: 1 orphaned fm_support_bundle_request rows deleted: 0 batches: 1 orphaned fm_support_bundle_request_data_selection_ereports rows deleted: 0 @@ -1466,7 +1478,9 @@ task: "fm_sitrep_loader" configured period: every s last completed activation: , triggered by started at (s ago) and ran for ms - no FM situation report available to load + loaded latest FM situation report as of : + sitrep ..................... 
(sitrep) (v1) + made current at: task: "instance_reincarnation" configured period: every m diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index bd124caae13..42bde1b627f 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -187,6 +187,12 @@ async fn test_omdb_success_cases() { .wait_for_at_least_one_inventory_collection(Duration::from_secs(60)) .await; + // Wait until `fm_analysis` has committed at least one sitrep, so that the + // omdb snapshot for FM tasks is stable. (Otherwise sitrep IDs render as + // `None` or `Some(...)` depending on whether the task's natural cadence + // had landed by the time we sample it.) + cptestctx.wait_for_at_least_one_sitrep(Duration::from_secs(60)).await; + let mut output = String::new(); let invocations: &[&[&str]] = &[ @@ -350,6 +356,15 @@ async fn test_omdb_success_cases() { .field("triggered by", r"[\w ]+") .section(&["task: \"tuf_artifact_replication\"", "request ringbuf:"]); + // The `fm_analysis` task's input report includes a line comparing the + // current inventory collection against the parent sitrep's collection, + // which can be either "same" or "different" depending on whether a new + // inventory was collected between sitreps. Collapse both forms. + redactor.variable_regex( + "fm_input_inv_comparison", + r" --> (same collection as parent sitrep|different from parent sitrep \(collection [-a-f0-9]+\))", + ); + // The `sp_ereport_ingester` task's output depends on how many simulated // sled agents ahppen to register with Nexus before its first execution. 
// These redactions work around the issue described in diff --git a/nexus/db-model/src/fm.rs b/nexus/db-model/src/fm.rs index 2f2d1693d8a..0d2f4cf85f6 100644 --- a/nexus/db-model/src/fm.rs +++ b/nexus/db-model/src/fm.rs @@ -26,6 +26,8 @@ mod case; pub use case::*; mod diagnosis_engine; pub use diagnosis_engine::*; +mod fact_physical_disk; +pub use fact_physical_disk::*; mod support_bundle_request; pub use support_bundle_request::*; diff --git a/nexus/db-model/src/fm/case.rs b/nexus/db-model/src/fm/case.rs index 04776f99539..d2e34cf06ed 100644 --- a/nexus/db-model/src/fm/case.rs +++ b/nexus/db-model/src/fm/case.rs @@ -64,6 +64,7 @@ impl CaseMetadata { alerts_requested: _, support_bundles_requested: _, ereports: _, + facts: _, } = case; Self { sitrep_id: sitrep_id.into(), diff --git a/nexus/db-model/src/fm/diagnosis_engine.rs b/nexus/db-model/src/fm/diagnosis_engine.rs index 7d354142bbb..a99ceeac1ad 100644 --- a/nexus/db-model/src/fm/diagnosis_engine.rs +++ b/nexus/db-model/src/fm/diagnosis_engine.rs @@ -24,6 +24,7 @@ impl_enum_type!( pub enum DiagnosisEngine; PowerShelf => b"power_shelf" + PhysicalDisk => b"physical_disk" ); @@ -31,6 +32,9 @@ impl From for fm::DiagnosisEngineKind { fn from(de: DiagnosisEngine) -> Self { match de { DiagnosisEngine::PowerShelf => fm::DiagnosisEngineKind::PowerShelf, + DiagnosisEngine::PhysicalDisk => { + fm::DiagnosisEngineKind::PhysicalDisk + } } } } @@ -39,6 +43,9 @@ impl From for DiagnosisEngine { fn from(fm_de: fm::DiagnosisEngineKind) -> Self { match fm_de { fm::DiagnosisEngineKind::PowerShelf => DiagnosisEngine::PowerShelf, + fm::DiagnosisEngineKind::PhysicalDisk => { + DiagnosisEngine::PhysicalDisk + } } } } diff --git a/nexus/db-model/src/fm/fact_physical_disk.rs b/nexus/db-model/src/fm/fact_physical_disk.rs new file mode 100644 index 00000000000..b6510590bff --- /dev/null +++ b/nexus/db-model/src/fm/fact_physical_disk.rs @@ -0,0 +1,152 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 
2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database representation of the physical-disk diagnosis engine's facts. +//! +//! Each physical-disk fact is stored as typed columns in the +//! `fm_fact_physical_disk` table. The `kind` discriminant selects which payload +//! columns are populated; a CHECK constraint +//! (`zpool_unhealthy_columns_present`) enforces that the right columns are +//! non-NULL for each kind. See [`nexus_types::fm::DiskFact`] for semantics. + +use crate::DbTypedUuid; +use crate::impl_enum_type; +use crate::inventory::InvZpoolHealth; +use chrono::{DateTime, Utc}; +use nexus_db_schema::schema::fm_fact_physical_disk; +use nexus_types::fm; +use nexus_types::fm::{DiskFact, FactPayload, ZpoolUnhealthyFactPayload}; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::{ + CaseKind, CollectionKind, FactKind, PhysicalDiskKind, SitrepKind, ZpoolKind, +}; + +impl_enum_type!( + FmFactPhysicalDiskKindEnum: + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Eq)] + pub enum FmFactPhysicalDiskKind; + + ZpoolUnhealthy => b"zpool_unhealthy" +); + +/// Diesel row for the `fm_fact_physical_disk` table. +/// +/// The payload columns are populated according to `kind`: a column is `Some` +/// iff it belongs to that `kind`'s payload. This is enforced in the database +/// by the per-kind CHECK constraints (e.g. `zpool_unhealthy_columns_present`). +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = fm_fact_physical_disk)] +pub struct FmFactPhysicalDisk { + pub id: DbTypedUuid, + /// The sitrep to which this fact belongs. + /// + /// This will change as the fact is carried forward from one sitrep to the + /// next. + pub sitrep_id: DbTypedUuid, + pub case_id: DbTypedUuid, + /// Sitrep in which this fact was first added. + /// + /// Preserved unchanged when the fact is carried forward; debug-only. 
+ pub created_sitrep_id: DbTypedUuid, + pub comment: String, + pub kind: FmFactPhysicalDiskKind, + + // Columns for the `ZpoolUnhealthy` kind. + pub physical_disk_id: Option>, + pub zpool_id: Option>, + pub last_seen_health: Option, + pub observed_in_inv: Option>, + pub time_observed: Option>, +} + +impl FmFactPhysicalDisk { + /// Build a row from a fact's shared metadata (`fact`) and its + /// already-dispatched physical-disk payload (`disk_fact`). + /// + /// Callers route each fact to its engine's table by matching on + /// [`fact.payload`](fm::case::Fact::payload) and pass the matched payload + /// here, so this never has to interpret another engine's payload. + pub fn from_sitrep( + sitrep_id: impl Into>, + case_id: impl Into>, + fact: &fm::case::Fact, + disk_fact: &DiskFact, + ) -> Self { + let base = Self { + id: fact.id.into(), + sitrep_id: sitrep_id.into(), + case_id: case_id.into(), + created_sitrep_id: fact.created_sitrep_id.into(), + comment: fact.comment.clone(), + kind: FmFactPhysicalDiskKind::ZpoolUnhealthy, + physical_disk_id: None, + zpool_id: None, + last_seen_health: None, + observed_in_inv: None, + time_observed: None, + }; + match disk_fact { + DiskFact::ZpoolUnhealthy(p) => Self { + kind: FmFactPhysicalDiskKind::ZpoolUnhealthy, + physical_disk_id: Some(p.physical_disk_id.into()), + zpool_id: Some(p.zpool_id.into()), + last_seen_health: Some(p.last_seen_health.into()), + observed_in_inv: Some(p.observed_in_inv.into()), + time_observed: Some(p.time_observed), + ..base + }, + } + } + + /// Reconstruct an in-memory fact from a row. + /// + /// The payload columns the database's CHECK constraint guarantees are + /// non-NULL for this `kind` are unwrapped; a NULL where one is required + /// indicates a corrupt row (e.g. hand-edited) and yields an internal + /// error rather than a panic. 
+ pub fn into_fact(self) -> Result { + let payload = match self.kind { + FmFactPhysicalDiskKind::ZpoolUnhealthy => { + FactPayload::PhysicalDisk(DiskFact::ZpoolUnhealthy( + ZpoolUnhealthyFactPayload { + physical_disk_id: self + .physical_disk_id + .ok_or_else(|| missing_column("physical_disk_id"))? + .into(), + zpool_id: self + .zpool_id + .ok_or_else(|| missing_column("zpool_id"))? + .into(), + last_seen_health: self + .last_seen_health + .ok_or_else(|| missing_column("last_seen_health"))? + .into(), + observed_in_inv: self + .observed_in_inv + .ok_or_else(|| missing_column("observed_in_inv"))? + .into(), + time_observed: self + .time_observed + .ok_or_else(|| missing_column("time_observed"))?, + }, + )) + } + }; + Ok(fm::case::Fact { + id: self.id.into(), + created_sitrep_id: self.created_sitrep_id.into(), + payload, + comment: self.comment, + }) + } +} + +fn missing_column(column: &str) -> Error { + Error::internal_error(&format!( + "fm_fact_physical_disk row of kind 'zpool_unhealthy' has a NULL \ + {column} (violates the zpool_unhealthy_columns_present CHECK \ + constraint)" + )) +} diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 5caf71e9e28..f14fbb76b0d 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(261, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(262, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ pub static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(262, "fm-disk-de-and-facts"), KnownVersion::new(261, "remove-add-zones-with-mupdate-override"), KnownVersion::new(260, "ereport-trim-serial-trailing-nulls"), KnownVersion::new(259, "vmm-failure-reason"), diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index 1552d463dfb..f16e1608c2d 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -34,6 +34,7 @@ use nexus_db_schema::schema::ereport::dsl as ereport_dsl; use nexus_db_schema::schema::fm_alert_request::dsl as alert_req_dsl; use nexus_db_schema::schema::fm_case::dsl as case_dsl; use nexus_db_schema::schema::fm_ereport_in_case::dsl as case_ereport_dsl; +use nexus_db_schema::schema::fm_fact_physical_disk::dsl as fact_pd_dsl; use nexus_db_schema::schema::fm_sitrep::dsl as sitrep_dsl; use nexus_db_schema::schema::fm_sitrep_history::dsl as history_dsl; use nexus_db_schema::schema::fm_support_bundle_request::dsl as support_bundle_req_dsl; @@ -48,6 +49,7 @@ use omicron_uuid_kinds::AlertUuid; use omicron_uuid_kinds::CaseEreportKind; use omicron_uuid_kinds::CaseKind; use omicron_uuid_kinds::CaseUuid; +use omicron_uuid_kinds::FactKind; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SitrepUuid; use omicron_uuid_kinds::SupportBundleKind; @@ -121,6 +123,7 @@ sitrep_child_tables! { SupportBundleRequestDataSelectionEreports => { table: "fm_support_bundle_request_data_selection_ereports" }, SupportBundleRequest => { table: "fm_support_bundle_request" }, Case => { table: "fm_case" }, + FmFactPhysicalDisk => { table: "fm_fact_physical_disk" }, } /// Per-child-table statistics from a single GC pass. 
@@ -366,6 +369,8 @@ impl DataStore { let mut support_bundle_requests = self.support_bundle_requests_read_on_conn(id, conn).await?; + let mut case_facts = self.fm_facts_read_on_conn(id, conn).await?; + // Next, load the case metadata entries and marry them to the sets of // ereports, alert requests, and support bundle requests for those // cases that we loaded in the previous steps. @@ -408,6 +413,7 @@ impl DataStore { alert_requests.remove(&id).unwrap_or_default(); let support_bundles_requested = support_bundle_requests.remove(&id).unwrap_or_default(); + let facts = case_facts.remove(&id).unwrap_or_default(); fm::Case { id, metadata: fm::case::Metadata { @@ -419,6 +425,7 @@ impl DataStore { alerts_requested, ereports, support_bundles_requested, + facts, } })); } @@ -497,6 +504,69 @@ impl DataStore { Ok(by_case) } + /// Fetch all case facts belonging to cases in the given sitrep, grouped + /// by `case_id`. + /// + /// Each diagnosis engine stores its facts in its own typed table, read by + /// its own paginated loop here and unioned into the same `by_case` map. A + /// case belongs to exactly one engine, so keys never collide across tables. + async fn fm_facts_read_on_conn( + &self, + id: SitrepUuid, + conn: &async_bb8_diesel::Connection, + ) -> Result>, Error> { + let mut by_case = + HashMap::>::new(); + + // NOTE: Each per-DE set of facts belongs to a distinct table. + // This would be a reasonable spot to insert parallel_task_set + // and do some concurrent queries. + // + // (... gonna punt on that until we actually have a couple different + // tables to query from). 
+ + // --- physical-disk diagnosis engine facts --- + let mut paginator: Paginator> = + Paginator::new(SQL_BATCH_SIZE, PaginationOrder::Descending); + while let Some(p) = paginator.next() { + let batch = paginated( + fact_pd_dsl::fm_fact_physical_disk, + fact_pd_dsl::id, + &p.current_pagparams(), + ) + .filter(fact_pd_dsl::sitrep_id.eq(id.into_untyped_uuid())) + .select(model::fm::FmFactPhysicalDisk::as_select()) + .load_async(conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context("failed to load physical-disk case facts") + })?; + + paginator = p.found_batch(&batch, &|f| f.id); + for row in batch { + let case_id: CaseUuid = row.case_id.into(); + let fact = row.into_fact()?; + let id = fact.id; + by_case + .entry(case_id) + .or_default() + .insert_unique(fact) + .map_err(|_| { + let internal_message = format!( + "encountered multiple case facts for case \ + {case_id} with the same fact UUID {id}. this \ + should really not be possible, as the fact \ + UUID is a primary key!", + ); + Error::InternalError { internal_message } + })?; + } + } + + Ok(by_case) + } + /// Fetch all support bundle requests belonging to cases in the given /// sitrep, including their child data selection tables. async fn support_bundle_requests_read_on_conn( @@ -740,6 +810,7 @@ impl DataStore { let mut support_bundles_requested = Vec::new(); let mut bundle_data_selections_requested = Vec::new(); let mut case_ereports = Vec::new(); + let mut physical_disk_facts = Vec::new(); for case in sitrep.cases { let case_id = case.id; cases.push(model::fm::CaseMetadata::from_sitrep(sitrep_id, &case)); @@ -763,6 +834,18 @@ impl DataStore { ); bundle_data_selections_requested.push((req_id, data_selection)); } + for fact in case.facts.iter() { + // Dispatch each fact to its diagnosis engine's typed table. 
+ match &fact.payload { + fm::FactPayload::PhysicalDisk(disk_fact) => { + physical_disk_facts.push( + model::fm::FmFactPhysicalDisk::from_sitrep( + sitrep_id, case_id, fact, disk_fact, + ), + ); + } + } + } } if !case_ereports.is_empty() { @@ -797,6 +880,19 @@ impl DataStore { ) .await?; + if !physical_disk_facts.is_empty() { + diesel::insert_into(fact_pd_dsl::fm_fact_physical_disk) + .values(physical_disk_facts) + .execute_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context( + "failed to insert physical-disk case facts", + ) + })?; + } + if !cases.is_empty() { diesel::insert_into(case_dsl::fm_case) .values(cases) @@ -1132,22 +1228,8 @@ impl DataStore { .map(|id| id.into_untyped_uuid()) .collect::>(); - struct SitrepDeleteResult { - sitreps_deleted: usize, - case_ereports_deleted: usize, - alert_requests_deleted: usize, - support_bundle_requests_deleted: usize, - cases_deleted: usize, - } - let err = OptionalError::new(); - let SitrepDeleteResult { - sitreps_deleted, - case_ereports_deleted, - alert_requests_deleted, - support_bundle_requests_deleted, - cases_deleted, - } = self + let (sitreps_deleted, child_rows_deleted) = self // Sitrep deletion is transactional to prevent a sitrep from being // left in a partially-deleted state should the Nexus instance // attempting the delete operation die suddenly. @@ -1168,37 +1250,33 @@ impl DataStore { } } - // Delete case ereport assignments - let case_ereports_deleted = diesel::delete( - case_ereport_dsl::fm_ereport_in_case.filter( - case_ereport_dsl::sitrep_id.eq_any(ids.clone()), - ), - ) - .execute_async(&conn) - .await?; - - // Delete case alert requests. - let alert_requests_deleted = diesel::delete( - alert_req_dsl::fm_alert_request.filter(alert_req_dsl::sitrep_id.eq_any(ids.clone())) - ) - .execute_async(&conn) - .await?; - - // Delete support bundle request child data selection rows, - // then the requests themselves. 
- let support_bundle_requests_deleted = - Self::support_bundle_requests_delete_on_conn(&conn, ids.clone()) - .await?; - - // Delete case metadata records. - let cases_deleted = diesel::delete( - case_dsl::fm_case - .filter(case_dsl::sitrep_id.eq_any(ids.clone())), - ) - .execute_async(&conn) - .await?; + // Delete every child row by sitrep_id. Driving this off + // SitrepChildTable::ALL keeps it in lockstep with the + // orphan GC: a child table registered in the + // `sitrep_child_tables!` macro is deleted here + // automatically. There are no foreign keys between these + // tables, so deletion order doesn't matter. + let mut child_rows_deleted = + BTreeMap::::new(); + for &table in SitrepChildTable::ALL { + let mut builder = QueryBuilder::new(); + builder.sql("DELETE FROM omicron.public."); + builder.sql(table.table_name()); + builder.sql(" WHERE "); + builder.sql(table.sitrep_id_column()); + builder.sql(" = ANY("); + builder + .param() + .bind::, _>( + ids.clone(), + ); + builder.sql(")"); + let deleted = + builder.query::<()>().execute_async(&conn).await?; + child_rows_deleted.insert(table, deleted); + } - // Delete sitrep metadata records. + // Delete sitrep metadata records last. 
let sitreps_deleted = diesel::delete( sitrep_dsl::fm_sitrep .filter(sitrep_dsl::id.eq_any(ids.clone())), @@ -1206,13 +1284,7 @@ impl DataStore { .execute_async(&conn) .await?; - Ok(SitrepDeleteResult { - sitreps_deleted, - cases_deleted, - alert_requests_deleted, - support_bundle_requests_deleted, - case_ereports_deleted, - }) + Ok((sitreps_deleted, child_rows_deleted)) } }) .await @@ -1226,10 +1298,7 @@ impl DataStore { "deleted {sitreps_deleted} of {} sitreps", ids.len(); "ids" => ?ids, "sitreps_deleted" => sitreps_deleted, - "cases_deleted" => cases_deleted, - "case_ereports_deleted" => case_ereports_deleted, - "alert_requests_deleted" => alert_requests_deleted, - "support_bundle_requests_deleted" => support_bundle_requests_deleted, + "child_rows_deleted" => ?child_rows_deleted, ); Ok(sitreps_deleted) @@ -1474,43 +1543,6 @@ impl DataStore { builder.query() } - /// Delete child data selection rows for support bundle requests, then the - /// support bundle request rows themselves. - #[cfg(test)] - async fn support_bundle_requests_delete_on_conn( - conn: &async_bb8_diesel::Connection, - sitrep_ids: Vec, - ) -> Result { - use nexus_db_schema::schema::fm_support_bundle_request_data_selection_ereports::dsl as ereports_dsl; - use nexus_db_schema::schema::fm_support_bundle_request_data_selection_flags::dsl as flags_dsl; - use nexus_db_schema::schema::fm_support_bundle_request_data_selection_host_info::dsl as host_info_dsl; - - diesel::delete( - flags_dsl::fm_support_bundle_request_data_selection_flags - .filter(flags_dsl::sitrep_id.eq_any(sitrep_ids.clone())), - ) - .execute_async(conn) - .await?; - diesel::delete( - host_info_dsl::fm_support_bundle_request_data_selection_host_info - .filter(host_info_dsl::sitrep_id.eq_any(sitrep_ids.clone())), - ) - .execute_async(conn) - .await?; - diesel::delete( - ereports_dsl::fm_support_bundle_request_data_selection_ereports - .filter(ereports_dsl::sitrep_id.eq_any(sitrep_ids.clone())), - ) - .execute_async(conn) - .await?; - 
diesel::delete( - support_bundle_req_dsl::fm_support_bundle_request - .filter(support_bundle_req_dsl::sitrep_id.eq_any(sitrep_ids)), - ) - .execute_async(conn) - .await - } - pub async fn fm_sitrep_version_list( &self, opctx: &OpContext, @@ -1569,6 +1601,7 @@ mod tests { use nexus_types::fm::ereport::{EreportData, Reporter}; use omicron_test_utils::dev; use omicron_uuid_kinds::CollectionUuid; + use omicron_uuid_kinds::FactUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SupportBundleUuid; use std::collections::BTreeMap; @@ -2037,6 +2070,7 @@ mod tests { ereports, alerts_requested, support_bundles_requested, + facts, } = case; let case_id = id; let Some(expected) = this.cases.get(&case_id) else { @@ -2068,6 +2102,7 @@ mod tests { &expected.metadata.de, de, "while checking case {case_id}" ); + assert_eq!(&expected.facts, facts, "while checking case {case_id}"); // Now, check that all the ereports are present in both cases. assert_eq!(ereports.len(), expected.ereports.len()); @@ -2241,17 +2276,43 @@ mod tests { .unwrap(); } + let mut facts = iddqd::IdOrdMap::new(); + facts + .insert_unique(fm::case::Fact { + id: FactUuid::new_v4(), + created_sitrep_id: sitrep_id, + payload: fm::FactPayload::PhysicalDisk( + fm::DiskFact::ZpoolUnhealthy( + fm::ZpoolUnhealthyFactPayload { + physical_disk_id: + omicron_uuid_kinds::PhysicalDiskUuid::new_v4( + ), + zpool_id: + omicron_uuid_kinds::ZpoolUuid::new_v4(), + last_seen_health: + nexus_types::inventory::ZpoolHealth::Degraded, + observed_in_inv: CollectionUuid::new_v4(), + time_observed: omicron_common::now_db_precision( + ), + }, + ), + ), + comment: "a representative fact for case 1".to_string(), + }) + .unwrap(); + fm::Case { id: omicron_uuid_kinds::CaseUuid::new_v4(), metadata: fm::case::Metadata { created_sitrep_id: sitrep_id, closed_sitrep_id: None, - de: fm::DiagnosisEngineKind::PowerShelf, + de: fm::DiagnosisEngineKind::PhysicalDisk, comment: "my cool case".to_string(), }, ereports, alerts_requested, 
support_bundles_requested, + facts, } }; @@ -2288,6 +2349,7 @@ mod tests { ereports, alerts_requested, support_bundles_requested: iddqd::IdOrdMap::new(), + facts: iddqd::IdOrdMap::new(), } }; @@ -2406,6 +2468,7 @@ mod tests { ereports: iddqd::IdOrdMap::new(), alerts_requested: iddqd::IdOrdMap::new(), support_bundles_requested, + facts: iddqd::IdOrdMap::new(), }; let mut cases = iddqd::IdOrdMap::new(); diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index cddaf8f4d36..88b7deb1b08 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -58,6 +58,7 @@ define_enums! { DownstairsClientStoppedReasonEnum => "downstairs_client_stopped_reason_type", EreporterTypeEnum => "ereporter_type", FailureDomainEnum => "failure_domain", + FmFactPhysicalDiskKindEnum => "fm_fact_physical_disk_kind", HwM2SlotEnum => "hw_m2_slot", HwPowerStateEnum => "hw_power_state", HwRotSlotEnum => "hw_rot_slot", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 05d6c96b1db..b1275bfe138 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -3243,6 +3243,22 @@ table! { } } +table! { + fm_fact_physical_disk (sitrep_id, id) { + id -> Uuid, + sitrep_id -> Uuid, + case_id -> Uuid, + created_sitrep_id -> Uuid, + comment -> Text, + kind -> crate::enums::FmFactPhysicalDiskKindEnum, + physical_disk_id -> Nullable, + zpool_id -> Nullable, + last_seen_health -> Nullable, + observed_in_inv -> Nullable, + time_observed -> Nullable, + } +} + table! { fm_ereport_in_case (sitrep_id, id) { id -> Uuid, @@ -3258,6 +3274,8 @@ table! { allow_tables_to_appear_in_same_query!(fm_ereport_in_case, ereport); allow_tables_to_appear_in_same_query!(fm_sitrep, fm_case); +allow_tables_to_appear_in_same_query!(fm_sitrep, fm_fact_physical_disk); +allow_tables_to_appear_in_same_query!(fm_case, fm_fact_physical_disk); table! 
{ fm_alert_request (sitrep_id, id) { diff --git a/nexus/fm/src/analysis_input.rs b/nexus/fm/src/analysis_input.rs index c9d1ae57428..2f8e135cbb2 100644 --- a/nexus/fm/src/analysis_input.rs +++ b/nexus/fm/src/analysis_input.rs @@ -8,6 +8,7 @@ use chrono::{DateTime, Utc}; use iddqd::IdOrdMap; use nexus_types::fm::analysis_reports::ClosedCaseReport; use nexus_types::fm::{self, Sitrep, SitrepVersion}; +use nexus_types::in_service_disk::InServiceDisk; use nexus_types::inventory; use omicron_uuid_kinds::CollectionUuid; use std::collections::BTreeMap; @@ -39,6 +40,8 @@ pub struct Input { new_ereports: IdOrdMap, open_cases: IdOrdMap, closed_cases_copied_forward: IdOrdMap, + /// All control plane managed disks + in_service_disks: Arc>, } impl Input { @@ -65,11 +68,19 @@ impl Input { &self.closed_cases_copied_forward } + /// All control-plane-managed disks (`physical_disk.disk_policy = + /// in_service` in the DB), indexed by `physical_disk_id`. See the + /// field-level documentation on `Input::in_service_disks` for semantics. + pub fn in_service_disks(&self) -> &IdOrdMap { + &self.in_service_disks + } + /// Returns a [`Builder`] for constructing a new `Input` from the provided - /// `parent_sitrep` and inventory collection. + /// `parent_sitrep`, inventory collection, and in-service disks. pub fn builder( parent_sitrep: Option>, inv: Arc, + in_service_disks: Arc>, ) -> Result { // Before preparing analysis inputs, check that the proposed input // inventory collection is at least as new as the parent sitrep's @@ -94,6 +105,7 @@ impl Input { Ok(Builder { parent_sitrep, inv, + in_service_disks, new_ereports: IdOrdMap::default(), unmarked_seen_ereports: BTreeSet::default(), }) @@ -117,6 +129,7 @@ pub enum InvalidInputs { pub struct Builder { parent_sitrep: Option>, inv: Arc, + in_service_disks: Arc>, /// Ereports which are new and should be input to analysis in the next /// sitrep. 
new_ereports: IdOrdMap, @@ -184,6 +197,11 @@ impl Builder { .collect(), open_cases: BTreeMap::new(), closed_cases_copied_forward: BTreeMap::new(), + in_service_disks: self + .in_service_disks + .iter() + .map(|d| d.physical_disk_id) + .collect(), }; // Determine which cases must be copied forwards into the next sitrep. @@ -234,6 +252,7 @@ impl Builder { new_ereports: self.new_ereports, open_cases, closed_cases_copied_forward, + in_service_disks: self.in_service_disks, }; (input, report) @@ -346,6 +365,7 @@ mod tests { .collect(), alerts_requested: Default::default(), support_bundles_requested: Default::default(), + facts: Default::default(), } }; let open_case2 = { @@ -366,6 +386,7 @@ mod tests { .collect(), alerts_requested: Default::default(), support_bundles_requested: Default::default(), + facts: Default::default(), } }; let closed_case_with_unmarked = { @@ -393,6 +414,7 @@ mod tests { .collect(), alerts_requested: Default::default(), support_bundles_requested: Default::default(), + facts: Default::default(), } }; let closed_case_without_unmarked = { @@ -414,6 +436,7 @@ mod tests { .collect(), alerts_requested: Default::default(), support_bundles_requested: Default::default(), + facts: Default::default(), } }; @@ -463,8 +486,12 @@ mod tests { // Build analysis input let (input, report) = { - let mut builder = Input::builder(Some(parent_sitrep), inv) - .expect("collection start time check should always pass"); + let mut builder = Input::builder( + Some(parent_sitrep), + inv, + Arc::new(IdOrdMap::new()), + ) + .expect("collection start time check should always pass"); // Pass in four ereports: // - two that are in the open cases of the parent sitrep // - one that is in the (to-be-copied-forward) closed case diff --git a/nexus/fm/src/builder/case.rs b/nexus/fm/src/builder/case.rs index d4d8c5e4d4e..8f9e2bd022a 100644 --- a/nexus/fm/src/builder/case.rs +++ b/nexus/fm/src/builder/case.rs @@ -10,6 +10,7 @@ use nexus_types::alert::AlertClass; use nexus_types::fm; use 
nexus_types::support_bundle::BundleDataSelection; use omicron_uuid_kinds::CaseUuid; +use omicron_uuid_kinds::FactUuid; use omicron_uuid_kinds::SitrepUuid; use std::sync::Arc; @@ -76,6 +77,7 @@ impl AllCases { ereports: Default::default(), alerts_requested: Default::default(), support_bundles_requested: Default::default(), + facts: Default::default(), }; let mut builder = CaseBuilder::new(&self.log, sitrep_id, case, case_rng); @@ -215,6 +217,63 @@ impl CaseBuilder { self.report_log.entry("case closed").comment(comment); } + /// Replace this case's free-form comment string. + pub fn set_comment(&mut self, comment: impl ToString) { + self.case.metadata.comment = comment.to_string(); + } + + /// Emit a new fact under this case. The fact's UUID is freshly + /// allocated from the case's deterministic RNG. + pub fn add_fact( + &mut self, + payload: impl Into, + comment: impl ToString, + ) -> FactUuid { + let id = loop { + let id = self.rng.next_fact(); + if !self.case.facts.contains_key(&id) { + break id; + } + }; + let payload = payload.into(); + let comment = comment.to_string(); + slog::info!( + &self.log, + "added a fact"; + "fact_id" => %id, + "payload" => ?payload, + "comment" => %comment, + ); + self.report_log + .entry("added fact") + .kv("fact_id", id) + .kv("payload", &payload) + .comment(comment.clone()); + let fact = fm::case::Fact { + id, + created_sitrep_id: self.sitrep_id, + payload, + comment, + }; + self.case.facts.insert_unique(fact).expect("UUID should be unused"); + id + } + + /// Remove a fact from this case. The fact will not be carried forward + /// into the next sitrep. + pub fn remove_fact(&mut self, id: FactUuid) { + if self.case.facts.remove(&id).is_some() { + slog::info!(&self.log, "removed a fact"; "fact_id" => %id); + self.report_log.entry("removed fact").kv("fact_id", id); + } + } + + /// Iterate the facts currently attached to this case (including any that + /// were carried forward from the parent sitrep). 
+ pub fn facts(&self) -> impl Iterator { + self.case.facts.iter() + } + pub fn add_ereport( &mut self, report: &Arc, diff --git a/nexus/fm/src/builder/rng.rs b/nexus/fm/src/builder/rng.rs index 5490ef67ae9..9ba02eca5cc 100644 --- a/nexus/fm/src/builder/rng.rs +++ b/nexus/fm/src/builder/rng.rs @@ -14,6 +14,8 @@ use omicron_uuid_kinds::CaseEreportKind; use omicron_uuid_kinds::CaseEreportUuid; use omicron_uuid_kinds::CaseKind; use omicron_uuid_kinds::CaseUuid; +use omicron_uuid_kinds::FactKind; +use omicron_uuid_kinds::FactUuid; use omicron_uuid_kinds::SitrepUuid; use omicron_uuid_kinds::SupportBundleKind; use omicron_uuid_kinds::SupportBundleUuid; @@ -59,11 +61,17 @@ impl SitrepBuilderRng { } } +/// Per-case child RNGs. Each `next_*` returns the next deterministic UUID +/// in its stream; collisions across calls are statistically impossible, but +/// callers in `builder/case.rs` still guard inserts with a `contains_key` +/// loop so that a future change here (e.g., reseeding mid-build) can't +/// silently corrupt a case. 
#[derive(Clone, Debug)] pub(super) struct CaseBuilderRng { ereport_assignment_rng: TypedUuidRng, alert_rng: TypedUuidRng, support_bundle_rng: TypedUuidRng, + fact_rng: TypedUuidRng, } impl CaseBuilderRng { @@ -85,7 +93,11 @@ impl CaseBuilderRng { &mut sitrep.parent, (case_id, "support-bundle"), ); - Self { alert_rng, ereport_assignment_rng, support_bundle_rng } + let fact_rng = TypedUuidRng::from_parent_rng( + &mut sitrep.parent, + (case_id, "case-fact"), + ); + Self { alert_rng, ereport_assignment_rng, support_bundle_rng, fact_rng } } pub(super) fn next_alert(&mut self) -> AlertUuid { @@ -99,4 +111,8 @@ impl CaseBuilderRng { pub(super) fn next_support_bundle(&mut self) -> SupportBundleUuid { self.support_bundle_rng.next() } + + pub(super) fn next_fact(&mut self) -> FactUuid { + self.fact_rng.next() + } } diff --git a/nexus/fm/src/diagnosis.rs b/nexus/fm/src/diagnosis/mod.rs similarity index 64% rename from nexus/fm/src/diagnosis.rs rename to nexus/fm/src/diagnosis/mod.rs index 96ea491f1b1..b62e676fb36 100644 --- a/nexus/fm/src/diagnosis.rs +++ b/nexus/fm/src/diagnosis/mod.rs @@ -2,22 +2,30 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! Fault management diagnosis engines. +//! +//! Each submodule defines one diagnosis engine (DE). `analyze` dispatches to +//! each engine in turn; engines are deterministic and idempotent per RFD 603, +//! so the dispatch order does not matter. + use crate::SitrepBuilder; use crate::analysis_input::Input; +mod physical_disk; + pub fn analyze( - _input: &Input, - _builder: &mut SitrepBuilder<'_>, + input: &Input, + builder: &mut SitrepBuilder<'_>, ) -> anyhow::Result<()> { - anyhow::bail!("FM analysis is not yet implemented") + physical_disk::analyze(input, builder)?; + Ok(()) } -/// Ereport classes that the diagnosis engine currently understands. 
-/// Preparation only surfaces ereports whose class is in this set — there is -/// no value in loading ereports FM analysis cannot consume. +/// Ereport classes that any diagnosis engine in this build of Nexus knows +/// how to consume. The background task uses this to filter loaded ereports. /// -/// Empty until [`analyze`] gains real handling. Grow this alongside FM -/// analysis as new classes gain support. +/// Empty today; grow this list alongside FM analysis as new ereport +/// classes gain support. /// /// **NULL-class ereports are intentionally excluded by the loader's SQL /// filter** (`class = ANY(...)` never matches NULL). If FM analysis ever diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs new file mode 100644 index 00000000000..3992c9cb46c --- /dev/null +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -0,0 +1,810 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Disk diagnosis engine. + +use crate::SitrepBuilder; +use crate::analysis_input::Input; +use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; +use nexus_types::fm::DiagnosisEngineKind; +use nexus_types::fm::{DiskFact, ZpoolUnhealthyFactPayload}; +use nexus_types::inventory::ZpoolHealth; +use omicron_uuid_kinds::{CaseUuid, FactUuid, PhysicalDiskUuid, ZpoolUuid}; +use std::collections::BTreeMap; + +/// A [`DiskFact::ZpoolUnhealthy`] payload paired with the `FactUuid` it +/// lives under. Used to build in-memory indices over facts during +/// analysis; not serialized. 
+#[derive(Clone, Copy, Debug)] +struct ZpoolUnhealthyFact { + fact_id: FactUuid, + payload: ZpoolUnhealthyFactPayload, +} + +impl IdOrdItem for ZpoolUnhealthyFact { + type Key<'a> = FactUuid; + fn key(&self) -> Self::Key<'_> { + self.fact_id + } + id_upcast!(); +} + +/// One in-service disk paired with the current observed health of its zpool. +/// `zpool_health` is `None` when the disk's zpool was not seen in the current +/// inventory (e.g., sled down, lossy collection). +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct DiskHealthSnapshot { + physical_disk_id: PhysicalDiskUuid, + zpool_id: ZpoolUuid, + zpool_health: Option, +} + +impl IdOrdItem for DiskHealthSnapshot { + type Key<'a> = PhysicalDiskUuid; + fn key(&self) -> Self::Key<'_> { + self.physical_disk_id + } + id_upcast!(); +} + +/// Per-case summary built from a case's facts. Each Disk case is about a +/// single physical disk; every fact on the case must reference that disk. +struct ParentCaseSummary { + /// The physical disk this case is about. + physical_disk_id: PhysicalDiskUuid, + /// All `ZpoolUnhealthy` facts on this case. Normally one; pathological + /// cases may have multiple. Regardless, the diagnosis engine keeps all of + /// them. + unhealthy_facts: IdOrdMap, +} + +pub(super) fn analyze( + input: &Input, + builder: &mut SitrepBuilder<'_>, +) -> anyhow::Result<()> { + let inv_collection_id = input.inventory().id; + let inv_time_done = input.inventory().time_done; + + // Index every zpool we observed in this inventory, so we can distinguish + // "saw it, it's Online" from "didn't see it at all" when looking up by + // an in-service disk's zpool below. + let observed_health: BTreeMap = input + .inventory() + .sled_agents + .iter() + .flat_map(|sa| sa.zpools.iter()) + .map(|z| (z.id, z.health)) + .collect(); + + // The current health snapshot for every in-service disk, keyed by + // physical_disk_id. 
Absence from this index is a positive signal that + // the control plane has moved on from the disk (expungement / + // decommissioning); see prepare_inputs in + // nexus/src/app/background/tasks/fm_analysis.rs. + let in_service_health: IdOrdMap = input + .in_service_disks() + .iter() + .map(|d| DiskHealthSnapshot { + physical_disk_id: d.physical_disk_id, + zpool_id: d.zpool_id, + zpool_health: observed_health.get(&d.zpool_id).copied(), + }) + .collect(); + + // Index parent-forwarded Disk cases from the input — the state copied + // from the parent sitrep. + // Every case is about one physical disk; we derive the disk + // from its facts. Skip (with a warning) any case we can't safely + // interpret. + let parent_cases: BTreeMap = input + .open_cases() + .iter() + .filter(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) + .filter_map(|c| { + let case_id = c.id; + let mut unhealthy_facts: IdOrdMap = + IdOrdMap::new(); + let mut case_disk_id: Option = None; + for fact in c.facts.iter() { + // Every fact on a physical-disk case must carry a + // physical-disk payload. A foreign payload is a data-model + // violation; skip the whole case rather than acting on + // inconsistent state. 
+ let Some(disk_fact) = fact.payload.as_physical_disk() else { + slog::warn!( + &builder.log, + "skipping Disk case: fact payload does not belong to \ + the physical-disk diagnosis engine"; + "case_id" => %case_id, + "fact_id" => %fact.id, + ); + return None; + }; + match disk_fact { + DiskFact::ZpoolUnhealthy(payload) => { + let payload = *payload; + let disk_id = *case_disk_id + .get_or_insert(payload.physical_disk_id); + if disk_id != payload.physical_disk_id { + slog::warn!( + &builder.log, + "skipping Disk case: facts reference \ + different physical disks (1 expected)"; + "case_id" => %case_id, + "expected_physical_disk_id" => %disk_id, + "fact_physical_disk_id" => + %payload.physical_disk_id, + ); + return None; + } + unhealthy_facts + .insert_unique(ZpoolUnhealthyFact { + fact_id: fact.id, + payload, + }) + .expect("fact ids are unique within a case"); + } + } + } + let Some(physical_disk_id) = case_disk_id else { + slog::warn!( + &builder.log, + "skipping Disk case with no facts; cannot derive disk id"; + "case_id" => %case_id, + ); + return None; + }; + Some(( + case_id, + ParentCaseSummary { physical_disk_id, unhealthy_facts }, + )) + }) + .collect(); + + // For each parent case, decide what to do based on its disk's current + // state: + // - disk no longer in service → close the case (expungement) + // - disk's zpool back to Online → close the case (recovery) + // - disk still unhealthy → drop any facts whose recorded health no + // longer matches; the matching loop below will re-add a fresh fact + // - disk in service but absent from inventory → leave alone (absence + // is NOT a recovery signal: sled could be powered off, or + // inventory could be lossy) + for (case_id, summary) in &parent_cases { + let mut case_mut = builder + .cases + .case_mut(case_id) + .expect("case_id came from iterating builder.cases"); + match in_service_health.get(&summary.physical_disk_id) { + None => { + case_mut.close(format!( + "disk {} no longer in service", + 
summary.physical_disk_id, + )); + } + Some(snap) if snap.zpool_health == Some(ZpoolHealth::Online) => { + case_mut + .close(format!("zpool {} back to Online", snap.zpool_id,)); + } + Some(snap) => { + let Some(current_health) = snap.zpool_health else { + continue; + }; + for fact_ref in summary.unhealthy_facts.iter() { + if fact_ref.payload.last_seen_health != current_health { + case_mut.remove_fact(fact_ref.fact_id); + } + } + } + } + } + + // For each currently-faulty in-service disk: ensure a case exists + // (reusing the parent-forwarded one for this disk if any) and add a + // fresh fact if one with this exact health isn't already present. + for disk in in_service_health.iter() { + let Some(current_health) = disk.zpool_health else { + continue; + }; + if current_health == ZpoolHealth::Online { + continue; + } + + let parent_for_disk = + parent_cases.iter().find_map(|(case_id, summary)| { + if summary.physical_disk_id == disk.physical_disk_id { + Some((*case_id, summary)) + } else { + None + } + }); + + let case_id_for_fact = match parent_for_disk { + // Parent case already has an accurate fact — fully covered. + Some((_, summary)) + if summary + .unhealthy_facts + .iter() + .any(|f| f.payload.last_seen_health == current_health) => + { + continue; + } + // Parent case exists; its stale facts were removed above. + // Refresh under the same case. + Some((case_id, _)) => case_id, + // No parent case for this disk — open one. 
+ None => { + let mut new_case = + builder.cases.open_case(DiagnosisEngineKind::PhysicalDisk); + new_case.set_comment(format!( + "physical disk {} unhealthy", + disk.physical_disk_id, + )); + new_case.id + } + }; + + builder + .cases + .case_mut(&case_id_for_fact) + .expect("case_id came from this fn") + .add_fact( + DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + physical_disk_id: disk.physical_disk_id, + zpool_id: disk.zpool_id, + last_seen_health: current_health, + observed_in_inv: inv_collection_id, + time_observed: inv_time_done, + }), + format!("zpool {} health={current_health}", disk.zpool_id,), + ); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::analysis_input::Input; + use crate::builder::{SitrepBuilder, SitrepBuilderRng}; + use crate::test_util::FmTest; + use chrono::Utc; + use iddqd::IdOrdMap; + use nexus_types::external_api::physical_disk::PhysicalDiskKind; + use nexus_types::fm::{self, Sitrep, SitrepVersion}; + use nexus_types::in_service_disk::InServiceDisk; + use nexus_types::inventory; + use omicron_test_utils::dev; + use omicron_uuid_kinds::{ + OmicronZoneUuid, PhysicalDiskUuid, SitrepUuid, SledUuid, + }; + use std::sync::Arc; + + /// Build a synthetic in-service disk set from a list of zpool IDs. + /// Each zpool gets its own fresh `PhysicalDiskUuid` and dummy identity + /// fields — tests in this module only care about the zpool dimension. 
+ fn mk_in_service( + zpool_ids: impl IntoIterator, + ) -> IdOrdMap { + zpool_ids + .into_iter() + .map(|zpool_id| InServiceDisk { + physical_disk_id: PhysicalDiskUuid::new_v4(), + zpool_id, + sled_id: SledUuid::new_v4(), + vendor: "test-vendor".to_string(), + serial: format!("test-serial-{zpool_id}"), + model: "test-model".to_string(), + variant: PhysicalDiskKind::U2, + }) + .collect() + } + + /// Find the `physical_disk_id` for the given `zpool_id` in the + /// in-service set, or fabricate a fresh one if not present (e.g., when + /// simulating an expunged disk whose case should still reference some + /// stable PhysicalDiskUuid). + fn disk_id_for( + in_service: &IdOrdMap, + zpool_id: ZpoolUuid, + ) -> PhysicalDiskUuid { + in_service + .iter() + .find(|d| d.zpool_id == zpool_id) + .map(|d| d.physical_disk_id) + .unwrap_or_else(PhysicalDiskUuid::new_v4) + } + + /// Make a synthetic test scenario from the example system: returns a + /// `LogContext` (the caller must `cleanup_successful()` it), the + /// example collection, and every zpool ID in that collection. + fn setup( + test_name: &'static str, + ) -> (dev::LogContext, inventory::Collection, Vec) { + let (fm_test, logctx) = FmTest::new_with_logctx(test_name); + // Build the example system once to get a Collection with zpools. + let (example, _bp) = fm_test.system_builder.build(); + let zpool_ids: Vec = example + .collection + .sled_agents + .iter() + .flat_map(|sa| sa.zpools.iter().map(|z| z.id)) + .collect(); + assert!( + !zpool_ids.is_empty(), + "example system should have at least one zpool" + ); + (logctx, example.collection, zpool_ids) + } + + /// Set the zpool with `zpool_id` to `health`, panicking if not found. 
+ fn set_health( + collection: &mut inventory::Collection, + zpool_id: ZpoolUuid, + health: ZpoolHealth, + ) { + for mut sa in collection.sled_agents.iter_mut() { + for z in sa.zpools.iter_mut() { + if z.id == zpool_id { + z.health = health; + return; + } + } + } + panic!("zpool {zpool_id} not found in collection"); + } + + /// Build an `Input` from a collection, an optional parent sitrep, and a + /// pre-built set of in-service disks. + fn build_input( + collection: inventory::Collection, + parent_sitrep: Option, + in_service: IdOrdMap, + ) -> Input { + let parent = parent_sitrep.map(|s| { + Arc::new(( + SitrepVersion { + id: s.id(), + version: 0, + time_made_current: Utc::now(), + }, + s, + )) + }); + let builder = + Input::builder(parent, Arc::new(collection), Arc::new(in_service)) + .expect("input builder should accept fresh inventory"); + let (input, _report) = builder.build(); + input + } + + /// Run this module's `analyze` over an input and return the resulting + /// Sitrep along with the analysis report (whose log entries the + /// close-comment assertions in `closes_*` tests inspect). 
+ fn run_analyze( + log: &slog::Logger, + input: &Input, + ) -> (Sitrep, fm::analysis_reports::AnalysisReport) { + let mut builder = SitrepBuilder::new_with_rng( + log, + input, + SitrepBuilderRng::from_seed("disk-analyze"), + ); + analyze(input, &mut builder).expect("analyze ok"); + builder.build(OmicronZoneUuid::new_v4(), Utc::now()) + } + + fn make_parent_with_disk_case( + parent_sitrep_id: SitrepUuid, + inv_collection_id: omicron_uuid_kinds::CollectionUuid, + physical_disk_id: PhysicalDiskUuid, + zpool_id: ZpoolUuid, + ) -> Sitrep { + let mut cases = iddqd::IdOrdMap::new(); + let case_id = omicron_uuid_kinds::CaseUuid::new_v4(); + let mut facts = iddqd::IdOrdMap::new(); + facts + .insert_unique(fm::case::Fact { + id: omicron_uuid_kinds::FactUuid::new_v4(), + created_sitrep_id: parent_sitrep_id, + payload: DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + physical_disk_id, + zpool_id, + last_seen_health: ZpoolHealth::Degraded, + observed_in_inv: inv_collection_id, + time_observed: Utc::now(), + }) + .into(), + comment: format!("zpool {zpool_id} degraded"), + }) + .unwrap(); + cases + .insert_unique(fm::Case { + id: case_id, + metadata: fm::case::Metadata { + created_sitrep_id: parent_sitrep_id, + closed_sitrep_id: None, + de: DiagnosisEngineKind::PhysicalDisk, + comment: format!("zpool {zpool_id} degraded"), + }, + ereports: Default::default(), + alerts_requested: Default::default(), + support_bundles_requested: Default::default(), + facts, + }) + .unwrap(); + Sitrep { + metadata: fm::SitrepMetadata { + id: parent_sitrep_id, + inv_collection_id, + creator_id: OmicronZoneUuid::new_v4(), + parent_sitrep_id: None, + time_created: Utc::now(), + next_inv_min_time_started: Utc::now(), + comment: String::new(), + }, + cases, + ereports_by_id: Default::default(), + } + } + + /// Helper: collect (case, fact, DiskFact) triples for every fact on a + /// physical-disk case in a sitrep. Optionally filtered to open cases + /// only. 
+ fn disk_facts( + sitrep: &Sitrep, + open_only: bool, + ) -> Vec<(&fm::Case, &fm::case::Fact, DiskFact)> { + sitrep + .cases + .iter() + .filter(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) + .filter(|c| !open_only || c.is_open()) + .flat_map(|c| { + c.facts.iter().filter_map(move |f| { + f.payload.as_physical_disk().map(|d| (c, f, d.clone())) + }) + }) + .collect() + } + + #[test] + fn opens_on_degraded_in_service() { + let (logctx, mut collection, zpools) = + setup("disk_open_degraded_in_service"); + let target = zpools[0]; + set_health(&mut collection, target, ZpoolHealth::Degraded); + let in_service = mk_in_service(zpools.iter().copied()); + let expected_disk_id = disk_id_for(&in_service, target); + let input = build_input(collection, None, in_service); + + let (sitrep, _report) = run_analyze(&logctx.log, &input); + let facts = disk_facts(&sitrep, true); + assert_eq!(facts.len(), 1); + match &facts[0].2 { + DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + physical_disk_id, + zpool_id, + last_seen_health, + .. + }) => { + assert_eq!(*physical_disk_id, expected_disk_id); + assert_eq!(*zpool_id, target); + assert_eq!(*last_seen_health, ZpoolHealth::Degraded); + } + } + logctx.cleanup_successful(); + } + + #[test] + fn skips_degraded_when_expunged() { + let (logctx, mut collection, zpools) = setup("disk_skip_expunged"); + let target = zpools[0]; + set_health(&mut collection, target, ZpoolHealth::Faulted); + // target is *not* in the in-service set. 
+ let in_service = mk_in_service(zpools.iter().copied().skip(1)); + let input = build_input(collection, None, in_service); + + let (sitrep, _report) = run_analyze(&logctx.log, &input); + let cases = disk_facts(&sitrep, false); + assert!( + cases.is_empty(), + "no Disk cases should be opened for expunged zpool, got: {:?}", + cases + ); + logctx.cleanup_successful(); + } + + #[test] + fn idempotent_when_case_already_open() { + let (logctx, mut collection, zpools) = setup("disk_idempotent"); + let target = zpools[0]; + set_health(&mut collection, target, ZpoolHealth::Degraded); + let in_service = mk_in_service(zpools.iter().copied()); + let target_disk_id = disk_id_for(&in_service, target); + let parent_id = SitrepUuid::new_v4(); + let parent = make_parent_with_disk_case( + parent_id, + collection.id, + target_disk_id, + target, + ); + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + let open_cases = disk_facts(&sitrep, true); + assert_eq!(open_cases.len(), 1); + match &open_cases[0].2 { + DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + zpool_id, + .. + }) => { + assert_eq!(*zpool_id, target); + } + } + logctx.cleanup_successful(); + } + + #[test] + fn closes_on_recovery() { + let (logctx, collection, zpools) = setup("disk_close_on_recovery"); + let target = zpools[0]; + // The example system reports zpools as Online by default. 
+ let in_service = mk_in_service(zpools.iter().copied()); + let target_disk_id = disk_id_for(&in_service, target); + let parent_id = SitrepUuid::new_v4(); + let parent = make_parent_with_disk_case( + parent_id, + collection.id, + target_disk_id, + target, + ); + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, report) = run_analyze(&logctx.log, &input); + let all = disk_facts(&sitrep, false); + assert_eq!(all.len(), 1); + assert!( + !all[0].0.is_open(), + "case should be closed when zpool returns to Online", + ); + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("back to Online"), + "close comment should call out the recovery cause, got: \ + {report_str}", + ); + logctx.cleanup_successful(); + } + + #[test] + fn closes_on_expungement() { + let (logctx, mut collection, zpools) = + setup("disk_close_on_expungement"); + let target = zpools[0]; + set_health(&mut collection, target, ZpoolHealth::Degraded); + // Target is NOT in-service in this sitrep (just expunged). + let in_service = mk_in_service(zpools.iter().copied().skip(1)); + // Target isn't in the in-service set; fabricate a stable PhysicalDiskUuid. 
+ let target_disk_id = disk_id_for(&in_service, target); + let parent_id = SitrepUuid::new_v4(); + let parent = make_parent_with_disk_case( + parent_id, + collection.id, + target_disk_id, + target, + ); + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, report) = run_analyze(&logctx.log, &input); + let all = disk_facts(&sitrep, false); + assert_eq!(all.len(), 1); + assert!( + !all[0].0.is_open(), + "case should be closed when zpool's disk is expunged", + ); + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("no longer in service"), + "close comment should call out the expungement cause, got: \ + {report_str}", + ); + logctx.cleanup_successful(); + } + + #[test] + fn keeps_open_on_absence_from_inventory() { + // A zpool the case is about does NOT appear in the inventory at all + // (sled powered off, lossy collection, etc.). The case should stay + // open: absence is not a recovery signal. + let (logctx, collection, zpools) = setup("disk_keep_open_on_absence"); + let phantom = ZpoolUuid::new_v4(); + assert!(!zpools.contains(&phantom)); + let in_service = mk_in_service(zpools.iter().copied().chain([phantom])); + let phantom_disk_id = disk_id_for(&in_service, phantom); + let parent_id = SitrepUuid::new_v4(); + let parent = make_parent_with_disk_case( + parent_id, + collection.id, + phantom_disk_id, + phantom, + ); + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + let all = disk_facts(&sitrep, false); + assert_eq!(all.len(), 1); + assert!( + all[0].0.is_open(), + "case should remain open when its zpool is absent from the \ + current inventory collection (sled could be down or inventory \ + is lossy)", + ); + logctx.cleanup_successful(); + } + + /// A parent Disk case with zero facts has no derivable disk ID, so + /// the diagnosis engine leaves it alone (carried forward unchanged). 
+ #[test] + fn empty_case_is_left_open() { + let (logctx, collection, _zpools) = setup("disk_empty_case_left_open"); + let in_service = mk_in_service(std::iter::empty()); + + let parent_sitrep_id = SitrepUuid::new_v4(); + let empty_case_id = omicron_uuid_kinds::CaseUuid::new_v4(); + let mut parent_cases = iddqd::IdOrdMap::new(); + parent_cases + .insert_unique(fm::Case { + id: empty_case_id, + metadata: fm::case::Metadata { + created_sitrep_id: parent_sitrep_id, + closed_sitrep_id: None, + de: DiagnosisEngineKind::PhysicalDisk, + comment: "an open case with no facts".to_string(), + }, + ereports: Default::default(), + alerts_requested: Default::default(), + support_bundles_requested: Default::default(), + facts: Default::default(), + }) + .unwrap(); + let parent = Sitrep { + metadata: fm::SitrepMetadata { + id: parent_sitrep_id, + inv_collection_id: collection.id, + creator_id: OmicronZoneUuid::new_v4(), + parent_sitrep_id: None, + time_created: Utc::now(), + next_inv_min_time_started: Utc::now(), + comment: String::new(), + }, + cases: parent_cases, + ereports_by_id: Default::default(), + }; + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let case = sitrep + .cases + .iter() + .find(|c| c.id == empty_case_id) + .expect("empty case should still be in the output sitrep"); + assert!( + case.is_open(), + "empty case should be left open (no disk to verify)", + ); + logctx.cleanup_successful(); + } + + /// When the parent sitrep's fact content matches the diagnosis engine's current + /// observation, the fact carries forward with the same UUID — no + /// remove-and-readd churn. 
+ #[test] + fn fact_uuid_stable_when_observation_unchanged() { + let (logctx, mut collection, zpools) = setup("disk_fact_uuid_stable"); + let target = zpools[0]; + set_health(&mut collection, target, ZpoolHealth::Degraded); + let in_service = mk_in_service(zpools.iter().copied()); + let target_disk_id = disk_id_for(&in_service, target); + let parent_id = SitrepUuid::new_v4(); + let parent = make_parent_with_disk_case( + parent_id, + collection.id, + target_disk_id, + target, + ); + // Capture the parent's fact UUID for the target zpool. + let parent_fact_id = parent + .cases + .iter() + .find(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) + .expect("parent should have one Disk case") + .facts + .iter() + .next() + .expect("parent case should have one fact") + .id; + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + let open = disk_facts(&sitrep, true); + assert_eq!(open.len(), 1, "expected exactly one open Disk fact"); + assert_eq!( + open[0].1.id, parent_fact_id, + "fact UUID should be stable across sitreps when the \ + observation hasn't changed", + ); + match &open[0].2 { + DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + zpool_id, + last_seen_health, + .. + }) => { + assert_eq!(*zpool_id, target); + assert_eq!(*last_seen_health, ZpoolHealth::Degraded); + } + } + logctx.cleanup_successful(); + } + + /// When the parent's fact recorded a different `last_seen_health` than + /// what we observe now, the diagnosis engine removes the stale fact and emits + /// a fresh one (new UUID). The case stays open because the zpool is + /// still unhealthy — just with a different value. + #[test] + fn fact_uuid_rotates_when_observation_changes() { + let (logctx, mut collection, zpools) = setup("disk_fact_uuid_rotates"); + let target = zpools[0]; + // Parent recorded Degraded; current inventory shows Faulted. 
+ set_health(&mut collection, target, ZpoolHealth::Faulted); + let in_service = mk_in_service(zpools.iter().copied()); + let target_disk_id = disk_id_for(&in_service, target); + let parent_id = SitrepUuid::new_v4(); + let parent = make_parent_with_disk_case( + parent_id, + collection.id, + target_disk_id, + target, + ); + let parent_fact_id = parent + .cases + .iter() + .find(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) + .expect("parent should have one Disk case") + .facts + .iter() + .next() + .expect("parent case should have one fact") + .id; + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + let open = disk_facts(&sitrep, true); + assert_eq!( + open.len(), + 1, + "expected exactly one open Disk fact (the refreshed one)", + ); + assert_ne!( + open[0].1.id, parent_fact_id, + "fact UUID should rotate because last_seen_health changed", + ); + match &open[0].2 { + DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + zpool_id, + last_seen_health, + .. + }) => { + assert_eq!(*zpool_id, target); + assert_eq!(*last_seen_health, ZpoolHealth::Faulted); + } + } + // The case itself should still be the same one that was carried + // forward — only the fact rotated. 
+ assert!(open[0].0.is_open()); + logctx.cleanup_successful(); + } +} diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index 10173be4021..17cb3fddd48 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -9,11 +9,15 @@ use anyhow::Context; use chrono::Utc; use fm::analysis_input::InvalidInputs; use futures::future::BoxFuture; +use iddqd::IdOrdMap; +use nexus_db_model::PhysicalDiskPolicy; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_db_queries::db::datastore; +use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::pagination::Paginator; use nexus_fm as fm; +use nexus_types::in_service_disk::InServiceDisk; use nexus_types::internal_api::background::FmAnalysisStatus; use nexus_types::internal_api::background::fm_analysis as status; use nexus_types::inventory; @@ -217,8 +221,48 @@ impl FmAnalysis { (fm::analysis_input::Input, status::PreparationStatus), PreparationError, > { - let mut builder = - fm::analysis_input::Input::builder(parent_sitrep, inv)?; + // Load all external (U.2) zpools and project them down to FM's + // `InServiceDisk` view, filtering on `disk_policy = in_service` and a + // live (non-soft-deleted) physical_disk row. M.2 disks are not + // represented as control plane disks today, so the U.2-only filter + // on the underlying query matches reality. + // + // This is the executed view from the DB — flipped only after sagas / + // cleaners have actually drained resources, not while a planner is + // merely proposing changes. A faulty disk a planner proposes to + // expunge is still the diagnoser's concern until the control plane + // has actually moved on. 
+ let zpools_and_disks = self + .datastore + .zpool_list_all_external_batched(opctx) + .await + .context("failed to load in-service control plane disks")?; + let mut in_service_disks_map = IdOrdMap::new(); + for (zpool, disk) in zpools_and_disks { + if disk.disk_policy != PhysicalDiskPolicy::InService { + continue; + } + in_service_disks_map + .insert_unique(InServiceDisk { + physical_disk_id: disk.id(), + zpool_id: zpool.id(), + sled_id: disk.sled_id.into(), + vendor: disk.vendor, + serial: disk.serial, + model: disk.model, + variant: disk.variant.into(), + }) + .expect( + "physical_disk.id is a primary key, so duplicates are \ + impossible", + ); + } + let in_service_disks = Arc::new(in_service_disks_map); + let mut builder = fm::analysis_input::Input::builder( + parent_sitrep, + inv, + in_service_disks, + )?; let mut errors = Vec::new(); self.load_new_ereports(opctx, &mut builder, &mut errors) .await diff --git a/nexus/src/app/background/tasks/fm_rendezvous.rs b/nexus/src/app/background/tasks/fm_rendezvous.rs index ef76c1db310..0e2696f4778 100644 --- a/nexus/src/app/background/tasks/fm_rendezvous.rs +++ b/nexus/src/app/background/tasks/fm_rendezvous.rs @@ -552,6 +552,7 @@ mod tests { alerts_requested: iddqd::IdOrdMap::new(), ereports: iddqd::IdOrdMap::new(), support_bundles_requested: iddqd::IdOrdMap::new(), + facts: iddqd::IdOrdMap::new(), }; case1 .alerts_requested @@ -631,6 +632,7 @@ mod tests { alerts_requested: iddqd::IdOrdMap::new(), ereports: iddqd::IdOrdMap::new(), support_bundles_requested: iddqd::IdOrdMap::new(), + facts: iddqd::IdOrdMap::new(), }; case2 .alerts_requested @@ -933,6 +935,7 @@ mod tests { ereports, alerts_requested: iddqd::IdOrdMap::new(), support_bundles_requested: iddqd::IdOrdMap::new(), + facts: iddqd::IdOrdMap::new(), } }; @@ -1145,6 +1148,7 @@ mod tests { ereports, alerts_requested: iddqd::IdOrdMap::new(), support_bundles_requested: iddqd::IdOrdMap::new(), + facts: iddqd::IdOrdMap::new(), } }; @@ -1254,6 +1258,7 @@ mod tests { 
ereports, alerts_requested: iddqd::IdOrdMap::new(), support_bundles_requested: iddqd::IdOrdMap::new(), + facts: iddqd::IdOrdMap::new(), } }; @@ -1449,6 +1454,7 @@ mod tests { alerts_requested: iddqd::IdOrdMap::new(), ereports: iddqd::IdOrdMap::new(), support_bundles_requested: iddqd::IdOrdMap::new(), + facts: iddqd::IdOrdMap::new(), }; case1 .support_bundles_requested @@ -1625,6 +1631,7 @@ mod tests { alerts_requested: iddqd::IdOrdMap::new(), ereports: iddqd::IdOrdMap::new(), support_bundles_requested: iddqd::IdOrdMap::new(), + facts: iddqd::IdOrdMap::new(), }; case.support_bundles_requested .insert_unique(fm::case::SupportBundleRequest { diff --git a/nexus/src/app/background/tasks/fm_sitrep_load.rs b/nexus/src/app/background/tasks/fm_sitrep_load.rs index a6afdcb93ee..b2dca967abd 100644 --- a/nexus/src/app/background/tasks/fm_sitrep_load.rs +++ b/nexus/src/app/background/tasks/fm_sitrep_load.rs @@ -10,7 +10,6 @@ use chrono::Utc; use futures::future::BoxFuture; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; -use nexus_types::fm::Sitrep; use nexus_types::fm::SitrepVersion; use nexus_types::internal_api::background::SitrepLoadStatus as Status; use serde_json::json; @@ -23,7 +22,7 @@ pub struct SitrepLoader { tx: watch::Sender>, } -pub type CurrentSitrep = Arc<(SitrepVersion, Sitrep)>; +pub use nexus_types::fm::CurrentSitrep; impl BackgroundTask for SitrepLoader { fn activate<'a>( @@ -192,6 +191,7 @@ mod test { use super::*; use crate::app::background::BackgroundTask; use nexus_db_queries::db::pub_test_utils::TestDatabase; + use nexus_types::fm::Sitrep; use nexus_types::fm::SitrepMetadata; use omicron_test_utils::dev; use omicron_uuid_kinds::CollectionUuid; diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 00f8dc27544..716a00b36de 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -306,7 +306,6 @@ pub struct Nexus { repo_depot_resolver: Box, /// Watch channel containing the currently-loaded fault management 
sitrep. - #[allow(dead_code)] sitrep_load_rx: watch::Receiver>, /// handle to pull update status data @@ -1129,6 +1128,14 @@ impl Nexus { &self.db_datastore } + pub(crate) fn sitrep_load_rx( + &self, + ) -> watch::Receiver> { + let mut rx = self.sitrep_load_rx.clone(); + rx.mark_unchanged(); + rx + } + pub(crate) fn samael_max_issue_delay(&self) -> Option { let mid = self.samael_max_issue_delay.lock().unwrap(); *mid diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 1774bf35b28..e82ba37c2a6 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -532,6 +532,12 @@ impl nexus_test_interface::NexusServer for Server { self.apictx.context.nexus.inventory_load_rx() } + fn sitrep_load_rx( + &self, + ) -> watch::Receiver> { + self.apictx.context.nexus.sitrep_load_rx() + } + fn get_http_server_external_address(&self) -> SocketAddr { self.apictx.context.nexus.get_external_server_address().unwrap() } diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs index 35f726740e1..53d254b44ca 100644 --- a/nexus/test-interface/src/lib.rs +++ b/nexus/test-interface/src/lib.rs @@ -88,6 +88,10 @@ pub trait NexusServer: Send + Sync + 'static { fn inventory_load_rx(&self) -> watch::Receiver>>; + fn sitrep_load_rx( + &self, + ) -> watch::Receiver>; + fn get_http_server_external_address(&self) -> SocketAddr; fn get_http_server_techport_address(&self) -> SocketAddr; fn get_http_server_internal_address(&self) -> SocketAddr; diff --git a/nexus/test-utils/src/nexus_test.rs b/nexus/test-utils/src/nexus_test.rs index 693aea88732..745ea7b31bc 100644 --- a/nexus/test-utils/src/nexus_test.rs +++ b/nexus/test-utils/src/nexus_test.rs @@ -200,6 +200,38 @@ impl ControlPlaneTestContext { } } + /// Wait until at least one fault management sitrep has been committed and + /// loaded. + /// + /// # Panics + /// + /// Panics if no sitrep is loaded within `timeout`. 
+ pub async fn wait_for_at_least_one_sitrep(&self, timeout: Duration) { + let mut sitrep_rx = self.server.sitrep_load_rx(); + + match wait_for_watch_channel_condition( + &mut sitrep_rx, + async |sitrep| { + if sitrep.is_some() { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + timeout, + ) + .await + { + Ok(()) => (), + Err(poll::Error::TimedOut(elapsed)) => { + panic!("no sitrep found within {elapsed:?}"); + } + Err(poll::Error::PermanentError(())) => { + unreachable!("check can only fail via timeout") + } + } + } + pub fn internal_client(&self) -> nexus_client::Client { nexus_client::Client::new( &format!("http://{}", self.internal_client.bind_address), diff --git a/nexus/types/output/analysis_input_report_empty.out b/nexus/types/output/analysis_input_report_empty.out index f221b3960a0..a479e87d36c 100644 --- a/nexus/types/output/analysis_input_report_empty.out +++ b/nexus/types/output/analysis_input_report_empty.out @@ -4,3 +4,5 @@ parent sitrep: inventory collection: bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb no new ereports since the parent sitrep no cases copied forward + +no in-service control plane disks diff --git a/nexus/types/output/analysis_input_report_same_inv.out b/nexus/types/output/analysis_input_report_same_inv.out index 5ed6b60d151..25631755f78 100644 --- a/nexus/types/output/analysis_input_report_same_inv.out +++ b/nexus/types/output/analysis_input_report_same_inv.out @@ -5,3 +5,5 @@ inventory collection: bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb --> same collection as parent sitrep no new ereports since the parent sitrep no cases copied forward + +no in-service control plane disks diff --git a/nexus/types/output/analysis_input_report_with_cases.out b/nexus/types/output/analysis_input_report_with_cases.out index f3af0fe55ff..a1fc877ef72 100644 --- a/nexus/types/output/analysis_input_report_with_cases.out +++ b/nexus/types/output/analysis_input_report_with_cases.out @@ -22,3 +22,5 @@ cases (2 total): closed in sitrep: 
55555555-5555-5555-5555-555555555555 copied forwards because these ereports haven't been marked seen yet: * ereport dddddddd-dddd-dddd-dddd-dddddddddddd:2 + +no in-service control plane disks diff --git a/nexus/types/src/fm.rs b/nexus/types/src/fm.rs index fd2957704aa..1c4dba06532 100644 --- a/nexus/types/src/fm.rs +++ b/nexus/types/src/fm.rs @@ -12,6 +12,8 @@ pub mod ereport; pub use ereport::{Ereport, EreportId}; pub mod case; pub use case::Case; +pub mod fact; +pub use fact::{DiskFact, FactPayload, ZpoolUnhealthyFactPayload}; pub(crate) mod json_display; use case::AlertRequest; @@ -202,6 +204,10 @@ pub struct SitrepVersion { pub time_made_current: DateTime, } +/// The current sitrep paired with its [`SitrepVersion`] metadata, wrapped in an +/// `Arc` so it can be shared cheaply (e.g., across `watch` channels). +pub type CurrentSitrep = Arc<(SitrepVersion, Sitrep)>; + #[derive( Copy, Clone, @@ -217,4 +223,5 @@ pub struct SitrepVersion { #[strum(serialize_all = "snake_case")] pub enum DiagnosisEngineKind { PowerShelf, + PhysicalDisk, } diff --git a/nexus/types/src/fm/analysis_reports.rs b/nexus/types/src/fm/analysis_reports.rs index 249e26f1b8b..42b6c6537a4 100644 --- a/nexus/types/src/fm/analysis_reports.rs +++ b/nexus/types/src/fm/analysis_reports.rs @@ -9,7 +9,9 @@ use super::case; use super::ereport::EreportId; use super::json_display::fmt_json_value; use iddqd::IdOrdMap; -use omicron_uuid_kinds::{CaseUuid, CollectionUuid, SitrepUuid}; +use omicron_uuid_kinds::{ + CaseUuid, CollectionUuid, PhysicalDiskUuid, SitrepUuid, +}; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -227,6 +229,9 @@ pub struct InputReport { /// Cases which have closed, but which have been copied forwards as they /// contain ereports which have not yet been marked seen. pub closed_cases_copied_forward: BTreeMap, + /// All control-plane-managed physical disks visible to the diagnosis + /// engines for this analysis pass. 
+ pub in_service_disks: BTreeSet, } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] @@ -257,6 +262,7 @@ impl fmt::Display for InputReportMultilineDisplay<'_> { new_ereport_ids, open_cases, closed_cases_copied_forward, + in_service_disks, }, indent, } = self; @@ -357,6 +363,21 @@ impl fmt::Display for InputReportMultilineDisplay<'_> { writeln!(f, "{:indent$}no cases copied forward", "")?; } + if in_service_disks.is_empty() { + writeln!(f, "\n{:indent$}no in-service control plane disks", "")?; + } else { + writeln!( + f, + "\n{:indent$}in-service control plane disks ({} total):", + "", + in_service_disks.len() + )?; + let indent = indent + 2; + for disk_id in in_service_disks { + writeln!(f, "{:indent$}* disk {disk_id}", "")?; + } + } + Ok(()) } } @@ -441,6 +462,7 @@ mod tests { new_ereport_ids, open_cases, closed_cases_copied_forward, + in_service_disks: BTreeSet::new(), } } @@ -456,6 +478,7 @@ mod tests { new_ereport_ids: BTreeSet::new(), open_cases: BTreeMap::new(), closed_cases_copied_forward: BTreeMap::new(), + in_service_disks: BTreeSet::new(), } } @@ -474,6 +497,7 @@ mod tests { new_ereport_ids: BTreeSet::new(), open_cases: BTreeMap::new(), closed_cases_copied_forward: BTreeMap::new(), + in_service_disks: BTreeSet::new(), } } diff --git a/nexus/types/src/fm/case.rs b/nexus/types/src/fm/case.rs index 03e930e4d64..b6b8576e68f 100644 --- a/nexus/types/src/fm/case.rs +++ b/nexus/types/src/fm/case.rs @@ -2,14 +2,17 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+use super::json_display::fmt_json_value; use crate::alert::AlertClass; use crate::fm::DiagnosisEngineKind; use crate::fm::Ereport; use crate::fm::EreportId; +use crate::fm::FactPayload; use crate::support_bundle::BundleDataSelection; use iddqd::{IdOrdItem, IdOrdMap}; use omicron_uuid_kinds::{ - AlertUuid, CaseEreportUuid, CaseUuid, SitrepUuid, SupportBundleUuid, + AlertUuid, CaseEreportUuid, CaseUuid, FactUuid, SitrepUuid, + SupportBundleUuid, }; use serde::{Deserialize, Serialize}; use std::fmt; @@ -24,6 +27,9 @@ pub struct Case { pub ereports: IdOrdMap, pub alerts_requested: IdOrdMap, pub support_bundles_requested: IdOrdMap, + /// Diagnosis-engine-derived facts attached to this case. See + /// [`Fact`] for semantics. + pub facts: IdOrdMap, } impl Case { @@ -159,6 +165,84 @@ impl CaseEreport { } } +/// A diagnosis-engine-derived fact attached to a [`Case`]. +/// +/// Facts are **immutable**: to "update" a fact, the diagnosis engine +/// removes the old one and adds a fresh one. As long as a fact's content +/// matches the engine's current view, the same fact is carried forward +/// across sitreps unchanged. +/// +/// The `payload` is a fully-typed [`FactPayload`] whose variant is owned by +/// the case's diagnosis engine (see [`Metadata::de`]). +/// +/// `Eq`/`PartialEq` derive over all fields, including the typed payload. +/// This is the equality the DB round-trip test needs. +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct Fact { + pub id: FactUuid, + /// The sitrep in which this fact was first added. Preserved + /// unchanged when the fact is carried forward into a child sitrep. + /// Debug-only. 
+ pub created_sitrep_id: SitrepUuid, + pub payload: FactPayload, + pub comment: String, +} + +impl IdOrdItem for Fact { + type Key<'a> = &'a FactUuid; + fn key(&self) -> Self::Key<'_> { + &self.id + } + iddqd::id_upcast!(); +} + +impl Fact { + pub fn display_multiline( + &self, + indent: usize, + sitrep_id: Option, + ) -> impl fmt::Display + '_ { + struct DisplayFact<'a> { + fact: &'a Fact, + indent: usize, + sitrep_id: Option, + } + + impl fmt::Display for DisplayFact<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const BULLET: &str = "* "; + const ADDED_IN: &str = "added in:"; + const COMMENT: &str = "comment:"; + const WIDTH: usize = const_max_len(&[ADDED_IN, COMMENT]); + + let &Self { + fact: Fact { id, created_sitrep_id, payload, comment }, + indent, + sitrep_id, + } = self; + let this_sitrep = |s| { + if Some(s) == sitrep_id { " <-- this sitrep" } else { "" } + }; + + writeln!(f, "{BULLET:>indent$}fact {id}")?; + writeln!( + f, + "{:>indent$}{ADDED_IN:indent$}{COMMENT: { ereports, alerts_requested, support_bundles_requested, + facts, }, indent, sitrep_id, @@ -286,6 +371,16 @@ impl fmt::Display for DisplayCase<'_> { } } + if !facts.is_empty() { + writeln!(f, "\n{:>indent$}facts:", "")?; + writeln!(f, "{:>indent$}------", "")?; + + let indent = indent + 2; + for fact in facts.iter() { + fact.display_multiline(indent, sitrep_id).fmt(f)?; + } + } + if !alerts_requested.is_empty() { writeln!(f, "\n{:>indent$}alerts requested:", "")?; writeln!(f, "{:>indent$}-----------------", "")?; @@ -373,12 +468,14 @@ mod tests { use super::*; use crate::fm::DiagnosisEngineKind; use crate::fm::ereport::EreportFilters; - use crate::inventory::SpType; + use crate::fm::{DiskFact, ZpoolUnhealthyFactPayload}; + use crate::inventory::{SpType, ZpoolHealth}; use crate::support_bundle::BundleDataSelection; use ereport_types::{Ena, EreportId}; use omicron_uuid_kinds::{ - AlertUuid, CaseUuid, EreporterRestartUuid, OmicronZoneUuid, SitrepUuid, - SupportBundleUuid, + 
AlertUuid, CaseUuid, CollectionUuid, EreporterRestartUuid, FactUuid, + OmicronZoneUuid, PhysicalDiskUuid, SitrepUuid, SupportBundleUuid, + ZpoolUuid, }; use std::str::FromStr; use std::sync::Arc; @@ -514,6 +611,34 @@ mod tests { }) .unwrap(); + let mut facts = IdOrdMap::new(); + facts + .insert_unique(Fact { + id: FactUuid::from_str("f00f00f0-0f00-4f00-8f00-f00f00f00f00") + .unwrap(), + created_sitrep_id, + payload: FactPayload::PhysicalDisk(DiskFact::ZpoolUnhealthy( + ZpoolUnhealthyFactPayload { + physical_disk_id: PhysicalDiskUuid::from_str( + "d15d15d1-5d15-4d15-8d15-d15d15d15d15", + ) + .unwrap(), + zpool_id: ZpoolUuid::from_str( + "200100f0-0100-4f00-8f00-f00f00f00f00", + ) + .unwrap(), + last_seen_health: ZpoolHealth::Degraded, + observed_in_inv: CollectionUuid::from_str( + "c0110011-c011-4011-8011-c0110011c011", + ) + .unwrap(), + time_observed: chrono::DateTime::::MIN_UTC, + }, + )), + comment: "made-up fact for display test".to_string(), + }) + .unwrap(); + // Create the case let case = Case { id: case_id, @@ -527,6 +652,7 @@ mod tests { ereports, alerts_requested, support_bundles_requested, + facts, }; eprintln!("example case display:"); diff --git a/nexus/types/src/fm/fact.rs b/nexus/types/src/fm/fact.rs new file mode 100644 index 00000000000..ae270e86b5f --- /dev/null +++ b/nexus/types/src/fm/fact.rs @@ -0,0 +1,77 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Typed payloads of diagnosis-engine-derived [`Fact`]s. +//! +//! Each diagnosis engine owns one variant of [`FactPayload`]. A [`Case`] +//! belongs to exactly one diagnosis engine (see [`Metadata::de`]), so every +//! fact on a case carries that engine's variant. +//! +//! [`Fact`]: super::case::Fact +//! [`Case`]: super::case::Case +//! 
[`Metadata::de`]: super::case::Metadata::de + +use crate::inventory::ZpoolHealth; +use chrono::{DateTime, Utc}; +use omicron_uuid_kinds::{CollectionUuid, PhysicalDiskUuid, ZpoolUuid}; +use serde::{Deserialize, Serialize}; + +/// The typed payload of a [`Fact`](super::case::Fact). +/// +/// One variant per diagnosis engine. The variant a fact carries always +/// matches its case's [`de`](super::case::Metadata::de); other engines and +/// shared FM code must not interpret another engine's variant. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[serde(tag = "engine", rename_all = "snake_case")] +pub enum FactPayload { + /// A fact owned by the physical-disk diagnosis engine. + PhysicalDisk(DiskFact), +} + +impl From for FactPayload { + fn from(fact: DiskFact) -> Self { + FactPayload::PhysicalDisk(fact) + } +} + +impl FactPayload { + /// The physical-disk payload, or `None` if this fact belongs to a + /// different diagnosis engine. + pub fn as_physical_disk(&self) -> Option<&DiskFact> { + #[allow(unreachable_patterns)] + match self { + FactPayload::PhysicalDisk(fact) => Some(fact), + _ => None, + } + } +} + +/// Per-fact state for the physical-disk diagnosis engine. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum DiskFact { + /// The zpool's most recently observed health is non-`Online`. + ZpoolUnhealthy(ZpoolUnhealthyFactPayload), +} + +/// Payload of a [`DiskFact::ZpoolUnhealthy`] fact. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct ZpoolUnhealthyFactPayload { + /// The physical disk this fact (and its parent case) is about. + /// Every fact on a Disk case must agree on this value. + pub physical_disk_id: PhysicalDiskUuid, + /// The zpool whose health was observed. Kept for provenance — the + /// case is keyed by `physical_disk_id`, but knowing the exact zpool + /// makes the fact self-describing when read in isolation. 
+ pub zpool_id: ZpoolUuid, + pub last_seen_health: ZpoolHealth, + /// Inventory collection that produced this observation. Recorded for + /// provenance only: the diagnosis engine never looks this collection + /// back up (it may well have been GC'd by the time anyone reads the + /// fact). If multiple `ZpoolUnhealthy` facts ever end up on the same + /// case, this lets a human reader see which inventory each came from. + pub observed_in_inv: CollectionUuid, + /// `time_done` of `observed_in_inv`. + pub time_observed: DateTime, +} diff --git a/nexus/types/src/in_service_disk.rs b/nexus/types/src/in_service_disk.rs new file mode 100644 index 00000000000..bc7814ce2cb --- /dev/null +++ b/nexus/types/src/in_service_disk.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! "Currently in-service control-plane disks" — the executed view from +//! the `physical_disk` and `zpool` DB tables. +//! +//! This is distinct from the planned view in `BlueprintPhysicalDiskConfig`: +//! a disk is in this set only after the control plane has actually committed +//! to managing it (`physical_disk.disk_policy = 'in_service'`), not while a +//! planner is merely proposing to expunge or adopt it. Consumers that need +//! the *committed* view of which disks are part of the rack — fault +//! management diagnosers in particular — should read this rather than the +//! target blueprint. + +use crate::external_api::physical_disk::PhysicalDiskKind; +use iddqd::{IdOrdItem, id_upcast}; +use omicron_uuid_kinds::PhysicalDiskUuid; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; + +/// One control-plane-managed physical disk, joined with its zpool and the +/// stable identity facts the DB already knows about it. 
+#[derive(Clone, Debug, PartialEq)] +pub struct InServiceDisk { + pub physical_disk_id: PhysicalDiskUuid, + pub zpool_id: ZpoolUuid, + pub sled_id: SledUuid, + pub vendor: String, + pub serial: String, + pub model: String, + pub variant: PhysicalDiskKind, +} + +impl IdOrdItem for InServiceDisk { + type Key<'a> = PhysicalDiskUuid; + fn key(&self) -> Self::Key<'_> { + self.physical_disk_id + } + id_upcast!(); +} diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 9fccee9ef5f..2370cdd3ce1 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -45,7 +45,7 @@ use sled_agent_types_versions::latest::inventory::SingleMeasurementInventory; use sled_agent_types_versions::latest::inventory::SledCpuFamily; use sled_agent_types_versions::latest::inventory::SledRole; use sled_agent_types_versions::latest::inventory::SvcsEnabledNotOnlineResult; -use sled_agent_types_versions::latest::inventory::ZpoolHealth; +pub use sled_agent_types_versions::latest::inventory::ZpoolHealth; use sled_hardware_types::BaseboardId; use std::collections::BTreeMap; use std::collections::BTreeSet; diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index 36e945f58eb..9a5eeb77c06 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -35,6 +35,7 @@ pub mod deployment; pub mod external_api; pub mod fm; pub mod identity; +pub mod in_service_disk; pub mod instance; pub mod internal_api; pub mod inventory; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 308379c85c5..939b744188e 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7620,7 +7620,8 @@ ON omicron.public.fm_sitrep_history (sitrep_id); CREATE TYPE IF NOT EXISTS omicron.public.diagnosis_engine AS ENUM ( - 'power_shelf' + 'power_shelf', + 'physical_disk' ); CREATE TABLE IF NOT EXISTS omicron.public.fm_case ( @@ -7647,6 +7648,63 @@ CREATE INDEX IF NOT EXISTS lookup_fm_cases_for_sitrep ON omicron.public.fm_case (sitrep_id); +-- Per-engine 
"facts" attached to a case. Each diagnosis engine persists its +-- facts in its own table (one table per engine), with a fact's content +-- represented as typed columns. The `fm_fact_physical_disk` table below holds +-- the physical-disk engine's facts. +CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_physical_disk_kind AS ENUM ( + 'zpool_unhealthy' +); + +CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( + -- Stable UUID for this fact across sitreps. + id UUID NOT NULL, + -- Sitrep this row belongs to. + sitrep_id UUID NOT NULL, + -- UUID of the case this fact attaches to. + case_id UUID NOT NULL, + -- UUID of the sitrep in which this fact was first added. Preserved + -- unchanged when the fact is carried forward into a child sitrep, so + -- this can be used to tell at a glance how long a fact has been + -- attached to its case. Debug-only. + created_sitrep_id UUID NOT NULL, + -- Free-form, debug-only comment. + comment TEXT NOT NULL, + + -- Which physical-disk fact this row represents. The columns below are + -- populated according to this discriminant (see the CHECK constraint). + kind omicron.public.fm_fact_physical_disk_kind NOT NULL, + + -- Columns for a 'zpool_unhealthy' fact. NULL for any other kind. 
+ physical_disk_id UUID, + zpool_id UUID, + last_seen_health omicron.public.inv_zpool_health, + observed_in_inv UUID, + time_observed TIMESTAMPTZ, + + PRIMARY KEY (sitrep_id, id), + + CONSTRAINT zpool_unhealthy_columns_present CHECK ( + (kind = 'zpool_unhealthy' + AND physical_disk_id IS NOT NULL + AND zpool_id IS NOT NULL + AND last_seen_health IS NOT NULL + AND observed_in_inv IS NOT NULL + AND time_observed IS NOT NULL) + OR + (kind != 'zpool_unhealthy' + AND physical_disk_id IS NULL + AND zpool_id IS NULL + AND last_seen_health IS NULL + AND observed_in_inv IS NULL + AND time_observed IS NULL) + ) +); + +CREATE INDEX IF NOT EXISTS + lookup_fm_fact_physical_disk_for_case +ON omicron.public.fm_fact_physical_disk (sitrep_id, case_id); + CREATE TABLE IF NOT EXISTS omicron.public.fm_ereport_in_case ( -- ID of this association. When an ereport is assigned to a case, that -- association is assigned a UUID. These are used primarily to aid in @@ -8623,7 +8681,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '261.0.0', NULL) + (TRUE, NOW(), NOW(), '262.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/fm-disk-de-and-facts/up1.sql b/schema/crdb/fm-disk-de-and-facts/up1.sql new file mode 100644 index 00000000000..602e91c8175 --- /dev/null +++ b/schema/crdb/fm-disk-de-and-facts/up1.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.diagnosis_engine ADD VALUE IF NOT EXISTS 'physical_disk'; diff --git a/schema/crdb/fm-disk-de-and-facts/up2.sql b/schema/crdb/fm-disk-de-and-facts/up2.sql new file mode 100644 index 00000000000..3aad0937d4c --- /dev/null +++ b/schema/crdb/fm-disk-de-and-facts/up2.sql @@ -0,0 +1,3 @@ +CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_physical_disk_kind AS ENUM ( + 'zpool_unhealthy' +); diff --git a/schema/crdb/fm-disk-de-and-facts/up3.sql b/schema/crdb/fm-disk-de-and-facts/up3.sql new file mode 100644 index 00000000000..c095cf8d564 --- /dev/null +++ 
b/schema/crdb/fm-disk-de-and-facts/up3.sql @@ -0,0 +1,44 @@ +CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( + -- Stable UUID for this fact across sitreps. + id UUID NOT NULL, + -- Sitrep this row belongs to. + sitrep_id UUID NOT NULL, + -- UUID of the case this fact attaches to. + case_id UUID NOT NULL, + -- UUID of the sitrep in which this fact was first added. Preserved + -- unchanged when the fact is carried forward into a child sitrep, so + -- this can be used to tell at a glance how long a fact has been + -- attached to its case. Debug-only. + created_sitrep_id UUID NOT NULL, + -- Free-form, debug-only comment. + comment TEXT NOT NULL, + + -- Which physical-disk fact this row represents. The columns below are + -- populated according to this discriminant (see the CHECK constraint). + kind omicron.public.fm_fact_physical_disk_kind NOT NULL, + + -- Columns for a 'zpool_unhealthy' fact. NULL for any other kind. + physical_disk_id UUID, + zpool_id UUID, + last_seen_health omicron.public.inv_zpool_health, + observed_in_inv UUID, + time_observed TIMESTAMPTZ, + + PRIMARY KEY (sitrep_id, id), + + CONSTRAINT zpool_unhealthy_columns_present CHECK ( + (kind = 'zpool_unhealthy' + AND physical_disk_id IS NOT NULL + AND zpool_id IS NOT NULL + AND last_seen_health IS NOT NULL + AND observed_in_inv IS NOT NULL + AND time_observed IS NOT NULL) + OR + (kind != 'zpool_unhealthy' + AND physical_disk_id IS NULL + AND zpool_id IS NULL + AND last_seen_health IS NULL + AND observed_in_inv IS NULL + AND time_observed IS NULL) + ) +); diff --git a/schema/crdb/fm-disk-de-and-facts/up4.sql b/schema/crdb/fm-disk-de-and-facts/up4.sql new file mode 100644 index 00000000000..83d3461270d --- /dev/null +++ b/schema/crdb/fm-disk-de-and-facts/up4.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS lookup_fm_fact_physical_disk_for_case ON omicron.public.fm_fact_physical_disk (sitrep_id, case_id); diff --git a/schema/crdb/fm-disk-de-and-facts/up4.verify.sql 
b/schema/crdb/fm-disk-de-and-facts/up4.verify.sql new file mode 100644 index 00000000000..788cdeebc19 --- /dev/null +++ b/schema/crdb/fm-disk-de-and-facts/up4.verify.sql @@ -0,0 +1,2 @@ +-- DO NOT EDIT. Generated by test_migration_verification_files. +SELECT CAST(IF((SELECT true WHERE EXISTS (SELECT index_name FROM omicron.crdb_internal.table_indexes WHERE descriptor_name = 'fm_fact_physical_disk' AND index_name = 'lookup_fm_fact_physical_disk_for_case')),'true','Schema change verification failed: index lookup_fm_fact_physical_disk_for_case on table fm_fact_physical_disk does not exist') AS BOOL); diff --git a/test-utils/src/dev/test_cmds.rs b/test-utils/src/dev/test_cmds.rs index f254c06726e..60919f4311f 100644 --- a/test-utils/src/dev/test_cmds.rs +++ b/test-utils/src/dev/test_cmds.rs @@ -244,6 +244,19 @@ impl<'a> Redactor<'a> { self } + /// Replace any text matching `pattern` with ``. + /// + /// More flexible than [`Self::field`], which requires a fixed + /// `namevalue` shape. Use this when the entire variable substring + /// is itself a regex (e.g., the substring may take one of several + /// alternative forms across runs). + pub fn variable_regex(&mut self, name: &str, pattern: &str) -> &mut Self { + let re = regex::Regex::new(pattern).unwrap(); + let replacement = format!("<{}_REDACTED>", name.to_uppercase()); + self.extra_regex.push((re, replacement)); + self + } + /// Redact an entire indented section. /// /// This can be used if the shape of a section might change from run to run. diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 4687e2d5710..304e122201a 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -60,6 +60,7 @@ impl_typed_uuid_kinds! 
{ ExternalIp = {}, ExternalSubnet = {}, ExternalZpool = {}, + Fact = {}, FmdHostCase = {}, FmdResource = {}, Instance = {}, From bfedaa533e2e860dd5eef2517e3df88546f1a5af Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 3 Jun 2026 11:51:03 -0700 Subject: [PATCH 02/30] better errors --- nexus/db-model/src/fm/fact_physical_disk.rs | 31 +++++++++++++-------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/nexus/db-model/src/fm/fact_physical_disk.rs b/nexus/db-model/src/fm/fact_physical_disk.rs index b6510590bff..42a84d904e8 100644 --- a/nexus/db-model/src/fm/fact_physical_disk.rs +++ b/nexus/db-model/src/fm/fact_physical_disk.rs @@ -107,29 +107,36 @@ impl FmFactPhysicalDisk { /// indicates a corrupt row (e.g. hand-edited) and yields an internal /// error rather than a panic. pub fn into_fact(self) -> Result { - let payload = match self.kind { + let kind = self.kind; + let payload = match kind { FmFactPhysicalDiskKind::ZpoolUnhealthy => { FactPayload::PhysicalDisk(DiskFact::ZpoolUnhealthy( ZpoolUnhealthyFactPayload { physical_disk_id: self .physical_disk_id - .ok_or_else(|| missing_column("physical_disk_id"))? + .ok_or_else(|| { + missing_column(kind, "physical_disk_id") + })? .into(), zpool_id: self .zpool_id - .ok_or_else(|| missing_column("zpool_id"))? + .ok_or_else(|| missing_column(kind, "zpool_id"))? .into(), last_seen_health: self .last_seen_health - .ok_or_else(|| missing_column("last_seen_health"))? + .ok_or_else(|| { + missing_column(kind, "last_seen_health") + })? .into(), observed_in_inv: self .observed_in_inv - .ok_or_else(|| missing_column("observed_in_inv"))? + .ok_or_else(|| { + missing_column(kind, "observed_in_inv") + })? 
.into(), - time_observed: self - .time_observed - .ok_or_else(|| missing_column("time_observed"))?, + time_observed: self.time_observed.ok_or_else(|| { + missing_column(kind, "time_observed") + })?, }, )) } @@ -143,10 +150,10 @@ impl FmFactPhysicalDisk { } } -fn missing_column(column: &str) -> Error { +fn missing_column(kind: FmFactPhysicalDiskKind, column: &str) -> Error { Error::internal_error(&format!( - "fm_fact_physical_disk row of kind 'zpool_unhealthy' has a NULL \ - {column} (violates the zpool_unhealthy_columns_present CHECK \ - constraint)" + "fm_fact_physical_disk row of kind {kind:?} has a NULL {column}, \ + violating the CHECK constraint requiring it to be non-NULL for \ + this kind" )) } From 1b5106a9217a35501886f03c2fb8ab9839f54bd7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 3 Jun 2026 15:55:16 -0700 Subject: [PATCH 03/30] clearer constraints, a non-null column for physical_id --- nexus/db-model/src/fm/fact_physical_disk.rs | 15 ++++++--------- nexus/db-schema/src/schema.rs | 2 +- nexus/types/src/fm/fact.rs | 11 +++++++++++ schema/crdb/dbinit.sql | 18 +++++++----------- schema/crdb/fm-disk-de-and-facts/up3.sql | 18 +++++++----------- 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/nexus/db-model/src/fm/fact_physical_disk.rs b/nexus/db-model/src/fm/fact_physical_disk.rs index 42a84d904e8..60e9ef25d6d 100644 --- a/nexus/db-model/src/fm/fact_physical_disk.rs +++ b/nexus/db-model/src/fm/fact_physical_disk.rs @@ -51,10 +51,13 @@ pub struct FmFactPhysicalDisk { /// Preserved unchanged when the fact is carried forward; debug-only. pub created_sitrep_id: DbTypedUuid, pub comment: String, + + /// The physical disk this fact is about. Common to every `kind`, so it is + /// always present (the column is `NOT NULL`). + pub physical_disk_id: DbTypedUuid, pub kind: FmFactPhysicalDiskKind, // Columns for the `ZpoolUnhealthy` kind. 
- pub physical_disk_id: Option>, pub zpool_id: Option>, pub last_seen_health: Option, pub observed_in_inv: Option>, @@ -80,8 +83,8 @@ impl FmFactPhysicalDisk { case_id: case_id.into(), created_sitrep_id: fact.created_sitrep_id.into(), comment: fact.comment.clone(), + physical_disk_id: disk_fact.physical_disk_id().into(), kind: FmFactPhysicalDiskKind::ZpoolUnhealthy, - physical_disk_id: None, zpool_id: None, last_seen_health: None, observed_in_inv: None, @@ -90,7 +93,6 @@ impl FmFactPhysicalDisk { match disk_fact { DiskFact::ZpoolUnhealthy(p) => Self { kind: FmFactPhysicalDiskKind::ZpoolUnhealthy, - physical_disk_id: Some(p.physical_disk_id.into()), zpool_id: Some(p.zpool_id.into()), last_seen_health: Some(p.last_seen_health.into()), observed_in_inv: Some(p.observed_in_inv.into()), @@ -112,12 +114,7 @@ impl FmFactPhysicalDisk { FmFactPhysicalDiskKind::ZpoolUnhealthy => { FactPayload::PhysicalDisk(DiskFact::ZpoolUnhealthy( ZpoolUnhealthyFactPayload { - physical_disk_id: self - .physical_disk_id - .ok_or_else(|| { - missing_column(kind, "physical_disk_id") - })? - .into(), + physical_disk_id: self.physical_disk_id.into(), zpool_id: self .zpool_id .ok_or_else(|| missing_column(kind, "zpool_id"))? diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index b1275bfe138..62a72c0bad8 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -3250,8 +3250,8 @@ table! { case_id -> Uuid, created_sitrep_id -> Uuid, comment -> Text, + physical_disk_id -> Uuid, kind -> crate::enums::FmFactPhysicalDiskKindEnum, - physical_disk_id -> Nullable, zpool_id -> Nullable, last_seen_health -> Nullable, observed_in_inv -> Nullable, diff --git a/nexus/types/src/fm/fact.rs b/nexus/types/src/fm/fact.rs index ae270e86b5f..714f6660c2b 100644 --- a/nexus/types/src/fm/fact.rs +++ b/nexus/types/src/fm/fact.rs @@ -55,6 +55,17 @@ pub enum DiskFact { ZpoolUnhealthy(ZpoolUnhealthyFactPayload), } +impl DiskFact { + /// The physical disk this fact is about. 
Common to every kind of disk + /// fact: a Disk case is keyed by its physical disk, and every fact on + /// the case agrees on this value. + pub fn physical_disk_id(&self) -> PhysicalDiskUuid { + match self { + DiskFact::ZpoolUnhealthy(p) => p.physical_disk_id, + } + } +} + /// Payload of a [`DiskFact::ZpoolUnhealthy`] fact. #[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct ZpoolUnhealthyFactPayload { diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 939b744188e..b39b88d184a 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7671,12 +7671,16 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( -- Free-form, debug-only comment. comment TEXT NOT NULL, + -- The physical disk this fact is about. Common to every kind of + -- physical-disk fact (the case is keyed by it), so it is always present + -- regardless of `kind`. + physical_disk_id UUID NOT NULL, + -- Which physical-disk fact this row represents. The columns below are -- populated according to this discriminant (see the CHECK constraint). kind omicron.public.fm_fact_physical_disk_kind NOT NULL, -- Columns for a 'zpool_unhealthy' fact. NULL for any other kind. 
- physical_disk_id UUID, zpool_id UUID, last_seen_health omicron.public.inv_zpool_health, observed_in_inv UUID, @@ -7685,19 +7689,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( PRIMARY KEY (sitrep_id, id), CONSTRAINT zpool_unhealthy_columns_present CHECK ( - (kind = 'zpool_unhealthy' - AND physical_disk_id IS NOT NULL + kind = 'zpool_unhealthy' AND zpool_id IS NOT NULL AND last_seen_health IS NOT NULL AND observed_in_inv IS NOT NULL - AND time_observed IS NOT NULL) - OR - (kind != 'zpool_unhealthy' - AND physical_disk_id IS NULL - AND zpool_id IS NULL - AND last_seen_health IS NULL - AND observed_in_inv IS NULL - AND time_observed IS NULL) + AND time_observed IS NOT NULL ) ); diff --git a/schema/crdb/fm-disk-de-and-facts/up3.sql b/schema/crdb/fm-disk-de-and-facts/up3.sql index c095cf8d564..4249378b4a1 100644 --- a/schema/crdb/fm-disk-de-and-facts/up3.sql +++ b/schema/crdb/fm-disk-de-and-facts/up3.sql @@ -13,12 +13,16 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( -- Free-form, debug-only comment. comment TEXT NOT NULL, + -- The physical disk this fact is about. Common to every kind of + -- physical-disk fact (the case is keyed by it), so it is always present + -- regardless of `kind`. + physical_disk_id UUID NOT NULL, + -- Which physical-disk fact this row represents. The columns below are -- populated according to this discriminant (see the CHECK constraint). kind omicron.public.fm_fact_physical_disk_kind NOT NULL, -- Columns for a 'zpool_unhealthy' fact. NULL for any other kind. 
- physical_disk_id UUID, zpool_id UUID, last_seen_health omicron.public.inv_zpool_health, observed_in_inv UUID, @@ -27,18 +31,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( PRIMARY KEY (sitrep_id, id), CONSTRAINT zpool_unhealthy_columns_present CHECK ( - (kind = 'zpool_unhealthy' - AND physical_disk_id IS NOT NULL + kind = 'zpool_unhealthy' AND zpool_id IS NOT NULL AND last_seen_health IS NOT NULL AND observed_in_inv IS NOT NULL - AND time_observed IS NOT NULL) - OR - (kind != 'zpool_unhealthy' - AND physical_disk_id IS NULL - AND zpool_id IS NULL - AND last_seen_health IS NULL - AND observed_in_inv IS NULL - AND time_observed IS NULL) + AND time_observed IS NOT NULL ) ); From dfab3cfa35b5fd031ca6af069d0854f1654295e3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 3 Jun 2026 18:01:41 -0700 Subject: [PATCH 04/30] [fm] Add saga diagnosis engine The second fault management diagnosis engine: opens a case (keyed by saga_id) for any non-terminal saga that is either not making progress (no node event recorded within STALE_SAGA_THRESHOLD) or orphaned (owned by a Nexus that is no longer of the current generation). These are two independent fact kinds; a saga's case may carry either or both. A case closes when the saga reaches a terminal state. Tracks #10530. 
Supporting infrastructure: - DiagnosisEngineKind::Saga variant (Rust + DB enum) - fm_fact_saga typed child table with two fact kinds (not_progressing, owner_not_current_generation), per-kind nullable columns gated by a CHECK, participating in copy-forward + GC like other sitrep child tables - SagaFact / FactPayload::Saga and the ObservedSaga nexus-types projection - saga_list_running_or_unwinding_batched and a grouped saga_latest_node_event_times datastore query (the wall-clock progress signal); owner currency read DB-direct via the existing get_db_metadata_nexus_in_state Schema migration: fm-saga-de (version 263) adds the 'saga' enum value, the fm_fact_saga_kind and fm_fact_saga_orphan_reason enums, and the fm_fact_saga table. --- Cargo.lock | 3 + nexus/db-model/src/fm.rs | 2 + nexus/db-model/src/fm/diagnosis_engine.rs | 3 + nexus/db-model/src/fm/fact_saga.rs | 243 ++++++ nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/fm.rs | 124 +++- nexus/db-queries/src/db/datastore/saga.rs | 64 ++ nexus/db-schema/src/enums.rs | 2 + nexus/db-schema/src/schema.rs | 21 + nexus/fm/Cargo.toml | 3 + nexus/fm/src/analysis_input.rs | 19 +- nexus/fm/src/diagnosis/mod.rs | 2 + nexus/fm/src/diagnosis/physical_disk.rs | 10 +- nexus/fm/src/diagnosis/saga.rs | 696 ++++++++++++++++++ nexus/src/app/background/tasks/fm_analysis.rs | 101 +++ nexus/types/src/fm.rs | 6 +- nexus/types/src/fm/fact.rs | 94 ++- nexus/types/src/lib.rs | 1 + nexus/types/src/observed_saga.rs | 108 +++ schema/crdb/dbinit.sql | 82 ++- schema/crdb/fm-saga-de/up1.sql | 1 + schema/crdb/fm-saga-de/up2.sql | 4 + schema/crdb/fm-saga-de/up3.sql | 4 + schema/crdb/fm-saga-de/up4.sql | 60 ++ schema/crdb/fm-saga-de/up5.sql | 1 + schema/crdb/fm-saga-de/up5.verify.sql | 2 + 26 files changed, 1636 insertions(+), 23 deletions(-) create mode 100644 nexus/db-model/src/fm/fact_saga.rs create mode 100644 nexus/fm/src/diagnosis/saga.rs create mode 100644 nexus/types/src/observed_saga.rs create mode 100644 
schema/crdb/fm-saga-de/up1.sql create mode 100644 schema/crdb/fm-saga-de/up2.sql create mode 100644 schema/crdb/fm-saga-de/up3.sql create mode 100644 schema/crdb/fm-saga-de/up4.sql create mode 100644 schema/crdb/fm-saga-de/up5.sql create mode 100644 schema/crdb/fm-saga-de/up5.verify.sql diff --git a/Cargo.lock b/Cargo.lock index 025f43fdc78..65e808f7ee0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7115,6 +7115,7 @@ dependencies = [ "iddqd", "nexus-reconfigurator-planning", "nexus-types", + "omicron-common", "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", @@ -7123,8 +7124,10 @@ dependencies = [ "serde_json", "slog", "slog-error-chain", + "steno", "thiserror 2.0.18", "typed-rng", + "uuid", ] [[package]] diff --git a/nexus/db-model/src/fm.rs b/nexus/db-model/src/fm.rs index 0d2f4cf85f6..12af02686f2 100644 --- a/nexus/db-model/src/fm.rs +++ b/nexus/db-model/src/fm.rs @@ -28,6 +28,8 @@ mod diagnosis_engine; pub use diagnosis_engine::*; mod fact_physical_disk; pub use fact_physical_disk::*; +mod fact_saga; +pub use fact_saga::*; mod support_bundle_request; pub use support_bundle_request::*; diff --git a/nexus/db-model/src/fm/diagnosis_engine.rs b/nexus/db-model/src/fm/diagnosis_engine.rs index a99ceeac1ad..951c5749396 100644 --- a/nexus/db-model/src/fm/diagnosis_engine.rs +++ b/nexus/db-model/src/fm/diagnosis_engine.rs @@ -25,6 +25,7 @@ impl_enum_type!( PowerShelf => b"power_shelf" PhysicalDisk => b"physical_disk" + Saga => b"saga" ); @@ -35,6 +36,7 @@ impl From for fm::DiagnosisEngineKind { DiagnosisEngine::PhysicalDisk => { fm::DiagnosisEngineKind::PhysicalDisk } + DiagnosisEngine::Saga => fm::DiagnosisEngineKind::Saga, } } } @@ -46,6 +48,7 @@ impl From for DiagnosisEngine { fm::DiagnosisEngineKind::PhysicalDisk => { DiagnosisEngine::PhysicalDisk } + fm::DiagnosisEngineKind::Saga => DiagnosisEngine::Saga, } } } diff --git a/nexus/db-model/src/fm/fact_saga.rs b/nexus/db-model/src/fm/fact_saga.rs new file mode 100644 index 00000000000..5e2bfb8c03f 
--- /dev/null +++ b/nexus/db-model/src/fm/fact_saga.rs @@ -0,0 +1,243 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database representation of the saga diagnosis engine's facts. +//! +//! Each saga fact is stored as typed columns in the `fm_fact_saga` table. The +//! `kind` discriminant selects which payload columns are populated; a CHECK +//! constraint (`fm_fact_saga_columns_match_kind`) enforces that the right +//! columns are non-NULL for each kind. See [`nexus_types::fm::SagaFact`] for +//! semantics. + +use crate::Generation; +use crate::DbTypedUuid; +use crate::SagaState; +use crate::impl_enum_type; +use chrono::{DateTime, Utc}; +use nexus_db_schema::schema::fm_fact_saga; +use nexus_types::fm; +use nexus_types::fm::{ + FactPayload, SagaFact, SagaNotProgressingFactPayload, + SagaOwnerNotCurrentFactPayload, +}; +use nexus_types::observed_saga::{OrphanedReason, SagaProgressState}; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::{CaseKind, FactKind, OmicronZoneKind, SitrepKind}; +use uuid::Uuid; + +impl_enum_type!( + FmFactSagaKindEnum: + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Eq)] + pub enum FmFactSagaKind; + + NotProgressing => b"not_progressing" + OwnerNotCurrentGeneration => b"owner_not_current_generation" +); + +impl_enum_type!( + FmFactSagaOrphanReasonEnum: + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Eq)] + pub enum FmFactSagaOrphanReason; + + Quiesced => b"quiesced" + Expunged => b"expunged" +); + +impl From for FmFactSagaOrphanReason { + fn from(reason: OrphanedReason) -> Self { + match reason { + OrphanedReason::Quiesced => FmFactSagaOrphanReason::Quiesced, + OrphanedReason::Expunged => FmFactSagaOrphanReason::Expunged, + } + } +} + +impl From for OrphanedReason { + fn from(reason: FmFactSagaOrphanReason) -> Self 
{ + match reason { + FmFactSagaOrphanReason::Quiesced => OrphanedReason::Quiesced, + FmFactSagaOrphanReason::Expunged => OrphanedReason::Expunged, + } + } +} + +impl From for SagaState { + fn from(state: SagaProgressState) -> Self { + match state { + SagaProgressState::Running => SagaState::Running, + SagaProgressState::Unwinding => SagaState::Unwinding, + } + } +} + +/// Convert a DB `saga_state` back into the non-terminal [`SagaProgressState`] +/// recorded on a fact. Terminal states never appear on a saga fact (the case +/// is closed once the saga terminates), so they are treated as corrupt rows. +fn saga_progress_state( + state: SagaState, +) -> Result { + match state { + SagaState::Running => Ok(SagaProgressState::Running), + SagaState::Unwinding => Ok(SagaProgressState::Unwinding), + SagaState::Done | SagaState::Abandoned => { + Err(Error::internal_error(&format!( + "fm_fact_saga row has terminal saga_state {state:?}, which \ + should never be recorded on a saga fact" + ))) + } + } +} + +/// Diesel row for the `fm_fact_saga` table. +/// +/// The payload columns are populated according to `kind`: a column is `Some` +/// iff it belongs to that `kind`'s payload, enforced in the database by the +/// `fm_fact_saga_columns_match_kind` CHECK constraint. +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = fm_fact_saga)] +pub struct FmFactSaga { + pub id: DbTypedUuid, + /// The sitrep to which this fact belongs. + /// + /// This will change as the fact is carried forward from one sitrep to the + /// next. + pub sitrep_id: DbTypedUuid, + pub case_id: DbTypedUuid, + /// Sitrep in which this fact was first added. + /// + /// Preserved unchanged when the fact is carried forward; debug-only. + pub created_sitrep_id: DbTypedUuid, + pub comment: String, + + /// The saga this fact is about. Common to every `kind`, so it is always + /// present (the column is `NOT NULL`). + pub saga_id: Uuid, + /// The saga's name. 
Common to every `kind`, always present. + pub saga_name: String, + pub kind: FmFactSagaKind, + + // Columns for the `NotProgressing` kind. + pub saga_state: Option, + pub time_created: Option>, + pub last_event_time: Option>, + + // Columns for the `OwnerNotCurrentGeneration` kind. + pub current_sec: Option>, + pub orphan_reason: Option, + pub adopt_generation: Option, +} + +impl FmFactSaga { + /// Build a row from a fact's shared metadata (`fact`) and its + /// already-dispatched saga payload (`saga_fact`). + pub fn from_sitrep( + sitrep_id: impl Into>, + case_id: impl Into>, + fact: &fm::case::Fact, + saga_fact: &SagaFact, + ) -> Self { + let base = Self { + id: fact.id.into(), + sitrep_id: sitrep_id.into(), + case_id: case_id.into(), + created_sitrep_id: fact.created_sitrep_id.into(), + comment: fact.comment.clone(), + saga_id: saga_fact.saga_id().0, + saga_name: saga_fact.saga_name().to_string(), + kind: FmFactSagaKind::NotProgressing, + saga_state: None, + time_created: None, + last_event_time: None, + current_sec: None, + orphan_reason: None, + adopt_generation: None, + }; + match saga_fact { + SagaFact::NotProgressing(p) => Self { + kind: FmFactSagaKind::NotProgressing, + saga_state: Some(p.saga_state.into()), + time_created: Some(p.time_created), + last_event_time: Some(p.last_event_time), + ..base + }, + SagaFact::OwnerNotCurrentGeneration(p) => Self { + kind: FmFactSagaKind::OwnerNotCurrentGeneration, + current_sec: Some(p.current_sec.into()), + orphan_reason: Some(p.orphan_reason.into()), + adopt_generation: Some(Generation::from(p.adopt_generation)), + ..base + }, + } + } + + /// Reconstruct an in-memory fact from a row. + /// + /// Columns the database's CHECK constraint guarantees are non-NULL for this + /// `kind` are unwrapped; a NULL where one is required indicates a corrupt + /// row and yields an internal error rather than a panic. 
+ pub fn into_fact(self) -> Result { + let kind = self.kind; + let saga_id = steno::SagaId(self.saga_id); + let saga_name = self.saga_name; + let payload = match kind { + FmFactSagaKind::NotProgressing => { + FactPayload::Saga(SagaFact::NotProgressing( + SagaNotProgressingFactPayload { + saga_id, + saga_name, + saga_state: saga_progress_state( + self.saga_state.ok_or_else(|| { + missing_column(kind, "saga_state") + })?, + )?, + time_created: self.time_created.ok_or_else(|| { + missing_column(kind, "time_created") + })?, + last_event_time: self.last_event_time.ok_or_else( + || missing_column(kind, "last_event_time"), + )?, + }, + )) + } + FmFactSagaKind::OwnerNotCurrentGeneration => { + FactPayload::Saga(SagaFact::OwnerNotCurrentGeneration( + SagaOwnerNotCurrentFactPayload { + saga_id, + saga_name, + current_sec: self + .current_sec + .ok_or_else(|| { + missing_column(kind, "current_sec") + })? + .into(), + orphan_reason: self + .orphan_reason + .ok_or_else(|| { + missing_column(kind, "orphan_reason") + })? + .into(), + adopt_generation: *self.adopt_generation.ok_or_else( + || missing_column(kind, "adopt_generation"), + )?, + }, + )) + } + }; + Ok(fm::case::Fact { + id: self.id.into(), + created_sitrep_id: self.created_sitrep_id.into(), + payload, + comment: self.comment, + }) + } +} + +fn missing_column(kind: FmFactSagaKind, column: &str) -> Error { + Error::internal_error(&format!( + "fm_fact_saga row of kind {kind:?} has a NULL {column}, violating the \ + CHECK constraint requiring it to be non-NULL for this kind" + )) +} diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index f14fbb76b0d..e9c865c5317 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. 
-pub const SCHEMA_VERSION: Version = Version::new(262, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(263, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ pub static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(263, "fm-saga-de"), KnownVersion::new(262, "fm-disk-de-and-facts"), KnownVersion::new(261, "remove-add-zones-with-mupdate-override"), KnownVersion::new(260, "ereport-trim-serial-trailing-nulls"), diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index f16e1608c2d..5bb966303f9 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -35,6 +35,7 @@ use nexus_db_schema::schema::fm_alert_request::dsl as alert_req_dsl; use nexus_db_schema::schema::fm_case::dsl as case_dsl; use nexus_db_schema::schema::fm_ereport_in_case::dsl as case_ereport_dsl; use nexus_db_schema::schema::fm_fact_physical_disk::dsl as fact_pd_dsl; +use nexus_db_schema::schema::fm_fact_saga::dsl as fact_saga_dsl; use nexus_db_schema::schema::fm_sitrep::dsl as sitrep_dsl; use nexus_db_schema::schema::fm_sitrep_history::dsl as history_dsl; use nexus_db_schema::schema::fm_support_bundle_request::dsl as support_bundle_req_dsl; @@ -124,6 +125,25 @@ sitrep_child_tables! { SupportBundleRequest => { table: "fm_support_bundle_request" }, Case => { table: "fm_case" }, FmFactPhysicalDisk => { table: "fm_fact_physical_disk" }, + FmFactSaga => { table: "fm_fact_saga" }, +} + +/// Insert a reconstructed fact into the per-case map, erroring if two facts +/// share a UUID (impossible, as the fact UUID is a primary key). 
+fn insert_fact_for_case( + by_case: &mut HashMap>, + case_id: CaseUuid, + fact: fm::case::Fact, +) -> Result<(), Error> { + let id = fact.id; + by_case.entry(case_id).or_default().insert_unique(fact).map_err(|_| { + let internal_message = format!( + "encountered multiple case facts for case {case_id} with the same \ + fact UUID {id}. this should really not be possible, as the fact \ + UUID is a primary key!", + ); + Error::InternalError { internal_message } + }) } /// Per-child-table statistics from a single GC pass. @@ -547,20 +567,33 @@ impl DataStore { for row in batch { let case_id: CaseUuid = row.case_id.into(); let fact = row.into_fact()?; - let id = fact.id; - by_case - .entry(case_id) - .or_default() - .insert_unique(fact) - .map_err(|_| { - let internal_message = format!( - "encountered multiple case facts for case \ - {case_id} with the same fact UUID {id}. this \ - should really not be possible, as the fact \ - UUID is a primary key!", - ); - Error::InternalError { internal_message } - })?; + insert_fact_for_case(&mut by_case, case_id, fact)?; + } + } + + // --- saga diagnosis engine facts --- + let mut paginator: Paginator> = + Paginator::new(SQL_BATCH_SIZE, PaginationOrder::Descending); + while let Some(p) = paginator.next() { + let batch = paginated( + fact_saga_dsl::fm_fact_saga, + fact_saga_dsl::id, + &p.current_pagparams(), + ) + .filter(fact_saga_dsl::sitrep_id.eq(id.into_untyped_uuid())) + .select(model::fm::FmFactSaga::as_select()) + .load_async(conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context("failed to load saga case facts") + })?; + + paginator = p.found_batch(&batch, &|f| f.id); + for row in batch { + let case_id: CaseUuid = row.case_id.into(); + let fact = row.into_fact()?; + insert_fact_for_case(&mut by_case, case_id, fact)?; } } @@ -811,6 +844,7 @@ impl DataStore { let mut bundle_data_selections_requested = Vec::new(); let mut case_ereports = Vec::new(); let mut physical_disk_facts = 
Vec::new(); + let mut saga_facts = Vec::new(); for case in sitrep.cases { let case_id = case.id; cases.push(model::fm::CaseMetadata::from_sitrep(sitrep_id, &case)); @@ -844,6 +878,11 @@ impl DataStore { ), ); } + fm::FactPayload::Saga(saga_fact) => { + saga_facts.push(model::fm::FmFactSaga::from_sitrep( + sitrep_id, case_id, fact, saga_fact, + )); + } } } } @@ -893,6 +932,17 @@ impl DataStore { })?; } + if !saga_facts.is_empty() { + diesel::insert_into(fact_saga_dsl::fm_fact_saga) + .values(saga_facts) + .execute_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + .internal_context("failed to insert saga case facts") + })?; + } + if !cases.is_empty() { diesel::insert_into(case_dsl::fm_case) .values(cases) @@ -2300,6 +2350,52 @@ mod tests { comment: "a representative fact for case 1".to_string(), }) .unwrap(); + // Saga facts (both kinds), to exercise the fm_fact_saga + // read/write/GC paths. Storage fidelity is what's under test here, + // so it's fine that these sit on a physical-disk case. 
+ facts + .insert_unique(fm::case::Fact { + id: FactUuid::new_v4(), + created_sitrep_id: sitrep_id, + payload: fm::FactPayload::Saga( + fm::SagaFact::NotProgressing( + fm::SagaNotProgressingFactPayload { + saga_id: steno::SagaId(uuid::Uuid::new_v4()), + saga_name: "test-saga".to_string(), + saga_state: + nexus_types::observed_saga::SagaProgressState::Unwinding, + time_created: + omicron_common::now_db_precision(), + last_event_time: + omicron_common::now_db_precision(), + }, + ), + ), + comment: "a representative not-progressing saga fact" + .to_string(), + }) + .unwrap(); + facts + .insert_unique(fm::case::Fact { + id: FactUuid::new_v4(), + created_sitrep_id: sitrep_id, + payload: fm::FactPayload::Saga( + fm::SagaFact::OwnerNotCurrentGeneration( + fm::SagaOwnerNotCurrentFactPayload { + saga_id: steno::SagaId(uuid::Uuid::new_v4()), + saga_name: "test-saga".to_string(), + current_sec: + omicron_uuid_kinds::OmicronZoneUuid::new_v4(), + orphan_reason: + nexus_types::observed_saga::OrphanedReason::Quiesced, + adopt_generation: + omicron_common::api::external::Generation::new(), + }, + ), + ), + comment: "a representative orphaned saga fact".to_string(), + }) + .unwrap(); fm::Case { id: omicron_uuid_kinds::CaseUuid::new_v4(), diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index c1ac332ebee..c87b00978fd 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -191,6 +191,70 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Returns all sagas in a running or unwinding (i.e. non-terminal) state, + /// making as many queries as needed (in batches) to get them all. 
+ /// + /// Unlike [`Self::saga_list_running_or_unwinding_older_than`], this has no + /// age filter and no result cap: it is used by fault management, which + /// needs the complete set of non-terminal sagas (a lossy list would cause + /// it to incorrectly close cases for sagas it simply didn't see). + pub async fn saga_list_running_or_unwinding_batched( + &self, + opctx: &OpContext, + ) -> Result, Error> { + let mut sagas = vec![]; + let mut paginator = Paginator::new( + SQL_BATCH_SIZE, + dropshot::PaginationOrder::Ascending, + ); + let conn = self.pool_connection_authorized(opctx).await?; + while let Some(p) = paginator.next() { + use nexus_db_schema::schema::saga::dsl; + + let mut batch = + paginated(dsl::saga, dsl::id, &p.current_pagparams()) + .filter( + dsl::saga_state + .eq_any(SagaState::RECOVERY_CANDIDATE_STATES), + ) + .select(db::saga_types::Saga::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|row| row.id); + sagas.append(&mut batch); + } + Ok(sagas) + } + + /// For each of the given sagas, returns the timestamp of its most recent + /// node event (`MAX(event_time)`), i.e. the last durably-recorded forward + /// or undo step. Sagas with no node events are absent from the result. + /// + /// This is the saga diagnosis engine's progress signal: `now - max` is how + /// long a saga has gone without recording progress. The query seeks by the + /// `saga_node_event` primary-key prefix (`saga_id`), so it does not scan + /// the whole table. 
+ pub async fn saga_latest_node_event_times( + &self, + opctx: &OpContext, + saga_ids: &[db::saga_types::SagaId], + ) -> Result>)>, Error> + { + use nexus_db_schema::schema::saga_node_event::dsl; + let conn = self.pool_connection_authorized(opctx).await?; + dsl::saga_node_event + .filter(dsl::saga_id.eq_any(saga_ids.to_vec())) + .group_by(dsl::saga_id) + .select((dsl::saga_id, diesel::dsl::max(dsl::event_time))) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Returns a list of all saga log entries for the given saga, making as /// many queries as needed (in batches) to get them all pub async fn saga_fetch_log_batched( diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 88b7deb1b08..4c8fa13acaa 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -59,6 +59,8 @@ define_enums! { EreporterTypeEnum => "ereporter_type", FailureDomainEnum => "failure_domain", FmFactPhysicalDiskKindEnum => "fm_fact_physical_disk_kind", + FmFactSagaKindEnum => "fm_fact_saga_kind", + FmFactSagaOrphanReasonEnum => "fm_fact_saga_orphan_reason", HwM2SlotEnum => "hw_m2_slot", HwPowerStateEnum => "hw_power_state", HwRotSlotEnum => "hw_rot_slot", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 62a72c0bad8..d50b4878de6 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -3259,6 +3259,25 @@ table! { } } +table! { + fm_fact_saga (sitrep_id, id) { + id -> Uuid, + sitrep_id -> Uuid, + case_id -> Uuid, + created_sitrep_id -> Uuid, + comment -> Text, + saga_id -> Uuid, + saga_name -> Text, + kind -> crate::enums::FmFactSagaKindEnum, + saga_state -> Nullable, + time_created -> Nullable, + last_event_time -> Nullable, + current_sec -> Nullable, + orphan_reason -> Nullable, + adopt_generation -> Nullable, + } +} + table! 
{ fm_ereport_in_case (sitrep_id, id) { id -> Uuid, @@ -3276,6 +3295,8 @@ allow_tables_to_appear_in_same_query!(fm_ereport_in_case, ereport); allow_tables_to_appear_in_same_query!(fm_sitrep, fm_case); allow_tables_to_appear_in_same_query!(fm_sitrep, fm_fact_physical_disk); allow_tables_to_appear_in_same_query!(fm_case, fm_fact_physical_disk); +allow_tables_to_appear_in_same_query!(fm_sitrep, fm_fact_saga); +allow_tables_to_appear_in_same_query!(fm_case, fm_fact_saga); table! { fm_alert_request (sitrep_id, id) { diff --git a/nexus/fm/Cargo.toml b/nexus/fm/Cargo.toml index 5e88e88b88a..0acc91b5234 100644 --- a/nexus/fm/Cargo.toml +++ b/nexus/fm/Cargo.toml @@ -18,12 +18,14 @@ anyhow.workspace = true chrono.workspace = true iddqd.workspace = true nexus-types.workspace = true +omicron-common.workspace = true omicron-uuid-kinds.workspace = true rand.workspace = true serde.workspace = true serde_json.workspace = true slog.workspace = true slog-error-chain.workspace = true +steno.workspace = true thiserror.workspace = true typed-rng.workspace = true @@ -38,3 +40,4 @@ omicron-workspace-hack.workspace = true omicron-test-utils.workspace = true nexus-reconfigurator-planning.workspace = true ereport-types.workspace = true +uuid.workspace = true diff --git a/nexus/fm/src/analysis_input.rs b/nexus/fm/src/analysis_input.rs index 2f8e135cbb2..0e61408cfe5 100644 --- a/nexus/fm/src/analysis_input.rs +++ b/nexus/fm/src/analysis_input.rs @@ -10,6 +10,7 @@ use nexus_types::fm::analysis_reports::ClosedCaseReport; use nexus_types::fm::{self, Sitrep, SitrepVersion}; use nexus_types::in_service_disk::InServiceDisk; use nexus_types::inventory; +use nexus_types::observed_saga::ObservedSaga; use omicron_uuid_kinds::CollectionUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -42,6 +43,9 @@ pub struct Input { closed_cases_copied_forward: IdOrdMap, /// All control plane managed disks in_service_disks: Arc>, + /// All non-terminal (running/unwinding) sagas, annotated with 
their + /// latest node-event time and owning-Nexus state. + observed_sagas: Arc>, } impl Input { @@ -75,12 +79,21 @@ impl Input { &self.in_service_disks } + /// All non-terminal sagas observed in the database, indexed by `saga_id`. + /// See the saga diagnosis engine for how absence (a saga that has reached + /// a terminal state) drives case closure. + pub fn observed_sagas(&self) -> &IdOrdMap { + &self.observed_sagas + } + /// Returns a [`Builder`] for constructing a new `Input` from the provided - /// `parent_sitrep`, inventory collection, and in-service disks. + /// `parent_sitrep`, inventory collection, in-service disks, and observed + /// sagas. pub fn builder( parent_sitrep: Option>, inv: Arc, in_service_disks: Arc>, + observed_sagas: Arc>, ) -> Result { // Before preparing analysis inputs, check that the proposed input // inventory collection is at least as new as the parent sitrep's @@ -106,6 +119,7 @@ impl Input { parent_sitrep, inv, in_service_disks, + observed_sagas, new_ereports: IdOrdMap::default(), unmarked_seen_ereports: BTreeSet::default(), }) @@ -130,6 +144,7 @@ pub struct Builder { parent_sitrep: Option>, inv: Arc, in_service_disks: Arc>, + observed_sagas: Arc>, /// Ereports which are new and should be input to analysis in the next /// sitrep. 
new_ereports: IdOrdMap, @@ -253,6 +268,7 @@ impl Builder { open_cases, closed_cases_copied_forward, in_service_disks: self.in_service_disks, + observed_sagas: self.observed_sagas, }; (input, report) @@ -490,6 +506,7 @@ mod tests { Some(parent_sitrep), inv, Arc::new(IdOrdMap::new()), + Arc::new(IdOrdMap::new()), ) .expect("collection start time check should always pass"); // Pass in four ereports: diff --git a/nexus/fm/src/diagnosis/mod.rs b/nexus/fm/src/diagnosis/mod.rs index b62e676fb36..6e8594ce19c 100644 --- a/nexus/fm/src/diagnosis/mod.rs +++ b/nexus/fm/src/diagnosis/mod.rs @@ -12,12 +12,14 @@ use crate::SitrepBuilder; use crate::analysis_input::Input; mod physical_disk; +mod saga; pub fn analyze( input: &Input, builder: &mut SitrepBuilder<'_>, ) -> anyhow::Result<()> { physical_disk::analyze(input, builder)?; + saga::analyze(input, builder)?; Ok(()) } diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index 3992c9cb46c..ef30bcf286f 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -373,9 +373,13 @@ mod tests { s, )) }); - let builder = - Input::builder(parent, Arc::new(collection), Arc::new(in_service)) - .expect("input builder should accept fresh inventory"); + let builder = Input::builder( + parent, + Arc::new(collection), + Arc::new(in_service), + Arc::new(IdOrdMap::new()), + ) + .expect("input builder should accept fresh inventory"); let (input, _report) = builder.build(); input } diff --git a/nexus/fm/src/diagnosis/saga.rs b/nexus/fm/src/diagnosis/saga.rs new file mode 100644 index 00000000000..8d9bf23b580 --- /dev/null +++ b/nexus/fm/src/diagnosis/saga.rs @@ -0,0 +1,696 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Saga diagnosis engine. +//! +//! 
Opens a case (keyed by `saga_id`) for any non-terminal saga that is either +//! *not making progress* (no node event recorded for a while) or *orphaned* +//! (owned by a Nexus that is no longer of the current generation). These are +//! two independent fact kinds; a saga's case may carry either or both. A case +//! is closed once the saga reaches a terminal state (it drops out of the set +//! of non-terminal sagas the preparation phase observed). +//! +//! See omicron#10530 for motivation. + +use crate::SitrepBuilder; +use crate::analysis_input::Input; +use chrono::{DateTime, TimeDelta, Utc}; +use nexus_types::fm::DiagnosisEngineKind; +use nexus_types::fm::{ + SagaFact, SagaNotProgressingFactPayload, SagaOwnerNotCurrentFactPayload, +}; +use nexus_types::observed_saga::ObservedSaga; +use omicron_uuid_kinds::{CaseUuid, FactUuid}; +use std::collections::BTreeMap; + +/// A saga is flagged as "not progressing" once it has recorded no node event +/// for strictly longer than this. It is a wall-clock, cadence-independent span +/// (`reference_time - last_event_time`), deliberately not a count of analysis +/// passes. +const STALE_SAGA_THRESHOLD: TimeDelta = TimeDelta::minutes(30); + +/// Per-case view of a parent saga case, built from its facts. Every fact on a +/// saga case is about the same `saga_id`; at most one fact of each kind is +/// expected. +struct ParentSagaCase { + saga_id: steno::SagaId, + not_progressing: Option<(FactUuid, SagaNotProgressingFactPayload)>, + owner_not_current: Option<(FactUuid, SagaOwnerNotCurrentFactPayload)>, +} + +pub(super) fn analyze( + input: &Input, + builder: &mut SitrepBuilder<'_>, +) -> anyhow::Result<()> { + // Reference "now" for staleness. We use the inventory collection's + // completion time (rather than `Utc::now()`) so analysis is deterministic + // and reproducible in tests, matching the physical-disk engine's use of + // inventory timestamps.
+ let reference_time = input.inventory().time_done; + let observed = input.observed_sagas(); + + // Index parent-forwarded Saga cases by case ID, and maintain a saga_id -> + // case_id index for the second pass. Every case is about one saga, derived + // from its facts; skip (with a warning) any case we can't interpret. + let mut parent_cases: BTreeMap = BTreeMap::new(); + let mut case_for_saga: BTreeMap = BTreeMap::new(); + 'cases: for case in input + .open_cases() + .iter() + .filter(|c| c.metadata.de == DiagnosisEngineKind::Saga) + { + let case_id = case.id; + let mut saga_id: Option = None; + let mut not_progressing = None; + let mut owner_not_current = None; + for fact in case.facts.iter() { + let Some(saga_fact) = fact.payload.as_saga() else { + slog::warn!( + &builder.log, + "skipping Saga case: fact payload does not belong to the \ + saga diagnosis engine"; + "case_id" => %case_id, + "fact_id" => %fact.id, + ); + continue 'cases; + }; + let this_saga = saga_fact.saga_id(); + if *saga_id.get_or_insert(this_saga) != this_saga { + slog::warn!( + &builder.log, + "skipping Saga case: facts reference different sagas"; + "case_id" => %case_id, + ); + continue 'cases; + } + match saga_fact { + SagaFact::NotProgressing(p) => { + not_progressing = Some((fact.id, p.clone())); + } + SagaFact::OwnerNotCurrentGeneration(p) => { + owner_not_current = Some((fact.id, p.clone())); + } + } + } + let Some(saga_id) = saga_id else { + slog::warn!( + &builder.log, + "skipping Saga case with no facts; cannot derive saga id"; + "case_id" => %case_id, + ); + continue 'cases; + }; + parent_cases.insert( + case_id, + ParentSagaCase { saga_id, not_progressing, owner_not_current }, + ); + case_for_saga.insert(saga_id, case_id); + } + + // First pass: for each parent case, close it if its saga has reached a + // terminal state (no longer observed), otherwise drop any facts whose + // recorded contents no longer match the current observation. 
The second + // pass re-adds a fresh fact if the condition still holds. + for (case_id, summary) in &parent_cases { + let mut case_mut = builder + .cases + .case_mut(case_id) + .expect("case_id came from iterating builder.cases"); + let Some(obs) = observed.get(&summary.saga_id) else { + case_mut.close(format!( + "saga {} reached a terminal state", + summary.saga_id, + )); + continue; + }; + let desired_np = desired_not_progressing(obs, reference_time); + let desired_owner = desired_owner_not_current(obs); + if let Some((fact_id, payload)) = &summary.not_progressing { + if desired_np.as_ref() != Some(payload) { + case_mut.remove_fact(*fact_id); + } + } + if let Some((fact_id, payload)) = &summary.owner_not_current { + if desired_owner.as_ref() != Some(payload) { + case_mut.remove_fact(*fact_id); + } + } + } + + // Second pass: for each observed saga with a problem, ensure a case exists + // (reusing the parent-forwarded one if any) and add a fresh fact for each + // condition that isn't already represented by a matching, carried-forward + // fact. + for obs in observed.iter() { + let desired_np = desired_not_progressing(obs, reference_time); + let desired_owner = desired_owner_not_current(obs); + if desired_np.is_none() && desired_owner.is_none() { + continue; + } + + let parent = case_for_saga + .get(&obs.saga_id) + .and_then(|case_id| parent_cases.get(case_id).map(|s| (*case_id, s))); + + // A carried-forward fact already covers a condition only if its + // recorded payload exactly matches what we'd emit now (otherwise the + // first pass removed it). 
+ let np_already = matches!( + (&desired_np, parent.and_then(|(_, s)| s.not_progressing.as_ref())), + (Some(want), Some((_, have))) if want == have + ); + let owner_already = matches!( + ( + &desired_owner, + parent.and_then(|(_, s)| s.owner_not_current.as_ref()), + ), + (Some(want), Some((_, have))) if want == have + ); + + let case_id = match parent { + Some((case_id, _)) => case_id, + None => { + let mut new_case = + builder.cases.open_case(DiagnosisEngineKind::Saga); + new_case.set_comment(format!( + "saga {} ({}) needs attention", + obs.saga_id, obs.saga_name, + )); + new_case.id + } + }; + + if let Some(payload) = desired_np { + if !np_already { + let staleness = reference_time + .signed_duration_since(payload.last_event_time); + let comment = format!( + "no saga node event in {}", + omicron_common::format_time_delta(staleness), + ); + builder + .cases + .case_mut(&case_id) + .expect("case_id came from this fn") + .add_fact(SagaFact::NotProgressing(payload), comment); + } + } + if let Some(payload) = desired_owner { + if !owner_already { + let comment = format!( + "owned by non-current Nexus {} ({:?})", + payload.current_sec, payload.orphan_reason, + ); + builder + .cases + .case_mut(&case_id) + .expect("case_id came from this fn") + .add_fact( + SagaFact::OwnerNotCurrentGeneration(payload), + comment, + ); + } + } + } + + Ok(()) +} + +/// The `NotProgressing` fact this saga should carry now, if any. When a saga +/// has recorded no node event at all, its creation time stands in for the +/// last-progress timestamp (a saga that has existed past the threshold without +/// recording a single step is itself stuck at the start). 
+fn desired_not_progressing( + obs: &ObservedSaga, + reference_time: DateTime, +) -> Option { + let last_progress = obs.last_event_time.unwrap_or(obs.time_created); + if reference_time.signed_duration_since(last_progress) + > STALE_SAGA_THRESHOLD + { + Some(SagaNotProgressingFactPayload { + saga_id: obs.saga_id, + saga_name: obs.saga_name.clone(), + saga_state: obs.saga_state, + time_created: obs.time_created, + last_event_time: last_progress, + }) + } else { + None + } +} + +/// The `OwnerNotCurrentGeneration` fact this saga should carry now, if any. +/// Only fires when the saga has a `current_sec` whose owner is orphaned +/// (quiesced or expunged); a saga with no current SEC is between adoptions and +/// is not treated as orphaned. +fn desired_owner_not_current( + obs: &ObservedSaga, +) -> Option { + let reason = obs.owner_state?.orphaned_reason()?; + let current_sec = obs.current_sec?; + Some(SagaOwnerNotCurrentFactPayload { + saga_id: obs.saga_id, + saga_name: obs.saga_name.clone(), + current_sec, + orphan_reason: reason, + adopt_generation: obs.adopt_generation, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::builder::{SitrepBuilder, SitrepBuilderRng}; + use crate::test_util::FmTest; + use chrono::Utc; + use iddqd::IdOrdMap; + use nexus_types::fm::{self, Sitrep, SitrepVersion}; + use nexus_types::inventory; + use nexus_types::observed_saga::{ + OrphanedReason, SagaOwnerState, SagaProgressState, + }; + use omicron_common::api::external::Generation; + use omicron_test_utils::dev; + use omicron_uuid_kinds::{OmicronZoneUuid, SitrepUuid}; + use std::sync::Arc; + + fn saga_id(n: u128) -> steno::SagaId { + steno::SagaId(uuid::Uuid::from_u128(n)) + } + + /// Build a synthetic example collection (only used here for its + /// `time_done`, which is the staleness reference time). 
+ fn setup( + test_name: &'static str, + ) -> (dev::LogContext, inventory::Collection) { + let (fm_test, logctx) = FmTest::new_with_logctx(test_name); + let (example, _bp) = fm_test.system_builder.build(); + (logctx, example.collection) + } + + /// An observed saga with sensible defaults; callers override the fields + /// each test cares about. + fn mk_observed( + id: steno::SagaId, + last_event_time: Option>, + current_sec: Option, + owner_state: Option, + ) -> ObservedSaga { + ObservedSaga { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created: Utc::now() - TimeDelta::days(1), + current_sec, + adopt_generation: Generation::new(), + last_event_time, + owner_state, + } + } + + fn observed_map( + sagas: impl IntoIterator, + ) -> IdOrdMap { + sagas.into_iter().collect() + } + + fn build_input( + collection: inventory::Collection, + parent_sitrep: Option, + observed: IdOrdMap, + ) -> Input { + let parent = parent_sitrep.map(|s| { + Arc::new(( + SitrepVersion { + id: s.id(), + version: 0, + time_made_current: Utc::now(), + }, + s, + )) + }); + let builder = Input::builder( + parent, + Arc::new(collection), + Arc::new(IdOrdMap::new()), + Arc::new(observed), + ) + .expect("input builder should accept fresh inventory"); + builder.build().0 + } + + fn run_analyze(log: &slog::Logger, input: &Input) -> Sitrep { + let mut builder = SitrepBuilder::new_with_rng( + log, + input, + SitrepBuilderRng::from_seed("saga-analyze"), + ); + analyze(input, &mut builder).expect("analyze ok"); + builder.build(OmicronZoneUuid::new_v4(), Utc::now()).0 + } + + /// Collect every saga fact in the sitrep, optionally only on open cases. 
+ fn saga_facts( + sitrep: &Sitrep, + open_only: bool, + ) -> Vec<(fm::case::Fact, SagaFact)> { + sitrep + .cases + .iter() + .filter(|c| c.metadata.de == DiagnosisEngineKind::Saga) + .filter(|c| !open_only || c.is_open()) + .flat_map(|c| { + c.facts.iter().filter_map(|f| { + f.payload.as_saga().map(|s| (f.clone(), s.clone())) + }) + }) + .collect() + } + + fn make_parent_with_saga_case( + parent_sitrep_id: SitrepUuid, + inv_collection_id: omicron_uuid_kinds::CollectionUuid, + fact_payload: SagaFact, + ) -> Sitrep { + let mut facts = IdOrdMap::new(); + facts + .insert_unique(fm::case::Fact { + id: omicron_uuid_kinds::FactUuid::new_v4(), + created_sitrep_id: parent_sitrep_id, + payload: fact_payload.into(), + comment: "parent saga fact".to_string(), + }) + .unwrap(); + let mut cases = IdOrdMap::new(); + cases + .insert_unique(fm::Case { + id: omicron_uuid_kinds::CaseUuid::new_v4(), + metadata: fm::case::Metadata { + created_sitrep_id: parent_sitrep_id, + closed_sitrep_id: None, + de: DiagnosisEngineKind::Saga, + comment: "parent saga case".to_string(), + }, + ereports: Default::default(), + alerts_requested: Default::default(), + support_bundles_requested: Default::default(), + facts, + }) + .unwrap(); + Sitrep { + metadata: fm::SitrepMetadata { + id: parent_sitrep_id, + inv_collection_id, + creator_id: OmicronZoneUuid::new_v4(), + parent_sitrep_id: None, + time_created: Utc::now(), + next_inv_min_time_started: Utc::now(), + comment: String::new(), + }, + cases, + ereports_by_id: Default::default(), + } + } + + #[test] + fn opens_not_progressing_when_stale() { + let (logctx, collection) = setup("saga_open_not_progressing"); + let stale = collection.time_done - (STALE_SAGA_THRESHOLD + TimeDelta::minutes(1)); + let id = saga_id(1); + let observed = + observed_map([mk_observed(id, Some(stale), None, None)]); + let input = build_input(collection, None, observed); + let sitrep = run_analyze(&logctx.log, &input); + + let facts = saga_facts(&sitrep, true); + 
assert_eq!(facts.len(), 1); + match &facts[0].1 { + SagaFact::NotProgressing(p) => { + assert_eq!(p.saga_id, id); + assert_eq!(p.last_event_time, stale); + } + other => panic!("expected NotProgressing, got {other:?}"), + } + logctx.cleanup_successful(); + } + + #[test] + fn no_case_when_progress_recent() { + let (logctx, collection) = setup("saga_no_case_when_recent"); + let recent = collection.time_done - TimeDelta::minutes(1); + let observed = observed_map([mk_observed( + saga_id(1), + Some(recent), + None, + None, + )]); + let input = build_input(collection, None, observed); + let sitrep = run_analyze(&logctx.log, &input); + assert!( + saga_facts(&sitrep, false).is_empty(), + "a saga making recent progress should not be flagged", + ); + logctx.cleanup_successful(); + } + + #[test] + fn opens_owner_not_current_when_quiesced() { + let (logctx, collection) = setup("saga_open_owner_quiesced"); + // Recent progress, so the only problem is the orphaned owner. + let recent = collection.time_done - TimeDelta::minutes(1); + let sec = OmicronZoneUuid::new_v4(); + let observed = observed_map([mk_observed( + saga_id(1), + Some(recent), + Some(sec), + Some(SagaOwnerState::Quiesced), + )]); + let input = build_input(collection, None, observed); + let sitrep = run_analyze(&logctx.log, &input); + + let facts = saga_facts(&sitrep, true); + assert_eq!(facts.len(), 1); + match &facts[0].1 { + SagaFact::OwnerNotCurrentGeneration(p) => { + assert_eq!(p.current_sec, sec); + assert_eq!(p.orphan_reason, OrphanedReason::Quiesced); + } + other => panic!("expected OwnerNotCurrentGeneration, got {other:?}"), + } + logctx.cleanup_successful(); + } + + #[test] + fn active_owner_not_flagged() { + let (logctx, collection) = setup("saga_active_owner_not_flagged"); + let recent = collection.time_done - TimeDelta::minutes(1); + let observed = observed_map([mk_observed( + saga_id(1), + Some(recent), + Some(OmicronZoneUuid::new_v4()), + Some(SagaOwnerState::Active), + )]); + let input = 
build_input(collection, None, observed); + let sitrep = run_analyze(&logctx.log, &input); + assert!(saga_facts(&sitrep, false).is_empty()); + logctx.cleanup_successful(); + } + + #[test] + fn notyet_owner_not_flagged() { + let (logctx, collection) = setup("saga_notyet_owner_not_flagged"); + let recent = collection.time_done - TimeDelta::minutes(1); + let observed = observed_map([mk_observed( + saga_id(1), + Some(recent), + Some(OmicronZoneUuid::new_v4()), + Some(SagaOwnerState::NotYet), + )]); + let input = build_input(collection, None, observed); + let sitrep = run_analyze(&logctx.log, &input); + assert!(saga_facts(&sitrep, false).is_empty()); + logctx.cleanup_successful(); + } + + #[test] + fn both_facts_on_one_case() { + let (logctx, collection) = setup("saga_both_facts"); + let stale = collection.time_done + - (STALE_SAGA_THRESHOLD + TimeDelta::minutes(1)); + let sec = OmicronZoneUuid::new_v4(); + let observed = observed_map([mk_observed( + saga_id(1), + Some(stale), + Some(sec), + Some(SagaOwnerState::Absent), + )]); + let input = build_input(collection, None, observed); + let sitrep = run_analyze(&logctx.log, &input); + + let facts = saga_facts(&sitrep, true); + assert_eq!(facts.len(), 2, "expected both fact kinds on one case"); + // Both facts must be on the same case (one case per saga). 
+ let open_cases: Vec<_> = sitrep + .cases + .iter() + .filter(|c| c.metadata.de == DiagnosisEngineKind::Saga && c.is_open()) + .collect(); + assert_eq!(open_cases.len(), 1); + assert!( + facts.iter().any(|(_, f)| matches!(f, SagaFact::NotProgressing(_))) + ); + assert!(facts.iter().any(|(_, f)| matches!( + f, + SagaFact::OwnerNotCurrentGeneration(p) + if p.orphan_reason == OrphanedReason::Expunged + ))); + logctx.cleanup_successful(); + } + + #[test] + fn closes_on_terminal() { + let (logctx, collection) = setup("saga_closes_on_terminal"); + let id = saga_id(1); + let stale = collection.time_done + - (STALE_SAGA_THRESHOLD + TimeDelta::minutes(1)); + let parent_id = SitrepUuid::new_v4(); + let inv_id = collection.id; + let parent = make_parent_with_saga_case( + parent_id, + inv_id, + SagaFact::NotProgressing(SagaNotProgressingFactPayload { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created: Utc::now() - TimeDelta::days(1), + last_event_time: stale, + }), + ); + // The saga is no longer observed (it reached a terminal state). 
+ let input = build_input(collection, Some(parent), observed_map([])); + let sitrep = run_analyze(&logctx.log, &input); + let all = saga_facts(&sitrep, false); + assert_eq!(all.len(), 1, "the fact carries forward on the closed case"); + let case = sitrep + .cases + .iter() + .find(|c| c.metadata.de == DiagnosisEngineKind::Saga) + .expect("saga case should still be present"); + assert!(!case.is_open(), "case should be closed when saga terminates"); + logctx.cleanup_successful(); + } + + #[test] + fn fact_uuid_stable_when_unchanged() { + let (logctx, collection) = setup("saga_fact_stable"); + let id = saga_id(1); + let stale = collection.time_done + - (STALE_SAGA_THRESHOLD + TimeDelta::minutes(5)); + let payload = SagaNotProgressingFactPayload { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created: Utc::now() - TimeDelta::days(1), + last_event_time: stale, + }; + let parent_id = SitrepUuid::new_v4(); + let inv_id = collection.id; + let parent = make_parent_with_saga_case( + parent_id, + inv_id, + SagaFact::NotProgressing(payload.clone()), + ); + let parent_fact_id = parent + .cases + .iter() + .next() + .unwrap() + .facts + .iter() + .next() + .unwrap() + .id; + // Observed saga matches the parent fact exactly (same last_event_time, + // same created time, same state). 
+ let observed = observed_map([ObservedSaga { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created: payload.time_created, + current_sec: None, + adopt_generation: Generation::new(), + last_event_time: Some(stale), + owner_state: None, + }]); + let input = build_input(collection, Some(parent), observed); + let sitrep = run_analyze(&logctx.log, &input); + let facts = saga_facts(&sitrep, true); + assert_eq!(facts.len(), 1); + assert_eq!( + facts[0].0.id, parent_fact_id, + "fact UUID should be stable when the observation is unchanged", + ); + logctx.cleanup_successful(); + } + + #[test] + fn fact_uuid_rotates_when_last_event_changes() { + let (logctx, collection) = setup("saga_fact_rotates"); + let id = saga_id(1); + let old = collection.time_done + - (STALE_SAGA_THRESHOLD + TimeDelta::hours(2)); + let new = collection.time_done + - (STALE_SAGA_THRESHOLD + TimeDelta::minutes(1)); + let time_created = Utc::now() - TimeDelta::days(1); + let inv_id = collection.id; + let parent = make_parent_with_saga_case( + SitrepUuid::new_v4(), + inv_id, + SagaFact::NotProgressing(SagaNotProgressingFactPayload { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created, + last_event_time: old, + }), + ); + let parent_fact_id = parent + .cases + .iter() + .next() + .unwrap() + .facts + .iter() + .next() + .unwrap() + .id; + // Still stale, but last_event_time advanced. 
+ let observed = observed_map([ObservedSaga { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created, + current_sec: None, + adopt_generation: Generation::new(), + last_event_time: Some(new), + owner_state: None, + }]); + let input = build_input(collection, Some(parent), observed); + let sitrep = run_analyze(&logctx.log, &input); + let facts = saga_facts(&sitrep, true); + assert_eq!(facts.len(), 1); + assert_ne!( + facts[0].0.id, parent_fact_id, + "fact UUID should rotate when last_event_time changes", + ); + match &facts[0].1 { + SagaFact::NotProgressing(p) => { + assert_eq!(p.last_event_time, new) + } + other => panic!("expected NotProgressing, got {other:?}"), + } + logctx.cleanup_successful(); + } +} diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index 17cb3fddd48..f67e4f46197 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -10,7 +10,9 @@ use chrono::Utc; use fm::analysis_input::InvalidInputs; use futures::future::BoxFuture; use iddqd::IdOrdMap; +use nexus_db_model::DbMetadataNexusState; use nexus_db_model::PhysicalDiskPolicy; +use nexus_db_model::SagaState; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_db_queries::db::datastore; @@ -18,6 +20,9 @@ use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::pagination::Paginator; use nexus_fm as fm; use nexus_types::in_service_disk::InServiceDisk; +use nexus_types::observed_saga::{ + ObservedSaga, SagaOwnerState, SagaProgressState, +}; use nexus_types::internal_api::background::FmAnalysisStatus; use nexus_types::internal_api::background::fm_analysis as status; use nexus_types::inventory; @@ -258,10 +263,13 @@ impl FmAnalysis { ); } let in_service_disks = Arc::new(in_service_disks_map); + let observed_sagas = + Arc::new(self.prepare_observed_sagas(opctx).await?); let mut builder = 
fm::analysis_input::Input::builder( parent_sitrep, inv, in_service_disks, + observed_sagas, )?; let mut errors = Vec::new(); self.load_new_ereports(opctx, &mut builder, &mut errors) @@ -272,6 +280,99 @@ impl FmAnalysis { Ok((input, status::PreparationStatus { errors, report })) } + /// Build the saga diagnosis engine's input: every non-terminal saga, + /// annotated with the timestamp of its latest node event (the progress + /// signal) and the state of its owning Nexus. + async fn prepare_observed_sagas( + &self, + opctx: &OpContext, + ) -> anyhow::Result> { + use std::collections::BTreeMap; + + // All running/unwinding sagas. Terminal sagas are excluded; a parent + // case whose saga is absent from this set is closed by the engine. + let sagas = self + .datastore + .saga_list_running_or_unwinding_batched(opctx) + .await + .context("failed to list non-terminal sagas")?; + + // Latest node-event time per saga: the last durably-recorded step. + let saga_ids: Vec<_> = sagas.iter().map(|s| s.id).collect(); + let last_event_times: BTreeMap< + steno::SagaId, + Option>, + > = self + .datastore + .saga_latest_node_event_times(opctx, &saga_ids) + .await + .context("failed to load saga node-event times")? + .into_iter() + .map(|(id, t)| (id.0, t)) + .collect(); + + // Classify each owning Nexus (current_sec) against db_metadata_nexus. + let nexus_states: BTreeMap = + self.datastore + .get_db_metadata_nexus_in_state( + opctx, + vec![ + DbMetadataNexusState::Active, + DbMetadataNexusState::NotYet, + DbMetadataNexusState::Quiesced, + ], + ) + .await + .context("failed to load db_metadata_nexus records")? + .into_iter() + .map(|n| (n.nexus_id(), n.state())) + .collect(); + + let mut observed = IdOrdMap::new(); + for saga in sagas { + let saga_state = match saga.saga_state { + SagaState::Running => SagaProgressState::Running, + SagaState::Unwinding => SagaProgressState::Unwinding, + // The query filters to non-terminal states; defend anyway. 
+ SagaState::Done | SagaState::Abandoned => continue, + }; + let current_sec = saga + .current_sec + .map(|sec| OmicronZoneUuid::from_untyped_uuid(sec.0)); + let owner_state = current_sec.map(|sec_id| { + match nexus_states.get(&sec_id) { + Some(DbMetadataNexusState::Active) => { + SagaOwnerState::Active + } + Some(DbMetadataNexusState::NotYet) => { + SagaOwnerState::NotYet + } + Some(DbMetadataNexusState::Quiesced) => { + SagaOwnerState::Quiesced + } + None => SagaOwnerState::Absent, + } + }); + let last_event_time = + last_event_times.get(&saga.id.0).copied().flatten(); + observed + .insert_unique(ObservedSaga { + saga_id: saga.id.0, + saga_name: saga.name, + saga_state, + time_created: saga.time_created, + current_sec, + adopt_generation: *saga.adopt_generation, + last_event_time, + owner_state, + }) + .expect( + "saga.id is a primary key, so duplicates are impossible", + ); + } + Ok(observed) + } + async fn load_new_ereports( &mut self, opctx: &OpContext, diff --git a/nexus/types/src/fm.rs b/nexus/types/src/fm.rs index 1c4dba06532..538940a50e6 100644 --- a/nexus/types/src/fm.rs +++ b/nexus/types/src/fm.rs @@ -13,7 +13,10 @@ pub use ereport::{Ereport, EreportId}; pub mod case; pub use case::Case; pub mod fact; -pub use fact::{DiskFact, FactPayload, ZpoolUnhealthyFactPayload}; +pub use fact::{ + DiskFact, FactPayload, SagaFact, SagaNotProgressingFactPayload, + SagaOwnerNotCurrentFactPayload, ZpoolUnhealthyFactPayload, +}; pub(crate) mod json_display; use case::AlertRequest; @@ -224,4 +227,5 @@ pub type CurrentSitrep = Arc<(SitrepVersion, Sitrep)>; pub enum DiagnosisEngineKind { PowerShelf, PhysicalDisk, + Saga, } diff --git a/nexus/types/src/fm/fact.rs b/nexus/types/src/fm/fact.rs index 714f6660c2b..622836f1433 100644 --- a/nexus/types/src/fm/fact.rs +++ b/nexus/types/src/fm/fact.rs @@ -13,8 +13,12 @@ //! 
[`Metadata::de`]: super::case::Metadata::de use crate::inventory::ZpoolHealth; +use crate::observed_saga::{OrphanedReason, SagaProgressState}; use chrono::{DateTime, Utc}; -use omicron_uuid_kinds::{CollectionUuid, PhysicalDiskUuid, ZpoolUuid}; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::{ + CollectionUuid, OmicronZoneUuid, PhysicalDiskUuid, ZpoolUuid, +}; use serde::{Deserialize, Serialize}; /// The typed payload of a [`Fact`](super::case::Fact). @@ -27,6 +31,8 @@ use serde::{Deserialize, Serialize}; pub enum FactPayload { /// A fact owned by the physical-disk diagnosis engine. PhysicalDisk(DiskFact), + /// A fact owned by the saga diagnosis engine. + Saga(SagaFact), } impl From for FactPayload { @@ -35,6 +41,12 @@ impl From for FactPayload { } } +impl From for FactPayload { + fn from(fact: SagaFact) -> Self { + FactPayload::Saga(fact) + } +} + impl FactPayload { /// The physical-disk payload, or `None` if this fact belongs to a /// different diagnosis engine. @@ -45,6 +57,16 @@ impl FactPayload { _ => None, } } + + /// The saga payload, or `None` if this fact belongs to a different + /// diagnosis engine. + pub fn as_saga(&self) -> Option<&SagaFact> { + #[allow(unreachable_patterns)] + match self { + FactPayload::Saga(fact) => Some(fact), + _ => None, + } + } } /// Per-fact state for the physical-disk diagnosis engine. @@ -86,3 +108,73 @@ pub struct ZpoolUnhealthyFactPayload { /// `time_done` of `observed_in_inv`. pub time_observed: DateTime, } + +/// Per-fact state for the saga diagnosis engine. +/// +/// A saga case (keyed by `saga_id`) may carry either or both of these, +/// reflecting two independent problems with the same saga. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum SagaFact { + /// The saga is non-terminal but has recorded no node event in a long + /// time, i.e. it is not making durable forward or undo progress. 
+ NotProgressing(SagaNotProgressingFactPayload), + /// The saga is owned by a Nexus that is no longer of the current + /// generation (quiesced or expunged), so that Nexus will not advance it. + OwnerNotCurrentGeneration(SagaOwnerNotCurrentFactPayload), +} + +impl SagaFact { + /// The saga this fact (and its parent case) is about. Common to every + /// kind of saga fact. + pub fn saga_id(&self) -> steno::SagaId { + match self { + SagaFact::NotProgressing(p) => p.saga_id, + SagaFact::OwnerNotCurrentGeneration(p) => p.saga_id, + } + } + + /// The saga's name. Common to every kind of saga fact. + pub fn saga_name(&self) -> &str { + match self { + SagaFact::NotProgressing(p) => &p.saga_name, + SagaFact::OwnerNotCurrentGeneration(p) => &p.saga_name, + } + } +} + +/// Payload of a [`SagaFact::NotProgressing`] fact. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct SagaNotProgressingFactPayload { + /// The saga this fact (and its parent case) is about. + pub saga_id: steno::SagaId, + /// The saga's name (e.g. `instance-start`). + pub saga_name: String, + /// Whether the saga is running forward or unwinding. Unwinding-but-stuck + /// is the more dangerous case (it may have half-torn-down resources). + pub saga_state: SagaProgressState, + /// When the saga was created. + pub time_created: DateTime, + /// The latest `saga_node_event.event_time` observed for this saga, or the + /// saga's `time_created` if it has recorded no node event. The fact is + /// recorded when this is older than the engine's staleness threshold. + pub last_event_time: DateTime, +} + +/// Payload of a [`SagaFact::OwnerNotCurrentGeneration`] fact. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct SagaOwnerNotCurrentFactPayload { + /// The saga this fact (and its parent case) is about. + pub saga_id: steno::SagaId, + /// The saga's name (e.g. `instance-start`). + pub saga_name: String, + /// The owning Nexus zone (`saga.current_sec`).
This fact only fires when + /// the saga has a current SEC, so it is always present. + pub current_sec: OmicronZoneUuid, + /// Why the owner is not current: quiesced (older generation) or expunged + /// (no `db_metadata_nexus` record). + pub orphan_reason: OrphanedReason, + /// `saga.adopt_generation`: how many times the saga has been re-adopted + /// to a SEC. Recorded for triage (thrashing across Nexus restarts). + pub adopt_generation: Generation, +} diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index 9a5eeb77c06..deb4059fbcc 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -40,6 +40,7 @@ pub mod instance; pub mod internal_api; pub mod inventory; pub mod multicast; +pub mod observed_saga; pub mod quiesce; pub mod saga; pub mod silo; diff --git a/nexus/types/src/observed_saga.rs b/nexus/types/src/observed_saga.rs new file mode 100644 index 00000000000..eadc4273e04 --- /dev/null +++ b/nexus/types/src/observed_saga.rs @@ -0,0 +1,108 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! "Currently non-terminal sagas" — the executed view from the `saga` and +//! `saga_node_event` DB tables, annotated with the state of each saga's +//! owning Nexus. +//! +//! This is the saga diagnosis engine's analog of +//! [`InServiceDisk`](crate::in_service_disk::InServiceDisk): the fault +//! management preparation phase reads it directly from the database, and the +//! saga diagnosis engine consumes it to decide whether a saga is stuck (not +//! making progress) or orphaned (owned by a Nexus that is no longer of the +//! current generation). 
+ +use chrono::{DateTime, Utc}; +use iddqd::{IdOrdItem, id_upcast}; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::OmicronZoneUuid; +use serde::{Deserialize, Serialize}; + +/// The non-terminal execution state of a saga, as recorded in the `saga` +/// table's `saga_state` column. Terminal states (`done`, `abandoned`) are +/// deliberately excluded: a saga that has reached one of those is no longer a +/// candidate for the saga diagnosis engine (its case, if any, is closed). +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SagaProgressState { + /// The saga is executing forward actions. + Running, + /// One or more actions failed and the saga is executing undo actions. + Unwinding, +} + +/// The state of a saga's owning Nexus (`saga.current_sec`), classified against +/// `db_metadata_nexus`. Drives the saga diagnosis engine's "orphaned" path. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SagaOwnerState { + /// The owning Nexus is the current, active generation. + Active, + /// The owning Nexus is a newer generation that is not yet active. + NotYet, + /// The owning Nexus has quiesced (an older generation handed off). + Quiesced, + /// The owning Nexus has no `db_metadata_nexus` record at all (expunged), + /// or the saga has no `current_sec`. + Absent, +} + +impl SagaOwnerState { + /// If this owner state means the saga is orphaned — owned by a Nexus that + /// will not make progress on it — the reason why; otherwise `None`. + pub fn orphaned_reason(self) -> Option<OrphanedReason> { + match self { + SagaOwnerState::Active | SagaOwnerState::NotYet => None, + SagaOwnerState::Quiesced => Some(OrphanedReason::Quiesced), + SagaOwnerState::Absent => Some(OrphanedReason::Expunged), + } + } +} + +/// Why a saga is orphaned: its owning Nexus exists but will not advance it.
+/// The reduced, only-ever-stored form of [`SagaOwnerState`] (the `Active` and +/// `NotYet` states never produce a fact). +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum OrphanedReason { + /// The owning Nexus quiesced (an older generation that handed off). + Quiesced, + /// The owning Nexus has no `db_metadata_nexus` record (expunged). + Expunged, +} + +/// One non-terminal saga, joined with the timestamp of its most recent node +/// event and the state of its owning Nexus. +#[derive(Clone, Debug, PartialEq)] +pub struct ObservedSaga { + pub saga_id: steno::SagaId, + pub saga_name: String, + pub saga_state: SagaProgressState, + /// When the saga was created (`saga.time_created`). + pub time_created: DateTime<Utc>, + /// The owning Nexus zone (`saga.current_sec`), or `None` if the saga has no + /// current SEC. + pub current_sec: Option<OmicronZoneUuid>, + /// `saga.adopt_generation`: bumps each time the saga is re-adopted to a + /// SEC (failover), not on progress. + pub adopt_generation: Generation, + /// The latest `saga_node_event.event_time` for this saga, i.e. the last + /// durably-recorded forward or undo step. `None` if the saga somehow has + /// no node events yet. This is the "last progress" signal: + /// `now - last_event_time` is how long the saga has gone without + /// recording progress. + pub last_event_time: Option<DateTime<Utc>>, + /// The classified state of the owning Nexus, or `None` if the saga has no + /// `current_sec` (it is between adoptions and not classifiable — and not + /// treated as orphaned). Always `Some` when `current_sec` is `Some`.
+ pub owner_state: Option<SagaOwnerState>, +} + +impl IdOrdItem for ObservedSaga { + type Key<'a> = steno::SagaId; + fn key(&self) -> Self::Key<'_> { + self.saga_id + } + id_upcast!(); +} diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b39b88d184a..6e1dfbfa9f0 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7621,7 +7621,8 @@ ON omicron.public.fm_sitrep_history (sitrep_id); CREATE TYPE IF NOT EXISTS omicron.public.diagnosis_engine AS ENUM ( 'power_shelf', - 'physical_disk' + 'physical_disk', + 'saga' ); CREATE TABLE IF NOT EXISTS omicron.public.fm_case ( @@ -7701,6 +7702,83 @@ CREATE INDEX IF NOT EXISTS lookup_fm_fact_physical_disk_for_case ON omicron.public.fm_fact_physical_disk (sitrep_id, case_id); +-- The saga diagnosis engine's facts. See the comment on the physical-disk +-- engine above: one table per engine, fact content as typed columns. +CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_saga_kind AS ENUM ( + 'not_progressing', + 'owner_not_current_generation' +); + +CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_saga_orphan_reason AS ENUM ( + 'quiesced', + 'expunged' +); + +CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( + -- Stable UUID for this fact across sitreps. + id UUID NOT NULL, + -- Sitrep this row belongs to. + sitrep_id UUID NOT NULL, + -- UUID of the case this fact attaches to. + case_id UUID NOT NULL, + -- UUID of the sitrep in which this fact was first added. Preserved + -- unchanged when the fact is carried forward into a child sitrep. + -- Debug-only. + created_sitrep_id UUID NOT NULL, + -- Free-form, debug-only comment. + comment TEXT NOT NULL, + + -- The saga this fact is about. Common to every kind of saga fact (the + -- case is keyed by it), so it is always present regardless of `kind`. + saga_id UUID NOT NULL, + -- The saga's name (e.g. 'instance-start'). Common to every kind. + saga_name TEXT NOT NULL, + + -- Which saga fact this row represents.
The columns below are populated + -- according to this discriminant (see the CHECK constraint). + kind omicron.public.fm_fact_saga_kind NOT NULL, + + -- Columns for a 'not_progressing' fact. NULL for any other kind. + saga_state omicron.public.saga_state, + time_created TIMESTAMPTZ, + last_event_time TIMESTAMPTZ, + + -- Columns for an 'owner_not_current_generation' fact. NULL for any other + -- kind. + current_sec UUID, + orphan_reason omicron.public.fm_fact_saga_orphan_reason, + adopt_generation INT8, + + PRIMARY KEY (sitrep_id, id), + + -- Exactly one kind's columns are populated; all other kinds' columns are + -- NULL. A `kind` not handled here fails closed (ELSE false), so adding a + -- new kind requires extending this constraint. + CONSTRAINT fm_fact_saga_columns_match_kind CHECK ( + CASE kind + WHEN 'not_progressing' THEN + saga_state IS NOT NULL + AND time_created IS NOT NULL + AND last_event_time IS NOT NULL + AND current_sec IS NULL + AND orphan_reason IS NULL + AND adopt_generation IS NULL + WHEN 'owner_not_current_generation' THEN + current_sec IS NOT NULL + AND orphan_reason IS NOT NULL + AND adopt_generation IS NOT NULL + AND saga_state IS NULL + AND time_created IS NULL + AND last_event_time IS NULL + ELSE false + END + ) +); + +CREATE INDEX IF NOT EXISTS + lookup_fm_fact_saga_for_case +ON omicron.public.fm_fact_saga (sitrep_id, case_id); + CREATE TABLE IF NOT EXISTS omicron.public.fm_ereport_in_case ( -- ID of this association. When an ereport is assigned to a case, that -- association is assigned a UUID. 
These are used primarily to aid in @@ -8677,7 +8755,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '262.0.0', NULL) + (TRUE, NOW(), NOW(), '263.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/fm-saga-de/up1.sql b/schema/crdb/fm-saga-de/up1.sql new file mode 100644 index 00000000000..c40b0e8a3f6 --- /dev/null +++ b/schema/crdb/fm-saga-de/up1.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.diagnosis_engine ADD VALUE IF NOT EXISTS 'saga'; diff --git a/schema/crdb/fm-saga-de/up2.sql b/schema/crdb/fm-saga-de/up2.sql new file mode 100644 index 00000000000..78065156c3e --- /dev/null +++ b/schema/crdb/fm-saga-de/up2.sql @@ -0,0 +1,4 @@ +CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_saga_kind AS ENUM ( + 'not_progressing', + 'owner_not_current_generation' +); diff --git a/schema/crdb/fm-saga-de/up3.sql b/schema/crdb/fm-saga-de/up3.sql new file mode 100644 index 00000000000..4860384923b --- /dev/null +++ b/schema/crdb/fm-saga-de/up3.sql @@ -0,0 +1,4 @@ +CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_saga_orphan_reason AS ENUM ( + 'quiesced', + 'expunged' +); diff --git a/schema/crdb/fm-saga-de/up4.sql b/schema/crdb/fm-saga-de/up4.sql new file mode 100644 index 00000000000..261a2ce3513 --- /dev/null +++ b/schema/crdb/fm-saga-de/up4.sql @@ -0,0 +1,60 @@ +CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( + -- Stable UUID for this fact across sitreps. + id UUID NOT NULL, + -- Sitrep this row belongs to. + sitrep_id UUID NOT NULL, + -- UUID of the case this fact attaches to. + case_id UUID NOT NULL, + -- UUID of the sitrep in which this fact was first added. Preserved + -- unchanged when the fact is carried forward into a child sitrep. + -- Debug-only. + created_sitrep_id UUID NOT NULL, + -- Free-form, debug-only comment. + comment TEXT NOT NULL, + + -- The saga this fact is about. Common to every kind of saga fact (the + -- case is keyed by it), so it is always present regardless of `kind`. 
+ saga_id UUID NOT NULL, + -- The saga's name (e.g. 'instance-start'). Common to every kind. + saga_name TEXT NOT NULL, + + -- Which saga fact this row represents. The columns below are populated + -- according to this discriminant (see the CHECK constraint). + kind omicron.public.fm_fact_saga_kind NOT NULL, + + -- Columns for a 'not_progressing' fact. NULL for any other kind. + saga_state omicron.public.saga_state, + time_created TIMESTAMPTZ, + last_event_time TIMESTAMPTZ, + + -- Columns for an 'owner_not_current_generation' fact. NULL for any other + -- kind. + current_sec UUID, + orphan_reason omicron.public.fm_fact_saga_orphan_reason, + adopt_generation INT8, + + PRIMARY KEY (sitrep_id, id), + + -- Exactly one kind's columns are populated; all other kinds' columns are + -- NULL. A `kind` not handled here fails closed (ELSE false), so adding a + -- new kind requires extending this constraint. + CONSTRAINT fm_fact_saga_columns_match_kind CHECK ( + CASE kind + WHEN 'not_progressing' THEN + saga_state IS NOT NULL + AND time_created IS NOT NULL + AND last_event_time IS NOT NULL + AND current_sec IS NULL + AND orphan_reason IS NULL + AND adopt_generation IS NULL + WHEN 'owner_not_current_generation' THEN + current_sec IS NOT NULL + AND orphan_reason IS NOT NULL + AND adopt_generation IS NOT NULL + AND saga_state IS NULL + AND time_created IS NULL + AND last_event_time IS NULL + ELSE false + END + ) +); diff --git a/schema/crdb/fm-saga-de/up5.sql b/schema/crdb/fm-saga-de/up5.sql new file mode 100644 index 00000000000..c4372905384 --- /dev/null +++ b/schema/crdb/fm-saga-de/up5.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS lookup_fm_fact_saga_for_case ON omicron.public.fm_fact_saga (sitrep_id, case_id); diff --git a/schema/crdb/fm-saga-de/up5.verify.sql b/schema/crdb/fm-saga-de/up5.verify.sql new file mode 100644 index 00000000000..263f3b6a78b --- /dev/null +++ b/schema/crdb/fm-saga-de/up5.verify.sql @@ -0,0 +1,2 @@ +-- DO NOT EDIT. 
Generated by test_migration_verification_files. +SELECT CAST(IF((SELECT true WHERE EXISTS (SELECT index_name FROM omicron.crdb_internal.table_indexes WHERE descriptor_name = 'fm_fact_saga' AND index_name = 'lookup_fm_fact_saga_for_case')),'true','Schema change verification failed: index lookup_fm_fact_saga_for_case on table fm_fact_saga does not exist') AS BOOL); From 4b665e79053be4b53f6fee5e08e36e4a86129ce4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 12:36:38 -0700 Subject: [PATCH 05/30] PR feedback: growable fact CHECK constraint, filter deleted disks, tolerate duplicate zpools - The per-kind CHECK constraint on fm_fact_physical_disk is now an implication (kind != 'zpool_unhealthy' OR columns present), so future fact kinds add their own constraint instead of rewriting this one - The fm_analysis in-service disk projection skips soft-deleted physical_disk rows, matching what its comment already claimed - A duplicate physical disk in the zpool listing now logs a warning and keeps the first zpool instead of panicking the background task --- nexus/db-model/src/fm/fact_physical_disk.rs | 6 ++-- nexus/src/app/background/tasks/fm_analysis.rs | 29 ++++++++++++++----- schema/crdb/dbinit.sql | 8 +++-- schema/crdb/fm-disk-de-and-facts/up3.sql | 8 +++-- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/nexus/db-model/src/fm/fact_physical_disk.rs b/nexus/db-model/src/fm/fact_physical_disk.rs index 60e9ef25d6d..2b85de0e1b4 100644 --- a/nexus/db-model/src/fm/fact_physical_disk.rs +++ b/nexus/db-model/src/fm/fact_physical_disk.rs @@ -34,8 +34,10 @@ impl_enum_type!( /// Diesel row for the `fm_fact_physical_disk` table. /// /// The payload columns are populated according to `kind`: a column is `Some` -/// iff it belongs to that `kind`'s payload. This is enforced in the database -/// by the per-kind CHECK constraints (e.g. `zpool_unhealthy_columns_present`). 
+/// if it belongs to that `kind`'s payload, and `None` otherwise (by +/// convention; the per-kind CHECK constraints, e.g. +/// `zpool_unhealthy_columns_present`, enforce only the "present for the +/// matching kind" direction so that future kinds may share columns). #[derive(Queryable, Insertable, Clone, Debug, Selectable)] #[diesel(table_name = fm_fact_physical_disk)] pub struct FmFactPhysicalDisk { diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index 17cb3fddd48..f26352627f1 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -227,7 +227,7 @@ impl FmAnalysis { // represented as control plane disks today, so the U.2-only filter // on the underlying query matches reality. // - // This is the executed view from the DB — flipped only after sagas / + // This is the executed view from the DB, flipped only after sagas / // cleaners have actually drained resources, not while a planner is // merely proposing changes. 
A faulty disk a planner proposes to // expunge is still the diagnoser's concern until the control plane @@ -239,23 +239,36 @@ impl FmAnalysis { .context("failed to load in-service control plane disks")?; let mut in_service_disks_map = IdOrdMap::new(); for (zpool, disk) in zpools_and_disks { - if disk.disk_policy != PhysicalDiskPolicy::InService { + if disk.time_deleted().is_some() + || disk.disk_policy != PhysicalDiskPolicy::InService + { continue; } - in_service_disks_map + let physical_disk_id = disk.id(); + let zpool_id = zpool.id(); + if in_service_disks_map .insert_unique(InServiceDisk { - physical_disk_id: disk.id(), - zpool_id: zpool.id(), + physical_disk_id, + zpool_id, sled_id: disk.sled_id.into(), vendor: disk.vendor, serial: disk.serial, model: disk.model, variant: disk.variant.into(), }) - .expect( - "physical_disk.id is a primary key, so duplicates are \ - impossible", + .is_err() + { + // One live zpool per disk is a code-maintained invariant, + // not a schema constraint. Tolerate a violation rather than + // panicking the analysis task: keep the first zpool seen + // for the disk. + slog::warn!( + &opctx.log, + "multiple live zpools reference the same physical disk"; + "physical_disk_id" => %physical_disk_id, + "zpool_id" => %zpool_id, ); + } } let in_service_disks = Arc::new(in_service_disks_map); let mut builder = fm::analysis_input::Input::builder( diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 172fa967cbb..2490ec9573a 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7758,12 +7758,16 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( PRIMARY KEY (sitrep_id, id), + -- Each variant validates that the columns it expects are present. + -- Future variants should add their own constraint like this one, + -- leaving existing constraints untouched. 
CONSTRAINT zpool_unhealthy_columns_present CHECK ( - kind = 'zpool_unhealthy' - AND zpool_id IS NOT NULL + kind != 'zpool_unhealthy' OR ( + zpool_id IS NOT NULL AND last_seen_health IS NOT NULL AND observed_in_inv IS NOT NULL AND time_observed IS NOT NULL + ) ) ); diff --git a/schema/crdb/fm-disk-de-and-facts/up3.sql b/schema/crdb/fm-disk-de-and-facts/up3.sql index 4249378b4a1..51692acc720 100644 --- a/schema/crdb/fm-disk-de-and-facts/up3.sql +++ b/schema/crdb/fm-disk-de-and-facts/up3.sql @@ -30,11 +30,15 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( PRIMARY KEY (sitrep_id, id), + -- Each variant validates that the columns it expects are present. + -- Future variants should add their own constraint like this one, + -- leaving existing constraints untouched. CONSTRAINT zpool_unhealthy_columns_present CHECK ( - kind = 'zpool_unhealthy' - AND zpool_id IS NOT NULL + kind != 'zpool_unhealthy' OR ( + zpool_id IS NOT NULL AND last_seen_health IS NOT NULL AND observed_in_inv IS NOT NULL AND time_observed IS NOT NULL + ) ) ); From 272b368a01feb0b6cbcbe36c13099656d63cdee7 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 13:20:22 -0700 Subject: [PATCH 06/30] [fm] diagnosis engines read their Input from the SitrepBuilder SitrepBuilder::cases is seeded from the open cases of the Input the builder was constructed from, but diagnosis::analyze() previously took the Input as a separate argument; nothing tied the two together, and an engine handed a mismatched pair would panic looking up a case that was never seeded. Store the Input on the builder and have engines read it from there, making the mismatch unrepresentable. 
--- nexus/fm/src/analysis_input.rs | 1 + nexus/fm/src/builder.rs | 14 ++++++++++++++ nexus/fm/src/diagnosis/mod.rs | 8 ++------ nexus/fm/src/diagnosis/physical_disk.rs | 19 +++++++++---------- nexus/src/app/background/tasks/fm_analysis.rs | 2 +- 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/nexus/fm/src/analysis_input.rs b/nexus/fm/src/analysis_input.rs index 2f8e135cbb2..8073e32493d 100644 --- a/nexus/fm/src/analysis_input.rs +++ b/nexus/fm/src/analysis_input.rs @@ -32,6 +32,7 @@ pub use nexus_types::fm::analysis_reports::InputReport as Report; /// is constructed, the inputs are immutable and cannot be modified. To /// construct a new `Input` as part of a preparaation phase, use /// [`Input::builder`]. +#[derive(Debug)] pub struct Input { parent_sitrep: Option>, inv: Arc, diff --git a/nexus/fm/src/builder.rs b/nexus/fm/src/builder.rs index d528b69c0a4..8245943239f 100644 --- a/nexus/fm/src/builder.rs +++ b/nexus/fm/src/builder.rs @@ -24,6 +24,11 @@ pub struct SitrepBuilder<'a> { pub parent_sitrep: Option<&'a fm::Sitrep>, pub sitrep_id: SitrepUuid, pub cases: case::AllCases, + /// The analysis input this builder was constructed from. `cases` is + /// seeded from this input's open cases, so diagnosis engines must read + /// their inputs from here rather than taking a separate `Input` argument + /// that might disagree. + input: &'a analysis_input::Input, closed_cases_copied_forward: &'a IdOrdMap, comment: String, } @@ -66,11 +71,20 @@ impl<'a> SitrepBuilder<'a> { inventory, parent_sitrep, comment: String::new(), + input: inputs, closed_cases_copied_forward, cases, } } + /// The analysis input this builder was constructed from. + /// + /// The returned reference borrows the input (lifetime `'a`), not the + /// builder, so callers may hold it while mutating the builder. 
+ pub fn input(&self) -> &'a analysis_input::Input { + self.input + } + pub fn comment(&self) -> &str { &self.comment } diff --git a/nexus/fm/src/diagnosis/mod.rs b/nexus/fm/src/diagnosis/mod.rs index b62e676fb36..4a2d7d21856 100644 --- a/nexus/fm/src/diagnosis/mod.rs +++ b/nexus/fm/src/diagnosis/mod.rs @@ -9,15 +9,11 @@ //! so the dispatch order does not matter. use crate::SitrepBuilder; -use crate::analysis_input::Input; mod physical_disk; -pub fn analyze( - input: &Input, - builder: &mut SitrepBuilder<'_>, -) -> anyhow::Result<()> { - physical_disk::analyze(input, builder)?; +pub fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { + physical_disk::analyze(builder)?; Ok(()) } diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index 3992c9cb46c..ab6bb1d4777 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -5,7 +5,6 @@ //! Disk diagnosis engine. use crate::SitrepBuilder; -use crate::analysis_input::Input; use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; use nexus_types::fm::DiagnosisEngineKind; use nexus_types::fm::{DiskFact, ZpoolUnhealthyFactPayload}; @@ -59,10 +58,10 @@ struct ParentCaseSummary { unhealthy_facts: IdOrdMap, } -pub(super) fn analyze( - input: &Input, - builder: &mut SitrepBuilder<'_>, -) -> anyhow::Result<()> { +pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { + // The input borrow has lifetime 'a, not a borrow of `builder`, so we may + // hold it while mutating the builder below. 
+ let input = builder.input(); let inv_collection_id = input.inventory().id; let inv_time_done = input.inventory().time_done; @@ -172,10 +171,10 @@ pub(super) fn analyze( // is NOT a recovery signal: sled could be powered off, or // inventory could be lossy) for (case_id, summary) in &parent_cases { - let mut case_mut = builder - .cases - .case_mut(case_id) - .expect("case_id came from iterating builder.cases"); + let mut case_mut = builder.cases.case_mut(case_id).expect( + "builder.cases is seeded from the open cases of builder.input(), \ + which is where this case_id came from", + ); match in_service_health.get(&summary.physical_disk_id) { None => { case_mut.close(format!( @@ -392,7 +391,7 @@ mod tests { input, SitrepBuilderRng::from_seed("disk-analyze"), ); - analyze(input, &mut builder).expect("analyze ok"); + analyze(&mut builder).expect("analyze ok"); builder.build(OmicronZoneUuid::new_v4(), Utc::now()) } diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index f26352627f1..3bf66f5432c 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -345,7 +345,7 @@ impl FmAnalysis { ) -> status::AnalysisStatus { let start_time = Utc::now(); let mut sitrep_builder = fm::SitrepBuilder::new(&opctx.log, &inputs); - let result = fm::diagnosis::analyze(&inputs, &mut sitrep_builder); + let result = fm::diagnosis::analyze(&mut sitrep_builder); let end_time = Utc::now(); let (sitrep, report) = sitrep_builder.build(self.nexus_id, end_time); From 14c2a06fbc5a37b283df851400735f8412f9d455 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 13:46:23 -0700 Subject: [PATCH 07/30] drop unused lookup_fm_fact_physical_disk_for_case index Facts are only read during sitrep load, which paginates over the (sitrep_id, id) primary key; nothing queries by case directly. Easy to re-add with a migration if omdb grows a case-scoped fact query. 
--- schema/crdb/dbinit.sql | 4 ---- schema/crdb/fm-disk-de-and-facts/up4.sql | 1 - schema/crdb/fm-disk-de-and-facts/up4.verify.sql | 2 -- 3 files changed, 7 deletions(-) delete mode 100644 schema/crdb/fm-disk-de-and-facts/up4.sql delete mode 100644 schema/crdb/fm-disk-de-and-facts/up4.verify.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 2490ec9573a..34d17d8f6b4 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7771,10 +7771,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( ) ); -CREATE INDEX IF NOT EXISTS - lookup_fm_fact_physical_disk_for_case -ON omicron.public.fm_fact_physical_disk (sitrep_id, case_id); - CREATE TABLE IF NOT EXISTS omicron.public.fm_ereport_in_case ( -- ID of this association. When an ereport is assigned to a case, that -- association is assigned a UUID. These are used primarily to aid in diff --git a/schema/crdb/fm-disk-de-and-facts/up4.sql b/schema/crdb/fm-disk-de-and-facts/up4.sql deleted file mode 100644 index 83d3461270d..00000000000 --- a/schema/crdb/fm-disk-de-and-facts/up4.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE INDEX IF NOT EXISTS lookup_fm_fact_physical_disk_for_case ON omicron.public.fm_fact_physical_disk (sitrep_id, case_id); diff --git a/schema/crdb/fm-disk-de-and-facts/up4.verify.sql b/schema/crdb/fm-disk-de-and-facts/up4.verify.sql deleted file mode 100644 index 788cdeebc19..00000000000 --- a/schema/crdb/fm-disk-de-and-facts/up4.verify.sql +++ /dev/null @@ -1,2 +0,0 @@ --- DO NOT EDIT. Generated by test_migration_verification_files. 
-SELECT CAST(IF((SELECT true WHERE EXISTS (SELECT index_name FROM omicron.crdb_internal.table_indexes WHERE descriptor_name = 'fm_fact_physical_disk' AND index_name = 'lookup_fm_fact_physical_disk_for_case')),'true','Schema change verification failed: index lookup_fm_fact_physical_disk_for_case on table fm_fact_physical_disk does not exist') AS BOOL); From d2a676775e9a1f3fe7829e507960c2cff4842537 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 13:50:48 -0700 Subject: [PATCH 08/30] [fm] disk diagnoser: index parent cases by disk Replace the per-disk linear scan of parent cases with a one-time inverse index. First case ID wins on (pathological) collisions, same as the scan it replaces. --- nexus/fm/src/diagnosis/physical_disk.rs | 26 +++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index ab6bb1d4777..f7b2658f8cb 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -161,6 +161,19 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { }) .collect(); + // Inverse index: which parent case is about which disk. Cases are + // per-disk, so a disk with two parent cases is pathological; keep the + // lowest case ID. 
+ let mut case_by_disk: BTreeMap< + PhysicalDiskUuid, + (CaseUuid, &ParentCaseSummary), + > = BTreeMap::new(); + for (case_id, summary) in &parent_cases { + case_by_disk + .entry(summary.physical_disk_id) + .or_insert((*case_id, summary)); + } + // For each parent case, decide what to do based on its disk's current // state: // - disk no longer in service → close the case (expungement) @@ -210,17 +223,10 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { continue; } - let parent_for_disk = - parent_cases.iter().find_map(|(case_id, summary)| { - if summary.physical_disk_id == disk.physical_disk_id { - Some((*case_id, summary)) - } else { - None - } - }); + let parent_for_disk = case_by_disk.get(&disk.physical_disk_id).copied(); let case_id_for_fact = match parent_for_disk { - // Parent case already has an accurate fact — fully covered. + // Parent case already has an accurate fact; fully covered. Some((_, summary)) if summary .unhealthy_facts @@ -232,7 +238,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // Parent case exists; its stale facts were removed above. // Refresh under the same case. Some((case_id, _)) => case_id, - // No parent case for this disk — open one. + // No parent case for this disk; open one. None => { let mut new_case = builder.cases.open_case(DiagnosisEngineKind::PhysicalDisk); From 89c04db62759cc07e68fd9c4ec6e50d5ebc484f2 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 14:33:43 -0700 Subject: [PATCH 09/30] [fm] fm_fact_saga: per-kind implication CHECK constraints Match the fm_fact_physical_disk convention: each kind's constraint validates that the columns it expects are present, and future kinds add their own constraint instead of rewriting an exhaustive CASE. This also stops enforcing that other kinds' columns are NULL, leaving room for kinds to share columns. 
--- nexus/db-model/src/fm/fact_saga.rs | 14 ++++++----- schema/crdb/dbinit.sql | 37 +++++++++++++----------------- schema/crdb/fm-saga-de/up4.sql | 37 +++++++++++++----------------- 3 files changed, 40 insertions(+), 48 deletions(-) diff --git a/nexus/db-model/src/fm/fact_saga.rs b/nexus/db-model/src/fm/fact_saga.rs index ef6d7add529..ed631dd2f88 100644 --- a/nexus/db-model/src/fm/fact_saga.rs +++ b/nexus/db-model/src/fm/fact_saga.rs @@ -5,10 +5,10 @@ //! Database representation of the saga diagnosis engine's facts. //! //! Each saga fact is stored as typed columns in the `fm_fact_saga` table. The -//! `kind` discriminant selects which payload columns are populated; a CHECK -//! constraint (`fm_fact_saga_columns_match_kind`) enforces that the right -//! columns are non-NULL for each kind. See [`nexus_types::fm::SagaFact`] for -//! semantics. +//! `kind` discriminant selects which payload columns are populated; per-kind +//! CHECK constraints (e.g. `not_progressing_columns_present`) enforce that +//! the right columns are non-NULL for each kind. See +//! [`nexus_types::fm::SagaFact`] for semantics. use crate::DbTypedUuid; use crate::Generation; @@ -92,8 +92,10 @@ fn saga_progress_state(state: SagaState) -> Result { /// Diesel row for the `fm_fact_saga` table. /// /// The payload columns are populated according to `kind`: a column is `Some` -/// iff it belongs to that `kind`'s payload, enforced in the database by the -/// `fm_fact_saga_columns_match_kind` CHECK constraint. +/// if it belongs to that `kind`'s payload, and `None` otherwise (by +/// convention; the per-kind CHECK constraints, e.g. +/// `not_progressing_columns_present`, enforce only the "present for the +/// matching kind" direction so that future kinds may share columns). 
#[derive(Queryable, Insertable, Clone, Debug, Selectable)] #[diesel(table_name = fm_fact_saga)] pub struct FmFactSaga { diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 60bdf819963..75f6116f843 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7821,27 +7821,22 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( PRIMARY KEY (sitrep_id, id), - -- Exactly one kind's columns are populated; all other kinds' columns are - -- NULL. A `kind` not handled here fails closed (ELSE false), so adding a - -- new kind requires extending this constraint. - CONSTRAINT fm_fact_saga_columns_match_kind CHECK ( - CASE kind - WHEN 'not_progressing' THEN - saga_state IS NOT NULL - AND time_created IS NOT NULL - AND last_event_time IS NOT NULL - AND current_sec IS NULL - AND orphan_reason IS NULL - AND adopt_generation IS NULL - WHEN 'owner_not_current_generation' THEN - current_sec IS NOT NULL - AND orphan_reason IS NOT NULL - AND adopt_generation IS NOT NULL - AND saga_state IS NULL - AND time_created IS NULL - AND last_event_time IS NULL - ELSE false - END + -- Each variant validates that the columns it expects are present. + -- Future variants should add their own constraint like this one, + -- leaving existing constraints untouched. 
+ CONSTRAINT not_progressing_columns_present CHECK ( + kind != 'not_progressing' OR ( + saga_state IS NOT NULL + AND time_created IS NOT NULL + AND last_event_time IS NOT NULL + ) + ), + CONSTRAINT owner_not_current_generation_columns_present CHECK ( + kind != 'owner_not_current_generation' OR ( + current_sec IS NOT NULL + AND orphan_reason IS NOT NULL + AND adopt_generation IS NOT NULL + ) ) ); diff --git a/schema/crdb/fm-saga-de/up4.sql b/schema/crdb/fm-saga-de/up4.sql index 261a2ce3513..7c3ea742229 100644 --- a/schema/crdb/fm-saga-de/up4.sql +++ b/schema/crdb/fm-saga-de/up4.sql @@ -35,26 +35,21 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( PRIMARY KEY (sitrep_id, id), - -- Exactly one kind's columns are populated; all other kinds' columns are - -- NULL. A `kind` not handled here fails closed (ELSE false), so adding a - -- new kind requires extending this constraint. - CONSTRAINT fm_fact_saga_columns_match_kind CHECK ( - CASE kind - WHEN 'not_progressing' THEN - saga_state IS NOT NULL - AND time_created IS NOT NULL - AND last_event_time IS NOT NULL - AND current_sec IS NULL - AND orphan_reason IS NULL - AND adopt_generation IS NULL - WHEN 'owner_not_current_generation' THEN - current_sec IS NOT NULL - AND orphan_reason IS NOT NULL - AND adopt_generation IS NOT NULL - AND saga_state IS NULL - AND time_created IS NULL - AND last_event_time IS NULL - ELSE false - END + -- Each variant validates that the columns it expects are present. + -- Future variants should add their own constraint like this one, + -- leaving existing constraints untouched. 
+ CONSTRAINT not_progressing_columns_present CHECK ( + kind != 'not_progressing' OR ( + saga_state IS NOT NULL + AND time_created IS NOT NULL + AND last_event_time IS NOT NULL + ) + ), + CONSTRAINT owner_not_current_generation_columns_present CHECK ( + kind != 'owner_not_current_generation' OR ( + current_sec IS NOT NULL + AND orphan_reason IS NOT NULL + AND adopt_generation IS NOT NULL + ) ) ); From bb308b589402facb5cab5490595908abbb0d6af5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 14:51:28 -0700 Subject: [PATCH 10/30] [fm] saga diagnoser: close cases on recovery A case is an episode of a problem, not a dossier on the saga. When a flagged saga is still running but no condition holds anymore (progress resumed, owner re-adopted by a current Nexus), close the case rather than stripping its facts and leaving it open. Previously the case would be left open with zero facts, making it uninterpretable to the next analysis pass, which would then carry it forward unexamined forever, even after the saga terminated. The closing case keeps its facts attached as the record of why it existed; they age out with the case once it stops being copied forward and its sitreps are GC'd. --- nexus/fm/src/diagnosis/saga.rs | 247 ++++++++++++++++++++++++++++----- 1 file changed, 215 insertions(+), 32 deletions(-) diff --git a/nexus/fm/src/diagnosis/saga.rs b/nexus/fm/src/diagnosis/saga.rs index 2bc421c42dd..5d716d41391 100644 --- a/nexus/fm/src/diagnosis/saga.rs +++ b/nexus/fm/src/diagnosis/saga.rs @@ -108,14 +108,15 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { } // First pass: for each parent case, close it if its saga has reached a - // terminal state (no longer observed), otherwise drop any facts whose - // recorded contents no longer match the current observation. The second - // pass re-adds a fresh fact if the condition still holds. 
+ // terminal state (no longer observed) or has fully recovered (no + // condition holds anymore), otherwise drop any facts whose recorded + // contents no longer match the current observation. The second pass + // re-adds a fresh fact if the condition still holds. for (case_id, summary) in &parent_cases { - let mut case_mut = builder - .cases - .case_mut(case_id) - .expect("case_id came from iterating builder.cases"); + let mut case_mut = builder.cases.case_mut(case_id).expect( + "builder.cases is seeded from the open cases of builder.input(), \ + which is where this case_id came from", + ); let Some(obs) = observed.get(&summary.saga_id) else { case_mut.close(format!( "saga {} reached a terminal state", @@ -125,6 +126,18 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { }; let desired_np = desired_not_progressing(obs, reference_time); let desired_owner = desired_owner_not_current(obs); + // A case is an episode of a problem, not a dossier on the saga: when + // no condition holds anymore, the episode is over and the case + // closes. Its facts stay attached as the record of why it existed; + // they age out with the case once it stops being copied forward. If + // the saga becomes a problem again later, a fresh case opens. 
+ if desired_np.is_none() && desired_owner.is_none() { + case_mut.close(format!( + "saga {} is progressing under a current owner again", + summary.saga_id, + )); + continue; + } if let Some((fact_id, payload)) = &summary.not_progressing { if desired_np.as_ref() != Some(payload) { case_mut.remove_fact(*fact_id); @@ -341,14 +354,17 @@ mod tests { builder.build().0 } - fn run_analyze(log: &slog::Logger, input: &Input) -> Sitrep { + fn run_analyze( + log: &slog::Logger, + input: &Input, + ) -> (Sitrep, fm::analysis_reports::AnalysisReport) { let mut builder = SitrepBuilder::new_with_rng( log, input, SitrepBuilderRng::from_seed("saga-analyze"), ); analyze(&mut builder).expect("analyze ok"); - builder.build(OmicronZoneUuid::new_v4(), Utc::now()).0 + builder.build(OmicronZoneUuid::new_v4(), Utc::now()) } /// Collect every saga fact in the sitrep, optionally only on open cases. @@ -372,17 +388,19 @@ mod tests { fn make_parent_with_saga_case( parent_sitrep_id: SitrepUuid, inv_collection_id: omicron_uuid_kinds::CollectionUuid, - fact_payload: SagaFact, + fact_payloads: impl IntoIterator<Item = SagaFact>, ) -> Sitrep { let mut facts = IdOrdMap::new(); - facts - .insert_unique(fm::case::Fact { - id: omicron_uuid_kinds::FactUuid::new_v4(), - created_sitrep_id: parent_sitrep_id, - payload: fact_payload.into(), - comment: "parent saga fact".to_string(), - }) - .unwrap(); + for fact_payload in fact_payloads { + facts + .insert_unique(fm::case::Fact { + id: omicron_uuid_kinds::FactUuid::new_v4(), + created_sitrep_id: parent_sitrep_id, + payload: fact_payload.into(), + comment: "parent saga fact".to_string(), + }) + .unwrap(); + } let mut cases = IdOrdMap::new(); cases .insert_unique(fm::Case { @@ -422,7 +440,7 @@ mod tests { let id = saga_id(1); let observed = observed_map([mk_observed(id, Some(stale), None, None)]); let input = build_input(collection, None, observed); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); let facts = 
saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); @@ -443,7 +461,7 @@ mod tests { let observed = observed_map([mk_observed(saga_id(1), Some(recent), None, None)]); let input = build_input(collection, None, observed); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); assert!( saga_facts(&sitrep, false).is_empty(), "a saga making recent progress should not be flagged", @@ -464,7 +482,7 @@ mod tests { Some(SagaOwnerState::Quiesced), )]); let input = build_input(collection, None, observed); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); @@ -491,7 +509,7 @@ mod tests { Some(SagaOwnerState::Active), )]); let input = build_input(collection, None, observed); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); assert!(saga_facts(&sitrep, false).is_empty()); logctx.cleanup_successful(); } @@ -507,7 +525,7 @@ mod tests { Some(SagaOwnerState::NotYet), )]); let input = build_input(collection, None, observed); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); assert!(saga_facts(&sitrep, false).is_empty()); logctx.cleanup_successful(); } @@ -525,7 +543,7 @@ mod tests { Some(SagaOwnerState::Absent), )]); let input = build_input(collection, None, observed); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 2, "expected both fact kinds on one case"); @@ -560,17 +578,17 @@ mod tests { let parent = make_parent_with_saga_case( parent_id, inv_id, - SagaFact::NotProgressing(SagaNotProgressingFactPayload { + [SagaFact::NotProgressing(SagaNotProgressingFactPayload { saga_id: id, saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, 
time_created: Utc::now() - TimeDelta::days(1), last_event_time: stale, - }), + })], ); // The saga is no longer observed (it reached a terminal state). let input = build_input(collection, Some(parent), observed_map([])); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); let all = saga_facts(&sitrep, false); assert_eq!(all.len(), 1, "the fact carries forward on the closed case"); let case = sitrep @@ -600,7 +618,7 @@ mod tests { let parent = make_parent_with_saga_case( parent_id, inv_id, - SagaFact::NotProgressing(payload.clone()), + [SagaFact::NotProgressing(payload.clone())], ); let parent_fact_id = parent.cases.iter().next().unwrap().facts.iter().next().unwrap().id; @@ -617,7 +635,7 @@ mod tests { owner_state: None, }]); let input = build_input(collection, Some(parent), observed); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); assert_eq!( @@ -640,13 +658,13 @@ mod tests { let parent = make_parent_with_saga_case( SitrepUuid::new_v4(), inv_id, - SagaFact::NotProgressing(SagaNotProgressingFactPayload { + [SagaFact::NotProgressing(SagaNotProgressingFactPayload { saga_id: id, saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, time_created, last_event_time: old, - }), + })], ); let parent_fact_id = parent.cases.iter().next().unwrap().facts.iter().next().unwrap().id; @@ -662,7 +680,7 @@ mod tests { owner_state: None, }]); let input = build_input(collection, Some(parent), observed); - let sitrep = run_analyze(&logctx.log, &input); + let (sitrep, _report) = run_analyze(&logctx.log, &input); let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); assert_ne!( @@ -677,4 +695,169 @@ mod tests { } logctx.cleanup_successful(); } + + /// A saga that was flagged as not progressing but has since resumed + /// recording node events: the episode is over, so the 
case closes. Its + /// fact stays attached as the record of why the case existed. + #[test] + fn closes_on_progress_resumed() { + let (logctx, collection) = setup("saga_closes_on_progress_resumed"); + let id = saga_id(1); + let stale = + collection.time_done - (STALE_SAGA_THRESHOLD + TimeDelta::hours(1)); + let recent = collection.time_done - TimeDelta::minutes(1); + let parent = make_parent_with_saga_case( + SitrepUuid::new_v4(), + collection.id, + [SagaFact::NotProgressing(SagaNotProgressingFactPayload { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created: Utc::now() - TimeDelta::days(1), + last_event_time: stale, + })], + ); + let observed = + observed_map([mk_observed(id, Some(recent), None, None)]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, report) = run_analyze(&logctx.log, &input); + + let case = sitrep + .cases + .iter() + .find(|c| c.metadata.de == DiagnosisEngineKind::Saga) + .expect("saga case should be present in the closing sitrep"); + assert!( + !case.is_open(), + "case should close when the saga resumes progress", + ); + assert_eq!( + case.facts.len(), + 1, + "the stale fact stays attached to the closed case as evidence", + ); + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("progressing under a current owner again"), + "close comment should call out the recovery, got: {report_str}", + ); + logctx.cleanup_successful(); + } + + /// A saga whose case was opened because its owner was quiesced, since + /// re-adopted by an active Nexus: the case closes, fact attached. 
+ #[test] + fn closes_on_owner_readopted() { + let (logctx, collection) = setup("saga_closes_on_owner_readopted"); + let id = saga_id(1); + let recent = collection.time_done - TimeDelta::minutes(1); + let parent = make_parent_with_saga_case( + SitrepUuid::new_v4(), + collection.id, + [SagaFact::OwnerNotCurrentGeneration( + SagaOwnerNotCurrentFactPayload { + saga_id: id, + saga_name: "test-saga".to_string(), + current_sec: OmicronZoneUuid::new_v4(), + orphan_reason: OrphanedReason::Quiesced, + adopt_generation: Generation::new(), + }, + )], + ); + // The saga has been re-adopted by an active Nexus and is making + // progress. + let observed = observed_map([mk_observed( + id, + Some(recent), + Some(OmicronZoneUuid::new_v4()), + Some(SagaOwnerState::Active), + )]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let case = sitrep + .cases + .iter() + .find(|c| c.metadata.de == DiagnosisEngineKind::Saga) + .expect("saga case should be present in the closing sitrep"); + assert!( + !case.is_open(), + "case should close when the saga is re-adopted by a current Nexus", + ); + assert_eq!(case.facts.len(), 1); + logctx.cleanup_successful(); + } + + /// One condition clears (progress resumes) while the other persists + /// (owner still quiesced): the case stays open, the cleared condition's + /// fact is removed, and the persisting fact carries forward with a + /// stable UUID. 
+ #[test] + fn partial_recovery_keeps_case_open() { + let (logctx, collection) = setup("saga_partial_recovery"); + let id = saga_id(1); + let stale = + collection.time_done - (STALE_SAGA_THRESHOLD + TimeDelta::hours(1)); + let recent = collection.time_done - TimeDelta::minutes(1); + let sec = OmicronZoneUuid::new_v4(); + let owner_payload = SagaOwnerNotCurrentFactPayload { + saga_id: id, + saga_name: "test-saga".to_string(), + current_sec: sec, + orphan_reason: OrphanedReason::Quiesced, + adopt_generation: Generation::new(), + }; + let parent = make_parent_with_saga_case( + SitrepUuid::new_v4(), + collection.id, + [ + SagaFact::NotProgressing(SagaNotProgressingFactPayload { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created: Utc::now() - TimeDelta::days(1), + last_event_time: stale, + }), + SagaFact::OwnerNotCurrentGeneration(owner_payload.clone()), + ], + ); + let parent_owner_fact_id = parent + .cases + .iter() + .next() + .unwrap() + .facts + .iter() + .find_map(|f| match f.payload.as_saga() { + Some(SagaFact::OwnerNotCurrentGeneration(_)) => Some(f.id), + _ => None, + }) + .expect("parent case should have an owner fact"); + // Progress has resumed, but the owner is still quiesced with the + // same SEC and adopt generation, so the owner fact still matches. 
+ let observed = observed_map([mk_observed( + id, + Some(recent), + Some(sec), + Some(SagaOwnerState::Quiesced), + )]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let facts = saga_facts(&sitrep, true); + assert_eq!( + facts.len(), + 1, + "only the owner fact should remain on the open case", + ); + assert_eq!( + facts[0].0.id, parent_owner_fact_id, + "the persisting fact carries forward with a stable UUID", + ); + assert!(matches!( + &facts[0].1, + SagaFact::OwnerNotCurrentGeneration(p) if p.current_sec == sec + )); + logctx.cleanup_successful(); + } } From 3c1f9767b24f6157f1ff9093ef50201601ae149b Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 15:01:28 -0700 Subject: [PATCH 11/30] drop unused lookup_fm_fact_saga_for_case index Facts are only read during sitrep load, which paginates over the (sitrep_id, id) primary key; nothing queries by case directly. Matches the same change to fm_fact_physical_disk. --- schema/crdb/dbinit.sql | 4 ---- schema/crdb/fm-saga-de/up5.sql | 1 - schema/crdb/fm-saga-de/up5.verify.sql | 2 -- 3 files changed, 7 deletions(-) delete mode 100644 schema/crdb/fm-saga-de/up5.sql delete mode 100644 schema/crdb/fm-saga-de/up5.verify.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 75f6116f843..c14b69e6270 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7840,10 +7840,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( ) ); -CREATE INDEX IF NOT EXISTS - lookup_fm_fact_saga_for_case -ON omicron.public.fm_fact_saga (sitrep_id, case_id); - CREATE TABLE IF NOT EXISTS omicron.public.fm_ereport_in_case ( -- ID of this association. When an ereport is assigned to a case, that -- association is assigned a UUID. 
These are used primarily to aid in diff --git a/schema/crdb/fm-saga-de/up5.sql b/schema/crdb/fm-saga-de/up5.sql deleted file mode 100644 index c4372905384..00000000000 --- a/schema/crdb/fm-saga-de/up5.sql +++ /dev/null @@ -1 +0,0 @@ -CREATE INDEX IF NOT EXISTS lookup_fm_fact_saga_for_case ON omicron.public.fm_fact_saga (sitrep_id, case_id); diff --git a/schema/crdb/fm-saga-de/up5.verify.sql b/schema/crdb/fm-saga-de/up5.verify.sql deleted file mode 100644 index 263f3b6a78b..00000000000 --- a/schema/crdb/fm-saga-de/up5.verify.sql +++ /dev/null @@ -1,2 +0,0 @@ --- DO NOT EDIT. Generated by test_migration_verification_files. -SELECT CAST(IF((SELECT true WHERE EXISTS (SELECT index_name FROM omicron.crdb_internal.table_indexes WHERE descriptor_name = 'fm_fact_saga' AND index_name = 'lookup_fm_fact_saga_for_case')),'true','Schema change verification failed: index lookup_fm_fact_saga_for_case on table fm_fact_saga does not exist') AS BOOL); From 5b18ede1bbba0c136114dad9221285718b2c07d3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 15:11:55 -0700 Subject: [PATCH 12/30] [fm] include observed sagas in the analysis input report The input report listed in-service disks but said nothing about the sagas visible to the saga diagnosis engine, leaving no way to answer "why did FM (not) flag this saga" from the report. List each non-terminal saga with its name, state, latest node-event time, and owner classification. 
--- dev-tools/omdb/tests/successes.out | 8 ++ nexus/fm/src/analysis_input.rs | 15 ++++ .../output/analysis_input_report_empty.out | 2 + .../output/analysis_input_report_same_inv.out | 2 + .../analysis_input_report_with_cases.out | 4 + nexus/types/src/fm/analysis_reports.rs | 87 +++++++++++++++++++ 6 files changed, 118 insertions(+) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 4aec2bb7e41..44cd1962a49 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -717,6 +717,8 @@ task: "fm_analysis" no in-service control plane disks + no non-terminal sagas observed + fault management analysis report -------------------------------- sitrep ID: ..................... @@ -767,6 +769,8 @@ task: "fm_sitrep_gc" batches: 1 orphaned fm_fact_physical_disk rows deleted: 0 batches: 1 + orphaned fm_fact_saga rows deleted: 0 + batches: 1 orphaned fm_support_bundle_request rows deleted: 0 batches: 1 orphaned fm_support_bundle_request_data_selection_ereports rows deleted: 0 @@ -1415,6 +1419,8 @@ task: "fm_analysis" no in-service control plane disks + no non-terminal sagas observed + fault management analysis report -------------------------------- sitrep ID: ..................... 
@@ -1465,6 +1471,8 @@ task: "fm_sitrep_gc" batches: 1 orphaned fm_fact_physical_disk rows deleted: 0 batches: 1 + orphaned fm_fact_saga rows deleted: 0 + batches: 1 orphaned fm_support_bundle_request rows deleted: 0 batches: 1 orphaned fm_support_bundle_request_data_selection_ereports rows deleted: 0 diff --git a/nexus/fm/src/analysis_input.rs b/nexus/fm/src/analysis_input.rs index cde247e904a..78d6352edde 100644 --- a/nexus/fm/src/analysis_input.rs +++ b/nexus/fm/src/analysis_input.rs @@ -218,6 +218,21 @@ impl Builder { .iter() .map(|d| d.physical_disk_id) .collect(), + observed_sagas: self + .observed_sagas + .iter() + .map(|s| { + ( + s.saga_id, + fm::analysis_reports::ObservedSagaReport { + saga_name: s.saga_name.clone(), + saga_state: s.saga_state, + last_event_time: s.last_event_time, + owner_state: s.owner_state, + }, + ) + }) + .collect(), }; // Determine which cases must be copied forwards into the next sitrep. diff --git a/nexus/types/output/analysis_input_report_empty.out b/nexus/types/output/analysis_input_report_empty.out index a479e87d36c..f89eac2f3e7 100644 --- a/nexus/types/output/analysis_input_report_empty.out +++ b/nexus/types/output/analysis_input_report_empty.out @@ -6,3 +6,5 @@ no new ereports since the parent sitrep no cases copied forward no in-service control plane disks + +no non-terminal sagas observed diff --git a/nexus/types/output/analysis_input_report_same_inv.out b/nexus/types/output/analysis_input_report_same_inv.out index 25631755f78..6f901fd0056 100644 --- a/nexus/types/output/analysis_input_report_same_inv.out +++ b/nexus/types/output/analysis_input_report_same_inv.out @@ -7,3 +7,5 @@ no new ereports since the parent sitrep no cases copied forward no in-service control plane disks + +no non-terminal sagas observed diff --git a/nexus/types/output/analysis_input_report_with_cases.out b/nexus/types/output/analysis_input_report_with_cases.out index a1fc877ef72..3a939dfc330 100644 --- 
a/nexus/types/output/analysis_input_report_with_cases.out +++ b/nexus/types/output/analysis_input_report_with_cases.out @@ -24,3 +24,7 @@ cases (2 total): * ereport dddddddd-dddd-dddd-dddd-dddddddddddd:2 no in-service control plane disks + +non-terminal sagas observed (2 total): + * saga 5a9a0001-5a9a-45a9-85a9-5a9a5a9a5a9a (fake-saga): Unwinding, last event: 1970-01-01 00:00:00 UTC, owner: Quiesced + * saga 5a9a0002-5a9a-45a9-85a9-5a9a5a9a5a9a (another-fake-saga): Running, last event: , owner: diff --git a/nexus/types/src/fm/analysis_reports.rs b/nexus/types/src/fm/analysis_reports.rs index 42b6c6537a4..af1687132e4 100644 --- a/nexus/types/src/fm/analysis_reports.rs +++ b/nexus/types/src/fm/analysis_reports.rs @@ -8,6 +8,8 @@ use super::case; use super::ereport::EreportId; use super::json_display::fmt_json_value; +use crate::observed_saga::{SagaOwnerState, SagaProgressState}; +use chrono::{DateTime, Utc}; use iddqd::IdOrdMap; use omicron_uuid_kinds::{ CaseUuid, CollectionUuid, PhysicalDiskUuid, SitrepUuid, @@ -232,6 +234,22 @@ pub struct InputReport { /// All control-plane-managed physical disks visible to the diagnosis /// engines for this analysis pass. pub in_service_disks: BTreeSet<PhysicalDiskUuid>, + /// All non-terminal sagas visible to the diagnosis engines for this + /// analysis pass. + pub observed_sagas: BTreeMap<steno::SagaId, ObservedSagaReport>, +} + +/// Summary of one non-terminal saga in an [`InputReport`]. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct ObservedSagaReport { + pub saga_name: String, + pub saga_state: SagaProgressState, + /// The latest node event recorded for this saga, or `None` if it has + /// recorded none. + pub last_event_time: Option<DateTime<Utc>>, + /// The classified state of the owning Nexus, or `None` if the saga has + /// no current SEC. 
+ pub owner_state: Option<SagaOwnerState>, } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] @@ -263,6 +281,7 @@ impl fmt::Display for InputReportMultilineDisplay<'_> { open_cases, closed_cases_copied_forward, in_service_disks, + observed_sagas, }, indent, } = self; @@ -378,6 +397,40 @@ impl fmt::Display for InputReportMultilineDisplay<'_> { } } + if observed_sagas.is_empty() { + writeln!(f, "\n{:indent$}no non-terminal sagas observed", "")?; + } else { + writeln!( + f, + "\n{:indent$}non-terminal sagas observed ({} total):", + "", + observed_sagas.len() + )?; + let indent = indent + 2; + for (saga_id, saga) in observed_sagas { + let ObservedSagaReport { + saga_name, + saga_state, + last_event_time, + owner_state, + } = saga; + write!( + f, + "{:indent$}* saga {saga_id} ({saga_name}): \ + {saga_state:?}, last event: ", + "" + )?; + match last_event_time { + Some(t) => write!(f, "{t}")?, + None => write!(f, "")?, + } + match owner_state { + Some(s) => writeln!(f, ", owner: {s:?}")?, + None => writeln!(f, ", owner: ")?, + } + } + } + Ok(()) } } @@ -455,6 +508,8 @@ mod tests { }, ); + let observed_sagas = example_observed_sagas(); + InputReport { parent_sitrep_id: Some(parent_sitrep_id), parent_inv_id: Some(parent_inv_id), @@ -463,9 +518,39 @@ mod tests { open_cases, closed_cases_copied_forward, in_service_disks: BTreeSet::new(), + observed_sagas, } } + fn example_observed_sagas() -> BTreeMap<steno::SagaId, ObservedSagaReport> { + let mut observed_sagas = BTreeMap::new(); + observed_sagas.insert( + steno::SagaId( + uuid::Uuid::from_str("5a9a0001-5a9a-45a9-85a9-5a9a5a9a5a9a") + .unwrap(), + ), + ObservedSagaReport { + saga_name: "fake-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + last_event_time: DateTime::from_timestamp(0, 0), + owner_state: Some(SagaOwnerState::Quiesced), + }, + ); + observed_sagas.insert( + steno::SagaId( + uuid::Uuid::from_str("5a9a0002-5a9a-45a9-85a9-5a9a5a9a5a9a") + .unwrap(), + ), + ObservedSagaReport { + saga_name: "another-fake-saga".to_string(), + saga_state: 
SagaProgressState::Running, + last_event_time: None, + owner_state: None, + }, + ); + observed_sagas + } + fn example_report_empty() -> InputReport { let inv_id = CollectionUuid::from_str("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb") @@ -479,6 +564,7 @@ mod tests { open_cases: BTreeMap::new(), closed_cases_copied_forward: BTreeMap::new(), in_service_disks: BTreeSet::new(), + observed_sagas: BTreeMap::new(), } } @@ -498,6 +584,7 @@ mod tests { open_cases: BTreeMap::new(), closed_cases_copied_forward: BTreeMap::new(), in_service_disks: BTreeSet::new(), + observed_sagas: BTreeMap::new(), } } From c2ebef77cc9623cccb1bc3ed06a1bc3874f0d656 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 15:24:28 -0700 Subject: [PATCH 13/30] [fm] chunk saga_latest_node_event_times by SQL_BATCH_SIZE A mass-stuck-saga incident is exactly when this query runs with a large ID list; chunk the eq_any so it never becomes one giant statement. Chunks are disjoint, so GROUP BY keys never cross them and the concatenated results are identical to the single-statement form. 
--- nexus/db-queries/src/db/datastore/saga.rs | 26 +++++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index c87b00978fd..203cf258f85 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -246,13 +246,25 @@ impl DataStore { { use nexus_db_schema::schema::saga_node_event::dsl; let conn = self.pool_connection_authorized(opctx).await?; - dsl::saga_node_event - .filter(dsl::saga_id.eq_any(saga_ids.to_vec())) - .group_by(dsl::saga_id) - .select((dsl::saga_id, diesel::dsl::max(dsl::event_time))) - .load_async(&*conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + let mut results = Vec::with_capacity(saga_ids.len()); + // Chunk the IDs so a large non-terminal saga population (exactly the + // situation fault management exists for) doesn't produce one giant + // statement. Chunks are disjoint, so GROUP BY keys never cross them + // and concatenating per-chunk results is the full result set. + for chunk in saga_ids.chunks(SQL_BATCH_SIZE.get() as usize) { + let batch: Vec<(db::saga_types::SagaId, Option>)> = + dsl::saga_node_event + .filter(dsl::saga_id.eq_any(chunk.to_vec())) + .group_by(dsl::saga_id) + .select((dsl::saga_id, diesel::dsl::max(dsl::event_time))) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + results.extend(batch); + } + Ok(results) } /// Returns a list of all saga log entries for the given saga, making as From 28fb5fac382342cadcad4cb1d178f0b02b10c532 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 16:00:33 -0700 Subject: [PATCH 14/30] [fm] saga diagnoser: remove duplicate facts of the same kind A saga case carries at most one fact per kind. 
The parent-case summary previously kept whichever duplicate happened to have the highest fact UUID and lost track of the rest, so a (pathological) duplicate would be carried forward and re-persisted in every sitrep, and removing the tracked copy while an untracked match survived would re-add a fresh fact, regenerating the duplicate pair. ParentSagaCase now separates the fact to consider when advancing the case (the lowest fact UUID of each kind) from duplicates, which are removed unconditionally with a warning. A corrupt case converges to one fact per kind in a single pass. --- nexus/fm/src/diagnosis/saga.rs | 222 +++++++++++++++++++++++++++++++-- 1 file changed, 215 insertions(+), 7 deletions(-) diff --git a/nexus/fm/src/diagnosis/saga.rs b/nexus/fm/src/diagnosis/saga.rs index 5d716d41391..d9d6564d0ca 100644 --- a/nexus/fm/src/diagnosis/saga.rs +++ b/nexus/fm/src/diagnosis/saga.rs @@ -30,12 +30,18 @@ use std::collections::BTreeMap; const STALE_SAGA_THRESHOLD: TimeDelta = TimeDelta::minutes(30); /// Per-case view of a parent saga case, built from its facts. Every fact on a -/// saga case is about the same `saga_id`; at most one fact of each kind is -/// expected. +/// saga case is about the same `saga_id`, and a case carries at most one fact +/// of each kind. struct ParentSagaCase { saga_id: steno::SagaId, + /// The fact to consider when advancing the case: at most one per kind + /// (the lowest fact UUID wins if a case pathologically carries several). not_progressing: Option<(FactUuid, SagaNotProgressingFactPayload)>, owner_not_current: Option<(FactUuid, SagaOwnerNotCurrentFactPayload)>, + /// Facts that should not exist: duplicates of a kind beyond the first. + /// These carry no information the kept fact doesn't; they are removed + /// unconditionally, regardless of what the observation says. 
+ duplicate_facts: Vec, } pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { @@ -61,8 +67,17 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { { let case_id = case.id; let mut saga_id: Option = None; - let mut not_progressing = None; - let mut owner_not_current = None; + let mut not_progressing: Option<( + FactUuid, + SagaNotProgressingFactPayload, + )> = None; + let mut owner_not_current: Option<( + FactUuid, + SagaOwnerNotCurrentFactPayload, + )> = None; + let mut duplicate_facts = Vec::new(); + // `case.facts` iterates in fact UUID order, so the kept fact for + // each kind is deterministically the one with the lowest UUID. for fact in case.facts.iter() { let Some(saga_fact) = fact.payload.as_saga() else { slog::warn!( @@ -85,13 +100,30 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { } match saga_fact { SagaFact::NotProgressing(p) => { - not_progressing = Some((fact.id, p.clone())); + if not_progressing.is_none() { + not_progressing = Some((fact.id, p.clone())); + } else { + duplicate_facts.push(fact.id); + } } SagaFact::OwnerNotCurrentGeneration(p) => { - owner_not_current = Some((fact.id, p.clone())); + if owner_not_current.is_none() { + owner_not_current = Some((fact.id, p.clone())); + } else { + duplicate_facts.push(fact.id); + } } } } + if !duplicate_facts.is_empty() { + slog::warn!( + &builder.log, + "Saga case has more than one fact of the same kind; \ + the duplicates will be removed"; + "case_id" => %case_id, + "duplicate_fact_ids" => ?duplicate_facts, + ); + } let Some(saga_id) = saga_id else { slog::warn!( &builder.log, @@ -102,7 +134,12 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { }; parent_cases.insert( case_id, - ParentSagaCase { saga_id, not_progressing, owner_not_current }, + ParentSagaCase { + saga_id, + not_progressing, + owner_not_current, + duplicate_facts, + }, ); case_for_saga.insert(saga_id, case_id); } @@ 
-138,6 +175,11 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { )); continue; } + // Duplicate facts carry no information the kept facts don't; remove + // them regardless of what the observation says. + for fact_id in &summary.duplicate_facts { + case_mut.remove_fact(*fact_id); + } if let Some((fact_id, payload)) = &summary.not_progressing { if desired_np.as_ref() != Some(payload) { case_mut.remove_fact(*fact_id); @@ -401,6 +443,20 @@ mod tests { }) .unwrap(); } + make_parent_with_saga_case_from_facts( + parent_sitrep_id, + inv_collection_id, + facts, + ) + } + + /// Like [`make_parent_with_saga_case`], but with caller-controlled + /// `Fact`s (e.g., for tests that need specific fact UUIDs). + fn make_parent_with_saga_case_from_facts( + parent_sitrep_id: SitrepUuid, + inv_collection_id: omicron_uuid_kinds::CollectionUuid, + facts: IdOrdMap, + ) -> Sitrep { let mut cases = IdOrdMap::new(); cases .insert_unique(fm::Case { @@ -860,4 +916,156 @@ mod tests { )); logctx.cleanup_successful(); } + + fn fact_uuid(n: u128) -> FactUuid { + use omicron_uuid_kinds::GenericUuid; + FactUuid::from_untyped_uuid(uuid::Uuid::from_u128(n)) + } + + fn np_fact( + id: FactUuid, + created_sitrep_id: SitrepUuid, + saga: steno::SagaId, + time_created: chrono::DateTime, + last_event_time: chrono::DateTime, + ) -> fm::case::Fact { + fm::case::Fact { + id, + created_sitrep_id, + payload: SagaFact::NotProgressing(SagaNotProgressingFactPayload { + saga_id: saga, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created, + last_event_time, + }) + .into(), + comment: "parent saga fact".to_string(), + } + } + + /// A pathological parent case carrying two `NotProgressing` facts + /// (which the engine never creates itself), where the kept fact (lowest + /// UUID) matches the current observation: the duplicate is removed and + /// the kept fact survives with its UUID intact. 
+ #[test] + fn duplicate_fact_removed_when_kept_fact_matches() { + let (logctx, collection) = setup("saga_dup_kept_matches"); + let id = saga_id(1); + let time_created = collection.time_done - TimeDelta::days(1); + let current = collection.time_done + - (STALE_SAGA_THRESHOLD + TimeDelta::minutes(5)); + let old = + collection.time_done - (STALE_SAGA_THRESHOLD + TimeDelta::hours(2)); + let parent_id = SitrepUuid::new_v4(); + let kept_id = fact_uuid(1); + let dup_id = fact_uuid(2); + let mut parent_facts = IdOrdMap::new(); + parent_facts + .insert_unique(np_fact( + kept_id, + parent_id, + id, + time_created, + current, + )) + .unwrap(); + parent_facts + .insert_unique(np_fact(dup_id, parent_id, id, time_created, old)) + .unwrap(); + let parent = make_parent_with_saga_case_from_facts( + parent_id, + collection.id, + parent_facts, + ); + // Still stale; the observation matches the kept fact exactly. + let observed = observed_map([ObservedSaga { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created, + current_sec: None, + adopt_generation: Generation::new(), + last_event_time: Some(current), + owner_state: None, + }]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let facts = saga_facts(&sitrep, true); + assert_eq!(facts.len(), 1, "the duplicate fact should be removed"); + assert_eq!( + facts[0].0.id, kept_id, + "the kept fact matches the observation, so its UUID is stable", + ); + logctx.cleanup_successful(); + } + + /// As above, but the kept fact (lowest UUID) is the stale one: both + /// parent facts are removed (the kept one for mismatching, the + /// duplicate unconditionally) and one fresh fact replaces them. 
+ #[test] + fn duplicate_fact_removed_when_kept_fact_is_stale() { + let (logctx, collection) = setup("saga_dup_kept_stale"); + let id = saga_id(1); + let time_created = collection.time_done - TimeDelta::days(1); + let current = collection.time_done + - (STALE_SAGA_THRESHOLD + TimeDelta::minutes(5)); + let old = + collection.time_done - (STALE_SAGA_THRESHOLD + TimeDelta::hours(2)); + let parent_id = SitrepUuid::new_v4(); + let kept_id = fact_uuid(1); + let dup_id = fact_uuid(2); + let mut parent_facts = IdOrdMap::new(); + parent_facts + .insert_unique(np_fact(kept_id, parent_id, id, time_created, old)) + .unwrap(); + parent_facts + .insert_unique(np_fact( + dup_id, + parent_id, + id, + time_created, + current, + )) + .unwrap(); + let parent = make_parent_with_saga_case_from_facts( + parent_id, + collection.id, + parent_facts, + ); + // Still stale; the observation matches the duplicate, not the kept + // fact. + let observed = observed_map([ObservedSaga { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: SagaProgressState::Unwinding, + time_created, + current_sec: None, + adopt_generation: Generation::new(), + last_event_time: Some(current), + owner_state: None, + }]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let facts = saga_facts(&sitrep, true); + assert_eq!( + facts.len(), + 1, + "both parent facts should be removed and one fresh fact added", + ); + assert_ne!(facts[0].0.id, kept_id, "the stale kept fact was removed"); + assert_ne!( + facts[0].0.id, dup_id, + "the duplicate was removed unconditionally", + ); + match &facts[0].1 { + SagaFact::NotProgressing(p) => { + assert_eq!(p.last_event_time, current); + } + other => panic!("expected NotProgressing, got {other:?}"), + } + logctx.cleanup_successful(); + } } From f799268be1e28d3668babfd6a2f9d88767f8591a Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 16:06:32 -0700 Subject: [PATCH 15/30] [fm] 
fix SagaOwnerState::Absent doc A saga with no current_sec yields owner_state = None, not Absent; the variant doc claimed otherwise. Also replace em-dashes in this file's comments. --- nexus/types/src/observed_saga.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nexus/types/src/observed_saga.rs b/nexus/types/src/observed_saga.rs index eadc4273e04..55537aa9cff 100644 --- a/nexus/types/src/observed_saga.rs +++ b/nexus/types/src/observed_saga.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! "Currently non-terminal sagas" — the executed view from the `saga` and +//! "Currently non-terminal sagas": the executed view from the `saga` and //! `saga_node_event` DB tables, annotated with the state of each saga's //! owning Nexus. //! @@ -43,14 +43,16 @@ pub enum SagaOwnerState { NotYet, /// The owning Nexus has quiesced (an older generation handed off). Quiesced, - /// The owning Nexus has no `db_metadata_nexus` record at all (expunged), - /// or the saga has no `current_sec`. + /// The owning Nexus has no `db_metadata_nexus` record at all (expunged). + /// + /// A saga with no `current_sec` is *not* classified as `Absent`; it has + /// no owner state at all (see [`ObservedSaga::owner_state`]). Absent, } impl SagaOwnerState { - /// If this owner state means the saga is orphaned — owned by a Nexus that - /// will not make progress on it — the reason why; otherwise `None`. + /// If this owner state means the saga is orphaned (owned by a Nexus that + /// will not make progress on it), the reason why; otherwise `None`. pub fn orphaned_reason(self) -> Option { match self { SagaOwnerState::Active | SagaOwnerState::NotYet => None, @@ -94,7 +96,7 @@ pub struct ObservedSaga { /// recording progress. 
pub last_event_time: Option>, /// The classified state of the owning Nexus, or `None` if the saga has no - /// `current_sec` (it is between adoptions and not classifiable — and not + /// `current_sec` (it is between adoptions, not classifiable, and not /// treated as orphaned). Always `Some` when `current_sec` is `Some`. pub owner_state: Option, } From 7b245fc0019b8c873d714e18f9bc117356c650c4 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 16:58:30 -0700 Subject: [PATCH 16/30] [fm] saga facts: payloads carry only condition-defining fields A fact payload contains exactly what defines the condition for the analysis loop: the subject's ID plus the parameters whose change should rotate the fact. Anything a human wants for presentation is looked up from the database when a case is acted on; a case is only open while its saga row still exists, so the lookup always works. Accordingly: - saga_name leaves both payloads and the fm_fact_saga table; it never defines a condition. Names still appear in debug comments. - time_created leaves NotProgressing; the staleness condition folds it into last_event_time already. - adopt_generation leaves OwnerNotCurrentGeneration; it is not condition-defining, and sagas_reassign_sec bumping it would have rotated the fact UUID over a meaningless ownership shuffle. --- nexus/db-model/src/fm/fact_saga.rs | 19 ----------- nexus/db-queries/src/db/datastore/fm.rs | 6 ---- nexus/db-schema/src/schema.rs | 3 -- nexus/fm/src/diagnosis/saga.rs | 45 ++++--------------------- nexus/types/src/fm/fact.rs | 35 ++++++++----------- schema/crdb/dbinit.sql | 10 +++--- schema/crdb/fm-saga-de/up4.sql | 10 +++--- 7 files changed, 28 insertions(+), 100 deletions(-) diff --git a/nexus/db-model/src/fm/fact_saga.rs b/nexus/db-model/src/fm/fact_saga.rs index ed631dd2f88..3eabbbe6186 100644 --- a/nexus/db-model/src/fm/fact_saga.rs +++ b/nexus/db-model/src/fm/fact_saga.rs @@ -11,7 +11,6 @@ //! [`nexus_types::fm::SagaFact`] for semantics. 
use crate::DbTypedUuid; -use crate::Generation; use crate::SagaState; use crate::impl_enum_type; use chrono::{DateTime, Utc}; @@ -115,19 +114,15 @@ pub struct FmFactSaga { /// The saga this fact is about. Common to every `kind`, so it is always /// present (the column is `NOT NULL`). pub saga_id: Uuid, - /// The saga's name. Common to every `kind`, always present. - pub saga_name: String, pub kind: FmFactSagaKind, // Columns for the `NotProgressing` kind. pub saga_state: Option, - pub time_created: Option>, pub last_event_time: Option>, // Columns for the `OwnerNotCurrentGeneration` kind. pub current_sec: Option>, pub orphan_reason: Option, - pub adopt_generation: Option, } impl FmFactSaga { @@ -146,20 +141,16 @@ impl FmFactSaga { created_sitrep_id: fact.created_sitrep_id.into(), comment: fact.comment.clone(), saga_id: saga_fact.saga_id().0, - saga_name: saga_fact.saga_name().to_string(), kind: FmFactSagaKind::NotProgressing, saga_state: None, - time_created: None, last_event_time: None, current_sec: None, orphan_reason: None, - adopt_generation: None, }; match saga_fact { SagaFact::NotProgressing(p) => Self { kind: FmFactSagaKind::NotProgressing, saga_state: Some(p.saga_state.into()), - time_created: Some(p.time_created), last_event_time: Some(p.last_event_time), ..base }, @@ -167,7 +158,6 @@ impl FmFactSaga { kind: FmFactSagaKind::OwnerNotCurrentGeneration, current_sec: Some(p.current_sec.into()), orphan_reason: Some(p.orphan_reason.into()), - adopt_generation: Some(Generation::from(p.adopt_generation)), ..base }, } @@ -181,20 +171,15 @@ impl FmFactSaga { pub fn into_fact(self) -> Result { let kind = self.kind; let saga_id = steno::SagaId(self.saga_id); - let saga_name = self.saga_name; let payload = match kind { FmFactSagaKind::NotProgressing => FactPayload::Saga( SagaFact::NotProgressing(SagaNotProgressingFactPayload { saga_id, - saga_name, saga_state: saga_progress_state( self.saga_state.ok_or_else(|| { missing_column(kind, "saga_state") })?, )?, - 
time_created: self - .time_created - .ok_or_else(|| missing_column(kind, "time_created"))?, last_event_time: self.last_event_time.ok_or_else(|| { missing_column(kind, "last_event_time") })?, @@ -204,7 +189,6 @@ impl FmFactSaga { FactPayload::Saga(SagaFact::OwnerNotCurrentGeneration( SagaOwnerNotCurrentFactPayload { saga_id, - saga_name, current_sec: self .current_sec .ok_or_else(|| missing_column(kind, "current_sec"))? @@ -215,9 +199,6 @@ impl FmFactSaga { missing_column(kind, "orphan_reason") })? .into(), - adopt_generation: *self.adopt_generation.ok_or_else( - || missing_column(kind, "adopt_generation"), - )?, }, )) } diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index 5bb966303f9..d692e186012 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -2361,11 +2361,8 @@ mod tests { fm::SagaFact::NotProgressing( fm::SagaNotProgressingFactPayload { saga_id: steno::SagaId(uuid::Uuid::new_v4()), - saga_name: "test-saga".to_string(), saga_state: nexus_types::observed_saga::SagaProgressState::Unwinding, - time_created: - omicron_common::now_db_precision(), last_event_time: omicron_common::now_db_precision(), }, @@ -2383,13 +2380,10 @@ mod tests { fm::SagaFact::OwnerNotCurrentGeneration( fm::SagaOwnerNotCurrentFactPayload { saga_id: steno::SagaId(uuid::Uuid::new_v4()), - saga_name: "test-saga".to_string(), current_sec: omicron_uuid_kinds::OmicronZoneUuid::new_v4(), orphan_reason: nexus_types::observed_saga::OrphanedReason::Quiesced, - adopt_generation: - omicron_common::api::external::Generation::new(), }, ), ), diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 98acb61cc6d..df918045688 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -3276,14 +3276,11 @@ table! 
{ created_sitrep_id -> Uuid, comment -> Text, saga_id -> Uuid, - saga_name -> Text, kind -> crate::enums::FmFactSagaKindEnum, saga_state -> Nullable, - time_created -> Nullable, last_event_time -> Nullable, current_sec -> Nullable, orphan_reason -> Nullable, - adopt_generation -> Nullable, } } diff --git a/nexus/fm/src/diagnosis/saga.rs b/nexus/fm/src/diagnosis/saga.rs index d9d6564d0ca..0dc81efad01 100644 --- a/nexus/fm/src/diagnosis/saga.rs +++ b/nexus/fm/src/diagnosis/saga.rs @@ -285,9 +285,7 @@ fn desired_not_progressing( { Some(SagaNotProgressingFactPayload { saga_id: obs.saga_id, - saga_name: obs.saga_name.clone(), saga_state: obs.saga_state, - time_created: obs.time_created, last_event_time: last_progress, }) } else { @@ -306,10 +304,8 @@ fn desired_owner_not_current( let current_sec = obs.current_sec?; Some(SagaOwnerNotCurrentFactPayload { saga_id: obs.saga_id, - saga_name: obs.saga_name.clone(), current_sec, orphan_reason: reason, - adopt_generation: obs.adopt_generation, }) } @@ -636,9 +632,7 @@ mod tests { inv_id, [SagaFact::NotProgressing(SagaNotProgressingFactPayload { saga_id: id, - saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, - time_created: Utc::now() - TimeDelta::days(1), last_event_time: stale, })], ); @@ -664,9 +658,7 @@ mod tests { - (STALE_SAGA_THRESHOLD + TimeDelta::minutes(5)); let payload = SagaNotProgressingFactPayload { saga_id: id, - saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, - time_created: Utc::now() - TimeDelta::days(1), last_event_time: stale, }; let parent_id = SitrepUuid::new_v4(); @@ -679,12 +671,12 @@ mod tests { let parent_fact_id = parent.cases.iter().next().unwrap().facts.iter().next().unwrap().id; // Observed saga matches the parent fact exactly (same last_event_time, - // same created time, same state). + // same state). 
let observed = observed_map([ObservedSaga { saga_id: id, saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, - time_created: payload.time_created, + time_created: Utc::now() - TimeDelta::days(1), current_sec: None, adopt_generation: Generation::new(), last_event_time: Some(stale), @@ -716,9 +708,7 @@ mod tests { inv_id, [SagaFact::NotProgressing(SagaNotProgressingFactPayload { saga_id: id, - saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, - time_created, last_event_time: old, })], ); @@ -767,9 +757,7 @@ mod tests { collection.id, [SagaFact::NotProgressing(SagaNotProgressingFactPayload { saga_id: id, - saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, - time_created: Utc::now() - TimeDelta::days(1), last_event_time: stale, })], ); @@ -813,10 +801,8 @@ mod tests { [SagaFact::OwnerNotCurrentGeneration( SagaOwnerNotCurrentFactPayload { saga_id: id, - saga_name: "test-saga".to_string(), current_sec: OmicronZoneUuid::new_v4(), orphan_reason: OrphanedReason::Quiesced, - adopt_generation: Generation::new(), }, )], ); @@ -858,10 +844,8 @@ mod tests { let sec = OmicronZoneUuid::new_v4(); let owner_payload = SagaOwnerNotCurrentFactPayload { saga_id: id, - saga_name: "test-saga".to_string(), current_sec: sec, orphan_reason: OrphanedReason::Quiesced, - adopt_generation: Generation::new(), }; let parent = make_parent_with_saga_case( SitrepUuid::new_v4(), @@ -869,9 +853,7 @@ mod tests { [ SagaFact::NotProgressing(SagaNotProgressingFactPayload { saga_id: id, - saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, - time_created: Utc::now() - TimeDelta::days(1), last_event_time: stale, }), SagaFact::OwnerNotCurrentGeneration(owner_payload.clone()), @@ -926,7 +908,6 @@ mod tests { id: FactUuid, created_sitrep_id: SitrepUuid, saga: steno::SagaId, - time_created: chrono::DateTime, last_event_time: chrono::DateTime, ) -> fm::case::Fact { fm::case::Fact { @@ -934,9 +915,7 @@ mod 
tests { created_sitrep_id, payload: SagaFact::NotProgressing(SagaNotProgressingFactPayload { saga_id: saga, - saga_name: "test-saga".to_string(), saga_state: SagaProgressState::Unwinding, - time_created, last_event_time, }) .into(), @@ -962,16 +941,10 @@ mod tests { let dup_id = fact_uuid(2); let mut parent_facts = IdOrdMap::new(); parent_facts - .insert_unique(np_fact( - kept_id, - parent_id, - id, - time_created, - current, - )) + .insert_unique(np_fact(kept_id, parent_id, id, current)) .unwrap(); parent_facts - .insert_unique(np_fact(dup_id, parent_id, id, time_created, old)) + .insert_unique(np_fact(dup_id, parent_id, id, old)) .unwrap(); let parent = make_parent_with_saga_case_from_facts( parent_id, @@ -1018,16 +991,10 @@ mod tests { let dup_id = fact_uuid(2); let mut parent_facts = IdOrdMap::new(); parent_facts - .insert_unique(np_fact(kept_id, parent_id, id, time_created, old)) + .insert_unique(np_fact(kept_id, parent_id, id, old)) .unwrap(); parent_facts - .insert_unique(np_fact( - dup_id, - parent_id, - id, - time_created, - current, - )) + .insert_unique(np_fact(dup_id, parent_id, id, current)) .unwrap(); let parent = make_parent_with_saga_case_from_facts( parent_id, diff --git a/nexus/types/src/fm/fact.rs b/nexus/types/src/fm/fact.rs index 622836f1433..7e780b8340e 100644 --- a/nexus/types/src/fm/fact.rs +++ b/nexus/types/src/fm/fact.rs @@ -15,7 +15,6 @@ use crate::inventory::ZpoolHealth; use crate::observed_saga::{OrphanedReason, SagaProgressState}; use chrono::{DateTime, Utc}; -use omicron_common::api::external::Generation; use omicron_uuid_kinds::{ CollectionUuid, OmicronZoneUuid, PhysicalDiskUuid, ZpoolUuid, }; @@ -133,48 +132,42 @@ impl SagaFact { SagaFact::OwnerNotCurrentGeneration(p) => p.saga_id, } } - - /// The saga's name. Common to every kind of saga fact. 
- pub fn saga_name(&self) -> &str { - match self { - SagaFact::NotProgressing(p) => &p.saga_name, - SagaFact::OwnerNotCurrentGeneration(p) => &p.saga_name, - } - } } /// Payload of a [`SagaFact::NotProgressing`] fact. +/// +/// Payloads carry only the fields that define the condition: the subject's +/// ID, plus the parameters whose change means the condition itself changed +/// (which rotates the fact). Anything a human wants for presentation (e.g., +/// the saga's name) is looked up from the database when a case is acted on; +/// a case is only open while its saga row still exists. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct SagaNotProgressingFactPayload { /// The saga this fact (and its parent case) is about. pub saga_id: steno::SagaId, - /// The saga's name (e.g. `instance-start`). - pub saga_name: String, /// Whether the saga is running forward or unwinding. Unwinding-but-stuck /// is the more dangerous case (it may have half-torn-down resources). pub saga_state: SagaProgressState, - /// When the saga was created. - pub time_created: DateTime, - /// The latest `saga_node_event.event_time` observed for this saga, i.e. - /// the last durably-recorded step. The case was opened because - /// `now - last_event_time` exceeded the staleness threshold. + /// The last durably-recorded progress: the latest + /// `saga_node_event.event_time` observed for this saga, or the saga's + /// creation time if it has recorded no node events at all. The case was + /// opened because `now - last_event_time` exceeded the staleness + /// threshold. pub last_event_time: DateTime, } /// Payload of a [`SagaFact::OwnerNotCurrentGeneration`] fact. +/// +/// See [`SagaNotProgressingFactPayload`] for why payloads carry only +/// condition-defining fields. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub struct SagaOwnerNotCurrentFactPayload { /// The saga this fact (and its parent case) is about. 
pub saga_id: steno::SagaId, - /// The saga's name (e.g. `instance-start`). - pub saga_name: String, /// The owning Nexus zone (`saga.current_sec`). This fact only fires when /// the saga has a current SEC, so it is always present. pub current_sec: OmicronZoneUuid, /// Why the owner is not current: quiesced (older generation) or expunged /// (no `db_metadata_nexus` record). pub orphan_reason: OrphanedReason, - /// `saga.adopt_generation`: how many times the saga has been re-adopted - /// to a SEC. Recorded for triage (thrashing across Nexus restarts). - pub adopt_generation: Generation, } diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index c14b69e6270..99067ea5c68 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7800,9 +7800,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( -- The saga this fact is about. Common to every kind of saga fact (the -- case is keyed by it), so it is always present regardless of `kind`. + -- + -- Fact payloads carry only the fields that define the condition; data + -- that merely describes the saga (e.g., its name) is looked up from the + -- saga table when a case is acted on. saga_id UUID NOT NULL, - -- The saga's name (e.g. 'instance-start'). Common to every kind. - saga_name TEXT NOT NULL, -- Which saga fact this row represents. The columns below are populated -- according to this discriminant (see the CHECK constraint). @@ -7810,14 +7812,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( -- Columns for a 'not_progressing' fact. NULL for any other kind. saga_state omicron.public.saga_state, - time_created TIMESTAMPTZ, last_event_time TIMESTAMPTZ, -- Columns for an 'owner_not_current_generation' fact. NULL for any other -- kind. 
current_sec UUID, orphan_reason omicron.public.fm_fact_saga_orphan_reason, - adopt_generation INT8, PRIMARY KEY (sitrep_id, id), @@ -7827,7 +7827,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( CONSTRAINT not_progressing_columns_present CHECK ( kind != 'not_progressing' OR ( saga_state IS NOT NULL - AND time_created IS NOT NULL AND last_event_time IS NOT NULL ) ), @@ -7835,7 +7834,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( kind != 'owner_not_current_generation' OR ( current_sec IS NOT NULL AND orphan_reason IS NOT NULL - AND adopt_generation IS NOT NULL ) ) ); diff --git a/schema/crdb/fm-saga-de/up4.sql b/schema/crdb/fm-saga-de/up4.sql index 7c3ea742229..7e7a3c690d9 100644 --- a/schema/crdb/fm-saga-de/up4.sql +++ b/schema/crdb/fm-saga-de/up4.sql @@ -14,9 +14,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( -- The saga this fact is about. Common to every kind of saga fact (the -- case is keyed by it), so it is always present regardless of `kind`. + -- + -- Fact payloads carry only the fields that define the condition; data + -- that merely describes the saga (e.g., its name) is looked up from the + -- saga table when a case is acted on. saga_id UUID NOT NULL, - -- The saga's name (e.g. 'instance-start'). Common to every kind. - saga_name TEXT NOT NULL, -- Which saga fact this row represents. The columns below are populated -- according to this discriminant (see the CHECK constraint). @@ -24,14 +26,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( -- Columns for a 'not_progressing' fact. NULL for any other kind. saga_state omicron.public.saga_state, - time_created TIMESTAMPTZ, last_event_time TIMESTAMPTZ, -- Columns for an 'owner_not_current_generation' fact. NULL for any other -- kind. 
current_sec UUID, orphan_reason omicron.public.fm_fact_saga_orphan_reason, - adopt_generation INT8, PRIMARY KEY (sitrep_id, id), @@ -41,7 +41,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( CONSTRAINT not_progressing_columns_present CHECK ( kind != 'not_progressing' OR ( saga_state IS NOT NULL - AND time_created IS NOT NULL AND last_event_time IS NOT NULL ) ), @@ -49,7 +48,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_saga ( kind != 'owner_not_current_generation' OR ( current_sec IS NOT NULL AND orphan_reason IS NOT NULL - AND adopt_generation IS NOT NULL ) ) ); From 027622a30cca8749eb4c242de057932437d0a2ef Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 10 Jun 2026 17:21:58 -0700 Subject: [PATCH 17/30] [fm] saga diagnoser: Abandoned fact kind Per omicron#10581, Nexus will explicitly abandon sagas it fails to recover for non-transient reasons. Abandonment is the beginning of an escalation, not a resolution: the saga may be holding partially-allocated resources and needs saga-specific manual remediation (RFD 555). Without this change the engine would close the case the moment a stuck saga was abandoned, exactly when the escalation should begin. - The projection now lists unfinished sagas (running, unwinding, or abandoned); only 'done' drops a saga from observation. ObservedSaga carries the three-variant ObservedSagaState; SagaProgressState remains the live-saga subset recorded in NotProgressing facts. - New SagaFact::Abandoned with a pure-identity payload (the condition is boolean, so nothing can rotate it). Abandonment supersedes the live-saga conditions: NotProgressing and OwnerNotCurrentGeneration facts are removed when a saga is abandoned, and the case carries the Abandoned fact alone, staying open until the saga row is deleted. - The module doc also records why the owner fact detects stranded sagas rather than wrongly-resumed ones, and what mitigates the latter. 
--- nexus/db-model/src/fm/fact_saga.rs | 13 +- nexus/db-queries/src/db/datastore/fm.rs | 12 + nexus/db-queries/src/db/datastore/saga.rs | 17 +- nexus/fm/src/diagnosis/saga.rs | 271 ++++++++++++++++-- nexus/src/app/background/tasks/fm_analysis.rs | 20 +- .../analysis_input_report_with_cases.out | 2 +- nexus/types/src/fm.rs | 5 +- nexus/types/src/fm/analysis_reports.rs | 8 +- nexus/types/src/fm/fact.rs | 29 +- nexus/types/src/observed_saga.rs | 36 ++- schema/crdb/dbinit.sql | 3 +- schema/crdb/fm-saga-de/up2.sql | 3 +- 12 files changed, 363 insertions(+), 56 deletions(-) diff --git a/nexus/db-model/src/fm/fact_saga.rs b/nexus/db-model/src/fm/fact_saga.rs index 3eabbbe6186..806fbde217f 100644 --- a/nexus/db-model/src/fm/fact_saga.rs +++ b/nexus/db-model/src/fm/fact_saga.rs @@ -17,8 +17,8 @@ use chrono::{DateTime, Utc}; use nexus_db_schema::schema::fm_fact_saga; use nexus_types::fm; use nexus_types::fm::{ - FactPayload, SagaFact, SagaNotProgressingFactPayload, - SagaOwnerNotCurrentFactPayload, + FactPayload, SagaAbandonedFactPayload, SagaFact, + SagaNotProgressingFactPayload, SagaOwnerNotCurrentFactPayload, }; use nexus_types::observed_saga::{OrphanedReason, SagaProgressState}; use omicron_common::api::external::Error; @@ -33,6 +33,7 @@ impl_enum_type!( NotProgressing => b"not_progressing" OwnerNotCurrentGeneration => b"owner_not_current_generation" + Abandoned => b"abandoned" ); impl_enum_type!( @@ -160,6 +161,11 @@ impl FmFactSaga { orphan_reason: Some(p.orphan_reason.into()), ..base }, + // The Abandoned payload is pure identity (the condition is + // boolean), so the row carries only the common columns. 
+ SagaFact::Abandoned(_) => { + Self { kind: FmFactSagaKind::Abandoned, ..base } + } } } @@ -202,6 +208,9 @@ impl FmFactSaga { }, )) } + FmFactSagaKind::Abandoned => FactPayload::Saga( + SagaFact::Abandoned(SagaAbandonedFactPayload { saga_id }), + ), }; Ok(fm::case::Fact { id: self.id.into(), diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index d692e186012..ae006342c43 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -2390,6 +2390,18 @@ mod tests { comment: "a representative orphaned saga fact".to_string(), }) .unwrap(); + facts + .insert_unique(fm::case::Fact { + id: FactUuid::new_v4(), + created_sitrep_id: sitrep_id, + payload: fm::FactPayload::Saga(fm::SagaFact::Abandoned( + fm::SagaAbandonedFactPayload { + saga_id: steno::SagaId(uuid::Uuid::new_v4()), + }, + )), + comment: "a representative abandoned saga fact".to_string(), + }) + .unwrap(); fm::Case { id: omicron_uuid_kinds::CaseUuid::new_v4(), diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index 203cf258f85..2d8b660c526 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -191,17 +191,21 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - /// Returns all sagas in a running or unwinding (i.e. non-terminal) state, - /// making as many queries as needed (in batches) to get them all. + /// Returns all unfinished sagas: running, unwinding, or abandoned. Makes + /// as many queries as needed (in batches) to get them all. 
/// /// Unlike [`Self::saga_list_running_or_unwinding_older_than`], this has no /// age filter and no result cap: it is used by fault management, which - /// needs the complete set of non-terminal sagas (a lossy list would cause + /// needs the complete set of unfinished sagas (a lossy list would cause /// it to incorrectly close cases for sagas it simply didn't see). - pub async fn saga_list_running_or_unwinding_batched( + /// Abandoned sagas are included because they need manual remediation; + /// only `done` means a saga requires no further attention. + pub async fn saga_list_unfinished_batched( &self, opctx: &OpContext, ) -> Result, Error> { + const UNFINISHED_STATES: &[SagaState] = + &[SagaState::Running, SagaState::Unwinding, SagaState::Abandoned]; let mut sagas = vec![]; let mut paginator = Paginator::new( SQL_BATCH_SIZE, @@ -213,10 +217,7 @@ impl DataStore { let mut batch = paginated(dsl::saga, dsl::id, &p.current_pagparams()) - .filter( - dsl::saga_state - .eq_any(SagaState::RECOVERY_CANDIDATE_STATES), - ) + .filter(dsl::saga_state.eq_any(UNFINISHED_STATES)) .select(db::saga_types::Saga::as_select()) .load_async(&*conn) .await diff --git a/nexus/fm/src/diagnosis/saga.rs b/nexus/fm/src/diagnosis/saga.rs index 0dc81efad01..1eb19882414 100644 --- a/nexus/fm/src/diagnosis/saga.rs +++ b/nexus/fm/src/diagnosis/saga.rs @@ -4,12 +4,22 @@ //! Saga diagnosis engine. //! -//! Opens a case (keyed by `saga_id`) for any non-terminal saga that is either -//! *not making progress* (no node event recorded for a while) or *orphaned* -//! (owned by a Nexus that is no longer of the current generation). These are -//! two independent fact kinds; a saga's case may carry either or both. A case -//! is closed once the saga reaches a terminal state (it drops out of the set -//! of non-terminal sagas the preparation phase observed). +//! Opens a case (keyed by `saga_id`) for any unfinished saga that is +//! *not making progress* (no node event recorded for a while), *orphaned* +//! 
(owned by a Nexus that is no longer of the current generation), or +//! *abandoned* (Nexus permanently gave up on recovering it; see +//! omicron#10581 / RFD 555). A live saga's case may carry the first two +//! facts together; abandonment supersedes both. A case closes when its +//! saga completes or recovers, or, for an abandoned saga, only when the +//! saga row is removed from the database: abandonment is the beginning of +//! an escalation, not a resolution. +//! +//! The owner facts detect *stranded* sagas, not *wrongly-resumed* ones: if +//! an orphaned saga is re-adopted by a current-generation Nexus, the owner +//! fact clears, even though running a saga built by a different Nexus +//! version is itself dangerous. That risk is mitigated upstream: recovery +//! abandons sagas it cannot recover (omicron#10581), and saga quiesce +//! prevents cross-version adoption in the normal path. //! //! See omicron#10530 for motivation. @@ -17,9 +27,12 @@ use crate::SitrepBuilder; use chrono::{DateTime, TimeDelta, Utc}; use nexus_types::fm::DiagnosisEngineKind; use nexus_types::fm::{ - SagaFact, SagaNotProgressingFactPayload, SagaOwnerNotCurrentFactPayload, + SagaAbandonedFactPayload, SagaFact, SagaNotProgressingFactPayload, + SagaOwnerNotCurrentFactPayload, +}; +use nexus_types::observed_saga::{ + ObservedSaga, ObservedSagaState, SagaProgressState, }; -use nexus_types::observed_saga::ObservedSaga; use omicron_uuid_kinds::{CaseUuid, FactUuid}; use std::collections::BTreeMap; @@ -38,6 +51,7 @@ struct ParentSagaCase { /// (the lowest fact UUID wins if a case pathologically carries several). not_progressing: Option<(FactUuid, SagaNotProgressingFactPayload)>, owner_not_current: Option<(FactUuid, SagaOwnerNotCurrentFactPayload)>, + abandoned: Option<(FactUuid, SagaAbandonedFactPayload)>, /// Facts that should not exist: duplicates of a kind beyond the first. 
/// These carry no information the kept fact doesn't; they are removed /// unconditionally, regardless of what the observation says. @@ -75,6 +89,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { FactUuid, SagaOwnerNotCurrentFactPayload, )> = None; + let mut abandoned: Option<(FactUuid, SagaAbandonedFactPayload)> = None; let mut duplicate_facts = Vec::new(); // `case.facts` iterates in fact UUID order, so the kept fact for // each kind is deterministically the one with the lowest UUID. @@ -113,6 +128,13 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { duplicate_facts.push(fact.id); } } + SagaFact::Abandoned(p) => { + if abandoned.is_none() { + abandoned = Some((fact.id, p.clone())); + } else { + duplicate_facts.push(fact.id); + } + } } } if !duplicate_facts.is_empty() { @@ -138,6 +160,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { saga_id, not_progressing, owner_not_current, + abandoned, duplicate_facts, }, ); @@ -156,19 +179,27 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { ); let Some(obs) = observed.get(&summary.saga_id) else { case_mut.close(format!( - "saga {} reached a terminal state", + "saga {} completed or was removed", summary.saga_id, )); continue; }; let desired_np = desired_not_progressing(obs, reference_time); let desired_owner = desired_owner_not_current(obs); + let desired_abandoned = desired_abandoned(obs); // A case is an episode of a problem, not a dossier on the saga: when // no condition holds anymore, the episode is over and the case // closes. Its facts stay attached as the record of why it existed; // they age out with the case once it stops being copied forward. If // the saga becomes a problem again later, a fresh case opens. 
- if desired_np.is_none() && desired_owner.is_none() { + // + // An abandoned saga never reaches this close: `desired_abandoned` + // holds until the saga row itself is removed (the `else` branch + // above), keeping the case open while remediation is pending. + if desired_np.is_none() + && desired_owner.is_none() + && desired_abandoned.is_none() + { case_mut.close(format!( "saga {} is progressing under a current owner again", summary.saga_id, @@ -190,6 +221,11 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { case_mut.remove_fact(*fact_id); } } + if let Some((fact_id, payload)) = &summary.abandoned { + if desired_abandoned.as_ref() != Some(payload) { + case_mut.remove_fact(*fact_id); + } + } } // Second pass: for each observed saga with a problem, ensure a case exists @@ -199,7 +235,11 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { for obs in observed.iter() { let desired_np = desired_not_progressing(obs, reference_time); let desired_owner = desired_owner_not_current(obs); - if desired_np.is_none() && desired_owner.is_none() { + let desired_abandoned = desired_abandoned(obs); + if desired_np.is_none() + && desired_owner.is_none() + && desired_abandoned.is_none() + { continue; } @@ -221,6 +261,10 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { ), (Some(want), Some((_, have))) if want == have ); + let abandoned_already = matches!( + (&desired_abandoned, parent.and_then(|(_, s)| s.abandoned.as_ref())), + (Some(want), Some((_, have))) if want == have + ); let case_id = match parent { Some((case_id, _)) => case_id, @@ -266,6 +310,28 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { ); } } + if let Some(payload) = desired_abandoned { + if !abandoned_already { + // The payload is pure identity; the human-readable context + // (which a promoted problem would otherwise look up from the + // saga row) goes in the comment. 
+ let comment = format!( + "saga {} ({}) abandoned by Nexus; created at {}, last \ + node event: {}", + obs.saga_id, + obs.saga_name, + obs.time_created, + obs.last_event_time + .map(|t| t.to_string()) + .unwrap_or_else(|| "".to_string()), + ); + builder + .cases + .case_mut(&case_id) + .expect("case_id came from this fn") + .add_fact(SagaFact::Abandoned(payload), comment); + } + } } Ok(()) @@ -275,17 +341,25 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { /// has recorded no node event at all, its creation time stands in for the /// last-progress timestamp (a saga that has existed past the threshold without /// recording a single step is itself stuck at the start). +/// +/// An abandoned saga is never "not progressing": [`desired_abandoned`] +/// supersedes the live-saga conditions. fn desired_not_progressing( obs: &ObservedSaga, reference_time: DateTime, ) -> Option { + let saga_state = match obs.saga_state { + ObservedSagaState::Running => SagaProgressState::Running, + ObservedSagaState::Unwinding => SagaProgressState::Unwinding, + ObservedSagaState::Abandoned => return None, + }; let last_progress = obs.last_event_time.unwrap_or(obs.time_created); if reference_time.signed_duration_since(last_progress) > STALE_SAGA_THRESHOLD { Some(SagaNotProgressingFactPayload { saga_id: obs.saga_id, - saga_state: obs.saga_state, + saga_state, last_event_time: last_progress, }) } else { @@ -297,9 +371,15 @@ fn desired_not_progressing( /// Only fires when the saga has a `current_sec` whose owner is orphaned /// (quiesced or expunged); a saga with no current SEC is between adoptions and /// is not treated as orphaned. +/// +/// An abandoned saga has no meaningful owner (nobody will ever run it): +/// [`desired_abandoned`] supersedes the live-saga conditions. 
fn desired_owner_not_current( obs: &ObservedSaga, ) -> Option { + if obs.saga_state == ObservedSagaState::Abandoned { + return None; + } let reason = obs.owner_state?.orphaned_reason()?; let current_sec = obs.current_sec?; Some(SagaOwnerNotCurrentFactPayload { @@ -309,6 +389,14 @@ fn desired_owner_not_current( }) } +/// The `Abandoned` fact this saga should carry now, if any. The condition is +/// boolean, so the payload is pure identity and never rotates; the case stays +/// open, carrying this fact, until the saga row is removed from the database. +fn desired_abandoned(obs: &ObservedSaga) -> Option { + (obs.saga_state == ObservedSagaState::Abandoned) + .then(|| SagaAbandonedFactPayload { saga_id: obs.saga_id }) +} + #[cfg(test)] mod tests { use super::*; @@ -352,7 +440,7 @@ mod tests { ObservedSaga { saga_id: id, saga_name: "test-saga".to_string(), - saga_state: SagaProgressState::Unwinding, + saga_state: ObservedSagaState::Unwinding, time_created: Utc::now() - TimeDelta::days(1), current_sec, adopt_generation: Generation::new(), @@ -675,7 +763,7 @@ mod tests { let observed = observed_map([ObservedSaga { saga_id: id, saga_name: "test-saga".to_string(), - saga_state: SagaProgressState::Unwinding, + saga_state: ObservedSagaState::Unwinding, time_created: Utc::now() - TimeDelta::days(1), current_sec: None, adopt_generation: Generation::new(), @@ -718,7 +806,7 @@ mod tests { let observed = observed_map([ObservedSaga { saga_id: id, saga_name: "test-saga".to_string(), - saga_state: SagaProgressState::Unwinding, + saga_state: ObservedSagaState::Unwinding, time_created, current_sec: None, adopt_generation: Generation::new(), @@ -955,7 +1043,7 @@ mod tests { let observed = observed_map([ObservedSaga { saga_id: id, saga_name: "test-saga".to_string(), - saga_state: SagaProgressState::Unwinding, + saga_state: ObservedSagaState::Unwinding, time_created, current_sec: None, adopt_generation: Generation::new(), @@ -1006,7 +1094,7 @@ mod tests { let observed = 
observed_map([ObservedSaga { saga_id: id, saga_name: "test-saga".to_string(), - saga_state: SagaProgressState::Unwinding, + saga_state: ObservedSagaState::Unwinding, time_created, current_sec: None, adopt_generation: Generation::new(), @@ -1035,4 +1123,153 @@ mod tests { } logctx.cleanup_successful(); } + + fn mk_abandoned(id: steno::SagaId) -> ObservedSaga { + ObservedSaga { + saga_id: id, + saga_name: "test-saga".to_string(), + saga_state: ObservedSagaState::Abandoned, + time_created: Utc::now() - TimeDelta::days(1), + current_sec: None, + adopt_generation: Generation::new(), + last_event_time: None, + owner_state: None, + } + } + + fn abandoned_fact( + id: FactUuid, + created_sitrep_id: SitrepUuid, + saga: steno::SagaId, + ) -> fm::case::Fact { + fm::case::Fact { + id, + created_sitrep_id, + payload: SagaFact::Abandoned(SagaAbandonedFactPayload { + saga_id: saga, + }) + .into(), + comment: "parent abandoned fact".to_string(), + } + } + + /// An abandoned saga opens a case carrying a single `Abandoned` fact. + #[test] + fn opens_abandoned_case() { + let (logctx, collection) = setup("saga_opens_abandoned"); + let id = saga_id(1); + let observed = observed_map([mk_abandoned(id)]); + let input = build_input(collection, None, observed); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let facts = saga_facts(&sitrep, true); + assert_eq!(facts.len(), 1); + match &facts[0].1 { + SagaFact::Abandoned(p) => assert_eq!(p.saga_id, id), + other => panic!("expected Abandoned, got {other:?}"), + } + logctx.cleanup_successful(); + } + + /// A stuck saga that Nexus then abandons keeps its case (it is the same + /// episode, escalated), swapping the `NotProgressing` fact for an + /// `Abandoned` fact. 
+ #[test] + fn stuck_saga_escalates_to_abandoned() { + let (logctx, collection) = setup("saga_escalates_to_abandoned"); + let id = saga_id(1); + let stale = + collection.time_done - (STALE_SAGA_THRESHOLD + TimeDelta::hours(1)); + let parent = make_parent_with_saga_case( + SitrepUuid::new_v4(), + collection.id, + [SagaFact::NotProgressing(SagaNotProgressingFactPayload { + saga_id: id, + saga_state: SagaProgressState::Unwinding, + last_event_time: stale, + })], + ); + let parent_case_id = parent.cases.iter().next().unwrap().id; + let observed = observed_map([mk_abandoned(id)]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let case = sitrep + .cases + .iter() + .find(|c| c.metadata.de == DiagnosisEngineKind::Saga) + .expect("saga case should be present"); + assert_eq!( + case.id, parent_case_id, + "the same case carries forward; abandonment escalates the \ + episode rather than starting a new one", + ); + assert!(case.is_open(), "abandonment must not close the case"); + let facts = saga_facts(&sitrep, true); + assert_eq!(facts.len(), 1, "the NotProgressing fact is superseded"); + match &facts[0].1 { + SagaFact::Abandoned(p) => assert_eq!(p.saga_id, id), + other => panic!("expected Abandoned, got {other:?}"), + } + logctx.cleanup_successful(); + } + + /// The `Abandoned` payload is pure identity, so the fact carries forward + /// with a stable UUID for as long as the saga stays abandoned. 
+ #[test] + fn abandoned_fact_uuid_stable() { + let (logctx, collection) = setup("saga_abandoned_stable"); + let id = saga_id(1); + let parent_id = SitrepUuid::new_v4(); + let parent_fact_id = fact_uuid(1); + let mut parent_facts = IdOrdMap::new(); + parent_facts + .insert_unique(abandoned_fact(parent_fact_id, parent_id, id)) + .unwrap(); + let parent = make_parent_with_saga_case_from_facts( + parent_id, + collection.id, + parent_facts, + ); + let observed = observed_map([mk_abandoned(id)]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let facts = saga_facts(&sitrep, true); + assert_eq!(facts.len(), 1); + assert_eq!( + facts[0].0.id, parent_fact_id, + "the Abandoned fact UUID is stable across sitreps", + ); + logctx.cleanup_successful(); + } + + /// An abandoned saga's case closes only when the saga row is removed + /// from the database (i.e., it drops out of the observed set). + #[test] + fn abandoned_case_closes_when_saga_removed() { + let (logctx, collection) = setup("saga_abandoned_closes_on_removal"); + let id = saga_id(1); + let parent_id = SitrepUuid::new_v4(); + let mut parent_facts = IdOrdMap::new(); + parent_facts + .insert_unique(abandoned_fact(fact_uuid(1), parent_id, id)) + .unwrap(); + let parent = make_parent_with_saga_case_from_facts( + parent_id, + collection.id, + parent_facts, + ); + let input = build_input(collection, Some(parent), observed_map([])); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + + let case = sitrep + .cases + .iter() + .find(|c| c.metadata.de == DiagnosisEngineKind::Saga) + .expect("saga case should be present in the closing sitrep"); + assert!(!case.is_open(), "case should close once the saga row is gone",); + assert_eq!(case.facts.len(), 1, "the fact stays attached as evidence"); + logctx.cleanup_successful(); + } } diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index 
7502b122ea7..e66ee5ff466 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -24,7 +24,7 @@ use nexus_types::internal_api::background::FmAnalysisStatus; use nexus_types::internal_api::background::fm_analysis as status; use nexus_types::inventory; use nexus_types::observed_saga::{ - ObservedSaga, SagaOwnerState, SagaProgressState, + ObservedSaga, ObservedSagaState, SagaOwnerState, }; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; @@ -302,13 +302,14 @@ impl FmAnalysis { ) -> anyhow::Result> { use std::collections::BTreeMap; - // All running/unwinding sagas. Terminal sagas are excluded; a parent - // case whose saga is absent from this set is closed by the engine. + // All unfinished (running, unwinding, or abandoned) sagas. Completed + // sagas are excluded; a parent case whose saga is absent from this + // set is closed by the engine. let sagas = self .datastore - .saga_list_running_or_unwinding_batched(opctx) + .saga_list_unfinished_batched(opctx) .await - .context("failed to list non-terminal sagas")?; + .context("failed to list unfinished sagas")?; // Latest node-event time per saga: the last durably-recorded step. let saga_ids: Vec<_> = sagas.iter().map(|s| s.id).collect(); @@ -344,10 +345,11 @@ impl FmAnalysis { let mut observed = IdOrdMap::new(); for saga in sagas { let saga_state = match saga.saga_state { - SagaState::Running => SagaProgressState::Running, - SagaState::Unwinding => SagaProgressState::Unwinding, - // The query filters to non-terminal states; defend anyway. - SagaState::Done | SagaState::Abandoned => continue, + SagaState::Running => ObservedSagaState::Running, + SagaState::Unwinding => ObservedSagaState::Unwinding, + SagaState::Abandoned => ObservedSagaState::Abandoned, + // The query filters to unfinished states; defend anyway. 
+ SagaState::Done => continue, }; let current_sec = saga .current_sec diff --git a/nexus/types/output/analysis_input_report_with_cases.out b/nexus/types/output/analysis_input_report_with_cases.out index 3a939dfc330..a2ea5aaa382 100644 --- a/nexus/types/output/analysis_input_report_with_cases.out +++ b/nexus/types/output/analysis_input_report_with_cases.out @@ -27,4 +27,4 @@ no in-service control plane disks non-terminal sagas observed (2 total): * saga 5a9a0001-5a9a-45a9-85a9-5a9a5a9a5a9a (fake-saga): Unwinding, last event: 1970-01-01 00:00:00 UTC, owner: Quiesced - * saga 5a9a0002-5a9a-45a9-85a9-5a9a5a9a5a9a (another-fake-saga): Running, last event: , owner: + * saga 5a9a0002-5a9a-45a9-85a9-5a9a5a9a5a9a (another-fake-saga): Abandoned, last event: , owner: diff --git a/nexus/types/src/fm.rs b/nexus/types/src/fm.rs index 538940a50e6..aa6f610efd6 100644 --- a/nexus/types/src/fm.rs +++ b/nexus/types/src/fm.rs @@ -14,8 +14,9 @@ pub mod case; pub use case::Case; pub mod fact; pub use fact::{ - DiskFact, FactPayload, SagaFact, SagaNotProgressingFactPayload, - SagaOwnerNotCurrentFactPayload, ZpoolUnhealthyFactPayload, + DiskFact, FactPayload, SagaAbandonedFactPayload, SagaFact, + SagaNotProgressingFactPayload, SagaOwnerNotCurrentFactPayload, + ZpoolUnhealthyFactPayload, }; pub(crate) mod json_display; diff --git a/nexus/types/src/fm/analysis_reports.rs b/nexus/types/src/fm/analysis_reports.rs index af1687132e4..98e5b9836f6 100644 --- a/nexus/types/src/fm/analysis_reports.rs +++ b/nexus/types/src/fm/analysis_reports.rs @@ -8,7 +8,7 @@ use super::case; use super::ereport::EreportId; use super::json_display::fmt_json_value; -use crate::observed_saga::{SagaOwnerState, SagaProgressState}; +use crate::observed_saga::{ObservedSagaState, SagaOwnerState}; use chrono::{DateTime, Utc}; use iddqd::IdOrdMap; use omicron_uuid_kinds::{ @@ -243,7 +243,7 @@ pub struct InputReport { #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] pub struct ObservedSagaReport { pub 
saga_name: String, - pub saga_state: SagaProgressState, + pub saga_state: ObservedSagaState, /// The latest node event recorded for this saga, or `None` if it has /// recorded none. pub last_event_time: Option>, @@ -531,7 +531,7 @@ mod tests { ), ObservedSagaReport { saga_name: "fake-saga".to_string(), - saga_state: SagaProgressState::Unwinding, + saga_state: ObservedSagaState::Unwinding, last_event_time: DateTime::from_timestamp(0, 0), owner_state: Some(SagaOwnerState::Quiesced), }, @@ -543,7 +543,7 @@ mod tests { ), ObservedSagaReport { saga_name: "another-fake-saga".to_string(), - saga_state: SagaProgressState::Running, + saga_state: ObservedSagaState::Abandoned, last_event_time: None, owner_state: None, }, diff --git a/nexus/types/src/fm/fact.rs b/nexus/types/src/fm/fact.rs index 7e780b8340e..baee3ddddbd 100644 --- a/nexus/types/src/fm/fact.rs +++ b/nexus/types/src/fm/fact.rs @@ -110,17 +110,26 @@ pub struct ZpoolUnhealthyFactPayload { /// Per-fact state for the saga diagnosis engine. /// -/// A saga case (keyed by `saga_id`) may carry either or both of these, -/// reflecting two independent problems with the same saga. +/// A saga case (keyed by `saga_id`) may carry `NotProgressing` and +/// `OwnerNotCurrentGeneration` together (two independent problems with a +/// live saga). `Abandoned` supersedes both: once Nexus has permanently +/// given up on a saga, the live-saga conditions are vacuous, and the case +/// carries the `Abandoned` fact alone. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum SagaFact { - /// The saga is non-terminal but has recorded no node event in a long - /// time, i.e. it is not making durable forward or undo progress. + /// The saga is live but has recorded no node event in a long time, + /// i.e. it is not making durable forward or undo progress. 
NotProgressing(SagaNotProgressingFactPayload), /// The saga is owned by a Nexus that is no longer of the current /// generation (quiesced or expunged), so that Nexus will not advance it. OwnerNotCurrentGeneration(SagaOwnerNotCurrentFactPayload), + /// Nexus failed to recover the saga for a non-transient reason and has + /// permanently given up on running it. The saga may be holding + /// partially-allocated resources; remediation is manual and + /// saga-specific (see omicron#10581 / RFD 555). The case stays open + /// until the saga row is removed from the database. + Abandoned(SagaAbandonedFactPayload), } impl SagaFact { @@ -130,6 +139,7 @@ impl SagaFact { match self { SagaFact::NotProgressing(p) => p.saga_id, SagaFact::OwnerNotCurrentGeneration(p) => p.saga_id, + SagaFact::Abandoned(p) => p.saga_id, } } } @@ -171,3 +181,14 @@ pub struct SagaOwnerNotCurrentFactPayload { /// (no `db_metadata_nexus` record). pub orphan_reason: OrphanedReason, } + +/// Payload of a [`SagaFact::Abandoned`] fact. +/// +/// The condition is boolean (the saga is abandoned or it isn't), so the +/// payload is pure identity. See [`SagaNotProgressingFactPayload`] for why +/// payloads carry only condition-defining fields. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct SagaAbandonedFactPayload { + /// The saga this fact (and its parent case) is about. + pub saga_id: steno::SagaId, +} diff --git a/nexus/types/src/observed_saga.rs b/nexus/types/src/observed_saga.rs index 55537aa9cff..97838fe9905 100644 --- a/nexus/types/src/observed_saga.rs +++ b/nexus/types/src/observed_saga.rs @@ -19,10 +19,31 @@ use omicron_common::api::external::Generation; use omicron_uuid_kinds::OmicronZoneUuid; use serde::{Deserialize, Serialize}; -/// The non-terminal execution state of a saga, as recorded in the `saga` -/// table's `saga_state` column. 
Terminal states (`done`, `abandoned`) are -/// deliberately excluded: a saga that has reached one of those is no longer a -/// candidate for the saga diagnosis engine (its case, if any, is closed). +/// The state of an observed saga, as recorded in the `saga` table's +/// `saga_state` column. +/// +/// `done` is deliberately excluded: a saga that completed (including a +/// completed unwind) needs no attention, and its case, if any, is closed. +/// `abandoned` is *included*: Nexus has permanently given up on the saga +/// without completing it, so it may be holding partially-allocated +/// resources and needs saga-specific manual remediation (see +/// omicron#10581 / RFD 555). +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ObservedSagaState { + /// The saga is executing forward actions. + Running, + /// One or more actions failed and the saga is executing undo actions. + Unwinding, + /// Nexus failed to recover the saga for a non-transient reason and has + /// permanently given up on running it. + Abandoned, +} + +/// The execution state of a *live* (running or unwinding) saga. This is the +/// subset of [`ObservedSagaState`] that can appear in a `NotProgressing` +/// fact: an abandoned saga is never "not progressing" (it is never going to +/// progress); it carries an `Abandoned` fact instead. #[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum SagaProgressState { @@ -74,13 +95,14 @@ pub enum OrphanedReason { Expunged, } -/// One non-terminal saga, joined with the timestamp of its most recent node -/// event and the state of its owning Nexus. +/// One unfinished (running, unwinding, or abandoned) saga, joined with the +/// timestamp of its most recent node event and the state of its owning +/// Nexus. 
#[derive(Clone, Debug, PartialEq)] pub struct ObservedSaga { pub saga_id: steno::SagaId, pub saga_name: String, - pub saga_state: SagaProgressState, + pub saga_state: ObservedSagaState, /// When the saga was created (`saga.time_created`). pub time_created: DateTime, /// The owning Nexus zone (`saga.current_sec`), or `None` if the saga has no diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 99067ea5c68..fc5e69eca53 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7776,7 +7776,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.fm_fact_physical_disk ( -- engine above: one table per engine, fact content as typed columns. CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_saga_kind AS ENUM ( 'not_progressing', - 'owner_not_current_generation' + 'owner_not_current_generation', + 'abandoned' ); CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_saga_orphan_reason AS ENUM ( diff --git a/schema/crdb/fm-saga-de/up2.sql b/schema/crdb/fm-saga-de/up2.sql index 78065156c3e..d9fae63cb9f 100644 --- a/schema/crdb/fm-saga-de/up2.sql +++ b/schema/crdb/fm-saga-de/up2.sql @@ -1,4 +1,5 @@ CREATE TYPE IF NOT EXISTS omicron.public.fm_fact_saga_kind AS ENUM ( 'not_progressing', - 'owner_not_current_generation' + 'owner_not_current_generation', + 'abandoned' ); From 82ef44fd5bd6e156df2eee547bfe7626d39b3d3c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 11 Jun 2026 12:49:48 -0700 Subject: [PATCH 18/30] simplify comments --- nexus/db-model/src/fm/fact_physical_disk.rs | 17 ++------ nexus/fm/src/analysis_input.rs | 4 +- nexus/fm/src/builder.rs | 6 +-- nexus/fm/src/builder/rng.rs | 5 --- nexus/fm/src/diagnosis/mod.rs | 3 +- nexus/fm/src/diagnosis/physical_disk.rs | 43 ++++++++----------- nexus/src/app/background/tasks/fm_analysis.rs | 7 +-- nexus/types/src/fm/case.rs | 3 -- nexus/types/src/fm/fact.rs | 2 +- nexus/types/src/in_service_disk.rs | 6 +-- 10 files changed, 32 insertions(+), 64 deletions(-) diff --git a/nexus/db-model/src/fm/fact_physical_disk.rs 
b/nexus/db-model/src/fm/fact_physical_disk.rs index 2b85de0e1b4..f81df308b35 100644 --- a/nexus/db-model/src/fm/fact_physical_disk.rs +++ b/nexus/db-model/src/fm/fact_physical_disk.rs @@ -34,10 +34,7 @@ impl_enum_type!( /// Diesel row for the `fm_fact_physical_disk` table. /// /// The payload columns are populated according to `kind`: a column is `Some` -/// if it belongs to that `kind`'s payload, and `None` otherwise (by -/// convention; the per-kind CHECK constraints, e.g. -/// `zpool_unhealthy_columns_present`, enforce only the "present for the -/// matching kind" direction so that future kinds may share columns). +/// if it belongs to that `kind`'s payload, and `None` otherwise. #[derive(Queryable, Insertable, Clone, Debug, Selectable)] #[diesel(table_name = fm_fact_physical_disk)] pub struct FmFactPhysicalDisk { @@ -68,11 +65,7 @@ pub struct FmFactPhysicalDisk { impl FmFactPhysicalDisk { /// Build a row from a fact's shared metadata (`fact`) and its - /// already-dispatched physical-disk payload (`disk_fact`). - /// - /// Callers route each fact to its engine's table by matching on - /// [`fact.payload`](fm::case::Fact::payload) and pass the matched payload - /// here, so this never has to interpret another engine's payload. + /// physical-disk payload (`disk_fact`). pub fn from_sitrep( sitrep_id: impl Into>, case_id: impl Into>, @@ -106,10 +99,8 @@ impl FmFactPhysicalDisk { /// Reconstruct an in-memory fact from a row. /// - /// The payload columns the database's CHECK constraint guarantees are - /// non-NULL for this `kind` are unwrapped; a NULL where one is required - /// indicates a corrupt row (e.g. hand-edited) and yields an internal - /// error rather than a panic. + /// A NULL in a column the CHECK constraint requires for this `kind` + /// yields an internal error rather than a panic. 
pub fn into_fact(self) -> Result { let kind = self.kind; let payload = match kind { diff --git a/nexus/fm/src/analysis_input.rs b/nexus/fm/src/analysis_input.rs index 8073e32493d..93751409172 100644 --- a/nexus/fm/src/analysis_input.rs +++ b/nexus/fm/src/analysis_input.rs @@ -41,7 +41,6 @@ pub struct Input { new_ereports: IdOrdMap, open_cases: IdOrdMap, closed_cases_copied_forward: IdOrdMap, - /// All control plane managed disks in_service_disks: Arc>, } @@ -70,8 +69,7 @@ impl Input { } /// All control-plane-managed disks (`physical_disk.disk_policy = - /// in_service` in the DB), indexed by `physical_disk_id`. See the - /// field-level documentation on `Input::in_service_disks` for semantics. + /// in_service` in the DB), indexed by `physical_disk_id`. pub fn in_service_disks(&self) -> &IdOrdMap { &self.in_service_disks } diff --git a/nexus/fm/src/builder.rs b/nexus/fm/src/builder.rs index 8245943239f..2037fbe5ea8 100644 --- a/nexus/fm/src/builder.rs +++ b/nexus/fm/src/builder.rs @@ -24,10 +24,8 @@ pub struct SitrepBuilder<'a> { pub parent_sitrep: Option<&'a fm::Sitrep>, pub sitrep_id: SitrepUuid, pub cases: case::AllCases, - /// The analysis input this builder was constructed from. `cases` is - /// seeded from this input's open cases, so diagnosis engines must read - /// their inputs from here rather than taking a separate `Input` argument - /// that might disagree. + /// The analysis input this builder was constructed from; `cases` is + /// seeded from its open cases. input: &'a analysis_input::Input, closed_cases_copied_forward: &'a IdOrdMap, comment: String, diff --git a/nexus/fm/src/builder/rng.rs b/nexus/fm/src/builder/rng.rs index 9ba02eca5cc..86b6a1a7da9 100644 --- a/nexus/fm/src/builder/rng.rs +++ b/nexus/fm/src/builder/rng.rs @@ -61,11 +61,6 @@ impl SitrepBuilderRng { } } -/// Per-case child RNGs. 
Each `next_*` returns the next deterministic UUID -/// in its stream; collisions across calls are statistically impossible, but -/// callers in `builder/case.rs` still guard inserts with a `contains_key` -/// loop so that a future change here (e.g., reseeding mid-build) can't -/// silently corrupt a case. #[derive(Clone, Debug)] pub(super) struct CaseBuilderRng { ereport_assignment_rng: TypedUuidRng, diff --git a/nexus/fm/src/diagnosis/mod.rs b/nexus/fm/src/diagnosis/mod.rs index 4a2d7d21856..48a3c3a184c 100644 --- a/nexus/fm/src/diagnosis/mod.rs +++ b/nexus/fm/src/diagnosis/mod.rs @@ -5,8 +5,7 @@ //! Fault management diagnosis engines. //! //! Each submodule defines one diagnosis engine (DE). `analyze` dispatches to -//! each engine in turn; engines are deterministic and idempotent per RFD 603, -//! so the dispatch order does not matter. +//! each engine in turn; engines are deterministic and idempotent per RFD 603. use crate::SitrepBuilder; diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index f7b2658f8cb..794c0cdca1d 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -13,8 +13,7 @@ use omicron_uuid_kinds::{CaseUuid, FactUuid, PhysicalDiskUuid, ZpoolUuid}; use std::collections::BTreeMap; /// A [`DiskFact::ZpoolUnhealthy`] payload paired with the `FactUuid` it -/// lives under. Used to build in-memory indices over facts during -/// analysis; not serialized. +/// lives under. #[derive(Clone, Copy, Debug)] struct ZpoolUnhealthyFact { fact_id: FactUuid, @@ -59,8 +58,6 @@ struct ParentCaseSummary { } pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { - // The input borrow has lifetime 'a, not a borrow of `builder`, so we may - // hold it while mutating the builder below. 
let input = builder.input(); let inv_collection_id = input.inventory().id; let inv_time_done = input.inventory().time_done; @@ -79,8 +76,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // The current health snapshot for every in-service disk, keyed by // physical_disk_id. Absence from this index is a positive signal that // the control plane has moved on from the disk (expungement / - // decommissioning); see prepare_inputs in - // nexus/src/app/background/tasks/fm_analysis.rs. + // decommissioning); see the analysis task's input preparation. let in_service_health: IdOrdMap = input .in_service_disks() .iter() @@ -91,11 +87,9 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { }) .collect(); - // Index parent-forwarded Disk cases from the input — the state copied - // from the parent sitrep. - // Every case is about one physical disk; we derive the disk - // from its facts. Skip (with a warning) any case we can't safely - // interpret. + // Index the Disk cases copied forward from the parent sitrep. Every case + // is about one physical disk; we derive the disk from its facts. Skip + // (with a warning) any case we can't safely interpret. let parent_cases: BTreeMap = input .open_cases() .iter() @@ -287,9 +281,9 @@ mod tests { }; use std::sync::Arc; - /// Synthesize a synthetic in-service disk set from a list of zpool IDs. - /// Each zpool gets its own fresh `PhysicalDiskUuid` and dummy identity - /// facts — tests in this module only care about the zpool dimension. + /// Make an in-service disk set from a list of zpool IDs. Each zpool gets + /// its own fresh `PhysicalDiskUuid` and dummy identity facts; tests in + /// this module only care about the zpool dimension. 
fn mk_in_service( zpool_ids: impl IntoIterator, ) -> IdOrdMap { @@ -385,9 +379,8 @@ mod tests { input } - /// Run `disk::analyze` over an input and return the resulting Sitrep - /// along with the analysis report (whose log entries the close-comment - /// assertions in `closes_*` tests inspect). + /// Run `analyze` over an input and return the resulting Sitrep along + /// with the analysis report. fn run_analyze( log: &slog::Logger, input: &Input, @@ -455,7 +448,7 @@ mod tests { } } - /// Helper: collect (case, fact, DiskFact) triples for every fact on a + /// Collect (case, fact, DiskFact) triples for every fact on a /// physical-disk case in a sitrep. Optionally filtered to open cases /// only. fn disk_facts( @@ -590,9 +583,9 @@ mod tests { setup("disk_close_on_expungement"); let target = zpools[0]; set_health(&mut collection, target, ZpoolHealth::Degraded); - // Target is NOT in-service in this sitrep (just expunged). + // Target is NOT in-service in this sitrep (just expunged), so + // disk_id_for fabricates a stable PhysicalDiskUuid for it. let in_service = mk_in_service(zpools.iter().copied().skip(1)); - // Target isn't in the in-service set; fabricate a stable PhysicalDiskUuid. let target_disk_id = disk_id_for(&in_service, target); let parent_id = SitrepUuid::new_v4(); let parent = make_parent_with_disk_case( @@ -704,9 +697,9 @@ mod tests { logctx.cleanup_successful(); } - /// When the parent sitrep's fact content matches the diagnosis engine's current - /// observation, the fact carries forward with the same UUID — no - /// remove-and-readd churn. + /// When the parent sitrep's fact content matches the diagnosis engine's + /// current observation, the fact carries forward with the same UUID, + /// with no remove-and-readd churn. 
#[test] fn fact_uuid_stable_when_observation_unchanged() { let (logctx, mut collection, zpools) = setup("disk_fact_uuid_stable"); @@ -758,7 +751,7 @@ mod tests { /// When the parent's fact recorded a different `last_seen_health` than /// what we observe now, the diagnosis engine removes the stale fact and emits /// a fresh one (new UUID). The case stays open because the zpool is - /// still unhealthy — just with a different value. + /// still unhealthy, just with a different value. #[test] fn fact_uuid_rotates_when_observation_changes() { let (logctx, mut collection, zpools) = setup("disk_fact_uuid_rotates"); @@ -808,7 +801,7 @@ mod tests { } } // The case itself should still be the same one that was carried - // forward — only the fact rotated. + // forward; only the fact rotated. assert!(open[0].0.is_open()); logctx.cleanup_successful(); } diff --git a/nexus/src/app/background/tasks/fm_analysis.rs b/nexus/src/app/background/tasks/fm_analysis.rs index 3bf66f5432c..e355583b159 100644 --- a/nexus/src/app/background/tasks/fm_analysis.rs +++ b/nexus/src/app/background/tasks/fm_analysis.rs @@ -227,11 +227,8 @@ impl FmAnalysis { // represented as control plane disks today, so the U.2-only filter // on the underlying query matches reality. // - // This is the executed view from the DB, flipped only after sagas / - // cleaners have actually drained resources, not while a planner is - // merely proposing changes. A faulty disk a planner proposes to - // expunge is still the diagnoser's concern until the control plane - // has actually moved on. + // See `nexus_types::in_service_disk` for why FM reads the executed + // DB view rather than the target blueprint. 
let zpools_and_disks = self .datastore .zpool_list_all_external_batched(opctx) diff --git a/nexus/types/src/fm/case.rs b/nexus/types/src/fm/case.rs index b6b8576e68f..d96fc723aed 100644 --- a/nexus/types/src/fm/case.rs +++ b/nexus/types/src/fm/case.rs @@ -174,9 +174,6 @@ impl CaseEreport { /// /// The `payload` is a fully-typed [`FactPayload`] whose variant is owned by /// the case's diagnosis engine (see [`Metadata::de`]). -/// -/// `Eq`/`PartialEq` derive over all fields, including the typed payload. -/// This is the equality the DB round-trip test needs. #[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct Fact { pub id: FactUuid, diff --git a/nexus/types/src/fm/fact.rs b/nexus/types/src/fm/fact.rs index 714f6660c2b..490ee7446f1 100644 --- a/nexus/types/src/fm/fact.rs +++ b/nexus/types/src/fm/fact.rs @@ -72,7 +72,7 @@ pub struct ZpoolUnhealthyFactPayload { /// The physical disk this fact (and its parent case) is about. /// Every fact on a Disk case must agree on this value. pub physical_disk_id: PhysicalDiskUuid, - /// The zpool whose health was observed. Kept for provenance — the + /// The zpool whose health was observed. Kept for provenance: the /// case is keyed by `physical_disk_id`, but knowing the exact zpool /// makes the fact self-describing when read in isolation. pub zpool_id: ZpoolUuid, diff --git a/nexus/types/src/in_service_disk.rs b/nexus/types/src/in_service_disk.rs index bc7814ce2cb..b3ce6c2a597 100644 --- a/nexus/types/src/in_service_disk.rs +++ b/nexus/types/src/in_service_disk.rs @@ -2,15 +2,15 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! "Currently in-service control-plane disks" — the executed view from +//! "Currently in-service control-plane disks": the executed view from //! the `physical_disk` and `zpool` DB tables. //! //! This is distinct from the planned view in `BlueprintPhysicalDiskConfig`: //! 
a disk is in this set only after the control plane has actually committed //! to managing it (`physical_disk.disk_policy = 'in_service'`), not while a //! planner is merely proposing to expunge or adopt it. Consumers that need -//! the *committed* view of which disks are part of the rack — fault -//! management diagnosers in particular — should read this rather than the +//! the *committed* view of which disks are part of the rack (fault +//! management diagnosers in particular) should read this rather than the //! target blueprint. use crate::external_api::physical_disk::PhysicalDiskKind; From 6f2e4a59af71db57cbb94afda7b0f982358d785d Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 11 Jun 2026 13:49:46 -0700 Subject: [PATCH 19/30] [fm] disk diagnoser: close uninterpretable cases; deflake omdb test Hardening from a pre-review pass: - Include zpool_id in the "same observation" comparison for facts, so a zpool that is destroyed and recreated (still unhealthy) rotates the fact instead of carrying a stale zpool reference forward. - Close cases the diagnosis engine cannot interpret (foreign fact payload, facts disagreeing on the disk, no facts) and duplicate cases for the same disk, rather than carrying them forward unprocessable forever. Closing is safe for fault coverage: detection is independent of case bookkeeping, so a still-faulty disk gets a fresh well-formed case in the same analysis pass. Reasons are a typed enum (UninterpretableCase) surfaced in the close comment and warn logs. - In the omdb output test, drive the FM tasks to their steady state with explicit background task activations (analysis -> loader -> analysis -> rendezvous) instead of racing the watch-channel triggers. This makes the expected output deterministic for both fm_analysis and fm_rendezvous, and removes the sitrep_load_rx test plumbing (NexusServer trait method, accessors, wait helper) that existed only to support the old wait. 
--- dev-tools/omdb/tests/test_all_output.rs | 26 +- nexus/fm/src/diagnosis/physical_disk.rs | 570 +++++++++++++----- .../app/background/tasks/fm_sitrep_load.rs | 4 +- nexus/src/app/mod.rs | 9 +- nexus/src/lib.rs | 6 - nexus/test-interface/src/lib.rs | 4 - nexus/test-utils/src/nexus_test.rs | 29 - nexus/types/src/fm.rs | 4 - 8 files changed, 451 insertions(+), 201 deletions(-) diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 42bde1b627f..1a43237539c 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -11,6 +11,7 @@ use dropshot::Method; use expectorate::assert_contents; use gateway_client::ClientInfo as _; use http::StatusCode; +use nexus_test_utils::background::activate_background_task; use nexus_test_utils::wait_for_producer; use nexus_test_utils::{OXIMETER_UUID, PRODUCER_UUID}; use nexus_test_utils_macros::nexus_test; @@ -187,11 +188,26 @@ async fn test_omdb_success_cases() { .wait_for_at_least_one_inventory_collection(Duration::from_secs(60)) .await; - // Wait until `fm_analysis` has committed at least one sitrep, so that the - // omdb snapshot for FM tasks is stable. (Otherwise sitrep IDs render as - // `None` or `Some(...)` depending on whether the task's natural cadence - // had landed by the time we sample it.) - cptestctx.wait_for_at_least_one_sitrep(Duration::from_secs(60)).await; + // Drive the FM task pipeline to its steady state, so that the omdb + // snapshot of each task's last completed activation is deterministic + // rather than racing the tasks' watch-channel triggers: + // + // 1. `fm_analysis` commits the first sitrep (unless its natural cadence + // already has). This run reports "committed new sitrep". + // 2. `fm_sitrep_loader` loads that sitrep and publishes it on the sitrep + // watch channel. + // 3. `fm_analysis` re-runs with the loaded sitrep as its parent and + // reports "no changes" -- the steady-state output asserted below. 
+ // (No later activation ever commits another sitrep here: this + // environment has no in-service control plane disks and no + // consumable ereports, so every post-load analysis is a no-op.) + // 4. `fm_rendezvous` runs against the loaded sitrep, so its status shows + // the executed operations rather than "no FM situation report loaded". + let lockstep_client = &cptestctx.lockstep_client; + activate_background_task(lockstep_client, "fm_analysis").await; + activate_background_task(lockstep_client, "fm_sitrep_loader").await; + activate_background_task(lockstep_client, "fm_analysis").await; + activate_background_task(lockstep_client, "fm_rendezvous").await; let mut output = String::new(); diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index 794c0cdca1d..401251c60e1 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -6,6 +6,7 @@ use crate::SitrepBuilder; use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; +use nexus_types::fm; use nexus_types::fm::DiagnosisEngineKind; use nexus_types::fm::{DiskFact, ZpoolUnhealthyFactPayload}; use nexus_types::inventory::ZpoolHealth; @@ -57,6 +58,67 @@ struct ParentCaseSummary { unhealthy_facts: IdOrdMap, } +/// Why a parent-forwarded Disk case could not be interpreted. +/// +/// Uninterpretable cases are closed by [`analyze`]: an open case this engine +/// cannot process would otherwise be carried forward into every future +/// sitrep with no path to closure. 
+#[derive(Debug, Eq, PartialEq, thiserror::Error)] +enum UninterpretableCase { + #[error( + "fact {fact_id} does not belong to the physical-disk diagnosis engine" + )] + ForeignFactPayload { fact_id: FactUuid }, + #[error( + "facts reference different physical disks ({expected} and {found}, \ + 1 expected)" + )] + DisagreeingDisks { expected: PhysicalDiskUuid, found: PhysicalDiskUuid }, + #[error("case has no facts, so the disk it concerns cannot be determined")] + NoFacts, +} + +/// Summarize one parent-forwarded Disk case, or explain why it cannot be +/// interpreted. +fn summarize_case( + case: &fm::Case, +) -> Result { + let mut unhealthy_facts: IdOrdMap = IdOrdMap::new(); + let mut case_disk_id: Option = None; + for fact in case.facts.iter() { + // Every fact on a physical-disk case must carry a physical-disk + // payload; a foreign payload is a data-model violation. + let Some(disk_fact) = fact.payload.as_physical_disk() else { + return Err(UninterpretableCase::ForeignFactPayload { + fact_id: fact.id, + }); + }; + match disk_fact { + DiskFact::ZpoolUnhealthy(payload) => { + let payload = *payload; + let disk_id = + *case_disk_id.get_or_insert(payload.physical_disk_id); + if disk_id != payload.physical_disk_id { + return Err(UninterpretableCase::DisagreeingDisks { + expected: disk_id, + found: payload.physical_disk_id, + }); + } + unhealthy_facts + .insert_unique(ZpoolUnhealthyFact { + fact_id: fact.id, + payload, + }) + .expect("fact ids are unique within a case"); + } + } + } + let Some(physical_disk_id) = case_disk_id else { + return Err(UninterpretableCase::NoFacts); + }; + Ok(ParentCaseSummary { physical_disk_id, unhealthy_facts }) +} + pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { let input = builder.input(); let inv_collection_id = input.inventory().id; @@ -88,76 +150,48 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { .collect(); // Index the Disk cases copied forward from the parent 
sitrep. Every case - // is about one physical disk; we derive the disk from its facts. Skip - // (with a warning) any case we can't safely interpret. - let parent_cases: BTreeMap = input + // is about one physical disk; we derive the disk from its facts. + let mut parent_cases = BTreeMap::::new(); + let mut uninterpretable = Vec::<(CaseUuid, UninterpretableCase)>::new(); + for case in input .open_cases() .iter() .filter(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) - .filter_map(|c| { - let case_id = c.id; - let mut unhealthy_facts: IdOrdMap = - IdOrdMap::new(); - let mut case_disk_id: Option = None; - for fact in c.facts.iter() { - // Every fact on a physical-disk case must carry a - // physical-disk payload. A foreign payload is a data-model - // violation; skip the whole case rather than acting on - // inconsistent state. - let Some(disk_fact) = fact.payload.as_physical_disk() else { - slog::warn!( - &builder.log, - "skipping Disk case: fact payload does not belong to \ - the physical-disk diagnosis engine"; - "case_id" => %case_id, - "fact_id" => %fact.id, - ); - return None; - }; - match disk_fact { - DiskFact::ZpoolUnhealthy(payload) => { - let payload = *payload; - let disk_id = *case_disk_id - .get_or_insert(payload.physical_disk_id); - if disk_id != payload.physical_disk_id { - slog::warn!( - &builder.log, - "skipping Disk case: facts reference \ - different physical disks (1 expected)"; - "case_id" => %case_id, - "expected_physical_disk_id" => %disk_id, - "fact_physical_disk_id" => - %payload.physical_disk_id, - ); - return None; - } - unhealthy_facts - .insert_unique(ZpoolUnhealthyFact { - fact_id: fact.id, - payload, - }) - .expect("fact ids are unique within a case"); - } - } + { + match summarize_case(case) { + Ok(summary) => { + parent_cases.insert(case.id, summary); } - let Some(physical_disk_id) = case_disk_id else { + Err(reason) => { slog::warn!( &builder.log, - "skipping Disk case with no facts; cannot derive disk id"; - "case_id" => 
%case_id, + "closing uninterpretable Disk case"; + "case_id" => %case.id, + "reason" => %reason, ); - return None; - }; - Some(( - case_id, - ParentCaseSummary { physical_disk_id, unhealthy_facts }, - )) - }) - .collect(); + uninterpretable.push((case.id, reason)); + } + } + } + + // Close the cases we couldn't interpret, so they don't ride along as + // open-but-unprocessable in every future sitrep. This is safe with + // respect to fault coverage: detection below is independent of case + // bookkeeping, so if a closed case concerned a disk that is genuinely + // unhealthy and in service, a fresh, well-formed case is opened in this + // same pass. + for (case_id, reason) in uninterpretable { + builder + .cases + .case_mut(&case_id) + .expect("case came from builder.input()'s open cases") + .close(format!("cannot interpret case: {reason}")); + } // Inverse index: which parent case is about which disk. Cases are // per-disk, so a disk with two parent cases is pathological; keep the - // lowest case ID. + // lowest case ID and close the rest as duplicates. (A half-maintained + // duplicate would otherwise decay into an uninterpretable empty case.) 
let mut case_by_disk: BTreeMap< PhysicalDiskUuid, (CaseUuid, &ParentCaseSummary), @@ -167,18 +201,39 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { .entry(summary.physical_disk_id) .or_insert((*case_id, summary)); } + for (case_id, summary) in &parent_cases { + let (kept_case_id, _) = case_by_disk[&summary.physical_disk_id]; + if *case_id != kept_case_id { + slog::warn!( + &builder.log, + "closing duplicate Disk case"; + "case_id" => %case_id, + "kept_case_id" => %kept_case_id, + "physical_disk_id" => %summary.physical_disk_id, + ); + builder + .cases + .case_mut(case_id) + .expect("case came from builder.input()'s open cases") + .close(format!( + "duplicate of case {kept_case_id} for disk {}", + summary.physical_disk_id, + )); + } + } - // For each parent case, decide what to do based on its disk's current - // state: + // For each disk's surviving parent case, decide what to do based on the + // disk's current state: // - disk no longer in service → close the case (expungement) // - disk's zpool back to Online → close the case (recovery) - // - disk still unhealthy → drop any facts whose recorded health no - // longer matches; the matching loop below will re-add a fresh fact + // - disk still unhealthy → drop any facts whose recorded observation + // (zpool + health) no longer matches; the matching loop below will + // re-add a fresh fact // - disk in service but absent from inventory → leave alone (absence // is NOT a recovery signal: sled could be powered off, or // inventory could be lossy) - for (case_id, summary) in &parent_cases { - let mut case_mut = builder.cases.case_mut(case_id).expect( + for &(case_id, summary) in case_by_disk.values() { + let mut case_mut = builder.cases.case_mut(&case_id).expect( "builder.cases is seeded from the open cases of builder.input(), \ which is where this case_id came from", ); @@ -198,7 +253,9 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { continue; }; 
for fact_ref in summary.unhealthy_facts.iter() { - if fact_ref.payload.last_seen_health != current_health { + if fact_ref.payload.last_seen_health != current_health + || fact_ref.payload.zpool_id != snap.zpool_id + { case_mut.remove_fact(fact_ref.fact_id); } } @@ -208,7 +265,8 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // For each currently-faulty in-service disk: ensure a case exists // (reusing the parent-forwarded one for this disk if any) and add a - // fresh fact if one with this exact health isn't already present. + // fresh fact if one with this exact observation (zpool + health) isn't + // already present. for disk in in_service_health.iter() { let Some(current_health) = disk.zpool_health else { continue; @@ -222,10 +280,10 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { let case_id_for_fact = match parent_for_disk { // Parent case already has an accurate fact; fully covered. Some((_, summary)) - if summary - .unhealthy_facts - .iter() - .any(|f| f.payload.last_seen_health == current_health) => + if summary.unhealthy_facts.iter().any(|f| { + f.payload.last_seen_health == current_health + && f.payload.zpool_id == disk.zpool_id + }) => { continue; } @@ -394,45 +452,63 @@ mod tests { builder.build(OmicronZoneUuid::new_v4(), Utc::now()) } - fn make_parent_with_disk_case( + /// Make a `ZpoolUnhealthy` (Degraded) fact for the given disk and zpool. 
+ fn make_degraded_fact( parent_sitrep_id: SitrepUuid, inv_collection_id: omicron_uuid_kinds::CollectionUuid, physical_disk_id: PhysicalDiskUuid, zpool_id: ZpoolUuid, - ) -> Sitrep { - let mut cases = iddqd::IdOrdMap::new(); - let case_id = omicron_uuid_kinds::CaseUuid::new_v4(); - let mut facts = iddqd::IdOrdMap::new(); - facts - .insert_unique(fm::case::Fact { - id: omicron_uuid_kinds::FactUuid::new_v4(), - created_sitrep_id: parent_sitrep_id, - payload: DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { - physical_disk_id, - zpool_id, - last_seen_health: ZpoolHealth::Degraded, - observed_in_inv: inv_collection_id, - time_observed: Utc::now(), - }) - .into(), - comment: format!("zpool {zpool_id} degraded"), - }) - .unwrap(); - cases - .insert_unique(fm::Case { - id: case_id, - metadata: fm::case::Metadata { - created_sitrep_id: parent_sitrep_id, - closed_sitrep_id: None, - de: DiagnosisEngineKind::PhysicalDisk, - comment: format!("zpool {zpool_id} degraded"), - }, - ereports: Default::default(), - alerts_requested: Default::default(), - support_bundles_requested: Default::default(), - facts, + ) -> fm::case::Fact { + fm::case::Fact { + id: omicron_uuid_kinds::FactUuid::new_v4(), + created_sitrep_id: parent_sitrep_id, + payload: DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + physical_disk_id, + zpool_id, + last_seen_health: ZpoolHealth::Degraded, + observed_in_inv: inv_collection_id, + time_observed: Utc::now(), }) - .unwrap(); + .into(), + comment: format!("zpool {zpool_id} degraded"), + } + } + + /// Make an open `PhysicalDisk` case carrying the given facts. 
+ fn make_disk_case( + case_id: omicron_uuid_kinds::CaseUuid, + parent_sitrep_id: SitrepUuid, + facts: impl IntoIterator, + ) -> fm::Case { + let mut fact_map = iddqd::IdOrdMap::new(); + for fact in facts { + fact_map.insert_unique(fact).unwrap(); + } + fm::Case { + id: case_id, + metadata: fm::case::Metadata { + created_sitrep_id: parent_sitrep_id, + closed_sitrep_id: None, + de: DiagnosisEngineKind::PhysicalDisk, + comment: "a disk case".to_string(), + }, + ereports: Default::default(), + alerts_requested: Default::default(), + support_bundles_requested: Default::default(), + facts: fact_map, + } + } + + /// Make a parent sitrep containing the given cases. + fn make_parent_sitrep( + parent_sitrep_id: SitrepUuid, + inv_collection_id: omicron_uuid_kinds::CollectionUuid, + cases: impl IntoIterator, + ) -> Sitrep { + let mut case_map = iddqd::IdOrdMap::new(); + for case in cases { + case_map.insert_unique(case).unwrap(); + } Sitrep { metadata: fm::SitrepMetadata { id: parent_sitrep_id, @@ -443,11 +519,31 @@ mod tests { next_inv_min_time_started: Utc::now(), comment: String::new(), }, - cases, + cases: case_map, ereports_by_id: Default::default(), } } + fn make_parent_with_disk_case( + parent_sitrep_id: SitrepUuid, + inv_collection_id: omicron_uuid_kinds::CollectionUuid, + physical_disk_id: PhysicalDiskUuid, + zpool_id: ZpoolUuid, + ) -> Sitrep { + let fact = make_degraded_fact( + parent_sitrep_id, + inv_collection_id, + physical_disk_id, + zpool_id, + ); + let case = make_disk_case( + omicron_uuid_kinds::CaseUuid::new_v4(), + parent_sitrep_id, + [fact], + ); + make_parent_sitrep(parent_sitrep_id, inv_collection_id, [case]) + } + /// Collect (case, fact, DiskFact) triples for every fact on a /// physical-disk case in a sitrep. Optionally filtered to open cases /// only. 
@@ -643,56 +739,180 @@ mod tests { logctx.cleanup_successful(); } - /// A parent Disk case with zero facts has no derivable disk ID, so - /// the diagnosis engine leaves it alone (carried forward unchanged). + /// A parent Disk case with zero facts has no derivable disk ID; the + /// engine closes it as uninterpretable rather than carrying an + /// unprocessable open case forward into every future sitrep. #[test] - fn empty_case_is_left_open() { - let (logctx, collection, _zpools) = setup("disk_empty_case_left_open"); + fn empty_case_is_closed() { + let (logctx, collection, _zpools) = setup("disk_empty_case_closed"); let in_service = mk_in_service(std::iter::empty()); let parent_sitrep_id = SitrepUuid::new_v4(); let empty_case_id = omicron_uuid_kinds::CaseUuid::new_v4(); - let mut parent_cases = iddqd::IdOrdMap::new(); - parent_cases - .insert_unique(fm::Case { - id: empty_case_id, - metadata: fm::case::Metadata { - created_sitrep_id: parent_sitrep_id, - closed_sitrep_id: None, - de: DiagnosisEngineKind::PhysicalDisk, - comment: "an open case with no facts".to_string(), - }, - ereports: Default::default(), - alerts_requested: Default::default(), - support_bundles_requested: Default::default(), - facts: Default::default(), - }) - .unwrap(); - let parent = Sitrep { - metadata: fm::SitrepMetadata { - id: parent_sitrep_id, - inv_collection_id: collection.id, - creator_id: OmicronZoneUuid::new_v4(), - parent_sitrep_id: None, - time_created: Utc::now(), - next_inv_min_time_started: Utc::now(), - comment: String::new(), - }, - cases: parent_cases, - ereports_by_id: Default::default(), - }; + let empty_case = make_disk_case(empty_case_id, parent_sitrep_id, []); + let parent = + make_parent_sitrep(parent_sitrep_id, collection.id, [empty_case]); let input = build_input(collection, Some(parent), in_service); - let (sitrep, _report) = run_analyze(&logctx.log, &input); + let (sitrep, report) = run_analyze(&logctx.log, &input); let case = sitrep .cases .iter() .find(|c| c.id == 
empty_case_id) .expect("empty case should still be in the output sitrep"); + assert!(!case.is_open(), "uninterpretable empty case should be closed",); + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("cannot interpret case"), + "close comment should say the case was uninterpretable, got: \ + {report_str}", + ); + logctx.cleanup_successful(); + } + + /// Two open parent cases about the same disk: the engine keeps and + /// maintains the one with the lowest case ID, and closes the other as a + /// duplicate. (A half-maintained duplicate would otherwise decay into + /// an uninterpretable empty case.) + #[test] + fn duplicate_case_is_closed() { + let (logctx, mut collection, zpools) = setup("disk_duplicate_closed"); + let target = zpools[0]; + set_health(&mut collection, target, ZpoolHealth::Degraded); + let in_service = mk_in_service(zpools.iter().copied()); + let target_disk_id = disk_id_for(&in_service, target); + let parent_id = SitrepUuid::new_v4(); + + let id_a = omicron_uuid_kinds::CaseUuid::new_v4(); + let id_b = omicron_uuid_kinds::CaseUuid::new_v4(); + let (kept_id, dup_id) = + if id_a < id_b { (id_a, id_b) } else { (id_b, id_a) }; + let kept_fact = make_degraded_fact( + parent_id, + collection.id, + target_disk_id, + target, + ); + let kept_fact_id = kept_fact.id; + let dup_fact = make_degraded_fact( + parent_id, + collection.id, + target_disk_id, + target, + ); + let parent = make_parent_sitrep( + parent_id, + collection.id, + [ + make_disk_case(kept_id, parent_id, [kept_fact]), + make_disk_case(dup_id, parent_id, [dup_fact]), + ], + ); + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, report) = run_analyze(&logctx.log, &input); + + let kept = sitrep.cases.get(&kept_id).expect("kept case present"); + let dup = sitrep.cases.get(&dup_id).expect("duplicate case present"); + assert!(kept.is_open(), "lowest-ID case should remain open"); + assert!(!dup.is_open(), "duplicate case 
should be closed"); + // The kept case's fact is still accurate (Degraded), so it should + // carry forward unchanged. + assert!( + kept.facts.contains_key(&kept_fact_id), + "kept case should retain its fact", + ); + // No third case should have been opened for this disk. + let disk_case_count = sitrep + .cases + .iter() + .filter(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) + .count(); + assert_eq!(disk_case_count, 2); + let report_str = format!("{}", report.display_multiline(0)); assert!( - case.is_open(), - "empty case should be left open (no disk to verify)", + report_str.contains("duplicate of case"), + "close comment should call out the duplicate, got: {report_str}", + ); + logctx.cleanup_successful(); + } + + /// A case whose facts disagree about which disk they concern is closed + /// as uninterpretable. Because fault detection is independent of case + /// bookkeeping, the still-unhealthy disk gets a fresh, well-formed case + /// in the same pass. + #[test] + fn uninterpretable_case_is_replaced() { + let (logctx, mut collection, zpools) = + setup("disk_uninterpretable_replaced"); + let target = zpools[0]; + set_health(&mut collection, target, ZpoolHealth::Degraded); + let in_service = mk_in_service(zpools.iter().copied()); + let target_disk_id = disk_id_for(&in_service, target); + let parent_id = SitrepUuid::new_v4(); + + // One fact about the target disk, one about an unrelated disk: the + // case is self-contradictory. 
+ let corrupt_case_id = omicron_uuid_kinds::CaseUuid::new_v4(); + let corrupt_case = make_disk_case( + corrupt_case_id, + parent_id, + [ + make_degraded_fact( + parent_id, + collection.id, + target_disk_id, + target, + ), + make_degraded_fact( + parent_id, + collection.id, + PhysicalDiskUuid::new_v4(), + ZpoolUuid::new_v4(), + ), + ], + ); + let parent = + make_parent_sitrep(parent_id, collection.id, [corrupt_case]); + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, report) = run_analyze(&logctx.log, &input); + + let corrupt = sitrep + .cases + .get(&corrupt_case_id) + .expect("corrupt case should still be in the output sitrep"); + assert!( + !corrupt.is_open(), + "case with disagreeing facts should be closed", + ); + // The target disk is still Degraded and in service, so a fresh case + // should have been opened for it. + let open = disk_facts(&sitrep, true); + assert_eq!( + open.len(), + 1, + "expected exactly one open Disk fact (on the replacement case)", + ); + assert_ne!(open[0].0.id, corrupt_case_id); + match &open[0].2 { + DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + physical_disk_id, + zpool_id, + last_seen_health, + .. + }) => { + assert_eq!(*physical_disk_id, target_disk_id); + assert_eq!(*zpool_id, target); + assert_eq!(*last_seen_health, ZpoolHealth::Degraded); + } + } + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("cannot interpret case"), + "close comment should say the case was uninterpretable, got: \ + {report_str}", ); logctx.cleanup_successful(); } @@ -805,4 +1025,68 @@ mod tests { assert!(open[0].0.is_open()); logctx.cleanup_successful(); } + + /// When the parent's fact references a different zpool than the one + /// currently backing the disk (the zpool was destroyed and recreated), + /// the fact rotates even though the observed health is unchanged. 
+ /// Today's adoption flow mints a new physical disk UUID alongside a new + /// zpool, so this can't happen via normal control plane operation; this + /// pins the engine's behavior if that ever changes. + #[test] + fn fact_uuid_rotates_when_zpool_replaced() { + let (logctx, mut collection, zpools) = + setup("disk_fact_uuid_rotates_zpool_replaced"); + let target = zpools[0]; + set_health(&mut collection, target, ZpoolHealth::Degraded); + let in_service = mk_in_service(zpools.iter().copied()); + let target_disk_id = disk_id_for(&in_service, target); + // The parent's fact records the same disk and the same health + // (Degraded), but a zpool that no longer exists. + let old_zpool_id = ZpoolUuid::new_v4(); + let parent_id = SitrepUuid::new_v4(); + let parent = make_parent_with_disk_case( + parent_id, + collection.id, + target_disk_id, + old_zpool_id, + ); + let parent_fact_id = parent + .cases + .iter() + .find(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) + .expect("parent should have one Disk case") + .facts + .iter() + .next() + .expect("parent case should have one fact") + .id; + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, _report) = run_analyze(&logctx.log, &input); + let open = disk_facts(&sitrep, true); + assert_eq!( + open.len(), + 1, + "expected exactly one open Disk fact (the refreshed one)", + ); + assert_ne!( + open[0].1.id, parent_fact_id, + "fact UUID should rotate because the disk's zpool changed", + ); + match &open[0].2 { + DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + zpool_id, + last_seen_health, + .. 
+ }) => { + assert_eq!( + *zpool_id, target, + "refreshed fact should reference the current zpool", + ); + assert_eq!(*last_seen_health, ZpoolHealth::Degraded); + } + } + assert!(open[0].0.is_open()); + logctx.cleanup_successful(); + } } diff --git a/nexus/src/app/background/tasks/fm_sitrep_load.rs b/nexus/src/app/background/tasks/fm_sitrep_load.rs index b2dca967abd..a6afdcb93ee 100644 --- a/nexus/src/app/background/tasks/fm_sitrep_load.rs +++ b/nexus/src/app/background/tasks/fm_sitrep_load.rs @@ -10,6 +10,7 @@ use chrono::Utc; use futures::future::BoxFuture; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; +use nexus_types::fm::Sitrep; use nexus_types::fm::SitrepVersion; use nexus_types::internal_api::background::SitrepLoadStatus as Status; use serde_json::json; @@ -22,7 +23,7 @@ pub struct SitrepLoader { tx: watch::Sender>, } -pub use nexus_types::fm::CurrentSitrep; +pub type CurrentSitrep = Arc<(SitrepVersion, Sitrep)>; impl BackgroundTask for SitrepLoader { fn activate<'a>( @@ -191,7 +192,6 @@ mod test { use super::*; use crate::app::background::BackgroundTask; use nexus_db_queries::db::pub_test_utils::TestDatabase; - use nexus_types::fm::Sitrep; use nexus_types::fm::SitrepMetadata; use omicron_test_utils::dev; use omicron_uuid_kinds::CollectionUuid; diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 815409efa16..9863cbf7ffd 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -307,6 +307,7 @@ pub struct Nexus { repo_depot_resolver: Box, /// Watch channel containing the currently-loaded fault management sitrep. 
+ #[allow(dead_code)] sitrep_load_rx: watch::Receiver>, /// handle to pull update status data @@ -1129,14 +1130,6 @@ impl Nexus { &self.db_datastore } - pub(crate) fn sitrep_load_rx( - &self, - ) -> watch::Receiver> { - let mut rx = self.sitrep_load_rx.clone(); - rx.mark_unchanged(); - rx - } - pub(crate) fn samael_max_issue_delay(&self) -> Option { let mid = self.samael_max_issue_delay.lock().unwrap(); *mid diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index c4308ed4f66..8293bd9b9fc 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -533,12 +533,6 @@ impl nexus_test_interface::NexusServer for Server { self.apictx.context.nexus.inventory_load_rx() } - fn sitrep_load_rx( - &self, - ) -> watch::Receiver> { - self.apictx.context.nexus.sitrep_load_rx() - } - fn get_http_server_external_address(&self) -> SocketAddr { self.apictx.context.nexus.get_external_server_address().unwrap() } diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs index 53d254b44ca..35f726740e1 100644 --- a/nexus/test-interface/src/lib.rs +++ b/nexus/test-interface/src/lib.rs @@ -88,10 +88,6 @@ pub trait NexusServer: Send + Sync + 'static { fn inventory_load_rx(&self) -> watch::Receiver>>; - fn sitrep_load_rx( - &self, - ) -> watch::Receiver>; - fn get_http_server_external_address(&self) -> SocketAddr; fn get_http_server_techport_address(&self) -> SocketAddr; fn get_http_server_internal_address(&self) -> SocketAddr; diff --git a/nexus/test-utils/src/nexus_test.rs b/nexus/test-utils/src/nexus_test.rs index f1cd46402d9..496b66b9747 100644 --- a/nexus/test-utils/src/nexus_test.rs +++ b/nexus/test-utils/src/nexus_test.rs @@ -198,35 +198,6 @@ impl ControlPlaneTestContext { } } - /// Wait until at least one fault management sitrep has been committed and - /// loaded. - /// - /// # Panics - /// - /// Panics if no sitrep is loaded within `timeout`. 
- pub async fn wait_for_at_least_one_sitrep(&self, timeout: Duration) { - let mut sitrep_rx = self.server.sitrep_load_rx(); - - match wait_for_watch_channel_condition( - &mut sitrep_rx, - |sitrep| sitrep.is_some(), - timeout, - ) - .await - { - Ok(()) => (), - Err(poll::WatchChannelError::TimedOut(elapsed)) => { - panic!("no sitrep found within {elapsed:?}"); - } - Err(poll::WatchChannelError::SenderDropped) => { - panic!( - "sitrep watch channel sender dropped before a \ - sitrep was available" - ); - } - } - } - pub fn internal_client(&self) -> nexus_client::Client { nexus_client::Client::new( &format!("http://{}", self.internal_client.bind_address), diff --git a/nexus/types/src/fm.rs b/nexus/types/src/fm.rs index 1c4dba06532..f80d5fd9eff 100644 --- a/nexus/types/src/fm.rs +++ b/nexus/types/src/fm.rs @@ -204,10 +204,6 @@ pub struct SitrepVersion { pub time_made_current: DateTime, } -/// The current sitrep paired with its [`SitrepVersion`] metadata, wrapped in an -/// `Arc` so it can be shared cheaply (e.g., across `watch` channels). -pub type CurrentSitrep = Arc<(SitrepVersion, Sitrep)>; - #[derive( Copy, Clone, From ee63736055d959126febf3a07ee46484382c42fb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 11 Jun 2026 15:56:56 -0700 Subject: [PATCH 20/30] [fm] saga diagnoser: close uninterpretable and duplicate cases Apply the lessons from the disk diagnoser review (the same pattern landed there in 6f2e4a59): - Close cases the engine cannot interpret (foreign fact payload, facts disagreeing on the saga, no facts) instead of skipping them, which left them open and unprocessable in every future sitrep with no path to closure. Reasons are a typed UninterpretableCase enum surfaced in the close comment and warn logs. Closing is safe for fault coverage: detection iterates observed sagas independently of case bookkeeping, so a saga that still needs attention gets a fresh well-formed case in the same pass. 
- Close duplicate cases for the same saga as superseded, keeping the lowest case ID. Previously the last-indexed case silently won the saga_id index while both cases had stale facts removed, so the loser decayed into an empty open case that could never close. - New tests: empty case closed, duplicate closed, corrupt case replaced by a fresh one in the same pass, and foreign-payload cases closed in both engines (newly testable, since two FactPayload variants now exist). --- nexus/fm/src/diagnosis/physical_disk.rs | 48 ++ nexus/fm/src/diagnosis/saga.rs | 575 +++++++++++++++++++----- 2 files changed, 501 insertions(+), 122 deletions(-) diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index 937ef1effe7..f2ac39c1636 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -1093,4 +1093,52 @@ mod tests { assert!(open[0].0.is_open()); logctx.cleanup_successful(); } + + /// A Disk case carrying a saga fact payload is a data-model violation; + /// the engine closes it as uninterpretable. (This exercises the + /// `ForeignFactPayload` path, which only became constructible once a + /// second diagnosis engine's payload variant existed.) 
+ #[test] + fn foreign_payload_case_is_closed() { + let (logctx, collection, _zpools) = + setup("disk_foreign_payload_closed"); + let inv_id = collection.id; + let in_service = mk_in_service(std::iter::empty()); + let parent_id = SitrepUuid::new_v4(); + let case_id = omicron_uuid_kinds::CaseUuid::new_v4(); + let foreign_fact = fm::case::Fact { + id: omicron_uuid_kinds::FactUuid::new_v4(), + created_sitrep_id: parent_id, + payload: fm::FactPayload::Saga(fm::SagaFact::Abandoned( + fm::SagaAbandonedFactPayload { + saga_id: steno::SagaId(uuid::Uuid::from_u128(1)), + }, + )), + comment: "a fact belonging to the saga engine".to_string(), + }; + let parent = make_parent_sitrep( + parent_id, + inv_id, + [make_disk_case(case_id, parent_id, [foreign_fact])], + ); + + let input = build_input(collection, Some(parent), in_service); + let (sitrep, report) = run_analyze(&logctx.log, &input); + + let case = sitrep + .cases + .get(&case_id) + .expect("case should still be in the output sitrep"); + assert!( + !case.is_open(), + "case with a foreign fact payload should be closed", + ); + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("cannot interpret case"), + "close comment should say the case was uninterpretable, got: \ + {report_str}", + ); + logctx.cleanup_successful(); + } } diff --git a/nexus/fm/src/diagnosis/saga.rs b/nexus/fm/src/diagnosis/saga.rs index 1eb19882414..17fb16b35b0 100644 --- a/nexus/fm/src/diagnosis/saga.rs +++ b/nexus/fm/src/diagnosis/saga.rs @@ -25,6 +25,7 @@ use crate::SitrepBuilder; use chrono::{DateTime, TimeDelta, Utc}; +use nexus_types::fm; use nexus_types::fm::DiagnosisEngineKind; use nexus_types::fm::{ SagaAbandonedFactPayload, SagaFact, SagaNotProgressingFactPayload, @@ -58,9 +59,90 @@ struct ParentSagaCase { duplicate_facts: Vec, } +/// Why a parent-forwarded Saga case could not be interpreted. 
+/// +/// Uninterpretable cases are closed by [`analyze`]: an open case this engine +/// cannot process would otherwise be carried forward into every future +/// sitrep with no path to closure. +#[derive(Debug, Eq, PartialEq, thiserror::Error)] +enum UninterpretableCase { + #[error("fact {fact_id} does not belong to the saga diagnosis engine")] + ForeignFactPayload { fact_id: FactUuid }, + #[error( + "facts reference different sagas ({expected} and {found}, 1 expected)" + )] + DisagreeingSagas { expected: steno::SagaId, found: steno::SagaId }, + #[error("case has no facts, so the saga it concerns cannot be determined")] + NoFacts, +} + +/// Summarize one parent-forwarded Saga case, or explain why it cannot be +/// interpreted. +fn summarize_case( + case: &fm::Case, +) -> Result { + let mut saga_id: Option = None; + let mut not_progressing: Option<(FactUuid, SagaNotProgressingFactPayload)> = + None; + let mut owner_not_current: Option<( + FactUuid, + SagaOwnerNotCurrentFactPayload, + )> = None; + let mut abandoned: Option<(FactUuid, SagaAbandonedFactPayload)> = None; + let mut duplicate_facts = Vec::new(); + // `case.facts` iterates in fact UUID order, so the kept fact for + // each kind is deterministically the one with the lowest UUID. 
+ for fact in case.facts.iter() { + let Some(saga_fact) = fact.payload.as_saga() else { + return Err(UninterpretableCase::ForeignFactPayload { + fact_id: fact.id, + }); + }; + let this_saga = saga_fact.saga_id(); + let expected = *saga_id.get_or_insert(this_saga); + if expected != this_saga { + return Err(UninterpretableCase::DisagreeingSagas { + expected, + found: this_saga, + }); + } + match saga_fact { + SagaFact::NotProgressing(p) => { + if not_progressing.is_none() { + not_progressing = Some((fact.id, p.clone())); + } else { + duplicate_facts.push(fact.id); + } + } + SagaFact::OwnerNotCurrentGeneration(p) => { + if owner_not_current.is_none() { + owner_not_current = Some((fact.id, p.clone())); + } else { + duplicate_facts.push(fact.id); + } + } + SagaFact::Abandoned(p) => { + if abandoned.is_none() { + abandoned = Some((fact.id, p.clone())); + } else { + duplicate_facts.push(fact.id); + } + } + } + } + let Some(saga_id) = saga_id else { + return Err(UninterpretableCase::NoFacts); + }; + Ok(ParentSagaCase { + saga_id, + not_progressing, + owner_not_current, + abandoned, + duplicate_facts, + }) +} + pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { - // The input borrow has lifetime 'a, not a borrow of `builder`, so we may - // hold it while mutating the builder below. let input = builder.input(); // Reference "now" for staleness. We use the inventory collection's // completion time (rather than `Utc::now()`) so analysis is deterministic @@ -70,109 +152,89 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { let observed = input.observed_sagas(); // Index parent-forwarded Saga cases by case ID, and maintain a saga_id -> - // case_id index for the second pass. Every case is about one saga, derived - // from its facts; skip (with a warning) any case we can't interpret. + // case_id index for the second pass. Every case is about one saga, + // derived from its facts. 
let mut parent_cases: BTreeMap = BTreeMap::new(); let mut case_for_saga: BTreeMap = BTreeMap::new(); - 'cases: for case in input + let mut uninterpretable = Vec::<(CaseUuid, UninterpretableCase)>::new(); + for case in input .open_cases() .iter() .filter(|c| c.metadata.de == DiagnosisEngineKind::Saga) { - let case_id = case.id; - let mut saga_id: Option = None; - let mut not_progressing: Option<( - FactUuid, - SagaNotProgressingFactPayload, - )> = None; - let mut owner_not_current: Option<( - FactUuid, - SagaOwnerNotCurrentFactPayload, - )> = None; - let mut abandoned: Option<(FactUuid, SagaAbandonedFactPayload)> = None; - let mut duplicate_facts = Vec::new(); - // `case.facts` iterates in fact UUID order, so the kept fact for - // each kind is deterministically the one with the lowest UUID. - for fact in case.facts.iter() { - let Some(saga_fact) = fact.payload.as_saga() else { + let summary = match summarize_case(case) { + Ok(summary) => summary, + Err(reason) => { slog::warn!( &builder.log, - "skipping Saga case: fact payload does not belong to the \ - saga diagnosis engine"; - "case_id" => %case_id, - "fact_id" => %fact.id, + "closing uninterpretable Saga case"; + "case_id" => %case.id, + "reason" => %reason, ); - continue 'cases; - }; - let this_saga = saga_fact.saga_id(); - if *saga_id.get_or_insert(this_saga) != this_saga { - slog::warn!( - &builder.log, - "skipping Saga case: facts reference different sagas"; - "case_id" => %case_id, - ); - continue 'cases; + uninterpretable.push((case.id, reason)); + continue; } - match saga_fact { - SagaFact::NotProgressing(p) => { - if not_progressing.is_none() { - not_progressing = Some((fact.id, p.clone())); - } else { - duplicate_facts.push(fact.id); - } - } - SagaFact::OwnerNotCurrentGeneration(p) => { - if owner_not_current.is_none() { - owner_not_current = Some((fact.id, p.clone())); - } else { - duplicate_facts.push(fact.id); - } - } - SagaFact::Abandoned(p) => { - if abandoned.is_none() { - abandoned = 
Some((fact.id, p.clone())); - } else { - duplicate_facts.push(fact.id); - } - } - } - } - if !duplicate_facts.is_empty() { + }; + if !summary.duplicate_facts.is_empty() { slog::warn!( &builder.log, "Saga case has more than one fact of the same kind; \ the duplicates will be removed"; - "case_id" => %case_id, - "duplicate_fact_ids" => ?duplicate_facts, + "case_id" => %case.id, + "duplicate_fact_ids" => ?summary.duplicate_facts, ); } - let Some(saga_id) = saga_id else { + // Cases iterate in UUID order, so the kept case for a saga is + // deterministically the one with the lowest case UUID. + case_for_saga.entry(summary.saga_id).or_insert(case.id); + parent_cases.insert(case.id, summary); + } + + // Close the cases we couldn't interpret, so they don't ride along as + // open-but-unprocessable in every future sitrep. This is safe with + // respect to fault coverage: detection below is independent of case + // bookkeeping, so if a closed case concerned a saga that genuinely + // needs attention, a fresh, well-formed case is opened in this same + // pass. + for (case_id, reason) in uninterpretable { + builder + .cases + .case_mut(&case_id) + .expect("case came from builder.input()'s open cases") + .close(format!("cannot interpret case: {reason}")); + } + + // Close duplicate cases: a saga with two parent cases is pathological; + // the lowest case ID is kept and maintained below. (A half-maintained + // duplicate would otherwise decay into an uninterpretable empty case.) 
+ for (case_id, summary) in &parent_cases { + let kept_case_id = case_for_saga[&summary.saga_id]; + if *case_id != kept_case_id { slog::warn!( &builder.log, - "skipping Saga case with no facts; cannot derive saga id"; + "closing duplicate Saga case"; "case_id" => %case_id, + "kept_case_id" => %kept_case_id, + "saga_id" => %summary.saga_id, ); - continue 'cases; - }; - parent_cases.insert( - case_id, - ParentSagaCase { - saga_id, - not_progressing, - owner_not_current, - abandoned, - duplicate_facts, - }, - ); - case_for_saga.insert(saga_id, case_id); + builder + .cases + .case_mut(case_id) + .expect("case came from builder.input()'s open cases") + .close(format!( + "duplicate of case {kept_case_id} for saga {}", + summary.saga_id, + )); + } } - // First pass: for each parent case, close it if its saga has reached a - // terminal state (no longer observed) or has fully recovered (no - // condition holds anymore), otherwise drop any facts whose recorded - // contents no longer match the current observation. The second pass - // re-adds a fresh fact if the condition still holds. - for (case_id, summary) in &parent_cases { + // First pass: for each saga's surviving parent case, close it if its + // saga has reached a terminal state (no longer observed) or has fully + // recovered (no condition holds anymore), otherwise drop any facts whose + // recorded contents no longer match the current observation. The second + // pass re-adds a fresh fact if the condition still holds. + for case_id in case_for_saga.values() { + let summary = &parent_cases[case_id]; let mut case_mut = builder.cases.case_mut(case_id).expect( "builder.cases is seeded from the open cases of builder.input(), \ which is where this case_id came from", @@ -511,52 +573,60 @@ mod tests { .collect() } - fn make_parent_with_saga_case( + /// Make a `Fact` carrying the given saga payload. 
+ fn mk_fact( parent_sitrep_id: SitrepUuid, - inv_collection_id: omicron_uuid_kinds::CollectionUuid, - fact_payloads: impl IntoIterator, - ) -> Sitrep { - let mut facts = IdOrdMap::new(); - for fact_payload in fact_payloads { - facts - .insert_unique(fm::case::Fact { - id: omicron_uuid_kinds::FactUuid::new_v4(), - created_sitrep_id: parent_sitrep_id, - payload: fact_payload.into(), - comment: "parent saga fact".to_string(), - }) - .unwrap(); + payload: SagaFact, + ) -> fm::case::Fact { + fm::case::Fact { + id: omicron_uuid_kinds::FactUuid::new_v4(), + created_sitrep_id: parent_sitrep_id, + payload: payload.into(), + comment: "parent saga fact".to_string(), } - make_parent_with_saga_case_from_facts( - parent_sitrep_id, - inv_collection_id, + } + + fn fact_map( + facts: impl IntoIterator, + ) -> IdOrdMap { + let mut map = IdOrdMap::new(); + for fact in facts { + map.insert_unique(fact).unwrap(); + } + map + } + + /// Make an open `Saga` case carrying the given facts. + fn make_saga_case( + case_id: omicron_uuid_kinds::CaseUuid, + parent_sitrep_id: SitrepUuid, + facts: IdOrdMap, + ) -> fm::Case { + fm::Case { + id: case_id, + metadata: fm::case::Metadata { + created_sitrep_id: parent_sitrep_id, + closed_sitrep_id: None, + de: DiagnosisEngineKind::Saga, + comment: "parent saga case".to_string(), + }, + ereports: Default::default(), + alerts_requested: Default::default(), + support_bundles_requested: Default::default(), facts, - ) + } } - /// Like [`make_parent_with_saga_case`], but with caller-controlled - /// `Fact`s (e.g., for tests that need specific fact UUIDs). - fn make_parent_with_saga_case_from_facts( + /// Make a parent sitrep containing the given cases. 
+ fn make_parent_sitrep( parent_sitrep_id: SitrepUuid, inv_collection_id: omicron_uuid_kinds::CollectionUuid, - facts: IdOrdMap, + cases: impl IntoIterator, ) -> Sitrep { - let mut cases = IdOrdMap::new(); - cases - .insert_unique(fm::Case { - id: omicron_uuid_kinds::CaseUuid::new_v4(), - metadata: fm::case::Metadata { - created_sitrep_id: parent_sitrep_id, - closed_sitrep_id: None, - de: DiagnosisEngineKind::Saga, - comment: "parent saga case".to_string(), - }, - ereports: Default::default(), - alerts_requested: Default::default(), - support_bundles_requested: Default::default(), - facts, - }) - .unwrap(); + let mut case_map = IdOrdMap::new(); + for case in cases { + case_map.insert_unique(case).unwrap(); + } Sitrep { metadata: fm::SitrepMetadata { id: parent_sitrep_id, @@ -567,11 +637,43 @@ mod tests { next_inv_min_time_started: Utc::now(), comment: String::new(), }, - cases, + cases: case_map, ereports_by_id: Default::default(), } } + fn make_parent_with_saga_case( + parent_sitrep_id: SitrepUuid, + inv_collection_id: omicron_uuid_kinds::CollectionUuid, + fact_payloads: impl IntoIterator, + ) -> Sitrep { + let facts = fact_map( + fact_payloads + .into_iter() + .map(|payload| mk_fact(parent_sitrep_id, payload)), + ); + make_parent_with_saga_case_from_facts( + parent_sitrep_id, + inv_collection_id, + facts, + ) + } + + /// Like [`make_parent_with_saga_case`], but with caller-controlled + /// `Fact`s (e.g., for tests that need specific fact UUIDs). 
+ fn make_parent_with_saga_case_from_facts( + parent_sitrep_id: SitrepUuid, + inv_collection_id: omicron_uuid_kinds::CollectionUuid, + facts: IdOrdMap, + ) -> Sitrep { + let case = make_saga_case( + omicron_uuid_kinds::CaseUuid::new_v4(), + parent_sitrep_id, + facts, + ); + make_parent_sitrep(parent_sitrep_id, inv_collection_id, [case]) + } + #[test] fn opens_not_progressing_when_stale() { let (logctx, collection) = setup("saga_open_not_progressing"); @@ -1272,4 +1374,233 @@ mod tests { assert_eq!(case.facts.len(), 1, "the fact stays attached as evidence"); logctx.cleanup_successful(); } + + /// A parent Saga case with zero facts has no derivable saga ID; the + /// engine closes it as uninterpretable rather than carrying an + /// unprocessable open case forward into every future sitrep. + #[test] + fn empty_case_is_closed() { + let (logctx, collection) = setup("saga_empty_case_closed"); + let inv_id = collection.id; + let parent_id = SitrepUuid::new_v4(); + let empty_case_id = omicron_uuid_kinds::CaseUuid::new_v4(); + let parent = make_parent_sitrep( + parent_id, + inv_id, + [make_saga_case(empty_case_id, parent_id, IdOrdMap::new())], + ); + + let input = build_input(collection, Some(parent), observed_map([])); + let (sitrep, report) = run_analyze(&logctx.log, &input); + + let case = sitrep + .cases + .get(&empty_case_id) + .expect("empty case should still be in the output sitrep"); + assert!(!case.is_open(), "uninterpretable empty case should be closed",); + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("cannot interpret case"), + "close comment should say the case was uninterpretable, got: \ + {report_str}", + ); + logctx.cleanup_successful(); + } + + /// Two open parent cases about the same saga: the engine keeps and + /// maintains the one with the lowest case ID, and closes the other as a + /// duplicate. (A half-maintained duplicate would otherwise decay into + /// an uninterpretable empty case.) 
+ #[test] + fn duplicate_case_is_closed() { + let (logctx, collection) = setup("saga_duplicate_closed"); + let inv_id = collection.id; + let reference_time = collection.time_done; + let parent_id = SitrepUuid::new_v4(); + let id = saga_id(1); + // Stale enough to keep the NotProgressing condition holding, and + // matching the observation exactly so the kept fact carries forward. + let last_event = reference_time - TimeDelta::hours(2); + let payload = SagaNotProgressingFactPayload { + saga_id: id, + saga_state: SagaProgressState::Unwinding, + last_event_time: last_event, + }; + + let id_a = omicron_uuid_kinds::CaseUuid::new_v4(); + let id_b = omicron_uuid_kinds::CaseUuid::new_v4(); + let (kept_id, dup_id) = + if id_a < id_b { (id_a, id_b) } else { (id_b, id_a) }; + let kept_fact = + mk_fact(parent_id, SagaFact::NotProgressing(payload.clone())); + let kept_fact_id = kept_fact.id; + let dup_fact = + mk_fact(parent_id, SagaFact::NotProgressing(payload.clone())); + let parent = make_parent_sitrep( + parent_id, + inv_id, + [ + make_saga_case(kept_id, parent_id, fact_map([kept_fact])), + make_saga_case(dup_id, parent_id, fact_map([dup_fact])), + ], + ); + + let observed = + observed_map([mk_observed(id, Some(last_event), None, None)]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, report) = run_analyze(&logctx.log, &input); + + let kept = sitrep.cases.get(&kept_id).expect("kept case present"); + let dup = sitrep.cases.get(&dup_id).expect("duplicate case present"); + assert!(kept.is_open(), "lowest-ID case should remain open"); + assert!(!dup.is_open(), "duplicate case should be closed"); + // The kept case's fact is still accurate, so it should carry + // forward unchanged. + assert!( + kept.facts.contains_key(&kept_fact_id), + "kept case should retain its fact", + ); + // No third case should have been opened for this saga. 
+ let saga_case_count = sitrep + .cases + .iter() + .filter(|c| c.metadata.de == DiagnosisEngineKind::Saga) + .count(); + assert_eq!(saga_case_count, 2); + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("duplicate of case"), + "close comment should call out the duplicate, got: {report_str}", + ); + logctx.cleanup_successful(); + } + + /// A case whose facts disagree about which saga they concern is closed + /// as uninterpretable. Because detection is independent of case + /// bookkeeping, the still-stuck saga gets a fresh, well-formed case in + /// the same pass. + #[test] + fn uninterpretable_case_is_replaced() { + let (logctx, collection) = setup("saga_uninterpretable_replaced"); + let inv_id = collection.id; + let reference_time = collection.time_done; + let parent_id = SitrepUuid::new_v4(); + let stuck_saga = saga_id(1); + let last_event = reference_time - TimeDelta::hours(2); + + // One fact about the stuck saga, one about an unrelated saga: the + // case is self-contradictory. 
+ let corrupt_case_id = omicron_uuid_kinds::CaseUuid::new_v4(); + let corrupt_case = make_saga_case( + corrupt_case_id, + parent_id, + fact_map([ + mk_fact( + parent_id, + SagaFact::NotProgressing(SagaNotProgressingFactPayload { + saga_id: stuck_saga, + saga_state: SagaProgressState::Unwinding, + last_event_time: last_event, + }), + ), + mk_fact( + parent_id, + SagaFact::Abandoned(SagaAbandonedFactPayload { + saga_id: saga_id(2), + }), + ), + ]), + ); + let parent = make_parent_sitrep(parent_id, inv_id, [corrupt_case]); + + let observed = observed_map([mk_observed( + stuck_saga, + Some(last_event), + None, + None, + )]); + let input = build_input(collection, Some(parent), observed); + let (sitrep, report) = run_analyze(&logctx.log, &input); + + let corrupt = sitrep + .cases + .get(&corrupt_case_id) + .expect("corrupt case should still be in the output sitrep"); + assert!( + !corrupt.is_open(), + "case with disagreeing facts should be closed", + ); + // The stuck saga still needs attention, so a fresh case should have + // been opened for it. + let open = saga_facts(&sitrep, true); + assert_eq!( + open.len(), + 1, + "expected exactly one open Saga fact (on the replacement case)", + ); + match &open[0].1 { + SagaFact::NotProgressing(p) => { + assert_eq!(p.saga_id, stuck_saga); + assert_eq!(p.last_event_time, last_event); + } + other => panic!("expected NotProgressing fact, got {other:?}"), + } + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("cannot interpret case"), + "close comment should say the case was uninterpretable, got: \ + {report_str}", + ); + logctx.cleanup_successful(); + } + + /// A Saga case carrying a physical-disk fact payload is a data-model + /// violation; the engine closes it as uninterpretable. 
+ #[test] + fn foreign_payload_case_is_closed() { + let (logctx, collection) = setup("saga_foreign_payload_closed"); + let inv_id = collection.id; + let parent_id = SitrepUuid::new_v4(); + let case_id = omicron_uuid_kinds::CaseUuid::new_v4(); + let foreign_fact = fm::case::Fact { + id: omicron_uuid_kinds::FactUuid::new_v4(), + created_sitrep_id: parent_id, + payload: fm::FactPayload::PhysicalDisk( + fm::DiskFact::ZpoolUnhealthy(fm::ZpoolUnhealthyFactPayload { + physical_disk_id: + omicron_uuid_kinds::PhysicalDiskUuid::new_v4(), + zpool_id: omicron_uuid_kinds::ZpoolUuid::new_v4(), + last_seen_health: + nexus_types::inventory::ZpoolHealth::Degraded, + observed_in_inv: inv_id, + time_observed: Utc::now(), + }), + ), + comment: "a fact belonging to the physical-disk engine".to_string(), + }; + let parent = make_parent_sitrep( + parent_id, + inv_id, + [make_saga_case(case_id, parent_id, fact_map([foreign_fact]))], + ); + + let input = build_input(collection, Some(parent), observed_map([])); + let (sitrep, report) = run_analyze(&logctx.log, &input); + + let case = sitrep + .cases + .get(&case_id) + .expect("case should still be in the output sitrep"); + assert!( + !case.is_open(), + "case with a foreign fact payload should be closed", + ); + let report_str = format!("{}", report.display_multiline(0)); + assert!( + report_str.contains("cannot interpret case"), + "close comment should say the case was uninterpretable, got: \ + {report_str}", + ); + logctx.cleanup_successful(); + } } From 83c73ad6d0c18b5d32b3e99eeada3a941362ac54 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 12 Jun 2026 14:48:35 -0700 Subject: [PATCH 21/30] [fm] derive fact row kind from payload via exhaustive match Previously from_sitrep() set a default kind in the base row that every match arm had to remember to override; a future DiskFact variant that forgot would silently inherit the wrong discriminant. Route kind through an exhaustive match on the payload so that can't compile. 
--- nexus/db-model/src/fm/fact_physical_disk.rs | 32 ++++++++++++++------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/nexus/db-model/src/fm/fact_physical_disk.rs b/nexus/db-model/src/fm/fact_physical_disk.rs index f81df308b35..2caf8c8a0d3 100644 --- a/nexus/db-model/src/fm/fact_physical_disk.rs +++ b/nexus/db-model/src/fm/fact_physical_disk.rs @@ -72,29 +72,31 @@ impl FmFactPhysicalDisk { fact: &fm::case::Fact, disk_fact: &DiskFact, ) -> Self { - let base = Self { + let mut row = Self { id: fact.id.into(), sitrep_id: sitrep_id.into(), case_id: case_id.into(), created_sitrep_id: fact.created_sitrep_id.into(), comment: fact.comment.clone(), physical_disk_id: disk_fact.physical_disk_id().into(), - kind: FmFactPhysicalDiskKind::ZpoolUnhealthy, + kind: db_kind(disk_fact), zpool_id: None, last_seen_health: None, observed_in_inv: None, time_observed: None, }; + // Each arm populates the columns belonging to its `kind` and leaves + // every other payload column `None`. A column missed here is caught + // at insert time by the table's per-kind CHECK constraint. match disk_fact { - DiskFact::ZpoolUnhealthy(p) => Self { - kind: FmFactPhysicalDiskKind::ZpoolUnhealthy, - zpool_id: Some(p.zpool_id.into()), - last_seen_health: Some(p.last_seen_health.into()), - observed_in_inv: Some(p.observed_in_inv.into()), - time_observed: Some(p.time_observed), - ..base - }, + DiskFact::ZpoolUnhealthy(p) => { + row.zpool_id = Some(p.zpool_id.into()); + row.last_seen_health = Some(p.last_seen_health.into()); + row.observed_in_inv = Some(p.observed_in_inv.into()); + row.time_observed = Some(p.time_observed); + } } + row } /// Reconstruct an in-memory fact from a row. @@ -140,6 +142,16 @@ impl FmFactPhysicalDisk { } } +/// The `kind` discriminant for a fact's payload. 
Exhaustive by construction: +/// adding a `DiskFact` variant will not compile until it is mapped here, so +/// `from_sitrep` can never write a row whose `kind` was defaulted rather than +/// derived from the payload. +fn db_kind(disk_fact: &DiskFact) -> FmFactPhysicalDiskKind { + match disk_fact { + DiskFact::ZpoolUnhealthy(_) => FmFactPhysicalDiskKind::ZpoolUnhealthy, + } +} + fn missing_column(kind: FmFactPhysicalDiskKind, column: &str) -> Error { Error::internal_error(&format!( "fm_fact_physical_disk row of kind {kind:?} has a NULL {column}, \ From 546fc5d6cd4501aa57be001dc2458b2574be2ce5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 15 Jun 2026 09:50:01 -0700 Subject: [PATCH 22/30] [fm] tidy disk diagnoser per review feedback Remove the unused CaseBuilder::facts() accessor, collapse the three identical multi-line .expect() strings in analyze() to one short message, and drop the redundant case-level comment on a freshly opened disk case (its specifics already live in the fact added alongside it). --- nexus/fm/src/builder/case.rs | 6 ------ nexus/fm/src/diagnosis/physical_disk.rs | 20 +++++++------------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/nexus/fm/src/builder/case.rs b/nexus/fm/src/builder/case.rs index b2c6726942a..46bd7f40b26 100644 --- a/nexus/fm/src/builder/case.rs +++ b/nexus/fm/src/builder/case.rs @@ -279,12 +279,6 @@ impl CaseBuilder { } } - /// Iterate the facts currently attached to this case (including any that - /// were carried forward from the parent sitrep). 
- pub fn facts(&self) -> impl Iterator { - self.case.facts.iter() - } - pub fn add_ereport( &mut self, report: &Arc, diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index 401251c60e1..907716cb45c 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -184,7 +184,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { builder .cases .case_mut(&case_id) - .expect("case came from builder.input()'s open cases") + .expect("case_id came from builder's open cases") .close(format!("cannot interpret case: {reason}")); } @@ -214,7 +214,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { builder .cases .case_mut(case_id) - .expect("case came from builder.input()'s open cases") + .expect("case_id came from builder's open cases") .close(format!( "duplicate of case {kept_case_id} for disk {}", summary.physical_disk_id, @@ -233,10 +233,10 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // is NOT a recovery signal: sled could be powered off, or // inventory could be lossy) for &(case_id, summary) in case_by_disk.values() { - let mut case_mut = builder.cases.case_mut(&case_id).expect( - "builder.cases is seeded from the open cases of builder.input(), \ - which is where this case_id came from", - ); + let mut case_mut = builder + .cases + .case_mut(&case_id) + .expect("case_id came from builder's open cases"); match in_service_health.get(&summary.physical_disk_id) { None => { case_mut.close(format!( @@ -292,13 +292,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { Some((case_id, _)) => case_id, // No parent case for this disk; open one. 
None => { - let mut new_case = - builder.cases.open_case(DiagnosisEngineKind::PhysicalDisk); - new_case.set_comment(format!( - "physical disk {} unhealthy", - disk.physical_disk_id, - )); - new_case.id + builder.cases.open_case(DiagnosisEngineKind::PhysicalDisk).id } }; From 271875579992dda0c7aec445a11f6ed3210738cf Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 15 Jun 2026 10:26:55 -0700 Subject: [PATCH 23/30] [fm] split Fact into FactMetadata + payload Lets the per-DE row constructor take (metadata, payload) instead of a whole Fact plus a redundant DiskFact, removing the consistency footgun where the two could disagree. Mirrors Case's metadata/payload split. --- nexus/db-model/src/fm/fact_physical_disk.rs | 26 +++++++++--------- nexus/db-queries/src/db/datastore/fm.rs | 15 +++++++---- nexus/fm/src/builder/case.rs | 8 +++--- nexus/fm/src/diagnosis/physical_disk.rs | 23 +++++++++------- nexus/types/src/fm/case.rs | 30 ++++++++++++++++----- 5 files changed, 66 insertions(+), 36 deletions(-) diff --git a/nexus/db-model/src/fm/fact_physical_disk.rs b/nexus/db-model/src/fm/fact_physical_disk.rs index 2caf8c8a0d3..d5d3820a384 100644 --- a/nexus/db-model/src/fm/fact_physical_disk.rs +++ b/nexus/db-model/src/fm/fact_physical_disk.rs @@ -6,9 +6,7 @@ //! //! Each physical-disk fact is stored as typed columns in the //! `fm_fact_physical_disk` table. The `kind` discriminant selects which payload -//! columns are populated; a CHECK constraint -//! (`zpool_unhealthy_columns_present`) enforces that the right columns are -//! non-NULL for each kind. See [`nexus_types::fm::DiskFact`] for semantics. +//! columns are populated. 
use crate::DbTypedUuid; use crate::impl_enum_type; @@ -16,6 +14,7 @@ use crate::inventory::InvZpoolHealth; use chrono::{DateTime, Utc}; use nexus_db_schema::schema::fm_fact_physical_disk; use nexus_types::fm; +use nexus_types::fm::case::FactMetadata; use nexus_types::fm::{DiskFact, FactPayload, ZpoolUnhealthyFactPayload}; use omicron_common::api::external::Error; use omicron_uuid_kinds::{ @@ -51,8 +50,7 @@ pub struct FmFactPhysicalDisk { pub created_sitrep_id: DbTypedUuid, pub comment: String, - /// The physical disk this fact is about. Common to every `kind`, so it is - /// always present (the column is `NOT NULL`). + /// The physical disk this fact is about. Common to every `kind`. pub physical_disk_id: DbTypedUuid, pub kind: FmFactPhysicalDiskKind, @@ -64,20 +62,20 @@ pub struct FmFactPhysicalDisk { } impl FmFactPhysicalDisk { - /// Build a row from a fact's shared metadata (`fact`) and its + /// Build a row from a fact's shared metadata (`metadata`) and its /// physical-disk payload (`disk_fact`). 
pub fn from_sitrep( sitrep_id: impl Into>, case_id: impl Into>, - fact: &fm::case::Fact, + metadata: &FactMetadata, disk_fact: &DiskFact, ) -> Self { let mut row = Self { - id: fact.id.into(), + id: metadata.id.into(), sitrep_id: sitrep_id.into(), case_id: case_id.into(), - created_sitrep_id: fact.created_sitrep_id.into(), - comment: fact.comment.clone(), + created_sitrep_id: metadata.created_sitrep_id.into(), + comment: metadata.comment.clone(), physical_disk_id: disk_fact.physical_disk_id().into(), kind: db_kind(disk_fact), zpool_id: None, @@ -134,10 +132,12 @@ impl FmFactPhysicalDisk { } }; Ok(fm::case::Fact { - id: self.id.into(), - created_sitrep_id: self.created_sitrep_id.into(), + metadata: FactMetadata { + id: self.id.into(), + created_sitrep_id: self.created_sitrep_id.into(), + comment: self.comment, + }, payload, - comment: self.comment, }) } } diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index 3aa71000b4b..8743f8733d2 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -547,7 +547,7 @@ impl DataStore { for row in batch { let case_id: CaseUuid = row.case_id.into(); let fact = row.into_fact()?; - let id = fact.id; + let id = fact.metadata.id; by_case .entry(case_id) .or_default() @@ -840,7 +840,10 @@ impl DataStore { fm::FactPayload::PhysicalDisk(disk_fact) => { physical_disk_facts.push( model::fm::FmFactPhysicalDisk::from_sitrep( - sitrep_id, case_id, fact, disk_fact, + sitrep_id, + case_id, + &fact.metadata, + disk_fact, ), ); } @@ -2281,8 +2284,11 @@ mod tests { let mut facts = iddqd::IdOrdMap::new(); facts .insert_unique(fm::case::Fact { - id: FactUuid::new_v4(), - created_sitrep_id: sitrep_id, + metadata: fm::case::FactMetadata { + id: FactUuid::new_v4(), + created_sitrep_id: sitrep_id, + comment: "a representative fact for case 1".to_string(), + }, payload: fm::FactPayload::PhysicalDisk( fm::DiskFact::ZpoolUnhealthy( fm::ZpoolUnhealthyFactPayload { @@ 
-2299,7 +2305,6 @@ mod tests { }, ), ), - comment: "a representative fact for case 1".to_string(), }) .unwrap(); diff --git a/nexus/fm/src/builder/case.rs b/nexus/fm/src/builder/case.rs index 46bd7f40b26..438f4932406 100644 --- a/nexus/fm/src/builder/case.rs +++ b/nexus/fm/src/builder/case.rs @@ -261,10 +261,12 @@ impl CaseBuilder { .kv("payload", &payload) .comment(comment.clone()); let fact = fm::case::Fact { - id, - created_sitrep_id: self.sitrep_id, + metadata: fm::case::FactMetadata { + id, + created_sitrep_id: self.sitrep_id, + comment, + }, payload, - comment, }; self.case.facts.insert_unique(fact).expect("UUID should be unused"); id diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index 907716cb45c..bf6936d2193 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -90,7 +90,7 @@ fn summarize_case( // payload; a foreign payload is a data-model violation. let Some(disk_fact) = fact.payload.as_physical_disk() else { return Err(UninterpretableCase::ForeignFactPayload { - fact_id: fact.id, + fact_id: fact.metadata.id, }); }; match disk_fact { @@ -106,7 +106,7 @@ fn summarize_case( } unhealthy_facts .insert_unique(ZpoolUnhealthyFact { - fact_id: fact.id, + fact_id: fact.metadata.id, payload, }) .expect("fact ids are unique within a case"); @@ -454,8 +454,11 @@ mod tests { zpool_id: ZpoolUuid, ) -> fm::case::Fact { fm::case::Fact { - id: omicron_uuid_kinds::FactUuid::new_v4(), - created_sitrep_id: parent_sitrep_id, + metadata: fm::case::FactMetadata { + id: omicron_uuid_kinds::FactUuid::new_v4(), + created_sitrep_id: parent_sitrep_id, + comment: format!("zpool {zpool_id} degraded"), + }, payload: DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { physical_disk_id, zpool_id, @@ -464,7 +467,6 @@ mod tests { time_observed: Utc::now(), }) .into(), - comment: format!("zpool {zpool_id} degraded"), } } @@ -788,7 +790,7 @@ mod tests { target_disk_id, target, ); - let 
kept_fact_id = kept_fact.id; + let kept_fact_id = kept_fact.metadata.id; let dup_fact = make_degraded_fact( parent_id, collection.id, @@ -938,6 +940,7 @@ mod tests { .iter() .next() .expect("parent case should have one fact") + .metadata .id; let input = build_input(collection, Some(parent), in_service); @@ -945,7 +948,7 @@ mod tests { let open = disk_facts(&sitrep, true); assert_eq!(open.len(), 1, "expected exactly one open Disk fact"); assert_eq!( - open[0].1.id, parent_fact_id, + open[0].1.metadata.id, parent_fact_id, "fact UUID should be stable across sitreps when the \ observation hasn't changed", ); @@ -990,6 +993,7 @@ mod tests { .iter() .next() .expect("parent case should have one fact") + .metadata .id; let input = build_input(collection, Some(parent), in_service); @@ -1001,7 +1005,7 @@ mod tests { "expected exactly one open Disk fact (the refreshed one)", ); assert_ne!( - open[0].1.id, parent_fact_id, + open[0].1.metadata.id, parent_fact_id, "fact UUID should rotate because last_seen_health changed", ); match &open[0].2 { @@ -1053,6 +1057,7 @@ mod tests { .iter() .next() .expect("parent case should have one fact") + .metadata .id; let input = build_input(collection, Some(parent), in_service); @@ -1064,7 +1069,7 @@ mod tests { "expected exactly one open Disk fact (the refreshed one)", ); assert_ne!( - open[0].1.id, parent_fact_id, + open[0].1.metadata.id, parent_fact_id, "fact UUID should rotate because the disk's zpool changed", ); match &open[0].2 { diff --git a/nexus/types/src/fm/case.rs b/nexus/types/src/fm/case.rs index 6410eee3009..686ef49eec0 100644 --- a/nexus/types/src/fm/case.rs +++ b/nexus/types/src/fm/case.rs @@ -176,19 +176,28 @@ impl CaseEreport { /// the case's diagnosis engine (see [`Metadata::de`]). 
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] pub struct Fact { + #[serde(flatten)] + pub metadata: FactMetadata, + pub payload: FactPayload, +} + +/// The diagnosis-engine-agnostic part of a [`Fact`]: everything that is not +/// the typed [`payload`](Fact::payload). Every diagnosis engine's facts share +/// these fields. +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct FactMetadata { pub id: FactUuid, /// The sitrep in which this fact was first added. Preserved /// unchanged when the fact is carried forward into a child sitrep. /// Debug-only. pub created_sitrep_id: SitrepUuid, - pub payload: FactPayload, pub comment: String, } impl IdOrdItem for Fact { type Key<'a> = &'a FactUuid; fn key(&self) -> Self::Key<'_> { - &self.id + &self.metadata.id } iddqd::id_upcast!(); } @@ -213,7 +222,12 @@ impl Fact { const WIDTH: usize = const_max_len(&[ADDED_IN, COMMENT]); let &Self { - fact: Fact { id, created_sitrep_id, payload, comment }, + fact: + Fact { + metadata: + FactMetadata { id, created_sitrep_id, comment }, + payload, + }, indent, sitrep_id, } = self; @@ -619,9 +633,14 @@ mod tests { let mut facts = IdOrdMap::new(); facts .insert_unique(Fact { - id: FactUuid::from_str("f00f00f0-0f00-4f00-8f00-f00f00f00f00") + metadata: FactMetadata { + id: FactUuid::from_str( + "f00f00f0-0f00-4f00-8f00-f00f00f00f00", + ) .unwrap(), - created_sitrep_id, + created_sitrep_id, + comment: "made-up fact for display test".to_string(), + }, payload: FactPayload::PhysicalDisk(DiskFact::ZpoolUnhealthy( ZpoolUnhealthyFactPayload { physical_disk_id: PhysicalDiskUuid::from_str( @@ -640,7 +659,6 @@ mod tests { time_observed: chrono::DateTime::::MIN_UTC, }, )), - comment: "made-up fact for display test".to_string(), }) .unwrap(); From 0ada45de69421ce033ecf7a40fde22cd5d4172a0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 15 Jun 2026 11:41:03 -0700 Subject: [PATCH 24/30] [fm] disk diagnoser: clearer names for parsing a case observed_health -> 
observed_zpool_health (keeps the load-bearing 'observed' semantic), summarize_case -> parse_case (it validates and rejects, not just condenses), and ParentCaseSummary -> ParsedDiskCase to match. --- nexus/fm/src/diagnosis/physical_disk.rs | 57 ++++++++++++------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index bf6936d2193..13ab573d685 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -47,9 +47,10 @@ impl IdOrdItem for DiskHealthSnapshot { id_upcast!(); } -/// Per-case summary built from a case's facts. Each Disk case is about a -/// single physical disk; every fact on the case must reference that disk. -struct ParentCaseSummary { +/// A parent-forwarded Disk case, parsed into the form this engine acts on. +/// Each Disk case is about a single physical disk; every fact on the case +/// must reference that disk. +struct ParsedDiskCase { /// The physical disk this case is about. physical_disk_id: PhysicalDiskUuid, /// All `ZpoolUnhealthy` facts on this case. Normally one; pathological @@ -78,11 +79,9 @@ enum UninterpretableCase { NoFacts, } -/// Summarize one parent-forwarded Disk case, or explain why it cannot be -/// interpreted. -fn summarize_case( - case: &fm::Case, -) -> Result { +/// Parse one parent-forwarded Disk case into a [`ParsedDiskCase`], or explain +/// why it cannot be interpreted. 
+fn parse_case(case: &fm::Case) -> Result { let mut unhealthy_facts: IdOrdMap = IdOrdMap::new(); let mut case_disk_id: Option = None; for fact in case.facts.iter() { @@ -116,7 +115,7 @@ fn summarize_case( let Some(physical_disk_id) = case_disk_id else { return Err(UninterpretableCase::NoFacts); }; - Ok(ParentCaseSummary { physical_disk_id, unhealthy_facts }) + Ok(ParsedDiskCase { physical_disk_id, unhealthy_facts }) } pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { @@ -127,7 +126,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // Index every zpool we observed in this inventory, so we can distinguish // "saw it, it's Online" from "didn't see it at all" when looking up by // an in-service disk's zpool below. - let observed_health: BTreeMap = input + let observed_zpool_health: BTreeMap = input .inventory() .sled_agents .iter() @@ -145,22 +144,22 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { .map(|d| DiskHealthSnapshot { physical_disk_id: d.physical_disk_id, zpool_id: d.zpool_id, - zpool_health: observed_health.get(&d.zpool_id).copied(), + zpool_health: observed_zpool_health.get(&d.zpool_id).copied(), }) .collect(); // Index the Disk cases copied forward from the parent sitrep. Every case // is about one physical disk; we derive the disk from its facts. 
- let mut parent_cases = BTreeMap::::new(); + let mut parent_cases = BTreeMap::::new(); let mut uninterpretable = Vec::<(CaseUuid, UninterpretableCase)>::new(); for case in input .open_cases() .iter() .filter(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) { - match summarize_case(case) { - Ok(summary) => { - parent_cases.insert(case.id, summary); + match parse_case(case) { + Ok(parsed_case) => { + parent_cases.insert(case.id, parsed_case); } Err(reason) => { slog::warn!( @@ -194,22 +193,22 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // duplicate would otherwise decay into an uninterpretable empty case.) let mut case_by_disk: BTreeMap< PhysicalDiskUuid, - (CaseUuid, &ParentCaseSummary), + (CaseUuid, &ParsedDiskCase), > = BTreeMap::new(); - for (case_id, summary) in &parent_cases { + for (case_id, parsed_case) in &parent_cases { case_by_disk - .entry(summary.physical_disk_id) - .or_insert((*case_id, summary)); + .entry(parsed_case.physical_disk_id) + .or_insert((*case_id, parsed_case)); } - for (case_id, summary) in &parent_cases { - let (kept_case_id, _) = case_by_disk[&summary.physical_disk_id]; + for (case_id, parsed_case) in &parent_cases { + let (kept_case_id, _) = case_by_disk[&parsed_case.physical_disk_id]; if *case_id != kept_case_id { slog::warn!( &builder.log, "closing duplicate Disk case"; "case_id" => %case_id, "kept_case_id" => %kept_case_id, - "physical_disk_id" => %summary.physical_disk_id, + "physical_disk_id" => %parsed_case.physical_disk_id, ); builder .cases @@ -217,7 +216,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { .expect("case_id came from builder's open cases") .close(format!( "duplicate of case {kept_case_id} for disk {}", - summary.physical_disk_id, + parsed_case.physical_disk_id, )); } } @@ -232,16 +231,16 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // - disk in service but absent from inventory → leave alone (absence 
// is NOT a recovery signal: sled could be powered off, or // inventory could be lossy) - for &(case_id, summary) in case_by_disk.values() { + for &(case_id, parsed_case) in case_by_disk.values() { let mut case_mut = builder .cases .case_mut(&case_id) .expect("case_id came from builder's open cases"); - match in_service_health.get(&summary.physical_disk_id) { + match in_service_health.get(&parsed_case.physical_disk_id) { None => { case_mut.close(format!( "disk {} no longer in service", - summary.physical_disk_id, + parsed_case.physical_disk_id, )); } Some(snap) if snap.zpool_health == Some(ZpoolHealth::Online) => { @@ -252,7 +251,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { let Some(current_health) = snap.zpool_health else { continue; }; - for fact_ref in summary.unhealthy_facts.iter() { + for fact_ref in parsed_case.unhealthy_facts.iter() { if fact_ref.payload.last_seen_health != current_health || fact_ref.payload.zpool_id != snap.zpool_id { @@ -279,8 +278,8 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { let case_id_for_fact = match parent_for_disk { // Parent case already has an accurate fact; fully covered. - Some((_, summary)) - if summary.unhealthy_facts.iter().any(|f| { + Some((_, parsed_case)) + if parsed_case.unhealthy_facts.iter().any(|f| { f.payload.last_seen_health == current_health && f.payload.zpool_id == disk.zpool_id }) => From 39e4228c756286885644afe2c8c9d43812a72cd5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 15 Jun 2026 11:51:14 -0700 Subject: [PATCH 25/30] [fm] disk diagnoser: build case_by_disk in a single pass The duplicate-case detection re-scanned parent_cases and re-looked-up case_by_disk to find the cases that weren't kept. Since parent_cases iterates ascending by CaseUuid, the keep-lowest / close-the-rest decision can be made in one pass with the Entry API. 
--- nexus/fm/src/diagnosis/physical_disk.rs | 53 +++++++++++++------------ 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index 13ab573d685..52e33fa0e5d 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -12,6 +12,7 @@ use nexus_types::fm::{DiskFact, ZpoolUnhealthyFactPayload}; use nexus_types::inventory::ZpoolHealth; use omicron_uuid_kinds::{CaseUuid, FactUuid, PhysicalDiskUuid, ZpoolUuid}; use std::collections::BTreeMap; +use std::collections::btree_map::Entry; /// A [`DiskFact::ZpoolUnhealthy`] payload paired with the `FactUuid` it /// lives under. @@ -188,36 +189,38 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { } // Inverse index: which parent case is about which disk. Cases are - // per-disk, so a disk with two parent cases is pathological; keep the - // lowest case ID and close the rest as duplicates. (A half-maintained - // duplicate would otherwise decay into an uninterpretable empty case.) + // per-disk, so a disk with two parent cases is pathological. `parent_cases` + // iterates ascending by CaseUuid, so the first case we see for a disk is + // the lowest-ID one: keep it, and close any later case for the same disk as + // a duplicate. (A half-maintained duplicate would otherwise decay into an + // uninterpretable empty case.) 
let mut case_by_disk: BTreeMap< PhysicalDiskUuid, (CaseUuid, &ParsedDiskCase), > = BTreeMap::new(); for (case_id, parsed_case) in &parent_cases { - case_by_disk - .entry(parsed_case.physical_disk_id) - .or_insert((*case_id, parsed_case)); - } - for (case_id, parsed_case) in &parent_cases { - let (kept_case_id, _) = case_by_disk[&parsed_case.physical_disk_id]; - if *case_id != kept_case_id { - slog::warn!( - &builder.log, - "closing duplicate Disk case"; - "case_id" => %case_id, - "kept_case_id" => %kept_case_id, - "physical_disk_id" => %parsed_case.physical_disk_id, - ); - builder - .cases - .case_mut(case_id) - .expect("case_id came from builder's open cases") - .close(format!( - "duplicate of case {kept_case_id} for disk {}", - parsed_case.physical_disk_id, - )); + match case_by_disk.entry(parsed_case.physical_disk_id) { + Entry::Vacant(slot) => { + slot.insert((*case_id, parsed_case)); + } + Entry::Occupied(kept) => { + let (kept_case_id, _) = *kept.get(); + slog::warn!( + &builder.log, + "closing duplicate Disk case"; + "case_id" => %case_id, + "kept_case_id" => %kept_case_id, + "physical_disk_id" => %parsed_case.physical_disk_id, + ); + builder + .cases + .case_mut(case_id) + .expect("case_id came from builder's open cases") + .close(format!( + "duplicate of case {kept_case_id} for disk {}", + parsed_case.physical_disk_id, + )); + } } } From 4b6e9284f1a5e5aadc33c04d72278529bf05ea4e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 15 Jun 2026 11:54:40 -0700 Subject: [PATCH 26/30] [fm] disk diagnoser: separate case-closing from fact reconciliation Fact reconciliation was split across two loops over two collections: the first loop (over parent cases) removed stale facts, the second (over faulty in-service disks) added fresh ones, coupled by a 'removed above' comment. Make the first loop closing-only and let the second own all fact state for a disk (remove stale + add fresh) in one place. 
--- nexus/fm/src/diagnosis/physical_disk.rs | 105 +++++++++++------------- 1 file changed, 46 insertions(+), 59 deletions(-) diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index 52e33fa0e5d..c08a0d1d18f 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -224,16 +224,11 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { } } - // For each disk's surviving parent case, decide what to do based on the - // disk's current state: - // - disk no longer in service → close the case (expungement) - // - disk's zpool back to Online → close the case (recovery) - // - disk still unhealthy → drop any facts whose recorded observation - // (zpool + health) no longer matches; the matching loop below will - // re-add a fresh fact - // - disk in service but absent from inventory → leave alone (absence - // is NOT a recovery signal: sled could be powered off, or - // inventory could be lossy) + // Close the surviving parent case for any disk that has recovered or left + // service. A still-faulty disk's facts are reconciled in the next loop; a + // disk in service but absent from this inventory is left alone (absence is + // NOT a recovery signal: the sled could be powered off, or the collection + // could be lossy). 
for &(case_id, parsed_case) in case_by_disk.values() { let mut case_mut = builder .cases @@ -248,27 +243,18 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { } Some(snap) if snap.zpool_health == Some(ZpoolHealth::Online) => { case_mut - .close(format!("zpool {} back to Online", snap.zpool_id,)); - } - Some(snap) => { - let Some(current_health) = snap.zpool_health else { - continue; - }; - for fact_ref in parsed_case.unhealthy_facts.iter() { - if fact_ref.payload.last_seen_health != current_health - || fact_ref.payload.zpool_id != snap.zpool_id - { - case_mut.remove_fact(fact_ref.fact_id); - } - } + .close(format!("zpool {} back to Online", snap.zpool_id)); } + // Faulty or absent from inventory: leave open; the next loop + // reconciles the faulty ones and leaves the absent ones untouched. + Some(_) => {} } } - // For each currently-faulty in-service disk: ensure a case exists - // (reusing the parent-forwarded one for this disk if any) and add a - // fresh fact if one with this exact observation (zpool + health) isn't - // already present. + // For each currently-faulty in-service disk, ensure its case carries + // exactly one fact matching the current observation: reuse the + // parent-forwarded case if any (dropping any stale facts), otherwise open + // a fresh case. for disk in in_service_health.iter() { let Some(current_health) = disk.zpool_health else { continue; @@ -277,41 +263,42 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { continue; } - let parent_for_disk = case_by_disk.get(&disk.physical_disk_id).copied(); - - let case_id_for_fact = match parent_for_disk { - // Parent case already has an accurate fact; fully covered. - Some((_, parsed_case)) - if parsed_case.unhealthy_facts.iter().any(|f| { - f.payload.last_seen_health == current_health - && f.payload.zpool_id == disk.zpool_id - }) => - { - continue; - } - // Parent case exists; its stale facts were removed above. 
- // Refresh under the same case. - Some((case_id, _)) => case_id, - // No parent case for this disk; open one. - None => { - builder.cases.open_case(DiagnosisEngineKind::PhysicalDisk).id + let mut case_mut = match case_by_disk.get(&disk.physical_disk_id) { + Some(&(case_id, parsed_case)) => { + let mut case_mut = builder + .cases + .case_mut(&case_id) + .expect("case_id came from builder's open cases"); + let mut has_match = false; + for fact in parsed_case.unhealthy_facts.iter() { + if fact.payload.zpool_id == disk.zpool_id + && fact.payload.last_seen_health == current_health + { + // An accurate fact is already present; keep it. + has_match = true; + } else { + // Stale observation; drop it. + case_mut.remove_fact(fact.fact_id); + } + } + if has_match { + continue; + } + case_mut } + None => builder.cases.open_case(DiagnosisEngineKind::PhysicalDisk), }; - builder - .cases - .case_mut(&case_id_for_fact) - .expect("case_id came from this fn") - .add_fact( - DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { - physical_disk_id: disk.physical_disk_id, - zpool_id: disk.zpool_id, - last_seen_health: current_health, - observed_in_inv: inv_collection_id, - time_observed: inv_time_done, - }), - format!("zpool {} health={current_health}", disk.zpool_id,), - ); + case_mut.add_fact( + DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { + physical_disk_id: disk.physical_disk_id, + zpool_id: disk.zpool_id, + last_seen_health: current_health, + observed_in_inv: inv_collection_id, + time_observed: inv_time_done, + }), + format!("zpool {} health={current_health}", disk.zpool_id), + ); } Ok(()) From e731c5de53fb9e09a6d675dd0fdaa3cc0e16406c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 15 Jun 2026 17:23:58 -0700 Subject: [PATCH 27/30] [fm] disk diagnoser tests: name the disk-fact triple, dedup fact-id lookup Replace the (case, fact, DiskFact) tuple returned by disk_facts with a named DiskFactRef so call sites read .case/.fact/.disk_fact instead of .0/.1/.2, and extract 
the repeated parent-fact-id chain into a sole_disk_fact_id helper. Also fix a stale doc comment (identity fields, not facts). --- nexus/fm/src/diagnosis/physical_disk.rs | 104 +++++++++++------------- 1 file changed, 46 insertions(+), 58 deletions(-) diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index c08a0d1d18f..c20ffa3d4f8 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -323,8 +323,9 @@ mod tests { use std::sync::Arc; /// Make an in-service disk set from a list of zpool IDs. Each zpool gets - /// its own fresh `PhysicalDiskUuid` and dummy identity facts; tests in - /// this module only care about the zpool dimension. + /// its own fresh `PhysicalDiskUuid` and dummy identity fields + /// (vendor/serial/model); tests in this module only care about the zpool + /// dimension. fn mk_in_service( zpool_ids: impl IntoIterator, ) -> IdOrdMap { @@ -529,13 +530,18 @@ mod tests { make_parent_sitrep(parent_sitrep_id, inv_collection_id, [case]) } - /// Collect (case, fact, DiskFact) triples for every fact on a - /// physical-disk case in a sitrep. Optionally filtered to open cases - /// only. - fn disk_facts( - sitrep: &Sitrep, - open_only: bool, - ) -> Vec<(&fm::Case, &fm::case::Fact, DiskFact)> { + /// A physical-disk fact found in a sitrep, paired with the case it lives + /// on and its decoded [`DiskFact`] payload. + #[derive(Debug)] + struct DiskFactRef<'a> { + case: &'a fm::Case, + fact: &'a fm::case::Fact, + disk_fact: DiskFact, + } + + /// Collect every physical-disk fact in a sitrep, with its case and decoded + /// payload. Optionally filtered to open cases only. 
+ fn disk_facts(sitrep: &Sitrep, open_only: bool) -> Vec> { sitrep .cases .iter() @@ -543,12 +549,24 @@ mod tests { .filter(|c| !open_only || c.is_open()) .flat_map(|c| { c.facts.iter().filter_map(move |f| { - f.payload.as_physical_disk().map(|d| (c, f, d.clone())) + f.payload.as_physical_disk().map(|d| DiskFactRef { + case: c, + fact: f, + disk_fact: d.clone(), + }) }) }) .collect() } + /// The fact UUID of the one physical-disk fact on the one physical-disk + /// case in `sitrep`. Panics unless there is exactly one. + fn sole_disk_fact_id(sitrep: &Sitrep) -> omicron_uuid_kinds::FactUuid { + let facts = disk_facts(sitrep, false); + assert_eq!(facts.len(), 1, "expected exactly one physical-disk fact"); + facts[0].fact.metadata.id + } + #[test] fn opens_on_degraded_in_service() { let (logctx, mut collection, zpools) = @@ -562,7 +580,7 @@ mod tests { let (sitrep, _report) = run_analyze(&logctx.log, &input); let facts = disk_facts(&sitrep, true); assert_eq!(facts.len(), 1); - match &facts[0].2 { + match &facts[0].disk_fact { DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { physical_disk_id, zpool_id, @@ -615,7 +633,7 @@ mod tests { let (sitrep, _report) = run_analyze(&logctx.log, &input); let open_cases = disk_facts(&sitrep, true); assert_eq!(open_cases.len(), 1); - match &open_cases[0].2 { + match &open_cases[0].disk_fact { DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { zpool_id, .. 
@@ -646,7 +664,7 @@ mod tests { let all = disk_facts(&sitrep, false); assert_eq!(all.len(), 1); assert!( - !all[0].0.is_open(), + !all[0].case.is_open(), "case should be closed when zpool returns to Online", ); let report_str = format!("{}", report.display_multiline(0)); @@ -681,7 +699,7 @@ mod tests { let all = disk_facts(&sitrep, false); assert_eq!(all.len(), 1); assert!( - !all[0].0.is_open(), + !all[0].case.is_open(), "case should be closed when zpool's disk is expunged", ); let report_str = format!("{}", report.display_multiline(0)); @@ -716,7 +734,7 @@ mod tests { let all = disk_facts(&sitrep, false); assert_eq!(all.len(), 1); assert!( - all[0].0.is_open(), + all[0].case.is_open(), "case should remain open when its zpool is absent from the \ current inventory collection (sled could be down or inventory \ is lossy)", @@ -880,8 +898,8 @@ mod tests { 1, "expected exactly one open Disk fact (on the replacement case)", ); - assert_ne!(open[0].0.id, corrupt_case_id); - match &open[0].2 { + assert_ne!(open[0].case.id, corrupt_case_id); + match &open[0].disk_fact { DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { physical_disk_id, zpool_id, @@ -920,28 +938,18 @@ mod tests { target, ); // Capture the parent's fact UUID for the target zpool. 
- let parent_fact_id = parent - .cases - .iter() - .find(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) - .expect("parent should have one Disk case") - .facts - .iter() - .next() - .expect("parent case should have one fact") - .metadata - .id; + let parent_fact_id = sole_disk_fact_id(&parent); let input = build_input(collection, Some(parent), in_service); let (sitrep, _report) = run_analyze(&logctx.log, &input); let open = disk_facts(&sitrep, true); assert_eq!(open.len(), 1, "expected exactly one open Disk fact"); assert_eq!( - open[0].1.metadata.id, parent_fact_id, + open[0].fact.metadata.id, parent_fact_id, "fact UUID should be stable across sitreps when the \ observation hasn't changed", ); - match &open[0].2 { + match &open[0].disk_fact { DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { zpool_id, last_seen_health, @@ -973,17 +981,7 @@ mod tests { target_disk_id, target, ); - let parent_fact_id = parent - .cases - .iter() - .find(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) - .expect("parent should have one Disk case") - .facts - .iter() - .next() - .expect("parent case should have one fact") - .metadata - .id; + let parent_fact_id = sole_disk_fact_id(&parent); let input = build_input(collection, Some(parent), in_service); let (sitrep, _report) = run_analyze(&logctx.log, &input); @@ -994,10 +992,10 @@ mod tests { "expected exactly one open Disk fact (the refreshed one)", ); assert_ne!( - open[0].1.metadata.id, parent_fact_id, + open[0].fact.metadata.id, parent_fact_id, "fact UUID should rotate because last_seen_health changed", ); - match &open[0].2 { + match &open[0].disk_fact { DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { zpool_id, last_seen_health, @@ -1009,7 +1007,7 @@ mod tests { } // The case itself should still be the same one that was carried // forward; only the fact rotated. 
- assert!(open[0].0.is_open()); + assert!(open[0].case.is_open()); logctx.cleanup_successful(); } @@ -1037,17 +1035,7 @@ mod tests { target_disk_id, old_zpool_id, ); - let parent_fact_id = parent - .cases - .iter() - .find(|c| c.metadata.de == DiagnosisEngineKind::PhysicalDisk) - .expect("parent should have one Disk case") - .facts - .iter() - .next() - .expect("parent case should have one fact") - .metadata - .id; + let parent_fact_id = sole_disk_fact_id(&parent); let input = build_input(collection, Some(parent), in_service); let (sitrep, _report) = run_analyze(&logctx.log, &input); @@ -1058,10 +1046,10 @@ mod tests { "expected exactly one open Disk fact (the refreshed one)", ); assert_ne!( - open[0].1.metadata.id, parent_fact_id, + open[0].fact.metadata.id, parent_fact_id, "fact UUID should rotate because the disk's zpool changed", ); - match &open[0].2 { + match &open[0].disk_fact { DiskFact::ZpoolUnhealthy(ZpoolUnhealthyFactPayload { zpool_id, last_seen_health, @@ -1074,7 +1062,7 @@ mod tests { assert_eq!(*last_seen_health, ZpoolHealth::Degraded); } } - assert!(open[0].0.is_open()); + assert!(open[0].case.is_open()); logctx.cleanup_successful(); } } From f9479fcd0205427347ea264be1e1f6dccac50885 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 15 Jun 2026 17:59:34 -0700 Subject: [PATCH 28/30] [fm] address review feedback from eliza on #10541 Exhaustively destructure FactMetadata in from_sitrep so a new field fails to compile until it is mapped to a column. remove_fact now takes a comment (logged on removal) and logs the removed fact's payload; it warns if asked to remove a fact that does not exist. Drop a redundant 'Empty today' line from known_ereport_classes' doc. Close uninterpretable Disk cases inline in the parse loop instead of collecting them into a vec and looping again (the iterator borrows the input, not the builder), which also drops the now-redundant separate warn. 
Reframe the duplicate-case tie-break comment: keeping the lowest-ID case is arbitrary-but-deterministic, not 'correct'. --- nexus/db-model/src/fm/fact_physical_disk.rs | 9 ++-- nexus/fm/src/builder/case.rs | 28 +++++++++-- nexus/fm/src/diagnosis/mod.rs | 3 -- nexus/fm/src/diagnosis/physical_disk.rs | 54 ++++++++++----------- 4 files changed, 54 insertions(+), 40 deletions(-) diff --git a/nexus/db-model/src/fm/fact_physical_disk.rs b/nexus/db-model/src/fm/fact_physical_disk.rs index d5d3820a384..2c1518f0077 100644 --- a/nexus/db-model/src/fm/fact_physical_disk.rs +++ b/nexus/db-model/src/fm/fact_physical_disk.rs @@ -70,12 +70,15 @@ impl FmFactPhysicalDisk { metadata: &FactMetadata, disk_fact: &DiskFact, ) -> Self { + // Destructure exhaustively: a new `FactMetadata` field will fail to + // compile here until it is mapped to a column. + let FactMetadata { id, created_sitrep_id, comment } = metadata; let mut row = Self { - id: metadata.id.into(), + id: (*id).into(), sitrep_id: sitrep_id.into(), case_id: case_id.into(), - created_sitrep_id: metadata.created_sitrep_id.into(), - comment: metadata.comment.clone(), + created_sitrep_id: (*created_sitrep_id).into(), + comment: comment.clone(), physical_disk_id: disk_fact.physical_disk_id().into(), kind: db_kind(disk_fact), zpool_id: None, diff --git a/nexus/fm/src/builder/case.rs b/nexus/fm/src/builder/case.rs index 438f4932406..ab6a02ef4ec 100644 --- a/nexus/fm/src/builder/case.rs +++ b/nexus/fm/src/builder/case.rs @@ -273,11 +273,29 @@ impl CaseBuilder { } /// Remove a fact from this case. The fact will not be carried forward - /// into the next sitrep. - pub fn remove_fact(&mut self, id: FactUuid) { - if self.case.facts.remove(&id).is_some() { - slog::info!(&self.log, "removed a fact"; "fact_id" => %id); - self.report_log.entry("removed fact").kv("fact_id", id); + /// into the next sitrep. `comment` records why it was removed. 
+ pub fn remove_fact(&mut self, id: FactUuid, comment: impl ToString) { + let comment = comment.to_string(); + if let Some(fact) = self.case.facts.remove(&id) { + slog::info!( + &self.log, + "removed a fact"; + "fact_id" => %id, + "payload" => ?fact.payload, + "comment" => %comment, + ); + self.report_log + .entry("removed fact") + .kv("fact_id", id) + .kv("payload", &fact.payload) + .comment(comment); + } else { + slog::warn!( + &self.log, + "tried to remove a fact that does not exist"; + "fact_id" => %id, + "comment" => %comment, + ); } } diff --git a/nexus/fm/src/diagnosis/mod.rs b/nexus/fm/src/diagnosis/mod.rs index 48a3c3a184c..30e210779e3 100644 --- a/nexus/fm/src/diagnosis/mod.rs +++ b/nexus/fm/src/diagnosis/mod.rs @@ -19,9 +19,6 @@ pub fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { /// Ereport classes that any diagnosis engine in this build of Nexus knows /// how to consume. The background task uses this to filter loaded ereports. /// -/// Empty today: Grow this list alongside FM analysis as new classes gain -/// ereport support. -/// /// **NULL-class ereports are intentionally excluded by the loader's SQL /// filter** (`class = ANY(...)` never matches NULL). If FM analysis ever /// needs to handle the "couldn't extract a class" or "reporter doesn't know diff --git a/nexus/fm/src/diagnosis/physical_disk.rs b/nexus/fm/src/diagnosis/physical_disk.rs index c20ffa3d4f8..5832c1f9d6b 100644 --- a/nexus/fm/src/diagnosis/physical_disk.rs +++ b/nexus/fm/src/diagnosis/physical_disk.rs @@ -152,7 +152,6 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // Index the Disk cases copied forward from the parent sitrep. Every case // is about one physical disk; we derive the disk from its facts. 
let mut parent_cases = BTreeMap::::new(); - let mut uninterpretable = Vec::<(CaseUuid, UninterpretableCase)>::new(); for case in input .open_cases() .iter() @@ -163,37 +162,30 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { parent_cases.insert(case.id, parsed_case); } Err(reason) => { - slog::warn!( - &builder.log, - "closing uninterpretable Disk case"; - "case_id" => %case.id, - "reason" => %reason, - ); - uninterpretable.push((case.id, reason)); + // Close the cases we couldn't interpret, so they don't ride + // along as open-but-unprocessable in every future sitrep. This + // is safe with respect to fault coverage: detection below is + // independent of case bookkeeping, so if a closed case + // concerned a disk that is genuinely unhealthy and in service, + // a fresh, well-formed case is opened in this same pass. + builder + .cases + .case_mut(&case.id) + .expect("case_id came from builder's open cases") + .close(format!("cannot interpret case: {reason}")); } } } - // Close the cases we couldn't interpret, so they don't ride along as - // open-but-unprocessable in every future sitrep. This is safe with - // respect to fault coverage: detection below is independent of case - // bookkeeping, so if a closed case concerned a disk that is genuinely - // unhealthy and in service, a fresh, well-formed case is opened in this - // same pass. - for (case_id, reason) in uninterpretable { - builder - .cases - .case_mut(&case_id) - .expect("case_id came from builder's open cases") - .close(format!("cannot interpret case: {reason}")); - } - - // Inverse index: which parent case is about which disk. Cases are - // per-disk, so a disk with two parent cases is pathological. `parent_cases` - // iterates ascending by CaseUuid, so the first case we see for a disk is - // the lowest-ID one: keep it, and close any later case for the same disk as - // a duplicate. 
(A half-maintained duplicate would otherwise decay into an - // uninterpretable empty case.) + // Inverse index: which parent case is about which disk. Cases are per-disk, + // so a disk with two parent cases is already pathological. We keep one and + // close the rest as duplicates; which one we keep is arbitrary. + // `parent_cases` iterates ascending by CaseUuid, so we deterministically + // keep the lowest-ID case, but the ID ordering carries no meaning here + // (keeping the oldest would be nicer, but that needs sitrep version numbers + // we don't thread through here). Closing the duplicate matters: a + // half-maintained one would otherwise decay into an uninterpretable empty + // case. let mut case_by_disk: BTreeMap< PhysicalDiskUuid, (CaseUuid, &ParsedDiskCase), @@ -278,7 +270,11 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { has_match = true; } else { // Stale observation; drop it. - case_mut.remove_fact(fact.fact_id); + case_mut.remove_fact( + fact.fact_id, + "stale zpool observation, superseded by current \ + inventory", + ); } } if has_match { From 5b48c0821ca52fb000bedec78e5c36a72997dbeb Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 16 Jun 2026 10:01:43 -0700 Subject: [PATCH 29/30] [fm] saga diagnoser: apply lessons from disk diagnoser (#10541) Mirror the disk diagnoser cleanups in the saga engine, now that both share the FactMetadata/payload split: - summarize_case -> parse_case (it validates and rejects, not just condenses) and ParentSagaCase -> ParsedSagaCase to match. - observed -> observed_sagas. - Close uninterpretable cases inline in the parse loop instead of collecting them into a vec and looping again. - Build case_for_saga in a single pass with the Entry API, closing duplicate cases inline; reframe the tie-break comment as arbitrary-but-deterministic rather than 'correct'. 
- Separate case-closing from fact reconciliation: the first pass over surviving cases is now closing-only, and the second pass owns all of a saga's fact state (drop duplicate + stale facts, add fresh) in one place. A new covered() helper folds the three per-kind match arms. - FmFactSaga::from_sitrep takes (metadata, payload) and destructures FactMetadata exhaustively, so a new field fails to compile until it is mapped to a column. - Tests: replace the (Fact, SagaFact) tuple from saga_facts with a named SagaFactRef, and extract the repeated sole-fact lookup into a sole_saga_fact_id helper. --- nexus/db-model/src/fm/fact_saga.rs | 14 +- nexus/db-queries/src/db/datastore/fm.rs | 5 +- nexus/fm/src/diagnosis/saga.rs | 443 +++++++++++------------- 3 files changed, 215 insertions(+), 247 deletions(-) diff --git a/nexus/db-model/src/fm/fact_saga.rs b/nexus/db-model/src/fm/fact_saga.rs index 077f7021e29..e206f1d3e3a 100644 --- a/nexus/db-model/src/fm/fact_saga.rs +++ b/nexus/db-model/src/fm/fact_saga.rs @@ -16,6 +16,7 @@ use crate::impl_enum_type; use chrono::{DateTime, Utc}; use nexus_db_schema::schema::fm_fact_saga; use nexus_types::fm; +use nexus_types::fm::case::FactMetadata; use nexus_types::fm::{ FactPayload, SagaAbandonedFactPayload, SagaFact, SagaNotProgressingFactPayload, SagaOwnerNotCurrentFactPayload, @@ -127,20 +128,23 @@ pub struct FmFactSaga { } impl FmFactSaga { - /// Build a row from a fact's shared metadata (`fact`) and its + /// Build a row from a fact's shared metadata (`metadata`) and its /// already-dispatched saga payload (`saga_fact`). pub fn from_sitrep( sitrep_id: impl Into>, case_id: impl Into>, - fact: &fm::case::Fact, + metadata: &FactMetadata, saga_fact: &SagaFact, ) -> Self { + // Destructure exhaustively: a new `FactMetadata` field will fail to + // compile here until it is mapped to a column. 
+ let FactMetadata { id, created_sitrep_id, comment } = metadata; let base = Self { - id: fact.metadata.id.into(), + id: (*id).into(), sitrep_id: sitrep_id.into(), case_id: case_id.into(), - created_sitrep_id: fact.metadata.created_sitrep_id.into(), - comment: fact.metadata.comment.clone(), + created_sitrep_id: (*created_sitrep_id).into(), + comment: comment.clone(), saga_id: saga_fact.saga_id().0, kind: FmFactSagaKind::NotProgressing, saga_state: None, diff --git a/nexus/db-queries/src/db/datastore/fm.rs b/nexus/db-queries/src/db/datastore/fm.rs index 571fe8d84fa..49b8264bcba 100644 --- a/nexus/db-queries/src/db/datastore/fm.rs +++ b/nexus/db-queries/src/db/datastore/fm.rs @@ -883,7 +883,10 @@ impl DataStore { } fm::FactPayload::Saga(saga_fact) => { saga_facts.push(model::fm::FmFactSaga::from_sitrep( - sitrep_id, case_id, fact, saga_fact, + sitrep_id, + case_id, + &fact.metadata, + saga_fact, )); } } diff --git a/nexus/fm/src/diagnosis/saga.rs b/nexus/fm/src/diagnosis/saga.rs index 0507c4354df..be3421cbd9e 100644 --- a/nexus/fm/src/diagnosis/saga.rs +++ b/nexus/fm/src/diagnosis/saga.rs @@ -36,6 +36,7 @@ use nexus_types::observed_saga::{ }; use omicron_uuid_kinds::{CaseUuid, FactUuid}; use std::collections::BTreeMap; +use std::collections::btree_map::Entry; /// A saga is flagged as "not progressing" once it has recorded no node event /// for at least this long. This is a wall-clock, cadence-independent quantity @@ -43,10 +44,10 @@ use std::collections::BTreeMap; /// passes. const STALE_SAGA_THRESHOLD: TimeDelta = TimeDelta::minutes(30); -/// Per-case view of a parent saga case, built from its facts. Every fact on a -/// saga case is about the same `saga_id`, and a case carries at most one fact -/// of each kind. -struct ParentSagaCase { +/// A parent-forwarded Saga case, parsed into the form this engine acts on. +/// Every fact on a saga case is about the same `saga_id`, and a case carries +/// at most one fact of each kind. 
+struct ParsedSagaCase { saga_id: steno::SagaId, /// The fact to consider when advancing the case: at most one per kind /// (the lowest fact UUID wins if a case pathologically carries several). @@ -76,11 +77,9 @@ enum UninterpretableCase { NoFacts, } -/// Summarize one parent-forwarded Saga case, or explain why it cannot be -/// interpreted. -fn summarize_case( - case: &fm::Case, -) -> Result { +/// Parse one parent-forwarded Saga case into a [`ParsedSagaCase`], or explain +/// why it cannot be interpreted. +fn parse_case(case: &fm::Case) -> Result { let mut saga_id: Option = None; let mut not_progressing: Option<(FactUuid, SagaNotProgressingFactPayload)> = None; @@ -133,7 +132,7 @@ fn summarize_case( let Some(saga_id) = saga_id else { return Err(UninterpretableCase::NoFacts); }; - Ok(ParentSagaCase { + Ok(ParsedSagaCase { saga_id, not_progressing, owner_not_current, @@ -149,167 +148,116 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { // and reproducible in tests, matching the physical-disk engine's use of // inventory timestamps. let reference_time = input.inventory().time_done; - let observed = input.observed_sagas(); - - // Index parent-forwarded Saga cases by case ID, and maintain a saga_id -> - // case_id index for the second pass. Every case is about one saga, - // derived from its facts. - let mut parent_cases: BTreeMap = BTreeMap::new(); - let mut case_for_saga: BTreeMap = BTreeMap::new(); - let mut uninterpretable = Vec::<(CaseUuid, UninterpretableCase)>::new(); + let observed_sagas = input.observed_sagas(); + + // Parse the Saga cases copied forward from the parent sitrep. Every case + // is about one saga, derived from its facts. Cases we cannot interpret are + // closed inline, so they don't ride along as open-but-unprocessable in + // every future sitrep. 
This is safe with respect to fault coverage: + // detection below is independent of case bookkeeping, so if a closed case + // concerned a saga that genuinely needs attention, a fresh, well-formed + // case is opened in this same pass. + let mut parent_cases: BTreeMap = BTreeMap::new(); for case in input .open_cases() .iter() .filter(|c| c.metadata.de == DiagnosisEngineKind::Saga) { - let summary = match summarize_case(case) { - Ok(summary) => summary, + match parse_case(case) { + Ok(parsed_case) => { + if !parsed_case.duplicate_facts.is_empty() { + slog::warn!( + &builder.log, + "Saga case has more than one fact of the same kind; \ + the duplicates will be removed"; + "case_id" => %case.id, + "duplicate_fact_ids" => ?parsed_case.duplicate_facts, + ); + } + parent_cases.insert(case.id, parsed_case); + } Err(reason) => { + builder + .cases + .case_mut(&case.id) + .expect("case_id came from builder's open cases") + .close(format!("cannot interpret case: {reason}")); + } + } + } + + // Inverse index: which parent case is about which saga. Cases are per-saga, + // so a saga with two parent cases is already pathological. We keep one and + // close the rest as duplicates; which one we keep is arbitrary. + // `parent_cases` iterates ascending by CaseUuid, so we deterministically + // keep the lowest-ID case, but the ID ordering carries no meaning here. + // Closing the duplicate matters: a half-maintained one would otherwise + // decay into an uninterpretable empty case. 
+ let mut case_for_saga: BTreeMap = BTreeMap::new(); + for (case_id, parsed_case) in &parent_cases { + match case_for_saga.entry(parsed_case.saga_id) { + Entry::Vacant(slot) => { + slot.insert(*case_id); + } + Entry::Occupied(kept) => { + let kept_case_id = *kept.get(); slog::warn!( &builder.log, - "closing uninterpretable Saga case"; - "case_id" => %case.id, - "reason" => %reason, + "closing duplicate Saga case"; + "case_id" => %case_id, + "kept_case_id" => %kept_case_id, + "saga_id" => %parsed_case.saga_id, ); - uninterpretable.push((case.id, reason)); - continue; + builder + .cases + .case_mut(case_id) + .expect("case_id came from builder's open cases") + .close(format!( + "duplicate of case {kept_case_id} for saga {}", + parsed_case.saga_id, + )); } - }; - if !summary.duplicate_facts.is_empty() { - slog::warn!( - &builder.log, - "Saga case has more than one fact of the same kind; \ - the duplicates will be removed"; - "case_id" => %case.id, - "duplicate_fact_ids" => ?summary.duplicate_facts, - ); } - // Cases iterate in UUID order, so the kept case for a saga is - // deterministically the one with the lowest case UUID. - case_for_saga.entry(summary.saga_id).or_insert(case.id); - parent_cases.insert(case.id, summary); } - // Close the cases we couldn't interpret, so they don't ride along as - // open-but-unprocessable in every future sitrep. This is safe with - // respect to fault coverage: detection below is independent of case - // bookkeeping, so if a closed case concerned a saga that genuinely - // needs attention, a fresh, well-formed case is opened in this same - // pass. - for (case_id, reason) in uninterpretable { - builder + // Close the surviving parent case for any saga that has reached a terminal + // state (no longer observed) or has fully recovered (no condition holds + // anymore). A still-problematic saga's facts are reconciled in the next + // loop, which owns all fact state for the saga. 
+ for (saga_id, case_id) in &case_for_saga { + let mut case_mut = builder .cases - .case_mut(&case_id) - .expect("case came from builder.input()'s open cases") - .close(format!("cannot interpret case: {reason}")); - } - - // Close duplicate cases: a saga with two parent cases is pathological; - // the lowest case ID is kept and maintained below. (A half-maintained - // duplicate would otherwise decay into an uninterpretable empty case.) - for (case_id, summary) in &parent_cases { - let kept_case_id = case_for_saga[&summary.saga_id]; - if *case_id != kept_case_id { - slog::warn!( - &builder.log, - "closing duplicate Saga case"; - "case_id" => %case_id, - "kept_case_id" => %kept_case_id, - "saga_id" => %summary.saga_id, - ); - builder - .cases - .case_mut(case_id) - .expect("case came from builder.input()'s open cases") - .close(format!( - "duplicate of case {kept_case_id} for saga {}", - summary.saga_id, - )); - } - } - - // First pass: for each saga's surviving parent case, close it if its - // saga has reached a terminal state (no longer observed) or has fully - // recovered (no condition holds anymore), otherwise drop any facts whose - // recorded contents no longer match the current observation. The second - // pass re-adds a fresh fact if the condition still holds. 
- for case_id in case_for_saga.values() { - let summary = &parent_cases[case_id]; - let mut case_mut = builder.cases.case_mut(case_id).expect( - "builder.cases is seeded from the open cases of builder.input(), \ - which is where this case_id came from", - ); - let Some(obs) = observed.get(&summary.saga_id) else { - case_mut.close(format!( - "saga {} completed or was removed", - summary.saga_id, - )); + .case_mut(case_id) + .expect("case_id came from builder's open cases"); + let Some(obs) = observed_sagas.get(saga_id) else { + case_mut.close(format!("saga {saga_id} completed or was removed")); continue; }; - let desired_np = desired_not_progressing(obs, reference_time); - let desired_owner = desired_owner_not_current(obs); - let desired_abandoned = desired_abandoned(obs); // A case is an episode of a problem, not a dossier on the saga: when - // no condition holds anymore, the episode is over and the case - // closes. Its facts stay attached as the record of why it existed; - // they age out with the case once it stops being copied forward. If - // the saga becomes a problem again later, a fresh case opens. + // no condition holds anymore, the episode is over and the case closes. + // Its facts stay attached as the record of why it existed; they age + // out with the case once it stops being copied forward. If the saga + // becomes a problem again later, a fresh case opens. // // An abandoned saga never reaches this close: `desired_abandoned` - // holds until the saga row itself is removed (the `else` branch - // above), keeping the case open while remediation is pending. - if desired_np.is_none() - && desired_owner.is_none() - && desired_abandoned.is_none() + // holds until the saga row itself is removed (the `else` branch above), + // keeping the case open while remediation is pending. 
+ if desired_not_progressing(obs, reference_time).is_none() + && desired_owner_not_current(obs).is_none() + && desired_abandoned(obs).is_none() { case_mut.close(format!( - "saga {} is progressing under a current owner again", - summary.saga_id, + "saga {saga_id} is progressing under a current owner again", )); - continue; - } - // Duplicate facts carry no information the kept facts don't; remove - // them regardless of what the observation says. - for fact_id in &summary.duplicate_facts { - case_mut.remove_fact( - *fact_id, - "duplicate fact of the same kind on the case", - ); - } - if let Some((fact_id, payload)) = &summary.not_progressing { - if desired_np.as_ref() != Some(payload) { - case_mut.remove_fact( - *fact_id, - "stale NotProgressing observation, superseded by current \ - saga state", - ); - } - } - if let Some((fact_id, payload)) = &summary.owner_not_current { - if desired_owner.as_ref() != Some(payload) { - case_mut.remove_fact( - *fact_id, - "stale OwnerNotCurrentGeneration observation, superseded \ - by current saga state", - ); - } - } - if let Some((fact_id, payload)) = &summary.abandoned { - if desired_abandoned.as_ref() != Some(payload) { - case_mut.remove_fact( - *fact_id, - "stale Abandoned observation, superseded by current saga \ - state", - ); - } } + // Faulty: leave open; the next loop reconciles its facts. } - // Second pass: for each observed saga with a problem, ensure a case exists - // (reusing the parent-forwarded one if any) and add a fresh fact for each - // condition that isn't already represented by a matching, carried-forward - // fact. - for obs in observed.iter() { + // For each observed saga with a problem, ensure its case carries exactly + // the facts matching the current observation: reuse the parent-forwarded + // case if any (dropping duplicate and stale facts), otherwise open a fresh + // case. This loop owns all fact state for a saga. 
+ for obs in observed_sagas.iter() { let desired_np = desired_not_progressing(obs, reference_time); let desired_owner = desired_owner_not_current(obs); let desired_abandoned = desired_abandoned(obs); @@ -320,31 +268,15 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { continue; } - let parent = case_for_saga.get(&obs.saga_id).and_then(|case_id| { - parent_cases.get(case_id).map(|s| (*case_id, s)) - }); - - // A carried-forward fact already covers a condition only if its - // recorded payload exactly matches what we'd emit now (otherwise the - // first pass removed it). - let np_already = matches!( - (&desired_np, parent.and_then(|(_, s)| s.not_progressing.as_ref())), - (Some(want), Some((_, have))) if want == have - ); - let owner_already = matches!( - ( - &desired_owner, - parent.and_then(|(_, s)| s.owner_not_current.as_ref()), - ), - (Some(want), Some((_, have))) if want == have - ); - let abandoned_already = matches!( - (&desired_abandoned, parent.and_then(|(_, s)| s.abandoned.as_ref())), - (Some(want), Some((_, have))) if want == have - ); + let parent = case_for_saga + .get(&obs.saga_id) + .map(|case_id| (*case_id, &parent_cases[case_id])); - let case_id = match parent { - Some((case_id, _)) => case_id, + let mut case_mut = match parent { + Some((case_id, _)) => builder + .cases + .case_mut(&case_id) + .expect("case_id came from builder's open cases"), None => { let mut new_case = builder.cases.open_case(DiagnosisEngineKind::Saga); @@ -352,43 +284,78 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { "saga {} ({}) needs attention", obs.saga_id, obs.saga_name, )); - new_case.id + new_case } }; - if let Some(payload) = desired_np { - if !np_already { + // Duplicate facts carry no information the kept fact doesn't; remove + // them regardless of what the observation says. 
+ if let Some((_, parsed_case)) = parent { + for fact_id in &parsed_case.duplicate_facts { + case_mut.remove_fact( + *fact_id, + "duplicate fact of the same kind on the case", + ); + } + } + + // NotProgressing: keep the carried fact if it already records exactly + // what we'd emit now; otherwise drop the stale one (if any) and add a + // fresh one. + let carried_np = parent.and_then(|(_, p)| p.not_progressing.as_ref()); + if carried_np.map(|(_, p)| p) != desired_np.as_ref() { + if let Some((fact_id, _)) = carried_np { + case_mut.remove_fact( + *fact_id, + "NotProgressing fact no longer matches the current saga \ + state", + ); + } + if let Some(payload) = desired_np { let staleness = reference_time .signed_duration_since(payload.last_event_time); let comment = format!( "no saga node event in {}", omicron_common::format_time_delta(staleness), ); - builder - .cases - .case_mut(&case_id) - .expect("case_id came from this fn") - .add_fact(SagaFact::NotProgressing(payload), comment); + case_mut.add_fact(SagaFact::NotProgressing(payload), comment); } } - if let Some(payload) = desired_owner { - if !owner_already { + + // OwnerNotCurrentGeneration: same reconciliation. 
+ let carried_owner = + parent.and_then(|(_, p)| p.owner_not_current.as_ref()); + if carried_owner.map(|(_, p)| p) != desired_owner.as_ref() { + if let Some((fact_id, _)) = carried_owner { + case_mut.remove_fact( + *fact_id, + "OwnerNotCurrentGeneration fact no longer matches the \ + current saga state", + ); + } + if let Some(payload) = desired_owner { let comment = format!( "owned by non-current Nexus {} ({:?})", payload.current_sec, payload.orphan_reason, ); - builder - .cases - .case_mut(&case_id) - .expect("case_id came from this fn") - .add_fact( - SagaFact::OwnerNotCurrentGeneration(payload), - comment, - ); + case_mut.add_fact( + SagaFact::OwnerNotCurrentGeneration(payload), + comment, + ); } } - if let Some(payload) = desired_abandoned { - if !abandoned_already { + + // Abandoned: same reconciliation. The payload is pure identity, so it + // only ever drops when the condition clears, never on a value change. + let carried_abandoned = parent.and_then(|(_, p)| p.abandoned.as_ref()); + if carried_abandoned.map(|(_, p)| p) != desired_abandoned.as_ref() { + if let Some((fact_id, _)) = carried_abandoned { + case_mut.remove_fact( + *fact_id, + "Abandoned fact no longer matches the current saga state", + ); + } + if let Some(payload) = desired_abandoned { // The payload is pure identity; the human-readable context // (which a promoted problem would otherwise look up from the // saga row) goes in the comment. @@ -402,11 +369,7 @@ pub(super) fn analyze(builder: &mut SitrepBuilder<'_>) -> anyhow::Result<()> { .map(|t| t.to_string()) .unwrap_or_else(|| "".to_string()), ); - builder - .cases - .case_mut(&case_id) - .expect("case_id came from this fn") - .add_fact(SagaFact::Abandoned(payload), comment); + case_mut.add_fact(SagaFact::Abandoned(payload), comment); } } } @@ -570,11 +533,17 @@ mod tests { builder.build(OmicronZoneUuid::new_v4(), Utc::now()) } - /// Collect every saga fact in the sitrep, optionally only on open cases. 
- fn saga_facts( - sitrep: &Sitrep, - open_only: bool, - ) -> Vec<(fm::case::Fact, SagaFact)> { + /// A saga fact found in a sitrep, paired with its decoded [`SagaFact`] + /// payload. + #[derive(Debug)] + struct SagaFactRef<'a> { + fact: &'a fm::case::Fact, + saga_fact: SagaFact, + } + + /// Collect every saga fact in the sitrep, with its decoded payload. + /// Optionally filtered to open cases only. + fn saga_facts(sitrep: &Sitrep, open_only: bool) -> Vec> { sitrep .cases .iter() @@ -582,12 +551,22 @@ mod tests { .filter(|c| !open_only || c.is_open()) .flat_map(|c| { c.facts.iter().filter_map(|f| { - f.payload.as_saga().map(|s| (f.clone(), s.clone())) + f.payload + .as_saga() + .map(|s| SagaFactRef { fact: f, saga_fact: s.clone() }) }) }) .collect() } + /// The fact UUID of the one saga fact on the one saga case in `sitrep`. + /// Panics unless there is exactly one. + fn sole_saga_fact_id(sitrep: &Sitrep) -> FactUuid { + let facts = saga_facts(sitrep, false); + assert_eq!(facts.len(), 1, "expected exactly one saga fact"); + facts[0].fact.metadata.id + } + /// Make a `Fact` carrying the given saga payload. 
fn mk_fact( parent_sitrep_id: SitrepUuid, @@ -703,7 +682,7 @@ mod tests { let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); - match &facts[0].1 { + match &facts[0].saga_fact { SagaFact::NotProgressing(p) => { assert_eq!(p.saga_id, id); assert_eq!(p.last_event_time, stale); @@ -745,7 +724,7 @@ mod tests { let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); - match &facts[0].1 { + match &facts[0].saga_fact { SagaFact::OwnerNotCurrentGeneration(p) => { assert_eq!(p.current_sec, sec); assert_eq!(p.orphan_reason, OrphanedReason::Quiesced); @@ -816,10 +795,12 @@ mod tests { .collect(); assert_eq!(open_cases.len(), 1); assert!( - facts.iter().any(|(_, f)| matches!(f, SagaFact::NotProgressing(_))) + facts + .iter() + .any(|fr| matches!(&fr.saga_fact, SagaFact::NotProgressing(_))) ); - assert!(facts.iter().any(|(_, f)| matches!( - f, + assert!(facts.iter().any(|fr| matches!( + &fr.saga_fact, SagaFact::OwnerNotCurrentGeneration(p) if p.orphan_reason == OrphanedReason::Expunged ))); @@ -875,17 +856,7 @@ mod tests { inv_id, [SagaFact::NotProgressing(payload.clone())], ); - let parent_fact_id = parent - .cases - .iter() - .next() - .unwrap() - .facts - .iter() - .next() - .unwrap() - .metadata - .id; + let parent_fact_id = sole_saga_fact_id(&parent); // Observed saga matches the parent fact exactly (same last_event_time, // same state). 
let observed = observed_map([ObservedSaga { @@ -903,7 +874,7 @@ mod tests { let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); assert_eq!( - facts[0].0.metadata.id, parent_fact_id, + facts[0].fact.metadata.id, parent_fact_id, "fact UUID should be stable when the observation is unchanged", ); logctx.cleanup_successful(); @@ -928,17 +899,7 @@ mod tests { last_event_time: old, })], ); - let parent_fact_id = parent - .cases - .iter() - .next() - .unwrap() - .facts - .iter() - .next() - .unwrap() - .metadata - .id; + let parent_fact_id = sole_saga_fact_id(&parent); // Still stale, but last_event_time advanced. let observed = observed_map([ObservedSaga { saga_id: id, @@ -955,10 +916,10 @@ mod tests { let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); assert_ne!( - facts[0].0.metadata.id, parent_fact_id, + facts[0].fact.metadata.id, parent_fact_id, "fact UUID should rotate when last_event_time changes", ); - match &facts[0].1 { + match &facts[0].saga_fact { SagaFact::NotProgressing(p) => { assert_eq!(p.last_event_time, new) } @@ -1116,11 +1077,11 @@ mod tests { "only the owner fact should remain on the open case", ); assert_eq!( - facts[0].0.metadata.id, parent_owner_fact_id, + facts[0].fact.metadata.id, parent_owner_fact_id, "the persisting fact carries forward with a stable UUID", ); assert!(matches!( - &facts[0].1, + &facts[0].saga_fact, SagaFact::OwnerNotCurrentGeneration(p) if p.current_sec == sec )); logctx.cleanup_successful(); @@ -1197,7 +1158,7 @@ mod tests { let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1, "the duplicate fact should be removed"); assert_eq!( - facts[0].0.metadata.id, kept_id, + facts[0].fact.metadata.id, kept_id, "the kept fact matches the observation, so its UUID is stable", ); logctx.cleanup_successful(); @@ -1252,14 +1213,14 @@ mod tests { "both parent facts should be removed and one fresh fact added", ); assert_ne!( - facts[0].0.metadata.id, kept_id, + facts[0].fact.metadata.id, kept_id, "the 
stale kept fact was removed" ); assert_ne!( - facts[0].0.metadata.id, dup_id, + facts[0].fact.metadata.id, dup_id, "the duplicate was removed unconditionally", ); - match &facts[0].1 { + match &facts[0].saga_fact { SagaFact::NotProgressing(p) => { assert_eq!(p.last_event_time, current); } @@ -1310,7 +1271,7 @@ mod tests { let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); - match &facts[0].1 { + match &facts[0].saga_fact { SagaFact::Abandoned(p) => assert_eq!(p.saga_id, id), other => panic!("expected Abandoned, got {other:?}"), } @@ -1353,7 +1314,7 @@ mod tests { assert!(case.is_open(), "abandonment must not close the case"); let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1, "the NotProgressing fact is superseded"); - match &facts[0].1 { + match &facts[0].saga_fact { SagaFact::Abandoned(p) => assert_eq!(p.saga_id, id), other => panic!("expected Abandoned, got {other:?}"), } @@ -1384,7 +1345,7 @@ mod tests { let facts = saga_facts(&sitrep, true); assert_eq!(facts.len(), 1); assert_eq!( - facts[0].0.metadata.id, parent_fact_id, + facts[0].fact.metadata.id, parent_fact_id, "the Abandoned fact UUID is stable across sitreps", ); logctx.cleanup_successful(); @@ -1582,7 +1543,7 @@ mod tests { 1, "expected exactly one open Saga fact (on the replacement case)", ); - match &open[0].1 { + match &open[0].saga_fact { SagaFact::NotProgressing(p) => { assert_eq!(p.saga_id, stuck_saga); assert_eq!(p.last_event_time, last_event); From 0604e5e48d43a5682f6fd074f07bff2ec37270f5 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 16 Jun 2026 16:34:14 -0700 Subject: [PATCH 30/30] [fm] omdb test: redact racy fm_sitrep_gc orphan counts The fm_sitrep_gc task's "orphaned sitreps deleted" and "orphaned fm_sitrep_analysis_report rows deleted" counts are timing-dependent. When two fm_analysis activations overlap (e.g. 
the boot-time activation and the explicit drive in the test), both can insert a first sitrep before either is made current; the loser's sitrep and its stashed analysis report are inserted but orphaned (fm_sitrep_insert's ParentNotCurrent path), then deleted by a later GC pass. Whether that race occurred before the omdb snapshot is non-deterministic, which made test_omdb_success_cases flaky. Redact both counts. --- dev-tools/omdb/tests/successes.out | 8 ++++---- dev-tools/omdb/tests/test_all_output.rs | 13 +++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index a4c8c0886d1..ee1f67b7393 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -755,7 +755,7 @@ task: "fm_sitrep_gc" last completed activation: , triggered by started at (s ago) and ran for ms batch size: 1000 - orphaned sitreps deleted: 0 + orphaned sitreps deleted: batches: 1 orphaned fm_alert_request rows deleted: 0 batches: 1 @@ -765,7 +765,7 @@ task: "fm_sitrep_gc" batches: 1 orphaned fm_fact_physical_disk rows deleted: 0 batches: 1 - orphaned fm_sitrep_analysis_report rows deleted: 0 + orphaned fm_sitrep_analysis_report rows deleted: batches: 1 orphaned fm_support_bundle_request rows deleted: 0 batches: 1 @@ -1453,7 +1453,7 @@ task: "fm_sitrep_gc" last completed activation: , triggered by started at (s ago) and ran for ms batch size: 1000 - orphaned sitreps deleted: 0 + orphaned sitreps deleted: batches: 1 orphaned fm_alert_request rows deleted: 0 batches: 1 @@ -1463,7 +1463,7 @@ task: "fm_sitrep_gc" batches: 1 orphaned fm_fact_physical_disk rows deleted: 0 batches: 1 - orphaned fm_sitrep_analysis_report rows deleted: 0 + orphaned fm_sitrep_analysis_report rows deleted: batches: 1 orphaned fm_support_bundle_request rows deleted: 0 batches: 1 diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 625ccef5f57..777a9c6047f 
100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -389,6 +389,19 @@ async fn test_omdb_success_cases() { r" --> (same collection as parent sitrep|different from parent sitrep \(collection [-a-f0-9]+\))", ); + // The `fm_sitrep_gc` task's orphan counts are racy. When two `fm_analysis` + // activations overlap (e.g. the boot-time activation and the explicit drive + // above), both can insert a first sitrep before either is made current; the + // loser's sitrep and its stashed analysis report are inserted but orphaned + // (see `DataStore::fm_sitrep_insert`'s `ParentNotCurrent` path), and a + // later GC pass deletes them. Whether that race happened before this + // snapshot is timing-dependent, so redact both counts. Other child tables + // stay zero: with no faults, orphaned sitreps carry no cases, facts, + // ereports, or bundles. + redactor + .field("orphaned sitreps deleted:", r"\d+") + .field("orphaned fm_sitrep_analysis_report rows deleted:", r"\d+"); + // The `sp_ereport_ingester` task's output depends on how many simulated // sled agents ahppen to register with Nexus before its first execution. // These redactions work around the issue described in