diff --git a/bazel/rules/rules_score/README.md b/bazel/rules/rules_score/README.md index a4fc0092..ae0cce81 100644 --- a/bazel/rules/rules_score/README.md +++ b/bazel/rules/rules_score/README.md @@ -90,7 +90,8 @@ architectural_design( **`bazel build`** — runs `puml_parser` on every `.puml` file, producing: - a `.fbs.bin` FlatBuffers binary (diagram AST) — consumed by validation/core checks - a `.lobster` traceability file (Interface elements only) — consumed by LOBSTER -- a `plantuml_links.json` — consumed by the `clickable_plantuml` Sphinx extension +- a `.idmap.json` sidecar — consumed by the `clickable_plantuml` Sphinx extension + to resolve cross-diagram links based on element *defines/references* roles Diagrams in `public_api` are classified separately so their lobster items flow through `public_api_lobster_files` for failure-mode traceability. diff --git a/bazel/rules/rules_score/docs/tooling_architecture.rst b/bazel/rules/rules_score/docs/tooling_architecture.rst index 4838664d..22e73eea 100644 --- a/bazel/rules/rules_score/docs/tooling_architecture.rst +++ b/bazel/rules/rules_score/docs/tooling_architecture.rst @@ -88,13 +88,14 @@ are rendered under :doc:`tool_reference/index`. - Converts RST requirement directives (``feat_req``, ``comp_req``, …) into ``.trlc`` records so requirements can be authored in either RST or TRLC. * - **PlantUML Parser** - - ``@score_tooling//plantuml/parser:parser`` (Rust) + ``:linker`` + - ``@score_tooling//plantuml/parser:parser`` (Rust) - ``architectural_design``, ``unit_design`` - Parses ``.puml`` diagrams into a FlatBuffers AST (``.fbs.bin``, one - ``root_type`` per diagram kind) and extracts interface ``.lobster`` - items. The **linker** merges the FlatBuffers into ``plantuml_links.json`` - for the ``clickable_plantuml`` Sphinx extension. Rejects syntactically - invalid diagrams with a non-zero exit code. 
+ ``root_type`` per diagram kind), extracts interface ``.lobster`` items, + and emits ``.idmap.json`` sidecars recording the *defines/references* + roles of each element. The ``clickable_plantuml`` Sphinx extension reads + these sidecars to resolve cross-diagram links without a separate linker + step. Rejects syntactically invalid diagrams with a non-zero exit code. * - **safety_analysis_tools** - ``//bazel/rules/rules_score:safety_analysis_tools`` (``src/safety_analysis_tools.py``, local) diff --git a/bazel/rules/rules_score/private/architectural_design.bzl b/bazel/rules/rules_score/private/architectural_design.bzl index 43dfe4e9..c094e0e6 100644 --- a/bazel/rules/rules_score/private/architectural_design.bzl +++ b/bazel/rules/rules_score/private/architectural_design.bzl @@ -31,19 +31,23 @@ load("//bazel/rules/rules_score/private:verbosity.bzl", "VERBOSITY_ATTR", "get_l # ============================================================================ def _run_puml_parser(ctx, puml_file): - """Run the PlantUML parser on a single .puml file to produce a FlatBuffers binary - and a lobster traceability file. + """Run the PlantUML parser on a single .puml file. - The diagram type is auto-detected by the parser and encoded in the - FlatBuffers schema (each diagram type uses its own root_type). - Lobster output is produced in-process for component diagrams. + Produces three output files: + - a FlatBuffers binary (``.fbs.bin``), + - a LOBSTER traceability file (``.lobster``), and + - an idmap sidecar (``.idmap.json``) used by the + ``clickable_plantuml`` Sphinx extension to resolve cross-diagram links. + + ``puml_file.short_path`` (workspace-relative) is passed as ``--source-name`` + so the idmap ``source`` field is a stable, path-unique identifier. Args: ctx: Rule context puml_file: The .puml File object to parse Returns: - Tuple of (fbs_output, lobster_output) declared output Files. + Tuple of (fbs_output, lobster_output, idmap_output) declared output Files. 
""" file_stem = puml_file.basename.rsplit(".", 1)[0] fbs_output = ctx.actions.declare_file( @@ -52,25 +56,32 @@ def _run_puml_parser(ctx, puml_file): lobster_output = ctx.actions.declare_file( "{}/{}.lobster".format(ctx.label.name, file_stem), ) + idmap_output = ctx.actions.declare_file( + "{}/{}.idmap.json".format(ctx.label.name, file_stem), + ) ctx.actions.run( inputs = [puml_file], - outputs = [fbs_output, lobster_output], + outputs = [fbs_output, lobster_output, idmap_output], executable = ctx.executable._puml_parser, arguments = [ "--file", puml_file.path, + "--source-name", + puml_file.short_path, "--fbs-output-dir", fbs_output.dirname, "--lobster-output-dir", lobster_output.dirname, + "--idmap-output-dir", + idmap_output.dirname, "--log-level", get_log_level(ctx), ], progress_message = "Parsing PlantUML diagram: %s" % puml_file.short_path, ) - return fbs_output, lobster_output + return fbs_output, lobster_output, idmap_output def _parse_puml_diagrams(ctx, files): """Run the PlantUML parser on all .puml/.plantuml files in a list. @@ -80,16 +91,18 @@ def _parse_puml_diagrams(ctx, files): files: List of File objects Returns: - Tuple of (fbs_outputs, lobster_outputs) lists of generated Files. + Tuple of (fbs_outputs, lobster_outputs, idmap_outputs) lists of generated Files. """ fbs_outputs = [] lobster_outputs = [] + idmap_outputs = [] for f in files: if f.extension in ("puml", "plantuml"): - fbs, lobster = _run_puml_parser(ctx, f) + fbs, lobster, idmap = _run_puml_parser(ctx, f) fbs_outputs.append(fbs) lobster_outputs.append(lobster) - return fbs_outputs, lobster_outputs + idmap_outputs.append(idmap) + return fbs_outputs, lobster_outputs, idmap_outputs def _architectural_design_impl(ctx): """Implementation for architectural_design rule. @@ -110,46 +123,26 @@ def _architectural_design_impl(ctx): """ # Parse each architectural view separately so each provider field carries - # the flatbuffers for its own category. 
- static_fbs_list, static_lobster_list = _parse_puml_diagrams(ctx, ctx.files.static) - dynamic_fbs_list, dynamic_lobster_list = _parse_puml_diagrams(ctx, ctx.files.dynamic) - public_api_fbs_list, public_api_lobster_list = _parse_puml_diagrams(ctx, ctx.files.public_api) - internal_api_fbs_list, _internal_api_lobster_list = _parse_puml_diagrams(ctx, ctx.files.internal_api) + # the flatbuffers (and idmap sidecars) for its own category. + static_fbs_list, static_lobster_list, static_idmap_list = _parse_puml_diagrams(ctx, ctx.files.static) + dynamic_fbs_list, dynamic_lobster_list, dynamic_idmap_list = _parse_puml_diagrams(ctx, ctx.files.dynamic) + public_api_fbs_list, public_api_lobster_list, public_api_idmap_list = _parse_puml_diagrams(ctx, ctx.files.public_api) + internal_api_fbs_list, _internal_api_lobster_list, internal_api_idmap_list = _parse_puml_diagrams(ctx, ctx.files.internal_api) static_fbs = depset(static_fbs_list) dynamic_fbs = depset(dynamic_fbs_list) public_api_fbs = depset(public_api_fbs_list) internal_api_fbs = depset(internal_api_fbs_list) public_api_lobster = depset(public_api_lobster_list) + all_idmaps = depset(static_idmap_list + dynamic_idmap_list + public_api_idmap_list + internal_api_idmap_list) # Source files for SphinxSourcesInfo (sphinx documentation pipeline) all_source_files = depset( transitive = [depset(ctx.files.static), depset(ctx.files.dynamic), depset(ctx.files.public_api), depset(ctx.files.internal_api)], ) - # Run the linker on all generated .fbs.bin files to produce a - # plantuml_links.json for the clickable_plantuml Sphinx extension. 
- all_fbs_files = static_fbs.to_list() + dynamic_fbs.to_list() + public_api_fbs.to_list() + internal_api_fbs.to_list() - plantuml_links_json = ctx.actions.declare_file( - "{}/plantuml_links.json".format(ctx.label.name), - ) - if all_fbs_files: - ctx.actions.run( - inputs = all_fbs_files, - outputs = [plantuml_links_json], - executable = ctx.executable._linker, - arguments = ["--fbs-files"] + [f.path for f in all_fbs_files] + ["--output", plantuml_links_json.path, "--log-level", get_log_level(ctx)], - progress_message = "Generating PlantUML links JSON for %s" % ctx.label.name, - ) - else: - ctx.actions.write( - output = plantuml_links_json, - content = '{"links":[]}', - ) - sphinx_files = depset( - [plantuml_links_json], - transitive = [all_source_files], + transitive = [all_source_files, all_idmaps], ) # Generate a thin RST wrapper for every .puml diagram so it appears as a @@ -164,7 +157,10 @@ def _architectural_design_impl(ctx): sphinx_srcs = depset(rst_wrappers, transitive = [sphinx_files]) return [ - DefaultInfo(files = all_source_files), + # Include idmaps alongside source files so that plain sphinx_module + # consumers (without dependable_element) can pick up idmap sidecars + # simply by adding this target to their srcs. + DefaultInfo(files = depset(transitive = [all_source_files, all_idmaps])), ArchitecturalDesignInfo( static = static_fbs, dynamic = dynamic_fbs, @@ -172,7 +168,8 @@ def _architectural_design_impl(ctx): name = ctx.label.name, public_api_lobster_files = public_api_lobster, ), - # Source diagram files + plantuml_links.json for the sphinx documentation build + # Source diagram files + .idmap.json sidecars for the sphinx documentation build. + # The clickable_plantuml extension reads *.idmap.json to resolve cross-diagram links. 
SphinxSourcesInfo( srcs = sphinx_srcs, deps = sphinx_srcs, @@ -219,13 +216,7 @@ _architectural_design = rule( default = Label("@score_tooling//plantuml/parser:parser"), executable = True, cfg = "exec", - doc = "PlantUML parser tool that generates FlatBuffers from .puml files", - ), - "_linker": attr.label( - default = Label("@score_tooling//plantuml/parser:linker"), - executable = True, - cfg = "exec", - doc = "Tool that generates plantuml_links.json from FlatBuffers diagram outputs", + doc = "PlantUML parser tool that generates FlatBuffers and .idmap.json from .puml files", ), "_puml_rst_template": attr.label( default = Label("//bazel/rules/rules_score:templates/puml_diagram.template.rst"), diff --git a/plantuml/linker/README.md b/plantuml/linker/README.md deleted file mode 100644 index 710adbc9..00000000 --- a/plantuml/linker/README.md +++ /dev/null @@ -1,12 +0,0 @@ - diff --git a/plantuml/linker/src/main.rs b/plantuml/linker/src/main.rs deleted file mode 100644 index 87ac1c88..00000000 --- a/plantuml/linker/src/main.rs +++ /dev/null @@ -1,266 +0,0 @@ -// ******************************************************************************* -// Copyright (c) 2026 Contributors to the Eclipse Foundation -// -// See the NOTICE file(s) distributed with this work for additional -// information regarding copyright ownership. -// -// This program and the accompanying materials are made available under the -// terms of the Apache License Version 2.0 which is available at -// -// -// SPDX-License-Identifier: Apache-2.0 -// ******************************************************************************* - -//! PlantUML Linker -//! -//! Reads FlatBuffers `.fbs.bin` files produced by the PlantUML parser and -//! generates `plantuml_links.json` for the `clickable_plantuml` Sphinx extension. -//! -//! The tool correlates components across multiple diagrams: when a component -//! alias in diagram A matches a top-level component alias in diagram B, a -//! 
clickable link is created from A → B. - -use std::collections::HashMap; -use std::fs; - -use clap::{Parser, ValueEnum}; -use env_logger::Builder; - -use component_fbs::component as fb_component; - -// --------------------------------------------------------------------------- -// Log level -// --------------------------------------------------------------------------- - -/// CLI-visible log level (mirrors the parser's convention). -#[derive(Copy, Clone, ValueEnum, Debug)] -enum CliLogLevel { - Error, - Warn, - Info, - Debug, - Trace, -} - -impl CliLogLevel { - fn to_level_filter(self) -> log::LevelFilter { - match self { - CliLogLevel::Error => log::LevelFilter::Error, - CliLogLevel::Warn => log::LevelFilter::Warn, - CliLogLevel::Info => log::LevelFilter::Info, - CliLogLevel::Debug => log::LevelFilter::Debug, - CliLogLevel::Trace => log::LevelFilter::Trace, - } - } -} - -// --------------------------------------------------------------------------- -// CLI -// --------------------------------------------------------------------------- - -#[derive(Parser, Debug)] -#[command(name = "linker")] -#[command(version = "1.0")] -#[command( - about = "Generate plantuml_links.json from FlatBuffers diagram outputs", - long_about = "Reads .fbs.bin files from the PlantUML parser and produces a \ - plantuml_links.json file mapping component aliases to their \ - detailed diagrams for the clickable_plantuml Sphinx extension." 
-)] -struct Args { - /// FlatBuffers binary files to process (.fbs.bin) - #[arg(long, num_args = 1..)] - fbs_files: Vec, - - /// Output JSON file path - #[arg(long, default_value = "plantuml_links.json")] - output: String, - - /// Log level: error, warn, info, debug, trace - #[arg(long, value_enum, default_value = "warn")] - log_level: CliLogLevel, -} - -// --------------------------------------------------------------------------- -// Data model -// --------------------------------------------------------------------------- - -/// A component extracted from a FlatBuffers diagram. -#[derive(Debug)] -struct DiagramComponent { - alias: String, - parent_id: Option, -} - -/// All components from a single diagram file. -#[derive(Debug)] -struct DiagramInfo { - source_file: String, - components: Vec, -} - -/// One entry in the output JSON `links` array. -#[derive(Debug, serde::Serialize)] -struct LinkEntry { - source_file: String, - source_id: String, - target_file: String, -} - -/// Root structure of the output JSON. 
-#[derive(Debug, serde::Serialize)] -struct LinksJson { - links: Vec, -} - -// --------------------------------------------------------------------------- -// FlatBuffers reading -// --------------------------------------------------------------------------- - -fn read_diagram(path: &str) -> Result { - let data = fs::read(path).map_err(|e| format!("Failed to read {path}: {e}"))?; - - if data.is_empty() { - return Err(format!("Empty file (placeholder): {path}")); - } - - let graph = flatbuffers::root::(&data) - .map_err(|e| format!("Failed to parse FlatBuffer {path}: {e}"))?; - - let source_file = graph - .source_file() - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - .ok_or_else(|| format!("Missing source_file in FlatBuffer: {path}"))?; - - let mut components = Vec::new(); - if let Some(entries) = graph.components() { - for entry in entries.iter() { - let Some(comp) = entry.value() else { - continue; - }; - let alias = comp.alias().or(comp.name()).unwrap_or_default().to_string(); - if alias.is_empty() { - continue; - } - components.push(DiagramComponent { - alias, - parent_id: comp.parent_id().map(|s| s.to_string()), - }); - } - } - - Ok(DiagramInfo { - source_file, - components, - }) -} - -// --------------------------------------------------------------------------- -// Link generation -// --------------------------------------------------------------------------- - -/// Build links by matching component aliases across diagrams. -/// -/// For each component alias in diagram A, if a top-level component (no parent) -/// with the same alias exists in diagram B, we create a link: -/// source_file = A, source_id = alias, target_file = B -/// -/// A component is considered "top-level" if its `parent_id` is `None`. 
-fn generate_links(diagrams: &[DiagramInfo]) -> Vec { - // Index: alias → list of diagrams where that alias is a top-level component - let mut top_level_index: HashMap> = HashMap::new(); - for diagram in diagrams { - for comp in &diagram.components { - if comp.parent_id.is_none() { - top_level_index - .entry(comp.alias.clone()) - .or_default() - .push(&diagram.source_file); - } - } - } - - let mut links = Vec::new(); - - for diagram in diagrams { - for comp in &diagram.components { - if let Some(target_diagrams) = top_level_index.get(&comp.alias) { - for &target_file in target_diagrams { - // Don't link a component to its own diagram. - if target_file == diagram.source_file { - continue; - } - links.push(LinkEntry { - source_file: diagram.source_file.clone(), - source_id: comp.alias.clone(), - target_file: target_file.to_string(), - }); - } - } - } - } - - // Deduplicate: same (source_file, source_id, target_file) may appear - // when a component is nested inside multiple parent scopes. - links.sort_by(|a, b| { - (&a.source_file, &a.source_id, &a.target_file).cmp(&( - &b.source_file, - &b.source_id, - &b.target_file, - )) - }); - links.dedup_by(|a, b| { - a.source_file == b.source_file - && a.source_id == b.source_id - && a.target_file == b.target_file - }); - - // PlantUML supports only one URL per alias — keep the first target - // (alphabetically) for each (source_file, source_id) pair. - links.dedup_by(|a, b| a.source_file == b.source_file && a.source_id == b.source_id); - - links -} - -// --------------------------------------------------------------------------- -// Main -// --------------------------------------------------------------------------- - -fn main() -> Result<(), Box> { - let args = Args::parse(); - Builder::new() - .filter_level(args.log_level.to_level_filter()) - .init(); - - if args.fbs_files.is_empty() { - return Err("No .fbs.bin files provided. 
Use --fbs-files ...".into()); - } - - let mut diagrams = Vec::new(); - for fbs_path in &args.fbs_files { - match read_diagram(fbs_path) { - Ok(diagram) => { - log::info!( - "Read {} components from {}", - diagram.components.len(), - diagram.source_file - ); - diagrams.push(diagram); - } - Err(e) => { - log::warn!("Skipping {}: {}", fbs_path, e); - } - } - } - - let links = generate_links(&diagrams); - log::info!("Generated {} link(s)", links.len()); - - let output = LinksJson { links }; - let json = serde_json::to_string_pretty(&output)?; - fs::write(&args.output, &json)?; - log::debug!("Written to {}", args.output); - - Ok(()) -} diff --git a/plantuml/parser/BUILD b/plantuml/parser/BUILD index 197ce9f6..afc02c25 100644 --- a/plantuml/parser/BUILD +++ b/plantuml/parser/BUILD @@ -21,9 +21,3 @@ alias( actual = "//plantuml/parser/puml_cli:puml_cli", visibility = ["//visibility:public"], ) - -alias( - name = "linker", - actual = "//plantuml/linker:linker", - visibility = ["//visibility:public"], -) diff --git a/plantuml/parser/puml_cli/BUILD b/plantuml/parser/puml_cli/BUILD index 45169359..905b5559 100644 --- a/plantuml/parser/puml_cli/BUILD +++ b/plantuml/parser/puml_cli/BUILD @@ -18,6 +18,7 @@ rust_binary( crate_root = "src/main.rs", visibility = ["//visibility:public"], deps = [ + "//plantuml/parser/puml_idmap", "//plantuml/parser/puml_lobster", "//plantuml/parser/puml_parser", "//plantuml/parser/puml_resolver", @@ -42,6 +43,7 @@ rust_test( srcs = ["src/main.rs"], crate_root = "src/main.rs", deps = [ + "//plantuml/parser/puml_idmap", "//plantuml/parser/puml_lobster", "//plantuml/parser/puml_parser", "//plantuml/parser/puml_resolver", diff --git a/plantuml/parser/puml_cli/src/main.rs b/plantuml/parser/puml_cli/src/main.rs index 9562ee37..d6931ebd 100644 --- a/plantuml/parser/puml_cli/src/main.rs +++ b/plantuml/parser/puml_cli/src/main.rs @@ -26,6 +26,7 @@ use class_serializer::ClassSerializer; use component_serializer::ComponentSerializer; use 
sequence_serializer::SequenceSerializer; +use puml_idmap::{write_idmap_to_file, IdMapModel}; use puml_lobster::{write_lobster_to_file, LobsterModel}; use puml_parser::{ DiagramParser, ErrorLocation, Preprocessor, PumlActivityParser, PumlClassParser, @@ -98,6 +99,19 @@ struct Args { /// build output set is always complete. #[arg(long)] lobster_output_dir: Option, + + /// Stable workspace-relative source path to embed in generated outputs. + /// When set, this value is used as the `source_file` identity in the + /// FlatBuffers binary and the idmap JSON (e.g. `score/mw/com/proxy.puml`). + /// When omitted, the basename of the input file is used (legacy behaviour). + #[arg(long)] + source_name: Option, + + /// Output directory for generated `.idmap.json` files (optional). + /// When set, a `.idmap.json` is written for each parsed diagram + /// containing its defined and referenced element identifiers. + #[arg(long)] + idmap_output_dir: Option, } #[derive(Copy, Clone, ValueEnum, Debug)] @@ -151,6 +165,15 @@ fn run() -> Result<(), Box> { None => None, }; + let idmap_output_dir: Option = match &args.idmap_output_dir { + Some(dir) => { + let p = PathBuf::from(dir); + fs::create_dir_all(&p)?; + Some(p) + } + None => None, + }; + let file_list = collect_files_from_args(&args)?; if file_list.is_empty() { @@ -184,9 +207,12 @@ fn run() -> Result<(), Box> { } } - let source_file = path - .file_name() - .and_then(|n| n.to_str()) + // Prefer the stable workspace-relative --source-name when + // provided; fall back to the filesystem basename (legacy). 
+ let source_file: &str = args + .source_name + .as_deref() + .or_else(|| path.file_name().and_then(|n| n.to_str())) .unwrap_or_default(); let fbs_buffer = serialize_resolved_diagram(&logic_result, source_file); if let Some(ref dir) = fbs_output_dir { @@ -202,6 +228,16 @@ fn run() -> Result<(), Box> { }; write_lobster_to_file(lobster_model, path, ldir)?; } + + if let Some(idir) = &idmap_output_dir { + let idmap_model = match &logic_result { + ResolvedDiagram::Component(model) => IdMapModel::Component(model), + ResolvedDiagram::Class(model) => IdMapModel::Class(model), + ResolvedDiagram::Activity(_) => IdMapModel::Empty, + ResolvedDiagram::Sequence(model) => IdMapModel::Sequence(model), + }; + write_idmap_to_file(idmap_model, path, Some(source_file), idir)?; + } } Err(e) => { return Err(format!("Resolve error in {}: {}", path.display(), e).into()); diff --git a/plantuml/linker/BUILD b/plantuml/parser/puml_idmap/BUILD similarity index 61% rename from plantuml/linker/BUILD rename to plantuml/parser/puml_idmap/BUILD index 38337849..5d467f13 100644 --- a/plantuml/linker/BUILD +++ b/plantuml/parser/puml_idmap/BUILD @@ -10,25 +10,19 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_test") -rust_binary( - name = "linker", - srcs = ["src/main.rs"], - crate_root = "src/main.rs", - visibility = ["//visibility:public"], +load("@rules_rust//rust:defs.bzl", "rust_library") + +rust_library( + name = "puml_idmap", + srcs = ["src/lib.rs"], + visibility = ["//plantuml/parser:__subpackages__"], deps = [ - "//tools/serialization/flatbuffers/component:component_fbs", - "@crates//:clap", - "@crates//:env_logger", - "@crates//:flatbuffers", + "//tools/metamodel/class:class_diagram", + "//tools/metamodel/component:component_diagram", + "//tools/metamodel/sequence:sequence_diagram", "@crates//:log", "@crates//:serde", "@crates//:serde_json", ], ) - -rust_test( 
- name = "linker_test", - crate = ":linker", -) diff --git a/plantuml/parser/puml_idmap/src/lib.rs b/plantuml/parser/puml_idmap/src/lib.rs new file mode 100644 index 00000000..65f9c8d1 --- /dev/null +++ b/plantuml/parser/puml_idmap/src/lib.rs @@ -0,0 +1,269 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +//! Converts the resolved PlantUML logical model into an `.idmap.json` file +//! consumed by the `clickable_plantuml` Sphinx extension. +//! +//! The idmap separates each diagram's elements into two roles: +//! +//! * **defines** – elements that are *elaborated* in this diagram (they have +//! child elements, class members, or this diagram is the detail view). +//! * **references** – leaf mentions and relation endpoints (elements that +//! should link *away* to wherever they are elaborated). +//! +//! This mirrors the structure of `puml_lobster` but produces idmap JSON +//! rather than LOBSTER trace JSON. + +use class_diagram::ClassDiagram; +use component_diagram::LogicComponent; +use sequence_logic::SequenceTree; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::ffi::OsStr; +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; + +// --------------------------------------------------------------------------- +// Data model +// --------------------------------------------------------------------------- + +/// A single element entry in the idmap. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct IdMapEntry { + /// PlantUML alias used in `url of is [[url]]` injection. + pub alias: String, + /// Fully-qualified identifier (FQN) for matching across diagrams. + pub id: String, + /// `true` when this diagram elaborates the element (i.e. it is listed + /// under `defines`). Omitted from the JSON for plain references. + #[serde(default, skip_serializing_if = "is_not_elaborated")] + pub elaborated: bool, +} + +/// `skip_serializing_if` predicate: omit `elaborated` when it is `false`. +fn is_not_elaborated(elaborated: &bool) -> bool { + !*elaborated +} + +/// Root structure of an `.idmap.json` file. +#[derive(Debug, Serialize, Deserialize)] +pub struct IdMapFile { + /// Workspace-relative source path, e.g. `score/mw/com/proxy_detail.puml`. + pub source: String, + /// Elements elaborated (defined) in this diagram. + pub defines: Vec, + /// Elements referenced (leaf/relation endpoint) in this diagram. + pub references: Vec, +} + +// --------------------------------------------------------------------------- +// Model wrapper +// --------------------------------------------------------------------------- + +/// Union of the resolved diagram models accepted by the idmap writer. +pub enum IdMapModel<'a> { + Component(&'a HashMap), + Class(&'a ClassDiagram), + Sequence(&'a SequenceTree), + /// Diagrams with no cross-linkable elements (e.g. activity); produces an + /// empty idmap so the parser's always-declared sidecar output is written. + Empty, +} + +// --------------------------------------------------------------------------- +// Model converters +// --------------------------------------------------------------------------- + +/// Produce an [`IdMapFile`] from a resolved component diagram. +/// +/// An element is a **define** when at least one other element lists it as its +/// `parent_id` (i.e. it has children and is therefore elaborated here). 
+/// All remaining elements are **references** (top-level leaves that mention +/// something that may be detailed in another diagram). +fn comp_model_to_idmap(model: &HashMap, source: &str) -> IdMapFile { + // Collect the set of IDs that are listed as parent by at least one child. + let has_children: HashSet<&str> = model + .values() + .filter_map(|c| c.parent_id.as_deref()) + .collect(); + + let mut defines = Vec::new(); + let mut references = Vec::new(); + + for comp in model.values() { + let alias = comp + .alias + .as_deref() + .or(comp.name.as_deref()) + .unwrap_or(&comp.id) + .to_string(); + let is_define = has_children.contains(comp.id.as_str()); + let entry = IdMapEntry { + alias, + id: comp.id.clone(), + elaborated: is_define, + }; + if is_define { + defines.push(entry); + } else { + references.push(entry); + } + } + + // Deterministic output order. + defines.sort_by(|a, b| a.id.cmp(&b.id)); + references.sort_by(|a, b| a.id.cmp(&b.id)); + + IdMapFile { + source: source.to_string(), + defines, + references, + } +} + +/// Produce an [`IdMapFile`] from a resolved class diagram. +/// +/// A class entity is a **define** when it has any members (methods or +/// variables), making this diagram the elaboration site. Entities without +/// members are **references**. +fn class_model_to_idmap(model: &ClassDiagram, source: &str) -> IdMapFile { + let mut defines = Vec::new(); + let mut references = Vec::new(); + + for entity in &model.entities { + let has_members = !entity.methods.is_empty() || !entity.variables.is_empty(); + let entry = IdMapEntry { + alias: entity.name.clone(), + id: entity.id.clone(), + elaborated: has_members, + }; + if has_members { + defines.push(entry); + } else { + references.push(entry); + } + } + + defines.sort_by(|a, b| a.id.cmp(&b.id)); + references.sort_by(|a, b| a.id.cmp(&b.id)); + + IdMapFile { + source: source.to_string(), + defines, + references, + } +} + +/// Collect the unique participant names from a sequence tree. 
+fn collect_participants(tree: &SequenceTree) -> HashSet { + use sequence_logic::{Event, SequenceNode}; + + fn walk_nodes(nodes: &[SequenceNode], out: &mut HashSet) { + for node in nodes { + match &node.event { + Event::Interaction(i) => { + out.insert(i.caller.clone()); + out.insert(i.callee.clone()); + } + Event::Return(r) => { + out.insert(r.caller.clone()); + out.insert(r.callee.clone()); + } + Event::Condition(_) => {} + } + walk_nodes(&node.branches_node, out); + } + } + + let mut participants = HashSet::new(); + walk_nodes(&tree.root_interactions, &mut participants); + participants +} + +/// Produce an [`IdMapFile`] from a resolved sequence diagram. +/// +/// Sequence diagrams have no "definition" elements — all participants are +/// references (each participant links away to the component diagram that +/// elaborates it). +fn sequence_model_to_idmap(model: &SequenceTree, source: &str) -> IdMapFile { + let participants = collect_participants(model); + let mut references: Vec = participants + .into_iter() + .map(|name| IdMapEntry { + alias: name.clone(), + id: name, + elaborated: false, + }) + .collect(); + references.sort_by(|a, b| a.id.cmp(&b.id)); + + IdMapFile { + source: source.to_string(), + defines: Vec::new(), + references, + } +} + +/// Produce an empty [`IdMapFile`] for diagrams without cross-linkable elements. +fn empty_idmap(source: &str) -> IdMapFile { + IdMapFile { + source: source.to_string(), + defines: Vec::new(), + references: Vec::new(), + } +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/// Write an `.idmap.json` file for *model* into *output_dir*. +/// +/// The output filename is `.idmap.json` where `` is the file +/// stem of *input_path* (the original `.puml` source file). 
+/// +/// The `source` field embedded in the JSON is set to *source_name* when +/// provided (preferred: a stable workspace-relative path such as +/// `score/mw/com/proxy_detail.puml`), otherwise falls back to +/// `input_path.to_string_lossy()`. +pub fn write_idmap_to_file( + model: IdMapModel<'_>, + input_path: &Path, + source_name: Option<&str>, + output_dir: &Path, +) -> io::Result { + let source = source_name + .map(|s| s.to_string()) + .unwrap_or_else(|| input_path.to_string_lossy().into_owned()); + + let idmap = match model { + IdMapModel::Component(m) => comp_model_to_idmap(m, &source), + IdMapModel::Class(m) => class_model_to_idmap(m, &source), + IdMapModel::Sequence(m) => sequence_model_to_idmap(m, &source), + IdMapModel::Empty => empty_idmap(&source), + }; + + let file_stem = input_path + .file_stem() + .and_then(OsStr::to_str) + .unwrap_or("output"); + let output_path = output_dir.join(format!("{file_stem}.idmap.json")); + + let json = serde_json::to_string_pretty(&idmap) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + fs::write(&output_path, json)?; + + log::debug!("idmap written to {}", output_path.display()); + Ok(output_path) +} diff --git a/plantuml/sphinx/clickable_plantuml/README.md b/plantuml/sphinx/clickable_plantuml/README.md index 9d767037..21ee6f44 100644 --- a/plantuml/sphinx/clickable_plantuml/README.md +++ b/plantuml/sphinx/clickable_plantuml/README.md @@ -16,19 +16,22 @@ Sphinx extension that makes PlantUML diagrams clickable by injecting hyperlinks ## Sphinx Integration -The extension hooks into the native Sphinx build lifecycle. URLs are computed by -`app.builder.get_relative_uri()`, which works for any builder and -output directory layout. +The extension hooks into the native Sphinx build lifecycle. 
URL computation +depends on the configured `plantuml_output_format`: in `svg_obj` mode the +rendered SVG lives in `_images/`, so links are made relative to that directory +(`os.path.relpath(target_uri, imagedir)`); for inline `svg`/`png` the link is +relative to the containing HTML page via +`app.builder.get_relative_uri(from_docname, to_docname)`. ``` Sphinx build lifecycle clickable_plantuml hooks ═══════════════════════════════════ ═══════════════════════════════════════ builder-inited ───► on_builder_inited() - │ (one-time setup) Load all *plantuml_links.json files - │ from srcdir (recursive). - │ Store {puml_basename → alias_map} - │ in app.env. + │ (one-time setup) Load all *.idmap.json files from + │ srcdir (recursive). + │ Build definition index: + │ {alias|id → [definer source paths]}. │ ├─ READ PHASE ────────────────────────────────────────────────────────────── │ for each document: @@ -40,10 +43,9 @@ Sphinx build lifecycle clickable_plantuml hooks │ │ │ doctree-read ───► on_doctree_read() │ (per document) Traverse the parsed doctree. - │ For every plantuml node that has a - │ filename attribute, record - │ {puml_basename → docname} in app.env. - │ Warn on basename collisions. + │ For every plantuml node, record + │ {normalized_source_path → docname} + │ in app.env (path identity, not basename). │ │ env-merge-info ───► on_env_merge_info() │ (parallel builds only) Merge puml→docname maps gathered @@ -55,119 +57,110 @@ Sphinx build lifecycle clickable_plantuml hooks │ post-transform / resolve │ │ │ doctree-resolved ───► on_doctree_resolved() - │ (per document) For each plantuml node, look up the - │ alias_map from app.env. - │ Resolve target .puml → docname, then - │ call app.builder.get_relative_uri() - │ to get the correct relative URL. - │ Append url of is [[url]] - │ directives to node['uml'] before - │ sphinxcontrib-plantuml renders it. + │ (per document) For each plantuml node, load its idmap. 
+ │ For each reference entry, look up the + │ definition index (FQN first, then alias). + │ Apply proximity tiebreak on ambiguity. + │ Build the URL (relative to _images/ in + │ svg_obj mode, else page-relative via + │ get_relative_uri), then append + │ url of <alias> is [[url]] directives to + │ node['uml'] before rendering. │ build-finished ``` ## How It Works -1. **Link discovery** (`builder-inited`) – Scans for `*plantuml_links.json` files in the Sphinx source directory. -2. **Diagram location mapping** (`doctree-read`) – As Sphinx reads each document, the extension traverses the parsed doctree to record which `docname` contains which `.puml` diagram (keyed by basename). Basename collisions across documents are reported as warnings. -3. **URL resolution & link injection** (`doctree-resolved`) – For each plantuml node, resolves target `.puml` references to the docname that contains the target diagram, generates a relative URL via `app.builder.get_relative_uri()`, and appends `url of <alias> is [[url]]` directives to the PlantUML source before rendering. -4. **Incremental / parallel support** – `env-purge-doc` removes stale entries when a document is re-read; `env-merge-info` merges state from parallel worker processes. +1. **idmap discovery** (`builder-inited`) – Scans for `*.idmap.json` files in + the Sphinx source directory. Each sidecar records *defines* (elements + elaborated in that diagram, i.e. with children/members) and *references* + (leaf mentions and relation endpoints). A global definition index maps + each alias/FQN to the set of diagrams that elaborate it. -## Automatic JSON Generation (Bazel) +2. **Diagram location mapping** (`doctree-read`) – Records which `docname` + contains which `.puml` diagram, keyed by normalised workspace-relative path + (not just basename, avoiding collision false-merges). -`plantuml_links.json` is generated by the `architectural_design()` rule. +3. 
**URL resolution & link injection** (`doctree-resolved`) – For each + reference in a diagram's idmap, resolves the unique definer via the index. + When multiple diagrams define the same element, a *proximity tiebreak* + selects the definer sharing the longest common path prefix with the source + diagram. On a genuine tie, no link is emitted (safe over wrong). URLs are + built relative to `_images/` in `svg_obj` mode (else page-relative via + `app.builder.get_relative_uri()`) and percent-encoded before injection. -The `architectural_design()` rule invokes `//tools/plantuml/linker:linker` on all -`.fbs.bin` FlatBuffers files produced by the PlantUML parser. See -[Link Mapping Format](#link-mapping-format) for a detailed -description of which links are emitted. +4. **Incremental / parallel support** – `env-purge-doc` removes stale entries + when a document is re-read; `env-merge-info` merges state from parallel + worker processes. -### Algorithm +## Automatic idmap Generation (Bazel) -Given the set of `.fbs.bin` files for one `architectural_design()` target: +`.idmap.json` sidecars are produced by the `architectural_design()` rule. -1. **Build a top-level index** – For each diagram, collect every component whose - `parent_id` is `None` (i.e. it is not nested inside another component). - The index maps `alias → diagram file`. +The rule passes `--source-name ` and +`--idmap-output-dir` to `puml_cli` for every `.puml` file. The +`source` field in the resulting idmap is a stable, workspace-relative path +(e.g. `score/mw/com/proxy_detail.puml`), which is used as the diagram's +identity key throughout the extension. -2. **Emit links** – For every component in every diagram, look up its alias in - the top-level index. 
If a *different* diagram defines that alias as a - top-level component, emit a link entry: +### Role detection algorithm - ``` - source_file = diagram that contains the reference - source_id = alias of the component - target_file = diagram that defines it as a top-level component - ``` +Given the resolved model of one `.puml` diagram: -3. **Deduplicate** – Sort and deduplicate so that each `(source_file, source_id)` - pair has exactly one target (first alphabetically). Duplicate `source_id` - entries within the same source diagram are removed because PlantUML's - `url of X is [[…]]` directive supports only one URL per alias. +1. **defines** – An element is a *define* when: + - At least one other element lists it as its `parent_id` (component + diagrams); or it has member variables / methods (class diagrams). +2. **references** – All remaining elements: top-level leaf boxes, relation + endpoints (component), and sequence participants. -### Concrete Example +### Concrete example ```text -' adas_overview.puml — subsystem context +' overview.puml — top-level leaves are REFERENCES @startuml -component ADAS -component BrakeController -component LaneKeepAssist -ADAS --> BrakeController -ADAS --> LaneKeepAssist +[Gateway] --> [Proxy] @enduml ``` ```text -' brake_controller.puml — component detail +' proxy_detail.puml — Proxy has a child → DEFINE @startuml -component BrakeController -interface BrakeDemandIF -interface WheelSpeedIF -BrakeController --> BrakeDemandIF -BrakeController <-- WheelSpeedIF +package Proxy { [RequestHandler] } @enduml ``` -Generated links — one in each direction: - +`proxy_detail.idmap.json`: ```json -{ - "links": [ - { - "source_file": "adas_overview.puml", - "source_id": "BrakeController", - "target_file": "brake_controller.puml" - }, - { - "source_file": "brake_controller.puml", - "source_id": "BrakeController", - "target_file": "adas_overview.puml" - } - ] -} +{ "source": "score/mw/com/proxy_detail.puml", + "defines": [{ "alias": "Proxy", "id": 
"Proxy" }], + "references": [{ "alias": "RequestHandler", "id": "Proxy.RequestHandler" }] } ``` -Clicking `BrakeController` in the overview navigates to its detail diagram; -clicking it in the detail diagram navigates back to the overview. +`overview.idmap.json`: +```json +{ "source": "score/overview.puml", + "defines": [], + "references": [{ "alias": "Gateway", "id": "Gateway" }, + { "alias": "Proxy", "id": "Proxy" }] } +``` -`ADAS` and `LaneKeepAssist` appear as top-level only in `adas_overview.puml` and -have no dedicated detail diagram, so **no links** are emitted for them. +Result: `Proxy` in `overview.puml` links to `proxy_detail.puml`. +`Gateway` has no definer → no link. -(link-mapping-format)= -## Link Mapping Format +## idmap Format -Place one or more `*plantuml_links.json` filesinside the Sphinx source directory: +`.idmap.json` files are written by the parser and read by this extension. +They are not intended to be authored manually. ```json { - "links": [ - { - "source_file": "my_diagram.puml", - "source_id": "ComponentA", - "target_file": "other_diagram.puml" - } + "source": "path/to/diagram.puml", + "defines": [ + { "alias": "ComponentName", "id": "fully.qualified.Name" } + ], + "references": [ + { "alias": "OtherComponent", "id": "OtherComponent" } ] } ``` diff --git a/plantuml/sphinx/clickable_plantuml/clickable_plantuml.py b/plantuml/sphinx/clickable_plantuml/clickable_plantuml.py index 7317a052..077fbda3 100644 --- a/plantuml/sphinx/clickable_plantuml/clickable_plantuml.py +++ b/plantuml/sphinx/clickable_plantuml/clickable_plantuml.py @@ -10,12 +10,41 @@ # # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -"""Sphinx extension to make PlantUML diagrams clickable.""" +"""Sphinx extension to make PlantUML diagrams clickable. 
+ +Design overview +--------------- +Link data is derived from ``.idmap.json`` sidecar files produced by the +PlantUML parser (``puml_cli --idmap-output-dir ...``). Each idmap file +records two roles for the elements in one ``.puml`` diagram: + +* **defines** – elements elaborated (given children / structure) in that + diagram. A component diagram that contains ``package Proxy { ... }`` is + the definition site of ``Proxy``. +* **references** – leaf mentions and relation endpoints. A top-level + ``[Proxy]`` box in an overview is a reference that should link to the + diagram that defines it. + +The matching algorithm: + +1. Build a *definition index*: ``{alias|id → [source_paths]}``. +2. For each reference ``(alias, id)`` in a diagram, look up the index (FQN + ``id`` first, then ``alias``) to find candidate definer diagrams. +3. If exactly one definer: emit the link. +4. If multiple definers: pick the one sharing the longest common workspace- + relative path prefix with the source diagram (proximity tiebreak). + On a tie: log a warning and emit no link (safe over wrong). +5. Never link a diagram to itself. +""" + +from __future__ import annotations import functools import json +import os import re -from pathlib import Path +import urllib.parse +from pathlib import Path, PurePosixPath from typing import Any from docutils import nodes @@ -24,13 +53,22 @@ logger = logging.getLogger(__name__) -# Environment attribute names used by this extension. -_ENV_LINK_DATA = "clickable_plantuml_link_data" -# Stores {puml_basename: (docname, anchor_id_or_None)} +# --------------------------------------------------------------------------- +# Environment attribute names +# --------------------------------------------------------------------------- + +# {normalized_source_path: raw_idmap_dict} — loaded once in builder-inited. +_ENV_IDMAP_BY_SOURCE = "clickable_plantuml_idmap_by_source" +# {alias_or_id: [source_path, ...]} — definition index built in builder-inited. 
def _normalize_source_path(raw: str) -> str:
    """Normalise a source path to a forward-slash relative string.

    Backslashes are converted to ``/``, the result is collapsed through
    ``PurePosixPath`` and leading ``/`` characters are stripped.

    An empty input (or one that collapses to the current directory) yields
    ``""`` instead of ``"."`` so callers can rely on plain truthiness to
    detect "no path".
    """
    if not raw:
        return ""
    normalized = str(PurePosixPath(raw.replace("\\", "/"))).lstrip("/")
    return "" if normalized == "." else normalized


def _common_prefix_length(path_a: str, path_b: str) -> int:
    """Return the number of leading path components shared by two paths.

    Both arguments are interpreted as forward-slash paths regardless of the
    host platform, matching the keys produced by ``_normalize_source_path``.
    """
    shared = 0
    for part_a, part_b in zip(
        PurePosixPath(path_a).parts, PurePosixPath(path_b).parts
    ):
        if part_a != part_b:
            break
        shared += 1
    return shared


def _proximity_tiebreak(source: str, candidates: list[str]) -> str | None:
    """Pick the candidate with the longest common prefix with *source*.

    Repeated entries in *candidates* are collapsed first, so the same path
    listed twice is not mistaken for a tie with itself.

    Returns ``None`` when *candidates* is empty or when two or more distinct
    candidates share the best score (tie → no link).
    """
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique = list(dict.fromkeys(candidates))
    if not unique:
        return None

    # Score every candidate exactly once.
    scores = {path: _common_prefix_length(source, path) for path in unique}
    best = max(scores.values())
    winners = [path for path in unique if scores[path] == best]
    return winners[0] if len(winners) == 1 else None
def _escape_plantuml_url(url: str) -> str:
    """Percent-encode characters significant in PlantUML URL syntax.

    PlantUML terminates ``url of X is [[...]]`` at the first ``]]``, so
    ``[``, ``]``, spaces and any other character outside the safe sets below
    are percent-encoded. The part after the first ``#`` is treated as the
    fragment and encoded separately so the separator itself survives.

    ``%`` is left untouched: the URLs passed in are expected to come from the
    Sphinx builder, which already percent-encodes docnames, and encoding
    ``%`` again would turn ``%20`` into ``%2520``.
    """
    # Characters that may stay literal in the path/query part of the URL.
    base_safe = "/:?&=@!$'()*+,;-._~%"
    # The fragment gets a stricter set.
    fragment_safe = "-._~%"

    base, separator, fragment = url.partition("#")
    escaped_base = urllib.parse.quote(base, safe=base_safe)
    if not separator:
        return escaped_base
    return f"{escaped_base}#{urllib.parse.quote(fragment, safe=fragment_safe)}"
def _load_idmap_files(
    source_dir: Path,
) -> tuple[dict[str, Any], dict[str, list[str]], dict[str, Any]]:
    """Scan *source_dir* for ``*.idmap.json`` and build the lookup indices.

    Files are visited in sorted path order. A file that cannot be read or
    decoded, whose JSON root is not an object, or whose ``source`` field is
    missing or not a non-empty string is skipped with a warning instead of
    aborting the build. Malformed ``defines`` entries are ignored.

    Returns:
        idmap_by_source: ``{normalized_source_path → raw idmap dict}``
        definition_index: ``{alias_or_fqn_id → [normalized_source_paths]}``;
            each source path appears at most once per key, so a repeated
            definition is never mistaken for an ambiguity downstream.
        idmap_by_basename: ``{filename → raw idmap dict}`` (first-seen wins),
            an O(1) fallback for when a node's staging path differs from the
            workspace-relative path stored in the idmap ``source`` field.
    """
    idmap_by_source: dict[str, Any] = {}
    definition_index: dict[str, list[str]] = {}
    idmap_by_basename: dict[str, Any] = {}

    for json_path in sorted(source_dir.rglob("*.idmap.json")):
        try:
            data = json.loads(json_path.read_text(encoding="utf-8"))
        except (ValueError, OSError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            logger.warning("clickable_plantuml: failed to load %s: %s", json_path, exc)
            continue

        if not isinstance(data, dict):
            logger.warning(
                "clickable_plantuml: idmap %s is not a JSON object — skipped",
                json_path.name,
            )
            continue

        raw_source = data.get("source", "")
        if not raw_source or not isinstance(raw_source, str):
            logger.warning(
                "clickable_plantuml: idmap %s missing 'source' field — skipped",
                json_path.name,
            )
            continue

        source_key = _normalize_source_path(raw_source)
        idmap_by_source[source_key] = data
        idmap_by_basename.setdefault(PurePosixPath(source_key).name, data)

        defines = data.get("defines")
        if not isinstance(defines, list):
            continue

        for entry in defines:
            if not isinstance(entry, dict):
                continue
            # Index the element under both its alias and its fully
            # qualified id.
            for key in (entry.get("alias", ""), entry.get("id", "")):
                if not key or not isinstance(key, str):
                    continue
                definers = definition_index.setdefault(key, [])
                # The membership test also covers alias == id.
                if source_key not in definers:
                    definers.append(source_key)

    logger.info(
        "clickable_plantuml: loaded %d idmap file(s), %d unique definition keys",
        len(idmap_by_source),
        len(definition_index),
    )
    return idmap_by_source, definition_index, idmap_by_basename
def _node_source_key(node: nodes.Node, srcdir: str) -> str | None:
    """Derive the lookup key for a plantuml *node* from its ``filename``.

    Returns ``None`` when the node has no (or an empty) ``filename``. When
    the filename lies under *srcdir* the key is the path relative to it,
    written with forward slashes; otherwise the key is just the basename.
    """
    # NOTE(review): sphinxcontrib-plantuml may store only the basename in
    # ``filename`` (with the directory in ``incdir``) — confirm; if so, every
    # node takes the basename branch below.
    filename: str = node.get("filename", "")
    if not filename:
        return None

    path = Path(filename)
    try:
        relative = path.relative_to(srcdir)
    except ValueError:
        # Not located under srcdir: only the basename is usable.
        return path.name
    return str(relative).replace("\\", "/")


def _resolve_idmap(
    idmap_by_source: dict[str, Any],
    idmap_by_basename: dict[str, Any],
    key: str,
) -> Any | None:
    """Look up the idmap for *key*, trying the exact path before the basename.

    *key* is first looked up in *idmap_by_source*. On a miss, the final path
    component of *key* is looked up in *idmap_by_basename*. ``None`` is
    returned when neither index has an entry.
    """
    exact = idmap_by_source.get(key)
    if exact is None:
        return idmap_by_basename.get(PurePosixPath(key).name)
    return exact
def on_builder_inited(app: Sphinx) -> None:
    """Load idmap files and build the definition index once.

    Does nothing for non-HTML builders. For HTML builders the three indices
    are always (re)assigned on ``app.env`` — as empty dicts when *srcdir*
    does not exist or holds no ``*.idmap.json`` files — so data left on the
    environment by an earlier build cannot outlive the sidecars it came from.
    """
    if app.builder.format != "html":
        return

    idmap_by_source: dict[str, Any] = {}
    definition_index: dict[str, list[str]] = {}
    idmap_by_basename: dict[str, Any] = {}

    source_dir = Path(app.srcdir)
    if not source_dir.exists():
        logger.info("clickable_plantuml: srcdir does not exist — no idmaps loaded")
    else:
        idmap_by_source, definition_index, idmap_by_basename = _load_idmap_files(
            source_dir
        )
        if not idmap_by_source:
            logger.info("clickable_plantuml: no *.idmap.json files found")

    setattr(app.env, _ENV_IDMAP_BY_SOURCE, idmap_by_source)
    setattr(app.env, _ENV_DEF_INDEX, definition_index)
    setattr(app.env, _ENV_IDMAP_BY_BASENAME, idmap_by_basename)


def on_doctree_read(app: Sphinx, doctree: nodes.document) -> None:
    """Record which docname (and section anchor) contains which diagram.

    Each diagram is registered under up to two keys so that a later lookup
    succeeds whether it uses the Sphinx-staging path or the path stored in
    the idmap ``source`` field:

    1. the key derived from the node relative to ``srcdir``;
    2. the normalised ``"source"`` value of the matching idmap, when one is
       found and it differs from the first key.

    A warning is logged when the first key is already registered for a
    different document; the later document wins.
    """
    plantuml_cls = _get_plantuml_node_class()
    if plantuml_cls is None:
        return

    env = app.env
    idmap_by_source: dict[str, Any] = getattr(env, _ENV_IDMAP_BY_SOURCE, {})
    idmap_by_basename: dict[str, Any] = getattr(env, _ENV_IDMAP_BY_BASENAME, {})
    puml_docnames: dict[str, tuple[str, str | None]] = getattr(
        env, _ENV_PUML_DOCNAMES, {}
    )

    for node in doctree.findall(plantuml_cls):
        key = _node_source_key(node, app.srcdir)
        if not key:
            continue

        previous = puml_docnames.get(key)
        if previous is not None and previous[0] != env.docname:
            logger.warning(
                "clickable_plantuml: diagram '%s' found in both '%s' and '%s'"
                " — last wins (path collision; check idmap source fields)",
                key,
                previous[0],
                env.docname,
            )

        location = (env.docname, _find_parent_section_id(node))
        puml_docnames[key] = location

        # Also register the diagram under the path recorded in its idmap,
        # when that differs from the srcdir-relative key.
        idmap = _resolve_idmap(idmap_by_source, idmap_by_basename, key)
        if idmap is None:
            continue
        idmap_source_key = _normalize_source_path(idmap.get("source", ""))
        if idmap_source_key and idmap_source_key != key:
            puml_docnames[idmap_source_key] = location

    setattr(env, _ENV_PUML_DOCNAMES, puml_docnames)
- - For each diagram, resolves target ``.puml`` references to the docname that - contains the target diagram and uses ``app.builder.get_relative_uri`` to - produce correct relative URLs. + """Inject ``url of is [[url]]`` into plantuml nodes. + + Resolves each reference in the diagram's idmap to its definer diagram, + applies a proximity tiebreak on ambiguity, and builds a URL whose base + depends on the configured ``plantuml_output_format``: + + * ``svg_obj`` – the rendered SVG lives in the ``_images/`` directory and is + embedded via ````; ```` targets inside the SVG resolve + relative to ``_images/``, so the URL is + ``os.path.relpath(target_uri, imagedir)``. + * inline ``svg`` / ``png`` – the link resolves relative to the containing + HTML page, so the URL is + ``app.builder.get_relative_uri(docname, target_docname)``. """ - link_data: dict[str, dict[str, Any]] = getattr(app.env, _ENV_LINK_DATA, {}) - if app.builder.format != "html" or not link_data: + idmap_by_source: dict[str, Any] = getattr(app.env, _ENV_IDMAP_BY_SOURCE, {}) + definition_index: dict[str, list[str]] = getattr(app.env, _ENV_DEF_INDEX, {}) + idmap_by_basename: dict[str, Any] = getattr(app.env, _ENV_IDMAP_BY_BASENAME, {}) + if app.builder.format != "html" or not idmap_by_source: return PlantumlNode = _get_plantuml_node_class() @@ -209,47 +377,75 @@ def on_doctree_resolved(app: Sphinx, doctree: nodes.document, docname: str) -> N puml_docnames: dict[str, tuple[str, str | None]] = getattr( app.env, _ENV_PUML_DOCNAMES, {} ) - absolute_url_prefixes = ("http://", "https://", "/") + + # Loop-invariant for the whole build: resolve once instead of per reference. 
+ output_format = getattr(app.config, "plantuml_output_format", "png") + imagedir = getattr(app.builder, "imagedir", "_images") modified_count = 0 - for node in doctree.traverse(PlantumlNode): - diagram_filename = Path(node.get("filename", "")).name - alias_map: dict[str, Any] = link_data.get(diagram_filename, {}) - if not alias_map: + for node in doctree.findall(PlantumlNode): + source_key = _node_source_key(node, app.srcdir) + if not source_key: + continue + + idmap = _resolve_idmap(idmap_by_source, idmap_by_basename, source_key) + if idmap is None: continue resolved_links: dict[str, str] = {} - for alias, info in alias_map.items(): - target_file: str = info["target_file"] - - if target_file.endswith(".puml"): - target_basename = Path(target_file).name - target_info = puml_docnames.get(target_basename) - if target_info is not None: - target_docname, target_anchor = target_info - # SVG files are stored in _images/ (one level below the - # HTML output root). Using get_relative_uri() would give a - # page-to-page relative URL, but that path is interpreted - # relative to the SVG file, not the parent HTML page — - # causing the browser to open the raw SVG. Instead, build - # the URL relative to _images/ by prepending "../" to the - # root-relative page URI returned by get_target_uri(). - page_uri = app.builder.get_target_uri(target_docname) - url = f"../{page_uri}" - if target_anchor: - url += f"#{target_anchor}" - resolved_links[alias] = url - else: - logger.debug( - "clickable_plantuml: target diagram '%s' for alias " - "'%s' not found in any document", - target_file, + for ref in idmap.get("references", []): + alias: str = ref.get("alias", "") + fqn: str = ref.get("id", alias) + if not alias: + continue + + # FQN lookup first (more specific), then alias/local-name. + candidates: list[str] = ( + definition_index.get(fqn) or definition_index.get(alias) or [] + ) + # Never link a diagram to itself. 
+ candidates = [c for c in candidates if c != source_key] + if not candidates: + continue + + if len(candidates) == 1: + target_source = candidates[0] + else: + target_source = _proximity_tiebreak(source_key, candidates) + if target_source is None: + logger.warning( + "clickable_plantuml: ambiguous definition for '%s' in '%s'" + " — tied candidates %s; no link emitted", alias, + source_key, + candidates, ) - elif target_file.startswith(absolute_url_prefixes): - resolved_links[alias] = target_file + continue + + target_info = puml_docnames.get(target_source) + if target_info is None: + logger.debug( + "clickable_plantuml: definer '%s' for alias '%s' not" + " found in any document — skipping", + target_source, + alias, + ) + continue + + target_docname, target_anchor = target_info + # In svg_obj mode the rendered SVG lives in the _images/ directory, + # so URLs inside the SVG must be relative to _images/, not to the + # containing HTML page. For inline svg mode the SVG is embedded + # directly in the HTML, so page-relative URLs are correct. + if output_format == "svg_obj": + target_uri = app.builder.get_target_uri(target_docname) + url = os.path.relpath(target_uri, imagedir).replace("\\", "/") else: - resolved_links[alias] = target_file + url = app.builder.get_relative_uri(docname, target_docname) + if target_anchor: + url += f"#{target_anchor}" + + resolved_links[alias] = _escape_plantuml_url(url) if resolved_links: node["uml"] = _inject_links_into_uml(node.get("uml", ""), resolved_links) @@ -294,7 +490,7 @@ def setup(app: Sphinx) -> dict[str, Any]: app.connect("env-merge-info", on_env_merge_info) return { - "version": "4.0", + "version": "5.0", "parallel_read_safe": True, "parallel_write_safe": True, }