diff --git a/Cargo.lock b/Cargo.lock
index 9e425d1ed..3c8127a2f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -339,6 +339,7 @@ dependencies = [
  "axl-proto",
  "axl-types",
  "base64 0.22.1",
+ "basil-core",
  "bazelrc",
  "blake2",
  "build-event-stream",
@@ -488,6 +489,13 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 [[package]]
 name = "basil"
 version = "0.0.0-dev"
+dependencies = [
+ "basil-core",
+]
+
+[[package]]
+name = "basil-core"
+version = "0.0.0-dev"
 dependencies = [
  "axl-proto",
  "prost",
diff --git a/crates/aspect-cli/src/builtins/aspect/MODULE.aspect b/crates/aspect-cli/src/builtins/aspect/MODULE.aspect
index aa3d5080d..1b218f5b5 100644
--- a/crates/aspect-cli/src/builtins/aspect/MODULE.aspect
+++ b/crates/aspect-cli/src/builtins/aspect/MODULE.aspect
@@ -6,6 +6,7 @@ use_task("github.axl", "token")
 use_task("run.axl", "run")
 use_task("test.axl", "test")
 use_task("axl_add.axl", "add")
+use_task("axl_test.axl", "test")
 use_task("delivery.axl", "delivery")
 use_task("lint.axl", "lint")
 use_task("format.axl", "format")
diff --git a/crates/aspect-cli/src/builtins/aspect/axl_test.axl b/crates/aspect-cli/src/builtins/aspect/axl_test.axl
new file mode 100644
index 000000000..3fbb7c110
--- /dev/null
+++ b/crates/aspect-cli/src/builtins/aspect/axl_test.axl
@@ -0,0 +1,131 @@
+"""`aspect axl test` — run AXL `*_test.axl` unit tests.
+
+Discovers `*_test.axl` files under the given paths (files or directories,
+defaulting to the Aspect workspace root), runs each file's top-level
+`def test_*(t)` functions through the built-in parallel test runner, and
+reports pass/fail per test.
+
+The runner evaluates each file in isolation with no module loader, so a test
+file that `load(...)`s other modules is reported as a load error rather than
+run; self-contained test files run fully. Discovery does not descend into
+hidden directories (e.g. `.git`) or follow directory symlinks (so Bazel
+output trees are skipped).
+"""
+
+# Capture the privileged test-runner namespace at module-eval time, where the
+# @aspect std-context marker is present. The task implementation runs later in
+# the shared execution module (no marker), so it must reuse this binding rather
+# than call `__builtins__.testing()` itself.
+_testing = __builtins__.testing()
+
+GREEN = "\033[32m"
+RED = "\033[31m"
+DIM = "\033[2m"
+BOLD = "\033[1m"
+RESET = "\033[0m"
+
+# Generous worklist bound; directory symlinks are not followed (read_dir marks
+# them non-dir), so this only guards against a pathologically large real tree.
+_WALK_LIMIT = 1000000
+
+def _walk_test_files(ctx, root):
+    """Every `*_test.axl` path at or under `root` (a file or directory).
+
+    Args:
+        ctx: task context; only `ctx.std.fs.is_dir` and `ctx.std.fs.read_dir`
+            are used here.
+        root: path of a file or directory to search.
+
+    Returns:
+        A list of matching paths. For a directory root each path is built as
+        `<parent>/<entry.path>`; a non-directory root is returned as-is when
+        its name ends in `_test.axl`, otherwise the list is empty.
+
+    Fails when more than `_WALK_LIMIT` directories would have to be visited,
+    instead of silently returning a partial (and misleadingly green) list.
+    """
+    found = []
+    if not ctx.std.fs.is_dir(root):
+        # A non-directory root only counts if it is itself a test file.
+        if root.endswith("_test.axl"):
+            found.append(root)
+        return found
+
+    # Iterative walk over an explicit worklist; each loop iteration visits one
+    # directory, so `_WALK_LIMIT` bounds the number of directories visited.
+    stack = [root]
+    for _ in range(_WALK_LIMIT):
+        if not stack:
+            break
+        current = stack.pop()
+        for entry in ctx.std.fs.read_dir(current):
+            child = current + "/" + entry.path
+            if entry.is_dir:
+                # Hidden directories (e.g. `.git`) are not descended into.
+                if not entry.path.startswith("."):
+                    stack.append(child)
+            elif entry.path.endswith("_test.axl"):
+                found.append(child)
+
+    # Directories still queued here means the bound was exhausted: abort rather
+    # than report a run that quietly skipped part of the tree.
+    if stack:
+        fail("test discovery under %s exceeded %d directories; pass narrower paths" % (root, _WALK_LIMIT))
+    return found
+
+def _resolve_test_files(ctx, paths):
+    """Collect test files from every root, keeping first-seen order and dropping repeats."""
+    if paths:
+        search_roots = list(paths)
+    else:
+        # No explicit paths given: search from the Aspect workspace root.
+        search_roots = [ctx.std.env.aspect_root_dir()]
+
+    # A dict used as a set: its keys are the paths already emitted.
+    emitted = {}
+    ordered = []
+    for search_root in search_roots:
+        for candidate in _walk_test_files(ctx, search_root):
+            if candidate in emitted:
+                continue
+            emitted[candidate] = True
+            ordered.append(candidate)
+    return ordered
+
+def _relativize(ctx, path):
+    """Strip the workspace-root prefix from `path`, if present, for display."""
+    workspace_prefix = ctx.std.env.aspect_root_dir() + "/"
+    if not path.startswith(workspace_prefix):
+        # Outside the workspace root: show the path unchanged.
+        return path
+    return path[len(workspace_prefix):]
+
+def _report_file(ctx, path, result):
+    """Print the outcome for one test file.
+
+    Returns a `(passed, failed, errored)` tuple: a file-level error yields
+    `(0, 0, 1)`; otherwise the file's own pass/fail counts with `errored = 0`.
+    """
+    shown = _relativize(ctx, path)
+    file_error = result["error"]
+
+    # File-level error: print it indented under an ERROR header and stop.
+    if file_error != None:
+        print("%s%sERROR%s %s" % (RED, BOLD, RESET, shown))
+        for text in file_error.split("\n"):
+            print("    " + text)
+        return (0, 0, 1)
+
+    n_passed = result["passed"]
+    n_failed = result["failed"]
+    if n_failed == 0:
+        badge = GREEN + "ok  " + RESET
+    else:
+        badge = RED + "FAIL" + RESET
+    print("%s %s%s%s (%d passed, %d failed)" % (badge, DIM, shown, RESET, n_passed, n_failed))
+
+    # List each failing test, with its message (if any) indented beneath it.
+    for outcome in result["outcomes"]:
+        if outcome["passed"]:
+            continue
+        print("    %sFAIL%s %s" % (RED, RESET, outcome["name"]))
+        detail = outcome["message"]
+        if detail == None:
+            continue
+        for text in detail.split("\n"):
+            print("         " + text)
+    return (n_passed, n_failed, 0)
+
+def _impl(ctx: TaskContext) -> int:
+    """Task entry point: discover test files, run each one, print a summary.
+
+    Returns 0 when nothing failed or errored (including when no test files
+    were found) and 1 otherwise.
+    """
+    test_files = _resolve_test_files(ctx, ctx.args.paths)
+    if not test_files:
+        print("No *_test.axl files found.")
+        return 0
+
+    passed_total = 0
+    failed_total = 0
+    errored_files = 0
+
+    for test_file in test_files:
+        # Each file is read and handed to the runner as source text.
+        result = _testing.run(ctx.std.fs.read_to_string(test_file))
+        counts = _report_file(ctx, test_file, result)
+        passed_total += counts[0]
+        failed_total += counts[1]
+        errored_files += counts[2]
+
+    print("")
+    parts = ["%d passed, %d failed across %d file(s)" % (passed_total, failed_total, len(test_files))]
+    if errored_files:
+        # Only mention file errors when there are some.
+        parts.append("%d file error(s)" % errored_files)
+    print(BOLD + ", ".join(parts) + RESET)
+
+    if failed_total or errored_files:
+        return 1
+    return 0
+
+# Task definition picked up by name from MODULE.aspect
+# (`use_task("axl_test.axl", "test")`); the `axl` group is what makes the
+# command read `aspect axl test`, as described in the module docstring.
+test = task(
+    group = ["axl"],
+    summary = "Run AXL *_test.axl unit tests.",
+    implementation = _impl,
+    args = {
+        # Optional search roots; `_resolve_test_files` falls back to the
+        # Aspect workspace root when none are given.
+        "paths": args.positional(
+            description = "Files or directories to search for `*_test.axl` tests. Defaults to the Aspect workspace root.",
+        ),
+    },
+)
diff --git a/crates/axl-proto/BUILD.bazel b/crates/axl-proto/BUILD.bazel
index 328a35ea2..7b5890cab 100644
--- a/crates/axl-proto/BUILD.bazel
+++ b/crates/axl-proto/BUILD.bazel
@@ -29,6 +29,7 @@ rust_library(
     visibility = [
         "//crates/axl-runtime:__pkg__",
         "//crates/basil:__pkg__",
+        "//crates/basil-core:__pkg__",
         "//crates/build-event-stream:__pkg__",
     ],
     deps = [
diff --git a/crates/axl-runtime/BUILD.bazel b/crates/axl-runtime/BUILD.bazel
index 005e2fc25..a31b11ac6 100644
--- a/crates/axl-runtime/BUILD.bazel
+++ b/crates/axl-runtime/BUILD.bazel
@@ -20,6 +20,7 @@ rust_library(
         "//crates/aspect-telemetry",
         "//crates/axl-proto",
         "//crates/axl-types",
+        "//crates/basil-core",
         "//crates/bazelrc",
         "//crates/build-event-stream",
         "//crates/galvanize",
diff --git a/crates/axl-runtime/Cargo.toml b/crates/axl-runtime/Cargo.toml
index 40ff5c090..d505707ba 100644
--- a/crates/axl-runtime/Cargo.toml
+++ b/crates/axl-runtime/Cargo.toml
@@ -65,6 +65,7 @@ bazelrc = { path = "../bazelrc" }
 axl-types = { path = "../axl-types" }
 axl-proto = { path = "../axl-proto" }
 starbuf-derive = { path = "../starbuf-derive" }
+basil-core = { path = "../basil-core" }
 build-event-stream = { path = "../build-event-stream" }
 galvanize = { path = "../galvanize" }
 
diff --git a/crates/axl-runtime/src/engine/bazel/backend.rs b/crates/axl-runtime/src/engine/bazel/backend.rs
new file mode 100644
index 000000000..a0fa2cd82
--- /dev/null
+++ b/crates/axl-runtime/src/engine/bazel/backend.rs
@@ -0,0 +1,350 @@
+//! `BazelBackend` — which bazel a `ctx.bazel.*` invocation actually drives.
+//!
+//! Two implementations of one contract (see `docs/testing.md`, decisions 6/7):
+//!
+//!   - [`BazelBackend::Real`] — production. Spawns whatever `bazel_command()`
+//!     resolves (honoring `BAZEL_REAL`). The path is unchanged from before
+//!     this module existed.
+//!   - [`BazelBackend::Fake`] — testing. Spawns a generic fake-bazel binary
+//!     (`basil` today; a shipped `aspect` self-exec subcommand later) with the
+//!     fake path supplied **directly on the value** — never via the
+//!     process-global `BAZEL_REAL` env var, so concurrent test workers don't
+//!     race over one global. A declared [`BazelExpectation`] is handed to the
+//!     fake over an inherited control channel (a `socketpair`); the fake
+//!     synthesizes a consistent BES stream onto the real
+//!     `--build_event_binary_file` the parent already wires, so the production
+//!     `BuildEventIter` read path is exercised unchanged.
+//!
+//! The control transport sits behind the [`ControlChannel`] trait so a Windows
+//! named-pipe / loopback implementation is a drop-in later; only a Unix
+//! `socketpair` impl ships in this slice.
+
+use std::collections::BTreeMap;
+use std::io;
+use std::process::Command;
+use std::process::Stdio;
+use std::sync::Arc;
+
+use allocative::Allocative;
+use basil_core::BazelExpectation;
+
+/// How `ctx.bazel.*` reaches bazel. Cloneable + carried on the `Bazel`
+/// Starlark value so every invocation derives its own per-spawn resources
+/// (no process-global state — a hard requirement under the parallel test
+/// runner).
+///
+/// `Default` is derived and yields [`BazelBackend::Real`], so any value that
+/// is not explicitly configured for testing drives the production path.
+#[derive(Clone, Debug, Default, Allocative)]
+pub enum BazelBackend {
+    /// Production: spawn the real (or `BAZEL_REAL`) bazel via `bazel_command()`.
+    #[default]
+    Real,
+    /// Testing: spawn `fake_bin` directly and feed it `expectation` over the
+    /// control channel.
+    Fake {
+        /// Absolute path to the fake-bazel binary to spawn.
+        fake_bin: String,
+        /// The declared fixture handed to the fake for this invocation.
+        /// Shared via `Arc` so cloning the backend does not copy the fixture.
+        #[allocative(skip)]
+        expectation: Arc<BazelExpectation>,
+    },
+}
+
+impl BazelBackend {
+    /// Build the base `Command` for one bazel invocation under this backend,
+    /// with `startup_flags` appended before any subcommand is added. All the
+    /// verb helpers on this type start from here, which keeps the Real/Fake
+    /// decision in a single place.
+    ///
+    /// For `Real` the command comes from the shared `bazel_command()` helper
+    /// (which sets the anti-inception env var and honors `BAZEL_REAL`). For
+    /// `Fake` it is created directly from `fake_bin` and deliberately does
+    /// NOT go through `bazel_command()`, so the fake path touches no global
+    /// env state.
+    pub(crate) fn base_command(&self, startup_flags: &[String]) -> Command {
+        let mut base = if let Self::Fake { fake_bin, .. } = self {
+            Command::new(fake_bin)
+        } else {
+            super::bazel_command()
+        };
+        base.args(startup_flags);
+        base
+    }
+
+    /// Startup-flag-free variant of [`base_command`](Self::base_command).
+    /// Kept for the `build`/`test` spawn path (`build.rs`), which appends its
+    /// startup flags itself.
+    pub fn command(&self) -> Command {
+        let no_startup_flags: [String; 0] = [];
+        self.base_command(&no_startup_flags)
+    }
+
+    /// Invoke `bazel [startup_flags] info [keys...]` and parse its stdout into
+    /// a `key → value` map. With no `keys`, bazel is asked for every key.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the process cannot be spawned or waited on, or if it exits
+    /// unsuccessfully (the error text carries the exit code and, when
+    /// non-empty, the trimmed stderr).
+    pub fn info(
+        &self,
+        startup_flags: &[String],
+        keys: &[&str],
+    ) -> io::Result<BTreeMap<String, String>> {
+        let mut probe = self.base_command(startup_flags);
+        probe
+            .arg("info")
+            .args(keys)
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .stdin(Stdio::null());
+        // `_guard` is held until this function returns.
+        let (child, _guard) = super::live::spawn_registered(&mut probe)
+            .map_err(|e| io::Error::other(format!("failed to spawn bazel: {e}")))?;
+        let output = child.wait_with_output()?;
+
+        if output.status.success() {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            return Ok(super::info::parse_info_map(&stdout));
+        }
+
+        // Failure: fold the exit code and any stderr text into one message.
+        let code = output.status.code();
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        let detail = match stderr.trim() {
+            "" => format!("exit code {code:?}"),
+            text => format!("exit code {code:?}: {text}"),
+        };
+        Err(io::Error::other(format!("bazel info failed ({detail})")))
+    }
+
+    /// Fetch `server_pid` and `release` with a single `bazel info` call.
+    ///
+    /// The pid is mandatory: a missing or unparsable value is an error. The
+    /// version is `None` when bazel reports no `release` key or when
+    /// [`super::info::parse_release`] yields nothing for it (a non-release
+    /// build), in which case a debug event is logged.
+    pub fn server_info(
+        &self,
+        startup_flags: &[String],
+    ) -> io::Result<(u32, Option<semver::Version>)> {
+        let probed = self.info(startup_flags, &["server_pid", "release"])?;
+
+        let Some(pid) = probed
+            .get("server_pid")
+            .and_then(|raw| raw.parse::<u32>().ok())
+        else {
+            return Err(io::Error::other("bazel info did not return server_pid"));
+        };
+
+        let version = probed.get("release").and_then(|release| {
+            let parsed = super::info::parse_release(release);
+            if parsed.is_none() {
+                tracing::debug!(
+                    release = %release,
+                    "bazel reported a non-release version; \
+                     version-conditional flags will assume latest"
+                );
+            }
+            parsed
+        });
+
+        Ok((pid, version))
+    }
+
+    /// Find the PID of the bazel client currently holding the server lock, by
+    /// running `--noblock_for_lock info server_pid`. When the lock is held
+    /// bazel exits 9 and prints `"Another command (pid=12345) is running."`
+    /// on stderr; the number between `pid=` and the next `)` is returned.
+    ///
+    /// Returns `None` if the probe cannot be spawned or waited on, if it exits
+    /// with anything other than 9, or if stderr does not contain a parsable
+    /// pid.
+    pub fn client_pid(&self, startup_flags: &[String]) -> Option<u32> {
+        let mut probe = self.base_command(startup_flags);
+        probe.args(["--noblock_for_lock", "info", "server_pid"]);
+        probe
+            .stdout(Stdio::null())
+            .stderr(Stdio::piped())
+            .stdin(Stdio::null());
+        let (child, _guard) = super::live::spawn_registered(&mut probe).ok()?;
+        let output = child.wait_with_output().ok()?;
+
+        // Only exit code 9 (lock held) carries the client PID on stderr.
+        if output.status.code() != Some(9) {
+            return None;
+        }
+
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        let (_, after_marker) = stderr.split_once("pid=")?;
+        let (digits, _) = after_marker.split_once(')')?;
+        digits.parse::<u32>().ok()
+    }
+
+    /// Report whether the bazel server lock is held right now, i.e. whether
+    /// the non-blocking `info server_pid` probe exits with code 9. A probe
+    /// that cannot be spawned or waited on counts as "not busy".
+    pub fn is_server_busy(&self, startup_flags: &[String]) -> bool {
+        let mut probe = self.base_command(startup_flags);
+        probe.args(["--noblock_for_lock", "info", "server_pid"]);
+        // Only the exit status matters, so all three streams are discarded.
+        probe
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .stdin(Stdio::null());
+        match super::live::spawn_registered(&mut probe) {
+            Ok((child, _guard)) => match child.wait_with_output() {
+                Ok(output) => output.status.code() == Some(9),
+                Err(_) => false,
+            },
+            Err(_) => false,
+        }
+    }
+
+    /// Look up the server PID without waiting on the lock: ask for
+    /// `output_base` via `--noblock_for_lock info output_base` (computed
+    /// client-side), then read `<output_base>/server/server.pid.txt`.
+    ///
+    /// Returns `None` when the probe cannot run or exits unsuccessfully, or
+    /// when the pid file is missing or does not hold a `u32` — e.g. the server
+    /// isn't running or bazel is unavailable.
+    pub fn server_pid_nonblocking(&self, startup_flags: &[String]) -> Option<u32> {
+        let mut probe = self.base_command(startup_flags);
+        probe.args(["--noblock_for_lock", "info", "output_base"]);
+        probe
+            .stdout(Stdio::piped())
+            .stderr(Stdio::null())
+            .stdin(Stdio::null());
+        let (child, _guard) = super::live::spawn_registered(&mut probe).ok()?;
+        let output = child.wait_with_output().ok()?;
+        if !output.status.success() {
+            return None;
+        }
+
+        // stdout is the output_base path; trim surrounding whitespace first.
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let pid_file = std::path::Path::new(stdout.trim()).join("server/server.pid.txt");
+        let raw_pid = std::fs::read_to_string(pid_file).ok()?;
+        raw_pid.trim().parse::<u32>().ok()
+    }
+
+    /// Seed values for a build: `(server_pid, release_version)`.
+    ///
+    /// With `Real` this is a live probe via [`server_info`]. With `Fake`
+    /// nothing is probed — there is no daemon, the fake child about to be
+    /// forked is its own BES writer — and the result is always `(0, None)`;
+    /// the pid that matters for a fake run is supplied after spawn by
+    /// [`bes_server_pid`]. The `None` version makes the announce line read
+    /// "development version", same as a non-release real bazel.
+    ///
+    /// [`server_info`]: Self::server_info
+    /// [`bes_server_pid`]: Self::bes_server_pid
+    pub fn build_server_info(
+        &self,
+        startup_flags: &[String],
+    ) -> io::Result<(u32, Option<semver::Version>)> {
+        match self {
+            Self::Fake { .. } => Ok((0, None)),
+            Self::Real => self.server_info(startup_flags),
+        }
+    }
+
+    /// Pick the pid that owns the BEP file for an invocation whose client
+    /// process is `child_pid`. For real bazel that is the long-lived daemon
+    /// (`probed_daemon_pid`, as returned by [`build_server_info`]). The fake
+    /// child writes the file itself, so it IS its own server and galvanize's
+    /// `IfOpenForPid` liveness watches the child pid.
+    ///
+    /// [`build_server_info`]: Self::build_server_info
+    pub fn bes_server_pid(&self, child_pid: u32, probed_daemon_pid: u32) -> u32 {
+        match self {
+            Self::Fake { .. } => child_pid,
+            Self::Real => probed_daemon_pid,
+        }
+    }
+}
+
+/// Per-invocation control channel between the parent (aspect) and the fake
+/// bazel child. Behind a trait so a non-Unix transport (named pipe, loopback
+/// socket) can be a drop-in later; only [`SocketPairChannel`] ships now.
+///
+/// Bidirectional by construction (decision 7) so later cancellation tests can
+/// drive the fake mid-stream — this slice only writes the fixture parent→child.
+pub trait ControlChannel: Send {
+    /// The raw fd number the child should read the fixture from. The parent
+    /// passes this to the child via `ASPECT_FAKE_BAZEL_FD` and arranges for it
+    /// to survive `exec` (see [`prepare_command`]).
+    ///
+    /// In the Unix implementation the fd stays owned by the channel, so the
+    /// channel must be kept alive until the child has been spawned.
+    fn child_fd(&self) -> i32;
+
+    /// Write the serialized [`BazelExpectation`] frame to the parent end, then
+    /// shut the write half so the child's `read_to_end` terminates.
+    ///
+    /// One-shot in the Unix implementation: a second call returns an error
+    /// because the parent end has already been consumed.
+    fn send_expectation(&mut self, exp: &BazelExpectation) -> std::io::Result<()>;
+}
+
+/// Env var naming the inherited control fd. Must match basil's `FAKE_FD_ENV`.
+pub const FAKE_FD_ENV: &str = "ASPECT_FAKE_BAZEL_FD";
+
+#[cfg(unix)]
+mod unix {
+    use super::*;
+    use std::io::Write;
+    use std::os::fd::AsRawFd;
+    use std::os::unix::net::UnixStream;
+    use std::os::unix::process::CommandExt;
+
+    use nix::libc;
+
+    /// A Unix `socketpair`-backed control channel. Built with
+    /// [`UnixStream::pair`] — a connected `AF_UNIX`/`SOCK_STREAM` socketpair
+    /// straight from std, so no extra `nix` features are pulled into the crate
+    /// graph. The parent keeps one end (for convenient writes); the raw fd of
+    /// the other is inherited by the child and learned via `ASPECT_FAKE_BAZEL_FD`.
+    pub struct SocketPairChannel {
+        /// Parent write/read end. `take`n once the expectation is sent so the
+        /// write half closes and the child's `read_to_end` terminates.
+        parent: Option<UnixStream>,
+        /// Child end. Held (keeping the fd open) until the child is spawned.
+        child: UnixStream,
+    }
+
+    impl SocketPairChannel {
+        /// Create a fresh connected pair. Fails only if
+        /// [`UnixStream::pair`] fails.
+        pub fn new() -> std::io::Result<Self> {
+            let (parent, child) = UnixStream::pair()?;
+            Ok(Self {
+                parent: Some(parent),
+                child,
+            })
+        }
+    }
+
+    impl ControlChannel for SocketPairChannel {
+        /// Raw fd of the child end; ownership stays with `self.child`.
+        fn child_fd(&self) -> i32 {
+            self.child.as_raw_fd()
+        }
+
+        /// Write the encoded frame to the parent end and close that end.
+        ///
+        /// The parent end is taken out of `self` before writing, so the
+        /// channel is spent after the first call whether or not the write
+        /// succeeds; any later call returns "control channel already sent".
+        fn send_expectation(&mut self, exp: &BazelExpectation) -> std::io::Result<()> {
+            let frame = exp.encode_frame();
+            let mut parent = self
+                .parent
+                .take()
+                .ok_or_else(|| std::io::Error::other("control channel already sent"))?;
+            parent.write_all(&frame)?;
+            parent.flush()?;
+            // Dropping `parent` here closes the parent's end of the socket, so
+            // the child's `read_to_end` on the control fd returns.
+            drop(parent);
+            Ok(())
+        }
+    }
+
+    /// Arrange for `cmd` to inherit the child control fd and learn its number
+    /// via `ASPECT_FAKE_BAZEL_FD`.
+    ///
+    /// `UnixStream::pair()` sets `FD_CLOEXEC` on both ends (std sets it on every
+    /// fd it creates), so without intervention the child end would be closed on
+    /// `exec`. We clear `FD_CLOEXEC` on the inherited fd in a `pre_exec` hook so
+    /// it survives into the fake bazel process.
+    ///
+    /// The flag is cleared only inside the forked child, never in the parent,
+    /// so the parent's own copy of the fd keeps `FD_CLOEXEC`. If either
+    /// `fcntl` call fails, the hook returns the OS error.
+    pub fn prepare_command(cmd: &mut Command, child_fd: i32) {
+        cmd.env(FAKE_FD_ENV, child_fd.to_string());
+        // SAFETY: `pre_exec` runs in the forked child before exec. We only call
+        // async-signal-safe libc `fcntl` on a single fd; no allocation, no
+        // locks, no Rust runtime state touched.
+        unsafe {
+            cmd.pre_exec(move || {
+                // Read the current fd flags so only FD_CLOEXEC is changed.
+                let flags = libc::fcntl(child_fd, libc::F_GETFD);
+                if flags < 0 {
+                    return Err(std::io::Error::last_os_error());
+                }
+                let cleared = flags & !libc::FD_CLOEXEC;
+                if libc::fcntl(child_fd, libc::F_SETFD, cleared) < 0 {
+                    return Err(std::io::Error::last_os_error());
+                }
+                Ok(())
+            });
+        }
+    }
+}
+
+#[cfg(unix)]
+pub use unix::{SocketPairChannel, prepare_command};
+
+/// Create a new control channel for a single `Fake` spawn. On Unix this is a
+/// boxed [`SocketPairChannel`]; any error from creating the pair is returned
+/// unchanged.
+#[cfg(unix)]
+pub fn open_control_channel() -> std::io::Result<Box<dyn ControlChannel>> {
+    let channel = SocketPairChannel::new()?;
+    let boxed: Box<dyn ControlChannel> = Box::new(channel);
+    Ok(boxed)
+}
+
+/// TODO(windows): a named-pipe / loopback `ControlChannel` impl. The trait
+/// seam exists so this is a drop-in; the `Fake` backend is Unix-only today.
+///
+/// Until then this always returns an error, so a `Fake` spawn fails cleanly
+/// on non-Unix targets instead of failing to compile.
+#[cfg(not(unix))]
+pub fn open_control_channel() -> std::io::Result<Box<dyn ControlChannel>> {
+    Err(std::io::Error::other(
+        "the fake bazel backend is Unix-only in this slice (TODO: Windows transport)",
+    ))
+}
+
+/// Non-Unix counterpart of the Unix `prepare_command`, present so callers that
+/// invoke `backend::prepare_command` without a `cfg` guard still compile.
+///
+/// Intentionally a no-op: on these targets `open_control_channel` always
+/// returns an error, so a caller that opens the channel first never gets as
+/// far as needing an inherited fd.
+#[cfg(not(unix))]
+pub fn prepare_command(_cmd: &mut Command, _child_fd: i32) {}
diff --git a/crates/axl-runtime/src/engine/bazel/build.rs b/crates/axl-runtime/src/engine/bazel/build.rs
index 6529accc1..9f2fcef9a 100644
--- a/crates/axl-runtime/src/engine/bazel/build.rs
+++ b/crates/axl-runtime/src/engine/bazel/build.rs
@@ -747,6 +747,7 @@ impl Build {
     // TODO: this should return a thiserror::Error
     pub fn spawn(
         verb: &str,
+        backend: super::backend::BazelBackend,
         targets: impl IntoIterator<Item = String>,
         (build_events, sinks, iters): (bool, Vec<BuildEventSink>, Vec<BuildEventIter>),
         (execution_logs, execlog_sinks): (bool, Vec<ExecLogSink>),
@@ -759,7 +760,10 @@ impl Build {
         announce: AnnounceSpawn,
         rt: AsyncRuntime,
     ) -> Result<Build, std::io::Error> {
-        let (pid, version) = super::info::server_info()?;
+        // `Real` probes the live daemon for its pid + version; `Fake` has no
+        // daemon and returns `(0, None)` — its per-invocation server pid is
+        // the child we spawn below (see `bes_server_pid`).
+        let (pid, version) = backend.build_server_info(&[])?;
 
         let span = tracing::info_span!(
             "ctx.bazel.build",
@@ -772,7 +776,9 @@ impl Build {
 
         let targets: Vec<String> = targets.into_iter().collect();
 
-        let mut cmd = super::bazel_command();
+        // `Real` → `bazel_command()` (sets the anti-inception env, honors
+        // BAZEL_REAL). `Fake` → the fake binary directly, NO global env.
+        let mut cmd = backend.command();
         cmd.args(startup_flags);
         cmd.arg(verb);
         cmd.args(flags);
@@ -859,10 +865,34 @@ impl Build {
         cmd.stderr(stderr);
         cmd.stdin(Stdio::null());
 
+        // Fake backend: open a per-invocation control channel (socketpair),
+        // arrange for the fake child to inherit the read end, and remember
+        // the expectation to send once the child is alive. Each spawn mints
+        // its own channel, so concurrent test workers never collide.
+        let mut control: Option<Box<dyn super::backend::ControlChannel>> = None;
+        let fake_expectation =
+            if let super::backend::BazelBackend::Fake { expectation, .. } = &backend {
+                let chan = super::backend::open_control_channel()?;
+                super::backend::prepare_command(&mut cmd, chan.child_fd());
+                control = Some(chan);
+                Some(expectation.clone())
+            } else {
+                None
+            };
+
         let child = cmd
             .spawn()
             .map_err(|e| io::Error::other(format!("failed to spawn bazel: {e}")))?;
 
+        // Hand the declared expectation to the fake over the control channel
+        // now that it's running. Dropping the parent end (inside
+        // `send_expectation`) closes the write half so the fake's
+        // `read_to_end` returns and it can synthesize the BES stream.
+        if let (Some(mut chan), Some(exp)) = (control.take(), fake_expectation) {
+            chan.send_expectation(&exp)
+                .map_err(|e| io::Error::other(format!("failed to send fake expectation: {e}")))?;
+        }
+
         // Register the bazel client with the live-subprocess registry so
         // aspect-cli's OS-signal handler can forward SIGINT to it on
         // CI cancellation. The guard is stored on `Self` and unregisters
@@ -875,8 +905,16 @@ impl Build {
         // REMOTE_CACHE_EVICTED state. The server (daemon) pid passed to
         // galvanize stays alive across invocations and cannot signal
         // end-of-build, which is why we want a separate per-invocation pid.
+        // The backend decides which pid plays the BES-writer role: the daemon
+        // for `Real`, the fake child itself for `Fake` (it has no daemon).
+        let server_pid = backend.bes_server_pid(child.id(), pid);
         let build_event_stream = match bes_path {
-            Some(p) => Some(BuildEventStream::spawn(p, pid, child.id(), file_sinks)?),
+            Some(p) => Some(BuildEventStream::spawn(
+                p,
+                server_pid,
+                child.id(),
+                file_sinks,
+            )?),
             None => None,
         };
 
@@ -1100,7 +1138,10 @@ pub(crate) fn build_methods(registry: &mut MethodsBuilder) {
 #[cfg(test)]
 mod tests {
     //! End-to-end coverage of `ctx.bazel.build` via the `basil` fake-bazel
-    //! binary, selected per-test via `--scenario=<name>`.
+    //! binary. Each test mints `ctx.bazel` with a `Fake` backend
+    //! (`.with_fake_bazel()` / `.with_fake_bazel_expectation(...)`) that
+    //! fork+execs basil and feeds it a declared `BazelExpectation` over the
+    //! socketpair control channel; basil synthesizes the BES stream + exit.
 
     /// Iter handle subscribed pre-spawn receives every event from a clean
     /// build, even on the warm-daemon path that drops late subscribers.
@@ -1111,7 +1152,6 @@ mod tests {
 def _impl(ctx):
     iter = bazel.build_events.iterator()
     build = ctx.bazel.build(
-        flags = ["--scenario=success"],
         build_events = [iter],
         stderr = None,
     )
@@ -1157,7 +1197,6 @@ Test = task(implementation = _impl)
 def _impl(ctx):
     iter = bazel.build_events.iterator()
     build = ctx.bazel.build(
-        flags = ["--scenario=cache_evicted_no_retry"],
         build_events = [iter],
         stderr = None,
     )
@@ -1169,7 +1208,11 @@ def _impl(ctx):
 Test = task(implementation = _impl)
 "#,
             )
-            .with_fake_bazel()
+            .with_fake_bazel_expectation(basil_core::BazelExpectation::new(
+                Vec::new(),
+                basil_core::BuildResult::CacheEvicted,
+                None,
+            ))
             .run_task(0)
         });
 
@@ -1190,7 +1233,6 @@ Test = task(implementation = _impl)
 def _impl(ctx):
     iter = bazel.build_events.iterator()
     first = ctx.bazel.build(
-        flags = ["--scenario=success"],
         build_events = [iter],
         stderr = None,
     )
@@ -1198,7 +1240,6 @@ def _impl(ctx):
         pass
     first.wait()
     second = ctx.bazel.build(
-        flags = ["--scenario=success"],
         build_events = [iter],
         stderr = None,
     )
@@ -1310,7 +1351,6 @@ Test = task(implementation = _impl)
 def _impl(ctx):
     iter = bazel.build_events.iterator(kinds = ["build_finished"])
     build = ctx.bazel.build(
-        flags = ["--scenario=success"],
         build_events = [iter],
         stderr = None,
     )
@@ -1342,7 +1382,6 @@ Test = task(implementation = _impl)
 def _impl(ctx):
     iter = bazel.build_events.iterator()
     build = ctx.bazel.build(
-        flags = ["--scenario=success"],
         build_events = [iter],
         stderr = None,
     )
@@ -1407,7 +1446,6 @@ def _impl(ctx):
         retry_min_delay = "0s",
     )
     build = ctx.bazel.build(
-        flags = ["--scenario=success"],
         build_events = [iter, sink],
         stderr = None,
     )
@@ -1440,13 +1478,11 @@ def _impl(ctx):
     sink = bazel.build_events.grpc(uri = "not a uri", max_retries = 0, retry_min_delay = "0s")
     iter = bazel.build_events.iterator()
     first = ctx.bazel.build(
-        flags = ["--scenario=success"],
         build_events = [iter, sink],
         stderr = None,
     )
     iter2 = bazel.build_events.iterator()
     second = ctx.bazel.build(
-        flags = ["--scenario=success"],
         build_events = [iter2, sink],
         stderr = None,
     )
diff --git a/crates/axl-runtime/src/engine/bazel/cancel.rs b/crates/axl-runtime/src/engine/bazel/cancel.rs
index 3636406f5..4465c1ba5 100644
--- a/crates/axl-runtime/src/engine/bazel/cancel.rs
+++ b/crates/axl-runtime/src/engine/bazel/cancel.rs
@@ -13,7 +13,6 @@ use starlark::values::Trace;
 use starlark::values::ValueLike;
 use starlark::values::starlark_value;
 
-use super::info;
 use super::process;
 
 #[derive(Debug, ProvidesStaticType, Display, Trace, NoSerialize, Allocative)]
@@ -23,13 +22,21 @@ pub struct Cancellation {
     startup_flags: Vec<String>,
     #[allocative(skip)]
     force_kill_after_ms: u64,
+    #[allocative(skip)]
+    #[trace(unsafe_ignore)]
+    backend: super::backend::BazelBackend,
 }
 
 impl Cancellation {
-    pub fn new(startup_flags: Vec<String>, force_kill_after_ms: u64) -> Self {
+    pub fn new(
+        startup_flags: Vec<String>,
+        force_kill_after_ms: u64,
+        backend: super::backend::BazelBackend,
+    ) -> Self {
         Self {
             startup_flags,
             force_kill_after_ms,
+            backend,
         }
     }
 }
@@ -55,7 +62,9 @@ pub(crate) fn cancellation_methods(registry: &mut MethodsBuilder) {
     #[starlark(attribute)]
     fn busy<'v>(this: values::Value<'v>) -> anyhow::Result<bool> {
         let cancellation = this.downcast_ref::<Cancellation>().unwrap();
-        Ok(info::is_server_busy(&cancellation.startup_flags))
+        Ok(cancellation
+            .backend
+            .is_server_busy(&cancellation.startup_flags))
     }
 
     /// Block until the cancelled invocation finishes.
@@ -82,7 +91,10 @@ pub(crate) fn cancellation_methods(registry: &mut MethodsBuilder) {
 
         let start = std::time::Instant::now();
 
-        while info::is_server_busy(&cancellation.startup_flags) {
+        while cancellation
+            .backend
+            .is_server_busy(&cancellation.startup_flags)
+        {
             let elapsed = start.elapsed();
 
             // Manual timeout: return False without escalation.
@@ -94,10 +106,13 @@ pub(crate) fn cancellation_methods(registry: &mut MethodsBuilder) {
             if force_kill_after_ms > 0
                 && elapsed >= std::time::Duration::from_millis(force_kill_after_ms)
             {
-                force_kill(&cancellation.startup_flags);
+                force_kill(&cancellation.backend, &cancellation.startup_flags);
                 // After force-kill, wait indefinitely for the server to stop.
                 // Reset by breaking out and falling through to return true.
-                while info::is_server_busy(&cancellation.startup_flags) {
+                while cancellation
+                    .backend
+                    .is_server_busy(&cancellation.startup_flags)
+                {
                     std::thread::sleep(std::time::Duration::from_millis(poll_ms));
                 }
                 return Ok(true);
@@ -123,7 +138,10 @@ pub(crate) fn cancellation_methods(registry: &mut MethodsBuilder) {
     /// server could be found (the build may have already finished).
     fn force<'v>(this: values::Value<'v>) -> anyhow::Result<bool> {
         let cancellation = this.downcast_ref::<Cancellation>().unwrap();
-        Ok(force_kill(&cancellation.startup_flags))
+        Ok(force_kill(
+            &cancellation.backend,
+            &cancellation.startup_flags,
+        ))
     }
 }
 
@@ -163,8 +181,8 @@ const FORCE_KILL_POLL_MS: u64 = 100;
 ///
 /// If the client still doesn't exit after the 3rd SIGINT, we SIGKILL both
 /// the client and server ourselves as a last resort.
-fn force_kill(startup_flags: &[String]) -> bool {
-    if let Some(client_pid) = info::client_pid(startup_flags) {
+fn force_kill(backend: &super::backend::BazelBackend, startup_flags: &[String]) -> bool {
+    if let Some(client_pid) = backend.client_pid(startup_flags) {
         // 2nd SIGINT: repeated cancel request.
         tracing::warn!("cancel_invocation: sending 2nd SIGINT to Bazel client PID {client_pid}");
         process::sigint(client_pid);
@@ -183,7 +201,7 @@ fn force_kill(startup_flags: &[String]) -> bool {
                      after {FORCE_KILL_TIMEOUT_MS}ms, sending SIGKILL"
                 );
                 process::sigkill(client_pid);
-                if let Some(server_pid) = info::server_pid_nonblocking(startup_flags) {
+                if let Some(server_pid) = backend.server_pid_nonblocking(startup_flags) {
                     tracing::warn!(
                         "cancel_invocation: also sending SIGKILL to Bazel server PID \
                          {server_pid}"
@@ -199,8 +217,8 @@ fn force_kill(startup_flags: &[String]) -> bool {
 
     // Client is gone (crashed or already exited). Only SIGKILL the server
     // if it's still busy — otherwise there's nothing to cancel.
-    if info::is_server_busy(startup_flags) {
-        if let Some(pid) = info::server_pid_nonblocking(startup_flags) {
+    if backend.is_server_busy(startup_flags) {
+        if let Some(pid) = backend.server_pid_nonblocking(startup_flags) {
             tracing::warn!(
                 "cancel_invocation: Bazel client not found, sending SIGKILL to \
                  server PID {pid}"
diff --git a/crates/axl-runtime/src/engine/bazel/health_check.rs b/crates/axl-runtime/src/engine/bazel/health_check.rs
index 57bf1030d..7df6d7891 100644
--- a/crates/axl-runtime/src/engine/bazel/health_check.rs
+++ b/crates/axl-runtime/src/engine/bazel/health_check.rs
@@ -77,10 +77,12 @@ struct CheckResult {
 }
 
 /// Runs `bazel [startup_flags] --noblock_for_lock info server_pid` and returns the result.
-fn check_bazel_server(startup_flags: &[String]) -> CheckResult {
-    let mut cmd = super::bazel_command();
-    cmd.args(startup_flags)
-        .arg("--noblock_for_lock")
+fn check_bazel_server(
+    backend: &super::backend::BazelBackend,
+    startup_flags: &[String],
+) -> CheckResult {
+    let mut cmd = backend.base_command(startup_flags);
+    cmd.arg("--noblock_for_lock")
         .arg("info")
         .arg("server_pid")
         .stdout(Stdio::piped())
@@ -123,10 +125,12 @@ fn extract_server_pid(server_pid_file: Option<&Path>) -> Option<u32> {
 }
 
 /// Tries to determine the Bazel output base by running `bazel [startup_flags] info output_base`.
-fn get_output_base(startup_flags: &[String]) -> Option<PathBuf> {
-    let mut cmd = super::bazel_command();
-    cmd.args(startup_flags)
-        .arg("info")
+fn get_output_base(
+    backend: &super::backend::BazelBackend,
+    startup_flags: &[String],
+) -> Option<PathBuf> {
+    let mut cmd = backend.base_command(startup_flags);
+    cmd.arg("info")
         .arg("output_base")
         .stdout(Stdio::piped())
         .stderr(Stdio::null())
@@ -239,11 +243,11 @@ fn output_base_from_flags(startup_flags: &[String]) -> Option<PathBuf> {
 /// On success, best-effort cleans up stranded sandbox state from a prior
 /// SIGKILL'd invocation (bazelbuild/bazel#23880) before the next bazel
 /// command runs.
-pub fn run(startup_flags: &[String]) -> HealthCheckResult {
-    let result = check_bazel_server(startup_flags);
+pub fn run(backend: &super::backend::BazelBackend, startup_flags: &[String]) -> HealthCheckResult {
+    let result = check_bazel_server(backend, startup_flags);
 
     if result.success {
-        if let Some(base) = get_output_base(startup_flags) {
+        if let Some(base) = get_output_base(backend, startup_flags) {
             let _ = cleanup_stranded_sandbox_state(&base);
         }
         return HealthCheckResult {
@@ -297,7 +301,7 @@ pub fn run(startup_flags: &[String]) -> HealthCheckResult {
         super::process::sigkill(pid);
     }
 
-    let retry = check_bazel_server(startup_flags);
+    let retry = check_bazel_server(backend, startup_flags);
 
     if retry.success {
         let _ = cleanup_stranded_sandbox_state(&output_base);
diff --git a/crates/axl-runtime/src/engine/bazel/info.rs b/crates/axl-runtime/src/engine/bazel/info.rs
index e1f1d2131..4f68ab40d 100644
--- a/crates/axl-runtime/src/engine/bazel/info.rs
+++ b/crates/axl-runtime/src/engine/bazel/info.rs
@@ -1,7 +1,20 @@
-use std::io;
-use std::process::Stdio;
+//! Parsing helpers for `bazel info` output. The spawning lives on
+//! [`super::backend::BazelBackend`] (so it picks Real vs Fake); this module
+//! only turns bazel's text output into typed values.
 
-use anyhow::anyhow;
+use std::collections::BTreeMap;
+
+/// Collect the `key: value` lines of `bazel info` output into a map. Only the
+/// first `": "` on a line separates key from value, so colons in values survive.
+pub(super) fn parse_info_map(stdout: &str) -> BTreeMap<String, String> {
+    stdout
+        .lines()
+        .filter_map(|line| line.split_once(": "))
+        .fold(BTreeMap::new(), |mut parsed, (key, value)| {
+            parsed.insert(key.trim().to_string(), value.trim().to_string());
+            parsed
+        })
+}
 
 /// Parse the value of `bazel info release` into a semver version.
 ///
@@ -10,7 +23,7 @@ use anyhow::anyhow;
 /// number — `development version` (built from source) or `no_version` —
 /// and return `None` rather than erroring, so a non-release Bazel doesn't
 /// abort the task. Callers treat `None` as "version unknown".
-fn parse_release(value: &str) -> Option<semver::Version> {
+pub(super) fn parse_release(value: &str) -> Option<semver::Version> {
     let ver_str = value.trim().trim_start_matches("release ").trim();
     // Drop any pre-release suffix so an rc/pre build (`9.0.0-rc1`) matches the
     // same constraints its eventual release will.
@@ -18,150 +31,6 @@ fn parse_release(value: &str) -> Option<semver::Version> {
     semver::Version::parse(ver_str).ok()
 }
 
-/// Query bazel server info (server_pid, release version).
-///
-/// The version is `None` when Bazel reports a non-release build (see
-/// [`parse_release`]); the pid is always required.
-pub fn server_info() -> io::Result<(u32, Option<semver::Version>)> {
-    server_info_with_startup_flags(&[])
-}
-
-/// Query bazel server info with startup flags prepended before the subcommand.
-pub fn server_info_with_startup_flags(
-    startup_flags: &[String],
-) -> io::Result<(u32, Option<semver::Version>)> {
-    let mut cmd = super::bazel_command();
-    cmd.args(startup_flags);
-    cmd.arg("info");
-    cmd.arg("server_pid");
-    cmd.arg("release");
-    cmd.stdout(Stdio::piped());
-    cmd.stderr(Stdio::piped());
-    cmd.stdin(Stdio::null());
-    // `bazel info` (without --noblock_for_lock) can hang on a busy
-    // server. Register so the OS signal handler can SIGINT it on
-    // CI-cancel.
-    let (child, _guard) = super::live::spawn_registered(&mut cmd)
-        .map_err(|e| io::Error::other(format!("failed to spawn bazel: {e}")))?;
-    let c = child.wait_with_output()?;
-    if !c.status.success() {
-        let stderr = String::from_utf8_lossy(&c.stderr);
-        let stderr = stderr.trim();
-        let detail = if stderr.is_empty() {
-            format!("exit code {:?}", c.status.code())
-        } else {
-            format!("exit code {:?}: {}", c.status.code(), stderr)
-        };
-        return Err(io::Error::other(anyhow!(
-            "failed to determine Bazel server info ({})",
-            detail
-        )));
-    }
-
-    // When bazel info is called with multiple keys it emits "key: value" lines.
-    let stdout = String::from_utf8_lossy(&c.stdout);
-    let mut pid: Option<u32> = None;
-    let mut version: Option<semver::Version> = None;
-    for line in stdout.lines() {
-        if let Some((key, value)) = line.split_once(": ") {
-            match key.trim() {
-                "server_pid" => {
-                    pid = value.trim().parse::<u32>().ok();
-                }
-                "release" => {
-                    version = parse_release(value);
-                    if version.is_none() {
-                        // Not an error; version-conditional flags assume latest.
-                        // Logged for diagnosability when flag resolution looks off.
-                        tracing::debug!(
-                            release = %value.trim(),
-                            "bazel reported a non-release version; \
-                             version-conditional flags will assume latest"
-                        );
-                    }
-                }
-                _ => {}
-            }
-        }
-    }
-
-    let pid =
-        pid.ok_or_else(|| io::Error::other(anyhow!("bazel info did not return server_pid")))?;
-
-    Ok((pid, version))
-}
-
-/// Determine the real bazel client PID by running `bazel --noblock_for_lock info`.
-///
-/// When another invocation holds the lock, bazel exits with code 9 and prints:
-///   "Another command (pid=12345) is running. Exiting immediately."
-/// We parse the PID from that stderr message.
-pub fn client_pid(startup_flags: &[String]) -> Option<u32> {
-    let mut cmd = super::bazel_command();
-    cmd.args(startup_flags);
-    cmd.arg("--noblock_for_lock");
-    cmd.arg("info");
-    cmd.arg("server_pid");
-    cmd.stdout(Stdio::null());
-    cmd.stderr(Stdio::piped());
-    cmd.stdin(Stdio::null());
-    let (child, _guard) = super::live::spawn_registered(&mut cmd).ok()?;
-    let output = child.wait_with_output().ok()?;
-    // Exit code 9 means the lock is held — stderr contains the client PID.
-    if output.status.code() != Some(9) {
-        return None;
-    }
-    let stderr = String::from_utf8_lossy(&output.stderr);
-    // Parse "Another command (pid=12345) is running."
-    let start = stderr.find("pid=")? + 4;
-    let rest = &stderr[start..];
-    let end = rest.find(')')?;
-    rest[..end].parse::<u32>().ok()
-}
-
-/// Check if the bazel server lock is currently held by a client.
-pub fn is_server_busy(startup_flags: &[String]) -> bool {
-    let mut cmd = super::bazel_command();
-    cmd.args(startup_flags);
-    cmd.arg("--noblock_for_lock");
-    cmd.arg("info");
-    cmd.arg("server_pid");
-    cmd.stdout(Stdio::null());
-    cmd.stderr(Stdio::null());
-    cmd.stdin(Stdio::null());
-    let Ok((child, _guard)) = super::live::spawn_registered(&mut cmd) else {
-        return false;
-    };
-    matches!(child.wait_with_output(), Ok(o) if o.status.code() == Some(9))
-}
-
-/// Query the server PID without blocking on the lock.
-///
-/// Resolves `output_base` via `bazel --noblock_for_lock info output_base`
-/// (computed client-side, never blocks on the lock) and reads the PID from
-/// `<output_base>/server/server.pid.txt`.
-///
-/// Returns `None` only if the server is not running or bazel is not available.
-pub fn server_pid_nonblocking(startup_flags: &[String]) -> Option<u32> {
-    let mut cmd = super::bazel_command();
-    cmd.args(startup_flags);
-    cmd.arg("--noblock_for_lock");
-    cmd.arg("info");
-    cmd.arg("output_base");
-    cmd.stdout(Stdio::piped());
-    cmd.stderr(Stdio::null());
-    cmd.stdin(Stdio::null());
-    let (child, _guard) = super::live::spawn_registered(&mut cmd).ok()?;
-    let output = child.wait_with_output().ok()?;
-    if !output.status.success() {
-        return None;
-    }
-    let output_base = String::from_utf8_lossy(&output.stdout);
-    let pid_path = std::path::Path::new(output_base.trim()).join("server/server.pid.txt");
-    let contents = std::fs::read_to_string(pid_path).ok()?;
-    contents.trim().parse::<u32>().ok()
-}
-
 #[cfg(test)]
 mod tests {
     use super::parse_release;
diff --git a/crates/axl-runtime/src/engine/bazel/mod.rs b/crates/axl-runtime/src/engine/bazel/mod.rs
index d47a0ec26..98811fc18 100644
--- a/crates/axl-runtime/src/engine/bazel/mod.rs
+++ b/crates/axl-runtime/src/engine/bazel/mod.rs
@@ -33,6 +33,7 @@ use crate::engine::store::Env;
 use axl_proto;
 use axl_types::stream::Writable;
 
+pub mod backend;
 mod build;
 mod cancel;
 mod health_check;
@@ -130,12 +131,13 @@ fn partition_build_events(
 fn resolve_rc_version(
     requested: Option<String>,
     rc: &bazelrc::BazelRC,
+    backend: &backend::BazelBackend,
 ) -> anyhow::Result<Option<semver::Version>> {
     if let Some(s) = requested {
         return Ok(semver::Version::parse(&s).ok());
     }
     if rc.has_version_gated_options() {
-        return Ok(info::server_info().ok().and_then(|t| t.1));
+        return Ok(backend.server_info(&[]).ok().and_then(|t| t.1));
     }
     Ok(None)
 }
@@ -170,9 +172,11 @@ fn resolve_flags<'v>(
 /// (`build` / `test` / `query`) so they filter conditional flags identically.
 fn resolve_flags_for_running_bazel<'v>(
     items: &[Either<values::StringValue<'v>, (values::StringValue<'v>, values::StringValue<'v>)>],
+    backend: &backend::BazelBackend,
 ) -> anyhow::Result<Vec<String>> {
     let version = if items.iter().any(|f| f.is_right()) {
-        info::server_info()
+        backend
+            .server_info(&[])
             .map_err(|e| anyhow::anyhow!("failed to get Bazel server info: {}", e))?
             .1
     } else {
@@ -205,6 +209,11 @@ pub struct Bazel<'v> {
     /// source of both command and startup flags for every Bazel invocation.
     #[allocative(skip)]
     pub active_rc: RefCell<Option<values::Value<'v>>>,
+    /// Which bazel this context drives — `Real` (production) or a `Fake`
+    /// carrying the fake-bazel path + declared expectation (testing). Carried
+    /// on the value so it's per-value and parallel-safe; not a Starlark value,
+    /// so it's untraced.
+    pub backend: backend::BazelBackend,
 }
 
 unsafe impl<'v> Trace<'v> for Bazel<'v> {
@@ -229,6 +238,7 @@ impl<'v> values::Freeze for Bazel<'v> {
                 Some(v) => Some(v.freeze(freezer)?),
                 None => None,
             },
+            backend: self.backend,
         })
     }
 }
@@ -246,6 +256,7 @@ impl<'v> values::StarlarkValue<'v> for Bazel<'v> {
 pub struct FrozenBazel {
     #[allocative(skip)]
     pub active_rc: Option<values::FrozenValue>,
+    pub backend: backend::BazelBackend,
 }
 
 starlark_simple_value!(FrozenBazel);
@@ -301,8 +312,9 @@ fn resolve_invocation_flags<'v>(
     command: &str,
     rc_param: NoneOr<values::Value<'v>>,
     flags: &[Either<values::StringValue<'v>, (values::StringValue<'v>, values::StringValue<'v>)>],
+    backend: &backend::BazelBackend,
 ) -> anyhow::Result<(Vec<String>, Vec<String>)> {
-    let extras = resolve_flags_for_running_bazel(flags)?;
+    let extras = resolve_flags_for_running_bazel(flags, backend)?;
     match effective_rc(this, rc_param) {
         Some(rc) => {
             let (startup, mut cmd) = rc.resolve_for_command(command)?;
@@ -313,6 +325,18 @@ fn resolve_invocation_flags<'v>(
     }
 }
 
+/// Fetch the `BazelBackend` stored on a `Bazel` (or `FrozenBazel`) receiver.
+/// Any other value shape falls back to `Real`, i.e. production behavior.
+fn read_backend<'v>(this: values::Value<'v>) -> backend::BazelBackend {
+    if let Some(live) = this.downcast_ref::<Bazel>() {
+        return live.backend.clone();
+    }
+    match this.downcast_ref::<FrozenBazel>() {
+        Some(frozen) => frozen.backend.clone(),
+        None => backend::BazelBackend::Real,
+    }
+}
+
 #[starlark_module]
 pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
     /// The active `RunCommand` set via `use_rc`, or `None` if none is active.
@@ -442,12 +466,14 @@ pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
             Either::Left(b) => (b, vec![]),
             Either::Right(sinks) => (true, sinks.items),
         };
+        let backend = read_backend(this);
         let (resolved_flags, resolved_startup_flags) =
-            resolve_invocation_flags(this, "build", rc, &flags.items)?;
+            resolve_invocation_flags(this, "build", rc, &flags.items, &backend)?;
         let env = Env::from_eval(eval)?;
         let (stdout, stderr) = resolve_stdio(stdio, stdout, stderr)?;
         let build = build::Build::spawn(
             "build",
+            backend,
             targets.items.iter().map(|f| f.as_str().to_string()),
             build_events,
             execution_log,
@@ -554,12 +580,14 @@ pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
             Either::Left(b) => (b, vec![]),
             Either::Right(sinks) => (true, sinks.items),
         };
+        let backend = read_backend(this);
         let (resolved_flags, resolved_startup_flags) =
-            resolve_invocation_flags(this, "test", rc, &flags.items)?;
+            resolve_invocation_flags(this, "test", rc, &flags.items, &backend)?;
         let env = Env::from_eval(eval)?;
         let (stdout, stderr) = resolve_stdio(stdio, stdout, stderr)?;
         let test = build::Build::spawn(
             "test",
+            backend,
             targets.items.iter().map(|f| f.as_str().to_string()),
             build_events,
             execution_log,
@@ -612,7 +640,8 @@ pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
         #[starlark(require = named, default = false)] announce_version: bool,
         #[starlark(require = named, default = false)] announce_command: bool,
     ) -> anyhow::Result<query::Query> {
-        let extras = resolve_flags_for_running_bazel(&flags.items)?;
+        let backend = read_backend(this);
+        let extras = resolve_flags_for_running_bazel(&flags.items, &backend)?;
         let (startup, command_flags) = match effective_rc(this, rc) {
             Some(rc) => {
                 let (startup, mut base) = rc.resolve_for_command("query")?;
@@ -622,6 +651,7 @@ pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
             None => (read_startup_flags(this)?, extras),
         };
         query::run(
+            &backend,
             &expr,
             &startup,
             &command_flags,
@@ -650,34 +680,13 @@ pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
     /// ```
     fn info<'v>(this: values::Value<'v>) -> anyhow::Result<SmallMap<String, String>> {
         let startup_flags = read_startup_flags(this)?;
-
-        let mut cmd = bazel_command();
-        cmd.args(&startup_flags);
-        cmd.arg("info");
-        cmd.stdout(Stdio::piped());
-        cmd.stderr(Stdio::null());
-        cmd.stdin(Stdio::null());
-        // Register with the live-bazel registry so OS-signal cancellation
-        // can reach this `bazel info` even if the daemon is busy.
-        let (child, _guard) = live::spawn_registered(&mut cmd)
-            .map_err(|e| anyhow::anyhow!("failed to spawn bazel: {}", e))?;
-        let output = child
-            .wait_with_output()
-            .map_err(|e| anyhow::anyhow!("failed to wait on bazel: {}", e))?;
-
-        if !output.status.success() {
-            anyhow::bail!(
-                "bazel info failed with exit code {:?}",
-                output.status.code()
-            );
-        }
-
-        let stdout = String::from_utf8_lossy(&output.stdout);
+        let backend = read_backend(this);
+        let parsed = backend
+            .info(&startup_flags, &[])
+            .map_err(|e| anyhow::anyhow!("bazel info failed: {}", e))?;
         let mut map = SmallMap::new();
-        for line in stdout.lines() {
-            if let Some((key, value)) = line.split_once(": ") {
-                map.insert(key.trim().to_string(), value.trim().to_string());
-            }
+        for (key, value) in parsed {
+            map.insert(key, value);
         }
         Ok(map)
     }
@@ -703,7 +712,7 @@ pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
         this: values::Value<'v>,
     ) -> anyhow::Result<health_check::HealthCheckResult> {
         let startup_flags = read_startup_flags(this)?;
-        Ok(health_check::run(&startup_flags))
+        Ok(health_check::run(&read_backend(this), &startup_flags))
     }
 
     /// Detect and best-effort repair runner-poisoning sandbox state
@@ -811,7 +820,7 @@ pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
         let rc = bazelrc::BazelRC::new(root, &startup_flags.items, &flags.items)
             .map_err(|e| anyhow::anyhow!("{}", e))?
             .with_skip_config_if_missing(skip_config_if_missing.items);
-        let version = resolve_rc_version(version.into_option(), &rc)?;
+        let version = resolve_rc_version(version.into_option(), &rc, &read_backend(this))?;
         Ok(rc.with_version(version))
     }
 
@@ -888,14 +897,19 @@ pub(crate) fn bazel_methods(registry: &mut MethodsBuilder) {
     ) -> anyhow::Result<cancel::Cancellation> {
         let all_flags = read_startup_flags(this)?;
         let force_kill_after_ms = force_kill_after_ms.max(0) as u64;
+        let backend = read_backend(this);
 
         // Send SIGINT to the Bazel client holding the server lock.
         // client_pid() uses --noblock_for_lock so it returns immediately.
-        if let Some(pid) = info::client_pid(&all_flags) {
+        if let Some(pid) = backend.client_pid(&all_flags) {
             process::sigint(pid);
         }
 
-        Ok(cancel::Cancellation::new(all_flags, force_kill_after_ms))
+        Ok(cancel::Cancellation::new(
+            all_flags,
+            force_kill_after_ms,
+            backend,
+        ))
     }
 }
 
diff --git a/crates/axl-runtime/src/engine/bazel/query.rs b/crates/axl-runtime/src/engine/bazel/query.rs
index 5e6445dea..71649e099 100644
--- a/crates/axl-runtime/src/engine/bazel/query.rs
+++ b/crates/axl-runtime/src/engine/bazel/query.rs
@@ -154,17 +154,18 @@ fn query_failure_error(expr: &str, exit_code: Option<i32>, stderr: &str) -> anyh
 }
 
 /// Run `bazel query <expr>` with the given startup + command flags and decode
-/// the streamed-proto result into a `TargetSet`. Fails with Bazel's own stderr
+/// the streamed-proto result into a `Query`. Fails with Bazel's own stderr
 /// on a non-zero exit — a failed query is not the same as one that matched
-/// nothing. Used by `ctx.bazel.query(expr, rc=…)`.
+/// nothing. Used by `ctx.bazel.query(expr, rc=…)`. Forking goes through the
+/// `BazelBackend` so the fake backend can answer queries in tests.
 pub fn run(
+    backend: &super::backend::BazelBackend,
     expr: &str,
     startup_flags: &[String],
     flags: &[String],
     announce: super::build::AnnounceSpawn,
 ) -> anyhow::Result<Query> {
-    let mut cmd = super::bazel_command();
-    cmd.args(startup_flags);
+    let mut cmd = backend.base_command(startup_flags);
     cmd.arg("query");
     cmd.arg(expr);
     cmd.args(flags);
@@ -188,7 +189,8 @@ pub fn run(
     // extra `bazel info`, so only pay for it when actually announcing.
     if announce.version || announce.command {
         let version = if announce.version {
-            super::info::server_info_with_startup_flags(startup_flags)
+            backend
+                .server_info(startup_flags)
                 .ok()
                 .and_then(|(_pid, version)| version)
         } else {
diff --git a/crates/axl-runtime/src/engine/builtins.rs b/crates/axl-runtime/src/engine/builtins.rs
index a63552ba1..ddda1e686 100644
--- a/crates/axl-runtime/src/engine/builtins.rs
+++ b/crates/axl-runtime/src/engine/builtins.rs
@@ -64,7 +64,7 @@ fn check_std_context(eval: &Evaluator) -> anyhow::Result<()> {
         Ok(())
     } else {
         Err(anyhow::anyhow!(
-            "__builtins__ is only available within @std modules"
+            "__builtins__ is only available within standard-library modules (@std, @bazel, @aspect)"
         ))
     }
 }
@@ -487,6 +487,124 @@ fn builtins_time_methods(registry: &mut MethodsBuilder) {
     }
 }
 
+/// Unit value returned by `__builtins__.testing()`; its `run` method is the `*_test.axl` runner.
+#[derive(Debug, Clone, Copy, ProvidesStaticType, NoSerialize, Allocative)]
+pub struct BuiltinsTesting;
+
+impl fmt::Display for BuiltinsTesting {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "<BuiltinsTesting>")
+    }
+}
+
+starlark_simple_value!(BuiltinsTesting);
+
+#[starlark_value(type = "BuiltinsTesting")]
+impl<'v> StarlarkValue<'v> for BuiltinsTesting {
+    fn get_methods() -> Option<&'static Methods> {
+        static RES: MethodsStatic = MethodsStatic::new();
+        RES.methods(builtins_testing_methods)
+    }
+}
+
+/// Marshal a test run into the AXL-facing summary dict:
+/// `{"error": None|str, "passed": int, "failed": int, "outcomes": [...]}`,
+/// where each outcome is `{"name": str, "passed": bool, "message": None|str}`.
+///
+/// An `Err` result (described by the runner as a parse error, or a `load(...)`
+/// it can't resolve without a loader) never raises: it is rendered with `{:#}`
+/// into the top-level `error` string, with zero counts and no outcomes — so
+/// one bad file never aborts a whole `aspect axl test` run.
+fn marshal_test_result<'v>(
+    result: anyhow::Result<super::testing::TestSummary>,
+    heap: Heap<'v>,
+) -> Value<'v> {
+    use starlark::values::dict::AllocDict;
+    use starlark::values::list::AllocList;
+
+    let (error, outcomes, passed, failed): (Value<'v>, Vec<Value<'v>>, i32, i32) = match result {
+        Ok(summary) => {
+            let outcomes = summary
+                .outcomes
+                .iter()
+                .map(|o| {
+                    let message = match &o.message {
+                        Some(m) => heap.alloc(m.as_str()).to_value(),
+                        None => Value::new_none(),
+                    };
+                    let entries: Vec<(Value<'v>, Value<'v>)> = vec![
+                        (
+                            heap.alloc("name").to_value(),
+                            heap.alloc(o.name.as_str()).to_value(),
+                        ),
+                        (heap.alloc("passed").to_value(), Value::new_bool(o.passed)),
+                        (heap.alloc("message").to_value(), message),
+                    ];
+                    heap.alloc(AllocDict(entries))
+                })
+                .collect();
+            (
+                Value::new_none(),
+                outcomes,
+                summary.passed() as i32,
+                summary.failed() as i32,
+            )
+        }
+        Err(e) => (
+            heap.alloc(format!("{e:#}").as_str()).to_value(),
+            Vec::new(),
+            0,
+            0,
+        ),
+    };
+
+    let entries: Vec<(Value<'v>, Value<'v>)> = vec![
+        (heap.alloc("error").to_value(), error),
+        (
+            heap.alloc("passed").to_value(),
+            heap.alloc(passed).to_value(),
+        ),
+        (
+            heap.alloc("failed").to_value(),
+            heap.alloc(failed).to_value(),
+        ),
+        (
+            heap.alloc("outcomes").to_value(),
+            heap.alloc(AllocList(outcomes)).to_value(),
+        ),
+    ];
+    heap.alloc(AllocDict(entries))
+}
+
+#[starlark_module]
+fn builtins_testing_methods(registry: &mut MethodsBuilder) {
+    /// Runs every top-level `def test_*(t)` function defined in `source` (the
+    /// contents of a `*_test.axl` file) and returns the summary dict described
+    /// on [`marshal_test_result`]. Per the runner's contract each test gets an
+    /// isolated in-memory environment overlay and runs on worker threads.
+    ///
+    /// `source` is evaluated with no module loader, so a file that
+    /// `load(...)`s other modules comes back as a top-level `error` rather
+    /// than running. In this body only a failing `Env::from_eval` propagates.
+    fn run<'v>(
+        this: Value<'v>,
+        #[starlark(require = pos)] source: &str,
+        eval: &mut Evaluator<'v, '_, '_>,
+    ) -> anyhow::Result<Value<'v>> {
+        let _ = this;
+        // No `check_std_context` here: the gate lives on the `testing()`
+        // accessor (same as `hash()`/`time()`). A task captures the namespace
+        // at module-eval time (where the marker is present) and calls `run`
+        // later from the shared execution module, which carries no marker —
+        // re-checking here would reject every legitimate call.
+        let result = {
+            let env = super::store::Env::from_eval(eval)?;
+            super::testing::run_test_source(source, env)
+        };
+        Ok(marshal_test_result(result, eval.heap()))
+    }
+}
+
 #[starlark_module]
 fn builtins_methods(registry: &mut MethodsBuilder) {
     /// Returns the hash namespace, exposing constructors for supported
@@ -536,6 +654,21 @@ fn builtins_methods(registry: &mut MethodsBuilder) {
         let _ = this;
         super::grpc::make_builtins_grpc(eval)
     }
+
+    /// Hands out the testing namespace, whose `run(source)` is the built-in
+    /// parallel `*_test.axl` runner behind `aspect axl test`.
+    ///
+    /// Gated by `check_std_context`: callable only from standard-library
+    /// modules (`@std`, `@bazel`, `@aspect`). No public `@std//…` wrapper exists
+    /// because the runner is an internal capability of the `@aspect` standard library.
+    fn testing<'v>(
+        this: Value<'v>,
+        eval: &mut Evaluator<'v, '_, '_>,
+    ) -> anyhow::Result<BuiltinsTesting> {
+        let _ = this;
+        // Reject callers outside the standard library before exposing the runner.
+        check_std_context(eval).map(|()| BuiltinsTesting)
+    }
 }
 
 /// Registers the `json` namespace with `encode`, `decode`, and the
@@ -619,6 +752,59 @@ pub fn register_globals(globals: &mut GlobalsBuilder) {
     const Hash: StarlarkValueAsType<HashObject> = StarlarkValueAsType::new();
 }
 
+#[cfg(test)]
+mod marshal_tests {
+    use super::marshal_test_result;
+    use crate::engine::testing::{TestOutcome, TestSummary};
+    use starlark::environment::Module;
+
+    #[test]
+    fn ok_summary_marshals_to_documented_dict_shape() {
+        Module::with_temp_heap(|m| -> anyhow::Result<()> {
+            let summary = TestSummary {
+                outcomes: vec![
+                    TestOutcome {
+                        name: "test_a".to_string(),
+                        passed: true,
+                        message: None,
+                    },
+                    TestOutcome {
+                        name: "test_b".to_string(),
+                        passed: false,
+                        message: Some("boom".to_string()),
+                    },
+                ],
+            };
+            let repr = marshal_test_result(Ok(summary), m.heap()).to_repr();
+            // Top-level summary fields.
+            assert!(repr.contains("\"error\""), "{repr}");
+            assert!(repr.contains("None"), "{repr}");
+            assert!(repr.contains("\"passed\""), "{repr}");
+            assert!(repr.contains("\"failed\""), "{repr}");
+            assert!(repr.contains("\"outcomes\""), "{repr}");
+            // Per-outcome fields, including the failure message.
+            assert!(repr.contains("\"name\""), "{repr}");
+            assert!(repr.contains("test_a"), "{repr}");
+            assert!(repr.contains("test_b"), "{repr}");
+            assert!(repr.contains("boom"), "{repr}");
+            Ok(())
+        })
+        .unwrap();
+    }
+
+    #[test]
+    fn module_error_marshals_as_top_level_error_with_no_outcomes() {
+        Module::with_temp_heap(|m| -> anyhow::Result<()> {
+            let repr = marshal_test_result(Err(anyhow::anyhow!("kaboom")), m.heap()).to_repr();
+            assert!(repr.contains("kaboom"), "{repr}");
+            assert!(repr.contains("\"outcomes\""), "{repr}");
+            assert!(repr.contains("[]"), "{repr}");
+            Ok(())
+        })
+        .unwrap();
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::axl_eval;
diff --git a/crates/axl-runtime/src/engine/config_context.rs b/crates/axl-runtime/src/engine/config_context.rs
index 2ac7da948..0c2901053 100644
--- a/crates/axl-runtime/src/engine/config_context.rs
+++ b/crates/axl-runtime/src/engine/config_context.rs
@@ -130,7 +130,7 @@ pub(crate) fn config_context_methods(registry: &mut MethodsBuilder) {
     /// Standard library is the foundation of powerful AXL tasks.
     #[starlark(attribute)]
     fn std<'v>(#[allow(unused)] this: values::Value<'v>) -> anyhow::Result<Std> {
-        Ok(Std {})
+        Ok(Std::new())
     }
 
     /// Expand template files.
diff --git a/crates/axl-runtime/src/engine/feature_context.rs b/crates/axl-runtime/src/engine/feature_context.rs
index 04f8caabd..ad63a07b7 100644
--- a/crates/axl-runtime/src/engine/feature_context.rs
+++ b/crates/axl-runtime/src/engine/feature_context.rs
@@ -124,7 +124,7 @@ fn feature_context_methods(builder: &mut MethodsBuilder) {
     /// Standard library — same as `ctx.std` in config and task functions.
     #[starlark(attribute)]
     fn std<'v>(#[allow(unused)] this: Value<'v>) -> anyhow::Result<Std> {
-        Ok(Std {})
+        Ok(Std::new())
     }
 
     /// Aspect platform APIs (auth, etc.).
diff --git a/crates/axl-runtime/src/engine/mod.rs b/crates/axl-runtime/src/engine/mod.rs
index 90ad2a837..a57c48910 100644
--- a/crates/axl-runtime/src/engine/mod.rs
+++ b/crates/axl-runtime/src/engine/mod.rs
@@ -35,6 +35,7 @@ pub mod task_context;
 pub mod task_info;
 pub mod task_map;
 pub mod telemetry;
+pub mod testing;
 pub mod trait_map;
 pub mod util;
 
diff --git a/crates/axl-runtime/src/engine/std/env.rs b/crates/axl-runtime/src/engine/std/env.rs
index cb32a2b4b..ce396c625 100644
--- a/crates/axl-runtime/src/engine/std/env.rs
+++ b/crates/axl-runtime/src/engine/std/env.rs
@@ -5,22 +5,50 @@ use starlark::eval::Evaluator;
 use starlark::values::list::{AllocList, UnpackList};
 use starlark::values::none::NoneOr;
 use starlark::values::tuple::{AllocTuple, UnpackTuple};
-use starlark::values::{Heap, NoSerialize, ProvidesStaticType, ValueOfUnchecked};
+use starlark::values::{Heap, NoSerialize, ProvidesStaticType, ValueLike, ValueOfUnchecked};
 use starlark::values::{StarlarkValue, starlark_value};
 use starlark::{starlark_module, starlark_simple_value, values};
 
-use crate::engine::store::Env as RuntimeEnv;
+use crate::engine::store::{Env as RuntimeEnv, TestEnvMap};
 
 #[derive(Clone, Debug, ProvidesStaticType, NoSerialize, Allocative, Display)]
 #[display("<std.Env>")]
-pub struct Env {}
+pub struct Env {
+    /// When `Some`, `var`/`set_var`/`remove_var`/`vars` read and write this
+    /// in-memory overlay instead of the real process environment. The overlay
+    /// is **carried on this value** — the test runner mints the harness's
+    /// `std.Env` (and the one reachable through `t.ctx.std.env`) carrying the
+    /// test's shared overlay `Arc`, rather than fishing an ambient overlay out
+    /// of `eval.extra`. Production `std.env` leaves this `None` and hits the
+    /// real process env, unchanged.
+    pub overlay: Option<TestEnvMap>,
+}
 
 impl Env {
+    /// Production constructor: no overlay, reads the real process env.
     pub fn new() -> Self {
-        Self {}
+        Self { overlay: None }
+    }
+
+    /// Test constructor: reads/writes the supplied in-memory overlay instead
+    /// of the real process env.
+    pub fn with_overlay(overlay: TestEnvMap) -> Self {
+        Self {
+            overlay: Some(overlay),
+        }
     }
 }
 
+/// Read the env overlay carried on a `std.Env` value. Returns `None` for a
+/// production `std.Env` (real process env) and `Some(map)` under the test
+/// runner. Errors only if `this` is somehow not a `std.Env`.
+fn overlay_of<'v>(this: values::Value<'v>) -> anyhow::Result<Option<TestEnvMap>> {
+    let env = this
+        .downcast_ref::<Env>()
+        .ok_or_else(|| anyhow::anyhow!("std.env method called on a non-std.Env value"))?;
+    Ok(env.overlay.clone())
+}
+
 #[starlark_value(type = "std.Env")]
 impl<'v> StarlarkValue<'v> for Env {
     fn get_methods() -> Option<&'static Methods> {
@@ -42,28 +70,41 @@ pub(crate) fn env_methods(registry: &mut MethodsBuilder) {
         Ok(eval.heap().alloc_str(&env.cli_version))
     }
 
-    /// Fetches the environment variable key from the current process.
+    /// Fetches the environment variable key from the current process — or,
+    /// under a test harness, from the in-memory overlay carried on this value.
     fn var<'v>(
-        #[allow(unused)] this: values::Value<'v>,
+        this: values::Value<'v>,
         #[starlark(require = pos)] key: values::StringValue<'v>,
-        heap: Heap<'v>,
+        eval: &mut Evaluator<'v, '_, '_>,
     ) -> anyhow::Result<NoneOr<values::StringValue<'v>>> {
-        let val = std::env::var(key.as_str())
-            .map(|val| heap.alloc_str(val.as_str()))
-            .ok();
-        Ok(NoneOr::from_option(val))
+        // Resolve the value (owned) before touching the heap.
+        let resolved: Option<String> = match overlay_of(this)? {
+            Some(map) => map.lock().unwrap().get(key.as_str()).cloned(),
+            None => std::env::var(key.as_str()).ok(),
+        };
+        let heap = eval.heap();
+        Ok(NoneOr::from_option(
+            resolved.map(|val| heap.alloc_str(val.as_str())),
+        ))
     }
 
-    /// Sets an environment variable for the current process.
+    /// Sets an environment variable for the current process — or, under a test
+    /// harness, in the in-memory overlay carried on this value.
     ///
     /// This affects all subsequent `var()` calls and any child processes spawned
     /// after this call. Use with care — environment variables are global process
     /// state.
     fn set_var<'v>(
-        #[allow(unused)] this: values::Value<'v>,
+        this: values::Value<'v>,
         #[starlark(require = pos)] key: values::StringValue<'v>,
         #[starlark(require = pos)] value: values::StringValue<'v>,
     ) -> anyhow::Result<values::none::NoneType> {
+        if let Some(map) = overlay_of(this)? {
+            map.lock()
+                .unwrap()
+                .insert(key.as_str().to_string(), value.as_str().to_string());
+            return Ok(values::none::NoneType);
+        }
         // SAFETY: AXL evaluation is single-threaded; no concurrent env reads.
         #[allow(deprecated)]
         unsafe {
@@ -77,9 +118,13 @@ pub(crate) fn env_methods(registry: &mut MethodsBuilder) {
     /// Has no effect if the variable is not set. Subsequent `var()` calls will
     /// return `None` for the removed variable.
     fn remove_var<'v>(
-        #[allow(unused)] this: values::Value<'v>,
+        this: values::Value<'v>,
         #[starlark(require = pos)] key: values::StringValue<'v>,
     ) -> anyhow::Result<values::none::NoneType> {
+        if let Some(map) = overlay_of(this)? {
+            map.lock().unwrap().remove(key.as_str());
+            return Ok(values::none::NoneType);
+        }
         // SAFETY: AXL evaluation is single-threaded; no concurrent env reads.
         #[allow(deprecated)]
         unsafe {
@@ -95,16 +140,27 @@ pub(crate) fn env_methods(registry: &mut MethodsBuilder) {
     /// variables at the time of this invocation. Modifications to environment
     /// variables afterwards will not be reflected in the returned iterator.
     fn vars<'v>(
-        #[allow(unused)] this: values::Value<'v>,
-        heap: Heap<'v>,
+        this: values::Value<'v>,
+        eval: &mut Evaluator<'v, '_, '_>,
     ) -> anyhow::Result<
         ValueOfUnchecked<
             'v,
             UnpackList<ValueOfUnchecked<'v, UnpackTuple<values::StringValue<'v>>>>,
         >,
     > {
+        // Snapshot into an owned vec before touching the heap.
+        let pairs: Vec<(String, String)> = match overlay_of(this)? {
+            Some(map) => map
+                .lock()
+                .unwrap()
+                .iter()
+                .map(|(k, v)| (k.clone(), v.clone()))
+                .collect(),
+            None => std::env::vars().collect(),
+        };
+        let heap = eval.heap();
         Ok(heap
-            .alloc_typed_unchecked(AllocList(std::env::vars().map(|(k, v)| {
+            .alloc_typed_unchecked(AllocList(pairs.into_iter().map(|(k, v)| {
                 let val = [heap.alloc_str(k.as_str()), heap.alloc_str(v.as_str())];
                 heap.alloc_typed_unchecked(AllocTuple(val))
                     .cast::<UnpackTuple<values::StringValue<'v>>>()
diff --git a/crates/axl-runtime/src/engine/std/mod.rs b/crates/axl-runtime/src/engine/std/mod.rs
index 21f63ad40..e8e2d9a39 100644
--- a/crates/axl-runtime/src/engine/std/mod.rs
+++ b/crates/axl-runtime/src/engine/std/mod.rs
@@ -9,6 +9,7 @@ use starlark::starlark_simple_value;
 use starlark::values;
 use starlark::values::NoSerialize;
 use starlark::values::ProvidesStaticType;
+use starlark::values::ValueLike;
 use starlark::values::starlark_value;
 
 use starlark::{
@@ -16,6 +17,8 @@ use starlark::{
     values::starlark_value_as_type::StarlarkValueAsType,
 };
 
+use crate::engine::store::TestEnvMap;
+
 mod env;
 mod fs;
 pub mod io;
@@ -23,9 +26,29 @@ mod net;
 mod process;
 pub mod stream;
 
-#[derive(Debug, Display, ProvidesStaticType, NoSerialize, Allocative)]
+#[derive(Clone, Debug, Display, ProvidesStaticType, NoSerialize, Allocative)]
 #[display("<std.Std>")]
-pub struct Std {}
+pub struct Std {
+    /// When `Some`, the `std.Env` minted by `std.env` carries this in-memory
+    /// overlay (the mock route is value-carried, not ambient). Production
+    /// leaves this `None`; the test runner mints `Std` carrying the test's
+    /// shared overlay `Arc` so `t.std.env` and `t.ctx.std.env` observe one map.
+    pub env_overlay: Option<TestEnvMap>,
+}
+
+impl Std {
+    /// Production constructor: no env overlay (real process env).
+    pub fn new() -> Self {
+        Self { env_overlay: None }
+    }
+
+    /// Test constructor: the `std.env` it hands out reads/writes `overlay`.
+    pub fn with_env_overlay(overlay: TestEnvMap) -> Self {
+        Self {
+            env_overlay: Some(overlay),
+        }
+    }
+}
 
 starlark_simple_value!(Std);
 
@@ -40,8 +63,14 @@ impl<'v> values::StarlarkValue<'v> for Std {
 #[starlark_module]
 pub(crate) fn std_methods(registry: &mut MethodsBuilder) {
     #[starlark(attribute)]
-    fn env<'v>(#[allow(unused)] this: values::Value<'v>) -> anyhow::Result<env::Env> {
-        Ok(env::Env::new())
+    fn env<'v>(this: values::Value<'v>) -> anyhow::Result<env::Env> {
+        let std = this
+            .downcast_ref::<Std>()
+            .ok_or_else(|| anyhow::anyhow!("std.env accessed on a non-std.Std value"))?;
+        Ok(match &std.env_overlay {
+            Some(overlay) => env::Env::with_overlay(overlay.clone()),
+            None => env::Env::new(),
+        })
     }
 
     #[starlark(attribute)]
diff --git a/crates/axl-runtime/src/engine/store.rs b/crates/axl-runtime/src/engine/store.rs
index 578c748ac..4e6640cbe 100644
--- a/crates/axl-runtime/src/engine/store.rs
+++ b/crates/axl-runtime/src/engine/store.rs
@@ -1,9 +1,28 @@
+use std::collections::BTreeMap;
 use std::path::PathBuf;
+use std::sync::{Arc, Mutex};
 
 use starlark::{eval::Evaluator, values::ProvidesStaticType};
 
 use super::r#async::rt::AsyncRuntime;
 
+/// In-memory environment-variable overlay shared between a test's harness
+/// (`t.env`) and the `std.env` backend reachable through `t.std.env` /
+/// `t.ctx.std.env`.
+///
+/// Shared via a handle so a mutation through one view (e.g. `t.env.set(...)`)
+/// is observed through the others (`t.ctx.std.env.var(...)`) — they all hold
+/// clones of the same handle onto one map. A `BTreeMap` keeps `vars()`
+/// iteration deterministic for snapshot-style assertions.
+///
+/// `Arc<Mutex<…>>` (not `Rc<RefCell<…>>`) so the handle is `Send + Sync`: the
+/// `std.Env` / `Std` Starlark values that now *carry* it must satisfy the
+/// `Send + Sync` bound that frozen Starlark values require. Each test's
+/// overlay is only ever touched on that test's own worker thread, so the
+/// `Mutex` is never actually contended — it is correctness insurance for the
+/// parallel runner, not a hot path.
+pub type TestEnvMap = Arc<Mutex<BTreeMap<String, String>>>;
+
 /// Process-wide environment passed to every Starlark evaluator via `eval.extra`.
 ///
 /// `script_path` is intentionally absent — the file currently being evaluated
diff --git a/crates/axl-runtime/src/engine/task_context.rs b/crates/axl-runtime/src/engine/task_context.rs
index 575c241c2..f4952cc87 100644
--- a/crates/axl-runtime/src/engine/task_context.rs
+++ b/crates/axl-runtime/src/engine/task_context.rs
@@ -27,6 +27,7 @@ use super::aspect::Aspect;
 use super::bazel::{Bazel, FrozenBazel};
 use super::http::Http;
 use super::std::Std;
+use super::store::TestEnvMap;
 use super::task_info::TaskInfo;
 use super::template::Template;
 use super::trait_map::{FrozenTraitMap, TraitMap};
@@ -61,6 +62,12 @@ pub struct TaskContext<'v> {
     bazel: values::Value<'v>,
     #[allocative(skip)]
     pub defers: RefCell<Vec<Defer<'v>>>,
+    /// When `Some`, `ctx.std.env` reads/writes this in-memory overlay instead
+    /// of the real process env. Carried on the value (not pulled ambiently
+    /// from `eval.extra`), so the test runner's `t.ctx` shares one overlay
+    /// `Arc` with `t.std`/`t.env`. Production leaves this `None`.
+    #[allocative(skip)]
+    pub env_overlay: Option<TestEnvMap>,
 }
 
 unsafe impl<'v> Trace<'v> for TaskContext<'v> {
@@ -88,9 +95,18 @@ impl<'v> TaskContext<'v> {
             task,
             bazel,
             defers: RefCell::new(Vec::new()),
+            env_overlay: None,
         }
     }
 
+    /// Attach an in-memory env overlay so `ctx.std.env` reads/writes it instead
+    /// of the real process env. Used by the test runner to give `t.ctx` the
+    /// same overlay `Arc` as `t.std`/`t.env`.
+    pub fn with_env_overlay(mut self, overlay: TestEnvMap) -> Self {
+        self.env_overlay = Some(overlay);
+        self
+    }
+
     pub fn drain_defers(&self) -> Vec<Defer<'v>> {
         let mut v = self.defers.borrow_mut();
         let mut out: Vec<Defer<'v>> = std::mem::take(&mut *v);
@@ -137,8 +153,12 @@ pub(crate) fn task_context_methods(registry: &mut MethodsBuilder) {
     /// The standard library. Gives access to common utilities such as
     /// filesystem, process execution, environment variables, and IO streams.
     #[starlark(attribute)]
-    fn std<'v>(#[allow(unused)] this: values::Value<'v>) -> starlark::Result<Std> {
-        Ok(Std {})
+    fn std<'v>(this: values::Value<'v>) -> starlark::Result<Std> {
+        let ctx = this.downcast_ref_err::<TaskContext>()?;
+        Ok(match &ctx.env_overlay {
+            Some(overlay) => Std::with_env_overlay(overlay.clone()),
+            None => Std::new(),
+        })
     }
 
     /// Identity of the currently running task — its name, group(s),
@@ -248,9 +268,13 @@ fn frozen_task_context_methods(registry: &mut MethodsBuilder) {
 
     /// The standard library. Gives access to common utilities such as
     /// filesystem, process execution, environment variables, and IO streams.
+    ///
+    /// Frozen contexts are a production-only artifact (the config phase freezes
+    /// them); the env overlay only lives on the live, test-minted context, so
+    /// the frozen path always hands out the real-process-env `std`.
     #[starlark(attribute)]
     fn std<'v>(#[allow(unused)] this: values::Value<'v>) -> starlark::Result<Std> {
-        Ok(Std {})
+        Ok(Std::new())
     }
 
     /// Identity of the currently running task — its name, group(s),
diff --git a/crates/axl-runtime/src/engine/testing.rs b/crates/axl-runtime/src/engine/testing.rs
new file mode 100644
index 000000000..c3505e6f1
--- /dev/null
+++ b/crates/axl-runtime/src/engine/testing.rs
@@ -0,0 +1,1143 @@
+//! Built-in test framework for AXL — POC.
+//!
+//! This is a proof-of-concept implementation of the design discussed for
+//! giving AXL a native, pytest-style testing story. It demonstrates the
+//! load-bearing decisions end to end:
+//!
+//!   1. **Test-only globals.** `*_test.axl` files are evaluated against an
+//!      augmented globals surface (base AXL + the `asserts` namespace). The
+//!      vocabulary exists *only* in test files (see `eval::api::get_test_globals`
+//!      and the loader's per-file globals selection in `eval::load`), so it
+//!      can never leak into production `config.axl` / builtins.
+//!
+//!   2. **Convention discovery.** A test is a top-level `def test_*(t)`
+//!      function. The runner enumerates the module's `test_*` callables —
+//!      the same shape as `eval::task::FrozenTaskModuleLike::tasks()`, which
+//!      filters module names by value kind.
+//!
+//!   3. **A bazel-free harness `t`.** Each test receives a zero-state handle
+//!      `t` carrying assertions-adjacent fixtures: `t.env` (an in-memory
+//!      environment overlay), `t.std` (the real `std` surface), and `t.ctx`
+//!      (a *real* `TaskContext` — same Rust type production uses — wired over
+//!      the mock backends). Its only bazel surface is the `t.bazel` mock fixture.
+//!
+//!   4. **Mock-by-backend-swap, not by masquerade.** `t.ctx.std.env` is the
+//!      genuine `std.Env` type; it reads the in-memory overlay because the
+//!      `std.Env` value is *minted carrying* the test's overlay `Arc` (the
+//!      mock route is value-carried, not ambient on `eval.extra`). The type
+//!      and its method table are unchanged; only the map a given instance
+//!      consults differs. This is what keeps the mock's contract identical to
+//!      reality — and the one shared `Arc` is what makes `t.env`, `t.std.env`,
+//!      and `t.ctx.std.env` observe the same map.
+//!
+//!   5. **Per-test isolation, run in parallel.** Each test runs with a fresh
+//!      overlay; a failed assertion (which raises) is caught per-test and
+//!      recorded, so one failure never aborts the run — pytest semantics.
+//!      Tests fan out across `min(tests, cpus)` worker threads, each with its
+//!      own Starlark heap (heaps are `!Send`), and results merge back into
+//!      definition order. This is sound precisely because per-test state lives
+//!      on the test's own values, never in a process-global — so concurrent
+//!      workers share no mutable state.
+//!
+//! Not yet built (tracked in `docs/testing.md`): the `aspect test` runner as
+//! an AXL task, snapshot golden files, fs/process/http mock backends, and the
+//! LSP knowing about the `_test.axl` surface.
+
+use std::cmp::Ordering;
+use std::collections::BTreeMap;
+use std::sync::{Arc, Mutex};
+
+use allocative::Allocative;
+use derive_more::Display;
+use starlark::environment::{GlobalsBuilder, Methods, MethodsBuilder, MethodsStatic, Module};
+use starlark::eval::Evaluator;
+use starlark::syntax::AstModule;
+use starlark::values::none::{NoneOr, NoneType};
+use starlark::values::tuple::UnpackTuple;
+use starlark::values::{
+    Heap, NoSerialize, ProvidesStaticType, StarlarkValue, Value, ValueLike, starlark_value,
+};
+use starlark::{starlark_module, starlark_simple_value, values};
+
+use crate::engine::arguments::Arguments;
+use crate::engine::bazel::Bazel;
+use crate::engine::bazel::backend::BazelBackend;
+use crate::engine::std::Std;
+use crate::engine::store::{Env as RuntimeEnv, TestEnvMap};
+use crate::engine::task_context::TaskContext;
+use crate::engine::task_info::TaskInfo;
+use crate::engine::trait_map::TraitMap;
+
+// ─── The `asserts` namespace (test-only global) ──────────────────────────────
+
+#[starlark_module]
+fn assert_namespace(globals: &mut GlobalsBuilder) {
+    /// Fails the test unless `got == want` (Starlark equality).
+    fn eq<'v>(
+        #[starlark(require = pos)] got: Value<'v>,
+        #[starlark(require = pos)] want: Value<'v>,
+    ) -> anyhow::Result<NoneType> {
+        let equal = got
+            .equals(want)
+            .map_err(|e| anyhow::anyhow!("asserts.eq: comparison failed: {e}"))?;
+        if equal {
+            Ok(NoneType)
+        } else {
+            Err(anyhow::anyhow!(
+                "asserts.eq failed:\n  got:  {}\n  want: {}",
+                got.to_repr(),
+                want.to_repr()
+            ))
+        }
+    }
+
+    /// Fails the test unless `got != unwanted`.
+    fn ne<'v>(
+        #[starlark(require = pos)] got: Value<'v>,
+        #[starlark(require = pos)] unwanted: Value<'v>,
+    ) -> anyhow::Result<NoneType> {
+        let equal = got
+            .equals(unwanted)
+            .map_err(|e| anyhow::anyhow!("asserts.ne: comparison failed: {e}"))?;
+        if equal {
+            Err(anyhow::anyhow!(
+                "asserts.ne failed: both values are {}",
+                got.to_repr()
+            ))
+        } else {
+            Ok(NoneType)
+        }
+    }
+
+    /// Fails the test unless `value` is truthy.
+    fn is_true<'v>(#[starlark(require = pos)] value: Value<'v>) -> anyhow::Result<NoneType> {
+        if value.to_bool() {
+            Ok(NoneType)
+        } else {
+            Err(anyhow::anyhow!(
+                "asserts.is_true failed: {} is falsy",
+                value.to_repr()
+            ))
+        }
+    }
+
+    /// Fails the test unless `value` is falsy.
+    fn is_false<'v>(#[starlark(require = pos)] value: Value<'v>) -> anyhow::Result<NoneType> {
+        if value.to_bool() {
+            Err(anyhow::anyhow!(
+                "asserts.is_false failed: {} is truthy",
+                value.to_repr()
+            ))
+        } else {
+            Ok(NoneType)
+        }
+    }
+
+    /// Fails the test unless `needle` is a member of `haystack`.
+    ///
+    /// Backed by the `in` operator, so it works for any container Starlark
+    /// supports: a substring of a string, an element of a list/tuple/set, or a
+    /// key of a dict.
+    fn contains<'v>(
+        #[starlark(require = pos)] haystack: Value<'v>,
+        #[starlark(require = pos)] needle: Value<'v>,
+    ) -> anyhow::Result<NoneType> {
+        let present = haystack
+            .is_in(needle)
+            .map_err(|e| anyhow::anyhow!("asserts.contains: membership test failed: {e}"))?;
+        if present {
+            Ok(NoneType)
+        } else {
+            Err(anyhow::anyhow!(
+                "asserts.contains failed: {} not found in {}",
+                needle.to_repr(),
+                haystack.to_repr()
+            ))
+        }
+    }
+
+    /// Fails the test unless `needle` is *absent* from `haystack` — the inverse
+    /// of `contains`, over the same containers.
+    fn not_contains<'v>(
+        #[starlark(require = pos)] haystack: Value<'v>,
+        #[starlark(require = pos)] needle: Value<'v>,
+    ) -> anyhow::Result<NoneType> {
+        let present = haystack
+            .is_in(needle)
+            .map_err(|e| anyhow::anyhow!("asserts.not_contains: membership test failed: {e}"))?;
+        if present {
+            Err(anyhow::anyhow!(
+                "asserts.not_contains failed: {} unexpectedly found in {}",
+                needle.to_repr(),
+                haystack.to_repr()
+            ))
+        } else {
+            Ok(NoneType)
+        }
+    }
+
+    /// Fails the test unless `got > want` (Starlark ordering).
+    fn gt<'v>(
+        #[starlark(require = pos)] got: Value<'v>,
+        #[starlark(require = pos)] want: Value<'v>,
+    ) -> anyhow::Result<NoneType> {
+        compare_assert(got, want, "gt", &[Ordering::Greater])
+    }
+
+    /// Fails the test unless `got >= want` (Starlark ordering).
+    fn ge<'v>(
+        #[starlark(require = pos)] got: Value<'v>,
+        #[starlark(require = pos)] want: Value<'v>,
+    ) -> anyhow::Result<NoneType> {
+        compare_assert(got, want, "ge", &[Ordering::Greater, Ordering::Equal])
+    }
+
+    /// Fails the test unless `got < want` (Starlark ordering).
+    fn lt<'v>(
+        #[starlark(require = pos)] got: Value<'v>,
+        #[starlark(require = pos)] want: Value<'v>,
+    ) -> anyhow::Result<NoneType> {
+        compare_assert(got, want, "lt", &[Ordering::Less])
+    }
+
+    /// Fails the test unless `got <= want` (Starlark ordering).
+    fn le<'v>(
+        #[starlark(require = pos)] got: Value<'v>,
+        #[starlark(require = pos)] want: Value<'v>,
+    ) -> anyhow::Result<NoneType> {
+        compare_assert(got, want, "le", &[Ordering::Less, Ordering::Equal])
+    }
+
+    /// Fails the test unless calling `f()` raises. The pytest `raises` analogue
+    /// for the common no-argument case. Pass `contains = "substr"` to also
+    /// require the raised error's message to contain that substring — the
+    /// common "it failed *for the right reason*" check.
+    fn fails<'v>(
+        #[starlark(require = pos)] f: Value<'v>,
+        #[starlark(require = named, default = NoneOr::None)] contains: NoneOr<&str>,
+        eval: &mut Evaluator<'v, '_, '_>,
+    ) -> anyhow::Result<NoneType> {
+        match eval.eval_function(f, &[], &[]) {
+            Ok(_) => Err(anyhow::anyhow!(
+                "asserts.fails: expected the callable to raise, but it returned normally"
+            )),
+            Err(e) => match contains {
+                NoneOr::Other(sub) => {
+                    // Match against the bare error message, not the rendered
+                    // diagnostic — the full `Display` embeds the source frame
+                    // (including this very `asserts.fails(...)` call), which
+                    // would let the call's own text spuriously satisfy the
+                    // substring check.
+                    let msg = e.without_diagnostic().to_string();
+                    if msg.contains(sub) {
+                        Ok(NoneType)
+                    } else {
+                        Err(anyhow::anyhow!(
+                            "asserts.fails: raised error did not contain {sub:?}\n  error: {msg}"
+                        ))
+                    }
+                }
+                NoneOr::None => Ok(NoneType),
+            },
+        }
+    }
+}
+
+/// Shared body for the ordering asserts (`gt`/`ge`/`lt`/`le`). Compares `got`
+/// against `want` and passes only when the resulting [`Ordering`] is in
+/// `allowed`; otherwise raises a message naming the failed `op`.
+fn compare_assert<'v>(
+    got: Value<'v>,
+    want: Value<'v>,
+    op: &str,
+    allowed: &[Ordering],
+) -> anyhow::Result<NoneType> {
+    let ord = got
+        .compare(want)
+        .map_err(|e| anyhow::anyhow!("asserts.{op}: comparison failed: {e}"))?;
+    if allowed.contains(&ord) {
+        Ok(NoneType)
+    } else {
+        Err(anyhow::anyhow!(
+            "asserts.{op} failed:\n  got:  {}\n  want: {}",
+            got.to_repr(),
+            want.to_repr()
+        ))
+    }
+}
+
+/// Register the test-only globals onto a builder. Called by
+/// `eval::api::get_test_globals` to build the surface used for `*_test.axl`
+/// files and by the test runner.
+///
+/// NOTE: the namespace is `asserts`, not `assert` — `assert` is a reserved
+/// keyword in the AXL/Starlark dialect and cannot be used as an identifier,
+/// so the plural `asserts` is used instead.
+pub fn register_test_globals(globals: &mut GlobalsBuilder) {
+    globals.namespace("asserts", assert_namespace);
+}
+
+// ─── The harness value `t` ───────────────────────────────────────────────────
+
+/// The per-test harness handed to every `def test_*(t)`. Carries the test's
+/// in-memory env overlay **on the value itself**: `t.env`, `t.std`, and
+/// `t.ctx.std.env` are all minted from this one shared `Arc`, so a mutation
+/// through any of them is observed through the others. Nothing is fished out
+/// of `eval.extra` — the mock route is value-carried, never ambient.
+///
+/// Its bazel fixture is likewise value-carried: a per-test
+/// [`PendingExpectation`] cell + the located fake-bazel binary, so a declared
+/// `t.bazel.expect_build(...)` reaches the `Fake` backend minted by `t.ctx`
+/// (state on the value, never a global — decisions 6/8).
+#[derive(Clone, Debug, ProvidesStaticType, NoSerialize, Allocative, Display)]
+#[display("<Test>")]
+pub struct Test {
+    #[allocative(skip)]
+    overlay: TestEnvMap,
+    /// Fake-bazel binary path. `None` → `t.ctx.bazel` is `Real` (production
+    /// behavior; the bazel mock is opt-in per the runner that constructs it).
+    #[allocative(skip)]
+    fake_bin: Option<Arc<str>>,
+    /// The expectation declared by `t.bazel.expect_build(...)` for the next
+    /// `t.ctx.bazel.build(...)`. Shared (by `Arc` clone) with the `t.bazel`
+    /// handle and read by `t.ctx`.
+    #[allocative(skip)]
+    expectation: PendingExpectation,
+}
+
+/// Shared per-test cell holding the declared `BazelExpectation`, if any.
+type PendingExpectation = Arc<Mutex<Option<basil_core::BazelExpectation>>>;
+
+impl Test {
+    /// A harness carrying `overlay`. `fake_bin = Some` wires `t.ctx.bazel` to a
+    /// `Fake` backend driving that binary; `None` leaves it `Real`.
+    fn new(overlay: TestEnvMap, fake_bin: Option<Arc<str>>) -> Self {
+        Self {
+            overlay,
+            fake_bin,
+            expectation: Arc::new(Mutex::new(None)),
+        }
+    }
+}
+
+#[starlark_value(type = "Test")]
+impl<'v> StarlarkValue<'v> for Test {
+    fn get_methods() -> Option<&'static Methods> {
+        static RES: MethodsStatic = MethodsStatic::new();
+        RES.methods(test_methods)
+    }
+}
+
+starlark_simple_value!(Test);
+
+/// Read the shared overlay `Arc` carried on a `t` (`Test`) value.
+fn test_overlay<'v>(this: Value<'v>) -> anyhow::Result<TestEnvMap> {
+    let t = this
+        .downcast_ref::<Test>()
+        .ok_or_else(|| anyhow::anyhow!("test harness method called on a non-Test value"))?;
+    Ok(t.overlay.clone())
+}
+
+#[starlark_module]
+fn test_methods(registry: &mut MethodsBuilder) {
+    /// In-memory environment fixture for this test. Mutations are visible
+    /// through `t.ctx.std.env` / `t.std.env` and never touch the real process —
+    /// all three share the one overlay `Arc` carried on `t`.
+    #[starlark(attribute)]
+    fn env<'v>(this: Value<'v>) -> anyhow::Result<TestEnv> {
+        Ok(TestEnv {
+            overlay: test_overlay(this)?,
+        })
+    }
+
+    /// The real `std` surface (filesystem, env, io, …). Its `env` reads/writes
+    /// this test's overlay because the `Std` is minted carrying that `Arc`.
+    #[starlark(attribute)]
+    fn std<'v>(this: Value<'v>) -> anyhow::Result<Std> {
+        Ok(Std::with_env_overlay(test_overlay(this)?))
+    }
+
+    /// The bazel mock fixture. Declare the expected outcome with
+    /// `t.bazel.expect_build(...)`; each later `t.ctx` access copies the latest
+    /// declaration (without clearing it) into the `Fake` backend it mints.
+    #[starlark(attribute)]
+    fn bazel<'v>(this: Value<'v>) -> anyhow::Result<TestBazel> {
+        let t = this
+            .downcast_ref::<Test>()
+            .ok_or_else(|| anyhow::anyhow!("t.bazel called on a non-Test value"))?;
+        Ok(TestBazel {
+            expectation: t.expectation.clone(),
+        })
+    }
+
+    /// A real `TaskContext` wired over this test's mock backends. Same Rust
+    /// type production uses, so functions annotated `ctx: TaskContext` accept
+    /// it with no drift. The context carries this test's overlay `Arc`, so
+    /// `t.ctx.std.env` observes the same map as `t.env` / `t.std.env`.
+    #[starlark(attribute)]
+    fn ctx<'v>(this: Value<'v>, heap: Heap<'v>) -> anyhow::Result<Value<'v>> {
+        let t = this
+            .downcast_ref::<Test>()
+            .ok_or_else(|| anyhow::anyhow!("t.ctx called on a non-Test value"))?;
+        let overlay = t.overlay.clone();
+        // Mint the bazel backend from the harness: a `Fake` pointing at the
+        // located fake binary + the expectation declared so far (defaulting to
+        // a clean passing build), or `Real` when no fake binary was installed.
+        let backend = match &t.fake_bin {
+            Some(fake_bin) => {
+                let exp = t.expectation.lock().unwrap().clone().unwrap_or_else(|| {
+                    basil_core::BazelExpectation::new(
+                        Vec::new(),
+                        basil_core::BuildResult::Passed,
+                        None,
+                    )
+                });
+                BazelBackend::Fake {
+                    fake_bin: fake_bin.to_string(),
+                    expectation: Arc::new(exp),
+                }
+            }
+            None => BazelBackend::Real,
+        };
+        let bazel = heap.alloc(Bazel {
+            active_rc: std::cell::RefCell::new(None),
+            backend,
+        });
+        let args = heap.alloc(Arguments::new());
+        let traits = heap.alloc(TraitMap::new());
+        let task_info = heap.alloc(TaskInfo::new(
+            "test".to_string(),
+            Vec::new(),
+            "test".to_string(),
+            "test".to_string(),
+        ));
+        Ok(heap.alloc(TaskContext::new(args, traits, task_info, bazel).with_env_overlay(overlay)))
+    }
+}
+
+// ─── The `t.bazel` fixture ────────────────────────────────────────────────────
+
+/// The `t.bazel` fixture handle. Carries (by `Arc` clone) the per-test
+/// expectation cell that `t.ctx`'s `Fake` backend reads.
+#[derive(Clone, Debug, ProvidesStaticType, NoSerialize, Allocative, Display)]
+#[display("<Test.bazel>")]
+pub struct TestBazel {
+    #[allocative(skip)]
+    expectation: PendingExpectation,
+}
+
+starlark_simple_value!(TestBazel);
+
+#[starlark_value(type = "Test.bazel")]
+impl<'v> StarlarkValue<'v> for TestBazel {
+    fn get_methods() -> Option<&'static Methods> {
+        static RES: MethodsStatic = MethodsStatic::new();
+        RES.methods(test_bazel_methods)
+    }
+}
+
+/// Translate the AXL-level `result=` keyword into a [`basil_core::BuildResult`].
+/// Accepts exactly `passed`, `failed`, and `cache_evicted`; any other spelling
+/// is rejected immediately with an error that lists the accepted values.
+fn parse_build_result(s: &str) -> anyhow::Result<basil_core::BuildResult> {
+    Ok(match s {
+        "passed" => basil_core::BuildResult::Passed,
+        "failed" => basil_core::BuildResult::Failed,
+        "cache_evicted" => basil_core::BuildResult::CacheEvicted,
+        other => anyhow::bail!(
+            "t.bazel.expect_build: unknown result {other:?}; expected one of \
+             \"passed\", \"failed\", \"cache_evicted\""
+        ),
+    })
+}
+
+#[starlark_module]
+fn test_bazel_methods(registry: &mut MethodsBuilder) {
+    /// Declare the expected outcome of the next `t.ctx.bazel.build(...)`.
+    ///
+    /// The fake bazel synthesizes a consistent BES stream from this:
+    /// `BuildStarted` → one `TargetComplete` per target (pass/fail per
+    /// `result`) → `BuildFinished` carrying the exit code — then exits with
+    /// that code. The parent reads it back through the real
+    /// `ctx.bazel.build` BES path.
+    ///
+    /// # Arguments
+    /// * `targets` - Target patterns the build "covers"; one `TargetComplete`
+    ///   per entry.
+    /// * `result` - `"passed"` | `"failed"` | `"cache_evicted"`.
+    /// * `exit_code` - Override the process exit code (default: derived from
+    ///   `result` — 0 / 1 / 39).
+    fn expect_build<'v>(
+        this: Value<'v>,
+        #[starlark(args)] targets: UnpackTuple<values::StringValue<'v>>,
+        #[starlark(require = named, default = "passed")] result: &str,
+        #[starlark(require = named, default = NoneOr::None)] exit_code: NoneOr<i32>,
+    ) -> anyhow::Result<NoneType> {
+        let tb = this
+            .downcast_ref::<TestBazel>()
+            .ok_or_else(|| anyhow::anyhow!("expected Test.bazel"))?;
+        let result = parse_build_result(result)?;
+        let targets: Vec<String> = targets
+            .items
+            .iter()
+            .map(|t| t.as_str().to_string())
+            .collect();
+        let exp = basil_core::BazelExpectation::new(targets, result, exit_code.into_option());
+        *tb.expectation.lock().unwrap() = Some(exp);
+        Ok(NoneType)
+    }
+}
+
+/// The `t.env` fixture handle. Carries the test's overlay `Arc` directly, so
+/// its mutations are observed through `t.std.env` / `t.ctx.std.env`.
+#[derive(Clone, Debug, ProvidesStaticType, NoSerialize, Allocative, Display)]
+#[display("<Test.env>")]
+pub struct TestEnv {
+    #[allocative(skip)]
+    overlay: TestEnvMap,
+}
+
+#[starlark_value(type = "Test.env")]
+impl<'v> StarlarkValue<'v> for TestEnv {
+    fn get_methods() -> Option<&'static Methods> {
+        static RES: MethodsStatic = MethodsStatic::new();
+        RES.methods(test_env_methods)
+    }
+}
+
+starlark_simple_value!(TestEnv);
+
+/// Read the overlay `Arc` carried on a `t.env` (`TestEnv`) value.
+fn test_env_map<'v>(this: Value<'v>) -> anyhow::Result<TestEnvMap> {
+    let env = this
+        .downcast_ref::<TestEnv>()
+        .ok_or_else(|| anyhow::anyhow!("t.env method called on a non-Test.env value"))?;
+    Ok(env.overlay.clone())
+}
+
+#[starlark_module]
+fn test_env_methods(registry: &mut MethodsBuilder) {
+    /// Set an environment variable in the in-memory overlay.
+    fn set<'v>(
+        this: Value<'v>,
+        #[starlark(require = pos)] key: values::StringValue<'v>,
+        #[starlark(require = pos)] value: values::StringValue<'v>,
+    ) -> anyhow::Result<NoneType> {
+        test_env_map(this)?
+            .lock()
+            .unwrap()
+            .insert(key.as_str().to_string(), value.as_str().to_string());
+        Ok(NoneType)
+    }
+
+    /// Read an environment variable from the overlay (`None` if unset).
+    fn get<'v>(
+        this: Value<'v>,
+        #[starlark(require = pos)] key: values::StringValue<'v>,
+        eval: &mut Evaluator<'v, '_, '_>,
+    ) -> anyhow::Result<NoneOr<values::StringValue<'v>>> {
+        let resolved = test_env_map(this)?
+            .lock()
+            .unwrap()
+            .get(key.as_str())
+            .cloned();
+        let heap = eval.heap();
+        Ok(NoneOr::from_option(
+            resolved.map(|v| heap.alloc_str(v.as_str())),
+        ))
+    }
+
+    /// Remove a variable from the overlay.
+    fn remove<'v>(
+        this: Value<'v>,
+        #[starlark(require = pos)] key: values::StringValue<'v>,
+    ) -> anyhow::Result<NoneType> {
+        test_env_map(this)?.lock().unwrap().remove(key.as_str());
+        Ok(NoneType)
+    }
+
+    /// Clear the overlay back to empty.
+    fn reset<'v>(this: Value<'v>) -> anyhow::Result<NoneType> {
+        test_env_map(this)?.lock().unwrap().clear();
+        Ok(NoneType)
+    }
+}
+
+// ─── Discovery + runner ──────────────────────────────────────────────────────
+
+/// Outcome of a single `test_*` function.
+#[derive(Debug, Clone)]
+pub struct TestOutcome {
+    pub name: String,
+    pub passed: bool,
+    /// The failure message (assertion or unexpected error) when `!passed`.
+    pub message: Option<String>,
+}
+
+/// Aggregate result of running a test module.
+#[derive(Debug, Clone, Default)]
+pub struct TestSummary {
+    pub outcomes: Vec<TestOutcome>,
+}
+
+impl TestSummary {
+    pub fn passed(&self) -> usize {
+        self.outcomes.iter().filter(|o| o.passed).count()
+    }
+
+    pub fn failed(&self) -> usize {
+        self.outcomes.iter().filter(|o| !o.passed).count()
+    }
+
+    /// Render a human-readable summary, the shape the `aspect test` AXL runner
+    /// will eventually produce (here in Rust for the POC).
+    pub fn report(&self) -> String {
+        let mut out = String::new();
+        for o in &self.outcomes {
+            if o.passed {
+                out.push_str(&format!("  ok   {}\n", o.name));
+            } else {
+                out.push_str(&format!("  FAIL {}\n", o.name));
+                if let Some(msg) = &o.message {
+                    for line in msg.lines() {
+                        out.push_str(&format!("         {line}\n"));
+                    }
+                }
+            }
+        }
+        out.push_str(&format!(
+            "\n{} passed, {} failed\n",
+            self.passed(),
+            self.failed()
+        ));
+        out
+    }
+}
+
+/// Parse `source` into the test AST against the test globals dialect.
+fn parse_test_source(source: &str) -> anyhow::Result<AstModule> {
+    AstModule::parse(
+        "<axl-test>",
+        source.to_string(),
+        &crate::eval::api::dialect(),
+    )
+    .map_err(|e| anyhow::anyhow!("parse error: {e}"))
+}
+
+/// Enumerate the `test_*` callables in definition order. Evaluates the module
+/// body once in a throwaway heap purely to learn the names; the result is
+/// `Send` (plain `String`s), so the caller can shard it across worker threads
+/// even though the `Module` itself is `!Send`.
+fn discover_test_names(source: &str, base_env: &RuntimeEnv) -> anyhow::Result<Vec<String>> {
+    let globals = crate::eval::api::get_test_globals();
+    let ast = parse_test_source(source)?;
+    Module::with_temp_heap(|module| -> anyhow::Result<Vec<String>> {
+        let mut eval = Evaluator::new(&module);
+        eval.extra = Some(base_env);
+        eval.eval_module(ast, &globals)
+            .map_err(|e| anyhow::anyhow!("eval error: {e}"))?;
+        Ok(module
+            .names()
+            .map(|n| n.as_str().to_string())
+            .filter(|n| n.starts_with("test_"))
+            .filter(|n| {
+                module
+                    .get(n)
+                    .map(|v| v.get_type() == "function")
+                    .unwrap_or(false)
+            })
+            .collect())
+    })
+}
+
+/// The `Send` slice of a [`RuntimeEnv`] needed to rebuild one on a worker
+/// thread. We never move a `RuntimeEnv` (or the per-test overlay) across a
+/// thread boundary — the `Module`/heap is `!Send` anyway, so each worker
+/// enters the shared tokio runtime handle, mints its own `RuntimeEnv`, and
+/// builds its own per-test overlays locally.
+#[derive(Clone)]
+struct EnvSeed {
+    cli_version: String,
+    aspect_root: std::path::PathBuf,
+    bazel_root: std::path::PathBuf,
+    git_root: Option<std::path::PathBuf>,
+    rt: tokio::runtime::Handle,
+}
+
+impl EnvSeed {
+    fn from_env(env: &RuntimeEnv) -> Self {
+        Self {
+            cli_version: env.cli_version.clone(),
+            aspect_root: env.aspect_root_dir.clone(),
+            bazel_root: env.bazel_root_dir.clone(),
+            git_root: env.git_root_dir.clone(),
+            rt: env.rt.0.clone(),
+        }
+    }
+}
+
+/// Run a slice of tests (carrying their original indices, so results can be
+/// merged back into definition order) in a fresh `Module` on the current
+/// thread. Each test gets its own isolated env overlay; a raised error fails
+/// that test and is recorded — the slice continues (pytest semantics).
+fn run_shard(
+    base_env: &RuntimeEnv,
+    source: &str,
+    shard: Vec<(usize, String)>,
+    fake_bin: Option<Arc<str>>,
+) -> anyhow::Result<Vec<(usize, TestOutcome)>> {
+    let globals = crate::eval::api::get_test_globals();
+    let ast = parse_test_source(source)?;
+    Module::with_temp_heap(|module| -> anyhow::Result<Vec<(usize, TestOutcome)>> {
+        // Evaluate the module body — this only binds the `def test_*` functions;
+        // a well-behaved test file performs no side effects at module scope,
+        // which is also what makes re-evaluating it per worker thread safe.
+        {
+            let mut eval = Evaluator::new(&module);
+            eval.extra = Some(base_env);
+            eval.eval_module(ast, &globals)
+                .map_err(|e| anyhow::anyhow!("eval error: {e}"))?;
+        }
+
+        let mut out = Vec::with_capacity(shard.len());
+        for (idx, name) in shard {
+            let f = match module.get(&name) {
+                Some(f) => f,
+                None => {
+                    out.push((
+                        idx,
+                        TestOutcome {
+                            name,
+                            passed: false,
+                            message: Some("test disappeared after discovery".to_string()),
+                        },
+                    ));
+                    continue;
+                }
+            };
+
+            // Fresh, isolated overlay per test, carried directly on the harness
+            // value `t`. `t.env`, `t.std`, and `t.ctx.std.env` are all minted
+            // from this one `Arc`, so they observe one shared map — and because
+            // the overlay lives on the value (not in any process-global or
+            // ambient `eval.extra`), concurrent workers never contend.
+            //
+            // `base_env` is still installed on `eval.extra` for the *production*
+            // reads `std.env` makes (`aspect_cli_version`, roots) via
+            // `RuntimeEnv::from_eval`; only the env-overlay route moved onto the
+            // value.
+            let overlay: TestEnvMap = Arc::new(Mutex::new(BTreeMap::new()));
+            // Fresh harness per test: its own expectation cell, so a declared
+            // `t.bazel.expect_build(...)` in one test never bleeds into another
+            // (parallel-safe — state lives on the value, not a global).
+            let t = module.heap().alloc(Test::new(overlay, fake_bin.clone()));
+
+            let mut eval = Evaluator::new(&module);
+            eval.extra = Some(base_env);
+            let outcome = match eval.eval_function(f, &[t], &[]) {
+                Ok(_) => TestOutcome {
+                    name,
+                    passed: true,
+                    message: None,
+                },
+                Err(e) => TestOutcome {
+                    name,
+                    passed: false,
+                    message: Some(e.to_string()),
+                },
+            };
+            out.push((idx, outcome));
+        }
+        Ok(out)
+    })
+}
+
+/// Pick the worker count: `min(n_tests, available cores)`, never below one.
+fn default_jobs(n_tests: usize) -> usize {
+    // If the core count cannot be queried, behave as a single-core machine.
+    let cores = std::thread::available_parallelism().map_or(1, |p| p.get());
+    // `cores` is always >= 1, so the clamp bounds are well-ordered.
+    n_tests.clamp(1, cores)
+}
+
+/// Run every `test_*` function defined in `source`, each with an isolated
+/// in-memory environment overlay, across up to `min(tests, cpus)` worker
+/// threads. Results are merged back into definition order so the report is
+/// deterministic regardless of how tests were sharded.
+///
+/// `base_env` supplies the non-mocked context (roots, async runtime).
+pub fn run_test_source(source: &str, base_env: &RuntimeEnv) -> anyhow::Result<TestSummary> {
+    let names = discover_test_names(source, base_env)?;
+    let jobs = default_jobs(names.len());
+    run_test_source_with_jobs(source, base_env, names, jobs, None)
+}
+
+/// Like [`run_test_source`] but installs a fake-bazel binary so
+/// `t.ctx.bazel.build(...)` drives the `Fake` backend (spawns `fake_bin`,
+/// feeds it the declared `BazelExpectation`). `fake_bin` is located the way
+/// `crate::test::basil_bin` does today; a shipped self-exec subcommand is
+/// roadmap item 6.
+pub fn run_test_source_with_fake_bazel(
+    source: &str,
+    base_env: &RuntimeEnv,
+    fake_bin: Arc<str>,
+) -> anyhow::Result<TestSummary> {
+    let names = discover_test_names(source, base_env)?;
+    let jobs = default_jobs(names.len());
+    run_test_source_with_jobs(source, base_env, names, jobs, Some(fake_bin))
+}
+
+/// Like [`run_test_source`] but with an explicit worker count (the `--jobs`
+/// knob). `jobs <= 1` runs serially on the calling thread; higher values fan
+/// the tests out across at most `min(jobs, tests)` threads, each with its own
+/// Starlark heap. `fake_bin`, when set, wires every harness's `t.ctx.bazel`
+/// to a `Fake` backend driving that binary.
+fn run_test_source_with_jobs(
+    source: &str,
+    base_env: &RuntimeEnv,
+    names: Vec<String>,
+    jobs: usize,
+    fake_bin: Option<Arc<str>>,
+) -> anyhow::Result<TestSummary> {
+    let jobs = jobs.clamp(1, names.len().max(1)); // extra workers would sit idle
+
+    // Serial fast path: no threads, run everything on the calling thread (which
+    // is already inside the tokio runtime context the caller established).
+    if jobs <= 1 || names.len() <= 1 {
+        let shard: Vec<(usize, String)> = names.into_iter().enumerate().collect();
+        let mut outcomes = run_shard(base_env, source, shard, fake_bin)?;
+        outcomes.sort_by_key(|(idx, _)| *idx);
+        return Ok(TestSummary {
+            outcomes: outcomes.into_iter().map(|(_, o)| o).collect(),
+        });
+    }
+
+    // Parallel path: shard test names round-robin so each worker gets a roughly
+    // even mix, then rebuild an isolated `RuntimeEnv` + `Module` per thread. The
+    // `Module`/heap is `!Send`, so each worker re-parses + re-evaluates the
+    // (side-effect-free) module body locally rather than sharing one.
+    let seed = EnvSeed::from_env(base_env);
+    let source: std::sync::Arc<str> = std::sync::Arc::from(source);
+    let mut shards: Vec<Vec<(usize, String)>> = vec![Vec::new(); jobs];
+    for (idx, name) in names.into_iter().enumerate() {
+        shards[idx % jobs].push((idx, name));
+    }
+
+    let handles: Vec<_> = shards
+        .into_iter()
+        .filter(|s| !s.is_empty())
+        .map(|shard| {
+            let seed = seed.clone();
+            let source = source.clone();
+            let fake_bin = fake_bin.clone();
+            std::thread::spawn(move || -> anyhow::Result<Vec<(usize, TestOutcome)>> {
+                // Enter the shared runtime so `RuntimeEnv::new`'s `Handle::current()`
+                // resolves, then mint this thread's own env (no `Rc` crosses here).
+                let _guard = seed.rt.enter();
+                let env = RuntimeEnv::new(
+                    seed.cli_version,
+                    seed.aspect_root,
+                    seed.bazel_root,
+                    seed.git_root,
+                );
+                run_shard(&env, &source, shard, fake_bin)
+            })
+        })
+        .collect();
+
+    let mut merged: Vec<(usize, TestOutcome)> = Vec::new();
+    for h in handles {
+        let shard_outcomes = h
+            .join()
+            .map_err(|_| anyhow::anyhow!("a test worker thread panicked"))??;
+        merged.extend(shard_outcomes);
+    }
+    merged.sort_by_key(|(idx, _)| *idx);
+    Ok(TestSummary {
+        outcomes: merged.into_iter().map(|(_, o)| o).collect(),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+    use tokio::runtime::Runtime;
+
+    fn base_env() -> RuntimeEnv {
+        // `Env::new` builds an `AsyncRuntime`, which requires a tokio runtime
+        // to be entered (matching `crate::test`'s setup).
+        RuntimeEnv::new(
+            "test".to_string(),
+            PathBuf::from("/"),
+            PathBuf::from("/"),
+            None,
+        )
+    }
+
+    #[test]
+    fn discovers_and_runs_test_functions() {
+        let rt = Runtime::new().unwrap();
+        let _g = rt.enter();
+        let src = r#"
+def test_env_overlay_is_isolated(t):
+    # overlay starts empty; reads go through the real std.Env type
+    asserts.eq(t.std.env.var("BUILDKITE"), None)
+    t.env.set("BUILDKITE", "true")
+    # same overlay observed through both t.std and a real TaskContext (t.ctx)
+    asserts.eq(t.std.env.var("BUILDKITE"), "true")
+    asserts.eq(t.ctx.std.env.var("BUILDKITE"), "true")
+
+def test_env_set_and_remove(t):
+    t.env.set("FOO", "1")
+    asserts.eq(t.env.get("FOO"), "1")
+    t.env.remove("FOO")
+    asserts.eq(t.env.get("FOO"), None)
+
+def test_contains_and_truthy(t):
+    asserts.contains("hello world", "world")
+    asserts.is_true(1 == 1)
+    asserts.fails(lambda: fail("boom"))
+
+def test_intentional_failure(t):
+    asserts.eq(1, 2)
+
+def helper_not_discovered(t):
+    fail("helper_* is not a test and must not run")
+"#;
+
+        let summary = run_test_source(src, &base_env()).expect("runner should not error");
+        eprintln!("{}", summary.report());
+
+        let names: Vec<&str> = summary.outcomes.iter().map(|o| o.name.as_str()).collect();
+        assert!(
+            !names.contains(&"helper_not_discovered"),
+            "only test_* functions should be discovered, got {names:?}"
+        );
+        assert_eq!(summary.outcomes.len(), 4, "expected 4 discovered tests");
+        assert_eq!(summary.passed(), 3, "report:\n{}", summary.report());
+        assert_eq!(summary.failed(), 1, "report:\n{}", summary.report());
+
+        let failure = summary
+            .outcomes
+            .iter()
+            .find(|o| !o.passed)
+            .expect("one failing test");
+        assert_eq!(failure.name, "test_intentional_failure");
+        assert!(
+            failure
+                .message
+                .as_deref()
+                .unwrap_or("")
+                .contains("asserts.eq failed"),
+            "failure should carry the assertion message, got {:?}",
+            failure.message
+        );
+    }
+
+    #[test]
+    fn runs_tests_in_parallel_shards() {
+        let rt = Runtime::new().unwrap();
+        let _g = rt.enter();
+        // Many tests, each mutating its own overlay under the same key. If the
+        // overlay leaked across the concurrent workers, the cross-checks below
+        // would observe another test's value and fail. The intentional failure
+        // (test_zzz_fail) also proves per-test capture survives sharding.
+        let mut src = String::new();
+        for i in 0..16 {
+            src.push_str(&format!(
+                "def test_iso_{i:02}(t):\n    \
+                   asserts.eq(t.std.env.var(\"K\"), None)\n    \
+                   t.env.set(\"K\", \"{i}\")\n    \
+                   asserts.eq(t.ctx.std.env.var(\"K\"), \"{i}\")\n\n"
+            ));
+        }
+        src.push_str("def test_zzz_fail(t):\n    asserts.eq(1, 2)\n");
+
+        let names = discover_test_names(&src, &base_env()).expect("discovery ok");
+        assert_eq!(names.len(), 17, "16 isolation tests + 1 failure");
+
+        // Force the parallel path with several workers.
+        let summary = run_test_source_with_jobs(&src, &base_env(), names, 8, None)
+            .expect("parallel runner should not error");
+
+        assert_eq!(summary.passed(), 16, "report:\n{}", summary.report());
+        assert_eq!(summary.failed(), 1, "report:\n{}", summary.report());
+
+        // Results are merged back into definition order regardless of sharding.
+        let ordered: Vec<&str> = summary.outcomes.iter().map(|o| o.name.as_str()).collect();
+        let mut expected: Vec<String> = (0..16).map(|i| format!("test_iso_{i:02}")).collect();
+        expected.push("test_zzz_fail".to_string());
+        assert_eq!(
+            ordered,
+            expected.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
+            "outcomes must be in definition order, not completion order"
+        );
+    }
+
+    #[test]
+    fn expanded_assertion_vocabulary() {
+        let rt = Runtime::new().unwrap();
+        let _g = rt.enter();
+        // Exercises the assertions beyond eq/ne/is_true: container membership
+        // over every supported shape, its inverse, the ordering family, and
+        // message-matching on `fails`. Each test is written to pass; a single
+        // regression in any assertion would flip its test to failed.
+        let src = r#"
+def test_contains_over_containers(t):
+    asserts.contains("hello world", "world")   # substring
+    asserts.contains([1, 2, 3], 2)              # list element
+    asserts.contains((1, 2, 3), 3)              # tuple element
+    asserts.contains({"a": 1, "b": 2}, "a")     # dict key
+
+def test_not_contains(t):
+    asserts.not_contains("hello", "z")
+    asserts.not_contains([1, 2, 3], 9)
+    asserts.not_contains({"a": 1}, "b")
+
+def test_ordering(t):
+    asserts.gt(2, 1)
+    asserts.ge(2, 2)
+    asserts.lt(1, 2)
+    asserts.le(2, 2)
+    asserts.lt("abc", "abd")
+
+def test_fails_with_message_match(t):
+    asserts.fails(lambda: fail("boom: bad input"), contains = "bad input")
+    asserts.fails(lambda: fail("boom"))
+"#;
+        let summary = run_test_source(src, &base_env()).expect("runner ok");
+        assert_eq!(
+            summary.failed(),
+            0,
+            "all expanded-assertion tests should pass; report:\n{}",
+            summary.report()
+        );
+        assert_eq!(summary.passed(), 4, "report:\n{}", summary.report());
+    }
+
+    #[test]
+    fn assertions_fail_when_expected() {
+        let rt = Runtime::new().unwrap();
+        let _g = rt.enter();
+        // The mirror image: each test drives one assertion into its failure
+        // path, proving the failure branches actually raise (a no-op assert
+        // would let these pass and break the framework silently).
+        let src = r#"
+def test_contains_fails(t):
+    asserts.contains([1, 2], 3)
+
+def test_not_contains_fails(t):
+    asserts.not_contains([1, 2], 1)
+
+def test_gt_fails(t):
+    asserts.gt(1, 2)
+
+def test_fails_wrong_message(t):
+    asserts.fails(lambda: fail("boom"), contains = "not-present")
+
+def test_fails_when_no_raise(t):
+    asserts.fails(lambda: 1 + 1)
+"#;
+        let summary = run_test_source(src, &base_env()).expect("runner ok");
+        assert_eq!(
+            summary.passed(),
+            0,
+            "every assertion should reach its failure path; report:\n{}",
+            summary.report()
+        );
+        assert_eq!(summary.failed(), 5, "report:\n{}", summary.report());
+    }
+
+    #[test]
+    fn asserts_surface_is_test_only() {
+        // The `asserts` namespace exists in the test surface…
+        let test_globals = crate::eval::api::get_test_globals();
+        assert!(
+            test_globals.names().any(|n| n.as_str() == "asserts"),
+            "test globals must expose `asserts`"
+        );
+        // …and is absent from the production surface, so it can never leak
+        // into config.axl / builtins.
+        let prod_globals = crate::eval::get_globals().build();
+        assert!(
+            !prod_globals.names().any(|n| n.as_str() == "asserts"),
+            "production globals must NOT expose `asserts`"
+        );
+    }
+
+    #[test]
+    fn overlay_does_not_leak_into_process() {
+        let rt = Runtime::new().unwrap();
+        let _g = rt.enter();
+        // Sanity: a test setting an env var through the overlay must not mutate
+        // the real process environment.
+        let src = r#"
+def test_sets_overlay(t):
+    t.env.set("AXL_POC_LEAK_CHECK", "should-not-leak")
+    asserts.eq(t.std.env.var("AXL_POC_LEAK_CHECK"), "should-not-leak")
+"#;
+        let summary = run_test_source(src, &base_env()).expect("runner ok");
+        assert_eq!(summary.passed(), 1, "report:\n{}", summary.report());
+        assert!(
+            std::env::var("AXL_POC_LEAK_CHECK").is_err(),
+            "overlay must not leak into the real process env"
+        );
+    }
+
+    /// End-to-end proof of the bazel `Fake` backend (increment 2): a test
+    /// declares a typed `BazelExpectation` via `t.bazel.expect_build(...)`, the
+    /// `Fake` backend on `t.ctx.bazel` fork+execs the fake-bazel binary
+    /// (basil), hands it the expectation over the inherited socketpair control
+    /// channel, and the fake synthesizes a real BES stream onto the
+    /// `--build_event_binary_file` the parent already wires. The assertions run
+    /// entirely through the production `ctx.bazel.build` read path
+    /// (`BuildEventIter` + `build.wait()`), so the mock's contract is the real
+    /// one.
+    #[test]
+    fn fake_bazel_backend_synthesizes_declared_expectation() {
+        let rt = Runtime::new().unwrap();
+        let _g = rt.enter();
+        let fake_bin: Arc<str> = Arc::from(crate::test::basil_bin());
+
+        let src = r#"
+def test_passing_build_synthesizes_events_and_exit(t):
+    t.bazel.expect_build("//a:b", "//c:d", result = "passed")
+    iter = bazel.build_events.iterator()
+    build = t.ctx.bazel.build(build_events = [iter], stderr = None)
+    started = 0
+    completed = 0
+    finished = 0
+    for event in iter:
+        kind = event.kind
+        if kind == "build_started":
+            started += 1
+        elif kind == "target_completed":
+            completed += 1
+        elif kind == "build_finished":
+            finished += 1
+    status = build.wait()
+    asserts.is_true(status.success)
+    asserts.eq(status.code, 0)
+    asserts.eq(started, 1)
+    asserts.eq(completed, 2)
+    asserts.eq(finished, 1)
+
+def test_failing_build_surfaces_declared_exit_code(t):
+    t.bazel.expect_build("//x:y", result = "failed", exit_code = 7)
+    build = t.ctx.bazel.build(build_events = True, stderr = None)
+    status = build.wait()
+    asserts.is_false(status.success)
+    asserts.eq(status.code, 7)
+"#;
+        let summary = run_test_source_with_fake_bazel(src, &base_env(), fake_bin)
+            .expect("fake-bazel runner ok");
+        assert_eq!(
+            summary.failed(),
+            0,
+            "expected all fake-bazel tests to pass; report:\n{}",
+            summary.report()
+        );
+        assert_eq!(summary.passed(), 2, "report:\n{}", summary.report());
+    }
+}
diff --git a/crates/axl-runtime/src/eval/api.rs b/crates/axl-runtime/src/eval/api.rs
index b5196a023..43098a251 100644
--- a/crates/axl-runtime/src/eval/api.rs
+++ b/crates/axl-runtime/src/eval/api.rs
@@ -1,4 +1,5 @@
 use crate::engine;
+use starlark::environment::Globals;
 use starlark::environment::GlobalsBuilder;
 use starlark::environment::LibraryExtension;
 use starlark::syntax::{Dialect, DialectTypes};
@@ -35,6 +36,16 @@ pub fn get_globals() -> GlobalsBuilder {
     globals
 }
 
+/// Returns the globals surface for `*_test.axl` files: the full AXL surface
+/// plus the test-only vocabulary (`asserts`, …). The extra names exist *only*
+/// in test files — the loader selects this surface by filename suffix (see
+/// `eval::load`) — so test scaffolding can never leak into production AXL.
+pub fn get_test_globals() -> Globals {
+    let mut globals = get_globals();
+    engine::testing::register_test_globals(&mut globals);
+    globals.build()
+}
+
 pub fn dialect() -> Dialect {
     Dialect {
         enable_def: true,
diff --git a/crates/axl-runtime/src/eval/load.rs b/crates/axl-runtime/src/eval/load.rs
index 117f142d0..7955b1b35 100644
--- a/crates/axl-runtime/src/eval/load.rs
+++ b/crates/axl-runtime/src/eval/load.rs
@@ -33,6 +33,12 @@ pub struct AxlLoader<'m> {
     dialect: Dialect,
     pub(crate) globals: Globals,
 
+    /// Augmented globals surface used when evaluating `*_test.axl` files:
+    /// the base surface plus the test-only vocabulary (`asserts`, …). Selected
+    /// per-file by suffix in `eval_module_inner` so the test surface never
+    /// reaches production AXL.
+    test_globals: Globals,
+
     /// All modules (root + deps) discovered during MODULE.aspect expansion,
     /// looked up by name to resolve `@<name>//path` loads.
     modules: &'m [Mod],
@@ -64,6 +70,7 @@ impl<'m> AxlLoader<'m> {
             env: Env::new(cli_version, aspect_root, bazel_root, git_root),
             dialect: api::dialect(),
             globals: api::get_globals().build(),
+            test_globals: api::get_test_globals(),
             modules,
             load_stack: RefCell::new(vec![]),
             module_stack: RefCell::new(vec![]),
@@ -80,7 +87,12 @@ impl<'m> AxlLoader<'m> {
         path: &Path,
     ) -> Result<FrozenModule, EvalError> {
         self.module_stack.borrow_mut().push(scope);
-        let result = self.eval_module_impl(path, None, false);
+        // The first-party `@aspect` standard library is privileged like the
+        // embedded `@std`/`@bazel` modules: its files may reach `__builtins__`
+        // (e.g. the test runner backing `aspect axl test`). Other modules stay
+        // unprivileged. NOTE(review): keyed on name only; confirm it is reserved.
+        let is_std = scope.name == "aspect";
+        let result = self.eval_module_impl(path, None, is_std);
         self.module_stack.borrow_mut().pop();
         result
     }
@@ -146,6 +158,19 @@ impl<'m> AxlLoader<'m> {
         };
 
         let ast = AstModule::parse(&path.to_string_lossy(), raw, &self.dialect)?;
+        // `*_test.axl` files are evaluated against the augmented test surface
+        // (base AXL + `asserts`, …); every other file gets the production
+        // surface. Keying on the filename suffix keeps the test vocabulary
+        // strictly scoped to test files.
+        let is_test = path
+            .file_name()
+            .and_then(|n| n.to_str())
+            .is_some_and(|n| n.ends_with("_test.axl"));
+        let globals = if is_test {
+            &self.test_globals
+        } else {
+            &self.globals
+        };
         Module::with_temp_heap(|module| {
             if is_std {
                 module.set("#_is_std#", Value::new_bool(true));
@@ -153,7 +178,7 @@ impl<'m> AxlLoader<'m> {
             let mut eval = Evaluator::new(&module);
             eval.set_loader(self);
             eval.extra = Some(&self.env);
-            eval.eval_module(ast, &self.globals)?;
+            eval.eval_module(ast, globals)?;
             drop(eval);
             module
                 .freeze()
diff --git a/crates/axl-runtime/src/eval/multi_phase.rs b/crates/axl-runtime/src/eval/multi_phase.rs
index 81425d73d..f9e62183c 100644
--- a/crates/axl-runtime/src/eval/multi_phase.rs
+++ b/crates/axl-runtime/src/eval/multi_phase.rs
@@ -107,6 +107,11 @@ pub struct MultiPhaseEval<'v, 'l> {
     /// ConfigContext and FeatureContext as `ctx.telemetry`. The runtime
     /// drains exporter specs out of it (via `drain_exporters`) after phase 3.
     telemetry_value: Value<'v>,
+    /// Optional bazel backend override for the contexts this evaluator mints.
+    /// `None` → production `Real`. Tests set a `Fake` so `ctx.bazel.build`
+    /// fork+execs the fake-bazel binary with a declared expectation, carried
+    /// on the value (no `BAZEL_REAL` global).
+    bazel_backend: Option<crate::engine::bazel::backend::BazelBackend>,
 }
 
 impl<'v, 'l> MultiPhaseEval<'v, 'l> {
@@ -120,9 +125,20 @@ impl<'v, 'l> MultiPhaseEval<'v, 'l> {
             features: heap.alloc(FeatureMap::new()),
             trait_map_value: None,
             telemetry_value,
+            bazel_backend: None,
         }
     }
 
+    /// Override the bazel backend for the contexts this evaluator mints
+    /// (test-only; production leaves it `None` = `Real`).
+    pub fn with_bazel_backend(
+        mut self,
+        backend: crate::engine::bazel::backend::BazelBackend,
+    ) -> Self {
+        self.bazel_backend = Some(backend);
+        self
+    }
+
     /// Drain exporter specs collected from `ctx.telemetry.exporters.add(...)`
     /// during phases 2-3. Intended to be called once, after
     /// `execute_features_with_args` completes and before phase 4 begins.
@@ -520,6 +536,10 @@ impl<'v, 'l> MultiPhaseEval<'v, 'l> {
 
         let bazel = heap.alloc(Bazel {
             active_rc: std::cell::RefCell::new(None),
+            backend: self
+                .bazel_backend
+                .clone()
+                .unwrap_or(crate::engine::bazel::backend::BazelBackend::Real),
         });
         // Allocate `task_info` on the heap and keep the resulting Value
         // so we can read back timing + phases after `_impl` returns.
diff --git a/crates/axl-runtime/src/test.rs b/crates/axl-runtime/src/test.rs
index 94518cfbc..336b6fde1 100644
--- a/crates/axl-runtime/src/test.rs
+++ b/crates/axl-runtime/src/test.rs
@@ -35,14 +35,17 @@ pub fn eval(code: &str) -> EvalBuilder {
     EvalBuilder {
         code: code.to_string(),
         with_loader: false,
-        with_fake_bazel: false,
+        fake_bazel: None,
     }
 }
 
 pub struct EvalBuilder {
     code: String,
     with_loader: bool,
-    with_fake_bazel: bool,
+    /// When set, `run_task` mints `ctx.bazel` with a `Fake` backend driving
+    /// the basil fake-bazel binary, fed this declared expectation over the
+    /// socketpair control channel. `None` → production `Real`.
+    fake_bazel: Option<basil_core::BazelExpectation>,
 }
 
 impl EvalBuilder {
@@ -53,12 +56,27 @@ impl EvalBuilder {
         self
     }
 
-    /// Install the basil fake-bazel binary as `BAZEL_REAL` before running.
-    /// Use on any builder whose snippet calls `ctx.bazel.build` so the
-    /// runtime spawns basil instead of shelling out to a real `bazel`.
-    /// Idempotent across tests; see `install_basil` for details.
-    pub fn with_fake_bazel(mut self) -> Self {
-        self.with_fake_bazel = true;
+    /// Opt this builder into the fake bazel with the default expectation: no
+    /// targets, result `Passed`, no exit-code override. `ctx.bazel` is then
+    /// minted with a `Fake` backend, so the snippet's `ctx.bazel.build`
+    /// fork+execs the basil fake-bazel binary instead of a real `bazel` and
+    /// sees a clean passing build (`BuildStarted` → `BuildFinished`, exit 0).
+    /// Use [`with_fake_bazel_expectation`] to declare a different outcome.
+    ///
+    /// [`with_fake_bazel_expectation`]: Self::with_fake_bazel_expectation
+    pub fn with_fake_bazel(self) -> Self {
+        // Empty target list: the stream carries no `TargetComplete` events.
+        let clean_pass =
+            basil_core::BazelExpectation::new(Vec::new(), basil_core::BuildResult::Passed, None);
+        self.with_fake_bazel_expectation(clean_pass)
+    }
+
+    /// Like [`with_fake_bazel`] but with an explicit declared expectation, so
+    /// the fake synthesizes the BES stream + exit code the test needs.
+    ///
+    /// [`with_fake_bazel`]: Self::with_fake_bazel
+    pub fn with_fake_bazel_expectation(mut self, exp: basil_core::BazelExpectation) -> Self {
+        self.fake_bazel = Some(exp);
         self
     }
 
@@ -138,12 +156,16 @@ impl EvalBuilder {
     /// be self-contained. Task `idx` runs with empty `Arguments`.
     ///
     /// Snippets that call `ctx.bazel.build` should opt in via
-    /// `.with_fake_bazel()` so the runtime spawns the basil fake-bazel
-    /// instead of a real `bazel`.
+    /// `.with_fake_bazel()` so `ctx.bazel` is minted with a `Fake` backend
+    /// that fork+execs the basil fake-bazel instead of a real `bazel`.
     pub fn run_task(self, task_index: usize) -> anyhow::Result<Option<u8>> {
-        if self.with_fake_bazel {
-            install_basil();
-        }
+        let fake_backend =
+            self.fake_bazel
+                .clone()
+                .map(|exp| crate::engine::bazel::backend::BazelBackend::Fake {
+                    fake_bin: basil_bin().to_string(),
+                    expectation: std::sync::Arc::new(exp),
+                });
         let tmp = tempfile::tempdir()?;
         let script_path = tmp.path().join("test.axl");
         std::fs::write(&script_path, &self.code)?;
@@ -166,6 +188,9 @@ impl EvalBuilder {
                 &modules,
             );
             let mut mpe = MultiPhaseEval::new(env, &loader);
+            if let Some(backend) = fake_backend {
+                mpe = mpe.with_bazel_backend(backend);
+            }
             let scripts = vec![script_path];
             mpe.eval(&scripts, &root_mod, &modules)
                 .map_err(anyhow::Error::from)?;
@@ -245,21 +270,6 @@ pub fn basil_bin() -> &'static str {
     })
 }
 
-/// Point axl-runtime at basil. Idempotent and safe under parallel test
-/// execution: every test sets the same value, so there is nothing to race
-/// over (no PATH_LOCK needed).
-///
-/// Per-scenario timing (e.g. the post-open pause that lets a late AXL
-/// subscriber land its `.subscribe()` call before basil writes) lives in
-/// basil's `scenario()` table, not here.
-pub fn install_basil() {
-    // SAFETY: process-wide env mutation. All callers in this test binary
-    // converge on the same value, so concurrent writes are no-ops.
-    unsafe {
-        std::env::set_var("BAZEL_REAL", basil_bin());
-    }
-}
-
 /// Run `f` on a worker thread; return `Some(result)` if it finished within
 /// `timeout`, `None` if it didn't. Used to fail-fast on regression tests
 /// that exercise potential hangs (the worker thread is leaked on timeout —
diff --git a/crates/basil-core/BUILD.bazel b/crates/basil-core/BUILD.bazel
new file mode 100644
index 000000000..43b41a70d
--- /dev/null
+++ b/crates/basil-core/BUILD.bazel
@@ -0,0 +1,19 @@
+load("//bazel/rust:defs.bzl", "rust_library", "rust_test")
+
+rust_library(
+    name = "basil-core",
+    srcs = glob(["src/**/*.rs"]),
+    visibility = [
+        "//crates/axl-runtime:__pkg__",
+        "//crates/basil:__pkg__",
+    ],
+    deps = [
+        "//crates/axl-proto",
+        "@crates//:prost",
+    ],
+)
+
+rust_test(
+    name = "test",
+    crate = ":basil-core",
+)
diff --git a/crates/basil-core/Cargo.toml b/crates/basil-core/Cargo.toml
new file mode 100644
index 000000000..9390d08a6
--- /dev/null
+++ b/crates/basil-core/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "basil-core"
+version.workspace = true
+categories.workspace = true
+homepage.workspace = true
+repository.workspace = true
+license.workspace = true
+edition.workspace = true
+readme.workspace = true
+rust-version.workspace = true
+
+[lib]
+name = "basil_core"
+path = "src/lib.rs"
+
+[dependencies]
+axl-proto = { path = "../axl-proto" }
+prost = "0.14.1"
diff --git a/crates/basil-core/src/lib.rs b/crates/basil-core/src/lib.rs
new file mode 100644
index 000000000..b68339d75
--- /dev/null
+++ b/crates/basil-core/src/lib.rs
@@ -0,0 +1,339 @@
+//! `basil-core` — the reusable guts of the `basil` fake-`bazel` binary.
+//!
+//! Two consumers share this crate:
+//!   - the standalone `basil` binary (`crates/basil`), spawned by the
+//!     `BazelBackend::Fake` path the axl-runtime tests drive; and
+//!   - a shipped self-exec subcommand of `aspect` (roadmap item 6) so the AXL
+//!     test runner can fork+exec a fake bazel without embedding a second
+//!     binary (see `docs/testing.md`, decision 7).
+//!
+//! Replay path — [`replay_expectation`]: reads a typed, declared
+//! [`BazelExpectation`] (length-delimited protobuf) off a control channel and
+//! **synthesizes** a consistent `BuildStarted` → `TargetComplete` (one per
+//! target) → `BuildFinished` BES stream onto `--build_event_binary_file`,
+//! then exits with the fixture's code. This is the contract the AXL
+//! `BazelExpectation` record serializes into.
+//!
+//! Wire format (control channel): the [`BazelExpectation`] message encoded
+//! **length-delimited** via `prost::Message::encode_length_delimited` — the
+//! exact framing `basil` already uses for `BuildEvent`s, so producer and
+//! consumer share one framing convention. The raw `events` escape hatch
+//! carries pre-encoded length-delimited `BuildEvent`s as opaque `bytes`, so
+//! this crate does not have to re-derive the full BES schema to pass them
+//! through untouched.
+
+use std::fs::OpenOptions;
+use std::io::{Read, Write};
+use std::time::Duration;
+
+use axl_proto::build_event_stream::{
+    BuildEvent, BuildEventId, BuildFinished, BuildStarted, TargetComplete,
+    build_event::Payload,
+    build_event_id::{BuildFinishedId, BuildStartedId, Id, TargetCompletedId},
+    build_finished::ExitCode,
+};
+use prost::Message;
+
+// ─── Wire format: BazelExpectation ───────────────────────────────────────────
+
+/// The terminal outcome a fixture declares. Mirrors the AXL-facing
+/// `BuildResult` enum (`passed | failed | cache_evicted`). It picks the default
+/// exit code and whether synthesized `TargetComplete` events report success;
+/// the effective exit code is carried by the synthesized `BuildFinished`.
+///
+/// Encoded on the wire as the `i32` discriminant declared below.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
+pub enum BuildResult {
+    /// Clean build: every target reports success; default exit code 0.
+    #[default]
+    Passed = 0,
+    /// Failed build: every target reports failure; default exit code 1.
+    Failed = 1,
+    /// `REMOTE_CACHE_EVICTED` (default exit 39, targets fail) — the bug-1060 shape.
+    CacheEvicted = 2,
+}
+
+impl BuildResult {
+    /// Decode the wire tag; any tag other than 1 or 2 is read as `Passed`.
+    fn from_i32(v: i32) -> Self {
+        match v {
+            1 => Self::Failed,
+            2 => Self::CacheEvicted,
+            _ => Self::Passed,
+        }
+    }
+
+    /// Exit code used when the fixture leaves `exit_code` unset.
+    fn default_exit_code(self) -> i32 {
+        match self {
+            Self::Passed => 0,
+            Self::Failed => 1,
+            Self::CacheEvicted => 39,
+        }
+    }
+
+    /// Only a `Passed` result makes the per-target events report success.
+    fn targets_succeed(self) -> bool {
+        self == Self::Passed
+    }
+}
+
+/// A declared, typed fixture describing how the fake bazel should behave for
+/// one invocation. Serialized over the control channel as length-delimited
+/// protobuf (see crate docs).
+///
+/// This is the Rust mirror of the AXL `BazelExpectation` record minted by
+/// `t.bazel.expect_build(...)`.
+#[derive(Clone, PartialEq, ::prost::Message)]
+pub struct BazelExpectation {
+    /// Target patterns the fake "built". One `TargetComplete` is synthesized
+    /// per entry.
+    #[prost(string, repeated, tag = "1")]
+    pub targets: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
+
+    /// Declared terminal result as a [`BuildResult`] `i32` tag; other tags decode as `Passed`.
+    #[prost(int32, tag = "2")]
+    pub result: i32,
+
+    /// Process exit code. `None` (not present) means "derive from `result`".
+    #[prost(int32, optional, tag = "3")]
+    pub exit_code: ::core::option::Option<i32>,
+
+    /// Raw escape hatch: pre-encoded length-delimited `BuildEvent`s. When
+    /// non-empty, these are written verbatim INSTEAD of the synthesized
+    /// stream, so a test can express an event sequence the typed surface
+    /// can't yet model. Each entry is one already-length-delimited
+    /// `BuildEvent` (so basil-core passes them through without re-deriving
+    /// the framing).
+    #[prost(bytes = "vec", repeated, tag = "4")]
+    pub events: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
+}
+
+impl BazelExpectation {
+    /// Build an expectation from typed inputs, with no raw events attached.
+    /// Passing `exit_code = None` defers to the result's default code.
+    pub fn new(targets: Vec<String>, result: BuildResult, exit_code: Option<i32>) -> Self {
+        Self {
+            targets,
+            result: result as i32,
+            exit_code,
+            events: Vec::new(),
+        }
+    }
+
+    /// Replace `events` with raw pre-framed `BuildEvent`s (escape hatch).
+    pub fn with_raw_events(mut self, events: Vec<Vec<u8>>) -> Self {
+        self.events = events;
+        self
+    }
+
+    /// Typed view of the raw `result` tag.
+    fn build_result(&self) -> BuildResult {
+        BuildResult::from_i32(self.result)
+    }
+
+    /// The effective process exit code: the explicit `exit_code` override
+    /// when set, else the declared result's default.
+    pub fn effective_exit_code(&self) -> i32 {
+        self.exit_code
+            .unwrap_or_else(|| self.build_result().default_exit_code())
+    }
+
+    /// Serialize to a length-delimited protobuf frame (the control-channel
+    /// wire format).
+    pub fn encode_frame(&self) -> Vec<u8> {
+        self.encode_length_delimited_to_vec()
+    }
+
+    /// Read one length-delimited [`BazelExpectation`] frame from `r`. The
+    /// reader is drained to EOF before decoding, so the writer must close (or
+    /// shut down) its end once the frame is sent.
+    pub fn read_frame<R: Read>(mut r: R) -> std::io::Result<Self> {
+        let mut raw = Vec::new();
+        r.read_to_end(&mut raw)?;
+        Self::decode_length_delimited(raw.as_slice())
+            .map_err(|e| std::io::Error::other(format!("decode BazelExpectation: {e}")))
+    }
+}
+
+// ─── Generic synthesis ───────────────────────────────────────────────────────
+
+/// Produce the length-delimited BES frames implied by `exp`.
+///
+/// Raw override: a non-empty `exp.events` is returned as-is (the entries are
+/// already framed) and nothing is synthesized.
+///
+/// Otherwise the stream is `BuildStarted`, one `TargetComplete` per entry of
+/// `exp.targets` in order, then a `BuildFinished` carrying the effective
+/// exit code with `last_message` set.
+pub fn synthesize_frames(exp: &BazelExpectation) -> Vec<Vec<u8>> {
+    if !exp.events.is_empty() {
+        return exp.events.clone();
+    }
+
+    // Every synthesized target shares one verdict derived from the result.
+    let targets_ok = exp.build_result().targets_succeed();
+    let completions = exp
+        .targets
+        .iter()
+        .map(|label| target_complete(label, targets_ok));
+    let finished = build_finished(exp.effective_exit_code(), true);
+
+    // Frame each event, in stream order, with its varint length prefix.
+    std::iter::once(build_started())
+        .chain(completions)
+        .chain(std::iter::once(finished))
+        .map(|event| event.encode_length_delimited_to_vec())
+        .collect()
+}
+
+/// Generic replay entrypoint. Synthesizes (or replays the raw escape-hatch)
+/// BES stream for `exp` into `bes_path` (the `--build_event_binary_file` the
+/// parent wired), then returns the process exit code the caller should exit
+/// with. When `bes_path` is `None` nothing is written.
+///
+/// `open_delay` is slept after the BES path is opened and before any frame is
+/// written, widening the window for a late AXL subscriber to land its
+/// `.subscribe()`; pass `Duration::ZERO` when only `build.wait()` is checked.
+///
+/// TODO(increment-2): execlog (`--execution_log_compact_file`) and
+/// stdout/stderr synthesis are not yet emitted from the typed fixture. Only
+/// the BES surface + exit code are synthesized in this slice.
+pub fn replay_expectation(
+    exp: &BazelExpectation,
+    bes_path: Option<&str>,
+    open_delay: Duration,
+) -> i32 {
+    if let Some(path) = bes_path {
+        let frames = synthesize_frames(exp);
+        write_frames(path, &frames, open_delay);
+    }
+    exp.effective_exit_code()
+}
+
+/// One open/write/close cycle on an existing `path` (it is never created):
+/// sleep `open_delay`, then write each pre-framed entry of `frames`; panics on
+/// open or write failure. The reader sees a writer appear, drain, and vanish.
+pub fn write_frames(path: &str, frames: &[Vec<u8>], open_delay: Duration) {
+    let mut f = OpenOptions::new()
+        .write(true)
+        .open(path)
+        .unwrap_or_else(|e| panic!("basil-core: opening BES path {path:?} for write: {e}"));
+    if !open_delay.is_zero() {
+        std::thread::sleep(open_delay);
+    }
+    for frame in frames {
+        f.write_all(frame)
+            .unwrap_or_else(|e| panic!("basil-core: writing to BES path: {e}"));
+    }
+}
+
+// ─── BES event constructors ──────────────────────────────────────────────────
+
+/// A `BuildStarted` event with a default payload; `last_message` stays unset.
+pub fn build_started() -> BuildEvent {
+    BuildEvent {
+        id: Some(BuildEventId {
+            id: Some(Id::Started(BuildStartedId {})),
+        }),
+        payload: Some(Payload::Started(BuildStarted::default())),
+        ..Default::default()
+    }
+}
+
+pub fn build_finished(code: i32, last: bool) -> BuildEvent {
+    BuildEvent {
+        id: Some(BuildEventId {
+            id: Some(Id::BuildFinished(BuildFinishedId {})),
+        }),
+        last_message: last,
+        payload: Some(Payload::Finished(BuildFinished {
+            exit_code: Some(ExitCode {
+                code,
+                ..Default::default()
+            }),
+            ..Default::default()
+        })),
+        ..Default::default()
+    }
+}
+
+/// A `TargetComplete` event whose id carries `label` and whose payload
+/// reports `success`; `last_message` stays at its default (unset).
+pub fn target_complete(label: &str, success: bool) -> BuildEvent {
+    BuildEvent {
+        id: Some(BuildEventId {
+            id: Some(Id::TargetCompleted(TargetCompletedId {
+                label: label.to_owned(),
+                ..Default::default()
+            })),
+        }),
+        payload: Some(Payload::Completed(TargetComplete {
+            success,
+            ..Default::default()
+        })),
+        ..Default::default()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn expectation_round_trips_through_the_wire_frame() {
+        let exp = BazelExpectation::new(
+            vec!["//a:b".into(), "//c:d".into()],
+            BuildResult::Failed,
+            Some(7),
+        );
+        let frame = exp.encode_frame();
+        let decoded = BazelExpectation::read_frame(frame.as_slice()).unwrap();
+        assert_eq!(decoded.targets, vec!["//a:b", "//c:d"]);
+        assert_eq!(decoded.build_result(), BuildResult::Failed);
+        assert_eq!(decoded.effective_exit_code(), 7);
+    }
+
+    #[test]
+    fn default_exit_codes_follow_the_result() {
+        assert_eq!(
+            BazelExpectation::new(vec![], BuildResult::Passed, None).effective_exit_code(),
+            0
+        );
+        assert_eq!(
+            BazelExpectation::new(vec![], BuildResult::Failed, None).effective_exit_code(),
+            1
+        );
+        assert_eq!(
+            BazelExpectation::new(vec![], BuildResult::CacheEvicted, None).effective_exit_code(),
+            39
+        );
+    }
+
+    #[test]
+    fn synthesizes_started_then_target_then_finished() {
+        let exp = BazelExpectation::new(vec!["//x:y".into()], BuildResult::Passed, None);
+        let frames = synthesize_frames(&exp);
+        // BuildStarted + one TargetComplete + BuildFinished.
+        assert_eq!(frames.len(), 3);
+        // Decode the middle frame and confirm it's a successful TargetComplete.
+        let ev = BuildEvent::decode_length_delimited(frames[1].as_slice()).unwrap();
+        match ev.payload {
+            Some(Payload::Completed(tc)) => assert!(tc.success),
+            other => panic!("expected TargetComplete, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn raw_events_escape_hatch_bypasses_synthesis() {
+        let raw = {
+            let mut buf = Vec::new();
+            build_started().encode_length_delimited(&mut buf).unwrap();
+            buf
+        };
+        let exp = BazelExpectation::new(vec!["//x:y".into()], BuildResult::Passed, None)
+            .with_raw_events(vec![raw.clone()]);
+        let frames = synthesize_frames(&exp);
+        assert_eq!(frames, vec![raw]);
+    }
+}
diff --git a/crates/basil/BUILD.bazel b/crates/basil/BUILD.bazel
index 5660dc9ad..c9c75b6a5 100644
--- a/crates/basil/BUILD.bazel
+++ b/crates/basil/BUILD.bazel
@@ -8,7 +8,6 @@ rust_binary(
         "//crates/axl-runtime:__pkg__",
     ],
     deps = [
-        "//crates/axl-proto",
-        "@crates//:prost",
+        "//crates/basil-core",
     ],
 )
diff --git a/crates/basil/Cargo.toml b/crates/basil/Cargo.toml
index 6bc3ce094..1a1196f46 100644
--- a/crates/basil/Cargo.toml
+++ b/crates/basil/Cargo.toml
@@ -14,5 +14,4 @@ name = "basil"
 path = "src/main.rs"
 
 [dependencies]
-axl-proto = { path = "../axl-proto" }
-prost = "0.14.1"
+basil-core = { path = "../basil-core" }
diff --git a/crates/basil/src/main.rs b/crates/basil/src/main.rs
index beab105c4..e18646669 100644
--- a/crates/basil/src/main.rs
+++ b/crates/basil/src/main.rs
@@ -1,44 +1,35 @@
 //! basil — a fake `bazel` binary used to drive integration tests of
-//! `ctx.bazel.build`. The runtime spawns whichever binary `BAZEL_REAL`
-//! points at; tests point it at this one.
+//! `ctx.bazel.build`. The axl-runtime `BazelBackend::Fake` path fork+execs
+//! this binary directly (no `BAZEL_REAL`).
 //!
-//! Verbs:
-//!   - `info <key>...`  — prints `key: value` lines; supports `server_pid`,
-//!     `release`, `output_base`. The pid printed defaults to the basil
-//!     process's own pid; tests can override via `BASIL_SERVER_PID` so a
-//!     long-lived holder process keeps galvanize's `IfOpenForPid` retry
-//!     check satisfied for the whole test.
-//!   - `build` / `test` — finds `--build_event_binary_file <path>`, finds
-//!     `--scenario=<name>` somewhere in argv, and writes a sequence of
-//!     length-delimited `BuildEvent` protobufs into the BES path according
-//!     to the named scenario. Each "attempt" is one open/write/close cycle
-//!     on the path, so multi-attempt scenarios faithfully simulate Bazel's
-//!     reconnect-after-eviction behavior on a FIFO.
+//! All synthesis logic lives in the `basil-core` library so the standalone
+//! binary and a shipped `aspect` self-exec subcommand (roadmap item 6) share
+//! one implementation. This binary is just the argv/env front-end.
 //!
-//! Scenarios are added in `scenario`. Pick names that document the behavior
-//! they exercise (`success`, `cache_evicted_no_retry`, etc.) so the AXL test
-//! reads obviously: `ctx.bazel.build(flags = ["--scenario=cache_evicted_no_retry"], ...)`.
+//! Verb `build` / `test`: reads a declared `BazelExpectation` (length-delimited
+//! protobuf) off the inherited control channel named by `ASPECT_FAKE_BAZEL_FD`,
+//! then synthesizes a consistent BES stream onto `--build_event_binary_file`
+//! and exits with the fixture's code.
 
 use std::env;
-use std::fs::{self, OpenOptions};
+use std::fs::File;
 use std::io::Write;
+use std::os::fd::FromRawFd;
 use std::process;
-use std::thread;
 use std::time::Duration;
 
-use axl_proto::build_event_stream::{
-    BuildEvent, BuildEventId, BuildFinished, BuildStarted,
-    build_event::Payload,
-    build_event_id::{BuildFinishedId, BuildStartedId, Id},
-    build_finished::ExitCode,
-};
-use prost::Message;
+use basil_core::{BazelExpectation, replay_expectation};
+
+/// Env var naming the inherited control-channel fd carrying the serialized
+/// `BazelExpectation`. Mirrors the constant the parent sets in
+/// `axl-runtime`'s `Fake` backend.
+const FAKE_FD_ENV: &str = "ASPECT_FAKE_BAZEL_FD";
 
 fn main() {
     let args: Vec<String> = env::args().skip(1).collect();
-    // First non-flag arg is the verb (e.g. "info", "build"). Flags before it
-    // (like bazel startup flags) are tolerated and ignored — we don't model
-    // bazel's real flag positioning rules.
+    // First non-flag arg is the verb (e.g. "build"). Flags before it (like
+    // bazel startup flags) are tolerated and ignored — we don't model bazel's
+    // real flag positioning rules.
     let verb = args
         .iter()
         .find(|a| !a.starts_with('-'))
@@ -46,7 +37,6 @@ fn main() {
         .unwrap_or("");
 
     match verb {
-        "info" => run_info(&args),
         "build" | "test" => run_build(&args),
         "" => {
             eprintln!("basil: no verb given");
@@ -59,69 +49,42 @@ fn main() {
     }
 }
 
-fn run_info(args: &[String]) {
-    // Honors BASIL_SERVER_PID so tests can pin the reported pid to a holder
-    // process (e.g. `sleep 60`) that outlives basil's own short-lived info
-    // invocation. Required for galvanize's IfOpenForPid retry loop to keep
-    // the FIFO read end open across the lifetime of the test.
-    let pid: u32 = env::var("BASIL_SERVER_PID")
-        .ok()
-        .and_then(|s| s.parse().ok())
-        .unwrap_or_else(process::id);
-    let release = env::var("BASIL_RELEASE").unwrap_or_else(|_| "9.0.0".to_string());
-
-    for key in args.iter().filter(|a| !a.starts_with('-') && *a != "info") {
-        match key.as_str() {
-            "server_pid" => println!("server_pid: {pid}"),
-            "release" => println!("release: {release}"),
-            "output_base" => {
-                let base =
-                    env::var("BASIL_OUTPUT_BASE").unwrap_or_else(|_| format!("/tmp/basil-{pid}"));
-                let _ = fs::create_dir_all(format!("{base}/server"));
-                let _ = fs::write(format!("{base}/server/server.pid.txt"), pid.to_string());
-                println!("{base}");
-            }
-            _ => {}
-        }
-    }
-}
-
 fn run_build(args: &[String]) {
     let bes_path = find_flag_value(args, "--build_event_binary_file");
-    let scenario_name =
-        find_flag_value(args, "--scenario").unwrap_or_else(|| "success".to_string());
-    let s = scenario(&scenario_name);
-
-    if let Some(path) = bes_path {
-        write_scenario(&path, &s);
-    }
+    let Some(fd) = control_fd() else {
+        eprintln!("basil: no {FAKE_FD_ENV} control fd inherited; nothing to replay");
+        process::exit(2);
+    };
+    run_generic(fd, bes_path.as_deref());
+}
 
-    match s.exit {
-        ExitBehavior::Code(c) => process::exit(c),
-        ExitBehavior::Signal(sig) => {
-            // libc(3)'s `raise` declared directly to avoid pulling in the
-            // libc crate for one symbol. async-signal-safe and only
-            // delivers the named signal to the current process.
-            unsafe extern "C" {
-                fn raise(sig: i32) -> i32;
-            }
-            // SAFETY: `raise` is a safe-to-call libc function with a
-            // well-defined contract on every Unix.
-            unsafe {
-                raise(sig);
-            }
-            // If `raise` returned (e.g. signal caught/ignored, which
-            // we don't expect with SIGKILL or default handlers), fall
-            // through to a non-zero exit so the caller still sees an
-            // abnormal-looking outcome.
-            process::exit(128 + sig);
+/// Read the serialized `BazelExpectation` from the inherited control fd,
+/// synthesize the BES stream into `bes_path`, and exit with the fixture's
+/// code. Never returns.
+fn run_generic(fd: i32, bes_path: Option<&str>) -> ! {
+    // SAFETY: `fd` is an inherited control-channel fd the parent dup2'd into
+    // place for us; we own this end for the lifetime of the process.
+    let file = unsafe { File::from_raw_fd(fd) };
+    let exp = match BazelExpectation::read_frame(file) {
+        Ok(e) => e,
+        Err(e) => {
+            eprintln!("basil: reading BazelExpectation from control fd {fd}: {e}");
+            process::exit(2);
         }
-    }
+    };
+    // A small open_delay widens the window for the AXL iterator's late
+    // `.subscribe()` to land before events fan out on the warm-daemon path.
+    let code = replay_expectation(&exp, bes_path, Duration::from_millis(50));
+    let _ = std::io::stderr().flush();
+    process::exit(code);
+}
+
+/// The control fd parsed from `ASPECT_FAKE_BAZEL_FD` (unvalidated); `None` if unset or unparsable.
+fn control_fd() -> Option<i32> {
+    env::var(FAKE_FD_ENV).ok().and_then(|s| s.parse().ok())
 }
 
-/// Finds `--name <value>` or `--name=<value>` in argv. The runtime emits both
-/// forms (`--build_event_binary_file <path>` for paths, `--scenario=foo` for
-/// user-supplied flags), so handling both keeps us tolerant.
+/// Finds `--name <value>` or `--name=<value>` in argv.
 fn find_flag_value(args: &[String], name: &str) -> Option<String> {
     let prefix = format!("{name}=");
     for (i, a) in args.iter().enumerate() {
@@ -134,156 +97,3 @@ fn find_flag_value(args: &[String], name: &str) -> Option<String> {
     }
     None
 }
-
-/// How basil terminates after writing the BES event stream. `Code(n)`
-/// shells out to `process::exit(n)`; `Signal(n)` raises Unix signal `n`
-/// on itself so the parent's `ExitStatus::code()` is `None`, modeling
-/// Bazel being killed by a signal rather than exiting cleanly.
-enum ExitBehavior {
-    Code(i32),
-    Signal(i32),
-}
-
-/// One full BES interaction. Each attempt is one open/write/close cycle on
-/// the FIFO. `open_delay` sleeps after the FIFO open and before any writes —
-/// only set this on scenarios whose tests assert on what the AXL iterator
-/// received. `build.build_events()` subscribes after `Build::spawn` returns
-/// (the broadcaster doesn't replay history; see
-/// `crates/axl-runtime/src/engine/bazel/stream/broadcaster.rs:271`), so a
-/// pause widens the window for that subscribe to land before basil starts
-/// fanning out events. Scenarios whose tests only check `build.wait()`
-/// status don't need it — leave at zero.
-///
-/// `exit` controls how basil terminates after the event sequence is
-/// flushed. Defaults to `Code(0)`; set explicitly to model nonzero exits
-/// or signal kills.
-struct Scenario {
-    open_delay: Duration,
-    attempts: Vec<Vec<BuildEvent>>,
-    exit: ExitBehavior,
-}
-
-fn write_scenario(path: &str, scenario: &Scenario) {
-    for events in &scenario.attempts {
-        // One open/write/close per attempt: the read side observes a writer
-        // appear, drain bytes, and disappear — same as Bazel reopening the
-        // BEP file on each retry.
-        let mut f = OpenOptions::new()
-            .write(true)
-            .open(path)
-            .unwrap_or_else(|e| panic!("basil: opening BES path {path:?} for write: {e}"));
-        if !scenario.open_delay.is_zero() {
-            thread::sleep(scenario.open_delay);
-        }
-        for ev in events {
-            let mut buf = Vec::new();
-            ev.encode_length_delimited(&mut buf)
-                .expect("basil: encode BuildEvent");
-            f.write_all(&buf)
-                .unwrap_or_else(|e| panic!("basil: writing to BES path: {e}"));
-        }
-    }
-}
-
-/// Resolve a scenario by name. Each scenario documents the behavior or bug
-/// it targets. Add new ones here.
-fn scenario(name: &str) -> Scenario {
-    match name {
-        // Clean run: one attempt, terminates with last_message=true.
-        // 50ms open_delay so AXL's `for event in build.build_events()` (a late
-        // subscriber by API shape) lands its subscription before basil starts
-        // fanning out events. Without this, the iterator races the producer
-        // and yields zero events.
-        "success" => Scenario {
-            open_delay: Duration::from_millis(50),
-            attempts: vec![vec![build_started(), build_finished(0, true)]],
-            exit: ExitBehavior::Code(0),
-        },
-
-        // Regression for aspect-build/aspect-cli#1060: a single attempt with
-        // REMOTE_CACHE_EVICTED (exit code 39) and last_message=true, then
-        // basil exits without writing a retry attempt. axl-runtime's stream
-        // sets `expecting_retry = true` on the evicted BuildFinished and
-        // would otherwise loop swallowing BrokenPipe forever. The fix in
-        // crates/axl-runtime/src/engine/bazel/stream/build_event.rs falls
-        // through to a graceful close once it observes the writer pid is
-        // dead, so this scenario must terminate the AXL build promptly.
-        "cache_evicted_no_retry" => Scenario {
-            open_delay: Duration::ZERO,
-            attempts: vec![vec![build_started(), build_finished(39, true)]],
-            exit: ExitBehavior::Code(0),
-        },
-
-        // Reference scenario: REMOTE_CACHE_EVICTED followed by a successful
-        // retry. Two attempts, one open/write/close each. Matches Bazel's
-        // real reconnect-after-eviction shape and exercises the
-        // `expecting_retry` swallow-BrokenPipe-and-keep-reading path.
-        "cache_evicted_with_retry" => Scenario {
-            open_delay: Duration::ZERO,
-            attempts: vec![
-                vec![build_started(), build_finished(39, false)],
-                vec![build_started(), build_finished(0, true)],
-            ],
-            exit: ExitBehavior::Code(0),
-        },
-
-        // Like `success`, but basil exits with code 2 (a genuine Bazel
-        // build failure). Used by the fail_at_end-preserves-bazel-exit
-        // regression test: even when the sink reports terminal failure,
-        // wait() must surface code 2 rather than the synthetic 36.
-        "nonzero_exit" => Scenario {
-            open_delay: Duration::ZERO,
-            attempts: vec![vec![build_started(), build_finished(2, true)]],
-            exit: ExitBehavior::Code(2),
-        },
-
-        // Like `success`, but basil is killed by SIGKILL after the event
-        // sequence is flushed. The parent's `ExitStatus::code()` is
-        // `None`, which exercises the signal-kill path in `wait()`'s
-        // exit-code mapping — fail_at_end must not collapse `None` into
-        // the synthetic 36.
-        "signal_killed_sigkill" => Scenario {
-            open_delay: Duration::ZERO,
-            attempts: vec![vec![build_started(), build_finished(0, true)]],
-            // SIGKILL: signal 9 on every Unix. Hard-coded to avoid a
-            // libc dep for a single constant.
-            exit: ExitBehavior::Signal(9),
-        },
-
-        other => {
-            eprintln!("basil: unknown scenario {other:?}");
-            process::exit(2);
-        }
-    }
-}
-
-fn build_started() -> BuildEvent {
-    BuildEvent {
-        // `id` is required for AXL's `event.kind` accessor (renamed from
-        // `last_message` in axl-proto/build.rs) — it unwraps both
-        // BuildEvent.id and BuildEventId.id.
-        id: Some(BuildEventId {
-            id: Some(Id::Started(BuildStartedId {})),
-        }),
-        last_message: false,
-        payload: Some(Payload::Started(BuildStarted::default())),
-        ..Default::default()
-    }
-}
-
-fn build_finished(code: i32, last: bool) -> BuildEvent {
-    BuildEvent {
-        id: Some(BuildEventId {
-            id: Some(Id::BuildFinished(BuildFinishedId {})),
-        }),
-        last_message: last,
-        payload: Some(Payload::Finished(BuildFinished {
-            exit_code: Some(ExitCode {
-                code,
-                ..Default::default()
-            }),
-            ..Default::default()
-        })),
-        ..Default::default()
-    }
-}
diff --git a/docs/testing.md b/docs/testing.md
new file mode 100644
index 000000000..57dd59866
--- /dev/null
+++ b/docs/testing.md
@@ -0,0 +1,273 @@
+# AXL native testing — design sketch + POC
+
+Status: **proof-of-concept**. This document captures the design we converged
+on for giving AXL a first-class, pytest-style testing story built into the
+engine, describes what the POC in this branch actually implements, and logs
+the decisions made without explicit sign-off so they can be reviewed.
+
+## Goal
+
+A test author writes:
+
+```python
+# lib/ci_test.axl   — a *_test.axl file gets the augmented test surface
+load("./ci.axl", "detect_ci_host")
+
+def test_detect_ci_host_none_off_ci(t):
+    asserts.eq(detect_ci_host(t.ctx.std.env), None)
+
+def test_github_actions_precedence(t):
+    t.env.set("GITHUB_ACTIONS", "true")
+    t.env.set("BUILDKITE", "true")
+    asserts.eq(detect_ci_host(t.ctx.std.env)["marker"], "GITHUB_ACTIONS")
+```
+
+and runs `aspect test //...`. No per-test wiring in `config.axl`, no
+hand-maintained list in `pipeline.yaml`, no copied `_eq` helper, no
+`_snapshot_env`/`_restore_env` dance.
+
+## The converged design
+
+1. **`*_test.axl` files get a different globals surface.** The loader
+   evaluates files whose name ends in `_test.axl` against *base AXL + a
+   test-only vocabulary*. The extra names exist **only** in test files, so
+   test scaffolding can never leak into production `config.axl` / builtins.
+
+2. **Tests are functions, discovered by convention.** A test is a top-level
+   `def test_*(t)`. The runner enumerates a module's `test_*` callables — the
+   same shape as the existing task discovery (`FrozenTaskModuleLike::tasks()`
+   filters module names by value kind).
+
+3. **The harness `t` is bazel-free and purpose-built.** Each test gets a
+   handle `t` with:
+   - `t.env` — an in-memory environment overlay (set/get/remove/reset),
+   - `t.std` — the real `std` surface (fs, env, io, …),
+   - `t.ctx` — a **real `TaskContext`** (the same Rust type production uses),
+     wired over the mock backends.
+
+   `t` has **no bazel surface**. `TaskContext` is the bazel-flavored context;
+   we mint a narrower context for tests, exactly as the engine already mints
+   different contexts per phase (`ConfigContext`, `FeatureContext`).
+
+4. **Mocking is backend-swap, not type masquerade.** `t.ctx.std.env` is the
+   genuine `std.Env` type. It reads the in-memory overlay only because the
+   runner mints it carrying the test's overlay handle on the value (decision 6).
+   The Starlark type and its method table are unchanged; only the backend they
+   consult differs. This keeps the mock's contract *identical* to reality
+   (enforced by the type system) instead of hand-maintaining a parallel
+   look-alike type — and it keeps every internal `downcast_ref::<RealType>()`
+   working, which a masquerading type would silently break.
+
+5. **Per-test isolation, pytest semantics, run in parallel.** Each test runs
+   with a fresh overlay. A failed assertion raises; the runner catches it
+   per-test, records the failure, and continues to the next test. Tests fan
+   out across `min(tests, cpus)` worker threads (overridable, `--jobs`-style),
+   each with its own Starlark heap — heaps are `!Send`, so workers re-evaluate
+   the side-effect-free module body locally rather than sharing one. Results
+   merge back into definition order so the report is deterministic. This is
+   sound **because** every per-test fixture lives on the test's own values
+   (the env overlay, and the bazel backend below), never in a process-global —
+   concurrent workers therefore share no mutable state.
+
+## What the POC implements (and how to run it)
+
+All in `crates/axl-runtime`:
+
+| Piece | Location |
+|---|---|
+| `asserts` namespace (`eq`, `ne`, `is_true`, `is_false`, `contains`, `not_contains`, `gt`, `ge`, `lt`, `le`, `fails`) | `src/engine/testing.rs` |
+| `Test` harness value (`t.env`, `t.std`, `t.ctx`) | `src/engine/testing.rs` |
+| `test_*` discovery + parallel (thread-per-shard) runner + summary | `src/engine/testing.rs` (`run_test_source`) |
+| In-memory env overlay handle (`TestEnvMap`) carried on the harness values | `src/engine/store.rs` (`TestEnvMap`); minted in `src/engine/testing.rs` |
+| `std.Env`/`Std`/`TaskContext` carry the overlay (`Option<TestEnvMap>`) on the value | `src/engine/std/env.rs`, `src/engine/std/mod.rs`, `src/engine/task_context.rs` |
+| `std.env` reads/writes the overlay carried on its value when present | `src/engine/std/env.rs` (`var`/`set_var`/`remove_var`/`vars`) |
+| Test-only globals surface | `src/eval/api.rs` (`get_test_globals`) |
+| Loader selects test globals for `*_test.axl` | `src/eval/load.rs` |
+
+Run the end-to-end proof:
+
+```sh
+cargo test -p axl-runtime testing::
+```
+
+`discovers_and_runs_test_functions` proves: test-only `asserts` parses only via
+the test surface; `test_*` discovery (and that `helper_*` is *not* run);
+`t.env` overlay observed through both `t.std.env` and a real `t.ctx`;
+isolation; and per-test failure capture. `overlay_does_not_leak_into_process`
+proves the overlay never mutates the real process environment.
+`runs_tests_in_parallel_shards` forces the multi-worker path (8 jobs over 17
+tests) and proves cross-test isolation holds concurrently and that outcomes
+still merge back into definition order.
+
+## Decisions — reviewed
+
+Items marked *Decided* were reviewed and settled during the design walkthrough;
+the rest were forced or chosen to keep the POC moving and remain cheap to
+change:
+
+1. **`asserts`, not `assert`.** `assert` is a **reserved keyword** in the
+   AXL/Starlark dialect and cannot be used as an identifier, so the namespace
+   `assert.eq(...)` won't parse. Per review, the plural `asserts` is used
+   (`asserts.eq`, `asserts.contains`, …) — it parses and reads almost exactly
+   like `assert.*`. Alternatives considered: a different namespace (`check`,
+   `expect`), or moving assertions onto the harness (`t.eq(...)`,
+   `t.assert_eq(...)`).
+
+2. **Assertions are a global namespace, not harness methods.** *Decided:* keep
+   `asserts.*` global. The alternative (`t.assert_eq`) is viable but the
+   test-globals swap is the mechanism that scopes *future* test-only builders
+   (`parametrize`, fixtures), so we keep it and free-standing `asserts.eq`
+   reads cleaner than threading `t.` through every check.
+
+3. **`contains` covers every container.** Backed by the `in` operator, so it
+   works for a substring of a string, an element of a list/tuple/set, or a key
+   of a dict; `not_contains` is its inverse. The ordering family (`gt`, `ge`,
+   `lt`, `le`) is backed by Starlark comparison, and `asserts.fails` takes an
+   optional `contains = "substr"` to assert a raise happened *for the right
+   reason* (matched against the bare error message, not the rendered
+   diagnostic).
+
+4. **`is_true`/`is_false`, not `true`/`false`.** `true`/`false` are Rust
+   reserved words; the longer names are also clearer.
+
+5. **The runner re-evaluates the test source in a live module** rather than
+   reusing the loader's frozen module + cross-heap calls. Simpler and avoids
+   freeze/thaw; the loader globals-swap is still implemented and exercised for
+   the normal load path. A production runner should unify these.
+
+6. **`env` is an *overlay*, not a "backend"; bazel *is* a backend.** *Decided
+   vocabulary.* For env, the `std.Env` type and methods are identical — only
+   the map they read swaps — so "overlay" fits. For bazel, `Real` (spawns a
+   subprocess, live BES) and `Fake` (below) are genuinely different
+   implementations of one contract, so `BazelBackend::{Real, Fake}` fits.
+   *Decided mechanism:* both the overlay handle and the bazel backend are
+   **carried on the harness-constructed value**, not bolted onto the production
+   `Env` and fished out of `eval.extra`. *Done for env (roadmap item 1c):* the
+   overlay is a `TestEnvMap` (`Arc<Mutex<BTreeMap<…>>>`) carried as an
+   `Option` on the `std.Env` / `Std` / `TaskContext` values. The runner mints
+   the harness's `t.env`, `t.std`, and `t.ctx` from one shared handle, so all
+   three observe the same map; production mints these values with `None` and
+   `std.env` hits the real process env unchanged. `Env::test_env` /
+   `with_test_env` and the `from_eval` mock route are gone. (`Arc<Mutex>`, not
+   `Rc<RefCell>`, because the values that carry it must satisfy the `Send +
+   Sync` bound frozen Starlark values require; each overlay is still only
+   touched on its own worker thread, so the mutex is never contended.)
+
+7. **bazel `Fake` = a generic fake-bazel process driven by declared data.**
+   *Decided, superseding the earlier "canned `BuildEvent`s" sketch.* Canned
+   in-process events were rejected for two reasons: (a) they'd force us to
+   hand-reimplement *every* surface AXL consumes — BES (`BuildEventIter`),
+   execlog (`ExecLogSink`, incl. the zstd `CompactFile` format), stdout/stderr
+   streams, exit codes — and keep them mutually consistent; (b) baking named
+   scenarios into Rust (as `basil` does) means a test author can't express new
+   behavior without editing Rust, which breaks the no-Rust promise. Instead:
+   - a **single generic fake-bazel** *synthesizes* all surfaces from a declared,
+     typed AXL fixture (a `BazelExpectation` record: `targets` / `result` enum /
+     `exit_code`, with a raw `events=` escape hatch). Author writes intent; the
+     fake manufactures consistent BES + execlog + streams + exit so they can't
+     drift.
+   - **Control channel = an inherited `socketpair`** (parent → fake) carrying
+     the serialized fixture; bidirectional so timing/cancellation tests can
+     drive the fake mid-stream (the lifecycle fidelity canned events can't
+     reach). Unix-only — the control transport sits behind a small trait so a
+     Windows named-pipe/loopback impl is a drop-in later.
+   - **Output channels are the *real* bazel channels** the parent already wires
+     for real bazel (`--build_event_binary_file`/gRPC, `--execution_log_*`,
+     stdout/stderr), so the production `ctx.bazel.build` read path is exercised
+     unchanged. Every per-invocation resource (socketpair, BES path/port,
+     execlog path, temp dir) is **uniquely derived per spawn** — a hard
+     requirement under the parallel runner.
+   - **Process model = fork+exec (`current_exe`/`posix_spawn`), never bare
+     `fork()`.** We carry a tokio runtime + threads; a bare `fork()` is unsafe
+     (frozen locks, broken runtime). `exec` is what gives the "start fresh"
+     image; inherited FDs (the socketpair, BES file) give fork's only useful
+     property without its hazard.
+   - **Reuse `basil`, don't reinvent.** *Done.* `basil`'s replay/synthesis was
+     extracted into a `basil-core` lib; `basil` is now a thin argv/env
+     front-end that reads a `BazelExpectation` off the control fd and replays
+     via the lib. The named-scenario table (`--scenario=<name>`, the
+     `BAZEL_REAL` global, `BASIL_SERVER_PID`) is **gone** — all `ctx.bazel.build`
+     Rust tests (`engine::bazel::build::tests`) now drive the `Fake` backend
+     with a typed `BazelExpectation` (one mechanism, less to maintain). Shipped
+     `aspect test` reuses `basil-core` via a hidden self-exec subcommand. We do
+     **not** `include_bytes!` a standalone `basil` (≈2–3 MB stripped, mostly
+     `prost`/proto already linked into aspect-cli) — that duplicate is the
+     binary bloat to avoid.
+
+8. **Parallelism makes "state on the value, nothing global" a correctness
+   requirement, and turns three in-tree shortcuts into bugs to fix before the
+   bazel `Fake` lands:**
+   - ✅ `std::env::set_var("BAZEL_REAL", …)` (`test.rs`) was process-global →
+     removed. The `Fake` backend builds the `Command` with the fake path
+     directly (carried on the value); `crate::test`'s `.with_fake_bazel()` mints
+     `ctx.bazel` with a `Fake` backend via `MultiPhaseEval::with_bazel_backend`,
+     no global env var.
+   - the BES output path / gRPC port and execlog path must be per-invocation
+     unique (fixed paths/ports collide across concurrent builds).
+   - the spawn registry (`bazel/live.rs`, `static REG`) pools pids from *all*
+     concurrent tests → cancellation scope must be per-test, not the global
+     registry.
+   `t.stdout()` (roadmap) must likewise be a per-test buffer, never a
+   process-stdout redirect.
+
+9. **No `aspect test` CLI task yet.** The runner is exposed as a Rust function
+   and proven by Rust tests. The design calls for `aspect test` to be a
+   builtin **AXL task** (next to `axl_add.axl`) that calls a sandbox-run
+   primitive — wiring that touches `cmd.rs` + `MODULE.aspect` and is the next
+   step, deliberately out of this slice. The fake-bazel embedding (item 7's
+   self-exec subcommand) rides on this step.
+
+10. **No deny-by-default for unstubbed net/process.** That hermeticity property
+    (an unstubbed subprocess/HTTP call fails the test) is designed but not
+    built, since those backends aren't mocked yet.
+
+## Roadmap / open questions
+
+Build order (each independently shippable):
+
+1. ✅ `env` overlay + `asserts` + discovery + `t.ctx` (this POC).
+1b. ✅ **Parallel runner** — thread-per-shard, `min(tests, cpus)` workers,
+    deterministic merge (`run_test_source` / `run_test_source_with_jobs`).
+1c. ✅ Move the env overlay off the production `Env`/`eval.extra` and **onto the
+    `std.Env` value** (`Option<overlay>`); `t.env` and `t.ctx.std.env` share one
+    handle. Removed `Env::test_env` + the `from_eval` mock route. (Handle is
+    `Arc<Mutex<…>>` so the value satisfies the `Send + Sync` bound frozen
+    Starlark values require.)
+2. ✅ `bazel` → `BazelBackend::{Real, Fake}` on the `bazel.Bazel` value (carried
+   on the value, read via `read_backend`, not `eval.extra`). `Fake` fork+execs a
+   generic fake-bazel (`basil-core`, reused via the standalone `basil` binary
+   today) with a per-invocation `socketpair` control channel carrying a
+   length-delimited `BazelExpectation` fixture; the fake synthesizes a
+   consistent `BuildStarted` → `TargetComplete`* → `BuildFinished` BES stream +
+   exit code onto the real `--build_event_binary_file` the parent already wires,
+   so the production `ctx.bazel.build` read path is exercised unchanged. `Fake`
+   builds the `Command` straight from the fake path — no `BAZEL_REAL` global —
+   and derives the child pid as galvanize's `server_pid`. `t.bazel.expect_build(
+   *targets, result=, exit_code=)` declares the fixture. (See decisions 7/8.)
+   *Not yet synthesized from the typed fixture:* execlog + stdout/stderr (BES +
+   exit only); a raw `events=` escape hatch passes pre-framed `BuildEvent`s
+   through. Unix-only — the control transport sits behind a `ControlChannel`
+   trait so a Windows named-pipe impl is a drop-in.
+3. `io` backend → captured `t.stdout()` (per-test buffer, never process stdout).
+4. `fs` backend → `t.fs.tmpdir()` (tmpdir-rooted real fs by default).
+5. `process` / `net` backends → `t.process.stub(...)` / `t.http.stub(...)`,
+   **deny-by-default**.
+6. `aspect test` as an AXL runner task + the sandbox-run engine primitive;
+   ship the fake-bazel, built on the already-extracted `basil-core`, via a
+   hidden self-exec subcommand (no embedded second binary — see decision 7).
+7. Snapshots: `t.snapshot(value, name=...)` + golden files + `--update`.
+8. Teach `axl-lsp` / `axl-docgen` about the `_test.axl` augmented surface.
+
+Open questions to settle before promoting past POC:
+- ✅ Namespace name — `asserts`, global (decisions 1, 2).
+- ✅ env "overlay" vs bazel "backend"; state carried on the value (decision 6).
+- ✅ bazel `Fake` shape — generic process + socketpair + synthesized surfaces,
+  reusing basil-core; not canned events, not Rust scenarios (decisions 7, 8).
+- ✅ `BazelExpectation` control-channel **wire format** — length-delimited
+  protobuf (the framing basil already uses for BES); the `events=` escape hatch
+  carries pre-framed `BuildEvent`s as opaque `bytes` passed through untouched.
+- Snapshot golden location: `__snapshots__/` dir vs inline-string snapshots.
+- Should the test surface also gate on *where* the runner loads from, so a
+  stray `_test.axl` evaluated outside the runner can't pick up test globals?
+- Fixtures: single-`t` + helpers + `t.defer` for v1, or signature-injected
+  named fixtures + `parametrize` later?