isamplesorg · rdhyee · Jun 15, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 15, 2026
diff --git a/.github/workflows/explorer-e2e.yml b/.github/workflows/explorer-e2e.yml
@@ -0,0 +1,170 @@
+name: Explorer e2e smoke gate
+
+# Browser-level regression gate for explorer.html (#249 PR 1 — the safety
+# net that must exist BEFORE any refactor of explorer.qmd lands).
+#
+# What it does: renders explorer.qmd with Quarto, serves the rendered
+# docs/ with the repo's range-capable static server, and runs the
+# Playwright smoke set (tests/playwright/explorer-smoke.spec.js) against
+# it in headless Chromium.
+#
+# What it does NOT do: assert on data correctness. The explorer streams
+# large parquet files from data.isamples.org; the smoke set only asserts
+# structural liveness (page boots, Cesium canvas, facet sidebar, search
+# box) so a slow or flaky data load cannot fail the gate. The deeper
+# data-dependent specs in tests/playwright/ can be run manually via
+# workflow_dispatch with a spec filter.
+
+on:
+  pull_request:
+    paths:
+      - "explorer.qmd"
+      - "_quarto.yml"
+      - "styles.css"
+      - "assets/**"
+      - "workers/**"
+      - "tests/playwright/**"
+      - "playwright.config.js"
+      - "package.json"
+      - "dev_server.py"
+      - ".github/workflows/explorer-e2e.yml"
+  push:
+    branches: [main]
+    paths:
+      - "explorer.qmd"
+      - "_quarto.yml"
+      - "styles.css"
+      - "assets/**"
+      - "workers/**"
+      - "tests/playwright/**"
+      - "playwright.config.js"
+      - "package.json"
+      - "dev_server.py"
+      - ".github/workflows/explorer-e2e.yml"
+  workflow_dispatch:
+    inputs:
+      spec_filter:
+        description: >-
+          Playwright test file filter. Default runs only the smoke set;
+          pass e.g. "explorer-map-overlay" or "" (empty for all specs)
+          to run the data-dependent suite manually.
+        required: false
+        default: "explorer-smoke"
+
+concurrency:
+  group: explorer-e2e-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+env:
+  # Pinned: the version this gate was developed and verified against.
+  # The deploy workflow installs latest; pinning here keeps the gate's
+  # failures attributable to the PR, not a Quarto release.
+  QUARTO_VERSION: "1.6.42"
+
+jobs:
+  smoke:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Cache Quarto installer
+        uses: actions/cache@v4
+        with:
+          path: ~/quarto.deb
+          key: quarto-deb-${{ env.QUARTO_VERSION }}
+
+      - name: Install Quarto
+        run: |
+          if [ ! -f ~/quarto.deb ]; then
+            curl -sSL -o ~/quarto.deb \
+              "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.deb"
+          fi
+          sudo dpkg -i ~/quarto.deb
+          quarto --version
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      # package-lock.json is gitignored in this repo, so `npm ci` and
+      # setup-node's built-in npm cache are unavailable; cache ~/.npm
+      # keyed on package.json instead.
+      - name: Cache npm
+        uses: actions/cache@v4
+        with:
+          path: ~/.npm
+          key: npm-${{ runner.os }}-${{ hashFiles('package.json') }}
+
+      - name: Install node deps
+        run: npm install
+
+      # Fast pure-logic gate (#249 PR3): unit-tests the ES modules extracted
+      # from explorer.qmd (assets/js/sql-builders.js + explorer-utils.js).
+      # Node built-ins only, sub-second — runs before the slow Quarto/
+      # Playwright steps so a module regression fails fast.
+      - name: Unit tests (extracted modules)
+        run: node --test tests/unit/*.test.mjs
+
+      - name: Resolve Playwright version
+        id: pw
+        run: echo "version=$(node -e 'console.log(require("@playwright/test/package.json").version)')" >> "$GITHUB_OUTPUT"
+
+      - name: Cache Playwright browsers
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/ms-playwright
+          key: playwright-${{ runner.os }}-${{ steps.pw.outputs.version }}
+
+      - name: Install Chromium
+        run: npx playwright install --with-deps chromium
+
+      # Single-page render: explorer.qmd is OJS/HTML only, so this needs
+      # neither the Python deps nor generate_vocab_docs.sh that the full
+      # site render (quarto-pages.yml) requires. ~10s, produces
+      # docs/explorer.html + site_libs + assets.
+      - name: Render explorer page
+        run: quarto render explorer.qmd
+
+      # dev_server.py is the repo's range-capable (206) static server —
+      # same serving semantics the explorer verify loop uses locally.
+      - name: Serve rendered site
+        run: |
+          python3 dev_server.py --dir docs --port 5860 > devserver.log 2>&1 &
+          for i in $(seq 1 30); do
+            curl -sf http://localhost:5860/explorer.html > /dev/null && break
+            sleep 1
+          done
+          curl -sf -o /dev/null http://localhost:5860/explorer.html
+
+      # PR/push runs are pinned to the smoke set. Only workflow_dispatch
+      # honors spec_filter, and it is passed as ONE quoted argument so the
+      # input can never smuggle extra Playwright flags (e.g.
+      # --pass-with-no-tests, which would green-light a zero-test run).
+      # Empty spec_filter on dispatch = run ALL specs.
+      - name: Run Playwright specs
+        env:
+          SPEC_FILTER: ${{ github.event.inputs.spec_filter }}
+        run: |
+          if [ "${{ github.event_name }}" != "workflow_dispatch" ]; then
+            npx playwright test explorer-smoke --reporter=list
+          elif [ -n "$SPEC_FILTER" ]; then
+            npx playwright test "$SPEC_FILTER" --reporter=list
+          else
+            npx playwright test --reporter=list
+          fi
+
+      - name: Upload Playwright artifacts
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: playwright-artifacts
+          path: |
+            tests/playwright-report/
+            test-results/
+            devserver.log
+          retention-days: 7
+          if-no-files-found: ignore
diff --git a/REFACTOR_PLAN_249.md b/REFACTOR_PLAN_249.md
@@ -0,0 +1,104 @@
+# Draft comment for isamplesorg/isamplesorg.github.io#249
+
+> Paste-ready draft for the refactor-window plan. Edit freely — written
+> 2026-06-12 alongside the PR that wires the e2e gate (PR 1 below).
+
+---
+
+## Refactor window: concrete PR sequence
+
+Following up on the 2026-06-05 update above: here is the staged plan for
+the refactor window, structured so every stage lands behind a green
+browser-level gate and no stage requires a big-bang diff. The method is
+still Option C (strangler / extract-along-seams); what's new is the
+explicit sequencing and the regression gate that goes in *first*.
+
+### PR 1 — CI safety net (this is the only infrastructure PR)
+
+Wire the existing Playwright browser specs into CI before anything in
+`explorer.qmd` moves:
+
+- New workflow `explorer-e2e.yml`: renders `explorer.qmd` with Quarto,
+  serves `docs/` with the repo's range-capable `dev_server.py`, runs a
+  headless-Chromium **smoke set** on every PR that touches the explorer
+  (page boots with no uncaught/OJS-cell errors, Cesium canvas draws at
+  non-zero size, facet sidebar renders, search box present).
+- The smoke set deliberately does **not** wait on parquet loads from
+  data.isamples.org, so slow data can't flake the gate. The deeper
+  data-dependent specs (facet viewport, URL round-trip, heatmap, search
+  counts) stay runnable on demand via `workflow_dispatch` with a spec
+  filter, and locally per `tests/README.md`.
+- This complements (doesn't replace) the pre-deploy DuckDB-liveness gate
+  already in `quarto-pages.yml` / `tests/test_smoke.py`: that one guards
+  *deploys to production*; this one guards *PRs*, which is where refactor
+  regressions need to be caught.
+
+**Gate rule for every PR below: the smoke set must stay green, and any
+stage that touches a seam covered by a deeper spec runs that spec
+manually before merge.**
+
+### PR 2 — characterization tests around the seams we're about to cut
+
+Cheap, compounding (step 1 of the 2026-06-05 plan): turn the recent
+symptom reports into specs *before* moving code — #260 (detail-card
+material/category), #265 (facet label provenance), #267 (facet selection
+drives the map; partially latched already), back/forward + deep-link
+round-trip (#239's divergence case). #266 is already covered by the
+updated `explorer-map-overlay.spec.js` assertion. These are
+characterization tests: they pin current *intended* behavior, not new
+features.
+
+### PR 3 — extract pure logic into `assets/js/` ES modules (lowest risk)
+
+The step the original analysis sized at ~500 lines, near-zero risk: SQL
+builders (`facetFilterSQL`, `sourceFilterSQL`, `textSearchWhere`/`Score`),
+hash codec (`readHash`/`buildHash`), bbox math (`paddedViewportBounds`/
+`viewerBboxSQL`), card renderers, escapers. The explorer already imports
+`assets/js/source-palette.js` at runtime, so the mechanism is proven.
+Pure functions become unit-testable outside the browser; this PR also
+directly answers #268's "where does the SQL live?" — the answer becomes a
+file path instead of a line range in a 5.4k-line qmd. Includes the
+parquet-URL registry / plain-English query doc from the 2026-06-05 plan.
+
+### PR 4 — URL/state codecs + single-writer boundary (#208)
+
+Extract the state codecs (search params, hash state, selection, heatmap,
+source/facet selections — the `EXPLORER_STATE.md` inventory), then land
+#208's two fixes on top of the now-isolated codec: collapse the dual
+`mode` state to `viewer._globeState.mode` as single source of truth, and
+funnel the ~8 URL writers through `writeGlobeHash` / `setExplorerMode` /
+`reconcileCameraState`. This is the stage that de-risks every future
+"interactive state diverges from cold-reload state" bug (#239, #262).
+
+### PR 5 — split controllers out of `zoomWatcher`, one seam at a time
+
+The god-closure (~2,200 lines) gets carved along its already-namespaced
+seams, heatmap first (`viewer._heatmap*` state is already hoisted —
+the seam the original analysis rated highest-risk, so it goes last among
+extractions but first among controllers since its state is cleanest),
+then facet panel, samples table, search panel, map mode/rendering.
+Each seam = one PR, each behind the gate plus the relevant deep spec.
+#189's selection controller (`selectSample`/`selectCluster`/
+`clearSelection`) joins this stage **only if** its YAGNI trigger has
+fired by then — per that issue, it stays filed until a selection feature
+or recurrence forces it.
+
+### Explicitly deferred
+
+- The "should this become a Vite/TS app embedded in Quarto?" question
+  (open question 1 in the issue body) — decide *after* PR 5, when the
+  module boundaries make the cost visible. Not a prerequisite for any
+  stage above.
+- #234's filter-semantics direction (A1+B1+C3/C2) is *feature* work, not
+  refactor work — but PR 3/PR 4 are sequenced so that when #234
+  implementation starts, search/facet predicates and URL state are
+  already modules it can build on instead of more closure growth.
+- No UI redesign, no data-substrate changes, no Quarto replacement
+  (non-goals from the 2026-06-05 comment stand).
+
+### Coordination
+
+While PRs 3–5 are open, feature work in `explorer.qmd` freezes only on
+the seam being moved (answer to open question 2: per-seam freeze, not a
+page-wide freeze). `EXPLORER_STATE.md` gains a module-boundary section as
+each extraction lands (open question 3: yes, same doc).
diff --git a/_quarto.yml b/_quarto.yml
@@ -3,6 +3,8 @@ project:
   output-dir: docs
   resources:
     - assets/js/source-palette.js
+    - assets/js/sql-builders.js
+    - assets/js/explorer-utils.js
 
 website:
   title: "iSamples"

diff --git a/assets/js/explorer-utils.js b/assets/js/explorer-utils.js
@@ -0,0 +1,66 @@
+// Pure helpers extracted from explorer.qmd (issue #249, PR3).
+// No closure over `viewer`/`db`/DOM — safe to unit-test under Node and to
+// import into the Interactive Explorer's OJS runtime (see explorer.qmd).
+
+// HTML-escape a value for safe interpolation into innerHTML.
+export function escapeHtml(value) {
+    return String(value ?? '')
+        .replace(/&/g, '&amp;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;')
+        .replace(/"/g, '&quot;')
+        .replace(/'/g, '&#39;');
+}
+
+// Split a free-text query into whitespace-delimited terms (no empties).
+export function searchTerms(value) {
+    return String(value || '').trim().split(/\s+/).filter(Boolean);
+}
+
+// Parse a numeric URL param with a default and optional clamping.
+export function parseNum(val, def, min, max) {
+    if (val == null) return def;
+    const n = parseFloat(val);
+    if (!Number.isFinite(n)) return def;
+    if (min != null && n < min) return min;
+    if (max != null && n > max) return max;
+    return n;
+}
+
+// Read a comma-separated URLSearchParams value into an array of trimmed,
+// non-empty strings. Returns null when the key is absent, [] when present
+// but empty.
+export function csvParamValues(params, key) {
+    if (!params.has(key)) return null;
+    const raw = params.get(key) || '';
+    if (raw.trim() === '') return [];
+    return raw.split(',').map(s => s.trim()).filter(Boolean);
+}
+
+// Resolve a pid to its canonical resolver URL. All iSamples sources resolve
+// via n2t.net: ARK pids (OpenContext, GEOME, Smithsonian) and IGSN pids
+// (SESAR) alike.
+export function sourceUrl(pid) {
+    if (!pid) return null;
+    return `https://n2t.net/${pid}`;
+}
+
+// Decode the explorer globe state from a URL hash fragment.
+// `hashStr` defaults to `location.hash` for in-browser callers (every current
+// call site is zero-arg); tests pass an explicit string so `location` is never
+// referenced. Numeric fields are clamped to valid geographic / altitude ranges.
+export function readHash(hashStr = location.hash) {
+    const params = new URLSearchParams(hashStr.slice(1));
+    return {
+        v: parseInt(params.get('v')) || 0,
+        lat: parseNum(params.get('lat'), null, -90, 90),
+        lng: parseNum(params.get('lng'), null, -180, 180),
+        alt: parseNum(params.get('alt'), null, 100, 40000000),
+        heading: parseNum(params.get('heading'), 0, 0, 360),
+        pitch: parseNum(params.get('pitch'), -90, -90, 0),
+        mode: params.get('mode') || null,
+        pid: params.get('pid') || null,
+        h3: params.get('h3') || null,
+        heatmap: params.get('heatmap') === '1',
+    };
+}
diff --git a/assets/js/sql-builders.js b/assets/js/sql-builders.js
@@ -0,0 +1,41 @@
+// Pure SQL-string builders extracted from explorer.qmd (issue #249, PR3).
+// No closure over `viewer`/`db`/DOM — safe to unit-test under Node and to
+// import into the Interactive Explorer's OJS runtime (see explorer.qmd).
+//
+// Internal dependency chain: textSearch* -> escapeIlikePattern -> escSql.
+// Each function references the module-local sibling directly, so importing
+// these into OJS does NOT create reactive edges between the bound cells.
+
+// Double single-quotes for safe interpolation into a SQL string literal.
+export function escSql(value) {
+    return String(value).replace(/'/g, "''");
+}
+
+// Escape a value for use inside an ILIKE '%...%' pattern with ESCAPE '\'.
+// First SQL-escapes single quotes (escSql), then backslash-escapes the LIKE
+// metacharacters \ % _ so they match literally.
+export function escapeIlikePattern(value) {
+    return escSql(value).replace(/[\\%_]/g, "\\$&");
+}
+
+// Build a WHERE fragment: every term must match at least one column
+// (terms AND-ed, columns OR-ed within a term).
+export function textSearchWhere(terms, columns) {
+    return terms.map(raw => {
+        const term = escapeIlikePattern(raw);
+        const checks = columns.map(col => `${col} ILIKE '%${term}%' ESCAPE '\\'`);
+        return `(${checks.join(' OR ')})`;
+    }).join(' AND ');
+}
+
+// Build a relevance-score expression: sum of per-term, per-weighted-column
+// CASE contributions. Returns '0' when there are no terms.
+export function textSearchScore(terms, weightedColumns) {
+    if (!terms.length) return '0';
+    return terms.map(raw => {
+        const term = escapeIlikePattern(raw);
+        return weightedColumns.map(({ col, weight }) =>
+            `CASE WHEN ${col} ILIKE '%${term}%' ESCAPE '\\' THEN ${weight} ELSE 0 END`
+        ).join(' + ');
+    }).map(score => `(${score})`).join(' + ');
+}