Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 170 additions & 0 deletions .github/workflows/explorer-e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
name: Explorer e2e smoke gate

# Browser-level regression gate for explorer.html (#249 PR 1 — the safety
# net that must exist BEFORE any refactor of explorer.qmd lands).
#
# What it does: renders explorer.qmd with Quarto, serves the rendered
# docs/ with the repo's range-capable static server, and runs the
# Playwright smoke set (tests/playwright/explorer-smoke.spec.js) against
# it in headless Chromium.
#
# What it does NOT do: assert on data correctness. The explorer streams
# large parquet files from data.isamples.org; the smoke set only asserts
# structural liveness (page boots, Cesium canvas, facet sidebar, search
# box) so a slow or flaky data load cannot fail the gate. The deeper
# data-dependent specs in tests/playwright/ can be run manually via
# workflow_dispatch with a spec filter.

on:
pull_request:
paths:
- "explorer.qmd"
- "_quarto.yml"
- "styles.css"
- "assets/**"
- "workers/**"
- "tests/playwright/**"
- "playwright.config.js"
- "package.json"
- "dev_server.py"
- ".github/workflows/explorer-e2e.yml"
push:
branches: [main]
paths:
- "explorer.qmd"
- "_quarto.yml"
- "styles.css"
- "assets/**"
- "workers/**"
- "tests/playwright/**"
- "playwright.config.js"
- "package.json"
- "dev_server.py"
- ".github/workflows/explorer-e2e.yml"
workflow_dispatch:
inputs:
spec_filter:
description: >-
Playwright test file filter. Default runs only the smoke set;
pass e.g. "explorer-map-overlay" or "" (empty for all specs)
to run the data-dependent suite manually.
required: false
default: "explorer-smoke"

concurrency:
group: explorer-e2e-${{ github.ref }}
cancel-in-progress: true

permissions:
contents: read

env:
# Pinned: the version this gate was developed and verified against.
# The deploy workflow installs latest; pinning here keeps the gate's
# failures attributable to the PR, not a Quarto release.
QUARTO_VERSION: "1.6.42"

jobs:
smoke:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v4

- name: Cache Quarto installer
uses: actions/cache@v4
with:
path: ~/quarto.deb
key: quarto-deb-${{ env.QUARTO_VERSION }}

- name: Install Quarto
run: |
if [ ! -f ~/quarto.deb ]; then
curl -sSL -o ~/quarto.deb \
"https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.deb"
fi
sudo dpkg -i ~/quarto.deb
quarto --version

- uses: actions/setup-node@v4
with:
node-version: 22

# package-lock.json is gitignored in this repo, so `npm ci` and
# setup-node's built-in npm cache are unavailable; cache ~/.npm
# keyed on package.json instead.
- name: Cache npm
uses: actions/cache@v4
with:
path: ~/.npm
key: npm-${{ runner.os }}-${{ hashFiles('package.json') }}

- name: Install node deps
run: npm install

# Fast pure-logic gate (#249 PR3): unit-tests the ES modules extracted
# from explorer.qmd (assets/js/sql-builders.js + explorer-utils.js).
# Node built-ins only, sub-second — runs before the slow Quarto/
# Playwright steps so a module regression fails fast.
- name: Unit tests (extracted modules)
run: node --test tests/unit/*.test.mjs

- name: Resolve Playwright version
id: pw
run: echo "version=$(node -e 'console.log(require("@playwright/test/package.json").version)')" >> "$GITHUB_OUTPUT"

- name: Cache Playwright browsers
uses: actions/cache@v4
with:
path: ~/.cache/ms-playwright
key: playwright-${{ runner.os }}-${{ steps.pw.outputs.version }}

- name: Install Chromium
run: npx playwright install --with-deps chromium

# Single-page render: explorer.qmd is OJS/HTML only, so this needs
# neither the Python deps nor generate_vocab_docs.sh that the full
# site render (quarto-pages.yml) requires. ~10s, produces
# docs/explorer.html + site_libs + assets.
- name: Render explorer page
run: quarto render explorer.qmd

# dev_server.py is the repo's range-capable (206) static server —
# same serving semantics the explorer verify loop uses locally.
- name: Serve rendered site
run: |
python3 dev_server.py --dir docs --port 5860 > devserver.log 2>&1 &
for i in $(seq 1 30); do
curl -sf http://localhost:5860/explorer.html > /dev/null && break
sleep 1
done
curl -sf -o /dev/null http://localhost:5860/explorer.html

# PR/push runs are pinned to the smoke set. Only workflow_dispatch
# honors spec_filter, and it is passed as ONE quoted argument so the
# input can never smuggle extra Playwright flags (e.g.
# --pass-with-no-tests, which would green-light a zero-test run).
# Empty spec_filter on dispatch = run ALL specs.
- name: Run Playwright specs
env:
SPEC_FILTER: ${{ github.event.inputs.spec_filter }}
run: |
if [ "${{ github.event_name }}" != "workflow_dispatch" ]; then
npx playwright test explorer-smoke --reporter=list
elif [ -n "$SPEC_FILTER" ]; then
npx playwright test "$SPEC_FILTER" --reporter=list
else
npx playwright test --reporter=list
fi

- name: Upload Playwright artifacts
if: failure()
uses: actions/upload-artifact@v4
with:
name: playwright-artifacts
path: |
tests/playwright-report/
test-results/
devserver.log
retention-days: 7
if-no-files-found: ignore
104 changes: 104 additions & 0 deletions REFACTOR_PLAN_249.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Draft comment for isamplesorg/isamplesorg.github.io#249

> Paste-ready draft for the refactor-window plan. Edit freely — written
> 2026-06-12 alongside the PR that wires the e2e gate (PR 1 below).

---

## Refactor window: concrete PR sequence

Following up on the 2026-06-05 update above: here is the staged plan for
the refactor window, structured so every stage lands behind a green
browser-level gate and no stage requires a big-bang diff. The method is
still Option C (strangler / extract-along-seams); what's new is the
explicit sequencing and the regression gate that goes in *first*.

### PR 1 — CI safety net (this is the only infrastructure PR)

Wire the existing Playwright browser specs into CI before anything in
`explorer.qmd` moves:

- New workflow `explorer-e2e.yml`: renders `explorer.qmd` with Quarto,
serves `docs/` with the repo's range-capable `dev_server.py`, runs a
headless-Chromium **smoke set** on every PR that touches the explorer
(page boots with no uncaught/OJS-cell errors, Cesium canvas draws at
non-zero size, facet sidebar renders, search box present).
- The smoke set deliberately does **not** wait on parquet loads from
data.isamples.org, so slow data can't flake the gate. The deeper
data-dependent specs (facet viewport, URL round-trip, heatmap, search
counts) stay runnable on demand via `workflow_dispatch` with a spec
filter, and locally per `tests/README.md`.
- This complements (doesn't replace) the pre-deploy DuckDB-liveness gate
already in `quarto-pages.yml` / `tests/test_smoke.py`: that one guards
*deploys to production*; this one guards *PRs*, which is where refactor
regressions need to be caught.

**Gate rule for every PR below: the smoke set must stay green, and any
stage that touches a seam covered by a deeper spec runs that spec
manually before merge.**

### PR 2 — characterization tests around the seams we're about to cut

Cheap, compounding (step 1 of the 2026-06-05 plan): turn the recent
symptom reports into specs *before* moving code — #260 (detail-card
material/category), #265 (facet label provenance), #267 (facet selection
drives the map; partially latched already), back/forward + deep-link
round-trip (#239's divergence case). #266 is already covered by the
updated `explorer-map-overlay.spec.js` assertion. These are
characterization tests: they pin current *intended* behavior, not new
features.

### PR 3 — extract pure logic into `assets/js/` ES modules (lowest risk)

The step the original analysis sized at ~500 lines, near-zero risk: SQL
builders (`facetFilterSQL`, `sourceFilterSQL`, `textSearchWhere`/`Score`),
hash codec (`readHash`/`buildHash`), bbox math (`paddedViewportBounds`/
`viewerBboxSQL`), card renderers, escapers. The explorer already imports
`assets/js/source-palette.js` at runtime, so the mechanism is proven.
Pure functions become unit-testable outside the browser; this PR also
directly answers #268's "where does the SQL live?" — the answer becomes a
file path instead of a line range in a 5.4k-line qmd. Includes the
parquet-URL registry / plain-English query doc from the 2026-06-05 plan.

### PR 4 — URL/state codecs + single-writer boundary (#208)

Extract the state codecs (search params, hash state, selection, heatmap,
source/facet selections — the `EXPLORER_STATE.md` inventory), then land
#208's two fixes on top of the now-isolated codec: collapse the dual
`mode` state to `viewer._globeState.mode` as single source of truth, and
funnel the ~8 URL writers through `writeGlobeHash` / `setExplorerMode` /
`reconcileCameraState`. This is the stage that de-risks every future
"interactive state diverges from cold-reload state" bug (#239, #262).

### PR 5 — split controllers out of `zoomWatcher`, one seam at a time

The god-closure (~2,200 lines) gets carved along its already-namespaced
seams, heatmap first (`viewer._heatmap*` state is already hoisted —
the seam the original analysis rated highest-risk, so it goes last among
extractions but first among controllers since its state is cleanest),
then facet panel, samples table, search panel, map mode/rendering.
Each seam = one PR, each behind the gate plus the relevant deep spec.
#189's selection controller (`selectSample`/`selectCluster`/
`clearSelection`) joins this stage **only if** its YAGNI trigger has
fired by then — per that issue, it stays filed until a selection feature
or recurrence forces it.

### Explicitly deferred

- The "should this become a Vite/TS app embedded in Quarto?" question
(open question 1 in the issue body) — decide *after* PR 5, when the
module boundaries make the cost visible. Not a prerequisite for any
stage above.
- #234's filter-semantics direction (A1+B1+C3/C2) is *feature* work, not
refactor work — but PR 3/PR 4 are sequenced so that when #234
implementation starts, search/facet predicates and URL state are
already modules it can build on instead of more closure growth.
- No UI redesign, no data-substrate changes, no Quarto replacement
(non-goals from the 2026-06-05 comment stand).

### Coordination

While PRs 3–5 are open, feature work in `explorer.qmd` freezes only on
the seam being moved (answer to open question 2: per-seam freeze, not a
page-wide freeze). `EXPLORER_STATE.md` gains a module-boundary section as
each extraction lands (open question 3: yes, same doc).
2 changes: 2 additions & 0 deletions _quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ project:
output-dir: docs
resources:
- assets/js/source-palette.js
- assets/js/sql-builders.js
- assets/js/explorer-utils.js

website:
title: "iSamples"
Expand Down
66 changes: 66 additions & 0 deletions assets/js/explorer-utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Pure helpers extracted from explorer.qmd (issue #249, PR3).
// No closure over `viewer`/`db`/DOM — safe to unit-test under Node and to
// import into the Interactive Explorer's OJS runtime (see explorer.qmd).

// HTML-escape a value for safe interpolation into innerHTML.
export function escapeHtml(value) {
return String(value ?? '')
.replace(/&/g, '&')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
}

// Split a free-text query into whitespace-delimited terms (no empties).
export function searchTerms(value) {
return String(value || '').trim().split(/\s+/).filter(Boolean);
}

// Parse a numeric URL param with a default and optional clamping.
export function parseNum(val, def, min, max) {
if (val == null) return def;
const n = parseFloat(val);
if (!Number.isFinite(n)) return def;
if (min != null && n < min) return min;
if (max != null && n > max) return max;
return n;
}

// Read a comma-separated URLSearchParams value into an array of trimmed,
// non-empty strings. Returns null when the key is absent, [] when present
// but empty.
export function csvParamValues(params, key) {
if (!params.has(key)) return null;
const raw = params.get(key) || '';
if (raw.trim() === '') return [];
return raw.split(',').map(s => s.trim()).filter(Boolean);
}

// Resolve a pid to its canonical resolver URL. All iSamples sources resolve
// via n2t.net: ARK pids (OpenContext, GEOME, Smithsonian) and IGSN pids
// (SESAR) alike.
export function sourceUrl(pid) {
if (!pid) return null;
return `https://n2t.net/${pid}`;
}

// Decode the explorer globe state from a URL hash fragment.
// `hashStr` defaults to `location.hash` for in-browser callers (every current
// call site is zero-arg); tests pass an explicit string so `location` is never
// referenced. Numeric fields are clamped to valid geographic / altitude ranges.
export function readHash(hashStr = location.hash) {
const params = new URLSearchParams(hashStr.slice(1));
return {
v: parseInt(params.get('v')) || 0,
lat: parseNum(params.get('lat'), null, -90, 90),
lng: parseNum(params.get('lng'), null, -180, 180),
alt: parseNum(params.get('alt'), null, 100, 40000000),
heading: parseNum(params.get('heading'), 0, 0, 360),
pitch: parseNum(params.get('pitch'), -90, -90, 0),
mode: params.get('mode') || null,
pid: params.get('pid') || null,
h3: params.get('h3') || null,
heatmap: params.get('heatmap') === '1',
};
}
41 changes: 41 additions & 0 deletions assets/js/sql-builders.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Pure SQL-string builders extracted from explorer.qmd (issue #249, PR3).
// No closure over `viewer`/`db`/DOM — safe to unit-test under Node and to
// import into the Interactive Explorer's OJS runtime (see explorer.qmd).
//
// Internal dependency chain: textSearch* -> escapeIlikePattern -> escSql.
// Each function references the module-local sibling directly, so importing
// these into OJS does NOT create reactive edges between the bound cells.

// Double single-quotes for safe interpolation into a SQL string literal.
export function escSql(value) {
return String(value).replace(/'/g, "''");
}

// Escape a value for use inside an ILIKE '%...%' pattern with ESCAPE '\'.
// First SQL-escapes single quotes (escSql), then backslash-escapes the LIKE
// metacharacters \ % _ so they match literally.
export function escapeIlikePattern(value) {
return escSql(value).replace(/[\\%_]/g, "\\$&");
}

// Build a WHERE fragment: every term must match at least one column
// (terms AND-ed, columns OR-ed within a term).
export function textSearchWhere(terms, columns) {
return terms.map(raw => {
const term = escapeIlikePattern(raw);
const checks = columns.map(col => `${col} ILIKE '%${term}%' ESCAPE '\\'`);
return `(${checks.join(' OR ')})`;
}).join(' AND ');
}

// Build a relevance-score expression: sum of per-term, per-weighted-column
// CASE contributions. Returns '0' when there are no terms.
export function textSearchScore(terms, weightedColumns) {
if (!terms.length) return '0';
return terms.map(raw => {
const term = escapeIlikePattern(raw);
return weightedColumns.map(({ col, weight }) =>
`CASE WHEN ${col} ILIKE '%${term}%' ESCAPE '\\' THEN ${weight} ELSE 0 END`
).join(' + ');
}).map(score => `(${score})`).join(' + ');
}
Loading
Loading