diff --git a/csreview/src/reports/html.js b/csreview/src/reports/html.js index 65f5666..dffaad4 100644 --- a/csreview/src/reports/html.js +++ b/csreview/src/reports/html.js @@ -1,6 +1,7 @@ // @ts-check import fs from 'fs'; import { calculateSecurityScore } from '../score.js'; +import { originBreakdown } from './summary.js'; const SEVERITY_COLORS = { CRITICAL: '#dc2626', @@ -316,6 +317,12 @@ export function generateHtmlReport(projectInfo, findings, outputPath, metadata = ? `OSV-Scanner ${escapeHtml(osvScanner.version || '')} (${osvScanner.rawCount || osvScanner.findings?.length || 0} findings)` : `OSV-Scanner unavailable${osvScanner.error ? `: ${escapeHtml(osvScanner.error)}` : ''}. Install with winget install Google.OSVScanner, brew install osv-scanner, or go install github.com/google/osv-scanner/v2/cmd/osv-scanner@latest.`; + const origin = originBreakdown(findings); + const originText = + origin.rows.length > 0 + ? origin.rows.map((r) => `${escapeHtml(r.label)} ${r.count}`).join(' · ') + : 'none'; + const findingsHtml = findings .map((f) => { const color = getSeverityColor(f.severity); @@ -1456,6 +1463,7 @@ a:hover {
Semgrep: ${semgrepText}
Dependency scanners: ${packageAuditText} ${osvScannerText}
+Findings by origin (trust corroborated first): ${origin.confirmed} CONFIRMED (tool+detector) · ${originText}
CSReview remains read-only for audited source code and only writes report artifacts.
diff --git a/csreview/src/reports/markdown.js b/csreview/src/reports/markdown.js index f649bbe..7e0b994 100644 --- a/csreview/src/reports/markdown.js +++ b/csreview/src/reports/markdown.js @@ -2,6 +2,7 @@ import fs from 'fs'; import path from 'path'; import { calculateSecurityScore, SEVERITY_WEIGHTS } from '../score.js'; +import { originBreakdown } from './summary.js'; const SEVERITY_ORDER = { CRITICAL: 0, HIGH: 1, MEDIUM: 2, LOW: 3, INFO: 4 }; @@ -624,6 +625,9 @@ function buildScanMetadata(projectInfo, findings, startTime, metadata = {}) { const c = String(f.confidence || 'MEDIUM').toUpperCase(); if (confidenceBreakdown[c] !== undefined) confidenceBreakdown[c]++; } + const origin = originBreakdown(findings); + const originText = + origin.rows.length > 0 ? origin.rows.map((r) => `${escapeMdInline(r.label)} ${r.count}`).join(', ') : 'none'; const filesCount = projectInfo.files?.length || 0; const configCount = projectInfo.configFiles?.length || 0; @@ -635,6 +639,7 @@ function buildScanMetadata(projectInfo, findings, startTime, metadata = {}) { - **Config Files**: ${configCount} ${buildToolMetadata(metadata.toolResults)} - **Confidence Breakdown**: ${confidenceBreakdown.CONFIRMED} CONFIRMED, ${confidenceBreakdown['TOOL-ONLY']} TOOL-ONLY, ${confidenceBreakdown.HIGH} HIGH, ${confidenceBreakdown.MEDIUM} MEDIUM, ${confidenceBreakdown.LOW} LOW +- **Findings by origin** (trust corroborated first): ${origin.confirmed} CONFIRMED (tool+detector), ${originText} - **Duration**: ${duration}s`; } diff --git a/csreview/src/reports/summary.js b/csreview/src/reports/summary.js new file mode 100644 index 0000000..5f97ca3 --- /dev/null +++ b/csreview/src/reports/summary.js @@ -0,0 +1,64 @@ +// @ts-check +// Aggregate findings by their originating tool/source so a report makes the +// corroboration story explicit: a finding seen by BOTH a tool and the heuristic +// detector is CONFIRMED and should be trusted first; detector-only heuristics are +// lower confidence. 
// This separates "what Semgrep/OSV/audit/Gitleaks found" from
// "what the internal detector guessed", which is exactly the triage signal users
// asked for.

// Source id assumed when a finding does not say where it came from.
const DEFAULT_SOURCE = 'csreview-detector';

// A Map rather than a plain object: source ids arrive from tool output, so an id
// such as "constructor" or "__proto__" must never resolve to a member inherited
// from Object.prototype (which would hand a function/object to the renderers).
const SOURCE_LABELS = new Map([
  ['csreview-detector', 'CSReview detector (heuristic)'],
  ['semgrep', 'Semgrep'],
  ['osv-scanner', 'OSV-Scanner'],
  ['npm-audit', 'npm audit'],
  ['pnpm-audit', 'pnpm audit'],
  ['bun-audit', 'bun audit'],
  ['gitleaks', 'Gitleaks'],
  ['trivy', 'Trivy'],
  ['bandit', 'Bandit'],
  ['gosec', 'gosec'],
]);

const SUBAGENT_PREFIX = 'subagent:';

/**
 * Human-friendly label for an internal source id.
 *
 * Ids starting with `subagent:` become `Subagent: <rest>`; known ids map to
 * their display name; anything else is returned unchanged. A falsy id is
 * treated as the heuristic detector. The result is always a string.
 *
 * @param {string} source
 * @returns {string}
 */
export function labelForSource(source) {
  const id = String(source || DEFAULT_SOURCE);
  if (id.startsWith(SUBAGENT_PREFIX)) return `Subagent: ${id.slice(SUBAGENT_PREFIX.length)}`;
  return SOURCE_LABELS.get(id) ?? id;
}

/**
 * Distinct, usable source ids declared by one finding.
 *
 * Null/undefined and blank entries in `sources` are dropped so they neither
 * create phantom rows (e.g. "null") nor count as corroboration. When nothing
 * usable remains, falls back to `source`, then to the detector default.
 *
 * @param {{source?: string, sources?: string[]} | null | undefined} finding
 * @returns {string[]}
 */
function sourcesOf(finding) {
  const declared = Array.isArray(finding?.sources) ? finding.sources : [];
  const ids = declared
    .filter((entry) => entry != null)
    .map((entry) => String(entry))
    .filter((entry) => entry.trim() !== '');
  if (ids.length > 0) return [...new Set(ids)];
  return [String(finding?.source || DEFAULT_SOURCE)];
}

/**
 * Count findings per originating source. A finding corroborated by more than one
 * source counts once under each; `confirmed` counts findings whose confidence is
 * CONFIRMED (case-insensitive) or that list two or more distinct sources.
 *
 * Rows are ordered by count (descending), then by label.
 *
 * @param {Array<{source?: string, sources?: string[], confidence?: string}>} [findings]
 * @returns {{confirmed: number, total: number, rows: Array<{source: string, label: string, count: number}>}}
 */
export function originBreakdown(findings = []) {
  // Anything that is not an array (e.g. null) is treated as "no findings".
  const list = Array.isArray(findings) ? findings : [];
  /** @type {Map<string, number>} */
  const counts = new Map();
  let confirmed = 0;
  for (const finding of list) {
    const unique = sourcesOf(finding);
    if (String(finding?.confidence).toUpperCase() === 'CONFIRMED' || unique.length > 1) {
      confirmed += 1;
    }
    for (const source of unique) {
      counts.set(source, (counts.get(source) || 0) + 1);
    }
  }
  const rows = [...counts.entries()]
    .map(([source, count]) => ({ source, label: labelForSource(source), count }))
    .sort((a, b) => b.count - a.count || a.label.localeCompare(b.label));
  return { confirmed, total: list.length, rows };
}
projectType: 'unknown', + }, + findings, + out, + {}, + ); + const html = fs.readFileSync(out, 'utf8'); + assert.match(html, /Findings by origin/); + assert.match(html, /CONFIRMED \(tool\+detector\)/); + assert.match(html, /Trivy/); + assert.match(html, /OSV-Scanner/); +}); + test('Markdown does not allow link injection through a crafted CWE id (M1)', () => { const out = tmpFile('cwe_security-findings.md'); const finding = baseFinding({ id: 'CWEINJ', cwe: 'x)](http://evil.com) and [pwn](http://evil2.com' }); diff --git a/csreview/test/summary.test.js b/csreview/test/summary.test.js new file mode 100644 index 0000000..e4771b5 --- /dev/null +++ b/csreview/test/summary.test.js @@ -0,0 +1,37 @@ +// @ts-check +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { originBreakdown, labelForSource } from '../src/reports/summary.js'; + +test('labelForSource maps known sources and formats subagent domains', () => { + assert.equal(labelForSource('semgrep'), 'Semgrep'); + assert.equal(labelForSource('osv-scanner'), 'OSV-Scanner'); + assert.equal(labelForSource('npm-audit'), 'npm audit'); + assert.equal(labelForSource('csreview-detector'), 'CSReview detector (heuristic)'); + assert.equal(labelForSource('subagent:rust'), 'Subagent: rust'); + assert.equal(labelForSource('mystery-tool'), 'mystery-tool'); +}); + +test('originBreakdown counts findings per source and flags corroborated ones', () => { + const { confirmed, total, rows } = originBreakdown([ + { sources: ['csreview-detector', 'gitleaks'], confidence: 'CONFIRMED' }, + { sources: ['semgrep'] }, + { source: 'osv-scanner' }, + { sources: ['csreview-detector'] }, + {}, // no source -> defaults to the detector + ]); + assert.equal(total, 5); + assert.equal(confirmed, 1); + const map = Object.fromEntries(rows.map((r) => [r.source, r.count])); + assert.equal(map['gitleaks'], 1); + assert.equal(map['semgrep'], 1); + assert.equal(map['osv-scanner'], 1); + assert.equal(map['csreview-detector'], 3); // 2 
explicit + 1 default +}); + +test('originBreakdown rows sort by count desc and it tolerates empty input', () => { + assert.deepEqual(originBreakdown([]), { confirmed: 0, total: 0, rows: [] }); + const { rows } = originBreakdown([{ source: 'semgrep' }, { source: 'semgrep' }, { source: 'gitleaks' }]); + assert.equal(rows[0].source, 'semgrep'); + assert.equal(rows[0].count, 2); +});