From ba352ae79439261d9cc1ab8892f58b6ffd3d1272 Mon Sep 17 00:00:00 2001 From: dev-ecd-dm Date: Wed, 3 Jun 2026 11:03:29 -0300 Subject: [PATCH] fix(detector): calibrate WEAK_CIPHER, unique IDs, downgrade non-source findings User feedback: the internal detector "shouted fire because it saw the word match in the dictionary." On a real project it flagged args.includes('--no-update-check') and ids.includes('postgres') as CRITICAL Weak Cipher, because the WEAK_CIPHER regex matched the "des" substring in inclu-des / exclu-des / mo-des (the bare (?:DES|RC4|Blowfish|ECB)\b alternative: case-insensitive, no leading boundary). 150 of 192 detector findings on csreview's own code were this FP, cratering the score to 0/100. - WEAK_CIPHER is now context-bound: only real crypto APIs (createCipheriv, Cipher.getInstance, CryptoJS.*, .new(), MODE_ECB, algorithm/cipher config) with proper word boundaries. Never a bare cipher-name substring. - Deterministic, globally-unique finding IDs (the old ${id}_${index} scheme collided across files, e.g. WEAK_CIPHER_0 in every file). - Findings in non-source paths (tests/fixtures/examples/docs) are downgraded to LOW + confidence LOW so intentional test vectors and example keys do not crater the score; they stay visible rather than hidden. Effect on csreview's own tree: 192 -> 51 detector findings, 0 duplicate IDs, the .includes() false positives gone. Recall preserved (real weak-cipher usage still flagged). Remaining src CRITICAL/HIGH are csreview matching its own rule definitions in detector.js (only when scanning csreview itself). Tests: +4 calibration + 3 negative-corpus guards. 175/175 - lint clean - typecheck 0. Co-Authored-By: Claude Opus 4.8 (1M context) --- csreview/src/detector.js | 29 +++++++- csreview/test/detector-calibration.test.js | 86 ++++++++++++++++++++++ csreview/test/detector-precision.test.js | 20 +++++ 3 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 csreview/test/detector-calibration.test.js diff --git a/csreview/src/detector.js b/csreview/src/detector.js index c03778e..473c11d 100644 --- a/csreview/src/detector.js +++ b/csreview/src/detector.js @@ -692,8 +692,12 @@ const VULNERABILITY_PATTERNS = [ category: 'Cryptography', name: 'Weak Cipher Algorithm', description: 'DES, RC4, Blowfish, or ECB mode.', + // Context-bound: only real crypto APIs (createCipheriv, Cipher.getInstance, + // CryptoJS.*, .new(), MODE_ECB, algorithm/cipher config). Never a bare + // "des"/"rc4"/"ecb" substring — the old `(?:DES|RC4|Blowfish|ECB)\b` (case- + // insensitive, no leading boundary) matched inclu*des*/exclu*des*/mo*des*. regex: - /(?:createCipheriv?\s*\(\s*['"](?:des|rc4|bf|blowfish|ecb)|(?:DES|RC4|Blowfish|ECB)\b|(?:Cipher|cipher)\s*\(\s*['"](?:des|rc4|bf))/gi, + /(?:createCipher(?:iv)?\s*\(\s*['"`](?:des|3des|rc4|rc2|bf|blowfish)\b|createCipheriv\s*\(\s*['"`][a-z0-9-]*-ecb\b|Cipher\.getInstance\s*\(\s*['"`][^'"`]*(?:\bDES|\bRC4|\bRC2|\bBlowfish|\bECB)|CryptoJS\.(?:DES|TripleDES|RC4|RC2|Blowfish)\b|\b(?:DES3?|ARC4|RC4|RC2|Blowfish)\.new\s*\(|\bMODE_ECB\b|(?:algorithm|cipher)\s*[:=]\s*['"`](?:des|3des|rc4|rc2|bf|blowfish)\b)/gi, cwe: 'CWE-327', owasp: 'A02:2021-Cryptographic Failures', fix: 'Use AES-256-GCM or ChaCha20-Poly1305.', @@ -1660,6 +1664,29 @@ export function detectVulnerabilities(projectInfo) { allFindings.push(...vulnFindings); } + // Post-process the merged set: + // 1) Deterministic, globally-unique IDs. The per-file `${id}_${index}` scheme + // collided across files (WEAK_CIPHER_0 appeared in every file). + // 2) Downgrade findings in non-source paths (tests/fixtures/examples/docs): + // intentional test vectors and example keys should not crater the score. + // They stay visible at LOW rather than being hidden entirely. + const idCounts = new Map(); + for (const finding of allFindings) { + const base = String(finding.id).replace(/_\d+$/, ''); + const slug = String(finding.file || 'unknown') + .replace(/[^a-zA-Z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); + const key = `${base}_${slug}_${finding.line}`; + const seen = idCounts.get(key) || 0; + idCounts.set(key, seen + 1); + finding.id = seen > 0 ? `${key}_${seen}` : key; + + if (finding.severity !== 'INFO' && isNonSourcePath(finding.file)) { + finding.severity = 'LOW'; + finding.confidence = 'LOW'; + } + } + allFindings.sort((a, b) => { const sa = SEVERITY_ORDER[a.severity] ?? 99; const sb = SEVERITY_ORDER[b.severity] ?? 99; diff --git a/csreview/test/detector-calibration.test.js b/csreview/test/detector-calibration.test.js new file mode 100644 index 0000000..1556f94 --- /dev/null +++ b/csreview/test/detector-calibration.test.js @@ -0,0 +1,86 @@ +// @ts-check +import test from 'node:test'; +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { detectVulnerabilities } from '../src/detector.js'; + +// Calibration guards driven by real user feedback: the internal detector was +// "shouting fire because it saw the word match in the dictionary" — WEAK_CIPHER +// matched the "des" substring inside includes/excludes/modes, IDs collided +// (WEAK_CIPHER_0 repeated), and test fixtures cratered the score. + +function scan(files) { + const root = fs.mkdtempSync(path.join(os.tmpdir(), 'csreview-calib-')); + for (const f of files) { + const abs = path.join(root, f.path); + fs.mkdirSync(path.dirname(abs), { recursive: true }); + fs.writeFileSync(abs, f.code, 'utf8'); + } + return detectVulnerabilities({ + root, + files: files.map((f) => ({ path: f.path, language: f.language || 'javascript', kind: f.kind || 'source' })), + }); +} + +test('WEAK_CIPHER does not fire on the "des" substring in includes/excludes/modes', () => { + const findings = scan([ + { path: 'src/a.js', code: "export const f = (args) => args.includes('--no-update-check');\n" }, + { path: 'src/b.js', code: "export const ok = (ids) => ids.includes('postgres');\n" }, + { path: 'src/c.js', code: 'export const modes = (x) => x.excludes && x.decodes;\n' }, + { path: 'src/d.js', code: 'export const nodes = ["a"];\nexport const provides = true;\n' }, + ]); + const wc = findings.filter((f) => String(f.id).startsWith('WEAK_CIPHER')); + assert.equal(wc.length, 0, `unexpected WEAK_CIPHER FPs: ${wc.map((f) => `${f.file}:${f.line}`).join(', ')}`); +}); + +test('WEAK_CIPHER still detects real weak-cipher usage (recall preserved)', () => { + const findings = scan([ + { + path: 'src/n.js', + code: 'import crypto from "crypto";\nexport const e = (k, iv) => crypto.createCipheriv("des-ede3-cbc", k, iv);\n', + }, + { + path: 'src/cjs.js', + code: 'import CryptoJS from "crypto-js";\nexport const e = (d) => CryptoJS.DES.encrypt(d, "key");\n', + }, + { + path: 'src/ecb.js', + code: 'import crypto from "crypto";\nexport const e = (k, iv) => crypto.createCipheriv("aes-128-ecb", k, iv);\n', + }, + { + path: 'src/J.java', + language: 'java', + code: 'class A { void f() throws Exception { Cipher.getInstance("DES"); } }\n', + }, + ]); + const wc = findings.filter((f) => f.cwe === 'CWE-327'); + assert.ok(wc.length >= 4, `expected >= 4 weak-cipher detections, got ${wc.length}`); +}); + +test('detector assigns globally-unique finding IDs across files (no WEAK_CIPHER_0 collision)', () => { + const findings = scan([ + { + path: 'src/a.js', + code: 'import crypto from "crypto";\nexport const e = (k, iv) => crypto.createCipheriv("rc4", k, iv);\n', + }, + { + path: 'src/b.js', + code: 'import crypto from "crypto";\nexport const e = (k, iv) => crypto.createCipheriv("rc4", k, iv);\n', + }, + ]); + const ids = findings.map((f) => f.id); + assert.equal(new Set(ids).size, ids.length, `duplicate IDs present: ${ids.join(', ')}`); +}); + +test('findings in non-source paths (test/fixtures) are downgraded; real source keeps its severity', () => { + const srcFindings = scan([{ path: 'src/x.js', code: 'export const key = "AKIAIOSFODNN7EXAMPLE";\n' }]); + const testFindings = scan([{ path: 'test/x.test.js', code: 'export const key = "AKIAIOSFODNN7EXAMPLE";\n' }]); + const inSrc = srcFindings.find((f) => String(f.id).startsWith('AWS_ACCESS_KEY')); + const inTest = testFindings.find((f) => String(f.id).startsWith('AWS_ACCESS_KEY')); + assert.ok(inSrc && inTest, 'AWS key detected in both src and test'); + assert.equal(inSrc.severity, 'CRITICAL', 'real source finding keeps full severity'); + assert.equal(inTest.severity, 'LOW', 'test/fixture finding is downgraded to LOW'); + assert.equal(inTest.confidence, 'LOW'); +}); diff --git a/csreview/test/detector-precision.test.js b/csreview/test/detector-precision.test.js index bab8b06..9d070a9 100644 --- a/csreview/test/detector-precision.test.js +++ b/csreview/test/detector-precision.test.js @@ -153,6 +153,26 @@ const NEGATIVES = [ code: 'import crypto from "crypto";\nexport const token = () => crypto.randomBytes(32).toString("hex");\n', }, { name: 'plain', file: 'src/n10.js', language: 'javascript', code: 'export const x = (a, b) => compute(a, b);\n' }, + // WEAK_CIPHER must not fire on the "des" substring inside includes/excludes/modes + // (the reported false positives: args.includes('--no-update-check'), ids.includes('postgres')). + { + name: 'includes-not-cipher', + file: 'src/n11.js', + language: 'javascript', + code: "export const f = (args) => args.includes('--no-update-check');\n", + }, + { + name: 'includes-postgres', + file: 'src/n12.js', + language: 'javascript', + code: "export const ok = (ids) => ids.includes('postgres');\n", + }, + { + name: 'excludes-modes', + file: 'src/n13.js', + language: 'javascript', + code: 'export const modes = ["a"];\nexport const g = (x) => x.excludes;\n', + }, ]; function runOne(sample) {