From 38ed9184dca4c7beac262feaa94fc464e4968b60 Mon Sep 17 00:00:00 2001 From: Caio Pizzol Date: Thu, 4 Jun 2026 22:02:42 -0300 Subject: [PATCH 1/8] feat(fonts): parse embedding policy from the font OS/2 table Reads a font's OS/2 table to decide whether a DOCX-embedded font may be registered for rendering (fsType licensing) and which weight/style FACE it is (usWeightClass + fsSelection italic bit), so embedded fonts can become first-class registry faces with the correct FaceKey instead of filename inference. Restricted-License fonts (fsType bit 1) are non-embeddable; a parse failure returns null so callers skip conservatively. Foundation for registering embedded fonts as providers, stacked on the registered_face ladder. --- shared/font-system/src/index.ts | 3 ++ shared/font-system/src/os2.test.ts | 87 ++++++++++++++++++++++++++++++ shared/font-system/src/os2.ts | 86 +++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 shared/font-system/src/os2.test.ts create mode 100644 shared/font-system/src/os2.ts diff --git a/shared/font-system/src/index.ts b/shared/font-system/src/index.ts index b6f620966b..0720addf4d 100644 --- a/shared/font-system/src/index.ts +++ b/shared/font-system/src/index.ts @@ -56,6 +56,9 @@ export { setBundledFontAssetBase, getBundledFontAssetBase, DEFAULT_BUNDLED_FONT_ export type { FontResolutionRecord, UsedFace } from './report'; export { buildFontReport, buildFaceReport } from './report'; +export type { EmbeddingPolicy } from './os2'; +export { parseEmbeddingPolicy } from './os2'; + export type { FontSetLike, FontFaceLike, FontFaceCtor, FontRegistryOptions } from './registry'; export { FontRegistry, diff --git a/shared/font-system/src/os2.test.ts b/shared/font-system/src/os2.test.ts new file mode 100644 index 0000000000..dd97cfee01 --- /dev/null +++ b/shared/font-system/src/os2.test.ts @@ -0,0 +1,87 @@ +import { describe, it, expect } from 'vitest'; +import { parseEmbeddingPolicy } from './os2'; + +/** + * Build a 
/**
 * Builds a minimal SFNT font that carries a single table at a known offset, so the parser can be
 * exercised deterministically without a real font fixture. Layout: 12-byte offset table, one 16-byte
 * table directory record (tag "OS/2" unless `opts.tag` overrides it), then a 64-byte OS/2 table in
 * which only usWeightClass, fsType and fsSelection are set.
 */
function makeFont(opts: { usWeightClass: number; fsType: number; fsSelection: number; tag?: string }): ArrayBuffer {
  const tableTag = opts.tag ?? 'OS/2';
  const tableStart = 28; // 12 (offset table) + 16 (one directory record)
  const tableSize = 64; // through fsSelection @62-63
  const font = new ArrayBuffer(tableStart + tableSize);
  const writer = new DataView(font);

  // Offset table.
  writer.setUint32(0, 0x00010000); // sfnt version (TrueType)
  writer.setUint16(4, 1); // numTables

  // The single directory record: tag, checksum, table offset, table length.
  [0, 1, 2, 3].forEach((i) => writer.setUint8(12 + i, tableTag.charCodeAt(i)));
  writer.setUint32(16, 0);
  writer.setUint32(20, tableStart);
  writer.setUint32(24, tableSize);

  // The three OS/2 fields the parser reads.
  writer.setUint16(tableStart + 4, opts.usWeightClass);
  writer.setUint16(tableStart + 8, opts.fsType);
  writer.setUint16(tableStart + 62, opts.fsSelection);
  return font;
}

describe('parseEmbeddingPolicy (OS/2)', () => {
  /** Parses a synthetic font built from the given OS/2 field values. */
  const policyOf = (fields: Parameters<typeof makeFont>[0]) => parseEmbeddingPolicy(makeFont(fields));

  it('reads an installable Regular font: 400/normal, embeddable', () => {
    const policy = policyOf({ usWeightClass: 400, fsType: 0x0000, fsSelection: 0 });
    expect(policy).toEqual({
      fsType: 0x0000,
      face: { weight: '400', style: 'normal' },
      embeddable: true,
    });
  });

  it('reads Bold (usWeightClass 700) and SemiBold (600) as the 700 face; 500 stays 400', () => {
    const weightAt = (usWeightClass: number) => policyOf({ usWeightClass, fsType: 0, fsSelection: 0 })?.face.weight;
    expect(weightAt(700)).toBe('700');
    expect(weightAt(600)).toBe('700');
    expect(weightAt(500)).toBe('400');
  });

  it('reads the italic bit from fsSelection', () => {
    const regularItalic = policyOf({ usWeightClass: 400, fsType: 0, fsSelection: 0x01 });
    expect(regularItalic?.face.style).toBe('italic');

    const boldItalic = policyOf({ usWeightClass: 700, fsType: 0, fsSelection: 0x01 });
    expect(boldItalic?.face).toEqual({
      weight: '700',
      style: 'italic',
    });
  });

  it('marks a Restricted-License font (fsType bit 1) as NOT embeddable', () => {
    const policy = policyOf({ usWeightClass: 400, fsType: 0x0002, fsSelection: 0 });
    expect(policy?.embeddable).toBe(false);
    expect(policy?.fsType).toBe(0x0002);
  });

  it('treats Preview&Print and Editable as embeddable', () => {
    const previewAndPrint = policyOf({ usWeightClass: 400, fsType: 0x0004, fsSelection: 0 });
    expect(previewAndPrint?.embeddable).toBe(true);

    const editable = policyOf({ usWeightClass: 400, fsType: 0x0008, fsSelection: 0 });
    expect(editable?.embeddable).toBe(true);
  });

  it('returns null for non-SFNT / truncated bytes and for a font with no OS/2 table', () => {
    expect(parseEmbeddingPolicy(new ArrayBuffer(4))).toBeNull(); // too short
    expect(policyOf({ usWeightClass: 400, fsType: 0, fsSelection: 0, tag: 'cmap' })).toBeNull();
  });

  it('honors a typed-array view byteOffset (a subarray into a larger pooled buffer)', () => {
    const font = makeFont({ usWeightClass: 700, fsType: 0x0002, fsSelection: 0x01 });
    // Copy the font to a non-zero offset inside a bigger buffer and look at it through subarray() -
    // the shape a deobfuscated Node Buffer / Uint8Array.subarray() takes. A parser that started at
    // buffer offset 0 would read the leading padding instead of the font.
    const lead = 64;
    const pool = new Uint8Array(font.byteLength + 128);
    pool.set(new Uint8Array(font), lead);
    const fontView = pool.subarray(lead, lead + font.byteLength);
    expect(fontView.byteOffset).toBe(64);
    expect(parseEmbeddingPolicy(fontView)).toEqual({
      fsType: 0x0002,
      face: { weight: '700', style: 'italic' },
      embeddable: false,
    });
  });
});

// diff --git a/shared/font-system/src/os2.ts b/shared/font-system/src/os2.ts
// new file mode 100644
// index 0000000000..e0ac455071
// --- /dev/null
// +++ b/shared/font-system/src/os2.ts
// @@ -0,0 +1,86 @@
import type { FaceKey } from './resolver';

/**
 * What a font's OS/2 table says about embedding permission and about which face the font is.
 *
 * It answers two questions for a DOCX-embedded font: may it be REGISTERED for rendering (does its
 * license permit embedding), and which weight/style FACE does it represent - so an embedded font can
 * be registered under the correct {@link FaceKey} rather than one guessed from its filename.
 */
export interface EmbeddingPolicy {
  /** The OS/2 `fsType` bit field exactly as stored (licensing / embedding permissions). */
  fsType: number;
  /** The face the font provides, derived from OS/2 `usWeightClass` and the `fsSelection` italic bit. */
  face: FaceKey;
  /**
   * The minimal RENDER gate: false only when fsType marks the font Restricted-License / no-embedding
   * (bit 1). This is NOT a complete licensing model - Preview&Print vs Editable vs Installable, the
   * No-Subsetting (0x0100) and Bitmap-only (0x0200) bits, and re-embedding for EXPORT/EDIT (vs
   * display) each need a policy decision of their own. The raw {@link fsType} is kept so a caller
   * can apply a stricter policy without parsing the font again.
   */
  embeddable: boolean;
}

/** OS/2 fsType: Restricted-License embedding (no embedding permitted). Bits 0-3 are mutually exclusive. */
const FS_TYPE_RESTRICTED = 0x0002;
/** OS/2 fsSelection: ITALIC bit. */
/** OS/2 fsSelection: ITALIC bit. */
const FS_SELECTION_ITALIC = 0x0001;
/** Word's Bold is usWeightClass 700; treat >= 600 (SemiBold and up) as the bold face for our 400/700 axis. */
const BOLD_WEIGHT_THRESHOLD = 600;

/**
 * sfnt version values that introduce a single-font table directory in the layout parsed below:
 * 0x00010000 (TrueType outlines), 'OTTO' (CFF outlines), and the legacy Apple 'true' / 'typ1' tags.
 * Containers with a different header layout (WOFF, WOFF2, 'ttcf' collections) are deliberately absent.
 */
const SFNT_SIGNATURES: ReadonlySet<number> = new Set([0x00010000, 0x4f54544f, 0x74727565, 0x74797031]);
/** The table tag "OS/2" (with the slash) as the big-endian uint32 stored in a directory record. */
const OS2_TAG = 0x4f532f32;

/** The SFNT table directory starts after the 12-byte offset table; each record is 16 bytes. */
const SFNT_TABLE_DIR_OFFSET = 12;
const SFNT_TABLE_RECORD_SIZE = 16;
/** Field offsets inside a directory record: tag @0, checksum @4, table offset @8, table length @12. */
const SFNT_RECORD_OFFSET_FIELD = 8;
const SFNT_RECORD_LENGTH_FIELD = 12;
/** OS/2 field offsets within the table: usWeightClass @4, fsType @8, fsSelection @62 (version 0+). */
const OS2_USWEIGHTCLASS = 4;
const OS2_FSTYPE = 8;
const OS2_FSSELECTION = 62;
/** Bytes we must be able to read past the OS/2 table start (through fsSelection @62-63). */
const OS2_MIN_LENGTH = OS2_FSSELECTION + 2;

/**
 * Parse a font's OS/2 embedding policy + face from its raw bytes (a deobfuscated DOCX-embedded TTF/OTF).
 *
 * The bytes are accepted only when all of the following hold; otherwise the result is `null`:
 * - there are at least 12 bytes and they start with a known sfnt version (see `SFNT_SIGNATURES`);
 * - every directory record scanned before the OS/2 record lies fully inside the input;
 * - an "OS/2" record exists, its DECLARED length covers fsSelection (>= 64 bytes), and those 64
 *   bytes lie inside the input.
 *
 * Callers MUST treat `null` conservatively (do NOT register; fall through to the bundled
 * substitute), since we cannot prove the font is licensed for embedding.
 *
 * @param bytes The font file, as an ArrayBuffer or any view over one. A view's own
 *   byteOffset/byteLength window is honored, so a subarray of a larger buffer parses correctly.
 * @returns The raw `fsType`, the 400/700 x normal/italic face, and the render gate `embeddable`
 *   (false only when the Restricted-License bit is set) - or `null` as described above.
 */
export function parseEmbeddingPolicy(bytes: ArrayBuffer | ArrayBufferView): EmbeddingPolicy | null {
  // Honor a view's byteOffset/byteLength: a Uint8Array.subarray() or a Node Buffer is a window into a
  // larger (often pooled) ArrayBuffer, so `new DataView(bytes.buffer)` would read the wrong bytes.
  // ArrayBuffer.isView (rather than `instanceof ArrayBuffer`) keeps an ArrayBuffer created in another
  // realm out of the view branch, where its missing `.buffer` would make the DataView constructor throw.
  const view = ArrayBuffer.isView(bytes)
    ? new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength)
    : new DataView(bytes);
  if (view.byteLength < SFNT_TABLE_DIR_OFFSET) return null;

  // Reject anything that does not start like a single-font sfnt. Without this, bytes with a foreign
  // or corrupt header could still produce a policy from whatever directory-shaped data follows.
  if (!SFNT_SIGNATURES.has(view.getUint32(0))) return null;

  const numTables = view.getUint16(4);
  let os2Offset = -1;
  let os2Length = 0;
  for (let i = 0; i < numTables; i += 1) {
    const record = SFNT_TABLE_DIR_OFFSET + i * SFNT_TABLE_RECORD_SIZE;
    if (record + SFNT_TABLE_RECORD_SIZE > view.byteLength) return null;
    if (view.getUint32(record) === OS2_TAG) {
      os2Offset = view.getUint32(record + SFNT_RECORD_OFFSET_FIELD);
      os2Length = view.getUint32(record + SFNT_RECORD_LENGTH_FIELD);
      break;
    }
  }
  if (os2Offset < 0) return null;
  // The table must DECLARE enough bytes for fsSelection; otherwise offset 62 belongs to a neighbour.
  if (os2Length < OS2_MIN_LENGTH) return null;
  // ...and those bytes must actually be present in the input.
  if (os2Offset + OS2_MIN_LENGTH > view.byteLength) return null;

  const usWeightClass = view.getUint16(os2Offset + OS2_USWEIGHTCLASS);
  const fsType = view.getUint16(os2Offset + OS2_FSTYPE);
  const fsSelection = view.getUint16(os2Offset + OS2_FSSELECTION);

  const weight: '400' | '700' = usWeightClass >= BOLD_WEIGHT_THRESHOLD ? '700' : '400';
  const style: 'normal' | 'italic' = (fsSelection & FS_SELECTION_ITALIC) !== 0 ? 'italic' : 'normal';
  return {
    fsType,
    face: { weight, style },
    embeddable: (fsType & FS_TYPE_RESTRICTED) === 0,
  };
}

// -----------------------------------------------------------------------------------------------
// Header of the next patch in this series, preserved verbatim; its text continues past this point.
// From 054fbcc15f418a44c984b1118776b0c61b0b8821 Mon Sep 17 00:00:00 2001
// From: Caio Pizzol
// Date: Thu, 4 Jun 2026 22:11:54 -0300
// Subject: [PATCH 2/8] feat(fonts): extract embedded DOCX fonts as structured deobfuscated faces
//
// Adds SuperConverter.getEmbeddedFontFaces(), the architecturally correct replacement for the legacy
// @font-face CSS injection: it extracts + deobfuscates the embedded fonts and classifies each via the
// OS/2 parser (weight/style + fsType licensing), returning structured faces with the deobfuscated
// bytes and the fontTable relationship id. The converter no longer needs to mint object URLs or inject