From 737b874b7c51191eae2496141747d60f6576e41e Mon Sep 17 00:00:00 2001 From: JiuqingSong Date: Mon, 8 Jun 2026 14:27:21 -0700 Subject: [PATCH] @ Convert plain text/thin-wrapper paste to formatted HTML via markdown When pasted content is plain text, or HTML that is only a thin wrapper of plain text (e.g. each line wrapped in a DIV or P with no other formatting), interpret it as markdown and replace the paste fragment with the converted formatted HTML, so we paste formatted content by default instead of raw markdown text. Co-Authored-By: Claude Opus 4.8 (1M context) @ --- .../Markdown/convertPastedTextToMarkdown.ts | 79 +++++++++ .../lib/paste/PastePlugin.ts | 17 ++ .../package.json | 1 + .../convertPastedTextToMarkdownTest.ts | 156 ++++++++++++++++++ .../plugin/ContentModelPastePluginTest.ts | 56 +++++++ 5 files changed, 309 insertions(+) create mode 100644 packages/roosterjs-content-model-plugins/lib/paste/Markdown/convertPastedTextToMarkdown.ts create mode 100644 packages/roosterjs-content-model-plugins/test/paste/Markdown/convertPastedTextToMarkdownTest.ts diff --git a/packages/roosterjs-content-model-plugins/lib/paste/Markdown/convertPastedTextToMarkdown.ts b/packages/roosterjs-content-model-plugins/lib/paste/Markdown/convertPastedTextToMarkdown.ts new file mode 100644 index 000000000000..c74187d683a5 --- /dev/null +++ b/packages/roosterjs-content-model-plugins/lib/paste/Markdown/convertPastedTextToMarkdown.ts @@ -0,0 +1,79 @@ +import { contentModelToDom, createModelToDomContext } from 'roosterjs-content-model-dom'; +import { convertMarkdownToContentModel } from 'roosterjs-content-model-markdown'; +import type { ClipboardData, IEditor } from 'roosterjs-content-model-types'; + +// Tags that are considered "thin wrappers", which only add structure (such as line breaks) +// around the plain text without applying any real formatting to its content. 
+const ThinWrapperTags = new Set(['DIV', 'P', 'BR', 'SPAN']); + +/** + * @internal + * Detect whether the pasted content is plain text, or HTML that is only a thin wrapper of + * plain text (for example, each line wrapped in a DIV or P with no other formatting). When + * this is the case we can safely interpret the plain text as markdown. + * @param clipboardData The clipboard data of the paste; its text and rawHtml fields are read + * @param fragment The parsed HTML fragment to be pasted; only examined when rawHtml is present + * @returns true when text has non-whitespace content and either rawHtml is absent or empty, or + * the fragment contains only attribute-free DIV, P, BR and SPAN elements whose combined text + * equals text once all whitespace is ignored; false otherwise + */ +export function shouldConvertPastedTextToMarkdown( + clipboardData: ClipboardData, + fragment: DocumentFragment +): boolean { + const { text, rawHtml } = clipboardData; + + // There must be some plain text to interpret as markdown + if (!text || !text.trim()) { + return false; + } + + // No HTML content at all (text/plain only), so the plain text is all we have + if (!rawHtml) { + return true; + } + + // There is HTML content, only continue when it is a thin wrapper of the plain text + return isThinWrapperOfPlainText(fragment, text); +} + +/** + * @internal + * Interpret the given plain text as markdown, convert it to a DOM tree and replace the + * content of the given fragment with the result, so that we paste formatted HTML instead + * of the original markdown text. The markdown is converted to a content model before the + * fragment is emptied. 
+ * @param editor The editor instance; its document is passed to contentModelToDom + * @param fragment The fragment whose content will be replaced; every existing child node is + * removed before the converted content is written into it + * @param text The plain text (markdown) to convert + */ +export function convertPastedTextToMarkdown( + editor: IEditor, + fragment: DocumentFragment, + text: string +) { + const model = convertMarkdownToContentModel(text); + + while (fragment.firstChild) { + fragment.removeChild(fragment.firstChild); + } + + contentModelToDom(editor.getDocument(), fragment, model, createModelToDomContext()); +} + +/** + * Check whether the fragment is only a thin wrapper around the given plain text: every element + * in it must have a tag name listed in ThinWrapperTags (DIV, P, BR, SPAN) and carry no + * attributes, and the fragment's text content must equal the plain text after all whitespace + * is removed from both. + * @param fragment The parsed HTML fragment to inspect + * @param text The plain text from the clipboard to compare against + * @returns true when both conditions hold, otherwise false + */ +function isThinWrapperOfPlainText(fragment: DocumentFragment, text: string): boolean { + const elements = fragment.querySelectorAll('*'); + + for (let i = 0; i < elements.length; i++) { + const element = elements[i]; + + // Any element that is not a structural wrapper, or that carries its own attributes + // (style, class, etc.), means the HTML adds real formatting on top of the text. + if (!ThinWrapperTags.has(element.tagName) || element.attributes.length > 0) { + return false; + } + } + + // Make sure the HTML and the plain text actually represent the same content + return removeWhitespace(fragment.textContent || '') === removeWhitespace(text); +} + +/** + * Remove every character matched by the \s regular expression class, so that two strings can + * be compared without regard to spacing or line breaks. + * @param text The string to strip + * @returns The input with all whitespace characters removed + */ +function removeWhitespace(text: string): string { + return text.replace(/\s/g, ''); +} diff --git a/packages/roosterjs-content-model-plugins/lib/paste/PastePlugin.ts b/packages/roosterjs-content-model-plugins/lib/paste/PastePlugin.ts index 4126af50d4fb..886dc843cc92 100644 --- a/packages/roosterjs-content-model-plugins/lib/paste/PastePlugin.ts +++ b/packages/roosterjs-content-model-plugins/lib/paste/PastePlugin.ts @@ -1,6 +1,10 @@ import { addParser } from './utils/addParser'; import { blockElementParser } from './parsers/blockElementParser'; import { chainSanitizerCallback } from './utils/chainSanitizerCallback'; +import { + convertPastedTextToMarkdown, + shouldConvertPastedTextToMarkdown, +} from './Markdown/convertPastedTextToMarkdown'; import { DefaultSanitizers } from './DefaultSanitizers'; import { 
deprecatedBorderColorParser } from './parsers/deprecatedColorParser'; import { getDocumentSource } from './pasteSourceValidations/getDocumentSource'; @@ -144,6 +148,19 @@ export class PastePlugin implements EditorPlugin { case 'oneNoteDesktop': processPastedContentFromOneNote(event); break; + + case 'default': + // When the pasted content is plain text (or HTML that is just a thin + // wrapper of plain text), interpret it as markdown and replace the paste + // fragment with the converted formatted HTML, so we paste formatted content + // by default instead of the raw markdown text. + if ( + pasteType === 'normal' && + shouldConvertPastedTextToMarkdown(clipboardData, fragment) + ) { + convertPastedTextToMarkdown(this.editor, fragment, clipboardData.text); + } + break; } addParser(event.domToModelOption, 'link', parseLink); diff --git a/packages/roosterjs-content-model-plugins/package.json b/packages/roosterjs-content-model-plugins/package.json index 3b25efcf7192..ecaa2d09e874 100644 --- a/packages/roosterjs-content-model-plugins/package.json +++ b/packages/roosterjs-content-model-plugins/package.json @@ -5,6 +5,7 @@ "tslib": "^2.3.1", "roosterjs-content-model-core": "", "roosterjs-content-model-dom": "", + "roosterjs-content-model-markdown": "", "roosterjs-content-model-types": "", "roosterjs-content-model-api": "" }, diff --git a/packages/roosterjs-content-model-plugins/test/paste/Markdown/convertPastedTextToMarkdownTest.ts b/packages/roosterjs-content-model-plugins/test/paste/Markdown/convertPastedTextToMarkdownTest.ts new file mode 100644 index 000000000000..a65c438c67fe --- /dev/null +++ b/packages/roosterjs-content-model-plugins/test/paste/Markdown/convertPastedTextToMarkdownTest.ts @@ -0,0 +1,156 @@ +import { IEditor } from 'roosterjs-content-model-types'; +import { + convertPastedTextToMarkdown, + shouldConvertPastedTextToMarkdown, +} from '../../../lib/paste/Markdown/convertPastedTextToMarkdown'; + +function createFragment(html: string): DocumentFragment { + 
const template = document.createElement('template'); + template.innerHTML = html; + return template.content; +} + +describe('shouldConvertPastedTextToMarkdown', () => { + it('returns false when there is no plain text', () => { + const result = shouldConvertPastedTextToMarkdown( + { text: '', rawHtml: null }, + createFragment('') + ); + + expect(result).toBeFalse(); + }); + + it('returns false when the plain text is only whitespace', () => { + const result = shouldConvertPastedTextToMarkdown( + { text: ' \n ', rawHtml: null }, + createFragment('') + ); + + expect(result).toBeFalse(); + }); + + it('returns true when there is plain text and no HTML (rawHtml is null)', () => { + const result = shouldConvertPastedTextToMarkdown( + { text: '# Heading', rawHtml: null }, + createFragment('') + ); + + expect(result).toBeTrue(); + }); + + it('returns true when there is plain text and rawHtml is undefined', () => { + const result = shouldConvertPastedTextToMarkdown( + { text: '# Heading', rawHtml: undefined }, + createFragment('') + ); + + expect(result).toBeTrue(); + }); + + it('returns true when the HTML is a thin wrapper of the plain text', () => { + const text = '# Heading\n- item 1\n- item 2'; + const result = shouldConvertPastedTextToMarkdown( + { text, rawHtml: '
# Heading
- item 1
- item 2
' }, + createFragment('
# Heading
- item 1
- item 2
') + ); + + expect(result).toBeTrue(); + }); + + it('returns true when the HTML uses P and BR as thin wrappers', () => { + const text = 'line 1\nline 2'; + const result = shouldConvertPastedTextToMarkdown( + { text, rawHtml: '

line 1
line 2

' }, + createFragment('

line 1
line 2

') + ); + + expect(result).toBeTrue(); + }); + + it('returns false when the HTML contains a formatting element', () => { + const text = 'hello world'; + const result = shouldConvertPastedTextToMarkdown( + { text, rawHtml: '
hello world
' }, + createFragment('
hello world
') + ); + + expect(result).toBeFalse(); + }); + + it('returns false when a thin wrapper element carries attributes', () => { + const text = 'hello world'; + const result = shouldConvertPastedTextToMarkdown( + { text, rawHtml: '
hello world
' }, + createFragment('
hello world
') + ); + + expect(result).toBeFalse(); + }); + + it('returns false when the HTML text does not match the plain text', () => { + const result = shouldConvertPastedTextToMarkdown( + { text: 'hello world', rawHtml: '
different content
' }, + createFragment('
different content
') + ); + + expect(result).toBeFalse(); + }); + + it('returns false when the HTML contains a link', () => { + const text = 'see roosterjs'; + const result = shouldConvertPastedTextToMarkdown( + { text, rawHtml: '' }, + createFragment('') + ); + + expect(result).toBeFalse(); + }); +}); + +describe('convertPastedTextToMarkdown', () => { + let editor: IEditor; + + beforeEach(() => { + editor = ({ + getDocument: () => document, + }) as IEditor; + }); + + it('converts a markdown heading into an HTML heading', () => { + const fragment = createFragment('
# Heading
'); + + convertPastedTextToMarkdown(editor, fragment, '# Heading'); + + const div = document.createElement('div'); + div.appendChild(fragment.cloneNode(true)); + + expect(div.querySelector('h1')).not.toBeNull(); + expect(div.textContent).toBe('Heading'); + }); + + it('converts a markdown unordered list into list items', () => { + const fragment = createFragment(''); + + convertPastedTextToMarkdown(editor, fragment, '- item 1\n- item 2'); + + const div = document.createElement('div'); + div.appendChild(fragment.cloneNode(true)); + + const listItems = div.querySelectorAll('li'); + expect(listItems.length).toBe(2); + expect(listItems[0].textContent).toBe('item 1'); + expect(listItems[1].textContent).toBe('item 2'); + }); + + it('clears the existing content of the fragment before conversion', () => { + const fragment = createFragment('
old content
'); + + convertPastedTextToMarkdown(editor, fragment, 'new content'); + + const div = document.createElement('div'); + div.appendChild(fragment.cloneNode(true)); + + expect(div.textContent).toBe('new content'); + expect(div.textContent).not.toContain('old'); + }); +}); diff --git a/packages/roosterjs-content-model-plugins/test/paste/plugin/ContentModelPastePluginTest.ts b/packages/roosterjs-content-model-plugins/test/paste/plugin/ContentModelPastePluginTest.ts index b3dd98241be7..07cba62ca197 100644 --- a/packages/roosterjs-content-model-plugins/test/paste/plugin/ContentModelPastePluginTest.ts +++ b/packages/roosterjs-content-model-plugins/test/paste/plugin/ContentModelPastePluginTest.ts @@ -1,6 +1,7 @@ import * as addParser from '../../../lib/paste/utils/addParser'; import * as ExcelFile from '../../../lib/paste/Excel/processPastedContentFromExcel'; import * as getDocumentSource from '../../../lib/paste/pasteSourceValidations/getDocumentSource'; +import * as MarkdownFile from '../../../lib/paste/Markdown/convertPastedTextToMarkdown'; import * as oneNoteFile from '../../../lib/paste/oneNote/processPastedContentFromOneNote'; import * as PowerPointFile from '../../../lib/paste/PowerPoint/processPastedContentFromPowerPoint'; import * as setProcessor from '../../../lib/paste/utils/setProcessor'; @@ -24,6 +25,7 @@ describe('Content Model Paste Plugin Test', () => { getTrustedHTMLHandler: () => trustedHTMLHandler, getDOMCreator: () => domCreator, getEnvironment: () => ({}), + getDocument: () => document, } as any) as IEditor; spyOn(addParser, 'addParser').and.callThrough(); spyOn(setProcessor, 'setProcessor').and.callThrough(); @@ -174,6 +176,60 @@ describe('Content Model Paste Plugin Test', () => { expect(Object.keys(event.domToModelOption.styleSanitizers).length).toEqual(4); }); + it('Default | plain text is converted to markdown HTML', () => { + spyOn(getDocumentSource, 'getDocumentSource').and.returnValue('default'); + spyOn(MarkdownFile, 
'convertPastedTextToMarkdown').and.callThrough(); + + (event).clipboardData = { + text: '# Heading', + rawHtml: null, + types: [], + }; + + plugin.initialize(editor); + plugin.onPluginEvent(event); + + expect(MarkdownFile.convertPastedTextToMarkdown).toHaveBeenCalledWith( + editor, + event.fragment, + '# Heading' + ); + }); + + it('Default | formatted HTML is not converted to markdown HTML', () => { + spyOn(getDocumentSource, 'getDocumentSource').and.returnValue('default'); + spyOn(MarkdownFile, 'convertPastedTextToMarkdown').and.callThrough(); + + (event).clipboardData = { + text: 'hello world', + rawHtml: '
hello world
', + types: [], + }; + event.fragment.appendChild(domCreator.htmlToDOM('
hello world
').body); + + plugin.initialize(editor); + plugin.onPluginEvent(event); + + expect(MarkdownFile.convertPastedTextToMarkdown).not.toHaveBeenCalled(); + }); + + it('Default | plain text is not converted when pasting as plain text', () => { + spyOn(getDocumentSource, 'getDocumentSource').and.returnValue('default'); + spyOn(MarkdownFile, 'convertPastedTextToMarkdown').and.callThrough(); + + (event).pasteType = 'asPlainText'; + (event).clipboardData = { + text: '# Heading', + rawHtml: null, + types: [], + }; + + plugin.initialize(editor); + plugin.onPluginEvent(event); + + expect(MarkdownFile.convertPastedTextToMarkdown).not.toHaveBeenCalled(); + }); + it('excelNonNativeEvent', () => { spyOn(getDocumentSource, 'getDocumentSource').and.returnValue('excelNonNativeEvent'); spyOn(ExcelFile, 'processPastedContentFromExcel').and.callThrough();