1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import { micromark } from 'micromark';
import { gfm, gfmHtml } from 'micromark-extension-gfm';
import DOMPurify from 'isomorphic-dompurify';
/**
 * Render a Markdown string (with GitHub Flavored Markdown extensions) to
 * sanitized HTML.
 *
 * Raw HTML embedded in the Markdown is passed through by the parser and then
 * filtered by DOMPurify against an explicit allowlist, so only the tags and
 * attributes listed below survive.
 *
 * @param markdown - Markdown source; `null`, `undefined` and the empty string
 *   are treated as "nothing to render".
 * @returns The sanitized HTML, or `undefined` when there is no input.
 */
export function markdownToHtml(markdown: string | null | undefined) {
  if (!markdown) return undefined;

  const html = micromark(markdown, {
    extensions: [gfm()],
    htmlExtensions: [gfmHtml()],
    // Inline HTML is let through on purpose; the sanitize step below is what
    // makes the result safe to inject.
    allowDangerousHtml: true,
  });

  // Sanitize the HTML to prevent XSS while keeping safe tags like <picture>, <img>, etc.
  return DOMPurify.sanitize(html, {
    ALLOWED_TAGS: [
      'p',
      'br',
      'strong',
      'em',
      // GFM strikethrough (~~text~~) is rendered as <del>; without this entry
      // the sanitizer drops the tag and the formatting is lost.
      'del',
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
      'h6',
      'ul',
      'ol',
      'li',
      'a',
      'code',
      'pre',
      'blockquote',
      'img',
      'picture',
      'source',
      'table',
      'thead',
      'tbody',
      'tr',
      'th',
      'td',
      'hr',
      'div',
      'span',
    ],
    // `align` carries GFM table column alignment on <th>/<td>.
    ALLOWED_ATTR: ['href', 'src', 'srcset', 'alt', 'title', 'media', 'class', 'id', 'align'],
  });
}
/**
 * Reduce a Markdown string to plain text using a series of regex passes.
 *
 * This is a best-effort stripper, not a parser. The passes run in a
 * deliberate order:
 *
 * 1. Fenced code blocks are dropped first, so the inline-code pass cannot
 *    consume part of a fence and leave the block behind.
 * 2. Images are unwrapped before links, because the link pattern also matches
 *    the bracket part of an image and would leave a stray `!`.
 * 3. Line-level markers (blockquotes, headers, rules, list bullets) are removed
 *    before emphasis, so a star used as a bullet is never paired up as an
 *    emphasis delimiter.
 * 4. Inline markers (emphasis, strikethrough, inline code) are unwrapped.
 *
 * @param markdown - Markdown source; `null`, `undefined` and the empty string
 *   all yield `''`.
 * @returns The text with Markdown syntax removed, runs of three or more
 *   newlines collapsed to two, and surrounding whitespace trimmed.
 */
export function stripMarkdown(markdown: string | null | undefined): string {
  if (!markdown) return '';

  let text = markdown;

  // Drop fenced code blocks (``` or ~~~), including their contents.
  text = text.replace(/```[\s\S]*?```/g, '');
  text = text.replace(/~~~[\s\S]*?~~~/g, '');

  // Images: ![alt](url) -> alt
  text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1');

  // Links: [text](url) -> text
  text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');

  // Line-anchored patterns below consume only spaces/tabs, never newlines, so
  // blank lines separating paragraphs are preserved.

  // Blockquote markers (possibly nested): > text -> text
  text = text.replace(/^(?:>[ \t]*)+/gm, '');

  // Header markers (# ## ### etc.)
  text = text.replace(/^#{1,6}[ \t]+/gm, '');

  // Horizontal rules
  text = text.replace(/^[-*_]{3,}$/gm, '');

  // Bulleted and numbered list markers
  text = text.replace(/^[ \t]*[-*+][ \t]+/gm, '');
  text = text.replace(/^[ \t]*\d+\.[ \t]+/gm, '');

  // Bold/italic: **text** *text* -> text
  text = text.replace(/\*{1,2}([^*]+)\*{1,2}/g, '$1');
  // Underscore emphasis must sit on a word boundary, so identifiers such as
  // snake_case_name are left intact.
  text = text.replace(/\b_{1,2}([^_]+)_{1,2}\b/g, '$1');

  // Strikethrough: ~~text~~ -> text
  text = text.replace(/~~([^~]+)~~/g, '$1');

  // Inline code: `code` -> code
  text = text.replace(/`([^`]+)`/g, '$1');

  // Clean up extra whitespace
  text = text.replace(/\n{3,}/g, '\n\n');
  return text.trim();
}