feat: sanitize HTML before assigning it to tempDiv (#13252)

This commit is contained in:
Shivam Mishra
2026-01-12 22:41:37 +05:30
committed by GitHub
parent 8311657f9c
commit 58cec84b93
4 changed files with 80 additions and 3 deletions

View File

@@ -1,3 +1,5 @@
import DOMPurify from 'dompurify';
// Quote detection strategies
const QUOTE_INDICATORS = [
'.gmail_quote_container',
@@ -29,7 +31,7 @@ export class EmailQuoteExtractor {
static extractQuotes(htmlContent) {
// Create a temporary DOM element to parse HTML
const tempDiv = document.createElement('div');
tempDiv.innerHTML = htmlContent;
tempDiv.innerHTML = DOMPurify.sanitize(htmlContent);
// Remove elements matching class selectors
QUOTE_INDICATORS.forEach(selector => {
@@ -56,7 +58,7 @@ export class EmailQuoteExtractor {
*/
static hasQuotes(htmlContent) {
const tempDiv = document.createElement('div');
tempDiv.innerHTML = htmlContent;
tempDiv.innerHTML = DOMPurify.sanitize(htmlContent);
// Check for class-based quotes
// eslint-disable-next-line no-restricted-syntax

View File

@@ -1,4 +1,5 @@
import { format, parseISO, isValid as isValidDate } from 'date-fns';
import DOMPurify from 'dompurify';
/**
* Extracts plain text from HTML content
@@ -13,7 +14,7 @@ export const extractPlainTextFromHtml = html => {
return html.replace(/<[^>]*>/g, ' ');
}
const tempDiv = document.createElement('div');
tempDiv.innerHTML = html;
tempDiv.innerHTML = DOMPurify.sanitize(html);
return tempDiv.textContent || tempDiv.innerText || '';
};

View File

@@ -96,4 +96,58 @@ describe('EmailQuoteExtractor', () => {
// EMAIL_WITH_SIGNATURE is a fixture defined outside this excerpt; going by the
// test title it presumably ends with a blockquote while signature text is also
// present — confirm against the fixture.
it('detects quotes for trailing blockquotes even when signatures follow text', () => {
  const quotesDetected = EmailQuoteExtractor.hasQuotes(EMAIL_WITH_SIGNATURE);

  expect(quotesDetected).toBe(true);
});
// Regression suite for script-injection payloads: each case feeds hostile
// markup to EmailQuoteExtractor and checks that the executable parts are
// absent from the result while the harmless content is still there.
describe('HTML sanitization', () => {
  it('removes onerror handlers from img tags in extractQuotes', () => {
    const output = EmailQuoteExtractor.extractQuotes(
      '<p>Hello</p><img src="x" onerror="alert(1)">'
    );

    expect(output).not.toContain('onerror');
    expect(output).toContain('<p>Hello</p>');
  });

  it('removes onerror handlers from img tags in hasQuotes', () => {
    // hasQuotes only yields a boolean, so the check here is that the payload
    // is processed without throwing and is not reported as quoted content.
    const quotesDetected = EmailQuoteExtractor.hasQuotes(
      '<p>Hello</p><img src="x" onerror="alert(1)">'
    );

    expect(quotesDetected).toBe(false);
  });

  it('removes script tags in extractQuotes', () => {
    const output = EmailQuoteExtractor.extractQuotes(
      '<p>Content</p><script>alert("xss")</script><p>More</p>'
    );

    // Neither the tag nor its inline body may survive.
    expect(output).not.toContain('<script');
    expect(output).not.toContain('alert');
    // The paragraphs on both sides of the script must be kept.
    expect(output).toContain('<p>Content</p>');
    expect(output).toContain('<p>More</p>');
  });

  it('removes onclick handlers in extractQuotes', () => {
    const output = EmailQuoteExtractor.extractQuotes(
      '<p onclick="alert(1)">Click me</p>'
    );

    expect(output).not.toContain('onclick');
    expect(output).toContain('Click me');
  });

  it('removes javascript: URLs in extractQuotes', () => {
    const output = EmailQuoteExtractor.extractQuotes(
      '<a href="javascript:alert(1)">Link</a>'
    );

    // eslint-disable-next-line no-script-url
    expect(output).not.toContain('javascript:');
    expect(output).toContain('Link');
  });

  it('removes encoded payloads with event handlers in extractQuotes', () => {
    // The id attribute carries a base64 string that the onerror handler would
    // decode and eval; dropping the handler is what is asserted below.
    const output = EmailQuoteExtractor.extractQuotes(
      '<img src="x" id="PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==" onerror="eval(atob(this.id))">'
    );

    expect(output).not.toContain('onerror');
    expect(output).not.toContain('eval');
  });
});
});

View File

@@ -33,6 +33,26 @@ describe('quotedEmailHelper', () => {
expect(result).toContain('Line 1');
expect(result).toContain('Line 2');
});
// Hostile markup passed to extractPlainTextFromHtml must yield only the
// visible text, with no trace of the injected script.
it('sanitizes onerror handlers from img tags', () => {
  // The img element contributes no text, so only the paragraph text remains.
  expect(
    extractPlainTextFromHtml('<p>Hello</p><img src="x" onerror="alert(1)">')
  ).toBe('Hello');
});

it('sanitizes script tags', () => {
  const plainText = extractPlainTextFromHtml(
    '<p>Safe</p><script>alert(1)</script><p>Content</p>'
  );

  expect(plainText).toContain('Safe');
  expect(plainText).toContain('Content');
  // The script body must not leak into the extracted text.
  expect(plainText).not.toContain('alert');
});

it('sanitizes onclick handlers', () => {
  expect(
    extractPlainTextFromHtml('<p onclick="alert(1)">Click me</p>')
  ).toBe('Click me');
});
});
describe('getEmailSenderName', () => {