This commit is contained in:
CHEVALLIER Abel
2025-11-13 16:23:22 +01:00
parent de9c515a47
commit cb235644dc
34924 changed files with 3811102 additions and 0 deletions

View File

@@ -0,0 +1,89 @@
import { SAXParser, type EndTag, type StartTag, type Doctype, type Text, type Comment, type SaxToken } from 'parse5-sax-parser';
/**
 * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
 * A [transform stream](https://nodejs.org/api/stream.html#stream_class_stream_transform) (which means you can pipe _through_ it, see example).
 *
 * The rewriter uses the raw source representation of tokens if they are not modified by the user. Therefore, the resulting
 * HTML is not affected by parser error-recovery mechanisms as in a classical parsing-serialization roundtrip.
 *
 * @example
 *
 * ```js
 * const RewritingStream = require('parse5-html-rewriting-stream');
 * const http = require('http');
 * const fs = require('fs');
 *
 * const file = fs.createWriteStream('/home/google.com.html');
 * const rewriter = new RewritingStream();
 *
 * // Replace spans with divs
 * rewriter.on('startTag', startTag => {
 *     if (startTag.tagName === 'span') {
 *         startTag.tagName = 'div';
 *     }
 *
 *     rewriter.emitStartTag(startTag);
 * });
 *
 * rewriter.on('endTag', endTag => {
 *     if (endTag.tagName === 'span') {
 *         endTag.tagName = 'div';
 *     }
 *
 *     rewriter.emitEndTag(endTag);
 * });
 *
 * // Wrap all text nodes with an <i> tag
 * rewriter.on('text', (_, raw) => {
 *     // Use the raw representation of text without HTML entities decoding
 *     rewriter.emitRaw(`<i>${raw}</i>`);
 * });
 *
 * http.get('http://google.com', res => {
 *     // Assumes response is UTF-8.
 *     res.setEncoding('utf8');
 *     // `RewritingStream` is a `Transform` stream, which means you can pipe
 *     // through it.
 *     res.pipe(rewriter).pipe(file);
 * });
 * ```
 */
export declare class RewritingStream extends SAXParser {
    /** Note: `sourceCodeLocationInfo` is always enabled. */
    constructor();
    _transformChunk(chunk: string): string;
    private _getRawHtml;
    protected emitIfListenerExists(eventName: string, token: SaxToken): boolean;
    protected _emitToken(eventName: string, token: SaxToken): void;
    /** Emits a serialized document type token into the output stream. */
    emitDoctype(token: Doctype): void;
    /** Emits a serialized start tag token into the output stream. */
    emitStartTag(token: StartTag): void;
    /** Emits a serialized end tag token into the output stream. */
    emitEndTag(token: EndTag): void;
    /** Emits a serialized text token into the output stream. */
    emitText({ text }: Text): void;
    /** Emits a serialized comment token into the output stream. */
    emitComment(token: Comment): void;
    /** Emits a raw HTML string into the output stream. */
    emitRaw(html: string): void;
}
export interface RewritingStream {
    /** Raised when the rewriter encounters a start tag. */
    on(event: 'startTag', listener: (startTag: StartTag, rawHtml: string) => void): this;
    /** Raised when the rewriter encounters an end tag. */
    on(event: 'endTag', listener: (endTag: EndTag, rawHtml: string) => void): this;
    /** Raised when the rewriter encounters a comment. */
    on(event: 'comment', listener: (comment: Comment, rawHtml: string) => void): this;
    /** Raised when the rewriter encounters text content. */
    on(event: 'text', listener: (text: Text, rawHtml: string) => void): this;
    /** Raised when the rewriter encounters a [document type declaration](https://en.wikipedia.org/wiki/Document_type_declaration). */
    on(event: 'doctype', listener: (doctype: Doctype, rawHtml: string) => void): this;
    /**
     * Base event handler.
     *
     * @param event Name of the event
     * @param handler Event handler
     */
    on(event: string, handler: (...args: any[]) => void): this;
}

127
node_modules/parse5-html-rewriting-stream/dist/index.js generated vendored Normal file
View File

@@ -0,0 +1,127 @@
import { html } from 'parse5';
import { SAXParser, } from 'parse5-sax-parser';
import { escapeText, escapeAttribute } from 'entities/escape';
/**
 * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
 * A [transform stream](https://nodejs.org/api/stream.html#stream_class_stream_transform) (which means you can pipe _through_ it, see example).
 *
 * Tokens that the user does not modify are forwarded in their raw source form,
 * so the output is not distorted by parser error-recovery the way a classical
 * parse/serialize roundtrip would be.
 *
 * @example
 *
 * ```js
 * const RewritingStream = require('parse5-html-rewriting-stream');
 * const http = require('http');
 * const fs = require('fs');
 *
 * const file = fs.createWriteStream('/home/google.com.html');
 * const rewriter = new RewritingStream();
 *
 * // Replace spans with divs
 * rewriter.on('startTag', startTag => {
 *     if (startTag.tagName === 'span') {
 *         startTag.tagName = 'div';
 *     }
 *
 *     rewriter.emitStartTag(startTag);
 * });
 *
 * rewriter.on('endTag', endTag => {
 *     if (endTag.tagName === 'span') {
 *         endTag.tagName = 'div';
 *     }
 *
 *     rewriter.emitEndTag(endTag);
 * });
 *
 * // Wrap all text nodes with an <i> tag
 * rewriter.on('text', (_, raw) => {
 *     // Use the raw representation of text without HTML entities decoding
 *     rewriter.emitRaw(`<i>${raw}</i>`);
 * });
 *
 * http.get('http://google.com', res => {
 *     // Assumes response is UTF-8.
 *     res.setEncoding('utf8');
 *     // `RewritingStream` is a `Transform` stream, which means you can pipe
 *     // through it.
 *     res.pipe(rewriter).pipe(file);
 * });
 * ```
 */
export class RewritingStream extends SAXParser {
    /** Note: `sourceCodeLocationInfo` is always enabled. */
    constructor() {
        super({ sourceCodeLocationInfo: true });
    }
    _transformChunk(chunk) {
        // Deliberately discard the upstream return value: this class pushes
        // to the `Writable` side of the `Transform` stream by itself.
        super._transformChunk(chunk);
        return '';
    }
    /** Slices the raw source text covered by `location` out of the preprocessor buffer. */
    _getRawHtml(location) {
        const preprocessor = this.tokenizer.preprocessor;
        // Offsets are absolute in the input; rebase them onto the part of the
        // buffer the preprocessor still retains.
        const dropped = preprocessor.droppedBufferSize;
        return preprocessor.html.slice(location.startOffset - dropped, location.endOffset - dropped);
    }
    // Events
    emitIfListenerExists(eventName, token) {
        const handledByListener = super.emitIfListenerExists(eventName, token);
        if (!handledByListener) {
            // Nobody is listening for this token — pass it through verbatim.
            this.emitRaw(this._getRawHtml(token.sourceCodeLocation));
        }
        // NOTE: don't skip new lines after `<pre>` and other tags,
        // otherwise we'll have incorrect raw data.
        this.parserFeedbackSimulator.skipNextNewLine = false;
        return true;
    }
    // Emitter API
    _emitToken(eventName, token) {
        const rawHtml = this._getRawHtml(token.sourceCodeLocation);
        this.emit(eventName, token, rawHtml);
    }
    /** Emits a serialized document type token into the output stream. */
    emitDoctype(token) {
        const parts = [`<!DOCTYPE ${token.name}`];
        if (token.publicId !== null) {
            parts.push(` PUBLIC "${token.publicId}"`);
        }
        else if (token.systemId !== null) {
            parts.push(' SYSTEM');
        }
        if (token.systemId !== null) {
            parts.push(` "${token.systemId}"`);
        }
        parts.push('>');
        this.push(parts.join(''));
    }
    /** Emits a serialized start tag token into the output stream. */
    emitStartTag(token) {
        const serializedAttrs = token.attrs
            .map((attr) => ` ${attr.name}="${escapeAttribute(attr.value)}"`)
            .join('');
        const closing = token.selfClosing ? '/>' : '>';
        this.push(`<${token.tagName}${serializedAttrs}${closing}`);
    }
    /** Emits a serialized end tag token into the output stream. */
    emitEndTag(token) {
        this.push(`</${token.tagName}>`);
    }
    /** Emits a serialized text token into the output stream. */
    emitText({ text }) {
        // Text inside e.g. <script>/<style> (outside foreign content) must be
        // forwarded unescaped; everything else gets entity-escaped.
        const allowUnescaped = !this.parserFeedbackSimulator.inForeignContent &&
            html.hasUnescapedText(this.tokenizer.lastStartTagName, true);
        this.push(allowUnescaped ? text : escapeText(text));
    }
    /** Emits a serialized comment token into the output stream. */
    emitComment(token) {
        this.push(`<!--${token.text}-->`);
    }
    /** Emits a raw HTML string into the output stream. */
    emitRaw(html) {
        this.push(html);
    }
}