avancement planning

This commit is contained in:
2026-05-26 11:58:39 +02:00
parent 619a2b240a
commit 150b97cd2e
4892 changed files with 99214 additions and 429382 deletions
+3 -1
View File
@@ -459,7 +459,9 @@ export class Tokenizer {
: cp === $.NULL
? TokenType.NULL_CHARACTER
: TokenType.CHARACTER;
this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
// OPTIMIZATION: Use String.fromCharCode for BMP characters (< 0x10000) which is faster
// than String.fromCodePoint. Characters outside BMP are rare in HTML.
this._appendCharToCurrentCharacterToken(type, cp < 65536 ? String.fromCharCode(cp) : String.fromCodePoint(cp));
}
//NOTE: used when we emit characters explicitly.
//This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
-1
View File
@@ -1 +0,0 @@
export * from "./dist/commonjs/decode.js";
-3
View File
@@ -1,3 +0,0 @@
// Make exports work in Node < 12
// eslint-disable-next-line no-undef, unicorn/prefer-module
module.exports = require("./dist/commonjs/decode.js");
@@ -1,19 +0,0 @@
/**
* Polyfill for `String.fromCodePoint`. It is used to create a string from a Unicode code point.
*/
export declare const fromCodePoint: (...codePoints: number[]) => string;
/**
* Replace the given code point with a replacement character if it is a
* surrogate or is outside the valid range. Otherwise return the code
* point unchanged.
*/
export declare function replaceCodePoint(codePoint: number): number;
/**
* Replace the code point if relevant, then convert it to a string.
*
* @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
* @param codePoint The code point to decode.
* @returns The decoded code point.
*/
export declare function decodeCodePoint(codePoint: number): string;
//# sourceMappingURL=decode-codepoint.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-codepoint.d.ts","sourceRoot":"","sources":["../../src/decode-codepoint.ts"],"names":[],"mappings":"AAkCA;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,CAAC,GAAG,UAAU,EAAE,MAAM,EAAE,KAAK,MAgBpD,CAAC;AAEN;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAS1D;AAED;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEzD"}
@@ -1,77 +0,0 @@
"use strict";
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
var _a;
Object.defineProperty(exports, "__esModule", { value: true });
exports.fromCodePoint = void 0;
exports.replaceCodePoint = replaceCodePoint;
exports.decodeCodePoint = decodeCodePoint;
/**
 * Lookup table of code points that `replaceCodePoint` swaps for another
 * code point. Keys are the code points found in a numeric character
 * reference, values are the code points emitted in their place.
 */
const decodeMap = new Map([
    // NULL is replaced with U+FFFD.
    [0x00, 0xfffd],
    // C1 Unicode control character reference replacements
    [0x80, 0x20ac],
    [0x82, 0x201a],
    [0x83, 0x0192],
    [0x84, 0x201e],
    [0x85, 0x2026],
    [0x86, 0x2020],
    [0x87, 0x2021],
    [0x88, 0x02c6],
    [0x89, 0x2030],
    [0x8a, 0x0160],
    [0x8b, 0x2039],
    [0x8c, 0x0152],
    [0x8e, 0x017d],
    [0x91, 0x2018],
    [0x92, 0x2019],
    [0x93, 0x201c],
    [0x94, 0x201d],
    [0x95, 0x2022],
    [0x96, 0x2013],
    [0x97, 0x2014],
    [0x98, 0x02dc],
    [0x99, 0x2122],
    [0x9a, 0x0161],
    [0x9b, 0x203a],
    [0x9c, 0x0153],
    [0x9e, 0x017e],
    [0x9f, 0x0178],
]);
/**
 * Converts a Unicode code point to a string.
 *
 * The native `String.fromCodePoint` is used whenever the runtime provides
 * it. Otherwise a small fallback is installed that converts ONE code point:
 * values above 0xFFFF are split into a UTF-16 surrogate pair, everything
 * else is passed straight to `String.fromCharCode`.
 */
exports.fromCodePoint =
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
    String.fromCodePoint != null
        ? String.fromCodePoint
        : function (codePoint) {
            if (codePoint > 0xffff) {
                // Astral code point: emit the high surrogate, then the low one.
                const astralOffset = codePoint - 0x10000;
                const highSurrogate = ((astralOffset >>> 10) & 0x3ff) | 0xd800;
                const lowSurrogate = 0xdc00 | (astralOffset & 0x3ff);
                return (String.fromCharCode(highSurrogate) +
                    String.fromCharCode(lowSurrogate));
            }
            return "" + String.fromCharCode(codePoint);
        };
/**
 * Sanitizes a code point taken from a numeric character reference.
 *
 * Surrogates (0xD800-0xDFFF) and values above 0x10FFFF become U+FFFD.
 * Code points listed in `decodeMap` become their mapped replacement.
 * Every other value is returned unchanged.
 *
 * @param codePoint The code point to check.
 * @returns The code point that should be emitted instead.
 */
function replaceCodePoint(codePoint) {
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const isAboveUnicodeRange = codePoint > 0x10ffff;
    if (isSurrogate || isAboveUnicodeRange) {
        return 0xfffd;
    }
    const mapped = decodeMap.get(codePoint);
    return mapped == null ? codePoint : mapped;
}
/**
 * Sanitizes a code point with `replaceCodePoint` and converts the result
 * to a string with `fromCodePoint`.
 *
 * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
 * @param codePoint The code point to decode.
 * @returns The decoded code point.
 */
function decodeCodePoint(codePoint) {
    const toString = exports.fromCodePoint;
    const sanitized = replaceCodePoint(codePoint);
    return toString(sanitized);
}
//# sourceMappingURL=decode-codepoint.js.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-codepoint.js","sourceRoot":"","sources":["../../src/decode-codepoint.ts"],"names":[],"mappings":";AAAA,qHAAqH;;;;AA4DrH,4CASC;AASD,0CAEC;AA9ED,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACtB,CAAC,CAAC,EAAE,KAAM,CAAC;IACX,sDAAsD;IACtD,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;CACb,CAAC,CAAC;AAEH;;GAEG;AACU,QAAA,aAAa;AACtB,8GAA8G;AAC9G,MAAA,MAAM,CAAC,aAAa,mCACpB,UAAU,SAAiB;IACvB,IAAI,MAAM,GAAG,EAAE,CAAC;IAEhB,IAAI,SAAS,GAAG,KAAO,EAAE,CAAC;QACtB,SAAS,IAAI,KAAS,CAAC;QACvB,MAAM,IAAI,MAAM,CAAC,YAAY,CACzB,CAAC,CAAC,SAAS,KAAK,EAAE,CAAC,GAAG,IAAM,CAAC,GAAG,KAAO,CAC1C,CAAC;QACF,SAAS,GAAG,KAAO,GAAG,CAAC,SAAS,GAAG,IAAM,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC;AAClB,CAAC,CAAC;AAEN;;;;GAIG;AACH,SAAgB,gBAAgB,CAAC,SAAiB;;IAC9C,IACI,CAAC,SAAS,IAAI,KAAO,IAAI,SAAS,IAAI,KAAO,CAAC;QAC9C,SAAS,GAAG,OAAU,EACxB,CAAC;QACC,OAAO,KAAO,CAAC;IACnB,CAAC;IAED,OAAO,MAAA,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,mCAAI,SAAS,CAAC;AACjD,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,eAAe,CAAC,SAAiB;IAC7C,OAAO,IAAA,qBAAa,EAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,CAAC;AACtD,CAAC"}
-209
View File
@@ -1,209 +0,0 @@
export declare enum BinTrieFlags {
VALUE_LENGTH = 49152,
BRANCH_LENGTH = 16256,
JUMP_TABLE = 127
}
export declare enum DecodingMode {
/** Entities in text nodes that can end with any character. */
Legacy = 0,
/** Only allow entities terminated with a semicolon. */
Strict = 1,
/** Entities in attributes have limitations on ending characters. */
Attribute = 2
}
/**
* Producers for character reference errors as defined in the HTML spec.
*/
export interface EntityErrorProducer {
missingSemicolonAfterCharacterReference(): void;
absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void;
validateNumericCharacterReference(code: number): void;
}
/**
* Token decoder with support of writing partial entities.
*/
export declare class EntityDecoder {
/** The tree used to decode entities. */
private readonly decodeTree;
/**
* The function that is called when a codepoint is decoded.
*
* For multi-byte named entities, this will be called multiple times,
* with the second codepoint, and the same `consumed` value.
*
* @param codepoint The decoded codepoint.
* @param consumed The number of bytes consumed by the decoder.
*/
private readonly emitCodePoint;
/** An object that is used to produce errors. */
private readonly errors?;
constructor(
/** The tree used to decode entities. */
decodeTree: Uint16Array,
/**
* The function that is called when a codepoint is decoded.
*
* For multi-byte named entities, this will be called multiple times,
* with the second codepoint, and the same `consumed` value.
*
* @param codepoint The decoded codepoint.
* @param consumed The number of bytes consumed by the decoder.
*/
emitCodePoint: (cp: number, consumed: number) => void,
/** An object that is used to produce errors. */
errors?: EntityErrorProducer | undefined);
/** The current state of the decoder. */
private state;
/** Characters that were consumed while parsing an entity. */
private consumed;
/**
* The result of the entity.
*
* Either the result index of a named entity, or the codepoint of a
* numeric entity.
*/
private result;
/** The current index in the decode tree. */
private treeIndex;
/** The number of characters that were consumed in excess. */
private excess;
/** The mode in which the decoder is operating. */
private decodeMode;
/** Resets the instance to make it reusable. */
startEntity(decodeMode: DecodingMode): void;
/**
* Write an entity to the decoder. This can be called multiple times with partial entities.
* If the entity is incomplete, the decoder will return -1.
*
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
* entity is incomplete, and resume when the next string is written.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
write(input: string, offset: number): number;
/**
* Switches between the numeric decimal and hexadecimal states.
*
* Equivalent to the `Numeric character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
private stateNumericStart;
private addToNumericResult;
/**
* Parses a hexadecimal numeric entity.
*
* Equivalent to the `Hexadecimal character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
private stateNumericHex;
/**
* Parses a decimal numeric entity.
*
* Equivalent to the `Decimal character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
private stateNumericDecimal;
/**
* Validate and emit a numeric entity.
*
* Implements the logic from the `Hexadecimal character reference start
* state` and `Numeric character reference end state` in the HTML spec.
*
* @param lastCp The last code point of the entity. Used to see if the
* entity was terminated with a semicolon.
* @param expectedLength The minimum number of characters that should be
* consumed. Used to validate that at least one digit
* was consumed.
* @returns The number of characters that were consumed.
*/
private emitNumericEntity;
/**
* Parses a named entity.
*
* Equivalent to the `Named character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
private stateNamedEntity;
/**
* Emit a named entity that was not terminated with a semicolon.
*
* @returns The number of characters consumed.
*/
private emitNotTerminatedNamedEntity;
/**
* Emit a named entity.
*
* @param result The index of the entity in the decode tree.
* @param valueLength The number of bytes in the entity.
* @param consumed The number of characters consumed.
*
* @returns The number of characters consumed.
*/
private emitNamedEntityData;
/**
* Signal to the parser that the end of the input was reached.
*
* Remaining data will be emitted and relevant errors will be produced.
*
* @returns The number of characters consumed.
*/
end(): number;
}
/**
* Determines the branch of the current node that is taken given the current
* character. This function is used to traverse the trie.
*
* @param decodeTree The trie.
* @param current The current node.
* @param nodeIndex The index right after the current node and its value.
* @param char The current character.
* @returns The index of the next node, or -1 if no branch is taken.
*/
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIndex: number, char: number): number;
/**
* Decodes an HTML string.
*
* @param htmlString The string to decode.
* @param mode The decoding mode.
* @returns The decoded string.
*/
export declare function decodeHTML(htmlString: string, mode?: DecodingMode): string;
/**
* Decodes an HTML string in an attribute.
*
* @param htmlAttribute The string to decode.
* @returns The decoded string.
*/
export declare function decodeHTMLAttribute(htmlAttribute: string): string;
/**
* Decodes an HTML string, requiring all entities to be terminated by a semicolon.
*
* @param htmlString The string to decode.
* @returns The decoded string.
*/
export declare function decodeHTMLStrict(htmlString: string): string;
/**
* Decodes an XML string, requiring all entities to be terminated by a semicolon.
*
* @param xmlString The string to decode.
* @returns The decoded string.
*/
export declare function decodeXML(xmlString: string): string;
export { htmlDecodeTree } from "./generated/decode-data-html.js";
export { xmlDecodeTree } from "./generated/decode-data-xml.js";
export { decodeCodePoint, replaceCodePoint, fromCodePoint, } from "./decode-codepoint.js";
//# sourceMappingURL=decode.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"decode.d.ts","sourceRoot":"","sources":["../../src/decode.ts"],"names":[],"mappings":"AAsBA,oBAAY,YAAY;IACpB,YAAY,QAAwB;IACpC,aAAa,QAAwB;IACrC,UAAU,MAAwB;CACrC;AAuCD,oBAAY,YAAY;IACpB,8DAA8D;IAC9D,MAAM,IAAI;IACV,uDAAuD;IACvD,MAAM,IAAI;IACV,oEAAoE;IACpE,SAAS,IAAI;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,uCAAuC,IAAI,IAAI,CAAC;IAChD,0CAA0C,CACtC,kBAAkB,EAAE,MAAM,GAC3B,IAAI,CAAC;IACR,iCAAiC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;CACzD;AAED;;GAEG;AACH,qBAAa,aAAa;IAElB,wCAAwC;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU;IAC3B;;;;;;;;OAQG;IACH,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,gDAAgD;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;;IAbxB,wCAAwC;IACvB,UAAU,EAAE,WAAW;IACxC;;;;;;;;OAQG;IACc,aAAa,EAAE,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,KAAK,IAAI;IACtE,gDAAgD;IAC/B,MAAM,CAAC,EAAE,mBAAmB,GAAG,SAAS;IAG7D,wCAAwC;IACxC,OAAO,CAAC,KAAK,CAAkC;IAC/C,6DAA6D;IAC7D,OAAO,CAAC,QAAQ,CAAK;IACrB;;;;;OAKG;IACH,OAAO,CAAC,MAAM,CAAK;IAEnB,4CAA4C;IAC5C,OAAO,CAAC,SAAS,CAAK;IACtB,6DAA6D;IAC7D,OAAO,CAAC,MAAM,CAAK;IACnB,kDAAkD;IAClD,OAAO,CAAC,UAAU,CAAuB;IAEzC,+CAA+C;IAC/C,WAAW,CAAC,UAAU,EAAE,YAAY,GAAG,IAAI;IAS3C;;;;;;;;;;OAUG;IACH,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM;IA8B5C;;;;;;;;OAQG;IACH,OAAO,CAAC,iBAAiB;IAezB,OAAO,CAAC,kBAAkB;IAe1B;;;;;;;;OAQG;IACH,OAAO,CAAC,eAAe;IAkBvB;;;;;;;;OAQG;IACH,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;;;;;;OAYG;IACH,OAAO,CAAC,iBAAiB;IA6BzB;;;;;;;;OAQG;IACH,OAAO,CAAC,gBAAgB;IAsDxB;;;;OAIG;IACH,OAAO,CAAC,4BAA4B;IAYpC;;;;;;;;OAQG;IACH,OAAO,CAAC,mBAAmB;IAqB3B;;;;;;OAMG;IACH,GAAG,IAAI,MAAM;CA6BhB;AAoDD;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAC3B,UAAU,EAAE,WAAW,EACvB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,GACb,MAAM,CAsCR;AAKD;;;;;;GAMG;AACH,wBAAgB,UAAU,CACtB,UAAU,EAAE,MAAM,EAClB,IAAI,GAAE,YAAkC,GACzC,MAAM,CAER;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,CAEjE;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAE3D;AAED;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEnD;AAGD,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAA
E,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAE/D,OAAO,EACH,eAAe,EACf,gBAAgB,EAChB,aAAa,GAChB,MAAM,uBAAuB,CAAC"}
-511
View File
@@ -1,511 +0,0 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.fromCodePoint = exports.replaceCodePoint = exports.decodeCodePoint = exports.xmlDecodeTree = exports.htmlDecodeTree = exports.EntityDecoder = exports.DecodingMode = exports.BinTrieFlags = void 0;
exports.determineBranch = determineBranch;
exports.decodeHTML = decodeHTML;
exports.decodeHTMLAttribute = decodeHTMLAttribute;
exports.decodeHTMLStrict = decodeHTMLStrict;
exports.decodeXML = decodeXML;
const decode_data_html_js_1 = require("./generated/decode-data-html.js");
const decode_data_xml_js_1 = require("./generated/decode-data-xml.js");
const decode_codepoint_js_1 = require("./decode-codepoint.js");
/**
 * ASCII character codes the decoder compares input against.
 * Built as a two-way lookup: `CharCodes.NAME` gives the code and
 * `CharCodes[code]` gives the name back.
 */
var CharCodes;
(function (CharCodes) {
    const namedCodes = [
        ["NUM", 35], // "#"
        ["SEMI", 59], // ";"
        ["EQUALS", 61], // "="
        ["ZERO", 48], // "0"
        ["NINE", 57], // "9"
        ["LOWER_A", 97], // "a"
        ["LOWER_F", 102], // "f"
        ["LOWER_X", 120], // "x"
        ["LOWER_Z", 122], // "z"
        ["UPPER_A", 65], // "A"
        ["UPPER_F", 70], // "F"
        ["UPPER_Z", 90], // "Z"
    ];
    for (const [name, code] of namedCodes) {
        CharCodes[name] = code;
        CharCodes[code] = name;
    }
})(CharCodes || (CharCodes = {}));
/** Bit that needs to be set to convert an upper case ASCII character to lower case */
const TO_LOWER_BIT = 32;
/**
 * Bit masks applied to a 16-bit node of the decode trie.
 * The decoder shifts `VALUE_LENGTH` right by 14 and `BRANCH_LENGTH` right
 * by 7 after masking; `JUMP_TABLE` is used unshifted.
 */
var BinTrieFlags;
(function (BinTrieFlags) {
    const masks = {
        VALUE_LENGTH: 0xc000, // top two bits
        BRANCH_LENGTH: 0x3f80, // seven bits in the middle
        JUMP_TABLE: 0x007f, // low seven bits
    };
    for (const flagName of Object.keys(masks)) {
        BinTrieFlags[flagName] = masks[flagName];
        BinTrieFlags[masks[flagName]] = flagName;
    }
})(BinTrieFlags || (exports.BinTrieFlags = BinTrieFlags = {}));
/**
 * Tells whether a character code is an ASCII decimal digit ("0"-"9").
 *
 * @param code The character code to test.
 * @returns `true` for codes between `CharCodes.ZERO` and `CharCodes.NINE`, inclusive.
 */
function isNumber(code) {
    if (code >= CharCodes.ZERO) {
        return code <= CharCodes.NINE;
    }
    return false;
}
/**
 * Tells whether a character code is a hexadecimal LETTER ("A"-"F" or
 * "a"-"f"). Decimal digits are not matched here; callers combine this
 * check with `isNumber`.
 *
 * @param code The character code to test.
 * @returns `true` when the code is one of the twelve hex letters.
 */
function isHexadecimalCharacter(code) {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) {
        return true;
    }
    return code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F;
}
/**
 * Tells whether a character code is an ASCII letter or decimal digit.
 *
 * @param code The character code to test.
 * @returns `true` for "A"-"Z", "a"-"z" and "0"-"9".
 */
function isAsciiAlphaNumeric(code) {
    const isUpperCaseLetter = code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z;
    const isLowerCaseLetter = code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z;
    return isUpperCaseLetter || isLowerCaseLetter || isNumber(code);
}
/**
 * Decides whether a character following an unterminated entity inside an
 * attribute value prevents that entity from being decoded.
 *
 * An "=" or any ASCII letter/digit right after the entity means the text
 * is left as-is, and this is not treated as a parser error.
 * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 *
 * @param code The character code that follows the entity.
 * @returns `true` when the entity must not be decoded.
 */
function isEntityInAttributeInvalidEnd(code) {
    if (code === CharCodes.EQUALS) {
        return true;
    }
    return isAsciiAlphaNumeric(code);
}
/**
 * States of the `EntityDecoder` state machine, numbered 0-4 in the order
 * listed. Built as a two-way lookup (name -> number, number -> name).
 */
var EntityDecoderState;
(function (EntityDecoderState) {
    const stateNames = [
        "EntityStart",
        "NumericStart",
        "NumericDecimal",
        "NumericHex",
        "NamedEntity",
    ];
    stateNames.forEach((stateName, ordinal) => {
        EntityDecoderState[stateName] = ordinal;
        EntityDecoderState[ordinal] = stateName;
    });
})(EntityDecoderState || (EntityDecoderState = {}));
/**
 * Rules for how an entity may be terminated, numbered 0-2 in the order
 * listed. Built as a two-way lookup (name -> number, number -> name).
 */
var DecodingMode;
(function (DecodingMode) {
    const modeNames = [
        // Entities in text nodes that can end with any character.
        "Legacy",
        // Only allow entities terminated with a semicolon.
        "Strict",
        // Entities in attributes have limitations on ending characters.
        "Attribute",
    ];
    modeNames.forEach((modeName, ordinal) => {
        DecodingMode[modeName] = ordinal;
        DecodingMode[ordinal] = modeName;
    });
})(DecodingMode || (exports.DecodingMode = DecodingMode = {}));
/**
 * Token decoder with support of writing partial entities.
 *
 * Call `startEntity` to reset the instance, then feed the text that
 * follows the "&" through one or more `write` calls. `write` returns -1
 * while the entity is still incomplete; call `end` when no more input
 * will arrive. Decoded code points are delivered through the
 * `emitCodePoint` callback.
 */
class EntityDecoder {
    /**
     * @param decodeTree The tree used to decode entities.
     * @param emitCodePoint The function that is called when a code point is
     *     decoded, as `emitCodePoint(codePoint, consumed)`. For named
     *     entities whose value is two code points it is called twice, with
     *     the same `consumed` value both times.
     * @param errors An optional object that is used to produce errors.
     */
    constructor(decodeTree, emitCodePoint, errors) {
        this.decodeTree = decodeTree;
        this.emitCodePoint = emitCodePoint;
        this.errors = errors;
        /** The current state of the decoder. */
        this.state = EntityDecoderState.EntityStart;
        /**
         * Characters that were consumed while parsing an entity.
         * Starts at 1, so the counts returned by this class are one larger
         * than the number of characters passed to `write`.
         */
        this.consumed = 1;
        /**
         * The result of the entity.
         *
         * Either the tree index of a named entity, or the code point of a
         * numeric entity.
         */
        this.result = 0;
        /** The current index in the decode tree. */
        this.treeIndex = 0;
        /** The number of characters that were consumed in excess. */
        this.excess = 1;
        /** The mode in which the decoder is operating. */
        this.decodeMode = DecodingMode.Strict;
    }
    /**
     * Resets the instance to make it reusable.
     *
     * @param decodeMode The `DecodingMode` to use for the next entity.
     */
    startEntity(decodeMode) {
        this.decodeMode = decodeMode;
        this.state = EntityDecoderState.EntityStart;
        this.result = 0;
        this.treeIndex = 0;
        this.excess = 1;
        this.consumed = 1;
    }
    /**
     * Write an entity to the decoder. This can be called multiple times with partial entities.
     * If the entity is incomplete, the decoder will return -1.
     *
     * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
     * entity is incomplete, and resume when the next string is written.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    write(input, offset) {
        switch (this.state) {
            case EntityDecoderState.EntityStart: {
                // A leading "#" selects the numeric states; anything else is a named entity.
                if (input.charCodeAt(offset) === CharCodes.NUM) {
                    this.state = EntityDecoderState.NumericStart;
                    this.consumed += 1;
                    return this.stateNumericStart(input, offset + 1);
                }
                this.state = EntityDecoderState.NamedEntity;
                return this.stateNamedEntity(input, offset);
            }
            case EntityDecoderState.NumericStart: {
                return this.stateNumericStart(input, offset);
            }
            case EntityDecoderState.NumericDecimal: {
                return this.stateNumericDecimal(input, offset);
            }
            case EntityDecoderState.NumericHex: {
                return this.stateNumericHex(input, offset);
            }
            case EntityDecoderState.NamedEntity: {
                return this.stateNamedEntity(input, offset);
            }
        }
    }
    /**
     * Switches between the numeric decimal and hexadecimal states.
     *
     * Equivalent to the `Numeric character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericStart(input, offset) {
        if (offset >= input.length) {
            return -1;
        }
        // OR-ing with TO_LOWER_BIT makes the comparison match both "x" and "X".
        if ((input.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
            this.state = EntityDecoderState.NumericHex;
            this.consumed += 1;
            return this.stateNumericHex(input, offset + 1);
        }
        this.state = EntityDecoderState.NumericDecimal;
        return this.stateNumericDecimal(input, offset);
    }
    /**
     * Folds the digits in `input[start, end)` into the running numeric
     * result and counts them as consumed. Does nothing for an empty range.
     *
     * @param input The string containing the digits.
     * @param start Index of the first digit.
     * @param end Index right after the last digit.
     * @param base The radix of the digits (10 or 16 at the call sites).
     */
    addToNumericResult(input, start, end, base) {
        if (start !== end) {
            const digitCount = end - start;
            // Shift the digits gathered by earlier calls left, then add the new ones.
            this.result =
                this.result * Math.pow(base, digitCount) +
                    Number.parseInt(input.slice(start, end), base);
            this.consumed += digitCount;
        }
    }
    /**
     * Parses a hexadecimal numeric entity.
     *
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericHex(input, offset) {
        const startIndex = offset;
        while (offset < input.length) {
            const char = input.charCodeAt(offset);
            if (isNumber(char) || isHexadecimalCharacter(char)) {
                offset += 1;
            }
            else {
                this.addToNumericResult(input, startIndex, offset, 16);
                return this.emitNumericEntity(char, 3);
            }
        }
        // Input ran out mid-number: keep what we have and wait for more.
        this.addToNumericResult(input, startIndex, offset, 16);
        return -1;
    }
    /**
     * Parses a decimal numeric entity.
     *
     * Equivalent to the `Decimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericDecimal(input, offset) {
        const startIndex = offset;
        while (offset < input.length) {
            const char = input.charCodeAt(offset);
            if (isNumber(char)) {
                offset += 1;
            }
            else {
                this.addToNumericResult(input, startIndex, offset, 10);
                return this.emitNumericEntity(char, 2);
            }
        }
        // Input ran out mid-number: keep what we have and wait for more.
        this.addToNumericResult(input, startIndex, offset, 10);
        return -1;
    }
    /**
     * Validate and emit a numeric entity.
     *
     * Implements the logic from the `Hexadecimal character reference start
     * state` and `Numeric character reference end state` in the HTML spec.
     *
     * @param lastCp The last code point of the entity. Used to see if the
     *               entity was terminated with a semicolon.
     * @param expectedLength The minimum number of characters that should be
     *                       consumed. Used to validate that at least one digit
     *                       was consumed.
     * @returns The number of characters that were consumed.
     */
    emitNumericEntity(lastCp, expectedLength) {
        // Ensure we consumed at least one digit.
        if (this.consumed <= expectedLength) {
            if (this.errors != null) {
                this.errors.absenceOfDigitsInNumericCharacterReference(this.consumed);
            }
            return 0;
        }
        // Figure out if this is a legit end of the entity
        if (lastCp === CharCodes.SEMI) {
            this.consumed += 1;
        }
        else if (this.decodeMode === DecodingMode.Strict) {
            return 0;
        }
        this.emitCodePoint((0, decode_codepoint_js_1.replaceCodePoint)(this.result), this.consumed);
        if (this.errors) {
            if (lastCp !== CharCodes.SEMI) {
                this.errors.missingSemicolonAfterCharacterReference();
            }
            this.errors.validateNumericCharacterReference(this.result);
        }
        return this.consumed;
    }
    /**
     * Parses a named entity.
     *
     * Equivalent to the `Named character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNamedEntity(input, offset) {
        const { decodeTree } = this;
        let current = decodeTree[this.treeIndex];
        // The mask is the number of bytes of the value, including the current byte.
        let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
        for (; offset < input.length; offset++, this.excess++) {
            const char = input.charCodeAt(offset);
            this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
            if (this.treeIndex < 0) {
                return this.result === 0 ||
                    // If we are parsing an attribute
                    (this.decodeMode === DecodingMode.Attribute &&
                        // We shouldn't have consumed any characters after the entity,
                        (valueLength === 0 ||
                            // And there should be no invalid characters.
                            isEntityInAttributeInvalidEnd(char)))
                    ? 0
                    : this.emitNotTerminatedNamedEntity();
            }
            current = decodeTree[this.treeIndex];
            valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
            // If the branch is a value, store it and continue
            if (valueLength !== 0) {
                // If the entity is terminated by a semicolon, we are done.
                if (char === CharCodes.SEMI) {
                    return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
                }
                // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
                if (this.decodeMode !== DecodingMode.Strict) {
                    this.result = this.treeIndex;
                    this.consumed += this.excess;
                    this.excess = 0;
                }
            }
        }
        return -1;
    }
    /**
     * Emit a named entity that was not terminated with a semicolon.
     *
     * @returns The number of characters consumed.
     */
    emitNotTerminatedNamedEntity() {
        const { result, decodeTree } = this;
        const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
        this.emitNamedEntityData(result, valueLength, this.consumed);
        if (this.errors != null) {
            this.errors.missingSemicolonAfterCharacterReference();
        }
        return this.consumed;
    }
    /**
     * Emit a named entity.
     *
     * @param result The index of the entity in the decode tree.
     * @param valueLength The number of bytes in the entity.
     * @param consumed The number of characters consumed.
     *
     * @returns The number of characters consumed.
     */
    emitNamedEntityData(result, valueLength, consumed) {
        const { decodeTree } = this;
        // A length of 1 means the value shares the node with the length bits;
        // otherwise the value is stored in the slot after the node.
        this.emitCodePoint(valueLength === 1
            ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
            : decodeTree[result + 1], consumed);
        if (valueLength === 3) {
            // For multi-byte values, we need to emit the second byte.
            this.emitCodePoint(decodeTree[result + 2], consumed);
        }
        return consumed;
    }
    /**
     * Signal to the parser that the end of the input was reached.
     *
     * Remaining data will be emitted and relevant errors will be produced.
     *
     * @returns The number of characters consumed.
     */
    end() {
        switch (this.state) {
            case EntityDecoderState.NamedEntity: {
                // Emit a named entity if we have one.
                return this.result !== 0 &&
                    (this.decodeMode !== DecodingMode.Attribute ||
                        this.result === this.treeIndex)
                    ? this.emitNotTerminatedNamedEntity()
                    : 0;
            }
            // Otherwise, emit a numeric entity if we have one.
            case EntityDecoderState.NumericDecimal: {
                return this.emitNumericEntity(0, 2);
            }
            case EntityDecoderState.NumericHex: {
                return this.emitNumericEntity(0, 3);
            }
            case EntityDecoderState.NumericStart: {
                if (this.errors != null) {
                    this.errors.absenceOfDigitsInNumericCharacterReference(this.consumed);
                }
                return 0;
            }
            case EntityDecoderState.EntityStart: {
                // Return 0 if we have no entity.
                return 0;
            }
        }
    }
}
exports.EntityDecoder = EntityDecoder;
/**
 * Builds a string decoder on top of a single shared `EntityDecoder`.
 *
 * The returned function copies the text between entities verbatim and lets
 * the decoder append the decoded code points. Output is gathered in a
 * buffer shared with the decoder callback and cleared before returning.
 *
 * @param decodeTree The decode tree.
 * @returns A function `(input, decodeMode) => string` that decodes entities in a string.
 */
function getDecoder(decodeTree) {
    let buffer = "";
    const appendCodePoint = (codePoint) => (buffer += (0, decode_codepoint_js_1.fromCodePoint)(codePoint));
    const decoder = new EntityDecoder(decodeTree, appendCodePoint);
    return function decodeWithTrie(input, decodeMode) {
        // Index up to which `input` has already been copied or decoded.
        let copiedUpTo = 0;
        let searchFrom = 0;
        for (;;) {
            const ampersandIndex = input.indexOf("&", searchFrom);
            if (ampersandIndex < 0) {
                break;
            }
            buffer += input.slice(copiedUpTo, ampersandIndex);
            decoder.startEntity(decodeMode);
            // Skip the "&"
            const consumed = decoder.write(input, ampersandIndex + 1);
            if (consumed < 0) {
                // The input ended inside the entity: flush whatever was recognised.
                copiedUpTo = ampersandIndex + decoder.end();
                break;
            }
            copiedUpTo = ampersandIndex + consumed;
            // If nothing was consumed, skip the current `&` and continue.
            searchFrom = consumed === 0 ? copiedUpTo + 1 : copiedUpTo;
        }
        const decoded = buffer + input.slice(copiedUpTo);
        // Make sure we don't keep a reference to the final string.
        buffer = "";
        return decoded;
    };
}
/**
 * Picks the branch of the current trie node that matches a character.
 * This function is used to traverse the trie.
 *
 * The node word selects one of three layouts:
 * a single branch whose character sits in the low seven bits, a jump
 * table indexed by `char - jumpOffset`, or a sorted key list followed by a
 * parallel list of targets that is binary-searched.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIndex The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
function determineBranch(decodeTree, current, nodeIndex, char) {
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;
    // Case 1: Single branch encoded in jump offset
    if (branchCount === 0) {
        if (jumpOffset !== 0 && char === jumpOffset) {
            return nodeIndex;
        }
        return -1;
    }
    // Case 2: Multiple branches encoded in jump table
    if (jumpOffset) {
        const slot = char - jumpOffset;
        if (slot < 0 || slot >= branchCount) {
            return -1;
        }
        // Table entries are stored off by one.
        return decodeTree[nodeIndex + slot] - 1;
    }
    // Case 3: Multiple branches encoded in dictionary
    // Binary search for the character.
    let low = nodeIndex;
    let high = low + branchCount - 1;
    while (low <= high) {
        const middle = (low + high) >>> 1;
        const candidate = decodeTree[middle];
        if (candidate < char) {
            low = middle + 1;
            continue;
        }
        if (candidate > char) {
            high = middle - 1;
            continue;
        }
        // Targets are stored `branchCount` slots after their keys.
        return decodeTree[middle + branchCount];
    }
    return -1;
}
const htmlDecoder = /* #__PURE__ */ getDecoder(decode_data_html_js_1.htmlDecodeTree);
const xmlDecoder = /* #__PURE__ */ getDecoder(decode_data_xml_js_1.xmlDecodeTree);
/**
 * Decodes the entities in an HTML string with the shared HTML decoder.
 *
 * @param htmlString The string to decode.
 * @param mode The decoding mode; `DecodingMode.Legacy` when omitted.
 * @returns The decoded string.
 */
function decodeHTML(htmlString, mode = DecodingMode.Legacy) {
    const decoded = htmlDecoder(htmlString, mode);
    return decoded;
}
/**
 * Decodes the entities in an HTML attribute value, using
 * `DecodingMode.Attribute` with the shared HTML decoder.
 *
 * @param htmlAttribute The string to decode.
 * @returns The decoded string.
 */
function decodeHTMLAttribute(htmlAttribute) {
    const decoded = htmlDecoder(htmlAttribute, DecodingMode.Attribute);
    return decoded;
}
/**
* Decodes an HTML string, requiring all entities to be terminated by a semicolon.
*
* Runs the shared HTML decoder in `DecodingMode.Strict`.
*
* @param htmlString The string to decode.
* @returns The decoded string.
*/
function decodeHTMLStrict(htmlString) {
return htmlDecoder(htmlString, DecodingMode.Strict);
}
/**
* Decodes an XML string, requiring all entities to be terminated by a semicolon.
*
* Runs the XML decoder in `DecodingMode.Strict`; no other mode can be selected
* through this function.
*
* @param xmlString The string to decode.
* @returns The decoded string.
*/
function decodeXML(xmlString) {
return xmlDecoder(xmlString, DecodingMode.Strict);
}
// Re-export for use by eg. htmlparser2
var decode_data_html_js_2 = require("./generated/decode-data-html.js");
Object.defineProperty(exports, "htmlDecodeTree", { enumerable: true, get: function () { return decode_data_html_js_2.htmlDecodeTree; } });
var decode_data_xml_js_2 = require("./generated/decode-data-xml.js");
Object.defineProperty(exports, "xmlDecodeTree", { enumerable: true, get: function () { return decode_data_xml_js_2.xmlDecodeTree; } });
var decode_codepoint_js_2 = require("./decode-codepoint.js");
Object.defineProperty(exports, "decodeCodePoint", { enumerable: true, get: function () { return decode_codepoint_js_2.decodeCodePoint; } });
Object.defineProperty(exports, "replaceCodePoint", { enumerable: true, get: function () { return decode_codepoint_js_2.replaceCodePoint; } });
Object.defineProperty(exports, "fromCodePoint", { enumerable: true, get: function () { return decode_codepoint_js_2.fromCodePoint; } });
//# sourceMappingURL=decode.js.map
File diff suppressed because one or more lines are too long
-22
View File
@@ -1,22 +0,0 @@
/**
* Encodes all characters in the input using HTML entities. This includes
* characters that are valid ASCII characters in HTML documents, such as `#`.
*
* To get a more compact output, consider using the `encodeNonAsciiHTML`
* function, which will only encode characters that are not valid in HTML
* documents, as well as non-ASCII characters.
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
*/
export declare function encodeHTML(input: string): string;
/**
* Encodes all non-ASCII characters, as well as characters not valid in HTML
* documents using HTML entities. This function will not encode characters that
* are valid in HTML documents, such as `#`.
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
*/
export declare function encodeNonAsciiHTML(input: string): string;
//# sourceMappingURL=encode.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["../../src/encode.ts"],"names":[],"mappings":"AAKA;;;;;;;;;;GAUG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEhD;AACD;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAExD"}
-73
View File
@@ -1,73 +0,0 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.encodeHTML = encodeHTML;
exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
const encode_html_js_1 = require("./generated/encode-html.js");
const escape_js_1 = require("./escape.js");
// Characters that `encodeHTML` rewrites: tab, line feed, form feed, the ASCII
// punctuation ranges `!`-`,`, `.`, `/`, `:`-`@`, `[`-`` ` `` and `{`-`}`, plus
// every UTF-16 code unit from U+0080 to U+FFFF. Space, `-`, `~`, digits and
// ASCII letters are not matched. The regex is global, so it carries
// `lastIndex` state between `exec` calls.
const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
/**
* Encodes all characters in the input using HTML entities. This includes
* characters that are valid ASCII characters in HTML documents, such as `#`.
*
* To get a more compact output, consider using the `encodeNonAsciiHTML`
* function, which will only encode characters that are not valid in HTML
* documents, as well as non-ASCII characters.
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
*
* @param input The string to encode.
* @returns The encoded string.
*/
function encodeHTML(input) {
// The set of characters that get replaced is defined by `htmlReplacer`.
return encodeHTMLTrieRe(htmlReplacer, input);
}
/**
* Encodes all non-ASCII characters, as well as characters not valid in HTML
* documents using HTML entities. This function will not encode characters that
* are valid in HTML documents, such as `#`.
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
*
* @param input The string to encode.
* @returns The encoded string.
*/
function encodeNonAsciiHTML(input) {
// Reuses the XML replacer regex from the escape module to pick the characters to encode.
return encodeHTMLTrieRe(escape_js_1.xmlReplacer, input);
}
/**
 * Replaces every character matched by `regExp` with an HTML entity looked up
 * in `htmlTrie`, falling back to a numeric hexadecimal reference.
 *
 * For each match the trie is consulted with the matched UTF-16 code unit:
 * - a string entry is the entity for that character;
 * - an object entry means an entity may span two code units: `n` is either
 *   the single accepted second unit (entity in `o`) or a map of second units
 *   to entities, and `v` (possibly missing) is the entity for the first unit
 *   on its own.
 *
 * @param regExp Regular expression selecting the characters to encode. It
 *     must carry the `g` flag: the loop relies on `lastIndex` advancing and
 *     adjusts it to skip code units that were consumed as part of a match.
 * @param input The string to encode.
 * @returns The encoded string.
 */
function encodeHTMLTrieRe(regExp, input) {
    let returnValue = "";
    let lastIndex = 0;
    let match;
    while ((match = regExp.exec(input)) !== null) {
        const { index } = match;
        // Copy the untouched text between the previous match and this one.
        returnValue += input.substring(lastIndex, index);
        const char = input.charCodeAt(index);
        let next = encode_html_js_1.htmlTrie.get(char);
        if (typeof next === "object") {
            // We are in a branch. Try to match the next char.
            if (index + 1 < input.length) {
                const nextChar = input.charCodeAt(index + 1);
                const value = typeof next.n === "number"
                    ? next.n === nextChar
                        ? next.o
                        : undefined
                    : next.n.get(nextChar);
                if (value !== undefined) {
                    returnValue += value;
                    // The entity covered two code units; skip the second one.
                    lastIndex = regExp.lastIndex += 1;
                    continue;
                }
            }
            next = next.v;
        }
        // We might have a tree node without a value; skip and use a numeric entity.
        if (next === undefined) {
            const cp = (0, escape_js_1.getCodePoint)(input, index);
            returnValue += `&#x${cp.toString(16)};`;
            // Increase by 1 if we have a surrogate pair
            lastIndex = regExp.lastIndex += Number(cp !== char);
        }
        else {
            returnValue += next;
            lastIndex = index + 1;
        }
    }
    // `substring` replaces the deprecated `substr`; with a single non-negative
    // start index both return the same tail.
    return returnValue + input.substring(lastIndex);
}
//# sourceMappingURL=encode.js.map
@@ -1 +0,0 @@
{"version":3,"file":"encode.js","sourceRoot":"","sources":["../../src/encode.ts"],"names":[],"mappings":";;AAgBA,gCAEC;AASD,gDAEC;AA7BD,+DAAsD;AACtD,2CAAwD;AAExD,MAAM,YAAY,GAAG,sCAAsC,CAAC;AAE5D;;;;;;;;;;GAUG;AACH,SAAgB,UAAU,CAAC,KAAa;IACpC,OAAO,gBAAgB,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;AACjD,CAAC;AACD;;;;;;;GAOG;AACH,SAAgB,kBAAkB,CAAC,KAAa;IAC5C,OAAO,gBAAgB,CAAC,uBAAW,EAAE,KAAK,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,gBAAgB,CAAC,MAAc,EAAE,KAAa;IACnD,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC3C,MAAM,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC;QACxB,WAAW,IAAI,KAAK,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QACjD,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACrC,IAAI,IAAI,GAAG,yBAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAE9B,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,kDAAkD;YAClD,IAAI,KAAK,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;gBAC3B,MAAM,QAAQ,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;gBAC7C,MAAM,KAAK,GACP,OAAO,IAAI,CAAC,CAAC,KAAK,QAAQ;oBACtB,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,QAAQ;wBACjB,CAAC,CAAC,IAAI,CAAC,CAAC;wBACR,CAAC,CAAC,SAAS;oBACf,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAE/B,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;oBACtB,WAAW,IAAI,KAAK,CAAC;oBACrB,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;oBAClC,SAAS;gBACb,CAAC;YACL,CAAC;YAED,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,4EAA4E;QAC5E,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,GAAG,IAAA,wBAAY,EAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACtC,WAAW,IAAI,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC;YACxC,4CAA4C;YAC5C,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC;QACxD,CAAC;aAAM,CAAC;YACJ,WAAW,IAAI,IAAI,CAAC;YACpB,SAAS,GAAG,KAAK,GAAG,CAAC,CAAC;QAC1B,CAAC;IACL,CAAC;IAED,OAAO,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;AACjD,CAAC"}
-43
View File
@@ -1,43 +0,0 @@
export declare const xmlReplacer: RegExp;
export declare const getCodePoint: (c: string, index: number) => number;
/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using XML entities.
*
* If a character has no equivalent entity, a
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
*/
export declare function encodeXML(input: string): string;
/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using numeric hexadecimal reference (eg. `&#xfc;`).
*
* Have a look at `escapeUTF8` if you want a more concise output at the expense
* of reduced transportability.
*
* @param data String to escape.
*/
export declare const escape: typeof encodeXML;
/**
* Encodes all characters not valid in XML documents using XML entities.
*
* Note that the output will be character-set dependent.
*
* @param data String to escape.
*/
export declare const escapeUTF8: (data: string) => string;
/**
* Encodes all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export declare const escapeAttribute: (data: string) => string;
/**
* Encodes all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export declare const escapeText: (data: string) => string;
//# sourceMappingURL=escape.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"escape.d.ts","sourceRoot":"","sources":["../../src/escape.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,WAAW,EAAE,MAAiC,CAAC;AAW5D,eAAO,MAAM,YAAY,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,MAWoB,CAAC;AAE9E;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CA0B/C;AAED;;;;;;;;GAQG;AACH,eAAO,MAAM,MAAM,EAAE,OAAO,SAAqB,CAAC;AAqClD;;;;;;GAMG;AACH,eAAO,MAAM,UAAU,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAG1C,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,eAAe,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAQ3C,CAAC;AAEN;;;;;GAKG;AACH,eAAO,MAAM,UAAU,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAQ1C,CAAC"}
-121
View File
@@ -1,121 +0,0 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.escapeText = exports.escapeAttribute = exports.escapeUTF8 = exports.escape = exports.getCodePoint = exports.xmlReplacer = void 0;
exports.encodeXML = encodeXML;
// Characters rewritten by `encodeXML`: `"`, `$`, `&`, `'`, `<`, `>` and every
// UTF-16 code unit from U+0080 to U+FFFF. The regex is global, so it carries
// `lastIndex` state between `exec` calls.
exports.xmlReplacer = /["$&'<>\u0080-\uFFFF]/g;
// Named XML entities keyed by UTF-16 code unit: `"` (34), `&` (38), `'` (39),
// `<` (60) and `>` (62). `$` (36) is matched by `xmlReplacer` but has no entry
// here, so `encodeXML` emits it as a numeric reference.
const xmlCodeMap = new Map([
[34, "&quot;"],
[38, "&amp;"],
[39, "&apos;"],
[60, "&lt;"],
[62, "&gt;"],
]);
// For compatibility with node < 4, we wrap `codePointAt`.
// Called as `getCodePoint(string, index)`; returns the code point that starts
// at `index`. The native `codePointAt` is used whenever it exists.
// NOTE(review): the fallback branch combines the unit at `index + 1` without
// checking that it exists or is a low surrogate, so it appears to differ from
// the native result for a lone high surrogate. Confirm before relying on it.
exports.getCodePoint =
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
String.prototype.codePointAt == null
? (c, index) => (c.charCodeAt(index) & 64512) === 55296
? (c.charCodeAt(index) - 55296) * 1024 +
c.charCodeAt(index + 1) -
56320 +
65536
: c.charCodeAt(index)
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
(input, index) => input.codePointAt(index);
/**
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents using XML entities.
 *
 * If a character has no equivalent entity, a
 * numeric hexadecimal reference (eg. `&#xfc;`) will be used.
 *
 * A surrogate pair is emitted as one reference for the combined code point.
 * A high surrogate that is not followed by a low surrogate is emitted on its
 * own, and the character after it is processed normally.
 *
 * @param input The string to encode.
 * @returns The encoded string.
 */
function encodeXML(input) {
    let returnValue = "";
    let lastIndex = 0;
    let match;
    while ((match = exports.xmlReplacer.exec(input)) !== null) {
        const { index } = match;
        const char = input.charCodeAt(index);
        const next = xmlCodeMap.get(char);
        if (next === undefined) {
            const cp = (0, exports.getCodePoint)(input, index);
            returnValue += `${input.substring(lastIndex, index)}&#x${cp.toString(16)};`;
            // Skip the following code unit only when it was actually consumed
            // as the low half of a surrogate pair. Testing just whether `char`
            // is a high surrogate would drop the character that follows a
            // lone high surrogate.
            lastIndex = exports.xmlReplacer.lastIndex += Number(cp !== char);
        }
        else {
            returnValue += input.substring(lastIndex, index) + next;
            lastIndex = index + 1;
        }
    }
    // `substring` replaces the deprecated `substr`; the result is the same.
    return returnValue + input.substring(lastIndex);
}
/**
* Alias of `encodeXML`: encodes all non-ASCII characters, as well as characters
* not valid in XML documents. `"`, `&`, `'`, `<` and `>` become named XML
* entities; every other encoded character becomes a numeric hexadecimal
* reference (eg. `&#xfc;`).
*
* Have a look at `escapeUTF8` if you want a more concise output at the expense
* of reduced transportability.
*
* @param data String to escape.
*/
exports.escape = encodeXML;
/**
 * Builds an escaping function from a regular expression and a lookup table.
 *
 * The returned function scans its input with `regex`, copies the text between
 * matches unchanged, and replaces each match with the entry of `map` keyed by
 * the first UTF-16 code unit of the match.
 *
 * @param regex Global regular expression matching the single characters to
 *     escape; each match is treated as exactly one code unit long.
 * @param map Map from character code to the replacement text. It is expected
 *     to contain an entry for every character `regex` can match.
 *
 * @returns Function that escapes all characters matched by the given regular
 *     expression using the given map of characters to escape to their entities.
 */
function getEscaper(regex, map) {
    return function escape(data) {
        let output = "";
        let cursor = 0;
        for (let hit = regex.exec(data); hit; hit = regex.exec(data)) {
            const at = hit.index;
            // Text since the previous match is passed through untouched.
            const untouched = cursor === at ? "" : data.substring(cursor, at);
            // We know that this character will be in the map.
            output += untouched + map.get(hit[0].charCodeAt(0));
            // Every match will be of length 1
            cursor = at + 1;
        }
        return output + data.substring(cursor);
    };
}
/**
* Encodes all characters not valid in XML documents using XML entities.
*
* Only `"`, `&`, `'`, `<` and `>` are replaced (using `xmlCodeMap`); every
* other character, including non-ASCII ones, is left as is.
*
* Note that the output will be character-set dependent.
*
* @param data String to escape.
* @returns The escaped string.
*/
exports.escapeUTF8 = getEscaper(/["&'<>]/g, xmlCodeMap);
/**
* Encodes all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* Replaces `"`, `&` and the no-break space (U+00A0); nothing else is touched.
*
* @param data String to escape.
* @returns The escaped string.
*/
exports.escapeAttribute =
/* #__PURE__ */ getEscaper(/["&\u00A0]/g, new Map([
[34, "&quot;"],
[38, "&amp;"],
[160, "&nbsp;"],
]));
/**
* Encodes all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* Replaces `&`, `<`, `>` and the no-break space (U+00A0); nothing else is touched.
*
* @param data String to escape.
* @returns The escaped string.
*/
exports.escapeText = getEscaper(/[&<>\u00A0]/g, new Map([
[38, "&amp;"],
[60, "&lt;"],
[62, "&gt;"],
[160, "&nbsp;"],
]));
//# sourceMappingURL=escape.js.map
@@ -1 +0,0 @@
{"version":3,"file":"escape.js","sourceRoot":"","sources":["../../src/escape.ts"],"names":[],"mappings":";;;AA+BA,8BA0BC;AAzDY,QAAA,WAAW,GAAW,wBAAwB,CAAC;AAE5D,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACvB,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,EAAE,MAAM,CAAC;CACf,CAAC,CAAC;AAEH,yDAAyD;AAC5C,QAAA,YAAY;AACrB,uEAAuE;AACvE,MAAM,CAAC,SAAS,CAAC,WAAW,IAAI,IAAI;IAChC,CAAC,CAAC,CAAC,CAAS,EAAE,KAAa,EAAU,EAAE,CACjC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,KAAO,CAAC,KAAK,KAAO;QACvC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,KAAO,CAAC,GAAG,IAAM;YACxC,CAAC,CAAC,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC;YACvB,KAAO;YACP,KAAS;QACX,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC;IAC/B,CAAC,CAAC,uEAAuE;QACvE,CAAC,KAAa,EAAE,KAAa,EAAU,EAAE,CAAC,KAAK,CAAC,WAAW,CAAC,KAAK,CAAE,CAAC;AAE9E;;;;;;GAMG;AACH,SAAgB,SAAS,CAAC,KAAa;IACnC,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,mBAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAChD,MAAM,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC;QACxB,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAElC,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,WAAW,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,MAAM,IAAA,oBAAY,EACjE,KAAK,EACL,KAAK,CACR,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC;YAClB,4CAA4C;YAC5C,SAAS,GAAG,mBAAW,CAAC,SAAS,IAAI,MAAM,CACvC,CAAC,IAAI,GAAG,KAAO,CAAC,KAAK,KAAO,CAC/B,CAAC;QACN,CAAC;aAAM,CAAC;YACJ,WAAW,IAAI,KAAK,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,IAAI,CAAC;YACxD,SAAS,GAAG,KAAK,GAAG,CAAC,CAAC;QAC1B,CAAC;IACL,CAAC;IAED,OAAO,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;AACjD,CAAC;AAED;;;;;;;;GAQG;AACU,QAAA,MAAM,GAAqB,SAAS,CAAC;AAElD;;;;;;;;;GASG;AACH,SAAS,UAAU,CACf,KAAa,EACb,GAAwB;IAExB,OAAO,SAAS,MAAM,CAAC,IAAY;QAC/B,IAAI,KAAK,CAAC;QACV,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YAChC,IAAI,SAAS,KAAK,KA
AK,CAAC,KAAK,EAAE,CAAC;gBAC5B,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;YACrD,CAAC;YAED,kDAAkD;YAClD,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAE,CAAC;YAE3C,kCAAkC;YAClC,SAAS,GAAG,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;QAChC,CAAC;QAED,OAAO,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;IAC9C,CAAC,CAAC;AACN,CAAC;AAED;;;;;;GAMG;AACU,QAAA,UAAU,GAA6C,UAAU,CAC1E,UAAU,EACV,UAAU,CACb,CAAC;AAEF;;;;;GAKG;AACU,QAAA,eAAe;AACxB,eAAe,CAAC,UAAU,CACtB,aAAa,EACb,IAAI,GAAG,CAAC;IACJ,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,GAAG,EAAE,QAAQ,CAAC;CAClB,CAAC,CACL,CAAC;AAEN;;;;;GAKG;AACU,QAAA,UAAU,GAA6C,UAAU,CAC1E,cAAc,EACd,IAAI,GAAG,CAAC;IACJ,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,GAAG,EAAE,QAAQ,CAAC;CAClB,CAAC,CACL,CAAC"}
@@ -1,2 +0,0 @@
export declare const htmlDecodeTree: Uint16Array;
//# sourceMappingURL=decode-data-html.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-data-html.d.ts","sourceRoot":"","sources":["../../../src/generated/decode-data-html.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,cAAc,EAAE,WAK5B,CAAC"}
File diff suppressed because one or more lines are too long
@@ -1 +0,0 @@
{"version":3,"file":"decode-data-html.js","sourceRoot":"","sources":["../../../src/generated/decode-data-html.ts"],"names":[],"mappings":";AAAA,8CAA8C;;;AAEjC,QAAA,cAAc,GAAgC,IAAI,WAAW;AACtE,kBAAkB;AAClB,eAAe,CAAC,268CAA268C;KACt78C,KAAK,CAAC,EAAE,CAAC;KACT,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CACnC,CAAC"}
@@ -1,2 +0,0 @@
export declare const xmlDecodeTree: Uint16Array;
//# sourceMappingURL=decode-data-xml.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-data-xml.d.ts","sourceRoot":"","sources":["../../../src/generated/decode-data-xml.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,aAAa,EAAE,WAK3B,CAAC"}
@@ -1,10 +0,0 @@
"use strict";
// Generated using scripts/write-decode-map.ts
Object.defineProperty(exports, "__esModule", { value: true });
exports.xmlDecodeTree = void 0;
exports.xmlDecodeTree = new Uint16Array(
// prettier-ignore
/* #__PURE__ */ "\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
.split("")
.map((c) => c.charCodeAt(0)));
//# sourceMappingURL=decode-data-xml.js.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-data-xml.js","sourceRoot":"","sources":["../../../src/generated/decode-data-xml.ts"],"names":[],"mappings":";AAAA,8CAA8C;;;AAEjC,QAAA,aAAa,GAAgC,IAAI,WAAW;AACrE,kBAAkB;AAClB,eAAe,CAAC,uFAAuF;KAClG,KAAK,CAAC,EAAE,CAAC;KACT,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CACnC,CAAC"}
@@ -1,8 +0,0 @@
type EncodeTrieNode = string | {
v?: string;
n: number | Map<number, EncodeTrieNode>;
o?: string;
};
export declare const htmlTrie: Map<number, EncodeTrieNode>;
export {};
//# sourceMappingURL=encode-html.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"encode-html.d.ts","sourceRoot":"","sources":["../../../src/generated/encode-html.ts"],"names":[],"mappings":"AAEA,KAAK,cAAc,GACb,MAAM,GACN;IAAE,CAAC,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;IAAC,CAAC,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAY1E,eAAO,MAAM,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAC,cAAc,CAAwhuB,CAAC"}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
-96
View File
@@ -1,96 +0,0 @@
import { DecodingMode } from "./decode.js";
/** The level of entities to support. */
export declare enum EntityLevel {
/** Support only XML entities. */
XML = 0,
/** Support HTML entities, which are a superset of XML entities. */
HTML = 1
}
export declare enum EncodingMode {
/**
* The output is UTF-8 encoded. Only characters that need escaping within
* XML will be escaped.
*/
UTF8 = 0,
/**
* The output consists only of ASCII characters. Characters that need
* escaping within HTML, and characters that aren't ASCII characters will
* be escaped.
*/
ASCII = 1,
/**
* Encode all characters that have an equivalent entity, as well as all
* characters that are not ASCII characters.
*/
Extensive = 2,
/**
* Encode all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*/
Attribute = 3,
/**
* Encode all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*/
Text = 4
}
/** Options accepted by `decode` and `decodeStrict`. */
export interface DecodingOptions {
/**
* The level of entities to support.
* @default {@link EntityLevel.XML}
*/
level?: EntityLevel;
/**
* Decoding mode. If `Legacy`, will support legacy entities not terminated
* with a semicolon (`;`).
*
* Always `Strict` for XML. For HTML, set this to `DecodingMode.Attribute`
* if you are parsing an attribute value.
*
* The deprecated `decodeStrict` function defaults this to `Strict`.
*
* @default {@link DecodingMode.Legacy}
*/
mode?: DecodingMode | undefined;
}
/**
* Decodes a string with entities.
*
* @param input String to decode.
* @param options Decoding options.
*/
export declare function decode(input: string, options?: DecodingOptions | EntityLevel): string;
/**
* Decodes a string with entities. Does not allow missing trailing semicolons for entities.
*
* @param input String to decode.
* @param options Decoding options.
* @deprecated Use `decode` with the `mode` set to `Strict`.
*/
export declare function decodeStrict(input: string, options?: DecodingOptions | EntityLevel): string;
/**
* Options for `encode`.
*/
export interface EncodingOptions {
/**
* The level of entities to support.
* @default {@link EntityLevel.XML}
*/
level?: EntityLevel;
/**
* Output format.
* @default {@link EncodingMode.Extensive}
*/
mode?: EncodingMode;
}
/**
* Encodes a string with entities.
*
* @param input String to encode.
* @param options Encoding options.
*/
export declare function encode(input: string, options?: EncodingOptions | EntityLevel): string;
export { encodeXML, escape, escapeUTF8, escapeAttribute, escapeText, } from "./escape.js";
export { encodeHTML, encodeNonAsciiHTML, encodeHTML as encodeHTML4, encodeHTML as encodeHTML5, } from "./encode.js";
export { EntityDecoder, DecodingMode, decodeXML, decodeHTML, decodeHTMLStrict, decodeHTMLAttribute, decodeHTML as decodeHTML4, decodeHTML as decodeHTML5, decodeHTMLStrict as decodeHTML4Strict, decodeHTMLStrict as decodeHTML5Strict, decodeXML as decodeXMLStrict, } from "./decode.js";
//# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,aAAa,CAAC;AASlE,wCAAwC;AACxC,oBAAY,WAAW;IACnB,iCAAiC;IACjC,GAAG,IAAI;IACP,mEAAmE;IACnE,IAAI,IAAI;CACX;AAED,oBAAY,YAAY;IACpB;;;OAGG;IACH,IAAI,IAAA;IACJ;;;;OAIG;IACH,KAAK,IAAA;IACL;;;OAGG;IACH,SAAS,IAAA;IACT;;;OAGG;IACH,SAAS,IAAA;IACT;;;OAGG;IACH,IAAI,IAAA;CACP;AAED,MAAM,WAAW,eAAe;IAC5B;;;OAGG;IACH,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB;;;;;;;;;;OAUG;IACH,IAAI,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;CACnC;AAED;;;;;GAKG;AACH,wBAAgB,MAAM,CAClB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,eAAe,GAAG,WAA6B,GACzD,MAAM,CASR;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CACxB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,eAAe,GAAG,WAA6B,GACzD,MAAM,CAMR;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B;;;OAGG;IACH,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB;;;OAGG;IACH,IAAI,CAAC,EAAE,YAAY,CAAC;CACvB;AAED;;;;;GAKG;AACH,wBAAgB,MAAM,CAClB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,eAAe,GAAG,WAA6B,GACzD,MAAM,CA2BR;AAED,OAAO,EACH,SAAS,EACT,MAAM,EACN,UAAU,EACV,eAAe,EACf,UAAU,GACb,MAAM,aAAa,CAAC;AAErB,OAAO,EACH,UAAU,EACV,kBAAkB,EAElB,UAAU,IAAI,WAAW,EACzB,UAAU,IAAI,WAAW,GAC5B,MAAM,aAAa,CAAC;AAErB,OAAO,EACH,aAAa,EACb,YAAY,EACZ,SAAS,EACT,UAAU,EACV,gBAAgB,EAChB,mBAAmB,EAEnB,UAAU,IAAI,WAAW,EACzB,UAAU,IAAI,WAAW,EACzB,gBAAgB,IAAI,iBAAiB,EACrC,gBAAgB,IAAI,iBAAiB,EACrC,SAAS,IAAI,eAAe,GAC/B,MAAM,aAAa,CAAC"}
-131
View File
@@ -1,131 +0,0 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.decodeXMLStrict = exports.decodeHTML5Strict = exports.decodeHTML4Strict = exports.decodeHTML5 = exports.decodeHTML4 = exports.decodeHTMLAttribute = exports.decodeHTMLStrict = exports.decodeHTML = exports.decodeXML = exports.DecodingMode = exports.EntityDecoder = exports.encodeHTML5 = exports.encodeHTML4 = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.escapeText = exports.escapeAttribute = exports.escapeUTF8 = exports.encodeXML = exports.EncodingMode = exports.EntityLevel = void 0;
exports.decode = decode;
exports.decodeStrict = decodeStrict;
exports.encode = encode;
const decode_js_1 = require("./decode.js");
const encode_js_1 = require("./encode.js");
const escape_js_1 = require("./escape.js");
/**
* The level of entities to support.
*
* Compiled numeric enum: the object maps each name to its number and each
* number back to its name (`EntityLevel.XML === 0`, `EntityLevel[0] === "XML"`).
*/
var EntityLevel;
(function (EntityLevel) {
/** Support only XML entities. */
EntityLevel[EntityLevel["XML"] = 0] = "XML";
/** Support HTML entities, which are a superset of XML entities. */
EntityLevel[EntityLevel["HTML"] = 1] = "HTML";
})(EntityLevel || (exports.EntityLevel = EntityLevel = {}));
/**
* Output formats understood by `encode`.
*
* Compiled numeric enum: the object maps each name to its number and each
* number back to its name.
*/
var EncodingMode;
(function (EncodingMode) {
/**
* The output is UTF-8 encoded. Only characters that need escaping within
* XML will be escaped.
*/
EncodingMode[EncodingMode["UTF8"] = 0] = "UTF8";
/**
* The output consists only of ASCII characters. Characters that need
* escaping within HTML, and characters that aren't ASCII characters will
* be escaped.
*/
EncodingMode[EncodingMode["ASCII"] = 1] = "ASCII";
/**
* Encode all characters that have an equivalent entity, as well as all
* characters that are not ASCII characters.
*/
EncodingMode[EncodingMode["Extensive"] = 2] = "Extensive";
/**
* Encode all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*/
EncodingMode[EncodingMode["Attribute"] = 3] = "Attribute";
/**
* Encode all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*/
EncodingMode[EncodingMode["Text"] = 4] = "Text";
})(EncodingMode || (exports.EncodingMode = EncodingMode = {}));
/**
 * Decodes a string with entities.
 *
 * `options` is either a bare `EntityLevel` or an options object. With
 * `EntityLevel.HTML` the input goes through `decodeHTML`, using the `mode`
 * from the options object when one was given. Any other level goes through
 * `decodeXML`, which takes no mode.
 *
 * @param input String to decode.
 * @param options Decoding options. Defaults to `EntityLevel.XML`.
 * @returns The decoded string.
 */
function decode(input, options = EntityLevel.XML) {
    let level;
    if (typeof options === "number") {
        level = options;
    }
    else {
        level = options.level;
    }
    // Everything that is not explicitly HTML is decoded as XML.
    if (level !== EntityLevel.HTML) {
        return (0, decode_js_1.decodeXML)(input);
    }
    // A bare level carries no mode; `decodeHTML` then applies its own default.
    let mode;
    if (typeof options === "object") {
        mode = options.mode;
    }
    return (0, decode_js_1.decodeHTML)(input, mode);
}
/**
 * Decodes a string with entities. Does not allow missing trailing semicolons for entities.
 *
 * Behaves like `decode`, except that `mode` falls back to
 * `DecodingMode.Strict` when it is missing (`undefined` or `null`). The
 * caller's options object is never modified.
 *
 * @param input String to decode.
 * @param options Decoding options. Defaults to `EntityLevel.XML`.
 * @returns The decoded string.
 * @deprecated Use `decode` with the `mode` set to `Strict`.
 */
function decodeStrict(input, options = EntityLevel.XML) {
    // Work on a private copy: writing the default `mode` into the caller's
    // object would leak into later calls that reuse the same options.
    const normalizedOptions = typeof options === "number"
        ? { level: options }
        : Object.assign({}, options);
    if (normalizedOptions.mode == null) {
        normalizedOptions.mode = decode_js_1.DecodingMode.Strict;
    }
    return decode(input, normalizedOptions);
}
/**
 * Encodes a string with entities.
 *
 * `options` is either a bare `EntityLevel` or an options object. A missing
 * `mode` means `EncodingMode.Extensive`, and a missing `level` means
 * `EntityLevel.XML`. The `UTF8`, `Attribute` and `Text` modes ignore `level`.
 * For `ASCII`, `Extensive` and any unrecognised mode, `level` chooses between
 * the HTML and the XML encoder.
 *
 * @param input String to encode.
 * @param options Encoding options. Defaults to `EntityLevel.XML`.
 * @returns The encoded string.
 */
function encode(input, options = EntityLevel.XML) {
    const settings = typeof options === "number" ? { level: options } : options;
    const { mode = EncodingMode.Extensive, level = EntityLevel.XML } = settings;

    // Modes whose output does not depend on the entity level.
    if (mode === EncodingMode.UTF8) {
        return (0, escape_js_1.escapeUTF8)(input);
    }
    if (mode === EncodingMode.Attribute) {
        return (0, escape_js_1.escapeAttribute)(input);
    }
    if (mode === EncodingMode.Text) {
        return (0, escape_js_1.escapeText)(input);
    }

    // Without HTML entities, ASCII and Extensive both use the XML encoder.
    if (level !== EntityLevel.HTML) {
        return (0, escape_js_1.encodeXML)(input);
    }
    if (mode === EncodingMode.ASCII) {
        return (0, encode_js_1.encodeNonAsciiHTML)(input);
    }
    // `Extensive` and any unrecognised mode.
    return (0, encode_js_1.encodeHTML)(input);
}
var escape_js_2 = require("./escape.js");
Object.defineProperty(exports, "encodeXML", { enumerable: true, get: function () { return escape_js_2.encodeXML; } });
Object.defineProperty(exports, "escape", { enumerable: true, get: function () { return escape_js_2.escape; } });
Object.defineProperty(exports, "escapeUTF8", { enumerable: true, get: function () { return escape_js_2.escapeUTF8; } });
Object.defineProperty(exports, "escapeAttribute", { enumerable: true, get: function () { return escape_js_2.escapeAttribute; } });
Object.defineProperty(exports, "escapeText", { enumerable: true, get: function () { return escape_js_2.escapeText; } });
var encode_js_2 = require("./encode.js");
Object.defineProperty(exports, "encodeHTML", { enumerable: true, get: function () { return encode_js_2.encodeHTML; } });
Object.defineProperty(exports, "encodeNonAsciiHTML", { enumerable: true, get: function () { return encode_js_2.encodeNonAsciiHTML; } });
// Legacy aliases (deprecated)
Object.defineProperty(exports, "encodeHTML4", { enumerable: true, get: function () { return encode_js_2.encodeHTML; } });
Object.defineProperty(exports, "encodeHTML5", { enumerable: true, get: function () { return encode_js_2.encodeHTML; } });
var decode_js_2 = require("./decode.js");
Object.defineProperty(exports, "EntityDecoder", { enumerable: true, get: function () { return decode_js_2.EntityDecoder; } });
Object.defineProperty(exports, "DecodingMode", { enumerable: true, get: function () { return decode_js_2.DecodingMode; } });
Object.defineProperty(exports, "decodeXML", { enumerable: true, get: function () { return decode_js_2.decodeXML; } });
Object.defineProperty(exports, "decodeHTML", { enumerable: true, get: function () { return decode_js_2.decodeHTML; } });
Object.defineProperty(exports, "decodeHTMLStrict", { enumerable: true, get: function () { return decode_js_2.decodeHTMLStrict; } });
Object.defineProperty(exports, "decodeHTMLAttribute", { enumerable: true, get: function () { return decode_js_2.decodeHTMLAttribute; } });
// Legacy aliases (deprecated)
Object.defineProperty(exports, "decodeHTML4", { enumerable: true, get: function () { return decode_js_2.decodeHTML; } });
Object.defineProperty(exports, "decodeHTML5", { enumerable: true, get: function () { return decode_js_2.decodeHTML; } });
Object.defineProperty(exports, "decodeHTML4Strict", { enumerable: true, get: function () { return decode_js_2.decodeHTMLStrict; } });
Object.defineProperty(exports, "decodeHTML5Strict", { enumerable: true, get: function () { return decode_js_2.decodeHTMLStrict; } });
Object.defineProperty(exports, "decodeXMLStrict", { enumerable: true, get: function () { return decode_js_2.decodeXML; } });
//# sourceMappingURL=index.js.map
-1
View File
@@ -1 +0,0 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;AAwEA,wBAYC;AASD,oCASC;AAwBD,wBA8BC;AA5JD,2CAAkE;AAClE,2CAA6D;AAC7D,2CAKqB;AAErB,wCAAwC;AACxC,IAAY,WAKX;AALD,WAAY,WAAW;IACnB,iCAAiC;IACjC,2CAAO,CAAA;IACP,mEAAmE;IACnE,6CAAQ,CAAA;AACZ,CAAC,EALW,WAAW,2BAAX,WAAW,QAKtB;AAED,IAAY,YA2BX;AA3BD,WAAY,YAAY;IACpB;;;OAGG;IACH,+CAAI,CAAA;IACJ;;;;OAIG;IACH,iDAAK,CAAA;IACL;;;OAGG;IACH,yDAAS,CAAA;IACT;;;OAGG;IACH,yDAAS,CAAA;IACT;;;OAGG;IACH,+CAAI,CAAA;AACR,CAAC,EA3BW,YAAY,4BAAZ,YAAY,QA2BvB;AAsBD;;;;;GAKG;AACH,SAAgB,MAAM,CAClB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;IAExD,MAAM,KAAK,GAAG,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAEpE,IAAI,KAAK,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;QACpE,OAAO,IAAA,sBAAU,EAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,IAAA,qBAAS,EAAC,KAAK,CAAC,CAAC;AAC5B,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,YAAY,CACxB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;;IAExD,MAAM,iBAAiB,GACnB,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC/D,MAAA,iBAAiB,CAAC,IAAI,oCAAtB,iBAAiB,CAAC,IAAI,GAAK,wBAAY,CAAC,MAAM,EAAC;IAE/C,OAAO,MAAM,CAAC,KAAK,EAAE,iBAAiB,CAAC,CAAC;AAC5C,CAAC;AAkBD;;;;;GAKG;AACH,SAAgB,MAAM,CAClB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;IAExD,MAAM,EAAE,IAAI,GAAG,YAAY,CAAC,SAAS,EAAE,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,GAC5D,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAE/D,QAAQ,IAAI,EAAE,CAAC;QACX,KAAK,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;YACrB,OAAO,IAAA,sBAAU,EAAC,KAAK,CAAC,CAAC;QAC7B,CAAC;QACD,KAAK,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1B,OAAO,IAAA,2BAAe,EAAC,KAAK,CAAC,CAAC;QAClC,CAAC;QACD,KAAK,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;YACrB,OAAO,IAAA,sBAAU,EAAC,KAAK,CAAC,CAAC;QAC7B,CAAC;QACD,KAAK,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC;YACtB,OAAO,KAAK,KAAK,WAAW,CAAC,IAAI;gBAC7B,CAAC,CAAC,IAAA,8BAAkB,EAAC,KAAK,CAAC;gBAC3B,CAAC,CAAC,IAAA,qBAAS,EAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;QACD,0DAA0D
;QAC1D,KAAK,YAAY,CAAC,SAAS,CAAC;QAC5B,OAAO,CAAC,CAAC,CAAC;YACN,OAAO,KAAK,KAAK,WAAW,CAAC,IAAI;gBAC7B,CAAC,CAAC,IAAA,sBAAU,EAAC,KAAK,CAAC;gBACnB,CAAC,CAAC,IAAA,qBAAS,EAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;IACL,CAAC;AACL,CAAC;AAED,yCAMqB;AALjB,sGAAA,SAAS,OAAA;AACT,mGAAA,MAAM,OAAA;AACN,uGAAA,UAAU,OAAA;AACV,4GAAA,eAAe,OAAA;AACf,uGAAA,UAAU,OAAA;AAGd,yCAMqB;AALjB,uGAAA,UAAU,OAAA;AACV,+GAAA,kBAAkB,OAAA;AAClB,8BAA8B;AAC9B,wGAAA,UAAU,OAAe;AACzB,wGAAA,UAAU,OAAe;AAG7B,yCAaqB;AAZjB,0GAAA,aAAa,OAAA;AACb,yGAAA,YAAY,OAAA;AACZ,sGAAA,SAAS,OAAA;AACT,uGAAA,UAAU,OAAA;AACV,6GAAA,gBAAgB,OAAA;AAChB,gHAAA,mBAAmB,OAAA;AACnB,8BAA8B;AAC9B,wGAAA,UAAU,OAAe;AACzB,wGAAA,UAAU,OAAe;AACzB,8GAAA,gBAAgB,OAAqB;AACrC,8GAAA,gBAAgB,OAAqB;AACrC,4GAAA,SAAS,OAAmB"}
-3
View File
@@ -1,3 +0,0 @@
{
"type": "commonjs"
}
@@ -1,19 +0,0 @@
/**
 * Creates a string from Unicode code points.
 *
 * Resolves to the native `String.fromCodePoint` when the runtime provides it,
 * and to a bundled fallback otherwise.
 *
 * @param codePoints The code point(s) to convert.
 * @returns The resulting string.
 */
export declare const fromCodePoint: (...codePoints: number[]) => string;
/**
 * Replace the given code point with a replacement character if it is a
 * surrogate or is outside the valid range. NUL and a set of C1 control
 * code points are mapped to their replacement code points. Otherwise the
 * code point is returned unchanged.
 *
 * @param codePoint The code point to check.
 * @returns The code point to use in place of `codePoint`.
 */
export declare function replaceCodePoint(codePoint: number): number;
/**
 * Replace the code point if relevant, then convert it to a string.
 *
 * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
 * @param codePoint The code point to decode.
 * @returns The string for the (possibly replaced) code point.
 */
export declare function decodeCodePoint(codePoint: number): string;
//# sourceMappingURL=decode-codepoint.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-codepoint.d.ts","sourceRoot":"","sources":["../../src/decode-codepoint.ts"],"names":[],"mappings":"AAkCA;;GAEG;AACH,eAAO,MAAM,aAAa,EAAE,CAAC,GAAG,UAAU,EAAE,MAAM,EAAE,KAAK,MAgBpD,CAAC;AAEN;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAS1D;AAED;;;;;;GAMG;AACH,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEzD"}
-72
View File
@@ -1,72 +0,0 @@
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
var _a;
const decodeMap = new Map([
[0, 65533],
// C1 Unicode control character reference replacements
[128, 8364],
[130, 8218],
[131, 402],
[132, 8222],
[133, 8230],
[134, 8224],
[135, 8225],
[136, 710],
[137, 8240],
[138, 352],
[139, 8249],
[140, 338],
[142, 381],
[145, 8216],
[146, 8217],
[147, 8220],
[148, 8221],
[149, 8226],
[150, 8211],
[151, 8212],
[152, 732],
[153, 8482],
[154, 353],
[155, 8250],
[156, 339],
[158, 382],
[159, 376],
]);
/**
 * Creates a string from one or more Unicode code points.
 *
 * Uses the native `String.fromCodePoint` when the runtime provides it. The
 * fallback accepts the same variadic argument list and encodes every code
 * point above 0xFFFF as a UTF-16 surrogate pair.
 *
 * @param codePoints The code points to convert.
 * @returns The string made of the given code points.
 */
export const fromCodePoint = 
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
typeof String.fromCodePoint === "function"
    ? String.fromCodePoint
    : function (...codePoints) {
        let output = "";
        for (let codePoint of codePoints) {
            if (codePoint > 65535) {
                // Split an astral code point into a high and a low surrogate.
                codePoint -= 65536;
                output += String.fromCharCode(((codePoint >>> 10) & 1023) | 55296);
                codePoint = 56320 | (codePoint & 1023);
            }
            output += String.fromCharCode(codePoint);
        }
        return output;
    };
/**
 * Maps a code point from a numeric character reference to the code point
 * that should be emitted for it.
 *
 * Surrogates and values above U+10FFFF become U+FFFD. Code points listed in
 * `decodeMap` are swapped for their mapped value. Everything else is returned
 * as is.
 *
 * @param codePoint The code point to check.
 * @returns The code point to emit.
 */
export function replaceCodePoint(codePoint) {
    const outsideValidRange = (codePoint >= 0xd800 && codePoint <= 0xdfff) ||
        codePoint > 0x10ffff;
    if (outsideValidRange) {
        return 0xfffd;
    }
    const mapped = decodeMap.get(codePoint);
    if (mapped === null || mapped === undefined) {
        return codePoint;
    }
    return mapped;
}
/**
 * Replace the code point if relevant, then convert it to a string.
 *
 * Equivalent to `fromCodePoint(replaceCodePoint(codePoint))`.
 *
 * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
 * @param codePoint The code point to decode.
 * @returns The string for the (possibly replaced) code point.
 */
export function decodeCodePoint(codePoint) {
    return fromCodePoint(replaceCodePoint(codePoint));
}
//# sourceMappingURL=decode-codepoint.js.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-codepoint.js","sourceRoot":"","sources":["../../src/decode-codepoint.ts"],"names":[],"mappings":"AAAA,qHAAqH;;AAErH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACtB,CAAC,CAAC,EAAE,KAAM,CAAC;IACX,sDAAsD;IACtD,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,IAAI,CAAC;IACX,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;IACV,CAAC,GAAG,EAAE,GAAG,CAAC;CACb,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa;AACtB,8GAA8G;AAC9G,MAAA,MAAM,CAAC,aAAa,mCACpB,UAAU,SAAiB;IACvB,IAAI,MAAM,GAAG,EAAE,CAAC;IAEhB,IAAI,SAAS,GAAG,KAAO,EAAE,CAAC;QACtB,SAAS,IAAI,KAAS,CAAC;QACvB,MAAM,IAAI,MAAM,CAAC,YAAY,CACzB,CAAC,CAAC,SAAS,KAAK,EAAE,CAAC,GAAG,IAAM,CAAC,GAAG,KAAO,CAC1C,CAAC;QACF,SAAS,GAAG,KAAO,GAAG,CAAC,SAAS,GAAG,IAAM,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;IACzC,OAAO,MAAM,CAAC;AAClB,CAAC,CAAC;AAEN;;;;GAIG;AACH,MAAM,UAAU,gBAAgB,CAAC,SAAiB;;IAC9C,IACI,CAAC,SAAS,IAAI,KAAO,IAAI,SAAS,IAAI,KAAO,CAAC;QAC9C,SAAS,GAAG,OAAU,EACxB,CAAC;QACC,OAAO,KAAO,CAAC;IACnB,CAAC;IAED,OAAO,MAAA,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,mCAAI,SAAS,CAAC;AACjD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,SAAiB;IAC7C,OAAO,aAAa,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,CAAC;AACtD,CAAC"}
-209
View File
@@ -1,209 +0,0 @@
/**
 * Bit masks used to unpack a 16-bit node of the decode trie.
 */
export declare enum BinTrieFlags {
    /** Mask 0xC000 (bits 14-15). The decoder shifts the masked value right by 14 to get the value length. */
    VALUE_LENGTH = 49152,
    /** Mask 0x3F80 (bits 7-13). `determineBranch` shifts the masked value right by 7 to get the branch count. */
    BRANCH_LENGTH = 16256,
    /** Mask 0x7F (bits 0-6). Read by `determineBranch` as the jump offset. */
    JUMP_TABLE = 127
}
/**
 * Controls how the decoder treats entities that are not terminated by a semicolon.
 */
export declare enum DecodingMode {
    /** Entities in text nodes that can end with any character. */
    Legacy = 0,
    /** Only allow entities terminated with a semicolon. */
    Strict = 1,
    /** Entities in attributes have limitations on ending characters. */
    Attribute = 2
}
/**
 * Producers for character reference errors as defined in the HTML spec.
 */
export interface EntityErrorProducer {
    /** Called when an entity is emitted although it was not terminated by a semicolon. */
    missingSemicolonAfterCharacterReference(): void;
    /**
     * Called when a numeric character reference contains no digits.
     *
     * @param consumedCharacters The decoder's count of consumed characters at that point.
     */
    absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void;
    /**
     * Called after a numeric character reference was emitted.
     *
     * @param code The parsed numeric value, before any replacement is applied.
     */
    validateNumericCharacterReference(code: number): void;
}
/**
 * Token decoder with support of writing partial entities.
 */
export declare class EntityDecoder {
    /** The tree used to decode entities. */
    private readonly decodeTree;
    /**
     * The function that is called when a codepoint is decoded.
     *
     * For named entities that decode to two codepoints, this will be called
     * once per codepoint, with the same `consumed` value.
     *
     * @param codepoint The decoded codepoint.
     * @param consumed The number of characters consumed by the decoder.
     */
    private readonly emitCodePoint;
    /** An object that is used to produce errors. */
    private readonly errors?;
    constructor(
    /** The tree used to decode entities. */
    decodeTree: Uint16Array,
    /**
     * The function that is called when a codepoint is decoded.
     *
     * For named entities that decode to two codepoints, this will be called
     * once per codepoint, with the same `consumed` value.
     *
     * @param codepoint The decoded codepoint.
     * @param consumed The number of characters consumed by the decoder.
     */
    emitCodePoint: (cp: number, consumed: number) => void,
    /** An object that is used to produce errors. */
    errors?: EntityErrorProducer | undefined);
    /** The current state of the decoder. */
    private state;
    /** Characters that were consumed while parsing an entity. */
    private consumed;
    /**
     * The result of the entity.
     *
     * Either the result index of a named entity, or the codepoint of a
     * numeric entity.
     */
    private result;
    /** The current index in the decode tree. */
    private treeIndex;
    /** The number of characters that were consumed in excess. */
    private excess;
    /** The mode in which the decoder is operating. */
    private decodeMode;
    /** Resets the instance to make it reusable. */
    startEntity(decodeMode: DecodingMode): void;
    /**
     * Write an entity to the decoder. This can be called multiple times with partial entities.
     * If the entity is incomplete, the decoder will return -1.
     *
     * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
     * entity is incomplete, and resume when the next string is written.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    write(input: string, offset: number): number;
    /**
     * Switches between the numeric decimal and hexadecimal states.
     *
     * Equivalent to the `Numeric character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericStart;
    /** Folds a run of digits from the input into the numeric result. */
    private addToNumericResult;
    /**
     * Parses a hexadecimal numeric entity.
     *
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericHex;
    /**
     * Parses a decimal numeric entity.
     *
     * Equivalent to the `Decimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericDecimal;
    /**
     * Validate and emit a numeric entity.
     *
     * Implements the logic from the `Hexadecimal character reference start
     * state` and `Numeric character reference end state` in the HTML spec.
     *
     * @param lastCp The last code point of the entity. Used to see if the
     *               entity was terminated with a semicolon.
     * @param expectedLength The minimum number of characters that should be
     *                       consumed. Used to validate that at least one digit
     *                       was consumed.
     * @returns The number of characters that were consumed.
     */
    private emitNumericEntity;
    /**
     * Parses a named entity.
     *
     * Equivalent to the `Named character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNamedEntity;
    /**
     * Emit a named entity that was not terminated with a semicolon.
     *
     * @returns The number of characters consumed.
     */
    private emitNotTerminatedNamedEntity;
    /**
     * Emit a named entity.
     *
     * @param result The index of the entity in the decode tree.
     * @param valueLength The number of bytes in the entity.
     * @param consumed The number of characters consumed.
     *
     * @returns The number of characters consumed.
     */
    private emitNamedEntityData;
    /**
     * Signal to the parser that the end of the input was reached.
     *
     * Remaining data will be emitted and relevant errors will be produced.
     *
     * @returns The number of characters consumed.
     */
    end(): number;
}
/**
 * Determines the branch of the current node that is taken given the current
 * character. This function is used to traverse the trie.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIndex The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIndex: number, char: number): number;
/**
 * Decodes an HTML string.
 *
 * @param htmlString The string to decode.
 * @param mode The decoding mode. Defaults to `DecodingMode.Legacy`.
 * @returns The decoded string.
 */
export declare function decodeHTML(htmlString: string, mode?: DecodingMode): string;
/**
 * Decodes an HTML string in an attribute, using `DecodingMode.Attribute`.
 *
 * @param htmlAttribute The string to decode.
 * @returns The decoded string.
 */
export declare function decodeHTMLAttribute(htmlAttribute: string): string;
/**
 * Decodes an HTML string, requiring all entities to be terminated by a semicolon
 * (`DecodingMode.Strict`).
 *
 * @param htmlString The string to decode.
 * @returns The decoded string.
 */
export declare function decodeHTMLStrict(htmlString: string): string;
/**
 * Decodes an XML string, requiring all entities to be terminated by a semicolon
 * (`DecodingMode.Strict`). Entities are looked up in the XML decode tree.
 *
 * @param xmlString The string to decode.
 * @returns The decoded string.
 */
export declare function decodeXML(xmlString: string): string;
export { htmlDecodeTree } from "./generated/decode-data-html.js";
export { xmlDecodeTree } from "./generated/decode-data-xml.js";
export { decodeCodePoint, replaceCodePoint, fromCodePoint, } from "./decode-codepoint.js";
//# sourceMappingURL=decode.d.ts.map
-1
View File
@@ -1 +0,0 @@
{"version":3,"file":"decode.d.ts","sourceRoot":"","sources":["../../src/decode.ts"],"names":[],"mappings":"AAsBA,oBAAY,YAAY;IACpB,YAAY,QAAwB;IACpC,aAAa,QAAwB;IACrC,UAAU,MAAwB;CACrC;AAuCD,oBAAY,YAAY;IACpB,8DAA8D;IAC9D,MAAM,IAAI;IACV,uDAAuD;IACvD,MAAM,IAAI;IACV,oEAAoE;IACpE,SAAS,IAAI;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAChC,uCAAuC,IAAI,IAAI,CAAC;IAChD,0CAA0C,CACtC,kBAAkB,EAAE,MAAM,GAC3B,IAAI,CAAC;IACR,iCAAiC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;CACzD;AAED;;GAEG;AACH,qBAAa,aAAa;IAElB,wCAAwC;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU;IAC3B;;;;;;;;OAQG;IACH,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,gDAAgD;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;;IAbxB,wCAAwC;IACvB,UAAU,EAAE,WAAW;IACxC;;;;;;;;OAQG;IACc,aAAa,EAAE,CAAC,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,KAAK,IAAI;IACtE,gDAAgD;IAC/B,MAAM,CAAC,EAAE,mBAAmB,GAAG,SAAS;IAG7D,wCAAwC;IACxC,OAAO,CAAC,KAAK,CAAkC;IAC/C,6DAA6D;IAC7D,OAAO,CAAC,QAAQ,CAAK;IACrB;;;;;OAKG;IACH,OAAO,CAAC,MAAM,CAAK;IAEnB,4CAA4C;IAC5C,OAAO,CAAC,SAAS,CAAK;IACtB,6DAA6D;IAC7D,OAAO,CAAC,MAAM,CAAK;IACnB,kDAAkD;IAClD,OAAO,CAAC,UAAU,CAAuB;IAEzC,+CAA+C;IAC/C,WAAW,CAAC,UAAU,EAAE,YAAY,GAAG,IAAI;IAS3C;;;;;;;;;;OAUG;IACH,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM;IA8B5C;;;;;;;;OAQG;IACH,OAAO,CAAC,iBAAiB;IAezB,OAAO,CAAC,kBAAkB;IAe1B;;;;;;;;OAQG;IACH,OAAO,CAAC,eAAe;IAkBvB;;;;;;;;OAQG;IACH,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;;;;;;OAYG;IACH,OAAO,CAAC,iBAAiB;IA6BzB;;;;;;;;OAQG;IACH,OAAO,CAAC,gBAAgB;IAsDxB;;;;OAIG;IACH,OAAO,CAAC,4BAA4B;IAYpC;;;;;;;;OAQG;IACH,OAAO,CAAC,mBAAmB;IAqB3B;;;;;;OAMG;IACH,GAAG,IAAI,MAAM;CA6BhB;AAoDD;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAC3B,UAAU,EAAE,WAAW,EACvB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,GACb,MAAM,CAsCR;AAKD;;;;;;GAMG;AACH,wBAAgB,UAAU,CACtB,UAAU,EAAE,MAAM,EAClB,IAAI,GAAE,YAAkC,GACzC,MAAM,CAER;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,CAEjE;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAE3D;AAED;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAEnD;AAGD,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EAA
E,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAE/D,OAAO,EACH,eAAe,EACf,gBAAgB,EAChB,aAAa,GAChB,MAAM,uBAAuB,CAAC"}
-497
View File
@@ -1,497 +0,0 @@
import { htmlDecodeTree } from "./generated/decode-data-html.js";
import { xmlDecodeTree } from "./generated/decode-data-xml.js";
import { replaceCodePoint, fromCodePoint } from "./decode-codepoint.js";
var CharCodes;
(function (CharCodes) {
CharCodes[CharCodes["NUM"] = 35] = "NUM";
CharCodes[CharCodes["SEMI"] = 59] = "SEMI";
CharCodes[CharCodes["EQUALS"] = 61] = "EQUALS";
CharCodes[CharCodes["ZERO"] = 48] = "ZERO";
CharCodes[CharCodes["NINE"] = 57] = "NINE";
CharCodes[CharCodes["LOWER_A"] = 97] = "LOWER_A";
CharCodes[CharCodes["LOWER_F"] = 102] = "LOWER_F";
CharCodes[CharCodes["LOWER_X"] = 120] = "LOWER_X";
CharCodes[CharCodes["LOWER_Z"] = 122] = "LOWER_Z";
CharCodes[CharCodes["UPPER_A"] = 65] = "UPPER_A";
CharCodes[CharCodes["UPPER_F"] = 70] = "UPPER_F";
CharCodes[CharCodes["UPPER_Z"] = 90] = "UPPER_Z";
})(CharCodes || (CharCodes = {}));
/**
 * Bit that needs to be set to convert an upper case ASCII character to lower case.
 *
 * OR-ing it into a character code allows a single comparison against the
 * lower case code to match both cases of a letter.
 */
const TO_LOWER_BIT = 32;
/** Bit masks used to unpack a node of the decode trie. */
export var BinTrieFlags;
(function (flags) {
    const masks = [
        ["VALUE_LENGTH", 0xc000],
        ["BRANCH_LENGTH", 0x3f80],
        ["JUMP_TABLE", 0x007f],
    ];
    // Register the forward (name -> mask) and the reverse (mask -> name) mapping.
    for (const [name, mask] of masks) {
        flags[name] = mask;
        flags[mask] = name;
    }
})(BinTrieFlags || (BinTrieFlags = {}));
/** Checks whether the character code is an ASCII digit (`0`-`9`). */
function isNumber(code) {
    const atLeastZero = code >= CharCodes.ZERO;
    return atLeastZero && code <= CharCodes.NINE;
}
/** Checks whether the character code is a hexadecimal letter (`A`-`F` or `a`-`f`). Digits are not covered. */
function isHexadecimalCharacter(code) {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F) {
        return true;
    }
    return code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F;
}
/** Checks whether the character code is an ASCII letter or an ASCII digit. */
function isAsciiAlphaNumeric(code) {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) {
        return true;
    }
    if (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) {
        return true;
    }
    return isNumber(code);
}
/**
 * Tells whether a character following an entity inside an attribute prevents
 * the entity from being decoded. That is the case for `=` and for every ASCII
 * letter or digit.
 *
 * Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
 * See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 */
function isEntityInAttributeInvalidEnd(code) {
    if (code === CharCodes.EQUALS) {
        return true;
    }
    return isAsciiAlphaNumeric(code);
}
/** States of the entity decoder's state machine. */
var EntityDecoderState;
(function (states) {
    const names = [
        "EntityStart",
        "NumericStart",
        "NumericDecimal",
        "NumericHex",
        "NamedEntity",
    ];
    // The position in the list is the numeric value of the state.
    names.forEach((name, value) => {
        states[name] = value;
        states[value] = name;
    });
})(EntityDecoderState || (EntityDecoderState = {}));
/** Controls how the decoder treats entities that are not terminated by a semicolon. */
export var DecodingMode;
(function (modes) {
    const names = [
        /** Entities in text nodes that can end with any character. */
        "Legacy",
        /** Only allow entities terminated with a semicolon. */
        "Strict",
        /** Entities in attributes have limitations on ending characters. */
        "Attribute",
    ];
    // The position in the list is the numeric value of the mode.
    names.forEach((name, value) => {
        modes[name] = value;
        modes[value] = name;
    });
})(DecodingMode || (DecodingMode = {}));
/**
 * Token decoder with support of writing partial entities.
 *
 * Call `startEntity` before every entity, pass the text that follows the `&`
 * to `write` (repeatedly, if the input arrives in chunks) and call `end` once
 * no more input will follow.
 */
export class EntityDecoder {
    constructor(
    /** The tree used to decode entities. */
    decodeTree,
    /**
     * The function that is called when a codepoint is decoded.
     *
     * For named entities that decode to two codepoints, this will be called
     * once per codepoint, with the same `consumed` value.
     *
     * @param codepoint The decoded codepoint.
     * @param consumed The number of characters consumed by the decoder.
     */
    emitCodePoint,
    /** An object that is used to produce errors. */
    errors) {
        this.decodeTree = decodeTree;
        this.emitCodePoint = emitCodePoint;
        this.errors = errors;
        /** The current state of the decoder. */
        this.state = EntityDecoderState.EntityStart;
        /** Characters that were consumed while parsing an entity. */
        this.consumed = 1;
        /**
         * The result of the entity.
         *
         * Either the result index of a named entity, or the codepoint of a
         * numeric entity.
         */
        this.result = 0;
        /** The current index in the decode tree. */
        this.treeIndex = 0;
        /** The number of characters that were consumed in excess. */
        this.excess = 1;
        /** The mode in which the decoder is operating. */
        this.decodeMode = DecodingMode.Strict;
    }
    /** Resets the instance to make it reusable. */
    startEntity(decodeMode) {
        this.decodeMode = decodeMode;
        this.state = EntityDecoderState.EntityStart;
        this.result = 0;
        this.treeIndex = 0;
        this.excess = 1;
        this.consumed = 1;
    }
    /**
     * Write an entity to the decoder. This can be called multiple times with partial entities.
     * If the entity is incomplete, the decoder will return -1.
     *
     * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
     * entity is incomplete, and resume when the next string is written.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    write(input, offset) {
        switch (this.state) {
            case EntityDecoderState.EntityStart: {
                // A leading `#` selects the numeric states; anything else is a named entity.
                if (input.charCodeAt(offset) === CharCodes.NUM) {
                    this.state = EntityDecoderState.NumericStart;
                    this.consumed += 1;
                    return this.stateNumericStart(input, offset + 1);
                }
                this.state = EntityDecoderState.NamedEntity;
                return this.stateNamedEntity(input, offset);
            }
            case EntityDecoderState.NumericStart: {
                return this.stateNumericStart(input, offset);
            }
            case EntityDecoderState.NumericDecimal: {
                return this.stateNumericDecimal(input, offset);
            }
            case EntityDecoderState.NumericHex: {
                return this.stateNumericHex(input, offset);
            }
            case EntityDecoderState.NamedEntity: {
                return this.stateNamedEntity(input, offset);
            }
        }
    }
    /**
     * Switches between the numeric decimal and hexadecimal states.
     *
     * Equivalent to the `Numeric character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericStart(input, offset) {
        if (offset >= input.length) {
            return -1;
        }
        // Setting the lower case bit matches both `x` and `X`.
        if ((input.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
            this.state = EntityDecoderState.NumericHex;
            this.consumed += 1;
            return this.stateNumericHex(input, offset + 1);
        }
        this.state = EntityDecoderState.NumericDecimal;
        return this.stateNumericDecimal(input, offset);
    }
    /**
     * Folds the digits in `input[start, end)` into the numeric result and
     * counts them as consumed. Does nothing for an empty range.
     *
     * @param input The string containing the digits.
     * @param start The index of the first digit.
     * @param end The index after the last digit.
     * @param base The radix of the digits (10 or 16).
     */
    addToNumericResult(input, start, end, base) {
        if (start !== end) {
            const digitCount = end - start;
            // Shift the digits parsed so far to the left, then append the new ones.
            this.result =
                this.result * Math.pow(base, digitCount) +
                    Number.parseInt(input.slice(start, end), base);
            this.consumed += digitCount;
        }
    }
    /**
     * Parses a hexadecimal numeric entity.
     *
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericHex(input, offset) {
        const startIndex = offset;
        while (offset < input.length) {
            const char = input.charCodeAt(offset);
            if (isNumber(char) || isHexadecimalCharacter(char)) {
                offset += 1;
            }
            else {
                this.addToNumericResult(input, startIndex, offset, 16);
                return this.emitNumericEntity(char, 3);
            }
        }
        // Ran out of input: keep the digits seen so far and wait for more.
        this.addToNumericResult(input, startIndex, offset, 16);
        return -1;
    }
    /**
     * Parses a decimal numeric entity.
     *
     * Equivalent to the `Decimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNumericDecimal(input, offset) {
        const startIndex = offset;
        while (offset < input.length) {
            const char = input.charCodeAt(offset);
            if (isNumber(char)) {
                offset += 1;
            }
            else {
                this.addToNumericResult(input, startIndex, offset, 10);
                return this.emitNumericEntity(char, 2);
            }
        }
        // Ran out of input: keep the digits seen so far and wait for more.
        this.addToNumericResult(input, startIndex, offset, 10);
        return -1;
    }
    /**
     * Validate and emit a numeric entity.
     *
     * Implements the logic from the `Hexadecimal character reference start
     * state` and `Numeric character reference end state` in the HTML spec.
     *
     * @param lastCp The last code point of the entity. Used to see if the
     *               entity was terminated with a semicolon.
     * @param expectedLength The minimum number of characters that should be
     *                       consumed. Used to validate that at least one digit
     *                       was consumed.
     * @returns The number of characters that were consumed.
     */
    emitNumericEntity(lastCp, expectedLength) {
        var _a;
        // Ensure we consumed at least one digit.
        if (this.consumed <= expectedLength) {
            (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
            return 0;
        }
        // Figure out if this is a legit end of the entity
        if (lastCp === CharCodes.SEMI) {
            this.consumed += 1;
        }
        else if (this.decodeMode === DecodingMode.Strict) {
            return 0;
        }
        this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
        if (this.errors) {
            if (lastCp !== CharCodes.SEMI) {
                this.errors.missingSemicolonAfterCharacterReference();
            }
            // The error producer receives the parsed value, not the replaced one.
            this.errors.validateNumericCharacterReference(this.result);
        }
        return this.consumed;
    }
    /**
     * Parses a named entity.
     *
     * Equivalent to the `Named character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    stateNamedEntity(input, offset) {
        const { decodeTree } = this;
        let current = decodeTree[this.treeIndex];
        // The mask is the number of bytes of the value, including the current byte.
        let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
        for (; offset < input.length; offset++, this.excess++) {
            const char = input.charCodeAt(offset);
            this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
            if (this.treeIndex < 0) {
                return this.result === 0 ||
                    // If we are parsing an attribute
                    (this.decodeMode === DecodingMode.Attribute &&
                        // We shouldn't have consumed any characters after the entity,
                        (valueLength === 0 ||
                            // And there should be no invalid characters.
                            isEntityInAttributeInvalidEnd(char)))
                    ? 0
                    : this.emitNotTerminatedNamedEntity();
            }
            current = decodeTree[this.treeIndex];
            valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
            // If the branch is a value, store it and continue
            if (valueLength !== 0) {
                // If the entity is terminated by a semicolon, we are done.
                if (char === CharCodes.SEMI) {
                    return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
                }
                // If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
                if (this.decodeMode !== DecodingMode.Strict) {
                    this.result = this.treeIndex;
                    this.consumed += this.excess;
                    this.excess = 0;
                }
            }
        }
        return -1;
    }
    /**
     * Emit a named entity that was not terminated with a semicolon.
     *
     * @returns The number of characters consumed.
     */
    emitNotTerminatedNamedEntity() {
        var _a;
        const { result, decodeTree } = this;
        const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
        this.emitNamedEntityData(result, valueLength, this.consumed);
        (_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
        return this.consumed;
    }
    /**
     * Emit a named entity.
     *
     * @param result The index of the entity in the decode tree.
     * @param valueLength The number of bytes in the entity.
     * @param consumed The number of characters consumed.
     *
     * @returns The number of characters consumed.
     */
    emitNamedEntityData(result, valueLength, consumed) {
        const { decodeTree } = this;
        // A value length of 1 means the code point is stored in the node itself,
        // next to the flag bits; otherwise it is stored in the following slot.
        this.emitCodePoint(valueLength === 1
            ? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
            : decodeTree[result + 1], consumed);
        if (valueLength === 3) {
            // For multi-byte values, we need to emit the second byte.
            this.emitCodePoint(decodeTree[result + 2], consumed);
        }
        return consumed;
    }
    /**
     * Signal to the parser that the end of the input was reached.
     *
     * Remaining data will be emitted and relevant errors will be produced.
     *
     * @returns The number of characters consumed.
     */
    end() {
        var _a;
        switch (this.state) {
            case EntityDecoderState.NamedEntity: {
                // Emit a named entity if we have one.
                return this.result !== 0 &&
                    (this.decodeMode !== DecodingMode.Attribute ||
                        this.result === this.treeIndex)
                    ? this.emitNotTerminatedNamedEntity()
                    : 0;
            }
            // Otherwise, emit a numeric entity if we have one.
            case EntityDecoderState.NumericDecimal: {
                return this.emitNumericEntity(0, 2);
            }
            case EntityDecoderState.NumericHex: {
                return this.emitNumericEntity(0, 3);
            }
            case EntityDecoderState.NumericStart: {
                (_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
                return 0;
            }
            case EntityDecoderState.EntityStart: {
                // Return 0 if we have no entity.
                return 0;
            }
        }
    }
}
/**
 * Builds a decoding function for the given decode tree.
 *
 * The returned function shares one `EntityDecoder` and one output buffer
 * between calls. The buffer is emptied before each call returns.
 *
 * @param decodeTree The decode tree.
 * @returns A function that decodes entities in a string.
 */
function getDecoder(decodeTree) {
    let buffer = "";
    const appendCodePoint = (codePoint) => (buffer += fromCodePoint(codePoint));
    const decoder = new EntityDecoder(decodeTree, appendCodePoint);
    return function decodeWithTrie(input, decodeMode) {
        // Everything before this index has already been copied or decoded.
        let copiedUpTo = 0;
        let searchFrom = 0;
        for (;;) {
            const ampersand = input.indexOf("&", searchFrom);
            if (ampersand < 0) {
                break;
            }
            buffer += input.slice(copiedUpTo, ampersand);
            decoder.startEntity(decodeMode);
            // Start decoding right after the "&".
            const consumed = decoder.write(input, ampersand + 1);
            if (consumed < 0) {
                // The input ended inside the entity; let the decoder flush what it has.
                copiedUpTo = ampersand + decoder.end();
                break;
            }
            copiedUpTo = ampersand + consumed;
            // Nothing was decoded: step over the current `&` and keep searching.
            searchFrom = consumed === 0 ? copiedUpTo + 1 : copiedUpTo;
        }
        const decoded = buffer + input.slice(copiedUpTo);
        // Make sure we don't keep a reference to the final string.
        buffer = "";
        return decoded;
    };
}
/**
 * Looks up the branch of the current trie node that matches a character.
 * This function is used to traverse the trie.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIndex The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
export function determineBranch(decodeTree, current, nodeIndex, char) {
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    // Case 1: Single branch encoded in jump offset
    if (branchCount === 0) {
        if (jumpOffset !== 0 && char === jumpOffset) {
            return nodeIndex;
        }
        return -1;
    }
    // Case 2: Multiple branches encoded in jump table
    if (jumpOffset) {
        const slot = char - jumpOffset;
        if (slot < 0 || slot >= branchCount) {
            return -1;
        }
        return decodeTree[nodeIndex + slot] - 1;
    }
    // Case 3: Multiple branches encoded in dictionary
    // Binary search for the character; the targets follow the keys.
    for (let low = nodeIndex, high = nodeIndex + branchCount - 1; low <= high;) {
        const middle = (low + high) >>> 1;
        const key = decodeTree[middle];
        if (key < char) {
            low = middle + 1;
            continue;
        }
        if (key > char) {
            high = middle - 1;
            continue;
        }
        return decodeTree[middle + branchCount];
    }
    return -1;
}
// Module-level decoding functions, one per decode tree. They are created once
// and reused by the exported `decode*` helpers.
const htmlDecoder = /* #__PURE__ */ getDecoder(htmlDecodeTree);
const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
/**
 * Decodes an HTML string.
 *
 * @param htmlString The string to decode.
 * @param mode The decoding mode. Defaults to `DecodingMode.Legacy`.
 * @returns The decoded string.
 */
export function decodeHTML(htmlString, mode = DecodingMode.Legacy) {
    return htmlDecoder(htmlString, mode);
}
/**
 * Decodes an HTML string in an attribute, using `DecodingMode.Attribute`.
 *
 * @param htmlAttribute The string to decode.
 * @returns The decoded string.
 */
export function decodeHTMLAttribute(htmlAttribute) {
    return htmlDecoder(htmlAttribute, DecodingMode.Attribute);
}
/**
 * Decodes an HTML string, requiring all entities to be terminated by a semicolon
 * (`DecodingMode.Strict`).
 *
 * @param htmlString The string to decode.
 * @returns The decoded string.
 */
export function decodeHTMLStrict(htmlString) {
    return htmlDecoder(htmlString, DecodingMode.Strict);
}
/**
 * Decodes an XML string, requiring all entities to be terminated by a semicolon
 * (`DecodingMode.Strict`). Entities are looked up in the XML decode tree.
 *
 * @param xmlString The string to decode.
 * @returns The decoded string.
 */
export function decodeXML(xmlString) {
    return xmlDecoder(xmlString, DecodingMode.Strict);
}
// Re-export for use by eg. htmlparser2
export { htmlDecodeTree } from "./generated/decode-data-html.js";
export { xmlDecodeTree } from "./generated/decode-data-xml.js";
export { decodeCodePoint, replaceCodePoint, fromCodePoint, } from "./decode-codepoint.js";
//# sourceMappingURL=decode.js.map
File diff suppressed because one or more lines are too long
-22
View File
@@ -1,22 +0,0 @@
/**
 * Encodes all characters in the input using HTML entities. This includes
 * characters that are valid ASCII characters in HTML documents, such as `#`.
 *
 * To get a more compact output, consider using the `encodeNonAsciiHTML`
 * function, which will only encode characters that are not valid in HTML
 * documents, as well as non-ASCII characters.
 *
 * If a character has no equivalent entity, a numeric hexadecimal reference
 * (eg. `&#xfc;`) will be used.
 *
 * @param input The string to encode.
 * @returns The encoded string.
 */
export declare function encodeHTML(input: string): string;
/**
 * Encodes all non-ASCII characters, as well as characters not valid in HTML
 * documents using HTML entities. This function will not encode characters that
 * are valid in HTML documents, such as `#`.
 *
 * If a character has no equivalent entity, a numeric hexadecimal reference
 * (eg. `&#xfc;`) will be used.
 *
 * @param input The string to encode.
 * @returns The encoded string.
 */
export declare function encodeNonAsciiHTML(input: string): string;
//# sourceMappingURL=encode.d.ts.map
-1
View File
@@ -1 +0,0 @@
{"version":3,"file":"encode.d.ts","sourceRoot":"","sources":["../../src/encode.ts"],"names":[],"mappings":"AAKA;;;;;;;;;;GAUG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEhD;AACD;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAExD"}
-69
View File
@@ -1,69 +0,0 @@
import { htmlTrie } from "./generated/encode-html.js";
import { xmlReplacer, getCodePoint } from "./escape.js";
const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
/**
 * Encodes all characters in the input using HTML entities. This includes
 * characters that are valid ASCII characters in HTML documents, such as `#`.
 *
 * To get a more compact output, consider using the `encodeNonAsciiHTML`
 * function, which will only encode characters that are not valid in HTML
 * documents, as well as non-ASCII characters.
 *
 * If a character has no equivalent entity, a numeric hexadecimal reference
 * (eg. `&#xfc;`) will be used.
 *
 * The characters to encode are selected by the module-level `htmlReplacer`
 * pattern; the actual replacement is done by `encodeHTMLTrieRe`.
 *
 * @param input The string to encode.
 * @returns The encoded string.
 */
export function encodeHTML(input) {
return encodeHTMLTrieRe(htmlReplacer, input);
}
/**
 * Encodes all non-ASCII characters, as well as characters not valid in HTML
 * documents using HTML entities. This function will not encode characters that
 * are valid in HTML documents, such as `#`.
 *
 * If a character has no equivalent entity, a numeric hexadecimal reference
 * (eg. `&#xfc;`) will be used.
 *
 * The characters to encode are selected by the `xmlReplacer` pattern imported
 * from `./escape.js`; the actual replacement is done by `encodeHTMLTrieRe`.
 *
 * @param input The string to encode.
 * @returns The encoded string.
 */
export function encodeNonAsciiHTML(input) {
return encodeHTMLTrieRe(xmlReplacer, input);
}
function encodeHTMLTrieRe(regExp, input) {
let returnValue = "";
let lastIndex = 0;
let match;
while ((match = regExp.exec(input)) !== null) {
const { index } = match;
returnValue += input.substring(lastIndex, index);
const char = input.charCodeAt(index);
let next = htmlTrie.get(char);
if (typeof next === "object") {
// We are in a branch. Try to match the next char.
if (index + 1 < input.length) {
const nextChar = input.charCodeAt(index + 1);
const value = typeof next.n === "number"
? next.n === nextChar
? next.o
: undefined
: next.n.get(nextChar);
if (value !== undefined) {
returnValue += value;
lastIndex = regExp.lastIndex += 1;
continue;
}
}
next = next.v;
}
// We might have a tree node without a value; skip and use a numeric entity.
if (next === undefined) {
const cp = getCodePoint(input, index);
returnValue += `&#x${cp.toString(16)};`;
// Increase by 1 if we have a surrogate pair
lastIndex = regExp.lastIndex += Number(cp !== char);
}
else {
returnValue += next;
lastIndex = index + 1;
}
}
return returnValue + input.substr(lastIndex);
}
//# sourceMappingURL=encode.js.map
-1
View File
@@ -1 +0,0 @@
{"version":3,"file":"encode.js","sourceRoot":"","sources":["../../src/encode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAExD,MAAM,YAAY,GAAG,sCAAsC,CAAC;AAE5D;;;;;;;;;;GAUG;AACH,MAAM,UAAU,UAAU,CAAC,KAAa;IACpC,OAAO,gBAAgB,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;AACjD,CAAC;AACD;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAa;IAC5C,OAAO,gBAAgB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,gBAAgB,CAAC,MAAc,EAAE,KAAa;IACnD,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC3C,MAAM,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC;QACxB,WAAW,IAAI,KAAK,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QACjD,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACrC,IAAI,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAE9B,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC3B,kDAAkD;YAClD,IAAI,KAAK,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;gBAC3B,MAAM,QAAQ,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;gBAC7C,MAAM,KAAK,GACP,OAAO,IAAI,CAAC,CAAC,KAAK,QAAQ;oBACtB,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,QAAQ;wBACjB,CAAC,CAAC,IAAI,CAAC,CAAC;wBACR,CAAC,CAAC,SAAS;oBACf,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAE/B,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;oBACtB,WAAW,IAAI,KAAK,CAAC;oBACrB,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,CAAC,CAAC;oBAClC,SAAS;gBACb,CAAC;YACL,CAAC;YAED,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,4EAA4E;QAC5E,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,GAAG,YAAY,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACtC,WAAW,IAAI,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC;YACxC,4CAA4C;YAC5C,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC;QACxD,CAAC;aAAM,CAAC;YACJ,WAAW,IAAI,IAAI,CAAC;YACpB,SAAS,GAAG,KAAK,GAAG,CAAC,CAAC;QAC1B,CAAC;IACL,CAAC;IAED,OAAO,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;AACjD,CAAC"}
-43
View File
@@ -1,43 +0,0 @@
export declare const xmlReplacer: RegExp;
export declare const getCodePoint: (c: string, index: number) => number;
/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using XML entities.
*
* If a character has no equivalent entity, a
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
*/
export declare function encodeXML(input: string): string;
/**
 * Alias of `encodeXML`: encodes all non-ASCII characters, as well as
 * characters not valid in XML documents. Characters with an XML entity use it;
 * everything else becomes a numeric hexadecimal reference (eg. `&#xfc;`).
 *
 * Have a look at `escapeUTF8` if you want a more concise output at the expense
 * of reduced transportability.
 *
 * @param data String to escape.
 */
export declare const escape: typeof encodeXML;
/**
* Encodes all characters not valid in XML documents using XML entities.
*
* Note that the output will be character-set dependent.
*
* @param data String to escape.
*/
export declare const escapeUTF8: (data: string) => string;
/**
* Encodes all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export declare const escapeAttribute: (data: string) => string;
/**
* Encodes all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export declare const escapeText: (data: string) => string;
//# sourceMappingURL=escape.d.ts.map
-1
View File
@@ -1 +0,0 @@
{"version":3,"file":"escape.d.ts","sourceRoot":"","sources":["../../src/escape.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,WAAW,EAAE,MAAiC,CAAC;AAW5D,eAAO,MAAM,YAAY,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,MAWoB,CAAC;AAE9E;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CA0B/C;AAED;;;;;;;;GAQG;AACH,eAAO,MAAM,MAAM,EAAE,OAAO,SAAqB,CAAC;AAqClD;;;;;;GAMG;AACH,eAAO,MAAM,UAAU,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAG1C,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,eAAe,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAQ3C,CAAC;AAEN;;;;;GAKG;AACH,eAAO,MAAM,UAAU,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAQ1C,CAAC"}
-117
View File
@@ -1,117 +0,0 @@
export const xmlReplacer = /["$&'<>\u0080-\uFFFF]/g;
const xmlCodeMap = new Map([
[34, "&quot;"],
[38, "&amp;"],
[39, "&apos;"],
[60, "&lt;"],
[62, "&gt;"],
]);
/**
 * Returns the Unicode code point that starts at `index`.
 *
 * Uses the native `String.prototype.codePointAt` when the runtime has it.
 * For compatibility with node < 4 a manual UTF-16 decoder is used instead;
 * like the native method, it returns a high surrogate unchanged when it is
 * not followed by a low surrogate.
 *
 * @param c (fallback) / input (native) The string to read from.
 * @param index Index of the first UTF-16 code unit of the code point.
 * @returns The code point at `index`.
 */
export const getCodePoint =
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    String.prototype.codePointAt == null
        ? (c, index) => {
            const lead = c.charCodeAt(index);
            // Anything that is not a high surrogate is already a full code point.
            if ((lead & 0xfc00) !== 0xd800) {
                return lead;
            }
            const trail = c.charCodeAt(index + 1);
            // Lone high surrogate (end of string or no low surrogate after it):
            // return it as-is instead of combining it with an unrelated unit.
            if ((trail & 0xfc00) !== 0xdc00) {
                return lead;
            }
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            return (lead - 0xd800) * 0x400 + trail - 0xdc00 + 0x10000;
        }
        : (input, index) => input.codePointAt(index);
/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using XML entities.
*
* If a character has no equivalent entity, a
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
*/
export function encodeXML(input) {
let returnValue = "";
let lastIndex = 0;
let match;
while ((match = xmlReplacer.exec(input)) !== null) {
const { index } = match;
const char = input.charCodeAt(index);
const next = xmlCodeMap.get(char);
if (next === undefined) {
returnValue += `${input.substring(lastIndex, index)}&#x${getCodePoint(input, index).toString(16)};`;
// Increase by 1 if we have a surrogate pair
lastIndex = xmlReplacer.lastIndex += Number((char & 64512) === 55296);
}
else {
returnValue += input.substring(lastIndex, index) + next;
lastIndex = index + 1;
}
}
return returnValue + input.substr(lastIndex);
}
/**
 * Alias of `encodeXML`: encodes all non-ASCII characters, as well as
 * characters not valid in XML documents. Characters present in `xmlCodeMap`
 * use their named XML entity; everything else becomes a numeric hexadecimal
 * reference (eg. `&#xfc;`).
 *
 * Have a look at `escapeUTF8` if you want a more concise output at the expense
 * of reduced transportability.
 *
 * @param data String to escape.
 */
export const escape = encodeXML;
/**
 * Builds an escaping function from a pattern and a replacement table.
 *
 * @param regex Global regular expression matching the single characters that
 *     must be escaped.
 * @param map Replacement table keyed by the character code of each match.
 *
 * @returns A function that returns its argument with every match replaced by
 *     the corresponding entry of `map`.
 */
function getEscaper(regex, map) {
    return function escape(data) {
        let escaped = "";
        let cursor = 0;
        for (let hit = regex.exec(data); hit; hit = regex.exec(data)) {
            const at = hit.index;
            // Carry over the unescaped text preceding this match, if any.
            if (cursor !== at) {
                escaped += data.substring(cursor, at);
            }
            // The pattern only matches characters that are keys of `map`.
            escaped += map.get(hit[0].charCodeAt(0));
            // Matches are always a single code unit long.
            cursor = at + 1;
        }
        return escaped + data.substring(cursor);
    };
}
/**
 * Encodes all characters not valid in XML documents using XML entities.
 *
 * Only `"`, `&`, `'`, `<` and `>` are replaced (using `xmlCodeMap`); all other
 * characters, including non-ASCII ones, are left untouched.
 *
 * Note that the output will be character-set dependent.
 *
 * @param data String to escape.
 */
export const escapeUTF8 = /* #__PURE__ */ getEscaper(/["&'<>]/g, xmlCodeMap);
/**
 * Encodes all characters that have to be escaped in HTML attributes,
 * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
 *
 * Replaces `"`, `&` and the non-breaking space (U+00A0); everything else is
 * left untouched.
 *
 * @param data String to escape.
 */
export const escapeAttribute =
/* #__PURE__ */ getEscaper(/["&\u00A0]/g, new Map([
[34, "&quot;"], // "
[38, "&amp;"], // &
[160, "&nbsp;"], // U+00A0 non-breaking space
]));
/**
 * Encodes all characters that have to be escaped in HTML text,
 * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
 *
 * Replaces `&`, `<`, `>` and the non-breaking space (U+00A0); everything else
 * is left untouched.
 *
 * @param data String to escape.
 */
export const escapeText = /* #__PURE__ */ getEscaper(/[&<>\u00A0]/g, new Map([
[38, "&amp;"], // &
[60, "&lt;"], // <
[62, "&gt;"], // >
[160, "&nbsp;"], // U+00A0 non-breaking space
]));
//# sourceMappingURL=escape.js.map
-1
View File
@@ -1 +0,0 @@
{"version":3,"file":"escape.js","sourceRoot":"","sources":["../../src/escape.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,WAAW,GAAW,wBAAwB,CAAC;AAE5D,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACvB,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,EAAE,MAAM,CAAC;CACf,CAAC,CAAC;AAEH,yDAAyD;AACzD,MAAM,CAAC,MAAM,YAAY;AACrB,uEAAuE;AACvE,MAAM,CAAC,SAAS,CAAC,WAAW,IAAI,IAAI;IAChC,CAAC,CAAC,CAAC,CAAS,EAAE,KAAa,EAAU,EAAE,CACjC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,KAAO,CAAC,KAAK,KAAO;QACvC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,KAAO,CAAC,GAAG,IAAM;YACxC,CAAC,CAAC,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC;YACvB,KAAO;YACP,KAAS;QACX,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC;IAC/B,CAAC,CAAC,uEAAuE;QACvE,CAAC,KAAa,EAAE,KAAa,EAAU,EAAE,CAAC,KAAK,CAAC,WAAW,CAAC,KAAK,CAAE,CAAC;AAE9E;;;;;;GAMG;AACH,MAAM,UAAU,SAAS,CAAC,KAAa;IACnC,IAAI,WAAW,GAAG,EAAE,CAAC;IACrB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAChD,MAAM,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC;QACxB,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QAElC,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACrB,WAAW,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,MAAM,YAAY,CACjE,KAAK,EACL,KAAK,CACR,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC;YAClB,4CAA4C;YAC5C,SAAS,GAAG,WAAW,CAAC,SAAS,IAAI,MAAM,CACvC,CAAC,IAAI,GAAG,KAAO,CAAC,KAAK,KAAO,CAC/B,CAAC;QACN,CAAC;aAAM,CAAC;YACJ,WAAW,IAAI,KAAK,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,IAAI,CAAC;YACxD,SAAS,GAAG,KAAK,GAAG,CAAC,CAAC;QAC1B,CAAC;IACL,CAAC;IAED,OAAO,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;AACjD,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,MAAM,GAAqB,SAAS,CAAC;AAElD;;;;;;;;;GASG;AACH,SAAS,UAAU,CACf,KAAa,EACb,GAAwB;IAExB,OAAO,SAAS,MAAM,CAAC,IAAY;QAC/B,IAAI,KAAK,CAAC;QACV,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YAChC,IAAI,SAA
S,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC;gBAC5B,MAAM,IAAI,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;YACrD,CAAC;YAED,kDAAkD;YAClD,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAE,CAAC;YAE3C,kCAAkC;YAClC,SAAS,GAAG,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;QAChC,CAAC;QAED,OAAO,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;IAC9C,CAAC,CAAC;AACN,CAAC;AAED;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,UAAU,GAA6B,eAAe,CAAC,UAAU,CAC1E,UAAU,EACV,UAAU,CACb,CAAC;AAEF;;;;;GAKG;AACH,MAAM,CAAC,MAAM,eAAe;AACxB,eAAe,CAAC,UAAU,CACtB,aAAa,EACb,IAAI,GAAG,CAAC;IACJ,CAAC,EAAE,EAAE,QAAQ,CAAC;IACd,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,GAAG,EAAE,QAAQ,CAAC;CAClB,CAAC,CACL,CAAC;AAEN;;;;;GAKG;AACH,MAAM,CAAC,MAAM,UAAU,GAA6B,eAAe,CAAC,UAAU,CAC1E,cAAc,EACd,IAAI,GAAG,CAAC;IACJ,CAAC,EAAE,EAAE,OAAO,CAAC;IACb,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,EAAE,MAAM,CAAC;IACZ,CAAC,GAAG,EAAE,QAAQ,CAAC;CAClB,CAAC,CACL,CAAC"}
@@ -1,2 +0,0 @@
export declare const htmlDecodeTree: Uint16Array;
//# sourceMappingURL=decode-data-html.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-data-html.d.ts","sourceRoot":"","sources":["../../../src/generated/decode-data-html.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,cAAc,EAAE,WAK5B,CAAC"}
File diff suppressed because one or more lines are too long
@@ -1 +0,0 @@
{"version":3,"file":"decode-data-html.js","sourceRoot":"","sources":["../../../src/generated/decode-data-html.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAE9C,MAAM,CAAC,MAAM,cAAc,GAAgB,eAAe,CAAC,IAAI,WAAW;AACtE,kBAAkB;AAClB,eAAe,CAAC,268CAA268C;KACt78C,KAAK,CAAC,EAAE,CAAC;KACT,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CACnC,CAAC"}
@@ -1,2 +0,0 @@
export declare const xmlDecodeTree: Uint16Array;
//# sourceMappingURL=decode-data-xml.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-data-xml.d.ts","sourceRoot":"","sources":["../../../src/generated/decode-data-xml.ts"],"names":[],"mappings":"AAEA,eAAO,MAAM,aAAa,EAAE,WAK3B,CAAC"}
@@ -1,7 +0,0 @@
// Generated using scripts/write-decode-map.ts
export const xmlDecodeTree = /* #__PURE__ */ new Uint16Array(
// prettier-ignore
/* #__PURE__ */ "\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
.split("")
.map((c) => c.charCodeAt(0)));
//# sourceMappingURL=decode-data-xml.js.map
@@ -1 +0,0 @@
{"version":3,"file":"decode-data-xml.js","sourceRoot":"","sources":["../../../src/generated/decode-data-xml.ts"],"names":[],"mappings":"AAAA,8CAA8C;AAE9C,MAAM,CAAC,MAAM,aAAa,GAAgB,eAAe,CAAC,IAAI,WAAW;AACrE,kBAAkB;AAClB,eAAe,CAAC,uFAAuF;KAClG,KAAK,CAAC,EAAE,CAAC;KACT,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CACnC,CAAC"}
@@ -1,8 +0,0 @@
/**
 * A node of the HTML encoding trie, keyed by UTF-16 code unit.
 *
 * A plain string is the entity for the character itself. An object node is
 * used when the character can also start a two-character entity.
 */
type EncodeTrieNode = string | {
/** Entity for the character on its own, if it has one. */
v?: string;
/** The single next code unit that forms an entity, or a map from next code unit to its node. */
n: number | Map<number, EncodeTrieNode>;
/** Entity for the two-character sequence when `n` is a number. */
o?: string;
};
export declare const htmlTrie: Map<number, EncodeTrieNode>;
export {};
//# sourceMappingURL=encode-html.d.ts.map
@@ -1 +0,0 @@
{"version":3,"file":"encode-html.d.ts","sourceRoot":"","sources":["../../../src/generated/encode-html.ts"],"names":[],"mappings":"AAEA,KAAK,cAAc,GACb,MAAM,GACN;IAAE,CAAC,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;IAAC,CAAC,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAY1E,eAAO,MAAM,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAC,cAAc,CAAwhuB,CAAC"}
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
-96
View File
@@ -1,96 +0,0 @@
import { DecodingMode } from "./decode.js";
/** The level of entities to support. */
export declare enum EntityLevel {
/** Support only XML entities. */
XML = 0,
/** Support HTML entities, which are a superset of XML entities. */
HTML = 1
}
export declare enum EncodingMode {
/**
* The output is UTF-8 encoded. Only characters that need escaping within
* XML will be escaped.
*/
UTF8 = 0,
/**
* The output consists only of ASCII characters. Characters that need
* escaping within HTML, and characters that aren't ASCII characters will
* be escaped.
*/
ASCII = 1,
/**
* Encode all characters that have an equivalent entity, as well as all
* characters that are not ASCII characters.
*/
Extensive = 2,
/**
* Encode all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*/
Attribute = 3,
/**
* Encode all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*/
Text = 4
}
/** Options for `decode` and `decodeStrict`. */
export interface DecodingOptions {
/**
 * The level of entities to support.
 * @default {@link EntityLevel.XML}
 */
level?: EntityLevel;
/**
 * Decoding mode. If `Legacy`, will support legacy entities not terminated
 * with a semicolon (`;`).
 *
 * Always `Strict` for XML. For HTML, set this to `DecodingMode.Attribute`
 * if you are parsing an attribute value.
 *
 * The deprecated `decodeStrict` function defaults this to `Strict`.
 *
 * @default {@link DecodingMode.Legacy}
 */
mode?: DecodingMode | undefined;
}
/**
* Decodes a string with entities.
*
* @param input String to decode.
* @param options Decoding options.
*/
export declare function decode(input: string, options?: DecodingOptions | EntityLevel): string;
/**
* Decodes a string with entities. Does not allow missing trailing semicolons for entities.
*
* @param input String to decode.
* @param options Decoding options.
* @deprecated Use `decode` with the `mode` set to `Strict`.
*/
export declare function decodeStrict(input: string, options?: DecodingOptions | EntityLevel): string;
/**
* Options for `encode`.
*/
export interface EncodingOptions {
/**
* The level of entities to support.
* @default {@link EntityLevel.XML}
*/
level?: EntityLevel;
/**
* Output format.
* @default {@link EncodingMode.Extensive}
*/
mode?: EncodingMode;
}
/**
* Encodes a string with entities.
*
* @param input String to encode.
* @param options Encoding options.
*/
export declare function encode(input: string, options?: EncodingOptions | EntityLevel): string;
export { encodeXML, escape, escapeUTF8, escapeAttribute, escapeText, } from "./escape.js";
export { encodeHTML, encodeNonAsciiHTML, encodeHTML as encodeHTML4, encodeHTML as encodeHTML5, } from "./encode.js";
export { EntityDecoder, DecodingMode, decodeXML, decodeHTML, decodeHTMLStrict, decodeHTMLAttribute, decodeHTML as decodeHTML4, decodeHTML as decodeHTML5, decodeHTMLStrict as decodeHTML4Strict, decodeHTMLStrict as decodeHTML5Strict, decodeXML as decodeXMLStrict, } from "./decode.js";
//# sourceMappingURL=index.d.ts.map
-1
View File
@@ -1 +0,0 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,aAAa,CAAC;AASlE,wCAAwC;AACxC,oBAAY,WAAW;IACnB,iCAAiC;IACjC,GAAG,IAAI;IACP,mEAAmE;IACnE,IAAI,IAAI;CACX;AAED,oBAAY,YAAY;IACpB;;;OAGG;IACH,IAAI,IAAA;IACJ;;;;OAIG;IACH,KAAK,IAAA;IACL;;;OAGG;IACH,SAAS,IAAA;IACT;;;OAGG;IACH,SAAS,IAAA;IACT;;;OAGG;IACH,IAAI,IAAA;CACP;AAED,MAAM,WAAW,eAAe;IAC5B;;;OAGG;IACH,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB;;;;;;;;;;OAUG;IACH,IAAI,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;CACnC;AAED;;;;;GAKG;AACH,wBAAgB,MAAM,CAClB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,eAAe,GAAG,WAA6B,GACzD,MAAM,CASR;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CACxB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,eAAe,GAAG,WAA6B,GACzD,MAAM,CAMR;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC5B;;;OAGG;IACH,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB;;;OAGG;IACH,IAAI,CAAC,EAAE,YAAY,CAAC;CACvB;AAED;;;;;GAKG;AACH,wBAAgB,MAAM,CAClB,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,eAAe,GAAG,WAA6B,GACzD,MAAM,CA2BR;AAED,OAAO,EACH,SAAS,EACT,MAAM,EACN,UAAU,EACV,eAAe,EACf,UAAU,GACb,MAAM,aAAa,CAAC;AAErB,OAAO,EACH,UAAU,EACV,kBAAkB,EAElB,UAAU,IAAI,WAAW,EACzB,UAAU,IAAI,WAAW,GAC5B,MAAM,aAAa,CAAC;AAErB,OAAO,EACH,aAAa,EACb,YAAY,EACZ,SAAS,EACT,UAAU,EACV,gBAAgB,EAChB,mBAAmB,EAEnB,UAAU,IAAI,WAAW,EACzB,UAAU,IAAI,WAAW,EACzB,gBAAgB,IAAI,iBAAiB,EACrC,gBAAgB,IAAI,iBAAiB,EACrC,SAAS,IAAI,eAAe,GAC/B,MAAM,aAAa,CAAC"}
-107
View File
@@ -1,107 +0,0 @@
import { decodeXML, decodeHTML, DecodingMode } from "./decode.js";
import { encodeHTML, encodeNonAsciiHTML } from "./encode.js";
import { encodeXML, escapeUTF8, escapeAttribute, escapeText, } from "./escape.js";
/**
 * The level of entities to support.
 *
 * Compiled numeric enum: each member is stored both as name -> number and as
 * number -> name on the same object.
 */
export var EntityLevel;
(function (EntityLevel) {
/** Support only XML entities. */
EntityLevel[EntityLevel["XML"] = 0] = "XML";
/** Support HTML entities, which are a superset of XML entities. */
EntityLevel[EntityLevel["HTML"] = 1] = "HTML";
})(EntityLevel || (EntityLevel = {}));
/**
 * Output modes accepted by `encode`.
 *
 * Compiled numeric enum: each member is stored both as name -> number and as
 * number -> name on the same object.
 */
export var EncodingMode;
(function (EncodingMode) {
/**
 * The output is UTF-8 encoded. Only characters that need escaping within
 * XML will be escaped. `encode` handles this mode with `escapeUTF8`.
 */
EncodingMode[EncodingMode["UTF8"] = 0] = "UTF8";
/**
 * The output consists only of ASCII characters. Characters that need
 * escaping within HTML, and characters that aren't ASCII characters will
 * be escaped. `encode` handles this mode with `encodeNonAsciiHTML` at the
 * HTML level and `encodeXML` otherwise.
 */
EncodingMode[EncodingMode["ASCII"] = 1] = "ASCII";
/**
 * Encode all characters that have an equivalent entity, as well as all
 * characters that are not ASCII characters. `encode` handles this mode
 * with `encodeHTML` at the HTML level and `encodeXML` otherwise.
 */
EncodingMode[EncodingMode["Extensive"] = 2] = "Extensive";
/**
 * Encode all characters that have to be escaped in HTML attributes,
 * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
 * `encode` handles this mode with `escapeAttribute`.
 */
EncodingMode[EncodingMode["Attribute"] = 3] = "Attribute";
/**
 * Encode all characters that have to be escaped in HTML text,
 * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
 * `encode` handles this mode with `escapeText`.
 */
EncodingMode[EncodingMode["Text"] = 4] = "Text";
})(EncodingMode || (EncodingMode = {}));
/**
* Decodes a string with entities.
*
* @param input String to decode.
* @param options Decoding options.
*/
export function decode(input, options = EntityLevel.XML) {
const level = typeof options === "number" ? options : options.level;
if (level === EntityLevel.HTML) {
const mode = typeof options === "object" ? options.mode : undefined;
return decodeHTML(input, mode);
}
return decodeXML(input);
}
/**
* Decodes a string with entities. Does not allow missing trailing semicolons for entities.
*
* @param input String to decode.
* @param options Decoding options.
* @deprecated Use `decode` with the `mode` set to `Strict`.
*/
export function decodeStrict(input, options = EntityLevel.XML) {
var _a;
const normalizedOptions = typeof options === "number" ? { level: options } : options;
(_a = normalizedOptions.mode) !== null && _a !== void 0 ? _a : (normalizedOptions.mode = DecodingMode.Strict);
return decode(input, normalizedOptions);
}
/**
* Encodes a string with entities.
*
* @param input String to encode.
* @param options Encoding options.
*/
export function encode(input, options = EntityLevel.XML) {
const { mode = EncodingMode.Extensive, level = EntityLevel.XML } = typeof options === "number" ? { level: options } : options;
switch (mode) {
case EncodingMode.UTF8: {
return escapeUTF8(input);
}
case EncodingMode.Attribute: {
return escapeAttribute(input);
}
case EncodingMode.Text: {
return escapeText(input);
}
case EncodingMode.ASCII: {
return level === EntityLevel.HTML
? encodeNonAsciiHTML(input)
: encodeXML(input);
}
// eslint-disable-next-line unicorn/no-useless-switch-case
case EncodingMode.Extensive:
default: {
return level === EntityLevel.HTML
? encodeHTML(input)
: encodeXML(input);
}
}
}
export { encodeXML, escape, escapeUTF8, escapeAttribute, escapeText, } from "./escape.js";
export { encodeHTML, encodeNonAsciiHTML,
// Legacy aliases (deprecated)
encodeHTML as encodeHTML4, encodeHTML as encodeHTML5, } from "./encode.js";
export { EntityDecoder, DecodingMode, decodeXML, decodeHTML, decodeHTMLStrict, decodeHTMLAttribute,
// Legacy aliases (deprecated)
decodeHTML as decodeHTML4, decodeHTML as decodeHTML5, decodeHTMLStrict as decodeHTML4Strict, decodeHTMLStrict as decodeHTML5Strict, decodeXML as decodeXMLStrict, } from "./decode.js";
//# sourceMappingURL=index.js.map
-1
View File
@@ -1 +0,0 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAC7D,OAAO,EACH,SAAS,EACT,UAAU,EACV,eAAe,EACf,UAAU,GACb,MAAM,aAAa,CAAC;AAErB,wCAAwC;AACxC,MAAM,CAAN,IAAY,WAKX;AALD,WAAY,WAAW;IACnB,iCAAiC;IACjC,2CAAO,CAAA;IACP,mEAAmE;IACnE,6CAAQ,CAAA;AACZ,CAAC,EALW,WAAW,KAAX,WAAW,QAKtB;AAED,MAAM,CAAN,IAAY,YA2BX;AA3BD,WAAY,YAAY;IACpB;;;OAGG;IACH,+CAAI,CAAA;IACJ;;;;OAIG;IACH,iDAAK,CAAA;IACL;;;OAGG;IACH,yDAAS,CAAA;IACT;;;OAGG;IACH,yDAAS,CAAA;IACT;;;OAGG;IACH,+CAAI,CAAA;AACR,CAAC,EA3BW,YAAY,KAAZ,YAAY,QA2BvB;AAsBD;;;;;GAKG;AACH,MAAM,UAAU,MAAM,CAClB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;IAExD,MAAM,KAAK,GAAG,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAEpE,IAAI,KAAK,KAAK,WAAW,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;QACpE,OAAO,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC5B,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CACxB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;;IAExD,MAAM,iBAAiB,GACnB,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC/D,MAAA,iBAAiB,CAAC,IAAI,oCAAtB,iBAAiB,CAAC,IAAI,GAAK,YAAY,CAAC,MAAM,EAAC;IAE/C,OAAO,MAAM,CAAC,KAAK,EAAE,iBAAiB,CAAC,CAAC;AAC5C,CAAC;AAkBD;;;;;GAKG;AACH,MAAM,UAAU,MAAM,CAClB,KAAa,EACb,UAAyC,WAAW,CAAC,GAAG;IAExD,MAAM,EAAE,IAAI,GAAG,YAAY,CAAC,SAAS,EAAE,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,GAC5D,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAE/D,QAAQ,IAAI,EAAE,CAAC;QACX,KAAK,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;YACrB,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC;QACD,KAAK,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1B,OAAO,eAAe,CAAC,KAAK,CAAC,CAAC;QAClC,CAAC;QACD,KAAK,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;YACrB,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC;QACD,KAAK,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC;YACtB,OAAO,KAAK,KAAK,WAAW,CAAC,IAAI;gBAC7B,CA
AC,CAAC,kBAAkB,CAAC,KAAK,CAAC;gBAC3B,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;QACD,0DAA0D;QAC1D,KAAK,YAAY,CAAC,SAAS,CAAC;QAC5B,OAAO,CAAC,CAAC,CAAC;YACN,OAAO,KAAK,KAAK,WAAW,CAAC,IAAI;gBAC7B,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC;gBACnB,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;IACL,CAAC;AACL,CAAC;AAED,OAAO,EACH,SAAS,EACT,MAAM,EACN,UAAU,EACV,eAAe,EACf,UAAU,GACb,MAAM,aAAa,CAAC;AAErB,OAAO,EACH,UAAU,EACV,kBAAkB;AAClB,8BAA8B;AAC9B,UAAU,IAAI,WAAW,EACzB,UAAU,IAAI,WAAW,GAC5B,MAAM,aAAa,CAAC;AAErB,OAAO,EACH,aAAa,EACb,YAAY,EACZ,SAAS,EACT,UAAU,EACV,gBAAgB,EAChB,mBAAmB;AACnB,8BAA8B;AAC9B,UAAU,IAAI,WAAW,EACzB,UAAU,IAAI,WAAW,EACzB,gBAAgB,IAAI,iBAAiB,EACrC,gBAAgB,IAAI,iBAAiB,EACrC,SAAS,IAAI,eAAe,GAC/B,MAAM,aAAa,CAAC"}
-3
View File
@@ -1,3 +0,0 @@
{
"type": "module"
}
-1
View File
@@ -1 +0,0 @@
export * from "./dist/commonjs/escape.js";
-3
View File
@@ -1,3 +0,0 @@
// Make exports work in Node < 12
// eslint-disable-next-line no-undef, unicorn/prefer-module
module.exports = require("./dist/commonjs/escape.js");
+38 -73
View File
@@ -1,6 +1,6 @@
{
"name": "entities",
"version": "6.0.1",
"version": "8.0.0",
"description": "Encode & decode XML and HTML entities with ease & speed",
"keywords": [
"html entities",
@@ -13,7 +13,7 @@
],
"repository": {
"type": "git",
"url": "git://github.com/fb55/entities.git"
"url": "https://github.com/fb55/entities.git"
},
"funding": "https://github.com/fb55/entities?sponsor=1",
"license": "BSD-2-Clause",
@@ -22,97 +22,62 @@
"type": "module",
"exports": {
".": {
"import": {
"types": "./dist/esm/index.d.ts",
"default": "./dist/esm/index.js"
},
"require": {
"types": "./dist/commonjs/index.d.ts",
"default": "./dist/commonjs/index.js"
}
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
},
"./decode": {
"import": {
"types": "./dist/esm/decode.d.ts",
"default": "./dist/esm/decode.js"
},
"require": {
"types": "./dist/commonjs/decode.d.ts",
"default": "./dist/commonjs/decode.js"
}
"types": "./dist/decode.d.ts",
"default": "./dist/decode.js"
},
"./escape": {
"import": {
"types": "./dist/esm/escape.d.ts",
"default": "./dist/esm/escape.js"
},
"require": {
"types": "./dist/commonjs/escape.d.ts",
"default": "./dist/commonjs/escape.js"
}
"types": "./dist/escape.d.ts",
"default": "./dist/escape.js"
}
},
"main": "./dist/commonjs/index.js",
"module": "./dist/esm/index.js",
"types": "./dist/commonjs/index.d.ts",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"files": [
"decode.js",
"decode.d.ts",
"escape.js",
"escape.d.ts",
"dist",
"src"
"src",
"!**/*.spec.ts"
],
"scripts": {
"benchmark": "node --import=tsx scripts/benchmark.ts",
"build": "tsc",
"build:docs": "typedoc --hideGenerator src/index.ts",
"build:encode-trie": "node --import=tsx scripts/write-encode-map.ts",
"build:trie": "node --import=tsx scripts/write-decode-map.ts",
"format": "npm run format:es && npm run format:prettier",
"format": "npm run format:es && npm run format:biome",
"format:biome": "biome check --fix .",
"format:es": "npm run lint:es -- --fix",
"format:prettier": "npm run prettier -- --write",
"lint": "npm run lint:es && npm run lint:ts && npm run lint:prettier",
"lint:es": "eslint . --ignore-path .gitignore",
"lint:prettier": "npm run prettier -- --check",
"lint": "npm run lint:es && npm run lint:ts && npm run lint:biome",
"lint:biome": "biome check .",
"lint:es": "eslint .",
"lint:ts": "tsc --noEmit",
"prepublishOnly": "tshy",
"prettier": "prettier '**/*.{ts,md,json,yml}'",
"prepublishOnly": "npm run build",
"test": "npm run test:vi && npm run lint",
"test:vi": "vitest run"
},
"prettier": {
"proseWrap": "always",
"tabWidth": 4
},
"devDependencies": {
"@types/node": "^22.15.30",
"@typescript-eslint/eslint-plugin": "^8.33.1",
"@typescript-eslint/parser": "^8.33.1",
"@vitest/coverage-v8": "^2.1.8",
"eslint": "^8.57.1",
"eslint-config-prettier": "^10.1.5",
"eslint-plugin-n": "^17.19.0",
"eslint-plugin-unicorn": "^56.0.1",
"prettier": "^3.5.3",
"tshy": "^3.0.2",
"tsx": "^4.19.4",
"typedoc": "^0.28.5",
"typescript": "^5.8.3",
"vitest": "^2.0.2"
"@biomejs/biome": "^2.4.7",
"@eslint/compat": "^2.0.3",
"@feedic/eslint-config": "^0.3.1",
"@types/he": "^1.2.3",
"@types/node": "^25.5.0",
"eslint": "^10.0.3",
"eslint-config-biome": "^2.1.3",
"globals": "^17.4.0",
"he": "^1.2.0",
"html-entities": "^2.6.0",
"parse-entities": "^4.0.2",
"tinybench": "^6.0.0",
"tsx": "^4.21.0",
"typedoc": "^0.28.17",
"typescript": "^5.9.3",
"typescript-eslint": "^8.57.1",
"vitest": "^4.0.17"
},
"engines": {
"node": ">=0.12"
},
"tshy": {
"exclude": [
"**/*.spec.ts",
"**/__fixtures__/*",
"**/__tests__/*",
"**/__snapshots__/*"
],
"exports": {
".": "./src/index.ts",
"./decode": "./src/decode.ts",
"./escape": "./src/escape.ts"
}
"node": ">=20.19.0"
}
}
+36 -27
View File
@@ -10,7 +10,7 @@ Encode & decode HTML & XML entities with ease & speed.
[`commonmark`](https://github.com/commonmark/commonmark.js) use it to process
HTML entities.
- ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as of
April 2022); see [performance](#performance).
September 2025); see [performance](#performance).
- 🎛 Configurable: Get an output tailored for your needs. You are fine with
UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We
can do that as well!
@@ -24,7 +24,7 @@ Encode & decode HTML & XML entities with ease & speed.
### …use `entities`
```javascript
const entities = require("entities");
import * as entities from "entities";
// Encoding
entities.escapeUTF8("&#38; ü"); // "&amp;#38; ü"
@@ -38,15 +38,36 @@ entities.decodeHTML("asdf &amp; &yuml; &uuml; &apos;"); // "asdf & ÿ ü '"
## Performance
This is how `entities` compares to other libraries on a very basic benchmark
(see `scripts/benchmark.ts`, for 10,000,000 iterations; **lower is better**):
Benchmarked in September 2025 with Node v24.6.0 on Apple M2 using `tinybench`.
Higher ops/s is better; `avg (μs)` is the mean time per operation.
See `scripts/benchmark.ts` to reproduce.
| Library | Version | `decode` perf | `encode` perf | `escape` perf |
| -------------- | ------- | ------------- | ------------- | ------------- |
| entities | `3.0.1` | 1.418s | 6.786s | 2.196s |
| html-entities | `2.3.2` | 2.530s | 6.829s | 2.415s |
| he | `1.2.0` | 5.800s | 24.237s | 3.624s |
| parse-entities | `3.0.0` | 9.660s | N/A | N/A |
### Decoding
| Library | Version | ops/s | avg (μs) | ±% | slower |
| -------------- | ------- | --------- | -------- | ---- | ------ |
| entities | 7.0.0 | 5,838,416 | 175.57 | 0.06 | — |
| html-entities | 2.6.0 | 2,919,637 | 347.77 | 0.33 | 50.0% |
| he | 1.2.0 | 2,318,438 | 446.48 | 0.70 | 60.3% |
| parse-entities | 4.0.2 | 852,855 | 1,199.51 | 0.36 | 85.4% |
### Encoding
| Library | Version | ops/s | avg (μs) | ±% | slower |
| -------------- | ------- | --------- | -------- | ---- | ------ |
| entities | 7.0.0 | 2,770,115 | 368.09 | 0.11 | — |
| html-entities | 2.6.0 | 1,491,963 | 679.96 | 0.58 | 46.2% |
| he | 1.2.0 | 481,278 | 2,118.25 | 0.61 | 82.6% |
### Escaping
| Library | Version | ops/s | avg (μs) | ±% | slower |
| -------------- | ------- | --------- | -------- | ---- | ------ |
| entities | 7.0.0 | 4,616,468 | 223.84 | 0.17 | — |
| he | 1.2.0 | 3,659,301 | 280.76 | 0.58 | 20.7% |
| html-entities | 2.6.0 | 3,555,301 | 296.63 | 0.84 | 23.0% |
Note: Micro-benchmarks may vary across machines and Node versions.
---
@@ -68,8 +89,8 @@ This is helpful for decoding entities in legacy environments.
> Why should I use `entities` instead of alternative modules?
As of April 2022, `entities` is a bit faster than other modules. Still, this is
not a very differentiated space and other modules can catch up.
As of September 2025, `entities` is faster than other modules. Still, this is
not a differentiated space and other modules can catch up.
**More importantly**, you might already have `entities` in your dependency graph
(as a dependency of e.g. `cheerio` or `htmlparser2`), and including it directly
@@ -78,10 +99,9 @@ libraries, so have a look through your `node_modules` directory!
> Does `entities` support tree shaking?
Yes! `entities` ships as both a CommonJS and an ES module. Note that for best
results, you should not use the `encode` and `decode` functions, as they wrap
around a number of other functions, all of which will remain in the bundle.
Instead, use the functions that you need directly.
Yes! Note that for best results, you should not use the `encode` and `decode`
functions, as they wrap around a number of other functions, all of which will
remain in the bundle. Instead, use the functions that you need directly.
---
@@ -109,14 +129,3 @@ License: BSD-2-Clause
To report a security vulnerability, please use the
[Tidelift security contact](https://tidelift.com/security). Tidelift will
coordinate the fix and disclosure.
## `entities` for enterprise
Available as part of the Tidelift Subscription
The maintainers of `entities` and thousands of other packages are working with
Tidelift to deliver commercial support and maintenance for the open source
dependencies you use to build your applications. Save time, reduce risk, and
improve code health, while paying the maintainers of the exact dependencies you
use.
[Learn more.](https://tidelift.com/subscription/pkg/npm-entities?utm_source=npm-entities&utm_medium=referral&utm_campaign=enterprise&utm_term=repo)
+1 -32
View File
@@ -32,31 +32,11 @@ const decodeMap = new Map([
[159, 376],
]);
/**
 * Fallback for `String.fromCodePoint`, used when the runtime does not provide it.
 *
 * Every code point above U+FFFF is split into a UTF-16 surrogate pair; any
 * other value is emitted as a single UTF-16 code unit.
 * @param codePoints Unicode code points to convert.
 * @returns The concatenation of the strings produced for each code point
 * (the empty string when no code points are given).
 */
function fromCodePointFallback(...codePoints: number[]): string {
    let output = "";

    for (let codePoint of codePoints) {
        if (codePoint > 0xff_ff) {
            // Astral code point: emit the high surrogate now, the low one below.
            codePoint -= 0x1_00_00;
            output += String.fromCharCode(
                ((codePoint >>> 10) & 0x3_ff) | 0xd8_00,
            );
            codePoint = 0xdc_00 | (codePoint & 0x3_ff);
        }

        output += String.fromCharCode(codePoint);
    }

    return output;
}

/**
 * Polyfill for `String.fromCodePoint`. It is used to create a string from one
 * or more Unicode code points.
 *
 * The native implementation is used when available; otherwise the
 * hand-written fallback is used, which accepts any number of code points,
 * as the declared signature promises.
 */
export const fromCodePoint: (...codePoints: number[]) => string =
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
    String.fromCodePoint ?? fromCodePointFallback;
/**
* Replace the given code point with a replacement character if it is a
* surrogate or is outside the valid range. Otherwise return the code
* point unchanged.
* @param codePoint Unicode code point to convert.
*/
export function replaceCodePoint(codePoint: number): number {
if (
@@ -68,14 +48,3 @@ export function replaceCodePoint(codePoint: number): number {
return decodeMap.get(codePoint) ?? codePoint;
}
/**
 * Run the code point through `replaceCodePoint`, then turn the result into
 * a string with `fromCodePoint`.
 *
 * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
 * @param codePoint The code point to decode.
 * @returns The decoded code point.
 */
export function decodeCodePoint(codePoint: number): string {
    const replaced = replaceCodePoint(codePoint);
    return fromCodePoint(replaced);
}
-320
View File
@@ -1,320 +0,0 @@
import { describe, it, expect, vitest } from "vitest";
import * as entities from "./decode.js";
describe("Decode test", () => {
    // [input, expected output] pairs; XML and HTML decoding must agree on all of them.
    const cases: [string, string][] = [
        ["&amp;amp;", "&amp;"],
        ["&amp;#38;", "&#38;"],
        ["&amp;#x26;", "&#x26;"],
        ["&amp;#X26;", "&#X26;"],
        ["&#38;#38;", "&#38;"],
        ["&#x26;#38;", "&#38;"],
        ["&#X26;#38;", "&#38;"],
        ["&#x3a;", ":"],
        ["&#x3A;", ":"],
        ["&#X3a;", ":"],
        ["&#X3A;", ":"],
        ["&#", "&#"],
        ["&>", "&>"],
        ["id=770&#anchor", "id=770&#anchor"],
    ];

    for (const [input, output] of cases) {
        it(`should XML decode ${input}`, () => {
            expect(entities.decodeXML(input)).toBe(output);
        });

        it(`should HTML decode ${input}`, () => {
            expect(entities.decodeHTML(input)).toBe(output);
        });
    }

    it("should HTML decode partial legacy entity", () => {
        const strictResult = entities.decodeHTMLStrict("&timesbar");
        const legacyResult = entities.decodeHTML("&timesbar");

        expect(strictResult).toBe("&timesbar");
        expect(legacyResult).toBe("×bar");
    });

    it("should HTML decode legacy entities according to spec", () => {
        const query = "?&image_uri=1&=2&image=3";
        expect(entities.decodeHTML(query)).toBe("?&image_uri=1&=2&image=3");
    });

    it("should back out of legacy entities", () => {
        expect(entities.decodeHTML("&ampa")).toBe("&a");
    });

    it("should not parse numeric entities in strict mode", () => {
        expect(entities.decodeHTMLStrict("&#55")).toBe("&#55");
    });

    it("should parse &nbsp followed by < (#852)", () => {
        expect(entities.decodeHTML("&nbsp<")).toBe("\u00A0<");
    });

    it("should decode trailing legacy entities", () => {
        const decoded = entities.decodeHTML("&timesbar;&timesbar");
        expect(decoded).toBe("⨱×bar");
    });

    it("should decode multi-byte entities", () => {
        const decoded = entities.decodeHTML("&NotGreaterFullEqual;");
        expect(decoded).toBe("≧̸");
    });

    it("should not decode legacy entities followed by text in attribute mode", () => {
        const { Attribute } = entities.DecodingMode;

        // Explicit attribute mode through `decodeHTML`.
        expect(entities.decodeHTML("&not", Attribute)).toBe("¬");
        expect(entities.decodeHTML("&noti", Attribute)).toBe("&noti");
        expect(entities.decodeHTML("&not=", Attribute)).toBe("&not=");

        // The dedicated attribute helper.
        expect(entities.decodeHTMLAttribute("&notp")).toBe("&notp");
        expect(entities.decodeHTMLAttribute("&notP")).toBe("&notP");
        expect(entities.decodeHTMLAttribute("&not3")).toBe("&not3");
    });
});
describe("EntityDecoder", () => {
    /**
     * Builds a fresh set of mocked error hooks, one mock per hook, so each
     * test can assert how often (and with what) every hook was invoked.
     */
    function createErrorHandlers() {
        return {
            missingSemicolonAfterCharacterReference: vitest.fn(),
            absenceOfDigitsInNumericCharacterReference: vitest.fn(),
            validateNumericCharacterReference: vitest.fn(),
        };
    }

    /**
     * Creates an `EntityDecoder` over the HTML decode tree together with the
     * mocked callback it reports decoded code points to.
     * @param errorHandlers Optional mocked error hooks handed to the decoder.
     */
    function createDecoder(
        errorHandlers?: ReturnType<typeof createErrorHandlers>,
    ) {
        const callback = vitest.fn();
        const decoder = new entities.EntityDecoder(
            entities.htmlDecodeTree,
            callback,
            errorHandlers,
        );
        return { callback, decoder };
    }

    it("should decode decimal entities", () => {
        const { callback, decoder } = createDecoder();

        // The entity is split across two writes; the first one is incomplete.
        expect(decoder.write("&#5", 1)).toBe(-1);
        expect(decoder.write("8;", 0)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 5);
    });

    it("should decode hex entities", () => {
        const { callback, decoder } = createDecoder();

        expect(decoder.write("&#x3a;", 1)).toBe(6);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
    });

    it("should decode named entities", () => {
        const { callback, decoder } = createDecoder();

        expect(decoder.write("&amp;", 1)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
    });

    it("should decode legacy entities", () => {
        const { callback, decoder } = createDecoder();

        decoder.startEntity(entities.DecodingMode.Legacy);

        // Without a semicolon nothing is emitted until the input ends.
        expect(decoder.write("&amp", 1)).toBe(-1);
        expect(callback).toHaveBeenCalledTimes(0);

        expect(decoder.end()).toBe(4);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 4);
    });

    it("should decode named entity written character by character", () => {
        const { callback, decoder } = createDecoder();

        for (const c of "amp") {
            expect(decoder.write(c, 0)).toBe(-1);
        }
        expect(decoder.write(";", 0)).toBe(5);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
    });

    it("should decode numeric entity written character by character", () => {
        const { callback, decoder } = createDecoder();

        for (const c of "#x3a") {
            expect(decoder.write(c, 0)).toBe(-1);
        }
        expect(decoder.write(";", 0)).toBe(6);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(":".charCodeAt(0), 6);
    });

    it("should decode hex entities across several chunks", () => {
        const { callback, decoder } = createDecoder();

        for (const chunk of ["#x", "cf", "ff", "d"]) {
            expect(decoder.write(chunk, 0)).toBe(-1);
        }
        expect(decoder.write(";", 0)).toBe(9);

        expect(callback).toHaveBeenCalledTimes(1);
        expect(callback).toHaveBeenCalledWith(0xc_ff_fd, 9);
    });

    it("should not fail if nothing is written", () => {
        const { callback, decoder } = createDecoder();

        expect(decoder.end()).toBe(0);
        expect(callback).toHaveBeenCalledTimes(0);
    });

    describe("errors", () => {
        it("should produce an error for a named entity without a semicolon", () => {
            const errorHandlers = createErrorHandlers();
            const { callback, decoder } = createDecoder(errorHandlers);

            // Properly terminated entity: no error expected.
            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&amp;", 1)).toBe(5);
            expect(callback).toHaveBeenCalledTimes(1);
            expect(callback).toHaveBeenCalledWith("&".charCodeAt(0), 5);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);

            // Same entity without the semicolon: the error hook fires once.
            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&amp", 1)).toBe(-1);
            expect(decoder.end()).toBe(4);

            expect(callback).toHaveBeenCalledTimes(2);
            expect(callback).toHaveBeenLastCalledWith("&".charCodeAt(0), 4);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(1);
        });

        it("should produce an error for a numeric entity without a semicolon", () => {
            const errorHandlers = createErrorHandlers();
            const { callback, decoder } = createDecoder(errorHandlers);

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#x3a", 1)).toBe(-1);
            expect(decoder.end()).toBe(5);

            expect(callback).toHaveBeenCalledTimes(1);
            expect(callback).toHaveBeenCalledWith(0x3a, 5);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledWith(0x3a);
        });

        it("should produce an error for numeric entities without digits", () => {
            const errorHandlers = createErrorHandlers();
            const { callback, decoder } = createDecoder(errorHandlers);

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#", 1)).toBe(-1);
            expect(decoder.end()).toBe(0);

            expect(callback).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledWith(2);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
        });

        it("should produce an error for hex entities without digits", () => {
            const errorHandlers = createErrorHandlers();
            const { callback, decoder } = createDecoder(errorHandlers);

            decoder.startEntity(entities.DecodingMode.Legacy);
            expect(decoder.write("&#x", 1)).toBe(-1);
            expect(decoder.end()).toBe(0);

            expect(callback).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.missingSemicolonAfterCharacterReference,
            ).toHaveBeenCalledTimes(0);
            expect(
                errorHandlers.absenceOfDigitsInNumericCharacterReference,
            ).toHaveBeenCalledTimes(1);
            expect(
                errorHandlers.validateNumericCharacterReference,
            ).toHaveBeenCalledTimes(0);
        });
    });
});
+127 -76
View File
@@ -1,6 +1,7 @@
import { replaceCodePoint } from "./decode-codepoint.js";
import { htmlDecodeTree } from "./generated/decode-data-html.js";
import { xmlDecodeTree } from "./generated/decode-data-xml.js";
import { replaceCodePoint, fromCodePoint } from "./decode-codepoint.js";
import { BinTrieFlags } from "./internal/bin-trie-flags.js";
const enum CharCodes {
NUM = 35, // "#"
@@ -20,12 +21,6 @@ const enum CharCodes {
/** Bit that needs to be set to convert an upper case ASCII character to lower case */
const TO_LOWER_BIT = 0b10_0000;
export enum BinTrieFlags {
VALUE_LENGTH = 0b1100_0000_0000_0000,
BRANCH_LENGTH = 0b0011_1111_1000_0000,
JUMP_TABLE = 0b0000_0000_0111_1111,
}
function isNumber(code: number): boolean {
return code >= CharCodes.ZERO && code <= CharCodes.NINE;
}
@@ -50,6 +45,7 @@ function isAsciiAlphaNumeric(code: number): boolean {
*
* Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
* See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
* @param code Code point to decode.
*/
function isEntityInAttributeInvalidEnd(code: number): boolean {
return code === CharCodes.EQUALS || isAsciiAlphaNumeric(code);
@@ -63,6 +59,9 @@ const enum EntityDecoderState {
NamedEntity,
}
/**
* Decoding mode for named entities.
*/
export enum DecodingMode {
/** Entities in text nodes that can end with any character. */
Legacy = 0,
@@ -89,13 +88,13 @@ export interface EntityErrorProducer {
export class EntityDecoder {
constructor(
/** The tree used to decode entities. */
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: False positive
private readonly decodeTree: Uint16Array,
/**
* The function that is called when a codepoint is decoded.
*
* For multi-byte named entities, this will be called multiple times,
* with the second codepoint, and the same `consumed` value.
*
* @param codepoint The decoded codepoint.
* @param consumed The number of bytes consumed by the decoder.
*/
@@ -122,8 +121,13 @@ export class EntityDecoder {
private excess = 1;
/** The mode in which the decoder is operating. */
private decodeMode = DecodingMode.Strict;
/** The number of characters that have been consumed in the current run. */
private runConsumed = 0;
/** Resets the instance to make it reusable. */
/**
* Resets the instance to make it reusable.
* @param decodeMode Entity decoding mode to use.
*/
startEntity(decodeMode: DecodingMode): void {
this.decodeMode = decodeMode;
this.state = EntityDecoderState.EntityStart;
@@ -131,6 +135,7 @@ export class EntityDecoder {
this.treeIndex = 0;
this.excess = 1;
this.consumed = 1;
this.runConsumed = 0;
}
/**
@@ -139,7 +144,6 @@ export class EntityDecoder {
*
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
* entity is incomplete, and resume when the next string is written.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -178,7 +182,6 @@ export class EntityDecoder {
* Switches between the numeric decimal and hexadecimal states.
*
* Equivalent to the `Numeric character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -198,73 +201,53 @@ export class EntityDecoder {
return this.stateNumericDecimal(input, offset);
}
private addToNumericResult(
input: string,
start: number,
end: number,
base: number,
): void {
if (start !== end) {
const digitCount = end - start;
this.result =
this.result * Math.pow(base, digitCount) +
Number.parseInt(input.substr(start, digitCount), base);
this.consumed += digitCount;
}
}
/**
* Parses a hexadecimal numeric entity.
*
* Equivalent to the `Hexadecimal character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
private stateNumericHex(input: string, offset: number): number {
const startIndex = offset;
while (offset < input.length) {
const char = input.charCodeAt(offset);
if (isNumber(char) || isHexadecimalCharacter(char)) {
offset += 1;
// Convert hex digit to value (0-15); 'a'/'A' -> 10.
const digit =
char <= CharCodes.NINE
? char - CharCodes.ZERO
: (char | TO_LOWER_BIT) - CharCodes.LOWER_A + 10;
this.result = this.result * 16 + digit;
this.consumed++;
offset++;
} else {
this.addToNumericResult(input, startIndex, offset, 16);
return this.emitNumericEntity(char, 3);
}
}
this.addToNumericResult(input, startIndex, offset, 16);
return -1;
return -1; // Incomplete entity
}
/**
* Parses a decimal numeric entity.
*
* Equivalent to the `Decimal character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
private stateNumericDecimal(input: string, offset: number): number {
const startIndex = offset;
while (offset < input.length) {
const char = input.charCodeAt(offset);
if (isNumber(char)) {
offset += 1;
this.result = this.result * 10 + (char - CharCodes.ZERO);
this.consumed++;
offset++;
} else {
this.addToNumericResult(input, startIndex, offset, 10);
return this.emitNumericEntity(char, 2);
}
}
this.addToNumericResult(input, startIndex, offset, 10);
return -1;
return -1; // Incomplete entity
}
/**
@@ -272,7 +255,6 @@ export class EntityDecoder {
*
* Implements the logic from the `Hexadecimal character reference start
* state` and `Numeric character reference end state` in the HTML spec.
*
* @param lastCp The last code point of the entity. Used to see if the
* entity was terminated with a semicolon.
* @param expectedLength The minimum number of characters that should be
@@ -313,7 +295,6 @@ export class EntityDecoder {
* Parses a named entity.
*
* Equivalent to the `Named character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -321,12 +302,84 @@ export class EntityDecoder {
private stateNamedEntity(input: string, offset: number): number {
const { decodeTree } = this;
let current = decodeTree[this.treeIndex];
// The mask is the number of bytes of the value, including the current byte.
// The length is the number of bytes of the value, including the current byte.
let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
for (; offset < input.length; offset++, this.excess++) {
while (offset < input.length) {
// Handle compact runs (possibly inline): valueLength == 0 and SEMI_REQUIRED bit set.
if (valueLength === 0 && (current & BinTrieFlags.FLAG13) !== 0) {
const runLength =
(current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */
// If we are starting a run, check the first char.
if (this.runConsumed === 0) {
const firstChar = current & BinTrieFlags.JUMP_TABLE;
if (input.charCodeAt(offset) !== firstChar) {
return this.result === 0
? 0
: this.emitNotTerminatedNamedEntity();
}
offset++;
this.excess++;
this.runConsumed++;
}
// Check remaining characters in the run.
while (this.runConsumed < runLength) {
if (offset >= input.length) {
return -1;
}
const charIndexInPacked = this.runConsumed - 1;
const packedWord =
decodeTree[
this.treeIndex + 1 + (charIndexInPacked >> 1)
];
const expectedChar =
charIndexInPacked % 2 === 0
? packedWord & 0xff
: (packedWord >> 8) & 0xff;
if (input.charCodeAt(offset) !== expectedChar) {
this.runConsumed = 0;
return this.result === 0
? 0
: this.emitNotTerminatedNamedEntity();
}
offset++;
this.excess++;
this.runConsumed++;
}
this.runConsumed = 0;
this.treeIndex += 1 + (runLength >> 1);
current = decodeTree[this.treeIndex];
valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
}
if (offset >= input.length) break;
const char = input.charCodeAt(offset);
/*
* Implicit semicolon handling for nodes that require a semicolon but
* don't have an explicit ';' branch stored in the trie. If we have
* a value on the current node, it requires a semicolon, and the
* current input character is a semicolon, emit the entity using the
* current node (without descending further).
*/
if (
char === CharCodes.SEMI &&
valueLength !== 0 &&
(current & BinTrieFlags.FLAG13) !== 0
) {
return this.emitNamedEntityData(
this.treeIndex,
valueLength,
this.consumed + this.excess,
);
}
this.treeIndex = determineBranch(
decodeTree,
current,
@@ -361,12 +414,18 @@ export class EntityDecoder {
}
// If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
if (this.decodeMode !== DecodingMode.Strict) {
if (
this.decodeMode !== DecodingMode.Strict &&
(current & BinTrieFlags.FLAG13) === 0
) {
this.result = this.treeIndex;
this.consumed += this.excess;
this.excess = 0;
}
}
// Increment offset & excess for next iteration
offset++;
this.excess++;
}
return -1;
@@ -374,7 +433,6 @@ export class EntityDecoder {
/**
* Emit a named entity that was not terminated with a semicolon.
*
* @returns The number of characters consumed.
*/
private emitNotTerminatedNamedEntity(): number {
@@ -391,11 +449,9 @@ export class EntityDecoder {
/**
* Emit a named entity.
*
* @param result The index of the entity in the decode tree.
* @param valueLength The number of bytes in the entity.
* @param consumed The number of characters consumed.
*
* @returns The number of characters consumed.
*/
private emitNamedEntityData(
@@ -407,7 +463,8 @@ export class EntityDecoder {
this.emitCodePoint(
valueLength === 1
? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
? decodeTree[result] &
~(BinTrieFlags.VALUE_LENGTH | BinTrieFlags.FLAG13)
: decodeTree[result + 1],
consumed,
);
@@ -423,7 +480,6 @@ export class EntityDecoder {
* Signal to the parser that the end of the input was reached.
*
* Remaining data will be emitted and relevant errors will be produced.
*
* @returns The number of characters consumed.
*/
end(): number {
@@ -459,7 +515,6 @@ export class EntityDecoder {
/**
* Creates a function that decodes entities in a string.
*
* @param decodeTree The decode tree.
* @returns A function that decodes entities in a string.
*/
@@ -467,7 +522,7 @@ function getDecoder(decodeTree: Uint16Array) {
let returnValue = "";
const decoder = new EntityDecoder(
decodeTree,
(data) => (returnValue += fromCodePoint(data)),
(data) => (returnValue += String.fromCodePoint(data)),
);
return function decodeWithTrie(
@@ -510,10 +565,9 @@ function getDecoder(decodeTree: Uint16Array) {
/**
* Determines the branch of the current node that is taken given the current
* character. This function is used to traverse the trie.
*
* @param decodeTree The trie.
* @param current The current node.
* @param nodeIdx The index right after the current node and its value.
* @param nodeIndex Index immediately after the current node header.
* @param char The current character.
* @returns The index of the next node, or -1 if no branch is taken.
*/
@@ -540,22 +594,28 @@ export function determineBranch(
: decodeTree[nodeIndex + value] - 1;
}
// Case 3: Multiple branches encoded in dictionary
// Case 3: Multiple branches encoded in packed dictionary (two keys per uint16)
const packedKeySlots = (branchCount + 1) >> 1;
// Binary search for the character.
let lo = nodeIndex;
let hi = lo + branchCount - 1;
/*
* Treat packed keys as a virtual sorted array of length `branchCount`.
* Key(i) = low byte for even i, high byte for odd i in slot i>>1.
*/
let lo = 0;
let hi = branchCount - 1;
while (lo <= hi) {
const mid = (lo + hi) >>> 1;
const midValue = decodeTree[mid];
const slot = mid >> 1;
const packed = decodeTree[nodeIndex + slot];
const midKey = (packed >> ((mid & 1) * 8)) & 0xff;
if (midValue < char) {
if (midKey < char) {
lo = mid + 1;
} else if (midValue > char) {
} else if (midKey > char) {
hi = mid - 1;
} else {
return decodeTree[mid + branchCount];
return decodeTree[nodeIndex + packedKeySlots + mid];
}
}
@@ -567,7 +627,6 @@ const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
/**
* Decodes an HTML string.
*
* @param htmlString The string to decode.
* @param mode The decoding mode.
* @returns The decoded string.
@@ -581,7 +640,6 @@ export function decodeHTML(
/**
* Decodes an HTML string in an attribute.
*
* @param htmlAttribute The string to decode.
* @returns The decoded string.
*/
@@ -591,7 +649,6 @@ export function decodeHTMLAttribute(htmlAttribute: string): string {
/**
* Decodes an HTML string, requiring all entities to be terminated by a semicolon.
*
* @param htmlString The string to decode.
* @returns The decoded string.
*/
@@ -601,7 +658,6 @@ export function decodeHTMLStrict(htmlString: string): string {
/**
* Decodes an XML string, requiring all entities to be terminated by a semicolon.
*
* @param xmlString The string to decode.
* @returns The decoded string.
*/
@@ -609,12 +665,7 @@ export function decodeXML(xmlString: string): string {
return xmlDecoder(xmlString, DecodingMode.Strict);
}
export { replaceCodePoint } from "./decode-codepoint.js";
// Re-export for use by eg. htmlparser2
export { htmlDecodeTree } from "./generated/decode-data-html.js";
export { xmlDecodeTree } from "./generated/decode-data-xml.js";
export {
decodeCodePoint,
replaceCodePoint,
fromCodePoint,
} from "./decode-codepoint.js";
-78
View File
@@ -1,78 +0,0 @@
import { describe, it, expect } from "vitest";
import * as entities from "./index.js";
describe("Encode->decode test", () => {
    // Each case lists a raw input and its expected XML and HTML5 encodings.
    const testcases = [
        {
            input: "asdf & ÿ ü '",
            xml: "asdf &amp; &#xff; &#xfc; &apos;",
            html: "asdf &amp; &yuml; &uuml; &apos;",
        },
        {
            input: "&#38;",
            xml: "&amp;#38;",
            html: "&amp;&num;38&semi;",
        },
    ];

    for (const { input, xml, html } of testcases) {
        const encodedXML = entities.encodeXML(input);

        it(`should XML encode ${input}`, () => expect(encodedXML).toBe(xml));
        it(`should default to XML encode ${input}`, () =>
            expect(entities.encode(input)).toBe(xml));

        it(`should XML decode ${encodedXML}`, () =>
            expect(entities.decodeXML(encodedXML)).toBe(input));
        // These two exercise the decoders, so the titles say "decode".
        it(`should default to XML decode ${encodedXML}`, () =>
            expect(entities.decode(encodedXML)).toBe(input));
        it(`should default strict to XML decode ${encodedXML}`, () =>
            expect(entities.decodeStrict(encodedXML)).toBe(input));

        const encodedHTML5 = entities.encodeHTML5(input);

        it(`should HTML5 encode ${input}`, () =>
            expect(encodedHTML5).toBe(html));
        it(`should HTML5 decode ${encodedHTML5}`, () =>
            expect(entities.decodeHTML(encodedHTML5)).toBe(input));
    }

    // Independent of the testcases above, so it is registered exactly once
    // instead of once per loop iteration.
    it("should encode emojis", () =>
        expect(entities.encodeHTML5("😄🍾🥳💥😇")).toBe(
            "&#x1f604;&#x1f37e;&#x1f973;&#x1f4a5;&#x1f607;",
        ));

    it("should encode data URIs (issue #16)", () => {
        const data =
            "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAALAAABAAEAAAIBRAA7";
        expect(entities.decode(entities.encode(data))).toBe(data);
    });

    it("should HTML encode all ASCII characters", () => {
        // Round-trip every ASCII character through encode and decode.
        for (let index = 0; index < 128; index++) {
            const char = String.fromCharCode(index);
            const encoded = entities.encodeHTML(char);
            const decoded = entities.decodeHTML(encoded);
            expect(decoded).toBe(char);
        }
    });

    it("should encode trailing parts of entities", () =>
        expect(entities.encodeHTML("\uD835")).toBe("&#xd835;"));

    it("should encode surrogate pair with first surrogate equivalent of entity, without corresponding entity", () =>
        expect(entities.encodeHTML("\u{1D4A4}")).toBe("&#x1d4a4;"));
});
describe("encodeNonAsciiHTML", () => {
    it("should encode all non-ASCII characters", () => {
        const encoded = entities.encodeNonAsciiHTML("<test> #123! übermaßen");
        expect(encoded).toBe("&lt;test&gt; #123! &uuml;berma&szlig;en");
    });

    it("should encode emojis", () => {
        // Characters outside the BMP become hexadecimal numeric references.
        const encoded = entities.encodeNonAsciiHTML("😄🍾🥳💥😇");
        expect(encoded).toBe("&#x1f604;&#x1f37e;&#x1f973;&#x1f4a5;&#x1f607;");
    });

    it("should encode chars above surrogates", () => {
        // Every sign is followed by a variation selector (encoded as &#xfe0f;).
        const encoded = entities.encodeNonAsciiHTML("♒️♓️♈️♉️♊️♋️♌️♍️♎️♏️♐️♑️");
        expect(encoded).toBe(
            "&#x2652;&#xfe0f;&#x2653;&#xfe0f;&#x2648;&#xfe0f;&#x2649;&#xfe0f;&#x264a;&#xfe0f;&#x264b;&#xfe0f;&#x264c;&#xfe0f;&#x264d;&#xfe0f;&#x264e;&#xfe0f;&#x264f;&#xfe0f;&#x2650;&#xfe0f;&#x2651;&#xfe0f;",
        );
    });
});
+49 -31
View File
@@ -1,7 +1,17 @@
import { getCodePoint, XML_BITSET_VALUE } from "./escape.js";
import { htmlTrie } from "./generated/encode-html.js";
import { xmlReplacer, getCodePoint } from "./escape.js";
const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
/**
* We store the characters to consider as a compact bitset for fast lookups.
*/
const HTML_BITSET = /* #__PURE__ */ new Uint32Array([
0x16_00, // Bits for 09,0A,0C
0xfc_00_ff_fe, // 32..63 -> 21-2D (minus space), 2E,2F,3A-3F
0xf8_00_00_01, // 64..95 -> 40, 5B-5F
0x38_00_00_01, // 96..127-> 60, 7B-7D
]);
const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
/**
* Encodes all characters in the input using HTML entities. This includes
@@ -13,9 +23,10 @@ const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
* @param input Input string to encode or decode.
*/
export function encodeHTML(input: string): string {
return encodeHTMLTrieRe(htmlReplacer, input);
return encodeHTMLTrieRe(HTML_BITSET, input);
}
/**
* Encodes all non-ASCII characters, as well as characters not valid in HTML
@@ -24,54 +35,61 @@ export function encodeHTML(input: string): string {
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
* @param input Input string to encode or decode.
*/
export function encodeNonAsciiHTML(input: string): string {
    // XML_BITSET restricts ASCII encoding to the XML-special characters;
    // non-ASCII characters are always handled by the shared scanner.
    return encodeHTMLTrieRe(XML_BITSET, input);
}
/**
 * Scan `input` once and replace every character that needs encoding with an
 * HTML entity from `htmlTrie`, or with a numeric hexadecimal reference when
 * the trie has no entity for it.
 *
 * @param bitset Four 32-bit words flagging which ASCII char codes (0..127)
 *     must be encoded; every char code >= 0x80 is always encoded.
 * @param input String to encode.
 * @returns The encoded string, or `input` itself when nothing was encoded.
 */
function encodeHTMLTrieRe(bitset: Uint32Array, input: string): string {
    // Stays undefined until the first character that needs encoding, so
    // clean input is returned as-is without building a copy.
    let out: string | undefined;
    let last = 0; // Start of the next untouched slice.
    const { length } = input;

    for (let index = 0; index < length; index++) {
        const char = input.charCodeAt(index);

        // Skip ASCII characters that don't need encoding. The shift count is
        // taken modulo 32, so `>>> char` selects bit `char & 31` of the word.
        if (char < 0x80 && !((bitset[char >>> 5] >>> char) & 1)) {
            continue;
        }

        // Flush the untouched text that precedes this character.
        if (out === undefined) out = input.substring(0, index);
        else if (last !== index) out += input.substring(last, index);

        let node = htmlTrie.get(char);

        if (typeof node === "object") {
            // We are in a branch. Try to match the next char.
            if (index + 1 < length) {
                const nextChar = input.charCodeAt(index + 1);
                const value =
                    typeof node.next === "number"
                        ? node.next === nextChar
                            ? node.nextValue
                            : undefined
                        : node.next.get(nextChar);

                if (value !== undefined) {
                    out += value;
                    // The entity covered two code units; consume the second.
                    index++;
                    last = index + 1;
                    continue;
                }
            }

            node = node.value;
        }

        // We might have a tree node without a value; use a numeric entity.
        if (node === undefined) {
            const cp = getCodePoint(input, index);
            out += `&#x${cp.toString(16)};`;
            // A code point that differs from the code unit means a surrogate
            // pair was read; skip its trailing half.
            if (cp !== char) index++;
            last = index + 1;
        } else {
            out += node;
            last = index + 1;
        }
    }

    if (out === undefined) return input;
    // `substring` instead of the deprecated `substr`; identical here because
    // `last` is never negative.
    if (last < length) out += input.substring(last);
    return out;
}
-14
View File
@@ -1,14 +0,0 @@
import { describe, it, expect } from "vitest";
import * as entities from "./index.js";
describe("escape HTML", () => {
    // Attribute escaping replaces the quote but leaves `<` and `>` alone.
    it("should escape HTML attribute values", () => {
        const escaped = entities.escapeAttribute('<a " attr > & value \u00A0!');
        expect(escaped).toBe("<a &quot; attr > &amp; value &nbsp;!");
    });

    // Text escaping replaces `<` and `>` but leaves the quote alone.
    it("should escape HTML text", () => {
        const escaped = entities.escapeText('<a " text > & value \u00A0!');
        expect(escaped).toBe('&lt;a " text &gt; &amp; value &nbsp;!');
    });
});
+49 -37
View File
@@ -1,5 +1,3 @@
export const xmlReplacer: RegExp = /["$&'<>\u0080-\uFFFF]/g;
const xmlCodeMap = new Map([
[34, "&quot;"],
[38, "&amp;"],
@@ -9,52 +7,72 @@ const xmlCodeMap = new Map([
]);
/**
 * Read the Unicode code point that starts at a given index.
 *
 * Uses the native `String.prototype.codePointAt` when the runtime provides it
 * and otherwise decodes the UTF-16 surrogate pair by hand. In both paths an
 * unpaired surrogate is returned as its own code unit.
 * @param input Input string to encode or decode.
 * @param index Current read position in the input string.
 * @returns The code point starting at `index`.
 */
export const getCodePoint: (c: string, index: number) => number =
    typeof String.prototype.codePointAt === "function"
        ? (input: string, index: number): number => input.codePointAt(index)!
        : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          (c: string, index: number): number => {
              const high = c.charCodeAt(index);
              if ((high & 0xfc_00) !== 0xd8_00) return high;

              const low = c.charCodeAt(index + 1);
              // Only combine with a real low surrogate. At the end of the
              // string `low` is NaN, which also fails this mask test.
              if ((low & 0xfc_00) !== 0xdc_00) return high;

              return (high - 0xd8_00) * 0x4_00 + low - 0xdc_00 + 0x1_00_00;
          };
/**
* Bitset for ASCII characters that need to be escaped in XML.
*
* The mask describes char codes 32..63 only: bit `n` stands for char code
* `32 + n`. Users must range-check the char code (or place this value in the
* matching word of a larger bitset) before testing a bit.
*/
export const XML_BITSET_VALUE = 0x50_00_00_c4; // 32..63 -> 34 ("),38 (&),39 ('),60 (<),62 (>)
/**
 * Encodes all non-ASCII characters, as well as characters not valid in XML
 * documents using XML entities. Uses a fast bitset scan instead of RegExp.
 *
 * If a character has no equivalent entity, a numeric hexadecimal reference
 * (eg. `&#xfc;`) will be used.
 * @param input String to encode.
 * @returns The encoded string, or `input` itself when nothing was encoded.
 */
export function encodeXML(input: string): string {
    // Stays undefined until the first character that needs encoding, so
    // clean input is returned as-is without building a copy.
    let out: string | undefined;
    let last = 0; // Start of the next slice not yet copied to `out`.
    const { length } = input;

    for (let index = 0; index < length; index++) {
        const char = input.charCodeAt(index);

        // Check for ASCII chars that don't need escaping. The mask only
        // describes 32..63 and the shift count wraps modulo 32, hence the
        // explicit range guards for the other ASCII codes.
        if (
            char < 0x80 &&
            (((XML_BITSET_VALUE >>> char) & 1) === 0 || char >= 64 || char < 32)
        ) {
            continue;
        }

        // Flush the untouched text that precedes this character.
        if (out === undefined) out = input.substring(0, index);
        else if (last !== index) out += input.substring(last, index);

        if (char < 64) {
            // Known replacement: every ASCII char flagged in the mask has an
            // entry in the code map.
            out += xmlCodeMap.get(char)!;
            last = index + 1;
            continue;
        }

        // Non-ASCII: encode as numeric entity (handle surrogate pair)
        const cp = getCodePoint(input, index);
        out += `&#x${cp.toString(16)};`;
        if (cp !== char) index++; // Skip trailing surrogate
        last = index + 1;
    }

    if (out === undefined) return input;
    // `substring` instead of the deprecated `substr`; identical here because
    // `last` is never negative.
    if (last < length) out += input.substring(last);
    return out;
}
/**
@@ -63,7 +81,6 @@ export function encodeXML(input: string): string {
*
* Have a look at `escapeUTF8` if you want a more concise output at the expense
* of reduced transportability.
*
* @param data String to escape.
*/
export const escape: typeof encodeXML = encodeXML;
@@ -71,10 +88,8 @@ export const escape: typeof encodeXML = encodeXML;
/**
* Creates a function that escapes all characters matched by the given regular
* expression using the given map of characters to escape to their entities.
*
* @param regex Regular expression to match characters to escape.
* @param map Map of characters to escape to their entities.
*
* @returns Function that escapes all characters matched by the given regular
* expression using the given map of characters to escape to their entities.
*/
@@ -83,7 +98,7 @@ function getEscaper(
map: Map<number, string>,
): (data: string) => string {
return function escape(data: string): string {
let match;
let match: RegExpExecArray | null;
let lastIndex = 0;
let result = "";
@@ -107,7 +122,6 @@ function getEscaper(
* Encodes all characters not valid in XML documents using XML entities.
*
* Note that the output will be character-set dependent.
*
* @param data String to escape.
*/
export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
@@ -118,7 +132,6 @@ export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
/**
* Encodes all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export const escapeAttribute: (data: string) => string =
@@ -134,7 +147,6 @@ export const escapeAttribute: (data: string) => string =
/**
* Encodes all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export const escapeText: (data: string) => string = /* #__PURE__ */ getEscaper(
-10
View File
@@ -1,10 +0,0 @@
{
"rules": {
"multiline-comment-style": 0,
"capitalized-comments": 0,
"unicorn/escape-case": 0,
"unicorn/no-hex-escape": 0,
"unicorn/numeric-separators-style": 0,
"unicorn/prefer-spread": 0
}
}
File diff suppressed because one or more lines are too long
@@ -1,8 +1,7 @@
// Generated using scripts/write-decode-map.ts
export const xmlDecodeTree: Uint16Array = /* #__PURE__ */ new Uint16Array(
// prettier-ignore
/* #__PURE__ */ "\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
.split("")
.map((c) => c.charCodeAt(0)),
import { decodeBase64 } from "../internal/decode-shared.js";
/** Packed XML decode trie data. */
export const xmlDecodeTree: Uint16Array = /* #__PURE__ */ decodeBase64(
"AAJhZ2xxBwARABMAFQBtAg0AAAAAAA8AcAAmYG8AcwAnYHQAPmB0ADxg9SFvdCJg",
);
File diff suppressed because one or more lines are too long
-125
View File
@@ -1,125 +0,0 @@
import { readFileSync } from "node:fs";
import { describe, it, expect } from "vitest";
import * as entities from "./index.js";
import legacy from "../maps/legacy.json" with { type: "json" };
// Names of the JSON entity maps under `../maps`. The array index is also the
// numeric level passed to the encode/decode API in the tests below.
const levels = ["xml", "entities"];
// Round-trip tests driven by the entity maps: every entity listed in a map
// must decode to, and re-encode from, the character(s) the map records.
describe("Documents", () => {
// Load each map from disk and pair it with its level index: [index, map].
const levelDocuments = levels
.map((name) => new URL(`../maps/${name}.json`, import.meta.url))
.map((url) => JSON.parse(readFileSync(url, "utf8")))
.map((document, index) => [index, document]);
for (const [level, document] of levelDocuments) {
describe("Decode", () => {
it(levels[level], () => {
for (const entity of Object.keys(document)) {
// An entity from this map must also decode at every higher level.
for (let l = level; l < levels.length; l++) {
// Level given as a bare number...
expect(entities.decode(`&${entity};`, l)).toBe(
document[entity],
);
// ...and as an options object.
expect(
entities.decode(`&${entity};`, { level: l }),
).toBe(document[entity]);
}
}
});
});
describe("Decode strict", () => {
it(levels[level], () => {
for (const entity of Object.keys(document)) {
for (let l = level; l < levels.length; l++) {
// The deprecated helper and `decode` with an explicit strict
// mode are both expected to return the map's value.
expect(entities.decodeStrict(`&${entity};`, l)).toBe(
document[entity],
);
expect(
entities.decode(`&${entity};`, {
level: l,
mode: entities.DecodingMode.Strict,
}),
).toBe(document[entity]);
}
}
});
});
describe("Encode", () => {
it(levels[level], () => {
for (const entity of Object.keys(document)) {
for (let l = level; l < levels.length; l++) {
// Only the round trip is checked, not the exact encoded form.
const encoded = entities.encode(document[entity], l);
const decoded = entities.decode(encoded, l);
expect(decoded).toBe(document[entity]);
}
}
});
// Same expected output is asserted for every level in `levels`.
it("should only encode non-ASCII values if asked", () =>
expect(
entities.encode("Great #'s of 🎁", {
level,
mode: entities.EncodingMode.ASCII,
}),
).toBe("Great #&apos;s of &#x1f381;"));
});
}
describe("Legacy", () => {
const legacyMap: Record<string, string> = legacy;
it("should decode", () => {
for (const entity of Object.keys(legacyMap)) {
// Legacy entities are written without the trailing semicolon.
expect(entities.decodeHTML(`&${entity}`)).toBe(
legacyMap[entity],
);
// An explicit Legacy mode overrides decodeStrict's default mode.
expect(
entities.decodeStrict(`&${entity}`, {
level: entities.EntityLevel.HTML,
mode: entities.DecodingMode.Legacy,
}),
).toBe(legacyMap[entity]);
}
});
});
});
// Pairs of [hex code point, string]: characters outside the BMP, written as
// UTF-16 surrogate pairs.
const astral = [
    ["1d306", "\uD834\uDF06"],
    ["1d11e", "\uD834\uDD1E"],
];

// Pairs of [hex code point, expected decoded string] for numeric references
// that do not decode to the code point they name.
const astralSpecial = [
    ["80", "\u20AC"],
    ["110000", "\uFFFD"],
];

describe("Astral entities", () => {
    for (const [hex, text] of astral) {
        const reference = `&#x${hex};`;

        it(`should decode ${text}`, () => {
            expect(entities.decode(reference)).toBe(text);
        });

        it(`should encode ${text}`, () => {
            expect(entities.encode(text)).toBe(reference);
        });

        it(`should escape ${text}`, () => {
            expect(entities.escape(text)).toBe(reference);
        });
    }

    for (const [hex, text] of astralSpecial) {
        it(`should decode special \\u${hex}`, () => {
            expect(entities.decode(`&#x${hex};`)).toBe(text);
        });
    }
});
describe("Escape", () => {
    it("should always decode ASCII chars", () => {
        // ASCII spans 0x00..0x7F inclusive, so the bound must include DEL
        // (0x7F); `< 0x7f` silently left it untested.
        for (let index = 0; index <= 0x7f; index++) {
            const c = String.fromCharCode(index);
            // Escaping and then decoding must give back the original char.
            expect(entities.decodeXML(entities.escape(c))).toBe(c);
        }
    });

    // `escapeUTF8` escapes the XML-special characters but leaves non-ASCII
    // characters as they are.
    it("should keep UTF8 characters", () =>
        expect(entities.escapeUTF8('ß < "ü"')).toBe(`ß &lt; &quot;ü&quot;`));
});
+23 -49
View File
@@ -1,10 +1,10 @@
import { decodeXML, decodeHTML, DecodingMode } from "./decode.js";
import { type DecodingMode, decodeHTML, decodeXML } from "./decode.js";
import { encodeHTML, encodeNonAsciiHTML } from "./encode.js";
import {
encodeXML,
escapeUTF8,
escapeAttribute,
escapeText,
escapeUTF8,
} from "./escape.js";
/** The level of entities to support. */
@@ -15,6 +15,9 @@ export enum EntityLevel {
HTML = 1,
}
/**
* Encoding strategy used by `encode`.
*/
export enum EncodingMode {
/**
* The output is UTF-8 encoded. Only characters that need escaping within
@@ -44,6 +47,9 @@ export enum EncodingMode {
Text,
}
/**
* Options for `decode`.
*/
export interface DecodingOptions {
/**
* The level of entities to support.
@@ -56,9 +62,6 @@ export interface DecodingOptions {
*
* Always `Strict` for XML. For HTML, set this to `DecodingMode.Attribute` if
* you are parsing an attribute value.
*
* The deprecated `decodeStrict` function defaults this to `Strict`.
*
* @default {@link DecodingMode.Legacy}
*/
mode?: DecodingMode | undefined;
@@ -66,7 +69,6 @@ export interface DecodingOptions {
/**
* Decodes a string with entities.
*
* @param input String to decode.
* @param options Decoding options.
*/
@@ -84,24 +86,6 @@ export function decode(
return decodeXML(input);
}
/**
 * Decodes a string with entities. Does not allow missing trailing semicolons for entities.
 *
 * The `options` object passed by the caller is never modified: the strict
 * default is applied to a copy.
 *
 * @param input String to decode.
 * @param options Decoding options, or just the entity level. A `mode` given
 *     here takes precedence over the strict default.
 * @returns The decoded string.
 * @deprecated Use `decode` with the `mode` set to `Strict`.
 */
export function decodeStrict(
    input: string,
    options: DecodingOptions | EntityLevel = EntityLevel.XML,
): string {
    const normalizedOptions: DecodingOptions =
        typeof options === "number"
            ? { level: options, mode: DecodingMode.Strict }
            : // Copy instead of `options.mode ??= …`, which wrote the default
              // into the caller's object.
              { ...options, mode: options.mode ?? DecodingMode.Strict };

    return decode(input, normalizedOptions);
}
/**
* Options for `encode`.
*/
@@ -120,7 +104,6 @@ export interface EncodingOptions {
/**
* Encodes a string with entities.
*
* @param input String to encode.
* @param options Encoding options.
*/
@@ -146,7 +129,7 @@ export function encode(
? encodeNonAsciiHTML(input)
: encodeXML(input);
}
// eslint-disable-next-line unicorn/no-useless-switch-case
// biome-ignore lint/complexity/noUselessSwitchCase: we get an error for the switch not being exhaustive
case EncodingMode.Extensive:
default: {
return level === EntityLevel.HTML
@@ -157,32 +140,23 @@ export function encode(
}
export {
encodeXML,
escape,
escapeUTF8,
escapeAttribute,
escapeText,
} from "./escape.js";
DecodingMode,
decodeHTML,
decodeHTMLAttribute,
decodeHTMLStrict,
decodeXML,
decodeXML as decodeXMLStrict,
EntityDecoder,
} from "./decode.js";
export {
encodeHTML,
encodeNonAsciiHTML,
// Legacy aliases (deprecated)
encodeHTML as encodeHTML4,
encodeHTML as encodeHTML5,
} from "./encode.js";
export {
EntityDecoder,
DecodingMode,
decodeXML,
decodeHTML,
decodeHTMLStrict,
decodeHTMLAttribute,
// Legacy aliases (deprecated)
decodeHTML as decodeHTML4,
decodeHTML as decodeHTML5,
decodeHTMLStrict as decodeHTML4Strict,
decodeHTMLStrict as decodeHTML5Strict,
decodeXML as decodeXMLStrict,
} from "./decode.js";
encodeXML,
escape,
escapeAttribute,
escapeText,
escapeUTF8,
} from "./escape.js";
+8 -3
View File
@@ -2,13 +2,18 @@
"name": "parse5",
"type": "module",
"description": "HTML parser and serializer.",
"version": "8.0.0",
"version": "8.0.1",
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)",
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"contributors": [
"James Garbutt (https://github.com/43081j)",
"Felix Boehm (https://github.com/fb55)",
"Ivan Nikulin (https://github.com/inikulin)",
"Titus (https://github.com/wooorm)"
],
"homepage": "https://parse5.js.org",
"funding": "https://github.com/inikulin/parse5?sponsor=1",
"dependencies": {
"entities": "^6.0.0"
"entities": "^8.0.0"
},
"keywords": [
"html",