feat(planning): grille hebdomadaire complète avec API et filtres

- Connexion API via proxy Angular (résolution CORS, base path /api)
- Import CSS ng-zorro global pour les modales et composants
- Filtres Camion/Show câblés sur l'affichage de la grille
- Camions affichés via TrucksService (linkés au show du même créneau)
- Panneau de détails : spectacles + camions du jour sélectionné
- Modale de création de spectacle stylisée avec fond et centrage
- Positionnement précis des events à la minute dans leur créneau
- Auto-scroll vers l'heure courante au chargement
- Ligne "maintenant" sur la colonne du jour actuel
- Régénération des services OpenAPI (nouveaux noms de types)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 20:36:03 +02:00
parent 150b97cd2e
commit 654b297e2e
3131 changed files with 149304 additions and 104334 deletions
+1 -3
View File
@@ -459,9 +459,7 @@ export class Tokenizer {
: cp === $.NULL
? TokenType.NULL_CHARACTER
: TokenType.CHARACTER;
// OPTIMIZATION: Use String.fromCharCode for BMP characters (< 0x10000) which is faster
// than String.fromCodePoint. Characters outside BMP are rare in HTML.
this._appendCharToCurrentCharacterToken(type, cp < 65536 ? String.fromCharCode(cp) : String.fromCodePoint(cp));
this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
}
//NOTE: used when we emit characters explicitly.
//This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
+73 -38
View File
@@ -1,6 +1,6 @@
{
"name": "entities",
"version": "8.0.0",
"version": "6.0.1",
"description": "Encode & decode XML and HTML entities with ease & speed",
"keywords": [
"html entities",
@@ -13,7 +13,7 @@
],
"repository": {
"type": "git",
"url": "https://github.com/fb55/entities.git"
"url": "git://github.com/fb55/entities.git"
},
"funding": "https://github.com/fb55/entities?sponsor=1",
"license": "BSD-2-Clause",
@@ -22,62 +22,97 @@
"type": "module",
"exports": {
".": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
"import": {
"types": "./dist/esm/index.d.ts",
"default": "./dist/esm/index.js"
},
"require": {
"types": "./dist/commonjs/index.d.ts",
"default": "./dist/commonjs/index.js"
}
},
"./decode": {
"types": "./dist/decode.d.ts",
"default": "./dist/decode.js"
"import": {
"types": "./dist/esm/decode.d.ts",
"default": "./dist/esm/decode.js"
},
"require": {
"types": "./dist/commonjs/decode.d.ts",
"default": "./dist/commonjs/decode.js"
}
},
"./escape": {
"types": "./dist/escape.d.ts",
"default": "./dist/escape.js"
"import": {
"types": "./dist/esm/escape.d.ts",
"default": "./dist/esm/escape.js"
},
"require": {
"types": "./dist/commonjs/escape.d.ts",
"default": "./dist/commonjs/escape.js"
}
}
},
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"main": "./dist/commonjs/index.js",
"module": "./dist/esm/index.js",
"types": "./dist/commonjs/index.d.ts",
"files": [
"decode.js",
"decode.d.ts",
"escape.js",
"escape.d.ts",
"dist",
"src",
"!**/*.spec.ts"
"src"
],
"scripts": {
"benchmark": "node --import=tsx scripts/benchmark.ts",
"build": "tsc",
"build:docs": "typedoc --hideGenerator src/index.ts",
"build:encode-trie": "node --import=tsx scripts/write-encode-map.ts",
"build:trie": "node --import=tsx scripts/write-decode-map.ts",
"format": "npm run format:es && npm run format:biome",
"format:biome": "biome check --fix .",
"format": "npm run format:es && npm run format:prettier",
"format:es": "npm run lint:es -- --fix",
"lint": "npm run lint:es && npm run lint:ts && npm run lint:biome",
"lint:biome": "biome check .",
"lint:es": "eslint .",
"format:prettier": "npm run prettier -- --write",
"lint": "npm run lint:es && npm run lint:ts && npm run lint:prettier",
"lint:es": "eslint . --ignore-path .gitignore",
"lint:prettier": "npm run prettier -- --check",
"lint:ts": "tsc --noEmit",
"prepublishOnly": "npm run build",
"prepublishOnly": "tshy",
"prettier": "prettier '**/*.{ts,md,json,yml}'",
"test": "npm run test:vi && npm run lint",
"test:vi": "vitest run"
},
"prettier": {
"proseWrap": "always",
"tabWidth": 4
},
"devDependencies": {
"@biomejs/biome": "^2.4.7",
"@eslint/compat": "^2.0.3",
"@feedic/eslint-config": "^0.3.1",
"@types/he": "^1.2.3",
"@types/node": "^25.5.0",
"eslint": "^10.0.3",
"eslint-config-biome": "^2.1.3",
"globals": "^17.4.0",
"he": "^1.2.0",
"html-entities": "^2.6.0",
"parse-entities": "^4.0.2",
"tinybench": "^6.0.0",
"tsx": "^4.21.0",
"typedoc": "^0.28.17",
"typescript": "^5.9.3",
"typescript-eslint": "^8.57.1",
"vitest": "^4.0.17"
"@types/node": "^22.15.30",
"@typescript-eslint/eslint-plugin": "^8.33.1",
"@typescript-eslint/parser": "^8.33.1",
"@vitest/coverage-v8": "^2.1.8",
"eslint": "^8.57.1",
"eslint-config-prettier": "^10.1.5",
"eslint-plugin-n": "^17.19.0",
"eslint-plugin-unicorn": "^56.0.1",
"prettier": "^3.5.3",
"tshy": "^3.0.2",
"tsx": "^4.19.4",
"typedoc": "^0.28.5",
"typescript": "^5.8.3",
"vitest": "^2.0.2"
},
"engines": {
"node": ">=20.19.0"
"node": ">=0.12"
},
"tshy": {
"exclude": [
"**/*.spec.ts",
"**/__fixtures__/*",
"**/__tests__/*",
"**/__snapshots__/*"
],
"exports": {
".": "./src/index.ts",
"./decode": "./src/decode.ts",
"./escape": "./src/escape.ts"
}
}
}
+27 -36
View File
@@ -10,7 +10,7 @@ Encode & decode HTML & XML entities with ease & speed.
[`commonmark`](https://github.com/commonmark/commonmark.js) use it to process
HTML entities.
- ⚡️ Fast: `entities` is the fastest library for decoding HTML entities (as of
September 2025); see [performance](#performance).
April 2022); see [performance](#performance).
- 🎛 Configurable: Get an output tailored for your needs. You are fine with
UTF8? That'll save you some bytes. Prefer to only have ASCII characters? We
can do that as well!
@@ -24,7 +24,7 @@ Encode & decode HTML & XML entities with ease & speed.
### …use `entities`
```javascript
import * as entities from "entities";
const entities = require("entities");
// Encoding
entities.escapeUTF8("&#38; ü"); // "&amp;#38; ü"
@@ -38,36 +38,15 @@ entities.decodeHTML("asdf &amp; &yuml; &uuml; &apos;"); // "asdf & ÿ ü '"
## Performance
Benchmarked in September 2025 with Node v24.6.0 on Apple M2 using `tinybench`.
Higher ops/s is better; `avg (μs)` is the mean time per operation.
See `scripts/benchmark.ts` to reproduce.
This is how `entities` compares to other libraries on a very basic benchmark
(see `scripts/benchmark.ts`, for 10,000,000 iterations; **lower is better**):
### Decoding
| Library | Version | ops/s | avg (μs) | ±% | slower |
| -------------- | ------- | --------- | -------- | ---- | ------ |
| entities | 7.0.0 | 5,838,416 | 175.57 | 0.06 | — |
| html-entities | 2.6.0 | 2,919,637 | 347.77 | 0.33 | 50.0% |
| he | 1.2.0 | 2,318,438 | 446.48 | 0.70 | 60.3% |
| parse-entities | 4.0.2 | 852,855 | 1,199.51 | 0.36 | 85.4% |
### Encoding
| Library | Version | ops/s | avg (μs) | ±% | slower |
| -------------- | ------- | --------- | -------- | ---- | ------ |
| entities | 7.0.0 | 2,770,115 | 368.09 | 0.11 | — |
| html-entities | 2.6.0 | 1,491,963 | 679.96 | 0.58 | 46.2% |
| he | 1.2.0 | 481,278 | 2,118.25 | 0.61 | 82.6% |
### Escaping
| Library | Version | ops/s | avg (μs) | ±% | slower |
| -------------- | ------- | --------- | -------- | ---- | ------ |
| entities | 7.0.0 | 4,616,468 | 223.84 | 0.17 | — |
| he | 1.2.0 | 3,659,301 | 280.76 | 0.58 | 20.7% |
| html-entities | 2.6.0 | 3,555,301 | 296.63 | 0.84 | 23.0% |
Note: Micro-benchmarks may vary across machines and Node versions.
| Library | Version | `decode` perf | `encode` perf | `escape` perf |
| -------------- | ------- | ------------- | ------------- | ------------- |
| entities | `3.0.1` | 1.418s | 6.786s | 2.196s |
| html-entities | `2.3.2` | 2.530s | 6.829s | 2.415s |
| he | `1.2.0` | 5.800s | 24.237s | 3.624s |
| parse-entities | `3.0.0` | 9.660s | N/A | N/A |
---
@@ -89,8 +68,8 @@ This is helpful for decoding entities in legacy environments.
> Why should I use `entities` instead of alternative modules?
As of September 2025, `entities` is faster than other modules. Still, this is
not a differentiated space and other modules can catch up.
As of April 2022, `entities` is a bit faster than other modules. Still, this is
not a very differentiated space and other modules can catch up.
**More importantly**, you might already have `entities` in your dependency graph
(as a dependency of eg. `cheerio`, or `htmlparser2`), and including it directly
@@ -99,9 +78,10 @@ libraries, so have a look through your `node_modules` directory!
> Does `entities` support tree shaking?
Yes! Note that for best results, you should not use the `encode` and `decode`
functions, as they wrap around a number of other functions, all of which will
remain in the bundle. Instead, use the functions that you need directly.
Yes! `entities` ships as both a CommonJS and an ES module. Note that for best
results, you should not use the `encode` and `decode` functions, as they wrap
around a number of other functions, all of which will remain in the bundle.
Instead, use the functions that you need directly.
---
@@ -129,3 +109,14 @@ License: BSD-2-Clause
To report a security vulnerability, please use the
[Tidelift security contact](https://tidelift.com/security). Tidelift will
coordinate the fix and disclosure.
## `entities` for enterprise
Available as part of the Tidelift Subscription
The maintainers of `entities` and thousands of other packages are working with
Tidelift to deliver commercial support and maintenance for the open source
dependencies you use to build your applications. Save time, reduce risk, and
improve code health, while paying the maintainers of the exact dependencies you
use.
[Learn more.](https://tidelift.com/subscription/pkg/npm-entities?utm_source=npm-entities&utm_medium=referral&utm_campaign=enterprise&utm_term=repo)
+32 -1
View File
@@ -32,11 +32,31 @@ const decodeMap = new Map([
[159, 376],
]);
/**
 * Converts Unicode code points to a string.
 *
 * Uses `String.fromCodePoint` when the runtime provides it, and a UTF-16
 * based fallback otherwise. The fallback performs no range validation.
 *
 * @param codePoints Code points to convert, in order.
 * @returns The string made up of the given code points.
 */
export const fromCodePoint: (...codePoints: number[]) => string =
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, n/no-unsupported-features/es-builtins
    String.fromCodePoint ??
    function (...codePoints: number[]): string {
        let output = "";
        // Handle every argument so the fallback honours the variadic signature.
        for (let codePoint of codePoints) {
            if (codePoint > 0xff_ff) {
                // Above U+FFFF: emit the high surrogate now, then fall
                // through with the low surrogate.
                codePoint -= 0x1_00_00;
                output += String.fromCharCode(
                    ((codePoint >>> 10) & 0x3_ff) | 0xd8_00,
                );
                codePoint = 0xdc_00 | (codePoint & 0x3_ff);
            }
            output += String.fromCharCode(codePoint);
        }
        return output;
    };
/**
* Replace the given code point with a replacement character if it is a
* surrogate or is outside the valid range. Otherwise return the code
* point unchanged.
* @param codePoint Unicode code point to convert.
*/
export function replaceCodePoint(codePoint: number): number {
if (
@@ -48,3 +68,14 @@ export function replaceCodePoint(codePoint: number): number {
return decodeMap.get(codePoint) ?? codePoint;
}
/**
 * Convert a code point to a string, first passing it through
 * `replaceCodePoint` so that any substitution it makes is applied.
 *
 * @deprecated Use `fromCodePoint(replaceCodePoint(codePoint))` instead.
 * @param codePoint The code point to decode.
 * @returns The string for the (possibly replaced) code point.
 */
export function decodeCodePoint(codePoint: number): string {
    const safeCodePoint = replaceCodePoint(codePoint);
    return fromCodePoint(safeCodePoint);
}
+76 -127
View File
@@ -1,7 +1,6 @@
import { replaceCodePoint } from "./decode-codepoint.js";
import { htmlDecodeTree } from "./generated/decode-data-html.js";
import { xmlDecodeTree } from "./generated/decode-data-xml.js";
import { BinTrieFlags } from "./internal/bin-trie-flags.js";
import { replaceCodePoint, fromCodePoint } from "./decode-codepoint.js";
const enum CharCodes {
NUM = 35, // "#"
@@ -21,6 +20,12 @@ const enum CharCodes {
/** Bit that needs to be set to convert an upper case ASCII character to lower case */
const TO_LOWER_BIT = 0b10_0000;
/**
 * Bit masks for the fields packed into a single 16-bit decode-trie node.
 */
export enum BinTrieFlags {
// Bits 14-15; the decoder shifts the masked value right by 14 to get the value length.
VALUE_LENGTH = 0b1100_0000_0000_0000,
// Bits 7-13. NOTE(review): presumably the node's branch count — confirm against `determineBranch`.
BRANCH_LENGTH = 0b0011_1111_1000_0000,
// Bits 0-6.
JUMP_TABLE = 0b0000_0000_0111_1111,
}
function isNumber(code: number): boolean {
return code >= CharCodes.ZERO && code <= CharCodes.NINE;
}
@@ -45,7 +50,6 @@ function isAsciiAlphaNumeric(code: number): boolean {
*
* Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error.
* See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
* @param code Code point to decode.
*/
function isEntityInAttributeInvalidEnd(code: number): boolean {
return code === CharCodes.EQUALS || isAsciiAlphaNumeric(code);
@@ -59,9 +63,6 @@ const enum EntityDecoderState {
NamedEntity,
}
/**
* Decoding mode for named entities.
*/
export enum DecodingMode {
/** Entities in text nodes that can end with any character. */
Legacy = 0,
@@ -88,13 +89,13 @@ export interface EntityErrorProducer {
export class EntityDecoder {
constructor(
/** The tree used to decode entities. */
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: False positive
private readonly decodeTree: Uint16Array,
/**
* The function that is called when a codepoint is decoded.
*
* For multi-byte named entities, this will be called multiple times,
* with the second codepoint, and the same `consumed` value.
*
* @param codepoint The decoded codepoint.
* @param consumed The number of bytes consumed by the decoder.
*/
@@ -121,13 +122,8 @@ export class EntityDecoder {
private excess = 1;
/** The mode in which the decoder is operating. */
private decodeMode = DecodingMode.Strict;
/** The number of characters that have been consumed in the current run. */
private runConsumed = 0;
/**
* Resets the instance to make it reusable.
* @param decodeMode Entity decoding mode to use.
*/
/** Resets the instance to make it reusable. */
startEntity(decodeMode: DecodingMode): void {
this.decodeMode = decodeMode;
this.state = EntityDecoderState.EntityStart;
@@ -135,7 +131,6 @@ export class EntityDecoder {
this.treeIndex = 0;
this.excess = 1;
this.consumed = 1;
this.runConsumed = 0;
}
/**
@@ -144,6 +139,7 @@ export class EntityDecoder {
*
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
* entity is incomplete, and resume when the next string is written.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -182,6 +178,7 @@ export class EntityDecoder {
* Switches between the numeric decimal and hexadecimal states.
*
* Equivalent to the `Numeric character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -201,53 +198,73 @@ export class EntityDecoder {
return this.stateNumericDecimal(input, offset);
}
/**
 * Folds a run of digits from `input` into the numeric entity accumulated
 * so far.
 *
 * The digits already held in `this.result` are scaled by `base` raised to
 * the length of the run before the run's value is added, and
 * `this.consumed` grows by the number of digits. An empty run
 * (`start === end`) leaves the decoder untouched.
 *
 * @param input The string containing the digits.
 * @param start Index of the first digit of the run.
 * @param end Index just past the last digit of the run.
 * @param base Radix of the digits (10 or 16 in this file).
 */
private addToNumericResult(
    input: string,
    start: number,
    end: number,
    base: number,
): void {
    if (start === end) return;

    const digitCount = end - start;
    // `slice` replaces the legacy `substr`; both select `input[start..end)`.
    this.result =
        this.result * Math.pow(base, digitCount) +
        Number.parseInt(input.slice(start, end), base);
    this.consumed += digitCount;
}
/**
* Parses a hexadecimal numeric entity.
*
* Equivalent to the `Hexadecimal character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
private stateNumericHex(input: string, offset: number): number {
const startIndex = offset;
while (offset < input.length) {
const char = input.charCodeAt(offset);
if (isNumber(char) || isHexadecimalCharacter(char)) {
// Convert hex digit to value (0-15); 'a'/'A' -> 10.
const digit =
char <= CharCodes.NINE
? char - CharCodes.ZERO
: (char | TO_LOWER_BIT) - CharCodes.LOWER_A + 10;
this.result = this.result * 16 + digit;
this.consumed++;
offset++;
offset += 1;
} else {
this.addToNumericResult(input, startIndex, offset, 16);
return this.emitNumericEntity(char, 3);
}
}
return -1; // Incomplete entity
this.addToNumericResult(input, startIndex, offset, 16);
return -1;
}
/**
* Parses a decimal numeric entity.
*
* Equivalent to the `Decimal character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
*/
private stateNumericDecimal(input: string, offset: number): number {
const startIndex = offset;
while (offset < input.length) {
const char = input.charCodeAt(offset);
if (isNumber(char)) {
this.result = this.result * 10 + (char - CharCodes.ZERO);
this.consumed++;
offset++;
offset += 1;
} else {
this.addToNumericResult(input, startIndex, offset, 10);
return this.emitNumericEntity(char, 2);
}
}
return -1; // Incomplete entity
this.addToNumericResult(input, startIndex, offset, 10);
return -1;
}
/**
@@ -255,6 +272,7 @@ export class EntityDecoder {
*
* Implements the logic from the `Hexadecimal character reference start
* state` and `Numeric character reference end state` in the HTML spec.
*
* @param lastCp The last code point of the entity. Used to see if the
* entity was terminated with a semicolon.
* @param expectedLength The minimum number of characters that should be
@@ -295,6 +313,7 @@ export class EntityDecoder {
* Parses a named entity.
*
* Equivalent to the `Named character reference state` in the HTML spec.
*
* @param input The string containing the entity (or a continuation of the entity).
* @param offset The current offset.
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
@@ -302,84 +321,12 @@ export class EntityDecoder {
private stateNamedEntity(input: string, offset: number): number {
const { decodeTree } = this;
let current = decodeTree[this.treeIndex];
// The length is the number of bytes of the value, including the current byte.
// The mask is the number of bytes of the value, including the current byte.
let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
while (offset < input.length) {
// Handle compact runs (possibly inline): valueLength == 0 and SEMI_REQUIRED bit set.
if (valueLength === 0 && (current & BinTrieFlags.FLAG13) !== 0) {
const runLength =
(current & BinTrieFlags.BRANCH_LENGTH) >> 7; /* 2..63 */
// If we are starting a run, check the first char.
if (this.runConsumed === 0) {
const firstChar = current & BinTrieFlags.JUMP_TABLE;
if (input.charCodeAt(offset) !== firstChar) {
return this.result === 0
? 0
: this.emitNotTerminatedNamedEntity();
}
offset++;
this.excess++;
this.runConsumed++;
}
// Check remaining characters in the run.
while (this.runConsumed < runLength) {
if (offset >= input.length) {
return -1;
}
const charIndexInPacked = this.runConsumed - 1;
const packedWord =
decodeTree[
this.treeIndex + 1 + (charIndexInPacked >> 1)
];
const expectedChar =
charIndexInPacked % 2 === 0
? packedWord & 0xff
: (packedWord >> 8) & 0xff;
if (input.charCodeAt(offset) !== expectedChar) {
this.runConsumed = 0;
return this.result === 0
? 0
: this.emitNotTerminatedNamedEntity();
}
offset++;
this.excess++;
this.runConsumed++;
}
this.runConsumed = 0;
this.treeIndex += 1 + (runLength >> 1);
current = decodeTree[this.treeIndex];
valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
}
if (offset >= input.length) break;
for (; offset < input.length; offset++, this.excess++) {
const char = input.charCodeAt(offset);
/*
* Implicit semicolon handling for nodes that require a semicolon but
* don't have an explicit ';' branch stored in the trie. If we have
* a value on the current node, it requires a semicolon, and the
* current input character is a semicolon, emit the entity using the
* current node (without descending further).
*/
if (
char === CharCodes.SEMI &&
valueLength !== 0 &&
(current & BinTrieFlags.FLAG13) !== 0
) {
return this.emitNamedEntityData(
this.treeIndex,
valueLength,
this.consumed + this.excess,
);
}
this.treeIndex = determineBranch(
decodeTree,
current,
@@ -414,18 +361,12 @@ export class EntityDecoder {
}
// If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
if (
this.decodeMode !== DecodingMode.Strict &&
(current & BinTrieFlags.FLAG13) === 0
) {
if (this.decodeMode !== DecodingMode.Strict) {
this.result = this.treeIndex;
this.consumed += this.excess;
this.excess = 0;
}
}
// Increment offset & excess for next iteration
offset++;
this.excess++;
}
return -1;
@@ -433,6 +374,7 @@ export class EntityDecoder {
/**
* Emit a named entity that was not terminated with a semicolon.
*
* @returns The number of characters consumed.
*/
private emitNotTerminatedNamedEntity(): number {
@@ -449,9 +391,11 @@ export class EntityDecoder {
/**
* Emit a named entity.
*
* @param result The index of the entity in the decode tree.
* @param valueLength The number of bytes in the entity.
* @param consumed The number of characters consumed.
*
* @returns The number of characters consumed.
*/
private emitNamedEntityData(
@@ -463,8 +407,7 @@ export class EntityDecoder {
this.emitCodePoint(
valueLength === 1
? decodeTree[result] &
~(BinTrieFlags.VALUE_LENGTH | BinTrieFlags.FLAG13)
? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
: decodeTree[result + 1],
consumed,
);
@@ -480,6 +423,7 @@ export class EntityDecoder {
* Signal to the parser that the end of the input was reached.
*
* Remaining data will be emitted and relevant errors will be produced.
*
* @returns The number of characters consumed.
*/
end(): number {
@@ -515,6 +459,7 @@ export class EntityDecoder {
/**
* Creates a function that decodes entities in a string.
*
* @param decodeTree The decode tree.
* @returns A function that decodes entities in a string.
*/
@@ -522,7 +467,7 @@ function getDecoder(decodeTree: Uint16Array) {
let returnValue = "";
const decoder = new EntityDecoder(
decodeTree,
(data) => (returnValue += String.fromCodePoint(data)),
(data) => (returnValue += fromCodePoint(data)),
);
return function decodeWithTrie(
@@ -565,9 +510,10 @@ function getDecoder(decodeTree: Uint16Array) {
/**
* Determines the branch of the current node that is taken given the current
* character. This function is used to traverse the trie.
*
* @param decodeTree The trie.
* @param current The current node.
* @param nodeIndex Index immediately after the current node header.
* @param nodeIndex The index right after the current node and its value.
* @param char The current character.
* @returns The index of the next node, or -1 if no branch is taken.
*/
@@ -594,28 +540,22 @@ export function determineBranch(
: decodeTree[nodeIndex + value] - 1;
}
// Case 3: Multiple branches encoded in packed dictionary (two keys per uint16)
const packedKeySlots = (branchCount + 1) >> 1;
// Case 3: Multiple branches encoded in dictionary
/*
* Treat packed keys as a virtual sorted array of length `branchCount`.
* Key(i) = low byte for even i, high byte for odd i in slot i>>1.
*/
let lo = 0;
let hi = branchCount - 1;
// Binary search for the character.
let lo = nodeIndex;
let hi = lo + branchCount - 1;
while (lo <= hi) {
const mid = (lo + hi) >>> 1;
const slot = mid >> 1;
const packed = decodeTree[nodeIndex + slot];
const midKey = (packed >> ((mid & 1) * 8)) & 0xff;
const midValue = decodeTree[mid];
if (midKey < char) {
if (midValue < char) {
lo = mid + 1;
} else if (midKey > char) {
} else if (midValue > char) {
hi = mid - 1;
} else {
return decodeTree[nodeIndex + packedKeySlots + mid];
return decodeTree[mid + branchCount];
}
}
@@ -627,6 +567,7 @@ const xmlDecoder = /* #__PURE__ */ getDecoder(xmlDecodeTree);
/**
* Decodes an HTML string.
*
* @param htmlString The string to decode.
* @param mode The decoding mode.
* @returns The decoded string.
@@ -640,6 +581,7 @@ export function decodeHTML(
/**
* Decodes an HTML string in an attribute.
*
* @param htmlAttribute The string to decode.
* @returns The decoded string.
*/
@@ -649,6 +591,7 @@ export function decodeHTMLAttribute(htmlAttribute: string): string {
/**
* Decodes an HTML string, requiring all entities to be terminated by a semicolon.
*
* @param htmlString The string to decode.
* @returns The decoded string.
*/
@@ -658,6 +601,7 @@ export function decodeHTMLStrict(htmlString: string): string {
/**
* Decodes an XML string, requiring all entities to be terminated by a semicolon.
*
* @param xmlString The string to decode.
* @returns The decoded string.
*/
@@ -665,7 +609,12 @@ export function decodeXML(xmlString: string): string {
return xmlDecoder(xmlString, DecodingMode.Strict);
}
export { replaceCodePoint } from "./decode-codepoint.js";
// Re-export for use by eg. htmlparser2
export { htmlDecodeTree } from "./generated/decode-data-html.js";
export { xmlDecodeTree } from "./generated/decode-data-xml.js";
export {
decodeCodePoint,
replaceCodePoint,
fromCodePoint,
} from "./decode-codepoint.js";
+31 -49
View File
@@ -1,17 +1,7 @@
import { getCodePoint, XML_BITSET_VALUE } from "./escape.js";
import { htmlTrie } from "./generated/encode-html.js";
import { xmlReplacer, getCodePoint } from "./escape.js";
/**
* We store the characters to consider as a compact bitset for fast lookups.
*/
const HTML_BITSET = /* #__PURE__ */ new Uint32Array([
0x16_00, // Bits for 09,0A,0C
0xfc_00_ff_fe, // 32..63 -> 21-2D (minus space), 2E,2F,3A-3F
0xf8_00_00_01, // 64..95 -> 40, 5B-5F
0x38_00_00_01, // 96..127-> 60, 7B-7D
]);
const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
/**
 * Characters `encodeHTML` hands to the trie encoder: tab, line feed, form
 * feed, the ASCII ranges `!`-`,`, `.`, `/`, `:`-`@`, `[` through the
 * backtick and `{`-`}`, plus every UTF-16 code unit from U+0080 to U+FFFF.
 *
 * Carries the `g` flag, so `exec` advances `lastIndex` on this shared object.
 */
const htmlReplacer = /[\t\n\f!-,./:-@[-`{-}\u0080-\uFFFF]/g;
/**
* Encodes all characters in the input using HTML entities. This includes
@@ -23,10 +13,9 @@ const XML_BITSET = /* #__PURE__ */ new Uint32Array([0, XML_BITSET_VALUE, 0, 0]);
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
* @param input Input string to encode or decode.
*/
export function encodeHTML(input: string): string {
return encodeHTMLTrieRe(HTML_BITSET, input);
return encodeHTMLTrieRe(htmlReplacer, input);
}
/**
* Encodes all non-ASCII characters, as well as characters not valid in HTML
@@ -35,61 +24,54 @@ export function encodeHTML(input: string): string {
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
* @param input Input string to encode or decode.
*/
export function encodeNonAsciiHTML(input: string): string {
return encodeHTMLTrieRe(XML_BITSET, input);
return encodeHTMLTrieRe(xmlReplacer, input);
}
function encodeHTMLTrieRe(bitset: Uint32Array, input: string): string {
let out: string | undefined;
let last = 0; // Start of the next untouched slice.
const { length } = input;
function encodeHTMLTrieRe(regExp: RegExp, input: string): string {
let returnValue = "";
let lastIndex = 0;
let match;
for (let index = 0; index < length; index++) {
while ((match = regExp.exec(input)) !== null) {
const { index } = match;
returnValue += input.substring(lastIndex, index);
const char = input.charCodeAt(index);
// Skip ASCII characters that don't need encoding
if (char < 0x80 && !((bitset[char >>> 5] >>> char) & 1)) {
continue;
}
let next = htmlTrie.get(char);
if (out === undefined) out = input.substring(0, index);
else if (last !== index) out += input.substring(last, index);
let node = htmlTrie.get(char);
if (typeof node === "object") {
if (index + 1 < length) {
if (typeof next === "object") {
// We are in a branch. Try to match the next char.
if (index + 1 < input.length) {
const nextChar = input.charCodeAt(index + 1);
const value =
typeof node.next === "number"
? node.next === nextChar
? node.nextValue
typeof next.n === "number"
? next.n === nextChar
? next.o
: undefined
: node.next.get(nextChar);
: next.n.get(nextChar);
if (value !== undefined) {
out += value;
index++;
last = index + 1;
returnValue += value;
lastIndex = regExp.lastIndex += 1;
continue;
}
}
node = node.value;
next = next.v;
}
if (node === undefined) {
// We might have a tree node without a value; skip and use a numeric entity.
if (next === undefined) {
const cp = getCodePoint(input, index);
out += `&#x${cp.toString(16)};`;
if (cp !== char) index++;
last = index + 1;
returnValue += `&#x${cp.toString(16)};`;
// Increase by 1 if we have a surrogate pair
lastIndex = regExp.lastIndex += Number(cp !== char);
} else {
out += node;
last = index + 1;
returnValue += next;
lastIndex = index + 1;
}
}
if (out === undefined) return input;
if (last < length) out += input.substr(last);
return out;
return returnValue + input.substr(lastIndex);
}
+37 -49
View File
@@ -1,3 +1,5 @@
/**
 * Matches the characters `"`, `$`, `&`, `'`, `<`, `>` and every UTF-16 code
 * unit from U+0080 to U+FFFF.
 *
 * Carries the `g` flag, so `exec` advances `lastIndex` on this shared object.
 */
export const xmlReplacer: RegExp = /["$&'<>\u0080-\uFFFF]/g;
const xmlCodeMap = new Map([
[34, "&quot;"],
[38, "&amp;"],
@@ -7,72 +9,52 @@ const xmlCodeMap = new Map([
]);
// For compatibility with node < 4, we wrap `codePointAt`
/**
* Read a code point at a given index.
* @param input Input string to encode or decode.
* @param index Current read position in the input string.
*/
export const getCodePoint: (c: string, index: number) => number =
typeof String.prototype.codePointAt === "function"
? (input: string, index: number): number => input.codePointAt(index)!
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
(c: string, index: number): number =>
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
String.prototype.codePointAt == null
? (c: string, index: number): number =>
(c.charCodeAt(index) & 0xfc_00) === 0xd8_00
? (c.charCodeAt(index) - 0xd8_00) * 0x4_00 +
c.charCodeAt(index + 1) -
0xdc_00 +
0x1_00_00
: c.charCodeAt(index);
/**
* Bitset for ASCII characters that need to be escaped in XML.
*/
export const XML_BITSET_VALUE = 0x50_00_00_c4; // 32..63 -> 34 ("),38 (&),39 ('),60 (<),62 (>)
: c.charCodeAt(index)
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
(input: string, index: number): number => input.codePointAt(index)!;
/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using XML entities. Uses a fast bitset scan instead of RegExp.
* documents using XML entities.
*
* If a character has no equivalent entity, a numeric hexadecimal reference
* (eg. `&#xfc;`) will be used.
* @param input Input string to encode or decode.
* If a character has no equivalent entity, a
* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
*/
export function encodeXML(input: string): string {
let out: string | undefined;
let last = 0;
const { length } = input;
let returnValue = "";
let lastIndex = 0;
let match;
for (let index = 0; index < length; index++) {
while ((match = xmlReplacer.exec(input)) !== null) {
const { index } = match;
const char = input.charCodeAt(index);
const next = xmlCodeMap.get(char);
// Check for ASCII chars that don't need escaping
if (
char < 0x80 &&
(((XML_BITSET_VALUE >>> char) & 1) === 0 || char >= 64 || char < 32)
) {
continue;
if (next === undefined) {
returnValue += `${input.substring(lastIndex, index)}&#x${getCodePoint(
input,
index,
).toString(16)};`;
// Increase by 1 if we have a surrogate pair
lastIndex = xmlReplacer.lastIndex += Number(
(char & 0xfc_00) === 0xd8_00,
);
} else {
returnValue += input.substring(lastIndex, index) + next;
lastIndex = index + 1;
}
if (out === undefined) out = input.substring(0, index);
else if (last !== index) out += input.substring(last, index);
if (char < 64) {
// Known replacement
out += xmlCodeMap.get(char)!;
last = index + 1;
continue;
}
// Non-ASCII: encode as numeric entity (handle surrogate pair)
const cp = getCodePoint(input, index);
out += `&#x${cp.toString(16)};`;
if (cp !== char) index++; // Skip trailing surrogate
last = index + 1;
}
if (out === undefined) return input;
if (last < length) out += input.substr(last);
return out;
return returnValue + input.substr(lastIndex);
}
/**
@@ -81,6 +63,7 @@ export function encodeXML(input: string): string {
*
* Have a look at `escapeUTF8` if you want a more concise output at the expense
* of reduced transportability.
*
* @param data String to escape.
*/
export const escape: typeof encodeXML = encodeXML;
@@ -88,8 +71,10 @@ export const escape: typeof encodeXML = encodeXML;
/**
* Creates a function that escapes all characters matched by the given regular
* expression using the given map of characters to escape to their entities.
*
* @param regex Regular expression to match characters to escape.
* @param map Map of characters to escape to their entities.
*
* @returns Function that escapes all characters matched by the given regular
* expression using the given map of characters to escape to their entities.
*/
@@ -98,7 +83,7 @@ function getEscaper(
map: Map<number, string>,
): (data: string) => string {
return function escape(data: string): string {
let match: RegExpExecArray | null;
let match;
let lastIndex = 0;
let result = "";
@@ -122,6 +107,7 @@ function getEscaper(
* Encodes all characters not valid in XML documents using XML entities.
*
* Note that the output will be character-set dependent.
*
* @param data String to escape.
*/
export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
@@ -132,6 +118,7 @@ export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper(
/**
* Encodes all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export const escapeAttribute: (data: string) => string =
@@ -147,6 +134,7 @@ export const escapeAttribute: (data: string) => string =
/**
* Encodes all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
export const escapeText: (data: string) => string = /* #__PURE__ */ getEscaper(
File diff suppressed because one or more lines are too long
@@ -1,7 +1,8 @@
// Generated using scripts/write-decode-map.ts
import { decodeBase64 } from "../internal/decode-shared.js";
/** Packed XML decode trie data. */
export const xmlDecodeTree: Uint16Array = /* #__PURE__ */ decodeBase64(
"AAJhZ2xxBwARABMAFQBtAg0AAAAAAA8AcAAmYG8AcwAnYHQAPmB0ADxg9SFvdCJg",
export const xmlDecodeTree: Uint16Array = /* #__PURE__ */ new Uint16Array(
// prettier-ignore
/* #__PURE__ */ "\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
.split("")
.map((c) => c.charCodeAt(0)),
);
File diff suppressed because one or more lines are too long
+49 -23
View File
@@ -1,10 +1,10 @@
import { type DecodingMode, decodeHTML, decodeXML } from "./decode.js";
import { decodeXML, decodeHTML, DecodingMode } from "./decode.js";
import { encodeHTML, encodeNonAsciiHTML } from "./encode.js";
import {
encodeXML,
escapeUTF8,
escapeAttribute,
escapeText,
escapeUTF8,
} from "./escape.js";
/** The level of entities to support. */
@@ -15,9 +15,6 @@ export enum EntityLevel {
HTML = 1,
}
/**
* Encoding strategy used by `encode`.
*/
export enum EncodingMode {
/**
* The output is UTF-8 encoded. Only characters that need escaping within
@@ -47,9 +44,6 @@ export enum EncodingMode {
Text,
}
/**
* Options for `decode`.
*/
export interface DecodingOptions {
/**
* The level of entities to support.
@@ -62,6 +56,9 @@ export interface DecodingOptions {
*
* Always `Strict` for XML. For HTML, set this to `DecodingMode.Attribute` if
* you are parsing an attribute value.
*
* The deprecated `decodeStrict` function defaults this to `Strict`.
*
* @default {@link DecodingMode.Legacy}
*/
mode?: DecodingMode | undefined;
@@ -69,6 +66,7 @@ export interface DecodingOptions {
/**
* Decodes a string with entities.
*
* @param input String to decode.
* @param options Decoding options.
*/
@@ -86,6 +84,24 @@ export function decode(
return decodeXML(input);
}
/**
 * Decodes a string with entities. Does not allow missing trailing semicolons for entities.
 *
 * Delegates to `decode`, with `mode` falling back to `DecodingMode.Strict`
 * when the caller did not set one. The default is applied to a shallow copy,
 * so an options object passed in by the caller is never modified.
 *
 * @param input String to decode.
 * @param options Decoding options, or an `EntityLevel` as shorthand for `{ level }`.
 * @returns The value returned by `decode` for `input` and the normalized options.
 * @deprecated Use `decode` with the `mode` set to `Strict`.
 */
export function decodeStrict(
    input: string,
    options: DecodingOptions | EntityLevel = EntityLevel.XML,
): string {
    const normalizedOptions: DecodingOptions =
        typeof options === "number"
            ? { level: options, mode: DecodingMode.Strict }
            : // Copy rather than `??=` on the argument: a reused options object must not silently gain `mode`.
              { ...options, mode: options.mode ?? DecodingMode.Strict };

    return decode(input, normalizedOptions);
}
/**
* Options for `encode`.
*/
@@ -104,6 +120,7 @@ export interface EncodingOptions {
/**
* Encodes a string with entities.
*
* @param input String to encode.
* @param options Encoding options.
*/
@@ -129,7 +146,7 @@ export function encode(
? encodeNonAsciiHTML(input)
: encodeXML(input);
}
// biome-ignore lint/complexity/noUselessSwitchCase: we get an error for the switch not being exhaustive
// eslint-disable-next-line unicorn/no-useless-switch-case
case EncodingMode.Extensive:
default: {
return level === EntityLevel.HTML
@@ -140,23 +157,32 @@ export function encode(
}
export {
DecodingMode,
decodeHTML,
decodeHTMLAttribute,
decodeHTMLStrict,
decodeXML,
decodeXML as decodeXMLStrict,
EntityDecoder,
} from "./decode.js";
encodeXML,
escape,
escapeUTF8,
escapeAttribute,
escapeText,
} from "./escape.js";
export {
encodeHTML,
encodeNonAsciiHTML,
// Legacy aliases (deprecated)
encodeHTML as encodeHTML4,
encodeHTML as encodeHTML5,
} from "./encode.js";
export {
encodeXML,
escape,
escapeAttribute,
escapeText,
escapeUTF8,
} from "./escape.js";
EntityDecoder,
DecodingMode,
decodeXML,
decodeHTML,
decodeHTMLStrict,
decodeHTMLAttribute,
// Legacy aliases (deprecated)
decodeHTML as decodeHTML4,
decodeHTML as decodeHTML5,
decodeHTMLStrict as decodeHTML4Strict,
decodeHTMLStrict as decodeHTML5Strict,
decodeXML as decodeXMLStrict,
} from "./decode.js";
+3 -8
View File
@@ -2,18 +2,13 @@
"name": "parse5",
"type": "module",
"description": "HTML parser and serializer.",
"version": "8.0.1",
"version": "8.0.0",
"author": "Ivan Nikulin <ifaaan@gmail.com> (https://github.com/inikulin)",
"contributors": [
"James Garbutt (https://github.com/43081j)",
"Felix Boehm (https://github.com/fb55)",
"Ivan Nikulin (https://github.com/inikulin)",
"Titus (https://github.com/wooorm)"
],
"contributors": "https://github.com/inikulin/parse5/graphs/contributors",
"homepage": "https://parse5.js.org",
"funding": "https://github.com/inikulin/parse5?sponsor=1",
"dependencies": {
"entities": "^8.0.0"
"entities": "^6.0.0"
},
"keywords": [
"html",