import { describe, it, expect } from "vitest";
import { Tokenizer } from "./index.js";
import type { Callbacks } from "./Tokenizer.js";
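
// Tokenizes `data` with the given options and records every callback the
// Tokenizer fires. The Proxy stands in for the Callbacks object: reading any
// property yields a function that appends [callbackName, ...args] to `log`.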
function tokenize(data: string, options = {}) {
    const log: unknown[][] = [];
    const tokenizer = new Tokenizer(
        options,
        new Proxy(
            {},
            {
                get(_, property) {
                    return (...values: unknown[]) =>
                        log.push([property, ...values]);
                },
            },
        ) as Callbacks,
    );

    tokenizer.write(data);
    tokenizer.end();

    return log;
}
describe("Tokenizer", () => {
|
|
describe("should support self-closing special tags", () => {
|
|
it("for self-closing script tag", () => {
|
|
expect(tokenize("<script /><div></div>")).toMatchSnapshot();
|
|
});
|
|
it("for self-closing style tag", () => {
|
|
expect(tokenize("<style /><div></div>")).toMatchSnapshot();
|
|
});
|
|
it("for self-closing title tag", () => {
|
|
expect(tokenize("<title /><div></div>")).toMatchSnapshot();
|
|
});
|
|
it("for self-closing textarea tag", () => {
|
|
expect(tokenize("<textarea /><div></div>")).toMatchSnapshot();
|
|
});
|
|
it("for self-closing xmp tag", () => {
|
|
expect(tokenize("<xmp /><div></div>")).toMatchSnapshot();
|
|
});
|
|
});
|
|
|
|
describe("should support standard special tags", () => {
|
|
it("for normal script tag", () => {
|
|
expect(tokenize("<script></script><div></div>")).toMatchSnapshot();
|
|
});
|
|
it("for normal style tag", () => {
|
|
expect(tokenize("<style></style><div></div>")).toMatchSnapshot();
|
|
});
|
|
it("for normal sitle tag", () => {
|
|
expect(tokenize("<title></title><div></div>")).toMatchSnapshot();
|
|
});
|
|
it("for normal textarea tag", () => {
|
|
expect(
|
|
tokenize("<textarea></textarea><div></div>"),
|
|
).toMatchSnapshot();
|
|
});
|
|
it("for normal xmp tag", () => {
|
|
expect(tokenize("<xmp></xmp><div></div>")).toMatchSnapshot();
|
|
});
|
|
});
|
|
|
|
describe("should treat html inside special tags as text", () => {
|
|
it("for div inside script tag", () => {
|
|
expect(tokenize("<script><div></div></script>")).toMatchSnapshot();
|
|
});
|
|
it("for div inside style tag", () => {
|
|
expect(tokenize("<style><div></div></style>")).toMatchSnapshot();
|
|
});
|
|
it("for div inside title tag", () => {
|
|
expect(tokenize("<title><div></div></title>")).toMatchSnapshot();
|
|
});
|
|
it("for div inside textarea tag", () => {
|
|
expect(
|
|
tokenize("<textarea><div></div></textarea>"),
|
|
).toMatchSnapshot();
|
|
});
|
|
it("for div inside xmp tag", () => {
|
|
expect(tokenize("<xmp><div></div></xmp>")).toMatchSnapshot();
|
|
});
|
|
});
|
|
|
|
describe("should correctly mark attributes", () => {
|
|
it("for no value attribute", () => {
|
|
expect(tokenize("<div aaaaaaa >")).toMatchSnapshot();
|
|
});
|
|
it("for no quotes attribute", () => {
|
|
expect(tokenize("<div aaa=aaa >")).toMatchSnapshot();
|
|
});
|
|
it("for single quotes attribute", () => {
|
|
expect(tokenize("<div aaa='a' >")).toMatchSnapshot();
|
|
});
|
|
it("for double quotes attribute", () => {
|
|
expect(tokenize('<div aaa="a" >')).toMatchSnapshot();
|
|
});
|
|
});
|
|
|
|
describe("should not break after special tag followed by an entity", () => {
|
|
it("for normal special tag", () => {
|
|
expect(tokenize("<style>a{}</style>'<br/>")).toMatchSnapshot();
|
|
});
|
|
it("for self-closing special tag", () => {
|
|
expect(tokenize("<style />'<br/>")).toMatchSnapshot();
|
|
});
|
|
});
|
|
|
|
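
    // Covers named and legacy (no trailing semicolon) entities, entities inside
    // attribute values, and multi-code-point entities such as
    // &NotGreaterFullEqual; (U+2267 U+0338).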
describe("should handle entities", () => {
|
|
it("for XML entities", () =>
|
|
expect(
|
|
tokenize("&>&<üabcde", {
|
|
xmlMode: true,
|
|
}),
|
|
).toMatchSnapshot());
|
|
|
|
it("for entities in attributes (#276)", () =>
|
|
expect(
|
|
tokenize(
|
|
'<img src="?&image_uri=1&ℑ=2&image=3"/>?&image_uri=1&ℑ=2&image=3',
|
|
),
|
|
).toMatchSnapshot());
|
|
|
|
it("for trailing legacy entity", () =>
|
|
expect(tokenize("⨱×bar")).toMatchSnapshot());
|
|
|
|
it("for multi-byte entities", () =>
|
|
expect(tokenize("≧̸")).toMatchSnapshot());
|
|
});
|
|
|
|
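
    // Feeds the entity "&amp;" split across two write() chunks and pauses the
    // tokenizer from inside the "ontext" callback; buffered input has to
    // survive the pause()/resume() cycle without losing data.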
it("should not lose data when pausing", () => {
|
|
const log: unknown[][] = [];
|
|
const tokenizer = new Tokenizer(
|
|
{},
|
|
new Proxy(
|
|
{},
|
|
{
|
|
get(_, property) {
|
|
return (...values: unknown[]) => {
|
|
if (property === "ontext") {
|
|
tokenizer.pause();
|
|
}
|
|
log.push([property, ...values]);
|
|
};
|
|
},
|
|
},
|
|
) as Callbacks,
|
|
);
|
|
|
|
tokenizer.write("&am");
|
|
tokenizer.write("p; it up!");
|
|
tokenizer.resume();
|
|
tokenizer.resume();
|
|
|
|
// Tokenizer shouldn't be paused
|
|
expect(tokenizer).toHaveProperty("running", true);
|
|
|
|
tokenizer.end();
|
|
|
|
expect(log).toMatchSnapshot();
|
|
});
|
|
});
|