2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

fix(289): contextFreeTags closing tag bug (#290)

* feat: add tests

* fix: parsing context free

* refactor: code style

* chore: add changeset

* fix: disable coveralls
This commit is contained in:
Nikolay Kost
2025-09-14 23:18:41 +02:00
committed by GitHub
parent 0edd490a24
commit e943184294
9 changed files with 447 additions and 330 deletions
+36 -50
View File
@@ -7,49 +7,48 @@ import type { Token as TokenInterface } from "@bbob/types";
// type, value, line, row, start pos, end pos
const TOKEN_TYPE_ID = 't'; // 0;
const TOKEN_VALUE_ID = 'v'; // 1;
const TOKEN_COLUMN_ID = 'r'; // 2;
const TOKEN_LINE_ID = 'l'; // 3;
const TOKEN_START_POS_ID = 's'; // 4;
const TOKEN_END_POS_ID = 'e'; // 5;
const TOKEN_TYPE_WORD = 1; // 'word';
const TOKEN_TYPE_TAG = 2; // 'tag';
const TOKEN_TYPE_ATTR_NAME = 3; // 'attr-name';
const TOKEN_TYPE_ATTR_VALUE = 4; // 'attr-value';
const TOKEN_TYPE_SPACE = 5; // 'space';
const TOKEN_TYPE_NEW_LINE = 6; // 'new-line';
// Property keys under which a Token instance stores each of its fields
// (the Token constructor assigns `this[LINE_ID]`, `this[TYPE_ID]`, and so on).
export const TYPE_ID = 't'; // 0;
export const VALUE_ID = 'v'; // 1;
export const LINE_ID = 'l'; // 3;
export const COLUMN_ID = 'r'; // 2;
export const START_POS_ID = 's'; // 4;
export const END_POS_ID = 'e'; // 5;
// Numeric codes stored under TYPE_ID that identify the kind of a token;
// the trailing comment on each line gives the human-readable name.
export const TYPE_WORD = 1; // 'word';
export const TYPE_TAG = 2; // 'tag';
export const TYPE_ATTR_NAME = 3; // 'attr-name';
export const TYPE_ATTR_VALUE = 4; // 'attr-value';
export const TYPE_SPACE = 5; // 'space';
export const TYPE_NEW_LINE = 6; // 'new-line';
const getTokenValue = (token: Token) => {
if (token && typeof token[TOKEN_VALUE_ID] !== 'undefined') {
return token[TOKEN_VALUE_ID];
if (token && typeof token[VALUE_ID] !== 'undefined') {
return token[VALUE_ID];
}
return '';
};
const getTokenLine = (token: Token) => (token && token[TOKEN_LINE_ID]) || 0;
const getTokenLine = (token: Token) => (token && token[LINE_ID]) || 0;
const getTokenColumn = (token: Token) => (token && token[TOKEN_COLUMN_ID]) || 0;
const getTokenColumn = (token: Token) => (token && token[COLUMN_ID]) || 0;
const getStartPosition = (token: Token) => (token && token[TOKEN_START_POS_ID]) || 0;
const getStartPosition = (token: Token) => (token && token[START_POS_ID]) || 0;
const getEndPosition = (token: Token) => (token && token[TOKEN_END_POS_ID]) || 0;
const getEndPosition = (token: Token) => (token && token[END_POS_ID]) || 0;
const isTextToken = (token: Token) => {
if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') {
return token[TOKEN_TYPE_ID] === TOKEN_TYPE_SPACE
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
if (token && typeof token[TYPE_ID] !== 'undefined') {
return token[TYPE_ID] === TYPE_SPACE
|| token[TYPE_ID] === TYPE_NEW_LINE
|| token[TYPE_ID] === TYPE_WORD;
}
return false;
};
const isTagToken = (token: Token) => {
if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') {
return token[TOKEN_TYPE_ID] === TOKEN_TYPE_TAG;
if (token && typeof token[TYPE_ID] !== 'undefined') {
return token[TYPE_ID] === TYPE_TAG;
}
return false;
@@ -60,16 +59,16 @@ const isTagEnd = (token: Token) => getTokenValue(token).charCodeAt(0) === SLASH.
const isTagStart = (token: Token) => !isTagEnd(token);
const isAttrNameToken = (token: Token) => {
if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') {
return token[TOKEN_TYPE_ID] === TOKEN_TYPE_ATTR_NAME;
if (token && typeof token[TYPE_ID] !== 'undefined') {
return token[TYPE_ID] === TYPE_ATTR_NAME;
}
return false;
};
const isAttrValueToken = (token: Token) => {
if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') {
return token[TOKEN_TYPE_ID] === TOKEN_TYPE_ATTR_VALUE;
if (token && typeof token[TYPE_ID] !== 'undefined') {
return token[TYPE_ID] === TYPE_ATTR_VALUE;
}
return false;
@@ -103,20 +102,20 @@ class Token<TokenValue = string> implements TokenInterface {
readonly e: number; // end pos
constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0, start: number = 0, end: number = 0) {
this[TOKEN_LINE_ID] = row;
this[TOKEN_COLUMN_ID] = col;
this[TOKEN_TYPE_ID] = type || 0;
this[TOKEN_VALUE_ID] = String(value);
this[TOKEN_START_POS_ID] = start;
this[TOKEN_END_POS_ID] = end;
this[LINE_ID] = row;
this[COLUMN_ID] = col;
this[TYPE_ID] = type || 0;
this[VALUE_ID] = String(value);
this[START_POS_ID] = start;
this[END_POS_ID] = end;
}
get type() {
return this[TOKEN_TYPE_ID];
return this[TYPE_ID];
}
isEmpty() {
return this[TOKEN_TYPE_ID] === 0 || isNaN(this[TOKEN_TYPE_ID]);
return this[TYPE_ID] === 0 || isNaN(this[TYPE_ID]);
}
isText() {
@@ -172,18 +171,5 @@ class Token<TokenValue = string> implements TokenInterface {
}
}
export const TYPE_ID = TOKEN_TYPE_ID;
export const VALUE_ID = TOKEN_VALUE_ID;
export const LINE_ID = TOKEN_LINE_ID;
export const COLUMN_ID = TOKEN_COLUMN_ID;
export const START_POS_ID = TOKEN_START_POS_ID;
export const END_POS_ID = TOKEN_END_POS_ID;
export const TYPE_WORD = TOKEN_TYPE_WORD;
export const TYPE_TAG = TOKEN_TYPE_TAG;
export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
export const TYPE_ATTR_VALUE = TOKEN_TYPE_ATTR_VALUE;
export const TYPE_SPACE = TOKEN_TYPE_SPACE;
export const TYPE_NEW_LINE = TOKEN_TYPE_NEW_LINE;
export { Token };
export default Token;
+20 -14
View File
@@ -74,15 +74,18 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
col++;
};
const checkContextFreeMode = (name: string, isClosingTag?: boolean) => {
const setupContextFreeTag = (name: string, isClosingTag?: boolean) => {
if (contextFreeTag !== '' && isClosingTag) {
contextFreeTag = '';
}
if (contextFreeTag === '' && contextFreeTags.includes(name.toLowerCase())) {
contextFreeTag = name;
const tagName = name.toLowerCase()
if (contextFreeTag === '' && isTokenNested(name) && contextFreeTags.includes(tagName)) {
contextFreeTag = tagName;
}
};
// Builds the text of a closing tag: `openTag`, `SLASH`, the tag name, then `closeTag`.
const toEndTag = (tagName: string) => {
  const opening = `${openTag}${SLASH}`;

  return `${opening}${tagName}${closeTag}`;
};
const chars = createCharGrabber(buffer, { onSkip });
@@ -178,12 +181,13 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
const name = tagChars.grabWhile(validName);
emitToken(TYPE_TAG, name, start, masterStartPos + tagChars.getLength() + 1);
checkContextFreeMode(name);
setupContextFreeTag(name);
tagChars.skip();
prevCol++;
// in cases when we has [url=someval]GET[/url] and we dont need to parse all
// in cases when we have [url=someval]GET[/url] and we don't need to parse all
if (isSingleValueTag) {
return TAG_STATE_VALUE;
}
@@ -202,7 +206,6 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = chars.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
const isNextCharReserved = nextChar && isCharReserved(nextChar)
const isLastChar = chars.isLast()
@@ -228,7 +231,8 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
chars.skip(); // skip closeTag
emitToken(TYPE_TAG, name, startPos, endPos);
checkContextFreeMode(name, isClosingTag);
setupContextFreeTag(name, isClosingTag);
return STATE_WORD;
}
@@ -277,12 +281,11 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
if (chars.getCurr() === openTag) {
if (contextFreeTag) {
const fullTagLen = openTag.length + SLASH.length + contextFreeTag.length;
const fullTagName = `${openTag}${SLASH}${contextFreeTag}`;
const foundTag = chars.grabN(fullTagLen);
const isEndContextFreeMode = foundTag === fullTagName;
const fullTagName = toEndTag(contextFreeTag);
const foundTag = chars.grabN(fullTagName.length);
const isContextFreeEnded = foundTag.toLowerCase() === fullTagName.toLowerCase();
if (isEndContextFreeMode) {
if (isContextFreeEnded) {
return STATE_TAG;
}
} else if (chars.includes(closeTag)) {
@@ -357,12 +360,15 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
}
function isTokenNested(tokenValue: string) {
const value = openTag + SLASH + tokenValue;
const value = toEndTag(tokenValue);
if (nestedMap.has(value)) {
return !!nestedMap.get(value);
} else {
const status = caseFreeTags ? (buffer.toLowerCase().indexOf(value.toLowerCase()) > -1) : (buffer.indexOf(value) > -1);
const buf = caseFreeTags ? buffer.toLowerCase() : buffer;
const val = caseFreeTags ? value.toLowerCase() : value;
const status = buf.indexOf(val) > -1;
nestedMap.set(value, status);
+76 -78
View File
@@ -1,15 +1,10 @@
import type { NodeContent, TagNodeTree, LexerTokenizer, ParseOptions } from "@bbob/types";
import type { NodeContent, ParseOptions, TagNodeTree } from "@bbob/types";
import {
CLOSE_BRAKET,
OPEN_BRAKET,
TagNode,
isTagNode,
} from "@bbob/plugin-helper";
import { CLOSE_BRAKET, isTagNode, OPEN_BRAKET, TagNode, } from "@bbob/plugin-helper";
import { createLexer } from "./lexer.js";
import type { Token } from "./Token.js";
import { Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD } from "./Token.js";
class NodeList<Value> {
private n: Value[];
@@ -85,15 +80,21 @@ function parse(input: string, opts: ParseOptions = {}) {
*/
const nestedTagsMap = new Set<string>();
/**
 * Normalizes a tag or token value before it is used as a lookup key:
 * lower-cased when `caseFreeTags` is truthy, returned unchanged otherwise.
 */
function getValue(tokenValue: string) {
  if (caseFreeTags) {
    return tokenValue.toLowerCase();
  }

  return tokenValue;
}
function isTokenNested(token: Token) {
const tokenValue = token.getValue();
const value = caseFreeTags ? tokenValue.toLowerCase() : tokenValue;
const value = getValue(tokenValue);
const { isTokenNested } = tokenizer || {};
if (!nestedTagsMap.has(value) && isTokenNested && isTokenNested(value)) {
nestedTagsMap.add(value);
if (!nestedTagsMap.has(value) && typeof isTokenNested === "function") {
if (isTokenNested(value)) {
nestedTagsMap.add(value);
return true;
return true;
}
}
return nestedTagsMap.has(value);
@@ -103,13 +104,13 @@ function parse(input: string, opts: ParseOptions = {}) {
* @private
*/
function isTagNested(tagName: string) {
return Boolean(nestedTagsMap.has(caseFreeTags ? tagName.toLowerCase() : tagName));
return Boolean(nestedTagsMap.has(getValue(tagName)));
}
/**
* @private
*/
function isAllowedTag(value: string) {
function isTagAllowed(value: string) {
if (onlyAllowTags.length) {
return onlyAllowTags.indexOf(value.toLowerCase()) >= 0;
}
@@ -121,7 +122,7 @@ function parse(input: string, opts: ParseOptions = {}) {
* Flushes temp tag nodes and its attributes buffers
* @private
*/
function flushTagNodes() {
function tagNodesFlush() {
if (tagNodes.flush()) {
tagNodesAttrName.flush();
}
@@ -143,7 +144,7 @@ function parse(input: string, opts: ParseOptions = {}) {
/**
* @private
*/
function appendNodeAsString(
function nodesAppendAsString(
nodes?: TagNodeTree,
node?: TagNode,
isNested = true
@@ -166,13 +167,13 @@ function parse(input: string, opts: ParseOptions = {}) {
/**
* @private
*/
function appendNodes(nodes?: TagNodeTree, node?: NodeContent) {
function nodesAppend(nodes?: TagNodeTree, node?: NodeContent) {
if (Array.isArray(nodes) && typeof node !== "undefined") {
if (isTagNode(node)) {
if (isAllowedTag(node.tag)) {
if (isTagAllowed(node.tag)) {
nodes.push(node.toTagNode());
} else {
appendNodeAsString(nodes, node);
nodesAppendAsString(nodes, node);
}
} else {
nodes.push(node);
@@ -184,8 +185,8 @@ function parse(input: string, opts: ParseOptions = {}) {
* @private
* @param {Token} token
*/
function handleTagStart(token: Token) {
flushTagNodes();
function tagHandleStart(token: Token) {
tagNodesFlush();
const tagNode = TagNode.create(token.getValue(), {}, [], { from: token.getStart(), to: token.getEnd() });
const isNested = isTokenNested(token);
@@ -196,7 +197,7 @@ function parse(input: string, opts: ParseOptions = {}) {
nestedNodes.push(tagNode);
} else {
const nodes = getNodes();
appendNodes(nodes, tagNode);
nodesAppend(nodes, tagNode);
}
}
@@ -204,24 +205,24 @@ function parse(input: string, opts: ParseOptions = {}) {
* @private
* @param {Token} token
*/
function handleTagEnd(token: Token) {
function tagHandleEnd(token: Token) {
const tagName = token.getValue().slice(1);
const lastNestedNode = nestedNodes.flush();
flushTagNodes();
tagNodesFlush();
if (lastNestedNode) {
const nodes = getNodes();
const nodes = getNodes()
if (isTagNode(lastNestedNode)) {
lastNestedNode.setEnd({ from: token.getStart(), to: token.getEnd() });
}
appendNodes(nodes, lastNestedNode);
nodesAppend(nodes, lastNestedNode);
} else if (!isTagNested(tagName)) { // when we have only close tag [/some] without any open tag
const nodes = getNodes();
appendNodes(nodes, token.toString({ openTag, closeTag }));
nodesAppend(nodes, token.toString({ openTag, closeTag }));
} else if (typeof options.onError === "function") {
const tag = token.getValue();
const line = token.getLine();
@@ -239,23 +240,7 @@ function parse(input: string, opts: ParseOptions = {}) {
* @private
* @param {Token} token
*/
function handleTag(token: Token) {
// [tag]
if (token.isStart()) {
handleTagStart(token);
}
// [/tag]
if (token.isEnd()) {
handleTagEnd(token);
}
}
/**
* @private
* @param {Token} token
*/
function handleNode(token: Token) {
function nodeHandle(token: Token) {
/**
* @type {TagNode}
*/
@@ -265,37 +250,47 @@ function parse(input: string, opts: ParseOptions = {}) {
const nodes = getNodes();
if (activeTagNode !== null) {
if (token.isAttrName()) {
tagNodesAttrName.push(tokenValue);
const attrName = tagNodesAttrName.last();
switch (token.type) {
case TYPE_ATTR_NAME:
tagNodesAttrName.push(tokenValue);
const attrName = tagNodesAttrName.last();
if (attrName) {
activeTagNode.attr(attrName, "");
}
} else if (token.isAttrValue()) {
const attrName = tagNodesAttrName.last();
if (attrName) {
activeTagNode.attr(attrName, "");
}
break;
if (attrName) {
activeTagNode.attr(attrName, tokenValue);
tagNodesAttrName.flush();
} else {
activeTagNode.attr(tokenValue, tokenValue);
}
} else if (token.isText()) {
if (isNested) {
activeTagNode.append(tokenValue);
} else {
appendNodes(nodes, tokenValue);
}
} else if (token.isTag()) {
// if tag is not allowed, just pass it as is
appendNodes(nodes, token.toString({ openTag, closeTag }));
case TYPE_ATTR_VALUE:
const attrValName = tagNodesAttrName.last();
if (attrValName) {
activeTagNode.attr(attrValName, tokenValue);
tagNodesAttrName.flush();
} else {
activeTagNode.attr(tokenValue, tokenValue);
}
break;
case TYPE_SPACE:
case TYPE_NEW_LINE:
case TYPE_WORD:
if (isNested) {
activeTagNode.append(tokenValue);
} else {
nodesAppend(nodes, tokenValue);
}
break;
case TYPE_TAG:
// if tag is not allowed, just pass it as is
nodesAppend(nodes, token.toString({ openTag, closeTag }));
break;
}
} else if (token.isText()) {
appendNodes(nodes, tokenValue);
nodesAppend(nodes, tokenValue);
} else if (token.isTag()) {
// if tag is not allowed, just pass it as is
appendNodes(nodes, token.toString({ openTag, closeTag }));
nodesAppend(nodes, token.toString({ openTag, closeTag }));
}
}
@@ -305,9 +300,17 @@ function parse(input: string, opts: ParseOptions = {}) {
*/
function onToken(token: Token) {
if (token.isTag()) {
handleTag(token);
// [tag]
if (token.isStart()) {
tagHandleStart(token);
}
// [/tag]
if (token.isEnd()) {
tagHandleEnd(token);
}
} else {
handleNode(token);
nodeHandle(token);
}
}
@@ -331,13 +334,8 @@ function parse(input: string, opts: ParseOptions = {}) {
// for ex [q]test[/q][u]some[/u][q]some [u]some[/u] // forgot to close [/q]
// so we need to flush nested content to nodes array
const lastNestedNode = nestedNodes.flush();
if (
lastNestedNode !== null &&
lastNestedNode &&
isTagNode(lastNestedNode) &&
isTagNested(lastNestedNode.tag)
) {
appendNodeAsString(getNodes(), lastNestedNode, false);
if (isTagNode(lastNestedNode) && isTagNested(lastNestedNode.tag)) {
nodesAppendAsString(getNodes(), lastNestedNode, false);
}
return nodes.toArray();
+107 -108
View File
@@ -1,13 +1,6 @@
import { TYPE_ID, VALUE_ID, TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE, LINE_ID, COLUMN_ID, START_POS_ID, END_POS_ID } from '../src/Token';
import { createLexer } from '../src/lexer';
declare global {
namespace jest {
interface Matchers<R> {
toBeMantchOutput(expected: Array<unknown>): CustomMatcherResult;
}
}
}
import { parse } from "../src";
const TYPE = {
WORD: TYPE_WORD,
@@ -24,88 +17,94 @@ const tokenize = (input: string) => (createLexer(input).tokenize());
const tokenizeEscape = (input: string) => (createLexer(input, { enableEscapeTags: true }).tokenize());
const tokenizeContextFreeTags = (input: string, tags: string[] = []) => (createLexer(input, { contextFreeTags: tags }).tokenize());
describe('lexer', () => {
// Type augmentation: declares the custom `toBeMatchOutput` matcher on jest's
// `Matchers` interface so `expect(tokens).toBeMatchOutput(output)` type-checks.
declare global {
namespace jest {
interface Matchers<R> {
toBeMatchOutput(expected: Array<unknown>): CustomMatcherResult;
}
}
}
expect.extend({
toBeMatchOutput(tokens, output) {
if (tokens.length !== output.length) {
return {
message: () =>
`expected tokens length ${tokens.length} to be ${output.length}`,
pass: false,
};
}
expect.extend({
toBeMantchOutput(tokens, output) {
if (tokens.length !== output.length) {
for (let idx = 0; idx < tokens.length; idx++) {
const token = tokens[idx];
const [type, value, col, row, startPos, endPos] = output[idx];
if (typeof token !== 'object') {
return {
message: () =>
`expected tokens length ${tokens.length} to be ${output.length}`,
`token must to be Object`,
pass: false,
};
}
for (let idx = 0; idx < tokens.length; idx++) {
const token = tokens[idx];
const [type, value, col, row, startPos, endPos] = output[idx];
if (typeof token !== 'object') {
return {
message: () =>
`token must to be Object`,
pass: false,
};
}
if (token[TYPE_ID] !== type) {
return {
message: () =>
if (token[TYPE_ID] !== type) {
return {
message: () =>
`expected token type ${TYPE_NAMES[type]} but received ${TYPE_NAMES[token[TYPE_ID]]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (token[VALUE_ID] !== value) {
return {
message: () =>
`expected token value ${value} but received ${token[VALUE_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (token[LINE_ID] !== row) {
return {
message: () =>
`expected token row ${row} but received ${token[LINE_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (token[COLUMN_ID] !== col) {
return {
message: () =>
`expected token col ${col} but received ${token[COLUMN_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (type === TYPE.TAG && token[START_POS_ID] !== startPos) {
return {
message: () =>
`expected token start pos ${startPos} but received ${token[START_POS_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (type === TYPE.TAG && token[END_POS_ID] !== endPos) {
return {
message: () =>
`expected token end pos ${endPos} but received ${token[END_POS_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
pass: false,
};
}
return {
message: () =>
`no valid output`,
pass: true,
};
},
});
if (token[VALUE_ID] !== value) {
return {
message: () =>
`expected token value ${value} but received ${token[VALUE_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (token[LINE_ID] !== row) {
return {
message: () =>
`expected token row ${row} but received ${token[LINE_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (token[COLUMN_ID] !== col) {
return {
message: () =>
`expected token col ${col} but received ${token[COLUMN_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (type === TYPE.TAG && token[START_POS_ID] !== startPos) {
return {
message: () =>
`expected token start pos ${startPos} but received ${token[START_POS_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (type === TYPE.TAG && token[END_POS_ID] !== endPos) {
return {
message: () =>
`expected token end pos ${endPos} but received ${token[END_POS_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
}
return {
message: () =>
`no valid output`,
pass: true,
};
},
});
describe('lexer', () => {
test('single tag', () => {
const input = '[SingleTag]';
const tokens = tokenize(input);
@@ -113,7 +112,7 @@ describe('lexer', () => {
[TYPE.TAG, 'SingleTag', 0, 0, 0, 11],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('single tag with params', () => {
@@ -124,7 +123,7 @@ describe('lexer', () => {
[TYPE.ATTR_VALUE, '111', 6, 0],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('paired tag with single param', () => {
@@ -137,7 +136,7 @@ describe('lexer', () => {
[TYPE.TAG, '/url', 17, 0, 16, 22],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('single fake tag', () => {
@@ -149,7 +148,7 @@ describe('lexer', () => {
[TYPE.WORD, 'user=111]', 2, 0, 2],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('single tag with spaces', () => {
@@ -160,7 +159,7 @@ describe('lexer', () => {
[TYPE.TAG, 'Single Tag', 0, 0, 0, 12],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
// @TODO: this is breaking change behavior
@@ -175,7 +174,7 @@ describe('lexer', () => {
[TYPE.TAG, '/textarea', 25, 0],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('tags with single word and camel case params', () => {
@@ -213,7 +212,7 @@ describe('lexer', () => {
[TYPE.SPACE, ' ', 28, 2, 203],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('string with quotemarks', () => {
@@ -232,7 +231,7 @@ describe('lexer', () => {
[TYPE.WORD, 'Adele', 22, 0],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('tags in brakets', () => {
@@ -249,7 +248,7 @@ describe('lexer', () => {
[TYPE.WORD, ']', 13, 0],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('tag as param', () => {
@@ -262,7 +261,7 @@ describe('lexer', () => {
[TYPE.TAG, '/color', 21, 0, 21, 29],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('tag with quotemark params with spaces', () => {
@@ -278,7 +277,7 @@ describe('lexer', () => {
[TYPE.TAG, '/url', 42, 0, 42, 48],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('tag with escaped quotemark param', () => {
@@ -292,7 +291,7 @@ describe('lexer', () => {
[TYPE.TAG, '/url', 26, 0, 26, 32],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('tag param without quotemarks', () => {
@@ -306,7 +305,7 @@ describe('lexer', () => {
[TYPE.TAG, '/style', 26, 0, 25, 33],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('list tag with items', () => {
@@ -344,7 +343,7 @@ describe('lexer', () => {
[TYPE.TAG, '/list', 0, 4, 52, 59],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('few tags without spaces', () => {
@@ -366,7 +365,7 @@ describe('lexer', () => {
[TYPE.TAG, '/mytag3', 74, 0, 74, 83],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('bad tags as texts', () => {
@@ -434,7 +433,7 @@ describe('lexer', () => {
const tokens = tokenize(input);
const output = asserts[idx];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
});
@@ -452,7 +451,7 @@ describe('lexer', () => {
[TYPE.TAG, 'Finger', 15, 0, 15, 23]
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('no close tag', () => {
@@ -467,7 +466,7 @@ describe('lexer', () => {
[TYPE.WORD, 'A', 13, 0],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('escaped tag', () => {
@@ -482,7 +481,7 @@ describe('lexer', () => {
[TYPE.WORD, '[', 9, 0],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('escaped tag and escaped backslash', () => {
@@ -502,7 +501,7 @@ describe('lexer', () => {
[TYPE.WORD, ']', 21, 0],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('context free tag [code]', () => {
@@ -520,12 +519,12 @@ describe('lexer', () => {
[TYPE.TAG, '/code', 25, 0, 25, 32],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('context free tag case insensitive [CODE]', () => {
const input = '[CODE] [b]some string[/b][/CODE]';
const tokens = tokenizeContextFreeTags(input, ['code']);
const tokens = tokenizeContextFreeTags('[CODE] [b]some string[/b][/CODE]', ['code']);
const output = [
[TYPE.TAG, 'CODE', 0, 0, 0, 6],
[TYPE.SPACE, ' ', 6, 0],
@@ -538,7 +537,7 @@ describe('lexer', () => {
[TYPE.TAG, '/CODE', 25, 0, 25, 32],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('bad closed tag with escaped backslash', () => {
@@ -552,7 +551,7 @@ describe('lexer', () => {
[TYPE.WORD, 'b]', 9, 0],
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
describe('html', () => {
@@ -575,7 +574,7 @@ describe('lexer', () => {
[TYPE.TAG, '/button', 78, 0, 78, 87]
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('attributes with no quotes or value', () => {
@@ -594,7 +593,7 @@ describe('lexer', () => {
[TYPE.TAG, '/button', 63, 0, 62, 71]
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test('attributes with no space between them. No valid, but accepted by the browser', () => {
@@ -612,7 +611,7 @@ describe('lexer', () => {
[TYPE.TAG, '/button', 76, 0, 76, 85]
];
expect(tokens).toBeMantchOutput(output);
expect(tokens).toBeMatchOutput(output);
});
test.skip('style tag', () => {
@@ -634,7 +633,7 @@ input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;he
-->
</style>`;
const tokens = tokenizeHTML(content);
expect(tokens).toBeMantchOutput([]);
expect(tokens).toBeMatchOutput([]);
});
test.skip('script tag', () => {
@@ -645,7 +644,7 @@ input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;he
//-->
</script>`;
const tokens = tokenizeHTML(content);
expect(tokens).toBeMantchOutput([]);
expect(tokens).toBeMatchOutput([]);
});
});
});
+153 -40
View File
@@ -1,12 +1,36 @@
import { parse } from '../src';
import type { TagNode, TagNodeTree } from "@bbob/types";
describe('Parser', () => {
const expectOutput = (ast: TagNodeTree, output: Partial<TagNodeTree>) => {
expect(ast).toBeInstanceOf(Array);
expect(ast).toMatchObject(output as {} | TagNode[]);
};
/**
 * Converts a parsed tree into plain values for comparison.
 *
 * Every object item that exposes a `toJSON` method is replaced by the result
 * of calling it; all other items are kept as they are. Input that is not an
 * array is returned unchanged.
 */
const astToJSON = (ast: TagNodeTree) => {
  if (!Array.isArray(ast)) {
    return ast;
  }

  return ast.map((item) => {
    // `typeof null` is 'object', so rule null out before reading `.toJSON`.
    if (item !== null && typeof item === 'object' && typeof item.toJSON === 'function') {
      return item.toJSON();
    }

    return item;
  });
};
// Type augmentation: declares the custom `toBeMatchAST` matcher on jest's
// `Matchers` interface so `expect(ast).toBeMatchAST(output)` type-checks.
declare global {
namespace jest {
interface Matchers<R> {
toBeMatchAST(expected: Array<unknown>): CustomMatcherResult;
}
}
}
// Registers the `toBeMatchAST` matcher. The received tree is passed through
// `astToJSON` and compared with `toMatchObject`; a mismatch surfaces through
// that inner assertion, so reaching the return statement always reports a pass.
expect.extend({
  toBeMatchAST(ast, output) {
    const actual = astToJSON(ast);
    expect(actual).toMatchObject(output as {} | TagNode[]);

    const message = () => `no valid output`;

    return { message, pass: true };
  },
});
describe('Parser', () => {
test('parse paired tags tokens', () => {
const ast = parse('[best name=value]Foo Bar[/best]');
const output = [
@@ -31,7 +55,7 @@ describe('Parser', () => {
},
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('parse paired tags tokens 2', () => {
@@ -56,7 +80,7 @@ describe('Parser', () => {
},
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
describe('onlyAllowTags', () => {
@@ -87,7 +111,7 @@ describe('Parser', () => {
},
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('parse only allowed tags with params', () => {
@@ -96,7 +120,7 @@ describe('Parser', () => {
};
const ast = parse('hello [blah foo="bar"]world[/blah]', options);
expectOutput(ast, [
expect(ast).toBeMatchAST([
'hello',
' ',
'[blah foo="bar"]',
@@ -111,7 +135,7 @@ describe('Parser', () => {
};
const ast = parse('hello [blah="bar"]world[/blah]', options);
expectOutput(ast, [
expect(ast).toBeMatchAST([
'hello',
' ',
'[blah="bar"]',
@@ -180,7 +204,7 @@ describe('Parser', () => {
'[/tab]',
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('parse only allowed tags case insensitive', () => {
@@ -210,7 +234,7 @@ describe('Parser', () => {
},
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
});
@@ -243,8 +267,53 @@ describe('Parser', () => {
}
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
// With `code` listed as a context-free tag, the test expects everything up to
// the real [/code] to stay plain text, including the inner [codeButton] tag
// whose name merely starts with "code".
test('nesting similar context free tags [code][codeButton]text[/codeButton][/code]', () => {
  const input = '[code][codeButton]text[/codeButton][/code]';
  const options = { contextFreeTags: ['code'] };
  const expected = [
    {
      tag: 'code',
      attrs: {},
      content: ['[', 'codeButton]text', '[', '/codeButton]'],
    },
  ];

  expect(parse(input, options)).toBeMatchAST(expected);
});
// The input never contains [/code]; the test expects the inner [codeButton]
// pair to be parsed as a regular tag and both [code] tags to come out as
// empty nodes, even though `code` is listed as context free.
test('broken nesting similar context free tags [code][codeButton]text[/codeButton][code]', () => {
  const input = '[code][codeButton]text[/codeButton][code]';
  const options = { contextFreeTags: ['code'] };
  const expected = [
    { tag: 'code', attrs: {}, content: [] },
    { tag: 'codeButton', attrs: {}, content: ['text'] },
    { tag: 'code', attrs: {}, content: [] },
  ];

  expect(parse(input, options)).toBeMatchAST(expected);
});
});
describe('caseFreeTags', () => {
@@ -268,7 +337,7 @@ describe('Parser', () => {
"[/H1]"
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('case free tags', () => {
@@ -295,10 +364,57 @@ describe('Parser', () => {
}
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
})
// Default options (no context-free tags): [codeButton] is expected to be
// parsed as a child node of [code] despite the shared "code" prefix.
test('nesting similar tags [code][codeButton]text[/codeButton][/code]', () => {
  const input = '[code][codeButton]text[/codeButton][/code]';
  const innerNode = { tag: 'codeButton', attrs: {}, content: ['text'] };
  const expected = [
    { tag: 'code', attrs: {}, content: [innerNode] },
  ];

  expect(parse(input)).toBeMatchAST(expected);
});
// Default options with [code] never closed: the test expects three sibling
// nodes, namely an empty `code`, `codeButton` holding its text, and a second
// empty `code`.
test('forgot close code tag [code][codeButton]text[/codeButton][code]', () => {
  const input = '[code][codeButton]text[/codeButton][code]';
  const expected = [
    { tag: 'code', attrs: {}, content: [] },
    { tag: 'codeButton', attrs: {}, content: ['text'] },
    { tag: 'code', attrs: {}, content: [] },
  ];

  expect(parse(input)).toBeMatchAST(expected);
});
test('parse inconsistent tags', () => {
const ast = parse('[h1 name=value]Foo [Bar] /h1]');
const output = [
@@ -316,7 +432,7 @@ describe('Parser', () => {
'Foo',
' ',
{
tag: 'bar',
tag: 'Bar',
attrs: {},
content: [],
start: {
@@ -328,7 +444,7 @@ describe('Parser', () => {
'/h1]',
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('parse closed tag', () => {
@@ -337,7 +453,7 @@ describe('Parser', () => {
'[/h1]',
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('parse tag with value param', () => {
@@ -360,7 +476,7 @@ describe('Parser', () => {
},
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('parse tag with quoted param with spaces', () => {
@@ -385,7 +501,7 @@ describe('Parser', () => {
},
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('parse single tag with params', () => {
@@ -404,7 +520,7 @@ describe('Parser', () => {
},
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
test('detect inconsistent tag', () => {
@@ -463,14 +579,14 @@ describe('Parser', () => {
},
];
expectOutput(ast, output);
expect(ast).toBeMatchAST(output);
});
// @TODO: this is breaking change behavior
test.skip('parse tags with single attributes like disabled', () => {
const ast = parse('[b]hello[/b] [textarea disabled]world[/textarea]');
expectOutput(ast, [
expect(ast).toBeMatchAST([
{
tag: 'b',
attrs: {},
@@ -506,7 +622,7 @@ describe('Parser', () => {
test('parse url tag with get params', () => {
const ast = parse('[url=https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any]GET[/url]');
expectOutput(ast, [
expect(ast).toBeMatchAST([
{
tag: 'url',
attrs: {
@@ -531,7 +647,7 @@ describe('Parser', () => {
attr value"] this is a spoiler
[b]this is bold [i]this is bold and italic[/i] this is bold again[/b]
[/spoiler]this is outside again`);
expectOutput(ast, [
expect(ast).toBeMatchAST([
"this",
" ",
"is",
@@ -632,7 +748,7 @@ describe('Parser', () => {
[avatar href="/avatar/4/3/b/1606.jpg@20x20?cache=1561462725&bgclr=ffffff" size=xs][/avatar]
Group Name Go[/url] `);
expectOutput(ast, [
expect(ast).toBeMatchAST([
{
tag: 'url',
attrs: {
@@ -684,7 +800,7 @@ describe('Parser', () => {
test('parse url tag with # and = symbols [google docs]', () => {
const ast = parse('[url href=https://docs.google.com/spreadsheets/d/1W9VPUESF_NkbSa_HtRFrQNl0nYo8vPCxJFy7jD3Tpio/edit#gid=0]Docs[/url]');
expectOutput(ast, [
expect(ast).toBeMatchAST([
{
tag: 'url',
attrs: {
@@ -710,8 +826,7 @@ sdfasdfasdf
[url=xxx]xxx[/url]`;
expectOutput(
parse(str),
expect(parse(str)).toBeMatchAST(
[
{
tag: 'quote', attrs: {}, content: ['some'],
@@ -760,8 +875,7 @@ sdfasdfasdf
test('parse with lost closing tag on from', () => {
const str = `[quote]xxxsdfasdf[quote]some[/quote][color=red]test[/color]sdfasdfasdf[url=xxx]xxx[/url]`;
expectOutput(
parse(str),
expect(parse(str)).toBeMatchAST(
[
'[quote]',
'xxxsdfasdf',
@@ -806,8 +920,7 @@ sdfasdfasdf
test('parse with lost closing tag on to', () => {
const str = `[quote]some[/quote][color=red]test[/color]sdfasdfasdf[url=xxx]xxx[/url][quote]xxxsdfasdf`;
expectOutput(
parse(str),
expect(parse(str)).toBeMatchAST(
[
{
tag: 'quote', attrs: {}, content: ['some'],
@@ -852,7 +965,7 @@ sdfasdfasdf
test('parse with url in tag content', () => {
const input = parse('[img]https://tw.greywool.com/i/e3Ph5.png[/img]');
expectOutput(input, [
expect(input).toBeMatchAST([
{
tag: 'img',
attrs: {},
@@ -874,7 +987,7 @@ sdfasdfasdf
whitespaceInTags: false
})
expectOutput(input, [
expect(input).toBeMatchAST([
{
tag: 'b',
attrs: {},
@@ -913,7 +1026,7 @@ sdfasdfasdf
const content = `<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>`;
const ast = parseHTML(content);
expectOutput(ast, [
expect(ast).toBeMatchAST([
{
"tag": "button",
"attrs": {
@@ -942,7 +1055,7 @@ sdfasdfasdf
const content = `<button id="test1" class=value2 disabled required>class=value2 disabled</button>`;
const ast = parseHTML(content);
expectOutput(ast, [
expect(ast).toBeMatchAST([
{
"tag": "button",
"attrs": {
@@ -972,7 +1085,7 @@ sdfasdfasdf
const content = `<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>`;
const ast = parseHTML(content);
expectOutput(ast, [
expect(ast).toBeMatchAST([
{
"tag": "button",
"attrs": {
@@ -1000,7 +1113,7 @@ sdfasdfasdf
enableEscapeTags: true
});
expectOutput(ast, [
expect(ast).toBeMatchAST([
'[',
'b',
']',
@@ -1016,7 +1129,7 @@ sdfasdfasdf
enableEscapeTags: true
});
expectOutput(ast, [
expect(ast).toBeMatchAST([
'\\',
'[',
'b',
+32 -38
View File
@@ -1,14 +1,7 @@
import type { NodeContent, TagNodeObject, TagNodeTree, TagPosition } from "@bbob/types";
import { OPEN_BRAKET, CLOSE_BRAKET, SLASH } from './char.js';
import {
getUniqAttr,
getNodeLength,
appendToNode,
attrsToString,
attrValue,
isTagNode,
} from './helpers.js';
import { CLOSE_BRAKET, OPEN_BRAKET, SLASH } from './char.js';
import { appendToNode, attrsToString, attrValue, getNodeLength, getUniqAttr, isTagNode, } from './helpers.js';
const getTagAttrs = <AttrValue>(tag: string, params: Record<string, AttrValue>) => {
const uniqAttr = getUniqAttr(params);
@@ -27,19 +20,19 @@ const getTagAttrs = <AttrValue>(tag: string, params: Record<string, AttrValue>)
return `${tag}${attrsToString(params)}`;
};
const renderContent = (content: TagNodeTree, openTag: string, closeTag: string) => {
const toString = (node: NodeContent) => {
if (isTagNode(node)) {
return node.toString({ openTag, closeTag });
}
const toString = (node: NodeContent, openTag: string, closeTag: string) => {
if (isTagNode(node)) {
return node.toString({ openTag, closeTag });
}
return String(node);
};
return String(node);
};
const nodeTreeToString = (content: TagNodeTree, openTag: string, closeTag: string) => {
if (Array.isArray(content)) {
return content.reduce<string>((r, node) => {
if (node !== null) {
return r + toString(node);
return r + toString(node, openTag, closeTag);
}
return r;
@@ -47,7 +40,7 @@ const renderContent = (content: TagNodeTree, openTag: string, closeTag: string)
}
if (content) {
return toString(content);
return toString(content, openTag, closeTag);
}
return null;
@@ -60,10 +53,16 @@ export class TagNode<TagValue extends any = any> implements TagNodeObject {
public start?: TagPosition;
public end?: TagPosition;
constructor(tag: string | TagValue, attrs: Record<string, unknown>, content: TagNodeTree) {
this.tag = tag;
constructor(tag: string | TagValue, attrs: Record<string, unknown>, content: TagNodeTree, start?: TagPosition, end?: TagPosition) {
this.tag = tag
this.attrs = attrs;
this.content = content;
this.start = start;
this.end = end;
}
/**
 * Length of this node, as computed by the `getNodeLength` helper.
 *
 * @returns the numeric length reported by `getNodeLength(this)`.
 */
get length(): number {
return getNodeLength(this);
}
attr(name: string, value?: unknown) {
@@ -86,10 +85,6 @@ export class TagNode<TagValue extends any = any> implements TagNodeObject {
this.end = value;
}
/**
 * Length of this node, as computed by the `getNodeLength` helper.
 *
 * @returns the numeric length reported by `getNodeLength(this)`.
 */
get length(): number {
return getNodeLength(this);
}
toTagStart({ openTag = OPEN_BRAKET, closeTag = CLOSE_BRAKET } = {}) {
const tagAttrs = getTagAttrs(String(this.tag), this.attrs);
@@ -101,18 +96,11 @@ export class TagNode<TagValue extends any = any> implements TagNodeObject {
}
toTagNode() {
const newNode = new TagNode(String(this.tag).toLowerCase(), this.attrs, this.content);
if (this.start) {
newNode.setStart(this.start);
}
if (this.end) {
newNode.setEnd(this.end);
}
return newNode;
return new TagNode(this.tag, this.attrs, this.content, this.start, this.end);
}
toString({ openTag = OPEN_BRAKET, closeTag = CLOSE_BRAKET } = {}): string {
const content = this.content ? renderContent(this.content, openTag, closeTag) : '';
const content = this.content ? nodeTreeToString(this.content, openTag, closeTag) : '';
const tagStart = this.toTagStart({ openTag, closeTag });
if (this.content === null || Array.isArray(this.content) && this.content.length === 0) {
@@ -122,12 +110,18 @@ export class TagNode<TagValue extends any = any> implements TagNodeObject {
return `${tagStart}${content}${this.toTagEnd({ openTag, closeTag })}`;
}
/**
 * Returns a plain-object view of this node containing only its data fields:
 * `tag`, `attrs`, `content`, `start` and `end`.
 *
 * The field values are passed through as-is (not cloned), so the returned
 * object shares `attrs`, `content` and the position objects with this node.
 * Because the method is named `toJSON`, `JSON.stringify` will use its result
 * when serializing a node.
 *
 * @returns an object literal with the node's `tag`, `attrs`, `content`,
 * `start` and `end`.
 */
toJSON() {
return {
tag: this.tag,
attrs: this.attrs,
content: this.content,
start: this.start,
end: this.end,
};
}
static create(tag: string, attrs: Record<string, unknown> = {}, content: TagNodeTree = null, start?: TagPosition) {
const node = new TagNode(tag, attrs, content);
if (start) {
node.setStart(start);
}
return node;
return new TagNode(tag, attrs, content, start);
}
static isOf(node: TagNode, type: string) {
+1
View File
@@ -6,6 +6,7 @@ export interface TagNodeObject<TagValue extends any = any> {
content?: TagNodeTree<TagValue>;
start?: TagPosition;
end?: TagPosition;
toJSON?: () => TagNodeObject<TagValue>;
}
export type NodeContent<TagValue extends any = any> = TagNodeObject<TagValue> | StringNode | null;