2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-06-20 20:00:33 +03:00

fix(296): parse with lost repeated closing tag (#297)

* fix: extract node list to a separate file, fix nested parsing

* chore:  node list tests

* fix: nested tags parsing

* chore: changeset

* chore: remove unused files

* chore: disable publish on every commit
This commit is contained in:
Nikolay Kost
2025-10-14 04:11:19 +02:00
committed by GitHub
parent 40041a0680
commit 29f909a589
7 changed files with 183 additions and 77 deletions
+27
View File
@@ -0,0 +1,27 @@
---
"@bbob/parser": major
"@bbob/cli": major
"@bbob/core": major
"@bbob/html": major
"@bbob/plugin-helper": major
"@bbob/preset": major
"@bbob/preset-html5": major
"@bbob/preset-react": major
"@bbob/preset-vue": major
"@bbob/react": major
"@bbob/types": major
"@bbob/vue2": major
"@bbob/vue3": major
---
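Now `@bbob/parser` correctly parses nested tags when a repeated opening tag is never closed: the unclosed opening tags are kept as plain text and only the properly closed tag is converted. For example, it converts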
```html
Hello World[u]Wrong underline[u] This is another text [u]and this, too[/u]
```
to
```html
Hello World[u]Wrong underline[u] This is another text <u>and this, too</u>
```
Fixes #296, #295
+8 -1
View File
@@ -1,5 +1,12 @@
name: Pull Request name: Pull Request
on: on:
workflow_dispatch:
inputs:
test:
description: 'Build & Publish Pre-alpha Release'
required: true
type: boolean
default: true
# workflow_run: # workflow_run:
# workflows: # workflows:
# - Tests # - Tests
@@ -23,7 +30,7 @@ permissions:
jobs: jobs:
build: build:
name: 'Build & pre-alpha release' name: 'Build & Publish Pre-alpha Release'
runs-on: ubuntu-latest runs-on: ubuntu-latest
+36
View File
@@ -0,0 +1,36 @@
/**
 * Small stack-like container used to buffer values while parsing.
 *
 * Values are appended with `push`, inspected with `last` and removed from the
 * end with `flush`, so the most recently pushed value is always handled first.
 */
class NodeList<Value> {
  /** Backing storage; the final element is the most recently pushed value. */
  private readonly n: Value[] = [];

  /**
   * Peeks at the most recently pushed value without removing it.
   *
   * @returns the last value, or `undefined` when the list is empty
   */
  last(): Value | undefined {
    // Indexing past the bounds of an array yields `undefined`, which covers the empty case.
    return this.n[this.n.length - 1];
  }

  /**
   * Tells whether the list currently holds at least one value.
   *
   * @returns `true` when the list is not empty
   */
  has(): boolean {
    return this.n.length !== 0;
  }

  /**
   * Removes the most recently pushed value from the list.
   *
   * @returns the removed value, or `undefined` when the list is empty
   */
  flush(): Value | undefined {
    // `pop` on an empty array returns `undefined` and leaves the array untouched.
    return this.n.pop();
  }

  /**
   * Appends a value to the end of the list.
   *
   * @param value - value to store
   */
  push(value: Value): void {
    this.n.push(value);
  }

  /**
   * Exposes the backing array itself (not a copy), so mutations made through
   * the returned reference are visible to the list and vice versa.
   *
   * @returns the internal array
   */
  ref(): Value[] {
    return this.n;
  }
}
export { NodeList };
export default NodeList;
+5
View File
@@ -91,6 +91,11 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
/** /**
* Emits newly created token to subscriber * Emits newly created token to subscriber
*
* @param {number} type - 1 - word, 2 - tag, 3 - attr-name, 4 - attr-value, 5 - space, 6 - new-line
* @param {string} value - token value
* @param {number} startPos - start position
* @param {number} endPos - end position
*/ */
function emitToken(type: number, value: string, startPos?: number, endPos?: number) { function emitToken(type: number, value: string, startPos?: number, endPos?: number) {
const token = createTokenOfType(type, value, row, prevCol, startPos, endPos); const token = createTokenOfType(type, value, row, prevCol, startPos, endPos);
+42 -76
View File
@@ -6,37 +6,7 @@ import { createLexer } from "./lexer.js";
import { Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD } from "./Token.js"; import { Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD } from "./Token.js";
class NodeList<Value> { import { NodeList } from "./NodeList.js";
private n: Value[];
constructor() {
this.n = [];
}
last() {
if (
Array.isArray(this.n) &&
this.n.length > 0 &&
typeof this.n[this.n.length - 1] !== "undefined"
) {
return this.n[this.n.length - 1];
}
return null;
}
flush() {
return this.n.length ? this.n.pop() : false;
}
push(value: Value) {
this.n.push(value);
}
toArray() {
return this.n;
}
}
const createList = <Type>() => new NodeList<Type>(); const createList = <Type>() => new NodeList<Type>();
@@ -67,13 +37,13 @@ function parse(input: string, opts: ParseOptions = {}) {
* @private * @private
* @type {NodeList} * @type {NodeList}
*/ */
const tagNodes = createList<TagNode>(); let activeTagNode: TagNode | null = null;
/** /**
* Temp buffer of tag attributes * Temp buffer of tag attributes
* @private * @private
* @type {NodeList} * @type {NodeList}
*/ */
const tagNodesAttrName = createList<string>(); let activeTagNodesAttrName: string | null = null;
/** /**
* Cache for nested tags checks * Cache for nested tags checks
@@ -122,30 +92,31 @@ function parse(input: string, opts: ParseOptions = {}) {
* Flushes temp tag nodes and its attributes buffers * Flushes temp tag nodes and its attributes buffers
* @private * @private
*/ */
function tagNodesFlush() { function activeTagNodeFlush() {
if (tagNodes.flush()) { if (activeTagNode) {
tagNodesAttrName.flush(); activeTagNode = null;
activeTagNodesAttrName = null;
} }
} }
/** /**
* @private * @private
*/ */
function getNodes() { function getNodesContent() {
const lastNestedNode = nestedNodes.last(); const lastNestedNode = nestedNodes.last();
if (lastNestedNode && isTagNode(lastNestedNode)) { if (lastNestedNode && isTagNode(lastNestedNode)) {
return lastNestedNode.content; return lastNestedNode.content;
} }
return nodes.toArray(); return nodes.ref();
} }
/** /**
* @private * @private
*/ */
function nodesAppendAsString( function nodesAppendAsString(
nodes?: TagNodeTree, nodes: TagNodeTree,
node?: TagNode, node?: TagNode,
isNested = true isNested = true
) { ) {
@@ -167,7 +138,9 @@ function parse(input: string, opts: ParseOptions = {}) {
/** /**
* @private * @private
*/ */
function nodesAppend(nodes?: TagNodeTree, node?: NodeContent) { function nodesAppend(node: NodeContent) {
const nodes = getNodesContent() as TagNodeTree;
if (Array.isArray(nodes) && typeof node !== "undefined") { if (Array.isArray(nodes) && typeof node !== "undefined") {
if (isTagNode(node)) { if (isTagNode(node)) {
if (isTagAllowed(node.tag)) { if (isTagAllowed(node.tag)) {
@@ -186,18 +159,17 @@ function parse(input: string, opts: ParseOptions = {}) {
* @param {Token} token * @param {Token} token
*/ */
function tagHandleStart(token: Token) { function tagHandleStart(token: Token) {
tagNodesFlush(); activeTagNodeFlush();
const tagNode = TagNode.create(token.getValue(), {}, [], { from: token.getStart(), to: token.getEnd() }); const tagNode = TagNode.create(token.getValue(), {}, [], { from: token.getStart(), to: token.getEnd() });
const isNested = isTokenNested(token); const isNested = isTokenNested(token);
tagNodes.push(tagNode); activeTagNode = tagNode;
if (isNested) { if (isNested) {
nestedNodes.push(tagNode); nestedNodes.push(tagNode);
} else { } else {
const nodes = getNodes(); nodesAppend(tagNode);
nodesAppend(nodes, tagNode);
} }
} }
@@ -209,20 +181,16 @@ function parse(input: string, opts: ParseOptions = {}) {
const tagName = token.getValue().slice(1); const tagName = token.getValue().slice(1);
const lastNestedNode = nestedNodes.flush(); const lastNestedNode = nestedNodes.flush();
tagNodesFlush(); activeTagNodeFlush();
if (lastNestedNode) { if (lastNestedNode) {
const nodes = getNodes()
if (isTagNode(lastNestedNode)) { if (isTagNode(lastNestedNode)) {
lastNestedNode.setEnd({ from: token.getStart(), to: token.getEnd() }); lastNestedNode.setEnd({ from: token.getStart(), to: token.getEnd() });
} }
nodesAppend(nodes, lastNestedNode); nodesAppend(lastNestedNode);
} else if (!isTagNested(tagName)) { // when we have only close tag [/some] without any open tag } else if (!isTagNested(tagName)) { // when we have only close tag [/some] without any open tag
const nodes = getNodes(); nodesAppend(token.toString({ openTag, closeTag }));
nodesAppend(nodes, token.toString({ openTag, closeTag }));
} else if (typeof options.onError === "function") { } else if (typeof options.onError === "function") {
const tag = token.getValue(); const tag = token.getValue();
const line = token.getLine(); const line = token.getLine();
@@ -241,31 +209,24 @@ function parse(input: string, opts: ParseOptions = {}) {
* @param {Token} token * @param {Token} token
*/ */
function nodeHandle(token: Token) { function nodeHandle(token: Token) {
/**
* @type {TagNode}
*/
const activeTagNode = tagNodes.last();
const tokenValue = token.getValue(); const tokenValue = token.getValue();
const isNested = isTagNested(token.toString()); const isNested = isTagNested(token.toString());
const nodes = getNodes();
if (activeTagNode !== null) { if (activeTagNode) {
switch (token.type) { switch (token.type) {
case TYPE_ATTR_NAME: case TYPE_ATTR_NAME:
tagNodesAttrName.push(tokenValue); activeTagNodesAttrName = tokenValue;
const attrName = tagNodesAttrName.last();
if (attrName) { if (tokenValue) {
activeTagNode.attr(attrName, ""); activeTagNode.attr(tokenValue, "");
} }
break; break;
case TYPE_ATTR_VALUE: case TYPE_ATTR_VALUE:
const attrValName = tagNodesAttrName.last(); if (activeTagNodesAttrName) {
activeTagNode.attr(activeTagNodesAttrName, tokenValue);
if (attrValName) { activeTagNodesAttrName = null;
activeTagNode.attr(attrValName, tokenValue);
tagNodesAttrName.flush();
} else { } else {
activeTagNode.attr(tokenValue, tokenValue); activeTagNode.attr(tokenValue, tokenValue);
} }
@@ -277,20 +238,20 @@ function parse(input: string, opts: ParseOptions = {}) {
if (isNested) { if (isNested) {
activeTagNode.append(tokenValue); activeTagNode.append(tokenValue);
} else { } else {
nodesAppend(nodes, tokenValue); nodesAppend(tokenValue);
} }
break; break;
case TYPE_TAG: case TYPE_TAG:
// if tag is not allowed, just pass it as is // if tag is not allowed, just pass it as is
nodesAppend(nodes, token.toString({ openTag, closeTag })); nodesAppend(token.toString({ openTag, closeTag }));
break; break;
} }
} else if (token.isText()) { } else if (token.isText()) {
nodesAppend(nodes, tokenValue); nodesAppend(tokenValue);
} else if (token.isTag()) { } else if (token.isTag()) {
// if tag is not allowed, just pass it as is // if tag is not allowed, just pass it as is
nodesAppend(nodes, token.toString({ openTag, closeTag })); nodesAppend(token.toString({ openTag, closeTag }));
} }
} }
@@ -330,15 +291,20 @@ function parse(input: string, opts: ParseOptions = {}) {
// eslint-disable-next-line no-unused-vars // eslint-disable-next-line no-unused-vars
const tokens = tokenizer.tokenize(); const tokens = tokenizer.tokenize();
// handles situations where we open tag, but forgot close them // handles situations where we opened tag, but forget to close them
// for ex [q]test[/q][u]some[/u][q]some [u]some[/u] // forgot to close [/q] // for ex [q]test[/q][u]some[/u][q]some [u]some[/u] // forgot to close [/q]
// so we need to flush nested content to nodes array // so we need to flush nested content to nodes array
const lastNestedNode = nestedNodes.flush(); do {
if (isTagNode(lastNestedNode) && isTagNested(lastNestedNode.tag)) { const node = nestedNodes.flush();
nodesAppendAsString(getNodes(), lastNestedNode, false);
}
return nodes.toArray(); if (isTagNode(node) && isTagNested(node.tag)) {
nodesAppendAsString(getNodesContent(), node, false);
} else if (typeof node !== 'undefined') {
nodesAppend(node);
}
} while (nestedNodes.has());
return nodes.ref();
} }
export { parse }; export { parse };
@@ -0,0 +1,35 @@
import { NodeList } from '../src/NodeList';
// Unit tests for NodeList: covers appending, peeking, removing, the emptiness
// check and the behaviour of an empty list.
describe('NodeList', () => {
  test('push', () => {
    const list = new NodeList();

    list.push('a');
    list.push('b');
    list.push('c');

    // values are kept in insertion order
    expect(list.ref()).toEqual(['a', 'b', 'c']);
  });

  test('last', () => {
    const list = new NodeList();

    list.push('a');
    list.push('b');
    list.push('c');

    // last() only peeks, so the content must stay untouched
    expect(list.last()).toBe('c');
    expect(list.ref()).toEqual(['a', 'b', 'c']);
  });

  test('flush', () => {
    const list = new NodeList();

    list.push('a');
    list.push('b');
    list.push('c');

    // flush() removes and returns the most recently pushed value
    expect(list.flush()).toBe('c');
    expect(list.ref()).toEqual(['a', 'b']);
  });

  test('has', () => {
    const list = new NodeList();

    expect(list.has()).toBe(false);

    list.push('a');
    expect(list.has()).toBe(true);

    // removing the only value makes the list empty again
    list.flush();
    expect(list.has()).toBe(false);
  });

  test('empty list', () => {
    const list = new NodeList();

    // both accessors report `undefined` instead of throwing on an empty list
    expect(list.last()).toBeUndefined();
    expect(list.flush()).toBeUndefined();
    expect(list.ref()).toEqual([]);
  });

  test('ref returns the backing array', () => {
    const list = new NodeList();
    const ref = list.ref();

    list.push('a');

    // the same array instance is returned, so later pushes are visible through it
    expect(list.ref()).toBe(ref);
    expect(ref).toEqual(['a']);
  });
});
+30
View File
@@ -962,6 +962,36 @@ sdfasdfasdf
); );
}); });
test('parse with lost repeated closing tag', () => {
const input = parse('Hello World[u]Wrong underline[u] This is another text [u]and this, too[/u]');
expect(input).toBeMatchAST([
'Hello',
' ',
'World',
'[u]',
'Wrong',
' ',
'underline',
'[u]',
' ',
'This',
' ',
'is',
' ',
'another',
' ',
'text',
' ',
{
tag: 'u',
attrs: {},
content: ['and', ' ', 'this,', ' ', 'too'],
}
]);
});
test('parse with url in tag content', () => { test('parse with url in tag content', () => {
const input = parse('[img]https://tw.greywool.com/i/e3Ph5.png[/img]'); const input = parse('[img]https://tw.greywool.com/i/e3Ph5.png[/img]');