2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

fix(296): parse with lost repeated closing tag (#297)

* fix: extract node list to a separate file, fix nested parsing

* chore:  node list tests

* fix: nested tags parsing

* chore: changeset

* chore: remove unused files

* chore: disable publish on every commit
This commit is contained in:
Nikolay Kost
2025-10-14 04:11:19 +02:00
committed by GitHub
parent 40041a0680
commit 29f909a589
7 changed files with 183 additions and 77 deletions
+27
View File
@@ -0,0 +1,27 @@
---
"@bbob/parser": major
"@bbob/cli": major
"@bbob/core": major
"@bbob/html": major
"@bbob/plugin-helper": major
"@bbob/preset": major
"@bbob/preset-html5": major
"@bbob/preset-react": major
"@bbob/preset-vue": major
"@bbob/react": major
"@bbob/types": major
"@bbob/vue2": major
"@bbob/vue3": major
---
`@bbob/parser` now correctly parses input in which a repeated tag is missing its closing tag: the unclosed opening tags are kept as plain text, so
```html
Hello World[u]Wrong underline[u] This is another text [u]and this, too[/u]
```
to
```html
Hello World[u]Wrong underline[u] This is another text <u>and this, too</u>
```
Fixes #296, #295
+8 -1
View File
@@ -1,5 +1,12 @@
name: Pull Request
on:
workflow_dispatch:
inputs:
test:
description: 'Build & Publish Pre-alpha Release'
required: true
type: boolean
default: true
# workflow_run:
# workflows:
# - Tests
@@ -23,7 +30,7 @@ permissions:
jobs:
build:
name: 'Build & pre-alpha release'
name: 'Build & Publish Pre-alpha Release'
runs-on: ubuntu-latest
+36
View File
@@ -0,0 +1,36 @@
/**
 * Thin stack-style wrapper around a plain array.
 *
 * Items are appended with `push`, inspected with `last`, and removed from the
 * end with `flush`. `ref` hands out the backing array itself (not a copy).
 */
class NodeList<Value> {
  /** Backing storage; the same array instance is kept for the lifetime of the list. */
  private readonly n: Value[] = [];

  /** Returns the most recently pushed item without removing it, or `undefined` when the list is empty. */
  last(): Value | undefined {
    const { length } = this.n;

    return length > 0 ? this.n[length - 1] : undefined;
  }

  /** Tells whether the list currently holds at least one item. */
  has(): boolean {
    return this.n.length !== 0;
  }

  /** Removes and returns the most recently pushed item, or `undefined` when the list is empty. */
  flush(): Value | undefined {
    if (this.n.length === 0) {
      return undefined;
    }

    return this.n.pop();
  }

  /** Appends `value` to the end of the list. */
  push(value: Value): void {
    this.n.push(value);
  }

  /** Returns the backing array by reference, so changes made through it are visible to the list. */
  ref(): Value[] {
    return this.n;
  }
}
export { NodeList };
export default NodeList;
+5
View File
@@ -91,6 +91,11 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
/**
* Emits newly created token to subscriber
*
* @param {number} type - 1 - word, 2 - tag, 3 - attr-name, 4 - attr-value, 5 - space, 6 - new-line
* @param {string} value - token value
* @param {number} startPos - start position
* @param {number} endPos - end position
*/
function emitToken(type: number, value: string, startPos?: number, endPos?: number) {
const token = createTokenOfType(type, value, row, prevCol, startPos, endPos);
+42 -76
View File
@@ -6,37 +6,7 @@ import { createLexer } from "./lexer.js";
import { Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD } from "./Token.js";
class NodeList<Value> {
private n: Value[];
constructor() {
this.n = [];
}
last() {
if (
Array.isArray(this.n) &&
this.n.length > 0 &&
typeof this.n[this.n.length - 1] !== "undefined"
) {
return this.n[this.n.length - 1];
}
return null;
}
flush() {
return this.n.length ? this.n.pop() : false;
}
push(value: Value) {
this.n.push(value);
}
toArray() {
return this.n;
}
}
import { NodeList } from "./NodeList.js";
const createList = <Type>() => new NodeList<Type>();
@@ -67,13 +37,13 @@ function parse(input: string, opts: ParseOptions = {}) {
* @private
* @type {NodeList}
*/
const tagNodes = createList<TagNode>();
let activeTagNode: TagNode | null = null;
/**
* Temp buffer of tag attributes
* @private
* @type {NodeList}
*/
const tagNodesAttrName = createList<string>();
let activeTagNodesAttrName: string | null = null;
/**
* Cache for nested tags checks
@@ -122,30 +92,31 @@ function parse(input: string, opts: ParseOptions = {}) {
* Flushes temp tag nodes and its attributes buffers
* @private
*/
function tagNodesFlush() {
if (tagNodes.flush()) {
tagNodesAttrName.flush();
function activeTagNodeFlush() {
if (activeTagNode) {
activeTagNode = null;
activeTagNodesAttrName = null;
}
}
/**
* @private
*/
function getNodes() {
function getNodesContent() {
const lastNestedNode = nestedNodes.last();
if (lastNestedNode && isTagNode(lastNestedNode)) {
return lastNestedNode.content;
}
return nodes.toArray();
return nodes.ref();
}
/**
* @private
*/
function nodesAppendAsString(
nodes?: TagNodeTree,
nodes: TagNodeTree,
node?: TagNode,
isNested = true
) {
@@ -167,7 +138,9 @@ function parse(input: string, opts: ParseOptions = {}) {
/**
* @private
*/
function nodesAppend(nodes?: TagNodeTree, node?: NodeContent) {
function nodesAppend(node: NodeContent) {
const nodes = getNodesContent() as TagNodeTree;
if (Array.isArray(nodes) && typeof node !== "undefined") {
if (isTagNode(node)) {
if (isTagAllowed(node.tag)) {
@@ -186,18 +159,17 @@ function parse(input: string, opts: ParseOptions = {}) {
* @param {Token} token
*/
function tagHandleStart(token: Token) {
tagNodesFlush();
activeTagNodeFlush();
const tagNode = TagNode.create(token.getValue(), {}, [], { from: token.getStart(), to: token.getEnd() });
const isNested = isTokenNested(token);
tagNodes.push(tagNode);
activeTagNode = tagNode;
if (isNested) {
nestedNodes.push(tagNode);
} else {
const nodes = getNodes();
nodesAppend(nodes, tagNode);
nodesAppend(tagNode);
}
}
@@ -209,20 +181,16 @@ function parse(input: string, opts: ParseOptions = {}) {
const tagName = token.getValue().slice(1);
const lastNestedNode = nestedNodes.flush();
tagNodesFlush();
activeTagNodeFlush();
if (lastNestedNode) {
const nodes = getNodes()
if (isTagNode(lastNestedNode)) {
lastNestedNode.setEnd({ from: token.getStart(), to: token.getEnd() });
}
nodesAppend(nodes, lastNestedNode);
nodesAppend(lastNestedNode);
} else if (!isTagNested(tagName)) { // when we have only close tag [/some] without any open tag
const nodes = getNodes();
nodesAppend(nodes, token.toString({ openTag, closeTag }));
nodesAppend(token.toString({ openTag, closeTag }));
} else if (typeof options.onError === "function") {
const tag = token.getValue();
const line = token.getLine();
@@ -241,31 +209,24 @@ function parse(input: string, opts: ParseOptions = {}) {
* @param {Token} token
*/
function nodeHandle(token: Token) {
/**
* @type {TagNode}
*/
const activeTagNode = tagNodes.last();
const tokenValue = token.getValue();
const isNested = isTagNested(token.toString());
const nodes = getNodes();
if (activeTagNode !== null) {
if (activeTagNode) {
switch (token.type) {
case TYPE_ATTR_NAME:
tagNodesAttrName.push(tokenValue);
const attrName = tagNodesAttrName.last();
activeTagNodesAttrName = tokenValue;
if (attrName) {
activeTagNode.attr(attrName, "");
if (tokenValue) {
activeTagNode.attr(tokenValue, "");
}
break;
case TYPE_ATTR_VALUE:
const attrValName = tagNodesAttrName.last();
if (attrValName) {
activeTagNode.attr(attrValName, tokenValue);
tagNodesAttrName.flush();
if (activeTagNodesAttrName) {
activeTagNode.attr(activeTagNodesAttrName, tokenValue);
activeTagNodesAttrName = null;
} else {
activeTagNode.attr(tokenValue, tokenValue);
}
@@ -277,20 +238,20 @@ function parse(input: string, opts: ParseOptions = {}) {
if (isNested) {
activeTagNode.append(tokenValue);
} else {
nodesAppend(nodes, tokenValue);
nodesAppend(tokenValue);
}
break;
case TYPE_TAG:
// if tag is not allowed, just pass it as is
nodesAppend(nodes, token.toString({ openTag, closeTag }));
nodesAppend(token.toString({ openTag, closeTag }));
break;
}
} else if (token.isText()) {
nodesAppend(nodes, tokenValue);
nodesAppend(tokenValue);
} else if (token.isTag()) {
// if tag is not allowed, just pass it as is
nodesAppend(nodes, token.toString({ openTag, closeTag }));
nodesAppend(token.toString({ openTag, closeTag }));
}
}
@@ -330,15 +291,20 @@ function parse(input: string, opts: ParseOptions = {}) {
// eslint-disable-next-line no-unused-vars
const tokens = tokenizer.tokenize();
// handles situations where we open tag, but forgot close them
// handles situations where tags were opened but never closed
// for ex [q]test[/q][u]some[/u][q]some [u]some[/u] // forgot to close [/q]
// so we need to flush nested content to nodes array
const lastNestedNode = nestedNodes.flush();
if (isTagNode(lastNestedNode) && isTagNested(lastNestedNode.tag)) {
nodesAppendAsString(getNodes(), lastNestedNode, false);
}
do {
const node = nestedNodes.flush();
return nodes.toArray();
if (isTagNode(node) && isTagNested(node.tag)) {
nodesAppendAsString(getNodesContent(), node, false);
} else if (typeof node !== 'undefined') {
nodesAppend(node);
}
} while (nestedNodes.has());
return nodes.ref();
}
export { parse };
@@ -0,0 +1,35 @@
import { NodeList } from '../src/NodeList';
/**
 * Unit tests for NodeList: a small stack-like container exposing
 * push / last / flush / has / ref.
 */
describe('NodeList', () => {
  /** Builds a list pre-filled with 'a', 'b', 'c' (pushed in that order). */
  const createFilledList = () => {
    const list = new NodeList<string>();

    list.push('a');
    list.push('b');
    list.push('c');

    return list;
  };

  test('push', () => {
    const list = createFilledList();

    expect(list.ref()).toEqual(['a', 'b', 'c']);
  });

  test('last', () => {
    const list = createFilledList();

    expect(list.last()).toBe('c');
    // last() must not remove anything
    expect(list.ref()).toEqual(['a', 'b', 'c']);
  });

  test('last on empty list', () => {
    const list = new NodeList<string>();

    expect(list.last()).toBeUndefined();
  });

  test('flush', () => {
    const list = createFilledList();

    expect(list.flush()).toBe('c');
    expect(list.ref()).toEqual(['a', 'b']);
  });

  test('flush on empty list', () => {
    const list = new NodeList<string>();

    expect(list.flush()).toBeUndefined();
    expect(list.ref()).toEqual([]);
  });

  test('has', () => {
    const list = new NodeList<string>();

    expect(list.has()).toBe(false);

    list.push('a');
    expect(list.has()).toBe(true);

    list.flush();
    expect(list.has()).toBe(false);
  });
});
+30
View File
@@ -962,6 +962,36 @@ sdfasdfasdf
);
});
test('parse with lost repeated closing tag', () => {
const input = parse('Hello World[u]Wrong underline[u] This is another text [u]and this, too[/u]');
expect(input).toBeMatchAST([
'Hello',
' ',
'World',
'[u]',
'Wrong',
' ',
'underline',
'[u]',
' ',
'This',
' ',
'is',
' ',
'another',
' ',
'text',
' ',
{
tag: 'u',
attrs: {},
content: ['and', ' ', 'this,', ' ', 'too'],
}
]);
});
test('parse with url in tag content', () => {
const input = parse('[img]https://tw.greywool.com/i/e3Ph5.png[/img]');