diff --git a/.changeset/purple-horses-shave.md b/.changeset/purple-horses-shave.md new file mode 100644 index 0000000..59599e7 --- /dev/null +++ b/.changeset/purple-horses-shave.md @@ -0,0 +1,27 @@ +--- +"@bbob/parser": major +"@bbob/cli": major +"@bbob/core": major +"@bbob/html": major +"@bbob/plugin-helper": major +"@bbob/preset": major +"@bbob/preset-html5": major +"@bbob/preset-react": major +"@bbob/preset-vue": major +"@bbob/react": major +"@bbob/types": major +"@bbob/vue2": major +"@bbob/vue3": major +--- + +Now `@bbob/parser` correctly handles repeated opening tags that are never closed: the unclosed tags are kept as plain text and only the properly closed tag is parsed. For example, + +```html +Hello World[u]Wrong underline[u] This is another text [u]and this, too[/u] +``` +is now rendered as +```html +Hello World[u]Wrong underline[u] This is another text <u>and this, too</u> +``` + +Fixes #296, #295 diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 6a1f206..71eea99 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -1,5 +1,12 @@ name: Pull Request on: + workflow_dispatch: + inputs: + test: + description: 'Build & Publish Pre-alpha Release' + required: true + type: boolean + default: true # workflow_run: # workflows: # - Tests @@ -23,7 +30,7 @@ permissions: jobs: build: - name: 'Build & pre-alpha release' + name: 'Build & Publish Pre-alpha Release' runs-on: ubuntu-latest diff --git a/packages/bbob-parser/src/NodeList.ts b/packages/bbob-parser/src/NodeList.ts new file mode 100644 index 0000000..fa5eb76 --- /dev/null +++ b/packages/bbob-parser/src/NodeList.ts @@ -0,0 +1,36 @@ +class NodeList { + private readonly n: Value[]; + + constructor() { + this.n = []; + } + + last() { + const len = this.n.length + + if (len > 0) { + return this.n[len - 1]; + } + + return undefined; + } + + has() { + return this.n.length > 0; + } + + flush() { + return this.n.length ? 
this.n.pop() : undefined; + } + + push(value: Value) { + this.n.push(value); + } + + ref() { + return this.n; + } +} + +export { NodeList }; +export default NodeList; diff --git a/packages/bbob-parser/src/lexer.ts b/packages/bbob-parser/src/lexer.ts index 8944a7b..e83520a 100644 --- a/packages/bbob-parser/src/lexer.ts +++ b/packages/bbob-parser/src/lexer.ts @@ -91,6 +91,11 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo /** * Emits newly created token to subscriber + * + * @param {number} type - 1 - word, 2 - tag, 3 - attr-name, 4 - attr-value, 5 - space, 6 - new-line + * @param {string} value - token value + * @param {number} startPos - start position + * @param {number} endPos - end position */ function emitToken(type: number, value: string, startPos?: number, endPos?: number) { const token = createTokenOfType(type, value, row, prevCol, startPos, endPos); diff --git a/packages/bbob-parser/src/parse.ts b/packages/bbob-parser/src/parse.ts index 0e9ab1e..8214111 100644 --- a/packages/bbob-parser/src/parse.ts +++ b/packages/bbob-parser/src/parse.ts @@ -6,37 +6,7 @@ import { createLexer } from "./lexer.js"; import { Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD } from "./Token.js"; -class NodeList { - private n: Value[]; - - constructor() { - this.n = []; - } - - last() { - if ( - Array.isArray(this.n) && - this.n.length > 0 && - typeof this.n[this.n.length - 1] !== "undefined" - ) { - return this.n[this.n.length - 1]; - } - - return null; - } - - flush() { - return this.n.length ? 
this.n.pop() : false; - } - - push(value: Value) { - this.n.push(value); - } - - toArray() { - return this.n; - } -} +import { NodeList } from "./NodeList.js"; const createList = () => new NodeList(); @@ -67,13 +37,13 @@ function parse(input: string, opts: ParseOptions = {}) { * @private * @type {NodeList} */ - const tagNodes = createList(); + let activeTagNode: TagNode | null = null; /** * Temp buffer of tag attributes * @private * @type {NodeList} */ - const tagNodesAttrName = createList(); + let activeTagNodesAttrName: string | null = null; /** * Cache for nested tags checks @@ -122,30 +92,31 @@ function parse(input: string, opts: ParseOptions = {}) { * Flushes temp tag nodes and its attributes buffers * @private */ - function tagNodesFlush() { - if (tagNodes.flush()) { - tagNodesAttrName.flush(); + function activeTagNodeFlush() { + if (activeTagNode) { + activeTagNode = null; + activeTagNodesAttrName = null; } } /** * @private */ - function getNodes() { + function getNodesContent() { const lastNestedNode = nestedNodes.last(); if (lastNestedNode && isTagNode(lastNestedNode)) { return lastNestedNode.content; } - return nodes.toArray(); + return nodes.ref(); } /** * @private */ function nodesAppendAsString( - nodes?: TagNodeTree, + nodes: TagNodeTree, node?: TagNode, isNested = true ) { @@ -167,7 +138,9 @@ function parse(input: string, opts: ParseOptions = {}) { /** * @private */ - function nodesAppend(nodes?: TagNodeTree, node?: NodeContent) { + function nodesAppend(node: NodeContent) { + const nodes = getNodesContent() as TagNodeTree; + if (Array.isArray(nodes) && typeof node !== "undefined") { if (isTagNode(node)) { if (isTagAllowed(node.tag)) { @@ -186,18 +159,17 @@ function parse(input: string, opts: ParseOptions = {}) { * @param {Token} token */ function tagHandleStart(token: Token) { - tagNodesFlush(); + activeTagNodeFlush(); const tagNode = TagNode.create(token.getValue(), {}, [], { from: token.getStart(), to: token.getEnd() }); const isNested = 
isTokenNested(token); - tagNodes.push(tagNode); + activeTagNode = tagNode; if (isNested) { nestedNodes.push(tagNode); } else { - const nodes = getNodes(); - nodesAppend(nodes, tagNode); + nodesAppend(tagNode); } } @@ -209,20 +181,16 @@ function parse(input: string, opts: ParseOptions = {}) { const tagName = token.getValue().slice(1); const lastNestedNode = nestedNodes.flush(); - tagNodesFlush(); + activeTagNodeFlush(); if (lastNestedNode) { - const nodes = getNodes() - if (isTagNode(lastNestedNode)) { lastNestedNode.setEnd({ from: token.getStart(), to: token.getEnd() }); } - nodesAppend(nodes, lastNestedNode); + nodesAppend(lastNestedNode); } else if (!isTagNested(tagName)) { // when we have only close tag [/some] without any open tag - const nodes = getNodes(); - - nodesAppend(nodes, token.toString({ openTag, closeTag })); + nodesAppend(token.toString({ openTag, closeTag })); } else if (typeof options.onError === "function") { const tag = token.getValue(); const line = token.getLine(); @@ -241,31 +209,24 @@ function parse(input: string, opts: ParseOptions = {}) { * @param {Token} token */ function nodeHandle(token: Token) { - /** - * @type {TagNode} - */ - const activeTagNode = tagNodes.last(); + const tokenValue = token.getValue(); const isNested = isTagNested(token.toString()); - const nodes = getNodes(); - if (activeTagNode !== null) { + if (activeTagNode) { switch (token.type) { case TYPE_ATTR_NAME: - tagNodesAttrName.push(tokenValue); - const attrName = tagNodesAttrName.last(); + activeTagNodesAttrName = tokenValue; - if (attrName) { - activeTagNode.attr(attrName, ""); + if (tokenValue) { + activeTagNode.attr(tokenValue, ""); } break; case TYPE_ATTR_VALUE: - const attrValName = tagNodesAttrName.last(); - - if (attrValName) { - activeTagNode.attr(attrValName, tokenValue); - tagNodesAttrName.flush(); + if (activeTagNodesAttrName) { + activeTagNode.attr(activeTagNodesAttrName, tokenValue); + activeTagNodesAttrName = null; } else { activeTagNode.attr(tokenValue, 
tokenValue); } @@ -277,20 +238,20 @@ function parse(input: string, opts: ParseOptions = {}) { if (isNested) { activeTagNode.append(tokenValue); } else { - nodesAppend(nodes, tokenValue); + nodesAppend(tokenValue); } break; case TYPE_TAG: // if tag is not allowed, just pass it as is - nodesAppend(nodes, token.toString({ openTag, closeTag })); + nodesAppend(token.toString({ openTag, closeTag })); break; } } else if (token.isText()) { - nodesAppend(nodes, tokenValue); + nodesAppend(tokenValue); } else if (token.isTag()) { // if tag is not allowed, just pass it as is - nodesAppend(nodes, token.toString({ openTag, closeTag })); + nodesAppend(token.toString({ openTag, closeTag })); } } @@ -330,15 +291,20 @@ function parse(input: string, opts: ParseOptions = {}) { // eslint-disable-next-line no-unused-vars const tokens = tokenizer.tokenize(); - // handles situations where we open tag, but forgot close them + // handles situations where tags were opened but never closed // for ex [q]test[/q][u]some[/u][q]some [u]some[/u] // forgot to close [/q] // so we need to flush nested content to nodes array - const lastNestedNode = nestedNodes.flush(); - if (isTagNode(lastNestedNode) && isTagNested(lastNestedNode.tag)) { - nodesAppendAsString(getNodes(), lastNestedNode, false); - } + do { + const node = nestedNodes.flush(); - return nodes.toArray(); + if (isTagNode(node) && isTagNested(node.tag)) { + nodesAppendAsString(getNodesContent(), node, false); + } else if (typeof node !== 'undefined') { + nodesAppend(node); + } + } while (nestedNodes.has()); + + return nodes.ref(); } export { parse }; diff --git a/packages/bbob-parser/test/NodeList.test.ts b/packages/bbob-parser/test/NodeList.test.ts new file mode 100644 index 0000000..1c531e6 --- /dev/null +++ b/packages/bbob-parser/test/NodeList.test.ts @@ -0,0 +1,35 @@ +import { NodeList } from '../src/NodeList'; + +describe('NodeList', () => { + test('push', () => { + const list = new NodeList(); + + list.push('a'); + 
list.push('b'); + list.push('c'); + + expect(list.ref()).toEqual(['a', 'b', 'c']); + }); + + test('last', () => { + const list = new NodeList(); + + list.push('a'); + list.push('b'); + list.push('c'); + + expect(list.last()).toBe('c'); + expect(list.ref()).toEqual(['a', 'b', 'c']); + }); + + test('flush', () => { + const list = new NodeList(); + + list.push('a'); + list.push('b'); + list.push('c'); + + expect(list.flush()).toBe('c'); + expect(list.ref()).toEqual(['a', 'b']); + }); +}); diff --git a/packages/bbob-parser/test/parse.test.ts b/packages/bbob-parser/test/parse.test.ts index 055fb51..fa55728 100644 --- a/packages/bbob-parser/test/parse.test.ts +++ b/packages/bbob-parser/test/parse.test.ts @@ -962,6 +962,36 @@ sdfasdfasdf ); }); + test('parse with lost repeated closing tag', () => { + const input = parse('Hello World[u]Wrong underline[u] This is another text [u]and this, too[/u]'); + + expect(input).toBeMatchAST([ + 'Hello', + ' ', + 'World', + '[u]', + 'Wrong', + ' ', + 'underline', + '[u]', + ' ', + 'This', + ' ', + 'is', + ' ', + 'another', + ' ', + 'text', + ' ', + { + tag: 'u', + attrs: {}, + content: ['and', ' ', 'this,', ' ', 'too'], + } + ]); + }); + + test('parse with url in tag content', () => { const input = parse('[img]https://tw.greywool.com/i/e3Ph5.png[/img]');