feat(parser): context free tag mode (#165)

* feat(parser): initial context free tag mode * fix: tests coverage * chore: update readme * chore: remove unused badge from readme
2026-05-15 11:59:37 +03:00 · 2023-01-27 05:26:09 +02:00
parent 6b2810fcf4
commit 19e8dd659e
11 changed files with 215 additions and 79 deletions
@@ -13,10 +13,7 @@ written in pure javascript, no dependencies
 </a> 
 <a href="https://www.codefactor.io/repository/github/jilizart/bbob">
  <img src="https://www.codefactor.io/repository/github/jilizart/bbob/badge" alt="CodeFactor">
-</a> 
-<a href="https://bettercodehub.com/">
-<img src="https://bettercodehub.com/edge/badge/JiLiZART/bbob?branch=master" alt="BCH compliance">
-</a> 
+</a>
 <a href="https://snyk.io/test/github/JiLiZART/bbob?targetFile=package.json">
  <img src="https://snyk.io/test/github/JiLiZART/bbob/badge.svg?targetFile=package.json" alt="Known Vulnerabilities">
 </a>
@@ -73,6 +70,7 @@ written in pure javascript, no dependencies
  * [Basic usage](#basic-usage)
  * [React usage](#react-usage)
  * [Vue 2 usage](#vue2-usage)
+* [Parse Options](#parse-options)
 * [Presets](#presets)
   * [Create your own preset](#create-preset)
   * [HTML Preset](#html-preset)
@@ -111,7 +109,7 @@ import {render} from 'react-dom'
 import bbobReactRender from '@bbob/react/es/render'
 import presetReact from '@bbob/preset-react'

-const options = { onlyAllowTags: ['i'], enableEscapeTags: true }
+const options = { onlyAllowTags: ['i'], enableEscapeTags: true, contextFreeTags: ['code'] }
 const content = bbobReactRender(`[i]Text[/i]`, presetReact(), options)

 console.log(render(<span>{content}</span>)); // <span><span style="font-style: italic;">Text</span></span>
@@ -156,6 +154,47 @@ Vue.use(VueBbob);
 ```
 More examples available in <a href="https://github.com/JiLiZART/BBob/tree/master/examples">examples folder</a>

+### Parse options <a name="parse-options"></a>
+
+#### onlyAllowTags
+
+Parse only the listed tags; any other tag is left in the output as plain text
+
+```js
+import bbobHTML from '@bbob/html'
+import presetHTML5 from '@bbob/preset-html5'
+
+const processed = bbobHTML(`[i][b]Text[/b][/i]`, presetHTML5(), { onlyAllowTags: ['i'] })
+
+console.log(processed); // <span style="font-style: italic;">[b]Text[/b]</span>
+```
+
+#### contextFreeTags
+
+Enable context-free mode: tags nested inside the listed tags are not parsed and are kept as plain text
+
+```js
+import bbobHTML from '@bbob/html'
+import presetHTML5 from '@bbob/preset-html5'
+
+const processed = bbobHTML(`[b]Text[/b][code][b]Text[/b][/code]`, presetHTML5(), { contextFreeTags: ['code'] })
+
+console.log(processed); // <span style="font-weight: bold;">Text</span><pre>[b]Text[/b]</pre>
+```
+
+#### enableEscapeTags
+
+Enable backslash escaping, so that escaped tags such as `\[b\]` are output as plain text instead of being parsed
+
+```js
+import bbobHTML from '@bbob/html'
+import presetHTML5 from '@bbob/preset-html5'
+
+const processed = bbobHTML(`[b]Text[/b]\\[b\\]Text\\[/b\\]`, presetHTML5(), { enableEscapeTags: true })
+
+console.log(processed); // <span style="font-weight: bold;">Text</span>[b]Text[/b]
+```
+

 ### Presets <a name="basic"></a>

@@ -330,7 +369,7 @@ Tested on Node v12.18.3
 | regex/parser         | 6.02 ops/sec ±2.77%  | (20 runs sampled) |
 | ya-bbcode            | 10.70 ops/sec ±1.94% | (31 runs sampled) |
 | xbbcode/parser       | 107 ops/sec ±2.29%   | (69 runs sampled) |
-| @bbob/parser         | 137 ops/sec ±1.11%   | (78 runs sampled) |
+| @bbob/parser         | 140 ops/sec ±1.11%   | (78 runs sampled) |


 Developed with <3 using JetBrains
@@ -13,8 +13,8 @@
        "@rollup/plugin-node-resolve": "15.0.1",
        "@rollup/plugin-replace": "5.0.1",
        "@size-limit/preset-small-lib": "6.0.1",
-        "@swc/cli": "0.1.57",
-        "@swc/core": "1.3.16",
+        "@swc/cli": "^0.1.57",
+        "@swc/core": "^1.3.16",
        "@swc/jest": "0.2.23",
        "bundlesize2": "0.0.31",
        "cross-env": "7.0.3",
@@ -7326,14 +7326,20 @@
      }
    },
    "node_modules/caniuse-lite": {
-      "version": "1.0.30001283",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001283.tgz",
-      "integrity": "sha512-9RoKo841j1GQFSJz/nCXOj0sD7tHBtlowjYlrqIUS812x9/emfBLBt6IyMz1zIaYc/eRL8Cs6HPUVi2Hzq4sIg==",
+      "version": "1.0.30001441",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001441.tgz",
+      "integrity": "sha512-OyxRR4Vof59I3yGWXws6i908EtGbMzVUi3ganaZQHmydk1iwDhRnvaPG2WaR0KcqrDFKrxVZHULT396LEPhXfg==",
      "dev": true,
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/browserslist"
-      }
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/browserslist"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/caniuse-lite"
+        }
+      ]
    },
    "node_modules/chalk": {
      "version": "4.1.2",
@@ -23840,9 +23846,9 @@
      }
    },
    "caniuse-lite": {
-      "version": "1.0.30001283",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001283.tgz",
-      "integrity": "sha512-9RoKo841j1GQFSJz/nCXOj0sD7tHBtlowjYlrqIUS812x9/emfBLBt6IyMz1zIaYc/eRL8Cs6HPUVi2Hzq4sIg==",
+      "version": "1.0.30001441",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001441.tgz",
+      "integrity": "sha512-OyxRR4Vof59I3yGWXws6i908EtGbMzVUi3ganaZQHmydk1iwDhRnvaPG2WaR0KcqrDFKrxVZHULT396LEPhXfg==",
      "dev": true
    },
    "chalk": {
@@ -12,19 +12,17 @@
        "@bbob/plugin-helper": "^2.8.3"
      }
    },
-    "../bbob-plugin-helper": {
-      "name": "@bbob/plugin-helper",
-      "version": "2.8.2",
-      "license": "MIT"
-    },
    "node_modules/@bbob/plugin-helper": {
-      "resolved": "../bbob-plugin-helper",
-      "link": true
+      "version": "2.8.3",
+      "resolved": "https://registry.npmjs.org/@bbob/plugin-helper/-/plugin-helper-2.8.3.tgz",
+      "integrity": "sha512-i1vVQZ7Ja5x6OLVyAXpwbTX/Id++wVJkve8q+wDhRHylW5/MJQqB6B6pZdGuFbyA5AQvUw2us8bsW0h4iZsDew=="
    }
  },
  "dependencies": {
    "@bbob/plugin-helper": {
-      "version": "file:../bbob-plugin-helper"
+      "version": "2.8.3",
+      "resolved": "https://registry.npmjs.org/@bbob/plugin-helper/-/plugin-helper-2.8.3.tgz",
+      "integrity": "sha512-i1vVQZ7Ja5x6OLVyAXpwbTX/Id++wVJkve8q+wDhRHylW5/MJQqB6B6pZdGuFbyA5AQvUw2us8bsW0h4iZsDew=="
    }
  }
 }
@@ -171,5 +171,6 @@ export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
 export const TYPE_ATTR_VALUE = TOKEN_TYPE_ATTR_VALUE;
 export const TYPE_SPACE = TOKEN_TYPE_SPACE;
 export const TYPE_NEW_LINE = TOKEN_TYPE_NEW_LINE;
+
 export { Token };
 export default Token;
@@ -58,16 +58,17 @@ function createLexer(buffer, options = {}) {
  let tokenIndex = -1;
  let stateMode = STATE_WORD;
  let tagMode = TAG_STATE_NAME;
+  let contextFreeTag = '';
  const tokens = new Array(Math.floor(buffer.length));
  const openTag = options.openTag || OPEN_BRAKET;
  const closeTag = options.closeTag || CLOSE_BRAKET;
  const escapeTags = !!options.enableEscapeTags;
+  const contextFreeTags = options.contextFreeTags || [];
  const onToken = options.onToken || (() => {
  });

  const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
  const NOT_CHAR_TOKENS = [
-    // ...(options.enableEscapeTags ? [BACKSLASH] : []),
    openTag, SPACE, TAB, N,
  ];
  const WHITESPACES = [SPACE, TAB];
@@ -86,6 +87,16 @@ function createLexer(buffer, options = {}) {

  const unq = (val) => unquote(trimChar(val, QUOTEMARK));

+  const checkContextFreeMode = (name, isClosingTag) => {
+    if (contextFreeTag !== '' && isClosingTag) {
+      contextFreeTag = '';
+    }
+
+    if (contextFreeTag === '' && contextFreeTags.includes(name)) {
+      contextFreeTag = name;
+    }
+  };
+
  const chars = createCharGrabber(buffer, { onSkip });

  /**
@@ -177,6 +188,7 @@ function createLexer(buffer, options = {}) {
    const name = tagChars.grabWhile(validName);

    emitToken(TYPE_TAG, name);
+    checkContextFreeMode(name);

    tagChars.skip();

@@ -192,41 +204,37 @@ function createLexer(buffer, options = {}) {

  function stateTag() {
    const currChar = chars.getCurr();
+    const nextChar = chars.getNext();

-    if (currChar === openTag) {
-      const nextChar = chars.getNext();
+    chars.skip();

-      chars.skip();
+    // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
+    const substr = chars.substrUntilChar(closeTag);
+    const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;

-      // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
-      const substr = chars.substrUntilChar(closeTag);
-      const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
+    if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
+      emitToken(TYPE_WORD, currChar);

-      if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
-        emitToken(TYPE_WORD, currChar);
-
-        return STATE_WORD;
-      }
-
-      // [myTag   ]
-      const isNoAttrsInTag = substr.indexOf(EQ) === -1;
-      // [/myTag]
-      const isClosingTag = substr[0] === SLASH;
-
-      if (isNoAttrsInTag || isClosingTag) {
-        const name = chars.grabWhile((char) => char !== closeTag);
-
-        chars.skip(); // skip closeTag
-
-        emitToken(TYPE_TAG, name);
-
-        return STATE_WORD;
-      }
-
-      return STATE_TAG_ATTRS;
+      return STATE_WORD;
    }

-    return STATE_WORD;
+    // [myTag   ]
+    const isNoAttrsInTag = substr.indexOf(EQ) === -1;
+    // [/myTag]
+    const isClosingTag = substr[0] === SLASH;
+
+    if (isNoAttrsInTag || isClosingTag) {
+      const name = chars.grabWhile((char) => char !== closeTag);
+
+      chars.skip(); // skip closeTag
+
+      emitToken(TYPE_TAG, name);
+      checkContextFreeMode(name, isClosingTag);
+
+      return STATE_WORD;
+    }
+
+    return STATE_TAG_ATTRS;
  }

  function stateAttrs() {
@@ -259,13 +267,24 @@ function createLexer(buffer, options = {}) {
    }

    if (isWhiteSpace(chars.getCurr())) {
-      emitToken(TYPE_SPACE, chars.grabWhile(isWhiteSpace));
+      const word = chars.grabWhile(isWhiteSpace);
+
+      emitToken(TYPE_SPACE, word);

      return STATE_WORD;
    }

    if (chars.getCurr() === openTag) {
-      if (chars.includes(closeTag)) {
+      if (contextFreeTag) {
+        const fullTagLen = openTag.length + SLASH.length + contextFreeTag.length;
+        const fullTagName = `${openTag}${SLASH}${contextFreeTag}`;
+        const foundTag = chars.grabN(fullTagLen);
+        const isEndContextFreeMode = foundTag === fullTagName;
+
+        if (isEndContextFreeMode) {
+          return STATE_TAG;
+        }
+      } else if (chars.includes(closeTag)) {
        return STATE_TAG;
      }

@@ -298,12 +317,16 @@ function createLexer(buffer, options = {}) {

      const isChar = (char) => isCharToken(char) && !isEscapeChar(char);

-      emitToken(TYPE_WORD, chars.grabWhile(isChar));
+      const word = chars.grabWhile(isChar);
+
+      emitToken(TYPE_WORD, word);

      return STATE_WORD;
    }

-    emitToken(TYPE_WORD, chars.grabWhile(isCharToken));
+    const word = chars.grabWhile(isCharToken);
+
+    emitToken(TYPE_WORD, word);

    return STATE_WORD;
  }
@@ -320,10 +343,8 @@ function createLexer(buffer, options = {}) {
          stateMode = stateAttrs();
          break;
        case STATE_WORD:
-          stateMode = stateWord();
-          break;
        default:
-          stateMode = STATE_WORD;
+          stateMode = stateWord();
          break;
      }
    }
@@ -10,9 +10,10 @@ import { createList } from './utils';
 * @param {Object} opts
 * @param {Function} opts.createTokenizer
 * @param {Array<string>} opts.onlyAllowTags
+ * @param {Array<string>} opts.contextFreeTags
+ * @param {Boolean} opts.enableEscapeTags
 * @param {String} opts.openTag
 * @param {String} opts.closeTag
- * @param {Boolean} opts.enableEscapeTags
 * @return {Array}
 */
 const parse = (input, opts = {}) => {
@@ -258,9 +259,10 @@ const parse = (input, opts = {}) => {

  tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, {
    onToken,
-    onlyAllowTags: options.onlyAllowTags,
    openTag,
    closeTag,
+    onlyAllowTags: options.onlyAllowTags,
+    contextFreeTags: options.contextFreeTags,
    enableEscapeTags: options.enableEscapeTags,
  });

@@ -13,7 +13,7 @@ function CharGrabber(source, options) {
    const { pos } = cursor;
    const idx = source.indexOf(char, pos);

-    return idx >= 0 ? source.substr(pos, idx - pos) : '';
+    return idx >= 0 ? source.substring(pos, idx) : '';
  };
  const includes = (val) => source.indexOf(val, cursor.pos) >= 0;
  const hasNext = () => cursor.len > cursor.pos;
@@ -25,7 +25,8 @@ function CharGrabber(source, options) {
      options.onSkip();
    }
  };
-  const rest = () => source.substr(cursor.pos);
+  const rest = () => source.substring(cursor.pos);
+  const grabN = (num = 0) => source.substring(cursor.pos, cursor.pos + num);
  const curr = () => source[cursor.pos];
  const prev = () => {
    const prevPos = cursor.pos - 1;
@@ -48,7 +49,7 @@ function CharGrabber(source, options) {
      }
    }

-    return source.substr(start, cursor.pos - start);
+    return source.substring(start, cursor.pos);
  };
  /**
   * @type {skip}
@@ -88,6 +89,11 @@ function CharGrabber(source, options) {
   * @return {String}
   */
  this.grabWhile = grabWhile;
+  /**
+   * @param {Number} num
+   * @return {String}
+   */
+  this.grabN = grabN;
  /**
   * Grabs rest of string until it find a char
   * @param {String} char
@@ -0,0 +1,9 @@
+import { TagNode } from "../src/index";
+
+describe('index', () => {
+  test('tag with content and params', () => {
+    const tagNode = TagNode.create('test', {test: 1}, ['Hello']);
+
+    expect(String(tagNode)).toBe('[test test="1"]Hello[/test]');
+  });
+})
@@ -14,6 +14,7 @@ const TYPE_NAMES = Object.fromEntries(Object.keys(TYPE).map(key => [TYPE[key], k

 const tokenize = input => (createLexer(input).tokenize());
 const tokenizeEscape = input => (createLexer(input, { enableEscapeTags: true }).tokenize());
+const tokenizeContextFreeTags = (input, tags = []) => (createLexer(input, { contextFreeTags: tags }).tokenize());

 describe('lexer', () => {
  expect.extend({
@@ -463,6 +464,24 @@ describe('lexer', () => {
    expect(tokens).toBeMantchOutput(output);
  });

+  test('context free tag [code]', () => {
+    const input = '[code] [b]some string[/b][/code]'
+    const tokens = tokenizeContextFreeTags(input, ['code']);
+    const output = [
+      [TYPE.TAG, 'code', 0, 0],
+      [TYPE.SPACE, ' ', 0, 0],
+      [TYPE.WORD, '[', 0, 0],
+      [TYPE.WORD, 'b]some', 0, 0],
+      [TYPE.SPACE, ' ', 0, 0],
+      [TYPE.WORD, 'string', 0, 0],
+      [TYPE.WORD, '[', 0, 0],
+      [TYPE.WORD, '/b]', 0, 0],
+      [TYPE.TAG, '/code', 0, 0],
+    ]
+
+    expect(tokens).toBeMantchOutput(output);
+  })
+
  test('bad closed tag with escaped backslash', () => {
    const input = `[b]test[\\b]`;
    const tokens = tokenizeEscape(input);
@@ -25,6 +25,23 @@ describe('Parser', () => {
    expectOutput(ast, output);
  });

+  test('parse paired tags tokens 2', () => {
+    const ast = parse('[bar]Foo Bar[/bar]');
+    const output = [
+      {
+        tag: 'bar',
+        attrs: {},
+        content: [
+          'Foo',
+          ' ',
+          'Bar',
+        ],
+      },
+    ];
+
+    expectOutput(ast, output);
+  });
+
  describe('onlyAllowTags', () => {
    test('parse only allowed tags', () => {
      const ast = parse('[h1 name=value]Foo [Bar] [/h1]', {
@@ -126,6 +143,31 @@ describe('Parser', () => {
    });
  })

+  describe('contextFreeTags', () => {
+    test('context free tag [code]', () => {
+      const ast = parse('[code] [b]some string[/b][/code]', {
+        contextFreeTags: ['code']
+      });
+      const output = [
+        {
+          tag: 'code',
+          attrs: {},
+          content: [
+              ' ',
+              '[',
+              'b]some',
+              ' ',
+              'string',
+              '[',
+              '/b]'
+          ]
+        }
+      ]
+
+      expectOutput(ast, output);
+    })
+  })
+
  test('parse inconsistent tags', () => {
    const ast = parse('[h1 name=value]Foo [Bar] /h1]');
    const output = [
@@ -13,23 +13,16 @@ const CLOSE_BRAKET = ']';
 const SLASH = '/';
 const BACKSLASH = '\\';

-const PLACEHOLDER_SPACE_TAB = '    ';
-const PLACEHOLDER_SPACE = ' ';
-
-// const getChar = String.fromCharCode;
-
 export {
  N,
  F,
  R,
-  TAB,
  EQ,
-  QUOTEMARK,
+  TAB,
  SPACE,
+  SLASH,
+  BACKSLASH,
+  QUOTEMARK,
  OPEN_BRAKET,
  CLOSE_BRAKET,
-  SLASH,
-  PLACEHOLDER_SPACE_TAB,
-  PLACEHOLDER_SPACE,
-  BACKSLASH,
 };