feat(#271): whitespaceInTags mode (#272)

* fix: test for buggy behavior * feat: implement whitespaceInTags mode * feat: move all char arrays to Map * feat: revert Map for char arrays
2026-05-15 11:59:37 +03:00 · 2025-03-09 02:23:03 +02:00
parent f0c9da683d
commit 0566241e23
5 changed files with 77 additions and 4 deletions
@@ -0,0 +1,29 @@
+---
+"@bbob/parser": minor
+"@bbob/types": minor
+"@bbob/cli": minor
+"@bbob/core": minor
+"@bbob/html": minor
+"@bbob/plugin-helper": minor
+"@bbob/preset": minor
+"@bbob/preset-html5": minor
+"@bbob/preset-react": minor
+"@bbob/preset-vue": minor
+"@bbob/react": minor
+"@bbob/vue2": minor
+"@bbob/vue3": minor
+---
+
+Added a `whitespaceInTags` parsing option (`true` by default). Set it to `false` to disable parsing of `[tags with spaces]`; such tags are then treated as plain text.
+
+```js
+import html5 from '@bbob/preset-html5'
+import parse from '@bbob/html'
+
+const html = parse('[b]lorem[/b] [foo bar] [i]ipsum[/i]', html5(), {
+  whitespaceInTags: false
+})
+
+console.log(html) // <b>lorem</b> [foo bar] <i>ipsum</i>
+```
+
@@ -67,7 +67,6 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
  const NOT_CHAR_TOKENS = [
    openTag, SPACE, TAB, N,
  ];
-
  const isCharReserved = (char: string) => (RESERVED_CHARS.indexOf(char) >= 0);
  const isCharToken = (char: string) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
  const isEscapableChar = (char: string) => (char === openTag || char === closeTag || char === BACKSLASH);
@@ -198,13 +197,19 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
    const currChar = chars.getCurr();
    const nextChar = chars.getNext();

-    chars.skip();
+    chars.skip(); // skip openTag

    // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
    const substr = chars.substrUntilChar(closeTag);
-    const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;

-    if ((nextChar && isCharReserved(nextChar)) || hasInvalidChars || chars.isLast()) {
+
+    const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
+    const isNextCharReserved = nextChar && isCharReserved(nextChar)
+    const isLastChar = chars.isLast()
+    const hasSpace = substr.indexOf(SPACE) >= 0;
+    const isSpaceRestricted = hasSpace && options.whitespaceInTags === false;
+
+    if (isNextCharReserved || hasInvalidChars || isLastChar || isSpaceRestricted) {
      emitToken(TYPE_WORD, currChar);

      return STATE_WORD;
@@ -321,6 +321,7 @@ function parse(input: string, opts: ParseOptions = {}) {
    contextFreeTags: options.contextFreeTags,
    caseFreeTags: options.caseFreeTags,
    enableEscapeTags: options.enableEscapeTags,
+    whitespaceInTags: options.whitespaceInTags,
  });

  // eslint-disable-next-line no-unused-vars
@@ -869,6 +869,43 @@ sdfasdfasdf
    ]);
  });

+  test('parse invalid tags', () => {
+    const input = parse('[b]Press Release[/b] [statement redacted] [i]This is more content[/i]', {
+      whitespaceInTags: false
+    })
+
+    expectOutput(input, [
+      {
+        tag: 'b',
+        attrs: {},
+        content: [
+          'Press',
+          ' ',
+          'Release'
+        ],
+      },
+      ' ',
+      '[',
+      'statement',
+      ' ',
+      'redacted]',
+      ' ',
+      {
+        tag: 'i',
+        attrs: {},
+        content: [
+          'This',
+          ' ',
+          'is',
+          ' ',
+          'more',
+          ' ',
+          'content'
+        ],
+      },
+    ]);
+  })
+
  describe('html', () => {
    const parseHTML = (input: string) => parse(input, { openTag: '<', closeTag: '>' });

@@ -32,6 +32,7 @@ export interface CommonOptions {
  onlyAllowTags?: string[];
  enableEscapeTags?: boolean;
  caseFreeTags?: boolean;
+  whitespaceInTags?: boolean;
  contextFreeTags?: string[];
 }