From 0566241e2315cae0879ecb3ab467c83e99f0cc49 Mon Sep 17 00:00:00 2001
From: Nikolay Kost <JiLiZART@Gmail.com>
Date: Sun, 9 Mar 2025 02:23:03 +0200
Subject: [PATCH] feat(#271): whitespaceInTags mode (#272)

* fix: test for buggy behavior

* feat: implement whitespaceInTags mode

* feat: move all char arrays to Map

* feat: revert Map for char arrays
---
 .changeset/tiny-dolls-raise.md          | 29 +++++++++++++++++++
 packages/bbob-parser/src/lexer.ts       | 13 ++++++---
 packages/bbob-parser/src/parse.ts       |  1 +
 packages/bbob-parser/test/parse.test.ts | 37 +++++++++++++++++++++++++
 packages/bbob-types/src/parser.ts       |  1 +
 5 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 .changeset/tiny-dolls-raise.md
diff --git a/.changeset/tiny-dolls-raise.md b/.changeset/tiny-dolls-raise.md
new file mode 100644
index 0000000..3ccfa5a
--- /dev/null
+++ b/.changeset/tiny-dolls-raise.md
@@ -0,0 +1,29 @@
+---
+"@bbob/parser": minor
+"@bbob/types": minor
+"@bbob/cli": minor
+"@bbob/core": minor
+"@bbob/html": minor
+"@bbob/plugin-helper": minor
+"@bbob/preset": minor
+"@bbob/preset-html5": minor
+"@bbob/preset-react": minor
+"@bbob/preset-vue": minor
+"@bbob/react": minor
+"@bbob/vue2": minor
+"@bbob/vue3": minor
+---
+
+Added the `whitespaceInTags` parsing option (`true` by default). Set it to `false` to disable parsing of `[tags with spaces]`; such input is then treated as plain text.
+
+```js
+import html5 from '@bbob/preset-html5'
+import parse from '@bbob/html'
+
+const html = parse('[b]lorem[/b] [foo bar] [i]ipsum[/i]', html5(), {
+  whitespaceInTags: false
+})
+
+console.log(html) // <b>lorem</b> [foo bar] <i>ipsum</i>
+```
+
diff --git a/packages/bbob-parser/src/lexer.ts b/packages/bbob-parser/src/lexer.ts
index c57ba7e..d5d1ce7 100644
--- a/packages/bbob-parser/src/lexer.ts
+++ b/packages/bbob-parser/src/lexer.ts
@@ -67,7 +67,6 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
   const NOT_CHAR_TOKENS = [
     openTag, SPACE, TAB, N,
   ];
-
   const isCharReserved = (char: string) => (RESERVED_CHARS.indexOf(char) >= 0);
   const isCharToken = (char: string) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
   const isEscapableChar = (char: string) => (char === openTag || char === closeTag || char === BACKSLASH);
@@ -198,13 +197,19 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
     const currChar = chars.getCurr();
     const nextChar = chars.getNext();
 
-    chars.skip();
+    chars.skip(); // skip openTag
 
     // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
     const substr = chars.substrUntilChar(closeTag);
-    const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
 
-    if ((nextChar && isCharReserved(nextChar)) || hasInvalidChars || chars.isLast()) {
+
+    const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
+    const isNextCharReserved = nextChar && isCharReserved(nextChar)
+    const isLastChar = chars.isLast()
+    const hasSpace = substr.indexOf(SPACE) >= 0;
+    const isSpaceRestricted = hasSpace && options.whitespaceInTags === false;
+
+    if (isNextCharReserved || hasInvalidChars || isLastChar || isSpaceRestricted) {
       emitToken(TYPE_WORD, currChar);
 
       return STATE_WORD;
diff --git a/packages/bbob-parser/src/parse.ts b/packages/bbob-parser/src/parse.ts
index 1217614..7e77dbd 100644
--- a/packages/bbob-parser/src/parse.ts
+++ b/packages/bbob-parser/src/parse.ts
@@ -321,6 +321,7 @@ function parse(input: string, opts: ParseOptions = {}) {
     contextFreeTags: options.contextFreeTags,
     caseFreeTags: options.caseFreeTags,
     enableEscapeTags: options.enableEscapeTags,
+    whitespaceInTags: options.whitespaceInTags,
   });
 
   // eslint-disable-next-line no-unused-vars
diff --git a/packages/bbob-parser/test/parse.test.ts b/packages/bbob-parser/test/parse.test.ts
index ec558ca..ca4dca6 100644
--- a/packages/bbob-parser/test/parse.test.ts
+++ b/packages/bbob-parser/test/parse.test.ts
@@ -869,6 +869,43 @@ sdfasdfasdf
     ]);
   });
 
+  test('parse invalid tags', () => {
+    const input = parse('[b]Press Release[/b] [statement redacted] [i]This is more content[/i]', {
+      whitespaceInTags: false
+    })
+
+    expectOutput(input, [
+      {
+        tag: 'b',
+        attrs: {},
+        content: [
+          'Press',
+          ' ',
+          'Release'
+        ],
+      },
+      ' ',
+      '[',
+      'statement',
+      ' ',
+      'redacted]',
+      ' ',
+      {
+        tag: 'i',
+        attrs: {},
+        content: [
+          'This',
+          ' ',
+          'is',
+          ' ',
+          'more',
+          ' ',
+          'content'
+        ],
+      },
+    ]);
+  })
+
   describe('html', () => {
     const parseHTML = (input: string) => parse(input, { openTag: '<', closeTag: '>' });
 
diff --git a/packages/bbob-types/src/parser.ts b/packages/bbob-types/src/parser.ts
index c845dea..6c7463f 100644
--- a/packages/bbob-types/src/parser.ts
+++ b/packages/bbob-types/src/parser.ts
@@ -32,6 +32,7 @@ export interface CommonOptions {
   onlyAllowTags?: string[];
   enableEscapeTags?: boolean;
   caseFreeTags?: boolean;
+  whitespaceInTags?: boolean;
   contextFreeTags?: string[];
 }