feat(#271): whitespaceInTags mode (#272)

* fix: test for buggy behavior * feat: implement whitespaceInTags mode * feat: move all char arrays to Map * feat: revert Map for char arrays
2026-06-20 20:00:33 +03:00 · 2025-03-09 02:23:03 +02:00
parent f0c9da683d
commit 0566241e23
5 changed files with 77 additions and 4 deletions
@@ -0,0 +1,29 @@
 ---
 "@bbob/parser": minor
 "@bbob/types": minor
 "@bbob/cli": minor
 "@bbob/core": minor
 "@bbob/html": minor
 "@bbob/plugin-helper": minor
 "@bbob/preset": minor
 "@bbob/preset-html5": minor
 "@bbob/preset-react": minor
 "@bbob/preset-vue": minor
 "@bbob/react": minor
 "@bbob/vue2": minor
 "@bbob/vue3": minor
 ---
 Added the `whitespaceInTags` parsing option (`true` by default). Setting it to `false` disables parsing of `[tags with spaces]`; such input is treated as plain text instead.
 ```js
 import html5 from '@bbob/preset-html5'
 import parse from '@bbob/html'
 const html = parse('[b]lorem[/b] [foo bar] [i]ipsum[/i]', html5(), {
  whitespaceInTags: false
 })
 console.log(html) // <b>lorem</b> [foo bar] <i>ipsum</i>
 ```
@@ -67,7 +67,6 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
  const NOT_CHAR_TOKENS = [
    openTag, SPACE, TAB, N,
  ];
  const isCharReserved = (char: string) => (RESERVED_CHARS.indexOf(char) >= 0);
  const isCharToken = (char: string) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
  const isEscapableChar = (char: string) => (char === openTag || char === closeTag || char === BACKSLASH);
@@ -198,13 +197,19 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
    const currChar = chars.getCurr();
    const nextChar = chars.getNext();
-    chars.skip();
+    chars.skip(); // skip openTag
    // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
    const substr = chars.substrUntilChar(closeTag);
    const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
-    if ((nextChar && isCharReserved(nextChar)) || hasInvalidChars || chars.isLast()) {
+
    const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
    const isNextCharReserved = nextChar && isCharReserved(nextChar)
    const isLastChar = chars.isLast()
    const hasSpace = substr.indexOf(SPACE) >= 0;
    const isSpaceRestricted = hasSpace && options.whitespaceInTags === false;
    if (isNextCharReserved || hasInvalidChars || isLastChar || isSpaceRestricted) {
      emitToken(TYPE_WORD, currChar);
      return STATE_WORD;
@@ -321,6 +321,7 @@ function parse(input: string, opts: ParseOptions = {}) {
    contextFreeTags: options.contextFreeTags,
    caseFreeTags: options.caseFreeTags,
    enableEscapeTags: options.enableEscapeTags,
    whitespaceInTags: options.whitespaceInTags,
  });
  // eslint-disable-next-line no-unused-vars
@@ -869,6 +869,43 @@ sdfasdfasdf
    ]);
  });
  // With `whitespaceInTags: false`, a bracketed span that contains a space
  // is expected to come back as plain text pieces rather than as a tag node.
  test('parse invalid tags', () => {
    const source = '[b]Press Release[/b] [statement redacted] [i]This is more content[/i]';
    const ast = parse(source, { whitespaceInTags: false });

    expectOutput(ast, [
      // well-formed [b] tag is still parsed as a node
      { tag: 'b', attrs: {}, content: ['Press', ' ', 'Release'] },
      ' ',
      // "[statement redacted]" stays literal text: open bracket, word, space, rest
      '[', 'statement', ' ', 'redacted]',
      ' ',
      // well-formed [i] tag is still parsed as a node
      { tag: 'i', attrs: {}, content: ['This', ' ', 'is', ' ', 'more', ' ', 'content'] },
    ]);
  });
  describe('html', () => {
    const parseHTML = (input: string) => parse(input, { openTag: '<', closeTag: '>' });
@@ -32,6 +32,7 @@ export interface CommonOptions {
  onlyAllowTags?: string[];
  enableEscapeTags?: boolean;
  caseFreeTags?: boolean;
  whitespaceInTags?: boolean;
  contextFreeTags?: string[];
 }