2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

fix(parser): infinite loop problem when escaping [\b] (#31)

With enableEscapeTags: true, when trying to write [b]test[\b], the page crashes.
Fixes #23
This commit is contained in:
Nikolay Kostyurin
2019-06-30 11:15:10 +02:00
committed by GitHub
parent 3d5c1f19d5
commit b4cf27127f
2 changed files with 34 additions and 16 deletions
+18 -10
View File
@@ -49,10 +49,11 @@ function createLexer(buffer, options = {}) {
const tokens = new Array(Math.floor(buffer.length));
const openTag = options.openTag || OPEN_BRAKET;
const closeTag = options.closeTag || CLOSE_BRAKET;
const escapeTags = options.enableEscapeTags;
const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
const NOT_CHAR_TOKENS = [
...(options.enableEscapeTags ? [BACKSLASH] : []),
// ...(options.enableEscapeTags ? [BACKSLASH] : []),
openTag, SPACE, TAB, N,
];
const WHITESPACES = [SPACE, TAB];
@@ -62,6 +63,8 @@ function createLexer(buffer, options = {}) {
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
const isEscapableChar = char => (char === openTag || char === closeTag || char === BACKSLASH);
const isEscapeChar = char => char === BACKSLASH;
/**
* Emits newly created token to subscriber
@@ -158,14 +161,9 @@ function createLexer(buffer, options = {}) {
} else if (isWhiteSpace(currChar)) {
const str = bufferGrabber.grabWhile(isWhiteSpace);
emitToken(createToken(TYPE_SPACE, str, row, col));
} else if (options.enableEscapeTags && currChar === BACKSLASH
&& (nextChar === openTag || nextChar === closeTag)) {
} else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
bufferGrabber.skip(); // skip the \ without emitting anything
bufferGrabber.skip(); // skip past the [ or ] as well
emitToken(createToken(TYPE_WORD, nextChar, row, col));
} else if (options.enableEscapeTags && currChar === BACKSLASH && nextChar === BACKSLASH) {
bufferGrabber.skip(); // skip the first \ without emitting anything
bufferGrabber.skip(); // skip past the second \ and emit it
bufferGrabber.skip(); // skip past the [, ] or \ as well
emitToken(createToken(TYPE_WORD, nextChar, row, col));
} else if (currChar === openTag) {
bufferGrabber.skip(); // skip openTag
@@ -200,9 +198,19 @@ function createLexer(buffer, options = {}) {
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else if (isCharToken(currChar)) {
const str = bufferGrabber.grabWhile(isCharToken);
if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
bufferGrabber.skip();
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else {
const str = bufferGrabber.grabWhile((char) => {
if (escapeTags) {
return isCharToken(char) && !isEscapeChar(char);
}
return isCharToken(char);
});
emitToken(createToken(TYPE_WORD, str, row, col));
emitToken(createToken(TYPE_WORD, str, row, col));
}
}
};
+16 -6
View File
@@ -11,6 +11,7 @@ const TYPE = {
};
const tokenize = input => (createLexer(input).tokenize());
const tokenizeEscape = input => (createLexer(input, { enableEscapeTags: true }).tokenize());
describe('lexer', () => {
const expectOutput = (output, tokens) => {
@@ -289,11 +290,9 @@ describe('lexer', () => {
});
test('escaped tag', () => {
const tokenizeEscape = input => (createLexer(input, {
enableEscapeTags: true
}).tokenize());
const input = '\\[b\\]test\\[';
const tokens = tokenizeEscape(input);
const output = [
[TYPE.WORD, '[', '0', '0'],
[TYPE.WORD, 'b', '0', '0'],
@@ -306,9 +305,6 @@ describe('lexer', () => {
});
test('escaped tag and escaped backslash', () => {
const tokenizeEscape = input => (createLexer(input, {
enableEscapeTags: true
}).tokenize());
const input = '\\\\\\[b\\\\\\]test\\\\\\[/b\\\\\\]';
const tokens = tokenizeEscape(input);
const output = [
@@ -328,6 +324,20 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('bad closed tag with escaped backslash', () => {
const input = `[b]test[\\b]`;
const tokens = tokenizeEscape(input);
const output = [
[TYPE.TAG, 'b', '0', '3'],
[TYPE.WORD, 'test', '0', '7'],
[TYPE.WORD, '[', '0', '8'],
[TYPE.WORD, '\\', '0', '9'],
[TYPE.WORD, 'b]', '0', '11'],
];
expectOutput(output, tokens);
});
describe('html', () => {
const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();