feat(parser): better handling of unclosed tags like '[My unclosed and [closed] tag'

2026-05-15 11:59:37 +03:00 · 2018-09-24 00:33:27 +02:00
parent 505152bf4c
commit b49b7435da
5 changed files with 79 additions and 21 deletions
@@ -16,12 +16,12 @@ npm i @bbob/parser
 ### API

 ```js
-import parse from '@bbob/parser'
+import { parse } from '@bbob/parser'

 const options = {
    onlyAllowTags: ['url', 'h'],
    onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
-}
+};
 const ast = parse('[url=https://github.com]hello world![/url]', options)
 ```

@@ -43,12 +43,13 @@ const ast = parse('[url=https://github.com]hello world![/url]', options)

 ```js
 import render from 'posthtml-render'
-import parse from '@bbob/parser'
+import { parse } from '@bbob/parser'

 const options = {
    onlyAllowTags: ['url', 'h'],
    onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
-}
-const ast = parse('[url=https://github.com]hello world![/url]', options)
+};
+
+const ast = parse('[url=https://github.com]hello world![/url]', options);
 const html = render(ast) // <url url="https://github.com">hello world!</url>
 ```
@@ -1 +1,2 @@
-export { parse, createTagNode } from './parse';
+export { default, parse } from './parse';
+export { TagNode } from '@bbob/plugin-helper/lib/TagNode';
@@ -22,6 +22,7 @@ const createCharGrabber = (source) => {
    idx += 1;
  };
  const hasNext = () => source.length > idx;
+  const getRest = () => source.substr(idx);

  return {
    skip,
@@ -39,6 +40,20 @@ const createCharGrabber = (source) => {
    getNext: () => source[idx + 1],
    getPrev: () => source[idx - 1],
    getCurr: () => source[idx],
+    moveIdxTo: (val) => {
+      idx += val;
+    },
+    getRest,
+    substrUntilChar: (char) => {
+      const restStr = getRest();
+      const indexOfChar = restStr.indexOf(char);
+
+      if (indexOfChar >= 0) {
+        return restStr.substr(0, indexOfChar);
+      }
+
+      return '';
+    },
  };
 };

@@ -75,6 +90,7 @@ function createLexer(buffer, options = {}) {
  const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
  const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
  const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
+  const isNotValidCharInTag = char => ([openTag].indexOf(char) >= 0);

  const emitToken = (token) => {
    if (options.onToken) {
@@ -159,15 +175,22 @@ function createLexer(buffer, options = {}) {
      emitToken(createToken(TYPE_SPACE, str, row, col));
    } else if (char === openTag) {
      const nextChar = bufferGrabber.getNext();
-      bufferGrabber.skip(); // skip [
+      bufferGrabber.skip(); // skip openTag

-      if (isCharReserved(nextChar)) {
+      // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
+      const substr = bufferGrabber.substrUntilChar(closeTag);
+      const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
+
+      if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
        emitToken(createToken(TYPE_WORD, char, row, col));
      } else {
        const str = bufferGrabber.grabWhile(val => val !== closeTag);
-        bufferGrabber.skip(); // skip ]

-        if (!(str.indexOf(EQ) > 0) || str[0] === SLASH) {
+        bufferGrabber.skip(); // skip closeTag
+        const isNoAttrsInTag = str.indexOf(EQ) === -1;
+        const isClosingTag = str[0] === SLASH;
+
+        if (isNoAttrsInTag || isClosingTag) {
          emitToken(createToken(TYPE_TAG, str, row, col));
        } else {
          const parsed = parseAttrs(str);
@@ -239,5 +239,5 @@ const parse = (input, opts = {}) => {
  return nodes;
 };

-export { createTagNode, parse };
+export { parse };
 export default parse;
@@ -217,34 +217,67 @@ describe('lexer', () => {
        [TYPE.TAG, '/y', '0', '0']
      ],
      [
-        [TYPE.TAG, 'sc', '0', '0']
+        [TYPE.WORD, '[', '0', '0'],
+        [TYPE.WORD, 'sc', '0', '0']
      ],
      [
-        [TYPE.TAG, 'sc / [/sc', '0', '0']
+        // [sc /
+        [TYPE.WORD, '[', '0', '0'],
+        [TYPE.WORD, 'sc', '0', '0'],
+        [TYPE.SPACE, ' ', '0', '0'],
+        [TYPE.WORD, '/', '0', '0'],
+        [TYPE.SPACE, ' ', '0', '0'],
+        [TYPE.TAG, '/sc', '0', '0']
      ],
      [
-        [TYPE.TAG, 'sc', '0', '0'],
-        [TYPE.ATTR_NAME, 'arg', '0', '0'],
-        [TYPE.ATTR_VALUE, 'val', '0', '0']
+        [TYPE.WORD, '[', '0', '0'],
+        [TYPE.WORD, 'sc', '0', '0'],
+        [TYPE.SPACE, ' ', '0', '0'],
+        [TYPE.WORD, 'arg="val', '0', '0'],
      ]
    ];

    inputs.forEach((input, idx) => {
      const tokens = tokenize(input);
+      const output = asserts[idx];

-      expectOutput(asserts[idx], tokens);
+      expectOutput(output, tokens);
    });
  });

-/*
+
  test('bad unclosed tag', () => {
-    const input = `[Finger tapping; R.H. = Right Hand) Part A [Finger tapping (Right hand -15-, -16-)]`;
+    const input = `[Finger Part A [Finger]`;
    const tokens = tokenize(input);
-    const output = [];
+    const output = [
+      [TYPE.WORD, '[', '0', '0'],
+      [TYPE.WORD, 'Finger', '0', '0'],
+      [TYPE.SPACE, ' ', '0', '0'],
+      [TYPE.WORD, 'Part', '0', '0'],
+      [TYPE.SPACE, ' ', '0', '0'],
+      [TYPE.WORD, 'A', '0', '0'],
+      [TYPE.SPACE, ' ', '0', '0'],
+      [TYPE.TAG, 'Finger', '0', '0']
+    ];

    expectOutput(output, tokens);
  });
-*/
+
+  test('no close tag', () => {
+    const input = '[Finger Part A';
+    const tokens = tokenize(input);
+    const output = [
+      [TYPE.WORD, '[', '0', '0'],
+      [TYPE.WORD, 'Finger', '0', '0'],
+      [TYPE.SPACE, ' ', '0', '0'],
+      [TYPE.WORD, 'Part', '0', '0'],
+      [TYPE.SPACE, ' ', '0', '0'],
+      [TYPE.WORD, 'A', '0', '0'],
+    ];
+
+    expectOutput(output, tokens);
+  });
+

  describe('html', () => {
    const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();