feat(parser): better handling of unclosed tags like '[My unclosed and [closed] tag'

2026-06-20 20:00:33 +03:00 · 2018-09-24 00:33:27 +02:00
parent 505152bf4c
commit b49b7435da
5 changed files with 79 additions and 21 deletions
@@ -16,12 +16,12 @@ npm i @bbob/parser
 ### API
 ```js
-import parse from '@bbob/parser'
+import { parse } from '@bbob/parser'
 const options = {
    onlyAllowTags: ['url', 'h'],
    onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
-}
+};
 const ast = parse('[url=https://github.com]hello world![/url]', options)
 ```
@@ -43,12 +43,13 @@ const ast = parse('[url=https://github.com]hello world![/url]', options)
 ```js
 import render from 'posthtml-render'
-import parse from '@bbob/parser'
+import { parse } from '@bbob/parser'
 const options = {
    onlyAllowTags: ['url', 'h'],
    onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
-}
+};
-const ast = parse('[url=https://github.com]hello world![/url]', options)
+
 const ast = parse('[url=https://github.com]hello world![/url]', options);
 const html = render(ast) // <url url="https://github.com">hello world!</url>
 ```
@@ -1 +1,2 @@
-export { parse, createTagNode } from './parse';
+export { default, parse } from './parse';
 export { TagNode } from '@bbob/plugin-helper/lib/TagNode';
@@ -22,6 +22,7 @@ const createCharGrabber = (source) => {
    idx += 1;
  };
  const hasNext = () => source.length > idx;
  const getRest = () => source.substr(idx);
  return {
    skip,
@@ -39,6 +40,20 @@ const createCharGrabber = (source) => {
    getNext: () => source[idx + 1],
    getPrev: () => source[idx - 1],
    getCurr: () => source[idx],
    moveIdxTo: (val) => {
      idx += val;
    },
    getRest,
    // Look ahead without consuming input: returns the text between the
    // current position and the first occurrence of `char` in the remaining
    // source (the `char` itself is not included).
    // Returns '' when `char` does not occur in the rest of the source, and
    // also when `char` is the very next character, so callers cannot tell
    // those two cases apart from the result alone.
    substrUntilChar: (char) => {
      const restStr = getRest();
      const indexOfChar = restStr.indexOf(char);
      if (indexOfChar >= 0) {
        return restStr.substr(0, indexOfChar);
      }
      return '';
    },
  };
 };
@@ -75,6 +90,7 @@ function createLexer(buffer, options = {}) {
  const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
  const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
  const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
  const isNotValidCharInTag = char => ([openTag].indexOf(char) >= 0);
  const emitToken = (token) => {
    if (options.onToken) {
@@ -159,15 +175,22 @@ function createLexer(buffer, options = {}) {
      emitToken(createToken(TYPE_SPACE, str, row, col));
    } else if (char === openTag) {
      const nextChar = bufferGrabber.getNext();
-      bufferGrabber.skip(); // skip [
+      bufferGrabber.skip(); // skip openTag
-      if (isCharReserved(nextChar)) {
+      // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
      const substr = bufferGrabber.substrUntilChar(closeTag);
      const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
      if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
        emitToken(createToken(TYPE_WORD, char, row, col));
      } else {
        const str = bufferGrabber.grabWhile(val => val !== closeTag);
        bufferGrabber.skip(); // skip ]
-        if (!(str.indexOf(EQ) > 0) || str[0] === SLASH) {
+        bufferGrabber.skip(); // skip closeTag
        const isNoAttrsInTag = str.indexOf(EQ) === -1;
        const isClosingTag = str[0] === SLASH;
        if (isNoAttrsInTag || isClosingTag) {
          emitToken(createToken(TYPE_TAG, str, row, col));
        } else {
          const parsed = parseAttrs(str);
@@ -239,5 +239,5 @@ const parse = (input, opts = {}) => {
  return nodes;
 };
-export { createTagNode, parse };
+export { parse };
 export default parse;
@@ -217,34 +217,67 @@ describe('lexer', () => {
        [TYPE.TAG, '/y', '0', '0']
      ],
      [
-        [TYPE.TAG, 'sc', '0', '0']
+        [TYPE.WORD, '[', '0', '0'],
        [TYPE.WORD, 'sc', '0', '0']
      ],
      [
-        [TYPE.TAG, 'sc / [/sc', '0', '0']
+        // [sc /
        [TYPE.WORD, '[', '0', '0'],
        [TYPE.WORD, 'sc', '0', '0'],
        [TYPE.SPACE, ' ', '0', '0'],
        [TYPE.WORD, '/', '0', '0'],
        [TYPE.SPACE, ' ', '0', '0'],
        [TYPE.TAG, '/sc', '0', '0']
      ],
      [
-        [TYPE.TAG, 'sc', '0', '0'],
+        [TYPE.WORD, '[', '0', '0'],
-        [TYPE.ATTR_NAME, 'arg', '0', '0'],
+        [TYPE.WORD, 'sc', '0', '0'],
-        [TYPE.ATTR_VALUE, 'val', '0', '0']
+        [TYPE.SPACE, ' ', '0', '0'],
        [TYPE.WORD, 'arg="val', '0', '0'],
      ]
    ];
    inputs.forEach((input, idx) => {
      const tokens = tokenize(input);
      const output = asserts[idx];
-      expectOutput(asserts[idx], tokens);
+      expectOutput(output, tokens);
    });
  });
-/*
+
  test('bad unclosed tag', () => {
-    const input = `[Finger tapping; R.H. = Right Hand) Part A [Finger tapping (Right hand -15-, -16-)]`;
+    const input = `[Finger Part A [Finger]`;
    const tokens = tokenize(input);
-    const output = [];
+    const output = [
      [TYPE.WORD, '[', '0', '0'],
      [TYPE.WORD, 'Finger', '0', '0'],
      [TYPE.SPACE, ' ', '0', '0'],
      [TYPE.WORD, 'Part', '0', '0'],
      [TYPE.SPACE, ' ', '0', '0'],
      [TYPE.WORD, 'A', '0', '0'],
      [TYPE.SPACE, ' ', '0', '0'],
      [TYPE.TAG, 'Finger', '0', '0']
    ];
    expectOutput(output, tokens);
  });
-*/
+
  // An opening bracket that is never closed must not start a tag: the '['
  // is expected to be emitted as a plain WORD token, followed by the
  // remaining text split into WORD and SPACE tokens.
  test('no close tag', () => {
    const input = '[Finger Part A';
    const tokens = tokenize(input);
    // Expected token stream: [type, value, row, col] tuples, no TAG tokens.
    const output = [
      [TYPE.WORD, '[', '0', '0'],
      [TYPE.WORD, 'Finger', '0', '0'],
      [TYPE.SPACE, ' ', '0', '0'],
      [TYPE.WORD, 'Part', '0', '0'],
      [TYPE.SPACE, ' ', '0', '0'],
      [TYPE.WORD, 'A', '0', '0'],
    ];
    expectOutput(output, tokens);
  });
  describe('html', () => {
    const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();
`@@ -1 +1,2 @@`
	`export { parse, createTagNode } from './parse';`	`export { default, parse } from './parse';`
		`export { TagNode } from '@bbob/plugin-helper/lib/TagNode';`