2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

feat(parser): better handling of unclosed tags like '[My unclosed and [closed] tag'

This commit is contained in:
Nikolay Kostyurin
2018-09-24 00:33:27 +02:00
parent 505152bf4c
commit b49b7435da
5 changed files with 79 additions and 21 deletions
+6 -5
View File
@@ -16,12 +16,12 @@ npm i @bbob/parser
### API
```js
import parse from '@bbob/parser'
import { parse } from '@bbob/parser'
const options = {
onlyAllowTags: ['url', 'h'],
onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
}
};
const ast = parse('[url=https://github.com]hello world![/url]', options)
```
@@ -43,12 +43,13 @@ const ast = parse('[url=https://github.com]hello world![/url]', options)
```js
import render from 'posthtml-render'
import parse from '@bbob/parser'
import { parse } from '@bbob/parser'
const options = {
onlyAllowTags: ['url', 'h'],
onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
}
const ast = parse('[url=https://github.com]hello world![/url]', options)
};
const ast = parse('[url=https://github.com]hello world![/url]', options);
const html = render(ast) // <url url="https://github.com">hello world!</url>
```
+2 -1
View File
@@ -1 +1,2 @@
export { parse, createTagNode } from './parse';
export { default, parse } from './parse';
export { TagNode } from '@bbob/plugin-helper/lib/TagNode';
+27 -4
View File
@@ -22,6 +22,7 @@ const createCharGrabber = (source) => {
idx += 1;
};
const hasNext = () => source.length > idx;
const getRest = () => source.substr(idx);
return {
skip,
@@ -39,6 +40,20 @@ const createCharGrabber = (source) => {
getNext: () => source[idx + 1],
getPrev: () => source[idx - 1],
getCurr: () => source[idx],
moveIdxTo: (val) => {
idx += val;
},
getRest,
substrUntilChar: (char) => {
const restStr = getRest();
const indexOfChar = restStr.indexOf(char);
if (indexOfChar >= 0) {
return restStr.substr(0, indexOfChar);
}
return '';
},
};
};
@@ -75,6 +90,7 @@ function createLexer(buffer, options = {}) {
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
const isNotValidCharInTag = char => ([openTag].indexOf(char) >= 0);
const emitToken = (token) => {
if (options.onToken) {
@@ -159,15 +175,22 @@ function createLexer(buffer, options = {}) {
emitToken(createToken(TYPE_SPACE, str, row, col));
} else if (char === openTag) {
const nextChar = bufferGrabber.getNext();
bufferGrabber.skip(); // skip [
bufferGrabber.skip(); // skip openTag
if (isCharReserved(nextChar)) {
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = bufferGrabber.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
emitToken(createToken(TYPE_WORD, char, row, col));
} else {
const str = bufferGrabber.grabWhile(val => val !== closeTag);
bufferGrabber.skip(); // skip ]
if (!(str.indexOf(EQ) > 0) || str[0] === SLASH) {
bufferGrabber.skip(); // skip closeTag
const isNoAttrsInTag = str.indexOf(EQ) === -1;
const isClosingTag = str[0] === SLASH;
if (isNoAttrsInTag || isClosingTag) {
emitToken(createToken(TYPE_TAG, str, row, col));
} else {
const parsed = parseAttrs(str);
+1 -1
View File
@@ -239,5 +239,5 @@ const parse = (input, opts = {}) => {
return nodes;
};
export { createTagNode, parse };
export { parse };
export default parse;
+43 -10
View File
@@ -217,34 +217,67 @@ describe('lexer', () => {
[TYPE.TAG, '/y', '0', '0']
],
[
[TYPE.TAG, 'sc', '0', '0']
[TYPE.WORD, '[', '0', '0'],
[TYPE.WORD, 'sc', '0', '0']
],
[
[TYPE.TAG, 'sc / [/sc', '0', '0']
// [sc /
[TYPE.WORD, '[', '0', '0'],
[TYPE.WORD, 'sc', '0', '0'],
[TYPE.SPACE, ' ', '0', '0'],
[TYPE.WORD, '/', '0', '0'],
[TYPE.SPACE, ' ', '0', '0'],
[TYPE.TAG, '/sc', '0', '0']
],
[
[TYPE.TAG, 'sc', '0', '0'],
[TYPE.ATTR_NAME, 'arg', '0', '0'],
[TYPE.ATTR_VALUE, 'val', '0', '0']
[TYPE.WORD, '[', '0', '0'],
[TYPE.WORD, 'sc', '0', '0'],
[TYPE.SPACE, ' ', '0', '0'],
[TYPE.WORD, 'arg="val', '0', '0'],
]
];
inputs.forEach((input, idx) => {
const tokens = tokenize(input);
const output = asserts[idx];
expectOutput(asserts[idx], tokens);
expectOutput(output, tokens);
});
});
/*
test('bad unclosed tag', () => {
const input = `[Finger tapping; R.H. = Right Hand) Part A [Finger tapping (Right hand -15-, -16-)]`;
const input = `[Finger Part A [Finger]`;
const tokens = tokenize(input);
const output = [];
const output = [
[TYPE.WORD, '[', '0', '0'],
[TYPE.WORD, 'Finger', '0', '0'],
[TYPE.SPACE, ' ', '0', '0'],
[TYPE.WORD, 'Part', '0', '0'],
[TYPE.SPACE, ' ', '0', '0'],
[TYPE.WORD, 'A', '0', '0'],
[TYPE.SPACE, ' ', '0', '0'],
[TYPE.TAG, 'Finger', '0', '0']
];
expectOutput(output, tokens);
});
*/
test('no close tag', () => {
const input = '[Finger Part A';
const tokens = tokenize(input);
const output = [
[TYPE.WORD, '[', '0', '0'],
[TYPE.WORD, 'Finger', '0', '0'],
[TYPE.SPACE, ' ', '0', '0'],
[TYPE.WORD, 'Part', '0', '0'],
[TYPE.SPACE, ' ', '0', '0'],
[TYPE.WORD, 'A', '0', '0'],
];
expectOutput(output, tokens);
});
describe('html', () => {
const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();