mirror of
https://github.com/tenrok/BBob.git
synced 2026-05-15 11:59:37 +03:00
feat(parser): better handling of unclosed tags like '[My unclosed and [closed] tag'
This commit is contained in:
@@ -16,12 +16,12 @@ npm i @bbob/parser
|
||||
### API
|
||||
|
||||
```js
|
||||
import parse from '@bbob/parser'
|
||||
import { parse } from '@bbob/parser'
|
||||
|
||||
const options = {
|
||||
onlyAllowTags: ['url', 'h'],
|
||||
onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
|
||||
}
|
||||
};
|
||||
const ast = parse('[url=https://github.com]hello world![/url]', options)
|
||||
```
|
||||
|
||||
@@ -43,12 +43,13 @@ const ast = parse('[url=https://github.com]hello world![/url]', options)
|
||||
|
||||
```js
|
||||
import render from 'posthtml-render'
|
||||
import parse from '@bbob/parser'
|
||||
import { parse } from '@bbob/parser'
|
||||
|
||||
const options = {
|
||||
onlyAllowTags: ['url', 'h'],
|
||||
onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
|
||||
}
|
||||
const ast = parse('[url=https://github.com]hello world![/url]', options)
|
||||
};
|
||||
|
||||
const ast = parse('[url=https://github.com]hello world![/url]', options);
|
||||
const html = render(ast) // <url url="https://github.com">hello world!</url>
|
||||
```
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
export { parse, createTagNode } from './parse';
|
||||
export { default, parse } from './parse';
|
||||
export { TagNode } from '@bbob/plugin-helper/lib/TagNode';
|
||||
|
||||
@@ -22,6 +22,7 @@ const createCharGrabber = (source) => {
|
||||
idx += 1;
|
||||
};
|
||||
const hasNext = () => source.length > idx;
|
||||
const getRest = () => source.substr(idx);
|
||||
|
||||
return {
|
||||
skip,
|
||||
@@ -39,6 +40,20 @@ const createCharGrabber = (source) => {
|
||||
getNext: () => source[idx + 1],
|
||||
getPrev: () => source[idx - 1],
|
||||
getCurr: () => source[idx],
|
||||
moveIdxTo: (val) => {
|
||||
idx += val;
|
||||
},
|
||||
getRest,
|
||||
substrUntilChar: (char) => {
|
||||
const restStr = getRest();
|
||||
const indexOfChar = restStr.indexOf(char);
|
||||
|
||||
if (indexOfChar >= 0) {
|
||||
return restStr.substr(0, indexOfChar);
|
||||
}
|
||||
|
||||
return '';
|
||||
},
|
||||
};
|
||||
};
|
||||
|
||||
@@ -75,6 +90,7 @@ function createLexer(buffer, options = {}) {
|
||||
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
|
||||
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
|
||||
const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
|
||||
const isNotValidCharInTag = char => ([openTag].indexOf(char) >= 0);
|
||||
|
||||
const emitToken = (token) => {
|
||||
if (options.onToken) {
|
||||
@@ -159,15 +175,22 @@ function createLexer(buffer, options = {}) {
|
||||
emitToken(createToken(TYPE_SPACE, str, row, col));
|
||||
} else if (char === openTag) {
|
||||
const nextChar = bufferGrabber.getNext();
|
||||
bufferGrabber.skip(); // skip [
|
||||
bufferGrabber.skip(); // skip openTag
|
||||
|
||||
if (isCharReserved(nextChar)) {
|
||||
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
|
||||
const substr = bufferGrabber.substrUntilChar(closeTag);
|
||||
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
|
||||
|
||||
if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
|
||||
emitToken(createToken(TYPE_WORD, char, row, col));
|
||||
} else {
|
||||
const str = bufferGrabber.grabWhile(val => val !== closeTag);
|
||||
bufferGrabber.skip(); // skip ]
|
||||
|
||||
if (!(str.indexOf(EQ) > 0) || str[0] === SLASH) {
|
||||
bufferGrabber.skip(); // skip closeTag
|
||||
const isNoAttrsInTag = str.indexOf(EQ) === -1;
|
||||
const isClosingTag = str[0] === SLASH;
|
||||
|
||||
if (isNoAttrsInTag || isClosingTag) {
|
||||
emitToken(createToken(TYPE_TAG, str, row, col));
|
||||
} else {
|
||||
const parsed = parseAttrs(str);
|
||||
|
||||
@@ -239,5 +239,5 @@ const parse = (input, opts = {}) => {
|
||||
return nodes;
|
||||
};
|
||||
|
||||
export { createTagNode, parse };
|
||||
export { parse };
|
||||
export default parse;
|
||||
|
||||
@@ -217,34 +217,67 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/y', '0', '0']
|
||||
],
|
||||
[
|
||||
[TYPE.TAG, 'sc', '0', '0']
|
||||
[TYPE.WORD, '[', '0', '0'],
|
||||
[TYPE.WORD, 'sc', '0', '0']
|
||||
],
|
||||
[
|
||||
[TYPE.TAG, 'sc / [/sc', '0', '0']
|
||||
// [sc /
|
||||
[TYPE.WORD, '[', '0', '0'],
|
||||
[TYPE.WORD, 'sc', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.WORD, '/', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.TAG, '/sc', '0', '0']
|
||||
],
|
||||
[
|
||||
[TYPE.TAG, 'sc', '0', '0'],
|
||||
[TYPE.ATTR_NAME, 'arg', '0', '0'],
|
||||
[TYPE.ATTR_VALUE, 'val', '0', '0']
|
||||
[TYPE.WORD, '[', '0', '0'],
|
||||
[TYPE.WORD, 'sc', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.WORD, 'arg="val', '0', '0'],
|
||||
]
|
||||
];
|
||||
|
||||
inputs.forEach((input, idx) => {
|
||||
const tokens = tokenize(input);
|
||||
const output = asserts[idx];
|
||||
|
||||
expectOutput(asserts[idx], tokens);
|
||||
expectOutput(output, tokens);
|
||||
});
|
||||
});
|
||||
|
||||
/*
|
||||
|
||||
test('bad unclosed tag', () => {
|
||||
const input = `[Finger tapping; R.H. = Right Hand) Part A [Finger tapping (Right hand -15-, -16-)]`;
|
||||
const input = `[Finger Part A [Finger]`;
|
||||
const tokens = tokenize(input);
|
||||
const output = [];
|
||||
const output = [
|
||||
[TYPE.WORD, '[', '0', '0'],
|
||||
[TYPE.WORD, 'Finger', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.WORD, 'Part', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.WORD, 'A', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.TAG, 'Finger', '0', '0']
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
});
|
||||
*/
|
||||
|
||||
test('no close tag', () => {
|
||||
const input = '[Finger Part A';
|
||||
const tokens = tokenize(input);
|
||||
const output = [
|
||||
[TYPE.WORD, '[', '0', '0'],
|
||||
[TYPE.WORD, 'Finger', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.WORD, 'Part', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.WORD, 'A', '0', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
});
|
||||
|
||||
|
||||
describe('html', () => {
|
||||
const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();
|
||||
|
||||
Reference in New Issue
Block a user