mirror of
https://github.com/tenrok/BBob.git
synced 2026-06-20 20:00:33 +03:00
feat(parser): better handling of unclosed tags like '[My unclosed and [closed] tag'
This commit is contained in:
@@ -16,12 +16,12 @@ npm i @bbob/parser
|
|||||||
### API
|
### API
|
||||||
|
|
||||||
```js
|
```js
|
||||||
import parse from '@bbob/parser'
|
import { parse } from '@bbob/parser'
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
onlyAllowTags: ['url', 'h'],
|
onlyAllowTags: ['url', 'h'],
|
||||||
onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
|
onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
|
||||||
}
|
};
|
||||||
const ast = parse('[url=https://github.com]hello world![/url]', options)
|
const ast = parse('[url=https://github.com]hello world![/url]', options)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -43,12 +43,13 @@ const ast = parse('[url=https://github.com]hello world![/url]', options)
|
|||||||
|
|
||||||
```js
|
```js
|
||||||
import render from 'posthtml-render'
|
import render from 'posthtml-render'
|
||||||
import parse from '@bbob/parser'
|
import { parse } from '@bbob/parser'
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
onlyAllowTags: ['url', 'h'],
|
onlyAllowTags: ['url', 'h'],
|
||||||
onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
|
onError: (err) => console.warn(err.message, err.lineNumber, err.columnNumber)
|
||||||
}
|
};
|
||||||
const ast = parse('[url=https://github.com]hello world![/url]', options)
|
|
||||||
|
const ast = parse('[url=https://github.com]hello world![/url]', options);
|
||||||
const html = render(ast) // <url url="https://github.com">hello world!</url>
|
const html = render(ast) // <url url="https://github.com">hello world!</url>
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
export { parse, createTagNode } from './parse';
|
export { default, parse } from './parse';
|
||||||
|
export { TagNode } from '@bbob/plugin-helper/lib/TagNode';
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ const createCharGrabber = (source) => {
|
|||||||
idx += 1;
|
idx += 1;
|
||||||
};
|
};
|
||||||
const hasNext = () => source.length > idx;
|
const hasNext = () => source.length > idx;
|
||||||
|
const getRest = () => source.substr(idx);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
skip,
|
skip,
|
||||||
@@ -39,6 +40,20 @@ const createCharGrabber = (source) => {
|
|||||||
getNext: () => source[idx + 1],
|
getNext: () => source[idx + 1],
|
||||||
getPrev: () => source[idx - 1],
|
getPrev: () => source[idx - 1],
|
||||||
getCurr: () => source[idx],
|
getCurr: () => source[idx],
|
||||||
|
moveIdxTo: (val) => {
|
||||||
|
idx += val;
|
||||||
|
},
|
||||||
|
getRest,
|
||||||
|
substrUntilChar: (char) => {
|
||||||
|
const restStr = getRest();
|
||||||
|
const indexOfChar = restStr.indexOf(char);
|
||||||
|
|
||||||
|
if (indexOfChar >= 0) {
|
||||||
|
return restStr.substr(0, indexOfChar);
|
||||||
|
}
|
||||||
|
|
||||||
|
return '';
|
||||||
|
},
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -75,6 +90,7 @@ function createLexer(buffer, options = {}) {
|
|||||||
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
|
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
|
||||||
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
|
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
|
||||||
const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
|
const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
|
||||||
|
const isNotValidCharInTag = char => ([openTag].indexOf(char) >= 0);
|
||||||
|
|
||||||
const emitToken = (token) => {
|
const emitToken = (token) => {
|
||||||
if (options.onToken) {
|
if (options.onToken) {
|
||||||
@@ -159,15 +175,22 @@ function createLexer(buffer, options = {}) {
|
|||||||
emitToken(createToken(TYPE_SPACE, str, row, col));
|
emitToken(createToken(TYPE_SPACE, str, row, col));
|
||||||
} else if (char === openTag) {
|
} else if (char === openTag) {
|
||||||
const nextChar = bufferGrabber.getNext();
|
const nextChar = bufferGrabber.getNext();
|
||||||
bufferGrabber.skip(); // skip [
|
bufferGrabber.skip(); // skip openTag
|
||||||
|
|
||||||
if (isCharReserved(nextChar)) {
|
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
|
||||||
|
const substr = bufferGrabber.substrUntilChar(closeTag);
|
||||||
|
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
|
||||||
|
|
||||||
|
if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
|
||||||
emitToken(createToken(TYPE_WORD, char, row, col));
|
emitToken(createToken(TYPE_WORD, char, row, col));
|
||||||
} else {
|
} else {
|
||||||
const str = bufferGrabber.grabWhile(val => val !== closeTag);
|
const str = bufferGrabber.grabWhile(val => val !== closeTag);
|
||||||
bufferGrabber.skip(); // skip ]
|
|
||||||
|
|
||||||
if (!(str.indexOf(EQ) > 0) || str[0] === SLASH) {
|
bufferGrabber.skip(); // skip closeTag
|
||||||
|
const isNoAttrsInTag = str.indexOf(EQ) === -1;
|
||||||
|
const isClosingTag = str[0] === SLASH;
|
||||||
|
|
||||||
|
if (isNoAttrsInTag || isClosingTag) {
|
||||||
emitToken(createToken(TYPE_TAG, str, row, col));
|
emitToken(createToken(TYPE_TAG, str, row, col));
|
||||||
} else {
|
} else {
|
||||||
const parsed = parseAttrs(str);
|
const parsed = parseAttrs(str);
|
||||||
|
|||||||
@@ -239,5 +239,5 @@ const parse = (input, opts = {}) => {
|
|||||||
return nodes;
|
return nodes;
|
||||||
};
|
};
|
||||||
|
|
||||||
export { createTagNode, parse };
|
export { parse };
|
||||||
export default parse;
|
export default parse;
|
||||||
|
|||||||
@@ -217,34 +217,67 @@ describe('lexer', () => {
|
|||||||
[TYPE.TAG, '/y', '0', '0']
|
[TYPE.TAG, '/y', '0', '0']
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
[TYPE.TAG, 'sc', '0', '0']
|
[TYPE.WORD, '[', '0', '0'],
|
||||||
|
[TYPE.WORD, 'sc', '0', '0']
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
[TYPE.TAG, 'sc / [/sc', '0', '0']
|
// [sc /
|
||||||
|
[TYPE.WORD, '[', '0', '0'],
|
||||||
|
[TYPE.WORD, 'sc', '0', '0'],
|
||||||
|
[TYPE.SPACE, ' ', '0', '0'],
|
||||||
|
[TYPE.WORD, '/', '0', '0'],
|
||||||
|
[TYPE.SPACE, ' ', '0', '0'],
|
||||||
|
[TYPE.TAG, '/sc', '0', '0']
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
[TYPE.TAG, 'sc', '0', '0'],
|
[TYPE.WORD, '[', '0', '0'],
|
||||||
[TYPE.ATTR_NAME, 'arg', '0', '0'],
|
[TYPE.WORD, 'sc', '0', '0'],
|
||||||
[TYPE.ATTR_VALUE, 'val', '0', '0']
|
[TYPE.SPACE, ' ', '0', '0'],
|
||||||
|
[TYPE.WORD, 'arg="val', '0', '0'],
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
|
|
||||||
inputs.forEach((input, idx) => {
|
inputs.forEach((input, idx) => {
|
||||||
const tokens = tokenize(input);
|
const tokens = tokenize(input);
|
||||||
|
const output = asserts[idx];
|
||||||
|
|
||||||
expectOutput(asserts[idx], tokens);
|
expectOutput(output, tokens);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
/*
|
|
||||||
test('bad unclosed tag', () => {
|
test('bad unclosed tag', () => {
|
||||||
const input = `[Finger tapping; R.H. = Right Hand) Part A [Finger tapping (Right hand -15-, -16-)]`;
|
const input = `[Finger Part A [Finger]`;
|
||||||
const tokens = tokenize(input);
|
const tokens = tokenize(input);
|
||||||
const output = [];
|
const output = [
|
||||||
|
[TYPE.WORD, '[', '0', '0'],
|
||||||
|
[TYPE.WORD, 'Finger', '0', '0'],
|
||||||
|
[TYPE.SPACE, ' ', '0', '0'],
|
||||||
|
[TYPE.WORD, 'Part', '0', '0'],
|
||||||
|
[TYPE.SPACE, ' ', '0', '0'],
|
||||||
|
[TYPE.WORD, 'A', '0', '0'],
|
||||||
|
[TYPE.SPACE, ' ', '0', '0'],
|
||||||
|
[TYPE.TAG, 'Finger', '0', '0']
|
||||||
|
];
|
||||||
|
|
||||||
expectOutput(output, tokens);
|
expectOutput(output, tokens);
|
||||||
});
|
});
|
||||||
*/
|
|
||||||
|
test('no close tag', () => {
|
||||||
|
const input = '[Finger Part A';
|
||||||
|
const tokens = tokenize(input);
|
||||||
|
const output = [
|
||||||
|
[TYPE.WORD, '[', '0', '0'],
|
||||||
|
[TYPE.WORD, 'Finger', '0', '0'],
|
||||||
|
[TYPE.SPACE, ' ', '0', '0'],
|
||||||
|
[TYPE.WORD, 'Part', '0', '0'],
|
||||||
|
[TYPE.SPACE, ' ', '0', '0'],
|
||||||
|
[TYPE.WORD, 'A', '0', '0'],
|
||||||
|
];
|
||||||
|
|
||||||
|
expectOutput(output, tokens);
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
describe('html', () => {
|
describe('html', () => {
|
||||||
const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();
|
const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();
|
||||||
|
|||||||
Reference in New Issue
Block a user