2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

feat(parser): context free tag mode (#165)

* feat(parser): initial context free tag mode

* fix: tests coverage

* chore: update readme

* chore: remove unused badge from readme
This commit is contained in:
Nikolay Kost
2023-01-27 05:26:09 +02:00
committed by GitHub
parent 6b2810fcf4
commit 19e8dd659e
11 changed files with 215 additions and 79 deletions
+45 -6
View File
@@ -13,10 +13,7 @@ written in pure javascript, no dependencies
</a>
<a href="https://www.codefactor.io/repository/github/jilizart/bbob">
<img src="https://www.codefactor.io/repository/github/jilizart/bbob/badge" alt="CodeFactor">
</a>
<a href="https://bettercodehub.com/">
<img src="https://bettercodehub.com/edge/badge/JiLiZART/bbob?branch=master" alt="BCH compliance">
</a>
</a>
<a href="https://snyk.io/test/github/JiLiZART/bbob?targetFile=package.json">
<img src="https://snyk.io/test/github/JiLiZART/bbob/badge.svg?targetFile=package.json" alt="Known Vulnerabilities">
</a>
@@ -73,6 +70,7 @@ written in pure javascript, no dependencies
* [Basic usage](#basic-usage)
* [React usage](#react-usage)
* [Vue 2 usage](#vue2-usage)
* [Parse Options](#parse-options)
* [Presets](#presets)
* [Create your own preset](#create-preset)
* [HTML Preset](#html-preset)
@@ -111,7 +109,7 @@ import {render} from 'react-dom'
import bbobReactRender from '@bbob/react/es/render'
import presetReact from '@bbob/preset-react'
const options = { onlyAllowTags: ['i'], enableEscapeTags: true }
const options = { onlyAllowTags: ['i'], enableEscapeTags: true, contextFreeTags: ['code'] }
const content = bbobReactRender(`[i]Text[/i]`, presetReact(), options)
console.log(render(<span>{content}</span>)); // <span><span style="font-style: italic;">Text</span></span>
@@ -156,6 +154,47 @@ Vue.use(VueBbob);
```
More examples available in <a href="https://github.com/JiLiZART/BBob/tree/master/examples">examples folder</a>
### Parse options <a name="parse-options"></a>
#### onlyAllowTags
Parse only allowed tags
```js
import bbobHTML from '@bbob/html'
import presetHTML5 from '@bbob/preset-html5'
const processed = bbobHTML(`[i][b]Text[/b][/i]`, presetHTML5(), { onlyAllowTags: ['i'] })
console.log(processed); // <span style="font-style: italic;">[b]Text[/b]</span>
```
#### contextFreeTags
Enable context-free mode: tags nested inside the given tags are not parsed and are left as plain text
```js
import bbobHTML from '@bbob/html'
import presetHTML5 from '@bbob/preset-html5'
const processed = bbobHTML(`[b]Text[/b][code][b]Text[/b][/code]`, presetHTML5(), { contextFreeTags: ['code'] })
console.log(processed); // <span style="font-weight: bold;">Text</span><pre>[b]Text[/b]</pre>
```
#### enableEscapeTags
Enable backslash escaping of tags, so that escaped tags are left as plain text
```js
import bbobHTML from '@bbob/html'
import presetHTML5 from '@bbob/preset-html5'
const processed = bbobHTML(`[b]Text[/b]'\\[b\\]Text\\[/b\\]'`, presetHTML5(), { enableEscapeTags: true })
console.log(processed); // <span style="font-weight: bold;">Text</span>[b]Text[/b]
```
### Presets <a name="basic"></a>
@@ -330,7 +369,7 @@ Tested on Node v12.18.3
| regex/parser | 6.02 ops/sec ±2.77% | (20 runs sampled) |
| ya-bbcode | 10.70 ops/sec ±1.94% | (31 runs sampled) |
| xbbcode/parser | 107 ops/sec ±2.29% | (69 runs sampled) |
| @bbob/parser | 137 ops/sec ±1.11% | (78 runs sampled) |
| @bbob/parser | 140 ops/sec ±1.11% | (78 runs sampled) |
Developed with <3 using JetBrains
+18 -12
View File
@@ -13,8 +13,8 @@
"@rollup/plugin-node-resolve": "15.0.1",
"@rollup/plugin-replace": "5.0.1",
"@size-limit/preset-small-lib": "6.0.1",
"@swc/cli": "0.1.57",
"@swc/core": "1.3.16",
"@swc/cli": "^0.1.57",
"@swc/core": "^1.3.16",
"@swc/jest": "0.2.23",
"bundlesize2": "0.0.31",
"cross-env": "7.0.3",
@@ -7326,14 +7326,20 @@
}
},
"node_modules/caniuse-lite": {
"version": "1.0.30001283",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001283.tgz",
"integrity": "sha512-9RoKo841j1GQFSJz/nCXOj0sD7tHBtlowjYlrqIUS812x9/emfBLBt6IyMz1zIaYc/eRL8Cs6HPUVi2Hzq4sIg==",
"version": "1.0.30001441",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001441.tgz",
"integrity": "sha512-OyxRR4Vof59I3yGWXws6i908EtGbMzVUi3ganaZQHmydk1iwDhRnvaPG2WaR0KcqrDFKrxVZHULT396LEPhXfg==",
"dev": true,
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/browserslist"
}
"funding": [
{
"type": "opencollective",
"url": "https://opencollective.com/browserslist"
},
{
"type": "tidelift",
"url": "https://tidelift.com/funding/github/npm/caniuse-lite"
}
]
},
"node_modules/chalk": {
"version": "4.1.2",
@@ -23840,9 +23846,9 @@
}
},
"caniuse-lite": {
"version": "1.0.30001283",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001283.tgz",
"integrity": "sha512-9RoKo841j1GQFSJz/nCXOj0sD7tHBtlowjYlrqIUS812x9/emfBLBt6IyMz1zIaYc/eRL8Cs6HPUVi2Hzq4sIg==",
"version": "1.0.30001441",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001441.tgz",
"integrity": "sha512-OyxRR4Vof59I3yGWXws6i908EtGbMzVUi3ganaZQHmydk1iwDhRnvaPG2WaR0KcqrDFKrxVZHULT396LEPhXfg==",
"dev": true
},
"chalk": {
+6 -8
View File
@@ -12,19 +12,17 @@
"@bbob/plugin-helper": "^2.8.3"
}
},
"../bbob-plugin-helper": {
"name": "@bbob/plugin-helper",
"version": "2.8.2",
"license": "MIT"
},
"node_modules/@bbob/plugin-helper": {
"resolved": "../bbob-plugin-helper",
"link": true
"version": "2.8.3",
"resolved": "https://registry.npmjs.org/@bbob/plugin-helper/-/plugin-helper-2.8.3.tgz",
"integrity": "sha512-i1vVQZ7Ja5x6OLVyAXpwbTX/Id++wVJkve8q+wDhRHylW5/MJQqB6B6pZdGuFbyA5AQvUw2us8bsW0h4iZsDew=="
}
},
"dependencies": {
"@bbob/plugin-helper": {
"version": "file:../bbob-plugin-helper"
"version": "2.8.3",
"resolved": "https://registry.npmjs.org/@bbob/plugin-helper/-/plugin-helper-2.8.3.tgz",
"integrity": "sha512-i1vVQZ7Ja5x6OLVyAXpwbTX/Id++wVJkve8q+wDhRHylW5/MJQqB6B6pZdGuFbyA5AQvUw2us8bsW0h4iZsDew=="
}
}
}
+1
View File
@@ -171,5 +171,6 @@ export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
export const TYPE_ATTR_VALUE = TOKEN_TYPE_ATTR_VALUE;
export const TYPE_SPACE = TOKEN_TYPE_SPACE;
export const TYPE_NEW_LINE = TOKEN_TYPE_NEW_LINE;
export { Token };
export default Token;
+58 -37
View File
@@ -58,16 +58,17 @@ function createLexer(buffer, options = {}) {
let tokenIndex = -1;
let stateMode = STATE_WORD;
let tagMode = TAG_STATE_NAME;
let contextFreeTag = '';
const tokens = new Array(Math.floor(buffer.length));
const openTag = options.openTag || OPEN_BRAKET;
const closeTag = options.closeTag || CLOSE_BRAKET;
const escapeTags = !!options.enableEscapeTags;
const contextFreeTags = options.contextFreeTags || [];
const onToken = options.onToken || (() => {
});
const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
const NOT_CHAR_TOKENS = [
// ...(options.enableEscapeTags ? [BACKSLASH] : []),
openTag, SPACE, TAB, N,
];
const WHITESPACES = [SPACE, TAB];
@@ -86,6 +87,16 @@ function createLexer(buffer, options = {}) {
const unq = (val) => unquote(trimChar(val, QUOTEMARK));
/**
 * Keeps the `contextFreeTag` state in sync with a tag that was just emitted.
 *
 * A closing tag seen while a context free tag is open clears the state.
 * After that, if no context free tag is open and `name` is listed in
 * `contextFreeTags`, `name` becomes the open context free tag.
 *
 * @param {String} name name of the emitted tag
 * @param {Boolean} [isClosingTag] truthy when the emitted tag is a closing tag
 */
const checkContextFreeMode = (name, isClosingTag) => {
  const leavesContextFreeTag = isClosingTag && contextFreeTag !== '';

  if (leavesContextFreeTag) {
    contextFreeTag = '';
  }

  // Evaluated after the reset above, so the same call may open a new tag
  const entersContextFreeTag = contextFreeTag === '' && contextFreeTags.includes(name);

  if (entersContextFreeTag) {
    contextFreeTag = name;
  }
};
const chars = createCharGrabber(buffer, { onSkip });
/**
@@ -177,6 +188,7 @@ function createLexer(buffer, options = {}) {
const name = tagChars.grabWhile(validName);
emitToken(TYPE_TAG, name);
checkContextFreeMode(name);
tagChars.skip();
@@ -192,41 +204,37 @@ function createLexer(buffer, options = {}) {
function stateTag() {
const currChar = chars.getCurr();
const nextChar = chars.getNext();
if (currChar === openTag) {
const nextChar = chars.getNext();
chars.skip();
chars.skip();
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = chars.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = chars.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
emitToken(TYPE_WORD, currChar);
if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
emitToken(TYPE_WORD, currChar);
return STATE_WORD;
}
// [myTag ]
const isNoAttrsInTag = substr.indexOf(EQ) === -1;
// [/myTag]
const isClosingTag = substr[0] === SLASH;
if (isNoAttrsInTag || isClosingTag) {
const name = chars.grabWhile((char) => char !== closeTag);
chars.skip(); // skip closeTag
emitToken(TYPE_TAG, name);
return STATE_WORD;
}
return STATE_TAG_ATTRS;
return STATE_WORD;
}
return STATE_WORD;
// [myTag ]
const isNoAttrsInTag = substr.indexOf(EQ) === -1;
// [/myTag]
const isClosingTag = substr[0] === SLASH;
if (isNoAttrsInTag || isClosingTag) {
const name = chars.grabWhile((char) => char !== closeTag);
chars.skip(); // skip closeTag
emitToken(TYPE_TAG, name);
checkContextFreeMode(name, isClosingTag);
return STATE_WORD;
}
return STATE_TAG_ATTRS;
}
function stateAttrs() {
@@ -259,13 +267,24 @@ function createLexer(buffer, options = {}) {
}
if (isWhiteSpace(chars.getCurr())) {
emitToken(TYPE_SPACE, chars.grabWhile(isWhiteSpace));
const word = chars.grabWhile(isWhiteSpace);
emitToken(TYPE_SPACE, word);
return STATE_WORD;
}
if (chars.getCurr() === openTag) {
if (chars.includes(closeTag)) {
if (contextFreeTag) {
const fullTagLen = openTag.length + SLASH.length + contextFreeTag.length;
const fullTagName = `${openTag}${SLASH}${contextFreeTag}`;
const foundTag = chars.grabN(fullTagLen);
const isEndContextFreeMode = foundTag === fullTagName;
if (isEndContextFreeMode) {
return STATE_TAG;
}
} else if (chars.includes(closeTag)) {
return STATE_TAG;
}
@@ -298,12 +317,16 @@ function createLexer(buffer, options = {}) {
const isChar = (char) => isCharToken(char) && !isEscapeChar(char);
emitToken(TYPE_WORD, chars.grabWhile(isChar));
const word = chars.grabWhile(isChar);
emitToken(TYPE_WORD, word);
return STATE_WORD;
}
emitToken(TYPE_WORD, chars.grabWhile(isCharToken));
const word = chars.grabWhile(isCharToken);
emitToken(TYPE_WORD, word);
return STATE_WORD;
}
@@ -320,10 +343,8 @@ function createLexer(buffer, options = {}) {
stateMode = stateAttrs();
break;
case STATE_WORD:
stateMode = stateWord();
break;
default:
stateMode = STATE_WORD;
stateMode = stateWord();
break;
}
}
+4 -2
View File
@@ -10,9 +10,10 @@ import { createList } from './utils';
* @param {Object} opts
* @param {Function} opts.createTokenizer
* @param {Array<string>} opts.onlyAllowTags
* @param {Array<string>} opts.contextFreeTags
* @param {Boolean} opts.enableEscapeTags
* @param {String} opts.openTag
* @param {String} opts.closeTag
* @param {Boolean} opts.enableEscapeTags
* @return {Array}
*/
const parse = (input, opts = {}) => {
@@ -258,9 +259,10 @@ const parse = (input, opts = {}) => {
tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, {
onToken,
onlyAllowTags: options.onlyAllowTags,
openTag,
closeTag,
onlyAllowTags: options.onlyAllowTags,
contextFreeTags: options.contextFreeTags,
enableEscapeTags: options.enableEscapeTags,
});
+9 -3
View File
@@ -13,7 +13,7 @@ function CharGrabber(source, options) {
const { pos } = cursor;
const idx = source.indexOf(char, pos);
return idx >= 0 ? source.substr(pos, idx - pos) : '';
return idx >= 0 ? source.substring(pos, idx) : '';
};
const includes = (val) => source.indexOf(val, cursor.pos) >= 0;
const hasNext = () => cursor.len > cursor.pos;
@@ -25,7 +25,8 @@ function CharGrabber(source, options) {
options.onSkip();
}
};
const rest = () => source.substr(cursor.pos);
const rest = () => source.substring(cursor.pos);
// Returns up to `num` chars starting at the cursor; the cursor itself is not moved
const grabN = (num = 0) => {
  const { pos } = cursor;

  return source.substring(pos, pos + num);
};
const curr = () => source[cursor.pos];
const prev = () => {
const prevPos = cursor.pos - 1;
@@ -48,7 +49,7 @@ function CharGrabber(source, options) {
}
}
return source.substr(start, cursor.pos - start);
return source.substring(start, cursor.pos);
};
/**
* @type {skip}
@@ -88,6 +89,11 @@ function CharGrabber(source, options) {
* @return {String}
*/
this.grabWhile = grabWhile;
/**
* @param {Number} num
* @return {String}
*/
this.grabN = grabN;
/**
* Grabs rest of string until it find a char
* @param {String} char
+9
View File
@@ -0,0 +1,9 @@
import { TagNode } from "../src/index";
describe('index', () => {
  test('tag with content and params', () => {
    // A node created with attrs and content is expected to stringify to BBCode
    const node = TagNode.create('test', { test: 1 }, ['Hello']);
    const serialized = String(node);

    expect(serialized).toBe('[test test="1"]Hello[/test]');
  });
});
+19
View File
@@ -14,6 +14,7 @@ const TYPE_NAMES = Object.fromEntries(Object.keys(TYPE).map(key => [TYPE[key], k
// Tokenizes input with a lexer created without options
const tokenize = (input) => {
  const lexer = createLexer(input);

  return lexer.tokenize();
};

// Tokenizes input with the `enableEscapeTags` lexer option turned on
const tokenizeEscape = (input) => {
  const lexer = createLexer(input, { enableEscapeTags: true });

  return lexer.tokenize();
};

// Tokenizes input, passing `tags` to the lexer as its `contextFreeTags` option
const tokenizeContextFreeTags = (input, tags = []) => {
  const lexer = createLexer(input, { contextFreeTags: tags });

  return lexer.tokenize();
};
describe('lexer', () => {
expect.extend({
@@ -463,6 +464,24 @@ describe('lexer', () => {
expect(tokens).toBeMantchOutput(output);
});
test('context free tag [code]', () => {
  // The nested [b]...[/b] is expected as WORD/SPACE tokens only;
  // just the surrounding [code] and [/code] are expected as TAG tokens.
  const contextFreeTags = ['code'];
  const source = '[code] [b]some string[/b][/code]';
  const expected = [
    [TYPE.TAG, 'code', 0, 0],
    [TYPE.SPACE, ' ', 0, 0],
    [TYPE.WORD, '[', 0, 0],
    [TYPE.WORD, 'b]some', 0, 0],
    [TYPE.SPACE, ' ', 0, 0],
    [TYPE.WORD, 'string', 0, 0],
    [TYPE.WORD, '[', 0, 0],
    [TYPE.WORD, '/b]', 0, 0],
    [TYPE.TAG, '/code', 0, 0],
  ];

  const actual = tokenizeContextFreeTags(source, contextFreeTags);

  expect(actual).toBeMantchOutput(expected);
});
test('bad closed tag with escaped backslash', () => {
const input = `[b]test[\\b]`;
const tokens = tokenizeEscape(input);
+42
View File
@@ -25,6 +25,23 @@ describe('Parser', () => {
expectOutput(ast, output);
});
test('parse paired tags tokens 2', () => {
  // A paired tag without attrs: words and the space between them
  // are expected as separate content entries.
  const expected = [
    {
      tag: 'bar',
      attrs: {},
      content: ['Foo', ' ', 'Bar'],
    },
  ];

  const actual = parse('[bar]Foo Bar[/bar]');

  expectOutput(actual, expected);
});
describe('onlyAllowTags', () => {
test('parse only allowed tags', () => {
const ast = parse('[h1 name=value]Foo [Bar] [/h1]', {
@@ -126,6 +143,31 @@ describe('Parser', () => {
});
})
describe('contextFreeTags', () => {
  test('context free tag [code]', () => {
    // The inner [b]...[/b] is expected as plain string content of the code node
    const parseOptions = { contextFreeTags: ['code'] };
    const expected = [
      {
        tag: 'code',
        attrs: {},
        content: [' ', '[', 'b]some', ' ', 'string', '[', '/b]'],
      },
    ];

    const actual = parse('[code] [b]some string[/b][/code]', parseOptions);

    expectOutput(actual, expected);
  });
});
test('parse inconsistent tags', () => {
const ast = parse('[h1 name=value]Foo [Bar] /h1]');
const output = [
+4 -11
View File
@@ -13,23 +13,16 @@ const CLOSE_BRAKET = ']';
const SLASH = '/';
const BACKSLASH = '\\';
const PLACEHOLDER_SPACE_TAB = ' ';
const PLACEHOLDER_SPACE = ' ';
// const getChar = String.fromCharCode;
export {
N,
F,
R,
TAB,
EQ,
QUOTEMARK,
TAB,
SPACE,
SLASH,
BACKSLASH,
QUOTEMARK,
OPEN_BRAKET,
CLOSE_BRAKET,
SLASH,
PLACEHOLDER_SPACE_TAB,
PLACEHOLDER_SPACE,
BACKSLASH,
};