2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-06-08 17:22:26 +03:00

feat(parser): rewrite lexer to make it faster (#50)

* feat(parser): first iteration of new lexer

* feat(parser): convert token string props to number props

* refactor(parser): optimize char grabber

* refactor(parser): working on new lexer

* refactor(parser): convert token string props to number props

* refactor(parser): rebuild lexer, add tag attrs parsing

* refactor(parser): rework word parsing and tag parsing

* refactor(parser): rework to pass tests

* refactor(parser): rework tag parsing

* refactor(parser): rework escape tags parsing

* refactor(parser): rework tests

* refactor(parser): all tests pass

* refactor(parser): make lexer faster by moving mode switching into the loop

* refactor(parser): remove all state map objects

* refactor(parser): order of parsing states

* refactor(parser): state switching without return

* refactor(parser): rename buffers to chars

* refactor(lexer): reduce function calls

* feat(lexer): add new parser tests and code to pass it

* fix(utils): remove unused variable in char grabber

* feat(lexer): add test for new lexer bug

* chore(*): add lexer and lexer2 to benchmark

* chore(lexer): add some debug info for char grabber

* feat(parser): add new test for single attributes without values

* fix(lexer): paired tags tests

* refactor(lexer): comment breaking changes tests for future releases

* feat(core): improve tests

* refactor(parser): add more tests, reduce char grabber size

* refactor(parser): reduce utils size

* refactor(parser): remove unused code from tag parsing code

* refactor(parser): remove unused code from word to tag transforming code

* chore(benchmark): fix benchmark imports
This commit is contained in:
Nikolay Kostyurin
2020-12-09 01:03:48 +02:00
committed by GitHub
parent fda6ddd6ee
commit 772d422d77
13 changed files with 998 additions and 359 deletions
+102 -19
View File
@@ -8,8 +8,7 @@ describe('Parser', () => {
test('parse paired tags tokens', () => {
const ast = parse('[best name=value]Foo Bar[/best]');
expectOutput(ast, [
const output = [
{
tag: 'best',
attrs: {
@@ -21,15 +20,16 @@ describe('Parser', () => {
'Bar',
],
},
]);
];
expectOutput(ast, output);
});
test('parse only allowed tags', () => {
const ast = parse('[h1 name=value]Foo [Bar] [/h1]', {
onlyAllowTags: ['h1']
});
expectOutput(ast, [
const output = [
{
tag: 'h1',
attrs: {
@@ -42,13 +42,14 @@ describe('Parser', () => {
' '
],
},
]);
];
expectOutput(ast, output);
});
test('parse inconsistent tags', () => {
const ast = parse('[h1 name=value]Foo [Bar] /h1]');
expectOutput(ast, [
const output = [
{
attrs: {
name: 'value'
@@ -65,13 +66,14 @@ describe('Parser', () => {
},
' ',
'/h1]',
]);
];
expectOutput(ast, output);
});
test('parse tag with value param', () => {
const ast = parse('[url=https://github.com/jilizart/bbob]BBob[/url]');
expectOutput(ast, [
const output = [
{
tag: 'url',
attrs: {
@@ -79,13 +81,14 @@ describe('Parser', () => {
},
content: ['BBob'],
},
]);
];
expectOutput(ast, output);
});
test('parse tag with quoted param with spaces', () => {
const ast = parse('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]');
expectOutput(ast, [
const output = [
{
tag: 'url',
attrs: {
@@ -95,13 +98,14 @@ describe('Parser', () => {
},
content: ['Text'],
},
]);
];
expectOutput(ast, output);
});
test('parse single tag with params', () => {
const ast = parse('[url=https://github.com/jilizart/bbob]');
expectOutput(ast, [
const output = [
{
tag: 'url',
attrs: {
@@ -109,12 +113,15 @@ describe('Parser', () => {
},
content: [],
},
]);
];
expectOutput(ast, output);
});
// The input mixes closing tags that do not line up with the open ones and
// leaves a trailing [b] unclosed; the parser is expected to report that
// through the `onError` option at least once.
test('detect inconsistent tag', () => {
  const onError = jest.fn();

  // Only the callback invocation is asserted, so the returned AST is
  // intentionally not captured and the input is parsed exactly once.
  parse('[c][/c][b]hello[/c][/b][b]', { onError });

  expect(onError).toHaveBeenCalled();
});
@@ -145,6 +152,82 @@ describe('Parser', () => {
])
});
// Three paired tags written back to back, with no whitespace between them,
// are expected to come out as three separate sibling nodes.
test('parse few tags without spaces', () => {
  const input = '[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]';
  const tree = parse(input);

  const expected = [
    { tag: 'mytag1', attrs: { size: '15' }, content: ['Tag1'] },
    { tag: 'mytag2', attrs: { size: '16' }, content: ['Tag2'] },
    // a tag written without attributes is expected to carry an empty attrs object
    { tag: 'mytag3', attrs: {}, content: ['Tag3'] },
  ];

  expectOutput(tree, expected);
});
// @TODO: skipped for now; the output expected here is a breaking change in behavior
test.skip('parse tags with single attributes like disabled', () => {
  const tree = parse('[b]hello[/b] [textarea disabled]world[/textarea]');

  const boldNode = { tag: 'b', attrs: {}, content: ['hello'] };
  const textareaNode = {
    tag: 'textarea',
    // the value-less `disabled` attribute is expected to use its own name as its value
    attrs: { disabled: 'disabled' },
    content: ['world'],
  };

  // the single space between the two tags is expected as its own text node
  expectOutput(tree, [boldNode, ' ', textareaNode]);
});
// A [url=...] value that contains a query string (`?`, `&`, `=`) is expected
// to be kept whole rather than split into several attributes.
test('parse url tag with get params', () => {
  const link = 'https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any';
  const tree = parse(`[url=${link}]GET[/url]`);

  const expected = [
    {
      tag: 'url',
      // the expected attrs entry uses the full URL as both key and value
      attrs: { [link]: link },
      content: ['GET'],
    },
  ];

  expectOutput(tree, expected);
});
// An href value containing `#` and `=` (a Google Docs style link) is expected
// to be captured in full as the `href` attribute.
test('parse url tag with # and = symbols [google docs]', () => {
  const href = 'https://docs.google.com/spreadsheets/d/1W9VPUESF_NkbSa_HtRFrQNl0nYo8vPCxJFy7jD3Tpio/edit#gid=0';
  const tree = parse(`[url href=${href}]Docs[/url]`);

  const expected = [
    {
      tag: 'url',
      attrs: { href },
      content: ['Docs'],
    },
  ];

  expectOutput(tree, expected);
});
describe('html', () => {
const parseHTML = input => parse(input, { openTag: '<', closeTag: '>' });