mirror of
https://github.com/tenrok/BBob.git
synced 2026-06-08 17:22:26 +03:00
feat(parser): rewrite lexer to make it faster (#50)
* feat(parser): first iteration of new lexer
* feat(parser): convert token string props to number props
* refactor(parser): optimize char grabber
* refactor(parser): working on new lexer
* refactor(parser): convert token string props to number props
* refactor(parser): rebuild lexer, add tag attrs parsing
* refactor(parser): rework word parsing and tag parsing
* refactor(parser): rework to pass tests
* refactor(parser): rework tag parsing
* refactor(parser): rework escape tags parsing
* refactor(parser): rework tests
* refactor(parser): all test pass
* refactor(parser): make lexer faster by move mode switching in loop
* refactor(parser): remove all state map objects
* refactor(parser): order of parsing states
* refactor(parser): state switching without return
* refactor(parser): rename buffers to chars
* refactor(lexer): reduce function calls
* feat(lexer): add new parser tests and code to pass it
* fix(utils): remove unused variable in char grabber
* feat(lexer): add test for new lexer bug
* chore(*): add lexer and lexer2 to benchmark
* chore(lexer): add some debug info for char grabber
* feat(parser): add new test for single attributes without values
* fix(lexer): paired tags tests
* refactor(lexer): comment breaking changes tests for future releases
* feat(core): improve tests
* refactor(parser): add more tests, reduce char grabber size
* refactor(parser): reduce utils size
* refactor(parser): remove unused code from tag parsing code
* refactor(parser): remove unused code from word to tag transforming code
* chore(benchmark): fix benchmark imports
This commit is contained in:
committed by
GitHub
parent
fda6ddd6ee
commit
772d422d77
@@ -8,8 +8,7 @@ describe('Parser', () => {
|
||||
|
||||
test('parse paired tags tokens', () => {
|
||||
const ast = parse('[best name=value]Foo Bar[/best]');
|
||||
|
||||
expectOutput(ast, [
|
||||
const output = [
|
||||
{
|
||||
tag: 'best',
|
||||
attrs: {
|
||||
@@ -21,15 +20,16 @@ describe('Parser', () => {
|
||||
'Bar',
|
||||
],
|
||||
},
|
||||
]);
|
||||
];
|
||||
|
||||
expectOutput(ast, output);
|
||||
});
|
||||
|
||||
test('parse only allowed tags', () => {
|
||||
const ast = parse('[h1 name=value]Foo [Bar] [/h1]', {
|
||||
onlyAllowTags: ['h1']
|
||||
});
|
||||
|
||||
expectOutput(ast, [
|
||||
const output = [
|
||||
{
|
||||
tag: 'h1',
|
||||
attrs: {
|
||||
@@ -42,13 +42,14 @@ describe('Parser', () => {
|
||||
' '
|
||||
],
|
||||
},
|
||||
]);
|
||||
];
|
||||
|
||||
expectOutput(ast, output);
|
||||
});
|
||||
|
||||
test('parse inconsistent tags', () => {
|
||||
const ast = parse('[h1 name=value]Foo [Bar] /h1]');
|
||||
|
||||
expectOutput(ast, [
|
||||
const output = [
|
||||
{
|
||||
attrs: {
|
||||
name: 'value'
|
||||
@@ -65,13 +66,14 @@ describe('Parser', () => {
|
||||
},
|
||||
' ',
|
||||
'/h1]',
|
||||
]);
|
||||
];
|
||||
|
||||
expectOutput(ast, output);
|
||||
});
|
||||
|
||||
test('parse tag with value param', () => {
|
||||
const ast = parse('[url=https://github.com/jilizart/bbob]BBob[/url]');
|
||||
|
||||
expectOutput(ast, [
|
||||
const output = [
|
||||
{
|
||||
tag: 'url',
|
||||
attrs: {
|
||||
@@ -79,13 +81,14 @@ describe('Parser', () => {
|
||||
},
|
||||
content: ['BBob'],
|
||||
},
|
||||
]);
|
||||
];
|
||||
|
||||
expectOutput(ast, output);
|
||||
});
|
||||
|
||||
test('parse tag with quoted param with spaces', () => {
|
||||
const ast = parse('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]');
|
||||
|
||||
expectOutput(ast, [
|
||||
const output = [
|
||||
{
|
||||
tag: 'url',
|
||||
attrs: {
|
||||
@@ -95,13 +98,14 @@ describe('Parser', () => {
|
||||
},
|
||||
content: ['Text'],
|
||||
},
|
||||
]);
|
||||
];
|
||||
|
||||
expectOutput(ast, output);
|
||||
});
|
||||
|
||||
test('parse single tag with params', () => {
|
||||
const ast = parse('[url=https://github.com/jilizart/bbob]');
|
||||
|
||||
expectOutput(ast, [
|
||||
const output = [
|
||||
{
|
||||
tag: 'url',
|
||||
attrs: {
|
||||
@@ -109,12 +113,15 @@ describe('Parser', () => {
|
||||
},
|
||||
content: [],
|
||||
},
|
||||
]);
|
||||
];
|
||||
|
||||
expectOutput(ast, output);
|
||||
});
|
||||
|
||||
test('detect inconsistent tag', () => {
|
||||
const onError = jest.fn();
|
||||
const ast = parse('[c][/c][b]hello[/c][/b][b]', { onError });
|
||||
|
||||
parse('[c][/c][b]hello[/c][/b][b]', { onError });
|
||||
|
||||
expect(onError).toHaveBeenCalled();
|
||||
});
|
||||
@@ -145,6 +152,82 @@ describe('Parser', () => {
|
||||
])
|
||||
});
|
||||
|
||||
test('parse few tags without spaces', () => {
|
||||
const ast = parse('[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]');
|
||||
const output = [
|
||||
{
|
||||
tag: 'mytag1',
|
||||
attrs: {
|
||||
size: '15',
|
||||
},
|
||||
content: ['Tag1'],
|
||||
},
|
||||
{
|
||||
tag: 'mytag2',
|
||||
attrs: {
|
||||
size: '16',
|
||||
},
|
||||
content: ['Tag2'],
|
||||
},
|
||||
{
|
||||
tag: 'mytag3',
|
||||
attrs: {},
|
||||
content: ['Tag3'],
|
||||
},
|
||||
];
|
||||
|
||||
expectOutput(ast, output);
|
||||
});
|
||||
|
||||
// @TODO: this is breaking change behavior
|
||||
test.skip('parse tags with single attributes like disabled', () => {
|
||||
const ast = parse('[b]hello[/b] [textarea disabled]world[/textarea]');
|
||||
|
||||
expectOutput(ast, [
|
||||
{
|
||||
tag: 'b',
|
||||
attrs: {},
|
||||
content: ['hello'],
|
||||
},
|
||||
' ',
|
||||
{
|
||||
tag: 'textarea',
|
||||
attrs: {
|
||||
disabled: 'disabled',
|
||||
},
|
||||
content: ['world'],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test('parse url tag with get params', () => {
|
||||
const ast = parse('[url=https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any]GET[/url]');
|
||||
|
||||
expectOutput(ast, [
|
||||
{
|
||||
tag: 'url',
|
||||
attrs: {
|
||||
'https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any': 'https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any',
|
||||
},
|
||||
content: ['GET'],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test('parse url tag with # and = symbols [google docs]', () => {
|
||||
const ast = parse('[url href=https://docs.google.com/spreadsheets/d/1W9VPUESF_NkbSa_HtRFrQNl0nYo8vPCxJFy7jD3Tpio/edit#gid=0]Docs[/url]');
|
||||
|
||||
expectOutput(ast, [
|
||||
{
|
||||
tag: 'url',
|
||||
attrs: {
|
||||
href: 'https://docs.google.com/spreadsheets/d/1W9VPUESF_NkbSa_HtRFrQNl0nYo8vPCxJFy7jD3Tpio/edit#gid=0',
|
||||
},
|
||||
content: ['Docs'],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
describe('html', () => {
|
||||
const parseHTML = input => parse(input, { openTag: '<', closeTag: '>' });
|
||||
|
||||
|
||||
Reference in New Issue
Block a user