mirror of
https://github.com/tenrok/BBob.git
synced 2026-06-17 19:21:20 +03:00
feat(parser): rewrite lexer to make it faster (#50)
* feat(parser): first iteration of new lexer * feat(parser): convert token string props to number props * refactor(parser): optimize char grabber * refactor(parser): working on new lexer * refactor(parser): convert token string props to number props * refactor(parser): rebuild lexer, add tag attrs parsing * refactor(parser): rework word parsing and tag parsing * refactor(parser): rework to pass tests * refactor(parser): rework tag parsing * refactor(parser): rework escape tags parsing * refactor(parser): rework tests * refactor(parser): all test pass * refactor(parser): make lexer faster by move mode switching in loop * refactor(parser): remove all state map objects * refactor(parser): order of parsing states * refactor(parser): state switching without return * refactor(parser): rename buffers to chars * refactor(lexer): reduce function calls * feat(lexer): add new parser tests and code to pass it * fix(utils): remove unused variable in char grabber * feat(lexer): add test for new lexer bug * chore(*): add lexer and lexer2 to benchmark * chore(lexer): add some debug info for char grabber * feat(parser): add new test for single attributes without values * fix(lexer): paired tags tests * refactor(lexer): comment breaking changes tests for future releases * feat(core): improve tests * refactor(parser): add more tests, reduce char grabber size * refactor(parser): reduce utils size * refactor(parser): remove unused code from tag parsing code * refactor(parser): remove unused code from word to tag transforming code * chore(benchmark): fix benchmark imports
This commit is contained in:
committed by
GitHub
parent
fda6ddd6ee
commit
772d422d77
@@ -1,4 +1,4 @@
|
||||
import {TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE} from '../src/Token'
|
||||
import { TYPE_ID, VALUE_ID, TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE} from '../src/Token'
|
||||
import { createLexer } from '../src/lexer'
|
||||
|
||||
const TYPE = {
|
||||
@@ -10,19 +10,58 @@ const TYPE = {
|
||||
NEW_LINE: TYPE_NEW_LINE,
|
||||
};
|
||||
|
||||
const TYPE_NAMES = Object.fromEntries(Object.keys(TYPE).map(key => [TYPE[key], key]));
|
||||
|
||||
const tokenize = input => (createLexer(input).tokenize());
|
||||
const tokenizeEscape = input => (createLexer(input, { enableEscapeTags: true }).tokenize());
|
||||
|
||||
describe('lexer', () => {
|
||||
const expectOutput = (output, tokens) => {
|
||||
expect(tokens.length).toBe(output.length);
|
||||
expect(tokens).toBeInstanceOf(Array);
|
||||
tokens.forEach((token, idx) => {
|
||||
expect(token).toBeInstanceOf(Object);
|
||||
expect(token.type).toEqual(output[idx][0]);
|
||||
expect(token.value).toEqual(output[idx][1]);
|
||||
});
|
||||
};
|
||||
expect.extend({
|
||||
toBeMantchOutput(tokens, output) {
|
||||
if (tokens.length !== output.length) {
|
||||
return {
|
||||
message: () =>
|
||||
`expected tokens length ${tokens.length} to be ${output.length}`,
|
||||
pass: false,
|
||||
};
|
||||
}
|
||||
|
||||
for (let idx = 0; idx < tokens.length; idx++) {
|
||||
const token = tokens[idx];
|
||||
const [type, value] = output[idx];
|
||||
|
||||
if (typeof token !== 'object') {
|
||||
return {
|
||||
message: () =>
|
||||
`token must to be Object`,
|
||||
pass: false,
|
||||
};
|
||||
}
|
||||
|
||||
if (token[TYPE_ID] !== type) {
|
||||
return {
|
||||
message: () =>
|
||||
`expected token type ${TYPE_NAMES[type]} but recieved ${TYPE_NAMES[token[TYPE_ID]]} for ${JSON.stringify(output[idx])}`,
|
||||
pass: false,
|
||||
};
|
||||
}
|
||||
|
||||
if (token[VALUE_ID] !== value) {
|
||||
return {
|
||||
message: () =>
|
||||
`expected token value ${value} but recieved ${token[VALUE_ID]} for ${JSON.stringify(output[idx])}`,
|
||||
pass: false,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
message: () =>
|
||||
`no valid output`,
|
||||
pass: true,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
test('single tag', () => {
|
||||
const input = '[SingleTag]';
|
||||
@@ -31,7 +70,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, 'SingleTag', '0', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('single tag with params', () => {
|
||||
@@ -42,7 +81,19 @@ describe('lexer', () => {
|
||||
[TYPE.ATTR_VALUE, '111', '0', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('single fake tag', () => {
|
||||
const input = '[ user=111]';
|
||||
const tokens = tokenize(input);
|
||||
const output = [
|
||||
[TYPE.WORD, '[', '0', '0'],
|
||||
[TYPE.SPACE, ' ', '0', '0'],
|
||||
[TYPE.WORD, 'user=111]', '0', '0'],
|
||||
];
|
||||
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('single tag with spaces', () => {
|
||||
@@ -53,9 +104,25 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, 'Single Tag', '0', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
// @TODO: this is breaking change behavior
|
||||
test.skip('tags with single attrs like disabled', () => {
|
||||
const input = '[textarea disabled]world[/textarea]';
|
||||
const tokens = tokenize(input);
|
||||
|
||||
const output = [
|
||||
[TYPE.TAG, 'textarea', '0', '0'],
|
||||
[TYPE.ATTR_VALUE, 'disabled', '0', '0'],
|
||||
[TYPE.WORD, 'world"', '0', '0'],
|
||||
[TYPE.TAG, '/textarea', '0', '0'],
|
||||
];
|
||||
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
|
||||
test('string with quotemarks', () => {
|
||||
const input = '"Someone Like You" by Adele';
|
||||
const tokens = tokenize(input);
|
||||
@@ -72,7 +139,7 @@ describe('lexer', () => {
|
||||
[TYPE.WORD, 'Adele', '21', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('tags in brakets', () => {
|
||||
@@ -89,7 +156,7 @@ describe('lexer', () => {
|
||||
[TYPE.WORD, ']', '7', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('tag as param', () => {
|
||||
@@ -102,7 +169,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/color', '21', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('tag with quotemark params with spaces', () => {
|
||||
@@ -118,7 +185,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/url', '24', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('tag with escaped quotemark param', () => {
|
||||
@@ -132,7 +199,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/url', '26', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('tag param without quotemarks', () => {
|
||||
@@ -146,7 +213,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/style', '25', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('list tag with items', () => {
|
||||
@@ -184,7 +251,29 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/list', '0', '4'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('few tags without spaces', () => {
|
||||
const input = '[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]';
|
||||
const tokens = tokenize(input);
|
||||
const output = [
|
||||
[TYPE.TAG, 'mytag1', 0, 0],
|
||||
[TYPE.ATTR_NAME, 'size', 0, 0],
|
||||
[TYPE.ATTR_VALUE, '15', 0, 0],
|
||||
[TYPE.WORD, 'Tag1', 0, 0],
|
||||
[TYPE.TAG, '/mytag1', 0, 0],
|
||||
[TYPE.TAG, 'mytag2', 0, 0],
|
||||
[TYPE.ATTR_NAME, 'size', 0, 0],
|
||||
[TYPE.ATTR_VALUE, '16', 0, 0],
|
||||
[TYPE.WORD, 'Tag2', 0, 0],
|
||||
[TYPE.TAG, '/mytag2', 0, 0],
|
||||
[TYPE.TAG, 'mytag3', 0, 0],
|
||||
[TYPE.WORD, 'Tag3', 0, 0],
|
||||
[TYPE.TAG, '/mytag3', 0, 0],
|
||||
];
|
||||
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('bad tags as texts', () => {
|
||||
@@ -211,8 +300,8 @@ describe('lexer', () => {
|
||||
[
|
||||
[TYPE.WORD, '!', '0', '0'],
|
||||
[TYPE.WORD, '[', '1', '0'],
|
||||
[TYPE.WORD, ']', '1', '0'],
|
||||
[TYPE.WORD, '(image.jpg)', '1', '0'],
|
||||
[TYPE.WORD, '](image.jpg)', '1', '0'],
|
||||
// [TYPE.WORD, '', '1', '0'],
|
||||
],
|
||||
[
|
||||
[TYPE.WORD, 'x', '0', '0'],
|
||||
@@ -253,7 +342,7 @@ describe('lexer', () => {
|
||||
const tokens = tokenize(input);
|
||||
const output = asserts[idx];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -271,7 +360,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, 'Finger', '0', '0']
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('no close tag', () => {
|
||||
@@ -286,7 +375,7 @@ describe('lexer', () => {
|
||||
[TYPE.WORD, 'A', '0', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('escaped tag', () => {
|
||||
@@ -301,7 +390,7 @@ describe('lexer', () => {
|
||||
[TYPE.WORD, '[', '0', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('escaped tag and escaped backslash', () => {
|
||||
@@ -321,7 +410,7 @@ describe('lexer', () => {
|
||||
[TYPE.WORD, ']', '0', '0'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('bad closed tag with escaped backslash', () => {
|
||||
@@ -335,7 +424,7 @@ describe('lexer', () => {
|
||||
[TYPE.WORD, 'b]', '0', '11'],
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
describe('html', () => {
|
||||
@@ -358,7 +447,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/button', 2, 0]
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('attributes with no quotes or value', () => {
|
||||
@@ -377,7 +466,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/button', 2, 0]
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test('attributes with no space between them. No valid, but accepted by the browser', () => {
|
||||
@@ -395,7 +484,7 @@ describe('lexer', () => {
|
||||
[TYPE.TAG, '/button', 2, 0]
|
||||
];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test.skip('style tag', () => {
|
||||
@@ -416,10 +505,10 @@ input.medium{width:100px;height:18px}
|
||||
input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;height:1.4em;font-weight:bold;font-size:9pt;padding:0px 2px;margin:0px;border:0px none #000}
|
||||
-->
|
||||
</style>`
|
||||
const tokens = tokenizeHTML(content);
|
||||
const output = [];
|
||||
const tokens = tokenizeHTML(content);
|
||||
const output = [];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
});
|
||||
|
||||
test.skip('script tag', () => {
|
||||
@@ -432,7 +521,7 @@ input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;he
|
||||
const tokens = tokenizeHTML(content);
|
||||
const output = [];
|
||||
|
||||
expectOutput(output, tokens);
|
||||
expect(tokens).toBeMantchOutput(output);
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user