mirror of
https://github.com/tenrok/BBob.git
synced 2026-06-14 18:42:24 +03:00
speed parser optimization
This commit is contained in:
@@ -22,11 +22,10 @@ const getTokenValue = token => token[TOKEN_VALUE_ID];
|
||||
const getTokenLine = token => token[TOKEN_LINE_ID];
|
||||
const getTokenColumn = token => token[TOKEN_COLUMN_ID];
|
||||
|
||||
const isTextToken = (token) => {
|
||||
const type = token[TOKEN_TYPE_ID];
|
||||
|
||||
return type === TOKEN_TYPE_SPACE || type === TOKEN_TYPE_NEW_LINE || type === TOKEN_TYPE_WORD;
|
||||
};
|
||||
const isTextToken = token =>
|
||||
token[TOKEN_TYPE_ID] === TOKEN_TYPE_SPACE ||
|
||||
token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE ||
|
||||
token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
|
||||
|
||||
const isTagToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_TAG;
|
||||
const isTagEnd = token => getTokenValue(token).charCodeAt(0) === SLASH;
|
||||
|
||||
@@ -15,11 +15,12 @@ class Tokenizer {
|
||||
this.buffer = input;
|
||||
this.colPos = 0;
|
||||
this.rowPos = 0;
|
||||
this.index = 0;
|
||||
// eslint-disable-next-line no-bitwise
|
||||
this.index = 2 ** 32;
|
||||
|
||||
this.tokenIndex = -1;
|
||||
this.tokens = new Array(Math.floor(this.buffer.length));
|
||||
this.dummyToken = createTokenOfType('', '', '', '');
|
||||
this.dummyToken = null; // createTokenOfType('', '', '', '');
|
||||
|
||||
this.wordToken = this.dummyToken;
|
||||
this.tagToken = this.dummyToken;
|
||||
@@ -30,16 +31,16 @@ class Tokenizer {
|
||||
this.options = options;
|
||||
|
||||
this.charMap = {
|
||||
TAB: this.charSPACE.bind(this),
|
||||
SPACE: this.charSPACE.bind(this),
|
||||
N: this.charN.bind(this),
|
||||
OPEN_BRAKET: this.charOPENBRAKET.bind(this),
|
||||
CLOSE_BRAKET: this.charCLOSEBRAKET.bind(this),
|
||||
EQ: this.charEQ.bind(this),
|
||||
QUOTEMARK: this.charQUOTEMARK.bind(this),
|
||||
BACKSLASH: this.charBACKSLASH.bind(this),
|
||||
[TAB]: this.charSPACE.bind(this),
|
||||
[SPACE]: this.charSPACE.bind(this),
|
||||
[N]: this.charN.bind(this),
|
||||
[OPEN_BRAKET]: this.charOPENBRAKET.bind(this),
|
||||
[CLOSE_BRAKET]: this.charCLOSEBRAKET.bind(this),
|
||||
[EQ]: this.charEQ.bind(this),
|
||||
[QUOTEMARK]: this.charQUOTEMARK.bind(this),
|
||||
[BACKSLASH]: this.charBACKSLASH.bind(this),
|
||||
default: this.charWORD.bind(this),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
emitToken(token) {
|
||||
@@ -72,22 +73,23 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
flushWord() {
|
||||
if (this.wordToken[Token.TYPE_ID] && this.wordToken[Token.VALUE_ID]) {
|
||||
if (this.inWord() && this.wordToken[Token.VALUE_ID]) {
|
||||
this.appendToken(this.wordToken);
|
||||
this.wordToken = this.createWordToken('');
|
||||
}
|
||||
}
|
||||
|
||||
createWord(value, line, row) {
|
||||
if (this.wordToken[Token.TYPE_ID] === '') {
|
||||
if (!this.inWord()) {
|
||||
this.wordToken = this.createWordToken(value, line, row);
|
||||
this.wordIndex = this.index;
|
||||
}
|
||||
}
|
||||
|
||||
flushTag() {
|
||||
if (this.tagToken[Token.TYPE_ID]) {
|
||||
if (this.inTag()) {
|
||||
// [] and [=] tag case
|
||||
if (this.tagToken[Token.VALUE_ID] === '') {
|
||||
if (!this.inTag()) {
|
||||
const value = this.attrValueToken[Token.TYPE_ID] ? getChar(EQ) : '';
|
||||
const word = getChar(OPEN_BRAKET) + value + getChar(CLOSE_BRAKET);
|
||||
|
||||
@@ -103,7 +105,8 @@ class Tokenizer {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.attrNameToken[Token.TYPE_ID] && !this.attrValueToken[Token.TYPE_ID]) {
|
||||
// this.attrNameToken[Token.TYPE_ID] && !this.attrValueToken[Token.TYPE_ID]
|
||||
if (this.inAttrName() && !this.inAttrValue()) {
|
||||
this.tagToken[Token.VALUE_ID] += PLACEHOLDER_SPACE + this.attrNameToken[Token.VALUE_ID];
|
||||
this.attrNameToken = this.dummyToken;
|
||||
}
|
||||
@@ -114,7 +117,7 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
flushUnclosedTag() {
|
||||
if (this.tagToken[Token.TYPE_ID]) {
|
||||
if (this.inTag()) {
|
||||
const value = this.tagToken[Token.VALUE_ID] + (this.attrValueToken[Token.VALUE_ID] ? getChar(EQ) : '');
|
||||
|
||||
this.tagToken[Token.TYPE_ID] = Token.TYPE_WORD;
|
||||
@@ -131,13 +134,13 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
flushAttrNames() {
|
||||
if (this.attrNameToken[Token.TYPE_ID]) {
|
||||
if (this.inAttrName()) {
|
||||
this.attrTokens.push(this.attrNameToken);
|
||||
this.attrNameToken = this.dummyToken;
|
||||
}
|
||||
|
||||
if (this.attrValueToken[Token.TYPE_ID]) {
|
||||
delete this.attrValueToken.quoted;
|
||||
if (this.inAttrValue()) {
|
||||
this.attrValueToken.quoted = null;
|
||||
this.attrTokens.push(this.attrValueToken);
|
||||
this.attrValueToken = this.dummyToken;
|
||||
}
|
||||
@@ -151,9 +154,10 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
charSPACE(charCode) {
|
||||
this.flushWord();
|
||||
const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE;
|
||||
|
||||
this.flushWord();
|
||||
|
||||
if (this.inTag()) {
|
||||
if (this.inAttrValue() && this.attrValueToken.quoted) {
|
||||
this.attrValueToken[Token.VALUE_ID] += spaceCode;
|
||||
@@ -216,7 +220,7 @@ class Tokenizer {
|
||||
this.attrValueToken.quoted &&
|
||||
!isPrevBackslash) {
|
||||
this.flushAttrNames();
|
||||
} else if (this.tagToken[Token.TYPE_ID] === '') {
|
||||
} else if (!this.inTag()) {
|
||||
this.wordToken[Token.VALUE_ID] += getChar(charCode);
|
||||
}
|
||||
|
||||
@@ -258,12 +262,14 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
tokenize() {
|
||||
this.index = 0;
|
||||
while (this.index < this.buffer.length) {
|
||||
const charCode = this.buffer.charCodeAt(this.index);
|
||||
|
||||
(this.charMap[charCode] || this.charMap.default)(charCode);
|
||||
|
||||
this.index += 1;
|
||||
// eslint-disable-next-line no-plusplus
|
||||
++this.index;
|
||||
}
|
||||
|
||||
this.flushWord();
|
||||
@@ -274,16 +280,20 @@ class Tokenizer {
|
||||
return this.tokens;
|
||||
}
|
||||
|
||||
inWord() {
|
||||
return this.wordToken && this.wordToken[Token.TYPE_ID];
|
||||
}
|
||||
|
||||
inTag() {
|
||||
return this.tagToken[Token.TYPE_ID];
|
||||
return this.tagToken && this.tagToken[Token.TYPE_ID];
|
||||
}
|
||||
|
||||
inAttrValue() {
|
||||
return this.attrValueToken[Token.TYPE_ID];
|
||||
return this.attrValueToken && this.attrValueToken[Token.TYPE_ID];
|
||||
}
|
||||
|
||||
inAttrName() {
|
||||
return this.attrNameToken[Token.TYPE_ID];
|
||||
return this.attrNameToken && this.attrNameToken[Token.TYPE_ID];
|
||||
}
|
||||
|
||||
createWordToken(value = '', line = this.colPos, row = this.rowPos) {
|
||||
@@ -317,7 +327,7 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
// warm up tokenizer to eliminate code branches that never execute
|
||||
new Tokenizer('[b param="hello"]Sample text[/b]\n\t[Chorus 2] x html([a. title][, alt][, classes]) x [=] [/y]').tokenize();
|
||||
// new Tokenizer('[b param="hello"]Sample text[/b]\n\t[Chorus 2] x html([a. title][, alt][, classes]) x [=] [/y]').tokenize();
|
||||
|
||||
module.exports = Tokenizer;
|
||||
module.exports.createTokenOfType = createTokenOfType;
|
||||
|
||||
@@ -18,12 +18,16 @@
|
||||
"serialize",
|
||||
"html"
|
||||
],
|
||||
"main": "./lib/index.js",
|
||||
"main": "dist/cjs.js",
|
||||
"module": "dist/esm.js",
|
||||
"browser": "dist/umd.js",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git://github.com/JiLiZART/bbob.git"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "../../node_modules/.bin/rollup -c",
|
||||
"dev": "../../node_modules/.bin/rollup -c -w",
|
||||
"test": "../../node_modules/.bin/jest --",
|
||||
"cover": "../../node_modules/.bin/jest --coverage",
|
||||
"lint": "../../node_modules/.bin/eslint ."
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
import resolve from 'rollup-plugin-node-resolve';
|
||||
import commonjs from 'rollup-plugin-commonjs';
|
||||
import pkg from './package.json';
|
||||
|
||||
export default [
|
||||
// browser-friendly UMD build
|
||||
{
|
||||
input: 'lib/index.js',
|
||||
output: {
|
||||
name: 'BBobParser',
|
||||
file: pkg.browser,
|
||||
format: 'umd',
|
||||
},
|
||||
plugins: [
|
||||
resolve(), // so Rollup can find `ms`
|
||||
commonjs(), // so Rollup can convert `ms` to an ES module
|
||||
],
|
||||
},
|
||||
|
||||
// CommonJS (for Node) and ES module (for bundlers) build.
|
||||
// (We could have three entries in the configuration array
|
||||
// instead of two, but it's quicker to generate multiple
|
||||
// builds from a single configuration where possible, using
|
||||
// an array for the `output` option, where we can specify
|
||||
// `file` and `format` for each target)
|
||||
{
|
||||
input: 'lib/index.js',
|
||||
output: [
|
||||
{ file: pkg.main, format: 'cjs' },
|
||||
{ file: pkg.module, format: 'es' },
|
||||
],
|
||||
},
|
||||
];
|
||||
Reference in New Issue
Block a user