From 8832c07646d8299cf0fdef32ff1374442ffd2390 Mon Sep 17 00:00:00 2001 From: Nikolay Kostyurin Date: Sun, 8 Jul 2018 11:59:59 +0200 Subject: [PATCH] speed parser optimization --- .eslintignore | 1 + package-lock.json | 101 ++++++++++++++++++++++++++ package.json | 3 + packages/bbob-parser/lib/Token.js | 9 +-- packages/bbob-parser/lib/Tokenizer.js | 64 +++++++++------- packages/bbob-parser/package.json | 6 +- packages/bbob-parser/rollup.config.js | 33 +++++++++ 7 files changed, 184 insertions(+), 33 deletions(-) create mode 100644 .eslintignore create mode 100644 packages/bbob-parser/rollup.config.js diff --git a/.eslintignore b/.eslintignore new file mode 100644 index 0000000..35c9b40 --- /dev/null +++ b/.eslintignore @@ -0,0 +1 @@ +/packages/**/dist diff --git a/package-lock.json b/package-lock.json index 4a81c4c..7e586a6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,6 +22,18 @@ "js-tokens": "^3.0.0" } }, + "@types/estree": { + "version": "0.0.39", + "resolved": "https://npm.wsmgroup.ru/@types%2festree/-/estree-0.0.39.tgz", + "integrity": "sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw==", + "dev": true + }, + "@types/node": { + "version": "10.5.2", + "resolved": "https://npm.wsmgroup.ru/@types%2fnode/-/node-10.5.2.tgz", + "integrity": "sha512-m9zXmifkZsMHZBOyxZWilMwmTlpC8x5Ty360JKTiXvlXZfBWYpsg9ZZvP/Ye+iZUh+Q+MxDLjItVTWIsfwz+8Q==", + "dev": true + }, "abab": { "version": "1.0.4", "resolved": "https://npm.wsmgroup.ru/abab/-/abab-1.0.4.tgz", @@ -1791,6 +1803,12 @@ "integrity": "sha1-De4/7TH81GlhjOc0IJn8GvoL2xM=", "dev": true }, + "estree-walker": { + "version": "0.5.2", + "resolved": "https://npm.wsmgroup.ru/estree-walker/-/estree-walker-0.5.2.tgz", + "integrity": "sha512-XpCnW/AE10ws/kDAs37cngSkvgIR8aN3G0MS85m7dUpuK2EREo9VJ00uvw6Dg/hXEpfsE1I1TvJOJr+Z+TL+ig==", + "dev": true + }, "esutils": { "version": "2.0.2", "resolved": "https://npm.wsmgroup.ru/esutils/-/esutils-2.0.2.tgz", @@ -3237,6 +3255,12 @@ 
"is-extglob": "^1.0.0" } }, + "is-module": { + "version": "1.0.0", + "resolved": "https://npm.wsmgroup.ru/is-module/-/is-module-1.0.0.tgz", + "integrity": "sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE=", + "dev": true + }, "is-number": { "version": "3.0.0", "resolved": "https://npm.wsmgroup.ru/is-number/-/is-number-3.0.0.tgz", @@ -4226,6 +4250,15 @@ "yallist": "^2.1.2" } }, + "magic-string": { + "version": "0.22.5", + "resolved": "https://npm.wsmgroup.ru/magic-string/-/magic-string-0.22.5.tgz", + "integrity": "sha512-oreip9rJZkzvA8Qzk9HFs8fZGF/u7H/gtrE8EN6RjKJ9kh2HlC+yQ2QezifqTZfGyiuAV0dRv5a+y/8gBb1m9w==", + "dev": true, + "requires": { + "vlq": "^0.2.2" + } + }, "makeerror": { "version": "1.0.11", "resolved": "https://npm.wsmgroup.ru/makeerror/-/makeerror-1.0.11.tgz", @@ -5356,6 +5389,68 @@ "glob": "^7.0.5" } }, + "rollup": { + "version": "0.62.0", + "resolved": "https://npm.wsmgroup.ru/rollup/-/rollup-0.62.0.tgz", + "integrity": "sha512-mZS0aIGfYzuJySJD78znu9/hCJsNfBzg4lDuZGMj0hFVcYHt2evNRHv8aqiu9/w6z6Qn8AQoVl4iyEjDmisGeA==", + "dev": true, + "requires": { + "@types/estree": "0.0.39", + "@types/node": "*" + } + }, + "rollup-plugin-commonjs": { + "version": "9.1.3", + "resolved": "https://npm.wsmgroup.ru/rollup-plugin-commonjs/-/rollup-plugin-commonjs-9.1.3.tgz", + "integrity": "sha512-g91ZZKZwTW7F7vL6jMee38I8coj/Q9GBdTmXXeFL7ldgC1Ky5WJvHgbKlAiXXTh762qvohhExwUgeQGFh9suGg==", + "dev": true, + "requires": { + "estree-walker": "^0.5.1", + "magic-string": "^0.22.4", + "resolve": "^1.5.0", + "rollup-pluginutils": "^2.0.1" + }, + "dependencies": { + "resolve": { + "version": "1.8.1", + "resolved": "https://npm.wsmgroup.ru/resolve/-/resolve-1.8.1.tgz", + "integrity": "sha512-AicPrAC7Qu1JxPCZ9ZgCZlY35QgFnNqc+0LtbRNxnVw4TXvjQ72wnuL9JQcEBgXkI9JM8MsT9kaQoHcpCRJOYA==", + "dev": true, + "requires": { + "path-parse": "^1.0.5" + } + } + } + }, + "rollup-plugin-node-resolve": { + "version": "3.3.0", + "resolved": 
"https://npm.wsmgroup.ru/rollup-plugin-node-resolve/-/rollup-plugin-node-resolve-3.3.0.tgz", + "integrity": "sha512-9zHGr3oUJq6G+X0oRMYlzid9fXicBdiydhwGChdyeNRGPcN/majtegApRKHLR5drboUvEWU+QeUmGTyEZQs3WA==", + "dev": true, + "requires": { + "builtin-modules": "^2.0.0", + "is-module": "^1.0.0", + "resolve": "^1.1.6" + }, + "dependencies": { + "builtin-modules": { + "version": "2.0.0", + "resolved": "https://npm.wsmgroup.ru/builtin-modules/-/builtin-modules-2.0.0.tgz", + "integrity": "sha512-3U5kUA5VPsRUA3nofm/BXX7GVHKfxz0hOBAPxXrIvHzlDRkQVqEn6yi8QJegxl4LzOHLdvb7XF5dVawa/VVYBg==", + "dev": true + } + } + }, + "rollup-pluginutils": { + "version": "2.3.0", + "resolved": "https://npm.wsmgroup.ru/rollup-pluginutils/-/rollup-pluginutils-2.3.0.tgz", + "integrity": "sha512-xB6hsRsjdJdIYWEyYUJy/3ki5g69wrf0luHPGNK3ZSocV6HLNfio59l3dZ3TL4xUwEKgROhFi9jOCt6c5gfUWw==", + "dev": true, + "requires": { + "estree-walker": "^0.5.2", + "micromatch": "^2.3.11" + } + }, "rsvp": { "version": "3.6.2", "resolved": "https://npm.wsmgroup.ru/rsvp/-/rsvp-3.6.2.tgz", @@ -6355,6 +6450,12 @@ "extsprintf": "^1.2.0" } }, + "vlq": { + "version": "0.2.3", + "resolved": "https://npm.wsmgroup.ru/vlq/-/vlq-0.2.3.tgz", + "integrity": "sha512-DRibZL6DsNhIgYQ+wNdWDL2SL3bKPlVrRiBqV5yuMm++op8W4kGFtaQfCs4KEJn0wBZcHVHJ3eoywX8983k1ow==", + "dev": true + }, "w3c-hr-time": { "version": "1.0.1", "resolved": "https://npm.wsmgroup.ru/w3c-hr-time/-/w3c-hr-time-1.0.1.tgz", diff --git a/package.json b/package.json index 1ffc179..1f0d1eb 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,9 @@ "jest": "^23.1.0", "jsdoc-to-markdown": "^4.0.1", "lerna": "^2.11.0", + "rollup": "^0.62.0", + "rollup-plugin-commonjs": "^9.1.3", + "rollup-plugin-node-resolve": "^3.3.0", "xbbcode-parser": "^0.1.2" } } diff --git a/packages/bbob-parser/lib/Token.js b/packages/bbob-parser/lib/Token.js index a47915c..9f79e13 100644 --- a/packages/bbob-parser/lib/Token.js +++ b/packages/bbob-parser/lib/Token.js @@ -22,11 +22,10 @@ const 
getTokenValue = token => token[TOKEN_VALUE_ID]; const getTokenLine = token => token[TOKEN_LINE_ID]; const getTokenColumn = token => token[TOKEN_COLUMN_ID]; -const isTextToken = (token) => { - const type = token[TOKEN_TYPE_ID]; - - return type === TOKEN_TYPE_SPACE || type === TOKEN_TYPE_NEW_LINE || type === TOKEN_TYPE_WORD; -}; +const isTextToken = token => + token[TOKEN_TYPE_ID] === TOKEN_TYPE_SPACE || + token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE || + token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD; const isTagToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_TAG; const isTagEnd = token => getTokenValue(token).charCodeAt(0) === SLASH; diff --git a/packages/bbob-parser/lib/Tokenizer.js b/packages/bbob-parser/lib/Tokenizer.js index 6df60b2..6aea150 100644 --- a/packages/bbob-parser/lib/Tokenizer.js +++ b/packages/bbob-parser/lib/Tokenizer.js @@ -15,11 +15,12 @@ class Tokenizer { this.buffer = input; this.colPos = 0; this.rowPos = 0; - this.index = 0; + // eslint-disable-next-line no-bitwise + this.index = 2 ** 32; this.tokenIndex = -1; this.tokens = new Array(Math.floor(this.buffer.length)); - this.dummyToken = createTokenOfType('', '', '', ''); + this.dummyToken = null; // createTokenOfType('', '', '', ''); this.wordToken = this.dummyToken; this.tagToken = this.dummyToken; @@ -30,16 +31,16 @@ class Tokenizer { this.options = options; this.charMap = { - TAB: this.charSPACE.bind(this), - SPACE: this.charSPACE.bind(this), - N: this.charN.bind(this), - OPEN_BRAKET: this.charOPENBRAKET.bind(this), - CLOSE_BRAKET: this.charCLOSEBRAKET.bind(this), - EQ: this.charEQ.bind(this), - QUOTEMARK: this.charQUOTEMARK.bind(this), - BACKSLASH: this.charBACKSLASH.bind(this), + [TAB]: this.charSPACE.bind(this), + [SPACE]: this.charSPACE.bind(this), + [N]: this.charN.bind(this), + [OPEN_BRAKET]: this.charOPENBRAKET.bind(this), + [CLOSE_BRAKET]: this.charCLOSEBRAKET.bind(this), + [EQ]: this.charEQ.bind(this), + [QUOTEMARK]: this.charQUOTEMARK.bind(this), + [BACKSLASH]: 
this.charBACKSLASH.bind(this), default: this.charWORD.bind(this), - } + }; } emitToken(token) { @@ -72,22 +73,23 @@ class Tokenizer { } flushWord() { - if (this.wordToken[Token.TYPE_ID] && this.wordToken[Token.VALUE_ID]) { + if (this.inWord() && this.wordToken[Token.VALUE_ID]) { this.appendToken(this.wordToken); this.wordToken = this.createWordToken(''); } } createWord(value, line, row) { - if (this.wordToken[Token.TYPE_ID] === '') { + if (!this.inWord()) { this.wordToken = this.createWordToken(value, line, row); + this.wordIndex = this.index; } } flushTag() { - if (this.tagToken[Token.TYPE_ID]) { + if (this.inTag()) { - // [] and [=] tag case + // [] and [=] tag case: the tag token exists but its value is empty if (this.tagToken[Token.VALUE_ID] === '') { - const value = this.attrValueToken[Token.TYPE_ID] ? getChar(EQ) : ''; + const value = this.inAttrValue() ? getChar(EQ) : ''; const word = getChar(OPEN_BRAKET) + value + getChar(CLOSE_BRAKET); @@ -103,7 +105,8 @@ class Tokenizer { return; } - if (this.attrNameToken[Token.TYPE_ID] && !this.attrValueToken[Token.TYPE_ID]) { + // this.attrNameToken[Token.TYPE_ID] && !this.attrValueToken[Token.TYPE_ID] + if (this.inAttrName() && !this.inAttrValue()) { this.tagToken[Token.VALUE_ID] += PLACEHOLDER_SPACE + this.attrNameToken[Token.VALUE_ID]; this.attrNameToken = this.dummyToken; } @@ -114,7 +117,7 @@ class Tokenizer { } flushUnclosedTag() { - if (this.tagToken[Token.TYPE_ID]) { + if (this.inTag()) { const value = this.tagToken[Token.VALUE_ID] + (this.attrValueToken[Token.VALUE_ID] ? 
getChar(EQ) : ''); this.tagToken[Token.TYPE_ID] = Token.TYPE_WORD; @@ -131,13 +134,13 @@ class Tokenizer { } flushAttrNames() { - if (this.attrNameToken[Token.TYPE_ID]) { + if (this.inAttrName()) { this.attrTokens.push(this.attrNameToken); this.attrNameToken = this.dummyToken; } - if (this.attrValueToken[Token.TYPE_ID]) { - delete this.attrValueToken.quoted; + if (this.inAttrValue()) { + this.attrValueToken.quoted = null; this.attrTokens.push(this.attrValueToken); this.attrValueToken = this.dummyToken; } @@ -151,9 +154,10 @@ class Tokenizer { } charSPACE(charCode) { - this.flushWord(); const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE; + this.flushWord(); + if (this.inTag()) { if (this.inAttrValue() && this.attrValueToken.quoted) { this.attrValueToken[Token.VALUE_ID] += spaceCode; @@ -216,7 +220,7 @@ class Tokenizer { this.attrValueToken.quoted && !isPrevBackslash) { this.flushAttrNames(); - } else if (this.tagToken[Token.TYPE_ID] === '') { + } else if (!this.inTag()) { this.wordToken[Token.VALUE_ID] += getChar(charCode); } @@ -258,12 +262,14 @@ class Tokenizer { } tokenize() { + this.index = 0; while (this.index < this.buffer.length) { const charCode = this.buffer.charCodeAt(this.index); (this.charMap[charCode] || this.charMap.default)(charCode); - this.index += 1; + // eslint-disable-next-line no-plusplus + ++this.index; } this.flushWord(); @@ -274,16 +280,20 @@ class Tokenizer { return this.tokens; } + inWord() { + return this.wordToken && this.wordToken[Token.TYPE_ID]; + } + inTag() { - return this.tagToken[Token.TYPE_ID]; + return this.tagToken && this.tagToken[Token.TYPE_ID]; } inAttrValue() { - return this.attrValueToken[Token.TYPE_ID]; + return this.attrValueToken && this.attrValueToken[Token.TYPE_ID]; } inAttrName() { - return this.attrNameToken[Token.TYPE_ID]; + return this.attrNameToken && this.attrNameToken[Token.TYPE_ID]; } createWordToken(value = '', line = this.colPos, row = this.rowPos) { @@ -317,7 +327,7 @@ class 
Tokenizer { } // warm up tokenizer to elimitate code branches that never execute -new Tokenizer('[b param="hello"]Sample text[/b]\n\t[Chorus 2] x html([a. title][, alt][, classes]) x [=] [/y]').tokenize(); +// new Tokenizer('[b param="hello"]Sample text[/b]\n\t[Chorus 2] x html([a. title][, alt][, classes]) x [=] [/y]').tokenize(); module.exports = Tokenizer; module.exports.createTokenOfType = createTokenOfType; diff --git a/packages/bbob-parser/package.json b/packages/bbob-parser/package.json index dee63fb..0adacda 100644 --- a/packages/bbob-parser/package.json +++ b/packages/bbob-parser/package.json @@ -18,12 +18,16 @@ "serialize", "html" ], - "main": "./lib/index.js", + "main": "dist/cjs.js", + "module": "dist/esm.js", + "browser": "dist/umd.js", "repository": { "type": "git", "url": "git://github.com/JiLiZART/bbob.git" }, "scripts": { + "build": "../../node_modules/.bin/rollup -c", + "dev": "../../node_modules/.bin/rollup -c -w", "test": "../../node_modules/.bin/jest --", "cover": "../../node_modules/.bin/jest --coverage", "lint": "../../node_modules/.bin/eslint ." diff --git a/packages/bbob-parser/rollup.config.js b/packages/bbob-parser/rollup.config.js new file mode 100644 index 0000000..f288c3f --- /dev/null +++ b/packages/bbob-parser/rollup.config.js @@ -0,0 +1,33 @@ +import resolve from 'rollup-plugin-node-resolve'; +import commonjs from 'rollup-plugin-commonjs'; +import pkg from './package.json'; + +export default [ + // browser-friendly UMD build + { + input: 'lib/index.js', + output: { + name: 'BBobParser', + file: pkg.browser, + format: 'umd', + }, + plugins: [ + resolve(), // so Rollup can resolve modules from node_modules + commonjs(), // so Rollup can convert CommonJS modules to ES modules + ], + }, + + // CommonJS (for Node) and ES module (for bundlers) build. 
+ // (We could have three entries in the configuration array + // instead of two, but it's quicker to generate multiple + // builds from a single configuration where possible, using + // an array for the `output` option, where we can specify + // `file` and `format` for each target) + { + input: 'lib/index.js', + output: [ + { file: pkg.main, format: 'cjs' }, + { file: pkg.module, format: 'es' }, + ], + }, +];