mirror of
https://github.com/tenrok/BBob.git
synced 2026-05-15 11:59:37 +03:00
initial
This commit is contained in:
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"lerna": "2.11.0",
|
||||
"packages": [
|
||||
"packages/bbob-parser"
|
||||
],
|
||||
"version": "independent"
|
||||
}
|
||||
Generated
+255
@@ -0,0 +1,255 @@
|
||||
{
|
||||
"name": "bibob",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
"assertion-error": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/assertion-error/-/assertion-error-1.1.0.tgz",
|
||||
"integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==",
|
||||
"dev": true
|
||||
},
|
||||
"balanced-match": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/balanced-match/-/balanced-match-1.0.0.tgz",
|
||||
"integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=",
|
||||
"dev": true
|
||||
},
|
||||
"brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://npm.wsmgroup.ru/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
}
|
||||
},
|
||||
"browser-stdout": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://npm.wsmgroup.ru/browser-stdout/-/browser-stdout-1.3.1.tgz",
|
||||
"integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==",
|
||||
"dev": true
|
||||
},
|
||||
"chai": {
|
||||
"version": "4.1.2",
|
||||
"resolved": "https://npm.wsmgroup.ru/chai/-/chai-4.1.2.tgz",
|
||||
"integrity": "sha1-D2RYS6ZC8PKs4oBiefTwbKI61zw=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"assertion-error": "^1.0.1",
|
||||
"check-error": "^1.0.1",
|
||||
"deep-eql": "^3.0.0",
|
||||
"get-func-name": "^2.0.0",
|
||||
"pathval": "^1.0.0",
|
||||
"type-detect": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"check-error": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://npm.wsmgroup.ru/check-error/-/check-error-1.0.2.tgz",
|
||||
"integrity": "sha1-V00xLt2Iu13YkS6Sht1sCu1KrII=",
|
||||
"dev": true
|
||||
},
|
||||
"commander": {
|
||||
"version": "2.15.1",
|
||||
"resolved": "https://npm.wsmgroup.ru/commander/-/commander-2.15.1.tgz",
|
||||
"integrity": "sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==",
|
||||
"dev": true
|
||||
},
|
||||
"concat-map": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://npm.wsmgroup.ru/concat-map/-/concat-map-0.0.1.tgz",
|
||||
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=",
|
||||
"dev": true
|
||||
},
|
||||
"debug": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/debug/-/debug-3.1.0.tgz",
|
||||
"integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"ms": "2.0.0"
|
||||
}
|
||||
},
|
||||
"deep-eql": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://npm.wsmgroup.ru/deep-eql/-/deep-eql-3.0.1.tgz",
|
||||
"integrity": "sha512-+QeIQyN5ZuO+3Uk5DYh6/1eKO0m0YmJFGNmFHGACpf1ClL1nmlV/p4gNgbl2pJGxgXb4faqo6UE+M5ACEMyVcw==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"type-detect": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"diff": {
|
||||
"version": "3.5.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/diff/-/diff-3.5.0.tgz",
|
||||
"integrity": "sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==",
|
||||
"dev": true
|
||||
},
|
||||
"escape-string-regexp": {
|
||||
"version": "1.0.5",
|
||||
"resolved": "https://npm.wsmgroup.ru/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
|
||||
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=",
|
||||
"dev": true
|
||||
},
|
||||
"fs.realpath": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/fs.realpath/-/fs.realpath-1.0.0.tgz",
|
||||
"integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=",
|
||||
"dev": true
|
||||
},
|
||||
"get-func-name": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/get-func-name/-/get-func-name-2.0.0.tgz",
|
||||
"integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE=",
|
||||
"dev": true
|
||||
},
|
||||
"glob": {
|
||||
"version": "7.1.2",
|
||||
"resolved": "https://npm.wsmgroup.ru/glob/-/glob-7.1.2.tgz",
|
||||
"integrity": "sha512-MJTUg1kjuLeQCJ+ccE4Vpa6kKVXkPYJ2mOCQyUuKLcLQsdrMCpBPUi8qVE6+YuaJkozeA9NusTAw3hLr8Xe5EQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"fs.realpath": "^1.0.0",
|
||||
"inflight": "^1.0.4",
|
||||
"inherits": "2",
|
||||
"minimatch": "^3.0.4",
|
||||
"once": "^1.3.0",
|
||||
"path-is-absolute": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"growl": {
|
||||
"version": "1.10.5",
|
||||
"resolved": "https://npm.wsmgroup.ru/growl/-/growl-1.10.5.tgz",
|
||||
"integrity": "sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==",
|
||||
"dev": true
|
||||
},
|
||||
"has-flag": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/has-flag/-/has-flag-3.0.0.tgz",
|
||||
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=",
|
||||
"dev": true
|
||||
},
|
||||
"he": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://npm.wsmgroup.ru/he/-/he-1.1.1.tgz",
|
||||
"integrity": "sha1-k0EP0hsAlzUVH4howvJx80J+I/0=",
|
||||
"dev": true
|
||||
},
|
||||
"inflight": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://npm.wsmgroup.ru/inflight/-/inflight-1.0.6.tgz",
|
||||
"integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"once": "^1.3.0",
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"inherits": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://npm.wsmgroup.ru/inherits/-/inherits-2.0.3.tgz",
|
||||
"integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=",
|
||||
"dev": true
|
||||
},
|
||||
"minimatch": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://npm.wsmgroup.ru/minimatch/-/minimatch-3.0.4.tgz",
|
||||
"integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
}
|
||||
},
|
||||
"minimist": {
|
||||
"version": "0.0.8",
|
||||
"resolved": "https://npm.wsmgroup.ru/minimist/-/minimist-0.0.8.tgz",
|
||||
"integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=",
|
||||
"dev": true
|
||||
},
|
||||
"mkdirp": {
|
||||
"version": "0.5.1",
|
||||
"resolved": "https://npm.wsmgroup.ru/mkdirp/-/mkdirp-0.5.1.tgz",
|
||||
"integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"minimist": "0.0.8"
|
||||
}
|
||||
},
|
||||
"mocha": {
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/mocha/-/mocha-5.2.0.tgz",
|
||||
"integrity": "sha512-2IUgKDhc3J7Uug+FxMXuqIyYzH7gJjXECKe/w43IGgQHTSj3InJi+yAA7T24L9bQMRKiUEHxEX37G5JpVUGLcQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"browser-stdout": "1.3.1",
|
||||
"commander": "2.15.1",
|
||||
"debug": "3.1.0",
|
||||
"diff": "3.5.0",
|
||||
"escape-string-regexp": "1.0.5",
|
||||
"glob": "7.1.2",
|
||||
"growl": "1.10.5",
|
||||
"he": "1.1.1",
|
||||
"minimatch": "3.0.4",
|
||||
"mkdirp": "0.5.1",
|
||||
"supports-color": "5.4.0"
|
||||
}
|
||||
},
|
||||
"ms": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/ms/-/ms-2.0.0.tgz",
|
||||
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=",
|
||||
"dev": true
|
||||
},
|
||||
"once": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/once/-/once-1.4.0.tgz",
|
||||
"integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"path-is-absolute": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://npm.wsmgroup.ru/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
|
||||
"integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=",
|
||||
"dev": true
|
||||
},
|
||||
"pathval": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/pathval/-/pathval-1.1.0.tgz",
|
||||
"integrity": "sha1-uULm1L3mUwBe9rcTYd74cn0GReA=",
|
||||
"dev": true
|
||||
},
|
||||
"supports-color": {
|
||||
"version": "5.4.0",
|
||||
"resolved": "https://npm.wsmgroup.ru/supports-color/-/supports-color-5.4.0.tgz",
|
||||
"integrity": "sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"has-flag": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"type-detect": {
|
||||
"version": "4.0.8",
|
||||
"resolved": "https://npm.wsmgroup.ru/type-detect/-/type-detect-4.0.8.tgz",
|
||||
"integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==",
|
||||
"dev": true
|
||||
},
|
||||
"typescript": {
|
||||
"version": "2.9.1",
|
||||
"resolved": "https://npm.wsmgroup.ru/typescript/-/typescript-2.9.1.tgz",
|
||||
"integrity": "sha512-h6pM2f/GDchCFlldnriOhs1QHuwbnmj6/v7499eMHqPeW4V2G0elua2eIc2nu8v2NdHV0Gm+tzX83Hr6nUFjQA==",
|
||||
"dev": true
|
||||
},
|
||||
"wrappy": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://npm.wsmgroup.ru/wrappy/-/wrappy-1.0.2.tgz",
|
||||
"integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"devDependencies": {
|
||||
"lerna": "^2.11.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
package-lock.json
|
||||
coverage
|
||||
lib
|
||||
dist
|
||||
@@ -0,0 +1,182 @@
|
||||
const Tokenizer = require("./Tokenizer");
|
||||
const TokenType = Tokenizer.TYPE;
|
||||
const TokenChar = Tokenizer.CHAR;
|
||||
const getCharCode = Tokenizer.getCharCode;
|
||||
|
||||
/**
 * Tells whether a token carries plain text: a space, a line break or a word.
 *
 * @param {Array} token - Token tuple; its type is read from slot `Tokenizer.TOKEN.TYPE_ID`.
 * @returns {boolean} `true` for space, new-line and word tokens, `false` otherwise.
 */
const isTextToken = (token) => {
  switch (token[Tokenizer.TOKEN.TYPE_ID]) {
    case TokenType.SPACE:
    case TokenType.NEW_LINE:
    case TokenType.WORD:
      return true;
    default:
      return false;
  }
};
|
||||
|
||||
/** Tells whether the token's type slot marks it as a tag token. */
const isTagToken = (token) => {
  const kind = token[Tokenizer.TOKEN.TYPE_ID];

  return kind === TokenType.TAG;
};
|
||||
|
||||
/** Tells whether a tag token opens a tag, i.e. it is not a closing tag. */
const isTagStart = (token) => {
  const closes = isTagEnd(token);

  return !closes;
};
|
||||
|
||||
/** Tells whether a tag token closes a tag: its value starts with the slash character. */
const isTagEnd = (token) => {
  const firstCharCode = getTokenValue(token).charCodeAt(0);

  return firstCharCode === TokenChar.SLASH;
};
|
||||
|
||||
/** Tells whether the token's type slot marks it as an attribute name. */
const isAttrNameToken = (token) => {
  const kind = token[Tokenizer.TOKEN.TYPE_ID];

  return kind === TokenType.ATTR_NAME;
};
|
||||
|
||||
/** Tells whether the token's type slot marks it as an attribute value. */
const isAttrValueToken = (token) => {
  const kind = token[Tokenizer.TOKEN.TYPE_ID];

  return kind === TokenType.ATTR_VALUE;
};
|
||||
|
||||
/**
 * Returns the bare tag name of a tag token.
 *
 * @param {Array} token - Tag token.
 * @returns {string} The token value, with its first character dropped for closing tags.
 */
const getTagName = (token) => {
  const raw = getTokenValue(token);

  if (isTagEnd(token)) {
    return raw.slice(1);
  }

  return raw;
};
|
||||
|
||||
/**
 * Renders a tag token back to its textual form, e.g. `[ch]` or `[/ch]`.
 *
 * The value of a closing tag already starts with the slash (that is what
 * `isTagEnd` checks), so the value is wrapped in brackets as is. Prepending
 * another slash would produce `[//ch]`.
 *
 * @param {Array} token - Tag token.
 * @returns {string} The tag wrapped in square brackets.
 */
const convertTagToText = (token) => {
  const open = getCharCode(TokenChar.OPEN_BRAKET);
  const close = getCharCode(TokenChar.CLOSE_BRAKET);

  return `${open}${getTokenValue(token)}${close}`;
};
|
||||
|
||||
/** Reads the value slot (`Tokenizer.TOKEN.VALUE_ID`) of a token tuple. */
const getTokenValue = (token) => {
  const valueIndex = Tokenizer.TOKEN.VALUE_ID;

  return token[valueIndex];
};
|
||||
|
||||
/**
 * Creates an AST node for a tag.
 *
 * @param {string} name - Stored as the node's `tag`.
 * @param {Object} [attrs={}] - Attribute map.
 * @param {Array} [content=[]] - Child nodes and strings.
 * @returns {{tag: string, attrs: Object, content: Array}}
 */
const createTagNode = (name, attrs = {}, content = []) => {
  const node = { tag: name };

  node.attrs = attrs;
  node.content = content;

  return node;
};
|
||||
|
||||
/**
|
||||
*
|
||||
{
|
||||
tag: 'div',
|
||||
attrs: {
|
||||
class: 'foo'
|
||||
},
|
||||
content: ['hello world!']
|
||||
}
|
||||
*/
|
||||
module.exports = class Parser {
|
||||
constructor(tokens, options = {}) {
|
||||
this.tokens = tokens;
|
||||
this.options = options
|
||||
}
|
||||
|
||||
parse() {
|
||||
const tokens = this.tokens;
|
||||
const nodes = [];
|
||||
const nestedNodes = [];
|
||||
const curTags = [];
|
||||
const curTagsAttrName = [];
|
||||
|
||||
const getCurTag = () => {
|
||||
if (curTags.length) {
|
||||
return curTags[curTags.length - 1]
|
||||
}
|
||||
|
||||
return null
|
||||
};
|
||||
|
||||
const createCurTag = (token) => {
|
||||
curTags.push(createTagNode(getTokenValue(token)))
|
||||
};
|
||||
|
||||
const clearCurTag = () => {
|
||||
if (curTags.length) {
|
||||
curTags.pop();
|
||||
|
||||
clearCurTagAttrName()
|
||||
}
|
||||
};
|
||||
|
||||
const getCurTagAttrName = () => {
|
||||
if (curTagsAttrName.length) {
|
||||
return curTagsAttrName[curTagsAttrName.length - 1]
|
||||
}
|
||||
|
||||
return null
|
||||
};
|
||||
|
||||
const createCurTagAttrName = (token) => {
|
||||
curTagsAttrName.push(getTokenValue(token))
|
||||
};
|
||||
|
||||
const clearCurTagAttrName = () => {
|
||||
if (curTagsAttrName.length) {
|
||||
curTagsAttrName.pop()
|
||||
}
|
||||
};
|
||||
|
||||
const getNodes = () => {
|
||||
if (nestedNodes.length) {
|
||||
const nestedNode = nestedNodes[nestedNodes.length - 1];
|
||||
return nestedNode.content
|
||||
}
|
||||
|
||||
return nodes
|
||||
};
|
||||
|
||||
let token;
|
||||
while (token = tokens.shift()) {
|
||||
if (!token) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isTagToken(token)) {
|
||||
if (this.isAllowedTag(getTagName(token))) {
|
||||
// [tag]
|
||||
if (isTagStart(token)) {
|
||||
createCurTag(token);
|
||||
|
||||
if (this.isCloseTag(getTokenValue(token))) {
|
||||
nestedNodes.push(getCurTag())
|
||||
} else {
|
||||
getNodes().push(getCurTag());
|
||||
clearCurTag()
|
||||
}
|
||||
}
|
||||
|
||||
// [/tag]
|
||||
if (isTagEnd(token)) {
|
||||
clearCurTag();
|
||||
|
||||
const lastNestedNode = nestedNodes.pop();
|
||||
|
||||
if (lastNestedNode) {
|
||||
getNodes().push(lastNestedNode)
|
||||
} else {
|
||||
debugger;
|
||||
console.warn(`Inconsistent tag '${getTokenValue(token)}'`);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
getNodes().push(convertTagToText(token))
|
||||
}
|
||||
}
|
||||
|
||||
if (getCurTag()) {
|
||||
if (isAttrNameToken(token)) {
|
||||
createCurTagAttrName(token);
|
||||
getCurTag().attrs[getCurTagAttrName()] = null
|
||||
} else if (isAttrValueToken(token)) {
|
||||
getCurTag().attrs[getCurTagAttrName()] = getTokenValue(token);
|
||||
clearCurTagAttrName()
|
||||
} else if (isTextToken(token)) {
|
||||
getCurTag().content.push(getTokenValue(token))
|
||||
}
|
||||
} else if (isTextToken(token)) {
|
||||
getNodes().push(getTokenValue(token))
|
||||
}
|
||||
}
|
||||
|
||||
return nodes
|
||||
}
|
||||
|
||||
isCloseTag(value) {
|
||||
return this.options.closableTags && this.options.closableTags.indexOf(value) >= 0
|
||||
}
|
||||
|
||||
isAllowedTag(value) {
|
||||
if (this.options.allowOnlyTags && this.options.allowOnlyTags.length) {
|
||||
return this.options.allowOnlyTags.indexOf(value) >= 0
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,12 @@
|
||||
const Parser = require('./Parser');
|
||||
const TOKEN = require('./token');
|
||||
|
||||
describe("Parser", () => {
  // A closable tag followed by its closing tag must collapse into one node.
  test("parse paired tags tokens", () => {
    const parser = new Parser([
      [TOKEN.TYPE_TAG, 'ch'],
      [TOKEN.TYPE_TAG, '/ch']
    ], {
      closableTags: ['ch']
    });

    expect(parser.parse()).toEqual([{tag: 'ch', attrs: {}, content: []}]);
  })
});
|
||||
@@ -0,0 +1,214 @@
|
||||
const CHAR = require('./char');
|
||||
const TOKEN = require('./token');
|
||||
|
||||
// const TOKEN.TYPE_ID = 0;
|
||||
// const TOKEN.VALUE_ID = 1;
|
||||
// const TOKEN.LINE_ID = 2;
|
||||
// const TOKEN.COLUMN_ID = 3;
|
||||
//
|
||||
// const TOKEN.TYPE_WORD = 'word';
|
||||
// const TOKEN.TYPE_TAG = 'tag';
|
||||
// const TOKEN.TYPE_ATTR_NAME = 'attr-name';
|
||||
// const TOKEN.TYPE_ATTR_VALUE = 'attr-value';
|
||||
// const TOKEN.TYPE_SPACE = 'space';
|
||||
// const TOKEN.TYPE_NEW_LINE = 'new-line';
|
||||
|
||||
// Converts a numeric character code into its one-character string.
// NOTE: despite the name this is `String.fromCharCode` (code -> char), not `charCodeAt`.
const getCharCode = String.fromCharCode;
|
||||
|
||||
/**
 * Splits a BB code string into a flat list of token tuples.
 *
 * Every token is an array whose slots are addressed by `TOKEN.TYPE_ID`,
 * `TOKEN.VALUE_ID`, `TOKEN.LINE_ID` and `TOKEN.COLUMN_ID`.
 */
class Tokenizer {
  /**
   * @param {string} input - Text to tokenize.
   */
  constructor(input) {
    this.buffer = input;
    this.colPos = 0;
    this.rowPos = 0;
    this.index = 0;
  }

  /**
   * Scans the buffer from `this.index` to its end.
   *
   * Tag tokens are emitted when the closing bracket is reached, followed by the
   * attribute name/value tokens collected inside that tag.
   *
   * @returns {Array[]} Tokens in emission order; empty for an empty buffer.
   */
  tokenize() {
    let wordToken = this.createWordToken('');
    let tagToken = null;
    let attrNameToken = null;
    let attrValueToken = null;
    let attrTokens = [];
    const tokens = [];

    const flushWord = () => {
      if (wordToken[TOKEN.VALUE_ID]) {
        tokens.push(wordToken);
        wordToken = this.createWordToken('');
      }
    };

    const flushTag = () => {
      if (tagToken !== null) {
        tokens.push(tagToken);
        tagToken = null;
      }
    };

    const flushAttrName = () => {
      if (attrNameToken) {
        attrTokens.push(attrNameToken);
        attrNameToken = null;
      }
    };

    const flushAttrValue = () => {
      if (attrValueToken) {
        attrTokens.push(attrValueToken);
        attrValueToken = null;
      }
    };

    const flushAttrs = () => {
      for (const attrToken of attrTokens) {
        tokens.push(attrToken);
      }

      attrTokens = [];
    };

    while (this.index < this.buffer.length) {
      const charCode = this.buffer.charCodeAt(this.index);

      switch (charCode) {
        case CHAR.TAB:
        case CHAR.SPACE: {
          flushWord();

          // Whitespace inside a tag starts a new attribute name.
          if (tagToken) {
            attrNameToken = this.createAttrNameToken('');
          }

          // NOTE(review): both branches yield a single space as written;
          // confirm whether a tab was meant to be preserved.
          const spaceCode = charCode === CHAR.TAB ? ' ' : ' ';

          // NOTE(review): the space token is emitted even inside a tag, i.e.
          // ahead of the tag token itself; confirm this is intended.
          tokens.push(this.createSpaceToken(spaceCode));

          this.colPos++;
          break;
        }

        case CHAR.N:
          flushWord();
          tokens.push(this.createNewLineToken(getCharCode(charCode)));

          this.rowPos++;
          this.colPos = 0;
          break;

        case CHAR.OPEN_BRAKET:
          flushWord();
          tagToken = this.createTagToken('');

          this.colPos++;
          break;

        case CHAR.CLOSE_BRAKET:
          flushTag();
          flushAttrName();
          flushAttrValue();
          flushAttrs();

          this.colPos++;
          break;

        case CHAR.EQ:
          if (tagToken) {
            attrValueToken = this.createAttrValueToken('');
          } else {
            wordToken[TOKEN.VALUE_ID] += getCharCode(charCode);
          }

          this.colPos++;
          break;

        case CHAR.QUOTEMARK:
          // A quote after a non-empty value closes that attribute. The value is
          // a string, so its length is checked rather than comparing it to 0.
          if (attrValueToken && attrValueToken[TOKEN.VALUE_ID].length > 0) {
            flushAttrName();
            flushAttrValue();
          } else if (tagToken === null) {
            wordToken[TOKEN.VALUE_ID] += getCharCode(charCode);
          }

          this.colPos++;
          break;

        default:
          if (tagToken && attrValueToken) {
            attrValueToken[TOKEN.VALUE_ID] += getCharCode(charCode);
          } else if (tagToken && attrNameToken) {
            attrNameToken[TOKEN.VALUE_ID] += getCharCode(charCode);
          } else if (tagToken) {
            tagToken[TOKEN.VALUE_ID] += getCharCode(charCode);
          } else {
            wordToken[TOKEN.VALUE_ID] += getCharCode(charCode);
          }

          this.colPos++;
          break;
      }

      this.index++;
    }

    flushWord();

    return tokens;
  }

  /**
   * Builds a token tuple at the current position, placing each field in the
   * slot named by the `TOKEN.*_ID` constants.
   *
   * @param {string} type - One of the `TOKEN.TYPE_*` values.
   * @param {string} value - Initial token text.
   * @returns {Array} The new token.
   */
  createToken(type, value) {
    const token = [];

    token[TOKEN.TYPE_ID] = type;
    token[TOKEN.VALUE_ID] = value;
    token[TOKEN.LINE_ID] = this.rowPos;
    token[TOKEN.COLUMN_ID] = this.colPos;

    return token;
  }

  createWordToken(value) {
    return this.createToken(TOKEN.TYPE_WORD, value);
  }

  createTagToken(value) {
    return this.createToken(TOKEN.TYPE_TAG, value);
  }

  createAttrNameToken(value) {
    return this.createToken(TOKEN.TYPE_ATTR_NAME, value);
  }

  createAttrValueToken(value) {
    return this.createToken(TOKEN.TYPE_ATTR_VALUE, value);
  }

  createSpaceToken(value) {
    return this.createToken(TOKEN.TYPE_SPACE, value);
  }

  createNewLineToken(value) {
    return this.createToken(TOKEN.TYPE_NEW_LINE, value);
  }
}
|
||||
|
||||
// warm up tokenizer to eliminate code branches that never execute
|
||||
new Tokenizer(`[b param="hello"]Sample text[/b]\n\t[Chorus]`).tokenize();
|
||||
|
||||
// Public API: the class itself, with the lookup tables attached as static properties.
Tokenizer.CHAR = CHAR;

// Token type names under short keys.
Tokenizer.TYPE = {
  WORD: TOKEN.TYPE_WORD,
  TAG: TOKEN.TYPE_TAG,
  ATTR_NAME: TOKEN.TYPE_ATTR_NAME,
  ATTR_VALUE: TOKEN.TYPE_ATTR_VALUE,
  SPACE: TOKEN.TYPE_SPACE,
  NEW_LINE: TOKEN.TYPE_NEW_LINE,
};

// Slot indexes of a token tuple.
Tokenizer.TOKEN = {
  TYPE_ID: TOKEN.TYPE_ID,
  VALUE_ID: TOKEN.VALUE_ID,
  LINE_ID: TOKEN.LINE_ID,
  COLUMN_ID: TOKEN.COLUMN_ID,
};

Tokenizer.getCharCode = getCharCode;

module.exports = Tokenizer;
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
const Tokenizer = require('./Tokenizer');
|
||||
|
||||
describe("Tokenizer", () => {
  // A lone tag must come out as exactly one tag token positioned at 0, 0.
  it("tokenize single tag", () => {
    const input = `[SingleTag]`;

    const tokens = new Tokenizer(input).tokenize();

    expect(tokens).toBeInstanceOf(Array);
    expect(tokens[0]).toEqual(['tag', 'SingleTag', 0, 0])
  })
});
|
||||
@@ -0,0 +1,215 @@
|
||||
|
||||
// Character classes allowed in attribute names and in attribute values.
const attrNameChars = '[a-zA-Z0-9\\.\\-_:;/]'
const attrValueChars = '[a-zA-Z0-9\\.\\-_:;#/\\s]'

// Matches either a closing tag (group 1, including the slash) or an opening
// tag (group 2) with an optional `="value"` and any number of ` name="value"`
// attributes. The tag name is a single `\w*` run: wrapping it in another `+`
// matched the same text but backtracked exponentially on an unterminated
// `[aaaa...`.
const pattern = `\\[(\/\\w*)\\]|\\[(\\w*)(=(["])${attrValueChars}*\\4)?( (${attrNameChars}+)?=(["])(${attrValueChars}+)\\7)*\\]`

// Global (stateful) regex: `exec` resumes from `lastIndex`.
const TAG_RE = new RegExp(pattern, 'g')
|
||||
|
||||
const EOL = '\n'
|
||||
const WHITESPACE = ' '
|
||||
// Truthy for objects that carry a `tag`. `null` is rejected explicitly because
// `typeof null === 'object'` and reading `.tag` from it would throw.
const isNode = el => typeof el === 'object' && el !== null && el.tag
|
||||
// True when the element is a string.
const isStringNode = (el) => {
  return typeof el === 'string'
}
|
||||
// True when the element's tag is 'ch'.
const isChordNode = ({ tag }) => tag === 'ch'
|
||||
// True when the element's tag is 'tab'.
const isTabNode = ({ tag }) => tag === 'tab'
|
||||
// True when the element's tag is 'syllable'.
const isSyllableNode = ({ tag }) => tag === 'syllable'
|
||||
// True when the element's tag is 'text'.
const isTextNode = ({ tag }) => tag === 'text'
|
||||
// True when the element is exactly the end-of-line constant.
const isEOL = (el) => {
  return el === EOL
}
|
||||
|
||||
/**
 * Counts the characters of text held by a node, recursing into its content.
 *
 * @param {Object|string|*} node - Tag node, string, or anything else.
 * @returns {number} Summed length of all strings below a tag node, the length
 *   of a string, and 0 for any other value.
 */
const getNodeLength = (node) => {
  if (isNode(node)) {
    // The sum has to be returned; without it every tag node measured 0.
    return node.content.reduce((count, contentNode) => count + getNodeLength(contentNode), 0)
  }

  if (isStringNode(node)) {
    return node.length
  }

  return 0
}
|
||||
|
||||
// Known tags and their flags; `closable` marks tags that come with a closing tag.
const tagsDefinition = {
  ch: { closable: true },
  syllable: { closable: true },
  tab: { closable: true },
}
|
||||
|
||||
// @TODO: Разбить на парсер и токенайзер, ноды и токены должны жить отдельно
|
||||
/**
|
||||
* Парсит контент таба с BB кодами в AST дерево [{tag:'ch', attrs:{..}, content:[...]}]
|
||||
*
|
||||
* @example
|
||||
*
|
||||
* textTabParser
|
||||
* .parse('[Intro] [ch app=123]G[/ch] hello world', {ch: {closable: true}})
|
||||
*
|
||||
*/
|
||||
module.exports = {
|
||||
parse(str, tags = tagsDefinition) {
|
||||
this.tags = tags
|
||||
|
||||
const tokens = this.tokenize(str)
|
||||
const ast = this.parseTokens(tokens)
|
||||
|
||||
return ast
|
||||
},
|
||||
|
||||
tokenize(str) {
|
||||
let tokens = []
|
||||
let match
|
||||
let lastIndex = 0
|
||||
|
||||
// console.time('tokenize')
|
||||
while (match = TAG_RE.exec(str)) {
|
||||
const delta = match.index - lastIndex
|
||||
|
||||
if (delta > 0) {
|
||||
tokens = tokens.concat(this.toTextTokens(str.substr(lastIndex, delta)))
|
||||
}
|
||||
|
||||
tokens.push(this.tagToken(match))
|
||||
lastIndex = TAG_RE.lastIndex
|
||||
}
|
||||
|
||||
const delta = str.length - lastIndex
|
||||
|
||||
if (delta > 0) {
|
||||
tokens = tokens.concat(this.toTextTokens(str.substr(lastIndex, delta)))
|
||||
}
|
||||
// console.timeEnd('tokenize')
|
||||
|
||||
return tokens
|
||||
},
|
||||
|
||||
parseTokens(tokens) {
|
||||
const nodes = []
|
||||
let curToken
|
||||
const nestedNodes = []
|
||||
|
||||
function getNodes() {
|
||||
if (nestedNodes.length) {
|
||||
const nestedNode = nestedNodes[nestedNodes.length - 1]
|
||||
return nestedNode.content
|
||||
}
|
||||
|
||||
return nodes
|
||||
}
|
||||
|
||||
// console.time('parseTokens')
|
||||
while (curToken = tokens.shift()) {
|
||||
curToken = this.isTokenSupported(curToken) ? curToken : this.asTextToken(curToken)
|
||||
|
||||
if (curToken.isText) {
|
||||
getNodes().push(curToken.text)
|
||||
}
|
||||
|
||||
if (curToken.isTag) {
|
||||
const node = this.tagNode(curToken.tagName, curToken.attributes)
|
||||
|
||||
if (curToken.isStart) {
|
||||
if (this.isTokenHasCloseTag(curToken)) {
|
||||
nestedNodes.push(node)
|
||||
} else {
|
||||
getNodes().push(node)
|
||||
}
|
||||
}
|
||||
|
||||
if (curToken.isEnd) {
|
||||
const lastNestedNode = nestedNodes.pop()
|
||||
|
||||
if (lastNestedNode) {
|
||||
getNodes().push(lastNestedNode)
|
||||
} else {
|
||||
console.error(`Inconsistent tag '${curToken.tagName}'`)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// console.timeEnd('parseTokens')
|
||||
|
||||
return nodes
|
||||
},
|
||||
|
||||
isTokenSupported(token) {
|
||||
return token.isTag && this.tags && this.tags[token.tagName]
|
||||
},
|
||||
|
||||
isTokenHasCloseTag(token) {
|
||||
return this.tags && this.tags[token.tagName] && this.tags[token.tagName].closable
|
||||
},
|
||||
|
||||
tagNode(name, attrs, content = []) {
|
||||
return { tag: name, attrs, content }
|
||||
},
|
||||
|
||||
toTextTokens(text) {
|
||||
const tokens = []
|
||||
const chars = text.split('')
|
||||
let currText = ''
|
||||
|
||||
const flushText = () => {
|
||||
if (currText) {
|
||||
tokens.push(this.textToken(currText))
|
||||
currText = ''
|
||||
}
|
||||
}
|
||||
|
||||
chars.forEach((char) => {
|
||||
if (char === EOL || char === WHITESPACE) {
|
||||
flushText()
|
||||
tokens.push(this.textToken(char))
|
||||
} else {
|
||||
currText += char
|
||||
}
|
||||
})
|
||||
|
||||
if (currText) {
|
||||
tokens.push(this.textToken(currText))
|
||||
}
|
||||
|
||||
return tokens
|
||||
},
|
||||
|
||||
textToken(text) {
|
||||
return { isText: true, text }
|
||||
},
|
||||
|
||||
tagToken(match) {
|
||||
if (typeof match[1] === 'undefined') { // Start tag
|
||||
const tagName = match[2]
|
||||
const attributes = {}
|
||||
const ATTR_RE = new RegExp(`(${attrNameChars}+)?=(["])(${attrValueChars}+)\\2`, 'g')
|
||||
const attrStr = match[0].substr(1 + tagName.length, match[0].length - 2 - tagName.length)
|
||||
|
||||
let attrMatch
|
||||
|
||||
while (attrMatch = ATTR_RE.exec(attrStr)) {
|
||||
if (typeof attrMatch[1] === 'undefined') { // The tag attribute
|
||||
attributes[tagName] = attrMatch[3]
|
||||
} else { // Normal attribute
|
||||
attributes[attrMatch[1]] = attrMatch[3]
|
||||
}
|
||||
}
|
||||
|
||||
return { isStart: true, isTag: true, tagName, attributes, text: match[0] }
|
||||
}
|
||||
|
||||
// End tag
|
||||
return { isEnd: true, isTag: true, tagName: match[1].substr(1, match[1].length - 1) }
|
||||
},
|
||||
|
||||
asTextToken(token) {
|
||||
if (token.isTag && token.isStart) {
|
||||
return this.textToken(token.text)
|
||||
}
|
||||
|
||||
if (token.isTag && token.isEnd) {
|
||||
return this.textToken(`[/${token.tagName}]`)
|
||||
}
|
||||
|
||||
return token
|
||||
},
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
// Benchmark script: times repeated runs of the new parser over the stub text.
const parse = require('../index');

const options = {
  closableTags: ['ch', 'syllable', 'tab']
};

const textStub = require("./test/stub");

const count = 10;
// Results are kept so every run's output stays referenced.
const parsers3 = [];

console.time('newParser');

// The inclusive bound makes this count + 1 runs.
for (let run = 0; run <= count; run += 1) {
  parsers3.push(parse(textStub, options));
}

console.timeEnd('newParser');
|
||||
@@ -0,0 +1,15 @@
|
||||
// Benchmark script: times repeated runs of the old parser over the stub text.
const OldParser = require('./OldParser')

const textStub = require("./test/stub");

const count = 10;
// Results are kept so every run's output stays referenced.
const oldParsers3 = [];

console.time('oldParser');

// The inclusive bound makes this count + 1 runs.
for (let run = 0; run <= count; run += 1) {
  oldParsers3.push(OldParser.parse(textStub));
}

console.timeEnd('oldParser');
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,26 @@
|
||||
const N = "\n".charCodeAt(0);
|
||||
const TAB = "\t".charCodeAt(0);
|
||||
const F = "\f".charCodeAt(0);
|
||||
const R = "\r".charCodeAt(0);
|
||||
|
||||
const EQ = "=".charCodeAt(0);
|
||||
const QUOTEMARK = "\"".charCodeAt(0);
|
||||
const SPACE = " ".charCodeAt(0);
|
||||
|
||||
const OPEN_BRAKET = "[".charCodeAt(0);
|
||||
const CLOSE_BRAKET = "]".charCodeAt(0);
|
||||
|
||||
const SLASH = "/".charCodeAt(0);
|
||||
|
||||
module.exports = {
|
||||
N,
|
||||
F,
|
||||
R,
|
||||
TAB,
|
||||
EQ,
|
||||
QUOTEMARK,
|
||||
SPACE,
|
||||
OPEN_BRAKET,
|
||||
CLOSE_BRAKET,
|
||||
SLASH
|
||||
};
|
||||
@@ -0,0 +1 @@
|
||||
// Entry point: re-exports whatever ./parse exports.
module.exports = require('./parse');
|
||||
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"name": "bbob",
|
||||
"version": "1.0.0",
|
||||
"description": "Fast BB Code parser written in pure javascript, no dependencies",
|
||||
"main": "index.js",
|
||||
"directories": {
|
||||
"test": "test"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "jest"
|
||||
},
|
||||
"author": "Nikolay Kostyurin <jilizart@gmail.com>",
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"jest": "^23.1.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
const Tokenizer = require("./Tokenizer");
|
||||
const Parser = require("./Parser");
|
||||
|
||||
module.exports = function parse(input, options) {
|
||||
const tokenizer = new Tokenizer(input);
|
||||
const tokens = tokenizer.tokenize();
|
||||
const parser = new Parser(tokens, options);
|
||||
const ast = parser.parse();
|
||||
|
||||
return ast
|
||||
};
|
||||
@@ -0,0 +1,23 @@
|
||||
const parse = require('./index');
|
||||
const OldParser = require('./benchmark/OldParser');
|
||||
const tabText = require('./benchmark/test/stub');
|
||||
|
||||
const options = {
|
||||
closableTags: ['ch', 'syllable', 'tab'],
|
||||
allowOnlyTags: ['ch', 'syllable', 'tab'],
|
||||
};
|
||||
|
||||
describe("parse", () => {
  // Text in brackets that contains a space is expected to stay one tag.
  test("tag with spaces", () => {
    const expected = [{tag: 'Verse 2', attrs: {}, content: []}];

    expect(parse(`[Verse 2]`)).toEqual(expected);
  });

  // The new parser must produce the same tree as the old one for the stub text.
  test("same as old parser", () => {
    const fromNewParser = parse(tabText, options);
    const fromOldParser = OldParser.parse(tabText);

    expect(fromNewParser).toEqual(fromOldParser);
  })
});
|
||||
@@ -0,0 +1,24 @@
|
||||
const TOKEN_TYPE_ID = 0;
|
||||
const TOKEN_VALUE_ID = 1;
|
||||
const TOKEN_LINE_ID = 2;
|
||||
const TOKEN_COLUMN_ID = 3;
|
||||
|
||||
const TOKEN_TYPE_WORD = 'word';
|
||||
const TOKEN_TYPE_TAG = 'tag';
|
||||
const TOKEN_TYPE_ATTR_NAME = 'attr-name';
|
||||
const TOKEN_TYPE_ATTR_VALUE = 'attr-value';
|
||||
const TOKEN_TYPE_SPACE = 'space';
|
||||
const TOKEN_TYPE_NEW_LINE = 'new-line';
|
||||
|
||||
module.exports = {
|
||||
TYPE_ID: TOKEN_TYPE_ID,
|
||||
VALUE_ID: TOKEN_VALUE_ID,
|
||||
LINE_ID: TOKEN_LINE_ID,
|
||||
COLUMN_ID: TOKEN_COLUMN_ID,
|
||||
TYPE_WORD: TOKEN_TYPE_WORD,
|
||||
TYPE_TAG: TOKEN_TYPE_TAG,
|
||||
TYPE_ATTR_NAME: TOKEN_TYPE_ATTR_NAME,
|
||||
TYPE_ATTR_VALUE: TOKEN_TYPE_ATTR_VALUE,
|
||||
TYPE_SPACE: TOKEN_TYPE_SPACE,
|
||||
TYPE_NEW_LINE: TOKEN_TYPE_NEW_LINE
|
||||
};
|
||||
Reference in New Issue
Block a user