mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00
Nikolay Kostyurin
2018-06-04 23:18:50 +02:00
parent d4a41482d3
commit edf90de445
18 changed files with 2403 additions and 0 deletions
+7
@@ -0,0 +1,7 @@
{
"lerna": "2.11.0",
"packages": [
"packages/bbob-parser"
],
"version": "independent"
}
+255
@@ -0,0 +1,255 @@
{
"name": "bibob",
"version": "1.0.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"assertion-error": {
"version": "1.1.0",
"resolved": "https://npm.wsmgroup.ru/assertion-error/-/assertion-error-1.1.0.tgz",
"integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==",
"dev": true
},
"balanced-match": {
"version": "1.0.0",
"resolved": "https://npm.wsmgroup.ru/balanced-match/-/balanced-match-1.0.0.tgz",
"integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=",
"dev": true
},
"brace-expansion": {
"version": "1.1.11",
"resolved": "https://npm.wsmgroup.ru/brace-expansion/-/brace-expansion-1.1.11.tgz",
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
"dev": true,
"requires": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
}
},
"browser-stdout": {
"version": "1.3.1",
"resolved": "https://npm.wsmgroup.ru/browser-stdout/-/browser-stdout-1.3.1.tgz",
"integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==",
"dev": true
},
"chai": {
"version": "4.1.2",
"resolved": "https://npm.wsmgroup.ru/chai/-/chai-4.1.2.tgz",
"integrity": "sha1-D2RYS6ZC8PKs4oBiefTwbKI61zw=",
"dev": true,
"requires": {
"assertion-error": "^1.0.1",
"check-error": "^1.0.1",
"deep-eql": "^3.0.0",
"get-func-name": "^2.0.0",
"pathval": "^1.0.0",
"type-detect": "^4.0.0"
}
},
"check-error": {
"version": "1.0.2",
"resolved": "https://npm.wsmgroup.ru/check-error/-/check-error-1.0.2.tgz",
"integrity": "sha1-V00xLt2Iu13YkS6Sht1sCu1KrII=",
"dev": true
},
"commander": {
"version": "2.15.1",
"resolved": "https://npm.wsmgroup.ru/commander/-/commander-2.15.1.tgz",
"integrity": "sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==",
"dev": true
},
"concat-map": {
"version": "0.0.1",
"resolved": "https://npm.wsmgroup.ru/concat-map/-/concat-map-0.0.1.tgz",
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=",
"dev": true
},
"debug": {
"version": "3.1.0",
"resolved": "https://npm.wsmgroup.ru/debug/-/debug-3.1.0.tgz",
"integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==",
"dev": true,
"requires": {
"ms": "2.0.0"
}
},
"deep-eql": {
"version": "3.0.1",
"resolved": "https://npm.wsmgroup.ru/deep-eql/-/deep-eql-3.0.1.tgz",
"integrity": "sha512-+QeIQyN5ZuO+3Uk5DYh6/1eKO0m0YmJFGNmFHGACpf1ClL1nmlV/p4gNgbl2pJGxgXb4faqo6UE+M5ACEMyVcw==",
"dev": true,
"requires": {
"type-detect": "^4.0.0"
}
},
"diff": {
"version": "3.5.0",
"resolved": "https://npm.wsmgroup.ru/diff/-/diff-3.5.0.tgz",
"integrity": "sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==",
"dev": true
},
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://npm.wsmgroup.ru/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=",
"dev": true
},
"fs.realpath": {
"version": "1.0.0",
"resolved": "https://npm.wsmgroup.ru/fs.realpath/-/fs.realpath-1.0.0.tgz",
"integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=",
"dev": true
},
"get-func-name": {
"version": "2.0.0",
"resolved": "https://npm.wsmgroup.ru/get-func-name/-/get-func-name-2.0.0.tgz",
"integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE=",
"dev": true
},
"glob": {
"version": "7.1.2",
"resolved": "https://npm.wsmgroup.ru/glob/-/glob-7.1.2.tgz",
"integrity": "sha512-MJTUg1kjuLeQCJ+ccE4Vpa6kKVXkPYJ2mOCQyUuKLcLQsdrMCpBPUi8qVE6+YuaJkozeA9NusTAw3hLr8Xe5EQ==",
"dev": true,
"requires": {
"fs.realpath": "^1.0.0",
"inflight": "^1.0.4",
"inherits": "2",
"minimatch": "^3.0.4",
"once": "^1.3.0",
"path-is-absolute": "^1.0.0"
}
},
"growl": {
"version": "1.10.5",
"resolved": "https://npm.wsmgroup.ru/growl/-/growl-1.10.5.tgz",
"integrity": "sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==",
"dev": true
},
"has-flag": {
"version": "3.0.0",
"resolved": "https://npm.wsmgroup.ru/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=",
"dev": true
},
"he": {
"version": "1.1.1",
"resolved": "https://npm.wsmgroup.ru/he/-/he-1.1.1.tgz",
"integrity": "sha1-k0EP0hsAlzUVH4howvJx80J+I/0=",
"dev": true
},
"inflight": {
"version": "1.0.6",
"resolved": "https://npm.wsmgroup.ru/inflight/-/inflight-1.0.6.tgz",
"integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=",
"dev": true,
"requires": {
"once": "^1.3.0",
"wrappy": "1"
}
},
"inherits": {
"version": "2.0.3",
"resolved": "https://npm.wsmgroup.ru/inherits/-/inherits-2.0.3.tgz",
"integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=",
"dev": true
},
"minimatch": {
"version": "3.0.4",
"resolved": "https://npm.wsmgroup.ru/minimatch/-/minimatch-3.0.4.tgz",
"integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
"dev": true,
"requires": {
"brace-expansion": "^1.1.7"
}
},
"minimist": {
"version": "0.0.8",
"resolved": "https://npm.wsmgroup.ru/minimist/-/minimist-0.0.8.tgz",
"integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=",
"dev": true
},
"mkdirp": {
"version": "0.5.1",
"resolved": "https://npm.wsmgroup.ru/mkdirp/-/mkdirp-0.5.1.tgz",
"integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=",
"dev": true,
"requires": {
"minimist": "0.0.8"
}
},
"mocha": {
"version": "5.2.0",
"resolved": "https://npm.wsmgroup.ru/mocha/-/mocha-5.2.0.tgz",
"integrity": "sha512-2IUgKDhc3J7Uug+FxMXuqIyYzH7gJjXECKe/w43IGgQHTSj3InJi+yAA7T24L9bQMRKiUEHxEX37G5JpVUGLcQ==",
"dev": true,
"requires": {
"browser-stdout": "1.3.1",
"commander": "2.15.1",
"debug": "3.1.0",
"diff": "3.5.0",
"escape-string-regexp": "1.0.5",
"glob": "7.1.2",
"growl": "1.10.5",
"he": "1.1.1",
"minimatch": "3.0.4",
"mkdirp": "0.5.1",
"supports-color": "5.4.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://npm.wsmgroup.ru/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=",
"dev": true
},
"once": {
"version": "1.4.0",
"resolved": "https://npm.wsmgroup.ru/once/-/once-1.4.0.tgz",
"integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=",
"dev": true,
"requires": {
"wrappy": "1"
}
},
"path-is-absolute": {
"version": "1.0.1",
"resolved": "https://npm.wsmgroup.ru/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
"integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=",
"dev": true
},
"pathval": {
"version": "1.1.0",
"resolved": "https://npm.wsmgroup.ru/pathval/-/pathval-1.1.0.tgz",
"integrity": "sha1-uULm1L3mUwBe9rcTYd74cn0GReA=",
"dev": true
},
"supports-color": {
"version": "5.4.0",
"resolved": "https://npm.wsmgroup.ru/supports-color/-/supports-color-5.4.0.tgz",
"integrity": "sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==",
"dev": true,
"requires": {
"has-flag": "^3.0.0"
}
},
"type-detect": {
"version": "4.0.8",
"resolved": "https://npm.wsmgroup.ru/type-detect/-/type-detect-4.0.8.tgz",
"integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==",
"dev": true
},
"typescript": {
"version": "2.9.1",
"resolved": "https://npm.wsmgroup.ru/typescript/-/typescript-2.9.1.tgz",
"integrity": "sha512-h6pM2f/GDchCFlldnriOhs1QHuwbnmj6/v7499eMHqPeW4V2G0elua2eIc2nu8v2NdHV0Gm+tzX83Hr6nUFjQA==",
"dev": true
},
"wrappy": {
"version": "1.0.2",
"resolved": "https://npm.wsmgroup.ru/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=",
"dev": true
}
}
}
+5
@@ -0,0 +1,5 @@
{
"devDependencies": {
"lerna": "^2.11.0"
}
}
+4
@@ -0,0 +1,4 @@
package-lock.json
coverage
lib
dist
+182
@@ -0,0 +1,182 @@
const Tokenizer = require("./Tokenizer");
const TokenType = Tokenizer.TYPE;
const TokenChar = Tokenizer.CHAR;
const getCharCode = Tokenizer.getCharCode;
const isTextToken = (token) => {
const type = token[Tokenizer.TOKEN.TYPE_ID];
return type === TokenType.SPACE || type === TokenType.NEW_LINE || type === TokenType.WORD
};
const isTagToken = (token) => token[Tokenizer.TOKEN.TYPE_ID] === TokenType.TAG;
const isTagStart = (token) => !isTagEnd(token);
const isTagEnd = (token) => getTokenValue(token).charCodeAt(0) === TokenChar.SLASH;
const isAttrNameToken = (token) => token[Tokenizer.TOKEN.TYPE_ID] === TokenType.ATTR_NAME;
const isAttrValueToken = (token) => token[Tokenizer.TOKEN.TYPE_ID] === TokenType.ATTR_VALUE;
const getTagName = (token) => {
const value = getTokenValue(token);
return isTagEnd(token) ? value.slice(1) : value
};
const convertTagToText = (token) => {
let text = getCharCode(TokenChar.OPEN_BRAKET);
if (isTagEnd(token)) {
text += getCharCode(TokenChar.SLASH)
}
text += getTokenValue(token);
text += getCharCode(TokenChar.CLOSE_BRAKET);
return text
};
const getTokenValue = (token) => token[Tokenizer.TOKEN.VALUE_ID];
const createTagNode = (name, attrs = {}, content = []) => ({tag: name, attrs, content});
/**
 * AST node shape produced by createTagNode:
 *
 * {
 *   tag: 'div',
 *   attrs: {
 *     class: 'foo'
 *   },
 *   content: ['hello world!']
 * }
 */
module.exports = class Parser {
constructor(tokens, options = {}) {
this.tokens = tokens;
this.options = options
}
parse() {
const tokens = this.tokens;
const nodes = [];
const nestedNodes = [];
const curTags = [];
const curTagsAttrName = [];
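// curTags and curTagsAttrName are stacks tracking the tag and attribute
// currently being filled; nestedNodes holds open closable tags whose
// content is still being collected.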
const getCurTag = () => {
if (curTags.length) {
return curTags[curTags.length - 1]
}
return null
};
const createCurTag = (token) => {
curTags.push(createTagNode(getTokenValue(token)))
};
const clearCurTag = () => {
if (curTags.length) {
curTags.pop();
clearCurTagAttrName()
}
};
const getCurTagAttrName = () => {
if (curTagsAttrName.length) {
return curTagsAttrName[curTagsAttrName.length - 1]
}
return null
};
const createCurTagAttrName = (token) => {
curTagsAttrName.push(getTokenValue(token))
};
const clearCurTagAttrName = () => {
if (curTagsAttrName.length) {
curTagsAttrName.pop()
}
};
const getNodes = () => {
if (nestedNodes.length) {
const nestedNode = nestedNodes[nestedNodes.length - 1];
return nestedNode.content
}
return nodes
};
let token;
while ((token = tokens.shift())) {
if (isTagToken(token)) {
if (this.isAllowedTag(getTagName(token))) {
// [tag]
if (isTagStart(token)) {
createCurTag(token);
if (this.isCloseTag(getTokenValue(token))) {
nestedNodes.push(getCurTag())
} else {
getNodes().push(getCurTag());
clearCurTag()
}
}
// [/tag]
if (isTagEnd(token)) {
clearCurTag();
const lastNestedNode = nestedNodes.pop();
if (lastNestedNode) {
getNodes().push(lastNestedNode)
} else {
console.warn(`Inconsistent tag '${getTokenValue(token)}'`);
}
}
} else {
getNodes().push(convertTagToText(token))
}
}
if (getCurTag()) {
if (isAttrNameToken(token)) {
createCurTagAttrName(token);
getCurTag().attrs[getCurTagAttrName()] = null
} else if (isAttrValueToken(token)) {
getCurTag().attrs[getCurTagAttrName()] = getTokenValue(token);
clearCurTagAttrName()
} else if (isTextToken(token)) {
getCurTag().content.push(getTokenValue(token))
}
} else if (isTextToken(token)) {
getNodes().push(getTokenValue(token))
}
}
return nodes
}
isCloseTag(value) {
return this.options.closableTags && this.options.closableTags.indexOf(value) >= 0
}
isAllowedTag(value) {
if (this.options.allowOnlyTags && this.options.allowOnlyTags.length) {
return this.options.allowOnlyTags.indexOf(value) >= 0
}
return true
}
};
+12
@@ -0,0 +1,12 @@
const Parser = require('./Parser');
const TOKEN = require('./token');
describe("Parser", () => {
test("parse paired tags tokens", () => {
const parser = new Parser([
[TOKEN.TYPE_TAG, 'ch'],
[TOKEN.TYPE_TAG, '/ch']
], { closableTags: ['ch'] });
expect(parser.parse()).toEqual([{ tag: 'ch', attrs: {}, content: [] }]);
})
});
+214
@@ -0,0 +1,214 @@
const CHAR = require('./char');
const TOKEN = require('./token');
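// NOTE: despite its name, getCharCode maps a char code back to a string
// (it is an alias of String.fromCharCode). Tokens are flat tuples:
// [TOKEN.TYPE_ID, TOKEN.VALUE_ID, TOKEN.LINE_ID, TOKEN.COLUMN_ID].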
const getCharCode = String.fromCharCode;
class Tokenizer {
constructor(input) {
this.buffer = input;
this.colPos = 0;
this.rowPos = 0;
this.index = 0;
}
tokenize() {
let wordToken = this.createWordToken('');
let tagToken = null;
let attrNameToken = null;
let attrValueToken = null;
let attrTokens = [];
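// preallocate roughly one token per two input chars; trimmed to the real count after the loop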
let tokens = new Array(Math.floor(this.buffer.length / 2));
let tokenIndex = -1;
const flushWord = () => {
if (wordToken[TOKEN.VALUE_ID]) {
tokenIndex++;
tokens[tokenIndex] = wordToken;
wordToken = this.createWordToken('')
}
};
const flushTag = () => {
if (tagToken !== null) {
tokenIndex++;
tokens[tokenIndex] = tagToken;
tagToken = null;
}
};
const flushAttrName = () => {
if (attrNameToken) {
attrTokens.push(attrNameToken);
attrNameToken = null;
}
};
const flushAttrValue = () => {
if (attrValueToken) {
attrTokens.push(attrValueToken);
attrValueToken = null
}
};
const flushAttrs = () => {
if (attrTokens.length) {
attrTokens.forEach(attrToken => {
tokenIndex++;
tokens[tokenIndex] = attrToken
});
attrTokens = [];
}
};
// console.time('Lexer.tokenize');
while (this.index < this.buffer.length) {
const charCode = this.buffer.charCodeAt(this.index);
switch (charCode) {
case CHAR.TAB:
case CHAR.SPACE:
flushWord();
if (tagToken) {
attrNameToken = this.createAttrNameToken('');
}
const spaceCode = charCode === CHAR.TAB ? '    ' : ' '; // a tab becomes four spaces (assumed width)
tokenIndex++;
tokens[tokenIndex] = this.createSpaceToken(spaceCode);
this.colPos++;
break;
case CHAR.N:
flushWord();
tokenIndex++;
tokens[tokenIndex] = this.createNewLineToken(getCharCode(charCode));
this.rowPos++;
this.colPos = 0;
break;
case CHAR.OPEN_BRAKET:
flushWord();
tagToken = this.createTagToken('');
this.colPos++;
break;
case CHAR.CLOSE_BRAKET:
flushTag();
flushAttrName();
flushAttrValue();
flushAttrs();
this.colPos++;
break;
case CHAR.EQ:
if (tagToken) {
attrValueToken = this.createAttrValueToken('')
} else {
wordToken[TOKEN.VALUE_ID] += getCharCode(charCode);
}
this.colPos++;
break;
case CHAR.QUOTEMARK:
if (attrValueToken && attrValueToken[TOKEN.VALUE_ID].length > 0) {
flushAttrName();
flushAttrValue();
} else if (tagToken === null) {
wordToken[TOKEN.VALUE_ID] += getCharCode(charCode);
}
this.colPos++;
break;
default:
if (tagToken && attrValueToken) {
attrValueToken[TOKEN.VALUE_ID] += getCharCode(charCode)
} else if (tagToken && attrNameToken) {
attrNameToken[TOKEN.VALUE_ID] += getCharCode(charCode)
} else if (tagToken) {
tagToken[TOKEN.VALUE_ID] += getCharCode(charCode)
} else {
wordToken[TOKEN.VALUE_ID] += getCharCode(charCode);
}
this.colPos++;
break;
}
this.index++;
}
flushWord();
tokens.length = tokenIndex + 1; // tokenIndex points at the last written slot, so the count is tokenIndex + 1
return tokens;
}
// positions are stored as [type, value, line, column] to match TOKEN.LINE_ID / TOKEN.COLUMN_ID
createWordToken(value) {
return [TOKEN.TYPE_WORD, value, this.rowPos, this.colPos]
}
createTagToken(value) {
return [TOKEN.TYPE_TAG, value, this.rowPos, this.colPos]
}
createAttrNameToken(value) {
return [TOKEN.TYPE_ATTR_NAME, value, this.rowPos, this.colPos]
}
createAttrValueToken(value) {
return [TOKEN.TYPE_ATTR_VALUE, value, this.rowPos, this.colPos]
}
createSpaceToken(value) {
return [TOKEN.TYPE_SPACE, value, this.rowPos, this.colPos]
}
createNewLineToken(value) {
return [TOKEN.TYPE_NEW_LINE, value, this.rowPos, this.colPos]
}
}
// warm up the tokenizer to eliminate code branches that never execute
new Tokenizer(`[b param="hello"]Sample text[/b]\n\t[Chorus]`).tokenize();
module.exports = Tokenizer;
module.exports.CHAR = CHAR;
module.exports.TYPE = {
WORD: TOKEN.TYPE_WORD,
TAG: TOKEN.TYPE_TAG,
ATTR_NAME: TOKEN.TYPE_ATTR_NAME,
ATTR_VALUE: TOKEN.TYPE_ATTR_VALUE,
SPACE: TOKEN.TYPE_SPACE,
NEW_LINE: TOKEN.TYPE_NEW_LINE,
};
module.exports.TOKEN = {
TYPE_ID: TOKEN.TYPE_ID,
VALUE_ID: TOKEN.VALUE_ID,
LINE_ID: TOKEN.LINE_ID,
COLUMN_ID: TOKEN.COLUMN_ID,
};
module.exports.getCharCode = getCharCode;
+14
@@ -0,0 +1,14 @@
const Tokenizer = require('./Tokenizer');
describe("Tokenizer", () => {
it("tokenize single tag", () => {
const input = `[SingleTag]`;
const tokens = new Tokenizer(input).tokenize();
expect(tokens).toBeInstanceOf(Array);
expect(tokens[0]).toEqual(['tag', 'SingleTag', 0, 0])
})
});
+215
@@ -0,0 +1,215 @@
const attrNameChars = '[a-zA-Z0-9\\.\\-_:;/]'
const attrValueChars = '[a-zA-Z0-9\\.\\-_:;#/\\s]'
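// Group 1 captures a closing tag ("/name"); group 2 an opening tag name.
// Groups 3-8 capture the optional ="..." tag attribute and name="..."
// attribute pairs, with \4 and \7 backreferencing the opening quote.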
const pattern = `\\[(\/\\w*)\\]|\\[(\\w*)+(=(["])${attrValueChars}*\\4)?( (${attrNameChars}+)?=(["])(${attrValueChars}+)\\7)*\\]`
const TAG_RE = new RegExp(pattern, 'g')
const EOL = '\n'
const WHITESPACE = ' '
const isNode = el => typeof el === 'object' && el.tag
const isStringNode = el => typeof el === 'string'
const isChordNode = el => el.tag === 'ch'
const isTabNode = el => el.tag === 'tab'
const isSyllableNode = el => el.tag === 'syllable'
const isTextNode = el => el.tag === 'text'
const isEOL = el => el === EOL
const getNodeLength = node => {
if (isNode(node)) {
return node.content.reduce((count, contentNode) => count + getNodeLength(contentNode), 0)
} else if (isStringNode(node)) {
return node.length
}
return 0
}
const tagsDefinition = {
ch: {
closable: true,
},
syllable: {
closable: true,
},
tab: {
closable: true,
},
}
// @TODO: split into a parser and a tokenizer; nodes and tokens should live separately
/**
* Parses tab content with BB codes into an AST tree [{tag:'ch', attrs:{..}, content:[...]}]
*
* @example
*
* textTabParser
* .parse('[Intro] [ch app=123]G[/ch] hello world', {ch: {closable: true}})
*
*/
module.exports = {
parse(str, tags = tagsDefinition) {
this.tags = tags
const tokens = this.tokenize(str)
const ast = this.parseTokens(tokens)
return ast
},
tokenize(str) {
let tokens = []
let match
let lastIndex = 0
// console.time('tokenize')
while (match = TAG_RE.exec(str)) {
const delta = match.index - lastIndex
if (delta > 0) {
tokens = tokens.concat(this.toTextTokens(str.substr(lastIndex, delta)))
}
tokens.push(this.tagToken(match))
lastIndex = TAG_RE.lastIndex
}
const delta = str.length - lastIndex
if (delta > 0) {
tokens = tokens.concat(this.toTextTokens(str.substr(lastIndex, delta)))
}
// console.timeEnd('tokenize')
return tokens
},
parseTokens(tokens) {
const nodes = []
let curToken
const nestedNodes = []
function getNodes() {
if (nestedNodes.length) {
const nestedNode = nestedNodes[nestedNodes.length - 1]
return nestedNode.content
}
return nodes
}
// console.time('parseTokens')
while (curToken = tokens.shift()) {
curToken = this.isTokenSupported(curToken) ? curToken : this.asTextToken(curToken)
if (curToken.isText) {
getNodes().push(curToken.text)
}
if (curToken.isTag) {
const node = this.tagNode(curToken.tagName, curToken.attributes)
if (curToken.isStart) {
if (this.isTokenHasCloseTag(curToken)) {
nestedNodes.push(node)
} else {
getNodes().push(node)
}
}
if (curToken.isEnd) {
const lastNestedNode = nestedNodes.pop()
if (lastNestedNode) {
getNodes().push(lastNestedNode)
} else {
console.error(`Inconsistent tag '${curToken.tagName}'`)
}
}
}
}
// console.timeEnd('parseTokens')
return nodes
},
isTokenSupported(token) {
return token.isTag && this.tags && this.tags[token.tagName]
},
isTokenHasCloseTag(token) {
return this.tags && this.tags[token.tagName] && this.tags[token.tagName].closable
},
tagNode(name, attrs, content = []) {
return { tag: name, attrs, content }
},
toTextTokens(text) {
const tokens = []
const chars = text.split('')
let currText = ''
const flushText = () => {
if (currText) {
tokens.push(this.textToken(currText))
currText = ''
}
}
chars.forEach((char) => {
if (char === EOL || char === WHITESPACE) {
flushText()
tokens.push(this.textToken(char))
} else {
currText += char
}
})
if (currText) {
tokens.push(this.textToken(currText))
}
return tokens
},
textToken(text) {
return { isText: true, text }
},
tagToken(match) {
if (typeof match[1] === 'undefined') { // Start tag
const tagName = match[2]
const attributes = {}
const ATTR_RE = new RegExp(`(${attrNameChars}+)?=(["])(${attrValueChars}+)\\2`, 'g')
const attrStr = match[0].substr(1 + tagName.length, match[0].length - 2 - tagName.length)
let attrMatch
while (attrMatch = ATTR_RE.exec(attrStr)) {
if (typeof attrMatch[1] === 'undefined') { // The tag attribute
attributes[tagName] = attrMatch[3]
} else { // Normal attribute
attributes[attrMatch[1]] = attrMatch[3]
}
}
return { isStart: true, isTag: true, tagName, attributes, text: match[0] }
}
// End tag
return { isEnd: true, isTag: true, tagName: match[1].substr(1, match[1].length - 1) }
},
asTextToken(token) {
if (token.isTag && token.isStart) {
return this.textToken(token.text)
}
if (token.isTag && token.isEnd) {
return this.textToken(`[/${token.tagName}]`)
}
return token
},
}
@@ -0,0 +1,19 @@
const parse = require('../index');
const options = {
closableTags: ['ch', 'syllable', 'tab']
};
const textStub = require("./test/stub");
const count = 10;
const parsers3 = [];
console.time('newParser');
for (let i = 0; i <= count; i++) {
const parser3 = parse(textStub, options);
parsers3.push(parser3);
}
console.timeEnd('newParser');
// console.log(JSON.stringify(parsers3));
@@ -0,0 +1,15 @@
const OldParser = require('./OldParser')
const textStub = require("./test/stub");
const count = 10;
const oldParsers3 = [];
console.time('oldParser');
for (let i = 0; i <= count; i++) {
const oldParser3 = OldParser.parse(textStub);
oldParsers3.push(oldParser3);
}
console.timeEnd('oldParser');
// console.log(JSON.stringify(oldParsers3));
File diff suppressed because it is too large
+26
@@ -0,0 +1,26 @@
const N = "\n".charCodeAt(0);
const TAB = "\t".charCodeAt(0);
const F = "\f".charCodeAt(0);
const R = "\r".charCodeAt(0);
const EQ = "=".charCodeAt(0);
const QUOTEMARK = "\"".charCodeAt(0);
const SPACE = " ".charCodeAt(0);
const OPEN_BRAKET = "[".charCodeAt(0);
const CLOSE_BRAKET = "]".charCodeAt(0);
const SLASH = "/".charCodeAt(0);
module.exports = {
N,
F,
R,
TAB,
EQ,
QUOTEMARK,
SPACE,
OPEN_BRAKET,
CLOSE_BRAKET,
SLASH
};
+1
@@ -0,0 +1 @@
module.exports = require('./parse');
+17
@@ -0,0 +1,17 @@
{
"name": "bbob",
"version": "1.0.0",
"description": "Fast BB Code parser written in pure javascript, no dependencies",
"main": "index.js",
"directories": {
"test": "test"
},
"scripts": {
"test": "jest"
},
"author": "Nikolay Kostyurin <jilizart@gmail.com>",
"license": "MIT",
"devDependencies": {
"jest": "^23.1.0"
}
}
+11
@@ -0,0 +1,11 @@
const Tokenizer = require("./Tokenizer");
const Parser = require("./Parser");
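// Usage sketch: paired tags must be listed in options.closableTags for their
// content to nest, e.g.
//   parse('[b]bold[/b]', { closableTags: ['b'] })
//   // → [{ tag: 'b', attrs: {}, content: ['bold'] }]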
module.exports = function parse(input, options) {
const tokenizer = new Tokenizer(input);
const tokens = tokenizer.tokenize();
const parser = new Parser(tokens, options);
const ast = parser.parse();
return ast
};
+23
@@ -0,0 +1,23 @@
const parse = require('./index');
const OldParser = require('./benchmark/OldParser');
const tabText = require('./benchmark/test/stub');
const options = {
closableTags: ['ch', 'syllable', 'tab'],
allowOnlyTags: ['ch', 'syllable', 'tab'],
};
describe("parse", () => {
test("tag with spaces", () => {
const ast = parse(`[Verse 2]`);
expect(ast).toEqual([{tag: 'Verse 2', attrs: {}, content: []}]);
});
test("same as old parser", () => {
const ast1 = parse(tabText, options);
const ast2 = OldParser.parse(tabText);
expect(ast1).toEqual(ast2);
})
});
+24
@@ -0,0 +1,24 @@
const TOKEN_TYPE_ID = 0;
const TOKEN_VALUE_ID = 1;
const TOKEN_LINE_ID = 2;
const TOKEN_COLUMN_ID = 3;
const TOKEN_TYPE_WORD = 'word';
const TOKEN_TYPE_TAG = 'tag';
const TOKEN_TYPE_ATTR_NAME = 'attr-name';
const TOKEN_TYPE_ATTR_VALUE = 'attr-value';
const TOKEN_TYPE_SPACE = 'space';
const TOKEN_TYPE_NEW_LINE = 'new-line';
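// Example token: ['tag', 'b', 0, 0] (a "b" tag token at line 0, column 0)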
module.exports = {
TYPE_ID: TOKEN_TYPE_ID,
VALUE_ID: TOKEN_VALUE_ID,
LINE_ID: TOKEN_LINE_ID,
COLUMN_ID: TOKEN_COLUMN_ID,
TYPE_WORD: TOKEN_TYPE_WORD,
TYPE_TAG: TOKEN_TYPE_TAG,
TYPE_ATTR_NAME: TOKEN_TYPE_ATTR_NAME,
TYPE_ATTR_VALUE: TOKEN_TYPE_ATTR_VALUE,
TYPE_SPACE: TOKEN_TYPE_SPACE,
TYPE_NEW_LINE: TOKEN_TYPE_NEW_LINE
};