mirror of
https://github.com/tenrok/BBob.git
synced 2026-05-21 13:24:05 +03:00
254 lines
6.0 KiB
JavaScript
254 lines
6.0 KiB
JavaScript
const {
|
|
getChar,
|
|
OPEN_BRAKET,
|
|
CLOSE_BRAKET, EQ, TAB, SPACE, N, QUOTEMARK,
|
|
PLACEHOLDER_SPACE, PLACEHOLDER_SPACE_TAB,
|
|
} = require('./char');
|
|
const TOKEN = require('./token');
|
|
|
|
class Tokenizer {
|
|
constructor(input) {
|
|
this.buffer = input;
|
|
this.colPos = 0;
|
|
this.rowPos = 0;
|
|
this.index = 0;
|
|
|
|
this.tokenIndex = -1;
|
|
this.tokens = [];
|
|
}
|
|
|
|
appendToken(token) {
|
|
this.tokenIndex += 1;
|
|
this.tokens[this.tokenIndex] = token;
|
|
}
|
|
|
|
nextCol() {
|
|
this.colPos += 1;
|
|
}
|
|
|
|
nextLine() {
|
|
this.rowPos += 1;
|
|
}
|
|
|
|
tokenize() {
|
|
let wordToken = null;
|
|
let tagToken = null;
|
|
let attrNameToken = null;
|
|
let attrValueToken = null;
|
|
let attrTokens = [];
|
|
this.tokens = new Array(Math.floor(this.buffer.length / 2));
|
|
|
|
const flushWord = () => {
|
|
if (wordToken && wordToken[TOKEN.VALUE_ID]) {
|
|
this.appendToken(wordToken);
|
|
wordToken = this.createWordToken('');
|
|
}
|
|
};
|
|
|
|
const createWord = (value, line, row) => {
|
|
if (!wordToken) {
|
|
wordToken = this.createWordToken(value, line, row);
|
|
}
|
|
};
|
|
|
|
const flushTag = () => {
|
|
if (tagToken !== null) {
|
|
// [] and [=] tag case
|
|
if (!tagToken[TOKEN.VALUE_ID]) {
|
|
const value = attrValueToken ? getChar(EQ) : '';
|
|
const word = getChar(OPEN_BRAKET) + value + getChar(CLOSE_BRAKET);
|
|
|
|
createWord('', 0, 0);
|
|
wordToken[TOKEN.VALUE_ID] += word;
|
|
|
|
tagToken = null;
|
|
|
|
if (attrValueToken) {
|
|
attrValueToken = null;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if (attrNameToken && !attrValueToken) {
|
|
tagToken[TOKEN.VALUE_ID] += PLACEHOLDER_SPACE + attrNameToken[TOKEN.VALUE_ID];
|
|
attrNameToken = null;
|
|
}
|
|
|
|
this.appendToken(tagToken);
|
|
tagToken = null;
|
|
}
|
|
};
|
|
|
|
const flushUnclosedTag = () => {
|
|
if (tagToken !== null) {
|
|
const value = tagToken[TOKEN.VALUE_ID] + (attrValueToken ? getChar(EQ) : '');
|
|
|
|
tagToken[TOKEN.TYPE_ID] = TOKEN.TYPE_WORD;
|
|
tagToken[TOKEN.VALUE_ID] = getChar(OPEN_BRAKET) + value;
|
|
|
|
this.appendToken(tagToken);
|
|
|
|
tagToken = null;
|
|
|
|
if (attrValueToken) {
|
|
attrValueToken = null;
|
|
}
|
|
}
|
|
};
|
|
|
|
const flushAttrNames = () => {
|
|
if (attrNameToken) {
|
|
attrTokens.push(attrNameToken);
|
|
attrNameToken = null;
|
|
}
|
|
|
|
if (attrValueToken) {
|
|
attrTokens.push(attrValueToken);
|
|
attrValueToken = null;
|
|
}
|
|
};
|
|
|
|
const flushAttrs = () => {
|
|
if (attrTokens.length) {
|
|
attrTokens.forEach(this.appendToken.bind(this));
|
|
attrTokens = [];
|
|
}
|
|
};
|
|
|
|
// console.time('Lexer.tokenize');
|
|
|
|
while (this.index < this.buffer.length) {
|
|
const charCode = this.buffer.charCodeAt(this.index);
|
|
|
|
switch (charCode) {
|
|
case TAB:
|
|
case SPACE:
|
|
flushWord();
|
|
|
|
if (tagToken) {
|
|
attrNameToken = this.createAttrNameToken('');
|
|
} else {
|
|
const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE;
|
|
|
|
this.appendToken(this.createSpaceToken(spaceCode));
|
|
}
|
|
this.nextCol();
|
|
break;
|
|
|
|
case N:
|
|
flushWord();
|
|
this.appendToken(this.createNewLineToken(getChar(charCode)));
|
|
|
|
this.nextLine();
|
|
this.colPos = 0;
|
|
break;
|
|
|
|
case OPEN_BRAKET:
|
|
flushWord();
|
|
tagToken = this.createTagToken('');
|
|
|
|
this.nextCol();
|
|
break;
|
|
|
|
case CLOSE_BRAKET:
|
|
flushTag();
|
|
flushAttrNames();
|
|
flushAttrs();
|
|
|
|
this.nextCol();
|
|
break;
|
|
|
|
case EQ:
|
|
if (tagToken) {
|
|
attrValueToken = this.createAttrValueToken('');
|
|
} else {
|
|
wordToken[TOKEN.VALUE_ID] += getChar(charCode);
|
|
}
|
|
|
|
this.nextCol();
|
|
break;
|
|
|
|
case QUOTEMARK:
|
|
if (attrValueToken && attrValueToken[TOKEN.VALUE_ID] > 0) {
|
|
flushAttrNames();
|
|
} else if (tagToken === null) {
|
|
wordToken[TOKEN.VALUE_ID] += getChar(charCode);
|
|
}
|
|
|
|
this.nextCol();
|
|
break;
|
|
|
|
default:
|
|
if (tagToken && attrValueToken) {
|
|
attrValueToken[TOKEN.VALUE_ID] += getChar(charCode);
|
|
} else if (tagToken && attrNameToken) {
|
|
attrNameToken[TOKEN.VALUE_ID] += getChar(charCode);
|
|
} else if (tagToken) {
|
|
tagToken[TOKEN.VALUE_ID] += getChar(charCode);
|
|
} else {
|
|
createWord();
|
|
|
|
wordToken[TOKEN.VALUE_ID] += getChar(charCode);
|
|
}
|
|
|
|
this.nextCol();
|
|
break;
|
|
}
|
|
|
|
this.index += 1;
|
|
}
|
|
|
|
flushWord();
|
|
flushUnclosedTag();
|
|
|
|
this.tokens.length = this.tokenIndex + 1;
|
|
|
|
return this.tokens;
|
|
}
|
|
|
|
createWordToken(value = '', line = this.colPos, row = this.rowPos) {
|
|
return [TOKEN.TYPE_WORD, value, line, row];
|
|
}
|
|
|
|
createTagToken(value, line = this.colPos, row = this.rowPos) {
|
|
return [TOKEN.TYPE_TAG, value, line, row];
|
|
}
|
|
|
|
createAttrNameToken(value, line = this.colPos, row = this.rowPos) {
|
|
return [TOKEN.TYPE_ATTR_NAME, value, line, row];
|
|
}
|
|
|
|
createAttrValueToken(value, line = this.colPos, row = this.rowPos) {
|
|
return [TOKEN.TYPE_ATTR_VALUE, value, line, row];
|
|
}
|
|
|
|
createSpaceToken(value, line = this.colPos, row = this.rowPos) {
|
|
return [TOKEN.TYPE_SPACE, value, line, row];
|
|
}
|
|
|
|
createNewLineToken(value, line = this.colPos, row = this.rowPos) {
|
|
return [TOKEN.TYPE_NEW_LINE, value, line, row];
|
|
}
|
|
}
|
|
|
|
// warm up tokenizer to elimitate code branches that never execute
|
|
new Tokenizer('[b param="hello"]Sample text[/b]\n\t[Chorus 2]').tokenize();
|
|
|
|
module.exports = Tokenizer;
|
|
module.exports.TYPE = {
|
|
WORD: TOKEN.TYPE_WORD,
|
|
TAG: TOKEN.TYPE_TAG,
|
|
ATTR_NAME: TOKEN.TYPE_ATTR_NAME,
|
|
ATTR_VALUE: TOKEN.TYPE_ATTR_VALUE,
|
|
SPACE: TOKEN.TYPE_SPACE,
|
|
NEW_LINE: TOKEN.TYPE_NEW_LINE,
|
|
};
|
|
module.exports.TOKEN = {
|
|
TYPE_ID: TOKEN.TYPE_ID,
|
|
VALUE_ID: TOKEN.VALUE_ID,
|
|
LINE_ID: TOKEN.LINE_ID,
|
|
COLUMN_ID: TOKEN.COLUMN_ID,
|
|
};
|
|
|