mirror of
https://github.com/tenrok/BBob.git
synced 2026-05-15 11:59:37 +03:00
344 lines
8.4 KiB
JavaScript
344 lines
8.4 KiB
JavaScript
const {
|
|
getChar,
|
|
OPEN_BRAKET,
|
|
CLOSE_BRAKET, EQ, TAB, SPACE, N, QUOTEMARK,
|
|
PLACEHOLDER_SPACE, PLACEHOLDER_SPACE_TAB,
|
|
SLASH,
|
|
BACKSLASH,
|
|
} = require('./char');
|
|
const Token = require('./Token');
|
|
|
|
/**
 * Builds a Token instance from its four constructor arguments.
 *
 * @param {*} type - token type, passed through to the Token constructor
 * @param {*} value - token value, passed through to the Token constructor
 * @param {*} line - third Token constructor argument
 * @param {*} row - fourth Token constructor argument
 * @returns {Token} the newly constructed token
 */
function createTokenOfType(type, value, line, row) {
  return new Token(type, value, line, row);
}
|
|
|
|
/**
 * Single-pass, character-driven tokenizer.
 *
 * The input string is walked one UTF-16 code unit at a time. Each char code is
 * dispatched through `charMap` to a `char*` handler that updates the tokens
 * currently being built (word, tag, attribute name, attribute value) and
 * appends finished tokens to `tokens`.
 *
 * "No token in progress" is represented by `dummyToken` (null); the `in*()`
 * predicates test for that.
 */
class Tokenizer {
  /**
   * @param {string} input - text to tokenize
   * @param {Object} [options]
   * @param {Function} [options.onToken] - invoked with every token as soon as
   *   it is appended to the result
   */
  constructor(input, options = {}) {
    this.buffer = input;
    this.colPos = 0;
    this.rowPos = 0;
    // Out-of-range placeholder; tokenize() resets the cursor to 0 before reading.
    this.index = 2 ** 32;

    this.tokenIndex = -1;
    // Pre-sized to the input length; tokenize() truncates it to the real token count.
    this.tokens = new Array(Math.floor(this.buffer.length));
    // Placeholder meaning "no token of this kind is being built".
    this.dummyToken = null;

    this.wordToken = this.dummyToken;
    this.tagToken = this.dummyToken;
    this.attrNameToken = this.dummyToken;
    this.attrValueToken = this.dummyToken;
    // Attribute tokens collected for the current tag; emitted after the tag itself.
    this.attrTokens = [];

    this.options = options;

    // Char code -> handler. Anything not listed falls through to `default`.
    this.charMap = {
      [TAB]: this.charSPACE.bind(this),
      [SPACE]: this.charSPACE.bind(this),
      [N]: this.charN.bind(this),
      [OPEN_BRAKET]: this.charOPENBRAKET.bind(this),
      [CLOSE_BRAKET]: this.charCLOSEBRAKET.bind(this),
      [EQ]: this.charEQ.bind(this),
      [QUOTEMARK]: this.charQUOTEMARK.bind(this),
      [BACKSLASH]: this.charBACKSLASH.bind(this),
      default: this.charWORD.bind(this),
    };
  }

  /**
   * Forwards a token to the optional `options.onToken` callback.
   * @param {Token} token
   */
  emitToken(token) {
    if (this.options.onToken) {
      this.options.onToken(token);
    }
  }

  /**
   * Stores a finished token in the result array and notifies the listener.
   * @param {Token} token
   */
  appendToken(token) {
    this.tokenIndex += 1;
    this.tokens[this.tokenIndex] = token;
    this.emitToken(token);
  }

  /**
   * Advances both the buffer cursor and the column counter.
   * @param {number} num - number of characters to skip
   */
  skipChar(num) {
    this.index += num;
    this.colPos += num;
  }

  /**
   * Reads the char code at an offset from the cursor without moving it.
   * @param {number} num - offset relative to the current index (may be negative)
   * @returns {number} the char code, or NaN when the position is outside the buffer
   */
  seekChar(num) {
    return this.buffer.charCodeAt(this.index + num);
  }

  /** Moves the column counter one position forward. */
  nextCol() {
    this.colPos += 1;
  }

  /** Moves the row counter one position forward. */
  nextLine() {
    this.rowPos += 1;
  }

  /**
   * Emits the word being built, if it has any content, and starts a fresh
   * empty word token at the current position.
   */
  flushWord() {
    if (this.inWord() && this.wordToken[Token.VALUE_ID]) {
      this.appendToken(this.wordToken);
      this.wordToken = this.createWordToken('');
    }
  }

  /**
   * Starts a new word token unless one is already in progress.
   * Omitted arguments fall back to the defaults of createWordToken().
   * @param {string} [value]
   * @param {number} [line]
   * @param {number} [row]
   */
  createWord(value, line, row) {
    if (!this.inWord()) {
      this.wordToken = this.createWordToken(value, line, row);
    }
  }

  /**
   * Finishes the tag being built when a closing bracket is reached.
   * Does nothing when no tag is open.
   */
  flushTag() {
    if (!this.inTag()) {
      return;
    }

    // [] and [=] tag case: there is no tag name, so the brackets are kept as
    // literal text in the current word instead of producing a tag token.
    if (this.tagToken[Token.VALUE_ID] === '') {
      const value = this.inAttrValue() ? getChar(EQ) : '';
      const word = getChar(OPEN_BRAKET) + value + getChar(CLOSE_BRAKET);

      this.createWord('', 0, 0);
      this.wordToken[Token.VALUE_ID] += word;

      this.tagToken = this.dummyToken;

      if (this.inAttrValue()) {
        this.attrValueToken = this.dummyToken;
      }

      return;
    }

    // A pending attribute name that never received a value is folded back
    // into the tag's own value, separated by PLACEHOLDER_SPACE.
    if (this.inAttrName() && !this.inAttrValue()) {
      this.tagToken[Token.VALUE_ID] += PLACEHOLDER_SPACE + this.attrNameToken[Token.VALUE_ID];
      this.attrNameToken = this.dummyToken;
    }

    this.appendToken(this.tagToken);
    this.tagToken = this.dummyToken;
  }

  /**
   * Handles a tag that is still open when the input ends: the tag token is
   * converted into a word token whose value is the opening bracket followed
   * by the tag value (plus `=` when a non-empty attribute value was pending).
   */
  flushUnclosedTag() {
    if (!this.inTag()) {
      return;
    }

    const hasPendingValue = this.attrValueToken && this.attrValueToken[Token.VALUE_ID];
    const value = this.tagToken[Token.VALUE_ID] + (hasPendingValue ? getChar(EQ) : '');

    this.tagToken[Token.TYPE_ID] = Token.TYPE_WORD;
    this.tagToken[Token.VALUE_ID] = getChar(OPEN_BRAKET) + value;

    this.appendToken(this.tagToken);

    this.tagToken = this.dummyToken;

    if (this.inAttrValue()) {
      this.attrValueToken = this.dummyToken;
    }
  }

  /**
   * Moves the pending attribute name and/or attribute value tokens into
   * `attrTokens` (name first) and clears the pending slots.
   */
  flushAttrNames() {
    if (this.inAttrName()) {
      this.attrTokens.push(this.attrNameToken);
      this.attrNameToken = this.dummyToken;
    }

    if (this.inAttrValue()) {
      // `quoted` is parsing-time state only; reset it before the token is queued.
      this.attrValueToken.quoted = undefined;
      this.attrTokens.push(this.attrValueToken);
      this.attrValueToken = this.dummyToken;
    }
  }

  /** Appends every queued attribute token to the result and empties the queue. */
  flushAttrs() {
    if (this.attrTokens.length) {
      this.attrTokens.forEach(this.appendToken.bind(this));
      this.attrTokens = [];
    }
  }

  /**
   * Handler for space and tab characters.
   * @param {number} charCode - TAB or SPACE
   */
  charSPACE(charCode) {
    const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE;

    this.flushWord();

    if (this.inTag()) {
      if (this.inAttrValue() && this.attrValueToken.quoted) {
        // Whitespace inside a quoted attribute value belongs to the value.
        this.attrValueToken[Token.VALUE_ID] += spaceCode;
      } else {
        // Otherwise whitespace ends the current attribute and starts a new name.
        this.flushAttrNames();
        this.attrNameToken = this.createAttrNameToken('');
      }
    } else {
      this.appendToken(this.createSpaceToken(spaceCode));
    }

    this.nextCol();
  }

  /**
   * Handler for the new-line character: emits a new-line token, then moves to
   * the next row and resets the column.
   * @param {number} charCode
   */
  charN(charCode) {
    this.flushWord();
    this.appendToken(this.createNewLineToken(getChar(charCode)));

    this.nextLine();
    this.colPos = 0;
  }

  /**
   * Handler for the opening bracket: ends the current word and starts a new,
   * empty tag token (replacing any tag token that was still pending).
   */
  charOPENBRAKET() {
    this.flushWord();
    this.tagToken = this.createTagToken('');

    this.nextCol();
  }

  /**
   * Handler for the closing bracket: emits the tag followed by its attributes.
   * Outside of a tag nothing is emitted and the bracket itself is not kept.
   */
  charCLOSEBRAKET() {
    this.nextCol();
    this.flushTag();
    this.flushAttrNames();
    this.flushAttrs();
  }

  /**
   * Handler for `=`. Inside a tag it starts an attribute value (consuming a
   * directly following quote mark and flagging the value as quoted); outside
   * of a tag it is ordinary text.
   * @param {number} charCode
   */
  charEQ(charCode) {
    const nextCharCode = this.seekChar(1);
    const isNextQuotemark = nextCharCode === QUOTEMARK;

    if (this.inTag()) {
      this.attrValueToken = this.createAttrValueToken('');

      if (isNextQuotemark) {
        this.attrValueToken.quoted = true;
        this.skipChar(1);
      }
    } else {
      // `=` may be the first character of a text run, in which case wordToken
      // is still the null placeholder; make sure a word exists before appending.
      this.createWord();
      this.wordToken[Token.VALUE_ID] += getChar(charCode);
    }

    this.nextCol();
  }

  /**
   * Handler for the quote mark. An unescaped quote closes a non-empty quoted
   * attribute value; outside of a tag the quote is ordinary text; in any
   * other position inside a tag it is dropped.
   * @param {number} charCode
   */
  charQUOTEMARK(charCode) {
    const prevCharCode = this.seekChar(-1);
    const isPrevBackslash = prevCharCode === BACKSLASH;

    if (this.inAttrValue()
      && this.attrValueToken[Token.VALUE_ID]
      && this.attrValueToken.quoted
      && !isPrevBackslash) {
      this.flushAttrNames();
    } else if (!this.inTag()) {
      // A quote may open a text run while wordToken is still the null
      // placeholder; make sure a word exists before appending.
      this.createWord();
      this.wordToken[Token.VALUE_ID] += getChar(charCode);
    }

    this.nextCol();
  }

  /**
   * Handler for the backslash. Inside a non-empty quoted attribute value a
   * backslash followed by a quote mark adds the quote to the value and skips
   * it. In every other position the backslash is consumed without being
   * added to any token.
   */
  charBACKSLASH() {
    const nextCharCode = this.seekChar(1);
    const isNextQuotemark = nextCharCode === QUOTEMARK;

    if (this.inAttrValue()
      && this.attrValueToken[Token.VALUE_ID]
      && this.attrValueToken.quoted
      && isNextQuotemark
    ) {
      this.attrValueToken[Token.VALUE_ID] += getChar(nextCharCode);
      this.skipChar(1);
    }

    this.nextCol();
  }

  /**
   * Default handler: appends the character to whichever token is currently
   * being built (attribute value, attribute name, tag name, or plain word).
   * @param {number} charCode
   */
  charWORD(charCode) {
    if (this.inTag()) {
      if (this.inAttrValue()) {
        this.attrValueToken[Token.VALUE_ID] += getChar(charCode);
      } else if (this.inAttrName()) {
        this.attrNameToken[Token.VALUE_ID] += getChar(charCode);
      } else {
        this.tagToken[Token.VALUE_ID] += getChar(charCode);
      }
    } else {
      this.createWord();

      this.wordToken[Token.VALUE_ID] += getChar(charCode);
    }

    this.nextCol();
  }

  /**
   * Runs the tokenizer over the whole buffer.
   * @returns {Token[]} the tokens in the order they were appended
   */
  tokenize() {
    this.index = 0;
    while (this.index < this.buffer.length) {
      const charCode = this.buffer.charCodeAt(this.index);

      (this.charMap[charCode] || this.charMap.default)(charCode);

      // Handlers may already have moved the cursor via skipChar().
      this.index += 1;
    }

    this.flushWord();
    this.flushUnclosedTag();

    // Drop the unused tail of the pre-sized array.
    this.tokens.length = this.tokenIndex + 1;

    return this.tokens;
  }

  /** @returns {*} truthy while a word token is in progress */
  inWord() {
    return this.wordToken && this.wordToken[Token.TYPE_ID];
  }

  /** @returns {*} truthy while a tag token is in progress */
  inTag() {
    return this.tagToken && this.tagToken[Token.TYPE_ID];
  }

  /** @returns {*} truthy while an attribute value token is in progress */
  inAttrValue() {
    return this.attrValueToken && this.attrValueToken[Token.TYPE_ID];
  }

  /** @returns {*} truthy while an attribute name token is in progress */
  inAttrName() {
    return this.attrNameToken && this.attrNameToken[Token.TYPE_ID];
  }

  /**
   * @param {string} [value='']
   * @param {number} [line] - defaults to the current column position
   * @param {number} [row] - defaults to the current row position
   * @returns {Token} a token of type Token.TYPE_WORD
   */
  createWordToken(value = '', line = this.colPos, row = this.rowPos) {
    return createTokenOfType(Token.TYPE_WORD, value, line, row);
  }

  /**
   * @param {string} value
   * @param {number} [line] - defaults to the current column position
   * @param {number} [row] - defaults to the current row position
   * @returns {Token} a token of type Token.TYPE_TAG
   */
  createTagToken(value, line = this.colPos, row = this.rowPos) {
    return createTokenOfType(Token.TYPE_TAG, value, line, row);
  }

  /**
   * @param {string} value
   * @param {number} [line] - defaults to the current column position
   * @param {number} [row] - defaults to the current row position
   * @returns {Token} a token of type Token.TYPE_ATTR_NAME
   */
  createAttrNameToken(value, line = this.colPos, row = this.rowPos) {
    return createTokenOfType(Token.TYPE_ATTR_NAME, value, line, row);
  }

  /**
   * @param {string} value
   * @param {number} [line] - defaults to the current column position
   * @param {number} [row] - defaults to the current row position
   * @returns {Token} a token of type Token.TYPE_ATTR_VALUE
   */
  createAttrValueToken(value, line = this.colPos, row = this.rowPos) {
    return createTokenOfType(Token.TYPE_ATTR_VALUE, value, line, row);
  }

  /**
   * @param {string} value
   * @param {number} [line] - defaults to the current column position
   * @param {number} [row] - defaults to the current row position
   * @returns {Token} a token of type Token.TYPE_SPACE
   */
  createSpaceToken(value, line = this.colPos, row = this.rowPos) {
    return createTokenOfType(Token.TYPE_SPACE, value, line, row);
  }

  /**
   * @param {string} value
   * @param {number} [line] - defaults to the current column position
   * @param {number} [row] - defaults to the current row position
   * @returns {Token} a token of type Token.TYPE_NEW_LINE
   */
  createNewLineToken(value, line = this.colPos, row = this.rowPos) {
    return createTokenOfType(Token.TYPE_NEW_LINE, value, line, row);
  }

  /**
   * Checks whether the buffer contains the text `[/` immediately followed by
   * the value Token.getTokenValue() returns for the given token.
   * @param {Token} token
   * @returns {boolean}
   */
  isTokenNested(token) {
    const value = getChar(OPEN_BRAKET) + getChar(SLASH) + Token.getTokenValue(token);
    return this.buffer.includes(value);
  }
}
|
|
|
|
module.exports = Tokenizer;
|
|
module.exports.createTokenOfType = createTokenOfType;
|
|
module.exports.TYPE = {
|
|
WORD: Token.TYPE_WORD,
|
|
TAG: Token.TYPE_TAG,
|
|
ATTR_NAME: Token.TYPE_ATTR_NAME,
|
|
ATTR_VALUE: Token.TYPE_ATTR_VALUE,
|
|
SPACE: Token.TYPE_SPACE,
|
|
NEW_LINE: Token.TYPE_NEW_LINE,
|
|
};
|
|
module.exports.TOKEN = {
|
|
TYPE_ID: Token.TYPE_ID,
|
|
VALUE_ID: Token.VALUE_ID,
|
|
LINE_ID: Token.LINE_ID,
|
|
COLUMN_ID: Token.COLUMN_ID,
|
|
};
|
|
|