mirror of
https://github.com/tenrok/BBob.git
synced 2026-06-08 17:22:26 +03:00
feat(parser): rewrite lexer to make it faster (#50)
* feat(parser): first iteration of new lexer * feat(parser): convert token string props to number props * refactor(parser): optimize char grabber * refactor(parser): working on new lexer * refactor(parser): convert token string props to number props * refactor(parser): rebuild lexer, add tag attrs parsing * refactor(parser): rework word parsing and tag parsing * refactor(parser): rework to pass tests * refactor(parser): rework tag parsing * refactor(parser): rework escape tags parsing * refactor(parser): rework tests * refactor(parser): all test pass * refactor(parser): make lexer faster by move mode switching in loop * refactor(parser): remove all state map objects * refactor(parser): order of parsing states * refactor(parser): state switching without return * refactor(parser): rename buffers to chars * refactor(lexer): reduce function calls * feat(lexer): add new parser tests and code to pass it * fix(utils): remove unused variable in char grabber * feat(lexer): add test for new lexer bug * chore(*): add lexer and lexer2 to benchmark * chore(lexer): add some debug info for char grabber * feat(parser): add new test for single attributes without values * fix(lexer): paired tags tests * refactor(lexer): comment breaking changes tests for future releases * feat(core): improve tests * refactor(parser): add more tests, reduce char grabber size * refactor(parser): reduce utils size * refactor(parser): remove unused code from tag parsing code * refactor(parser): remove unused code from word to tag transforming code * chore(benchmark): fix benchmark imports
This commit is contained in:
committed by
GitHub
parent
fda6ddd6ee
commit
772d422d77
@@ -10,12 +10,12 @@ const TOKEN_VALUE_ID = 'value'; // 1;
|
||||
const TOKEN_COLUMN_ID = 'row'; // 2;
|
||||
const TOKEN_LINE_ID = 'line'; // 3;
|
||||
|
||||
const TOKEN_TYPE_WORD = 'word';
|
||||
const TOKEN_TYPE_TAG = 'tag';
|
||||
const TOKEN_TYPE_ATTR_NAME = 'attr-name';
|
||||
const TOKEN_TYPE_ATTR_VALUE = 'attr-value';
|
||||
const TOKEN_TYPE_SPACE = 'space';
|
||||
const TOKEN_TYPE_NEW_LINE = 'new-line';
|
||||
const TOKEN_TYPE_WORD = 1; // 'word';
|
||||
const TOKEN_TYPE_TAG = 2; // 'tag';
|
||||
const TOKEN_TYPE_ATTR_NAME = 3; // 'attr-name';
|
||||
const TOKEN_TYPE_ATTR_VALUE = 4; // 'attr-value';
|
||||
const TOKEN_TYPE_SPACE = 5; // 'space';
|
||||
const TOKEN_TYPE_NEW_LINE = 6; // 'new-line';
|
||||
|
||||
/**
|
||||
* @param {Token} token
|
||||
@@ -105,14 +105,15 @@ class Token {
|
||||
* @param row
|
||||
*/
|
||||
constructor(type, value, line, row) {
|
||||
this[TOKEN_TYPE_ID] = String(type);
|
||||
this[TOKEN_TYPE_ID] = Number(type);
|
||||
this[TOKEN_VALUE_ID] = String(value);
|
||||
this[TOKEN_LINE_ID] = Number(line);
|
||||
this[TOKEN_COLUMN_ID] = Number(row);
|
||||
}
|
||||
|
||||
isEmpty() {
|
||||
return !!this[TOKEN_TYPE_ID];
|
||||
// eslint-disable-next-line no-restricted-globals
|
||||
return isNaN(this[TOKEN_TYPE_ID]);
|
||||
}
|
||||
|
||||
isText() {
|
||||
|
||||
+236
-133
@@ -21,7 +21,7 @@ const EM = '!';
|
||||
|
||||
/**
|
||||
* Creates a Token entity class
|
||||
* @param {String} type
|
||||
* @param {Number} type
|
||||
* @param {String} value
|
||||
* @param {Number} r line number
|
||||
* @param {Number} cl char number in line
|
||||
@@ -44,14 +44,26 @@ const createToken = (type, value, r = 0, cl = 0) => new Token(type, value, r, cl
|
||||
* @return {Lexer}
|
||||
*/
|
||||
function createLexer(buffer, options = {}) {
|
||||
const STATE_WORD = 0;
|
||||
const STATE_TAG = 1;
|
||||
const STATE_TAG_ATTRS = 2;
|
||||
|
||||
const TAG_STATE_NAME = 0;
|
||||
const TAG_STATE_ATTR = 1;
|
||||
const TAG_STATE_VALUE = 2;
|
||||
|
||||
let row = 0;
|
||||
let col = 0;
|
||||
|
||||
let tokenIndex = -1;
|
||||
let stateMode = STATE_WORD;
|
||||
let tagMode = TAG_STATE_NAME;
|
||||
const tokens = new Array(Math.floor(buffer.length));
|
||||
const openTag = options.openTag || OPEN_BRAKET;
|
||||
const closeTag = options.closeTag || CLOSE_BRAKET;
|
||||
const escapeTags = options.enableEscapeTags;
|
||||
const escapeTags = !!options.enableEscapeTags;
|
||||
const onToken = options.onToken || (() => {
|
||||
});
|
||||
|
||||
const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
|
||||
const NOT_CHAR_TOKENS = [
|
||||
@@ -62,175 +74,266 @@ function createLexer(buffer, options = {}) {
|
||||
const SPECIAL_CHARS = [EQ, SPACE, TAB];
|
||||
|
||||
const isCharReserved = (char) => (RESERVED_CHARS.indexOf(char) >= 0);
|
||||
const isNewLine = (char) => char === N;
|
||||
const isWhiteSpace = (char) => (WHITESPACES.indexOf(char) >= 0);
|
||||
const isCharToken = (char) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
|
||||
const isSpecialChar = (char) => (SPECIAL_CHARS.indexOf(char) >= 0);
|
||||
const isEscapableChar = (char) => (char === openTag || char === closeTag || char === BACKSLASH);
|
||||
const isEscapeChar = (char) => char === BACKSLASH;
|
||||
const onSkip = () => {
|
||||
col++;
|
||||
};
|
||||
|
||||
const unq = (val) => unquote(trimChar(val, QUOTEMARK));
|
||||
|
||||
const chars = createCharGrabber(buffer, { onSkip });
|
||||
|
||||
/**
|
||||
* Emits newly created token to subscriber
|
||||
* @param token
|
||||
* @param {Number} type
|
||||
* @param {String} value
|
||||
*/
|
||||
const emitToken = (token) => {
|
||||
if (options.onToken) {
|
||||
options.onToken(token);
|
||||
}
|
||||
function emitToken(type, value) {
|
||||
const token = createToken(type, value, row, col);
|
||||
|
||||
onToken(token);
|
||||
|
||||
tokenIndex += 1;
|
||||
tokens[tokenIndex] = token;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses params inside [myTag---params goes here---]content[/myTag]
|
||||
* @param str
|
||||
* @returns {{tag: *, attrs: Array}}
|
||||
*/
|
||||
const parseAttrs = (str) => {
|
||||
let tagName = null;
|
||||
let skipSpecialChars = false;
|
||||
function nextTagState(tagChars, isSingleValueTag) {
|
||||
if (tagMode === TAG_STATE_ATTR) {
|
||||
const validAttrName = (char) => !(char === EQ || isWhiteSpace(char));
|
||||
const name = tagChars.grabWhile(validAttrName);
|
||||
const isEnd = tagChars.isLast();
|
||||
const isValue = tagChars.getCurr() !== EQ;
|
||||
|
||||
const attrTokens = [];
|
||||
const attrCharGrabber = createCharGrabber(str);
|
||||
tagChars.skip();
|
||||
|
||||
const validAttr = (char) => {
|
||||
const isEQ = char === EQ;
|
||||
const isWS = isWhiteSpace(char);
|
||||
const prevChar = attrCharGrabber.getPrev();
|
||||
const nextChar = attrCharGrabber.getNext();
|
||||
const isPrevSLASH = prevChar === BACKSLASH;
|
||||
const isTagNameEmpty = tagName === null;
|
||||
|
||||
if (isTagNameEmpty) {
|
||||
return (isEQ || isWS || attrCharGrabber.isLast()) === false;
|
||||
}
|
||||
|
||||
if (skipSpecialChars && isSpecialChar(char)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (char === QUOTEMARK && !isPrevSLASH) {
|
||||
skipSpecialChars = !skipSpecialChars;
|
||||
|
||||
if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return (isEQ || isWS) === false;
|
||||
};
|
||||
|
||||
const nextAttr = () => {
|
||||
const attrStr = attrCharGrabber.grabWhile(validAttr);
|
||||
const currChar = attrCharGrabber.getCurr();
|
||||
|
||||
// first string before space is a tag name [tagName params...]
|
||||
if (tagName === null) {
|
||||
tagName = attrStr;
|
||||
} else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) {
|
||||
const escaped = unquote(trimChar(attrStr, QUOTEMARK));
|
||||
attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col));
|
||||
if (isEnd || isValue) {
|
||||
emitToken(TYPE_ATTR_VALUE, unq(name));
|
||||
} else {
|
||||
attrTokens.push(createToken(TYPE_ATTR_NAME, attrStr, row, col));
|
||||
emitToken(TYPE_ATTR_NAME, name);
|
||||
}
|
||||
|
||||
attrCharGrabber.skip();
|
||||
};
|
||||
if (isEnd) {
|
||||
return TAG_STATE_NAME;
|
||||
}
|
||||
|
||||
while (attrCharGrabber.hasNext()) {
|
||||
nextAttr();
|
||||
if (isValue) {
|
||||
return TAG_STATE_ATTR;
|
||||
}
|
||||
|
||||
return TAG_STATE_VALUE;
|
||||
}
|
||||
if (tagMode === TAG_STATE_VALUE) {
|
||||
let stateSpecial = false;
|
||||
|
||||
const validAttrValue = (char) => {
|
||||
// const isEQ = char === EQ;
|
||||
const isQM = char === QUOTEMARK;
|
||||
const prevChar = tagChars.getPrev();
|
||||
const nextChar = tagChars.getNext();
|
||||
const isPrevSLASH = prevChar === BACKSLASH;
|
||||
const isNextEQ = nextChar === EQ;
|
||||
const isWS = isWhiteSpace(char);
|
||||
// const isPrevWS = isWhiteSpace(prevChar);
|
||||
const isNextWS = isWhiteSpace(nextChar);
|
||||
|
||||
if (stateSpecial && isSpecialChar(char)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (isQM && !isPrevSLASH) {
|
||||
stateSpecial = !stateSpecial;
|
||||
|
||||
if (!stateSpecial && !(isNextEQ || isNextWS)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!isSingleValueTag) {
|
||||
return isWS === false;
|
||||
// return (isEQ || isWS) === false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
const name = tagChars.grabWhile(validAttrValue);
|
||||
|
||||
tagChars.skip();
|
||||
|
||||
emitToken(TYPE_ATTR_VALUE, unq(name));
|
||||
|
||||
if (tagChars.isLast()) {
|
||||
return TAG_STATE_NAME;
|
||||
}
|
||||
|
||||
return TAG_STATE_ATTR;
|
||||
}
|
||||
|
||||
return { tag: tagName, attrs: attrTokens };
|
||||
};
|
||||
const validName = (char) => !(char === EQ || isWhiteSpace(char) || tagChars.isLast());
|
||||
const name = tagChars.grabWhile(validName);
|
||||
|
||||
const bufferGrabber = createCharGrabber(buffer, {
|
||||
onSkip: () => {
|
||||
col++;
|
||||
},
|
||||
});
|
||||
emitToken(TYPE_TAG, name);
|
||||
|
||||
const next = () => {
|
||||
const currChar = bufferGrabber.getCurr();
|
||||
const nextChar = bufferGrabber.getNext();
|
||||
tagChars.skip();
|
||||
|
||||
// in cases where we have [url=someval]GET[/url] we don't need to parse everything
|
||||
if (isSingleValueTag) {
|
||||
return TAG_STATE_VALUE;
|
||||
}
|
||||
|
||||
const hasEQ = tagChars.includes(EQ);
|
||||
|
||||
return hasEQ ? TAG_STATE_ATTR : TAG_STATE_VALUE;
|
||||
}
|
||||
|
||||
/**
 * Lexer step for the "tag" state: inspects the character under the cursor and
 * decides whether it opens a real tag.
 *
 * - Not an opening bracket: nothing is consumed, word mode resumes.
 * - Opening bracket that cannot start a tag (next char is reserved, no closing
 *   bracket ahead, another opening bracket comes first, or the buffer ended):
 *   the bracket is emitted as a word token.
 * - Tag without `=` or a closing tag (`[/name]`): everything up to the closing
 *   bracket is emitted as a single tag token.
 * - Otherwise the tag carries attributes and is handed to the attrs state.
 *
 * @returns {Number} the next lexer state (STATE_WORD or STATE_TAG_ATTRS)
 */
function stateTag() {
  const opener = chars.getCurr();

  // only an opening bracket can start a tag
  if (opener !== openTag) {
    return STATE_WORD;
  }

  // remember what follows the bracket before moving past it
  const following = chars.getNext();

  chars.skip();

  // text between the bracket and the nearest closing bracket ('' when none is left)
  const inner = chars.substrUntilChar(closeTag);
  const looksBroken = inner.length === 0 || inner.indexOf(openTag) >= 0;

  // covers '[My word [tag][/tag]' and '[My last line word'
  if (isCharReserved(following) || looksBroken || chars.isLast()) {
    emitToken(TYPE_WORD, opener);

    return STATE_WORD;
  }

  // [myTag attr=value]
  const hasAttrs = inner.indexOf(EQ) !== -1;
  // [/myTag]
  const isClosing = inner[0] === SLASH;

  if (hasAttrs && !isClosing) {
    return STATE_TAG_ATTRS;
  }

  const tagName = chars.grabWhile((char) => char !== closeTag);

  chars.skip(); // skip closeTag

  emitToken(TYPE_TAG, tagName);

  return STATE_WORD;
}
|
||||
|
||||
/**
 * Lexer step for the "tag with attributes" state.
 *
 * Grabs everything up to the closing bracket silently (so the column callback
 * is not fired for it), then walks that substring with its own grabber, which
 * does report skips through `onSkip`. `nextTagState` emits the tag, attr-name
 * and attr-value tokens. A tag body without a space is treated as a
 * single-value tag such as [url=someval].
 *
 * @returns {Number} always STATE_WORD
 */
function stateAttrs() {
  const silent = true;
  const tagStr = chars.grabWhile((char) => char !== closeTag, silent);
  const tagGrabber = createCharGrabber(tagStr, { onSkip });
  const hasSpace = tagGrabber.includes(SPACE);

  // `tagMode` lives in the enclosing lexer and keeps whatever the previous tag
  // ended on. When the sub-grabber is skipped past its end, `isLast()` is false
  // and the mode stays on ATTR/VALUE, so the next tag's name would be tokenized
  // as an attribute. Every tag must start by reading its name.
  tagMode = TAG_STATE_NAME;

  while (tagGrabber.hasNext()) {
    tagMode = nextTagState(tagGrabber, !hasSpace);
  }

  chars.skip(); // skip closeTag

  return STATE_WORD;
}
|
||||
|
||||
function stateWord() {
|
||||
if (isNewLine(chars.getCurr())) {
|
||||
emitToken(TYPE_NEW_LINE, chars.getCurr());
|
||||
|
||||
chars.skip();
|
||||
|
||||
if (currChar === N) {
|
||||
bufferGrabber.skip();
|
||||
col = 0;
|
||||
row++;
|
||||
|
||||
emitToken(createToken(TYPE_NEW_LINE, currChar, row, col));
|
||||
} else if (isWhiteSpace(currChar)) {
|
||||
const str = bufferGrabber.grabWhile(isWhiteSpace);
|
||||
emitToken(createToken(TYPE_SPACE, str, row, col));
|
||||
} else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
|
||||
bufferGrabber.skip(); // skip the \ without emitting anything
|
||||
bufferGrabber.skip(); // skip past the [, ] or \ as well
|
||||
emitToken(createToken(TYPE_WORD, nextChar, row, col));
|
||||
} else if (currChar === openTag) {
|
||||
bufferGrabber.skip(); // skip openTag
|
||||
|
||||
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
|
||||
const substr = bufferGrabber.substrUntilChar(closeTag);
|
||||
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
|
||||
|
||||
if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
|
||||
emitToken(createToken(TYPE_WORD, currChar, row, col));
|
||||
} else {
|
||||
const str = bufferGrabber.grabWhile((val) => val !== closeTag);
|
||||
|
||||
bufferGrabber.skip(); // skip closeTag
|
||||
// [myTag ]
|
||||
const isNoAttrsInTag = str.indexOf(EQ) === -1;
|
||||
// [/myTag]
|
||||
const isClosingTag = str[0] === SLASH;
|
||||
|
||||
if (isNoAttrsInTag || isClosingTag) {
|
||||
emitToken(createToken(TYPE_TAG, str, row, col));
|
||||
} else {
|
||||
const parsed = parseAttrs(str);
|
||||
|
||||
emitToken(createToken(TYPE_TAG, parsed.tag, row, col));
|
||||
|
||||
parsed.attrs.map(emitToken);
|
||||
}
|
||||
}
|
||||
} else if (currChar === closeTag) {
|
||||
bufferGrabber.skip(); // skip closeTag
|
||||
|
||||
emitToken(createToken(TYPE_WORD, currChar, row, col));
|
||||
} else if (isCharToken(currChar)) {
|
||||
if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
|
||||
bufferGrabber.skip();
|
||||
emitToken(createToken(TYPE_WORD, currChar, row, col));
|
||||
} else {
|
||||
const str = bufferGrabber.grabWhile((char) => {
|
||||
if (escapeTags) {
|
||||
return isCharToken(char) && !isEscapeChar(char);
|
||||
}
|
||||
return isCharToken(char);
|
||||
});
|
||||
|
||||
emitToken(createToken(TYPE_WORD, str, row, col));
|
||||
}
|
||||
return STATE_WORD;
|
||||
}
|
||||
};
|
||||
|
||||
const tokenize = () => {
|
||||
while (bufferGrabber.hasNext()) {
|
||||
next();
|
||||
if (isWhiteSpace(chars.getCurr())) {
|
||||
emitToken(TYPE_SPACE, chars.grabWhile(isWhiteSpace));
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
if (chars.getCurr() === openTag) {
|
||||
if (chars.includes(closeTag)) {
|
||||
return STATE_TAG;
|
||||
}
|
||||
|
||||
emitToken(TYPE_WORD, chars.getCurr());
|
||||
|
||||
chars.skip();
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
if (escapeTags) {
|
||||
if (isEscapeChar(chars.getCurr())) {
|
||||
const currChar = chars.getCurr();
|
||||
const nextChar = chars.getNext();
|
||||
|
||||
chars.skip(); // skip the \ without emitting anything
|
||||
|
||||
if (isEscapableChar(nextChar)) {
|
||||
chars.skip(); // skip past the [, ] or \ as well
|
||||
|
||||
emitToken(TYPE_WORD, nextChar);
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
emitToken(TYPE_WORD, currChar);
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
const isChar = (char) => isCharToken(char) && !isEscapeChar(char);
|
||||
|
||||
emitToken(TYPE_WORD, chars.grabWhile(isChar));
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
emitToken(TYPE_WORD, chars.grabWhile(isCharToken));
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
function tokenize() {
|
||||
while (chars.hasNext()) {
|
||||
switch (stateMode) {
|
||||
case STATE_TAG:
|
||||
stateMode = stateTag();
|
||||
break;
|
||||
case STATE_TAG_ATTRS:
|
||||
stateMode = stateAttrs();
|
||||
break;
|
||||
case STATE_WORD:
|
||||
stateMode = stateWord();
|
||||
break;
|
||||
default:
|
||||
stateMode = STATE_WORD;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tokens.length = tokenIndex + 1;
|
||||
|
||||
return tokens;
|
||||
};
|
||||
}
|
||||
|
||||
const isTokenNested = (token) => {
|
||||
function isTokenNested(token) {
|
||||
const value = openTag + SLASH + token.getValue();
|
||||
// potential bottleneck
|
||||
return buffer.indexOf(value) > -1;
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
tokenize,
|
||||
|
||||
@@ -0,0 +1,242 @@
|
||||
/* eslint-disable no-plusplus,no-param-reassign */
|
||||
import {
|
||||
OPEN_BRAKET,
|
||||
CLOSE_BRAKET,
|
||||
QUOTEMARK,
|
||||
BACKSLASH,
|
||||
SLASH,
|
||||
SPACE,
|
||||
TAB,
|
||||
EQ,
|
||||
N,
|
||||
} from '@bbob/plugin-helper/lib/char';
|
||||
|
||||
import {
|
||||
Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD,
|
||||
} from './Token';
|
||||
import { createCharGrabber, trimChar, unquote } from './utils';
|
||||
|
||||
// for cases <!-- -->
|
||||
const EM = '!';
|
||||
|
||||
/**
|
||||
* Creates a Token entity class
|
||||
* @param {String} type
|
||||
* @param {String} value
|
||||
* @param {Number} r line number
|
||||
* @param {Number} cl char number in line
|
||||
*/
|
||||
const createToken = (type, value, r = 0, cl = 0) => new Token(type, value, r, cl);
|
||||
|
||||
/**
|
||||
* @typedef {Object} Lexer
|
||||
* @property {Function} tokenize
|
||||
* @property {Function} isTokenNested
|
||||
*/
|
||||
|
||||
/**
 * Creates a lexer that splits a BBCode-like string into Token instances
 * (word, space, new-line, tag, attr-name and attr-value tokens).
 *
 * @param {String} buffer text to tokenize
 * @param {Object} options
 * @param {Function} options.onToken called with every token as soon as it is produced
 * @param {String} options.openTag tag opening char, defaults to OPEN_BRAKET
 * @param {String} options.closeTag tag closing char, defaults to CLOSE_BRAKET
 * @param {Boolean} options.enableEscapeTags when truthy, a backslash escapes
 *   the open tag, the close tag and the backslash itself
 * @return {Lexer}
 */
function createLexer(buffer, options = {}) {
  let row = 0;
  let col = 0;

  let tokenIndex = -1;
  // preallocated to the buffer length and trimmed to the real token count in tokenize()
  const tokens = new Array(Math.floor(buffer.length));
  const openTag = options.openTag || OPEN_BRAKET;
  const closeTag = options.closeTag || CLOSE_BRAKET;
  const escapeTags = options.enableEscapeTags;

  const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
  const NOT_CHAR_TOKENS = [
    // ...(options.enableEscapeTags ? [BACKSLASH] : []),
    openTag, SPACE, TAB, N,
  ];
  const WHITESPACES = [SPACE, TAB];
  const SPECIAL_CHARS = [EQ, SPACE, TAB];

  const isCharReserved = (char) => (RESERVED_CHARS.indexOf(char) >= 0);
  const isWhiteSpace = (char) => (WHITESPACES.indexOf(char) >= 0);
  const isCharToken = (char) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
  const isSpecialChar = (char) => (SPECIAL_CHARS.indexOf(char) >= 0);
  const isEscapableChar = (char) => (char === openTag || char === closeTag || char === BACKSLASH);
  const isEscapeChar = (char) => char === BACKSLASH;

  /**
   * Emits newly created token to subscriber and stores it in the result list
   * @param {Token} token
   */
  const emitToken = (token) => {
    if (options.onToken) {
      options.onToken(token);
    }

    tokenIndex += 1;
    tokens[tokenIndex] = token;
  };

  /**
   * Parses params inside [myTag---params goes here---]content[/myTag]
   * @param {String} str tag content without the surrounding brackets
   * @returns {{tag: *, attrs: Array}} tag name and the attr-name / attr-value tokens
   */
  const parseAttrs = (str) => {
    let tagName = null;
    // true while the cursor is inside a quoted value, where '=', space and tab are literal
    let skipSpecialChars = false;

    const attrTokens = [];
    const attrCharGrabber = createCharGrabber(str);

    const validAttr = (char) => {
      const isEQ = char === EQ;
      const isWS = isWhiteSpace(char);
      const prevChar = attrCharGrabber.getPrev();
      const nextChar = attrCharGrabber.getNext();
      const isPrevSLASH = prevChar === BACKSLASH;
      const isTagNameEmpty = tagName === null;

      if (isTagNameEmpty) {
        return (isEQ || isWS || attrCharGrabber.isLast()) === false;
      }

      if (skipSpecialChars && isSpecialChar(char)) {
        return true;
      }

      // an unescaped quote toggles the "inside quotes" mode
      if (char === QUOTEMARK && !isPrevSLASH) {
        skipSpecialChars = !skipSpecialChars;

        // a closing quote not followed by '=' or whitespace ends the chunk
        if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) {
          return false;
        }
      }

      return (isEQ || isWS) === false;
    };

    const nextAttr = () => {
      const attrStr = attrCharGrabber.grabWhile(validAttr);
      const currChar = attrCharGrabber.getCurr();

      // first string before space is a tag name [tagName params...]
      if (tagName === null) {
        tagName = attrStr;
      } else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) {
        const escaped = unquote(trimChar(attrStr, QUOTEMARK));
        attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col));
      } else {
        attrTokens.push(createToken(TYPE_ATTR_NAME, attrStr, row, col));
      }

      // step over the delimiter that stopped grabWhile
      attrCharGrabber.skip();
    };

    while (attrCharGrabber.hasNext()) {
      nextAttr();
    }

    return { tag: tagName, attrs: attrTokens };
  };

  const bufferGrabber = createCharGrabber(buffer, {
    onSkip: () => {
      col++;
    },
  });

  /**
   * Consumes one lexical unit from the buffer and emits the matching token(s)
   */
  const next = () => {
    const currChar = bufferGrabber.getCurr();
    const nextChar = bufferGrabber.getNext();

    if (currChar === N) {
      bufferGrabber.skip();
      col = 0;
      row++;

      // NOTE(review): the token is created after row/col were moved to the next
      // line, so it carries the position following the line break - confirm intended
      emitToken(createToken(TYPE_NEW_LINE, currChar, row, col));
    } else if (isWhiteSpace(currChar)) {
      const str = bufferGrabber.grabWhile(isWhiteSpace);
      emitToken(createToken(TYPE_SPACE, str, row, col));
    } else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
      bufferGrabber.skip(); // skip the \ without emitting anything
      bufferGrabber.skip(); // skip past the [, ] or \ as well
      emitToken(createToken(TYPE_WORD, nextChar, row, col));
    } else if (currChar === openTag) {
      bufferGrabber.skip(); // skip openTag

      // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
      const substr = bufferGrabber.substrUntilChar(closeTag);
      const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;

      if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
        emitToken(createToken(TYPE_WORD, currChar, row, col));
      } else {
        const str = bufferGrabber.grabWhile((val) => val !== closeTag);

        bufferGrabber.skip(); // skip closeTag
        // [myTag ]
        const isNoAttrsInTag = str.indexOf(EQ) === -1;
        // [/myTag]
        const isClosingTag = str[0] === SLASH;

        if (isNoAttrsInTag || isClosingTag) {
          emitToken(createToken(TYPE_TAG, str, row, col));
        } else {
          const parsed = parseAttrs(str);

          emitToken(createToken(TYPE_TAG, parsed.tag, row, col));

          // forEach, not map: the tokens are emitted for their side effect only
          parsed.attrs.forEach((attrToken) => emitToken(attrToken));
        }
      }
    } else if (currChar === closeTag) {
      bufferGrabber.skip(); // skip closeTag

      emitToken(createToken(TYPE_WORD, currChar, row, col));
    } else if (isCharToken(currChar)) {
      if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
        // a backslash that escapes nothing is an ordinary one-char word
        bufferGrabber.skip();
        emitToken(createToken(TYPE_WORD, currChar, row, col));
      } else {
        const str = bufferGrabber.grabWhile((char) => {
          if (escapeTags) {
            // stop before a backslash so the escape branches above can handle it
            return isCharToken(char) && !isEscapeChar(char);
          }
          return isCharToken(char);
        });

        emitToken(createToken(TYPE_WORD, str, row, col));
      }
    }
  };

  /**
   * Runs the lexer over the whole buffer
   * @returns {Array} all produced tokens in order
   */
  const tokenize = () => {
    while (bufferGrabber.hasNext()) {
      next();
    }

    // drop the unused preallocated slots
    tokens.length = tokenIndex + 1;

    return tokens;
  };

  /**
   * Tells whether the buffer contains a closing tag for the given token's value
   * @param {Token} token
   * @returns {Boolean}
   */
  const isTokenNested = (token) => {
    const value = openTag + SLASH + token.getValue();
    // potential bottleneck
    return buffer.indexOf(value) > -1;
  };

  return {
    tokenize,
    isTokenNested,
  };
}
|
||||
|
||||
export const createTokenOfType = createToken;
|
||||
export { createLexer };
|
||||
@@ -22,25 +22,25 @@ const parse = (input, opts = {}) => {
|
||||
/**
|
||||
* Result AST of nodes
|
||||
* @private
|
||||
* @type {ItemList}
|
||||
* @type {NodeList}
|
||||
*/
|
||||
const nodes = createList();
|
||||
/**
|
||||
* Temp buffer of nodes that's nested to another node
|
||||
* @private
|
||||
* @type {ItemList}
|
||||
* @type {NodeList}
|
||||
*/
|
||||
const nestedNodes = createList();
|
||||
/**
|
||||
* Temp buffer of nodes [tag..]...[/tag]
|
||||
* @private
|
||||
* @type {ItemList}
|
||||
* @type {NodeList}
|
||||
*/
|
||||
const tagNodes = createList();
|
||||
/**
|
||||
* Temp buffer of tag attributes
|
||||
* @private
|
||||
* @type {ItemList}
|
||||
* @type {NodeList}
|
||||
*/
|
||||
const tagNodesAttrName = createList();
|
||||
|
||||
|
||||
+112
-132
@@ -3,95 +3,107 @@ import {
|
||||
BACKSLASH,
|
||||
} from '@bbob/plugin-helper/lib/char';
|
||||
|
||||
/**
 * @typedef {Object} CharGrabber
 * @property {Function} skip
 * @property {Function} hasNext
 * @property {Function} isLast
 * @property {Function} includes
 * @property {Function} getCurr
 * @property {Function} getRest
 * @property {Function} getNext
 * @property {Function} getPrev
 * @property {Function} grabWhile
 * @property {Function} substrUntilChar
 */

/**
 * Forward-only cursor over a string. Meant to be called with `new`; every
 * method is a closure stored on the instance.
 *
 * @param {String} source string to walk over
 * @param {Object} [options]
 * @param {Function} [options.onSkip] called once per non-silent `skip` call
 */
function CharGrabber(source, options) {
  const cursor = {
    pos: 0,
    len: source.length,
  };

  const substrUntilChar = (char) => {
    const { pos } = cursor;
    const idx = source.indexOf(char, pos);

    return idx >= 0 ? source.slice(pos, idx) : '';
  };
  const includes = (val) => source.indexOf(val, cursor.pos) >= 0;
  const hasNext = () => cursor.len > cursor.pos;
  // true only when the cursor sits exactly one past the last char;
  // a cursor skipped further than that reports false
  const isLast = () => cursor.pos === cursor.len;
  const skip = (num = 1, silent) => {
    cursor.pos += num;

    // onSkip fires once per call, whatever `num` is
    if (options && options.onSkip && !silent) {
      options.onSkip();
    }
  };
  const rest = () => source.slice(cursor.pos);
  const curr = () => source[cursor.pos];
  const prev = () => {
    const prevPos = cursor.pos - 1;

    return typeof source[prevPos] !== 'undefined' ? source[prevPos] : null;
  };
  const next = () => {
    const nextPos = cursor.pos + 1;

    return nextPos <= (cursor.len - 1) ? source[nextPos] : null;
  };
  const grabWhile = (cond, silent) => {
    // always start from the cursor: with nothing left to read the result is ''
    // rather than the part of the source that was already consumed
    const start = cursor.pos;

    while (hasNext() && cond(curr())) {
      skip(1, silent);
    }

    return source.slice(start, cursor.pos);
  };
  /**
   * Moves the cursor forward
   * @param {Number} [num=1] how many chars to move
   * @param {Boolean} [silent] when truthy, onSkip is not called
   */
  this.skip = skip;
  /**
   * @returns {Boolean} true while the cursor is before the end of the source
   */
  this.hasNext = hasNext;
  /**
   * @returns {String} char under the cursor (undefined past the end)
   */
  this.getCurr = curr;
  /**
   * @returns {String} everything from the cursor to the end
   */
  this.getRest = rest;
  /**
   * @returns {String} char after the cursor, or null when there is none
   */
  this.getNext = next;
  /**
   * @returns {String} char before the cursor, or null when there is none
   */
  this.getPrev = prev;
  /**
   * @returns {Boolean} true when the cursor is exactly at the end of the source
   */
  this.isLast = isLast;
  /**
   * @param {String} val
   * @returns {Boolean} true when val occurs at or after the cursor
   */
  this.includes = includes;
  /**
   * Consumes chars while cond(char) is truthy and returns them
   * @param {Function} cond
   * @param {Boolean} silent when truthy, onSkip is not called for consumed chars
   * @return {String}
   */
  this.grabWhile = grabWhile;
  /**
   * Returns the text between the cursor and the next occurrence of char
   * without moving the cursor; '' when char does not occur
   * @param {String} char
   * @return {String}
   */
  this.substrUntilChar = substrUntilChar;
}
|
||||
|
||||
/**
|
||||
* Creates a grabber wrapper for source string, that helps to iterate over string char by char
|
||||
* @param {String} source
|
||||
* @param {Object} options
|
||||
* @param {Function} options.onSkip
|
||||
* @returns
|
||||
* @return CharGrabber
|
||||
*/
|
||||
export const createCharGrabber = (source, options) => {
|
||||
// let idx = 0;
|
||||
const cursor = {
|
||||
pos: 0,
|
||||
length: source.length,
|
||||
};
|
||||
|
||||
const skip = () => {
|
||||
cursor.pos += 1;
|
||||
|
||||
if (options && options.onSkip) {
|
||||
options.onSkip();
|
||||
}
|
||||
};
|
||||
const hasNext = () => cursor.length > cursor.pos;
|
||||
const getRest = () => source.substr(cursor.pos);
|
||||
const getCurr = () => source[cursor.pos];
|
||||
|
||||
return {
|
||||
skip,
|
||||
hasNext,
|
||||
isLast: () => (cursor.pos === cursor.length),
|
||||
/**
|
||||
* @param {Function} cond
|
||||
* @returns {string}
|
||||
*/
|
||||
grabWhile: (cond) => {
|
||||
let start = 0;
|
||||
|
||||
if (hasNext()) {
|
||||
start = cursor.pos;
|
||||
|
||||
while (hasNext() && cond(getCurr())) {
|
||||
skip();
|
||||
}
|
||||
}
|
||||
|
||||
return source.substr(start, cursor.pos - start);
|
||||
},
|
||||
getNext: () => {
|
||||
const nextPos = cursor.pos + 1;
|
||||
|
||||
if (nextPos <= (source.length - 1)) {
|
||||
return source[nextPos];
|
||||
}
|
||||
return null;
|
||||
},
|
||||
getPrev: () => {
|
||||
const prevPos = cursor.pos - 1;
|
||||
|
||||
if (typeof source[prevPos] !== 'undefined') {
|
||||
return source[prevPos];
|
||||
}
|
||||
return null;
|
||||
},
|
||||
getCurr,
|
||||
getRest,
|
||||
/**
|
||||
* Grabs rest of string until it find a char
|
||||
* @param {String} char
|
||||
* @return {String}
|
||||
*/
|
||||
substrUntilChar: (char) => {
|
||||
const restStr = getRest();
|
||||
const indexOfChar = restStr.indexOf(char);
|
||||
|
||||
if (indexOfChar >= 0) {
|
||||
return restStr.substr(0, indexOfChar);
|
||||
}
|
||||
|
||||
return '';
|
||||
},
|
||||
};
|
||||
};
|
||||
export const createCharGrabber = (source, options) => new CharGrabber(source, options);
|
||||
|
||||
/**
|
||||
* Trims string from start and end by char
|
||||
@@ -122,58 +134,26 @@ export const trimChar = (str, charToRemove) => {
|
||||
*/
|
||||
export const unquote = (str) => str.replace(BACKSLASH + QUOTEMARK, QUOTEMARK);
|
||||
|
||||
/**
|
||||
* @typedef {Object} ItemList
|
||||
* @type {Object}
|
||||
* @property {getLastCb} getLast
|
||||
* @property {flushLastCb} flushLast
|
||||
* @property {pushCb} push
|
||||
* @property {toArrayCb} toArray
|
||||
*/
|
||||
/**
 * Thin stack-like wrapper around an array. Meant to be called with `new`.
 *
 * The given array is used as the backing store itself (not copied), so
 * `toArray()` returns the same array instance that was passed in. Anything
 * that is not an array is replaced with a fresh empty array, so `push` and
 * `flushLast` never fail on a missing store.
 *
 * @param {Array} [values=[]] initial items
 */
function NodeList(values = []) {
  const nodes = Array.isArray(values) ? values : [];

  /**
   * @returns {*} last item, or null when the list is empty or the last item is undefined
   */
  const getLast = () => {
    const last = nodes[nodes.length - 1];

    return nodes.length > 0 && typeof last !== 'undefined' ? last : null;
  };
  /**
   * Removes the last item
   * @returns {*} the removed item, or false when the list is empty
   */
  const flushLast = () => (nodes.length ? nodes.pop() : false);
  /**
   * @param {*} value item to append
   * @returns {Number} new length of the list
   */
  const push = (value) => nodes.push(value);
  /**
   * @returns {Array} the backing array itself
   */
  const toArray = () => nodes;

  this.push = push;
  this.toArray = toArray;
  this.getLast = getLast;
  this.flushLast = flushLast;
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param values
|
||||
* @return {ItemList}
|
||||
* @return {NodeList}
|
||||
*/
|
||||
export const createList = (values = []) => {
|
||||
const nodes = values;
|
||||
/**
|
||||
* @callback getLastCb
|
||||
*/
|
||||
const getLast = () => {
|
||||
if (Array.isArray(nodes) && nodes.length > 0 && typeof nodes[nodes.length - 1] !== 'undefined') {
|
||||
return nodes[nodes.length - 1];
|
||||
}
|
||||
|
||||
return null;
|
||||
};
|
||||
/**
|
||||
* @callback flushLastCb
|
||||
* @return {*}
|
||||
*/
|
||||
const flushLast = () => {
|
||||
if (nodes.length) {
|
||||
return nodes.pop();
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
/**
|
||||
* @callback pushCb
|
||||
* @param value
|
||||
*/
|
||||
const push = (value) => nodes.push(value);
|
||||
|
||||
/**
|
||||
* @callback toArrayCb
|
||||
* @return {Array}
|
||||
*/
|
||||
|
||||
return {
|
||||
getLast,
|
||||
flushLast,
|
||||
push,
|
||||
toArray: () => nodes,
|
||||
};
|
||||
};
|
||||
export const createList = (values = []) => new NodeList(values);
|
||||
|
||||
Reference in New Issue
Block a user