mirror of
https://github.com/tenrok/BBob.git
synced 2026-06-08 17:22:26 +03:00
feat(parser): context free tag mode (#165)
* feat(parser): initial context free tag mode
* fix: tests coverage
* chore: update readme
* chore: remove unused badge from readme
This commit is contained in:
@@ -171,5 +171,6 @@ export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
|
||||
export const TYPE_ATTR_VALUE = TOKEN_TYPE_ATTR_VALUE;
|
||||
export const TYPE_SPACE = TOKEN_TYPE_SPACE;
|
||||
export const TYPE_NEW_LINE = TOKEN_TYPE_NEW_LINE;
|
||||
|
||||
export { Token };
|
||||
export default Token;
|
||||
|
||||
@@ -58,16 +58,17 @@ function createLexer(buffer, options = {}) {
|
||||
let tokenIndex = -1;
|
||||
let stateMode = STATE_WORD;
|
||||
let tagMode = TAG_STATE_NAME;
|
||||
let contextFreeTag = '';
|
||||
const tokens = new Array(Math.floor(buffer.length));
|
||||
const openTag = options.openTag || OPEN_BRAKET;
|
||||
const closeTag = options.closeTag || CLOSE_BRAKET;
|
||||
const escapeTags = !!options.enableEscapeTags;
|
||||
const contextFreeTags = options.contextFreeTags || [];
|
||||
const onToken = options.onToken || (() => {
|
||||
});
|
||||
|
||||
const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
|
||||
const NOT_CHAR_TOKENS = [
|
||||
// ...(options.enableEscapeTags ? [BACKSLASH] : []),
|
||||
openTag, SPACE, TAB, N,
|
||||
];
|
||||
const WHITESPACES = [SPACE, TAB];
|
||||
@@ -86,6 +87,16 @@ function createLexer(buffer, options = {}) {
|
||||
|
||||
const unq = (val) => unquote(trimChar(val, QUOTEMARK));
|
||||
|
||||
/**
 * Tracks entering and leaving "context free" mode while tags are tokenized.
 *
 * A closing tag seen while the mode is active switches it off. Afterwards, if
 * the mode is inactive and `name` is listed in `contextFreeTags`, the mode is
 * switched on for that tag.
 *
 * The lookup ignores letter case, so `[CODE]` is recognised when the option
 * lists `code`. The name is stored exactly as it was written, because the
 * lexer later compares the text at the cursor against the stored value when
 * looking for the matching closing tag.
 *
 * @param {String} name tag name as it was grabbed from the buffer
 * @param {Boolean} [isClosingTag=false] true when the caller detected a closing tag
 */
const checkContextFreeMode = (name, isClosingTag = false) => {
  if (contextFreeTag !== '' && isClosingTag) {
    contextFreeTag = '';
  }

  // Still inside a context free tag: nothing can be opened until it is closed.
  if (contextFreeTag !== '') {
    return;
  }

  const wanted = name.toLowerCase();
  const isContextFree = contextFreeTags.some((tag) => String(tag).toLowerCase() === wanted);

  if (isContextFree) {
    contextFreeTag = name;
  }
};
|
||||
|
||||
const chars = createCharGrabber(buffer, { onSkip });
|
||||
|
||||
/**
|
||||
@@ -177,6 +188,7 @@ function createLexer(buffer, options = {}) {
|
||||
const name = tagChars.grabWhile(validName);
|
||||
|
||||
emitToken(TYPE_TAG, name);
|
||||
checkContextFreeMode(name);
|
||||
|
||||
tagChars.skip();
|
||||
|
||||
@@ -192,41 +204,37 @@ function createLexer(buffer, options = {}) {
|
||||
|
||||
function stateTag() {
|
||||
const currChar = chars.getCurr();
|
||||
const nextChar = chars.getNext();
|
||||
|
||||
if (currChar === openTag) {
|
||||
const nextChar = chars.getNext();
|
||||
chars.skip();
|
||||
|
||||
chars.skip();
|
||||
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
|
||||
const substr = chars.substrUntilChar(closeTag);
|
||||
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
|
||||
|
||||
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
|
||||
const substr = chars.substrUntilChar(closeTag);
|
||||
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
|
||||
if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
|
||||
emitToken(TYPE_WORD, currChar);
|
||||
|
||||
if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
|
||||
emitToken(TYPE_WORD, currChar);
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
// [myTag ]
|
||||
const isNoAttrsInTag = substr.indexOf(EQ) === -1;
|
||||
// [/myTag]
|
||||
const isClosingTag = substr[0] === SLASH;
|
||||
|
||||
if (isNoAttrsInTag || isClosingTag) {
|
||||
const name = chars.grabWhile((char) => char !== closeTag);
|
||||
|
||||
chars.skip(); // skip closeTag
|
||||
|
||||
emitToken(TYPE_TAG, name);
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
return STATE_TAG_ATTRS;
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
return STATE_WORD;
|
||||
// [myTag ]
|
||||
const isNoAttrsInTag = substr.indexOf(EQ) === -1;
|
||||
// [/myTag]
|
||||
const isClosingTag = substr[0] === SLASH;
|
||||
|
||||
if (isNoAttrsInTag || isClosingTag) {
|
||||
const name = chars.grabWhile((char) => char !== closeTag);
|
||||
|
||||
chars.skip(); // skip closeTag
|
||||
|
||||
emitToken(TYPE_TAG, name);
|
||||
checkContextFreeMode(name, isClosingTag);
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
return STATE_TAG_ATTRS;
|
||||
}
|
||||
|
||||
function stateAttrs() {
|
||||
@@ -259,13 +267,24 @@ function createLexer(buffer, options = {}) {
|
||||
}
|
||||
|
||||
if (isWhiteSpace(chars.getCurr())) {
|
||||
emitToken(TYPE_SPACE, chars.grabWhile(isWhiteSpace));
|
||||
const word = chars.grabWhile(isWhiteSpace);
|
||||
|
||||
emitToken(TYPE_SPACE, word);
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
if (chars.getCurr() === openTag) {
|
||||
if (chars.includes(closeTag)) {
|
||||
if (contextFreeTag) {
|
||||
const fullTagLen = openTag.length + SLASH.length + contextFreeTag.length;
|
||||
const fullTagName = `${openTag}${SLASH}${contextFreeTag}`;
|
||||
const foundTag = chars.grabN(fullTagLen);
|
||||
const isEndContextFreeMode = foundTag === fullTagName;
|
||||
|
||||
if (isEndContextFreeMode) {
|
||||
return STATE_TAG;
|
||||
}
|
||||
} else if (chars.includes(closeTag)) {
|
||||
return STATE_TAG;
|
||||
}
|
||||
|
||||
@@ -298,12 +317,16 @@ function createLexer(buffer, options = {}) {
|
||||
|
||||
const isChar = (char) => isCharToken(char) && !isEscapeChar(char);
|
||||
|
||||
emitToken(TYPE_WORD, chars.grabWhile(isChar));
|
||||
const word = chars.grabWhile(isChar);
|
||||
|
||||
emitToken(TYPE_WORD, word);
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
|
||||
emitToken(TYPE_WORD, chars.grabWhile(isCharToken));
|
||||
const word = chars.grabWhile(isCharToken);
|
||||
|
||||
emitToken(TYPE_WORD, word);
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
@@ -320,10 +343,8 @@ function createLexer(buffer, options = {}) {
|
||||
stateMode = stateAttrs();
|
||||
break;
|
||||
case STATE_WORD:
|
||||
stateMode = stateWord();
|
||||
break;
|
||||
default:
|
||||
stateMode = STATE_WORD;
|
||||
stateMode = stateWord();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,9 +10,10 @@ import { createList } from './utils';
|
||||
* @param {Object} opts
|
||||
* @param {Function} opts.createTokenizer
|
||||
* @param {Array<string>} opts.onlyAllowTags
|
||||
* @param {Array<string>} opts.contextFreeTags
|
||||
* @param {Boolean} opts.enableEscapeTags
|
||||
* @param {String} opts.openTag
|
||||
* @param {String} opts.closeTag
|
||||
* @param {Boolean} opts.enableEscapeTags
|
||||
* @return {Array}
|
||||
*/
|
||||
const parse = (input, opts = {}) => {
|
||||
@@ -258,9 +259,10 @@ const parse = (input, opts = {}) => {
|
||||
|
||||
tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, {
|
||||
onToken,
|
||||
onlyAllowTags: options.onlyAllowTags,
|
||||
openTag,
|
||||
closeTag,
|
||||
onlyAllowTags: options.onlyAllowTags,
|
||||
contextFreeTags: options.contextFreeTags,
|
||||
enableEscapeTags: options.enableEscapeTags,
|
||||
});
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ function CharGrabber(source, options) {
|
||||
const { pos } = cursor;
|
||||
const idx = source.indexOf(char, pos);
|
||||
|
||||
return idx >= 0 ? source.substr(pos, idx - pos) : '';
|
||||
return idx >= 0 ? source.substring(pos, idx) : '';
|
||||
};
|
||||
const includes = (val) => source.indexOf(val, cursor.pos) >= 0;
|
||||
const hasNext = () => cursor.len > cursor.pos;
|
||||
@@ -25,7 +25,8 @@ function CharGrabber(source, options) {
|
||||
options.onSkip();
|
||||
}
|
||||
};
|
||||
const rest = () => source.substr(cursor.pos);
|
||||
const rest = () => source.substring(cursor.pos);
|
||||
/**
 * Returns up to `num` characters of the source starting at the current
 * cursor position. Only reads the cursor; the position is left unchanged.
 *
 * @param {Number} [num=0] how many characters to read
 * @return {String}
 */
const grabN = (num = 0) => {
  const { pos } = cursor;

  return source.substring(pos, pos + num);
};
|
||||
const curr = () => source[cursor.pos];
|
||||
const prev = () => {
|
||||
const prevPos = cursor.pos - 1;
|
||||
@@ -48,7 +49,7 @@ function CharGrabber(source, options) {
|
||||
}
|
||||
}
|
||||
|
||||
return source.substr(start, cursor.pos - start);
|
||||
return source.substring(start, cursor.pos);
|
||||
};
|
||||
/**
|
||||
* @type {skip}
|
||||
@@ -88,6 +89,11 @@ function CharGrabber(source, options) {
|
||||
* @return {String}
|
||||
*/
|
||||
this.grabWhile = grabWhile;
|
||||
/**
|
||||
* @param {Number} num
|
||||
* @return {String}
|
||||
*/
|
||||
this.grabN = grabN;
|
||||
/**
|
||||
* Grabs rest of string until it find a char
|
||||
* @param {String} char
|
||||
|
||||
Reference in New Issue
Block a user