mirror of
https://github.com/tenrok/BBob.git
synced 2026-06-08 17:22:26 +03:00
feat(parser): custom open and close tags support, html tags tests (#3)
This commit is contained in:
committed by
GitHub
parent
f5fd078eca
commit
790825af30
@@ -53,14 +53,14 @@ const convertTagToText = (token) => {
|
||||
|
||||
class Token {
|
||||
constructor(type, value, line, row) {
|
||||
this.type = String(type);
|
||||
this.value = String(value);
|
||||
this.line = Number(line);
|
||||
this.row = Number(row);
|
||||
this[TOKEN_TYPE_ID] = String(type);
|
||||
this[TOKEN_VALUE_ID] = String(value);
|
||||
this[TOKEN_LINE_ID] = Number(line);
|
||||
this[TOKEN_COLUMN_ID] = Number(row);
|
||||
}
|
||||
|
||||
isEmpty() {
|
||||
return !!this.type;
|
||||
return !!this[TOKEN_TYPE_ID];
|
||||
}
|
||||
|
||||
isText() {
|
||||
|
||||
@@ -13,13 +13,7 @@ import {
|
||||
|
||||
import { Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD } from './Token';
|
||||
|
||||
const RESERVED_CHARS = [CLOSE_BRAKET, OPEN_BRAKET, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N];
|
||||
const NOT_CHAR_TOKENS = [OPEN_BRAKET, SPACE, TAB, N];
|
||||
const WHITESPACES = [SPACE, TAB];
|
||||
|
||||
const isCharReserved = char => (RESERVED_CHARS.indexOf(char) >= 0);
|
||||
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
|
||||
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
|
||||
const EM = '!';
|
||||
|
||||
const createCharGrabber = (source) => {
|
||||
let idx = 0;
|
||||
@@ -69,6 +63,19 @@ function createLexer(buffer, options = {}) {
|
||||
|
||||
let tokenIndex = -1;
|
||||
const tokens = new Array(Math.floor(buffer.length));
|
||||
const openTag = options.openTag || OPEN_BRAKET;
|
||||
const closeTag = options.closeTag || CLOSE_BRAKET;
|
||||
|
||||
const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
|
||||
const NOT_CHAR_TOKENS = [openTag, SPACE, TAB, N];
|
||||
const WHITESPACES = [SPACE, TAB];
|
||||
const SPECIAL_CHARS = [EQ, SPACE, TAB];
|
||||
|
||||
const isCharReserved = char => (RESERVED_CHARS.indexOf(char) >= 0);
|
||||
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
|
||||
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
|
||||
const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
|
||||
|
||||
const emitToken = (token) => {
|
||||
if (options.onToken) {
|
||||
options.onToken(token);
|
||||
@@ -80,37 +87,46 @@ function createLexer(buffer, options = {}) {
|
||||
|
||||
const parseAttrs = (str) => {
|
||||
let tagName = null;
|
||||
let skipSpaces = false;
|
||||
let skipSpecialChars = false;
|
||||
|
||||
const attrTokens = [];
|
||||
const attrCharGrabber = createCharGrabber(str);
|
||||
const validAttr = (val) => {
|
||||
const isEQ = val === EQ;
|
||||
const isWS = isWhiteSpace(val);
|
||||
const isPrevSLASH = attrCharGrabber.getPrev() === SLASH;
|
||||
|
||||
if (tagName === null) {
|
||||
return !(isEQ || isWS || attrCharGrabber.isLast());
|
||||
const validAttr = (char) => {
|
||||
const isEQ = char === EQ;
|
||||
const isWS = isWhiteSpace(char);
|
||||
const prevChar = attrCharGrabber.getPrev();
|
||||
const nextChar = attrCharGrabber.getNext();
|
||||
const isPrevSLASH = prevChar === BACKSLASH;
|
||||
const isTagNameEmpty = tagName === null;
|
||||
|
||||
if (isTagNameEmpty) {
|
||||
return (isEQ || isWS || attrCharGrabber.isLast()) === false;
|
||||
}
|
||||
|
||||
if (skipSpaces && isWS) {
|
||||
if (skipSpecialChars && isSpecialChar(char)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (val === QUOTEMARK && !isPrevSLASH) {
|
||||
skipSpaces = !skipSpaces;
|
||||
if (char === QUOTEMARK && !isPrevSLASH) {
|
||||
skipSpecialChars = !skipSpecialChars;
|
||||
|
||||
if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return !(isEQ || isWS);
|
||||
return (isEQ || isWS) === false;
|
||||
};
|
||||
|
||||
const nextAttr = () => {
|
||||
const attrStr = attrCharGrabber.grabWhile(validAttr);
|
||||
const currChar = attrCharGrabber.getCurr();
|
||||
|
||||
// first string before space is a tag name
|
||||
if (tagName === null) {
|
||||
tagName = attrStr;
|
||||
} else if (isWhiteSpace(attrCharGrabber.getCurr()) || !attrCharGrabber.hasNext()) {
|
||||
} else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) {
|
||||
const escaped = unquote(trimChar(attrStr, QUOTEMARK));
|
||||
attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col));
|
||||
} else {
|
||||
@@ -127,29 +143,29 @@ function createLexer(buffer, options = {}) {
|
||||
return { tag: tagName, attrs: attrTokens };
|
||||
};
|
||||
|
||||
const grabber = createCharGrabber(buffer);
|
||||
const bufferGrabber = createCharGrabber(buffer);
|
||||
|
||||
const next = () => {
|
||||
const char = grabber.getCurr();
|
||||
const char = bufferGrabber.getCurr();
|
||||
|
||||
if (char === N) {
|
||||
grabber.skip();
|
||||
bufferGrabber.skip();
|
||||
col = 0;
|
||||
row++;
|
||||
|
||||
emitToken(createToken(TYPE_NEW_LINE, char, row, col));
|
||||
} else if (isWhiteSpace(char)) {
|
||||
const str = grabber.grabWhile(isWhiteSpace);
|
||||
const str = bufferGrabber.grabWhile(isWhiteSpace);
|
||||
emitToken(createToken(TYPE_SPACE, str, row, col));
|
||||
} else if (char === OPEN_BRAKET) {
|
||||
const nextChar = grabber.getNext();
|
||||
grabber.skip(); // skip [
|
||||
} else if (char === openTag) {
|
||||
const nextChar = bufferGrabber.getNext();
|
||||
bufferGrabber.skip(); // skip [
|
||||
|
||||
if (isCharReserved(nextChar)) {
|
||||
emitToken(createToken(TYPE_WORD, char, row, col));
|
||||
} else {
|
||||
const str = grabber.grabWhile(val => val !== CLOSE_BRAKET);
|
||||
grabber.skip(); // skip ]
|
||||
const str = bufferGrabber.grabWhile(val => val !== closeTag);
|
||||
bufferGrabber.skip(); // skip ]
|
||||
|
||||
if (!(str.indexOf(EQ) > 0) || str[0] === SLASH) {
|
||||
emitToken(createToken(TYPE_TAG, str, row, col));
|
||||
@@ -160,19 +176,19 @@ function createLexer(buffer, options = {}) {
|
||||
parsed.attrs.map(emitToken);
|
||||
}
|
||||
}
|
||||
} else if (char === CLOSE_BRAKET) {
|
||||
grabber.skip();
|
||||
} else if (char === closeTag) {
|
||||
bufferGrabber.skip();
|
||||
|
||||
emitToken(createToken(TYPE_WORD, char, row, col));
|
||||
} else if (isCharToken(char)) {
|
||||
const str = grabber.grabWhile(isCharToken);
|
||||
const str = bufferGrabber.grabWhile(isCharToken);
|
||||
|
||||
emitToken(createToken(TYPE_WORD, str, row, col));
|
||||
}
|
||||
};
|
||||
|
||||
const tokenize = () => {
|
||||
while (grabber.hasNext()) {
|
||||
while (bufferGrabber.hasNext()) {
|
||||
next();
|
||||
}
|
||||
|
||||
@@ -182,7 +198,7 @@ function createLexer(buffer, options = {}) {
|
||||
};
|
||||
|
||||
const isTokenNested = (token) => {
|
||||
const value = OPEN_BRAKET + SLASH + token.getValue();
|
||||
const value = openTag + SLASH + token.getValue();
|
||||
return buffer.indexOf(value) > -1;
|
||||
};
|
||||
|
||||
|
||||
@@ -28,8 +28,6 @@ let tokenizer = null;
|
||||
// eslint-disable-next-line no-unused-vars
|
||||
let tokens = null;
|
||||
|
||||
const createTokenizer = (input, onToken) => createLexer(input, { onToken });
|
||||
|
||||
/**
|
||||
* @private
|
||||
* @param token
|
||||
@@ -41,7 +39,7 @@ const isTagNested = token => tokenizer.isTokenNested(token);
|
||||
* @private
|
||||
* @return {TagNode}
|
||||
*/
|
||||
const getTagNode = () => (tagNodes.length ? tagNodes[tagNodes.length - 1] : null);
|
||||
const getLastTagNode = () => (tagNodes.length ? tagNodes[tagNodes.length - 1] : null);
|
||||
|
||||
/**
|
||||
* @private
|
||||
@@ -61,7 +59,7 @@ const createTagNodeAttrName = token => tagNodesAttrName.push(token.getValue());
|
||||
* @return {Array}
|
||||
*/
|
||||
const getTagNodeAttrName = () =>
|
||||
(tagNodesAttrName.length ? tagNodesAttrName[tagNodesAttrName.length - 1] : getTagNode().tag);
|
||||
(tagNodesAttrName.length ? tagNodesAttrName[tagNodesAttrName.length - 1] : null);
|
||||
|
||||
/**
|
||||
* @private
|
||||
@@ -92,6 +90,7 @@ const clearTagNode = () => {
|
||||
const getNodes = () => {
|
||||
if (nestedNodes.length) {
|
||||
const nestedNode = nestedNodes[nestedNodes.length - 1];
|
||||
|
||||
return nestedNode.content;
|
||||
}
|
||||
|
||||
@@ -127,9 +126,9 @@ const handleTagStart = (token) => {
|
||||
createTagNode(token);
|
||||
|
||||
if (isTagNested(token)) {
|
||||
nestedNodes.push(getTagNode());
|
||||
nestedNodes.push(getLastTagNode());
|
||||
} else {
|
||||
appendNode(getTagNode());
|
||||
appendNode(getLastTagNode());
|
||||
clearTagNode();
|
||||
}
|
||||
}
|
||||
@@ -151,6 +150,7 @@ const handleTagEnd = (token) => {
|
||||
const tag = token.getValue();
|
||||
const line = token.getLine();
|
||||
const column = token.getColumn();
|
||||
|
||||
options.onError({
|
||||
message: `Inconsistent tag '${tag}' on line ${line} and column ${column}`,
|
||||
lineNumber: line,
|
||||
@@ -183,15 +183,22 @@ const handleTagToken = (token) => {
|
||||
* @param {Token} token
|
||||
*/
|
||||
const handleTagNode = (token) => {
|
||||
const tagNode = getTagNode();
|
||||
const tagNode = getLastTagNode();
|
||||
|
||||
if (tagNode) {
|
||||
if (token.isAttrName()) {
|
||||
createTagNodeAttrName(token);
|
||||
tagNode.attr(getTagNodeAttrName(), null);
|
||||
tagNode.attr(getTagNodeAttrName(), '');
|
||||
} else if (token.isAttrValue()) {
|
||||
tagNode.attr(getTagNodeAttrName(), token.getValue());
|
||||
clearTagNodeAttrName();
|
||||
const attrName = getTagNodeAttrName();
|
||||
const attrValue = token.getValue();
|
||||
|
||||
if (attrName) {
|
||||
tagNode.attr(getTagNodeAttrName(), attrValue);
|
||||
clearTagNodeAttrName();
|
||||
} else {
|
||||
tagNode.attr(attrValue, attrValue);
|
||||
}
|
||||
} else if (token.isText()) {
|
||||
tagNode.append(token.getValue());
|
||||
}
|
||||
@@ -215,7 +222,12 @@ const parseToken = (token) => {
|
||||
*/
|
||||
const parse = (input, opts = {}) => {
|
||||
options = opts;
|
||||
tokenizer = (opts.createTokenizer ? opts.createTokenizer : createTokenizer)(input, parseToken);
|
||||
tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, {
|
||||
onToken: parseToken,
|
||||
onlyAllowTags: options.onlyAllowTags,
|
||||
openTag: options.openTag,
|
||||
closeTag: options.closeTag,
|
||||
});
|
||||
|
||||
nodes = [];
|
||||
nestedNodes = [];
|
||||
|
||||
Reference in New Issue
Block a user