2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

feat(parser): rewrite lexer to make it faster (#50)

* feat(parser): first iteration of new lexer

* feat(parser): convert token string props to number props

* refactor(parser): optimize char grabber

* refactor(parser): working on new lexer

* refactor(parser): convert token string props to number props

* refactor(parser): rebuild lexer, add tag attrs parsing

* refactor(parser): rework word parsing and tag parsing

* refactor(parser): rework to pass tests

* refactor(parser): rework tag parsing

* refactor(parser): rework escape tags parsing

* refactor(parser): rework tests

* refactor(parser): all test pass

* refactor(parser): make lexer faster by move mode switching in loop

* refactor(parser): remove all state map objects

* refactor(parser): order of parsing states

* refactor(parser): state switching without return

* refactor(parser): rename buffers to chars

* refactor(lexer): reduce function calls

* feat(lexer): add new parser tests and code to pass it

* fix(utils): remove unused variable in char grabber

* feat(lexer): add test for new lexer bug

* chore(*): add lexer and lexer2 to benchmark

* chore(lexer): add some debug info for char grabber

* feat(parser): add new test for single attributes without values

* fix(lexer): paired tags tests

* refactor(lexer): comment breaking changes tests for future releases

* feat(core): improve tests

* refactor(parser): add more tests, reduce char grabber size

* refactor(parser): reduce utils size

* refactor(parser): remove unused code from tag parsing code

* refactor(parser): remove unused code from word to tag transforming code

* chore(benchmark): fix benchmark imports
This commit is contained in:
Nikolay Kostyurin
2020-12-09 01:03:48 +02:00
committed by GitHub
parent fda6ddd6ee
commit 772d422d77
13 changed files with 998 additions and 359 deletions
+17 -3
View File
@@ -1,3 +1,4 @@
/* eslint-disable global-require */
const Benchmark = require('benchmark');
const stub = require('./test/stub');
@@ -38,9 +39,22 @@ suite
addInLineBreaks: false,
});
})
.add('@bbob/parser', () => require('../packages/bbob-parser/lib/index').parse(stub, {
onlyAllowTags: ['ch'],
}))
.add('@bbob/parser lexer old', () => {
const lexer1 = require('../packages/bbob-parser/lib/lexer_old');
return require('../packages/bbob-parser/lib/index').parse(stub, {
onlyAllowTags: ['ch'],
createTokenizer: lexer1.createLexer,
});
})
.add('@bbob/parser lexer', () => {
const lexer2 = require('../packages/bbob-parser/lib/lexer');
return require('../packages/bbob-parser/lib/index').parse(stub, {
onlyAllowTags: ['ch'],
createTokenizer: lexer2.createLexer,
});
})
// add listeners
.on('cycle', (event) => {
console.log(String(event.target));
+46 -10
View File
@@ -1,4 +1,7 @@
import { iterate } from '../src/utils';
import { iterate, match } from '../src/utils';
const stringify = val => JSON.stringify(val);
describe('@bbob/core utils', () => {
test('iterate', () => {
@@ -16,14 +19,47 @@ describe('@bbob/core utils', () => {
return node;
});
expect(resultArr).toEqual([{
one: true,
pass: 1,
content: [{ oneInside: true, pass: 1, }]
}, {
two: true,
pass: 1,
content: [{ twoInside: true, pass: 1, }]
}]);
const expected = [
{
one: true,
content: [{ oneInside: true, pass: 1, }],
pass: 1,
}, {
two: true,
content: [{ twoInside: true, pass: 1, }],
pass: 1,
}
];
expect(stringify(resultArr)).toEqual(stringify(expected));
});
test('match', () => {
const testArr = [
{ tag: 'mytag1', one: 1 },
{ tag: 'mytag2', two: 1 },
{ tag: 'mytag3', three: 1 },
{ tag: 'mytag4', four: 1 },
{ tag: 'mytag5', five: 1 },
{ tag: 'mytag6', six: 1 },
];
testArr.match = match;
const resultArr = testArr.match([{ tag: 'mytag1' }, { tag: 'mytag2' }], node => {
node.pass = 1;
return node;
});
const expected = [
{ tag: 'mytag1', one: 1, pass: 1 },
{ tag: 'mytag2', two: 1, pass: 1 },
{ tag: 'mytag3', three: 1 },
{ tag: 'mytag4', four: 1 },
{ tag: 'mytag5', five: 1 },
{ tag: 'mytag6', six: 1 },
];
expect(stringify(resultArr)).toEqual(stringify(expected))
})
});
+1 -1
View File
@@ -35,7 +35,7 @@
"build:es": "../../node_modules/.bin/cross-env BABEL_ENV=es NODE_ENV=production ../../node_modules/.bin/babel src --out-dir es",
"build:umd": "../../node_modules/.bin/cross-env BABEL_ENV=rollup NODE_ENV=production ../../node_modules/.bin/rollup --config ../../rollup.config.js",
"build": "npm run build:commonjs && npm run build:es && npm run build:umd",
"test": "../../node_modules/.bin/jest --",
"test": "../../node_modules/.bin/jest",
"cover": "../../node_modules/.bin/jest --coverage",
"lint": "../../node_modules/.bin/eslint .",
"size": "../../node_modules/.bin/cross-env NODE_ENV=production ../../node_modules/.bin/size-limit",
+9 -8
View File
@@ -10,12 +10,12 @@ const TOKEN_VALUE_ID = 'value'; // 1;
const TOKEN_COLUMN_ID = 'row'; // 2;
const TOKEN_LINE_ID = 'line'; // 3;
const TOKEN_TYPE_WORD = 'word';
const TOKEN_TYPE_TAG = 'tag';
const TOKEN_TYPE_ATTR_NAME = 'attr-name';
const TOKEN_TYPE_ATTR_VALUE = 'attr-value';
const TOKEN_TYPE_SPACE = 'space';
const TOKEN_TYPE_NEW_LINE = 'new-line';
const TOKEN_TYPE_WORD = 1; // 'word';
const TOKEN_TYPE_TAG = 2; // 'tag';
const TOKEN_TYPE_ATTR_NAME = 3; // 'attr-name';
const TOKEN_TYPE_ATTR_VALUE = 4; // 'attr-value';
const TOKEN_TYPE_SPACE = 5; // 'space';
const TOKEN_TYPE_NEW_LINE = 6; // 'new-line';
/**
* @param {Token} token
@@ -105,14 +105,15 @@ class Token {
* @param row
*/
constructor(type, value, line, row) {
this[TOKEN_TYPE_ID] = String(type);
this[TOKEN_TYPE_ID] = Number(type);
this[TOKEN_VALUE_ID] = String(value);
this[TOKEN_LINE_ID] = Number(line);
this[TOKEN_COLUMN_ID] = Number(row);
}
isEmpty() {
return !!this[TOKEN_TYPE_ID];
// eslint-disable-next-line no-restricted-globals
return isNaN(this[TOKEN_TYPE_ID]);
}
isText() {
+236 -133
View File
@@ -21,7 +21,7 @@ const EM = '!';
/**
* Creates a Token entity class
* @param {String} type
* @param {Number} type
* @param {String} value
* @param {Number} r line number
* @param {Number} cl char number in line
@@ -44,14 +44,26 @@ const createToken = (type, value, r = 0, cl = 0) => new Token(type, value, r, cl
* @return {Lexer}
*/
function createLexer(buffer, options = {}) {
const STATE_WORD = 0;
const STATE_TAG = 1;
const STATE_TAG_ATTRS = 2;
const TAG_STATE_NAME = 0;
const TAG_STATE_ATTR = 1;
const TAG_STATE_VALUE = 2;
let row = 0;
let col = 0;
let tokenIndex = -1;
let stateMode = STATE_WORD;
let tagMode = TAG_STATE_NAME;
const tokens = new Array(Math.floor(buffer.length));
const openTag = options.openTag || OPEN_BRAKET;
const closeTag = options.closeTag || CLOSE_BRAKET;
const escapeTags = options.enableEscapeTags;
const escapeTags = !!options.enableEscapeTags;
const onToken = options.onToken || (() => {
});
const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
const NOT_CHAR_TOKENS = [
@@ -62,175 +74,266 @@ function createLexer(buffer, options = {}) {
const SPECIAL_CHARS = [EQ, SPACE, TAB];
const isCharReserved = (char) => (RESERVED_CHARS.indexOf(char) >= 0);
const isNewLine = (char) => char === N;
const isWhiteSpace = (char) => (WHITESPACES.indexOf(char) >= 0);
const isCharToken = (char) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
const isSpecialChar = (char) => (SPECIAL_CHARS.indexOf(char) >= 0);
const isEscapableChar = (char) => (char === openTag || char === closeTag || char === BACKSLASH);
const isEscapeChar = (char) => char === BACKSLASH;
const onSkip = () => {
col++;
};
const unq = (val) => unquote(trimChar(val, QUOTEMARK));
const chars = createCharGrabber(buffer, { onSkip });
/**
* Emits newly created token to subscriber
* @param token
* @param {Number} type
* @param {String} value
*/
const emitToken = (token) => {
if (options.onToken) {
options.onToken(token);
}
function emitToken(type, value) {
const token = createToken(type, value, row, col);
onToken(token);
tokenIndex += 1;
tokens[tokenIndex] = token;
};
}
/**
* Parses params inside [myTag---params goes here---]content[/myTag]
* @param str
* @returns {{tag: *, attrs: Array}}
*/
const parseAttrs = (str) => {
let tagName = null;
let skipSpecialChars = false;
function nextTagState(tagChars, isSingleValueTag) {
if (tagMode === TAG_STATE_ATTR) {
const validAttrName = (char) => !(char === EQ || isWhiteSpace(char));
const name = tagChars.grabWhile(validAttrName);
const isEnd = tagChars.isLast();
const isValue = tagChars.getCurr() !== EQ;
const attrTokens = [];
const attrCharGrabber = createCharGrabber(str);
tagChars.skip();
const validAttr = (char) => {
const isEQ = char === EQ;
const isWS = isWhiteSpace(char);
const prevChar = attrCharGrabber.getPrev();
const nextChar = attrCharGrabber.getNext();
const isPrevSLASH = prevChar === BACKSLASH;
const isTagNameEmpty = tagName === null;
if (isTagNameEmpty) {
return (isEQ || isWS || attrCharGrabber.isLast()) === false;
}
if (skipSpecialChars && isSpecialChar(char)) {
return true;
}
if (char === QUOTEMARK && !isPrevSLASH) {
skipSpecialChars = !skipSpecialChars;
if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) {
return false;
}
}
return (isEQ || isWS) === false;
};
const nextAttr = () => {
const attrStr = attrCharGrabber.grabWhile(validAttr);
const currChar = attrCharGrabber.getCurr();
// first string before space is a tag name [tagName params...]
if (tagName === null) {
tagName = attrStr;
} else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) {
const escaped = unquote(trimChar(attrStr, QUOTEMARK));
attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col));
if (isEnd || isValue) {
emitToken(TYPE_ATTR_VALUE, unq(name));
} else {
attrTokens.push(createToken(TYPE_ATTR_NAME, attrStr, row, col));
emitToken(TYPE_ATTR_NAME, name);
}
attrCharGrabber.skip();
};
if (isEnd) {
return TAG_STATE_NAME;
}
while (attrCharGrabber.hasNext()) {
nextAttr();
if (isValue) {
return TAG_STATE_ATTR;
}
return TAG_STATE_VALUE;
}
if (tagMode === TAG_STATE_VALUE) {
let stateSpecial = false;
const validAttrValue = (char) => {
// const isEQ = char === EQ;
const isQM = char === QUOTEMARK;
const prevChar = tagChars.getPrev();
const nextChar = tagChars.getNext();
const isPrevSLASH = prevChar === BACKSLASH;
const isNextEQ = nextChar === EQ;
const isWS = isWhiteSpace(char);
// const isPrevWS = isWhiteSpace(prevChar);
const isNextWS = isWhiteSpace(nextChar);
if (stateSpecial && isSpecialChar(char)) {
return true;
}
if (isQM && !isPrevSLASH) {
stateSpecial = !stateSpecial;
if (!stateSpecial && !(isNextEQ || isNextWS)) {
return false;
}
}
if (!isSingleValueTag) {
return isWS === false;
// return (isEQ || isWS) === false;
}
return true;
};
const name = tagChars.grabWhile(validAttrValue);
tagChars.skip();
emitToken(TYPE_ATTR_VALUE, unq(name));
if (tagChars.isLast()) {
return TAG_STATE_NAME;
}
return TAG_STATE_ATTR;
}
return { tag: tagName, attrs: attrTokens };
};
const validName = (char) => !(char === EQ || isWhiteSpace(char) || tagChars.isLast());
const name = tagChars.grabWhile(validName);
const bufferGrabber = createCharGrabber(buffer, {
onSkip: () => {
col++;
},
});
emitToken(TYPE_TAG, name);
const next = () => {
const currChar = bufferGrabber.getCurr();
const nextChar = bufferGrabber.getNext();
tagChars.skip();
// in cases when we has [url=someval]GET[/url] and we dont need to parse all
if (isSingleValueTag) {
return TAG_STATE_VALUE;
}
const hasEQ = tagChars.includes(EQ);
return hasEQ ? TAG_STATE_ATTR : TAG_STATE_VALUE;
}
function stateTag() {
const currChar = chars.getCurr();
if (currChar === openTag) {
const nextChar = chars.getNext();
chars.skip();
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = chars.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
emitToken(TYPE_WORD, currChar);
return STATE_WORD;
}
// [myTag ]
const isNoAttrsInTag = substr.indexOf(EQ) === -1;
// [/myTag]
const isClosingTag = substr[0] === SLASH;
if (isNoAttrsInTag || isClosingTag) {
const name = chars.grabWhile((char) => char !== closeTag);
chars.skip(); // skip closeTag
emitToken(TYPE_TAG, name);
return STATE_WORD;
}
return STATE_TAG_ATTRS;
}
return STATE_WORD;
}
function stateAttrs() {
const silent = true;
const tagStr = chars.grabWhile((char) => char !== closeTag, silent);
const tagGrabber = createCharGrabber(tagStr, { onSkip });
const hasSpace = tagGrabber.includes(SPACE);
while (tagGrabber.hasNext()) {
tagMode = nextTagState(tagGrabber, !hasSpace);
}
chars.skip(); // skip closeTag
return STATE_WORD;
}
function stateWord() {
if (isNewLine(chars.getCurr())) {
emitToken(TYPE_NEW_LINE, chars.getCurr());
chars.skip();
if (currChar === N) {
bufferGrabber.skip();
col = 0;
row++;
emitToken(createToken(TYPE_NEW_LINE, currChar, row, col));
} else if (isWhiteSpace(currChar)) {
const str = bufferGrabber.grabWhile(isWhiteSpace);
emitToken(createToken(TYPE_SPACE, str, row, col));
} else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
bufferGrabber.skip(); // skip the \ without emitting anything
bufferGrabber.skip(); // skip past the [, ] or \ as well
emitToken(createToken(TYPE_WORD, nextChar, row, col));
} else if (currChar === openTag) {
bufferGrabber.skip(); // skip openTag
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = bufferGrabber.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else {
const str = bufferGrabber.grabWhile((val) => val !== closeTag);
bufferGrabber.skip(); // skip closeTag
// [myTag ]
const isNoAttrsInTag = str.indexOf(EQ) === -1;
// [/myTag]
const isClosingTag = str[0] === SLASH;
if (isNoAttrsInTag || isClosingTag) {
emitToken(createToken(TYPE_TAG, str, row, col));
} else {
const parsed = parseAttrs(str);
emitToken(createToken(TYPE_TAG, parsed.tag, row, col));
parsed.attrs.map(emitToken);
}
}
} else if (currChar === closeTag) {
bufferGrabber.skip(); // skip closeTag
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else if (isCharToken(currChar)) {
if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
bufferGrabber.skip();
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else {
const str = bufferGrabber.grabWhile((char) => {
if (escapeTags) {
return isCharToken(char) && !isEscapeChar(char);
}
return isCharToken(char);
});
emitToken(createToken(TYPE_WORD, str, row, col));
}
return STATE_WORD;
}
};
const tokenize = () => {
while (bufferGrabber.hasNext()) {
next();
if (isWhiteSpace(chars.getCurr())) {
emitToken(TYPE_SPACE, chars.grabWhile(isWhiteSpace));
return STATE_WORD;
}
if (chars.getCurr() === openTag) {
if (chars.includes(closeTag)) {
return STATE_TAG;
}
emitToken(TYPE_WORD, chars.getCurr());
chars.skip();
return STATE_WORD;
}
if (escapeTags) {
if (isEscapeChar(chars.getCurr())) {
const currChar = chars.getCurr();
const nextChar = chars.getNext();
chars.skip(); // skip the \ without emitting anything
if (isEscapableChar(nextChar)) {
chars.skip(); // skip past the [, ] or \ as well
emitToken(TYPE_WORD, nextChar);
return STATE_WORD;
}
emitToken(TYPE_WORD, currChar);
return STATE_WORD;
}
const isChar = (char) => isCharToken(char) && !isEscapeChar(char);
emitToken(TYPE_WORD, chars.grabWhile(isChar));
return STATE_WORD;
}
emitToken(TYPE_WORD, chars.grabWhile(isCharToken));
return STATE_WORD;
}
function tokenize() {
while (chars.hasNext()) {
switch (stateMode) {
case STATE_TAG:
stateMode = stateTag();
break;
case STATE_TAG_ATTRS:
stateMode = stateAttrs();
break;
case STATE_WORD:
stateMode = stateWord();
break;
default:
stateMode = STATE_WORD;
break;
}
}
tokens.length = tokenIndex + 1;
return tokens;
};
}
const isTokenNested = (token) => {
function isTokenNested(token) {
const value = openTag + SLASH + token.getValue();
// potential bottleneck
return buffer.indexOf(value) > -1;
};
}
return {
tokenize,
+242
View File
@@ -0,0 +1,242 @@
/* eslint-disable no-plusplus,no-param-reassign */
import {
OPEN_BRAKET,
CLOSE_BRAKET,
QUOTEMARK,
BACKSLASH,
SLASH,
SPACE,
TAB,
EQ,
N,
} from '@bbob/plugin-helper/lib/char';
import {
Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD,
} from './Token';
import { createCharGrabber, trimChar, unquote } from './utils';
// for cases <!-- -->
const EM = '!';
/**
* Creates a Token entity class
* @param {String} type
* @param {String} value
* @param {Number} r line number
* @param {Number} cl char number in line
*/
const createToken = (type, value, r = 0, cl = 0) => new Token(type, value, r, cl);
/**
* @typedef {Object} Lexer
* @property {Function} tokenize
* @property {Function} isTokenNested
*/
/**
* @param {String} buffer
* @param {Object} options
* @param {Function} options.onToken
* @param {String} options.openTag
* @param {String} options.closeTag
* @param {Boolean} options.enableEscapeTags
* @return {Lexer}
*/
function createLexer(buffer, options = {}) {
let row = 0;
let col = 0;
let tokenIndex = -1;
const tokens = new Array(Math.floor(buffer.length));
const openTag = options.openTag || OPEN_BRAKET;
const closeTag = options.closeTag || CLOSE_BRAKET;
const escapeTags = options.enableEscapeTags;
const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
const NOT_CHAR_TOKENS = [
// ...(options.enableEscapeTags ? [BACKSLASH] : []),
openTag, SPACE, TAB, N,
];
const WHITESPACES = [SPACE, TAB];
const SPECIAL_CHARS = [EQ, SPACE, TAB];
const isCharReserved = (char) => (RESERVED_CHARS.indexOf(char) >= 0);
const isWhiteSpace = (char) => (WHITESPACES.indexOf(char) >= 0);
const isCharToken = (char) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
const isSpecialChar = (char) => (SPECIAL_CHARS.indexOf(char) >= 0);
const isEscapableChar = (char) => (char === openTag || char === closeTag || char === BACKSLASH);
const isEscapeChar = (char) => char === BACKSLASH;
/**
* Emits newly created token to subscriber
* @param token
*/
const emitToken = (token) => {
if (options.onToken) {
options.onToken(token);
}
tokenIndex += 1;
tokens[tokenIndex] = token;
};
/**
* Parses params inside [myTag---params goes here---]content[/myTag]
* @param str
* @returns {{tag: *, attrs: Array}}
*/
const parseAttrs = (str) => {
let tagName = null;
let skipSpecialChars = false;
const attrTokens = [];
const attrCharGrabber = createCharGrabber(str);
const validAttr = (char) => {
const isEQ = char === EQ;
const isWS = isWhiteSpace(char);
const prevChar = attrCharGrabber.getPrev();
const nextChar = attrCharGrabber.getNext();
const isPrevSLASH = prevChar === BACKSLASH;
const isTagNameEmpty = tagName === null;
if (isTagNameEmpty) {
return (isEQ || isWS || attrCharGrabber.isLast()) === false;
}
if (skipSpecialChars && isSpecialChar(char)) {
return true;
}
if (char === QUOTEMARK && !isPrevSLASH) {
skipSpecialChars = !skipSpecialChars;
if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) {
return false;
}
}
return (isEQ || isWS) === false;
};
const nextAttr = () => {
const attrStr = attrCharGrabber.grabWhile(validAttr);
const currChar = attrCharGrabber.getCurr();
// first string before space is a tag name [tagName params...]
if (tagName === null) {
tagName = attrStr;
} else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) {
const escaped = unquote(trimChar(attrStr, QUOTEMARK));
attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col));
} else {
attrTokens.push(createToken(TYPE_ATTR_NAME, attrStr, row, col));
}
attrCharGrabber.skip();
};
while (attrCharGrabber.hasNext()) {
nextAttr();
}
return { tag: tagName, attrs: attrTokens };
};
const bufferGrabber = createCharGrabber(buffer, {
onSkip: () => {
col++;
},
});
const next = () => {
const currChar = bufferGrabber.getCurr();
const nextChar = bufferGrabber.getNext();
if (currChar === N) {
bufferGrabber.skip();
col = 0;
row++;
emitToken(createToken(TYPE_NEW_LINE, currChar, row, col));
} else if (isWhiteSpace(currChar)) {
const str = bufferGrabber.grabWhile(isWhiteSpace);
emitToken(createToken(TYPE_SPACE, str, row, col));
} else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
bufferGrabber.skip(); // skip the \ without emitting anything
bufferGrabber.skip(); // skip past the [, ] or \ as well
emitToken(createToken(TYPE_WORD, nextChar, row, col));
} else if (currChar === openTag) {
bufferGrabber.skip(); // skip openTag
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = bufferGrabber.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else {
const str = bufferGrabber.grabWhile((val) => val !== closeTag);
bufferGrabber.skip(); // skip closeTag
// [myTag ]
const isNoAttrsInTag = str.indexOf(EQ) === -1;
// [/myTag]
const isClosingTag = str[0] === SLASH;
if (isNoAttrsInTag || isClosingTag) {
emitToken(createToken(TYPE_TAG, str, row, col));
} else {
const parsed = parseAttrs(str);
emitToken(createToken(TYPE_TAG, parsed.tag, row, col));
parsed.attrs.map(emitToken);
}
}
} else if (currChar === closeTag) {
bufferGrabber.skip(); // skip closeTag
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else if (isCharToken(currChar)) {
if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
bufferGrabber.skip();
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else {
const str = bufferGrabber.grabWhile((char) => {
if (escapeTags) {
return isCharToken(char) && !isEscapeChar(char);
}
return isCharToken(char);
});
emitToken(createToken(TYPE_WORD, str, row, col));
}
}
};
const tokenize = () => {
while (bufferGrabber.hasNext()) {
next();
}
tokens.length = tokenIndex + 1;
return tokens;
};
const isTokenNested = (token) => {
const value = openTag + SLASH + token.getValue();
// potential bottleneck
return buffer.indexOf(value) > -1;
};
return {
tokenize,
isTokenNested,
};
}
export const createTokenOfType = createToken;
export { createLexer };
+4 -4
View File
@@ -22,25 +22,25 @@ const parse = (input, opts = {}) => {
/**
* Result AST of nodes
* @private
* @type {ItemList}
* @type {NodeList}
*/
const nodes = createList();
/**
* Temp buffer of nodes that's nested to another node
* @private
* @type {ItemList}
* @type {NodeList}
*/
const nestedNodes = createList();
/**
* Temp buffer of nodes [tag..]...[/tag]
* @private
* @type {ItemList}
* @type {NodeList}
*/
const tagNodes = createList();
/**
* Temp buffer of tag attributes
* @private
* @type {ItemList}
* @type {NodeList}
*/
const tagNodesAttrName = createList();
+112 -132
View File
@@ -3,95 +3,107 @@ import {
BACKSLASH,
} from '@bbob/plugin-helper/lib/char';
/**
* @typedef {Object} CharGrabber
* @property {Function} skip
* @property {Function} hasNext
* @property {Function} isLast
* @property {Function} grabWhile
*/
function CharGrabber(source, options) {
const cursor = {
pos: 0,
len: source.length,
};
const substrUntilChar = (char) => {
const { pos } = cursor;
const idx = source.indexOf(char, pos);
return idx >= 0 ? source.substr(pos, idx - pos) : '';
};
const includes = (val) => source.indexOf(val, cursor.pos) >= 0;
const hasNext = () => cursor.len > cursor.pos;
const isLast = () => cursor.pos === cursor.len;
const skip = (num = 1, silent) => {
cursor.pos += num;
if (options && options.onSkip && !silent) {
options.onSkip();
}
};
const rest = () => source.substr(cursor.pos);
const curr = () => source[cursor.pos];
const prev = () => {
const prevPos = cursor.pos - 1;
return typeof source[prevPos] !== 'undefined' ? source[prevPos] : null;
};
const next = () => {
const nextPos = cursor.pos + 1;
return nextPos <= (source.length - 1) ? source[nextPos] : null;
};
const grabWhile = (cond, silent) => {
let start = 0;
if (hasNext()) {
start = cursor.pos;
while (hasNext() && cond(curr())) {
skip(1, silent);
}
}
return source.substr(start, cursor.pos - start);
};
/**
* @type {skip}
*/
this.skip = skip;
/**
* @returns {Boolean}
*/
this.hasNext = hasNext;
/**
* @returns {String}
*/
this.getCurr = curr;
/**
* @returns {String}
*/
this.getRest = rest;
/**
* @returns {String}
*/
this.getNext = next;
/**
* @returns {String}
*/
this.getPrev = prev;
/**
* @returns {Boolean}
*/
this.isLast = isLast;
/**
* @returns {Boolean}
*/
this.includes = includes;
/**
* @param {Function} cond
* @param {Boolean} silent
* @return {String}
*/
this.grabWhile = grabWhile;
/**
* Grabs rest of string until it find a char
* @param {String} char
* @return {String}
*/
this.substrUntilChar = substrUntilChar;
}
/**
* Creates a grabber wrapper for source string, that helps to iterate over string char by char
* @param {String} source
* @param {Object} options
* @param {Function} options.onSkip
* @returns
* @return CharGrabber
*/
export const createCharGrabber = (source, options) => {
// let idx = 0;
const cursor = {
pos: 0,
length: source.length,
};
const skip = () => {
cursor.pos += 1;
if (options && options.onSkip) {
options.onSkip();
}
};
const hasNext = () => cursor.length > cursor.pos;
const getRest = () => source.substr(cursor.pos);
const getCurr = () => source[cursor.pos];
return {
skip,
hasNext,
isLast: () => (cursor.pos === cursor.length),
/**
* @param {Function} cond
* @returns {string}
*/
grabWhile: (cond) => {
let start = 0;
if (hasNext()) {
start = cursor.pos;
while (hasNext() && cond(getCurr())) {
skip();
}
}
return source.substr(start, cursor.pos - start);
},
getNext: () => {
const nextPos = cursor.pos + 1;
if (nextPos <= (source.length - 1)) {
return source[nextPos];
}
return null;
},
getPrev: () => {
const prevPos = cursor.pos - 1;
if (typeof source[prevPos] !== 'undefined') {
return source[prevPos];
}
return null;
},
getCurr,
getRest,
/**
* Grabs rest of string until it find a char
* @param {String} char
* @return {String}
*/
substrUntilChar: (char) => {
const restStr = getRest();
const indexOfChar = restStr.indexOf(char);
if (indexOfChar >= 0) {
return restStr.substr(0, indexOfChar);
}
return '';
},
};
};
export const createCharGrabber = (source, options) => new CharGrabber(source, options);
/**
* Trims string from start and end by char
@@ -122,58 +134,26 @@ export const trimChar = (str, charToRemove) => {
*/
export const unquote = (str) => str.replace(BACKSLASH + QUOTEMARK, QUOTEMARK);
/**
* @typedef {Object} ItemList
* @type {Object}
* @property {getLastCb} getLast
* @property {flushLastCb} flushLast
* @property {pushCb} push
* @property {toArrayCb} toArray
*/
function NodeList(values = []) {
const nodes = values;
const getLast = () => (
Array.isArray(nodes) && nodes.length > 0 && typeof nodes[nodes.length - 1] !== 'undefined'
? nodes[nodes.length - 1]
: null);
const flushLast = () => (nodes.length ? nodes.pop() : false);
const push = (value) => nodes.push(value);
const toArray = () => nodes;
this.push = push;
this.toArray = toArray;
this.getLast = getLast;
this.flushLast = flushLast;
}
/**
*
* @param values
* @return {ItemList}
* @return {NodeList}
*/
export const createList = (values = []) => {
const nodes = values;
/**
* @callback getLastCb
*/
const getLast = () => {
if (Array.isArray(nodes) && nodes.length > 0 && typeof nodes[nodes.length - 1] !== 'undefined') {
return nodes[nodes.length - 1];
}
return null;
};
/**
* @callback flushLastCb
* @return {*}
*/
const flushLast = () => {
if (nodes.length) {
return nodes.pop();
}
return false;
};
/**
* @callback pushCb
* @param value
*/
const push = (value) => nodes.push(value);
/**
* @callback toArrayCb
* @return {Array}
*/
return {
getLast,
flushLast,
push,
toArray: () => nodes,
};
};
export const createList = (values = []) => new NodeList(values);
+13 -13
View File
@@ -1,4 +1,4 @@
import Token from '../src/Token'
import Token, { TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE } from '../src/Token'
describe('Token', () => {
test('isEmpty', () => {
@@ -7,61 +7,61 @@ describe('Token', () => {
expect(token.isEmpty()).toBeTruthy()
});
test('isText', () => {
const token = new Token('word');
const token = new Token(TYPE_WORD);
expect(token.isText()).toBeTruthy();
});
test('isTag', () => {
const token = new Token('tag');
const token = new Token(TYPE_TAG);
expect(token.isTag()).toBeTruthy();
});
test('isAttrName', () => {
const token = new Token('attr-name');
const token = new Token(TYPE_ATTR_NAME);
expect(token.isAttrName()).toBeTruthy();
});
test('isAttrValue', () => {
const token = new Token('attr-value');
const token = new Token(TYPE_ATTR_VALUE);
expect(token.isAttrValue()).toBeTruthy();
});
test('isStart', () => {
const token = new Token('tag', 'my-tag');
const token = new Token(TYPE_TAG, 'my-tag');
expect(token.isStart()).toBeTruthy();
});
test('isEnd', () => {
const token = new Token('tag', '/my-tag');
const token = new Token(TYPE_TAG, '/my-tag');
expect(token.isEnd()).toBeTruthy();
});
test('getName', () => {
const token = new Token('tag', '/my-tag');
const token = new Token(TYPE_TAG, '/my-tag');
expect(token.getName()).toBe('my-tag');
});
test('getValue', () => {
const token = new Token('tag', '/my-tag');
const token = new Token(TYPE_TAG, '/my-tag');
expect(token.getValue()).toBe('/my-tag');
});
test('getLine', () => {
const token = new Token('tag', '/my-tag', 12);
const token = new Token(TYPE_TAG, '/my-tag', 12);
expect(token.getLine()).toBe(12);
});
test('getColumn', () => {
const token = new Token('tag', '/my-tag', 12, 14);
const token = new Token(TYPE_TAG, '/my-tag', 12, 14);
expect(token.getColumn()).toBe(14);
});
test('toString', () => {
const tokenEnd = new Token('tag', '/my-tag', 12, 14);
const tokenEnd = new Token(TYPE_TAG, '/my-tag', 12, 14);
expect(tokenEnd.toString()).toBe('[/my-tag]');
const tokenStart = new Token('tag', 'my-tag', 12, 14);
const tokenStart = new Token(TYPE_TAG, 'my-tag', 12, 14);
expect(tokenStart.toString()).toBe('[my-tag]');
});
+124 -35
View File
@@ -1,4 +1,4 @@
import {TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE} from '../src/Token'
import { TYPE_ID, VALUE_ID, TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE} from '../src/Token'
import { createLexer } from '../src/lexer'
const TYPE = {
@@ -10,19 +10,58 @@ const TYPE = {
NEW_LINE: TYPE_NEW_LINE,
};
const TYPE_NAMES = Object.fromEntries(Object.keys(TYPE).map(key => [TYPE[key], key]));
const tokenize = input => (createLexer(input).tokenize());
const tokenizeEscape = input => (createLexer(input, { enableEscapeTags: true }).tokenize());
describe('lexer', () => {
const expectOutput = (output, tokens) => {
expect(tokens.length).toBe(output.length);
expect(tokens).toBeInstanceOf(Array);
tokens.forEach((token, idx) => {
expect(token).toBeInstanceOf(Object);
expect(token.type).toEqual(output[idx][0]);
expect(token.value).toEqual(output[idx][1]);
});
};
expect.extend({
toBeMantchOutput(tokens, output) {
if (tokens.length !== output.length) {
return {
message: () =>
`expected tokens length ${tokens.length} to be ${output.length}`,
pass: false,
};
}
for (let idx = 0; idx < tokens.length; idx++) {
const token = tokens[idx];
const [type, value] = output[idx];
if (typeof token !== 'object') {
return {
message: () =>
`token must to be Object`,
pass: false,
};
}
if (token[TYPE_ID] !== type) {
return {
message: () =>
`expected token type ${TYPE_NAMES[type]} but recieved ${TYPE_NAMES[token[TYPE_ID]]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
if (token[VALUE_ID] !== value) {
return {
message: () =>
`expected token value ${value} but recieved ${token[VALUE_ID]} for ${JSON.stringify(output[idx])}`,
pass: false,
};
}
}
return {
message: () =>
`no valid output`,
pass: true,
};
},
});
test('single tag', () => {
const input = '[SingleTag]';
@@ -31,7 +70,7 @@ describe('lexer', () => {
[TYPE.TAG, 'SingleTag', '0', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('single tag with params', () => {
@@ -42,7 +81,19 @@ describe('lexer', () => {
[TYPE.ATTR_VALUE, '111', '0', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
// A space immediately after the open bracket means "[ user=111]" is
// not a valid tag, so the lexer must fall back to plain word/space tokens.
test('single fake tag', () => {
  const input = '[ user=111]';
  const tokens = tokenize(input);
  const output = [
    [TYPE.WORD, '[', '0', '0'],
    [TYPE.SPACE, ' ', '0', '0'],
    [TYPE.WORD, 'user=111]', '0', '0'],
  ];
  expect(tokens).toBeMantchOutput(output);
});
test('single tag with spaces', () => {
@@ -53,9 +104,25 @@ describe('lexer', () => {
[TYPE.TAG, 'Single Tag', '0', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
// @TODO: this is breaking change behavior
// Skipped until valueless attributes (e.g. [textarea disabled]) are
// supported; enabling this now would change how such tags tokenize.
test.skip('tags with single attrs like disabled', () => {
  const input = '[textarea disabled]world[/textarea]';
  const tokens = tokenize(input);
  const output = [
    [TYPE.TAG, 'textarea', '0', '0'],
    [TYPE.ATTR_VALUE, 'disabled', '0', '0'],
    // fixed expected value: the input contains no quote after "world"
    [TYPE.WORD, 'world', '0', '0'],
    [TYPE.TAG, '/textarea', '0', '0'],
  ];
  expect(tokens).toBeMantchOutput(output);
});
test('string with quotemarks', () => {
const input = '"Someone Like You" by Adele';
const tokens = tokenize(input);
@@ -72,7 +139,7 @@ describe('lexer', () => {
[TYPE.WORD, 'Adele', '21', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('tags in brakets', () => {
@@ -89,7 +156,7 @@ describe('lexer', () => {
[TYPE.WORD, ']', '7', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('tag as param', () => {
@@ -102,7 +169,7 @@ describe('lexer', () => {
[TYPE.TAG, '/color', '21', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('tag with quotemark params with spaces', () => {
@@ -118,7 +185,7 @@ describe('lexer', () => {
[TYPE.TAG, '/url', '24', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('tag with escaped quotemark param', () => {
@@ -132,7 +199,7 @@ describe('lexer', () => {
[TYPE.TAG, '/url', '26', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('tag param without quotemarks', () => {
@@ -146,7 +213,7 @@ describe('lexer', () => {
[TYPE.TAG, '/style', '25', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('list tag with items', () => {
@@ -184,7 +251,29 @@ describe('lexer', () => {
[TYPE.TAG, '/list', '0', '4'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
// Regression test: back-to-back tags with no separating text
// ("...[/mytag1][mytag2...") must each produce their own TAG /
// ATTR_NAME / ATTR_VALUE / WORD token sequence.
test('few tags without spaces', () => {
  const input = '[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]';
  const tokens = tokenize(input);
  const output = [
    [TYPE.TAG, 'mytag1', 0, 0],
    [TYPE.ATTR_NAME, 'size', 0, 0],
    [TYPE.ATTR_VALUE, '15', 0, 0],
    [TYPE.WORD, 'Tag1', 0, 0],
    [TYPE.TAG, '/mytag1', 0, 0],
    [TYPE.TAG, 'mytag2', 0, 0],
    [TYPE.ATTR_NAME, 'size', 0, 0],
    [TYPE.ATTR_VALUE, '16', 0, 0],
    [TYPE.WORD, 'Tag2', 0, 0],
    [TYPE.TAG, '/mytag2', 0, 0],
    [TYPE.TAG, 'mytag3', 0, 0],
    [TYPE.WORD, 'Tag3', 0, 0],
    [TYPE.TAG, '/mytag3', 0, 0],
  ];
  expect(tokens).toBeMantchOutput(output);
});
test('bad tags as texts', () => {
@@ -211,8 +300,8 @@ describe('lexer', () => {
[
[TYPE.WORD, '!', '0', '0'],
[TYPE.WORD, '[', '1', '0'],
[TYPE.WORD, ']', '1', '0'],
[TYPE.WORD, '(image.jpg)', '1', '0'],
[TYPE.WORD, '](image.jpg)', '1', '0'],
// [TYPE.WORD, '', '1', '0'],
],
[
[TYPE.WORD, 'x', '0', '0'],
@@ -253,7 +342,7 @@ describe('lexer', () => {
const tokens = tokenize(input);
const output = asserts[idx];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
});
@@ -271,7 +360,7 @@ describe('lexer', () => {
[TYPE.TAG, 'Finger', '0', '0']
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('no close tag', () => {
@@ -286,7 +375,7 @@ describe('lexer', () => {
[TYPE.WORD, 'A', '0', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('escaped tag', () => {
@@ -301,7 +390,7 @@ describe('lexer', () => {
[TYPE.WORD, '[', '0', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('escaped tag and escaped backslash', () => {
@@ -321,7 +410,7 @@ describe('lexer', () => {
[TYPE.WORD, ']', '0', '0'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('bad closed tag with escaped backslash', () => {
@@ -335,7 +424,7 @@ describe('lexer', () => {
[TYPE.WORD, 'b]', '0', '11'],
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
describe('html', () => {
@@ -358,7 +447,7 @@ describe('lexer', () => {
[TYPE.TAG, '/button', 2, 0]
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('attributes with no quotes or value', () => {
@@ -377,7 +466,7 @@ describe('lexer', () => {
[TYPE.TAG, '/button', 2, 0]
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test('attributes with no space between them. No valid, but accepted by the browser', () => {
@@ -395,7 +484,7 @@ describe('lexer', () => {
[TYPE.TAG, '/button', 2, 0]
];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test.skip('style tag', () => {
@@ -416,10 +505,10 @@ input.medium{width:100px;height:18px}
input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;height:1.4em;font-weight:bold;font-size:9pt;padding:0px 2px;margin:0px;border:0px none #000}
-->
</style>`
const tokens = tokenizeHTML(content);
const output = [];
const tokens = tokenizeHTML(content);
const output = [];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
});
test.skip('script tag', () => {
@@ -432,7 +521,7 @@ input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;he
const tokens = tokenizeHTML(content);
const output = [];
expectOutput(output, tokens);
expect(tokens).toBeMantchOutput(output);
})
})
});
+102 -19
View File
@@ -8,8 +8,7 @@ describe('Parser', () => {
test('parse paired tags tokens', () => {
const ast = parse('[best name=value]Foo Bar[/best]');
expectOutput(ast, [
const output = [
{
tag: 'best',
attrs: {
@@ -21,15 +20,16 @@ describe('Parser', () => {
'Bar',
],
},
]);
];
expectOutput(ast, output);
});
test('parse only allowed tags', () => {
const ast = parse('[h1 name=value]Foo [Bar] [/h1]', {
onlyAllowTags: ['h1']
});
expectOutput(ast, [
const output = [
{
tag: 'h1',
attrs: {
@@ -42,13 +42,14 @@ describe('Parser', () => {
' '
],
},
]);
];
expectOutput(ast, output);
});
test('parse inconsistent tags', () => {
const ast = parse('[h1 name=value]Foo [Bar] /h1]');
expectOutput(ast, [
const output = [
{
attrs: {
name: 'value'
@@ -65,13 +66,14 @@ describe('Parser', () => {
},
' ',
'/h1]',
]);
];
expectOutput(ast, output);
});
test('parse tag with value param', () => {
const ast = parse('[url=https://github.com/jilizart/bbob]BBob[/url]');
expectOutput(ast, [
const output = [
{
tag: 'url',
attrs: {
@@ -79,13 +81,14 @@ describe('Parser', () => {
},
content: ['BBob'],
},
]);
];
expectOutput(ast, output);
});
test('parse tag with quoted param with spaces', () => {
const ast = parse('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]');
expectOutput(ast, [
const output = [
{
tag: 'url',
attrs: {
@@ -95,13 +98,14 @@ describe('Parser', () => {
},
content: ['Text'],
},
]);
];
expectOutput(ast, output);
});
test('parse single tag with params', () => {
const ast = parse('[url=https://github.com/jilizart/bbob]');
expectOutput(ast, [
const output = [
{
tag: 'url',
attrs: {
@@ -109,12 +113,15 @@ describe('Parser', () => {
},
content: [],
},
]);
];
expectOutput(ast, output);
});
test('detect inconsistent tag', () => {
const onError = jest.fn();
const ast = parse('[c][/c][b]hello[/c][/b][b]', { onError });
parse('[c][/c][b]hello[/c][/b][b]', { onError });
expect(onError).toHaveBeenCalled();
});
@@ -145,6 +152,82 @@ describe('Parser', () => {
])
});
// Regression test: adjacent paired tags with no text between them must
// each become an independent AST node with its own attrs/content.
test('parse few tags without spaces', () => {
  const ast = parse('[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]');
  const output = [
    {
      tag: 'mytag1',
      attrs: {
        size: '15',
      },
      content: ['Tag1'],
    },
    {
      tag: 'mytag2',
      attrs: {
        size: '16',
      },
      content: ['Tag2'],
    },
    {
      tag: 'mytag3',
      attrs: {},
      content: ['Tag3'],
    },
  ];
  expectOutput(ast, output);
});

// @TODO: this is breaking change behavior
// Skipped until valueless attributes ([textarea disabled]) are
// supported; the intended mapping is attr name === attr value.
test.skip('parse tags with single attributes like disabled', () => {
  const ast = parse('[b]hello[/b] [textarea disabled]world[/textarea]');
  expectOutput(ast, [
    {
      tag: 'b',
      attrs: {},
      content: ['hello'],
    },
    ' ',
    {
      tag: 'textarea',
      attrs: {
        disabled: 'disabled',
      },
      content: ['world'],
    },
  ]);
});

// A value-only attribute ([url=...]) is stored with the value used as
// both the attr key and the attr value, even for long URLs with query
// strings.
test('parse url tag with get params', () => {
  const ast = parse('[url=https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any]GET[/url]');
  expectOutput(ast, [
    {
      tag: 'url',
      attrs: {
        'https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any': 'https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any',
      },
      content: ['GET'],
    },
  ]);
});

// '#' and '=' inside an unquoted attr value must not terminate the
// value (real-world case: Google Docs edit URLs).
test('parse url tag with # and = symbols [google docs]', () => {
  const ast = parse('[url href=https://docs.google.com/spreadsheets/d/1W9VPUESF_NkbSa_HtRFrQNl0nYo8vPCxJFy7jD3Tpio/edit#gid=0]Docs[/url]');
  expectOutput(ast, [
    {
      tag: 'url',
      attrs: {
        href: 'https://docs.google.com/spreadsheets/d/1W9VPUESF_NkbSa_HtRFrQNl0nYo8vPCxJFy7jD3Tpio/edit#gid=0',
      },
      content: ['Docs'],
    },
  ]);
});
describe('html', () => {
const parseHTML = input => parse(input, { openTag: '<', closeTag: '>' });
+91
View File
@@ -0,0 +1,91 @@
import { createCharGrabber } from '../src/utils';
describe('utils', () => {
  describe('createCharGrabber', () => {
    // substrUntilChar(char) returns the substring from the current
    // position up to (but not including) the first occurrence of char.
    test('#substrUntilChar ] 1', () => {
      const bufferGrabber = createCharGrabber('[h1 name=value]Foo [Bar] [/h1]');
      const substr = bufferGrabber.substrUntilChar(']');
      expect(substr).toBe('[h1 name=value');
    });

    test('#substrUntilChar ] 2', () => {
      /**
       * Captured debug output of a past regression: the result wrongly
       * included the terminating ']' itself.
       *
       * console.log src/utils.js:95
       *   substrUntilChar { char: ']', indexOfChar: 63, curPos: 0 } {
       *     result: '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"',
       *     source: '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]'
       *   }
       * console.log src/utils.js:104
       *   substrUntilChar.new { char: ']', indexOfCharNew: 63, curPos: 0 } {
       *     result: '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]',
       *     source: '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]'
       *   }
       */
      const bufferGrabber = createCharGrabber('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]');
      const substr = bufferGrabber.substrUntilChar(']');
      expect(substr).toBe('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"');
    });

    // Grabbing up to '[' from position 0 must stop before the first
    // open bracket and exclude it from the result.
    test('#substrUntilChar ] 3', () => {
      const bufferGrabber = createCharGrabber('hello [blah foo="bar"]world[/blah]');
      const substr = bufferGrabber.substrUntilChar('[');
      expect(substr).toBe('hello ');
    });

    // When the requested char never occurs in the source, the grabber
    // yields an empty string rather than the remainder of the input.
    test('#substrUntilChar not existed', () => {
      const bufferGrabber = createCharGrabber('hello');
      const substr = bufferGrabber.substrUntilChar('[');
      expect(substr).toBe('');
    });

    // On an empty source there is no previous char, so getPrev()
    // returns null.
    test('getPrev is null', () => {
      const bufferGrabber = createCharGrabber('');
      const prev = bufferGrabber.getPrev();
      expect(prev).toBe(null);
    });

    // skip() advances past one char; getRest() then returns everything
    // from the current position to the end of the source.
    test('getRest', () => {
      const bufferGrabber = createCharGrabber('hello [blah foo="bar"]world[/blah]');

      bufferGrabber.skip();

      const rest = bufferGrabber.getRest();

      expect(rest).toBe('ello [blah foo="bar"]world[/blah]');
    });
  })
});
+1 -1
View File
@@ -24,7 +24,7 @@ class TagNode {
constructor(tag, attrs, content) {
this.tag = tag;
this.attrs = attrs;
this.content = [].concat(content);
this.content = Array.isArray(content) ? content : [content];
}
attr(name, value) {