diff --git a/benchmark/index.js b/benchmark/index.js index a19fc6d..130b4d5 100644 --- a/benchmark/index.js +++ b/benchmark/index.js @@ -1,3 +1,4 @@ +/* eslint-disable global-require */ const Benchmark = require('benchmark'); const stub = require('./test/stub'); @@ -38,9 +39,22 @@ suite addInLineBreaks: false, }); }) - .add('@bbob/parser', () => require('../packages/bbob-parser/lib/index').parse(stub, { - onlyAllowTags: ['ch'], - })) + .add('@bbob/parser lexer old', () => { + const lexer1 = require('../packages/bbob-parser/lib/lexer_old'); + + return require('../packages/bbob-parser/lib/index').parse(stub, { + onlyAllowTags: ['ch'], + createTokenizer: lexer1.createLexer, + }); + }) + .add('@bbob/parser lexer', () => { + const lexer2 = require('../packages/bbob-parser/lib/lexer'); + + return require('../packages/bbob-parser/lib/index').parse(stub, { + onlyAllowTags: ['ch'], + createTokenizer: lexer2.createLexer, + }); + }) // add listeners .on('cycle', (event) => { console.log(String(event.target)); diff --git a/packages/bbob-core/test/utils.test.js b/packages/bbob-core/test/utils.test.js index e66be1a..f48be0c 100644 --- a/packages/bbob-core/test/utils.test.js +++ b/packages/bbob-core/test/utils.test.js @@ -1,4 +1,7 @@ -import { iterate } from '../src/utils'; +import { iterate, match } from '../src/utils'; + +const stringify = val => JSON.stringify(val); + describe('@bbob/core utils', () => { test('iterate', () => { @@ -16,14 +19,47 @@ describe('@bbob/core utils', () => { return node; }); - expect(resultArr).toEqual([{ - one: true, - pass: 1, - content: [{ oneInside: true, pass: 1, }] - }, { - two: true, - pass: 1, - content: [{ twoInside: true, pass: 1, }] - }]); + const expected = [ + { + one: true, + content: [{ oneInside: true, pass: 1, }], + pass: 1, + }, { + two: true, + content: [{ twoInside: true, pass: 1, }], + pass: 1, + } + ]; + + expect(stringify(resultArr)).toEqual(stringify(expected)); }); + test('match', () => { + const testArr = [ + { tag: 'mytag1', 
one: 1 }, + { tag: 'mytag2', two: 1 }, + { tag: 'mytag3', three: 1 }, + { tag: 'mytag4', four: 1 }, + { tag: 'mytag5', five: 1 }, + { tag: 'mytag6', six: 1 }, + ]; + + testArr.match = match; + + const resultArr = testArr.match([{ tag: 'mytag1' }, { tag: 'mytag2' }], node => { + node.pass = 1; + + return node; + }); + + const expected = [ + { tag: 'mytag1', one: 1, pass: 1 }, + { tag: 'mytag2', two: 1, pass: 1 }, + { tag: 'mytag3', three: 1 }, + { tag: 'mytag4', four: 1 }, + { tag: 'mytag5', five: 1 }, + { tag: 'mytag6', six: 1 }, + ]; + + expect(stringify(resultArr)).toEqual(stringify(expected)) + }) }); diff --git a/packages/bbob-parser/package.json b/packages/bbob-parser/package.json index dc93792..d6862a8 100644 --- a/packages/bbob-parser/package.json +++ b/packages/bbob-parser/package.json @@ -35,7 +35,7 @@ "build:es": "../../node_modules/.bin/cross-env BABEL_ENV=es NODE_ENV=production ../../node_modules/.bin/babel src --out-dir es", "build:umd": "../../node_modules/.bin/cross-env BABEL_ENV=rollup NODE_ENV=production ../../node_modules/.bin/rollup --config ../../rollup.config.js", "build": "npm run build:commonjs && npm run build:es && npm run build:umd", - "test": "../../node_modules/.bin/jest --", + "test": "../../node_modules/.bin/jest", "cover": "../../node_modules/.bin/jest --coverage", "lint": "../../node_modules/.bin/eslint .", "size": "../../node_modules/.bin/cross-env NODE_ENV=production ../../node_modules/.bin/size-limit", diff --git a/packages/bbob-parser/src/Token.js b/packages/bbob-parser/src/Token.js index 6c5159c..d3f467c 100644 --- a/packages/bbob-parser/src/Token.js +++ b/packages/bbob-parser/src/Token.js @@ -10,12 +10,12 @@ const TOKEN_VALUE_ID = 'value'; // 1; const TOKEN_COLUMN_ID = 'row'; // 2; const TOKEN_LINE_ID = 'line'; // 3; -const TOKEN_TYPE_WORD = 'word'; -const TOKEN_TYPE_TAG = 'tag'; -const TOKEN_TYPE_ATTR_NAME = 'attr-name'; -const TOKEN_TYPE_ATTR_VALUE = 'attr-value'; -const TOKEN_TYPE_SPACE = 'space'; -const TOKEN_TYPE_NEW_LINE 
= 'new-line'; +const TOKEN_TYPE_WORD = 1; // 'word'; +const TOKEN_TYPE_TAG = 2; // 'tag'; +const TOKEN_TYPE_ATTR_NAME = 3; // 'attr-name'; +const TOKEN_TYPE_ATTR_VALUE = 4; // 'attr-value'; +const TOKEN_TYPE_SPACE = 5; // 'space'; +const TOKEN_TYPE_NEW_LINE = 6; // 'new-line'; /** * @param {Token} token @@ -105,14 +105,15 @@ class Token { * @param row */ constructor(type, value, line, row) { - this[TOKEN_TYPE_ID] = String(type); + this[TOKEN_TYPE_ID] = Number(type); this[TOKEN_VALUE_ID] = String(value); this[TOKEN_LINE_ID] = Number(line); this[TOKEN_COLUMN_ID] = Number(row); } isEmpty() { - return !!this[TOKEN_TYPE_ID]; + // eslint-disable-next-line no-restricted-globals + return isNaN(this[TOKEN_TYPE_ID]); } isText() { diff --git a/packages/bbob-parser/src/lexer.js b/packages/bbob-parser/src/lexer.js index 8cf2a64..d909067 100644 --- a/packages/bbob-parser/src/lexer.js +++ b/packages/bbob-parser/src/lexer.js @@ -21,7 +21,7 @@ const EM = '!'; /** * Creates a Token entity class - * @param {String} type + * @param {Number} type * @param {String} value * @param {Number} r line number * @param {Number} cl char number in line @@ -44,14 +44,26 @@ const createToken = (type, value, r = 0, cl = 0) => new Token(type, value, r, cl * @return {Lexer} */ function createLexer(buffer, options = {}) { + const STATE_WORD = 0; + const STATE_TAG = 1; + const STATE_TAG_ATTRS = 2; + + const TAG_STATE_NAME = 0; + const TAG_STATE_ATTR = 1; + const TAG_STATE_VALUE = 2; + let row = 0; let col = 0; let tokenIndex = -1; + let stateMode = STATE_WORD; + let tagMode = TAG_STATE_NAME; const tokens = new Array(Math.floor(buffer.length)); const openTag = options.openTag || OPEN_BRAKET; const closeTag = options.closeTag || CLOSE_BRAKET; - const escapeTags = options.enableEscapeTags; + const escapeTags = !!options.enableEscapeTags; + const onToken = options.onToken || (() => { + }); const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM]; const NOT_CHAR_TOKENS = [ @@ 
-62,175 +74,266 @@ function createLexer(buffer, options = {}) { const SPECIAL_CHARS = [EQ, SPACE, TAB]; const isCharReserved = (char) => (RESERVED_CHARS.indexOf(char) >= 0); + const isNewLine = (char) => char === N; const isWhiteSpace = (char) => (WHITESPACES.indexOf(char) >= 0); const isCharToken = (char) => (NOT_CHAR_TOKENS.indexOf(char) === -1); const isSpecialChar = (char) => (SPECIAL_CHARS.indexOf(char) >= 0); const isEscapableChar = (char) => (char === openTag || char === closeTag || char === BACKSLASH); const isEscapeChar = (char) => char === BACKSLASH; + const onSkip = () => { + col++; + }; + + const unq = (val) => unquote(trimChar(val, QUOTEMARK)); + + const chars = createCharGrabber(buffer, { onSkip }); /** * Emits newly created token to subscriber - * @param token + * @param {Number} type + * @param {String} value */ - const emitToken = (token) => { - if (options.onToken) { - options.onToken(token); - } + function emitToken(type, value) { + const token = createToken(type, value, row, col); + + onToken(token); tokenIndex += 1; tokens[tokenIndex] = token; - }; + } - /** - * Parses params inside [myTag---params goes here---]content[/myTag] - * @param str - * @returns {{tag: *, attrs: Array}} - */ - const parseAttrs = (str) => { - let tagName = null; - let skipSpecialChars = false; + function nextTagState(tagChars, isSingleValueTag) { + if (tagMode === TAG_STATE_ATTR) { + const validAttrName = (char) => !(char === EQ || isWhiteSpace(char)); + const name = tagChars.grabWhile(validAttrName); + const isEnd = tagChars.isLast(); + const isValue = tagChars.getCurr() !== EQ; - const attrTokens = []; - const attrCharGrabber = createCharGrabber(str); + tagChars.skip(); - const validAttr = (char) => { - const isEQ = char === EQ; - const isWS = isWhiteSpace(char); - const prevChar = attrCharGrabber.getPrev(); - const nextChar = attrCharGrabber.getNext(); - const isPrevSLASH = prevChar === BACKSLASH; - const isTagNameEmpty = tagName === null; - - if (isTagNameEmpty) { - 
return (isEQ || isWS || attrCharGrabber.isLast()) === false; - } - - if (skipSpecialChars && isSpecialChar(char)) { - return true; - } - - if (char === QUOTEMARK && !isPrevSLASH) { - skipSpecialChars = !skipSpecialChars; - - if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) { - return false; - } - } - - return (isEQ || isWS) === false; - }; - - const nextAttr = () => { - const attrStr = attrCharGrabber.grabWhile(validAttr); - const currChar = attrCharGrabber.getCurr(); - - // first string before space is a tag name [tagName params...] - if (tagName === null) { - tagName = attrStr; - } else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) { - const escaped = unquote(trimChar(attrStr, QUOTEMARK)); - attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col)); + if (isEnd || isValue) { + emitToken(TYPE_ATTR_VALUE, unq(name)); } else { - attrTokens.push(createToken(TYPE_ATTR_NAME, attrStr, row, col)); + emitToken(TYPE_ATTR_NAME, name); } - attrCharGrabber.skip(); - }; + if (isEnd) { + return TAG_STATE_NAME; + } - while (attrCharGrabber.hasNext()) { - nextAttr(); + if (isValue) { + return TAG_STATE_ATTR; + } + + return TAG_STATE_VALUE; + } + if (tagMode === TAG_STATE_VALUE) { + let stateSpecial = false; + + const validAttrValue = (char) => { + // const isEQ = char === EQ; + const isQM = char === QUOTEMARK; + const prevChar = tagChars.getPrev(); + const nextChar = tagChars.getNext(); + const isPrevSLASH = prevChar === BACKSLASH; + const isNextEQ = nextChar === EQ; + const isWS = isWhiteSpace(char); + // const isPrevWS = isWhiteSpace(prevChar); + const isNextWS = isWhiteSpace(nextChar); + + if (stateSpecial && isSpecialChar(char)) { + return true; + } + + if (isQM && !isPrevSLASH) { + stateSpecial = !stateSpecial; + + if (!stateSpecial && !(isNextEQ || isNextWS)) { + return false; + } + } + + if (!isSingleValueTag) { + return isWS === false; + // return (isEQ || isWS) === false; + } + + return true; + }; + const 
name = tagChars.grabWhile(validAttrValue); + + tagChars.skip(); + + emitToken(TYPE_ATTR_VALUE, unq(name)); + + if (tagChars.isLast()) { + return TAG_STATE_NAME; + } + + return TAG_STATE_ATTR; } - return { tag: tagName, attrs: attrTokens }; - }; + const validName = (char) => !(char === EQ || isWhiteSpace(char) || tagChars.isLast()); + const name = tagChars.grabWhile(validName); - const bufferGrabber = createCharGrabber(buffer, { - onSkip: () => { - col++; - }, - }); + emitToken(TYPE_TAG, name); - const next = () => { - const currChar = bufferGrabber.getCurr(); - const nextChar = bufferGrabber.getNext(); + tagChars.skip(); + + // in cases when we have [url=someval]GET[/url] and we don't need to parse all + if (isSingleValueTag) { + return TAG_STATE_VALUE; + } + + const hasEQ = tagChars.includes(EQ); + + return hasEQ ? TAG_STATE_ATTR : TAG_STATE_VALUE; + } + + function stateTag() { + const currChar = chars.getCurr(); + + if (currChar === openTag) { + const nextChar = chars.getNext(); + + chars.skip(); + + // detect case where we have '[My word [tag][/tag]' or we have '[My last line word' + const substr = chars.substrUntilChar(closeTag); + const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0; + + if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) { + emitToken(TYPE_WORD, currChar); + + return STATE_WORD; + } + + // [myTag ] + const isNoAttrsInTag = substr.indexOf(EQ) === -1; + // [/myTag] + const isClosingTag = substr[0] === SLASH; + + if (isNoAttrsInTag || isClosingTag) { + const name = chars.grabWhile((char) => char !== closeTag); + + chars.skip(); // skip closeTag + + emitToken(TYPE_TAG, name); + + return STATE_WORD; + } + + return STATE_TAG_ATTRS; + } + + return STATE_WORD; + } + + function stateAttrs() { + const silent = true; + const tagStr = chars.grabWhile((char) => char !== closeTag, silent); + const tagGrabber = createCharGrabber(tagStr, { onSkip }); + const hasSpace = tagGrabber.includes(SPACE); + + while 
(tagGrabber.hasNext()) { + tagMode = nextTagState(tagGrabber, !hasSpace); + } + + chars.skip(); // skip closeTag + + return STATE_WORD; + } + + function stateWord() { + if (isNewLine(chars.getCurr())) { + emitToken(TYPE_NEW_LINE, chars.getCurr()); + + chars.skip(); - if (currChar === N) { - bufferGrabber.skip(); col = 0; row++; - emitToken(createToken(TYPE_NEW_LINE, currChar, row, col)); - } else if (isWhiteSpace(currChar)) { - const str = bufferGrabber.grabWhile(isWhiteSpace); - emitToken(createToken(TYPE_SPACE, str, row, col)); - } else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) { - bufferGrabber.skip(); // skip the \ without emitting anything - bufferGrabber.skip(); // skip past the [, ] or \ as well - emitToken(createToken(TYPE_WORD, nextChar, row, col)); - } else if (currChar === openTag) { - bufferGrabber.skip(); // skip openTag - - // detect case where we have '[My word [tag][/tag]' or we have '[My last line word' - const substr = bufferGrabber.substrUntilChar(closeTag); - const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0; - - if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) { - emitToken(createToken(TYPE_WORD, currChar, row, col)); - } else { - const str = bufferGrabber.grabWhile((val) => val !== closeTag); - - bufferGrabber.skip(); // skip closeTag - // [myTag ] - const isNoAttrsInTag = str.indexOf(EQ) === -1; - // [/myTag] - const isClosingTag = str[0] === SLASH; - - if (isNoAttrsInTag || isClosingTag) { - emitToken(createToken(TYPE_TAG, str, row, col)); - } else { - const parsed = parseAttrs(str); - - emitToken(createToken(TYPE_TAG, parsed.tag, row, col)); - - parsed.attrs.map(emitToken); - } - } - } else if (currChar === closeTag) { - bufferGrabber.skip(); // skip closeTag - - emitToken(createToken(TYPE_WORD, currChar, row, col)); - } else if (isCharToken(currChar)) { - if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) { - bufferGrabber.skip(); - 
emitToken(createToken(TYPE_WORD, currChar, row, col)); - } else { - const str = bufferGrabber.grabWhile((char) => { - if (escapeTags) { - return isCharToken(char) && !isEscapeChar(char); - } - return isCharToken(char); - }); - - emitToken(createToken(TYPE_WORD, str, row, col)); - } + return STATE_WORD; } - }; - const tokenize = () => { - while (bufferGrabber.hasNext()) { - next(); + if (isWhiteSpace(chars.getCurr())) { + emitToken(TYPE_SPACE, chars.grabWhile(isWhiteSpace)); + + return STATE_WORD; + } + + if (chars.getCurr() === openTag) { + if (chars.includes(closeTag)) { + return STATE_TAG; + } + + emitToken(TYPE_WORD, chars.getCurr()); + + chars.skip(); + + return STATE_WORD; + } + + if (escapeTags) { + if (isEscapeChar(chars.getCurr())) { + const currChar = chars.getCurr(); + const nextChar = chars.getNext(); + + chars.skip(); // skip the \ without emitting anything + + if (isEscapableChar(nextChar)) { + chars.skip(); // skip past the [, ] or \ as well + + emitToken(TYPE_WORD, nextChar); + + return STATE_WORD; + } + + emitToken(TYPE_WORD, currChar); + + return STATE_WORD; + } + + const isChar = (char) => isCharToken(char) && !isEscapeChar(char); + + emitToken(TYPE_WORD, chars.grabWhile(isChar)); + + return STATE_WORD; + } + + emitToken(TYPE_WORD, chars.grabWhile(isCharToken)); + + return STATE_WORD; + } + + function tokenize() { + while (chars.hasNext()) { + switch (stateMode) { + case STATE_TAG: + stateMode = stateTag(); + break; + case STATE_TAG_ATTRS: + stateMode = stateAttrs(); + break; + case STATE_WORD: + stateMode = stateWord(); + break; + default: + stateMode = STATE_WORD; + break; + } } tokens.length = tokenIndex + 1; return tokens; - }; + } - const isTokenNested = (token) => { + function isTokenNested(token) { const value = openTag + SLASH + token.getValue(); // potential bottleneck return buffer.indexOf(value) > -1; - }; + } return { tokenize, diff --git a/packages/bbob-parser/src/lexer_old.js b/packages/bbob-parser/src/lexer_old.js new file mode 
100644 index 0000000..8cf2a64 --- /dev/null +++ b/packages/bbob-parser/src/lexer_old.js @@ -0,0 +1,242 @@ +/* eslint-disable no-plusplus,no-param-reassign */ +import { + OPEN_BRAKET, + CLOSE_BRAKET, + QUOTEMARK, + BACKSLASH, + SLASH, + SPACE, + TAB, + EQ, + N, +} from '@bbob/plugin-helper/lib/char'; + +import { + Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD, +} from './Token'; +import { createCharGrabber, trimChar, unquote } from './utils'; + +// for cases +const EM = '!'; + +/** + * Creates a Token entity class + * @param {String} type + * @param {String} value + * @param {Number} r line number + * @param {Number} cl char number in line + */ +const createToken = (type, value, r = 0, cl = 0) => new Token(type, value, r, cl); + +/** + * @typedef {Object} Lexer + * @property {Function} tokenize + * @property {Function} isTokenNested + */ + +/** + * @param {String} buffer + * @param {Object} options + * @param {Function} options.onToken + * @param {String} options.openTag + * @param {String} options.closeTag + * @param {Boolean} options.enableEscapeTags + * @return {Lexer} + */ +function createLexer(buffer, options = {}) { + let row = 0; + let col = 0; + + let tokenIndex = -1; + const tokens = new Array(Math.floor(buffer.length)); + const openTag = options.openTag || OPEN_BRAKET; + const closeTag = options.closeTag || CLOSE_BRAKET; + const escapeTags = options.enableEscapeTags; + + const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM]; + const NOT_CHAR_TOKENS = [ + // ...(options.enableEscapeTags ? 
[BACKSLASH] : []), + openTag, SPACE, TAB, N, + ]; + const WHITESPACES = [SPACE, TAB]; + const SPECIAL_CHARS = [EQ, SPACE, TAB]; + + const isCharReserved = (char) => (RESERVED_CHARS.indexOf(char) >= 0); + const isWhiteSpace = (char) => (WHITESPACES.indexOf(char) >= 0); + const isCharToken = (char) => (NOT_CHAR_TOKENS.indexOf(char) === -1); + const isSpecialChar = (char) => (SPECIAL_CHARS.indexOf(char) >= 0); + const isEscapableChar = (char) => (char === openTag || char === closeTag || char === BACKSLASH); + const isEscapeChar = (char) => char === BACKSLASH; + + /** + * Emits newly created token to subscriber + * @param token + */ + const emitToken = (token) => { + if (options.onToken) { + options.onToken(token); + } + + tokenIndex += 1; + tokens[tokenIndex] = token; + }; + + /** + * Parses params inside [myTag---params goes here---]content[/myTag] + * @param str + * @returns {{tag: *, attrs: Array}} + */ + const parseAttrs = (str) => { + let tagName = null; + let skipSpecialChars = false; + + const attrTokens = []; + const attrCharGrabber = createCharGrabber(str); + + const validAttr = (char) => { + const isEQ = char === EQ; + const isWS = isWhiteSpace(char); + const prevChar = attrCharGrabber.getPrev(); + const nextChar = attrCharGrabber.getNext(); + const isPrevSLASH = prevChar === BACKSLASH; + const isTagNameEmpty = tagName === null; + + if (isTagNameEmpty) { + return (isEQ || isWS || attrCharGrabber.isLast()) === false; + } + + if (skipSpecialChars && isSpecialChar(char)) { + return true; + } + + if (char === QUOTEMARK && !isPrevSLASH) { + skipSpecialChars = !skipSpecialChars; + + if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) { + return false; + } + } + + return (isEQ || isWS) === false; + }; + + const nextAttr = () => { + const attrStr = attrCharGrabber.grabWhile(validAttr); + const currChar = attrCharGrabber.getCurr(); + + // first string before space is a tag name [tagName params...] 
+ if (tagName === null) { + tagName = attrStr; + } else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) { + const escaped = unquote(trimChar(attrStr, QUOTEMARK)); + attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col)); + } else { + attrTokens.push(createToken(TYPE_ATTR_NAME, attrStr, row, col)); + } + + attrCharGrabber.skip(); + }; + + while (attrCharGrabber.hasNext()) { + nextAttr(); + } + + return { tag: tagName, attrs: attrTokens }; + }; + + const bufferGrabber = createCharGrabber(buffer, { + onSkip: () => { + col++; + }, + }); + + const next = () => { + const currChar = bufferGrabber.getCurr(); + const nextChar = bufferGrabber.getNext(); + + if (currChar === N) { + bufferGrabber.skip(); + col = 0; + row++; + + emitToken(createToken(TYPE_NEW_LINE, currChar, row, col)); + } else if (isWhiteSpace(currChar)) { + const str = bufferGrabber.grabWhile(isWhiteSpace); + emitToken(createToken(TYPE_SPACE, str, row, col)); + } else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) { + bufferGrabber.skip(); // skip the \ without emitting anything + bufferGrabber.skip(); // skip past the [, ] or \ as well + emitToken(createToken(TYPE_WORD, nextChar, row, col)); + } else if (currChar === openTag) { + bufferGrabber.skip(); // skip openTag + + // detect case where we have '[My word [tag][/tag]' or we have '[My last line word' + const substr = bufferGrabber.substrUntilChar(closeTag); + const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0; + + if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) { + emitToken(createToken(TYPE_WORD, currChar, row, col)); + } else { + const str = bufferGrabber.grabWhile((val) => val !== closeTag); + + bufferGrabber.skip(); // skip closeTag + // [myTag ] + const isNoAttrsInTag = str.indexOf(EQ) === -1; + // [/myTag] + const isClosingTag = str[0] === SLASH; + + if (isNoAttrsInTag || isClosingTag) { + emitToken(createToken(TYPE_TAG, str, 
row, col)); + } else { + const parsed = parseAttrs(str); + + emitToken(createToken(TYPE_TAG, parsed.tag, row, col)); + + parsed.attrs.map(emitToken); + } + } + } else if (currChar === closeTag) { + bufferGrabber.skip(); // skip closeTag + + emitToken(createToken(TYPE_WORD, currChar, row, col)); + } else if (isCharToken(currChar)) { + if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) { + bufferGrabber.skip(); + emitToken(createToken(TYPE_WORD, currChar, row, col)); + } else { + const str = bufferGrabber.grabWhile((char) => { + if (escapeTags) { + return isCharToken(char) && !isEscapeChar(char); + } + return isCharToken(char); + }); + + emitToken(createToken(TYPE_WORD, str, row, col)); + } + } + }; + + const tokenize = () => { + while (bufferGrabber.hasNext()) { + next(); + } + + tokens.length = tokenIndex + 1; + + return tokens; + }; + + const isTokenNested = (token) => { + const value = openTag + SLASH + token.getValue(); + // potential bottleneck + return buffer.indexOf(value) > -1; + }; + + return { + tokenize, + isTokenNested, + }; +} + +export const createTokenOfType = createToken; +export { createLexer }; diff --git a/packages/bbob-parser/src/parse.js b/packages/bbob-parser/src/parse.js index c70ee8a..e6ecb32 100644 --- a/packages/bbob-parser/src/parse.js +++ b/packages/bbob-parser/src/parse.js @@ -22,25 +22,25 @@ const parse = (input, opts = {}) => { /** * Result AST of nodes * @private - * @type {ItemList} + * @type {NodeList} */ const nodes = createList(); /** * Temp buffer of nodes that's nested to another node * @private - * @type {ItemList} + * @type {NodeList} */ const nestedNodes = createList(); /** * Temp buffer of nodes [tag..]...[/tag] * @private - * @type {ItemList} + * @type {NodeList} */ const tagNodes = createList(); /** * Temp buffer of tag attributes * @private - * @type {ItemList} + * @type {NodeList} */ const tagNodesAttrName = createList(); diff --git a/packages/bbob-parser/src/utils.js 
b/packages/bbob-parser/src/utils.js index b6d11df..2b21383 100644 --- a/packages/bbob-parser/src/utils.js +++ b/packages/bbob-parser/src/utils.js @@ -3,95 +3,107 @@ import { BACKSLASH, } from '@bbob/plugin-helper/lib/char'; -/** - * @typedef {Object} CharGrabber - * @property {Function} skip - * @property {Function} hasNext - * @property {Function} isLast - * @property {Function} grabWhile - */ +function CharGrabber(source, options) { + const cursor = { + pos: 0, + len: source.length, + }; + + const substrUntilChar = (char) => { + const { pos } = cursor; + const idx = source.indexOf(char, pos); + + return idx >= 0 ? source.substr(pos, idx - pos) : ''; + }; + const includes = (val) => source.indexOf(val, cursor.pos) >= 0; + const hasNext = () => cursor.len > cursor.pos; + const isLast = () => cursor.pos === cursor.len; + const skip = (num = 1, silent) => { + cursor.pos += num; + + if (options && options.onSkip && !silent) { + options.onSkip(); + } + }; + const rest = () => source.substr(cursor.pos); + const curr = () => source[cursor.pos]; + const prev = () => { + const prevPos = cursor.pos - 1; + + return typeof source[prevPos] !== 'undefined' ? source[prevPos] : null; + }; + const next = () => { + const nextPos = cursor.pos + 1; + + return nextPos <= (source.length - 1) ? 
source[nextPos] : null; + }; + const grabWhile = (cond, silent) => { + let start = 0; + + if (hasNext()) { + start = cursor.pos; + + while (hasNext() && cond(curr())) { + skip(1, silent); + } + } + + return source.substr(start, cursor.pos - start); + }; + /** + * @type {skip} + */ + this.skip = skip; + /** + * @returns {Boolean} + */ + this.hasNext = hasNext; + /** + * @returns {String} + */ + this.getCurr = curr; + /** + * @returns {String} + */ + this.getRest = rest; + /** + * @returns {String} + */ + this.getNext = next; + /** + * @returns {String} + */ + this.getPrev = prev; + /** + * @returns {Boolean} + */ + this.isLast = isLast; + /** + * @returns {Boolean} + */ + this.includes = includes; + /** + * @param {Function} cond + * @param {Boolean} silent + * @return {String} + */ + this.grabWhile = grabWhile; + /** + * Grabs rest of string until it finds a char + * @param {String} char + * @return {String} + */ + this.substrUntilChar = substrUntilChar; +} /** * Creates a grabber wrapper for source string, that helps to iterate over string char by char * @param {String} source * @param {Object} options * @param {Function} options.onSkip - * @returns + * @return {CharGrabber} */ -export const createCharGrabber = (source, options) => { - // let idx = 0; - const cursor = { - pos: 0, - length: source.length, - }; - - const skip = () => { - cursor.pos += 1; - - if (options && options.onSkip) { - options.onSkip(); - } - }; - const hasNext = () => cursor.length > cursor.pos; - const getRest = () => source.substr(cursor.pos); - const getCurr = () => source[cursor.pos]; - - return { - skip, - hasNext, - isLast: () => (cursor.pos === cursor.length), - /** - * @param {Function} cond - * @returns {string} - */ - grabWhile: (cond) => { - let start = 0; - - if (hasNext()) { - start = cursor.pos; - - while (hasNext() && cond(getCurr())) { - skip(); - } - } - - return source.substr(start, cursor.pos - start); - }, - getNext: () => { - const nextPos = cursor.pos + 1; - - if (nextPos 
<= (source.length - 1)) { - return source[nextPos]; - } - return null; - }, - getPrev: () => { - const prevPos = cursor.pos - 1; - - if (typeof source[prevPos] !== 'undefined') { - return source[prevPos]; - } - return null; - }, - getCurr, - getRest, - /** - * Grabs rest of string until it find a char - * @param {String} char - * @return {String} - */ - substrUntilChar: (char) => { - const restStr = getRest(); - const indexOfChar = restStr.indexOf(char); - - if (indexOfChar >= 0) { - return restStr.substr(0, indexOfChar); - } - - return ''; - }, - }; -}; +export const createCharGrabber = (source, options) => new CharGrabber(source, options); /** * Trims string from start and end by char @@ -122,58 +134,26 @@ export const trimChar = (str, charToRemove) => { */ export const unquote = (str) => str.replace(BACKSLASH + QUOTEMARK, QUOTEMARK); -/** - * @typedef {Object} ItemList - * @type {Object} - * @property {getLastCb} getLast - * @property {flushLastCb} flushLast - * @property {pushCb} push - * @property {toArrayCb} toArray - */ +function NodeList(values = []) { + const nodes = values; + + const getLast = () => ( + Array.isArray(nodes) && nodes.length > 0 && typeof nodes[nodes.length - 1] !== 'undefined' + ? nodes[nodes.length - 1] + : null); + const flushLast = () => (nodes.length ? 
nodes.pop() : false); + const push = (value) => nodes.push(value); + const toArray = () => nodes; + + this.push = push; + this.toArray = toArray; + this.getLast = getLast; + this.flushLast = flushLast; +} /** * * @param values - * @return {ItemList} + * @return {NodeList} */ -export const createList = (values = []) => { - const nodes = values; - /** - * @callback getLastCb - */ - const getLast = () => { - if (Array.isArray(nodes) && nodes.length > 0 && typeof nodes[nodes.length - 1] !== 'undefined') { - return nodes[nodes.length - 1]; - } - - return null; - }; - /** - * @callback flushLastCb - * @return {*} - */ - const flushLast = () => { - if (nodes.length) { - return nodes.pop(); - } - - return false; - }; - /** - * @callback pushCb - * @param value - */ - const push = (value) => nodes.push(value); - - /** - * @callback toArrayCb - * @return {Array} - */ - - return { - getLast, - flushLast, - push, - toArray: () => nodes, - }; -}; +export const createList = (values = []) => new NodeList(values); diff --git a/packages/bbob-parser/test/Token.test.js b/packages/bbob-parser/test/Token.test.js index e92abc2..f68c91e 100644 --- a/packages/bbob-parser/test/Token.test.js +++ b/packages/bbob-parser/test/Token.test.js @@ -1,4 +1,4 @@ -import Token from '../src/Token' +import Token, { TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE } from '../src/Token' describe('Token', () => { test('isEmpty', () => { @@ -7,61 +7,61 @@ describe('Token', () => { expect(token.isEmpty()).toBeTruthy() }); test('isText', () => { - const token = new Token('word'); + const token = new Token(TYPE_WORD); expect(token.isText()).toBeTruthy(); }); test('isTag', () => { - const token = new Token('tag'); + const token = new Token(TYPE_TAG); expect(token.isTag()).toBeTruthy(); }); test('isAttrName', () => { - const token = new Token('attr-name'); + const token = new Token(TYPE_ATTR_NAME); expect(token.isAttrName()).toBeTruthy(); }); test('isAttrValue', () => { - const 
token = new Token('attr-value'); + const token = new Token(TYPE_ATTR_VALUE); expect(token.isAttrValue()).toBeTruthy(); }); test('isStart', () => { - const token = new Token('tag', 'my-tag'); + const token = new Token(TYPE_TAG, 'my-tag'); expect(token.isStart()).toBeTruthy(); }); test('isEnd', () => { - const token = new Token('tag', '/my-tag'); + const token = new Token(TYPE_TAG, '/my-tag'); expect(token.isEnd()).toBeTruthy(); }); test('getName', () => { - const token = new Token('tag', '/my-tag'); + const token = new Token(TYPE_TAG, '/my-tag'); expect(token.getName()).toBe('my-tag'); }); test('getValue', () => { - const token = new Token('tag', '/my-tag'); + const token = new Token(TYPE_TAG, '/my-tag'); expect(token.getValue()).toBe('/my-tag'); }); test('getLine', () => { - const token = new Token('tag', '/my-tag', 12); + const token = new Token(TYPE_TAG, '/my-tag', 12); expect(token.getLine()).toBe(12); }); test('getColumn', () => { - const token = new Token('tag', '/my-tag', 12, 14); + const token = new Token(TYPE_TAG, '/my-tag', 12, 14); expect(token.getColumn()).toBe(14); }); test('toString', () => { - const tokenEnd = new Token('tag', '/my-tag', 12, 14); + const tokenEnd = new Token(TYPE_TAG, '/my-tag', 12, 14); expect(tokenEnd.toString()).toBe('[/my-tag]'); - const tokenStart = new Token('tag', 'my-tag', 12, 14); + const tokenStart = new Token(TYPE_TAG, 'my-tag', 12, 14); expect(tokenStart.toString()).toBe('[my-tag]'); }); diff --git a/packages/bbob-parser/test/lexer.test.js b/packages/bbob-parser/test/lexer.test.js index 8eb6f1b..d585793 100644 --- a/packages/bbob-parser/test/lexer.test.js +++ b/packages/bbob-parser/test/lexer.test.js @@ -1,4 +1,4 @@ -import {TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE} from '../src/Token' +import { TYPE_ID, VALUE_ID, TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE} from '../src/Token' import { createLexer } from '../src/lexer' const TYPE = { @@ -10,19 
+10,58 @@ const TYPE = { NEW_LINE: TYPE_NEW_LINE, }; +const TYPE_NAMES = Object.fromEntries(Object.keys(TYPE).map(key => [TYPE[key], key])); + const tokenize = input => (createLexer(input).tokenize()); const tokenizeEscape = input => (createLexer(input, { enableEscapeTags: true }).tokenize()); describe('lexer', () => { - const expectOutput = (output, tokens) => { - expect(tokens.length).toBe(output.length); - expect(tokens).toBeInstanceOf(Array); - tokens.forEach((token, idx) => { - expect(token).toBeInstanceOf(Object); - expect(token.type).toEqual(output[idx][0]); - expect(token.value).toEqual(output[idx][1]); - }); - }; + expect.extend({ + toBeMantchOutput(tokens, output) { + if (tokens.length !== output.length) { + return { + message: () => + `expected tokens length ${tokens.length} to be ${output.length}`, + pass: false, + }; + } + + for (let idx = 0; idx < tokens.length; idx++) { + const token = tokens[idx]; + const [type, value] = output[idx]; + + if (typeof token !== 'object') { + return { + message: () => + `token must to be Object`, + pass: false, + }; + } + + if (token[TYPE_ID] !== type) { + return { + message: () => + `expected token type ${TYPE_NAMES[type]} but recieved ${TYPE_NAMES[token[TYPE_ID]]} for ${JSON.stringify(output[idx])}`, + pass: false, + }; + } + + if (token[VALUE_ID] !== value) { + return { + message: () => + `expected token value ${value} but recieved ${token[VALUE_ID]} for ${JSON.stringify(output[idx])}`, + pass: false, + }; + } + } + + return { + message: () => + `no valid output`, + pass: true, + }; + }, + }); test('single tag', () => { const input = '[SingleTag]'; @@ -31,7 +70,7 @@ describe('lexer', () => { [TYPE.TAG, 'SingleTag', '0', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('single tag with params', () => { @@ -42,7 +81,19 @@ describe('lexer', () => { [TYPE.ATTR_VALUE, '111', '0', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); + }); + + 
test('single fake tag', () => { + const input = '[ user=111]'; + const tokens = tokenize(input); + const output = [ + [TYPE.WORD, '[', '0', '0'], + [TYPE.SPACE, ' ', '0', '0'], + [TYPE.WORD, 'user=111]', '0', '0'], + ]; + + expect(tokens).toBeMantchOutput(output); }); test('single tag with spaces', () => { @@ -53,9 +104,25 @@ describe('lexer', () => { [TYPE.TAG, 'Single Tag', '0', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); + // @TODO: this is breaking change behavior + test.skip('tags with single attrs like disabled', () => { + const input = '[textarea disabled]world[/textarea]'; + const tokens = tokenize(input); + + const output = [ + [TYPE.TAG, 'textarea', '0', '0'], + [TYPE.ATTR_VALUE, 'disabled', '0', '0'], + [TYPE.WORD, 'world"', '0', '0'], + [TYPE.TAG, '/textarea', '0', '0'], + ]; + + expect(tokens).toBeMantchOutput(output); + }); + + test('string with quotemarks', () => { const input = '"Someone Like You" by Adele'; const tokens = tokenize(input); @@ -72,7 +139,7 @@ describe('lexer', () => { [TYPE.WORD, 'Adele', '21', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('tags in brakets', () => { @@ -89,7 +156,7 @@ describe('lexer', () => { [TYPE.WORD, ']', '7', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('tag as param', () => { @@ -102,7 +169,7 @@ describe('lexer', () => { [TYPE.TAG, '/color', '21', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('tag with quotemark params with spaces', () => { @@ -118,7 +185,7 @@ describe('lexer', () => { [TYPE.TAG, '/url', '24', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('tag with escaped quotemark param', () => { @@ -132,7 +199,7 @@ describe('lexer', () => { [TYPE.TAG, '/url', '26', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('tag param without 
quotemarks', () => { @@ -146,7 +213,7 @@ describe('lexer', () => { [TYPE.TAG, '/style', '25', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('list tag with items', () => { @@ -184,7 +251,29 @@ describe('lexer', () => { [TYPE.TAG, '/list', '0', '4'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); + }); + + test('few tags without spaces', () => { + const input = '[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]'; + const tokens = tokenize(input); + const output = [ + [TYPE.TAG, 'mytag1', 0, 0], + [TYPE.ATTR_NAME, 'size', 0, 0], + [TYPE.ATTR_VALUE, '15', 0, 0], + [TYPE.WORD, 'Tag1', 0, 0], + [TYPE.TAG, '/mytag1', 0, 0], + [TYPE.TAG, 'mytag2', 0, 0], + [TYPE.ATTR_NAME, 'size', 0, 0], + [TYPE.ATTR_VALUE, '16', 0, 0], + [TYPE.WORD, 'Tag2', 0, 0], + [TYPE.TAG, '/mytag2', 0, 0], + [TYPE.TAG, 'mytag3', 0, 0], + [TYPE.WORD, 'Tag3', 0, 0], + [TYPE.TAG, '/mytag3', 0, 0], + ]; + + expect(tokens).toBeMantchOutput(output); }); test('bad tags as texts', () => { @@ -211,8 +300,8 @@ describe('lexer', () => { [ [TYPE.WORD, '!', '0', '0'], [TYPE.WORD, '[', '1', '0'], - [TYPE.WORD, ']', '1', '0'], - [TYPE.WORD, '(image.jpg)', '1', '0'], + [TYPE.WORD, '](image.jpg)', '1', '0'], + // [TYPE.WORD, '', '1', '0'], ], [ [TYPE.WORD, 'x', '0', '0'], @@ -253,7 +342,7 @@ describe('lexer', () => { const tokens = tokenize(input); const output = asserts[idx]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); }); @@ -271,7 +360,7 @@ describe('lexer', () => { [TYPE.TAG, 'Finger', '0', '0'] ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('no close tag', () => { @@ -286,7 +375,7 @@ describe('lexer', () => { [TYPE.WORD, 'A', '0', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('escaped tag', () => { @@ -301,7 +390,7 @@ describe('lexer', () => { [TYPE.WORD, '[', '0', '0'], ]; - 
expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('escaped tag and escaped backslash', () => { @@ -321,7 +410,7 @@ describe('lexer', () => { [TYPE.WORD, ']', '0', '0'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('bad closed tag with escaped backslash', () => { @@ -335,7 +424,7 @@ describe('lexer', () => { [TYPE.WORD, 'b]', '0', '11'], ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); describe('html', () => { @@ -358,7 +447,7 @@ describe('lexer', () => { [TYPE.TAG, '/button', 2, 0] ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('attributes with no quotes or value', () => { @@ -377,7 +466,7 @@ describe('lexer', () => { [TYPE.TAG, '/button', 2, 0] ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test('attributes with no space between them. No valid, but accepted by the browser', () => { @@ -395,7 +484,7 @@ describe('lexer', () => { [TYPE.TAG, '/button', 2, 0] ]; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test.skip('style tag', () => { @@ -416,10 +505,10 @@ input.medium{width:100px;height:18px} input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;height:1.4em;font-weight:bold;font-size:9pt;padding:0px 2px;margin:0px;border:0px none #000} --> ` - const tokens = tokenizeHTML(content); - const output = []; + const tokens = tokenizeHTML(content); + const output = []; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }); test.skip('script tag', () => { @@ -432,7 +521,7 @@ input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;he const tokens = tokenizeHTML(content); const output = []; - expectOutput(output, tokens); + expect(tokens).toBeMantchOutput(output); }) }) }); diff --git a/packages/bbob-parser/test/parse.test.js b/packages/bbob-parser/test/parse.test.js index 8d7df2b..4623f70 
100644 --- a/packages/bbob-parser/test/parse.test.js +++ b/packages/bbob-parser/test/parse.test.js @@ -8,8 +8,7 @@ describe('Parser', () => { test('parse paired tags tokens', () => { const ast = parse('[best name=value]Foo Bar[/best]'); - - expectOutput(ast, [ + const output = [ { tag: 'best', attrs: { @@ -21,15 +20,16 @@ describe('Parser', () => { 'Bar', ], }, - ]); + ]; + + expectOutput(ast, output); }); test('parse only allowed tags', () => { const ast = parse('[h1 name=value]Foo [Bar] [/h1]', { onlyAllowTags: ['h1'] }); - - expectOutput(ast, [ + const output = [ { tag: 'h1', attrs: { @@ -42,13 +42,14 @@ describe('Parser', () => { ' ' ], }, - ]); + ]; + + expectOutput(ast, output); }); test('parse inconsistent tags', () => { const ast = parse('[h1 name=value]Foo [Bar] /h1]'); - - expectOutput(ast, [ + const output = [ { attrs: { name: 'value' @@ -65,13 +66,14 @@ describe('Parser', () => { }, ' ', '/h1]', - ]); + ]; + + expectOutput(ast, output); }); test('parse tag with value param', () => { const ast = parse('[url=https://github.com/jilizart/bbob]BBob[/url]'); - - expectOutput(ast, [ + const output = [ { tag: 'url', attrs: { @@ -79,13 +81,14 @@ describe('Parser', () => { }, content: ['BBob'], }, - ]); + ]; + + expectOutput(ast, output); }); test('parse tag with quoted param with spaces', () => { const ast = parse('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]'); - - expectOutput(ast, [ + const output = [ { tag: 'url', attrs: { @@ -95,13 +98,14 @@ describe('Parser', () => { }, content: ['Text'], }, - ]); + ]; + + expectOutput(ast, output); }); test('parse single tag with params', () => { const ast = parse('[url=https://github.com/jilizart/bbob]'); - - expectOutput(ast, [ + const output = [ { tag: 'url', attrs: { @@ -109,12 +113,15 @@ describe('Parser', () => { }, content: [], }, - ]); + ]; + + expectOutput(ast, output); }); test('detect inconsistent tag', () => { const onError = jest.fn(); - const ast = 
parse('[c][/c][b]hello[/c][/b][b]', { onError }); + + parse('[c][/c][b]hello[/c][/b][b]', { onError }); expect(onError).toHaveBeenCalled(); }); @@ -145,6 +152,82 @@ describe('Parser', () => { ]) }); + test('parse few tags without spaces', () => { + const ast = parse('[mytag1 size="15"]Tag1[/mytag1][mytag2 size="16"]Tag2[/mytag2][mytag3]Tag3[/mytag3]'); + const output = [ + { + tag: 'mytag1', + attrs: { + size: '15', + }, + content: ['Tag1'], + }, + { + tag: 'mytag2', + attrs: { + size: '16', + }, + content: ['Tag2'], + }, + { + tag: 'mytag3', + attrs: {}, + content: ['Tag3'], + }, + ]; + + expectOutput(ast, output); + }); + + // @TODO: this is breaking change behavior + test.skip('parse tags with single attributes like disabled', () => { + const ast = parse('[b]hello[/b] [textarea disabled]world[/textarea]'); + + expectOutput(ast, [ + { + tag: 'b', + attrs: {}, + content: ['hello'], + }, + ' ', + { + tag: 'textarea', + attrs: { + disabled: 'disabled', + }, + content: ['world'], + }, + ]); + }); + + test('parse url tag with get params', () => { + const ast = parse('[url=https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any]GET[/url]'); + + expectOutput(ast, [ + { + tag: 'url', + attrs: { + 'https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any': 'https://github.com/JiLiZART/bbob/search?q=any&unscoped_q=any', + }, + content: ['GET'], + }, + ]); + }); + + test('parse url tag with # and = symbols [google docs]', () => { + const ast = parse('[url href=https://docs.google.com/spreadsheets/d/1W9VPUESF_NkbSa_HtRFrQNl0nYo8vPCxJFy7jD3Tpio/edit#gid=0]Docs[/url]'); + + expectOutput(ast, [ + { + tag: 'url', + attrs: { + href: 'https://docs.google.com/spreadsheets/d/1W9VPUESF_NkbSa_HtRFrQNl0nYo8vPCxJFy7jD3Tpio/edit#gid=0', + }, + content: ['Docs'], + }, + ]); + }); + describe('html', () => { const parseHTML = input => parse(input, { openTag: '<', closeTag: '>' }); diff --git a/packages/bbob-parser/test/utils.test.js b/packages/bbob-parser/test/utils.test.js new 
file mode 100644 index 0000000..9ff9294 --- /dev/null +++ b/packages/bbob-parser/test/utils.test.js @@ -0,0 +1,91 @@ +import { createCharGrabber } from '../src/utils'; + + +describe('utils', () => { + describe('createCharGrabber', () => { + + test('#substrUntilChar ] 1', () => { + /** + + } + */ + const bufferGrabber = createCharGrabber('[h1 name=value]Foo [Bar] [/h1]'); + const substr = bufferGrabber.substrUntilChar(']'); + + expect(substr).toBe('[h1 name=value'); + }); + + test('#substrUntilChar ] 2', () => { + /** + console.log src/utils.js:95 + substrUntilChar { char: ']', indexOfChar: 63, curPos: 0 } { + result: '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"', + source: '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]' + } + console.log src/utils.js:104 + substrUntilChar.new { char: ']', indexOfCharNew: 63, curPos: 0 } { + result: '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]', + source: '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]' + } + */ + const bufferGrabber = createCharGrabber('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]'); + const substr = bufferGrabber.substrUntilChar(']'); + + expect(substr).toBe('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"'); + }); + + test('#substrUntilChar ] 3', () => { + /** + console.log src/utils.js:95 + substrUntilChar { char: ']', indexOfChar: 14, curPos: 7 } { + result: 'blah foo="bar"', + source: 'hello [blah foo="bar"]world[/blah]' + } + console.log src/utils.js:104 + substrUntilChar.new { char: ']', indexOfCharNew: 21, curPos: 7 } { + result: 'blah foo="bar"]world[/', + source: 'hello [blah foo="bar"]world[/blah]' + } + */ + const bufferGrabber = createCharGrabber('hello [blah foo="bar"]world[/blah]'); + const substr = bufferGrabber.substrUntilChar('['); + + expect(substr).toBe('hello '); + }); + + test('#substrUntilChar not existed', () => { + /** + console.log src/utils.js:95 
+ substrUntilChar { char: ']', indexOfChar: 14, curPos: 7 } { + result: 'blah foo="bar"', + source: 'hello [blah foo="bar"]world[/blah]' + } + console.log src/utils.js:104 + substrUntilChar.new { char: ']', indexOfCharNew: 21, curPos: 7 } { + result: 'blah foo="bar"]world[/', + source: 'hello [blah foo="bar"]world[/blah]' + } + */ + const bufferGrabber = createCharGrabber('hello'); + const substr = bufferGrabber.substrUntilChar('['); + + expect(substr).toBe(''); + }); + + test('getPrev is null', () => { + const bufferGrabber = createCharGrabber(''); + const prev = bufferGrabber.getPrev(); + + expect(prev).toBe(null); + }); + + test('getRest', () => { + const bufferGrabber = createCharGrabber('hello [blah foo="bar"]world[/blah]'); + bufferGrabber.skip(); + const rest = bufferGrabber.getRest(); + + expect(rest).toBe('ello [blah foo="bar"]world[/blah]'); + }); + + }) +}); diff --git a/packages/bbob-plugin-helper/src/TagNode.js b/packages/bbob-plugin-helper/src/TagNode.js index c2d6e25..57017ce 100644 --- a/packages/bbob-plugin-helper/src/TagNode.js +++ b/packages/bbob-plugin-helper/src/TagNode.js @@ -24,7 +24,7 @@ class TagNode { constructor(tag, attrs, content) { this.tag = tag; this.attrs = attrs; - this.content = [].concat(content); + this.content = Array.isArray(content) ? content : [content]; } attr(name, value) {