From ef6a778f457280be36573d7acdb8087e8ed6c568 Mon Sep 17 00:00:00 2001 From: Nikolay Kostyurin Date: Sat, 2 Mar 2019 22:21:44 +0200 Subject: [PATCH] refactor(parser): jsdoc, move some utility functions to separate files --- packages/bbob-parser/src/lexer.js | 101 ++++++++++-------------------- packages/bbob-parser/src/parse.js | 9 ++- packages/bbob-parser/src/utils.js | 94 +++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 68 deletions(-) create mode 100644 packages/bbob-parser/src/utils.js diff --git a/packages/bbob-parser/src/lexer.js b/packages/bbob-parser/src/lexer.js index 8ad936d..163ac35 100644 --- a/packages/bbob-parser/src/lexer.js +++ b/packages/bbob-parser/src/lexer.js @@ -12,65 +12,18 @@ import { } from '@bbob/plugin-helper/lib/char'; import { Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD } from './Token'; +import { createCharGrabber, trimChar, unquote } from './utils'; +// for cases const EM = '!'; -const createCharGrabber = (source, { onSkip } = {}) => { - let idx = 0; - - const skip = () => { - idx += 1; - - if (onSkip) { - onSkip(); - } - }; - const hasNext = () => source.length > idx; - const getRest = () => source.substr(idx); - - return { - skip, - hasNext, - isLast: () => (idx === source.length), - grabWhile: (cond) => { - const start = idx; - - while (hasNext() && cond(source[idx])) { - skip(); - } - - return source.substr(start, idx - start); - }, - getNext: () => source[idx + 1], - getPrev: () => source[idx - 1], - getCurr: () => source[idx], - getRest, - substrUntilChar: (char) => { - const restStr = getRest(); - const indexOfChar = restStr.indexOf(char); - - if (indexOfChar >= 0) { - return restStr.substr(0, indexOfChar); - } - - return ''; - }, - }; -}; - -const trimChar = (str, charToRemove) => { - while (str.charAt(0) === charToRemove) { - str = str.substring(1); - } - - while (str.charAt(str.length - 1) === charToRemove) { - str = str.substring(0, str.length - 1); - } - - return str; 
-}; - -const unquote = str => str.replace(BACKSLASH + QUOTEMARK, QUOTEMARK); +/** + * Creates a Token instance + * @param {String} type + * @param {String} value + * @param {Number} r line number + * @param {Number} cl char number in line + */ const createToken = (type, value, r = 0, cl = 0) => new Token(type, value, r, cl); /** @@ -106,6 +59,10 @@ function createLexer(buffer, options = {}) { const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1); const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0); + /** + * Emits the newly created token to the subscriber + * @param {Token} token + */ const emitToken = (token) => { if (options.onToken) { options.onToken(token); @@ -115,6 +72,11 @@ function createLexer(buffer, options = {}) { tokens[tokenIndex] = token; }; + /** + * Parses params inside [myTag---params go here---]content[/myTag] + * @param {String} str + * @returns {{tag: *, attrs: Array}} + */ const parseAttrs = (str) => { let tagName = null; let skipSpecialChars = false; @@ -153,7 +115,7 @@ function createLexer(buffer, options = {}) { const attrStr = attrCharGrabber.grabWhile(validAttr); const currChar = attrCharGrabber.getCurr(); - // first string before space is a tag name + // first string before space is a tag name [tagName params...] 
if (tagName === null) { tagName = attrStr; } else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) { @@ -180,18 +142,18 @@ function createLexer(buffer, options = {}) { }); const next = () => { - const char = bufferGrabber.getCurr(); + const currChar = bufferGrabber.getCurr(); - if (char === N) { + if (currChar === N) { bufferGrabber.skip(); col = 0; row++; - emitToken(createToken(TYPE_NEW_LINE, char, row, col)); - } else if (isWhiteSpace(char)) { + emitToken(createToken(TYPE_NEW_LINE, currChar, row, col)); + } else if (isWhiteSpace(currChar)) { const str = bufferGrabber.grabWhile(isWhiteSpace); emitToken(createToken(TYPE_SPACE, str, row, col)); - } else if (char === openTag) { + } else if (currChar === openTag) { const nextChar = bufferGrabber.getNext(); bufferGrabber.skip(); // skip openTag @@ -200,12 +162,15 @@ function createLexer(buffer, options = {}) { const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0; if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) { - emitToken(createToken(TYPE_WORD, char, row, col)); + emitToken(createToken(TYPE_WORD, currChar, row, col)); } else { + // const str = bufferGrabber.grabWhile(val => val !== closeTag); bufferGrabber.skip(); // skip closeTag + // [myTag ] const isNoAttrsInTag = str.indexOf(EQ) === -1; + // [/myTag] const isClosingTag = str[0] === SLASH; if (isNoAttrsInTag || isClosingTag) { @@ -214,14 +179,15 @@ function createLexer(buffer, options = {}) { const parsed = parseAttrs(str); emitToken(createToken(TYPE_TAG, parsed.tag, row, col)); + parsed.attrs.map(emitToken); } } - } else if (char === closeTag) { - bufferGrabber.skip(); + } else if (currChar === closeTag) { + bufferGrabber.skip(); // skip closeTag - emitToken(createToken(TYPE_WORD, char, row, col)); - } else if (isCharToken(char)) { + emitToken(createToken(TYPE_WORD, currChar, row, col)); + } else if (isCharToken(currChar)) { const str = bufferGrabber.grabWhile(isCharToken); 
emitToken(createToken(TYPE_WORD, str, row, col)); @@ -240,6 +206,7 @@ function createLexer(buffer, options = {}) { const isTokenNested = (token) => { const value = openTag + SLASH + token.getValue(); + // potential bottleneck return buffer.indexOf(value) > -1; }; diff --git a/packages/bbob-parser/src/parse.js b/packages/bbob-parser/src/parse.js index 78cc97a..8944f33 100644 --- a/packages/bbob-parser/src/parse.js +++ b/packages/bbob-parser/src/parse.js @@ -59,7 +59,7 @@ const createTagNodeAttrName = token => tagNodesAttrName.push(token.getValue()); * @return {Array} */ const getTagNodeAttrName = () => - (tagNodesAttrName.length ? tagNodesAttrName[tagNodesAttrName.length - 1] : null); + (tagNodesAttrName.length ? tagNodesAttrName[tagNodesAttrName.length - 1] : null); /** * @private @@ -153,6 +153,7 @@ const handleTagEnd = (token) => { options.onError({ message: `Inconsistent tag '${tag}' on line ${line} and column ${column}`, + tagName: tag, lineNumber: line, columnNumber: column, }); @@ -218,6 +219,12 @@ const parseToken = (token) => { /** * @public + * @param input + * @param opts + * @param {Function} opts.createTokenizer + * @param {Array} opts.onlyAllowTags + * @param {String} opts.openTag + * @param {String} opts.closeTag * @return {Array} */ const parse = (input, opts = {}) => { diff --git a/packages/bbob-parser/src/utils.js b/packages/bbob-parser/src/utils.js new file mode 100644 index 0000000..4f5354f --- /dev/null +++ b/packages/bbob-parser/src/utils.js @@ -0,0 +1,94 @@ +import { + QUOTEMARK, + BACKSLASH, +} from '@bbob/plugin-helper/lib/char'; + +/** + * @typedef {Object} CharGrabber + * @property {Function} skip + * @property {Function} hasNext + * @property {Function} isLast + * @property {Function} grabWhile + */ + +/** + * Creates a grabber wrapper around the source string that helps to iterate over it char by char + * @param {String} source + * @param {Function} onSkip + * @returns {CharGrabber} + */ +export const createCharGrabber = (source, { onSkip } = {}) => { + 
let idx = 0; + + const skip = () => { + idx += 1; + + if (onSkip) { + onSkip(); + } + }; + const hasNext = () => source.length > idx; + const getRest = () => source.substr(idx); + const getCurr = () => source[idx]; + + return { + skip, + hasNext, + isLast: () => (idx === source.length), + grabWhile: (cond) => { + const start = idx; + + while (hasNext() && cond(getCurr())) { + skip(); + } + + return source.substr(start, idx - start); + }, + getNext: () => source[idx + 1], + getPrev: () => source[idx - 1], + getCurr, + getRest, + /** + * Grabs rest of string until it finds a char + * @param {String} char + * @return {String} + */ + substrUntilChar: (char) => { + const restStr = getRest(); + const indexOfChar = restStr.indexOf(char); + + if (indexOfChar >= 0) { + return restStr.substr(0, indexOfChar); + } + + return ''; + }, + }; +}; + +/** + * Trims string from start and end by char + * @example + * trimChar('*hello*', '*') ==> 'hello' + * @param {String} str + * @param {String} charToRemove + * @returns {String} + */ +export const trimChar = (str, charToRemove) => { + while (str.charAt(0) === charToRemove) { + str = str.substring(1); + } + + while (str.charAt(str.length - 1) === charToRemove) { + str = str.substring(0, str.length - 1); + } + + return str; +}; + +/** + * Unquotes every \" to " + * @param {String} str + * @return {String} + */ +export const unquote = str => str.split(BACKSLASH + QUOTEMARK).join(QUOTEMARK);