diff --git a/packages/bbob-parser/src/Token.js b/packages/bbob-parser/src/Token.js index c483dc1..60a65c1 100644 --- a/packages/bbob-parser/src/Token.js +++ b/packages/bbob-parser/src/Token.js @@ -41,10 +41,6 @@ const getTagName = (token) => { const convertTagToText = (token) => { let text = OPEN_BRAKET; - if (isTagEnd(token)) { - text += SLASH; - } - text += getTokenValue(token); text += CLOSE_BRAKET; diff --git a/packages/bbob-parser/src/lexer.js b/packages/bbob-parser/src/lexer.js index 163ac35..c136094 100644 --- a/packages/bbob-parser/src/lexer.js +++ b/packages/bbob-parser/src/lexer.js @@ -164,7 +164,6 @@ function createLexer(buffer, options = {}) { if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) { emitToken(createToken(TYPE_WORD, currChar, row, col)); } else { - // const str = bufferGrabber.grabWhile(val => val !== closeTag); bufferGrabber.skip(); // skip closeTag diff --git a/packages/bbob-parser/src/parse.js b/packages/bbob-parser/src/parse.js index 8944f33..dad94a5 100644 --- a/packages/bbob-parser/src/parse.js +++ b/packages/bbob-parser/src/parse.js @@ -1,151 +1,135 @@ import TagNode from '@bbob/plugin-helper/lib/TagNode'; import { createLexer } from './lexer'; +import { createList } from './utils'; /** - * @private - * @type {Array} - */ -let nodes; -/** - * @private - * @type {Array} - */ -let nestedNodes; -/** - * @private - * @type {Array} - */ -let tagNodes; -/** - * @private - * @type {Array} - */ -let tagNodesAttrName; - -let options = {}; -let tokenizer = null; - -// eslint-disable-next-line no-unused-vars -let tokens = null; - -/** - * @private - * @param token - * @return {*} - */ -const isTagNested = token => tokenizer.isTokenNested(token); - -/** - * @private - * @return {TagNode} - */ -const getLastTagNode = () => (tagNodes.length ? tagNodes[tagNodes.length - 1] : null); - -/** - * @private - * @param {Token} token + * @public + * @param {String} input + * @param {Object} opts + * @param {Function} opts.createTokenizer + * @param {Array} opts.onlyAllowTags + * @param {String} opts.openTag + * @param {String} opts.closeTag * @return {Array} */ -const createTagNode = token => tagNodes.push(TagNode.create(token.getValue())); -/** - * @private - * @param {Token} token - * @return {Array} - */ -const createTagNodeAttrName = token => tagNodesAttrName.push(token.getValue()); +const parse = (input, opts = {}) => { + const options = opts; -/** - * @private - * @return {Array} - */ -const getTagNodeAttrName = () => - (tagNodesAttrName.length ? tagNodesAttrName[tagNodesAttrName.length - 1] : null); + let tokenizer = null; -/** - * @private - * @return {Array} - */ -const clearTagNodeAttrName = () => { - if (tagNodesAttrName.length) { - tagNodesAttrName.pop(); - } -}; + /** + * Result AST of nodes + * @private + * @type {ItemList} + */ + const nodes = createList(); + /** + * Temp buffer of nodes that's nested to another node + * @private + * @type {ItemList} + */ + const nestedNodes = createList(); + /** + * Temp buffer of nodes [tag..]...[/tag] + * @private + * @type {ItemList} + */ + const tagNodes = createList(); + /** + * Temp buffer of tag attributes + * @private + * @type {ItemList} + */ + const tagNodesAttrName = createList(); -/** - * @private - * @return {Array} - */ -const clearTagNode = () => { - if (tagNodes.length) { - tagNodes.pop(); + /** + * Cache for nested tags checks + * @type {{}} + */ + const nestedTagsMap = {}; - clearTagNodeAttrName(); - } -}; - -/** - * @private - * @return {Array} - */ -const getNodes = () => { - if (nestedNodes.length) { - const nestedNode = nestedNodes[nestedNodes.length - 1]; - - return nestedNode.content; - } - - return nodes; -}; - -/** - * @private - * @param tag - */ -const appendNode = (tag) => { - getNodes().push(tag); -}; - -/** - * @private - * @param value - * @return {boolean} - */ -const isAllowedTag = (value) => { - if (options.onlyAllowTags && options.onlyAllowTags.length) { - return options.onlyAllowTags.indexOf(value) >= 0; - } - - return true; -}; -/** - * @private - * @param {Token} token - */ -const handleTagStart = (token) => { - if (token.isStart()) { - createTagNode(token); - - if (isTagNested(token)) { - nestedNodes.push(getLastTagNode()); - } else { - appendNode(getLastTagNode()); - clearTagNode(); + const isTokenNested = (token) => { + if (typeof nestedTagsMap[token.getValue()] === 'undefined') { + nestedTagsMap[token.getValue()] = tokenizer.isTokenNested(token); } - } -}; -/** - * @private - * @param {Token} token - */ -const handleTagEnd = (token) => { - if (token.isEnd()) { - clearTagNode(); + return nestedTagsMap[token.getValue()]; + }; - const lastNestedNode = nestedNodes.pop(); + const isTagNested = tagName => !!nestedTagsMap[tagName]; + + /** + * Flushes temp tag nodes and its attributes buffers + * @private + * @return {Array} + */ + const flushTagNodes = () => { + if (tagNodes.flushLast()) { + tagNodesAttrName.flushLast(); + } + }; + + /** + * @private + * @return {Array} + */ + const getNodes = () => { + const lastNestedNode = nestedNodes.getLast(); + + return lastNestedNode ? lastNestedNode.content : nodes.toArray(); + }; + + /** + * @private + * @param {TagNode} tag + */ + const appendNodes = (tag) => { + getNodes().push(tag); + }; + + /** + * @private + * @param {String} value + * @return {boolean} + */ + const isAllowedTag = (value) => { + if (options.onlyAllowTags && options.onlyAllowTags.length) { + return options.onlyAllowTags.indexOf(value) >= 0; + } + + return true; + }; + + /** + * @private + * @param {Token} token + */ + const handleTagStart = (token) => { + flushTagNodes(); + + const tagNode = TagNode.create(token.getValue()); + const isNested = isTokenNested(token); + + tagNodes.push(tagNode); + + if (isNested) { + nestedNodes.push(tagNode); + } else { + appendNodes(tagNode); + } + }; + + /** + * @private + * @param {Token} token + */ + const handleTagEnd = (token) => { + flushTagNodes(); + + const lastNestedNode = nestedNodes.flushLast(); if (lastNestedNode) { - appendNode(lastNestedNode); + appendNodes(lastNestedNode); } else if (options.onError) { const tag = token.getValue(); const line = token.getLine(); @@ -158,92 +142,90 @@ const handleTagEnd = (token) => { columnNumber: column, }); } - } -}; + }; -/** - * @private - * @param {Token} token - */ -const handleTagToken = (token) => { - if (token.isTag()) { - if (isAllowedTag(token.getName())) { - // [tag] + /** + * @private + * @param {Token} token + */ + const handleTag = (token) => { + // [tag] + if (token.isStart()) { handleTagStart(token); - - // [/tag] - handleTagEnd(token); - } else { - appendNode(token.toString()); } - } -}; -/** - * @private - * @param {Token} token - */ -const handleTagNode = (token) => { - const tagNode = getLastTagNode(); + // [/tag] + if (token.isEnd()) { + handleTagEnd(token); + } + }; - if (tagNode) { - if (token.isAttrName()) { - createTagNodeAttrName(token); - tagNode.attr(getTagNodeAttrName(), ''); - } else if (token.isAttrValue()) { - const attrName = getTagNodeAttrName(); - const attrValue = token.getValue(); + /** + * @private + * @param {Token} token + */ + const handleNode = (token) => { + /** + * @type {TagNode} + */ + const lastTagNode = tagNodes.getLast(); + const tokenValue = token.getValue(); + const isNested = isTagNested(token); - if (attrName) { - tagNode.attr(getTagNodeAttrName(), attrValue); - clearTagNodeAttrName(); - } else { - tagNode.attr(attrValue, attrValue); + if (lastTagNode) { + if (token.isAttrName()) { + tagNodesAttrName.push(tokenValue); + lastTagNode.attr(tagNodesAttrName.getLast(), ''); + } else if (token.isAttrValue()) { + const attrName = tagNodesAttrName.getLast(); + + if (attrName) { + lastTagNode.attr(attrName, tokenValue); + tagNodesAttrName.flushLast(); + } else { + lastTagNode.attr(tokenValue, tokenValue); + } + } else if (token.isText()) { + if (isNested) { + lastTagNode.append(tokenValue); + } else { + appendNodes(tokenValue); + } + } else if (token.isTag()) { + // if tag is not allowed, just past it as is + appendNodes(token.toString()); } } else if (token.isText()) { - tagNode.append(token.getValue()); + appendNodes(tokenValue); + } else if (token.isTag()) { + // if tag is not allowed, just past it as is + appendNodes(token.toString()); } - } else if (token.isText()) { - appendNode(token.getValue()); - } -}; + }; -/** - * @private - * @param token - */ -const parseToken = (token) => { - handleTagToken(token); - handleTagNode(token); -}; + /** + * @private + * @param {Token} token + */ + const onToken = (token) => { + if (token.isTag() && isAllowedTag(token.getName())) { + handleTag(token); + } else { + handleNode(token); + } + }; -/** - * @public - * @param input - * @param opts - * @param {Function} opts.createTokenizer - * @param {Array} opts.onlyAllowTags - * @param {String} opts.openTag - * @param {String} opts.closeTag - * @return {Array} - */ -const parse = (input, opts = {}) => { - options = opts; tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, { - onToken: parseToken, + onToken, onlyAllowTags: options.onlyAllowTags, openTag: options.openTag, closeTag: options.closeTag, }); - nodes = []; - nestedNodes = []; - tagNodes = []; - tagNodesAttrName = []; + // eslint-disable-next-line no-unused-vars + const tokens = tokenizer.tokenize(); - tokens = tokenizer.tokenize(); - - return nodes; + return nodes.toArray(); }; export { parse }; diff --git a/packages/bbob-parser/src/utils.js b/packages/bbob-parser/src/utils.js index 4f5354f..683a715 100644 --- a/packages/bbob-parser/src/utils.js +++ b/packages/bbob-parser/src/utils.js @@ -76,10 +76,12 @@ export const createCharGrabber = (source, { onSkip } = {}) => { */ export const trimChar = (str, charToRemove) => { while (str.charAt(0) === charToRemove) { + // eslint-disable-next-line no-param-reassign str = str.substring(1); } while (str.charAt(str.length - 1) === charToRemove) { + // eslint-disable-next-line no-param-reassign str = str.substring(0, str.length - 1); } @@ -92,3 +94,53 @@ export const trimChar = (str, charToRemove) => { * @return {String} */ export const unquote = str => str.replace(BACKSLASH + QUOTEMARK, QUOTEMARK); + +/** + * @typedef {Object} ItemList + * @type {Object} + * @property {getLastCb} getLast + * @property {flushLastCb} flushLast + * @property {pushCb} push + * @property {toArrayCb} toArray + */ + +/** + * + * @param values + * @return {ItemList} + */ +export const createList = (values = []) => { + const nodes = values; + /** + * @callback getLastCb + */ + const getLast = () => (nodes.length ? nodes[nodes.length - 1] : null); + /** + * @callback flushLastCb + * @return {*} + */ + const flushLast = () => { + if (nodes.length) { + return nodes.pop(); + } + + return false; + }; + /** + * @callback pushCb + * @param value + */ + const push = value => nodes.push(value); + + /** + * @callback toArrayCb + * @return {Array} + */ + + return { + getLast, + flushLast, + push, + toArray: () => nodes, + }; +}; diff --git a/packages/bbob-parser/test/Token.test.js b/packages/bbob-parser/test/Token.test.js new file mode 100644 index 0000000..e92abc2 --- /dev/null +++ b/packages/bbob-parser/test/Token.test.js @@ -0,0 +1,68 @@ +import Token from '../src/Token' + +describe('Token', () => { + test('isEmpty', () => { + const token = new Token(); + + expect(token.isEmpty()).toBeTruthy() + }); + test('isText', () => { + const token = new Token('word'); + + expect(token.isText()).toBeTruthy(); + }); + test('isTag', () => { + const token = new Token('tag'); + + expect(token.isTag()).toBeTruthy(); + }); + test('isAttrName', () => { + const token = new Token('attr-name'); + + expect(token.isAttrName()).toBeTruthy(); + }); + test('isAttrValue', () => { + const token = new Token('attr-value'); + + expect(token.isAttrValue()).toBeTruthy(); + }); + test('isStart', () => { + const token = new Token('tag', 'my-tag'); + + expect(token.isStart()).toBeTruthy(); + }); + test('isEnd', () => { + const token = new Token('tag', '/my-tag'); + + expect(token.isEnd()).toBeTruthy(); + }); + test('getName', () => { + const token = new Token('tag', '/my-tag'); + + expect(token.getName()).toBe('my-tag'); + }); + test('getValue', () => { + const token = new Token('tag', '/my-tag'); + + expect(token.getValue()).toBe('/my-tag'); + }); + test('getLine', () => { + const token = new Token('tag', '/my-tag', 12); + + expect(token.getLine()).toBe(12); + }); + test('getColumn', () => { + const token = new Token('tag', '/my-tag', 12, 14); + + expect(token.getColumn()).toBe(14); + }); + test('toString', () => { + const tokenEnd = new Token('tag', '/my-tag', 12, 14); + + expect(tokenEnd.toString()).toBe('[/my-tag]'); + + const tokenStart = new Token('tag', 'my-tag', 12, 14); + + expect(tokenStart.toString()).toBe('[my-tag]'); + }); +}); diff --git a/packages/bbob-parser/test/lexer.test.js b/packages/bbob-parser/test/lexer.test.js index 7476f4f..e73075a 100644 --- a/packages/bbob-parser/test/lexer.test.js +++ b/packages/bbob-parser/test/lexer.test.js @@ -1,13 +1,13 @@ -const Token = require('../src/Token'); -const { createLexer } = require('../src/lexer'); +import {TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE} from '../src/Token' +import { createLexer } from '../src/lexer' const TYPE = { - WORD: Token.TYPE_WORD, - TAG: Token.TYPE_TAG, - ATTR_NAME: Token.TYPE_ATTR_NAME, - ATTR_VALUE: Token.TYPE_ATTR_VALUE, - SPACE: Token.TYPE_SPACE, - NEW_LINE: Token.TYPE_NEW_LINE, + WORD: TYPE_WORD, + TAG: TYPE_TAG, + ATTR_NAME: TYPE_ATTR_NAME, + ATTR_VALUE: TYPE_ATTR_VALUE, + SPACE: TYPE_SPACE, + NEW_LINE: TYPE_NEW_LINE, }; const tokenize = input => (createLexer(input).tokenize()); @@ -33,6 +33,17 @@ describe('lexer', () => { expectOutput(output, tokens); }); + test('single tag with params', () => { + const input = '[user=111]'; + const tokens = tokenize(input); + const output = [ + [TYPE.TAG, 'user', '0', '0'], + [TYPE.ATTR_VALUE, '111', '0', '0'], + ]; + + expectOutput(output, tokens); + }); + test('single tag with spaces', () => { const input = '[Single Tag]'; const tokens = tokenize(input); @@ -245,7 +256,6 @@ describe('lexer', () => { }); }); - test('bad unclosed tag', () => { const input = `[Finger Part A [Finger]`; const tokens = tokenize(input); @@ -278,11 +288,10 @@ describe('lexer', () => { expectOutput(output, tokens); }); - describe('html', () => { const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize(); - test('Normal attributes', () => { + test('normal attributes', () => { const content = ``; const tokens = tokenizeHTML(content); const output = [ @@ -302,7 +311,7 @@ describe('lexer', () => { expectOutput(output, tokens); }); - test('Attributes with no quotes or value', () => { + test('attributes with no quotes or value', () => { const content = ``; const tokens = tokenizeHTML(content); const output = [ @@ -321,7 +330,7 @@ describe('lexer', () => { expectOutput(output, tokens); }); - test('Attributes with no space between them. No valid, but accepted by the browser', () => { + test('attributes with no space between them. No valid, but accepted by the browser', () => { const content = ``; const tokens = tokenizeHTML(content); const output = [ @@ -338,5 +347,42 @@ describe('lexer', () => { expectOutput(output, tokens); }); + + test.skip('style tag', () => { + const content = `` + const tokens = tokenizeHTML(content); + const output = []; + + expectOutput(output, tokens); + }); + + test.skip('script tag', () => { + const content = ``; + const tokens = tokenizeHTML(content); + const output = []; + + expectOutput(output, tokens); + }) }) }); diff --git a/packages/bbob-parser/test/parse.test.js b/packages/bbob-parser/test/parse.test.js index ec9a244..53d2833 100644 --- a/packages/bbob-parser/test/parse.test.js +++ b/packages/bbob-parser/test/parse.test.js @@ -50,7 +50,9 @@ describe('Parser', () => { expectOutput(ast, [ { - attrs: {}, + attrs: { + name: 'value' + }, tag: 'h1', content: [] }, @@ -96,6 +98,20 @@ describe('Parser', () => { ]); }); + test('parse single tag with params', () => { + const ast = parse('[url=https://github.com/jilizart/bbob]'); + + expectOutput(ast, [ + { + tag: 'url', + attrs: { + 'https://github.com/jilizart/bbob': 'https://github.com/jilizart/bbob', + }, + content: [], + }, + ]); + }); + test('detect inconsistent tag', () => { const onError = jest.fn(); const ast = parse('[c][/c][b]hello[/c][/b][b]', { onError });