From 3e15b54efa111f51f4a68c5d8fc15cadc38371d1 Mon Sep 17 00:00:00 2001 From: Nikolay Kostyurin Date: Mon, 25 Jun 2018 00:53:49 +0200 Subject: [PATCH] optimize Parser, remove while loop, add event to Tokenizer --- packages/bbob-parser/lib/Parser.js | 52 +++++--- packages/bbob-parser/lib/Token.js | 128 ++++++++++++++++++ packages/bbob-parser/lib/Tokenizer.js | 138 +++++++++++--------- packages/bbob-parser/lib/parse.js | 5 +- packages/bbob-parser/lib/token.js | 79 ----------- packages/bbob-parser/test/Parser.test.js | 18 +-- packages/bbob-parser/test/Tokenizer.test.js | 50 ++++--- 7 files changed, 270 insertions(+), 200 deletions(-) create mode 100644 packages/bbob-parser/lib/Token.js delete mode 100644 packages/bbob-parser/lib/token.js diff --git a/packages/bbob-parser/lib/Parser.js b/packages/bbob-parser/lib/Parser.js index 1044dce..eeed71c 100644 --- a/packages/bbob-parser/lib/Parser.js +++ b/packages/bbob-parser/lib/Parser.js @@ -10,13 +10,15 @@ const { isTagToken, isTextToken, isTagEnd, -} = require('./token'); +} = require('./Token'); const { SLASH, getChar, } = require('./char'); +const Tokenizer = require('./Tokenizer'); + const createTagNode = (tag, attrs = {}, content = []) => ({ tag, attrs, content }); /** @@ -29,12 +31,16 @@ const createTagNode = (tag, attrs = {}, content = []) => ({ tag, attrs, content content: ['hello world!'] } */ -module.exports = class Parser { - constructor(tokens, options = {}) { - this.tokens = tokens; +class Parser { + constructor(input, options = {}) { + this.tokenizer = new Tokenizer(input, { + onToken: (token) => { + this.parseToken(token); + }, + }); + this.options = options; - this.closableTags = this.findNestedTags(); this.nodes = []; this.nestedNodes = []; this.curTags = []; @@ -42,7 +48,7 @@ module.exports = class Parser { } isNestedTag(token) { - return this.closableTags.indexOf(getTokenValue(token)) >= 0; + return this.tokenizer.isTokenNested(token); } getCurTag() { @@ -154,25 +160,32 @@ module.exports = class Parser { } } + parseToken(token) { + this.handleTagToken(token); + this.handleCurTag(token); + } + parse() { - let token; - // eslint-disable-next-line no-cond-assign - while (token = this.tokens.shift()) { - if (!token) { - // eslint-disable-next-line no-continue - continue; + if (this.tokens) { + let token; + // eslint-disable-next-line no-cond-assign + while (token = this.tokens.shift()) { + if (!token) { + // eslint-disable-next-line no-continue + continue; + } + + this.parseToken(token); } - - this.handleTagToken(token); - - this.handleCurTag(token); + } else { + this.tokens = this.tokenizer.tokenize(); } return this.nodes; } findNestedTags() { - const tags = this.tokens.filter(isTagToken).reduce((acc, token) => { + const tags = (this.tokens || []).filter(isTagToken).reduce((acc, token) => { acc[getTokenValue(token)] = true; return acc; @@ -196,6 +209,9 @@ module.exports = class Parser { return true; } -}; +} +new Parser('[Verse 2]').parse(); + +module.exports = Parser; module.exports.createTagNode = createTagNode; diff --git a/packages/bbob-parser/lib/Token.js b/packages/bbob-parser/lib/Token.js new file mode 100644 index 0000000..bf0ae96 --- /dev/null +++ b/packages/bbob-parser/lib/Token.js @@ -0,0 +1,128 @@ +const { + getChar, + OPEN_BRAKET, + CLOSE_BRAKET, + SLASH, +} = require('./char'); + +// type, value, line, row, +const TOKEN_TYPE_ID = 'type'; // 0; +const TOKEN_VALUE_ID = 'value'; // 1; +const TOKEN_COLUMN_ID = 'row'; // 2; +const TOKEN_LINE_ID = 'line'; // 3; + +const TOKEN_TYPE_WORD = 'word'; +const TOKEN_TYPE_TAG = 'tag'; +const TOKEN_TYPE_ATTR_NAME = 'attr-name'; +const TOKEN_TYPE_ATTR_VALUE = 'attr-value'; +const TOKEN_TYPE_SPACE = 'space'; +const TOKEN_TYPE_NEW_LINE = 'new-line'; + +const getTokenValue = token => token[TOKEN_VALUE_ID]; +const getTokenLine = token => token[TOKEN_LINE_ID]; +const getTokenColumn = token => token[TOKEN_COLUMN_ID]; + +const isTextToken = (token) => { + const type = token[TOKEN_TYPE_ID]; + + return type === TOKEN_TYPE_SPACE || type === TOKEN_TYPE_NEW_LINE || type === TOKEN_TYPE_WORD; +}; + +const isTagToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_TAG; +const isTagEnd = token => getTokenValue(token).charCodeAt(0) === SLASH; +const isTagStart = token => !isTagEnd(token); +const isAttrNameToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_ATTR_NAME; +const isAttrValueToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_ATTR_VALUE; + +const getTagName = (token) => { + const value = getTokenValue(token); + + return isTagEnd(token) ? value.slice(1) : value; +}; + +const convertTagToText = (token) => { + let text = getChar(OPEN_BRAKET); + + if (isTagEnd(token)) { + text += getChar(SLASH); + } + + text += getTokenValue(token); + text += getChar(CLOSE_BRAKET); + + return text; +}; + +class Token { + constructor(type, value, line, row) { + this.type = String(type); + this.value = String(value); + this.line = Number(line); + this.row = Number(row); + } + + isText() { + return isTextToken(this); + } + + isTag() { + return isTagToken(this); + } + + isAttrName() { + return isAttrNameToken(this); + } + + isAttrValue() { + return isAttrValueToken(this); + } + + isEnd() { + return isTagEnd(this); + } + + getName() { + return getTagName(this); + } + + getValue() { + return getTokenValue(this); + } + + getLine() { + return getTokenLine(this); + } + + getColumn() { + return getTokenColumn(this); + } + + toString() { + return convertTagToText(this); + } +} + +module.exports = Token; + +module.exports.TYPE_ID = TOKEN_TYPE_ID; +module.exports.VALUE_ID = TOKEN_VALUE_ID; +module.exports.LINE_ID = TOKEN_LINE_ID; +module.exports.COLUMN_ID = TOKEN_COLUMN_ID; +module.exports.TYPE_WORD = TOKEN_TYPE_WORD; +module.exports.TYPE_TAG = TOKEN_TYPE_TAG; +module.exports.TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME; +module.exports.TYPE_ATTR_VALUE = TOKEN_TYPE_ATTR_VALUE; +module.exports.TYPE_SPACE = TOKEN_TYPE_SPACE; +module.exports.TYPE_NEW_LINE = TOKEN_TYPE_NEW_LINE; + +module.exports.convertTagToText = convertTagToText; +module.exports.getTagName = getTagName; +module.exports.getTokenColumn = getTokenColumn; +module.exports.getTokenLine = getTokenLine; +module.exports.getTokenValue = getTokenValue; +module.exports.isAttrNameToken = isAttrNameToken; +module.exports.isAttrValueToken = isAttrValueToken; +module.exports.isTagStart = isTagStart; +module.exports.isTagToken = isTagToken; +module.exports.isTextToken = isTextToken; +module.exports.isTagEnd = isTagEnd; diff --git a/packages/bbob-parser/lib/Tokenizer.js b/packages/bbob-parser/lib/Tokenizer.js index e632058..383074b 100644 --- a/packages/bbob-parser/lib/Tokenizer.js +++ b/packages/bbob-parser/lib/Tokenizer.js @@ -3,11 +3,14 @@ const { OPEN_BRAKET, CLOSE_BRAKET, EQ, TAB, SPACE, N, QUOTEMARK, PLACEHOLDER_SPACE, PLACEHOLDER_SPACE_TAB, + SLASH, } = require('./char'); -const TOKEN = require('./token'); +const Token = require('./Token'); + +const createTokenOfType = (type, value, line, row) => new Token(type, value, line, row); class Tokenizer { - constructor(input) { + constructor(input, options = {}) { this.buffer = input; this.colPos = 0; this.rowPos = 0; @@ -15,18 +18,27 @@ class Tokenizer { this.tokenIndex = -1; this.tokens = new Array(Math.floor(this.buffer.length)); - this.dummyArray = ['', '', '', '']; + this.dummyToken = createTokenOfType('', '', '', ''); - this.wordToken = this.dummyArray; - this.tagToken = this.dummyArray; - this.attrNameToken = this.dummyArray; - this.attrValueToken = this.dummyArray; + this.wordToken = this.dummyToken; + this.tagToken = this.dummyToken; + this.attrNameToken = this.dummyToken; + this.attrValueToken = this.dummyToken; this.attrTokens = []; + + this.options = options; + } + + emitToken(token) { + if (this.options.onToken) { + this.options.onToken(token); + } } appendToken(token) { this.tokenIndex += 1; this.tokens[this.tokenIndex] = token; + this.emitToken(token); } nextCol() { @@ -38,73 +50,73 @@ class Tokenizer { } flushWord() { - if (this.wordToken[TOKEN.TYPE_ID] && this.wordToken[TOKEN.VALUE_ID]) { + if (this.wordToken[Token.TYPE_ID] && this.wordToken[Token.VALUE_ID]) { this.appendToken(this.wordToken); this.wordToken = this.createWordToken(''); } } createWord(value, line, row) { - if (this.wordToken[TOKEN.TYPE_ID] === '') { + if (this.wordToken[Token.TYPE_ID] === '') { this.wordToken = this.createWordToken(value, line, row); } } flushTag() { - if (this.tagToken[TOKEN.TYPE_ID]) { + if (this.tagToken[Token.TYPE_ID]) { // [] and [=] tag case - if (this.tagToken[TOKEN.VALUE_ID] === '') { - const value = this.attrValueToken[TOKEN.TYPE_ID] ? getChar(EQ) : ''; + if (this.tagToken[Token.VALUE_ID] === '') { + const value = this.attrValueToken[Token.TYPE_ID] ? getChar(EQ) : ''; const word = getChar(OPEN_BRAKET) + value + getChar(CLOSE_BRAKET); this.createWord('', 0, 0); - this.wordToken[TOKEN.VALUE_ID] += word; + this.wordToken[Token.VALUE_ID] += word; - this.tagToken = this.dummyArray; + this.tagToken = this.dummyToken; - if (this.attrValueToken[TOKEN.TYPE_ID]) { - this.attrValueToken = this.dummyArray; + if (this.attrValueToken[Token.TYPE_ID]) { + this.attrValueToken = this.dummyToken; } return; } - if (this.attrNameToken[TOKEN.TYPE_ID] && !this.attrValueToken[TOKEN.TYPE_ID]) { - this.tagToken[TOKEN.VALUE_ID] += PLACEHOLDER_SPACE + this.attrNameToken[TOKEN.VALUE_ID]; - this.attrNameToken = this.dummyArray; + if (this.attrNameToken[Token.TYPE_ID] && !this.attrValueToken[Token.TYPE_ID]) { + this.tagToken[Token.VALUE_ID] += PLACEHOLDER_SPACE + this.attrNameToken[Token.VALUE_ID]; + this.attrNameToken = this.dummyToken; } this.appendToken(this.tagToken); - this.tagToken = this.dummyArray; + this.tagToken = this.dummyToken; } } flushUnclosedTag() { - if (this.tagToken[TOKEN.TYPE_ID]) { - const value = this.tagToken[TOKEN.VALUE_ID] + (this.attrValueToken[TOKEN.VALUE_ID] ? getChar(EQ) : ''); + if (this.tagToken[Token.TYPE_ID]) { + const value = this.tagToken[Token.VALUE_ID] + (this.attrValueToken[Token.VALUE_ID] ? getChar(EQ) : ''); - this.tagToken[TOKEN.TYPE_ID] = TOKEN.TYPE_WORD; - this.tagToken[TOKEN.VALUE_ID] = getChar(OPEN_BRAKET) + value; + this.tagToken[Token.TYPE_ID] = Token.TYPE_WORD; + this.tagToken[Token.VALUE_ID] = getChar(OPEN_BRAKET) + value; this.appendToken(this.tagToken); - this.tagToken = this.dummyArray; + this.tagToken = this.dummyToken; - if (this.attrValueToken[TOKEN.TYPE_ID]) { - this.attrValueToken = this.dummyArray; + if (this.attrValueToken[Token.TYPE_ID]) { + this.attrValueToken = this.dummyToken; } } } flushAttrNames() { - if (this.attrNameToken[TOKEN.TYPE_ID]) { + if (this.attrNameToken[Token.TYPE_ID]) { this.attrTokens.push(this.attrNameToken); - this.attrNameToken = this.dummyArray; + this.attrNameToken = this.dummyToken; } - if (this.attrValueToken[TOKEN.TYPE_ID]) { + if (this.attrValueToken[Token.TYPE_ID]) { this.attrTokens.push(this.attrValueToken); - this.attrValueToken = this.dummyArray; + this.attrValueToken = this.dummyToken; } } @@ -118,7 +130,7 @@ class Tokenizer { charSPACE(charCode) { this.flushWord(); - if (this.tagToken[TOKEN.TYPE_ID]) { + if (this.tagToken[Token.TYPE_ID]) { this.attrNameToken = this.createAttrNameToken(''); } else { const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE; @@ -152,36 +164,36 @@ class Tokenizer { } charEQ(charCode) { - if (this.tagToken[TOKEN.TYPE_ID]) { + if (this.tagToken[Token.TYPE_ID]) { this.attrValueToken = this.createAttrValueToken(''); } else { - this.wordToken[TOKEN.VALUE_ID] += getChar(charCode); + this.wordToken[Token.VALUE_ID] += getChar(charCode); } this.nextCol(); } charQUOTEMARK(charCode) { - if (this.attrValueToken[TOKEN.TYPE_ID] && this.attrValueToken[TOKEN.VALUE_ID] > 0) { + if (this.attrValueToken[Token.TYPE_ID] && this.attrValueToken[Token.VALUE_ID] > 0) { this.flushAttrNames(); - } else if (this.tagToken[TOKEN.TYPE_ID] === '') { - this.wordToken[TOKEN.VALUE_ID] += getChar(charCode); + } else if (this.tagToken[Token.TYPE_ID] === '') { + this.wordToken[Token.VALUE_ID] += getChar(charCode); } this.nextCol(); } charWORD(charCode) { - if (this.tagToken[TOKEN.TYPE_ID] && this.attrValueToken[TOKEN.TYPE_ID]) { - this.attrValueToken[TOKEN.VALUE_ID] += getChar(charCode); - } else if (this.tagToken[TOKEN.TYPE_ID] && this.attrNameToken[TOKEN.TYPE_ID]) { - this.attrNameToken[TOKEN.VALUE_ID] += getChar(charCode); - } else if (this.tagToken[TOKEN.TYPE_ID]) { - this.tagToken[TOKEN.VALUE_ID] += getChar(charCode); + if (this.tagToken[Token.TYPE_ID] && this.attrValueToken[Token.TYPE_ID]) { + this.attrValueToken[Token.VALUE_ID] += getChar(charCode); + } else if (this.tagToken[Token.TYPE_ID] && this.attrNameToken[Token.TYPE_ID]) { + this.attrNameToken[Token.VALUE_ID] += getChar(charCode); + } else if (this.tagToken[Token.TYPE_ID]) { + this.tagToken[Token.VALUE_ID] += getChar(charCode); } else { this.createWord(); - this.wordToken[TOKEN.VALUE_ID] += getChar(charCode); + this.wordToken[Token.VALUE_ID] += getChar(charCode); } this.nextCol(); @@ -234,31 +246,32 @@ class Tokenizer { } createWordToken(value = '', line = this.colPos, row = this.rowPos) { - return this.createTokenOfType(TOKEN.TYPE_WORD, value, line, row); + return createTokenOfType(Token.TYPE_WORD, value, line, row); } createTagToken(value, line = this.colPos, row = this.rowPos) { - return this.createTokenOfType(TOKEN.TYPE_TAG, value, line, row); + return createTokenOfType(Token.TYPE_TAG, value, line, row); } createAttrNameToken(value, line = this.colPos, row = this.rowPos) { - return this.createTokenOfType(TOKEN.TYPE_ATTR_NAME, value, line, row); + return createTokenOfType(Token.TYPE_ATTR_NAME, value, line, row); } createAttrValueToken(value, line = this.colPos, row = this.rowPos) { - return this.createTokenOfType(TOKEN.TYPE_ATTR_VALUE, value, line, row); + return createTokenOfType(Token.TYPE_ATTR_VALUE, value, line, row); } createSpaceToken(value, line = this.colPos, row = this.rowPos) { - return this.createTokenOfType(TOKEN.TYPE_SPACE, value, line, row); + return createTokenOfType(Token.TYPE_SPACE, value, line, row); } createNewLineToken(value, line = this.colPos, row = this.rowPos) { - return this.createTokenOfType(TOKEN.TYPE_NEW_LINE, value, line, row); + return createTokenOfType(Token.TYPE_NEW_LINE, value, line, row); } - createTokenOfType(type, value, line = this.colPos, row = this.rowPos) { - return [String(type), String(value), String(line), String(row)]; + isTokenNested(token) { + const value = getChar(OPEN_BRAKET) + getChar(SLASH) + Token.getTokenValue(token); + return this.buffer.indexOf(value) > -1; } } @@ -266,18 +279,19 @@ class Tokenizer { new Tokenizer('[b param="hello"]Sample text[/b]\n\t[Chorus 2] x html([a. title][, alt][, classes]) x [=] [/y]').tokenize(); module.exports = Tokenizer; +module.exports.createTokenOfType = createTokenOfType; module.exports.TYPE = { - WORD: TOKEN.TYPE_WORD, - TAG: TOKEN.TYPE_TAG, - ATTR_NAME: TOKEN.TYPE_ATTR_NAME, - ATTR_VALUE: TOKEN.TYPE_ATTR_VALUE, - SPACE: TOKEN.TYPE_SPACE, - NEW_LINE: TOKEN.TYPE_NEW_LINE, + WORD: Token.TYPE_WORD, + TAG: Token.TYPE_TAG, + ATTR_NAME: Token.TYPE_ATTR_NAME, + ATTR_VALUE: Token.TYPE_ATTR_VALUE, + SPACE: Token.TYPE_SPACE, + NEW_LINE: Token.TYPE_NEW_LINE, }; module.exports.TOKEN = { - TYPE_ID: TOKEN.TYPE_ID, - VALUE_ID: TOKEN.VALUE_ID, - LINE_ID: TOKEN.LINE_ID, - COLUMN_ID: TOKEN.COLUMN_ID, + TYPE_ID: Token.TYPE_ID, + VALUE_ID: Token.VALUE_ID, + LINE_ID: Token.LINE_ID, + COLUMN_ID: Token.COLUMN_ID, }; diff --git a/packages/bbob-parser/lib/parse.js b/packages/bbob-parser/lib/parse.js index a60d737..ae91e5d 100644 --- a/packages/bbob-parser/lib/parse.js +++ b/packages/bbob-parser/lib/parse.js @@ -1,10 +1,7 @@ -const Tokenizer = require('./Tokenizer'); const Parser = require('./Parser'); module.exports = function parse(input, options) { - const tokenizer = new Tokenizer(input); - const tokens = tokenizer.tokenize(); - const parser = new Parser(tokens, options); + const parser = new Parser(input, options); const ast = parser.parse(); return ast; diff --git a/packages/bbob-parser/lib/token.js b/packages/bbob-parser/lib/token.js deleted file mode 100644 index 41811ad..0000000 --- a/packages/bbob-parser/lib/token.js +++ /dev/null @@ -1,79 +0,0 @@ -const { - getChar, - OPEN_BRAKET, - CLOSE_BRAKET, - SLASH, -} = require('./char'); - -const TOKEN_TYPE_ID = 0; -const TOKEN_VALUE_ID = 1; -const TOKEN_COLUMN_ID = 2; -const TOKEN_LINE_ID = 3; - -const TOKEN_TYPE_WORD = 'word'; -const TOKEN_TYPE_TAG = 'tag'; -const TOKEN_TYPE_ATTR_NAME = 'attr-name'; -const TOKEN_TYPE_ATTR_VALUE = 'attr-value'; -const TOKEN_TYPE_SPACE = 'space'; -const TOKEN_TYPE_NEW_LINE = 'new-line'; - -const getTokenValue = token => token[TOKEN_VALUE_ID]; -const getTokenLine = token => token[TOKEN_LINE_ID]; -const getTokenColumn = token => token[TOKEN_COLUMN_ID]; - -const isTextToken = (token) => { - const type = token[TOKEN_TYPE_ID]; - - return type === TOKEN_TYPE_SPACE || type === TOKEN_TYPE_NEW_LINE || type === TOKEN_TYPE_WORD; -}; - -const isTagToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_TAG; -const isTagEnd = token => getTokenValue(token).charCodeAt(0) === SLASH; -const isTagStart = token => !isTagEnd(token); -const isAttrNameToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_ATTR_NAME; -const isAttrValueToken = token => token[TOKEN_TYPE_ID] === TOKEN_TYPE_ATTR_VALUE; - -const getTagName = (token) => { - const value = getTokenValue(token); - - return isTagEnd(token) ? value.slice(1) : value; -}; - - -const convertTagToText = (token) => { - let text = getChar(OPEN_BRAKET); - - if (isTagEnd(token)) { - text += getChar(SLASH); - } - - text += getTokenValue(token); - text += getChar(CLOSE_BRAKET); - - return text; -}; - - -module.exports = { - TYPE_ID: TOKEN_TYPE_ID, - VALUE_ID: TOKEN_VALUE_ID, - LINE_ID: TOKEN_LINE_ID, - COLUMN_ID: TOKEN_COLUMN_ID, - TYPE_WORD: TOKEN_TYPE_WORD, - TYPE_TAG: TOKEN_TYPE_TAG, - TYPE_ATTR_NAME: TOKEN_TYPE_ATTR_NAME, - TYPE_ATTR_VALUE: TOKEN_TYPE_ATTR_VALUE, - TYPE_SPACE: TOKEN_TYPE_SPACE, - TYPE_NEW_LINE: TOKEN_TYPE_NEW_LINE, - convertTagToText, - getTagName, - getTokenColumn, - getTokenLine, - getTokenValue, - isAttrNameToken, - isAttrValueToken, - isTagStart, - isTagToken, - isTextToken, - isTagEnd, -}; diff --git a/packages/bbob-parser/test/Parser.test.js b/packages/bbob-parser/test/Parser.test.js index 705d082..488bdad 100644 --- a/packages/bbob-parser/test/Parser.test.js +++ b/packages/bbob-parser/test/Parser.test.js @@ -1,23 +1,10 @@ const Parser = require('../lib/Parser'); -const TOKEN = require('../lib/token'); -const Tokenizer = require('../lib/Tokenizer'); const parse = input => (new Parser(input).parse()); -const tokenize = input => (new Tokenizer(input).tokenize()); describe('Parser', () => { test('parse paired tags tokens', () => { - const input = [ - [TOKEN.TYPE_TAG, 'best'], - [TOKEN.TYPE_ATTR_NAME, 'name'], - [TOKEN.TYPE_ATTR_VALUE, 'value'], - [TOKEN.TYPE_WORD, 'Foo'], - [TOKEN.TYPE_SPACE, ' '], - [TOKEN.TYPE_WORD, 'Bar'], - [TOKEN.TYPE_TAG, '/best'], - ]; - - const ast = parse(input); + const ast = parse('[best name=value]Foo Bar[/best]'); expect(ast).toBeInstanceOf(Array); expect(ast).toEqual([ @@ -36,8 +23,7 @@ describe('Parser', () => { }); test('parse tag with value param', () => { - const tokens = tokenize('[url=https://github.com/jilizart/bbob]BBob[/url]'); - const ast = parse(tokens); + const ast = parse('[url=https://github.com/jilizart/bbob]BBob[/url]'); expect(ast).toBeInstanceOf(Array); expect(ast).toEqual([ diff --git a/packages/bbob-parser/test/Tokenizer.test.js b/packages/bbob-parser/test/Tokenizer.test.js index 91d7cae..7c7301b 100644 --- a/packages/bbob-parser/test/Tokenizer.test.js +++ b/packages/bbob-parser/test/Tokenizer.test.js @@ -5,51 +5,60 @@ const { TYPE } = Tokenizer; const tokenize = input => (new Tokenizer(input).tokenize()); describe('Tokenizer', () => { + const expectOutput = (output, tokens) => { + expect(tokens).toBeInstanceOf(Array); + output.forEach((token, idx) => { + expect(tokens[idx]).toBeInstanceOf(Object); + expect(tokens[idx]).toEqual(Tokenizer.createTokenOfType(...token)); + }); + }; + test('tokenize single tag', () => { const input = '[SingleTag]'; const tokens = tokenize(input); - - expect(tokens).toBeInstanceOf(Array); - expect(tokens).toEqual([ + const output = [ [TYPE.TAG, 'SingleTag', '0', '0'], - ]); + ]; + + expectOutput(output, tokens); }); test('tokenize single tag with spaces', () => { const input = '[Single Tag]'; const tokens = tokenize(input); - expect(tokens).toBeInstanceOf(Array); - expect(tokens).toEqual([ + const output = [ [TYPE.TAG, 'Single Tag', '0', '0'], - ]); + ]; + + expectOutput(output, tokens); }); test('tokenize tag as param', () => { const input = '[color="#ff0000"]Text[/color]'; const tokens = tokenize(input); - - expect(tokens).toBeInstanceOf(Array); - expect(tokens).toEqual([ + const output = [ [TYPE.TAG, 'color', '0', '0'], [TYPE.ATTR_VALUE, '#ff0000', '6', '0'], [TYPE.WORD, 'Text', '17', '0'], [TYPE.TAG, '/color', '21', '0'], - ]); + ]; + + expectOutput(output, tokens); }); test('tokenize tag param without quotemarks', () => { const input = '[style color=#ff0000]Text[/style]'; const tokens = tokenize(input); - - expect(tokens).toBeInstanceOf(Array); - expect(tokens).toEqual([ + const output = [ [TYPE.TAG, 'style', '0', '0'], [TYPE.ATTR_NAME, 'color', '6', '0'], [TYPE.ATTR_VALUE, '#ff0000', '12', '0'], [TYPE.WORD, 'Text', '21', '0'], [TYPE.TAG, '/style', '25', '0'], - ]); + ]; + + expectOutput(output, tokens); }); test('tokenize list tag with items', () => { @@ -60,9 +69,7 @@ describe('Tokenizer', () => { [/list]`; const tokens = tokenize(input); - - expect(tokens).toBeInstanceOf(Array); - expect(tokens).toEqual([ + const output = [ [TYPE.TAG, 'list', '0', '0'], [TYPE.NEW_LINE, '\n', '6', '0'], [TYPE.SPACE, ' ', '0', '1'], @@ -93,7 +100,9 @@ describe('Tokenizer', () => { [TYPE.WORD, '3.', '11', '3'], [TYPE.NEW_LINE, '\n', '14', '3'], [TYPE.TAG, '/list', '0', '4'], - ]); + ]; + + expectOutput(output, tokens); }); test('tokenize bad tags as texts', () => { @@ -140,8 +149,7 @@ describe('Tokenizer', () => { inputs.forEach((input, idx) => { const tokens = tokenize(input); - expect(tokens).toBeInstanceOf(Array); - expect(tokens).toEqual(asserts[idx]); + expectOutput(asserts[idx], tokens); }); }); });