From 8a9e9304c1aaa2dd37cbbea4e991ef892228f865 Mon Sep 17 00:00:00 2001 From: David Ferguson Date: Tue, 18 Jun 2019 08:39:41 +0100 Subject: [PATCH] fix(parser): fix issue with escaping backslashes when enableEscapeTags is set (#20) There is a bug in the lexer: when enableEscapeTags is set, backslashes are not always escaped (i.e. \\[b] is treated as an escaped tag rather than as a literal backslash followed by a tag). --- packages/bbob-parser/src/lexer.js | 6 +++++- packages/bbob-parser/test/lexer.test.js | 23 +++++++++++++++++++++++ packages/bbob-parser/test/parse.test.js | 22 +++++++++++++++++++++- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/packages/bbob-parser/src/lexer.js b/packages/bbob-parser/src/lexer.js index 34255ca..f110b61 100644 --- a/packages/bbob-parser/src/lexer.js +++ b/packages/bbob-parser/src/lexer.js @@ -53,7 +53,7 @@ function createLexer(buffer, options = {}) { const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM]; const NOT_CHAR_TOKENS = [ ...(options.enableEscapeTags ? 
[BACKSLASH] : []), - openTag, SPACE, TAB, N, BACKSLASH, + openTag, SPACE, TAB, N, ]; const WHITESPACES = [SPACE, TAB]; const SPECIAL_CHARS = [EQ, SPACE, TAB]; @@ -163,6 +163,10 @@ function createLexer(buffer, options = {}) { bufferGrabber.skip(); // skip the \ without emitting anything bufferGrabber.skip(); // skip past the [ or ] as well emitToken(createToken(TYPE_WORD, nextChar, row, col)); + } else if (options.enableEscapeTags && currChar === BACKSLASH && nextChar === BACKSLASH) { + bufferGrabber.skip(); // skip the first \ without emitting anything + bufferGrabber.skip(); // skip past the second \ and emit it + emitToken(createToken(TYPE_WORD, nextChar, row, col)); } else if (currChar === openTag) { bufferGrabber.skip(); // skip openTag diff --git a/packages/bbob-parser/test/lexer.test.js b/packages/bbob-parser/test/lexer.test.js index 1ee641d..3a92cff 100644 --- a/packages/bbob-parser/test/lexer.test.js +++ b/packages/bbob-parser/test/lexer.test.js @@ -305,6 +305,29 @@ describe('lexer', () => { expectOutput(output, tokens); }); + test('escaped tag and escaped backslash', () => { + const tokenizeEscape = input => (createLexer(input, { + enableEscapeTags: true + }).tokenize()); + const input = '\\\\\\[b\\\\\\]test\\\\\\[/b\\\\\\]'; + const tokens = tokenizeEscape(input); + const output = [ + [TYPE.WORD, '\\', '0', '0'], + [TYPE.WORD, '[', '0', '0'], + [TYPE.WORD, 'b', '0', '0'], + [TYPE.WORD, '\\', '0', '0'], + [TYPE.WORD, ']', '0', '0'], + [TYPE.WORD, 'test', '0', '0'], + [TYPE.WORD, '\\', '0', '0'], + [TYPE.WORD, '[', '0', '0'], + [TYPE.WORD, '/b', '0', '0'], + [TYPE.WORD, '\\', '0', '0'], + [TYPE.WORD, ']', '0', '0'], + ]; + + expectOutput(output, tokens); + }); + describe('html', () => { const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize(); diff --git a/packages/bbob-parser/test/parse.test.js b/packages/bbob-parser/test/parse.test.js index 8ada554..00f3e8c 100644 --- a/packages/bbob-parser/test/parse.test.js +++ 
b/packages/bbob-parser/test/parse.test.js @@ -184,7 +184,7 @@ describe('Parser', () => { ]); }); - test('parse escaped tags tags', () => { + test('parse escaped tags', () => { const ast = parse('\\[b\\]test\\[/b\\]', { enableEscapeTags: true }); @@ -199,5 +199,25 @@ describe('Parser', () => { ']', ]); }); + + test('parse escaped tags and escaped backslash', () => { + const ast = parse('\\\\\\[b\\\\\\]test\\\\\\[/b\\\\\\]', { + enableEscapeTags: true + }); + + expectOutput(ast, [ + '\\', + '[', + 'b', + '\\', + ']', + 'test', + '\\', + '[', + '/b', + '\\', + ']', + ]); + }); }); });