From 305643daa2f12617895d7fce085021d307962143 Mon Sep 17 00:00:00 2001
From: Nikolay Kostyurin <jilizart@gmail.com>
Date: Sun, 10 Jun 2018 22:13:51 +0200
Subject: [PATCH] more Tokenizer invalid cases tests

---
 packages/bbob-parser/Tokenizer.js      | 107 +++++++++++++++++--------
 packages/bbob-parser/Tokenizer.test.js |  49 +++++++++++
 packages/bbob-parser/parse.test.js     |  48 ++++++-----
 3 files changed, 145 insertions(+), 59 deletions(-)

diff --git a/packages/bbob-parser/Tokenizer.js b/packages/bbob-parser/Tokenizer.js
index 66fc2d5..9569313 100644
--- a/packages/bbob-parser/Tokenizer.js
+++ b/packages/bbob-parser/Tokenizer.js
@@ -52,6 +52,14 @@ class Tokenizer {
         this.colPos = 0;
         this.rowPos = 0;
         this.index = 0;
+        
+        this.tokenIndex = -1;
+        this.tokens = [];
+    }
+
+    appendToken(token) {
+        this.tokenIndex++;
+        this.tokens[this.tokenIndex] = token;
     }
 
     tokenize() {
@@ -60,30 +68,67 @@ class Tokenizer {
         let attrNameToken = null;
         let attrValueToken = null;
         let attrTokens = [];
-        let tokens = new Array(Math.floor(this.buffer.length / 2));
-        let tokenIndex = -1;
+        this.tokens = new Array(Math.floor(this.buffer.length / 2));
 
         const flushWord = () => {
             if (wordToken && wordToken[TOKEN.VALUE_ID]) {
-                tokenIndex++;
-                tokens[tokenIndex] = wordToken;
+                this.appendToken(wordToken);
                 wordToken = this.createWordToken('')
             }
         };
 
+        const createWord = (value, line, row) => {
+            if (!wordToken) {
+                wordToken = this.createWordToken(value, line, row)
+            }
+        };
+
         const flushTag = () => {
             if (tagToken !== null) {
+                // [] and [=] tag case
+                if (!tagToken[TOKEN.VALUE_ID]) {
+                    const value = attrValueToken ? getChar(CHAR.EQ) : '';
+                    const word = getChar(CHAR.OPEN_BRAKET) + value + getChar(CHAR.CLOSE_BRAKET);
+
+                    createWord('', 0, 0);
+                    wordToken[TOKEN.VALUE_ID] += word;
+
+                    tagToken = null;
+
+                    if (attrValueToken) {
+                        attrValueToken = null
+                    }
+
+                    return;
+                }
+
                 if (attrNameToken && !attrValueToken) {
-                    tagToken[TOKEN.VALUE_ID] += SPACE + attrNameToken[TOKEN.VALUE_ID]
+                    tagToken[TOKEN.VALUE_ID] += SPACE + attrNameToken[TOKEN.VALUE_ID];
                     attrNameToken = null
                 }
 
-                tokenIndex++;
-                tokens[tokenIndex] = tagToken;
+                this.appendToken(tagToken);
                 tagToken = null;
             }
         };
 
+        const flushUnclosedTag = () => {
+            if (tagToken !== null) {
+                const value = tagToken[TOKEN.VALUE_ID] + (attrValueToken ? getChar(CHAR.EQ) : '');
+
+                tagToken[TOKEN.TYPE_ID] = TOKEN.TYPE_WORD;
+                tagToken[TOKEN.VALUE_ID] = getChar(CHAR.OPEN_BRAKET) + value;
+
+                this.appendToken(tagToken);
+
+                tagToken = null;
+
+                if (attrValueToken) {
+                    attrValueToken = null
+                }
+            }
+        };
+
         const flushAttrNames = () => {
             if (attrNameToken) {
                 attrTokens.push(attrNameToken);
@@ -98,11 +143,7 @@ class Tokenizer {
 
         const flushAttrs = () => {
             if (attrTokens.length) {
-                attrTokens.forEach(attrToken => {
-                    tokenIndex++;
-                    tokens[tokenIndex] = attrToken
-                });
-
+                attrTokens.forEach(this.appendToken.bind(this));
                 attrTokens = [];
             }
         };
@@ -122,16 +163,14 @@ class Tokenizer {
                     } else {
                         const spaceCode = charCode === CHAR.TAB ? SPACE_TAB : SPACE;
 
-                        tokenIndex++;
-                        tokens[tokenIndex] = this.createSpaceToken(spaceCode);
+                        this.appendToken(this.createSpaceToken(spaceCode));
                     }
                     this.colPos++;
                     break;
 
                 case CHAR.N:
                     flushWord();
-                    tokenIndex++;
-                    tokens[tokenIndex] = this.createNewLineToken(getChar(charCode));
+                    this.appendToken(this.createNewLineToken(getChar(charCode)));
 
                     this.rowPos++;
                     this.colPos = 0;
@@ -180,9 +219,7 @@ class Tokenizer {
                     } else if (tagToken) {
                         tagToken[TOKEN.VALUE_ID] += getChar(charCode)
                     } else {
-                        if (!wordToken) {
-                            wordToken = this.createWordToken('')
-                        }
+                        createWord();
 
                         wordToken[TOKEN.VALUE_ID] += getChar(charCode);
                     }
@@ -195,39 +232,41 @@ class Tokenizer {
         }
 
         flushWord();
+        flushUnclosedTag();
 
-        tokens.length = tokenIndex + 1;
+        this.tokens.length = this.tokenIndex + 1;
 
-        return tokens;
+        return this.tokens;
     }
 
-    createWordToken(value) {
-        return [TOKEN.TYPE_WORD, value, this.colPos, this.rowPos]
+    createWordToken(value = '', line = this.colPos, row = this.rowPos) {
+        return [TOKEN.TYPE_WORD, value, line, row]
     }
 
-    createTagToken(value) {
-        return [TOKEN.TYPE_TAG, value, this.colPos, this.rowPos]
+    createTagToken(value, line = this.colPos, row = this.rowPos) {
+        return [TOKEN.TYPE_TAG, value, line, row]
     }
 
-    createAttrNameToken(value) {
-        return [TOKEN.TYPE_ATTR_NAME, value, this.colPos, this.rowPos]
+    createAttrNameToken(value, line = this.colPos, row = this.rowPos) {
+        return [TOKEN.TYPE_ATTR_NAME, value, line, row]
     }
 
-    createAttrValueToken(value) {
-        return [TOKEN.TYPE_ATTR_VALUE, value, this.colPos, this.rowPos]
+    createAttrValueToken(value, line = this.colPos, row = this.rowPos) {
+        return [TOKEN.TYPE_ATTR_VALUE, value, line, row]
     }
 
-    createSpaceToken(value) {
-        return [TOKEN.TYPE_SPACE, value, this.colPos, this.rowPos]
+    createSpaceToken(value, line = this.colPos, row = this.rowPos) {
+        return [TOKEN.TYPE_SPACE, value, line, row]
     }
 
-    createNewLineToken(value) {
-        return [TOKEN.TYPE_NEW_LINE, value, this.colPos, this.rowPos]
+    createNewLineToken(value, line = this.colPos, row = this.rowPos) {
+        return [TOKEN.TYPE_NEW_LINE, value, line, row]
     }
 }
 
 // warm up tokenizer to elimitate code branches that never execute
-new Tokenizer(`[b param="hello"]Sample text[/b]\n\t[Chorus]`).tokenize();
+new Tokenizer(`[sc=asdasd`).tokenize();
+//new Tokenizer(`[b param="hello"]Sample text[/b]\n\t[Chorus]`).tokenize();
 
 module.exports = Tokenizer;
 module.exports.CHAR = CHAR;
diff --git a/packages/bbob-parser/Tokenizer.test.js b/packages/bbob-parser/Tokenizer.test.js
index 30c0388..286389c 100644
--- a/packages/bbob-parser/Tokenizer.test.js
+++ b/packages/bbob-parser/Tokenizer.test.js
@@ -91,5 +91,54 @@ describe("Tokenizer", () => {
             [TYPE.NEW_LINE, '\n', 14, 3],
             [TYPE.TAG, '/list', 0, 4]
         ])
+    });
+
+    test("tokenize bad tags as texts", () => {
+        const inputs = [
+            '[]',
+            '[=]',
+            '![](image.jpg)',
+            'x html([a. title][, alt][, classes]) x',
+            '[/y]',
+            '[sc',
+            '[sc / [/sc]',
+            '[sc arg="val',
+        ];
+
+        const asserts = [
+            [[TYPE.WORD, '[]', 0, 0]],
+            [[TYPE.WORD, '[=]', 0, 0]],
+            [
+                [TYPE.WORD, '!', 0, 0],
+                [TYPE.WORD, '[](image.jpg)', 1, 0]
+            ],
+            [
+                [TYPE.WORD, "x", 0, 0],
+                [TYPE.SPACE, " ", 1, 0],
+                [TYPE.WORD, "html(", 1, 0],
+                [TYPE.TAG, "a. title", 7, 0],
+                [TYPE.TAG, ", alt", 17, 0],
+                [TYPE.TAG, ", classes", 24, 0],
+                [TYPE.WORD, ")", 7, 0],
+                [TYPE.SPACE, " ", 36, 0],
+                [TYPE.WORD, "x", 36, 0]
+            ],
+            [[TYPE.TAG, "/y", 0, 0]],
+            [[TYPE.WORD, '[sc', 0, 0]],
+            [
+                [TYPE.WORD, '[sc', 0, 0],
+                [TYPE.SPACE, ' ', 0, 0],
+                [TYPE.WORD, '/', 0, 0],
+                [TYPE.SPACE, ' ', 0, 0],
+                [TYPE.WORD, '[/sc]', 0, 0]
+            ],
+        ];
+
+        inputs.forEach((input, idx) => {
+            const tokens = new Tokenizer(input).tokenize();
+
+            expect(tokens).toBeInstanceOf(Array);
+            expect(tokens).toEqual(asserts[idx])
+        });
     })
 });
\ No newline at end of file
diff --git a/packages/bbob-parser/parse.test.js b/packages/bbob-parser/parse.test.js
index 4d5000d..8a11c3e 100644
--- a/packages/bbob-parser/parse.test.js
+++ b/packages/bbob-parser/parse.test.js
@@ -1,5 +1,4 @@
 const parse = require('./index');
-const OldParser = require('./benchmark/OldParser');
 
 const options = {
     allowOnlyTags: ['ch', 'syllable', 'tab'],
@@ -12,28 +11,27 @@ describe("parse", () => {
         expect(ast).toEqual([{tag: 'Verse 2', attrs: {}, content: []}]);
     });
 
-    test("same as old parser", () => {
-        const input = `[Verse 2]
-[ch]Eb[/ch]            [ch]Fm[/ch]
-  I'm walking around
-[ch]Ab[/ch]               [ch]Cm[/ch]
-  With my little raincloud
-[ch]Eb[/ch]                [ch]Fm[/ch]
-  Hanging over my head
-[ch]Cm[/ch]                    [ch]Ab[/ch]
-  And it ain’t coming down
-[ch]Eb[/ch]           [ch]Fm[/ch]
-  Where do I go?
-[ch]Ab[/ch]                   [ch]Cm[/ch]
-  Gimme some sort of sign
-[ch]Eb[/ch]            [ch]Fm[/ch]
-  Hit me with lightning!
-[ch]Cm[/ch]                [ch]Ab[/ch]
-  Maybe I’ll come alive
-`;
-        const ast1 = parse(input, options);
-        const ast2 = OldParser.parse(input);
-
-        expect(ast1).toEqual(ast2);
-    })
+    // test("pass invalid tags", () => {
+    //     const inputs = [
+    //         '[]',
+    //         '![](image.jpg)',
+    //         'x html([a. title][, alt][, classes]) x',
+    //         '[/y]',
+    //         '[sc',
+    //         '[sc / [/sc]',
+    //         '[sc arg="val',
+    //     ];
+    //
+    //     const ast1 = parse(inputs[0]);
+    //
+    //
+    //
+    //     console.log('ast1', ast1);
+    //
+    //
+    //
+    //     expect(ast1).toEqual([
+    //
+    //     ]);
+    // })
 });
\ No newline at end of file