2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-06-20 20:00:33 +03:00

more Tokenizer invalid cases tests

This commit is contained in:
Nikolay Kostyurin
2018-06-10 22:13:51 +02:00
parent 5e34dd9d43
commit 305643daa2
3 changed files with 145 additions and 59 deletions
+73 -34
View File
@@ -52,6 +52,14 @@ class Tokenizer {
this.colPos = 0; this.colPos = 0;
this.rowPos = 0; this.rowPos = 0;
this.index = 0; this.index = 0;
this.tokenIndex = -1;
this.tokens = [];
}
appendToken(token) {
this.tokenIndex++;
this.tokens[this.tokenIndex] = token;
} }
tokenize() { tokenize() {
@@ -60,30 +68,67 @@ class Tokenizer {
let attrNameToken = null; let attrNameToken = null;
let attrValueToken = null; let attrValueToken = null;
let attrTokens = []; let attrTokens = [];
let tokens = new Array(Math.floor(this.buffer.length / 2)); this.tokens = new Array(Math.floor(this.buffer.length / 2));
let tokenIndex = -1;
const flushWord = () => { const flushWord = () => {
if (wordToken && wordToken[TOKEN.VALUE_ID]) { if (wordToken && wordToken[TOKEN.VALUE_ID]) {
tokenIndex++; this.appendToken(wordToken);
tokens[tokenIndex] = wordToken;
wordToken = this.createWordToken('') wordToken = this.createWordToken('')
} }
}; };
const createWord = (value, line, row) => {
if (!wordToken) {
wordToken = this.createWordToken(value, line, row)
}
};
const flushTag = () => { const flushTag = () => {
if (tagToken !== null) { if (tagToken !== null) {
// [] and [=] tag case
if (!tagToken[TOKEN.VALUE_ID]) {
const value = attrValueToken ? getChar(CHAR.EQ) : '';
const word = getChar(CHAR.OPEN_BRAKET) + value + getChar(CHAR.CLOSE_BRAKET);
createWord('', 0, 0);
wordToken[TOKEN.VALUE_ID] += word;
tagToken = null;
if (attrValueToken) {
attrValueToken = null
}
return;
}
if (attrNameToken && !attrValueToken) { if (attrNameToken && !attrValueToken) {
tagToken[TOKEN.VALUE_ID] += SPACE + attrNameToken[TOKEN.VALUE_ID] tagToken[TOKEN.VALUE_ID] += SPACE + attrNameToken[TOKEN.VALUE_ID];
attrNameToken = null attrNameToken = null
} }
tokenIndex++; this.appendToken(tagToken);
tokens[tokenIndex] = tagToken;
tagToken = null; tagToken = null;
} }
}; };
const flushUnclosedTag = () => {
if (tagToken !== null) {
const value = tagToken[TOKEN.VALUE_ID] + (attrValueToken ? getChar(CHAR.EQ) : '');
tagToken[TOKEN.TYPE_ID] = TOKEN.TYPE_WORD;
tagToken[TOKEN.VALUE_ID] = getChar(CHAR.OPEN_BRAKET) + value;
this.appendToken(tagToken);
tagToken = null;
if (attrValueToken) {
attrValueToken = null
}
}
};
const flushAttrNames = () => { const flushAttrNames = () => {
if (attrNameToken) { if (attrNameToken) {
attrTokens.push(attrNameToken); attrTokens.push(attrNameToken);
@@ -98,11 +143,7 @@ class Tokenizer {
const flushAttrs = () => { const flushAttrs = () => {
if (attrTokens.length) { if (attrTokens.length) {
attrTokens.forEach(attrToken => { attrTokens.forEach(this.appendToken.bind(this));
tokenIndex++;
tokens[tokenIndex] = attrToken
});
attrTokens = []; attrTokens = [];
} }
}; };
@@ -122,16 +163,14 @@ class Tokenizer {
} else { } else {
const spaceCode = charCode === CHAR.TAB ? SPACE_TAB : SPACE; const spaceCode = charCode === CHAR.TAB ? SPACE_TAB : SPACE;
tokenIndex++; this.appendToken(this.createSpaceToken(spaceCode));
tokens[tokenIndex] = this.createSpaceToken(spaceCode);
} }
this.colPos++; this.colPos++;
break; break;
case CHAR.N: case CHAR.N:
flushWord(); flushWord();
tokenIndex++; this.appendToken(this.createNewLineToken(getChar(charCode)));
tokens[tokenIndex] = this.createNewLineToken(getChar(charCode));
this.rowPos++; this.rowPos++;
this.colPos = 0; this.colPos = 0;
@@ -180,9 +219,7 @@ class Tokenizer {
} else if (tagToken) { } else if (tagToken) {
tagToken[TOKEN.VALUE_ID] += getChar(charCode) tagToken[TOKEN.VALUE_ID] += getChar(charCode)
} else { } else {
if (!wordToken) { createWord();
wordToken = this.createWordToken('')
}
wordToken[TOKEN.VALUE_ID] += getChar(charCode); wordToken[TOKEN.VALUE_ID] += getChar(charCode);
} }
@@ -195,39 +232,41 @@ class Tokenizer {
} }
flushWord(); flushWord();
flushUnclosedTag();
tokens.length = tokenIndex + 1; this.tokens.length = this.tokenIndex + 1;
return tokens; return this.tokens;
} }
createWordToken(value) { createWordToken(value = '', line = this.colPos, row = this.rowPos) {
return [TOKEN.TYPE_WORD, value, this.colPos, this.rowPos] return [TOKEN.TYPE_WORD, value, line, row]
} }
createTagToken(value) { createTagToken(value, line = this.colPos, row = this.rowPos) {
return [TOKEN.TYPE_TAG, value, this.colPos, this.rowPos] return [TOKEN.TYPE_TAG, value, line, row]
} }
createAttrNameToken(value) { createAttrNameToken(value, line = this.colPos, row = this.rowPos) {
return [TOKEN.TYPE_ATTR_NAME, value, this.colPos, this.rowPos] return [TOKEN.TYPE_ATTR_NAME, value, line, row]
} }
createAttrValueToken(value) { createAttrValueToken(value, line = this.colPos, row = this.rowPos) {
return [TOKEN.TYPE_ATTR_VALUE, value, this.colPos, this.rowPos] return [TOKEN.TYPE_ATTR_VALUE, value, line, row]
} }
createSpaceToken(value) { createSpaceToken(value, line = this.colPos, row = this.rowPos) {
return [TOKEN.TYPE_SPACE, value, this.colPos, this.rowPos] return [TOKEN.TYPE_SPACE, value, line, row]
} }
createNewLineToken(value) { createNewLineToken(value, line = this.colPos, row = this.rowPos) {
return [TOKEN.TYPE_NEW_LINE, value, this.colPos, this.rowPos] return [TOKEN.TYPE_NEW_LINE, value, line, row]
} }
} }
// warm up tokenizer to eliminate code branches that never execute // warm up tokenizer to eliminate code branches that never execute
new Tokenizer(`[b param="hello"]Sample text[/b]\n\t[Chorus]`).tokenize(); new Tokenizer(`[sc=asdasd`).tokenize();
//new Tokenizer(`[b param="hello"]Sample text[/b]\n\t[Chorus]`).tokenize();
module.exports = Tokenizer; module.exports = Tokenizer;
module.exports.CHAR = CHAR; module.exports.CHAR = CHAR;
+49
View File
@@ -91,5 +91,54 @@ describe("Tokenizer", () => {
[TYPE.NEW_LINE, '\n', 14, 3], [TYPE.NEW_LINE, '\n', 14, 3],
[TYPE.TAG, '/list', 0, 4] [TYPE.TAG, '/list', 0, 4]
]) ])
});
// Checks that malformed or unclosed bracket constructs are tokenized without
// throwing: each entry of `inputs` is tokenized and the result is compared
// with the entry of `asserts` at the same index.
test("tokenize bad tags as texts", () => {
// Raw strings fed to the tokenizer, one Tokenizer instance per entry.
const inputs = [
'[]',
'[=]',
'![](image.jpg)',
'x html([a. title][, alt][, classes]) x',
'[/y]',
'[sc',
'[sc / [/sc]',
'[sc arg="val',
];
// Expected token lists, matched to `inputs` by position.
// Each token is written as [type, value, <number>, <number>].
// NOTE(review): `inputs` has 8 entries but `asserts` has only 7, so the last
// input ('[sc arg="val') is compared against `undefined` — add the intended
// expectation or drop that input.
// NOTE(review): several numeric fields look inconsistent with the character
// offsets in the input (e.g. ")" at 7, all zeros in the '[sc / [/sc]' case);
// presumably they mirror the tokenizer's current bookkeeping — verify.
const asserts = [
[[TYPE.WORD, '[]', 0, 0]],
[[TYPE.WORD, '[=]', 0, 0]],
[
[TYPE.WORD, '!', 0, 0],
[TYPE.WORD, '[](image.jpg)', 1, 0]
],
[
[TYPE.WORD, "x", 0, 0],
[TYPE.SPACE, " ", 1, 0],
[TYPE.WORD, "html(", 1, 0],
[TYPE.TAG, "a. title", 7, 0],
[TYPE.TAG, ", alt", 17, 0],
[TYPE.TAG, ", classes", 24, 0],
[TYPE.WORD, ")", 7, 0],
[TYPE.SPACE, " ", 36, 0],
[TYPE.WORD, "x", 36, 0]
],
[[TYPE.TAG, "/y", 0, 0]],
[[TYPE.WORD, '[sc', 0, 0]],
[
[TYPE.WORD, '[sc', 0, 0],
[TYPE.SPACE, ' ', 0, 0],
[TYPE.WORD, '/', 0, 0],
[TYPE.SPACE, ' ', 0, 0],
[TYPE.WORD, '[/sc]', 0, 0]
],
];
// Tokenize every input and compare with the expectation at the same index.
inputs.forEach((input, idx) => {
const tokens = new Tokenizer(input).tokenize();
expect(tokens).toBeInstanceOf(Array);
expect(tokens).toEqual(asserts[idx])
});
}) })
}); });
+23 -25
View File
@@ -1,5 +1,4 @@
const parse = require('./index'); const parse = require('./index');
const OldParser = require('./benchmark/OldParser');
const options = { const options = {
allowOnlyTags: ['ch', 'syllable', 'tab'], allowOnlyTags: ['ch', 'syllable', 'tab'],
@@ -12,28 +11,27 @@ describe("parse", () => {
expect(ast).toEqual([{tag: 'Verse 2', attrs: {}, content: []}]); expect(ast).toEqual([{tag: 'Verse 2', attrs: {}, content: []}]);
}); });
test("same as old parser", () => { // test("pass invalid tags", () => {
const input = `[Verse 2] // const inputs = [
[ch]Eb[/ch] [ch]Fm[/ch] // '[]',
I'm walking around // '![](image.jpg)',
[ch]Ab[/ch] [ch]Cm[/ch] // 'x html([a. title][, alt][, classes]) x',
With my little raincloud // '[/y]',
[ch]Eb[/ch] [ch]Fm[/ch] // '[sc',
Hanging over my head // '[sc / [/sc]',
[ch]Cm[/ch] [ch]Ab[/ch] // '[sc arg="val',
And it aint coming down // ];
[ch]Eb[/ch] [ch]Fm[/ch] //
Where do I go? // const ast1 = parse(inputs[0]);
[ch]Ab[/ch] [ch]Cm[/ch] //
Gimme some sort of sign //
[ch]Eb[/ch] [ch]Fm[/ch] //
Hit me with lightning! // console.log('ast1', ast1);
[ch]Cm[/ch] [ch]Ab[/ch] //
Maybe Ill come alive //
`; //
const ast1 = parse(input, options); // expect(ast1).toEqual([
const ast2 = OldParser.parse(input); //
// ]);
expect(ast1).toEqual(ast2); // })
})
}); });