2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

refactor(parser): better jsdoc, some behavior fixes, more tests

- all operations on nodes moved to `createList` function
- fixed problem with single tags with value only like `[url=value]` fixes #6
- write tests for `Token` class
- moved all node arrays to parse func, now parser supports many instances
- add jsdoc to critical parts of the parser to better understand how it works
This commit is contained in:
Nikolay Kostyurin
2019-03-04 02:24:12 +02:00
parent ef6a778f45
commit 8cb1d495dd
7 changed files with 379 additions and 220 deletions
-4
View File
@@ -41,10 +41,6 @@ const getTagName = (token) => {
const convertTagToText = (token) => {
let text = OPEN_BRAKET;
if (isTagEnd(token)) {
text += SLASH;
}
text += getTokenValue(token);
text += CLOSE_BRAKET;
-1
View File
@@ -164,7 +164,6 @@ function createLexer(buffer, options = {}) {
if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
emitToken(createToken(TYPE_WORD, currChar, row, col));
} else {
//
const str = bufferGrabber.grabWhile(val => val !== closeTag);
bufferGrabber.skip(); // skip closeTag
+183 -201
View File
@@ -1,151 +1,135 @@
import TagNode from '@bbob/plugin-helper/lib/TagNode';
import { createLexer } from './lexer';
import { createList } from './utils';
/**
* @private
* @type {Array}
*/
let nodes;
/**
* @private
* @type {Array}
*/
let nestedNodes;
/**
* @private
* @type {Array}
*/
let tagNodes;
/**
* @private
* @type {Array}
*/
let tagNodesAttrName;
let options = {};
let tokenizer = null;
// eslint-disable-next-line no-unused-vars
let tokens = null;
/**
* @private
* @param token
* @return {*}
*/
const isTagNested = token => tokenizer.isTokenNested(token);
/**
* @private
* @return {TagNode}
*/
const getLastTagNode = () => (tagNodes.length ? tagNodes[tagNodes.length - 1] : null);
/**
* @private
* @param {Token} token
* @public
* @param {String} input
* @param {Object} opts
* @param {Function} opts.createTokenizer
* @param {Array<string>} opts.onlyAllowTags
* @param {String} opts.openTag
* @param {String} opts.closeTag
* @return {Array}
*/
const createTagNode = token => tagNodes.push(TagNode.create(token.getValue()));
/**
* @private
* @param {Token} token
* @return {Array}
*/
const createTagNodeAttrName = token => tagNodesAttrName.push(token.getValue());
const parse = (input, opts = {}) => {
const options = opts;
/**
* @private
* @return {Array}
*/
const getTagNodeAttrName = () =>
(tagNodesAttrName.length ? tagNodesAttrName[tagNodesAttrName.length - 1] : null);
let tokenizer = null;
/**
* @private
* @return {Array}
*/
const clearTagNodeAttrName = () => {
if (tagNodesAttrName.length) {
tagNodesAttrName.pop();
}
};
/**
* Result AST of nodes
* @private
* @type {ItemList}
*/
const nodes = createList();
/**
* Temp buffer of nodes that's nested to another node
* @private
* @type {ItemList}
*/
const nestedNodes = createList();
/**
* Temp buffer of nodes [tag..]...[/tag]
* @private
* @type {ItemList}
*/
const tagNodes = createList();
/**
* Temp buffer of tag attributes
* @private
* @type {ItemList}
*/
const tagNodesAttrName = createList();
/**
* @private
* @return {Array}
*/
const clearTagNode = () => {
if (tagNodes.length) {
tagNodes.pop();
/**
* Cache for nested tags checks
* @type {{}}
*/
const nestedTagsMap = {};
clearTagNodeAttrName();
}
};
/**
* @private
* @return {Array}
*/
const getNodes = () => {
if (nestedNodes.length) {
const nestedNode = nestedNodes[nestedNodes.length - 1];
return nestedNode.content;
}
return nodes;
};
/**
* @private
* @param tag
*/
const appendNode = (tag) => {
getNodes().push(tag);
};
/**
* @private
* @param value
* @return {boolean}
*/
const isAllowedTag = (value) => {
if (options.onlyAllowTags && options.onlyAllowTags.length) {
return options.onlyAllowTags.indexOf(value) >= 0;
}
return true;
};
/**
* @private
* @param {Token} token
*/
const handleTagStart = (token) => {
if (token.isStart()) {
createTagNode(token);
if (isTagNested(token)) {
nestedNodes.push(getLastTagNode());
} else {
appendNode(getLastTagNode());
clearTagNode();
const isTokenNested = (token) => {
if (typeof nestedTagsMap[token.getValue()] === 'undefined') {
nestedTagsMap[token.getValue()] = tokenizer.isTokenNested(token);
}
}
};
/**
* @private
* @param {Token} token
*/
const handleTagEnd = (token) => {
if (token.isEnd()) {
clearTagNode();
return nestedTagsMap[token.getValue()];
};
const lastNestedNode = nestedNodes.pop();
const isTagNested = tagName => !!nestedTagsMap[tagName];
/**
* Flushes temp tag nodes and its attributes buffers
* @private
* @return {Array}
*/
const flushTagNodes = () => {
if (tagNodes.flushLast()) {
tagNodesAttrName.flushLast();
}
};
/**
* @private
* @return {Array}
*/
const getNodes = () => {
const lastNestedNode = nestedNodes.getLast();
return lastNestedNode ? lastNestedNode.content : nodes.toArray();
};
/**
* @private
* @param {TagNode} tag
*/
const appendNodes = (tag) => {
getNodes().push(tag);
};
/**
* @private
* @param {String} value
* @return {boolean}
*/
const isAllowedTag = (value) => {
if (options.onlyAllowTags && options.onlyAllowTags.length) {
return options.onlyAllowTags.indexOf(value) >= 0;
}
return true;
};
/**
* @private
* @param {Token} token
*/
const handleTagStart = (token) => {
flushTagNodes();
const tagNode = TagNode.create(token.getValue());
const isNested = isTokenNested(token);
tagNodes.push(tagNode);
if (isNested) {
nestedNodes.push(tagNode);
} else {
appendNodes(tagNode);
}
};
/**
* @private
* @param {Token} token
*/
const handleTagEnd = (token) => {
flushTagNodes();
const lastNestedNode = nestedNodes.flushLast();
if (lastNestedNode) {
appendNode(lastNestedNode);
appendNodes(lastNestedNode);
} else if (options.onError) {
const tag = token.getValue();
const line = token.getLine();
@@ -158,92 +142,90 @@ const handleTagEnd = (token) => {
columnNumber: column,
});
}
}
};
};
/**
* @private
* @param {Token} token
*/
const handleTagToken = (token) => {
if (token.isTag()) {
if (isAllowedTag(token.getName())) {
// [tag]
/**
* @private
* @param {Token} token
*/
const handleTag = (token) => {
// [tag]
if (token.isStart()) {
handleTagStart(token);
// [/tag]
handleTagEnd(token);
} else {
appendNode(token.toString());
}
}
};
/**
* @private
* @param {Token} token
*/
const handleTagNode = (token) => {
const tagNode = getLastTagNode();
// [/tag]
if (token.isEnd()) {
handleTagEnd(token);
}
};
if (tagNode) {
if (token.isAttrName()) {
createTagNodeAttrName(token);
tagNode.attr(getTagNodeAttrName(), '');
} else if (token.isAttrValue()) {
const attrName = getTagNodeAttrName();
const attrValue = token.getValue();
/**
* @private
* @param {Token} token
*/
const handleNode = (token) => {
/**
* @type {TagNode}
*/
const lastTagNode = tagNodes.getLast();
const tokenValue = token.getValue();
const isNested = isTagNested(token);
if (attrName) {
tagNode.attr(getTagNodeAttrName(), attrValue);
clearTagNodeAttrName();
} else {
tagNode.attr(attrValue, attrValue);
if (lastTagNode) {
if (token.isAttrName()) {
tagNodesAttrName.push(tokenValue);
lastTagNode.attr(tagNodesAttrName.getLast(), '');
} else if (token.isAttrValue()) {
const attrName = tagNodesAttrName.getLast();
if (attrName) {
lastTagNode.attr(attrName, tokenValue);
tagNodesAttrName.flushLast();
} else {
lastTagNode.attr(tokenValue, tokenValue);
}
} else if (token.isText()) {
if (isNested) {
lastTagNode.append(tokenValue);
} else {
appendNodes(tokenValue);
}
} else if (token.isTag()) {
// if tag is not allowed, just past it as is
appendNodes(token.toString());
}
} else if (token.isText()) {
tagNode.append(token.getValue());
appendNodes(tokenValue);
} else if (token.isTag()) {
// if tag is not allowed, just past it as is
appendNodes(token.toString());
}
} else if (token.isText()) {
appendNode(token.getValue());
}
};
};
/**
* @private
* @param token
*/
const parseToken = (token) => {
handleTagToken(token);
handleTagNode(token);
};
/**
* @private
* @param {Token} token
*/
const onToken = (token) => {
if (token.isTag() && isAllowedTag(token.getName())) {
handleTag(token);
} else {
handleNode(token);
}
};
/**
* @public
* @param input
* @param opts
* @param {Function} opts.createTokenizer
* @param {Array<string>} opts.onlyAllowTags
* @param {String} opts.openTag
* @param {String} opts.closeTag
* @return {Array}
*/
const parse = (input, opts = {}) => {
options = opts;
tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, {
onToken: parseToken,
onToken,
onlyAllowTags: options.onlyAllowTags,
openTag: options.openTag,
closeTag: options.closeTag,
});
nodes = [];
nestedNodes = [];
tagNodes = [];
tagNodesAttrName = [];
// eslint-disable-next-line no-unused-vars
const tokens = tokenizer.tokenize();
tokens = tokenizer.tokenize();
return nodes;
return nodes.toArray();
};
export { parse };
+52
View File
@@ -76,10 +76,12 @@ export const createCharGrabber = (source, { onSkip } = {}) => {
*/
export const trimChar = (str, charToRemove) => {
while (str.charAt(0) === charToRemove) {
// eslint-disable-next-line no-param-reassign
str = str.substring(1);
}
while (str.charAt(str.length - 1) === charToRemove) {
// eslint-disable-next-line no-param-reassign
str = str.substring(0, str.length - 1);
}
@@ -92,3 +94,53 @@ export const trimChar = (str, charToRemove) => {
* @return {String}
*/
export const unquote = str => str.replace(BACKSLASH + QUOTEMARK, QUOTEMARK);
/**
* @typedef {Object} ItemList
* @type {Object}
* @property {getLastCb} getLast
* @property {flushLastCb} flushLast
* @property {pushCb} push
* @property {toArrayCb} toArray
*/
/**
*
* @param values
* @return {ItemList}
*/
/**
 * Wraps an array in a tiny stack-like accessor object.
 * The wrapped array is NOT copied: `toArray` hands back the same
 * reference, so pushes are visible to the original caller's array.
 * @param values backing array (defaults to a fresh empty one)
 * @return {ItemList}
 */
export const createList = (values = []) => {
  const items = values;

  /**
   * Returns the top element without removing it, or null when empty.
   * @callback getLastCb
   */
  const getLast = () => {
    if (items.length === 0) {
      return null;
    }
    return items[items.length - 1];
  };

  /**
   * Pops and returns the top element; returns false (not null) when
   * there is nothing to flush, so callers can branch on truthiness.
   * @callback flushLastCb
   * @return {*}
   */
  const flushLast = () => (items.length ? items.pop() : false);

  /**
   * Appends a value; forwards Array#push's return (the new length).
   * @callback pushCb
   * @param value
   */
  const push = (value) => items.push(value);

  /**
   * Exposes the live backing array (no defensive copy).
   * @callback toArrayCb
   * @return {Array}
   */
  const toArray = () => items;

  return {
    getLast,
    flushLast,
    push,
    toArray,
  };
};
+68
View File
@@ -0,0 +1,68 @@
import Token from '../src/Token'
// Unit tests for the Token class: construction, type predicates,
// positional accessors, and serialization back to bracketed tag text.
describe('Token', () => {
test('isEmpty', () => {
// a token constructed with no arguments reports itself as empty
const token = new Token();
expect(token.isEmpty()).toBeTruthy()
});
test('isText', () => {
const token = new Token('word');
expect(token.isText()).toBeTruthy();
});
test('isTag', () => {
const token = new Token('tag');
expect(token.isTag()).toBeTruthy();
});
test('isAttrName', () => {
const token = new Token('attr-name');
expect(token.isAttrName()).toBeTruthy();
});
test('isAttrValue', () => {
const token = new Token('attr-value');
expect(token.isAttrValue()).toBeTruthy();
});
test('isStart', () => {
// a 'tag' token whose value has no leading slash is an opening tag
const token = new Token('tag', 'my-tag');
expect(token.isStart()).toBeTruthy();
});
test('isEnd', () => {
// a leading '/' in the value marks a closing tag
const token = new Token('tag', '/my-tag');
expect(token.isEnd()).toBeTruthy();
});
test('getName', () => {
// getName strips the closing-tag slash; getValue (below) keeps it
const token = new Token('tag', '/my-tag');
expect(token.getName()).toBe('my-tag');
});
test('getValue', () => {
const token = new Token('tag', '/my-tag');
expect(token.getValue()).toBe('/my-tag');
});
test('getLine', () => {
// third constructor argument is the source line number
const token = new Token('tag', '/my-tag', 12);
expect(token.getLine()).toBe(12);
});
test('getColumn', () => {
// fourth constructor argument is the source column number
const token = new Token('tag', '/my-tag', 12, 14);
expect(token.getColumn()).toBe(14);
});
test('toString', () => {
// toString re-wraps the raw value (slash included) in brackets
const tokenEnd = new Token('tag', '/my-tag', 12, 14);
expect(tokenEnd.toString()).toBe('[/my-tag]');
const tokenStart = new Token('tag', 'my-tag', 12, 14);
expect(tokenStart.toString()).toBe('[my-tag]');
});
});
+59 -13
View File
@@ -1,13 +1,13 @@
const Token = require('../src/Token');
const { createLexer } = require('../src/lexer');
import {TYPE_WORD, TYPE_TAG, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_SPACE, TYPE_NEW_LINE} from '../src/Token'
import { createLexer } from '../src/lexer'
const TYPE = {
WORD: Token.TYPE_WORD,
TAG: Token.TYPE_TAG,
ATTR_NAME: Token.TYPE_ATTR_NAME,
ATTR_VALUE: Token.TYPE_ATTR_VALUE,
SPACE: Token.TYPE_SPACE,
NEW_LINE: Token.TYPE_NEW_LINE,
WORD: TYPE_WORD,
TAG: TYPE_TAG,
ATTR_NAME: TYPE_ATTR_NAME,
ATTR_VALUE: TYPE_ATTR_VALUE,
SPACE: TYPE_SPACE,
NEW_LINE: TYPE_NEW_LINE,
};
const tokenize = input => (createLexer(input).tokenize());
@@ -33,6 +33,17 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('single tag with params', () => {
const input = '[user=111]';
const tokens = tokenize(input);
const output = [
[TYPE.TAG, 'user', '0', '0'],
[TYPE.ATTR_VALUE, '111', '0', '0'],
];
expectOutput(output, tokens);
});
test('single tag with spaces', () => {
const input = '[Single Tag]';
const tokens = tokenize(input);
@@ -245,7 +256,6 @@ describe('lexer', () => {
});
});
test('bad unclosed tag', () => {
const input = `[Finger Part A [Finger]`;
const tokens = tokenize(input);
@@ -278,11 +288,10 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
describe('html', () => {
const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();
test('Normal attributes', () => {
test('normal attributes', () => {
const content = `<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>`;
const tokens = tokenizeHTML(content);
const output = [
@@ -302,7 +311,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('Attributes with no quotes or value', () => {
test('attributes with no quotes or value', () => {
const content = `<button id="test1" class=value2 disabled>class=value2 disabled</button>`;
const tokens = tokenizeHTML(content);
const output = [
@@ -321,7 +330,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('Attributes with no space between them. No valid, but accepted by the browser', () => {
test('attributes with no space between them. No valid, but accepted by the browser', () => {
const content = `<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>`;
const tokens = tokenizeHTML(content);
const output = [
@@ -338,5 +347,42 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test.skip('style tag', () => {
const content = `<style type="text/css">
<!--
p{font-family:geneva,helvetica,arial,"lucida sans",sans-serif}
p{font-size:10pt}
p{color:#333}
span{font-family:geneva,helvetica,arial,"lucida sans",sans-serif}
span{font-size:10pt}
.sp2{font-size:2px}
div{font-family:geneva,helvetica,arial,"lucida sans",sans-serif}
div{font-size:10pt}
div.sitelinks{padding:0px 10px 0px 10px;font-size:9pt}
input{font-family:geneva,helvetica,arial,"lucida sans",sans-serif}
input{padding:0px;margin:0px;font-size:9pt}
input.medium{width:100px;height:18px}
input.buttonred{cursor:hand;font-family:verdana;background:#d12124;color:#fff;height:1.4em;font-weight:bold;font-size:9pt;padding:0px 2px;margin:0px;border:0px none #000}
-->
</style>`
const tokens = tokenizeHTML(content);
const output = [];
expectOutput(output, tokens);
});
test.skip('script tag', () => {
const content = `<script language="JavaScript" type="text/javascript">
<!--//
if ((navigator.platform=='MacPPC')&&(navigator.appVersion.substr(17,8) != "MSIE 5.0")) {document.write('<LINK rel="stylesheet" href="styles/main-3.css" type="text/css">')}
if (screen.width > 1200) {document.write('<LINK rel="stylesheet" href="styles/main-3.css" type="text/css">')}
//-->
</script>`;
const tokens = tokenizeHTML(content);
const output = [];
expectOutput(output, tokens);
})
})
});
+17 -1
View File
@@ -50,7 +50,9 @@ describe('Parser', () => {
expectOutput(ast, [
{
attrs: {},
attrs: {
name: 'value'
},
tag: 'h1',
content: []
},
@@ -96,6 +98,20 @@ describe('Parser', () => {
]);
});
test('parse single tag with params', () => {
const ast = parse('[url=https://github.com/jilizart/bbob]');
expectOutput(ast, [
{
tag: 'url',
attrs: {
'https://github.com/jilizart/bbob': 'https://github.com/jilizart/bbob',
},
content: [],
},
]);
});
test('detect inconsistent tag', () => {
const onError = jest.fn();
const ast = parse('[c][/c][b]hello[/c][/b][b]', { onError });