2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

feat(parser): custom open and close tags support, html tags tests (#3)

This commit is contained in:
Nikolay Kostyurin
2018-09-18 23:41:52 +02:00
committed by GitHub
parent f5fd078eca
commit 790825af30
9 changed files with 268 additions and 100 deletions
+3 -2
View File
@@ -7,7 +7,7 @@
"lint": "lerna run build && lerna run link && lerna run lint"
},
"author": {
"name": "Nikolay Kostyurin",
"name": "Nikolay Kostyurin <jilizart@gmail.com>",
"url": "https://artkost.ru/"
},
"license": "MIT",
@@ -15,10 +15,10 @@
"babel-cli": "^6.26.0",
"babel-core": "^6.26.3",
"babel-jest": "^23.4.2",
"babel-plugin-transform-object-rest-spread": "^6.26.0",
"babel-plugin-external-helpers": "^6.22.0",
"babel-plugin-transform-decorators-legacy": "^1.3.5",
"babel-plugin-transform-es2015-modules-commonjs": "^6.26.2",
"babel-plugin-transform-object-rest-spread": "^6.26.0",
"babel-preset-env": "^1.7.0",
"babel-preset-es2015": "^6.24.1",
"babel-preset-react": "^6.24.1",
@@ -37,6 +37,7 @@
"lint-staged": "^7.2.2",
"microtime": "^2.1.8",
"posthtml-render": "^1.1.4",
"rimraf": "^2.6.2",
"rollup": "^0.62.0",
"rollup-plugin-babel": "^3.0.7",
"rollup-plugin-commonjs": "^9.1.6",
+3 -4
View File
@@ -3,16 +3,15 @@ import {render} from '../src';
const process = (input, params) => {
const ast = parse(input);
const html = render(ast, params);
return html
return render(ast, params)
};
describe('@bbob/html', () => {
test('render bbcode tag with single param as html tag', () => {
const input = '[url=https://ru.wikipedia.org]Text[/url]';
const expected = '<url url="https://ru.wikipedia.org">Text</url>';
const result = process(input)
const expected = '<url https://ru.wikipedia.org="https://ru.wikipedia.org">Text</url>';
const result = process(input);
expect(result).toBe(expected);
});
+5 -5
View File
@@ -53,14 +53,14 @@ const convertTagToText = (token) => {
class Token {
constructor(type, value, line, row) {
this.type = String(type);
this.value = String(value);
this.line = Number(line);
this.row = Number(row);
this[TOKEN_TYPE_ID] = String(type);
this[TOKEN_VALUE_ID] = String(value);
this[TOKEN_LINE_ID] = Number(line);
this[TOKEN_COLUMN_ID] = Number(row);
}
isEmpty() {
return !!this.type;
return !!this[TOKEN_TYPE_ID];
}
isText() {
+49 -33
View File
@@ -13,13 +13,7 @@ import {
import { Token, TYPE_ATTR_NAME, TYPE_ATTR_VALUE, TYPE_NEW_LINE, TYPE_SPACE, TYPE_TAG, TYPE_WORD } from './Token';
const RESERVED_CHARS = [CLOSE_BRAKET, OPEN_BRAKET, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N];
const NOT_CHAR_TOKENS = [OPEN_BRAKET, SPACE, TAB, N];
const WHITESPACES = [SPACE, TAB];
const isCharReserved = char => (RESERVED_CHARS.indexOf(char) >= 0);
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
const EM = '!';
const createCharGrabber = (source) => {
let idx = 0;
@@ -69,6 +63,19 @@ function createLexer(buffer, options = {}) {
let tokenIndex = -1;
const tokens = new Array(Math.floor(buffer.length));
const openTag = options.openTag || OPEN_BRAKET;
const closeTag = options.closeTag || CLOSE_BRAKET;
const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
const NOT_CHAR_TOKENS = [openTag, SPACE, TAB, N];
const WHITESPACES = [SPACE, TAB];
const SPECIAL_CHARS = [EQ, SPACE, TAB];
const isCharReserved = char => (RESERVED_CHARS.indexOf(char) >= 0);
const isWhiteSpace = char => (WHITESPACES.indexOf(char) >= 0);
const isCharToken = char => (NOT_CHAR_TOKENS.indexOf(char) === -1);
const isSpecialChar = char => (SPECIAL_CHARS.indexOf(char) >= 0);
const emitToken = (token) => {
if (options.onToken) {
options.onToken(token);
@@ -80,37 +87,46 @@ function createLexer(buffer, options = {}) {
const parseAttrs = (str) => {
let tagName = null;
let skipSpaces = false;
let skipSpecialChars = false;
const attrTokens = [];
const attrCharGrabber = createCharGrabber(str);
const validAttr = (val) => {
const isEQ = val === EQ;
const isWS = isWhiteSpace(val);
const isPrevSLASH = attrCharGrabber.getPrev() === SLASH;
if (tagName === null) {
return !(isEQ || isWS || attrCharGrabber.isLast());
const validAttr = (char) => {
const isEQ = char === EQ;
const isWS = isWhiteSpace(char);
const prevChar = attrCharGrabber.getPrev();
const nextChar = attrCharGrabber.getNext();
const isPrevSLASH = prevChar === BACKSLASH;
const isTagNameEmpty = tagName === null;
if (isTagNameEmpty) {
return (isEQ || isWS || attrCharGrabber.isLast()) === false;
}
if (skipSpaces && isWS) {
if (skipSpecialChars && isSpecialChar(char)) {
return true;
}
if (val === QUOTEMARK && !isPrevSLASH) {
skipSpaces = !skipSpaces;
if (char === QUOTEMARK && !isPrevSLASH) {
skipSpecialChars = !skipSpecialChars;
if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) {
return false;
}
}
return !(isEQ || isWS);
return (isEQ || isWS) === false;
};
const nextAttr = () => {
const attrStr = attrCharGrabber.grabWhile(validAttr);
const currChar = attrCharGrabber.getCurr();
// first string before space is a tag name
if (tagName === null) {
tagName = attrStr;
} else if (isWhiteSpace(attrCharGrabber.getCurr()) || !attrCharGrabber.hasNext()) {
} else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) {
const escaped = unquote(trimChar(attrStr, QUOTEMARK));
attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col));
} else {
@@ -127,29 +143,29 @@ function createLexer(buffer, options = {}) {
return { tag: tagName, attrs: attrTokens };
};
const grabber = createCharGrabber(buffer);
const bufferGrabber = createCharGrabber(buffer);
const next = () => {
const char = grabber.getCurr();
const char = bufferGrabber.getCurr();
if (char === N) {
grabber.skip();
bufferGrabber.skip();
col = 0;
row++;
emitToken(createToken(TYPE_NEW_LINE, char, row, col));
} else if (isWhiteSpace(char)) {
const str = grabber.grabWhile(isWhiteSpace);
const str = bufferGrabber.grabWhile(isWhiteSpace);
emitToken(createToken(TYPE_SPACE, str, row, col));
} else if (char === OPEN_BRAKET) {
const nextChar = grabber.getNext();
grabber.skip(); // skip [
} else if (char === openTag) {
const nextChar = bufferGrabber.getNext();
bufferGrabber.skip(); // skip [
if (isCharReserved(nextChar)) {
emitToken(createToken(TYPE_WORD, char, row, col));
} else {
const str = grabber.grabWhile(val => val !== CLOSE_BRAKET);
grabber.skip(); // skip ]
const str = bufferGrabber.grabWhile(val => val !== closeTag);
bufferGrabber.skip(); // skip ]
if (!(str.indexOf(EQ) > 0) || str[0] === SLASH) {
emitToken(createToken(TYPE_TAG, str, row, col));
@@ -160,19 +176,19 @@ function createLexer(buffer, options = {}) {
parsed.attrs.map(emitToken);
}
}
} else if (char === CLOSE_BRAKET) {
grabber.skip();
} else if (char === closeTag) {
bufferGrabber.skip();
emitToken(createToken(TYPE_WORD, char, row, col));
} else if (isCharToken(char)) {
const str = grabber.grabWhile(isCharToken);
const str = bufferGrabber.grabWhile(isCharToken);
emitToken(createToken(TYPE_WORD, str, row, col));
}
};
const tokenize = () => {
while (grabber.hasNext()) {
while (bufferGrabber.hasNext()) {
next();
}
@@ -182,7 +198,7 @@ function createLexer(buffer, options = {}) {
};
const isTokenNested = (token) => {
const value = OPEN_BRAKET + SLASH + token.getValue();
const value = openTag + SLASH + token.getValue();
return buffer.indexOf(value) > -1;
};
+23 -11
View File
@@ -28,8 +28,6 @@ let tokenizer = null;
// eslint-disable-next-line no-unused-vars
let tokens = null;
const createTokenizer = (input, onToken) => createLexer(input, { onToken });
/**
* @private
* @param token
@@ -41,7 +39,7 @@ const isTagNested = token => tokenizer.isTokenNested(token);
* @private
* @return {TagNode}
*/
const getTagNode = () => (tagNodes.length ? tagNodes[tagNodes.length - 1] : null);
const getLastTagNode = () => (tagNodes.length ? tagNodes[tagNodes.length - 1] : null);
/**
* @private
@@ -61,7 +59,7 @@ const createTagNodeAttrName = token => tagNodesAttrName.push(token.getValue());
* @return {Array}
*/
const getTagNodeAttrName = () =>
(tagNodesAttrName.length ? tagNodesAttrName[tagNodesAttrName.length - 1] : getTagNode().tag);
(tagNodesAttrName.length ? tagNodesAttrName[tagNodesAttrName.length - 1] : null);
/**
* @private
@@ -92,6 +90,7 @@ const clearTagNode = () => {
const getNodes = () => {
if (nestedNodes.length) {
const nestedNode = nestedNodes[nestedNodes.length - 1];
return nestedNode.content;
}
@@ -127,9 +126,9 @@ const handleTagStart = (token) => {
createTagNode(token);
if (isTagNested(token)) {
nestedNodes.push(getTagNode());
nestedNodes.push(getLastTagNode());
} else {
appendNode(getTagNode());
appendNode(getLastTagNode());
clearTagNode();
}
}
@@ -151,6 +150,7 @@ const handleTagEnd = (token) => {
const tag = token.getValue();
const line = token.getLine();
const column = token.getColumn();
options.onError({
message: `Inconsistent tag '${tag}' on line ${line} and column ${column}`,
lineNumber: line,
@@ -183,15 +183,22 @@ const handleTagToken = (token) => {
* @param {Token} token
*/
const handleTagNode = (token) => {
const tagNode = getTagNode();
const tagNode = getLastTagNode();
if (tagNode) {
if (token.isAttrName()) {
createTagNodeAttrName(token);
tagNode.attr(getTagNodeAttrName(), null);
tagNode.attr(getTagNodeAttrName(), '');
} else if (token.isAttrValue()) {
tagNode.attr(getTagNodeAttrName(), token.getValue());
clearTagNodeAttrName();
const attrName = getTagNodeAttrName();
const attrValue = token.getValue();
if (attrName) {
tagNode.attr(getTagNodeAttrName(), attrValue);
clearTagNodeAttrName();
} else {
tagNode.attr(attrValue, attrValue);
}
} else if (token.isText()) {
tagNode.append(token.getValue());
}
@@ -215,7 +222,12 @@ const parseToken = (token) => {
*/
const parse = (input, opts = {}) => {
options = opts;
tokenizer = (opts.createTokenizer ? opts.createTokenizer : createTokenizer)(input, parseToken);
tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, {
onToken: parseToken,
onlyAllowTags: options.onlyAllowTags,
openTag: options.openTag,
closeTag: options.closeTag,
});
nodes = [];
nestedNodes = [];
+86 -14
View File
@@ -14,15 +14,16 @@ const tokenize = input => (createLexer(input).tokenize());
describe('lexer', () => {
const expectOutput = (output, tokens) => {
expect(tokens.length).toBe(output.length);
expect(tokens).toBeInstanceOf(Array);
output.forEach((token, idx) => {
expect(tokens[idx]).toBeInstanceOf(Object);
expect(tokens[idx].type).toEqual(token[0]);
expect(tokens[idx].value).toEqual(token[1]);
tokens.forEach((token, idx) => {
expect(token).toBeInstanceOf(Object);
expect(token.type).toEqual(output[idx][0]);
expect(token.value).toEqual(output[idx][1]);
});
};
test('tokenize single tag', () => {
test('single tag', () => {
const input = '[SingleTag]';
const tokens = tokenize(input);
const output = [
@@ -32,7 +33,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize single tag with spaces', () => {
test('single tag with spaces', () => {
const input = '[Single Tag]';
const tokens = tokenize(input);
@@ -43,7 +44,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize string with quotemarks', () => {
test('string with quotemarks', () => {
const input = '"Someone Like You" by Adele';
const tokens = tokenize(input);
@@ -62,7 +63,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize tags in brakets', () => {
test('tags in brakets', () => {
const input = '[ [h1]G[/h1] ]';
const tokens = tokenize(input);
@@ -79,7 +80,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize tag as param', () => {
test('tag as param', () => {
const input = '[color="#ff0000"]Text[/color]';
const tokens = tokenize(input);
const output = [
@@ -92,7 +93,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize tag with quotemark params with spaces', () => {
test('tag with quotemark params with spaces', () => {
const input = '[url text="Foo Bar" text2="Foo Bar 2"]Text[/url]';
const tokens = tokenize(input);
const output = [
@@ -108,7 +109,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize tag with escaped quotemark param', () => {
test('tag with escaped quotemark param', () => {
const input = `[url text="Foo \\"Bar"]Text[/url]`;
const tokens = tokenize(input);
const output = [
@@ -122,7 +123,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize tag param without quotemarks', () => {
test('tag param without quotemarks', () => {
const input = '[style color=#ff0000]Text[/style]';
const tokens = tokenize(input);
const output = [
@@ -136,7 +137,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize list tag with items', () => {
test('list tag with items', () => {
const input = `[list]
[*] Item 1.
[*] Item 2.
@@ -174,7 +175,7 @@ describe('lexer', () => {
expectOutput(output, tokens);
});
test('tokenize bad tags as texts', () => {
test('bad tags as texts', () => {
const inputs = [
'[]',
'[=]',
@@ -234,4 +235,75 @@ describe('lexer', () => {
expectOutput(asserts[idx], tokens);
});
});
/*
test('bad unclosed tag', () => {
const input = `[Finger tapping; R.H. = Right Hand) Part A [Finger tapping (Right hand -15-, -16-)]`;
const tokens = tokenize(input);
const output = [];
expectOutput(output, tokens);
});
*/
describe('html', () => {
const tokenizeHTML = input => createLexer(input, { openTag: '<', closeTag: '>' }).tokenize();
test('Normal attributes', () => {
const content = `<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>`;
const tokens = tokenizeHTML(content);
const output = [
[TYPE.TAG, 'button', 2, 0],
[TYPE.ATTR_NAME, 'id', 2, 0],
[TYPE.ATTR_VALUE, 'test0', 2, 0],
[TYPE.ATTR_NAME, 'class', 2, 0],
[TYPE.ATTR_VALUE, 'value0', 2, 0],
[TYPE.ATTR_NAME, 'title', 2, 0],
[TYPE.ATTR_VALUE, 'value1', 2, 0],
[TYPE.WORD, "class=\"value0\"", 2, 0],
[TYPE.SPACE, " ", 2, 0],
[TYPE.WORD, "title=\"value1\"", 2, 0],
[TYPE.TAG, '/button', 2, 0]
];
expectOutput(output, tokens);
});
test('Attributes with no quotes or value', () => {
const content = `<button id="test1" class=value2 disabled>class=value2 disabled</button>`;
const tokens = tokenizeHTML(content);
const output = [
[TYPE.TAG, 'button', 2, 0],
[TYPE.ATTR_NAME, 'id', 2, 0],
[TYPE.ATTR_VALUE, 'test1', 2, 0],
[TYPE.ATTR_NAME, 'class', 2, 0],
[TYPE.ATTR_VALUE, 'value2', 2, 0],
[TYPE.ATTR_VALUE, 'disabled', 2, 0],
[TYPE.WORD, "class=value2", 2, 0],
[TYPE.SPACE, " ", 2, 0],
[TYPE.WORD, "disabled", 2, 0],
[TYPE.TAG, '/button', 2, 0]
];
expectOutput(output, tokens);
});
test('Attributes with no space between them. No valid, but accepted by the browser', () => {
const content = `<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>`;
const tokens = tokenizeHTML(content);
const output = [
[TYPE.TAG, 'button', 2, 0],
[TYPE.ATTR_NAME, 'id', 2, 0],
[TYPE.ATTR_VALUE, 'test2', 2, 0],
[TYPE.ATTR_NAME, 'class', 2, 0],
[TYPE.ATTR_VALUE, 'value4', 2, 0],
[TYPE.ATTR_NAME, 'title', 2, 0],
[TYPE.ATTR_VALUE, 'value5', 2, 0],
[TYPE.WORD, "class=\"value4\"title=\"value5\"", 2, 0],
[TYPE.TAG, '/button', 2, 0]
];
expectOutput(output, tokens);
});
})
});
+93 -29
View File
@@ -1,11 +1,15 @@
import { parse } from '../src'
describe('Parser', () => {
// Shared assertion used by the parser cases in this suite: the parse result
// has to be an Array instance and deep-equal the expected node list.
function expectOutput(ast, output) {
  const assertion = expect(ast);

  assertion.toBeInstanceOf(Array);
  assertion.toEqual(output);
}
test('parse paired tags tokens', () => {
const ast = parse('[best name=value]Foo Bar[/best]');
expect(ast).toBeInstanceOf(Array);
expect(ast).toEqual([
expectOutput(ast, [
{
tag: 'best',
attrs: {
@@ -25,8 +29,7 @@ describe('Parser', () => {
onlyAllowTags: ['h1']
});
expect(ast).toBeInstanceOf(Array);
expect(ast).toEqual([
expectOutput(ast, [
{
tag: 'h1',
attrs: {
@@ -45,36 +48,32 @@ describe('Parser', () => {
test('parse inconsistent tags', () => {
const ast = parse('[h1 name=value]Foo [Bar] /h1]');
expect(ast).toBeInstanceOf(Array);
expect(ast).toEqual(
[
{
attrs: {},
tag: 'h1',
content: []
},
'Foo',
' ',
{
tag: 'Bar',
attrs: {},
content: []
},
' ',
'/h1]',
]
);
expectOutput(ast, [
{
attrs: {},
tag: 'h1',
content: []
},
'Foo',
' ',
{
tag: 'Bar',
attrs: {},
content: []
},
' ',
'/h1]',
]);
});
test('parse tag with value param', () => {
const ast = parse('[url=https://github.com/jilizart/bbob]BBob[/url]');
expect(ast).toBeInstanceOf(Array);
expect(ast).toEqual([
expectOutput(ast, [
{
tag: 'url',
attrs: {
url: 'https://github.com/jilizart/bbob',
'https://github.com/jilizart/bbob': 'https://github.com/jilizart/bbob',
},
content: ['BBob'],
},
@@ -84,8 +83,7 @@ describe('Parser', () => {
test('parse tag with quoted param with spaces', () => {
const ast = parse('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]');
expect(ast).toBeInstanceOf(Array);
expect(ast).toEqual([
expectOutput(ast, [
{
tag: 'url',
attrs: {
@@ -103,5 +101,71 @@ describe('Parser', () => {
const ast = parse('[c][/c][b]hello[/c][/b][b]', { onError });
expect(onError).toHaveBeenCalled();
})
});
describe('html', () => {
const parseHTML = input => parse(input, { openTag: '<', closeTag: '>' });
test('normal attributes', () => {
const content = `<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>`;
const ast = parseHTML(content);
expectOutput(ast, [
{
"tag": "button",
"attrs": {
"id": "test0",
"class": "value0",
"title": "value1"
},
"content": [
"class=\"value0\"",
" ",
"title=\"value1\""
]
}
]);
});
test('attributes with no quotes or value', () => {
const content = `<button id="test1" class=value2 disabled required>class=value2 disabled</button>`;
const ast = parseHTML(content);
expectOutput(ast, [
{
"tag": "button",
"attrs": {
"id": "test1",
"class": "value2",
"disabled": "disabled",
"required": "required"
},
"content": [
"class=value2",
" ",
"disabled"
]
}
]);
});
test('attributes with no space between them. no valid, but accepted by the browser', () => {
const content = `<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>`;
const ast = parseHTML(content);
expectOutput(ast, [
{
"tag": "button",
"attrs": {
"id": "test2",
"class": "value4",
"title": "value5"
},
"content": [
"class=\"value4\"title=\"value5\""
]
}
]);
});
});
});
+1 -1
View File
@@ -7,7 +7,7 @@ describe('posthtml-render', () => {
const ast = parse('[size=150][b]PostHTML render test[/b][/size]');
const html = render(ast);
expect(html).toBe('<size size="150"><b>PostHTML render test</b></size>')
expect(html).toBe('<size 150="150"><b>PostHTML render test</b></size>')
})
});
@@ -56,6 +56,10 @@ const asListItems = (content) => {
return [].concat(listItems);
};
/**
 * Returns the "unique" attribute of a tag node, i.e. the attribute whose key
 * is identical to its value. The parser stores a bare tag value such as
 * `[url=https://example.com]` that way (`{ 'https://example.com': 'https://example.com' }`).
 *
 * The previous reducer discarded its accumulator, so only the LAST key of
 * `attrs` was ever inspected and a unique attribute followed by any other
 * attribute (e.g. `[url=... target=_blank]`) was reported as missing.
 *
 * @param {Object} attrs - attribute map of a tag node
 * @return {string|null} value of the first self-keyed attribute, or null when there is none
 */
const getUniqAttr = (attrs) => {
  const uniqKey = Object.keys(attrs).find(key => attrs[key] === key);

  // `find` yields undefined when nothing matched; callers expect null in that case.
  return uniqKey === undefined ? null : attrs[uniqKey];
};
export default {
b: node => ({
tag: 'span',
@@ -88,7 +92,7 @@ export default {
url: (node, { render }) => ({
tag: 'a',
attrs: {
href: node.attrs.url ? node.attrs.url : render(node.content),
href: getUniqAttr(node.attrs) ? getUniqAttr(node.attrs) : render(node.content),
},
content: node.content,
}),