2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

Support quoted BBCode params and backslash-escaped quotes

This commit is contained in:
Nikolay Kostyurin
2018-07-06 00:33:49 +02:00
parent e1e9874642
commit 12144415cc
6 changed files with 144 additions and 18 deletions
+15 -2
View File
@@ -1,2 +1,15 @@
# bbob-parser
Fast BBCode parser written in pure JavaScript, with no dependencies
# @bbob/parser
Parses BBCode and returns an AST (abstract syntax tree) that looks like this:
```js
[
{
tag: 'url',
attrs: {
url: 'https://github.com/JiLiZART/bbob/tree/master/packages/bbob-parser'
},
content: ['hello', ' ', 'world!']
}
]
```
+4
View File
@@ -61,6 +61,10 @@ class Token {
this.row = Number(row);
}
isEmpty() {
return !!this.type;
}
isText() {
return isTextToken(this);
}
+78 -16
View File
@@ -4,6 +4,7 @@ const {
CLOSE_BRAKET, EQ, TAB, SPACE, N, QUOTEMARK,
PLACEHOLDER_SPACE, PLACEHOLDER_SPACE_TAB,
SLASH,
BACKSLASH,
} = require('./char');
const Token = require('./Token');
@@ -41,6 +42,15 @@ class Tokenizer {
this.emitToken(token);
}
skipChar(num) {
this.index += num;
this.colPos += num;
}
seekChar(num) {
return this.buffer.charCodeAt(this.index + num);
}
nextCol() {
this.colPos += 1;
}
@@ -115,6 +125,7 @@ class Tokenizer {
}
if (this.attrValueToken[Token.TYPE_ID]) {
delete this.attrValueToken.quoted;
this.attrTokens.push(this.attrValueToken);
this.attrValueToken = this.dummyToken;
}
@@ -129,12 +140,16 @@ class Tokenizer {
charSPACE(charCode) {
this.flushWord();
const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE;
if (this.tagToken[Token.TYPE_ID]) {
this.attrNameToken = this.createAttrNameToken('');
if (this.inTag()) {
if (this.inAttrValue() && this.attrValueToken.quoted) {
this.attrValueToken[Token.VALUE_ID] += spaceCode;
} else {
this.flushAttrNames();
this.attrNameToken = this.createAttrNameToken('');
}
} else {
const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE;
this.appendToken(this.createSpaceToken(spaceCode));
}
this.nextCol();
@@ -156,16 +171,23 @@ class Tokenizer {
}
charCLOSEBRAKET() {
this.nextCol();
this.flushTag();
this.flushAttrNames();
this.flushAttrs();
this.nextCol();
}
charEQ(charCode) {
if (this.tagToken[Token.TYPE_ID]) {
const nextCharCode = this.seekChar(1);
const isNextQuotemark = nextCharCode === QUOTEMARK;
if (this.inTag()) {
this.attrValueToken = this.createAttrValueToken('');
if (isNextQuotemark) {
this.attrValueToken.quoted = true;
this.skipChar(1);
}
} else {
this.wordToken[Token.VALUE_ID] += getChar(charCode);
}
@@ -174,7 +196,13 @@ class Tokenizer {
}
charQUOTEMARK(charCode) {
if (this.attrValueToken[Token.TYPE_ID] && this.attrValueToken[Token.VALUE_ID] > 0) {
const prevCharCode = this.seekChar(-1);
const isPrevBackslash = prevCharCode === BACKSLASH;
if (this.inAttrValue() &&
this.attrValueToken[Token.VALUE_ID] &&
this.attrValueToken.quoted &&
!isPrevBackslash) {
this.flushAttrNames();
} else if (this.tagToken[Token.TYPE_ID] === '') {
this.wordToken[Token.VALUE_ID] += getChar(charCode);
@@ -183,13 +211,31 @@ class Tokenizer {
this.nextCol();
}
charBACKSLASH() {
const nextCharCode = this.seekChar(1);
const isNextQuotemark = nextCharCode === QUOTEMARK;
if (this.inAttrValue() &&
this.attrValueToken[Token.VALUE_ID] &&
this.attrValueToken.quoted &&
isNextQuotemark
) {
this.attrValueToken[Token.VALUE_ID] += getChar(nextCharCode);
this.skipChar(1);
}
this.nextCol();
}
charWORD(charCode) {
if (this.tagToken[Token.TYPE_ID] && this.attrValueToken[Token.TYPE_ID]) {
this.attrValueToken[Token.VALUE_ID] += getChar(charCode);
} else if (this.tagToken[Token.TYPE_ID] && this.attrNameToken[Token.TYPE_ID]) {
this.attrNameToken[Token.VALUE_ID] += getChar(charCode);
} else if (this.tagToken[Token.TYPE_ID]) {
this.tagToken[Token.VALUE_ID] += getChar(charCode);
if (this.inTag()) {
if (this.inAttrValue()) {
this.attrValueToken[Token.VALUE_ID] += getChar(charCode);
} else if (this.inAttrName()) {
this.attrNameToken[Token.VALUE_ID] += getChar(charCode);
} else {
this.tagToken[Token.VALUE_ID] += getChar(charCode);
}
} else {
this.createWord();
@@ -214,11 +260,11 @@ class Tokenizer {
break;
case OPEN_BRAKET:
this.charOPENBRAKET();
this.charOPENBRAKET(charCode);
break;
case CLOSE_BRAKET:
this.charCLOSEBRAKET();
this.charCLOSEBRAKET(charCode);
break;
case EQ:
@@ -229,6 +275,10 @@ class Tokenizer {
this.charQUOTEMARK(charCode);
break;
case BACKSLASH:
this.charBACKSLASH(charCode);
break;
default:
this.charWORD(charCode);
break;
@@ -245,6 +295,18 @@ class Tokenizer {
return this.tokens;
}
inTag() {
return this.tagToken[Token.TYPE_ID];
}
inAttrValue() {
return this.attrValueToken[Token.TYPE_ID];
}
inAttrName() {
return this.attrNameToken[Token.TYPE_ID];
}
createWordToken(value = '', line = this.colPos, row = this.rowPos) {
return createTokenOfType(Token.TYPE_WORD, value, line, row);
}
+2
View File
@@ -11,6 +11,7 @@ const OPEN_BRAKET = '['.charCodeAt(0);
// Char code of the closing square bracket ']'.
const CLOSE_BRAKET = ']'.charCodeAt(0);
// Char code of the forward slash '/'.
const SLASH = '/'.charCodeAt(0);
// Char code of a single backslash; the literal is written escaped as '\\'.
const BACKSLASH = '\\'.charCodeAt(0);
const PLACEHOLDER_SPACE_TAB = ' ';
const PLACEHOLDER_SPACE = ' ';
@@ -31,4 +32,5 @@ module.exports = {
SLASH,
PLACEHOLDER_SPACE_TAB,
PLACEHOLDER_SPACE,
BACKSLASH,
};
+17
View File
@@ -36,4 +36,21 @@ describe('Parser', () => {
},
]);
});
// A quoted attribute value keeps its inner space ("Foo Bar") while the
// unquoted attributes before it are still split on spaces.
test('parse tag with quoted param with spaces', () => {
  const source = '[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]';
  const expectedAttrs = {
    href: 'https://ru.wikipedia.org',
    target: '_blank',
    text: 'Foo Bar',
  };
  const expectedTree = [
    {
      tag: 'url',
      attrs: expectedAttrs,
      content: ['Text'],
    },
  ];

  const ast = parse(source);

  expect(ast).toBeInstanceOf(Array);
  expect(ast).toEqual(expectedTree);
});
});
@@ -47,6 +47,34 @@ describe('Tokenizer', () => {
expectOutput(output, tokens);
});
// A quoted attribute value containing a space must come out as one
// ATTR_VALUE token, and following tokens keep one column per input character.
test('tokenize tag with quotemark params with spaces', () => {
  const expected = [
    [TYPE.TAG, 'url', '0', '0'],
    [TYPE.ATTR_NAME, 'text', '4', '0'],
    [TYPE.ATTR_VALUE, 'Foo Bar', '9', '0'],
    [TYPE.WORD, 'Text', '20', '0'],
    [TYPE.TAG, '/url', '24', '0'],
  ];

  const actual = tokenize('[url text="Foo Bar"]Text[/url]');

  expectOutput(expected, actual);
});
// The input contains a backslash-escaped quotemark inside a quoted value;
// the expected ATTR_VALUE holds the quotemark without the backslash.
test('tokenize tag with escaped quotemark param', () => {
  const expected = [
    [TYPE.TAG, 'url', '0', '0'],
    [TYPE.ATTR_NAME, 'text', '4', '0'],
    [TYPE.ATTR_VALUE, 'Foo "Bar', '9', '0'],
    [TYPE.WORD, 'Text', '22', '0'],
    [TYPE.TAG, '/url', '26', '0'],
  ];

  const actual = tokenize(`[url text="Foo \\"Bar"]Text[/url]`);

  expectOutput(expected, actual);
});
test('tokenize tag param without quotemarks', () => {
const input = '[style color=#ff0000]Text[/style]';
const tokens = tokenize(input);