mirror of
https://github.com/tenrok/BBob.git
synced 2026-05-15 11:59:37 +03:00
quoted bbcode params support, support escape backslash
This commit is contained in:
@@ -1,2 +1,15 @@
|
||||
# bbob-parser
|
||||
Fast BB Code parser written in pure javascript, no dependencies
|
||||
# @bbob/parser
|
||||
|
||||
Parses BBCode and returns an AST that looks like this:
|
||||
|
||||
```js
|
||||
[
|
||||
{
|
||||
tag: 'url',
|
||||
attrs: {
|
||||
url: 'https://github.com/JiLiZART/bbob/tree/master/packages/bbob-parser'
|
||||
},
|
||||
content: ['hello', ' ', 'world!']
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
@@ -61,6 +61,10 @@ class Token {
|
||||
this.row = Number(row);
|
||||
}
|
||||
|
||||
isEmpty() {
|
||||
return !!this.type;
|
||||
}
|
||||
|
||||
isText() {
|
||||
return isTextToken(this);
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ const {
|
||||
CLOSE_BRAKET, EQ, TAB, SPACE, N, QUOTEMARK,
|
||||
PLACEHOLDER_SPACE, PLACEHOLDER_SPACE_TAB,
|
||||
SLASH,
|
||||
BACKSLASH,
|
||||
} = require('./char');
|
||||
const Token = require('./Token');
|
||||
|
||||
@@ -41,6 +42,15 @@ class Tokenizer {
|
||||
this.emitToken(token);
|
||||
}
|
||||
|
||||
skipChar(num) {
|
||||
this.index += num;
|
||||
this.colPos += num;
|
||||
}
|
||||
|
||||
seekChar(num) {
|
||||
return this.buffer.charCodeAt(this.index + num);
|
||||
}
|
||||
|
||||
nextCol() {
|
||||
this.colPos += 1;
|
||||
}
|
||||
@@ -115,6 +125,7 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
if (this.attrValueToken[Token.TYPE_ID]) {
|
||||
delete this.attrValueToken.quoted;
|
||||
this.attrTokens.push(this.attrValueToken);
|
||||
this.attrValueToken = this.dummyToken;
|
||||
}
|
||||
@@ -129,12 +140,16 @@ class Tokenizer {
|
||||
|
||||
charSPACE(charCode) {
|
||||
this.flushWord();
|
||||
const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE;
|
||||
|
||||
if (this.tagToken[Token.TYPE_ID]) {
|
||||
this.attrNameToken = this.createAttrNameToken('');
|
||||
if (this.inTag()) {
|
||||
if (this.inAttrValue() && this.attrValueToken.quoted) {
|
||||
this.attrValueToken[Token.VALUE_ID] += spaceCode;
|
||||
} else {
|
||||
this.flushAttrNames();
|
||||
this.attrNameToken = this.createAttrNameToken('');
|
||||
}
|
||||
} else {
|
||||
const spaceCode = charCode === TAB ? PLACEHOLDER_SPACE_TAB : PLACEHOLDER_SPACE;
|
||||
|
||||
this.appendToken(this.createSpaceToken(spaceCode));
|
||||
}
|
||||
this.nextCol();
|
||||
@@ -156,16 +171,23 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
charCLOSEBRAKET() {
|
||||
this.nextCol();
|
||||
this.flushTag();
|
||||
this.flushAttrNames();
|
||||
this.flushAttrs();
|
||||
|
||||
this.nextCol();
|
||||
}
|
||||
|
||||
charEQ(charCode) {
|
||||
if (this.tagToken[Token.TYPE_ID]) {
|
||||
const nextCharCode = this.seekChar(1);
|
||||
const isNextQuotemark = nextCharCode === QUOTEMARK;
|
||||
|
||||
if (this.inTag()) {
|
||||
this.attrValueToken = this.createAttrValueToken('');
|
||||
|
||||
if (isNextQuotemark) {
|
||||
this.attrValueToken.quoted = true;
|
||||
this.skipChar(1);
|
||||
}
|
||||
} else {
|
||||
this.wordToken[Token.VALUE_ID] += getChar(charCode);
|
||||
}
|
||||
@@ -174,7 +196,13 @@ class Tokenizer {
|
||||
}
|
||||
|
||||
charQUOTEMARK(charCode) {
|
||||
if (this.attrValueToken[Token.TYPE_ID] && this.attrValueToken[Token.VALUE_ID] > 0) {
|
||||
const prevCharCode = this.seekChar(-1);
|
||||
const isPrevBackslash = prevCharCode === BACKSLASH;
|
||||
|
||||
if (this.inAttrValue() &&
|
||||
this.attrValueToken[Token.VALUE_ID] &&
|
||||
this.attrValueToken.quoted &&
|
||||
!isPrevBackslash) {
|
||||
this.flushAttrNames();
|
||||
} else if (this.tagToken[Token.TYPE_ID] === '') {
|
||||
this.wordToken[Token.VALUE_ID] += getChar(charCode);
|
||||
@@ -183,13 +211,31 @@ class Tokenizer {
|
||||
this.nextCol();
|
||||
}
|
||||
|
||||
charBACKSLASH() {
|
||||
const nextCharCode = this.seekChar(1);
|
||||
const isNextQuotemark = nextCharCode === QUOTEMARK;
|
||||
|
||||
if (this.inAttrValue() &&
|
||||
this.attrValueToken[Token.VALUE_ID] &&
|
||||
this.attrValueToken.quoted &&
|
||||
isNextQuotemark
|
||||
) {
|
||||
this.attrValueToken[Token.VALUE_ID] += getChar(nextCharCode);
|
||||
this.skipChar(1);
|
||||
}
|
||||
|
||||
this.nextCol();
|
||||
}
|
||||
|
||||
charWORD(charCode) {
|
||||
if (this.tagToken[Token.TYPE_ID] && this.attrValueToken[Token.TYPE_ID]) {
|
||||
this.attrValueToken[Token.VALUE_ID] += getChar(charCode);
|
||||
} else if (this.tagToken[Token.TYPE_ID] && this.attrNameToken[Token.TYPE_ID]) {
|
||||
this.attrNameToken[Token.VALUE_ID] += getChar(charCode);
|
||||
} else if (this.tagToken[Token.TYPE_ID]) {
|
||||
this.tagToken[Token.VALUE_ID] += getChar(charCode);
|
||||
if (this.inTag()) {
|
||||
if (this.inAttrValue()) {
|
||||
this.attrValueToken[Token.VALUE_ID] += getChar(charCode);
|
||||
} else if (this.inAttrName()) {
|
||||
this.attrNameToken[Token.VALUE_ID] += getChar(charCode);
|
||||
} else {
|
||||
this.tagToken[Token.VALUE_ID] += getChar(charCode);
|
||||
}
|
||||
} else {
|
||||
this.createWord();
|
||||
|
||||
@@ -214,11 +260,11 @@ class Tokenizer {
|
||||
break;
|
||||
|
||||
case OPEN_BRAKET:
|
||||
this.charOPENBRAKET();
|
||||
this.charOPENBRAKET(charCode);
|
||||
break;
|
||||
|
||||
case CLOSE_BRAKET:
|
||||
this.charCLOSEBRAKET();
|
||||
this.charCLOSEBRAKET(charCode);
|
||||
break;
|
||||
|
||||
case EQ:
|
||||
@@ -229,6 +275,10 @@ class Tokenizer {
|
||||
this.charQUOTEMARK(charCode);
|
||||
break;
|
||||
|
||||
case BACKSLASH:
|
||||
this.charBACKSLASH(charCode);
|
||||
break;
|
||||
|
||||
default:
|
||||
this.charWORD(charCode);
|
||||
break;
|
||||
@@ -245,6 +295,18 @@ class Tokenizer {
|
||||
return this.tokens;
|
||||
}
|
||||
|
||||
inTag() {
|
||||
return this.tagToken[Token.TYPE_ID];
|
||||
}
|
||||
|
||||
inAttrValue() {
|
||||
return this.attrValueToken[Token.TYPE_ID];
|
||||
}
|
||||
|
||||
inAttrName() {
|
||||
return this.attrNameToken[Token.TYPE_ID];
|
||||
}
|
||||
|
||||
createWordToken(value = '', line = this.colPos, row = this.rowPos) {
|
||||
return createTokenOfType(Token.TYPE_WORD, value, line, row);
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ const OPEN_BRAKET = '['.charCodeAt(0);
|
||||
const CLOSE_BRAKET = ']'.charCodeAt(0);
|
||||
|
||||
const SLASH = '/'.charCodeAt(0);
|
||||
const BACKSLASH = '\\'.charCodeAt(0);
|
||||
|
||||
const PLACEHOLDER_SPACE_TAB = ' ';
|
||||
const PLACEHOLDER_SPACE = ' ';
|
||||
@@ -31,4 +32,5 @@ module.exports = {
|
||||
SLASH,
|
||||
PLACEHOLDER_SPACE_TAB,
|
||||
PLACEHOLDER_SPACE,
|
||||
BACKSLASH,
|
||||
};
|
||||
|
||||
@@ -36,4 +36,21 @@ describe('Parser', () => {
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test('parse tag with quoted param with spaces', () => {
  // The quoted `text` value contains a space and must stay a single attribute,
  // next to the two unquoted attributes.
  const expectedTree = [
    {
      tag: 'url',
      attrs: {
        href: 'https://ru.wikipedia.org',
        target: '_blank',
        text: 'Foo Bar',
      },
      content: ['Text'],
    },
  ];

  const tree = parse('[url href=https://ru.wikipedia.org target=_blank text="Foo Bar"]Text[/url]');

  expect(tree).toBeInstanceOf(Array);
  expect(tree).toEqual(expectedTree);
});
|
||||
});
|
||||
|
||||
@@ -47,6 +47,34 @@ describe('Tokenizer', () => {
|
||||
expectOutput(output, tokens);
|
||||
});
|
||||
|
||||
test('tokenize tag with quotemark params with spaces', () => {
  const source = '[url text="Foo Bar"]Text[/url]';

  // Each entry is [type, value, column, row]. The space inside the quotes
  // belongs to the attribute value instead of starting a new attribute.
  const expected = [
    [TYPE.TAG, 'url', '0', '0'],
    [TYPE.ATTR_NAME, 'text', '4', '0'],
    [TYPE.ATTR_VALUE, 'Foo Bar', '9', '0'],
    [TYPE.WORD, 'Text', '20', '0'],
    [TYPE.TAG, '/url', '24', '0'],
  ];

  const actual = tokenize(source);

  expectOutput(expected, actual);
});
|
||||
|
||||
test('tokenize tag with escaped quotemark param', () => {
  // The template literal evaluates to: [url text="Foo \"Bar"]Text[/url]
  const source = `[url text="Foo \\"Bar"]Text[/url]`;

  // Each entry is [type, value, column, row]. The backslash is absent from the
  // attribute value but still counts as a column, so `Text` starts at 22.
  const expected = [
    [TYPE.TAG, 'url', '0', '0'],
    [TYPE.ATTR_NAME, 'text', '4', '0'],
    [TYPE.ATTR_VALUE, 'Foo "Bar', '9', '0'],
    [TYPE.WORD, 'Text', '22', '0'],
    [TYPE.TAG, '/url', '26', '0'],
  ];

  const actual = tokenize(source);

  expectOutput(expected, actual);
});
|
||||
|
||||
test('tokenize tag param without quotemarks', () => {
|
||||
const input = '[style color=#ff0000]Text[/style]';
|
||||
const tokens = tokenize(input);
|
||||
|
||||
Reference in New Issue
Block a user