2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-05-15 11:59:37 +03:00

feat(#271): whitespaceInTags mode (#272)

* fix: test for buggy behavior

* feat: implement whitespaceInTags mode

* feat: move all char arrays to Map

* feat: revert Map for char arrays
This commit is contained in:
Nikolay Kost
2025-03-09 02:23:03 +02:00
committed by GitHub
parent f0c9da683d
commit 0566241e23
5 changed files with 77 additions and 4 deletions
+29
View File
@@ -0,0 +1,29 @@
---
"@bbob/parser": minor
"@bbob/types": minor
"@bbob/cli": minor
"@bbob/core": minor
"@bbob/html": minor
"@bbob/plugin-helper": minor
"@bbob/preset": minor
"@bbob/preset-html5": minor
"@bbob/preset-react": minor
"@bbob/preset-vue": minor
"@bbob/react": minor
"@bbob/vue2": minor
"@bbob/vue3": minor
---
Added a `whitespaceInTags` parsing option (`true` by default). Set it to `false` to disable parsing of `[tags with spaces]`; such input is then treated as plain text.
```js
import html5 from '@bbob/preset-html5'
import parse from '@bbob/html'
const html = parse('[b]lorem[/b] [foo bar] [i]ipsum[/i]', html5(), {
whitespaceInTags: false
})
console.log(html) // <b>lorem</b> [foo bar] <i>ipsum</i>
```
+9 -4
View File
@@ -67,7 +67,6 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
const NOT_CHAR_TOKENS = [
openTag, SPACE, TAB, N,
];
const isCharReserved = (char: string) => (RESERVED_CHARS.indexOf(char) >= 0);
const isCharToken = (char: string) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
const isEscapableChar = (char: string) => (char === openTag || char === closeTag || char === BACKSLASH);
@@ -198,13 +197,19 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
const currChar = chars.getCurr();
const nextChar = chars.getNext();
chars.skip();
chars.skip(); // skip openTag
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = chars.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
if ((nextChar && isCharReserved(nextChar)) || hasInvalidChars || chars.isLast()) {
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
const isNextCharReserved = nextChar && isCharReserved(nextChar)
const isLastChar = chars.isLast()
const hasSpace = substr.indexOf(SPACE) >= 0;
const isSpaceRestricted = hasSpace && options.whitespaceInTags === false;
if (isNextCharReserved || hasInvalidChars || isLastChar || isSpaceRestricted) {
emitToken(TYPE_WORD, currChar);
return STATE_WORD;
+1
View File
@@ -321,6 +321,7 @@ function parse(input: string, opts: ParseOptions = {}) {
contextFreeTags: options.contextFreeTags,
caseFreeTags: options.caseFreeTags,
enableEscapeTags: options.enableEscapeTags,
whitespaceInTags: options.whitespaceInTags,
});
// eslint-disable-next-line no-unused-vars
+37
View File
@@ -869,6 +869,43 @@ sdfasdfasdf
]);
});
// With `whitespaceInTags` disabled, a bracketed phrase that contains a space
// ("[statement redacted]") is expected to come back as plain text pieces,
// while the well-formed [b] and [i] tags around it are still parsed as nodes.
test('parse invalid tags', () => {
  const source = '[b]Press Release[/b] [statement redacted] [i]This is more content[/i]';
  const ast = parse(source, { whitespaceInTags: false });

  expectOutput(ast, [
    { tag: 'b', attrs: {}, content: ['Press', ' ', 'Release'] },
    ' ',
    // The bracketed phrase is split into text pieces rather than becoming a tag node.
    '[', 'statement', ' ', 'redacted]',
    ' ',
    { tag: 'i', attrs: {}, content: ['This', ' ', 'is', ' ', 'more', ' ', 'content'] },
  ]);
});
describe('html', () => {
const parseHTML = (input: string) => parse(input, { openTag: '<', closeTag: '>' });
+1
View File
@@ -32,6 +32,7 @@ export interface CommonOptions {
onlyAllowTags?: string[];
enableEscapeTags?: boolean;
caseFreeTags?: boolean;
whitespaceInTags?: boolean;
contextFreeTags?: string[];
}