From ccab54a4547b67d0ea61644e36ca57fdbe8c6491 Mon Sep 17 00:00:00 2001 From: Nikolay Kost Date: Thu, 17 Oct 2024 00:26:06 +0300 Subject: [PATCH] feat(#250): introduce caseFreeTags option (#251) * chore: initial tests * feat: parser test * feat: add case free tags support * fix: coverage upload * fix: --disable=gcov * fix: npm publish sha commit * fix: change codecov to coveralls * fix: change workflow pr build and publish * chore: change coverage badge [skip ci] --- .changeset/poor-pears-marry.md | 42 ++++ .github/workflows/pr.yml | 9 +- .github/workflows/test.yml | 7 +- README.md | 28 ++- benchmark/package.json | 3 +- package.json | 3 +- packages/bbob-parser/src/Token.ts | 10 +- packages/bbob-parser/src/lexer.ts | 11 +- packages/bbob-parser/src/parse.ts | 33 ++- packages/bbob-parser/test/parse.test.ts | 289 ++++++++++++++---------- packages/bbob-types/src/parser.ts | 15 +- scripts/package.json | 3 +- 12 files changed, 293 insertions(+), 160 deletions(-) create mode 100644 .changeset/poor-pears-marry.md diff --git a/.changeset/poor-pears-marry.md b/.changeset/poor-pears-marry.md new file mode 100644 index 0000000..b8e4fdf --- /dev/null +++ b/.changeset/poor-pears-marry.md @@ -0,0 +1,42 @@ +--- +"@bbob/parser": minor +"@bbob/types": minor +"@bbob/cli": minor +"@bbob/core": minor +"@bbob/html": minor +"@bbob/plugin-helper": minor +"@bbob/preset": minor +"@bbob/preset-html5": minor +"@bbob/preset-react": minor +"@bbob/preset-vue": minor +"@bbob/react": minor +"@bbob/vue2": minor +"@bbob/vue3": minor +--- + +New option flag `caseFreeTags` has been added + +This flag allows to parse case insensitive tags like `[h1]some[/H1]` -> `

<h1>some</h1>

` + +```js +import html from '@bbob/html' +import presetHTML5 from '@bbob/preset-html5' + +const processed = html(`[h1]some[/H1]`, presetHTML5(), { caseFreeTags: true }) + +console.log(processed); //

<h1>some</h1>

+``` + +Also now you can pass `caseFreeTags` to `parse` function + +```js +import { parse } from '@bbob/parser' + +const ast = parse('[h1]some[/H1]', { + caseFreeTags: true +}); +``` + +BREAKING CHANGE: `isTokenNested` function now accepts string `tokenValue` instead of `token` + +Changed codecov.io to coveralls.io for test coverage diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 6590d28..624c488 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -1,9 +1,16 @@ name: Pull Request on: +# workflow_run: +# workflows: +# - Tests +# - Benchmark +# types: +# - completed pull_request: paths-ignore: - '.changeset/**' - '.husky/**' + workflow_dispatch: concurrency: group: ci-pull-request=${{github.ref}}-1 @@ -30,7 +37,7 @@ jobs: - name: Set SHA id: sha run: | - SHORT_SHA=$(git rev-parse --short "$GITHUB_SHA") + SHORT_SHA=$(git rev-parse --short "${{ github.event.pull_request.head.sha }}") echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT - name: Install pnpm diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 574af1f..d49fc32 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -37,11 +37,8 @@ jobs: - name: Run the lint run: pnpm run lint - - name: Install coverage - run: pnpm install --global codecov - - name: Run the coverage run: pnpm run cover - - name: Run the coverage - run: codecov + - name: Coveralls + uses: coverallsapp/github-action@v2 diff --git a/README.md b/README.md index 2e0f688..c0f3c5f 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ written in pure javascript, no dependencies [![Tests](https://github.com/JiLiZART/BBob/actions/workflows/test.yml/badge.svg)](https://github.com/JiLiZART/BBob/actions/workflows/test.yml) [![Benchmark](https://github.com/JiLiZART/BBob/actions/workflows/benchmark.yml/badge.svg)](https://github.com/JiLiZART/BBob/actions/workflows/benchmark.yml) - - codecov - + + Coverage Status + CodeFactor @@ -230,6 +230,28 @@ const processed = 
bbobHTML(`[b]Text[/b]'\\[b\\]Text\\[/b\\]'`, presetHTML5(), { console.log(processed); // Text[b]Text[/b] ``` +#### caseFreeTags + +Allows to parse case insensitive tags like `[h1]some[/H1]` -> `

<h1>some</h1>

` + +```js +import bbobHTML from '@bbob/html' +import presetHTML5 from '@bbob/preset-html5' + +const processed = bbobHTML(`[h1]some[/H1]`, presetHTML5(), { caseFreeTags: true }) + +console.log(processed); //

<h1>some</h1>

+``` + +```js +import bbobHTML from '@bbob/html' +import presetHTML5 from '@bbob/preset-html5' + +const processed = bbobHTML(`[b]Text[/b]'\\[b\\]Text\\[/b\\]'`, presetHTML5(), { enableEscapeTags: true }) + +console.log(processed); // Text[b]Text[/b] +``` + ### Presets diff --git a/benchmark/package.json b/benchmark/package.json index c6d1f0b..7b91ae8 100644 --- a/benchmark/package.json +++ b/benchmark/package.json @@ -10,8 +10,7 @@ "cpupro": "node --require cpupro benchmark.js" }, "author": { - "name": "Nikolay Kostyurin ", - "url": "https://artkost.ru/" + "name": "Nikolay Kostyurin " }, "dependencies": { "@bbob/parser": "*", diff --git a/package.json b/package.json index 95bab67..d35a713 100644 --- a/package.json +++ b/package.json @@ -20,8 +20,7 @@ "cleanup": "node scripts/cleanup" }, "author": { - "name": "Nikolay Kostyurin ", - "url": "https://artkost.ru/" + "name": "Nikolay Kostyurin " }, "license": "MIT", "devDependencies": { diff --git a/packages/bbob-parser/src/Token.ts b/packages/bbob-parser/src/Token.ts index fbc9af1..468f725 100644 --- a/packages/bbob-parser/src/Token.ts +++ b/packages/bbob-parser/src/Token.ts @@ -81,11 +81,11 @@ const getTagName = (token: Token) => { return isTagEnd(token) ? 
value.slice(1) : value; }; -const tokenToText = (token: Token) => { - let text = OPEN_BRAKET; +const tokenToText = (token: Token, openTag = OPEN_BRAKET, closeTag = CLOSE_BRAKET) => { + let text = openTag; text += getTokenValue(token); - text += CLOSE_BRAKET; + text += closeTag; return text; }; @@ -167,8 +167,8 @@ class Token implements TokenInterface { return getEndPosition(this); } - toString() { - return tokenToText(this); + toString({ openTag = OPEN_BRAKET, closeTag = CLOSE_BRAKET } = {}) { + return tokenToText(this, openTag, closeTag); } } diff --git a/packages/bbob-parser/src/lexer.ts b/packages/bbob-parser/src/lexer.ts index 0ca43d6..c57ba7e 100644 --- a/packages/bbob-parser/src/lexer.ts +++ b/packages/bbob-parser/src/lexer.ts @@ -51,13 +51,14 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo let stateMode = STATE_WORD; let tagMode = TAG_STATE_NAME; let contextFreeTag = ''; - const tokens = new Array>(Math.floor(buffer.length)); + const tokens = new Array(Math.floor(buffer.length)); const openTag = options.openTag || OPEN_BRAKET; const closeTag = options.closeTag || CLOSE_BRAKET; const escapeTags = !!options.enableEscapeTags; const contextFreeTags = (options.contextFreeTags || []) .filter(Boolean) .map((tag) => tag.toLowerCase()); + const caseFreeTags = options.caseFreeTags || false; const nestedMap = new Map(); const onToken = options.onToken || (() => { }); @@ -88,8 +89,6 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo /** * Emits newly created token to subscriber - * @param {Number} type - * @param {String} value */ function emitToken(type: number, value: string, startPos?: number, endPos?: number) { const token = createTokenOfType(type, value, row, prevCol, startPos, endPos); @@ -352,13 +351,13 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo return tokens; } - function isTokenNested(token: Token) { - const value = openTag + SLASH + token.getValue(); + 
function isTokenNested(tokenValue: string) { + const value = openTag + SLASH + tokenValue; if (nestedMap.has(value)) { return !!nestedMap.get(value); } else { - const status = (buffer.indexOf(value) > -1); + const status = caseFreeTags ? (buffer.toLowerCase().indexOf(value.toLowerCase()) > -1) : (buffer.indexOf(value) > -1); nestedMap.set(value, status); diff --git a/packages/bbob-parser/src/parse.ts b/packages/bbob-parser/src/parse.ts index 4d5bb14..1217614 100644 --- a/packages/bbob-parser/src/parse.ts +++ b/packages/bbob-parser/src/parse.ts @@ -52,8 +52,9 @@ function parse(input: string, opts: ParseOptions = {}) { const onlyAllowTags = (options.onlyAllowTags || []) .filter(Boolean) .map((tag) => tag.toLowerCase()); + const caseFreeTags = options.caseFreeTags || false; - let tokenizer: LexerTokenizer | null = null; + let tokenizer: ReturnType | null = null; /** * Result AST of nodes @@ -85,10 +86,11 @@ function parse(input: string, opts: ParseOptions = {}) { const nestedTagsMap = new Set(); function isTokenNested(token: Token) { - const value = token.getValue(); + const tokenValue = token.getValue(); + const value = caseFreeTags ? tokenValue.toLowerCase() : tokenValue; const { isTokenNested } = tokenizer || {}; - if (!nestedTagsMap.has(value) && isTokenNested && isTokenNested(token)) { + if (!nestedTagsMap.has(value) && isTokenNested && isTokenNested(value)) { nestedTagsMap.add(value); return true; @@ -101,7 +103,7 @@ function parse(input: string, opts: ParseOptions = {}) { * @private */ function isTagNested(tagName: string) { - return Boolean(nestedTagsMap.has(tagName)); + return Boolean(nestedTagsMap.has(caseFreeTags ? 
tagName.toLowerCase() : tagName)); } /** @@ -203,17 +205,23 @@ function parse(input: string, opts: ParseOptions = {}) { * @param {Token} token */ function handleTagEnd(token: Token) { - const lastTagNode = nestedNodes.last(); - if (isTagNode(lastTagNode)) { - lastTagNode.setEnd({ from: token.getStart(), to: token.getEnd() }); - } - flushTagNodes(); - + const tagName = token.getValue().slice(1); const lastNestedNode = nestedNodes.flush(); + flushTagNodes(); + if (lastNestedNode) { const nodes = getNodes(); + + if (isTagNode(lastNestedNode)) { + lastNestedNode.setEnd({ from: token.getStart(), to: token.getEnd() }); + } + appendNodes(nodes, lastNestedNode); + } else if (!isTagNested(tagName)) { // when we have only close tag [/some] without any open tag + const nodes = getNodes(); + + appendNodes(nodes, token.toString({ openTag, closeTag })); } else if (typeof options.onError === "function") { const tag = token.getValue(); const line = token.getLine(); @@ -281,13 +289,13 @@ function parse(input: string, opts: ParseOptions = {}) { } } else if (token.isTag()) { // if tag is not allowed, just pass it as is - appendNodes(nodes, token.toString()); + appendNodes(nodes, token.toString({ openTag, closeTag })); } } else if (token.isText()) { appendNodes(nodes, tokenValue); } else if (token.isTag()) { // if tag is not allowed, just pass it as is - appendNodes(nodes, token.toString()); + appendNodes(nodes, token.toString({ openTag, closeTag })); } } @@ -311,6 +319,7 @@ function parse(input: string, opts: ParseOptions = {}) { closeTag, onlyAllowTags: options.onlyAllowTags, contextFreeTags: options.contextFreeTags, + caseFreeTags: options.caseFreeTags, enableEscapeTags: options.enableEscapeTags, }); diff --git a/packages/bbob-parser/test/parse.test.ts b/packages/bbob-parser/test/parse.test.ts index f2dbb16..3478ac9 100644 --- a/packages/bbob-parser/test/parse.test.ts +++ b/packages/bbob-parser/test/parse.test.ts @@ -247,6 +247,58 @@ describe('Parser', () => { }); }); + 
describe('caseFreeTags', () => { + test('default case tags', () => { + const ast = parse('[h1 name=value]Foo[/H1]', { + caseFreeTags: false + }); + const output = [ + { + tag: 'h1', + attrs: { + name: 'value' + }, + content: [], + start: { + from: 0, + to: 15, + } + }, + "Foo", + "[/H1]" + ]; + + expectOutput(ast, output); + }); + + test('case free tags', () => { + const ast = parse('[h1 name=value]Foo[/H1]', { + caseFreeTags: true + }); + const output = [ + { + tag: 'h1', + attrs: { + name: 'value' + }, + content: [ + "Foo" + ], + start: { + from: 0, + to: 15, + }, + end: { + from: 18, + to: 23, + }, + } + ]; + + expectOutput(ast, output); + }); + }) + test('parse inconsistent tags', () => { const ast = parse('[h1 name=value]Foo [Bar] /h1]'); const output = [ @@ -279,6 +331,15 @@ describe('Parser', () => { expectOutput(ast, output); }); + test('parse closed tag', () => { + const ast = parse('[/h1]'); + const output = [ + '[/h1]', + ]; + + expectOutput(ast, output); + }); + test('parse tag with value param', () => { const ast = parse('[url=https://github.com/jilizart/bbob]BBob[/url]'); const output = [ @@ -650,49 +711,49 @@ sdfasdfasdf [url=xxx]xxx[/url]`; expectOutput( - parse(str), - [ - { - tag: 'quote', attrs: {}, content: ['some'], - start: { - from: 0, - to: 7, + parse(str), + [ + { + tag: 'quote', attrs: {}, content: ['some'], + start: { + from: 0, + to: 7, + }, + end: { + from: 11, + to: 19, + }, }, - end: { - from: 11, - to: 19, + { + tag: 'color', attrs: { red: 'red' }, content: ['test'], + start: { + from: 19, + to: 30, + }, + end: { + from: 34, + to: 42, + }, }, - }, - { - tag: 'color', attrs: { red: 'red' }, content: ['test'], - start: { - from: 19, - to: 30, - }, - end: { - from: 34, - to: 42, - }, - }, - '\n', - '[quote]', - 'xxxsdfasdf', - '\n', - 'sdfasdfasdf', - '\n', - '\n', - { - tag: 'url', attrs: { xxx: 'xxx' }, content: ['xxx'], - start: { - from: 74, - to: 83, - }, - end: { - from: 86, - to: 92, - }, - } - ] + '\n', + '[quote]', + 
'xxxsdfasdf', + '\n', + 'sdfasdfasdf', + '\n', + '\n', + { + tag: 'url', attrs: { xxx: 'xxx' }, content: ['xxx'], + start: { + from: 74, + to: 83, + }, + end: { + from: 86, + to: 92, + }, + } + ] ); }); @@ -700,45 +761,45 @@ sdfasdfasdf const str = `[quote]xxxsdfasdf[quote]some[/quote][color=red]test[/color]sdfasdfasdf[url=xxx]xxx[/url]`; expectOutput( - parse(str), - [ - '[quote]', - 'xxxsdfasdf', - { - tag: 'quote', attrs: {}, content: ['some'], - start: { - from: 17, - to: 24, + parse(str), + [ + '[quote]', + 'xxxsdfasdf', + { + tag: 'quote', attrs: {}, content: ['some'], + start: { + from: 17, + to: 24, + }, + end: { + from: 28, + to: 36, + }, }, - end: { - from: 28, - to: 36, + { + tag: 'color', attrs: { red: 'red' }, content: ['test'], + start: { + from: 36, + to: 47, + }, + end: { + from: 51, + to: 59, + }, }, - }, - { - tag: 'color', attrs: { red: 'red' }, content: ['test'], - start: { - from: 36, - to: 47, - }, - end: { - from: 51, - to: 59, - }, - }, - 'sdfasdfasdf', - { - tag: 'url', attrs: { xxx: 'xxx' }, content: ['xxx'], - start: { - from: 70, - to: 79, - }, - end: { - from: 82, - to: 88, - }, - } - ] + 'sdfasdfasdf', + { + tag: 'url', attrs: { xxx: 'xxx' }, content: ['xxx'], + start: { + from: 70, + to: 79, + }, + end: { + from: 82, + to: 88, + }, + } + ] ); }); @@ -746,45 +807,45 @@ sdfasdfasdf const str = `[quote]some[/quote][color=red]test[/color]sdfasdfasdf[url=xxx]xxx[/url][quote]xxxsdfasdf`; expectOutput( - parse(str), - [ - { - tag: 'quote', attrs: {}, content: ['some'], - start: { - from: 0, - to: 7, + parse(str), + [ + { + tag: 'quote', attrs: {}, content: ['some'], + start: { + from: 0, + to: 7, + }, + end: { + from: 11, + to: 19, + }, }, - end: { - from: 11, - to: 19, + { + tag: 'color', attrs: { red: 'red' }, content: ['test'], + start: { + from: 19, + to: 30, + }, + end: { + from: 34, + to: 42, + }, }, - }, - { - tag: 'color', attrs: { red: 'red' }, content: ['test'], - start: { - from: 19, - to: 30, + 'sdfasdfasdf', + { + tag: 'url', 
attrs: { xxx: 'xxx' }, content: ['xxx'], + start: { + from: 53, + to: 62, + }, + end: { + from: 65, + to: 71, + }, }, - end: { - from: 34, - to: 42, - }, - }, - 'sdfasdfasdf', - { - tag: 'url', attrs: { xxx: 'xxx' }, content: ['xxx'], - start: { - from: 53, - to: 62, - }, - end: { - from: 65, - to: 71, - }, - }, - '[quote]', - 'xxxsdfasdf', - ] + '[quote]', + 'xxxsdfasdf', + ] ); }); diff --git a/packages/bbob-types/src/parser.ts b/packages/bbob-types/src/parser.ts index abe1244..c845dea 100644 --- a/packages/bbob-types/src/parser.ts +++ b/packages/bbob-types/src/parser.ts @@ -23,24 +23,23 @@ export interface Token { export interface LexerTokenizer { tokenize: () => Token[]; - isTokenNested?: (token: Token) => boolean; + isTokenNested?: (tokenValue: string) => boolean; } -export interface LexerOptions { +export interface CommonOptions { openTag?: string; closeTag?: string; onlyAllowTags?: string[]; enableEscapeTags?: boolean; + caseFreeTags?: boolean; contextFreeTags?: string[]; +} + +export interface LexerOptions extends CommonOptions { onToken?: (token?: Token) => void; } -export interface ParseOptions { +export interface ParseOptions extends CommonOptions { createTokenizer?: (input: string, options?: LexerOptions) => LexerTokenizer; - openTag?: string; - closeTag?: string; - onlyAllowTags?: string[]; - contextFreeTags?: string[]; - enableEscapeTags?: boolean; onError?: (error: ParseError) => void; } diff --git a/scripts/package.json b/scripts/package.json index 3898c75..f0891ef 100644 --- a/scripts/package.json +++ b/scripts/package.json @@ -5,7 +5,6 @@ "pkg-task": "pkg-task" }, "author": { - "name": "Nikolay Kostyurin ", - "url": "https://artkost.ru/" + "name": "Nikolay Kostyurin " } }