initial

2026-06-05 16:42:27 +03:00 · 2018-06-04 23:18:50 +02:00
parent d4a41482d3
commit edf90de445
18 changed files with 2403 additions and 0 deletions
@@ -0,0 +1,215 @@
+
+// Character classes (as regex source text) accepted inside attribute names and
+// attribute values. Values additionally allow '#' and whitespace.
+const attrNameChars = '[a-zA-Z0-9\\.\\-_:;/]'
+const attrValueChars = '[a-zA-Z0-9\\.\\-_:;#/\\s]'
+// Source of the tag matcher. Capture groups:
+//   1    - "/name" of an end tag such as [/ch]
+//   2    - name of a start tag such as [ch]
+//   3, 4 - optional value attached to the tag itself ([tag="value"]) and its quote
+//   5-8  - the last ` name="value"` attribute (whole text, name, quote, value)
+// The tag name is a single `(\w*)`: wrapping it in another quantifier (`(\w*)+`)
+// matches exactly the same strings but backtracks exponentially on an
+// unterminated tag like "[aaaaaaaa!".
+const pattern = `\\[(\/\\w*)\\]|\\[(\\w*)(=(["])${attrValueChars}*\\4)?( (${attrNameChars}+)?=(["])(${attrValueChars}+)\\7)*\\]`
+
+// Global matcher shared by the tokenizer; being /g it keeps state in `lastIndex`.
+const TAG_RE = new RegExp(pattern, 'g')
+
+// Separator characters that the tokenizer emits as standalone text tokens.
+const EOL = '\n'
+const WHITESPACE = ' '
+
+// Node predicates. A tag node is a non-null object with a truthy `tag`;
+// plain text is represented by a bare string.
+// `typeof null` is 'object', so null is excluded explicitly instead of throwing.
+const isNode = el => el !== null && typeof el === 'object' && el.tag
+const isStringNode = el => typeof el === 'string'
+const isChordNode = el => el.tag === 'ch'
+const isTabNode = el => el.tag === 'tab'
+const isSyllableNode = el => el.tag === 'syllable'
+const isTextNode = el => el.tag === 'text'
+const isEOL = el => el === EOL
+
+/**
+ * Counts the text characters contained in a node.
+ *
+ * @param {*} node - a string, a tag node ({tag, content}), or anything else
+ * @returns {number} the string length for a string; the summed length of all
+ *   (recursively nested) content entries for a tag node; 0 otherwise
+ */
+const getNodeLength = node => {
+    if (isStringNode(node)) {
+        return node.length
+    }
+
+    if (isNode(node)) {
+        // A tag node without a content array contributes nothing.
+        const content = Array.isArray(node.content) ? node.content : []
+
+        return content.reduce((count, contentNode) => count + getNodeLength(contentNode), 0)
+    }
+
+    return 0
+}
+
+// Default tag registry: each listed tag is marked as closable,
+// i.e. it is expected to come as a [tag]...[/tag] pair.
+const tagsDefinition = ['ch', 'syllable', 'tab'].reduce((registry, tagName) => {
+    registry[tagName] = { closable: true }
+
+    return registry
+}, {})
+
+// @TODO: Split into a parser and a tokenizer; nodes and tokens should live separately
+/**
+ * Parses tab content containing BB codes into an AST of the form
+ * [{tag: 'ch', attrs: {..}, content: [...]}].
+ * Attribute values must be double-quoted to be recognised as part of a tag.
+ *
+ * @example
+ *
+ *  textTabParser
+ *      .parse('[Intro] [ch app="123"]G[/ch] hello world', {ch: {closable: true}})
+ *
+ */
+module.exports = {
+    /**
+     * Tokenizes `str` and assembles the tokens into a node tree.
+     *
+     * @param {string} str - raw text with BB-style tags
+     * @param {Object} [tags] - supported tags keyed by name; a truthy `closable`
+     *   marks tags that wrap content. Stored on `this.tags` for the helpers below.
+     * @returns {Array} top-level nodes: strings and {tag, attrs, content} objects
+     */
+    parse(str, tags = tagsDefinition) {
+        this.tags = tags
+
+        const tokens = this.tokenize(str)
+        const ast = this.parseTokens(tokens)
+
+        return ast
+    },
+
+    /**
+     * Splits `str` into a flat list of text tokens and tag tokens.
+     *
+     * @param {string} str - raw text with BB-style tags
+     * @returns {Array<Object>} tokens in source order
+     */
+    tokenize(str) {
+        const tokens = []
+        let match
+        let lastIndex = 0
+
+        // Appends one by one: spreading a very large token list into push()
+        // could exceed the engine's argument limit.
+        const appendText = (text) => {
+            this.toTextTokens(text).forEach((token) => {
+                tokens.push(token)
+            })
+        }
+
+        // TAG_RE is a shared /g regex; start from a clean cursor in case an
+        // earlier scan was abandoned before reaching the end of its input.
+        TAG_RE.lastIndex = 0
+
+        while ((match = TAG_RE.exec(str)) !== null) {
+            // Plain text between the previous tag and this one.
+            if (match.index > lastIndex) {
+                appendText(str.slice(lastIndex, match.index))
+            }
+
+            tokens.push(this.tagToken(match))
+            lastIndex = TAG_RE.lastIndex
+        }
+
+        // Trailing text after the last tag.
+        if (lastIndex < str.length) {
+            appendText(str.slice(lastIndex))
+        }
+
+        return tokens
+    },
+
+    /**
+     * Builds the node tree from a token list. The list itself is not modified.
+     * Tags not present in `this.tags` are emitted as plain text. A closable tag
+     * that is never closed is reported and closed implicitly at the end of the
+     * input, so its content is kept.
+     *
+     * @param {Array<Object>} tokens - tokens as produced by `tokenize`
+     * @returns {Array} top-level nodes
+     */
+    parseTokens(tokens) {
+        const nodes = []
+        // Stack of closable tag nodes that have been opened but not yet closed.
+        const nestedNodes = []
+
+        // Where new nodes go: the innermost open tag, or the top level.
+        const getNodes = () => {
+            if (nestedNodes.length) {
+                return nestedNodes[nestedNodes.length - 1].content
+            }
+
+            return nodes
+        }
+
+        for (const rawToken of tokens) {
+            // Stop at the first empty entry instead of throwing on it.
+            if (!rawToken) {
+                break
+            }
+
+            const curToken = this.isTokenSupported(rawToken) ? rawToken : this.asTextToken(rawToken)
+
+            if (curToken.isText) {
+                getNodes().push(curToken.text)
+            }
+
+            if (curToken.isTag && curToken.isStart) {
+                const node = this.tagNode(curToken.tagName, curToken.attributes)
+
+                if (this.isTokenHasCloseTag(curToken)) {
+                    nestedNodes.push(node)
+                } else {
+                    getNodes().push(node)
+                }
+            }
+
+            if (curToken.isTag && curToken.isEnd) {
+                const lastNestedNode = nestedNodes.pop()
+
+                if (lastNestedNode) {
+                    getNodes().push(lastNestedNode)
+                } else {
+                    console.error(`Inconsistent tag '${curToken.tagName}'`)
+                }
+            }
+        }
+
+        // Anything still open has no closing tag: attach it to its parent
+        // rather than silently dropping it together with its content.
+        while (nestedNodes.length) {
+            const unclosedNode = nestedNodes.pop()
+
+            console.error(`Unclosed tag '${unclosedNode.tag}'`)
+            getNodes().push(unclosedNode)
+        }
+
+        return nodes
+    },
+
+    /**
+     * Looks up a tag definition among the own properties of `this.tags`.
+     * Inherited keys such as "constructor" or "toString" are never treated as tags.
+     *
+     * @param {string} tagName
+     * @returns {*} the definition, or undefined when the tag is not registered
+     */
+    getTagDefinition(tagName) {
+        const tags = this.tags
+
+        if (tags && Object.prototype.hasOwnProperty.call(tags, tagName)) {
+            return tags[tagName]
+        }
+
+        return undefined
+    },
+
+    /**
+     * @param {Object} token
+     * @returns {*} truthy when the token is a tag registered in `this.tags`
+     */
+    isTokenSupported(token) {
+        return token.isTag && this.getTagDefinition(token.tagName)
+    },
+
+    /**
+     * @param {Object} token
+     * @returns {*} truthy when the token's tag is registered with a truthy `closable`
+     */
+    isTokenHasCloseTag(token) {
+        const definition = this.getTagDefinition(token.tagName)
+
+        return definition && definition.closable
+    },
+
+    /**
+     * Creates a tag node.
+     *
+     * @param {string} name - tag name
+     * @param {Object} attrs - attribute map
+     * @param {Array} [content] - child nodes
+     * @returns {{tag: string, attrs: Object, content: Array}}
+     */
+    tagNode(name, attrs, content = []) {
+        return { tag: name, attrs, content }
+    },
+
+    /**
+     * Splits plain text into text tokens: every space and newline becomes its
+     * own token, and each run of other characters becomes one token.
+     *
+     * @param {string} text
+     * @returns {Array<{isText: boolean, text: string}>}
+     */
+    toTextTokens(text) {
+        const tokens = []
+        let currText = ''
+
+        const flushText = () => {
+            if (currText) {
+                tokens.push(this.textToken(currText))
+                currText = ''
+            }
+        }
+
+        for (const char of text) {
+            if (char === EOL || char === WHITESPACE) {
+                flushText()
+                tokens.push(this.textToken(char))
+            } else {
+                currText += char
+            }
+        }
+
+        flushText()
+
+        return tokens
+    },
+
+    /**
+     * @param {string} text
+     * @returns {{isText: boolean, text: string}}
+     */
+    textToken(text) {
+        return { isText: true, text }
+    },
+
+    /**
+     * Converts a TAG_RE match into a start-tag or end-tag token.
+     *
+     * @param {Array} match - result of `TAG_RE.exec`
+     * @returns {Object} start token {isStart, isTag, tagName, attributes, text}
+     *   or end token {isEnd, isTag, tagName}
+     */
+    tagToken(match) {
+        if (typeof match[1] === 'undefined') { // Start tag
+            const tagName = match[2]
+            const attributes = {}
+            const ATTR_RE = new RegExp(`(${attrNameChars}+)?=(["])(${attrValueChars}+)\\2`, 'g')
+            // Everything between the tag name and the closing bracket.
+            const attrStr = match[0].slice(1 + tagName.length, -1)
+
+            let attrMatch
+
+            while ((attrMatch = ATTR_RE.exec(attrStr)) !== null) {
+                if (typeof attrMatch[1] === 'undefined') { // The tag attribute
+                    // A nameless `="value"` belongs to the tag itself.
+                    attributes[tagName] = attrMatch[3]
+                } else { // Normal attribute
+                    attributes[attrMatch[1]] = attrMatch[3]
+                }
+            }
+
+            return { isStart: true, isTag: true, tagName, attributes, text: match[0] }
+        }
+
+        // End tag: drop the leading slash captured with the name.
+        return { isEnd: true, isTag: true, tagName: match[1].slice(1) }
+    },
+
+    /**
+     * Turns a tag token back into a text token carrying its source text;
+     * any other token is returned unchanged.
+     *
+     * @param {Object} token
+     * @returns {Object}
+     */
+    asTextToken(token) {
+        if (token.isTag && token.isStart) {
+            return this.textToken(token.text)
+        }
+
+        if (token.isTag && token.isEnd) {
+            return this.textToken(`[/${token.tagName}]`)
+        }
+
+        return token
+    },
+}
@@ -0,0 +1,19 @@
+// Benchmark script: times repeated runs of the parser exported by ../index
+// over the shared text stub.
+const parse = require('../index');
+const textStub = require("./test/stub");
+
+const options = { closableTags: ['ch', 'syllable', 'tab'] };
+const count = 10;
+const parsers3 = [];
+
+console.time('newParser');
+// The bound is inclusive, so the parser runs count + 1 times.
+for (let run = 0; run <= count; run += 1) {
+    parsers3.push(parse(textStub, options));
+}
+console.timeEnd('newParser');
+// console.log(JSON.stringify(parsers3));
@@ -0,0 +1,15 @@
+// Benchmark script: times repeated runs of OldParser.parse over the shared text stub.
+const OldParser = require('./OldParser')
+const textStub = require("./test/stub");
+
+const count = 10;
+const oldParsers3 = [];
+
+console.time('oldParser');
+// The bound is inclusive, so the parser runs count + 1 times.
+for (let run = 0; run <= count; run += 1) {
+    oldParsers3.push(OldParser.parse(textStub));
+}
+console.timeEnd('oldParser');
+// console.log(JSON.stringify(oldParsers3));
+