mirror of
https://github.com/tenrok/BBob.git
synced 2026-06-11 18:02:26 +03:00
feat: add start and end positions of tag nodes (#246)
Closes #134.
* feat: Add start and end positions of tag nodes. Improves accuracy of row/col error reporting, which now targets the start of the relevant token instead of the end.
* Simplify language for TagNode and Token.
* Update static TagNode.create to ingest setStart() logic; improve readability of the end-position offset for tags without attributes.
This commit is contained in:
@@ -5,12 +5,14 @@ import {
|
||||
} from '@bbob/plugin-helper';
|
||||
import type { Token as TokenInterface } from "@bbob/types";
|
||||
|
||||
// type, value, line, row,
|
||||
// token fields, in id order: type, value, column, line, start pos, end pos
|
||||
|
||||
const TOKEN_TYPE_ID = 't'; // 0;
|
||||
const TOKEN_VALUE_ID = 'v'; // 1;
|
||||
const TOKEN_COLUMN_ID = 'r'; // 2;
|
||||
const TOKEN_LINE_ID = 'l'; // 3;
|
||||
const TOKEN_START_POS_ID = 's'; // 4;
|
||||
const TOKEN_END_POS_ID = 'e'; // 5;
|
||||
|
||||
const TOKEN_TYPE_WORD = 1; // 'word';
|
||||
const TOKEN_TYPE_TAG = 2; // 'tag';
|
||||
@@ -31,11 +33,15 @@ const getTokenLine = (token: Token) => (token && token[TOKEN_LINE_ID]) || 0;
|
||||
|
||||
/**
 * Reads the value stored under TOKEN_COLUMN_ID on a token.
 *
 * Yields 0 when the token itself is falsy or when the stored value is falsy
 * (missing, 0, etc.).
 */
const getTokenColumn = (token: Token) => {
  if (!token) {
    return 0;
  }

  return token[TOKEN_COLUMN_ID] || 0;
};
|
||||
|
||||
/**
 * Reads the value stored under TOKEN_START_POS_ID on a token.
 *
 * Yields 0 when the token is falsy or when no truthy start position is stored.
 */
const getStartPosition = (token: Token) => {
  const stored = token ? token[TOKEN_START_POS_ID] : 0;

  return stored || 0;
};
|
||||
|
||||
/**
 * Reads the value stored under TOKEN_END_POS_ID on a token.
 *
 * Yields 0 when the token is falsy or when no truthy end position is stored.
 */
const getEndPosition = (token: Token) => (
  token
    ? token[TOKEN_END_POS_ID] || 0
    : 0
);
|
||||
|
||||
const isTextToken = (token: Token) => {
|
||||
if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') {
|
||||
return token[TOKEN_TYPE_ID] === TOKEN_TYPE_SPACE
|
||||
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
|
||||
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
|
||||
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
|
||||
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -88,21 +94,25 @@ const tokenToText = (token: Token) => {
|
||||
* @export
|
||||
* @class Token
|
||||
*/
|
||||
class Token<TokenValue = string> implements TokenInterface {
|
||||
readonly t: number // type
|
||||
readonly v: string // value
|
||||
readonly l: number // line
|
||||
readonly r: number // row
|
||||
class Token<TokenValue = string> implements TokenInterface {
|
||||
readonly t: number; // type
|
||||
readonly v: string; // value
|
||||
readonly l: number; // line
|
||||
readonly r: number; // row
|
||||
readonly s: number; // start pos
|
||||
readonly e: number; // end pos
|
||||
|
||||
constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0) {
|
||||
constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0, start: number = 0, end: number = 0) {
|
||||
this[TOKEN_LINE_ID] = row;
|
||||
this[TOKEN_COLUMN_ID] = col;
|
||||
this[TOKEN_TYPE_ID] = type || 0;
|
||||
this[TOKEN_VALUE_ID] = String(value);
|
||||
this[TOKEN_START_POS_ID] = start;
|
||||
this[TOKEN_END_POS_ID] = end;
|
||||
}
|
||||
|
||||
get type() {
|
||||
return this[TOKEN_TYPE_ID]
|
||||
return this[TOKEN_TYPE_ID];
|
||||
}
|
||||
|
||||
isEmpty() {
|
||||
@@ -149,6 +159,14 @@ class Token<TokenValue = string> implements TokenInterface {
|
||||
return getTokenColumn(this);
|
||||
}
|
||||
|
||||
getStart() {
|
||||
return getStartPosition(this);
|
||||
}
|
||||
|
||||
getEnd() {
|
||||
return getEndPosition(this);
|
||||
}
|
||||
|
||||
toString() {
|
||||
return tokenToText(this);
|
||||
}
|
||||
@@ -158,6 +176,8 @@ export const TYPE_ID = TOKEN_TYPE_ID;
|
||||
export const VALUE_ID = TOKEN_VALUE_ID;
|
||||
export const LINE_ID = TOKEN_LINE_ID;
|
||||
export const COLUMN_ID = TOKEN_COLUMN_ID;
|
||||
export const START_POS_ID = TOKEN_START_POS_ID;
|
||||
export const END_POS_ID = TOKEN_END_POS_ID;
|
||||
export const TYPE_WORD = TOKEN_TYPE_WORD;
|
||||
export const TYPE_TAG = TOKEN_TYPE_TAG;
|
||||
export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
|
||||
|
||||
@@ -20,8 +20,8 @@ import { CharGrabber, createCharGrabber, trimChar, unquote } from './utils';
|
||||
// for cases <!-- -->
|
||||
const EM = '!';
|
||||
|
||||
export function createTokenOfType(type: number, value: string, r = 0, cl = 0) {
|
||||
return new Token(type, value, r, cl)
|
||||
export function createTokenOfType(type: number, value: string, r = 0, cl = 0, p = 0, e = 0) {
|
||||
return new Token(type, value, r, cl, p, e);
|
||||
}
|
||||
|
||||
const STATE_WORD = 0;
|
||||
@@ -34,6 +34,7 @@ const TAG_STATE_VALUE = 2;
|
||||
|
||||
const WHITESPACES = [SPACE, TAB];
|
||||
const SPECIAL_CHARS = [EQ, SPACE, TAB];
|
||||
const END_POS_OFFSET = 2; // length + start position offset
|
||||
|
||||
/**
 * Tells whether `char` is one of the entries of WHITESPACES.
 */
const isWhiteSpace = (char: string) => {
  const foundAt = WHITESPACES.indexOf(char);

  // indexOf reports -1 when the character is not listed
  return foundAt !== -1;
};
|
||||
/**
 * Tells whether `char` is exactly the BACKSLASH character.
 */
const isEscapeChar = (char: string) => {
  return BACKSLASH === char;
};
|
||||
@@ -43,6 +44,7 @@ const unq = (val: string) => unquote(trimChar(val, QUOTEMARK));
|
||||
|
||||
export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
|
||||
let row = 0;
|
||||
let prevCol = 0;
|
||||
let col = 0;
|
||||
|
||||
let tokenIndex = -1;
|
||||
@@ -89,16 +91,17 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
* @param {Number} type
|
||||
* @param {String} value
|
||||
*/
|
||||
function emitToken(type: number, value: string) {
|
||||
const token = createTokenOfType(type, value, row, col);
|
||||
function emitToken(type: number, value: string, startPos?: number, endPos?: number) {
|
||||
const token = createTokenOfType(type, value, row, prevCol, startPos, endPos);
|
||||
|
||||
onToken(token);
|
||||
|
||||
prevCol = col;
|
||||
tokenIndex += 1;
|
||||
tokens[tokenIndex] = token;
|
||||
}
|
||||
|
||||
function nextTagState(tagChars: CharGrabber, isSingleValueTag: boolean) {
|
||||
function nextTagState(tagChars: CharGrabber, isSingleValueTag: boolean, masterStartPos: number) {
|
||||
if (tagMode === TAG_STATE_ATTR) {
|
||||
const validAttrName = (char: string) => !(char === EQ || isWhiteSpace(char));
|
||||
const name = tagChars.grabWhile(validAttrName);
|
||||
@@ -161,6 +164,9 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
tagChars.skip();
|
||||
|
||||
emitToken(TYPE_ATTR_VALUE, unq(name));
|
||||
if (tagChars.getPrev() === QUOTEMARK) {
|
||||
prevCol++;
|
||||
}
|
||||
|
||||
if (tagChars.isLast()) {
|
||||
return TAG_STATE_NAME;
|
||||
@@ -169,13 +175,15 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
return TAG_STATE_ATTR;
|
||||
}
|
||||
|
||||
const start = masterStartPos + tagChars.getPos() - 1;
|
||||
const validName = (char: string) => !(char === EQ || isWhiteSpace(char) || tagChars.isLast());
|
||||
const name = tagChars.grabWhile(validName);
|
||||
|
||||
emitToken(TYPE_TAG, name);
|
||||
emitToken(TYPE_TAG, name, start, masterStartPos + tagChars.getLength() + 1);
|
||||
checkContextFreeMode(name);
|
||||
|
||||
tagChars.skip();
|
||||
prevCol++;
|
||||
|
||||
// in cases when we have [url=someval]GET[/url] and we don't need to parse it all
|
||||
if (isSingleValueTag) {
|
||||
@@ -209,11 +217,13 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
const isClosingTag = substr[0] === SLASH;
|
||||
|
||||
if (isNoAttrsInTag || isClosingTag) {
|
||||
const startPos = chars.getPos() - 1;
|
||||
const name = chars.grabWhile((char) => char !== closeTag);
|
||||
const endPos = startPos + name.length + END_POS_OFFSET;
|
||||
|
||||
chars.skip(); // skip closeTag
|
||||
|
||||
emitToken(TYPE_TAG, name);
|
||||
emitToken(TYPE_TAG, name, startPos, endPos);
|
||||
checkContextFreeMode(name, isClosingTag);
|
||||
|
||||
return STATE_WORD;
|
||||
@@ -223,6 +233,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
}
|
||||
|
||||
function stateAttrs() {
|
||||
const startPos = chars.getPos();
|
||||
const silent = true;
|
||||
const tagStr = chars.grabWhile((char) => char !== closeTag, silent);
|
||||
const tagGrabber = createCharGrabber(tagStr, { onSkip });
|
||||
@@ -231,7 +242,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
tagMode = TAG_STATE_NAME;
|
||||
|
||||
while (tagGrabber.hasNext()) {
|
||||
tagMode = nextTagState(tagGrabber, !hasSpace);
|
||||
tagMode = nextTagState(tagGrabber, !hasSpace, startPos);
|
||||
}
|
||||
|
||||
chars.skip(); // skip closeTag
|
||||
@@ -246,6 +257,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
chars.skip();
|
||||
|
||||
col = 0;
|
||||
prevCol = 0;
|
||||
row++;
|
||||
|
||||
return STATE_WORD;
|
||||
@@ -276,6 +288,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
emitToken(TYPE_WORD, chars.getCurr());
|
||||
|
||||
chars.skip();
|
||||
prevCol++;
|
||||
|
||||
return STATE_WORD;
|
||||
}
|
||||
@@ -345,7 +358,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
if (nestedMap.has(value)) {
|
||||
return !!nestedMap.get(value);
|
||||
} else {
|
||||
const status = (buffer.indexOf(value) > -1)
|
||||
const status = (buffer.indexOf(value) > -1);
|
||||
|
||||
nestedMap.set(value, status);
|
||||
|
||||
@@ -356,5 +369,5 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
|
||||
return {
|
||||
tokenize,
|
||||
isTokenNested,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -185,7 +185,7 @@ function parse(input: string, opts: ParseOptions = {}) {
|
||||
function handleTagStart(token: Token) {
|
||||
flushTagNodes();
|
||||
|
||||
const tagNode = TagNode.create(token.getValue(), {}, []);
|
||||
const tagNode = TagNode.create(token.getValue(), {}, [], { from: token.getStart(), to: token.getEnd() });
|
||||
const isNested = isTokenNested(token);
|
||||
|
||||
tagNodes.push(tagNode);
|
||||
@@ -203,6 +203,10 @@ function parse(input: string, opts: ParseOptions = {}) {
|
||||
* @param {Token} token
|
||||
*/
|
||||
function handleTagEnd(token: Token) {
|
||||
const lastTagNode = nestedNodes.last();
|
||||
if (isTagNode(lastTagNode)) {
|
||||
lastTagNode.setEnd({ from: token.getStart(), to: token.getEnd() });
|
||||
}
|
||||
flushTagNodes();
|
||||
|
||||
const lastNestedNode = nestedNodes.flush();
|
||||
|
||||
@@ -42,6 +42,14 @@ export class CharGrabber {
|
||||
return this.s[this.c.pos]
|
||||
}
|
||||
|
||||
getPos() {
|
||||
return this.c.pos;
|
||||
}
|
||||
|
||||
getLength() {
|
||||
return this.c.len;
|
||||
}
|
||||
|
||||
getRest() {
|
||||
return this.s.substring(this.c.pos)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user