2
0
mirror of https://github.com/tenrok/BBob.git synced 2026-06-11 18:02:26 +03:00

feat: add start and end positions of tag nodes (#246)

Closes #134

* feat: Add start and end positions of tag nodes

Improves accuracy of row/col error reporting. Now targets the start of the relevant token instead of the end.

* Simplify language for TagNode and Token

* Update static TagNode.create to ingest setStart() logic

improve readability of end pos offset for no attr tags
This commit is contained in:
Steven Chang
2024-08-01 00:42:29 -07:00
committed by GitHub
parent 0beab56d7f
commit 40848747d4
13 changed files with 929 additions and 386 deletions
+30 -10
View File
@@ -5,12 +5,14 @@ import {
} from '@bbob/plugin-helper';
import type { Token as TokenInterface } from "@bbob/types";
// type, value, line, row,
// type, value, line, row, start pos, end pos
const TOKEN_TYPE_ID = 't'; // 0;
const TOKEN_VALUE_ID = 'v'; // 1;
const TOKEN_COLUMN_ID = 'r'; // 2;
const TOKEN_LINE_ID = 'l'; // 3;
const TOKEN_START_POS_ID = 's'; // 4;
const TOKEN_END_POS_ID = 'e'; // 5;
const TOKEN_TYPE_WORD = 1; // 'word';
const TOKEN_TYPE_TAG = 2; // 'tag';
@@ -31,11 +33,15 @@ const getTokenLine = (token: Token) => (token && token[TOKEN_LINE_ID]) || 0;
const getTokenColumn = (token: Token) => (token && token[TOKEN_COLUMN_ID]) || 0;
/**
 * Reads the start position stored on a token.
 * Falls back to 0 when the token is missing or its start position is unset/zero.
 */
const getStartPosition = (token: Token) => {
  const startPos = token && token[TOKEN_START_POS_ID];

  return startPos || 0;
};
/**
 * Reads the end position stored on a token.
 * Falls back to 0 when the token is missing or its end position is unset/zero.
 */
const getEndPosition = (token: Token) => {
  const endPos = token && token[TOKEN_END_POS_ID];

  return endPos || 0;
};
const isTextToken = (token: Token) => {
if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') {
return token[TOKEN_TYPE_ID] === TOKEN_TYPE_SPACE
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_NEW_LINE
|| token[TOKEN_TYPE_ID] === TOKEN_TYPE_WORD;
}
return false;
@@ -88,21 +94,25 @@ const tokenToText = (token: Token) => {
* @export
* @class Token
*/
class Token<TokenValue = string> implements TokenInterface {
readonly t: number // type
readonly v: string // value
readonly l: number // line
readonly r: number // row
class Token<TokenValue = string> implements TokenInterface {
readonly t: number; // type
readonly v: string; // value
readonly l: number; // line
readonly r: number; // row
readonly s: number; // start pos
readonly e: number; // end pos
constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0) {
constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0, start: number = 0, end: number = 0) {
this[TOKEN_LINE_ID] = row;
this[TOKEN_COLUMN_ID] = col;
this[TOKEN_TYPE_ID] = type || 0;
this[TOKEN_VALUE_ID] = String(value);
this[TOKEN_START_POS_ID] = start;
this[TOKEN_END_POS_ID] = end;
}
get type() {
return this[TOKEN_TYPE_ID]
return this[TOKEN_TYPE_ID];
}
isEmpty() {
@@ -149,6 +159,14 @@ class Token<TokenValue = string> implements TokenInterface {
return getTokenColumn(this);
}
/**
 * Returns the start position stored on this token (0 when none is set).
 */
getStart() {
return getStartPosition(this);
}
/**
 * Returns the end position stored on this token (0 when none is set).
 */
getEnd() {
return getEndPosition(this);
}
toString() {
return tokenToText(this);
}
@@ -158,6 +176,8 @@ export const TYPE_ID = TOKEN_TYPE_ID;
export const VALUE_ID = TOKEN_VALUE_ID;
export const LINE_ID = TOKEN_LINE_ID;
export const COLUMN_ID = TOKEN_COLUMN_ID;
export const START_POS_ID = TOKEN_START_POS_ID;
export const END_POS_ID = TOKEN_END_POS_ID;
export const TYPE_WORD = TOKEN_TYPE_WORD;
export const TYPE_TAG = TOKEN_TYPE_TAG;
export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
+23 -10
View File
@@ -20,8 +20,8 @@ import { CharGrabber, createCharGrabber, trimChar, unquote } from './utils';
// for cases <!-- -->
const EM = '!';
export function createTokenOfType(type: number, value: string, r = 0, cl = 0) {
return new Token(type, value, r, cl)
/**
 * Builds a new Token; all arguments are forwarded to the Token constructor
 * in the same order.
 *
 * @param {Number} type token type id
 * @param {String} value token text
 * @param {Number} r row (line) of the token, defaults to 0
 * @param {Number} cl column of the token, defaults to 0
 * @param {Number} p start position of the token, defaults to 0
 * @param {Number} e end position of the token, defaults to 0
 */
export function createTokenOfType(type: number, value: string, r = 0, cl = 0, p = 0, e = 0) {
return new Token(type, value, r, cl, p, e);
}
const STATE_WORD = 0;
@@ -34,6 +34,7 @@ const TAG_STATE_VALUE = 2;
const WHITESPACES = [SPACE, TAB];
const SPECIAL_CHARS = [EQ, SPACE, TAB];
const END_POS_OFFSET = 2; // length + start position offset
const isWhiteSpace = (char: string) => (WHITESPACES.indexOf(char) >= 0);
const isEscapeChar = (char: string) => char === BACKSLASH;
@@ -43,6 +44,7 @@ const unq = (val: string) => unquote(trimChar(val, QUOTEMARK));
export function createLexer(buffer: string, options: LexerOptions = {}): LexerTokenizer {
let row = 0;
let prevCol = 0;
let col = 0;
let tokenIndex = -1;
@@ -89,16 +91,17 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
* @param {Number} type
* @param {String} value
*/
function emitToken(type: number, value: string) {
const token = createTokenOfType(type, value, row, col);
function emitToken(type: number, value: string, startPos?: number, endPos?: number) {
// The token's column is `prevCol` (the column recorded after the previous emit),
// not the current `col`. startPos/endPos are optional; createTokenOfType
// defaults them to 0 when they are omitted.
const token = createTokenOfType(type, value, row, prevCol, startPos, endPos);
// Notify the listener before the token is stored.
onToken(token);
// Remember the current column so the next emitted token uses it as its column.
prevCol = col;
// Append the token at the next free index of the tokens list.
tokenIndex += 1;
tokens[tokenIndex] = token;
}
function nextTagState(tagChars: CharGrabber, isSingleValueTag: boolean) {
function nextTagState(tagChars: CharGrabber, isSingleValueTag: boolean, masterStartPos: number) {
if (tagMode === TAG_STATE_ATTR) {
const validAttrName = (char: string) => !(char === EQ || isWhiteSpace(char));
const name = tagChars.grabWhile(validAttrName);
@@ -161,6 +164,9 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
tagChars.skip();
emitToken(TYPE_ATTR_VALUE, unq(name));
if (tagChars.getPrev() === QUOTEMARK) {
prevCol++;
}
if (tagChars.isLast()) {
return TAG_STATE_NAME;
@@ -169,13 +175,15 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
return TAG_STATE_ATTR;
}
const start = masterStartPos + tagChars.getPos() - 1;
const validName = (char: string) => !(char === EQ || isWhiteSpace(char) || tagChars.isLast());
const name = tagChars.grabWhile(validName);
emitToken(TYPE_TAG, name);
emitToken(TYPE_TAG, name, start, masterStartPos + tagChars.getLength() + 1);
checkContextFreeMode(name);
tagChars.skip();
prevCol++;
// in cases when we have [url=someval]GET[/url] and we don't need to parse everything
if (isSingleValueTag) {
@@ -209,11 +217,13 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
const isClosingTag = substr[0] === SLASH;
if (isNoAttrsInTag || isClosingTag) {
const startPos = chars.getPos() - 1;
const name = chars.grabWhile((char) => char !== closeTag);
const endPos = startPos + name.length + END_POS_OFFSET;
chars.skip(); // skip closeTag
emitToken(TYPE_TAG, name);
emitToken(TYPE_TAG, name, startPos, endPos);
checkContextFreeMode(name, isClosingTag);
return STATE_WORD;
@@ -223,6 +233,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
}
function stateAttrs() {
const startPos = chars.getPos();
const silent = true;
const tagStr = chars.grabWhile((char) => char !== closeTag, silent);
const tagGrabber = createCharGrabber(tagStr, { onSkip });
@@ -231,7 +242,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
tagMode = TAG_STATE_NAME;
while (tagGrabber.hasNext()) {
tagMode = nextTagState(tagGrabber, !hasSpace);
tagMode = nextTagState(tagGrabber, !hasSpace, startPos);
}
chars.skip(); // skip closeTag
@@ -246,6 +257,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
chars.skip();
col = 0;
prevCol = 0;
row++;
return STATE_WORD;
@@ -276,6 +288,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
emitToken(TYPE_WORD, chars.getCurr());
chars.skip();
prevCol++;
return STATE_WORD;
}
@@ -345,7 +358,7 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
if (nestedMap.has(value)) {
return !!nestedMap.get(value);
} else {
const status = (buffer.indexOf(value) > -1)
const status = (buffer.indexOf(value) > -1);
nestedMap.set(value, status);
@@ -356,5 +369,5 @@ export function createLexer(buffer: string, options: LexerOptions = {}): LexerTo
return {
tokenize,
isTokenNested,
}
};
}
+5 -1
View File
@@ -185,7 +185,7 @@ function parse(input: string, opts: ParseOptions = {}) {
function handleTagStart(token: Token) {
flushTagNodes();
const tagNode = TagNode.create(token.getValue(), {}, []);
const tagNode = TagNode.create(token.getValue(), {}, [], { from: token.getStart(), to: token.getEnd() });
const isNested = isTokenNested(token);
tagNodes.push(tagNode);
@@ -203,6 +203,10 @@ function parse(input: string, opts: ParseOptions = {}) {
* @param {Token} token
*/
function handleTagEnd(token: Token) {
const lastTagNode = nestedNodes.last();
if (isTagNode(lastTagNode)) {
lastTagNode.setEnd({ from: token.getStart(), to: token.getEnd() });
}
flushTagNodes();
const lastNestedNode = nestedNodes.flush();
+8
View File
@@ -42,6 +42,14 @@ export class CharGrabber {
return this.s[this.c.pos]
}
/**
 * Returns the grabber's current cursor position (`this.c.pos`), i.e. the
 * index into the source string that getCurr() reads from.
 */
getPos() {
return this.c.pos;
}
/**
 * Returns the length tracked by the cursor (`this.c.len`).
 * NOTE(review): presumably the length of the source string — confirm
 * against where the cursor is initialized.
 */
getLength() {
return this.c.len;
}
getRest() {
return this.s.substring(this.c.pos)
}