
// com.alipay.sofa.lookout.server.prom.ql.lex.Lexer (Maven / Gradle / Ivy)
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alipay.sofa.lookout.server.prom.ql.lex;
import com.alipay.sofa.lookout.server.prom.util.NoopUtils;
import org.apache.commons.lang3.StringUtils;

import java.util.LinkedList;
import java.util.Locale;

import static com.alipay.sofa.lookout.server.prom.ql.lex.ItemType.*;
/**
* refer to the Lexer of prometheus
* Created by [email protected] on 2018/2/7.
*/
public class Lexer {
static final String lineComment = "#";
static final char EOF = Character.MIN_VALUE;
String input; // The string being scanned.
StateFn state; // The next lexing function to enter.
int pos = 0; // Current position in the input.
int start; // Start position of this item.
int width; // Width of last char read from input.
int lastPos; // Position of most recent item returned by nextItem.
LinkedList- queue = new LinkedList<>(); // Channel of scanned items.
int parenDepth; // Nesting depth of ( ) exprs.
boolean braceOpen; // Whether a { is opened.
boolean bracketOpen; // Whether a [ is opened.
char stringOpen; // Quote rune of the string currently being read.
// seriesDesc is set when a series description for the testing
// language is lexed.
boolean seriesDesc;
private Lexer(String input) {
this.input = input;
}
// lex creates a new scanner for the input string.
public static Lexer lex(String input) {
Lexer l = new Lexer(input);
//caller do resolve
l.run();
return l;
}
// run runs the state machine for the lexer.(函数类型,直接用函数名赋予引用)
private void run() {
state = lexStatements;
while (state != null) {
state = state.invoke(this);
}
}
// next returns the next rune in the input.
char next() {
if (pos >= input.length()) {
width = 0;
return EOF;
}
char r = input.charAt(pos);
width = 1;//char num;
pos += width;
return r;
}
// peek returns but does not consume the next rune in the input.
char peek() {
char r = next();
backup();
return r;
}
// backup steps back one rune. Can only be called once per call of next.
void backup() {
pos -= width;
}
// emit passes an item back to the client.
private void emit(ItemType t) {
queue.add(new Item(t, start, input.substring(start, pos)));
start = pos;//标记下一次启动地址;
}
// ignore skips over the pending input before this point.
void ignore() {
start = pos;
}
// accept consumes the next rune if it's from the valid set.
//判断下一个字符,是否包含存在于valid字符串中;
boolean accept(String valid) {
if (StringUtils.contains(valid, next())) {
return true;
}
backup();
return false;
}
// acceptRun consumes a run of runes from the valid set.
//如果匹配,则继续循环匹配下一个,效果相当于消费掉了匹配的;
void acceptRun(String valid) {
while (StringUtils.contains(valid, next())) {
NoopUtils.noop();
}
backup();
}
// lineNumber reports which line we're on, based on the position of
// the previous item returned by nextItem. Doing it this way
// means we don't have to worry about peek double counting.
//含有多少个换行符+1;表示当前行;
public int lineNumber() {
int number = StringUtils.countMatches(input.substring(0, lastPos), "\n");
return 1 + number;
}
// linePosition reports at which character in the current line
// we are on.
public int linePosition() {
int lb = input.substring(0, lastPos).lastIndexOf("\n");
if (lb < 0) {
return 1 + lastPos;
}
return 1 + lastPos - lb;
}
// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
StateFn errorf(String format, Object... args) {
queue.add(new Item(itemError, start, String.format(format, args)));
return null;
}
// StateFn errorf(String format) throws InterruptedException {
// return errorf(format, null);
// }
// nextItem returns the next item from the input.
public Item nextItem() {
Item item = queue.poll();
if (item == null) {//may be null
return null;
}
lastPos = item.pos;
return item;
}
// lexStatements is the top-level state for lexing.
static StateFn lexStatements = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
if (l.braceOpen) {
return lexInsideBraces;
}
if (StringUtils.startsWith(l.input.substring(l.pos), lineComment)) {
return lexLineComment;
}
char r = l.next();
if (r == EOF) {
if (l.parenDepth != 0) {
return l.errorf("unclosed left parenthesis");
} else if (l.bracketOpen) {
return l.errorf("unclosed left bracket");
}
l.emit(itemEOF);
return null;
} else if (r == ',') {
l.emit(itemComma);
} else if (isSpace(r)) {
return lexSpace;
} else if (r == '*')
l.emit(itemMUL);
else if (r == '/')
l.emit(itemDIV);
else if (r == '%')
l.emit(itemMOD);
else if (r == '+')
l.emit(itemADD);
else if (r == '-')
l.emit(itemSUB);
else if (r == '^')
l.emit(itemPOW);
else if (r == '=') {
char t = l.peek();
if (t == '=') {
l.next();
l.emit(itemEQL);
} else if (t == '~') {
return l.errorf("unexpected character after '=': %q", t);
} else {
l.emit(itemAssign);
}
} else if (r == '!') {
char t = l.next();
if (t == '=') {
l.emit(itemNEQ);
} else {
return l.errorf("unexpected character after '!': %q", t);
}
} else if (r == '<') {
//可能(< or <=),就需要peek试探;
char t = l.peek();
if (t == '=') {
l.next();
l.emit(itemLTE);
} else {
l.emit(itemLSS);
}
} else if (r == '>') {
char t = l.peek();
if (t == '=') {
l.next();
l.emit(itemGTE);
} else {
l.emit(itemGTR);
}
} else if (isDigit(r) || (r == '.' && isDigit(l.peek()))) {
l.backup();
return lexNumberOrDuration;
} else if (r == '"' || r == '\'') {
l.stringOpen = r;
return lexString;
} else if (r == '`') {
l.stringOpen = r;
return lexRawString;
} else if (isAlpha(r) || r == ':') {
l.backup();
return lexKeywordOrIdentifier;
} else if (r == '(') {
l.emit(itemLeftParen);
l.parenDepth++;
return lexStatements;
} else if (r == ')') {
l.emit(itemRightParen);
l.parenDepth--;
if (l.parenDepth < 0) {
return l.errorf("unexpected right parenthesis %q", r);
}
return lexStatements;
} else if (r == '{') {
l.emit(itemLeftBrace);
l.braceOpen = true;
return lexInsideBraces.invoke(l);
} else if (r == '[') {
if (l.bracketOpen) {
return l.errorf("unexpected left bracket %q", r);
}
l.emit(itemLeftBracket);
l.bracketOpen = true;
return lexDuration;
} else if (r == ']') {
if (!l.bracketOpen) {
return l.errorf("unexpected right bracket %q", r);
}
l.emit(itemRightBracket);
l.bracketOpen = false;
} else {
return l.errorf("unexpected character: %q", r);
}
return lexStatements;
}
};
// lexInsideBraces scans the inside of a vector selector. Keywords are ignored and
// scanned as identifiers.
static StateFn lexInsideBraces = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
if (StringUtils.startsWith(l.input.substring(l.pos), lineComment)) {
return lexLineComment;
}
char r = l.next();
if (r == EOF) {
return l.errorf("unexpected end of input inside braces");
} else if (isSpace(r)) {
return lexSpace;
} else if (isAlpha(r)) {
l.backup();
return lexIdentifier;
} else if (r == ',') {
l.emit(itemComma);
} else if (r == '"' || r == '\'') {
l.stringOpen = r;
return lexString;
} else if (r == '`') {
l.stringOpen = r;
return lexRawString;
} else if (r == '=') {
if (l.next() == '~') {
if (l.peek() == '|') {
l.next();
l.emit(itemLiteralOr);
} else {
l.emit(itemEQLRegex);
}
} else {
l.backup();
l.emit(itemEQL);
}
} else if (r == '!') {
char nr = l.next();
switch (nr) {
case '~':
if (l.peek() == '|') {
l.next();
l.emit(itemNotLiteralOr);
} else {
l.emit(itemNEQRegex);
}
break;
case '=':
l.emit(itemNEQ);
break;
default:
return l.errorf("unexpected character after '!' inside braces: %q", nr);
}
} else if (r == '{') {
return l.errorf("unexpected left brace %q", r);
} else if (r == '}') {
l.emit(itemRightBrace);
l.braceOpen = false;
if (l.seriesDesc) {
return lexValueSequence;
}
return lexStatements;
} else {
l.errorf("unexpected character inside braces: %q", r);
}
return lexInsideBraces;
}
};
// lexValueSequence scans a value sequence of a series description.
static StateFn lexValueSequence = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
char r = l.next();
if (r == EOF) {
return lexStatements;
} else if (isSpace(r)) {
lexSpace.invoke(l);
} else if (r == '+') {
l.emit(itemADD);
} else if (r == '-') {
l.emit(itemSUB);
} else if (r == 'x') {
l.emit(itemTimes);
} else if (r == '_') {
l.emit(itemBlank);
} else if (isDigit(r) || (r == '.' && isDigit(l.peek()))) {
l.backup();
lexNumber.invoke(l);
} else if (isAlpha(r)) {
l.backup();
// We might lex invalid items here but this will be caught by the parser.
return lexKeywordOrIdentifier;
} else {
return l.errorf("unexpected character in series sequence: %q", r);
}
return lexValueSequence;
}
};
// lexEscape scans a string escape sequence. The initial escaping character (\)
// has already been seen.
//
// NOTE: This function as well as the helper function digitVal() and associated
// tests have been adapted from the corresponding functions in the "go/scanner"
// package of the Go standard library to work for Prometheus-style strings.
// None of the actual escaping/quoting logic was changed in this function - it
// was only modified to integrate with our lexer.
static void lexEscape(Lexer l) {
int n = 0;
int base = 0;
int max = 0;
char ch = l.next();
if (ch == 'a' || ch == 'b' || ch == 'f' || ch == 'n' || ch == 'r' || ch == 't' || ch == 'v' || ch == '\\' || ch == l.stringOpen) {
return;
} else if (ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4' || ch == '5' || ch == '6' || ch == '7') {
n = 3;
base = 8;
max = 255;
} else if (ch == 'x') {
ch = l.next();
n = 2;
base = 16;
max = 255;
} else if (ch == 'u') {
ch = l.next();
n = 4;
base = 16;
max = Character.MAX_CODE_POINT;
} else if (ch == 'U') {
ch = l.next();
n = 8;
base = 16;
max = Character.MAX_CODE_POINT;
} else if (ch == EOF) {
l.errorf("escape sequence not terminated");
} else {
l.errorf("unknown escape sequence %#U", ch);
}
int x = 0;
while (n > 0) {
int d = digitVal(ch);
if (d >= base) {
if (ch == EOF) {
l.errorf("escape sequence not terminated");
}
l.errorf("illegal character %#U in escape sequence", ch);
}
x = x * base + d;
ch = l.next();
n--;
}
if (x > max || 0xD800 <= x && x < 0xE000) {
l.errorf("escape sequence is an invalid Unicode code point");
}
}
// digitVal returns the digit value of a rune or 16 in case the rune does not
// represent a valid digit.
private static int digitVal(char ch) {
if ('0' <= ch && ch <= '9') {
return (int) (ch - '0');
} else if ('a' <= ch && ch <= 'f') {
return (int) (ch - 'a' + 10);
} else if ('A' <= ch && ch <= 'F') {
return (int) (ch - 'A' + 10);
}
return 16; // Larger than any legal digit val.
}
// lexString scans a quoted string. The initial quote has already been seen.
static StateFn lexString = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
while (true) {
char x = l.next();
if (x == '\\')
lexEscape(l);
else if (x == EOF || x == '\n') {
return l.errorf("unterminated quoted string");
} else if (x == l.stringOpen) {
break;
}
}
l.emit(itemString);
return lexStatements;
}
};
// lexRawString scans a raw quoted string. The initial quote has already been seen.
static StateFn lexRawString = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
for (; true; ) {
char r = l.next();
if (r == EOF)
return l.errorf("unterminated raw string");
else if (r == l.stringOpen) {
break;
}
}
l.emit(itemString);
return lexStatements;
}
};
// lexSpace scans a run of space characters. One space has already been seen.
static StateFn lexSpace = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
while (isSpace(l.peek())) {
l.next();
}
l.ignore();
return lexStatements;
}
};
// lexLineComment scans a line comment. Left comment marker is known to be present.
static StateFn lexLineComment = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
l.pos += lineComment.length();
char r;
for (r = l.next(); !isEndOfLine(r) && r != EOF; ) {
r = l.next();
}
l.backup();
l.emit(itemComment);
return lexStatements;
}
};
static StateFn lexDuration = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
if (scanNumber(l)) {
return l.errorf("missing unit character in duration");
}
// Next two chars must be a valid unit and a non-alphanumeric.
if (l.accept("smhdwy")) {
if (isAlphaNumeric(l.next())) {
return l.errorf("bad duration syntax: %q", l.input.substring(l.start, l.pos));
}
l.backup();
l.emit(itemDuration);
return lexStatements;
}
return l.errorf("bad duration syntax: %q", l.input.substring(l.start, l.pos));
}
};
// lexNumber scans a number: decimal, hex, oct or float.
static StateFn lexNumber = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
if (!scanNumber(l)) {
return l.errorf("bad number syntax: %q", l.input.substring(l.start, l.pos));
}
l.emit(itemNumber);
return lexStatements;
}
};
// lexNumberOrDuration scans a number or a duration item.
static StateFn lexNumberOrDuration = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
if (scanNumber(l)) {
l.emit(itemNumber);
return lexStatements;
}
// Next two chars must be a valid unit and a non-alphanumeric.
if (l.accept("smhdwy")) {
if (isAlphaNumeric(l.next())) {
return l.errorf("bad number or duration syntax: %q", l.input.substring(l.start, l.pos));
}
l.backup();
l.emit(itemDuration);
return lexStatements;
}
return l.errorf("bad number or duration syntax: %q", l.input.substring(l.start, l.pos));
}
};
// scanNumber scans numbers of different formats. The scanned item is
// not necessarily a valid number. This case is caught by the parser.
private static boolean scanNumber(Lexer l) {
String digits = "0123456789";
// Disallow hexadecimal in series descriptions as the syntax is ambiguous.
//是否十六进制;
if (!l.seriesDesc && l.accept("0") && l.accept("xX")) {
digits = "0123456789abcdefABCDEF";
}
l.acceptRun(digits);
if (l.accept(".")) {
l.acceptRun(digits);
}
if (l.accept("eE")) {
l.accept("+-");
l.acceptRun("0123456789");
}
// Next thing must not be alphanumeric unless it's the times token
// for series repetitions.
char r = l.peek();
if ((l.seriesDesc && r == 'x') || !isAlphaNumeric(r)) {
return true;
}
return false;
}
// lexIdentifier scans an alphanumeric identifier. The next character
// is known to be a letter.
static StateFn lexIdentifier = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
for (; isAlphaNumeric(l.next()); ) {
// absorb
}
l.backup();
l.emit(itemIdentifier);
return lexStatements;
}
};
// lexKeywordOrIdentifier scans an alphanumeric identifier which may contain
// a colon rune. If the identifier is a keyword the respective keyword item
// is scanned.
static StateFn lexKeywordOrIdentifier = new StateFn() {
@Override
public StateFn invoke(Lexer l) {
for (; true; ) {
String word = null;
char r = l.next();
if (isAlphaNumeric(r) || r == ':' || r == '.') {
NoopUtils.noop();
} else {
l.backup();
word = l.input.substring(l.start, l.pos);
ItemType kw = ItemTypeSupport.key.get(StringUtils.lowerCase(word));
if (kw != null) {
l.emit(kw);
} else if (!StringUtils.contains(word, ":")) {
l.emit(itemIdentifier);
} else {
l.emit(itemMetricIdentifier);
}
break;
}
}
if (l.seriesDesc && l.peek() != '{') {
return lexValueSequence;
}
return lexStatements;
}
};
private static boolean isSpace(char r) {
return r == ' ' || r == '\t' || r == '\n' || r == '\r';
}
// isEndOfLine reports whether r is an end-of-line character.
private static boolean isEndOfLine(char r) {
return r == '\r' || r == '\n';
}
// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
private static boolean isAlphaNumeric(char r) {
return isAlpha(r) || isDigit(r);
}
// isDigit reports whether r is a digit. Note: we cannot use unicode.IsDigit()
// instead because that also classifies non-Latin digits as digits. See
// https://github.com/prometheus/prometheus/issues/939.
private static boolean isDigit(char r) {
return '0' <= r && r <= '9';
}
// isAlpha reports whether r is an alphabetic or underscore.
private static boolean isAlpha(char r) {
return r == '_' || ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z');
}
// isLabel reports whether the string can be used as label. 字母或数字都可以
public static boolean isLabel(String s) {
if (StringUtils.isEmpty(s) || !isAlpha(s.charAt(0))) {
return false;
}
char[] chars = s.toCharArray();
for (int i = 1; i < s.length(); i++) {
if (!isAlphaNumeric(chars[i])) {
return false;
}
}
return true;
}
//===getter&&setter===
public String getInput() {
return input;
}
public int getLastPos() {
return lastPos;
}
public void setSeriesDesc(boolean seriesDesc) {
this.seriesDesc = seriesDesc;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy