io.questdb.std.GenericLexer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of questdb Show documentation
Show all versions of questdb Show documentation
QuestDB is high performance SQL time series database
/*******************************************************************************
* ___ _ ____ ____
* / _ \ _ _ ___ ___| |_| _ \| __ )
* | | | | | | |/ _ \/ __| __| | | | _ \
* | |_| | |_| | __/\__ \ |_| |_| | |_) |
* \__\_\\__,_|\___||___/\__|____/|____/
*
* Copyright (c) 2014-2019 Appsicle
* Copyright (c) 2019-2024 QuestDB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package io.questdb.std;
import io.questdb.griffin.SqlException;
import io.questdb.std.str.AbstractCharSequence;
import io.questdb.std.str.Utf16Sink;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.util.ArrayDeque;
import java.util.Comparator;
public class GenericLexer implements ImmutableIterator {
public static final LenComparator COMPARATOR = new LenComparator();
public static final CharSequenceHashSet WHITESPACE = new CharSequenceHashSet();
public static final IntHashSet WHITESPACE_CH = new IntHashSet();
private final ObjectPool csPairPool;
private final ObjectPool csPool;
private final CharSequence flyweightSequence = new InternalFloatingSequence();
private final IntStack parkedPosition = new IntStack();
private final ArrayDeque parkedUnparsed = new ArrayDeque<>();
private final IntObjHashMap> symbols = new IntObjHashMap<>();
private final ArrayDeque unparsed = new ArrayDeque<>();
private final IntStack unparsedPosition = new IntStack();
private int _hi;
private int _len;
private int _lo;
private int _pos;
private int _start;
private CharSequence content;
private CharSequence last;
private CharSequence next = null;
public GenericLexer(int poolCapacity) {
csPool = new ObjectPool<>(FloatingSequence::new, poolCapacity);
csPairPool = new ObjectPool<>(FloatingSequencePair::new, poolCapacity);
for (int i = 0, n = WHITESPACE.size(); i < n; i++) {
defineSymbol(Chars.toString(WHITESPACE.get(i)));
}
}
public static CharSequence assertNoDots(CharSequence value, int position) throws SqlException {
int len = value.length();
if (len == 1 && value.charAt(0) == '.') {
throw SqlException.position(position).put("'.' is an invalid table name");
}
for (int i = 0; i < len; i++) {
char c = value.charAt(i);
if ((c == '.' && i < len - 1 && value.charAt(i + 1) == '.')) {
throw SqlException.position(position + i).put('\'').put(c).put("' is not allowed");
}
}
return value;
}
public static CharSequence assertNoDotsAndSlashes(CharSequence value, int position) throws SqlException {
int len = value.length();
if (len == 1 && value.charAt(0) == '.') {
throw SqlException.position(position).put("'.' is an invalid table name");
}
for (int i = 0; i < len; i++) {
char c = value.charAt(i);
if (c == '/' || c == '\\' || (c == '.' && i < len - 1 && value.charAt(i + 1) == '.')) {
throw SqlException.position(position + i).put('\'').put(c).put("' is not allowed");
}
}
return value;
}
public static CharSequence immutableOf(CharSequence value) {
if (value instanceof InternalFloatingSequence) {
GenericLexer lexer = ((InternalFloatingSequence) value).getParent();
FloatingSequence that = lexer.csPool.next();
that.lo = lexer._lo;
that.hi = lexer._hi;
assert that.lo <= that.hi;
return that;
}
return value;
}
public static CharSequence unquote(CharSequence value) {
if (Chars.isQuoted(value)) {
return value.subSequence(1, value.length() - 1);
}
return immutableOf(value);
}
public void backTo(int position, CharSequence lastSeen) {
if (position < 0 || position > _len) {
throw new IndexOutOfBoundsException();
}
_pos = position;
last = lastSeen;
next = null;
}
public final void defineSymbol(String token) {
char c0 = token.charAt(0);
ObjList l;
int index = symbols.keyIndex(c0);
if (index > -1) {
l = new ObjList<>();
symbols.putAt(index, c0, l);
} else {
l = symbols.valueAtQuick(index);
}
l.add(token);
l.sort(COMPARATOR);
}
public CharSequence getContent() {
return content;
}
public int getPosition() {
return _pos;
}
public int getTokenHi() {
return _hi;
}
public void goToPosition(int position) {
assert position <= this._len;
this._pos = position;
}
@Override
public boolean hasNext() {
boolean n = next != null || hasUnparsed() || (content != null && _pos < _len);
if (!n && last != null) {
last = null;
}
return n;
}
public boolean hasUnparsed() {
return !unparsed.isEmpty();
}
public CharSequence immutableBetween(int lo, int hi) {
FloatingSequence that = csPool.next();
that.lo = lo;
that.hi = hi;
assert that.lo <= that.hi;
return that;
}
public CharSequence immutablePairOf(CharSequence value0, CharSequence value1) {
return immutablePairOf(value0, FloatingSequencePair.NO_SEPARATOR, value1);
}
public CharSequence immutablePairOf(CharSequence value0, char separator, CharSequence value1) {
FloatingSequencePair seqPair = csPairPool.next();
seqPair.cs0 = (FloatingSequence) value0;
seqPair.cs1 = (FloatingSequence) immutableOf(value1);
seqPair.sep = separator;
return seqPair;
}
public int lastTokenPosition() {
return _lo;
}
@Override
public CharSequence next() {
if (!unparsed.isEmpty()) {
this._lo = unparsedPosition.pollLast();
this._pos = unparsedPosition.pollLast();
return last = unparsed.pollLast();
}
this._lo = this._hi;
if (next != null) {
CharSequence result = next;
next = null;
return last = result;
}
this._lo = this._hi = _pos;
char term = 0;
int openTermIdx = -1;
while (_pos < _len) {
char c = content.charAt(_pos++);
CharSequence token;
switch (term) {
case 0:
switch (c) {
case '\'':
term = '\'';
openTermIdx = _pos - 1;
break;
case '"':
term = '"';
openTermIdx = _pos - 1;
break;
case '`':
term = '`';
openTermIdx = _pos - 1;
break;
default:
if ((token = token(c)) != null) {
return last = token;
} else {
_hi++;
}
break;
}
break;
case '\'':
if (c == '\'') {
_hi += 2;
if (_pos < _len && content.charAt(_pos) == '\'') {
_pos++;
} else {
return last = flyweightSequence;
}
} else {
_hi++;
}
break;
case '"':
if (c == '"') {
_hi += 2;
if (_pos < _len && content.charAt(_pos) == '"') {
_pos++;
} else {
return last = flyweightSequence;
}
} else {
_hi++;
}
break;
case '`':
if (c == '`') {
_hi += 2;
return last = flyweightSequence;
} else {
_hi++;
}
break;
default:
break;
}
}
if (openTermIdx != -1) { // dangling terms
if (_len == 1) {
_hi += 1; // emit term
} else {
if (openTermIdx == _lo) { // term is at the start
_hi = _lo + 1; // emit term
_pos = _hi; // rewind pos
} else if (openTermIdx == _len - 1) { // term is at the end, high is right on term
FloatingSequence termFs = csPool.next();
termFs.lo = _hi;
termFs.hi = _hi + 1;
next = termFs; // emit term next
} else { // term is somewhere in between
_hi = openTermIdx; // emit whatever comes before term
_pos = openTermIdx; // rewind pos
}
}
}
return last = flyweightSequence;
}
public void of(CharSequence cs) {
of(cs, 0, cs == null ? 0 : cs.length());
}
public void of(CharSequence cs, int lo, int hi) {
this.csPool.clear();
this.csPairPool.clear();
this.content = cs;
this._start = lo;
this._pos = lo;
this._len = hi;
this.next = null;
this.unparsed.clear();
this.unparsedPosition.clear();
this.last = null;
}
public CharSequence peek() {
return next;
}
public void restart() {
this.csPool.clear();
this.csPairPool.clear();
this._pos = this._start;
this.next = null;
this.unparsed.clear();
this.unparsedPosition.clear();
this.last = null;
this.parkedPosition.clear();
this.parkedUnparsed.clear();
}
public void stash() {
int count = 0;
while (!unparsed.isEmpty()) {
parkedUnparsed.push(unparsed.pop());
parkedPosition.push(unparsedPosition.pop());
parkedPosition.push(unparsedPosition.pop());
count++;
}
parkedPosition.push(getPosition());
parkedPosition.push(count);
// clear next because we create a new parsing context
next = null;
}
public void unparse(CharSequence what, int last, int pos) {
unparsed.push(what);
unparsedPosition.push(last);
unparsedPosition.push(pos);
}
public void unparseLast() {
if (last != null) {
unparsed.push(immutableOf(last));
unparsedPosition.push(lastTokenPosition());
unparsedPosition.push(getPosition());
}
}
public void unstash() {
int count = parkedPosition.pop();
_pos = parkedPosition.pop();
unparsed.clear();
unparsedPosition.clear();
while (count > 0) {
unparsed.push(parkedUnparsed.pop());
unparsedPosition.push(parkedPosition.pop()); // last
unparsedPosition.push(parkedPosition.pop()); // pos
count--;
}
// clear next because we create a new parsing context
next = null;
}
private static CharSequence findToken0(char c, CharSequence content, int _pos, int _len, IntObjHashMap> symbols) {
final int index = symbols.keyIndex(c);
return index > -1 ? null : findToken00(content, _pos, _len, symbols, index);
}
@Nullable
private static CharSequence findToken00(CharSequence content, int _pos, int _len, IntObjHashMap> symbols, int index) {
final ObjList l = symbols.valueAt(index);
for (int i = 0, sz = l.size(); i < sz; i++) {
CharSequence txt = l.getQuick(i);
int n = txt.length();
boolean match = (n - 2) < (_len - _pos);
if (match) {
for (int k = 1; k < n; k++) {
if (content.charAt(_pos + (k - 1)) != txt.charAt(k)) {
match = false;
break;
}
}
}
if (match) {
return txt;
}
}
return null;
}
private CharSequence token(char c) {
CharSequence t = findToken0(c, content, _pos, _len, symbols);
if (t != null) {
_pos = _pos + t.length() - 1;
if (_lo == _hi) {
return t;
}
next = t;
return flyweightSequence;
} else {
return null;
}
}
public static class FloatingSequencePair extends AbstractCharSequence implements Mutable {
public static final char NO_SEPARATOR = (char) 0;
FloatingSequence cs0;
FloatingSequence cs1;
char sep = NO_SEPARATOR;
@Override
public char charAt(int index) {
int cs0Len = cs0.length();
if (index < cs0Len) {
return cs0.charAt(index);
}
if (sep == NO_SEPARATOR) {
return cs1.charAt(index - cs0Len);
}
return index == cs0Len ? sep : cs1.charAt(index - cs0Len - 1);
}
@Override
public void clear() {
// no-op
}
@Override
public int length() {
return cs0.length() + cs1.length() + (sep != NO_SEPARATOR ? 1 : 0);
}
@NotNull
@Override
public String toString() {
final Utf16Sink b = Misc.getThreadLocalSink();
b.put(cs0);
if (sep != NO_SEPARATOR) {
b.put(sep);
}
b.put(cs1);
return b.toString();
}
}
public static class LenComparator implements Comparator {
@Override
public int compare(CharSequence o1, CharSequence o2) {
return o2.length() - o1.length();
}
}
public class FloatingSequence extends AbstractCharSequence implements Mutable {
int hi;
int lo;
@Override
public char charAt(int index) {
return content.charAt(lo + index);
}
@Override
public void clear() {
}
public int getHi() {
return hi;
}
public int getLo() {
return lo;
}
@Override
public int length() {
return hi - lo;
}
public void setHi(int hi) {
this.hi = hi;
}
public void setLo(int lo) {
this.lo = lo;
}
@Override
protected final CharSequence _subSequence(int start, int end) {
FloatingSequence that = csPool.next();
that.lo = lo + start;
that.hi = lo + end;
assert that.lo <= that.hi;
return that;
}
}
public class InternalFloatingSequence extends AbstractCharSequence {
@Override
public char charAt(int index) {
return content.charAt(_lo + index);
}
@Override
public int length() {
return _hi - _lo;
}
@Override
protected CharSequence _subSequence(int start, int end) {
FloatingSequence next = csPool.next();
next.lo = _lo + start;
next.hi = _lo + end;
assert next.lo <= next.hi;
return next;
}
GenericLexer getParent() {
return GenericLexer.this;
}
}
static {
WHITESPACE.add(" ");
WHITESPACE.add("\t");
WHITESPACE.add("\n");
WHITESPACE.add("\r");
WHITESPACE_CH.add(' ');
WHITESPACE_CH.add('\t');
WHITESPACE_CH.add('\n');
WHITESPACE_CH.add('\r');
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy