
// Listing of io.questdb.cutlass.line.tcp.LineTcpParser (Maven / Gradle / Ivy)
/*******************************************************************************
* ___ _ ____ ____
* / _ \ _ _ ___ ___| |_| _ \| __ )
* | | | | | | |/ _ \/ __| __| | | | _ \
* | |_| | |_| | __/\__ \ |_| |_| | |_) |
* \__\_\\__,_|\___||___/\__|____/|____/
*
* Copyright (c) 2014-2019 Appsicle
* Copyright (c) 2019-2022 QuestDB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package io.questdb.cutlass.line.tcp;
import io.questdb.griffin.SqlKeywords;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.std.Numbers;
import io.questdb.std.NumericException;
import io.questdb.std.ObjList;
import io.questdb.std.Unsafe;
import io.questdb.std.str.DirectByteCharSequence;
public class LineTcpParser {
// Value held by the timestamp field when the parsed line carried no timestamp;
// hasTimestamp() compares against it.
public static final long NULL_TIMESTAMP = Numbers.LONG_NaN;
// Entity type codes exposed through ProtoEntity.getType().
// The parser in this class assigns NULL, TAG, FLOAT, INTEGER, STRING, SYMBOL,
// BOOLEAN, LONG256 and TIMESTAMP. The remaining codes are not assigned anywhere
// in this class (presumably they are used by downstream consumers - TODO confirm).
public static final byte ENTITY_TYPE_NULL = 0;
public static final byte ENTITY_TYPE_TAG = 1;
public static final byte ENTITY_TYPE_FLOAT = 2;
public static final byte ENTITY_TYPE_INTEGER = 3;
public static final byte ENTITY_TYPE_STRING = 4;
public static final byte ENTITY_TYPE_SYMBOL = 5;
public static final byte ENTITY_TYPE_BOOLEAN = 6;
public static final byte ENTITY_TYPE_LONG256 = 7;
public static final byte ENTITY_TYPE_CACHED_TAG = 8;
public static final byte ENTITY_TYPE_GEOBYTE = 9;
public static final byte ENTITY_TYPE_GEOSHORT = 10;
public static final byte ENTITY_TYPE_GEOINT = 11;
public static final byte ENTITY_TYPE_GEOLONG = 12;
public static final byte ENTITY_TYPE_TIMESTAMP = 13;
// One past ENTITY_TYPE_TIMESTAMP, i.e. the number of codes in the range 0..13.
public static final int N_ENTITY_TYPES = ENTITY_TYPE_TIMESTAMP + 1;
public static final byte ENTITY_TYPE_LONG = 14;
public static final byte ENTITY_TYPE_DOUBLE = 15;
public static final byte ENTITY_TYPE_SHORT = 16;
public static final byte ENTITY_TYPE_BYTE = 17;
public static final byte ENTITY_TYPE_DATE = 18;
public static final byte ENTITY_TYPE_CHAR = 19;
// One past ENTITY_TYPE_CHAR, i.e. the number of codes in the range 0..19.
public static final int N_MAPPED_ENTITY_TYPES = ENTITY_TYPE_CHAR + 1;
// Type of a ProtoEntity that was created or cleared but has not been given a value yet.
static final byte ENTITY_TYPE_NONE = (byte) 0xff; // visible for testing
private static final Log LOG = LogFactory.getLog(LineTcpParser.class);
// Table (measurement) name of the current line; points into the parse buffer.
private final DirectByteCharSequence measurementName = new DirectByteCharSequence();
// Scratch flyweight used for number and keyword parsing.
private final DirectByteCharSequence charSeq = new DirectByteCharSequence();
// Pool of entities reused from line to line; only the first nEntities are valid.
// The element type must be declared: getEntity() returns elements as ProtoEntity
// and shl() calls ProtoEntity.shl() on them, neither of which type-checks on a raw list.
private final ObjList<ProtoEntity> entityCache = new ObjList<>();
// Handler installed once a complete measurement has been returned to the caller.
private final EntityHandler entityEndOfLineHandler = this::expectEndOfLine;
// When false, a tag value that starts and ends with '"' is rejected (see ProtoEntity.setValue()).
private final boolean stringAsTagSupported;
// When false, a field whose value resolves to ENTITY_TYPE_SYMBOL is rejected.
private final boolean symbolAsFieldSupported;
// Address of the next byte to be examined.
private long bufAt;
// Address of the first byte of the entity currently being collected.
private long entityLo;
// True once the tag section of the line is over and fields are being parsed.
private boolean tagsComplete;
// Set when a '"' is found at the first byte of an entity outside of quoted-value handling.
private boolean tagStartsWithQuote;
// Number of escape backslashes dropped from the current entity; collected bytes
// are shifted left by this amount.
private int nEscapedChars;
// Set by prepareQuotedEntity() when the closing quote of a string value is found.
private boolean isQuotedFieldValue;
// Number of entities (tags and fields) parsed so far on the current line.
private int nEntities;
private ProtoEntity currentEntity;
private ErrorCode errorCode;
// State of the parser: what kind of token the next separator byte completes.
private EntityHandler entityHandler;
private long timestamp;
private final EntityHandler entityTimestampHandler = this::expectTimestamp;
// 1 while a quoted string value has been opened but not yet closed, otherwise 0.
private int nQuoteCharacters;
private final EntityHandler entityTableHandler = this::expectTableName;
// Inside a quoted value: true when the previous byte was a backslash that
// escapes the byte that follows.
private boolean scape;
private final EntityHandler entityValueHandler = this::expectEntityValue;
// Set when a field name ended too close to the end of the buffer to peek at the
// first byte of its value; a '"' seen next then opens a quoted value.
private boolean nextValueCanBeOpenQuote;
private final EntityHandler entityNameHandler = this::expectEntityName;
// True if any byte with the high bit set was seen on the current line.
private boolean hasNonAscii;
/**
 * Creates a parser. Call {@link #of(long)} before parsing.
 *
 * @param stringAsTagSupported   when {@code false}, a tag value that both starts and ends with a
 *                               double quote is reported as an invalid tag value
 * @param symbolAsFieldSupported when {@code false}, a field value that resolves to the symbol
 *                               type is reported as an invalid field value
 */
public LineTcpParser(boolean stringAsTagSupported, boolean symbolAsFieldSupported) {
    this.symbolAsFieldSupported = symbolAsFieldSupported;
    this.stringAsTagSupported = stringAsTagSupported;
}
/**
 * @return the address of the next byte the parser will examine
 */
public long getBufferAddress() {
    return this.bufAt;
}
/**
 * Returns the n-th tag or field parsed on the current line.
 *
 * @param n zero-based entity index; must be below {@link #getEntityCount()}
 * @return the pooled entity at that index
 */
public ProtoEntity getEntity(int n) {
    assert nEntities > n;
    final ProtoEntity entity = entityCache.get(n);
    return entity;
}
/**
 * @return the number of entities (tags and fields) parsed so far on the current line
 */
public int getEntityCount() {
    return this.nEntities;
}
/**
 * @return the most recently recorded error code; {@link ErrorCode#NONE} after a reset
 */
public ErrorCode getErrorCode() {
    return this.errorCode;
}
/**
 * @return the flyweight holding the table name of the current line; the same instance
 * is returned on every call
 */
public DirectByteCharSequence getMeasurementName() {
    return this.measurementName;
}
/**
 * @return the timestamp parsed from the current line, or {@link #NULL_TIMESTAMP} if none was parsed
 */
public long getTimestamp() {
    return this.timestamp;
}
/**
 * @return {@code true} if a byte with the high bit set was seen since the last
 * {@link #startNextMeasurement()}
 */
public boolean hasNonAsciiChars() {
    return this.hasNonAscii;
}
/**
 * @return {@code true} if the current line's timestamp differs from {@link #NULL_TIMESTAMP}
 */
public boolean hasTimestamp() {
    return NULL_TIMESTAMP != timestamp;
}
/**
 * Points the parser at the start of a buffer and resets all per-line state.
 *
 * @param bufLo address of the first byte to parse
 * @return this parser, for call chaining
 */
public LineTcpParser of(long bufLo) {
    // startNextMeasurement() advances bufAt by one before using it, so start one byte early
    bufAt = bufLo - 1;
    startNextMeasurement();
    return this;
}
/**
 * Parses bytes from the current position up to, but not including, {@code bufHi}.
 * <p>
 * All parsing state is kept in member variables, so the method can be called again
 * with a larger {@code bufHi} after it returned {@link ParseResult#BUFFER_UNDERFLOW}.
 * Escape backslashes are removed in place: bytes that follow them are copied back
 * by the number of backslashes dropped so far in the current entity.
 *
 * @param bufHi address one past the last readable byte
 * @return {@link ParseResult#MEASUREMENT_COMPLETE} when an end of line was reached and at
 * least one entity was parsed; {@link ParseResult#BUFFER_UNDERFLOW} when {@code bufHi} was
 * reached first; {@link ParseResult#ERROR} otherwise, with details in {@link #getErrorCode()}
 */
public ParseResult parseMeasurement(long bufHi) {
assert bufAt != 0 && bufHi >= bufAt;
// Parsing may resume anywhere inside a line message; the member variables
// allow byte-by-byte processing to continue from where the previous call stopped.
if (nQuoteCharacters == 1 && tagsComplete && entityHandler == entityValueHandler) {
// nQuoteCharacters == 1 means a quoted field value was opened by an earlier call
// and its closing quote has not been found yet; continue scanning that value
if (!prepareQuotedEntity(entityLo, bufHi)) {
// quoted value parsing did not reach the closing quote
if (errorCode == ErrorCode.INVALID_FIELD_VALUE_STR_UNDERFLOW) {
// because the buffer was exhausted
return ParseResult.BUFFER_UNDERFLOW;
}
// because it reached EOL or another error
return ParseResult.ERROR;
}
nQuoteCharacters = 0;
// step over the closing quote
bufAt++;
}
// Main parsing loop
while (bufAt < bufHi) {
// take the byte
byte b = Unsafe.getUnsafe().getByte(bufAt);
hasNonAscii |= b < 0;
boolean endOfLine = false;
boolean appendByte = false;
switch (b) {
case '\n':
case '\r':
// both line terminators are presented to the handlers as '\n'
endOfLine = true;
b = '\n';
// intentional fall-through: an end of line also completes the current entity
case '=':
case ',':
case ' ':
isQuotedFieldValue = false;
if (!entityHandler.completeEntity(b, bufHi)) {
// parse of key or value is unsuccessful
if (errorCode == ErrorCode.EMPTY_LINE) {
// An empty line: skip the terminator and keep parsing
bufAt++;
entityLo = bufAt;
break;
}
if (errorCode == ErrorCode.INVALID_FIELD_VALUE_STR_UNDERFLOW) {
return ParseResult.BUFFER_UNDERFLOW;
}
return ParseResult.ERROR;
}
if (endOfLine) {
// EOL reached, time to return
if (nEntities > 0) {
entityHandler = entityEndOfLineHandler;
return ParseResult.MEASUREMENT_COMPLETE;
}
errorCode = ErrorCode.NO_FIELDS;
return ParseResult.ERROR;
}
// skip the separator
bufAt++;
// the handler may have positioned the parser on the closing quote of a string
// value; in that case entityLo and nEscapedChars must survive until the value
// is completed by the next separator
if (!isQuotedFieldValue) {
// reset few indicators
nEscapedChars = 0;
// start next value from here
entityLo = bufAt;
}
break;
case '\\':
// escape next character
// look forward, skip the slash
if ((bufAt + 1) >= bufHi) {
return ParseResult.BUFFER_UNDERFLOW;
}
nEscapedChars++;
bufAt++;
b = Unsafe.getUnsafe().getByte(bufAt);
// an escaped backslash is accepted only while a value is being collected
if (b == '\\' && (entityHandler != entityValueHandler)) {
return getError();
}
hasNonAscii |= b < 0;
appendByte = true;
break;
case '"':
if (nextValueCanBeOpenQuote && ++nQuoteCharacters == 1) {
// This means that the processing resumed from "
// and it's allowed to start quoted value at this point
bufAt += 1;
// parse quoted value
if (!prepareQuotedEntity(bufAt - 1, bufHi)) {
// parsing not successful
if (errorCode == ErrorCode.INVALID_FIELD_VALUE_STR_UNDERFLOW) {
// need more data
return ParseResult.BUFFER_UNDERFLOW;
}
// invalid character sequence in the quoted value or EOL found
return ParseResult.ERROR;
}
errorCode = ErrorCode.NONE;
nQuoteCharacters = 0;
// step over the closing quote
bufAt += 1;
break;
} else if (isQuotedFieldValue) {
// a quote right after a completed quoted value is not allowed
return getError();
} else if (entityLo == bufAt) {
tagStartsWithQuote = true;
}
// intentional fall-through: the quote is treated as an ordinary byte
default:
appendByte = true;
nextValueCanBeOpenQuote = false;
break;
case '\0':
LOG.info().$("could not parse [byte=\\0]").$();
return getError();
case '/':
// '/' is accepted only while a value is being collected
if (entityHandler != entityValueHandler) {
LOG.info().$("could not parse [byte=/]").$();
return getError();
}
appendByte = true;
nextValueCanBeOpenQuote = false;
break;
}
if (appendByte) {
// If there is escaped character, like \" or \\ then the escape slash has to be excluded
// from the result key / value.
// shift copy current byte back
if (nEscapedChars > 0) {
Unsafe.getUnsafe().putByte(bufAt - nEscapedChars, b);
}
bufAt++;
}
}
return ParseResult.BUFFER_UNDERFLOW;
}
/**
 * Moves every address held by the parser down by {@code shl} bytes: the cursor, the start
 * of the current entity, the table name, the scratch sequence and all parsed entities.
 *
 * @param shl number of bytes to subtract from each address
 */
public void shl(long shl) {
    entityLo -= shl;
    bufAt -= shl;
    charSeq.shl(shl);
    measurementName.shl(shl);
    int idx = 0;
    while (idx < nEntities) {
        entityCache.getQuick(idx).shl(shl);
        idx++;
    }
}
/**
 * Advances the cursor to the next line terminator without interpreting the bytes in between.
 * The cursor is left on the terminator itself.
 *
 * @param bufHi address one past the last readable byte
 * @return {@link ParseResult#MEASUREMENT_COMPLETE} if a {@code '\n'} or {@code '\r'} was found,
 * {@link ParseResult#BUFFER_UNDERFLOW} if {@code bufHi} was reached first
 */
public ParseResult skipMeasurement(long bufHi) {
    assert bufAt != 0 && bufHi >= bufAt;
    for (; bufAt < bufHi; bufAt++) {
        switch (Unsafe.getUnsafe().getByte(bufAt)) {
            case '\n':
            case '\r':
                return ParseResult.MEASUREMENT_COMPLETE;
            default:
                break;
        }
    }
    return ParseResult.BUFFER_UNDERFLOW;
}
/**
 * Advances the cursor by one byte and resets all per-line state so that the next byte is
 * parsed as the beginning of a table name.
 */
public void startNextMeasurement() {
    // position: the new line and its first entity both begin at the byte after the cursor
    entityLo = ++bufAt;

    // parser state machine
    entityHandler = entityTableHandler;
    currentEntity = null;
    nEntities = 0;
    tagsComplete = false;

    // results
    errorCode = ErrorCode.NONE;
    timestamp = NULL_TIMESTAMP;
    hasNonAscii = false;

    // quoting and escaping
    nEscapedChars = 0;
    nQuoteCharacters = 0;
    scape = false;
    isQuotedFieldValue = false;
    tagStartsWithQuote = false;
    nextValueCanBeOpenQuote = false;
}
/**
 * Handler used after a measurement has been completed; accepts the terminator unconditionally.
 *
 * @param endOfEntityByte terminator byte, asserted to be {@code '\n'}
 * @param bufHi           unused
 * @return always {@code true}
 */
private boolean expectEndOfLine(byte endOfEntityByte, long bufHi) {
    assert '\n' == endOfEntityByte;
    return true;
}
/**
 * Handler invoked when a separator is met while a tag or field name is expected.
 * <p>
 * On {@code '='} the bytes collected since {@code entityLo} become the name of a new
 * entity, taken from the cache or created, and the parser switches to value parsing.
 * Other separators are accepted only for an empty token (a space that ends the tag or
 * field section, or an end of line) or for a lone token after the last tag, which is
 * then parsed as the timestamp.
 *
 * @param endOfEntityByte separator that ended the token
 * @param bufHi           address one past the last readable byte
 * @return {@code true} on success, {@code false} with {@code errorCode} set otherwise
 */
private boolean expectEntityName(byte endOfEntityByte, long bufHi) {
if (endOfEntityByte == (byte) '=') {
if (bufAt - entityLo - nEscapedChars == 0) { // no tag/field name
errorCode = tagsComplete ? ErrorCode.INCOMPLETE_FIELD : ErrorCode.INCOMPLETE_TAG;
return false;
}
// reuse a pooled entity when one is available, otherwise grow the pool
if (entityCache.size() <= nEntities) {
currentEntity = new ProtoEntity();
entityCache.add(currentEntity);
} else {
currentEntity = entityCache.get(nEntities);
currentEntity.clear();
}
nEntities++;
currentEntity.setName();
entityHandler = entityValueHandler;
if (tagsComplete) {
if (bufAt + 3 < bufHi) { // peek oncoming value's 1st byte, only caring for valid strings (2 quotes plus a follow-up byte)
long candidateQuoteIdx = bufAt + 1;
byte b = Unsafe.getUnsafe().getByte(candidateQuoteIdx);
if (b == (byte) '"') {
nEscapedChars = 0;
nQuoteCharacters++;
// move past both the '=' and the opening quote
bufAt += 2;
return prepareQuotedEntity(candidateQuoteIdx, bufHi);// go to first byte of the string, past the '"'
} else {
nextValueCanBeOpenQuote = false;
}
} else {
// too close to the end of the buffer to peek; let the main loop
// recognise an opening quote when more data is available
nextValueCanBeOpenQuote = true;
}
}
return true;
}
boolean emptyEntity = bufAt == entityLo;
if (emptyEntity) {
if (endOfEntityByte == (byte) ' ') {
// an empty token followed by a space ends the current section: first the
// tags, then the fields (after which the timestamp is expected)
if (tagsComplete) {
entityHandler = entityTimestampHandler;
} else {
tagsComplete = true;
}
return true;
}
if (endOfEntityByte == (byte) '\n') {
return true;
}
} else if (tagsComplete && (endOfEntityByte == '\n' || endOfEntityByte == '\r')) {
if (currentEntity != null && currentEntity.getType() == ENTITY_TYPE_TAG) {
// One token after last tag, and no fields
// This must be the timestamp
return expectTimestamp(endOfEntityByte, bufHi);
}
}
if (tagsComplete) {
errorCode = ErrorCode.INCOMPLETE_FIELD;
} else {
errorCode = ErrorCode.INCOMPLETE_TAG;
}
return false;
}
/**
 * Handler invoked when a separator is met while a tag or field value is expected.
 * Stores the value in the current entity and selects the handler for the next token.
 *
 * @param endOfEntityByte separator that ended the value; must be a space, a comma or {@code '\n'}
 * @param bufHi           unused
 * @return {@code true} on success, {@code false} with {@code errorCode} set otherwise
 */
private boolean expectEntityValue(byte endOfEntityByte, long bufHi) {
    final boolean sectionEnds = endOfEntityByte == (byte) ' ';
    final boolean validSeparator = sectionEnds
            || endOfEntityByte == (byte) ','
            || endOfEntityByte == (byte) '\n';
    if (!validSeparator) {
        errorCode = ErrorCode.INVALID_FIELD_SEPARATOR;
        return false;
    }

    if (!currentEntity.setValue()) {
        errorCode = tagsComplete ? ErrorCode.INVALID_FIELD_VALUE : ErrorCode.INVALID_TAG_VALUE;
        return false;
    }

    if (sectionEnds && tagsComplete) {
        // the space after the last field introduces the timestamp
        entityHandler = entityTimestampHandler;
    } else {
        entityHandler = entityNameHandler;
        if (sectionEnds) {
            // the space after the last tag introduces the fields
            tagsComplete = true;
        }
    }
    return true;
}
/**
 * Handler invoked when a separator is met while the table name is expected.
 * A comma means tags follow, a space means the line has no tags and fields follow.
 *
 * @param endOfEntityByte separator that ended the table name
 * @param bufHi           unused
 * @return {@code true} if the table name was captured; {@code false} with {@code errorCode}
 * set to {@code EMPTY_LINE} when no bytes were collected, or {@code NO_FIELDS} otherwise
 */
private boolean expectTableName(byte endOfEntityByte, long bufHi) {
    switch (endOfEntityByte) {
        case ' ':
            tagsComplete = true;
            break;
        case ',':
            tagsComplete = false;
            break;
        default:
            tagsComplete = false;
            errorCode = entityLo == bufAt ? ErrorCode.EMPTY_LINE : ErrorCode.NO_FIELDS;
            return false;
    }
    // exclude the slots vacated by removed escape backslashes
    measurementName.of(entityLo, bufAt - nEscapedChars);
    entityHandler = entityNameHandler;
    return true;
}
/**
 * Handler invoked when a separator is met while the timestamp is expected.
 * An empty token is accepted and leaves the timestamp unchanged.
 *
 * @param endOfEntityByte separator that ended the token; must be {@code '\n'}
 * @param bufHi           unused
 * @return {@code true} on success; {@code false} with {@code errorCode} set to
 * {@code INVALID_FIELD_SEPARATOR} for any other separator, or {@code INVALID_TIMESTAMP}
 * when the token is not a valid long
 */
private boolean expectTimestamp(byte endOfEntityByte, long bufHi) {
    if (endOfEntityByte != (byte) '\n') {
        errorCode = ErrorCode.INVALID_FIELD_SEPARATOR;
        return false;
    }
    if (entityLo < bufAt - nEscapedChars) {
        try {
            timestamp = Numbers.parseLong(charSeq.of(entityLo, bufAt - nEscapedChars));
        } catch (NumericException ignored) {
            // the failure is reported through errorCode rather than by rethrowing
            errorCode = ErrorCode.INVALID_TIMESTAMP;
            return false;
        }
    }
    entityHandler = null;
    return true;
}
/**
 * Records an error code that matches the token kind being parsed and reports failure.
 * When the active handler is none of the table, name or value handlers, the error code
 * is left as it was.
 *
 * @return always {@link ParseResult#ERROR}
 */
private ParseResult getError() {
    final EntityHandler active = entityHandler;
    // the three handlers are distinct instances, so at most one comparison matches
    errorCode = active == entityNameHandler ? ErrorCode.INVALID_COLUMN_NAME
            : active == entityTableHandler ? ErrorCode.INVALID_TABLE_NAME
            : active == entityValueHandler ? ErrorCode.INVALID_FIELD_VALUE
            : errorCode;
    return ParseResult.ERROR;
}
/**
 * Scans a double-quoted string value starting at {@code bufAt} until its closing quote.
 * <p>
 * Escape backslashes are dropped and the bytes after them are shifted left in place.
 * On success {@code bufAt} is left on the closing quote, which the caller steps over or
 * which is consumed as the last byte of the value.
 *
 * @param openQuoteIdx address of the opening quote; becomes the start of the entity
 * @param bufHi        address one past the last readable byte
 * @return {@code true} if the closing quote was found; {@code false} with {@code errorCode}
 * set to {@code INVALID_FIELD_VALUE} on an unescaped {@code '\n'}, or to
 * {@code INVALID_FIELD_VALUE_STR_UNDERFLOW} when {@code bufHi} was reached first
 */
private boolean prepareQuotedEntity(long openQuoteIdx, long bufHi) {
// the byte at openQuoteIdx is '"', from here it can only be
// the start of a string value. Get it ready for immediate consumption by
// the next completeEntity call, moving bufAt to the next '"'
entityLo = openQuoteIdx; // from the quote
boolean copyByte;
while (bufAt < bufHi) { // consume until the next quote, '\n', or the end of the buffer
byte b = Unsafe.getUnsafe().getByte(bufAt);
copyByte = true;
hasNonAscii |= b < 0;
switch (b) {
case (byte) '\\':
// the first backslash of a pair is dropped; the second one is kept as data
if (!scape) {
nEscapedChars++;
copyByte = false;
}
scape = !scape;
break;
case (byte) '"':
if (!scape) {
// unescaped quote: this closes the string
isQuotedFieldValue = true;
nQuoteCharacters--;
if (nEscapedChars > 0) {
Unsafe.getUnsafe().putByte(bufAt - nEscapedChars, b);
}
return true;
}
scape = false;
break;
case (byte) '\n':
if (!scape) {
errorCode = ErrorCode.INVALID_FIELD_VALUE;
return false; // missing tail quote
}
scape = false;
break;
default:
scape = false;
break;
}
nextValueCanBeOpenQuote = false;
// compact the value over the slots vacated by dropped backslashes
if (copyByte && nEscapedChars > 0) {
Unsafe.getUnsafe().putByte(bufAt - nEscapedChars, b);
}
bufAt++;
}
errorCode = ErrorCode.INVALID_FIELD_VALUE_STR_UNDERFLOW;
return false; // closing quote not found before bufHi; parseMeasurement() reports this as BUFFER_UNDERFLOW
}
/**
 * Outcome of {@link #parseMeasurement(long)} and {@link #skipMeasurement(long)}.
 */
public enum ParseResult {
    /** A line terminator was reached. */
    MEASUREMENT_COMPLETE,
    /** The end of the readable buffer was reached before the line ended. */
    BUFFER_UNDERFLOW,
    /** The line could not be parsed; see {@link #getErrorCode()}. */
    ERROR
}
/**
 * Reason for the most recent parse failure, available from getErrorCode().
 */
public enum ErrorCode {
// a line terminator arrived before any table name byte; parseMeasurement() skips such lines
EMPTY_LINE,
// the line ended right after the table name, or ended without any entity parsed
NO_FIELDS,
// a tag name was empty or was not followed by '='
INCOMPLETE_TAG,
// a field name was empty or was not followed by '='
INCOMPLETE_FIELD,
// a value or the timestamp was terminated by a separator that is not allowed there
INVALID_FIELD_SEPARATOR,
// the timestamp token could not be parsed as a long
INVALID_TIMESTAMP,
// the tag value was rejected by ProtoEntity.setValue()
INVALID_TAG_VALUE,
// the field value was rejected, contained an illegal byte, or a quoted string hit an unescaped '\n'
INVALID_FIELD_VALUE,
// the buffer ended inside a quoted string; reported to callers as BUFFER_UNDERFLOW
INVALID_FIELD_VALUE_STR_UNDERFLOW,
// an illegal byte was found while the table name was being collected
INVALID_TABLE_NAME,
// an illegal byte was found while a tag or field name was being collected
INVALID_COLUMN_NAME,
// no error recorded
NONE
}
/**
 * A parser state: called by parseMeasurement() each time a separator byte
 * ('=', ',', ' ' or a line terminator passed as '\n') ends the current token.
 */
@FunctionalInterface
private interface EntityHandler {
// Returns true when the token was accepted; false with errorCode set otherwise.
boolean completeEntity(byte endOfEntityByte, long bufHi);
}
public class ProtoEntity {
// Name and raw value of the tag/field; both are set from addresses in the parse buffer.
private final DirectByteCharSequence name = new DirectByteCharSequence();
private final DirectByteCharSequence value = new DirectByteCharSequence();
// One of the ENTITY_TYPE_* codes; ENTITY_TYPE_NONE until setValue() is called.
private byte type = ENTITY_TYPE_NONE;
// Parsed value when the type is ENTITY_TYPE_INTEGER or ENTITY_TYPE_TIMESTAMP.
private long longValue;
// Parsed value when the type is ENTITY_TYPE_BOOLEAN.
private boolean booleanValue;
// Parsed value when the type is ENTITY_TYPE_FLOAT.
private double floatValue;
/**
 * @return the value assigned when the entity was last parsed as a boolean
 */
public boolean getBooleanValue() {
    return this.booleanValue;
}
/**
 * @return the value assigned when the entity was last parsed as a floating point number
 */
public double getFloatValue() {
    return this.floatValue;
}
/**
 * @return the value assigned when the entity was last parsed as an integer or a timestamp
 */
public long getLongValue() {
    return this.longValue;
}
/**
 * @return the flyweight holding the entity name; the same instance is returned on every call
 */
public DirectByteCharSequence getName() {
    return this.name;
}
/**
 * @return the {@code ENTITY_TYPE_*} code of this entity
 */
public byte getType() {
    return this.type;
}
/**
 * @return the flyweight holding the raw entity value; the same instance is returned on every call
 */
public DirectByteCharSequence getValue() {
    return this.value;
}
/**
 * Moves the addresses of the name and the value down by {@code shl} bytes.
 *
 * @param shl number of bytes to subtract from each address
 */
public void shl(long shl) {
    this.value.shl(shl);
    this.name.shl(shl);
}
/**
 * Marks a pooled entity as not yet having a value so that it can be reused.
 */
private void clear() {
    this.type = ENTITY_TYPE_NONE;
}
/**
 * Determines the type of a field value from its last byte and, where applicable,
 * parses it. Values that match no other type become ENTITY_TYPE_SYMBOL.
 *
 * @param last     last byte of the value
 * @param valueLen length of the value in bytes, greater than zero
 * @return always {@code true} in this implementation
 */
private boolean parse(byte last, int valueLen) {
switch (last) {
case 'i':
// 'i' suffix: an integer, unless the second byte is 'x', in which case the
// value is checked for the 0x... long256 form
// NOTE(review): because this test only excludes a lowercase 'x', a value such as
// "0X12i" takes the integer path and the upper-case branch of the "| 32"
// comparison below appears unreachable - confirm intent
if (valueLen > 1 && value.charAt(1) != 'x') {
return parseLong(ENTITY_TYPE_INTEGER);
}
if (valueLen > 3 && value.charAt(0) == '0' && (value.charAt(1) | 32) == 'x') {
value.decHi(); // remove 'i'
type = ENTITY_TYPE_LONG256;
return true;
}
type = ENTITY_TYPE_SYMBOL;
return true;
case 't':
// 't' suffix on a longer value: try a timestamp; a lone 't' is a boolean
if (valueLen > 1) {
return parseLong(ENTITY_TYPE_TIMESTAMP);
}
// fall through
case 'T':
case 'f':
case 'F':
case 'e':
case 'E':
// t
// T
// f
// F
// tru(e)
// fals(e)
if (valueLen == 1) {
// a single 'e' is not a boolean literal
if (last != 'e') {
booleanValue = (last | 32) == 't';
type = ENTITY_TYPE_BOOLEAN;
} else {
type = ENTITY_TYPE_SYMBOL;
}
} else {
charSeq.of(value.getLo(), value.getHi());
if (SqlKeywords.isTrueKeyword(charSeq)) {
booleanValue = true;
type = ENTITY_TYPE_BOOLEAN;
} else if (SqlKeywords.isFalseKeyword(charSeq)) {
booleanValue = false;
type = ENTITY_TYPE_BOOLEAN;
} else {
type = ENTITY_TYPE_SYMBOL;
}
}
return true;
case '"': {
// a string needs both an opening and a closing quote
byte b = value.byteAt(0);
if (valueLen > 1 && b == '"') {
value.squeeze();
type = ENTITY_TYPE_STRING;
return true;
}
type = ENTITY_TYPE_SYMBOL;
return true;
}
default:
// no recognised suffix: a floating point number if it parses as one
try {
floatValue = Numbers.parseDouble(value);
type = ENTITY_TYPE_FLOAT;
} catch (NumericException ex) {
type = ENTITY_TYPE_SYMBOL;
}
return true;
}
}
/**
 * Parses the value without its one-byte type suffix as a long. On success the suffix is
 * removed from the value and the entity takes the requested type; on failure the value is
 * left untouched and the entity becomes a symbol.
 *
 * @param entityType type to assign when the number parses
 * @return always {@code true}
 */
private boolean parseLong(byte entityType) {
    // everything but the trailing suffix byte ('i' or 't')
    charSeq.of(value.getLo(), value.getHi() - 1);
    try {
        longValue = Numbers.parseLong(charSeq);
    } catch (NumericException ignored) {
        // not a number: treated as a symbol rather than an error
        type = ENTITY_TYPE_SYMBOL;
        return true;
    }
    value.decHi(); // drop the suffix byte
    type = entityType;
    return true;
}
/**
 * Captures the bytes collected since {@code entityLo} as this entity's name, excluding the
 * slots vacated by removed escape backslashes.
 */
private void setName() {
    final long nameHi = bufAt - nEscapedChars;
    name.of(entityLo, nameHi);
}
/**
 * Captures the bytes collected since {@code entityLo} as this entity's value and assigns
 * its type. Tag values become {@code ENTITY_TYPE_TAG}; empty field values become
 * {@code ENTITY_TYPE_NULL}; other field values are typed by {@code parse}.
 *
 * @return {@code false} when a symbol-typed field is not allowed, or when a tag value that
 * starts and ends with a double quote is not allowed; {@code true} otherwise
 */
private boolean setValue() {
    assert type == ENTITY_TYPE_NONE;
    final long valueHi = bufAt - nEscapedChars;
    final int len = (int) (valueHi - entityLo);
    value.of(entityLo, valueHi);

    if (!tagsComplete) {
        type = ENTITY_TYPE_TAG;
        final boolean quotedTag = tagStartsWithQuote && len >= 2 && value.byteAt(len - 1) == '"';
        return !quotedTag || stringAsTagSupported;
    }

    if (len <= 0) {
        type = ENTITY_TYPE_NULL;
        return true;
    }

    if (!parse(value.byteAt(len - 1), len)) {
        return false;
    }
    return symbolAsFieldSupported || type != ENTITY_TYPE_SYMBOL;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy