All downloads are free. Search and download functionality uses the official Maven repository.

io.questdb.cutlass.line.LineProtoLexer Maven / Gradle / Ivy

The newest version!
/*******************************************************************************
 *     ___                  _   ____  ____
 *    / _ \ _   _  ___  ___| |_|  _ \| __ )
 *   | | | | | | |/ _ \/ __| __| | | |  _ \
 *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
 *    \__\_\\__,_|\___||___/\__|____/|____/
 *
 *  Copyright (c) 2014-2019 Appsicle
 *  Copyright (c) 2019-2020 QuestDB
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

package io.questdb.cutlass.line;

import io.questdb.std.*;
import io.questdb.std.str.AbstractCharSequence;
import io.questdb.std.str.AbstractCharSink;
import io.questdb.std.str.CharSink;

import java.io.Closeable;

/**
 * Streaming lexer for line-protocol text.
 * <p>
 * {@link #parse(long, long)} consumes UTF-8 bytes, decodes them into a native buffer of
 * 16-bit chars and, on delimiter characters, reports the token collected so far to the
 * {@link LineProtoParser} supplied via {@link #withParser(LineProtoParser)}. Input may be
 * fed in several {@code parse} calls; lexer state (current token, escape/quote flags and
 * any partially received multi-byte UTF-8 character) is carried between calls.
 * {@link #parseLast()} flushes the final line when the input does not end with a line break.
 * <p>
 * The lexer owns native memory and must be {@link #close() closed}. It must not be used
 * after it has been closed.
 */
public class LineProtoLexer implements Mutable, Closeable {

    // sentinel handler for characters that have no special meaning
    private static final Runnable NOOP = LineProtoLexer::noop;
    // writes decoded chars at dstPos, growing the native buffer when needed
    private final ArrayBackedCharSink sink = new ArrayBackedCharSink();
    // view of the token currently being reported to the parser
    private final ArrayBackedCharSequence cs = new ArrayBackedCharSequence();
    // reusable view returned by charSequenceCache
    private final FloatingCharSequence floatingCharSequence = new FloatingCharSequence();
    // handlers indexed by 7-bit character code
    private final ObjList<Runnable> charHandlers = new ObjList<>();
    // event type the parser will receive for the token currently being collected
    private int state = LineProtoParser.EVT_MEASUREMENT;
    // true right after a backslash: the next char replaces it and is not dispatched
    private boolean escape = false;
    // start address of the native char buffer
    private long buffer;
    // resolves addresses produced by ArrayBackedCharSequence.getCacheAddress()
    private final CharSequenceCache charSequenceCache;
    // address one past the end of the native char buffer
    private long bufferHi;
    // address at which the next decoded char is written
    private long dstPos = 0;
    // address at which the current token starts
    private long dstTop = 0;
    // true after an error: input is discarded up to the next line break
    private boolean skipLine = false;
    private LineProtoParser parser;
    // [utf8ErrorTop, utf8ErrorPos) holds raw bytes of an incomplete multi-byte char; -1 when idle
    private long utf8ErrorTop;
    private long utf8ErrorPos;
    // error code handed to parser.onError() when a LineProtoException is caught
    private int errorCode = 0;
    // toggled by '"'; while false, comma, equals and space do not end a token
    private boolean unquoted = true;

    /**
     * Creates a lexer with an initial native buffer of the given size.
     *
     * @param bufferSize initial buffer size in bytes; the buffer is doubled on demand
     */
    public LineProtoLexer(int bufferSize) {
        buffer = Unsafe.malloc(bufferSize);
        bufferHi = buffer + bufferSize;
        charSequenceCache = address -> {
            // addresses are offsets from the buffer start, see getCacheAddress();
            // the end offset includes the delimiter char, hence "- 2"
            floatingCharSequence.lo = buffer + Numbers.decodeHighInt(address);
            floatingCharSequence.hi = buffer + Numbers.decodeLowInt(address) - 2;
            assert floatingCharSequence.hi < bufferHi;
            assert floatingCharSequence.lo >= buffer;
            return floatingCharSequence;
        };
        populateCharHandlers();
        clear();
    }

    /**
     * Resets the lexer to the start-of-line state and rewinds the write position
     * to the beginning of the buffer. The buffer itself is retained.
     */
    @Override
    public final void clear() {
        escape = false;
        dstTop = dstPos = buffer;
        state = LineProtoParser.EVT_MEASUREMENT;
        utf8ErrorTop = utf8ErrorPos = -1;
        skipLine = false;
        unquoted = true;
        errorCode = 0;
    }

    /**
     * Releases the native buffer. Calling this method more than once has no effect,
     * as required by {@link Closeable#close()}.
     */
    @Override
    public void close() {
        if (buffer != 0) {
            Unsafe.free(buffer, bufferHi - buffer);
            // forget the released addresses so that a repeated close() cannot free them again
            buffer = bufferHi = 0;
            dstTop = dstPos = 0;
        }
    }

    /**
     * Parses line-protocol as UTF8-encoded sequence of bytes.
     * <p>
     * Errors are not propagated to the caller: they are reported via
     * {@code parser.onError()} and the rest of the offending line is skipped.
     * If the input ends in the middle of a multi-byte character the method returns
     * and the character is completed by the next call.
     *
     * @param bytesPtr byte array address
     * @param hi       high watermark for byte array address
     */
    public void parse(long bytesPtr, long hi) {
        long p = bytesPtr;

        while (p < hi) {

            final byte b = Unsafe.getUnsafe().getByte(p);

            if (skipLine) {
                doSkipLine(b);
                p++;
                continue;
            }

            if (escape) {
                // step back so that the escaped char overwrites the backslash
                dstPos -= 2;
            }

            try {
                if (b < 0) {
                    // high bit set: lead or continuation byte of a multi-byte char
                    try {
                        p = utf8Decode(p, hi, b);
                    } catch (Utf8RepairContinue ignored) {
                        // ran out of input mid-character; the next parse() call resumes the repair
                        break;
                    }
                } else {
                    sink.put((char) b);
                    p++;
                }

                dstPos += 2;

                if (escape) {
                    // escaped chars are taken literally, no handler is invoked
                    escape = false;
                    continue;
                }

                if (b > -1) {
                    final Runnable runnable = charHandlers.getQuick(b);
                    if (runnable != NOOP) {
                        runnable.run();
                    }
                }

            } catch (LineProtoException ex) {
                skipLine = true;
                parser.onError(errorPosition(), state, errorCode);
            }
        }
    }

    /**
     * Flushes the line in progress as if a line break had been received and then
     * resets the lexer. Does nothing but reset when the current line is being skipped.
     */
    public void parseLast() {
        if (!skipLine) {
            // token views drop the last char (normally the delimiter), so make room for a virtual one
            dstPos += 2;
            try {
                onEol();
            } catch (LineProtoException e) {
                parser.onError(errorPosition(), state, errorCode);
            }
        }
        clear();
    }

    /**
     * Sets the parser that receives events, line ends and errors.
     *
     * @param parser event receiver; must be set before {@link #parse(long, long)} is called
     */
    public void withParser(LineProtoParser parser) {
        this.parser = parser;
    }

    private static void noop() {
    }

    /**
     * Starts a new token at the current write position.
     */
    private void chop() {
        dstTop = dstPos;
    }

    /**
     * Discards a byte of a skipped line; a line break ends skipping and resets the lexer.
     */
    private void doSkipLine(byte b) {
        if (b == '\n' || b == '\r') {
            clear();
        }
    }

    /**
     * Char index, relative to the buffer start, of the most recently written char.
     * This is the position reported to {@code parser.onError()}.
     */
    private int errorPosition() {
        return (int) ((dstPos - 2 - buffer) / 2);
    }

    /**
     * Reports the current token to the parser and starts a new token.
     *
     * @throws LineProtoException with {@code ERROR_EMPTY} when the token has no chars
     */
    private void fireEvent() throws LineProtoException {
        // two bytes less between these and one more byte so we don't have to use >=
        if (dstTop > dstPos - 3) {
            errorCode = LineProtoParser.ERROR_EMPTY;
            throw LineProtoException.INSTANCE;
        }
        parser.onEvent(cs, state, charSequenceCache);
        chop();
    }

    /**
     * Ends a measurement, tag value or field value token and moves to the given follow-up state.
     *
     * @param evtTagName   state to enter after a measurement or a tag value
     * @param evtFieldName state to enter after a field value
     * @throws LineProtoException with {@code ERROR_EXPECTED} when in any other state
     */
    private void fireEventTransition(int evtTagName, int evtFieldName) {
        switch (state) {
            case LineProtoParser.EVT_MEASUREMENT:
            case LineProtoParser.EVT_TAG_VALUE:
                fireEvent();
                state = evtTagName;
                break;
            case LineProtoParser.EVT_FIELD_VALUE:
                fireEvent();
                state = evtFieldName;
                break;
            default:
                errorCode = LineProtoParser.ERROR_EXPECTED;
                throw LineProtoException.INSTANCE;
        }
    }

    /**
     * Ends a tag name or field name token and moves to the matching value state.
     *
     * @throws LineProtoException with {@code ERROR_EXPECTED} when in any other state
     */
    private void fireEventTransition2() {
        switch (state) {
            case LineProtoParser.EVT_TAG_NAME:
                fireEvent();
                state = LineProtoParser.EVT_TAG_VALUE;
                break;
            case LineProtoParser.EVT_FIELD_NAME:
                fireEvent();
                state = LineProtoParser.EVT_FIELD_VALUE;
                break;
            default:
                errorCode = LineProtoParser.ERROR_EXPECTED;
                throw LineProtoException.INSTANCE;
        }
    }

    private void onComma() {
        if (unquoted) {
            fireEventTransition(LineProtoParser.EVT_TAG_NAME, LineProtoParser.EVT_FIELD_NAME);
        }
    }

    /**
     * Handles a line break: an empty line is ignored, a line ending in a value or
     * timestamp is completed and reported via {@code parser.onLineEnd()}.
     *
     * @throws LineProtoException with {@code ERROR_EXPECTED} when the line ends in a name
     */
    private void onEol() throws LineProtoException {
        switch (state) {
            case LineProtoParser.EVT_MEASUREMENT:
                chop();
                break;
            case LineProtoParser.EVT_TAG_VALUE:
            case LineProtoParser.EVT_FIELD_VALUE:
            case LineProtoParser.EVT_TIMESTAMP:
                fireEvent();
                parser.onLineEnd(charSequenceCache);
                clear();
                break;
            default:
                errorCode = LineProtoParser.ERROR_EXPECTED;
                throw LineProtoException.INSTANCE;
        }
    }

    private void onEquals() {
        if (unquoted) {
            fireEventTransition2();
        }
    }

    private void onEsc() {
        escape = true;
    }

    private void onQuote() {
        unquoted = !unquoted;
    }

    private void onSpace() {
        if (unquoted) {
            fireEventTransition(LineProtoParser.EVT_FIELD_NAME, LineProtoParser.EVT_TIMESTAMP);
        }
    }

    /**
     * Builds the dispatch table: every 7-bit char maps to {@link #NOOP} except the
     * delimiters, quote, backslash and line breaks.
     */
    private void populateCharHandlers() {
        final Runnable eol = this::onEol;
        for (int i = 0; i <= Byte.MAX_VALUE; i++) {
            charHandlers.add(NOOP);
        }
        charHandlers.extendAndSet('"', this::onQuote);
        charHandlers.extendAndSet('\n', eol);
        charHandlers.extendAndSet('\r', eol);
        charHandlers.extendAndSet(' ', this::onSpace);
        charHandlers.extendAndSet('\\', this::onEsc);
        charHandlers.extendAndSet(',', this::onComma);
        charHandlers.extendAndSet('=', this::onEquals);
    }

    /**
     * Collects the bytes of a multi-byte character that could not be decoded directly,
     * e.g. because it is split between two {@link #parse(long, long)} calls, and retries
     * decoding each time another byte is added.
     * <p>
     * Raw bytes are stored in the char buffer one address apart, starting one byte past
     * the current write position; on success the decoded char is written over them.
     * NOTE(review): each byte is stored with a 2-byte char write whose second byte is
     * overwritten by the next store; this looks like it relies on little-endian layout.
     *
     * @param lo address of the current input byte
     * @param hi high watermark of the input
     * @param b  the byte at {@code lo}
     * @return address of the first input byte after the decoded character
     * @throws LineProtoException with {@code ERROR_ENCODING} when more than three bytes
     *                            have been collected and still do not decode
     * @throws Utf8RepairContinue when the input is exhausted before the character is complete
     */
    private long repairMultiByteChar(long lo, long hi, byte b) throws LineProtoException {
        int n = -1;
        do {
            // UTF8 error
            if (utf8ErrorTop == -1) {
                utf8ErrorTop = utf8ErrorPos = dstPos + 1;
            }
            // store partial byte
            dstPos = utf8ErrorPos;
            utf8ErrorPos += 1;
            sink.put((char) b);

            // try to decode partial bytes
            long errorLen = utf8ErrorPos - utf8ErrorTop;
            if (errorLen > 1) {
                dstPos = utf8ErrorTop - 1;
                n = Chars.utf8DecodeMultiByte(utf8ErrorTop, utf8ErrorPos, Unsafe.getUnsafe().getByte(utf8ErrorTop), sink);
            }

            if (n == -1 && errorLen > 3) {
                errorCode = LineProtoParser.ERROR_ENCODING;
                throw LineProtoException.INSTANCE;
            }

            if (n == -1 && ++lo < hi) {
                b = Unsafe.getUnsafe().getByte(lo);
            } else {
                break;
            }
        } while (true);

        // we can only be in error when we ran out of bytes to read
        // in which case we return array pointer to original position and exit method
        dstPos = utf8ErrorTop - 1;

        if (n > 0) {
            // if we are successful, reset error pointers
            utf8ErrorTop = utf8ErrorPos = -1;
            // bump pos by one more byte in addition to what we may have incremented in the loop
            return lo + 1;
        }
        throw Utf8RepairContinue.INSTANCE;
    }

    /**
     * Decodes one multi-byte character starting at {@code lo} into the sink, falling
     * back to {@link #repairMultiByteChar(long, long, byte)} when a repair is already in
     * progress or direct decoding fails.
     *
     * @return address of the first input byte after the decoded character
     */
    private long utf8Decode(long lo, long hi, byte b) throws LineProtoException {
        if (utf8ErrorPos > -1) {
            return repairMultiByteChar(lo, hi, b);
        }

        int n = Chars.utf8DecodeMultiByte(lo, hi, b, sink);
        if (n == -1) {
            return repairMultiByteChar(lo, hi, b);
        } else {
            return lo + n;
        }
    }

    /**
     * Char sequence over an arbitrary {@code [lo, hi)} range of native memory holding 16-bit chars.
     */
    private static class FloatingCharSequence extends AbstractCharSequence {
        long lo, hi;

        @Override
        public int length() {
            return (int) (hi - lo) / 2;
        }

        @Override
        public char charAt(int index) {
            return Unsafe.getUnsafe().getChar(lo + index * 2L);
        }
    }

    /**
     * Sink that stores a char at the lexer's current write position. It does not
     * advance the position; {@link #parse(long, long)} does that after each character.
     */
    private class ArrayBackedCharSink extends AbstractCharSink {

        @Override
        public CharSink put(char c) {
            // a char occupies two bytes and the write position is not always char-aligned
            // (UTF-8 repair stores bytes at odd offsets), so test for room rather than
            // for an exact hit on the buffer end
            if (dstPos + 2 > bufferHi) {
                extend();
            }
            Unsafe.getUnsafe().putChar(dstPos, c);
            return this;
        }

        /**
         * Doubles the native buffer, copies everything written so far and re-bases
         * every address that points into the buffer.
         */
        private void extend() {
            final long oldBuffer = buffer;
            final long oldSize = bufferHi - oldBuffer;
            final int doubled = ((int) oldSize * 2);
            if (doubled < 0) {
                // can't realistically reach this in test :(
                throw LineProtoException.INSTANCE;
            }
            // make sure a zero-size buffer can grow as well
            final int capacity = Math.max(doubled, 2);
            final long buf = Unsafe.malloc(capacity);
            Unsafe.getUnsafe().copyMemory(oldBuffer, buf, (dstPos - oldBuffer));
            Unsafe.free(oldBuffer, oldSize);

            final long delta = buf - oldBuffer;
            buffer = buf;
            bufferHi = buf + capacity;
            dstPos += delta;
            dstTop += delta;
            if (utf8ErrorTop != -1) {
                // a UTF-8 repair is in progress: its pointers are absolute addresses
                // into the buffer and have to move with it
                utf8ErrorTop += delta;
                utf8ErrorPos += delta;
            }
        }

        @Override
        public CharSink put(char[] chars, int start, int len) {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * View of the current token: chars from {@code dstTop} up to, but not including,
     * the last char written (the delimiter that ended the token).
     */
    private class ArrayBackedCharSequence extends AbstractCharSequence implements CachedCharSequence {

        /**
         * Packs the token's end and start offsets (relative to the buffer start) into a
         * single value that {@code charSequenceCache} can later turn back into a char sequence.
         */
        @Override
        public long getCacheAddress() {
            return Numbers.encodeLowHighInts((int) (dstPos - buffer), (int) (dstTop - buffer));
        }

        @Override
        public int length() {
            return (int) ((dstPos - dstTop) / 2 - 1);
        }

        @Override
        public char charAt(int index) {
            return Unsafe.getUnsafe().getChar(dstTop + index * 2L);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy