All downloads are free. The search and download functionality uses the official Maven repository.

net.openhft.chronicle.wire.CSVWire Maven / Gradle / Ivy

/*
 * Copyright 2016-2020 chronicle.software
 *
 *       https://chronicle.software
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.openhft.chronicle.wire;

import net.openhft.chronicle.bytes.*;
import net.openhft.chronicle.core.io.InvalidMarshallableException;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * A CSV (Comma Separated Values) flavour of {@link TextWire}.
 * <p>
 * On construction the fields of the first line are read and kept as the header. Fields are
 * then read one at a time through the {@link CSVValueIn} created by {@link #createValueIn()};
 * a field may be bare, double-quoted or single-quoted. Lines starting with {@code '#'} are
 * treated as comments by {@link #consumePaddingStart()}.
 */
public class CSVWire extends TextWire {

    // Per-thread stop-char tester built from COMMA_STOP with escaping support.
    // It is stateful, so getEscapingCSVEndOfText() resets it before handing it out.
    private static final ThreadLocal<StopCharTester> ESCAPED_END_OF_TEXT = ThreadLocal.withInitial(
            StopCharTesters.COMMA_STOP::escaping);

    // The header fields read from the start of the input by the constructor.
    private final List<String> header = new ArrayList<>();

    /**
     * Constructs a new CSVWire instance from a given byte source and a flag indicating
     * the use of 8-bit characters, then reads the header fields.
     *
     * @param bytes   The byte source containing CSV data.
     * @param use8bit A flag indicating whether to use 8-bit characters or not.
     */
    @SuppressWarnings("rawtypes")
    public CSVWire(@NotNull Bytes bytes, boolean use8bit) {
        super(bytes, use8bit);
        // Keep collecting header fields while lineStart is still 0.
        // NOTE(review): lineStart is presumably advanced by the inherited text() path once the
        // first line break is consumed - that code is not part of this class; confirm.
        while (lineStart == 0) {
            long start = bytes.readPosition();
            header.add(valueIn.text());
            // No bytes consumed means no progress is possible (e.g. end of input): stop
            // rather than loop forever.
            if (bytes.readPosition() == start)
                break;
        }
    }

    /**
     * Constructs a new CSVWire instance from a given byte source, without 8-bit mode.
     *
     * @param bytes The byte source containing CSV data.
     */
    @SuppressWarnings("rawtypes")
    public CSVWire(@NotNull Bytes bytes) {
        this(bytes, false);
    }

    /**
     * Constructs a new CSVWire instance by reading data from a specified file.
     * Uses 8-bit characters.
     *
     * @param name The name of the file to read CSV data from.
     * @return A new instance of CSVWire populated with data from the specified file.
     * @throws IOException If any I/O error occurs while reading the file.
     */
    @NotNull
    public static CSVWire fromFile(String name) throws IOException {
        return new CSVWire(BytesUtil.readFile(name), true);
    }

    /**
     * Constructs a new CSVWire instance from a provided string text.
     *
     * @param text The string containing CSV data.
     * @return A new instance of CSVWire populated with data from the provided text.
     */
    @NotNull
    public static CSVWire from(@NotNull String text) {
        return new CSVWire(Bytes.from(text));
    }

    /**
     * Returns this thread's escaping stop-char tester for bare CSV fields, reset and
     * ready for a new field.
     *
     * @return the thread-local {@link StopCharTester} used to find the end of a bare field.
     */
    @NotNull
    static StopCharTester getEscapingCSVEndOfText() {
        StopCharTester escaping = ESCAPED_END_OF_TEXT.get();
        // reset the tester: feeding it a plain space clears any pending escape state.
        escaping.isStopChar(' ');
        return escaping;
    }

    /**
     * @return a new {@link CSVValueOut} bound to this wire.
     */
    @NotNull
    @Override
    protected CSVValueOut createValueOut() {
        return new CSVValueOut();
    }

    /**
     * @return a new {@link CSVValueIn} bound to this wire.
     */
    @NotNull
    @Override
    protected TextValueIn createValueIn() {
        return new CSVValueIn();
    }

    /**
     * Reads the next field's text into the supplied builder.
     *
     * @param sb the builder that receives the field text.
     * @return the same builder, for chaining.
     */
    @Override
    @NotNull
    public StringBuilder readField(@NotNull StringBuilder sb) {
        valueIn.text(sb);
        return sb;
    }

    /**
     * Consumes everything that may precede a row: comment lines starting with {@code '#'}
     * and any whitespace, including line breaks. {@code lineStart} is moved to the position
     * just after each line break that is skipped here.
     */
    public void consumePaddingStart() {
        for (; ; ) {
            int codePoint = peekCode();
            if (codePoint == '#') {
                skipCommentLine();
                continue;
            }
            if (!Character.isWhitespace(codePoint)) {
                // Neither a comment nor whitespace: real data (or end of input) starts here.
                break;
            }
            // A line break moves the start of the current line to the following position.
            if (codePoint == '\n' || codePoint == '\r')
                this.lineStart = bytes.readPosition() + 1;
            bytes.readSkip(1);
        }
    }

    /**
     * Skips the rest of a comment line, consuming the character that terminates it.
     * A TAB is treated as part of the comment; any other code below {@code ' '}
     * (line breaks, other control characters, or a negative end-of-input code) ends it.
     */
    private void skipCommentLine() {
        int code;
        do {
            code = readCode();
        } while (code >= ' ' || code == '\t');
    }

    /**
     * Skips whitespace within a line. Only whitespace whose code is at least {@code ' '}
     * is consumed, so control characters such as tabs and line breaks are left in place.
     */
    @Override
    public void consumePadding() {
        for (; ; ) {
            int codePoint = peekCode();
            if (Character.isWhitespace(codePoint) && codePoint >= ' ') {
                bytes.readSkip(1);
            } else {
                break;
            }
        }
    }

    /**
     * Returns the value reader positioned at the next field. The key is ignored and
     * nothing is consumed.
     *
     * @param key ignored.
     * @return this wire's value reader.
     */
    @NotNull
    @Override
    public ValueIn read(@NotNull WireKey key) {
        return valueIn;
    }

    /**
     * Skips in-line padding, reads the next field into {@code name} and returns the
     * value reader for the field that follows.
     *
     * @param name receives the text of the field that was read.
     * @return this wire's value reader.
     */
    @NotNull
    @Override
    public ValueIn read(@NotNull StringBuilder name) {
        consumePadding();
        readField(name);
        return valueIn;
    }

    /**
     * Comments are not surfaced by this wire: the builder is simply cleared.
     *
     * @param s the builder to clear.
     * @return this wire.
     */
    @NotNull
    @Override
    public Wire readComment(@NotNull StringBuilder s) {
        s.setLength(0);
        return this;
    }

    /**
     * Value output for the CSV format. It extends {@link YamlValueOut} and rejects the
     * value kinds that this wire does not support: non-null type literals and
     * {@link Serializable} objects.
     */
    class CSVValueOut extends YamlValueOut {
        /**
         * Writes a null when {@code type} is {@code null}; any other type literal is rejected.
         *
         * @param type the type literal, or {@code null}.
         * @return the enclosing wire (only when {@code type} is {@code null}).
         * @throws UnsupportedOperationException if {@code type} is not {@code null}.
         */
        @NotNull
        @Override
        public CSVWire typeLiteral(@Nullable CharSequence type) {
            if (type == null)
                return (CSVWire) nu11();
            throw new UnsupportedOperationException("Type literals not supported in CSV, cannot write " + type);
        }

        /**
         * Always rejects the object.
         *
         * @param object the object that was asked to be written.
         * @return never returns normally.
         * @throws UnsupportedOperationException always.
         */
        @NotNull
        @Override
        public CSVWire marshallable(@NotNull Serializable object) {
            throw new UnsupportedOperationException("Serializable objects not supported in CSV, cannot write " + object);
        }
    }

    /**
     * Value input for the CSV format. It extends {@link TextValueIn} and overrides field
     * parsing (quoted and bare fields), row length detection and row-at-a-time reading.
     */
    class CSVValueIn extends TextValueIn {

        /**
         * Skips comments, blank space and line breaks, then reports whether any input remains.
         *
         * @return {@code true} if there are unread bytes after the padding.
         */
        @Override
        public boolean hasNext() {
            consumePaddingStart();
            return bytes.readRemaining() > 0;
        }

        /**
         * Reads the next field into {@code a}.
         * <ul>
         * <li>A field starting with {@code "} is parsed up to the closing quote and unescaped;
         * a directly following {@code "} and then a directly following {@code ,} are consumed.</li>
         * <li>A field starting with {@code '} is parsed up to the closing quote and unescaped;
         * a directly following {@code '} is consumed.</li>
         * <li>Any other field is parsed with the CSV end-of-text tester and has trailing
         * whitespace trimmed; with no input left, {@code a} is emptied.</li>
         * </ul>
         *
         * @param a   the target that receives the field text.
         * @param <T> a type that is both appendable and readable as characters.
         * @return {@code a}.
         */
        @Override
        @Nullable <T extends Appendable & CharSequence> T textTo0(@NotNull T a) {
            consumePadding();
            int ch = peekCode();

            switch (ch) {
                case '"': {
                    // skip the opening quote
                    bytes.readSkip(1);
                    if (use8bit)
                        bytes.parse8bit(a, getEscapingQuotes());
                    else
                        bytes.parseUtf8(a, getEscapingQuotes());
                    unescape(a);
                    int code = peekCode();
                    if (code == '"')
                        readCode();
                    // swallow the field separator that follows the quoted field, if any
                    code = peekCode();
                    if (code == ',')
                        readCode();
                    break;

                }
                case '\'': {
                    // skip the opening quote
                    bytes.readSkip(1);
                    if (use8bit)
                        bytes.parse8bit(a, TextWire.getEscapingSingleQuotes());
                    else
                        bytes.parseUtf8(a, TextWire.getEscapingSingleQuotes());
                    unescape(a);
                    int code = peekCode();
                    if (code == '\'')
                        readCode();
                    break;

                }
                default: {
                    if (bytes.readRemaining() > 0) {
                        // Bytes targets are always filled as 8-bit, whatever the wire's mode.
                        if (a instanceof Bytes || use8bit)
                            bytes.parse8bit(a, getEscapingCSVEndOfText());
                        else
                            bytes.parseUtf8(a, getEscapingCSVEndOfText());

                    } else {
                        AppendableUtil.setLength(a, 0);
                    }
                    // trim trailing spaces.
                    while (a.length() > 0) {
                        if (!Character.isWhitespace(a.charAt(a.length() - 1)))
                            break;
                        AppendableUtil.setLength(a, a.length() - 1);
                    }
                    break;
                }
            }

            // If the byte just consumed is one of END_CHARS, step back so it is seen again.
            // NOTE(review): presumably this leaves the line terminator for the caller - confirm.
            int prev = peekBack();
            if (END_CHARS.get(prev))
                bytes.readSkip(-1);
            return a;
        }

        /**
         * Measures the current row without consuming it: the number of bytes from the
         * current read position up to, but excluding, the next {@code '\r'}, {@code '\n'},
         * NUL, or the end of input. The read position is restored before returning.
         *
         * @return the length of the row in bytes.
         */
        @Override
        protected long readLengthMarshallable() {
            long start = bytes.readPosition();
            try {
                consumePadding();
                for (; ; ) {
                    // Record the position before the read: at end of input (-1) no byte is
                    // consumed, so "position after read - 1" would drop the row's last byte.
                    long before = bytes.readPosition();
                    int code = readCode();
                    switch (code) {
                        case '\r':
                        case '\n':
                        case 0:
                        case -1:
                            return before - start;
                    }
                }
            } finally {
                bytes.readPosition(start);
            }
        }

        /**
         * Reports whether another item follows in the current sequence. A leading
         * {@code ','} is consumed and counts as "yes"; otherwise there is another item
         * when the next code is positive and is not {@code ']'}.
         *
         * @return {@code true} if another sequence item is available.
         */
        @Override
        public boolean hasNextSequenceItem() {
            consumePadding();
            int ch = peekCode();
            if (ch == ',') {
                bytes.readSkip(1);
                return true;
            }
            return ch > 0 && ch != ']';
        }

        /**
         * Reads one row into {@code object}. The read limit is narrowed to the end of the
         * row while the object reads itself; afterwards the limit is restored and the read
         * position is placed at the end of the row, however much the object consumed.
         *
         * @param object the object to populate from the current row.
         * @return {@code false} if the value is null, {@code true} once the row has been read.
         * @throws InvalidMarshallableException if the object rejects the data it was given.
         */
        @Override
        public boolean marshallable(@NotNull ReadMarshallable object) throws InvalidMarshallableException {
            if (isNull())
                return false;
            pushState();
            final long len = readLengthMarshallable();

            final long limit = bytes.readLimit();
            final long position = bytes.readPosition();

            final long newLimit = position + len;
            try {
                // ensure that you can't read past the end of this marshallable object

                bytes.readLimit(newLimit);
                consumePadding();
                object.readMarshallable(CSVWire.this);
            } finally {
                bytes.readLimit(limit);
                bytes.readPosition(newLimit);
                popState();
            }

            consumePadding();
            return true;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy