All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.xceptance.common.util.CsvUtilsDecode Maven / Gradle / Ivy

Go to download

XLT (Xceptance LoadTest) is an extensive load and performance test tool developed and maintained by Xceptance.

There is a newer version: 8.4.1
Show newest version
/*
 * Copyright (c) 2005-2023 Xceptance Software Technologies GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.xceptance.common.util;

import java.text.ParseException;

import com.xceptance.xlt.api.util.SimpleArrayList;
import com.xceptance.xlt.api.util.XltCharBuffer;

/**
 * The {@link CsvUtilsDecode} class provides helper methods to decode values from the CSV format.
 * This is the high performance and most efficient method. It will avoid copying data at all cost and move
 * through the cache very efficiently.
 * 
 * @author René Schwietzke
 * 
 * @since 7.0.0
 */
public final class CsvUtilsDecode
{
    /**
     * Character constant representing a comma.
     */
    private static final char COMMA = ',';

    /**
     * Character constant representing a double quote.
     */
    private static final char QUOTE_CHAR = '"';

    // ------------------------------------------------------------------
    // Bit flags describing the parser state. Several may be set at once.
    // ------------------------------------------------------------------

    /**
     * No flag set. Currently not used by the parser.
     */
    @SuppressWarnings("unused")
    private static final int NONE = 0;

    /**
     * Set when an opening quote was seen, cleared again when the matching closing quote was consumed.
     */
    private static final int IN_QUOTES = 1;

    /**
     * We are at the beginning of a field. Set initially and after every separator; cleared as soon as a regular
     * character was taken over or a quoted section was closed.
     */
    private static final int START_MODE = 2;

    /**
     * Reserved. Currently not used by the parser.
     */
    @SuppressWarnings("unused")
    private static final int SEPARATOR_EXPECTED = 4;

    /**
     * The most recently consumed character was a field separator.
     */
    private static final int LAST_WAS_SEPARATOR = 8;

    /**
     * A quoted section was just closed. Currently only set and cleared, never evaluated.
     */
    private static final int JUST_LEFT_QUOTES = 16;

    /**
     * There is pending field content that still has to be added to the result once the input is exhausted.
     */
    private static final int COPY_REST = 32;

    /**
     * Default constructor. Declared private to prevent external instantiation.
     */
    private CsvUtilsDecode()
    {
    }

    /**
     * Decodes the given CSV-encoded data record and returns the plain unquoted fields. Uses a comma as the field
     * separator.
     * 
     * @param s
     *            the CSV-encoded data record
     * @return the plain fields
     */
    public static SimpleArrayList<XltCharBuffer> parse(final String s)
    {
        return parse(new SimpleArrayList<>(32), XltCharBuffer.valueOf(s), COMMA);
    }

    /**
     * Decodes the given CSV-encoded data record using the given field separator and appends the plain unquoted
     * fields to the passed list.
     * <p>
     * The parser is lenient: a quoted section that is never closed does not raise an error, the remaining input
     * simply becomes the last field. Characters following a closing quote are taken over as part of the field;
     * whitespace or other garbage between a closing quote and the next separator is not skipped.
     * <p>
     * <b>Note:</b> to avoid copying, the source buffer is modified in place. Whenever quotes have to be removed,
     * the remaining characters of the field are moved towards the front of the buffer via
     * {@link XltCharBuffer#put}. The fields added to the list are obtained from
     * {@link XltCharBuffer#viewFromTo} on that same buffer.
     * 
     * @param result
     *            the list to append the decoded fields to; for memory efficiency, it is handed in instead of being
     *            created here
     * @param src
     *            the buffer to read from; its content is modified by this method
     * @param fieldSeparator
     *            the field separator to use
     * @return the passed list {@code result}, with one entry appended per decoded field
     */
    public static SimpleArrayList<XltCharBuffer> parse(final SimpleArrayList<XltCharBuffer> result, final XltCharBuffer src, final char fieldSeparator)
    {
        final int size = src.length();

        int state = START_MODE;
        int pos = 0;    // read cursor
        int start = 0;  // index at which the current field begins
        int offset = 0; // write cursor, also the exclusive end of the current field

        Main:
        while (pos < size)
        {
            final char c = src.charAt(pos);

            // we have seen a character, so there is potentially something to emit at the end
            state |= COPY_REST;
            state &= ~LAST_WAS_SEPARATOR;

            // do we have a quote?
            if (c == QUOTE_CHAR)
            {
                // only the opening quote is handled here, everything else is handled in the inner loop
                state |= IN_QUOTES;

                pos++;

                // when the field starts with the quote, we simply move the field start behind it,
                // so nothing needs to be shifted for the opening quote
                if ((state & START_MODE) == START_MODE)
                {
                    offset++;
                    start = offset;
                }

                // now read until we leave the quoted section again or exhaust the buffer
                InQuotes:
                while (pos < size)
                {
                    final char c2 = src.charAt(pos);

                    if (c2 == QUOTE_CHAR)
                    {
                        // this is either the closing quote or a quoted quote, peek at the next character;
                        // a result of 0 is taken to mean that there is no further character
                        final char peakedChar = src.peakAhead(pos + 1);
                        if (peakedChar == 0)
                        {
                            // closing quote at the very end of the input; offset already marks the end
                            // of the field because we only peeked and did not move the write cursor
                            state |= COPY_REST;

                            break Main;
                        }

                        if (peakedChar == QUOTE_CHAR)
                        {
                            // a quoted quote: keep one quote and jump over both. From now on the write
                            // cursor lags behind, so everything that follows in this field has to be moved
                            src.put(offset, c2);

                            offset++;
                            pos += 2;

                            continue InQuotes;
                        }

                        // the closing quote, jump over it without copying
                        pos++;

                        state &= ~(IN_QUOTES | START_MODE);
                        state |= JUST_LEFT_QUOTES;

                        continue Main;
                    }

                    // no quote, so this is just text in quotes; move it up if the cursors differ
                    if (pos != offset)
                    {
                        src.put(offset, c2);
                    }
                    pos++;
                    offset++;

                    state &= ~START_MODE;
                }

                // if we got here, the quotes were never closed;
                // deal with it in a soft way and emit what we have as the last field
                state |= COPY_REST;

                break Main;
            }

            if (c == fieldSeparator)
            {
                // the field is complete, nothing is pending anymore
                state &= ~(JUST_LEFT_QUOTES | COPY_REST);

                // we saw a separator and start a new field
                state |= (LAST_WAS_SEPARATOR | START_MODE);

                result.add(src.viewFromTo(start, offset));

                // the next field starts right behind the separator, both cursors are in sync again
                pos++;
                offset = pos;
                start = pos;

                continue Main;
            }

            // Skipping whitespace or garbage between a closing quote and the next separator is not
            // supported because we are in control of our data and do not have whitespace around the
            // separator. Such characters are taken over as regular field content.

            // move the char up if we have to
            if (pos != offset)
            {
                src.put(offset, c);
            }

            pos++;
            offset++;

            state &= ~START_MODE;
        }

        if ((state & COPY_REST) == COPY_REST)
        {
            // there is a pending last field
            result.add(src.viewFromTo(start, offset));
        }
        else if ((state & LAST_WAS_SEPARATOR) == LAST_WAS_SEPARATOR)
        {
            // we had a separator at the end, so we have an empty field to add
            result.add(XltCharBuffer.empty());
        }
        else if (size == 0)
        {
            // the rare case of an empty input, which yields a single empty field
            result.add(XltCharBuffer.empty());
        }

        return result;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy