All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.oneandone.sushi.csv.Format Maven / Gradle / Ivy

/**
 * Copyright 1&1 Internet AG, https://github.com/1and1/
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.oneandone.sushi.csv;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.List;

/**
 * Parser and Serializer for csv file lines.  
 * 
 * Note:
 * o whitespace is always significant because it's never stripped by OOCalc 
 *   (except for digit-only values).
 * o comments are not supported because OOCalc would consider them as strings.
 * o a line has at least one cell
 * 
 * You have to implement some pre- or post-processing if you want comments of whitespace handling.
 *
 *
 * Grammar
 * 
 * line    = cell (SEP cell)* newline ;
 * cell    = values 
 *         | QUOTE values QUOTE ;
 * values  = value (VSEP value)* 
 *         | EMPTY
 *         | NULL ;
 * value   = char* ;
 * char    = QUOTE QUOTE
 *         | ESC CH
 *         | SEP                               <-- aber nur wenn drumrum quotes stehen
 *         | CH \ QUOTE \ ESC \ SEP \ VSEP  ;
 * newline = CR | LF | CR LF ;
 * 
 * Terminals as configured for the format: 
 *   QUOTE, SEP, VSEP, NEWLINE, COMMENT, NULL, EMPTY, ESC
 *   
 * Other Terminals:
 *   CH: arbitrary unicode character
 */
public class Format {
    public final boolean merged;

    /** between cells */
    private final char separator;
    
    /** between values */
    private final char valueSeparator;

    private final String empty;
    
    private final String nul;
    
    /** escape special characters */
    private final char escape;
    
    /** to quote cells */
    private final char quote;

    private final String unquoted;
    
    public Format() {
        this(false);
    }
    
    public Format(boolean merged) {
        this(merged, '\\', '"');
    }
    public Format(boolean merged, char escape, char quote) {
        this(merged, "\n", ';', '|', "EMPTY", "NULL", escape, quote, "01234567890");
    }
    
    public Format(boolean merged, String newline, char separator, char valueSeparator, 
            String empty, String nul, char escape, char quote, String unquoted) {
        this.merged = merged;
        this.separator = separator;
        this.valueSeparator = valueSeparator;
        this.empty = "EMPTY";
        this.nul = "NULL";
        this.escape = escape;
        this.quote = quote;
        this.unquoted = unquoted;
    }
    
    public Line read(String line) throws CsvLineException {
        Line result;
        Source src;
        int c;
        
        result = new Line();
        src = new Source(line);
        while (true) {
            cell(src, result);
            c = src.peek();
            if (c == Source.END) {
                return result;
            }
            if (c != separator) {
                throw new CsvLineException("separator expected");
            }
            src.eat();
        }
    }

    private void cell(Source src, Line result) throws CsvLineException {
        boolean quoted;
        
        quoted = src.peek() == quote;
        if (quoted) {
            src.eat();
        }
        values(src, quoted, result);
        if (quoted) {
            if (src.peek() != quote) {
                throw new CsvLineException("quote not closed");
            }
            src.eat();
        }
    }

    private void values(Source src, boolean quoted, Line result) throws CsvLineException {
        List values;

        if (src.eat(nul, quoted ? quote : separator)) {
            result.addNull();
            return;
        }
        values = result.add();
        if (src.eat(empty, quoted ? quote : separator)) {
            return;
        }
        while (true) {
            value(src, quoted, values);
            if (src.peek() != valueSeparator) {
                return;
            }
            src.eat();
        }
    }

    private void value(Source src, boolean quoted, List values) throws CsvLineException { 
        StringBuilder builder;
        int c;
        
        builder = new StringBuilder();
        while (true) {
            c = src.peek();
            if (c == -1) {
                break;
            } else if (c == quote) {
                if (src.peekNext() == quote) {
                    src.eat();
                    builder.append(quote);
                } else {
                    break;
                }
            } else if (c == separator) {
                if (quoted) {
                    builder.append(c);
                } else {
                    break;
                }
            } else if (c == valueSeparator) {
                break;
            } else if (c == escape) {
                src.eat();
                c = src.peek();
                if (c == Source.END) {
                    throw new CsvLineException("tailing escape character " + escape);
                }
                builder.append((char) c);
            } else {
                builder.append((char) c);
            }
            src.eat();
        }
        values.add(builder.toString());
    }
    
    //--
    
    public String write(Line line) {
        StringWriter dest;
        
        dest = new StringWriter();
        try {
            write(line, dest);
        } catch (IOException e) {
            throw new RuntimeException("unused", e);
        }
        return dest.toString();
    }
    
    public void write(Line line, Writer dest) throws IOException {
        for (int i = 0, max = line.size(); i < max; i++) {
            if (i > 0) {
                dest.write(separator);
            }
            writeCell(line.get(i), dest);
        }
        dest.write('\n');
    }
    
    private void writeCell(List values, Writer dest) throws IOException {
        boolean quoted;
        char c;
        boolean first;

        if (values == null) {
            dest.write(nul);
            return;
        }
        if (values.size() == 0) {
            dest.write(empty);
            return;
        }
        quoted = values.size() != 1 || needsQuotes(values.get(0));
        if (quoted) {
            dest.write(quote);
        }
        first = true;
        for (String value : values) {
            if (first) {
                first = false;
            } else {
                dest.write(valueSeparator);
            }
            if (value.equals(nul)) {
                dest.write(escape);
                dest.write(nul);
            } else if (value.equals(empty)) {
                dest.write(escape);
                dest.write(empty);
            } else {
                for (int i = 0, max = value.length(); i < max; i++) {
                    c = value.charAt(i);
                    if (c == quote) { 
                        dest.write(quote);
                        dest.write(quote);
                    } else if (c == separator || c == valueSeparator || c == escape) {
                        dest.write(escape);
                        dest.write(c);
                    } else {
                        dest.write(c);
                    }
                }
            }
        }
        if (quoted) {
            dest.write(quote);
        }
    }

    private boolean needsQuotes(String value) {
        for (int i = 0, max = value.length(); i < max; i++) {
            if (unquoted.indexOf(value.charAt(i)) == -1) {
                return true;
            }
        }
        return false;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy