All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.internal.csv.CSVPrinter Maven / Gradle / Ivy

There is a newer version: 9.6.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.internal.csv;

import java.io.IOException;
import java.io.Writer;

/**
 * Print values as a comma separated list.
 */
public class CSVPrinter {

  /** The place that the values get written. */
  protected final Writer out;

  /**
   * The CSV variation in use. Never null: the constructor substitutes
   * {@link CSVStrategy#DEFAULT_STRATEGY} when it is given null.
   */
  protected final CSVStrategy strategy;

  /**
   * True if we just began a new line, i.e. no value has been written on the
   * current line yet. Cleared by printSep() and set again by println().
   */
  protected boolean newLine = true;

  // Temporary scratch buffer; starts empty.
  // NOTE(review): presumably grown on demand by the String-printing methods - confirm against the rest of the class.
  protected char[] buf = new char[0];  // temporary buffer

  /**
   * Creates a printer that writes values to the given writer, formatted
   * according to the given CSVStrategy.
   *
   * Currently, only a pure encapsulation strategy or a pure escaping strategy
   * is supported.  Hybrid strategies (encapsulation and escaping with a
   * different character) are not supported.
   *
   * @param out writer to which to print.
   * @param strategy describes the CSV variation; when null,
   *                 {@link CSVStrategy#DEFAULT_STRATEGY} is used instead.
   */
  public CSVPrinter(Writer out, CSVStrategy strategy) {
    if (strategy == null) {
      // fall back to the default CSV variation
      strategy = CSVStrategy.DEFAULT_STRATEGY;
    }
    this.strategy = strategy;
    this.out = out;
  }
  
  // ======================================================
  //  printing implementation
  // ======================================================

  /**
   * Terminates the current line by writing the strategy's printer newline,
   * and records that the next value printed starts a new line. When nothing
   * has been printed on the current line, the result is a blank line.
   *
   * @throws IOException if the underlying writer fails.
   */
  public void println() throws IOException {
    this.out.write(this.strategy.getPrinterNewline());
    this.newLine = true;
  }

  /**
   * Flushes the underlying writer.
   *
   * @throws IOException if the underlying writer fails to flush.
   */
  public void flush() throws IOException {
    this.out.flush();
  }


  /**
   * Prints one complete line of delimiter-separated values.
   * Each value is handed to {@code print} in array order, and the line is
   * then terminated with {@link #println()}.
   *
   * @param values values to be outputted.
   * @throws IOException if the underlying writer fails.
   */
  public void println(String[] values) throws IOException {
    for (String value : values) {
      print(value);
    }
    println();
  }


  /**
   * Puts a comment among the comma separated values.
   * Comments always begin on a new line and occupy at least one full line.
   * The strategy's comment start character followed by a space is written at
   * the beginning of each line of the comment. A line break in the comment
   * text is any of {@code "\r\n"}, {@code "\r"} or {@code "\n"}.
   *
   * Nothing is written when the strategy has commenting disabled.
   *
   * @param comment the comment to output
   * @throws IOException if the underlying writer fails.
   */
  public void printlnComment(String comment) throws IOException {
    if (this.strategy.isCommentingDisabled()) {
      return;
    }
    // a comment never shares a line with values: finish the current line first
    if (!newLine) {
      println();
    }
    out.write(this.strategy.getCommentStart());
    out.write(' ');

    final int length = comment.length();
    int idx = 0;
    while (idx < length) {
      final char ch = comment.charAt(idx);
      if (ch == '\r' || ch == '\n') {
        // treat "\r\n" as a single line break by skipping the '\n'
        if (ch == '\r' && idx + 1 < length && comment.charAt(idx + 1) == '\n') {
          idx++;
        }
        // continue the comment on a fresh, prefixed line
        println();
        out.write(this.strategy.getCommentStart());
        out.write(' ');
      } else {
        out.write(ch);
      }
      idx++;
    }
    println();
  }


  /**
   * Prints a single value taken from a character array.
   *
   * When {@code checkForEscape} is true, the value goes through encapsulation
   * if the strategy has an encapsulator, otherwise through escaping if the
   * strategy has an escape character. In every other case the separator (if
   * needed) and the raw characters are written unchanged.
   *
   * @param value array holding the characters to print.
   * @param offset index of the first character to print.
   * @param len number of characters to print.
   * @param checkForEscape false to write the characters verbatim without
   *                       any encapsulation or escaping.
   * @throws IOException if the underlying writer fails.
   */
  public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException {
    if (checkForEscape) {
      if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) {
        printAndEncapsulate(value, offset, len);
        return;
      }
      if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) {
        printAndEscape(value, offset, len);
        return;
      }
    }

    // verbatim output: either requested by the caller or nothing is configured
    printSep();
    out.write(value, offset, len);
  }

  /**
   * Writes the strategy's delimiter before a value, unless the value is the
   * first one on the line; in that case only the new-line flag is cleared.
   *
   * @throws IOException if the underlying writer fails.
   */
  void printSep() throws IOException {
    if (!newLine) {
      out.write(this.strategy.getDelimiter());
      return;
    }
    // first value on the line: no delimiter, but later values need one
    newLine = false;
  }

  /**
   * Prints a value using the strategy's escape character.
   *
   * The separator is written first (if needed). Every carriage return, line
   * feed, delimiter and escape character in the value is then preceded by the
   * escape character; carriage return and line feed are additionally replaced
   * by the letters {@code r} and {@code n}. Runs of ordinary characters are
   * written in bulk.
   *
   * @param value array holding the characters to print.
   * @param offset index of the first character to print.
   * @param len number of characters to print.
   * @throws IOException if the underlying writer fails.
   */
  void printAndEscape(char[] value, int offset, int len) throws IOException {
    printSep();

    final char delimiter = this.strategy.getDelimiter();
    final char escapeChar = this.strategy.getEscape();
    final int limit = offset + len;

    int pendingFrom = offset;  // first character not yet written
    int cursor = offset;
    for (; cursor < limit; cursor++) {
      final char current = value[cursor];
      final boolean plain =
          current != '\r' && current != '\n' && current != delimiter && current != escapeChar;
      if (plain) {
        continue;
      }

      // flush the ordinary characters collected before this special one
      if (cursor > pendingFrom) {
        out.write(value, pendingFrom, cursor - pendingFrom);
      }

      out.write(escapeChar);
      if (current == '\n') {
        out.write('n');
      } else if (current == '\r') {
        out.write('r');
      } else {
        out.write(current);
      }

      // the next segment starts right after the escaped character
      pendingFrom = cursor + 1;
    }

    // flush whatever ordinary characters remain
    if (cursor > pendingFrom) {
      out.write(value, pendingFrom, cursor - pendingFrom);
    }
  }

  void printAndEncapsulate(char[] value, int offset, int len) throws IOException {
    boolean first = newLine;  // is this the first value on this line?
    boolean quote = false;
    int start = offset;
    int pos = offset;
    int end = offset + len;

    printSep();    

    char delim = this.strategy.getDelimiter();
    char encapsulator = this.strategy.getEncapsulator();

    if (len <= 0) {
      // always quote an empty token that is the first
      // on the line, as it may be the only thing on the
      // line. If it were not quoted in that case,
      // an empty line has no tokens.
      if (first) {
        quote = true;
      }
    } else {
      char c = value[pos];

      // Hmmm, where did this rule come from?
      if (first
          && (c < '0'
          || (c > '9' && c < 'A')
          || (c > 'Z' && c < 'a')
          || (c > 'z'))) {
        quote = true;
      // } else if (c == ' ' || c == '\f' || c == '\t') {
      } else if (c <= '#') {
        // Some other chars at the start of a value caused the parser to fail, so for now
        // encapsulate if we start in anything less than '#'.  We are being conservative
        // by including the default comment char too.
        quote = true;
      } else {
        while (pos < end) {
          c = value[pos];
          if (c=='\n' || c=='\r' || c==encapsulator || c==delim) {
            quote = true;
            break;
          }
          pos++;
        }

        if (!quote) {
          pos = end-1;
          c = value[pos];
          // if (c == ' ' || c == '\f' || c == '\t') {
          // Some other chars at the end caused the parser to fail, so for now
          // encapsulate if we end in anything less than ' '
          if (c <= ' ') {
            quote = true;
          }
        }
      }
    }

    if (!quote) {
      // no encapsulation needed - write out the original value
      out.write(value, offset, len);
      return;
    }

    // we hit something that needed encapsulation
    out.write(encapsulator);

    // Pick up where we left off: pos should be positioned on the first character that caused
    // the need for encapsulation.
    while (pos




© 2015 - 2024 Weber Informatics LLC | Privacy Policy