All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.cyclopsgroup.caff.token.QuotedValueTokenizer Maven / Gradle / Ivy

The newest version!
package org.cyclopsgroup.caff.token;

/**
 * String tokenizer which split string into segments considering quotation and character escaping
 *
 * @author Jiaqi Guo
 */
public class QuotedValueTokenizer implements ValueTokenizer {
  private enum ParsingState {
    /**
     * Ready for escaping next character or end a quoted segment
     */
    ESCAPED_OR_QUOTE_END,
    /**
     * Quotation started
     */
    QUOTED,
    /**
     * Ready for new word
     */
    READY,
    /**
     * Word started without quotation
     */
    WORD_STARTED;
  }

  private final char delimiter;

  private final char quotation;

  /**
   * Default constructor that uses white space as delimiter and " as quotation character
   */
  public QuotedValueTokenizer() {
    this(' ', '\"');
  }

  /**
   * @param delimiter Delimiter character
   * @param quotation Quotation character
   */
  public QuotedValueTokenizer(char delimiter, char quotation) {
    this.delimiter = delimiter;
    this.quotation = quotation;
  }

  @Override
  public void parse(CharSequence input, TokenEventHandler handler) {
    ParsingState state = ParsingState.READY;
    StringBuilder buf = null;
    int wordStart = 0;
    for (int i = 0; i < input.length(); i++) {
      char c = input.charAt(i);
      switch (state) {
        case READY:
          assert buf == null;
          if (c == delimiter) {
            continue;
          } else if (c == quotation) {
            state = ParsingState.QUOTED;
            wordStart = i;
            buf = new StringBuilder();
          } else {
            state = ParsingState.WORD_STARTED;
            wordStart = i;
            buf = new StringBuilder();
            buf.append(c);
          }
          break;
        case WORD_STARTED:
          assert buf != null;
          if (c == delimiter) {
            state = ParsingState.READY;
            handler.handleEvent(new TokenEvent(buf.toString(), wordStart, i, true));
            buf = null;
          } else {
            buf.append(c);
          }
          break;
        case QUOTED:
          assert buf != null;
          if (c == quotation) {
            state = ParsingState.ESCAPED_OR_QUOTE_END;
          } else {
            buf.append(c);
          }
          break;
        case ESCAPED_OR_QUOTE_END:
          assert buf != null;
          if (c == delimiter) {
            state = ParsingState.READY;
            handler.handleEvent(new TokenEvent(buf.toString(), wordStart, i, true, true));
            buf = null;
          } else {
            buf.append(c);
            state = ParsingState.QUOTED;
          }
          break;
      }
    }
    if (buf != null) {
      handler.handleEvent(new TokenEvent(buf.toString(), wordStart, input.length(), false,
          state == ParsingState.QUOTED));
    }
  }

  public String escape(String output) {
    int d = output.indexOf(delimiter);
    int q = output.indexOf(quotation);

    if (d == -1 && q == -1) {
      return output;
    }
    StringBuffer sb = new StringBuffer().append(quotation);
    for (int i = 0, j = 0; i < output.length();) {
      j = output.indexOf(quotation, i);
      if (j == -1) {
        sb.append(output.substring(i));
        break;
      }
      sb.append(output.substring(i, j)).append(quotation).append(output.charAt(j));
      i = ++j;
    }
    sb.append(quotation);
    return sb.toString();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy