com.sonalake.utah.Parser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of utah-parser Show documentation
A Java library for parsing semi-structured text files
The newest version!
package com.sonalake.utah;

import com.sonalake.utah.config.Config;
import com.sonalake.utah.config.Delimiter;
import org.apache.commons.lang3.StringUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Map;
import java.util.TreeMap;

/**
 * Parses a semi-structured text file into records, as defined by a {@link Config}.
 *
 * <p>Lines are read one at a time from the supplied reader and accumulated until a line matches a
 * delimiter from the config; the accumulated text is then handed to the config to be turned into
 * a map of values. If the config declares a header delimiter, the header is consumed while the
 * parser is being built and its values are added to every record returned by {@link #next()}.
 */
public class Parser {

  /**
   * The config that recognises delimiters and turns raw text into values.
   */
  private final Config config;

  /**
   * The source of the data.
   */
  private final BufferedReader reader;

  /**
   * Values taken from the header; these are added to every record. Empty if there is no header.
   */
  // NOTE(review): raw type - the key/value types come from Config, which is not visible here;
  // parameterise this (and the return type of next()) once they are confirmed.
  private final TreeMap commonRecord;

  /**
   * A delimiter line that closed the previous record and must also open the next one,
   * or an empty string if there is nothing to carry over.
   */
  private String previousDelim;

  /**
   * The number of raw records (including the header, if any) loaded so far.
   */
  private int recordNumber;

  /**
   * Build the parser.
   *
   * @param config the config that identifies the delimiters and builds the records
   * @param in     the input stream, the client is responsible for closing this
   * @return a parser
   */
  public static Parser parse(Config config, Reader in) {
    return new Parser(config, in);
  }

  /**
   * Build the parser and, if the config has a header delimiter, consume the header.
   *
   * @param config the config
   * @param in     the input stream, the client is responsible for closing this
   */
  private Parser(Config config, Reader in) {
    this.config = config;
    this.reader = new BufferedReader(in);
    this.previousDelim = "";
    this.recordNumber = 0;
    this.commonRecord = new TreeMap();

    if (config.hasHeaderDelim()) {
      String header = getNextRecord(true);
      // an empty (or entirely blank) source has no header text at all; skip the build
      // rather than hand null to the config
      if (null != header) {
        commonRecord.putAll(config.buildHeader(header));
      }
    }
  }

  /**
   * Get the next record from the file.
   *
   * <p>The header values are put into the record last, so they replace any record value stored
   * under the same key.
   *
   * @return The next record, or null if there are none
   * @throws RuntimeException if the source cannot be read
   */
  public Map next() {
    String rawRecord = getNextRecord(false);
    if (null == rawRecord) {
      return null;
    }
    Map record = config.buildRecord(rawRecord);
    record.putAll(commonRecord);
    return record;
  }

  /**
   * Get the next raw record: the non-blank lines up to and including the next delimiter line,
   * or up to the end of the input if no further delimiter is found.
   *
   * @param isSelectingHeader True, if we're parsing the header, or false if we're parsing records
   * @return The next record, or null if there are none.
   * @throws RuntimeException if the source cannot be read
   */
  private String getNextRecord(boolean isSelectingHeader) {
    StringBuilder buffer = new StringBuilder();
    boolean isReaderFinished = false;
    boolean isRecordLoaded = false;
    // we may need to skip the first delim in some cases
    boolean wasDelimMatched = false;

    try {
      // loop through the file until we get to the record break
      while (!isRecordLoaded) {
        String currentLine = reader.readLine();
        if (null == currentLine) {
          // end of input: whatever has been buffered so far is the final record
          isReaderFinished = true;
          break;
        }

        // a delimiter retained from the previous record is the first line of this one
        if (StringUtils.isNotBlank(previousDelim)) {
          buffer.append(previousDelim).append('\n');
          previousDelim = "";
        }

        if (isSelectingHeader) {
          isRecordLoaded = config.matchesHeaderDelim(currentLine);
        } else if (config.matchesRecordDelim(currentLine)) {
          Delimiter applicableDelim = config.getApplicableDelim(currentLine);
          // if the delimiter says we're at the start of the record,
          // and this is the first record, we need to treat it differently
          boolean isFirstDelimOfInterest = 0 == recordNumber && !wasDelimMatched;
          if (applicableDelim.isDelimAtStartOfRecord() && isFirstDelimOfInterest) {
            // this delimiter opens the first record, so we don't stop here
            wasDelimMatched = true;
          } else {
            if (applicableDelim.isRetainDelim()) {
              previousDelim = currentLine;
            }
            isRecordLoaded = true;
          }
        }

        // blank lines are dropped; the delimiter line itself is kept in the record it closes
        if (StringUtils.isNotBlank(currentLine)) {
          buffer.append(currentLine).append('\n');
        }
      }
    } catch (IOException e) {
      throw new RuntimeException("Problem reading source", e);
    }

    if (isReaderFinished && buffer.length() == 0) {
      return null;
    }
    // NOTE(review): the header read made by the constructor also passes through here, so when a
    // header delimiter is configured recordNumber is already 1 before the first data record and
    // the first-record special case above cannot fire - confirm that this is intended.
    recordNumber++;
    return buffer.toString();
  }

}