All downloads are free. The search and download features use the official Maven repository.

water.parser.avro.AvroParserProvider Maven / Gradle / Ivy

There is a newer version: 3.46.0.6
Show newest version
package water.parser.avro;

import water.DKV;
import water.Iced;
import water.Job;
import water.Key;
import water.exceptions.H2OIllegalArgumentException;
import water.fvec.ByteVec;
import water.fvec.Frame;
import water.parser.DefaultParserProviders;
import water.parser.ParseSetup;
import water.parser.Parser;
import water.parser.ParserInfo;
import water.parser.ParserProvider;

/**
 * Avro parser provider.
 *
 * <p>Registers the Avro parser under the name {@code "AVRO"} and builds the
 * parser and its setup objects by delegating to {@link AvroParser}.
 */
public class AvroParserProvider implements ParserProvider {

  /*
   * Setup for this parser: named "AVRO", with a priority 10 above
   * DefaultParserProviders.MAX_CORE_PRIO.
   * NOTE(review): the meaning of the two trailing boolean flags is defined by
   * ParserInfo's constructor, which is not visible here - confirm before changing.
   */
  static ParserInfo AVRO_INFO = new ParserInfo("AVRO", DefaultParserProviders.MAX_CORE_PRIO + 10, true, true);

  /**
   * Returns the shared descriptor of the Avro parser.
   *
   * @return the {@link #AVRO_INFO} instance
   */
  @Override
  public ParserInfo info() {
    return AVRO_INFO;
  }

  /**
   * Creates a new Avro parser for the given setup and job.
   *
   * @param setup  parse setup handed to the {@link AvroParser} constructor
   * @param jobKey key of the job, handed to the {@link AvroParser} constructor
   * @return a new {@link AvroParser}
   */
  @Override
  public Parser createParser(ParseSetup setup, Key jobKey) {
    return new AvroParser(setup, jobKey);
  }

  /**
   * Guesses the parse setup from the leading bytes of the input.
   *
   * <p>Only {@code bits} is used; every other argument is ignored and the
   * call is forwarded to {@link AvroParser#guessSetup(byte[])}.
   *
   * @param bits         leading bytes of the input
   * @param sep          ignored
   * @param ncols        ignored
   * @param singleQuotes ignored
   * @param checkHeader  ignored
   * @param columnNames  ignored
   * @param columnTypes  ignored
   * @param domains      ignored
   * @param naStrings    ignored
   * @return the setup produced by {@code AvroParser.guessSetup(bits)}
   */
  @Override
  public ParseSetup guessSetup(byte[] bits, byte sep, int ncols, boolean singleQuotes,
                               int checkHeader, String[] columnNames, byte[] columnTypes,
                               String[][] domains, String[][] naStrings) {
    return AvroParser.guessSetup(bits);
  }

  /**
   * Builds the Avro-specific parse setup from the header of the first input.
   *
   * <p>We need to get the header of the Avro file to configure the Avro parser.
   * The code expects that inputs are consistent and extracts the header only
   * from the first file. It also expects that files are not compressed.
   *
   * @param inputs        keys of the inputs to parse; must contain at least one key
   * @param requiredSetup setup requested by the caller; passed to the Avro header
   *                      extraction and to the resulting setup
   * @return an {@code AvroParser.AvroParseSetup} built from the extracted header,
   *         first block size and domains
   * @throws H2OIllegalArgumentException if {@code inputs} is null or empty, if no
   *         data is stored under the first key, or if extracting the Avro header
   *         (or building the setup) fails
   */
  @Override
  public ParseSetup createParserSetup(Key[] inputs, ParseSetup requiredSetup) {
    // Explicit validation instead of `assert`: assertions are disabled by default,
    // in which case a null/empty array would surface as a bare NPE or
    // ArrayIndexOutOfBoundsException on the index access below.
    if (inputs == null || inputs.length == 0) {
      throw new H2OIllegalArgumentException("Missing data", "Inputs cannot be empty!");
    }
    Key firstInput = inputs[0];
    Iced ice = DKV.getGet(firstInput);
    if (ice == null) throw new H2OIllegalArgumentException("Missing data", "Did not find any data under key " + firstInput);
    // The key holds either the raw byte vector itself or a Frame whose first vec is used.
    ByteVec bv = (ByteVec)(ice instanceof ByteVec ? ice : ((Frame)ice).vecs()[0]);
    byte [] bits = bv.getFirstBytes();

    try {
      AvroParser.AvroInfo avroInfo = AvroParser.extractAvroInfo(bits, requiredSetup);
      return new AvroParser.AvroParseSetup(requiredSetup, avroInfo.header, avroInfo.firstBlockSize, avroInfo.domains);
    } catch (Throwable e) {
      // Any failure here is reported as a bad input file; the original cause is preserved.
      throw new H2OIllegalArgumentException("Wrong data", "Cannot find Avro header in input file: " + firstInput, e);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy