All downloads are free. Search and download functionality uses the official Maven repository.

com.adgear.anoa.read.AvroStreams Maven / Gradle / Ivy

package com.adgear.anoa.read;

import com.adgear.anoa.Anoa;
import com.adgear.anoa.AnoaHandler;
import com.fasterxml.jackson.core.JsonParser;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.JsonDecoder;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecord;
import org.jooq.lambda.Unchecked;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.stream.Stream;

/**
 * Utility class for deserializing Avro {@link org.apache.avro.generic.GenericRecord} or {@link
 * org.apache.avro.specific.SpecificRecord} instances as a {@link java.util.stream.Stream}.
 */
final public class AvroStreams {

  private AvroStreams() {
  }

  /**
   * @param schema      Avro record schema
   * @param inputStream data source
   */
  static public Stream binary(
      Schema schema,
      InputStream inputStream) {
    return binary(new GenericDatumReader<>(schema), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema      Avro record schema
   * @param inputStream data source
   * @param          Metadata type
   */
  static public  Stream> binary(
      AnoaHandler anoaHandler,
      Schema schema,
      InputStream inputStream) {
    return binary(anoaHandler, new GenericDatumReader<>(schema), inputStream);
  }

  /**
   * @param writer      Avro schema with which the record was originally serialized
   * @param reader      Avro schema to use for deserialization
   * @param inputStream data source
   */
  static public Stream binary(
      Schema writer,
      Schema reader,
      InputStream inputStream) {
    return binary(new GenericDatumReader<>(writer, reader), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param writer      Avro schema with which the record was originally serialized
   * @param reader      Avro schema to use for deserialization
   * @param inputStream data source
   * @param          Metadata type
   */
  static public  Stream> binary(
      AnoaHandler anoaHandler,
      Schema writer,
      Schema reader,
      InputStream inputStream) {
    return binary(anoaHandler, new GenericDatumReader<>(writer, reader), inputStream);
  }

  /**
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param          Avro SpecificData record type
   */
  static public  Stream binary(
      Class recordClass,
      InputStream inputStream) {
    return binary(new SpecificDatumReader<>(recordClass), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param          Avro SpecificData record type
   * @param          Metadata type
   */
  static public  Stream> binary(
      AnoaHandler anoaHandler,
      Class recordClass,
      InputStream inputStream) {
    return binary(anoaHandler, new SpecificDatumReader<>(recordClass), inputStream);
  }

  static  Stream binary(
      DatumReader reader,
      InputStream inputStream) {
    final BinaryDecoder d = DecoderFactory.get().binaryDecoder(inputStream, null);
    return LookAheadIteratorFactory
        .avro(reader, d, Unchecked.supplier(d::isEnd), inputStream).asStream();
  }

  static  Stream> binary(
      AnoaHandler anoaHandler,
      DatumReader reader,
      InputStream inputStream) {
    final BinaryDecoder d = DecoderFactory.get().binaryDecoder(inputStream, null);
    return LookAheadIteratorFactory
        .avro(anoaHandler, reader, d, Unchecked.supplier(d::isEnd), inputStream).asStream();
  }

  /**
   * @param schema      Avro record schema
   * @param inputStream data source
   */
  static public Stream json(
      Schema schema,
      InputStream inputStream) {
    return json(new GenericDatumReader<>(schema), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema      Avro record schema
   * @param inputStream data source
   * @param          Metadata type
   */
  static public  Stream> json(
      AnoaHandler anoaHandler,
      Schema schema,
      InputStream inputStream) {
    return json(anoaHandler, new GenericDatumReader<>(schema), inputStream);
  }

  /**
   * @param writer      Avro schema with which the record was originally serialized
   * @param reader      Avro schema to use for deserialization
   * @param inputStream data source
   */
  static public Stream json(
      Schema writer,
      Schema reader,
      InputStream inputStream) {
    return json(new GenericDatumReader<>(writer, reader), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param writer      Avro schema with which the record was originally serialized
   * @param reader      Avro schema to use for deserialization
   * @param inputStream data source
   * @param          Metadata type
   */
  static public  Stream> json(
      AnoaHandler anoaHandler,
      Schema writer,
      Schema reader,
      InputStream inputStream) {
    return json(anoaHandler, new GenericDatumReader<>(writer, reader), inputStream);
  }

  /**
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param          Avro SpecificData record type
   */
  static public  Stream json(
      Class recordClass,
      InputStream inputStream) {
    return json(new SpecificDatumReader<>(recordClass), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param          Avro SpecificData record type
   * @param          Metadata type
   */
  static public  Stream> json(
      AnoaHandler anoaHandler,
      Class recordClass,
      InputStream inputStream) {
    return json(anoaHandler, new SpecificDatumReader<>(recordClass), inputStream);
  }

  static  Stream json(
      GenericDatumReader reader,
      InputStream inputStream) {
    final JsonDecoder decoder;
    try {
      decoder = DecoderFactory.get().jsonDecoder(reader.getExpected(), inputStream);
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
    return LookAheadIteratorFactory.avro(reader, decoder, () -> false, inputStream).asStream();
  }

  static  Stream> json(
      AnoaHandler anoaHandler,
      GenericDatumReader reader,
      InputStream inputStream) {
    final JsonDecoder decoder;
    try {
      decoder = DecoderFactory.get().jsonDecoder(reader.getExpected(), inputStream);
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
    return LookAheadIteratorFactory
        .avro(anoaHandler, reader, decoder, () -> false, inputStream).asStream();
  }

  /**
   * @param schema      Avro record schema
   * @param inputStream data source
   */
  static public Stream batch(
      Schema schema,
      InputStream inputStream) {
    try {
      return batch(new DataFileStream<>(inputStream, new GenericDatumReader<>(schema)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema      Avro record schema
   * @param inputStream data source
   * @param          Metadata type
   */
  static public  Stream> batch(
      AnoaHandler anoaHandler,
      Schema schema,
      InputStream inputStream) {
    try {
      return batch(anoaHandler,
                   new DataFileStream<>(inputStream, new GenericDatumReader<>(schema)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param schema Avro record schema
   * @param file   data source
   */
  static public Stream batch(
      Schema schema,
      File file) {
    try {
      return batch(new DataFileReader<>(file, new GenericDatumReader<>(schema)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema      Avro record schema
   * @param file        data source
   * @param          Metadata type
   */
  static public  Stream> batch(
      AnoaHandler anoaHandler,
      Schema schema,
      File file) {
    try {
      return batch(anoaHandler, new DataFileReader<>(file, new GenericDatumReader<>(schema)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param inputStream data source
   */
  static public Stream batch(
      InputStream inputStream) {
    return batch((Schema) null, inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param inputStream data source
   * @param          Metadata type
   */
  static public  Stream> batch(
      AnoaHandler anoaHandler,
      InputStream inputStream) {
    return batch(anoaHandler, (Schema) null, inputStream);
  }

  /**
   * @param file data source
   */
  static public Stream batch(
      File file) {
    return batch((Schema) null, file);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param file        data source
   * @param          Metadata type
   */
  static public  Stream> batch(
      AnoaHandler anoaHandler,
      File file) {
    return batch(anoaHandler, (Schema) null, file);
  }

  /**
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param          Avro SpecificData record type
   */
  static public  Stream batch(
      Class recordClass,
      InputStream inputStream) {
    final DataFileStream dataFileStream;
    try {
      dataFileStream = new DataFileStream<>(inputStream, new SpecificDatumReader<>(recordClass));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
    return batch(dataFileStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param          Avro SpecificData record type
   * @param          Metadata type
   */
  static public  Stream> batch(
      AnoaHandler anoaHandler,
      Class recordClass,
      InputStream inputStream) {
    final DataFileStream dataFileStream;
    try {
      dataFileStream = new DataFileStream<>(inputStream, new SpecificDatumReader<>(recordClass));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
    return batch(anoaHandler, dataFileStream);
  }


  /**
   * @param recordClass Avro SpecificRecord class object
   * @param file        data source
   * @param          Avro SpecificData record type
   */
  static public  Stream batch(
      Class recordClass,
      File file) {
    try {
      return batch(new DataFileReader<>(file, new SpecificDatumReader<>(recordClass)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param file        data source
   * @param          Avro SpecificData record type
   * @param          Metadata type
   */
  static public  Stream> batch(
      AnoaHandler anoaHandler,
      Class recordClass,
      File file) {
    try {
      return batch(anoaHandler, new DataFileReader<>(file, new SpecificDatumReader<>(recordClass)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param dataFileStream data source
   * @param             Avro record type
   */
  static public  Stream batch(
      DataFileStream dataFileStream) {
    return LookAheadIteratorFactory.avro(dataFileStream).asStream();
  }

  /**
   * @param anoaHandler    {@code AnoaHandler} instance to use for exception handling
   * @param dataFileStream data source
   * @param             Avro record type
   * @param             Metadata type
   */
  static public  Stream> batch(
      AnoaHandler anoaHandler,
      DataFileStream dataFileStream) {
    return LookAheadIteratorFactory.avro(anoaHandler, dataFileStream).asStream();
  }

  /**
   * @param schema        Avro record schema
   * @param jacksonParser JsonParser instance from which to read
   */
  static public Stream jackson(
      Schema schema,
      JsonParser jacksonParser) {
    return new AvroReader.GenericReader(schema).stream(jacksonParser);
  }


  /**
   * @param schema        Avro record schema
   * @param jacksonParser JsonParser instance from which to read
   */
  static public Stream jacksonStrict(
      Schema schema,
      JsonParser jacksonParser) {
    return new AvroReader.GenericReader(schema).streamStrict(jacksonParser);
  }

  /**
   * @param anoaHandler   {@code AnoaHandler} instance to use for exception handling
   * @param schema        Avro record schema
   * @param jacksonParser JsonParser instance from which to read
   * @param            Metadata type
   */
  static public  Stream> jackson(
      AnoaHandler anoaHandler,
      Schema schema,
      JsonParser jacksonParser) {
    return new AvroReader.GenericReader(schema).stream(anoaHandler, jacksonParser);
  }

  /**
   * @param anoaHandler   {@code AnoaHandler} instance to use for exception handling
   * @param schema        Avro record schema
   * @param jacksonParser JsonParser instance from which to read
   * @param            Metadata type
   */
  static public  Stream> jacksonStrict(
      AnoaHandler anoaHandler,
      Schema schema,
      JsonParser jacksonParser) {
    return new AvroReader.GenericReader(schema).streamStrict(anoaHandler, jacksonParser);
  }

  /**
   * @param recordClass   Avro SpecificRecord class object
   * @param jacksonParser JsonParser instance from which to read
   * @param            Avro SpecificData record type
   */
  static public  Stream jackson(
      Class recordClass,
      JsonParser jacksonParser) {
    return new AvroReader.SpecificReader<>(recordClass).stream(jacksonParser);
  }

  /**
   * @param recordClass   Avro SpecificRecord class object
   * @param jacksonParser JsonParser instance from which to read
   * @param            Avro SpecificData record type
   */
  static public  Stream jacksonStrict(
      Class recordClass,
      JsonParser jacksonParser) {
    return new AvroReader.SpecificReader<>(recordClass).streamStrict(jacksonParser);
  }

  /**
   * @param anoaHandler   {@code AnoaHandler} instance to use for exception handling
   * @param recordClass   Avro SpecificRecord class object
   * @param jacksonParser JsonParser instance from which to read
   * @param            Avro SpecificData record type
   * @param            Metadata type
   */
  static public  Stream> jackson(
      AnoaHandler anoaHandler,
      Class recordClass,
      JsonParser jacksonParser) {
    return new AvroReader.SpecificReader<>(recordClass).stream(anoaHandler, jacksonParser);
  }

  /**
   * @param anoaHandler   {@code AnoaHandler} instance to use for exception handling
   * @param recordClass   Avro SpecificRecord class object
   * @param jacksonParser JsonParser instance from which to read
   * @param            Avro SpecificData record type
   * @param            Metadata type
   */
  static public  Stream> jacksonStrict(
      AnoaHandler anoaHandler,
      Class recordClass,
      JsonParser jacksonParser) {
    return new AvroReader.SpecificReader<>(recordClass).stream(anoaHandler, jacksonParser);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy