All downloads are free. The search and download features use the official Maven repository.

com.adgear.anoa.read.AvroStreams Maven / Gradle / Ivy

Go to download

Core classes for Anoa library, which aims to be a safe, convenient and fast record de/serialization wrapper for the Avro, Thrift and Jackson libraries, using the functional idioms of Java 8. The anoa-core module tries to keep upstream dependencies to a minimum.

There is a newer version: 3.1.2
Show newest version
package com.adgear.anoa.read;

import com.adgear.anoa.Anoa;
import com.adgear.anoa.AnoaHandler;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.TreeNode;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.JsonDecoder;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecord;
import org.jooq.lambda.Unchecked;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.stream.Stream;

/**
 * Utility class for deserializing Avro {@link org.apache.avro.generic.GenericRecord} or
 * {@link org.apache.avro.specific.SpecificRecord} instances as a {@link java.util.stream.Stream}.
 */
public class AvroStreams {

  protected AvroStreams() {
  }

  /**
   * @param schema Avro record schema
   * @param inputStream data source
   */
  static public Stream binary(
      /*@NonNull*/ Schema schema,
      /*@NonNull*/ InputStream inputStream) {
    return binary(new GenericDatumReader<>(schema), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema Avro record schema
   * @param inputStream data source
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> binary(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Schema schema,
      /*@NonNull*/ InputStream inputStream) {
    return binary(anoaHandler, new GenericDatumReader<>(schema), inputStream);
  }

  /**
   * @param writer Avro schema with which the record was originally serialized 
   * @param reader Avro schema to use for deserialization
   * @param inputStream data source
   */
  static public Stream binary(
      /*@NonNull*/ Schema writer,
      /*@NonNull*/ Schema reader,
      /*@NonNull*/ InputStream inputStream) {
    return binary(new GenericDatumReader<>(writer, reader), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param writer Avro schema with which the record was originally serialized 
   * @param reader Avro schema to use for deserialization
   * @param inputStream data source
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> binary(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Schema writer,
      /*@NonNull*/ Schema reader,
      /*@NonNull*/ InputStream inputStream) {
    return binary(anoaHandler, new GenericDatumReader<>(writer, reader), inputStream);
  }

  /**
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param  Avro SpecificData record type
   */
  static public  /*@NonNull*/ Stream binary(
      /*@NonNull*/ Class recordClass,
      /*@NonNull*/ InputStream inputStream) {
    return binary(new SpecificDatumReader<>(recordClass), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param  Avro SpecificData record type
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> binary(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Class recordClass,
      /*@NonNull*/ InputStream inputStream) {
    return binary(anoaHandler, new SpecificDatumReader<>(recordClass), inputStream);
  }

  static  /*@NonNull*/ Stream binary(
      /*@NonNull*/ GenericDatumReader reader,
      /*@NonNull*/ InputStream inputStream) {
    final BinaryDecoder d = DecoderFactory.get().binaryDecoder(inputStream, null);
    return LookAheadIteratorFactory
        .avro(reader, d, Unchecked.supplier(d::isEnd), inputStream).asStream();
  }

  static  /*@NonNull*/ Stream> binary(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ GenericDatumReader reader,
      /*@NonNull*/ InputStream inputStream) {
    final BinaryDecoder d = DecoderFactory.get().binaryDecoder(inputStream, null);
    return LookAheadIteratorFactory
        .avro(anoaHandler, reader, d, Unchecked.supplier(d::isEnd), inputStream).asStream();
  }

  /**
   * @param schema Avro record schema
   * @param inputStream data source
   */
  static public /*@NonNull*/ Stream json(
      /*@NonNull*/ Schema schema,
      /*@NonNull*/ InputStream inputStream) {
    return json(new GenericDatumReader<>(schema), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema Avro record schema
   * @param inputStream data source
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> json(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Schema schema,
      /*@NonNull*/ InputStream inputStream) {
    return json(anoaHandler, new GenericDatumReader<>(schema), inputStream);
  }

  /**
   * @param writer Avro schema with which the record was originally serialized 
   * @param reader Avro schema to use for deserialization
   * @param inputStream data source
   */
  static public /*@NonNull*/ Stream json(
      /*@NonNull*/ Schema writer,
      /*@NonNull*/ Schema reader,
      /*@NonNull*/ InputStream inputStream) {
    return json(new GenericDatumReader<>(writer, reader), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param writer Avro schema with which the record was originally serialized 
   * @param reader Avro schema to use for deserialization
   * @param inputStream data source
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> json(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Schema writer,
      /*@NonNull*/ Schema reader,
      /*@NonNull*/ InputStream inputStream) {
    return json(anoaHandler, new GenericDatumReader<>(writer, reader), inputStream);
  }

  /**
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param  Avro SpecificData record type
   */
  static public  /*@NonNull*/ Stream json(
      /*@NonNull*/ Class recordClass,
      /*@NonNull*/ InputStream inputStream) {
    return json(new SpecificDatumReader<>(recordClass), inputStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param  Avro SpecificData record type
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> json(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Class recordClass,
      /*@NonNull*/ InputStream inputStream) {
    return json(anoaHandler, new SpecificDatumReader<>(recordClass), inputStream);
  }

  static  /*@NonNull*/ Stream json(
      /*@NonNull*/ GenericDatumReader reader,
      /*@NonNull*/ InputStream inputStream) {
    final JsonDecoder decoder;
    try {
      decoder = DecoderFactory.get().jsonDecoder(reader.getExpected(), inputStream);
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
    return LookAheadIteratorFactory.avro(reader, decoder, () -> false, inputStream).asStream();
  }

  static  /*@NonNull*/ Stream> json(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ GenericDatumReader reader,
      /*@NonNull*/ InputStream inputStream) {
    final JsonDecoder decoder;
    try {
      decoder = DecoderFactory.get().jsonDecoder(reader.getExpected(), inputStream);
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
    return LookAheadIteratorFactory
        .avro(anoaHandler, reader, decoder, () -> false, inputStream).asStream();
  }

  /**
   * @param inputStream data source
   */
  static public /*@NonNull*/ Stream batch(
      /*@NonNull*/ InputStream inputStream) {
    try {
      return batch(new DataFileStream<>(inputStream, new GenericDatumReader<>()));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param inputStream data source
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> batch(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ InputStream inputStream) {
    try {
      return batch(anoaHandler, new DataFileStream<>(inputStream, new GenericDatumReader<>()));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param file data source
   */
  static public /*@NonNull*/ Stream batch(
      /*@NonNull*/ File file) {
    try {
      return batch(new DataFileReader<>(file, new GenericDatumReader()));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param file data source
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> batch(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ File file) {
    try {
      return batch(anoaHandler, new DataFileReader<>(file, new GenericDatumReader<>()));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param schema Avro record schema
   * @param inputStream data source
   */
  static public Stream batch(
      /*@Nullable*/ Schema schema,
      /*@NonNull*/ InputStream inputStream) {
    try {
      return batch(new DataFileStream<>(inputStream, new GenericDatumReader<>(schema)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema Avro record schema
   * @param inputStream data source
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> batch(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@Nullable*/ Schema schema,
      /*@NonNull*/ InputStream inputStream) {
    try {
      return batch(anoaHandler,
                   new DataFileStream<>(inputStream, new GenericDatumReader<>(schema)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param schema Avro record schema
   * @param file data source
   */
  static public Stream batch(
      /*@Nullable*/ Schema schema,
      /*@NonNull*/ File file) {
    try {
      return batch(new DataFileReader<>(file, new GenericDatumReader<>(schema)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema Avro record schema
   * @param file data source
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> batch(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@Nullable*/ Schema schema,
      /*@NonNull*/ File file) {
    try {
      return batch(anoaHandler, new DataFileReader<>(file, new GenericDatumReader<>(schema)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param  Avro SpecificData record type
   */
  static public  /*@NonNull*/ Stream batch(
      /*@NonNull*/ Class recordClass,
      /*@NonNull*/ InputStream inputStream) {
    final DataFileStream dataFileStream;
    try {
      dataFileStream = new DataFileStream<>(inputStream, new SpecificDatumReader<>(recordClass));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
    return batch(dataFileStream);
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param inputStream data source
   * @param  Avro SpecificData record type
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> batch(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Class recordClass,
      /*@NonNull*/ InputStream inputStream) {
    final DataFileStream dataFileStream;
    try {
      dataFileStream = new DataFileStream<>(inputStream, new SpecificDatumReader<>(recordClass));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
    return batch(anoaHandler, dataFileStream);
  }


  /**
   * @param recordClass Avro SpecificRecord class object
   * @param file data source
   * @param  Avro SpecificData record type
   */
  static public  /*@NonNull*/ Stream batch(
      /*@NonNull*/ Class recordClass,
      /*@NonNull*/ File file) {
    try {
      return batch(new DataFileReader<>(file, new SpecificDatumReader<>(recordClass)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param file data source
   * @param  Avro SpecificData record type
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> batch(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Class recordClass,
      /*@NonNull*/ File file) {
    try {
      return batch(anoaHandler, new DataFileReader<>(file, new SpecificDatumReader<>(recordClass)));
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  /**
   * @param dataFileStream data source
   * @param  Avro record type
   */
  static public  /*@NonNull*/ Stream batch(
      /*@NonNull*/ DataFileStream dataFileStream) {
    return LookAheadIteratorFactory.avro(dataFileStream).asStream();
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param dataFileStream data source
   * @param  Avro record type
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> batch(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ DataFileStream dataFileStream) {
    return LookAheadIteratorFactory.avro(anoaHandler, dataFileStream).asStream();
  }

  /**
   * @param schema Avro record schema
   * @param strict enable strict type checking
   * @param jacksonParser JsonParser instance from which to read
   */
  static public Stream jackson(
      /*@NonNull*/ Schema schema,
      boolean strict,
      /*@NonNull*/ JsonParser jacksonParser) {
    return LookAheadIteratorFactory.jackson(jacksonParser).asStream()
        .map(TreeNode::traverse)
        .map(AvroDecoders.jackson(schema, strict));
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param schema Avro record schema
   * @param strict enable strict type checking
   * @param jacksonParser JsonParser instance from which to read
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> jackson(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Schema schema,
      boolean strict,
      /*@NonNull*/ JsonParser jacksonParser) {
    return LookAheadIteratorFactory.jackson(anoaHandler, jacksonParser).asStream()
        .map(anoaHandler.function(TreeNode::traverse))
        .map(AvroDecoders.jackson(anoaHandler, schema, strict));
  }

  /**
   * @param recordClass Avro SpecificRecord class object
   * @param strict enable strict type checking
   * @param jacksonParser JsonParser instance from which to read
   * @param  Avro SpecificData record type
   */
  static public  /*@NonNull*/ Stream jackson(
      /*@NonNull*/ Class recordClass,
      boolean strict,
      /*@NonNull*/ JsonParser jacksonParser) {
    return LookAheadIteratorFactory.jackson(jacksonParser).asStream()
        .map(TreeNode::traverse)
        .map(AvroDecoders.jackson(recordClass, strict));
  }

  /**
   * @param anoaHandler {@code AnoaHandler} instance to use for exception handling
   * @param recordClass Avro SpecificRecord class object
   * @param strict enable strict type checking
   * @param jacksonParser JsonParser instance from which to read
   * @param  Avro SpecificData record type
   * @param  Metadata type
   */
  static public  /*@NonNull*/ Stream> jackson(
      /*@NonNull*/ AnoaHandler anoaHandler,
      /*@NonNull*/ Class recordClass,
      boolean strict,
      /*@NonNull*/ JsonParser jacksonParser) {
    return LookAheadIteratorFactory.jackson(anoaHandler, jacksonParser).asStream()
        .map(anoaHandler.function(TreeNode::traverse))
        .map(AvroDecoders.jackson(anoaHandler, recordClass, strict));
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy