All downloads are free. Search and download functionality uses the official Maven repository.

net.maizegenetics.analysis.avro.ExportAvro Maven / Gradle / Ivy

/*
 *  ExportAvro
 * 
 *  Created on Nov 3, 2016
 */
package net.maizegenetics.analysis.avro;

import java.io.File;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.ImportUtils;
import net.maizegenetics.util.LoggingUtils;
import net.maizegenetics.util.Utils;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 * Utility for writing a {@link GenotypeTable} to an Avro data file.
 *
 * <p>The file holds a single "tassel" record with three fields: "taxa",
 * "positions" and "genotype". The "genotype" field is itself a record with
 * one field per block of taxa and sites, stepping through both dimensions in
 * increments of {@code AvroConstants.GENOTYPE_BLOCK_SIZE}.
 *
 * @author Terry Casstevens
 */
public class ExportAvro {

    private static final Logger myLogger = LogManager.getLogger(ExportAvro.class);

    /** Namespace used for every schema built by this class. */
    private static final String SCHEMA_NAMESPACE = "net.maizegenetics";

    private ExportAvro() {
        // utility
    }

    /**
     * Writes the given genotype table to an Avro file compressed with the
     * snappy codec.
     *
     * @param genotype genotype table to export
     * @param filename destination file name; ".avro" is appended via
     * {@code Utils.addSuffixIfNeeded} when needed
     *
     * @return the file name actually written (including any added suffix)
     *
     * @throws IllegalStateException if anything goes wrong while building the
     * schema or writing the file; the original exception is attached as the
     * cause
     */
    public static String write(GenotypeTable genotype, String filename) {

        filename = Utils.addSuffixIfNeeded(filename, ".avro");

        try {

            Schema genotypeSchema = buildGenotypeSchema(genotype.numberOfTaxa(), genotype.numberOfSites());
            Schema tasselSchema = buildTasselSchema(genotypeSchema);

            DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(tasselSchema);
            // try-with-resources closes the writer even when append() fails
            try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
                // The codec must be set before create() starts the file.
                dataFileWriter.setCodec(CodecFactory.snappyCodec());
                dataFileWriter.create(tasselSchema, new File(filename));
                dataFileWriter.append(new GenericRecordGenotypeTable(tasselSchema, genotype));
            }

        } catch (Exception e) {
            myLogger.debug(e.getMessage(), e);
            // Keep the original exception as the cause so callers see the full stack trace.
            throw new IllegalStateException("ExportAvro: write: problem writing file: " + filename + ". " + e.getMessage(), e);
        }

        return filename;

    }

    /**
     * Builds the "genotype" record schema: one field per (taxa block, site
     * block) pair, named by {@code AvroConstants.getKey(taxon, site)} using
     * the first taxon and first site of the block.
     */
    private static Schema buildGenotypeSchema(int numTaxa, int numSites) {
        SchemaBuilder.FieldAssembler<Schema> fields = SchemaBuilder
                .builder(SCHEMA_NAMESPACE)
                .record("genotype")
                .fields();
        // Sites in the outer loop, taxa in the inner loop: this fixes the field order in the schema.
        for (int s = 0; s < numSites; s += AvroConstants.GENOTYPE_BLOCK_SIZE) {
            for (int t = 0; t < numTaxa; t += AvroConstants.GENOTYPE_BLOCK_SIZE) {
                fields = fields
                        .name(AvroConstants.getKey(t, s))
                        .type(AvroConstants.BYTE_BLOCK_SCHEMA)
                        .noDefault();
            }
        }
        return fields.endRecord();
    }

    /**
     * Builds the top-level "tassel" record schema wrapping the taxa,
     * positions and the supplied genotype schema.
     */
    private static Schema buildTasselSchema(Schema genotypeSchema) {
        return SchemaBuilder
                .builder(SCHEMA_NAMESPACE)
                .record("tassel")
                .fields()
                .name("taxa").type(AvroConstants.TAXA_SCHEMA).noDefault()
                .name("positions").type(AvroConstants.POSITIONS_SCHEMA).noDefault()
                .name("genotype").type(genotypeSchema).noDefault()
                .endRecord();
    }

    /**
     * Small manual driver: reads a genotype file and exports it to Avro.
     *
     * @param args optional: {@code args[0]} is the input genotype file
     * (defaults to "mdp_genotype.hmp.txt"), {@code args[1]} is the output
     * file name (defaults to "test")
     */
    public static void main(String[] args) {
        LoggingUtils.setupDebugLogging();
        String input = (args != null && args.length > 0) ? args[0] : "mdp_genotype.hmp.txt";
        String output = (args != null && args.length > 1) ? args[1] : "test";
        GenotypeTable genotype = ImportUtils.read(input);
        write(genotype, output);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy