All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dishevelled.bio.variant.vcf.StreamingVcfParser Maven / Gradle / Ivy

The newest version!
/*

    dsh-bio-variant  Variants.
    Copyright (c) 2013-2024 held jointly by the individual authors.

    This library is free software; you can redistribute it and/or modify it
    under the terms of the GNU Lesser General Public License as published
    by the Free Software Foundation; either version 3 of the License, or (at
    your option) any later version.

    This library is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with this library;  if not, write to the Free Software Foundation,
    Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA.

    > http://www.fsf.org/licensing/licenses/lgpl.html
    > http://www.opensource.org/licenses/lgpl-license.php

*/
package org.dishevelled.bio.variant.vcf;

import static com.google.common.base.Preconditions.checkNotNull;

import java.io.IOException;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;

/**
 * Streaming VCF parser.
 * <p>
 * Adapts the low-level callbacks issued by <code>VcfParser</code> into the
 * coarser-grained events of a <code>VcfStreamListener</code>: one header event,
 * one event per sample, and one event per record.
 * </p>
 *
 * @author  Michael Heuer
 */
public final class StreamingVcfParser {

    /** Prefix of the file format meta-information header line. */
    private static final String FILE_FORMAT_PREFIX = "##fileformat=";

    /** Prefix of sample meta-information header lines. */
    private static final String SAMPLE_PREFIX = "##SAMPLE=";


    /**
     * Private no-arg constructor.
     */
    private StreamingVcfParser() {
        // empty
    }


    /**
     * Stream the specified readable.
     * <p>
     * Meta-information lines are collected as they are parsed.  When the sample
     * names are received, the listener is notified of the header and then of each
     * sample; after that the listener is notified of each completed record.
     * </p>
     *
     * @param readable readable, must not be null
     * @param listener event based reader callback, must not be null
     * @throws IOException if an I/O error occurs, or if a <code>##SAMPLE</code>
     *    meta-information header line is malformed
     */
    public static void stream(final Readable readable, final VcfStreamListener listener) throws IOException {
        checkNotNull(readable);
        checkNotNull(listener);

        VcfParser.parse(readable, new VcfParseAdapter() {
                /** VCF record builder. */
                private final VcfRecord.Builder builder = VcfRecord.builder();

                /** File format, e.g. VCFv4.2, the only required header field. */
                private String fileFormat;

                /** List of meta-information header lines. */
                private List<String> meta = new ArrayList<String>();

                /** VCF samples keyed by name. */
                private Map<String, VcfSample> samples = new HashMap<String, VcfSample>();


                @Override
                public void lineNumber(final long lineNumber) throws IOException {
                    builder.withLineNumber(lineNumber);
                }

                @Override
                public void meta(final String meta) throws IOException {
                    this.meta.add(meta.trim());
                    if (meta.startsWith(FILE_FORMAT_PREFIX)) {
                        fileFormat = meta.substring(FILE_FORMAT_PREFIX.length()).trim();
                    }
                    else if (meta.startsWith(SAMPLE_PREFIX)) {
                        VcfSample sample = parseSample(meta);
                        samples.put(sample.getId(), sample);
                    }
                }

                @Override
                public void samples(final String... samples) throws IOException {
                    for (String sample : samples) {
                        // add if missing in meta lines
                        if (!this.samples.containsKey(sample)) {
                            this.samples.put(sample, new VcfSample(sample));
                        }
                    }

                    // at end of header lines, notify listener of header
                    listener.header(new VcfHeader(fileFormat, meta));
                    // ...and samples; the map is a HashMap, so samples are not
                    // necessarily delivered in the order they were declared
                    for (VcfSample sample : this.samples.values()) {
                        listener.sample(sample);
                    }
                }

                @Override
                public void chrom(final String chrom) throws IOException {
                    builder.withChrom(chrom);
                }

                @Override
                public void pos(final long pos) throws IOException {
                    builder.withPos(pos);
                }

                @Override
                public void id(final String... id) throws IOException {
                    builder.withId(id);
                }

                @Override
                public void ref(final String ref) throws IOException {
                    builder.withRef(ref);
                }

                @Override
                public void alt(final String... alt) throws IOException {
                    builder.withAlt(alt);
                }

                @Override
                public void qual(final Double qual) throws IOException {
                    builder.withQual(qual);
                }

                @Override
                public void filter(final String... filter) throws IOException {
                    builder.withFilter(filter);
                }

                @Override
                public void info(final String infoId, final String... values) throws IOException {
                    builder.withInfo(infoId, values);
                }

                @Override
                public void format(final String... format) throws IOException {
                    builder.withFormat(format);
                }

                @Override
                public void genotype(final String sampleId, final String formatId, final String... values) throws IOException {
                    builder.withGenotype(sampleId, formatId, values);
                }

                @Override
                public boolean complete() throws IOException {
                    listener.record(builder.build());

                    builder.reset();
                    // release header state once records are being delivered
                    // NOTE(review): presumably VcfParser issues no further meta() or samples()
                    // callbacks after the first record, since those would dereference these
                    // null fields -- confirm against VcfParser
                    fileFormat = null;
                    meta = null;
                    samples = null;

                    return true;
                }
            });
    }

    /**
     * Parse the specified <code>##SAMPLE</code> meta-information header line into a VCF sample.
     * <p>
     * The text after the prefix and its opening <code>&lt;</code> is split on commas into
     * <code>key=value</code> tokens, and each value is split on semicolons.  Double quote and
     * <code>&gt;</code> characters are removed from every value.  The first <code>ID</code> value
     * is the sample id; the <code>Genomes</code>, <code>Mixture</code>, and <code>Description</code>
     * values are combined by position into genomes.
     * </p>
     *
     * @param meta meta-information header line starting with <code>##SAMPLE=</code>
     * @return the VCF sample described by the line
     * @throws IOException if the line has a token without a value, has no <code>ID</code>,
     *    has fewer mixtures or descriptions than genomes, or has a mixture that is not a number
     */
    private static VcfSample parseSample(final String meta) throws IOException {
        // skip the prefix and the opening '<' that follows it
        int start = SAMPLE_PREFIX.length() + 1;
        if (meta.length() < start) {
            throw new IOException("invalid ##SAMPLE meta-information line, was " + meta);
        }

        ListMultimap<String, String> values = ArrayListMultimap.create();
        for (String token : meta.substring(start).split(",")) {
            String[] metaTokens = token.split("=");
            if (metaTokens.length < 2) {
                throw new IOException("invalid ##SAMPLE meta-information line, expected key=value but found "
                                      + token + ", was " + meta);
            }
            String key = metaTokens[0];
            for (String valueToken : metaTokens[1].split(";")) {
                values.put(key, valueToken.replace("\"", "").replace(">", ""));
            }
        }

        List<String> ids = values.get("ID");
        if (ids.isEmpty()) {
            throw new IOException("invalid ##SAMPLE meta-information line, missing ID, was " + meta);
        }
        String id = ids.get(0);

        List<String> genomeIds = values.get("Genomes");
        List<String> mixtures = values.get("Mixture");
        List<String> descriptions = values.get("Description");

        int size = genomeIds.size();
        if (mixtures.size() < size || descriptions.size() < size) {
            throw new IOException("invalid ##SAMPLE meta-information line, found " + size + " genomes but "
                                  + mixtures.size() + " mixtures and " + descriptions.size()
                                  + " descriptions, was " + meta);
        }

        List<VcfGenome> genomes = new ArrayList<VcfGenome>(size);
        for (int i = 0; i < size; i++) {
            double mixture;
            try {
                mixture = Double.parseDouble(mixtures.get(i));
            }
            catch (NumberFormatException e) {
                throw new IOException("invalid ##SAMPLE meta-information line, mixture " + mixtures.get(i)
                                      + " is not a number, was " + meta, e);
            }
            genomes.add(new VcfGenome(genomeIds.get(i), mixture, descriptions.get(i)));
        }
        return new VcfSample(id, genomes.toArray(new VcfGenome[genomes.size()]));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy