All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencb.biodata.tools.variant.VariantReferenceBlockCreatorTask Maven / Gradle / Ivy

There is a newer version: 3.3.0
Show newest version
package org.opencb.biodata.tools.variant;

import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFContigHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.StudyEntry;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantBuilder;
import org.opencb.biodata.models.variant.avro.SampleEntry;
import org.opencb.biodata.models.variant.metadata.VariantFileHeader;
import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine;
import org.opencb.commons.run.Task;

import java.util.*;

public class VariantReferenceBlockCreatorTask implements Task {


    private String chromosome= null;
    private int position;
    private int end;
    private String studyId;
    private String fileId;
    private LinkedHashMap samplesPosition;
    private List missingGtSamplesData;
    private Map contigs;

    public VariantReferenceBlockCreatorTask() {
    }

    public VariantReferenceBlockCreatorTask(Map contigs) {
        this.contigs = new HashMap<>(contigs);
        for (Map.Entry entry : contigs.entrySet()) {
            this.contigs.put(Region.normalizeChromosome(entry.getKey()), entry.getValue());
        }
    }

    public VariantReferenceBlockCreatorTask(VariantFileHeader fileHeader) {
        this.contigs = new HashMap<>();
        for (VariantFileHeaderComplexLine line : fileHeader.getComplexLines()) {
            if (line.getKey().equals(VCFConstants.CONTIG_HEADER_KEY)) {
                String contig = line.getId();
                String length = line.getGenericFields().get("length");
                if (StringUtils.isNumeric(length)) {
                    contigs.put(contig, Integer.valueOf(length));
                    contigs.put(Region.normalizeChromosome(contig), Integer.valueOf(length));
                }
            }
        }
    }

    public VariantReferenceBlockCreatorTask(VCFHeader fileHeader) {
        this.contigs = new HashMap<>();
        for (VCFContigHeaderLine line : fileHeader.getContigLines()) {
            SAMSequenceRecord record = line.getSAMSequenceRecord();
            String contig = record.getSequenceName();
            int length = record.getSequenceLength();
            if (length > 0) {
                contigs.put(contig, length);
                contigs.put(Region.normalizeChromosome(contig), length);
            }
        }
    }

    @Override
    public void pre() throws Exception {
    }

    @Override
    public List apply(List list) throws Exception {
        List fixedList = new ArrayList<>(((int) (list.size() * 1.2)));
        for (Variant variant : list) {
            if (chromosome == null) {
                init(variant);

                // Create first telomere ref block (if needed)
                fixedList.addAll(createContigFirstBlock());
            } else {
                if (!variant.getChromosome().equals(chromosome)) {
                    // Change chromosome
                    // Create first and last telomere ref block (if needed)
                    fixedList.addAll(createContigLastBlock());
                    init(variant);
                    fixedList.addAll(createContigFirstBlock());
                } else {
                    if (variant.getStart() != position) {
                        // Check if need to create a block

                        if ((end + 1) < variant.getStart()) {
                            // Create ref block
                            fixedList.add(createRefBlock(chromosome, end + 1, variant.getStart() - 1));
                        }

                        position = variant.getStart();
                        end = variant.getEnd();
                    } else {
                        // Update end
                        end = Math.max(variant.getEnd(), end);
                    }
                }
            }
            fixedList.add(variant);
        }
        return fixedList;
    }

    @Override
    public List drain() throws Exception {
        return createContigLastBlock();
    }

    protected void init(Variant variant) {
        chromosome = variant.getChromosome();
        position = variant.getStart();
        end = variant.getEnd();
        if (!variant.getStudies().isEmpty()) {
            StudyEntry studyEntry = variant.getStudies().get(0);
            studyId = studyEntry.getStudyId();
            fileId = studyEntry.getFiles().get(0).getFileId();
            samplesPosition = studyEntry.getSamplesPosition();
            missingGtSamplesData = new ArrayList<>(samplesPosition.size());
            for (int i = 0; i < samplesPosition.size(); i++) {
                missingGtSamplesData.add(new SampleEntry(null, null, Collections.singletonList("./.")));
            }
        }
    }

    protected List createContigFirstBlock() {
        if (position <= 1) {
            return Collections.emptyList();
        } else {
            return Collections.singletonList(createRefBlock(chromosome, 1, position - 1));
        }
    }

    protected List createContigLastBlock() {
        if (!contigs.containsKey(chromosome)) {
            return Collections.emptyList();
        } else {
            Integer length = contigs.get(chromosome);
            if (end >= length) {
                return Collections.emptyList();
            }
            return Collections.singletonList(createRefBlock(chromosome, end + 1, length));
        }
    }

    protected Variant createRefBlock(String chromosome, int start, int end) {
        VariantBuilder builder = new VariantBuilder(chromosome, start, end, "N", ".");
        if (studyId != null) {
            builder.setStudyId(studyId)
                    .setFileId(fileId)
                    .setSamplesPosition(samplesPosition)
                    .setFilter(VCFConstants.UNFILTERED)
                    .setSampleDataKeys("GT")
                    .setSamples(missingGtSamplesData);
        }
        return builder.build();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy