All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.dna.snp.io.ReadBedfile Maven / Gradle / Ivy

/*
 *  ReadBedfile
 * 
 *  Created on Feb 15, 2017
 */
package net.maizegenetics.dna.snp.io;

import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import com.google.common.collect.RangeSet;
import com.google.common.collect.TreeRangeMap;
import com.google.common.collect.TreeRangeSet;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.util.Utils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.collectingAndThen;

/**
 * @author Terry Casstevens
 */
public class ReadBedfile {

    private static final Logger myLogger = LogManager.getLogger(ReadBedfile.class);

    private ReadBedfile() {
        // utility
    }

    public static List getRanges(String bedFile) {

        List result = new ArrayList<>();

        String line = null;
        try (BufferedReader reader = Utils.getBufferedReader(bedFile)) {
            int lineNum = 1;
            line = reader.readLine();
            while (line != null) {
                String[] tokens = line.trim().split("\t");
                if (tokens.length < 3) {
                    throw new IllegalStateException("getRanges: Expecting at least 3 columns on line: " + lineNum);
                }

                // tokens[0] is chromosome
                // tokens[1] is start postion from bed file.
                // plus one because bed files are 0-base
                int startPos = Integer.parseInt(tokens[1]) + 1;

                // tokens[2] is start postion from bed file.
                // plus one because bed files are 0-base
                int endPos = Integer.parseInt(tokens[2]) + 1;

                // tokens[3] is name from bed file
                String name = null;
                if (tokens.length > 3) {
                    if (tokens[3] == null || tokens[3].isEmpty()) {
                        name = null;
                    } else {
                        name = tokens[3];
                    }
                }

                result.add(new BedFileRange(tokens[0], startPos, endPos, name));

                line = reader.readLine();
                lineNum++;
            }
        } catch (Exception e) {
            myLogger.debug(e.getMessage(), e);
            throw new IllegalStateException("getRanges: problem reading: " + bedFile + " line: " + line);
        }

        return result;

    }

    public static RangeSet getRangesAsPositions(String bedfile) {
        return getRanges(bedfile).stream()
                .map(bedFileRange -> Range.closed(Position.of(bedFileRange.myChrInt, bedFileRange.myStartPos),
                        Position.of(bedFileRange.myChrInt, bedFileRange.myEndPos)))
                .collect(collectingAndThen(Collectors.toSet(), TreeRangeSet::create));
    }

    public static RangeMap getRangesAsPositionMap(String bedfile) {
        TreeRangeMap positionNameRangeMap = TreeRangeMap.create();
        for (BedFileRange bedFileRange : getRanges(bedfile)) {
            positionNameRangeMap.put(Range.closed(Position.of(bedFileRange.myChrInt, bedFileRange.myStartPos),
                    Position.of(bedFileRange.myChrInt, bedFileRange.myEndPos)),
                    bedFileRange.myName);
        }
        return positionNameRangeMap;
    }

    public static class BedFileRange implements Comparable {

        private final String myChr;
        private final int myChrInt;
        private final int myStartPos;
        private final int myEndPos;
        private final String myName;

        public BedFileRange(String chr, int startPos, int endPos, String name) {
            myChr = chr;
            int temp;
            try {
                temp = Integer.parseInt(chr);
            } catch (Exception e) {
                temp = -1;
            }
            myChrInt = temp;
            myStartPos = startPos;
            myEndPos = endPos;
            myName = name;
        }

        /**
         * Return chromosome
         *
         * @return chromosome
         */
        public String chr() {
            return myChr;
        }

        /**
         * Returns start position (inclusive)
         *
         * @return start position
         */
        public int start() {
            return myStartPos;
        }

        /**
         * Returns end position (exclusive)
         *
         * @return end position
         */
        public int end() {
            return myEndPos;
        }

        public String name() {
            return myName;
        }

        @Override
        public int compareTo(BedFileRange o) {

            if (myChrInt != -1) {
                if (myChrInt < o.myChrInt) {
                    return -1;
                } else if (myChrInt > o.myChrInt) {
                    return 1;
                }
            } else if (!myChr.equals(o.myChr)) {
                return myChr.compareTo(o.myChr);
            }

            if (myStartPos < o.myStartPos) {
                return -1;
            } else if (myStartPos > o.myStartPos) {
                return 1;
            }

            if (myEndPos < o.myEndPos) {
                return -1;
            } else if (myEndPos > o.myEndPos) {
                return 1;
            } else {
                return 0;
            }

        }

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy