All Downloads are FREE. Search and download functionalities are using the official Maven repository.

umcg.genetica.io.bedgraph.BedGraphFile Maven / Gradle / Ivy

There is a newer version: 1.0.7
Show newest version
package umcg.genetica.io.bedgraph;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import umcg.genetica.collections.intervaltree.PerChrIntervalTree;
import umcg.genetica.io.gtf.GffElement;

/**
 * Reader for a BedGraph file that exposes its data lines as {@link BedGraphEntry}
 * objects through the {@link Iterable} interface.
 *
 * <p>Every call to {@link #iterator()} opens the file anew and reads it lazily,
 * line by line. Lines starting with {@code browser}, {@code track} or {@code #}
 * and blank lines are skipped; every other line must contain exactly four
 * whitespace separated fields: chromosome, start, stop and value.</p>
 *
 * @author Patrick Deelen
 */
public class BedGraphFile implements Iterable<BedGraphEntry> {

	private final File bedGraphFile;
	private final boolean omitChr;
	private final boolean makeOneBased;

	/** Matches a leading, case-insensitive "chr" and captures the remainder of the name. */
	private static final Pattern CHR_PATTERN = Pattern.compile("^chr(.*)$", Pattern.CASE_INSENSITIVE);

	/**
	 * Opens the BedGraph file at the given path, keeping chromosome names and
	 * coordinates exactly as they appear in the file.
	 *
	 * @param bedGraphFilePath path of the BedGraph file
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException if the path is not a regular file or cannot be read
	 */
	public BedGraphFile(String bedGraphFilePath) throws FileNotFoundException, IOException {
		this(new File(bedGraphFilePath), false, false);
	}

	/**
	 * Opens the BedGraph file at the given path.
	 *
	 * @param bedGraphFilePath path of the BedGraph file
	 * @param omitChr if true a leading "chr" is stripped from chromosome names
	 * @param makeOneBased if true start and stop are both incremented by one
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException if the path is not a regular file or cannot be read
	 */
	public BedGraphFile(String bedGraphFilePath, boolean omitChr, boolean makeOneBased) throws FileNotFoundException, IOException {
		this(new File(bedGraphFilePath), omitChr, makeOneBased);
	}

	/**
	 * Opens the given BedGraph file, keeping chromosome names and coordinates
	 * exactly as they appear in the file.
	 *
	 * @param bedGraphFile the BedGraph file
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException if the path is not a regular file or cannot be read
	 */
	public BedGraphFile(File bedGraphFile) throws FileNotFoundException, IOException {
		this(bedGraphFile, false, false);
	}

	/**
	 * Opens the given BedGraph file. Only the existence, type and readability
	 * of the file are checked here; the content is parsed while iterating.
	 *
	 * @param bedGraphFile the BedGraph file
	 * @param omitChr if true a leading "chr" is stripped from chromosome names
	 * @param makeOneBased if true start and stop are both incremented by one
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException if the path is not a regular file or cannot be read
	 */
	public BedGraphFile(File bedGraphFile, boolean omitChr, boolean makeOneBased) throws FileNotFoundException, IOException {
		this.bedGraphFile = bedGraphFile;
		this.omitChr = omitChr;
		this.makeOneBased = makeOneBased;

		if (!this.bedGraphFile.exists()) {
			throw new FileNotFoundException("BedGraph file not found at: " + bedGraphFile.getAbsolutePath());
		} else if (!this.bedGraphFile.isFile()) {
			throw new IOException("Error reading BedGraph file at: " + bedGraphFile.getAbsolutePath());
		} else if (!this.bedGraphFile.canRead()) {
			throw new IOException("Error reading BedGraph file at: " + bedGraphFile.getAbsolutePath());
		}
	}

	/**
	 * Parses a single BedGraph data line.
	 *
	 * @param line the data line, expected to hold four whitespace separated fields
	 * @param omitChr if true a leading "chr" is stripped from the chromosome name
	 * @param makeOneBased if true start and stop are both incremented by one
	 * @return the parsed entry
	 * @throws IOException if the line does not have four fields or a numeric
	 * field cannot be parsed; the underlying {@link NumberFormatException} is
	 * kept as the cause
	 */
	private static BedGraphEntry parseLine(String line, boolean omitChr, boolean makeOneBased) throws IOException {
		final String[] lineElements = StringUtils.split(line);

		if (lineElements.length != 4) {
			throw new IOException("Error parsing BedGraph, did not find 4 fields on line: " + line);
		}

		final String rawChr = lineElements[0];
		// Chromosome names repeat on nearly every line, so they are interned.
		final String chr = (omitChr ? removeChr(rawChr) : rawChr).intern();

		int start = parseIntField(lineElements[1], "Start", line);
		int stop = parseIntField(lineElements[2], "Stop", line);

		final double value;
		try {
			value = Double.parseDouble(lineElements[3]);
		} catch (NumberFormatException ex) {
			throw new IOException("Error parsing BedGraph, Value is not a double on line: " + line, ex);
		}

		if (makeOneBased) {
			// NOTE(review): both coordinates are shifted. BED-style formats are
			// commonly 0-based half-open, in which case only start would need
			// the shift - confirm that moving stop as well is intended.
			start = start + 1;
			stop = stop + 1;
		}

		return new BedGraphEntry(chr, start, stop, value);
	}

	/**
	 * Parses an integer coordinate field of a BedGraph line.
	 *
	 * @param field the text of the field
	 * @param fieldName name of the field used in the error message
	 * @param line the complete line, used in the error message
	 * @return the parsed value
	 * @throws IOException if the field is not a valid int
	 */
	private static int parseIntField(String field, String fieldName, String line) throws IOException {
		try {
			return Integer.parseInt(field);
		} catch (NumberFormatException ex) {
			throw new IOException("Error parsing BedGraph, " + fieldName + " is not an int on line: " + line, ex);
		}
	}

	/**
	 * Strips a leading "chr" (in any letter case) from a chromosome name.
	 *
	 * @param chromosome the chromosome name as found in the file
	 * @return the name without the "chr" prefix, or the unchanged name when it
	 * does not start with "chr"
	 */
	private static String removeChr(String chromosome) {
		final Matcher chrMatcher = CHR_PATTERN.matcher(chromosome);
		return chrMatcher.find() ? chrMatcher.group(1) : chromosome;
	}

	/**
	 * Tells whether a line carries no data and must be skipped.
	 *
	 * @param line a line read from the file
	 * @return true for blank lines and for lines starting with "browser",
	 * "track" or '#'
	 */
	private static boolean isSkippedLine(String line) {
		// The blank check comes first so that an empty line can never reach a
		// character access.
		return line.trim().isEmpty()
				|| line.startsWith("browser")
				|| line.startsWith("track")
				|| line.startsWith("#");
	}

	/**
	 * Opens the file and returns an iterator over its data lines.
	 *
	 * <p>The underlying reader is closed when the end of the file is reached
	 * or when reading or parsing fails; after a failure the iterator reports
	 * no further elements. An iterator that is abandoned before either happens
	 * keeps the file open.</p>
	 *
	 * @return a lazy iterator over the entries; {@code remove()} is not supported
	 * @throws RuntimeException wrapping the {@link IOException} when the file
	 * cannot be opened, or, from {@code hasNext()}/{@code next()}, when a line
	 * cannot be read or parsed
	 */
	@Override
	public Iterator<BedGraphEntry> iterator() {

		final BufferedReader reader;
		try {
			reader = new BufferedReader(new InputStreamReader(new FileInputStream(bedGraphFile), "UTF-8"));
		} catch (IOException ex) {
			throw new RuntimeException(ex);
		}

		return new Iterator<BedGraphEntry>() {
			private BedGraphEntry next;
			private boolean atNext = false;
			private boolean atEnd = false;

			@Override
			public boolean hasNext() {

				if (atEnd) {
					return false;
				}

				if (atNext) {
					return true;
				}

				try {
					String line;
					while ((line = reader.readLine()) != null) {
						if (!isSkippedLine(line)) {
							break;
						}
					}

					if (line == null) {
						finish();
						return false;
					}

					next = parseLine(line, omitChr, makeOneBased);
					atNext = true;
					return true;
				} catch (IOException ex) {
					// Release the file handle before reporting the problem.
					finish();
					throw new RuntimeException(ex);
				}

			}

			@Override
			public BedGraphEntry next() {
				if (!hasNext()) {
					throw new NoSuchElementException();
				}
				final BedGraphEntry current = next;
				next = null;
				atNext = false;
				return current;
			}

			@Override
			public void remove() {
				throw new UnsupportedOperationException("Not supported yet.");
			}

			/** Marks the iteration as finished and closes the reader. */
			private void finish() {
				atEnd = true;
				try {
					reader.close();
				} catch (IOException ignored) {
					// Nothing more will be read from this reader, so a failure
					// to close it is deliberately ignored.
				}
			}
		};
	}

	/**
	 * Builds an interval tree from all entries of this file by passing this
	 * object to {@code PerChrIntervalTree.createFromChrGroupedIterable}.
	 *
	 * <p>NOTE(review): the name of that factory suggests the entries must be
	 * grouped by chromosome in the file - confirm against PerChrIntervalTree.</p>
	 *
	 * @return the interval tree holding the entries of this file
	 * @throws Exception if the interval tree cannot be created
	 */
	public PerChrIntervalTree createIntervalTree() throws Exception {
		return PerChrIntervalTree.createFromChrGroupedIterable(this, BedGraphEntry.class);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy