
/*
 * org.snpeff.interval.MarkerUtil Maven / Gradle / Ivy
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of SnpEff Show documentation
 * Show all versions of SnpEff Show documentation
 * Variant annotation and effect prediction package.
 * The newest version!
 */
package org.snpeff.interval;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import org.snpeff.fileIterator.LineFileIterator;
/**
* Generic utility methods for Markers
*
* @author pcingola
*/
public class MarkerUtil {
/**
* Collapse adjacent intervals (i.e. intervals separated by a gap of zero length
* E.g.: The markers [1-100] and [101-200] are collapsed into one single marker [1-200]
*
* @return A set of new markers that can replace the old ones, or the same set if no change is required.
*/
public static Map collapseZeroGap(Markers markersOri) {
Map collapse = new HashMap();
// Sort markers by start
Markers sorted = new Markers();
sorted.add(markersOri);
sorted.sort(false, false);
// Create new set of markers
Marker markerPrev = null; // Previous marker in the list
Marker markerToAdd = null;
int countCollapsed = 0;
for (Marker m : sorted) {
if (markerToAdd == null) markerToAdd = m.clone();
if (markerPrev != null) {
// Find start, end and gap size
int start = markerPrev.getEnd() + 1;
int end = m.getStart() - 1;
int gapSize = end - start + 1;
if (gapSize <= 0) {
countCollapsed++;
if (markerToAdd.getEnd() < m.getEnd()) markerToAdd.setEnd(m.getEnd()); // Set new end for this marker (we are collapsing it with the previous one)
// Do we need to correct frame information?
if (markerToAdd.isStrandMinus() && (markerToAdd instanceof MarkerWithFrame) && (m instanceof MarkerWithFrame)) {
MarkerWithFrame markerToAddWf = (MarkerWithFrame) markerToAdd;
MarkerWithFrame mwf = (MarkerWithFrame) m;
markerToAddWf.setFrame(mwf.getFrame());
}
} else markerToAdd = m.clone(); // Get ready for next iteration
}
collapse.put(m, markerToAdd);
markerPrev = m;
}
// Sanity check
HashSet collapsed = new HashSet();
collapsed.addAll(collapse.values());
if ((markersOri.size() - countCollapsed) != collapsed.size()) throw new RuntimeException("Sanitycheck failed. This should never happen!\n\tmarkers.size: " + markersOri.size() + "\n\tcountCollapsed: " + countCollapsed + "\n\treplaced.size : " + collapsed.size());
return collapse;
}
/**
* Read intervals from a file using a simplt TXT format
* Format:
* chr \t start \t end \t id
*
* Note: Zero-based positions
*
* @param fileName : Path to file
* @param genome : Genome to use. Can be null (a new one will be created)
* @param positionBase : Position offset. Use '1' for one-based coordinates and '0' for zero-based coordinates.
*/
public static Markers readTxt(String fileName, Genome genome, int positionBase) {
if (genome == null) genome = new Genome();
Markers markers = new Markers();
// Parse lines
LineFileIterator lfi = new LineFileIterator(fileName);
int lineNum = 1;
for (String line : lfi) {
Marker interval = new Marker();
interval.readTxt(line, lineNum, genome, positionBase);
markers.add(interval);
lineNum++;
}
return markers;
}
/**
* Redundant markers in a list: Find intervals that are totally included in other intervals in the list
* @param markersOri
* @return A map markerIncluded -> markerLarge, where markerIncluded in completely included in markerLarge
*
* WARNING: Markers having start > end (i.e. circular chromosome with
* uncorrected coordinates) are not processed correctly by this
* method (they are explicitly ignored)
*/
public static Map redundant(Collection extends Marker> markersOri) {
Map redundant = new HashMap();
// Find which markers are redundant?
ArrayList markers = new ArrayList();
// Add all markers. Exception: Markers form circular chromosomes
// whose coordinates have not been corrected (we take care of
// these when we correct circular coordinates)
for (Marker m : markersOri)
if (m.getStart() <= m.getEnd()) {
markers.add(m);
}
int size = markers.size();
// Iterate on all markers
for (int i = 0; i < size; i++) {
Marker mi = markers.get(i);
// Is marker 'mi' included in any other marker?
Marker markerLarge = null;
for (int j = 0; (j < size) && (markerLarge == null); j++) {
Marker mj = markers.get(j);
if ((i != j) && (mj.includes(mi))) { // Not the same interval and it is fully included?
if (mi.includes(mj) && (i > j)) {
// If they are included both ways, it means that they are exactly the same.
// We have to avoid deleting both of them twice, so we arbitrarely don't add them if (i > j)
} else markerLarge = mj;
}
}
// Add to redundant marker
if (markerLarge != null) redundant.put(mi, markerLarge);
}
return redundant;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy