// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it as often as you want.
/*
* The MIT License
*
* Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.tribble.index.tabix;
import htsjdk.samtools.BinningIndexBuilder;
import htsjdk.samtools.BinningIndexContent;
import htsjdk.samtools.Chunk;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.tribble.Feature;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.IndexCreator;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * IndexCreator for Tabix.
 * Features are expected to be 1-based, inclusive.
 *
 * <p>Features must be added grouped by sequence name (a sequence name may not reappear once a
 * different sequence has been started) and, within a sequence, in non-decreasing start order.
 * Each feature is handed to the underlying {@link BinningIndexBuilder} only once the file position
 * of the following feature (or the final file position) is known, because that position marks
 * where the feature ends in the file.
 */
public class TabixIndexCreator implements IndexCreator {
    private final TabixFormat formatSpec;
    // One entry per sequence for which features were added, in the order the sequences were encountered.
    private final List<BinningIndexContent> indexContents = new ArrayList<BinningIndexContent>();
    private final List<String> sequenceNames = new ArrayList<String>();
    // Merely a faster way to ensure that features are added in a specific sequence name order
    private final Set<String> sequenceNamesSeen = new HashSet<String>();

    // A sequence dictionary is not required, but if it is provided all sequence names must be present in it.
    // It is used to determine the length of a sequence in order to optimize index memory allocation.
    private final SAMSequenceDictionary sequenceDictionary;

    private String currentSequenceName = null;
    private BinningIndexBuilder indexBuilder = null;
    // A feature can't be added to the index until the next feature is added because the next feature
    // defines the location of the end of the previous feature in the output file.
    private TabixFeature previousFeature = null;

    /**
     * @param sequenceDictionary is not required, but if present all features added must refer to sequences in the
     *                           dictionary. It is used to optimize the memory needed to build the index.
     * @param formatSpec         the Tabix format description; a clone of it is stored, so later changes to the
     *                           caller's instance do not affect this creator.
     */
    public TabixIndexCreator(final SAMSequenceDictionary sequenceDictionary,
                             final TabixFormat formatSpec) {
        this.sequenceDictionary = sequenceDictionary;
        this.formatSpec = formatSpec.clone();
    }

    /**
     * Creates an index creator without a sequence dictionary.
     *
     * @param formatSpec the Tabix format description; a clone of it is stored.
     */
    public TabixIndexCreator(final TabixFormat formatSpec) {
        this(null, formatSpec);
    }

    /**
     * Registers a feature that starts at {@code filePosition}. The previously added feature (if any) is
     * finalized with {@code filePosition} as its end position and handed to the index builder.
     *
     * @param feature      the feature to index.
     * @param filePosition position in the file at which this feature starts.
     * @throws IllegalArgumentException if the feature's sequence was already seen and left, if the feature
     *                                  sorts before the previous feature, if {@code filePosition} is not
     *                                  strictly greater than the previous feature's start position, or if a
     *                                  sequence dictionary was supplied and does not contain the sequence.
     */
    @Override
    public void addFeature(final Feature feature, final long filePosition) {
        final String sequenceName = feature.getContig();
        final int referenceIndex;
        if (sequenceName.equals(currentSequenceName)) {
            referenceIndex = sequenceNames.size() - 1;
        } else {
            // A new sequence gets the next free index; it is registered below, after the previous
            // feature has been flushed into the builder of the sequence it belongs to.
            referenceIndex = sequenceNames.size();
            if (currentSequenceName != null && sequenceNamesSeen.contains(sequenceName)) {
                throw new IllegalArgumentException("Sequence " + feature + " added out of sequence order");
            }
        }
        final TabixFeature thisFeature =
                new TabixFeature(referenceIndex, feature.getStart(), feature.getEnd(), filePosition);
        if (previousFeature != null) {
            if (previousFeature.compareTo(thisFeature) > 0) {
                throw new IllegalArgumentException(String.format("Features added out of order: previous (%s) > next (%s)",
                        previousFeature, thisFeature));
            }
            finalizeFeature(filePosition);
        }
        previousFeature = thisFeature;
        if (referenceIndex == sequenceNames.size()) {
            advanceToReference(sequenceName);
        }
    }

    /**
     * Sets the end file position of the pending feature and passes it to the current index builder.
     *
     * @param featureEndPosition file position immediately after the pending feature.
     * @throws IllegalArgumentException if the end position is not greater than the feature's start position.
     */
    private void finalizeFeature(final long featureEndPosition) {
        previousFeature.featureEndFilePosition = featureEndPosition;
        if (previousFeature.featureStartFilePosition >= previousFeature.featureEndFilePosition) {
            throw new IllegalArgumentException(String.format("Feature start position %d >= feature end position %d",
                    previousFeature.featureStartFilePosition, previousFeature.featureEndFilePosition));
        }
        indexBuilder.processFeature(previousFeature);
    }

    /**
     * Closes the index builder of the current sequence (if any), storing its content, and starts a new
     * builder for {@code sequenceName}.
     *
     * @param sequenceName name of the sequence that is being started.
     * @throws IllegalArgumentException if a sequence dictionary was supplied and does not contain
     *                                  {@code sequenceName}.
     */
    private void advanceToReference(final String sequenceName) {
        if (indexBuilder != null) {
            indexContents.add(indexBuilder.generateIndexContent());
        }
        // If sequence dictionary is provided, BinningIndexBuilder can reduce size of array it allocates.
        final int sequenceLength;
        if (sequenceDictionary != null) {
            // Fail with a descriptive message instead of a NullPointerException for unknown sequences.
            if (sequenceDictionary.getSequence(sequenceName) == null) {
                throw new IllegalArgumentException(
                        String.format("Sequence %s is not present in the sequence dictionary", sequenceName));
            }
            sequenceLength = sequenceDictionary.getSequence(sequenceName).getSequenceLength();
        } else {
            sequenceLength = 0;
        }
        indexBuilder = new BinningIndexBuilder(sequenceNames.size(), sequenceLength);
        sequenceNames.add(sequenceName);
        currentSequenceName = sequenceName;
        sequenceNamesSeen.add(sequenceName);
    }

    /**
     * Flushes the pending feature and the current index builder, then assembles the Tabix index.
     *
     * @param finalFilePosition file position immediately after the last feature.
     * @return a {@link TabixIndex} built from the format spec, the sequence names encountered, and one
     * index content per encountered sequence.
     */
    @Override
    public Index finalizeIndex(final long finalFilePosition) {
        if (previousFeature != null) {
            finalizeFeature(finalFilePosition);
        }
        if (indexBuilder != null) {
            indexContents.add(indexBuilder.generateIndexContent());
        }
        // The array is sized by the number of sequences for which features were added; sequences that
        // appear only in the sequence dictionary and never had a feature get no entry.
        final BinningIndexContent[] indices = indexContents.toArray(new BinningIndexContent[sequenceNames.size()]);
        return new TabixIndex(formatSpec, sequenceNames, indices);
    }

    /**
     * A feature awaiting indexing: its reference index, genomic interval and the span it occupies in the file.
     */
    private static class TabixFeature implements BinningIndexBuilder.FeatureToBeIndexed, Comparable<TabixFeature> {
        private final int referenceIndex;
        private final int start;
        private final int end;
        private final long featureStartFilePosition;
        // Position after this feature in the file.
        private long featureEndFilePosition = -1;

        private TabixFeature(final int referenceIndex, final int start, final int end, final long featureStartFilePosition) {
            this.referenceIndex = referenceIndex;
            this.start = start;
            this.end = end;
            this.featureStartFilePosition = featureStartFilePosition;
        }

        @Override
        public int getStart() {
            return start;
        }

        @Override
        public int getEnd() {
            return end;
        }

        /**
         *
         * @return null -- Let index builder compute this.
         */
        @Override
        public Integer getIndexingBin() {
            return null;
        }

        /**
         * @return the file span of this feature, from its start position to its end position.
         * @throws IllegalStateException if the end file position has not been set yet.
         */
        @Override
        public Chunk getChunk() {
            if (featureEndFilePosition == -1) {
                throw new IllegalStateException("End position is not set");
            }
            return new Chunk(featureStartFilePosition, featureEndFilePosition);
        }

        /**
         * Orders features by reference index, then by start position.
         */
        @Override
        public int compareTo(final TabixFeature other) {
            // Integer.compare avoids the overflow that plain subtraction can produce.
            final int ret = Integer.compare(this.referenceIndex, other.referenceIndex);
            if (ret != 0) return ret;
            return Integer.compare(this.start, other.start);
        }

        @Override
        public String toString() {
            return "TabixFeature{" +
                    "referenceIndex=" + referenceIndex +
                    ", start=" + start +
                    ", end=" + end +
                    ", featureStartFilePosition=" + featureStartFilePosition +
                    ", featureEndFilePosition=" + featureEndFilePosition +
                    '}';
        }
    }
}