// org.monarchinitiative.phenol.io.obo.go.GoGeneAnnotationParser Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of phenol-io Show documentation
// Show all versions of phenol-io Show documentation
// phenol-io contains the generic I/O functionality for ontologies
package org.monarchinitiative.phenol.io.obo.go;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.monarchinitiative.phenol.base.PhenolException;
import org.monarchinitiative.phenol.formats.go.GoGaf21Annotation;
import org.monarchinitiative.phenol.io.base.TermAnnotationParserException;
import com.google.common.collect.ImmutableList;
import org.monarchinitiative.phenol.ontology.data.TermAnnotation;
/**
* Parser for GO "gene annotation file" (GAF) format.
*
* <h5>Usage Example</h5>
*
* <pre>{@code
* File inputFile = new File("goa_human.gaf");
* try {
*   GoGeneAnnotationParser parser = new GoGeneAnnotationParser(inputFile);
*   List<GoGaf21Annotation> annotations = parser.getAnnotations();
*   ...
* } catch (PhenolException e) {
*   System.err.println("Problem reading from file.");
* }
* }</pre>
*
* @author Manuel Holtgrewe
* @author Peter Robinson
*/
public final class GoGeneAnnotationParser {
  /** Minimum number of tab-separated columns accepted on the first data line. */
  private static final int MIN_COLUMNS = 15;
  /** Maximum number of tab-separated columns accepted on the first data line. */
  private static final int MAX_COLUMNS = 17;

  /** The {@link File} the annotations were read from. */
  private final File file;
  /** All annotations parsed from {@link #file}, in file order (immutable). */
  private final List<GoGaf21Annotation> annotations;

  /**
   * Create new parser for GO gene annotation file.
   *
   * <p>The whole file is parsed eagerly by this constructor; the reader is closed before the
   * constructor returns, whether parsing succeeds or fails.
   *
   * @param file The file to read from.
   * @throws PhenolException In case of problems with opening and reading from file, if the file
   *     contains no data line, or if the first data line has an unexpected number of columns.
   */
  public GoGeneAnnotationParser(File file) throws PhenolException {
    this.file = file;
    // try-with-resources: the reader must also be released when parsing fails half-way.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
      this.annotations = readAnnotations(reader);
    } catch (IOException | TermAnnotationParserException e) {
      // NOTE(review): the cause is not chained because only the String constructor of
      // PhenolException is known to exist from this file -- confirm and pass `e` if possible.
      String msg = String.format("Could not parse GO annotation file: %s", e.getMessage());
      throw new PhenolException(msg);
    }
  }

  /**
   * Create new parser for GO gene annotation file.
   *
   * @param absolutepath Path of the file to read from.
   * @throws PhenolException In case of problems with opening and reading from file.
   */
  public GoGeneAnnotationParser(String absolutepath) throws PhenolException {
    this(new File(absolutepath));
  }

  /**
   * Read every data line from {@code reader} and convert it into an annotation.
   *
   * @param reader Open reader positioned at the start of the file.
   * @return Immutable list of the parsed annotations, in file order.
   * @throws TermAnnotationParserException If there is no data line or the first data line does
   *     not have between 15 and 17 columns.
   * @throws IOException If there is a problem with reading from the file.
   * @throws PhenolException If an annotation cannot be constructed from a data line.
   */
  private static List<GoGaf21Annotation> readAnnotations(BufferedReader reader)
      throws IOException, TermAnnotationParserException, PhenolException {
    String line = nextDataLine(reader);
    checkFirstDataLine(line);
    ImmutableList.Builder<GoGaf21Annotation> builder = new ImmutableList.Builder<>();
    while (line != null) {
      builder.add(new GoGaf21Annotation(line.split("\t")));
      line = nextDataLine(reader);
    }
    return builder.build();
  }

  /**
   * Check the first data line of the file.
   *
   * @param firstLine The first non-comment line, or {@code null} if the file has none.
   * @throws TermAnnotationParserException If the first line is not as expected.
   */
  private static void checkFirstDataLine(String firstLine) throws TermAnnotationParserException {
    if (firstLine == null) {
      throw new TermAnnotationParserException("GAF2.1 file contained no data!");
    }
    final String[] arr = firstLine.split("\t");
    if (arr.length < MIN_COLUMNS || arr.length > MAX_COLUMNS) {
      throw new TermAnnotationParserException(
          "First line of file had "
              + arr.length
              + " columns, but expected between 15 and 17 entries.");
    }
  }

  /**
   * Read the next line that is not a comment (comment lines start with {@code "!"}).
   *
   * @param reader Reader to pull lines from.
   * @return The next data line, or {@code null} once the end of the file is reached.
   * @throws IOException If there is a problem with reading from the file.
   */
  private static String nextDataLine(BufferedReader reader) throws IOException {
    String line = reader.readLine();
    // Stop on null: readLine() keeps returning null at end of file, so looping on it never ends.
    while (line != null && line.startsWith("!")) {
      line = reader.readLine();
    }
    return line;
  }

  /**
   * @return The parsed annotations as an immutable list, in file order.
   */
  public List<GoGaf21Annotation> getAnnotations() {
    return this.annotations;
  }

  /**
   * @return Return a list of GoGafAnnotation as {@link TermAnnotation} objects. The list is a
   *     fresh mutable copy; modifying it does not affect this parser.
   */
  public List<TermAnnotation> getTermAnnotations() {
    return new ArrayList<>(this.annotations);
  }

  /**
   * @return The file this parser was constructed with.
   */
  public File getFile() {
    return file;
  }
}