
// de.julielab.geneexpbase.genemodel.DictionaryGeneIdRecord (Maven / Gradle / Ivy)
package de.julielab.geneexpbase.genemodel;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
/**
 * Represents an entry from a dictionary that matches gene mentions and assigns possible IDs.
 *
 * <p>The source of the match is a dictionary that lists gene names and, for each name, its possible IDs in
 * different organisms. This information is encoded in string form so that it can be used as the entity
 * category by the gazetteer component:
 *
 * <pre>id1:tax1:5|id2:tax2:3</pre>
 *
 * <p>Each item consists of the gene ID, the corresponding taxonomy ID and the priority of the gene name with
 * respect to that gene ID. Multiple items are separated by pipe symbols. The priority denotes the source of
 * the gene name for the specific gene ID. It is a number that ranges from -1 to 7: -1 indicates that the
 * name is the official gene symbol for the ID, 7 means that the name came from the BioThesaurus, which
 * sometimes introduces false positives due to rather general names.
 *
 * <p>The string handed to the constructor carries the gene name in front of that encoding, separated from
 * it by a tab character.
 *
 * <p>NOTE(review): the element types of the returned streams assume that
 * {@code DictionaryGeneIdRecordItem#getGeneId()} and {@code DictionaryGeneIdRecordItem#getTaxId()} return
 * {@code String} - confirm against that class.
 */
public class DictionaryGeneIdRecord {

    private String geneName;
    private final List<DictionaryGeneIdRecordItem> recordItems;

    /**
     * Parses a record of the form {@code geneName<TAB>id1:tax1:prio1|id2:tax2:prio2|...}.
     *
     * <p>The text before the first tab becomes the gene name. The second tab-separated field is split at
     * pipe symbols and each piece is passed to the {@code DictionaryGeneIdRecordItem} constructor.
     *
     * @param recordString the tab-separated gene name and ID encoding
     * @throws IllegalArgumentException if {@code recordString} has no ID part after the gene name
     */
    public DictionaryGeneIdRecord(String recordString) {
        // split the name from the ID record
        String[] nameAndIds = recordString.split("\t");
        if (nameAndIds.length < 2) {
            // Fail with the offending input instead of a bare ArrayIndexOutOfBoundsException.
            throw new IllegalArgumentException(
                    "Expected '<gene name>\\t<id:tax:priority|...>' but got: '" + recordString + "'");
        }
        this.geneName = nameAndIds[0];
        // now parse the individual elements of the record which are separated by pipe symbols
        this.recordItems = Arrays.stream(nameAndIds[1].split("\\|"))
                .map(DictionaryGeneIdRecordItem::new)
                .collect(Collectors.toList());
    }

    /**
     * @return the gene name this record belongs to
     */
    public String getGeneName() {
        return geneName;
    }

    /**
     * Replaces the gene name of this record; the record items are left untouched.
     *
     * @param geneName the new gene name
     */
    public void setGeneName(String geneName) {
        this.geneName = geneName;
    }

    /**
     * Returns the record items whose taxonomy ID equals the given one.
     *
     * @param tax the taxonomy ID to filter for
     * @return a new stream over the matching items, in record order
     */
    public Stream<DictionaryGeneIdRecordItem> getRecordItemsForTaxId(String tax) {
        return recordItems.stream().filter(r -> r.getTaxId().equals(tax));
    }

    /**
     * @return a new stream over the gene IDs of all items, in record order
     */
    public Stream<String> getGeneIds() {
        return recordItems.stream().map(DictionaryGeneIdRecordItem::getGeneId);
    }

    /**
     * @return a new stream over the taxonomy IDs of all items, in record order
     */
    public Stream<String> getTaxIds() {
        return recordItems.stream().map(DictionaryGeneIdRecordItem::getTaxId);
    }

    /**
     * @return a new stream over the priorities of all items, in record order
     */
    public IntStream getPriorities() {
        return recordItems.stream().mapToInt(DictionaryGeneIdRecordItem::getPriority);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy