
// de.julielab.geneexpbase.data.DocumentSourceFiles Maven / Gradle / Ivy
package de.julielab.geneexpbase.data;
import de.julielab.geneexpbase.genemodel.GeneMention;
import java.io.File;
import java.util.List;
import java.util.Set;
/**
 * Mutable, bean-style holder for the file paths and boolean settings of one named document source.
 * <p>
 * Most path setters resolve their argument against the base path at the moment they are called
 * (see {@link #setBasePath(String)}), so the base path must be set <em>before</em> any path that
 * should be relative to it. Changing the base path later does not re-resolve already stored paths.
 * <p>
 * All path getters return {@code null} until the corresponding setter has been called.
 */
public class DocumentSourceFiles {
    private String name;
    private String basePath;
    private String predictedGenesPath;
    private String sentencesPath;
    private String chunksPath;
    private String posPath;
    private String speciesPath;
    private String acronymsPath;
    private String corefPath;
    private String appositionsPath;
    private String meshPath;
    private String substancesPath;
    private String ontologyMentionsPath;
    private String docTextPath;
    private String goldGeneList;
    private List<String> allowedGeneTypes;
    // NOTE(review): the element type of this set is not visible in this file, so the raw type
    // is kept to stay source-compatible with callers — TODO confirm and parameterize.
    private Set taggersToUse;
    private boolean filterSpecies;
    private boolean addReferenceSpecies;
    private boolean hasGeneIds = true;
    private final GeneMention.SpecificType defaultEntityType = GeneMention.SpecificType.GENE;
    private boolean speciesCorpus;
    private boolean inferDocumentLevelLabelsToMentions;
    private boolean completelyAnnotated;
    private String nonGenePhrasesPath;
    private String gazetteerMatchesWithIds;

    /**
     * Creates an instance with an empty base path, i.e. paths are stored exactly as given
     * until {@link #setBasePath(String)} is called.
     */
    public DocumentSourceFiles() {
        this.basePath = "";
    }

    /**
     * @return the stored appositions path, or {@code null} if it was never set
     */
    public String getAppositionsPath() {
        return appositionsPath;
    }

    /**
     * @param appositionsPath the appositions path; resolved against the current base path
     */
    public void setAppositionsPath(String appositionsPath) {
        this.appositionsPath = resolveToBasePath(appositionsPath);
    }

    /**
     * @return the stored coreference path, or {@code null} if it was never set
     */
    public String getCorefPath() {
        return corefPath;
    }

    /**
     * @param corefPath the coreference path; resolved against the current base path
     */
    public void setCorefPath(String corefPath) {
        this.corefPath = resolveToBasePath(corefPath);
    }

    /**
     * @return the default entity type, which is fixed to {@link GeneMention.SpecificType#GENE}
     */
    public GeneMention.SpecificType getDefaultEntityType() {
        return defaultEntityType;
    }

    /**
     * @deprecated This setting should be set by a parameter
     */
    @Deprecated
    public boolean isAddReferenceSpecies() {
        return addReferenceSpecies;
    }

    /**
     * @deprecated This setting should be set by a parameter
     */
    @Deprecated
    public void setAddReferenceSpecies(boolean addReferenceSpecies) {
        this.addReferenceSpecies = addReferenceSpecies;
    }

    /**
     * @deprecated This setting should be set by a parameter
     */
    @Deprecated
    public boolean isFilterSpecies() {
        return filterSpecies;
    }

    /**
     * @deprecated This setting should be set by a parameter
     */
    @Deprecated
    public void setFilterSpecies(boolean filterSpecies) {
        this.filterSpecies = filterSpecies;
    }

    /**
     * A list of strings that match the "entity type" field of the input files. Only entity types listed here
     * are accepted as gene mentions.
     *
     * @return the list of accepted entity type strings, or {@code null} if none was set
     */
    public List<String> getAllowedGeneTypes() {
        return allowedGeneTypes;
    }

    /**
     * @param allowedGeneTypes the entity type strings to accept as gene mentions; stored by reference
     */
    public void setAllowedGeneTypes(List<String> allowedGeneTypes) {
        this.allowedGeneTypes = allowedGeneTypes;
    }

    /**
     * @return the stored set of taggers to use, or {@code null} if none was set
     */
    public Set getTaggersToUse() {
        return taggersToUse;
    }

    /**
     * @param taggersToUse the set of taggers to use; stored by reference
     */
    public void setTaggersToUse(Set taggersToUse) {
        this.taggersToUse = taggersToUse;
    }

    /**
     * @return the stored part-of-speech path, or {@code null} if it was never set
     */
    public String getPosPath() {
        return posPath;
    }

    /**
     * @param posPath the part-of-speech path; resolved against the current base path
     */
    public void setPosPath(String posPath) {
        this.posPath = resolveToBasePath(posPath);
    }

    /**
     * @return the base path as last set; the empty string if it was never set
     */
    public String getBasePath() {
        return basePath;
    }

    /**
     * Sets a base path to which all other paths will be relative. Must be set before other paths are set that should be relative to the base path.
     * This is optional and can safely be omitted.
     *
     * @param basePath the directory prefix for subsequently set paths; {@code null} or a blank
     *                 string means that paths are stored exactly as given
     */
    public void setBasePath(String basePath) {
        this.basePath = basePath;
    }

    /**
     * @return the stored predicted genes path, or {@code null} if it was never set
     */
    public String getPredictedGenesPath() {
        return predictedGenesPath;
    }

    /**
     * @param predictedGenesPath the predicted genes path; resolved against the current base path
     */
    public void setPredictedGenesPath(String predictedGenesPath) {
        this.predictedGenesPath = resolveToBasePath(predictedGenesPath);
    }

    /**
     * @return the stored sentences path, or {@code null} if it was never set
     */
    public String getSentencesPath() {
        return sentencesPath;
    }

    /**
     * @param sentencesPath the sentences path; resolved against the current base path
     */
    public void setSentencesPath(String sentencesPath) {
        this.sentencesPath = resolveToBasePath(sentencesPath);
    }

    /**
     * @return the stored chunks path, or {@code null} if it was never set
     */
    public String getChunksPath() {
        return chunksPath;
    }

    /**
     * @param chunksPath the chunks path; resolved against the current base path
     */
    public void setChunksPath(String chunksPath) {
        this.chunksPath = resolveToBasePath(chunksPath);
    }

    /**
     * @return the stored species path, or {@code null} if it was never set
     */
    public String getSpeciesPath() {
        return speciesPath;
    }

    /**
     * @param speciesPath the species path; resolved against the current base path
     */
    public void setSpeciesPath(String speciesPath) {
        this.speciesPath = resolveToBasePath(speciesPath);
    }

    /**
     * @return the stored acronyms path, or {@code null} if it was never set
     */
    public String getAcronymsPath() {
        return acronymsPath;
    }

    /**
     * @param acronymsPath the acronyms path; resolved against the current base path
     */
    public void setAcronymsPath(String acronymsPath) {
        this.acronymsPath = resolveToBasePath(acronymsPath);
    }

    /**
     * @return the stored MeSH path, or {@code null} if it was never set
     */
    public String getMeshPath() {
        return meshPath;
    }

    /**
     * Sets the MeSH path, resolved against the current base path.
     *
     * @param meshPath the MeSH path
     * @see #setMeshPath(String, boolean)
     */
    public void setMeshPath(String meshPath) {
        setMeshPath(meshPath, true);
    }

    /**
     * @return the stored substances path, or {@code null} if it was never set
     */
    public String getSubstancesPath() {
        return substancesPath;
    }

    /**
     * Sets the substances path, resolved against the current base path.
     *
     * @param substancesPath the substances path
     * @see #setSubstancesPath(String, boolean)
     */
    public void setSubstancesPath(String substancesPath) {
        setSubstancesPath(substancesPath, true);
    }

    /**
     * @return the stored document text path, or {@code null} if it was never set
     */
    public String getDocTextPath() {
        return docTextPath;
    }

    /**
     * @param docTextPath the document text path; resolved against the current base path
     */
    public void setDocTextPath(String docTextPath) {
        this.docTextPath = resolveToBasePath(docTextPath);
    }

    /**
     * @return the stored gold gene list path, or {@code null} if it was never set
     */
    public String getGoldGeneList() {
        return goldGeneList;
    }

    /**
     * @param goldGeneList the gold gene list path; resolved against the current base path
     */
    public void setGoldGeneList(String goldGeneList) {
        this.goldGeneList = resolveToBasePath(goldGeneList);
    }

    /**
     * Sets the MeSH path, optionally without prefixing it with the base path.
     *
     * @param meshPath         the MeSH path
     * @param appendToBasePath {@code true} to resolve the path against the current base path,
     *                         {@code false} to store it exactly as given
     */
    public void setMeshPath(String meshPath, boolean appendToBasePath) {
        this.meshPath = appendToBasePath ? resolveToBasePath(meshPath) : meshPath;
    }

    /**
     * Sets the substances path, optionally without prefixing it with the base path.
     *
     * @param substancesPath   the substances path
     * @param appendtoBasePath {@code true} to resolve the path against the current base path,
     *                         {@code false} to store it exactly as given
     */
    public void setSubstancesPath(String substancesPath, boolean appendtoBasePath) {
        this.substancesPath = appendtoBasePath ? resolveToBasePath(substancesPath) : substancesPath;
    }

    /**
     * @return {@code true} if a non-blank MeSH path is stored
     */
    public boolean hasMesh() {
        return this.meshPath != null && !this.meshPath.isBlank();
    }

    /**
     * @return {@code true} if a non-blank MeSH path is stored
     * @deprecated misspelled name kept for existing callers; use {@link #hasMesh()} instead
     */
    @Deprecated
    public boolean hashMesh() {
        return hasMesh();
    }

    /**
     * @return {@code true} if a non-blank substances path is stored
     */
    public boolean hasSubstances() {
        return this.substancesPath != null && !this.substancesPath.isBlank();
    }

    /**
     * Prefixes {@code path} with the base path and the platform file separator.
     * <p>
     * A {@code null} or blank {@code path} is returned unchanged: prefixing it would turn
     * "no path" into a string like {@code "<base>/null"} or {@code "<base>/"} which the
     * {@code has...()} checks would then mistake for a configured path. A {@code null} or blank
     * base path means there is nothing to prefix.
     *
     * @param path the path to resolve, may be {@code null}
     * @return the resolved path, or {@code path} itself if either side is {@code null} or blank
     */
    private String resolveToBasePath(String path) {
        if (path == null || path.isBlank()) {
            return path;
        }
        if (this.basePath == null || this.basePath.isBlank()) {
            return path;
        }
        return this.basePath + File.separator + path;
    }

    /**
     * @return the name of this document source, or {@code null} if it was never set
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the name of this document source
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the gene-ID flag; {@code true} unless changed via {@link #setHasGeneIds(boolean)}
     */
    public boolean isHasGeneIds() {
        return hasGeneIds;
    }

    /**
     * @param hasGeneIds the new value of the gene-ID flag
     */
    public void setHasGeneIds(boolean hasGeneIds) {
        this.hasGeneIds = hasGeneIds;
    }

    /**
     * @return the species-corpus flag; {@code false} unless changed via {@link #setSpeciesCorpus(boolean)}
     */
    public boolean isSpeciesCorpus() {
        return speciesCorpus;
    }

    /**
     * @param speciesCorpus the new value of the species-corpus flag
     */
    public void setSpeciesCorpus(boolean speciesCorpus) {
        this.speciesCorpus = speciesCorpus;
    }

    /**
     * @return whether document annotation to mention inference should be performed
     * @see #setInferDocumentLevelLabelsToMentions(boolean)
     */
    public boolean getInferDocumentLevelLabelsToMentions() {
        return inferDocumentLevelLabelsToMentions;
    }

    /**
     * If set to true, the {@link DocumentLoader} will make a best effort to derive gold mentions for corpora
     * that have only been annotated on the document level.
     * This is done by searching for candidates for each predicted gene mention and using the best scored candidate
     * that has an ID found in the set of valid IDs annotated for the document.
     *
     * @param inferDocumentLevelLabelsToMentions Whether to perform document annotation to mention inference.
     */
    public void setInferDocumentLevelLabelsToMentions(boolean inferDocumentLevelLabelsToMentions) {
        this.inferDocumentLevelLabelsToMentions = inferDocumentLevelLabelsToMentions;
    }

    /**
     * Some gene corpora have annotated all gene occurrences while others focus on the most important genes
     * with regard to a specific task.
     *
     * @return Whether all genes in this corpus have been annotated or only a subset.
     */
    public boolean isCompletelyAnnotated() {
        return completelyAnnotated;
    }

    /**
     * Some gene corpora have annotated all gene occurrences while others focus on the most important genes
     * with regard to a specific task.
     *
     * @param completelyAnnotated Whether all genes in this corpus have been annotated or only a subset.
     */
    public void setCompletelyAnnotated(boolean completelyAnnotated) {
        this.completelyAnnotated = completelyAnnotated;
    }

    /**
     * @return the stored ontology mentions path, or {@code null} if it was never set
     */
    public String getOntologyMentionsPath() {
        return ontologyMentionsPath;
    }

    /**
     * @param ontologyMentionsPath the ontology mentions path; resolved against the current base path
     */
    public void setOntologyMentionsPath(String ontologyMentionsPath) {
        this.ontologyMentionsPath = resolveToBasePath(ontologyMentionsPath);
    }

    /**
     * @param nonGenePhrasesPath the non-gene phrases path; resolved against the current base path
     */
    public void setNonGenePhrasesPath(String nonGenePhrasesPath) {
        this.nonGenePhrasesPath = resolveToBasePath(nonGenePhrasesPath);
    }

    /**
     * @return the stored non-gene phrases path, or {@code null} if it was never set
     */
    public String getNonGenePhrasesPath() {
        return nonGenePhrasesPath;
    }

    /**
     * @return the stored path of the gazetteer matches with IDs, or {@code null} if it was never set
     */
    public String getGazetteerMatchesWithIds() {
        return gazetteerMatchesWithIds;
    }

    /**
     * @param gazetteerMatchesWithIds the path of the gazetteer matches with IDs; resolved against
     *                                the current base path
     */
    public void setGazetteerMatchesWithIds(String gazetteerMatchesWithIds) {
        this.gazetteerMatchesWithIds = resolveToBasePath(gazetteerMatchesWithIds);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy