// org.opencb.biodata.tools.clinical.DiseasePanelParsers Maven / Gradle / Ivy
package org.opencb.biodata.tools.clinical;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.clinical.ClinicalProperty;
import org.opencb.biodata.models.clinical.interpretation.CancerPanel;
import org.opencb.biodata.models.clinical.interpretation.DiseasePanel;
import org.opencb.biodata.models.core.OntologyTerm;
import org.opencb.biodata.models.core.Xref;
import org.opencb.commons.utils.FileUtils;
import org.opencb.commons.utils.ListUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
public class DiseasePanelParsers {
/** SLF4J logger obtained from {@link LoggerFactory} for {@link DiseasePanelParsers}. */
protected static Logger logger = LoggerFactory.getLogger(DiseasePanelParsers.class);
/**
 * Parses a tab-separated census file into a {@link DiseasePanel} holding one
 * {@link DiseasePanel.GenePanel} per data row.
 *
 * <p>The first line must be a header whose column names all belong to the whitelist declared
 * below. Every following non-blank line is split on tabs, each cell is stripped of surrounding
 * single/double quotes and then mapped onto the gene panel according to its column name.
 * Blank lines are skipped.</p>
 *
 * @param censusTsvFile path of the census TSV file to read.
 * @return the populated panel, after {@code fillDefaultStats} has been applied to it.
 * @throws IOException if the file cannot be read, is empty, contains a header column that is not
 *                     whitelisted, or contains a data row with more columns than the header.
 */
public static DiseasePanel parseCensus(Path censusTsvFile) throws IOException {
    Set<String> myKeys = new HashSet<>(Arrays.asList("Gene Symbol", "Name", "Entrez GeneId", "Genome Location",
            "Tier", "Hallmark", "Chr Band", "Somatic", "Germline", "Tumour Types(Somatic)",
            "Tumour Types(Germline)", "Cancer Syndrome", "Tissue Type", "Molecular Genetics", "Role in Cancer",
            "Mutation Types", "Translocation Partner", "Other Germline Mut", "Other Syndrome", "Synonyms"));

    try (BufferedReader bufferedReader = FileUtils.newBufferedReader(censusTsvFile)) {
        // readLine() returns null on an empty file; fail with a clear message instead of an NPE.
        String headerLine = bufferedReader.readLine();
        if (headerLine == null) {
            throw new IOException("Census file '" + censusTsvFile + "' is empty: header line not found");
        }

        // Column index -> column name, so each cell can be dispatched by name.
        Map<Integer, String> keyPositionMap = new HashMap<>();
        String[] header = headerLine.split("\t");
        for (int i = 0; i < header.length; i++) {
            String key = header[i];
            if (!myKeys.contains(key)) {
                throw new IOException("Key '" + key + "' from census file not found in our whitelist");
            }
            keyPositionMap.put(i, key);
        }

        DiseasePanel panel = new DiseasePanel("gene-census", "gene-census", new LinkedList<>(), new LinkedList<>(),
                new LinkedList<>(), new LinkedList<>(), new LinkedList<>(), new LinkedList<>(), new LinkedList<>(),
                new HashMap<>(), new DiseasePanel.SourcePanel("gene-census", "gene-census", "", "Cosmic",
                "Cancer Gene Census"), "", "", "", new HashMap<>());

        String line;
        int lineNumber = 1; // the header was line 1
        while ((line = bufferedReader.readLine()) != null) {
            lineNumber++;
            if (StringUtils.isBlank(line)) {
                // A blank (or tab-only) line would otherwise yield a gene panel with an empty id.
                continue;
            }

            String[] splittedLine = line.split("\t");
            if (splittedLine.length > header.length) {
                // Cells beyond the header have no column name to dispatch on.
                throw new IOException("Line " + lineNumber + " of census file has " + splittedLine.length
                        + " columns but the header only declares " + header.length);
            }

            DiseasePanel.GenePanel genePanel = new DiseasePanel.GenePanel("", "", new LinkedList<>(),
                    ClinicalProperty.ModeOfInheritance.UNKNOWN, null, ClinicalProperty.Imprinted.UNKNOWN, null,
                    new LinkedList<>(), new LinkedList<>(), new LinkedList<>(), new LinkedList<>(),
                    new CancerPanel(false, false, new LinkedList<>(), new LinkedList<>(), new LinkedList<>(),
                            new LinkedList<>(), new LinkedList<>()));

            for (int i = 0; i < splittedLine.length; i++) {
                String value = StringUtils.strip(splittedLine[i], "\"'");
                applyCensusField(genePanel, keyPositionMap.get(i), value);
            }
            panel.getGenes().add(genePanel);
        }

        fillDefaultStats(panel);
        return panel;
    }
}

/**
 * Copies one census cell into the gene panel according to the name of its column.
 * Columns without a case body are accepted by the header whitelist but not stored.
 *
 * @param genePanel gene panel being filled for the current row.
 * @param key       header name of the column the value belongs to.
 * @param value     cell content, already stripped of surrounding quotes.
 */
private static void applyCensusField(DiseasePanel.GenePanel genePanel, String key, String value) {
    switch (key) {
        case "Gene Symbol":
            genePanel.setId(value);
            genePanel.setName(value);
            break;
        case "Name":
            genePanel.getXrefs().add(new Xref(value, "Census", "Census", "Name"));
            break;
        case "Entrez GeneId":
            genePanel.getXrefs().add(new Xref(value, "Census", "Census", "Entrez GeneId"));
            break;
        case "Genome Location":
            genePanel.getCoordinates().add(new DiseasePanel.Coordinate("GRCh38", value, "Census"));
            break;
        case "Tier":
            // Tier 1 -> HIGH, tier 2 -> MEDIUM, anything else (including empty) -> LOW.
            if ("1".equals(value)) {
                genePanel.setConfidence(ClinicalProperty.Confidence.HIGH);
            } else if ("2".equals(value)) {
                genePanel.setConfidence(ClinicalProperty.Confidence.MEDIUM);
            } else {
                genePanel.setConfidence(ClinicalProperty.Confidence.LOW);
            }
            break;
        case "Somatic":
            if ("yes".equals(value)) {
                genePanel.getCancer().setSomatic(true);
            }
            break;
        case "Germline":
            if ("yes".equals(value)) {
                genePanel.getCancer().setGermline(true);
            }
            break;
        case "Tumour Types(Somatic)":
            if (StringUtils.isNotEmpty(value)) {
                genePanel.getCancer().setSomaticTumourTypes(splitCensusList(value));
            }
            break;
        case "Tumour Types(Germline)":
            if (StringUtils.isNotEmpty(value)) {
                genePanel.getCancer().setGermlineTumourTypes(splitCensusList(value));
            }
            break;
        case "Cancer Syndrome":
            if (StringUtils.isNotEmpty(value)) {
                genePanel.getPhenotypes().add(new OntologyTerm(value, value, "Census", "", "", "",
                        Collections.emptyList(), Collections.emptyList(), Collections.emptyList(),
                        Collections.emptyList()));
            }
            break;
        case "Tissue Type":
            if (StringUtils.isNotEmpty(value)) {
                genePanel.getCancer().setTissues(splitCensusList(value));
            }
            break;
        case "Molecular Genetics":
            if (StringUtils.isNotEmpty(value)) {
                ClinicalProperty.ModeOfInheritance moi = censusModeOfInheritance(value);
                if (moi != null) {
                    genePanel.setModesOfInheritance(Collections.singletonList(moi));
                }
            }
            break;
        case "Role in Cancer":
            if (StringUtils.isNotEmpty(value)) {
                ClinicalProperty.RoleInCancer roleInCancer = censusRoleInCancer(value);
                if (roleInCancer != null) {
                    genePanel.getCancer().setRoles(Collections.singletonList(roleInCancer));
                }
            }
            break;
        case "Translocation Partner":
            // "?" is treated the same as an empty cell.
            if (StringUtils.isNotEmpty(value) && !"?".equals(value)) {
                genePanel.getCancer().setFusionPartners(splitCensusList(value));
            }
            break;
        case "Synonyms":
            for (String synonym : value.split(",")) {
                // "".split(",") yields a single empty token; do not register it as an Xref.
                if (StringUtils.isNotEmpty(synonym)) {
                    genePanel.getXrefs().add(new Xref(synonym, "Census", "Census", "Synonyms"));
                }
            }
            break;
        case "Hallmark":
        case "Chr Band":
        case "Mutation Types":
        case "Other Germline Mut":
        case "Other Syndrome":
        default:
            break;
    }
}

/**
 * Translates the "Molecular Genetics" cell into a mode of inheritance.
 *
 * @param value non-empty cell content.
 * @return the matching mode of inheritance, or {@code null} when the value is not recognised.
 */
private static ClinicalProperty.ModeOfInheritance censusModeOfInheritance(String value) {
    switch (value) {
        case "Dom":
            return ClinicalProperty.ModeOfInheritance.AUTOSOMAL_DOMINANT;
        case "Rec":
            return ClinicalProperty.ModeOfInheritance.AUTOSOMAL_RECESSIVE;
        case "Dom/Rec":
            // NOTE(review): only the dominant mode is recorded for "Dom/Rec" - confirm this is intended.
            return ClinicalProperty.ModeOfInheritance.AUTOSOMAL_DOMINANT;
        case "Rec/X":
            return ClinicalProperty.ModeOfInheritance.X_LINKED_RECESSIVE;
        default:
            // NOTE(review): the file declares a class-level logger; consider routing this through it.
            System.out.println("Unknown moi '" + value + "'");
            return null;
    }
}

/**
 * Collapses the ", "-separated "Role in Cancer" cell into a single role. Only "TSG" and
 * "oncogene" are recognised; two different recognised roles yield {@code BOTH}.
 *
 * @param value non-empty cell content.
 * @return the resulting role, or {@code null} when no recognised role is present.
 */
private static ClinicalProperty.RoleInCancer censusRoleInCancer(String value) {
    ClinicalProperty.RoleInCancer roleInCancer = null;
    for (String role : value.split(", ")) {
        ClinicalProperty.RoleInCancer tmpRole = null;
        if ("TSG".equals(role)) {
            tmpRole = ClinicalProperty.RoleInCancer.TUMOR_SUPPRESSOR_GENE;
        } else if ("oncogene".equals(role)) {
            tmpRole = ClinicalProperty.RoleInCancer.ONCOGENE;
        }

        if (tmpRole == null) {
            continue; // unrecognised role token: ignored
        }
        if (roleInCancer == null) {
            roleInCancer = tmpRole;
        } else if (tmpRole != roleInCancer) {
            roleInCancer = ClinicalProperty.RoleInCancer.BOTH;
        } else {
            System.out.println("Found repeated roles?");
        }
    }
    return roleInCancer;
}

/**
 * Splits a ", "-separated cell into a mutable list, so the stored list can be modified later
 * like the {@code LinkedList} defaults the gene panel is created with.
 *
 * @param value non-empty cell content.
 * @return a new {@link ArrayList} with one entry per item.
 */
private static List<String> splitCensusList(String value) {
    return new ArrayList<>(Arrays.asList(value.split(", ")));
}
public static DiseasePanel parsePanelApp(Path panelAppJsonFile) throws IOException {
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
objectMapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false);
objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
objectMapper.configure(DeserializationFeature.FAIL_ON_NULL_FOR_PRIMITIVES, false);
objectMapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true);
Map panelInfo = objectMapper.readValue(panelAppJsonFile.toFile(), Map.class);
List categories = new ArrayList<>(2);
categories.add(new DiseasePanel.PanelCategory(String.valueOf(panelInfo.get("disease_group")), 1));
categories.add(new DiseasePanel.PanelCategory(String.valueOf(panelInfo.get("disease_sub_group")), 2));
List disorders = new ArrayList<>();
for (String relevantDisorder : (List) panelInfo.get("relevant_disorders")) {
if (StringUtils.isNotEmpty(relevantDisorder)) {
disorders.add(new OntologyTerm(relevantDisorder, relevantDisorder, "", "", "", "",
Collections.emptyList(), Collections.emptyList(), Collections.emptyList(),
Collections.emptyList()));
}
}
List genes = new ArrayList<>();
for (Map gene : (List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy