// org.biojava.nbio.survival.cox.SurvivalInfoHelper Maven / Gradle / Ivy
// The newest version!
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.survival.cox;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
/**
 * Static helpers for working with lists of {@link SurvivalInfo}: exporting them as
 * delimited text, converting raw string values to numeric codes, and deriving
 * interaction and range-group variables.
 *
 * @author Scooter Willis
 */
public class SurvivalInfoHelper {

    /**
     * For each analysis this allows outputting of the data used in the calculations to a
     * print stream/file, so the file can be loaded into R and the calculations verified.
     * <p>
     * Writes a header row ({@code Seq}, every data variable name reported by the first
     * record, then {@code TIME}, {@code STATUS}, {@code WEIGHT}, {@code STRATA}) followed
     * by one row per record. An empty list writes nothing.
     *
     * @param DataT     records to export; the first record supplies the variable columns
     * @param ps        destination stream
     * @param delimiter text written after every column except the last one of a row
     */
    public static void dump(ArrayList<SurvivalInfo> DataT, PrintStream ps, String delimiter) {
        // Without a first record there are no variable names to build the header from.
        if (DataT.isEmpty()) {
            return;
        }
        ArrayList<String> variables = DataT.get(0).getDataVariables();

        ps.print("Seq" + delimiter);
        for (String variable : variables) {
            ps.print(variable + delimiter);
        }
        ps.println("TIME" + delimiter + "STATUS" + delimiter + "WEIGHT" + delimiter + "STRATA");

        for (SurvivalInfo si : DataT) {
            ps.print(si.getOrder() + delimiter);
            for (String variable : variables) {
                // A variable missing on this record is printed as the text "null".
                Double value = si.getVariable(variable);
                ps.print(value + delimiter);
            }
            ps.println(si.getTime() + delimiter + si.getStatus() + delimiter + si.getWeight() + delimiter + si.getStrata());
        }
    }

    /**
     * Decides whether a set of raw values must be treated as categorical: true as soon
     * as one key cannot be parsed by {@link Double#parseDouble(String)} (a {@code null}
     * key counts as not parseable).
     *
     * @param values map whose keys are the distinct raw values of one variable
     * @return {@code true} if any key is not numeric, {@code false} if all keys parse
     */
    private static boolean isCategorical(LinkedHashMap<String, Double> values) {
        try {
            for (String value : values.keySet()) {
                Double.parseDouble(value);
            }
            return false;
        } catch (NumberFormatException | NullPointerException notNumeric) {
            // parseDouble throws NumberFormatException for text and
            // NullPointerException for null; both mean "not a number" here.
            return true;
        }
    }

    /**
     * Take a collection of categorical data and convert it to numeric to be used in cox
     * calculations.
     * <p>
     * For every key found in a record's {@code unknownDataType} map the distinct raw
     * values across all records are collected. If all of them are numeric they are
     * parsed as doubles. Otherwise the values are sorted alphabetically and coded by
     * position: {@code 0.0}/{@code 1.0} when there are exactly two categories,
     * {@code 1.0, 2.0, ...} for any other count. The numeric value is stored in each
     * record's {@code data} map under the same key, and {@code unknownDataType} is
     * cleared afterwards.
     *
     * @param DataT records to convert in place
     */
    public static void categorizeData(ArrayList<SurvivalInfo> DataT) {
        // Pass 1: go through and collect all distinct raw values per variable.
        LinkedHashMap<String, LinkedHashMap<String, Double>> valueMap = new LinkedHashMap<>();
        for (SurvivalInfo si : DataT) {
            for (String key : si.unknownDataType.keySet()) {
                LinkedHashMap<String, Double> map = valueMap.get(key);
                if (map == null) {
                    map = new LinkedHashMap<>();
                    valueMap.put(key, map);
                }
                map.put(si.unknownDataType.get(key), null);
            }
        }

        // Pass 2: assign a numeric code to every raw value.
        for (LinkedHashMap<String, Double> values : valueMap.values()) {
            if (isCategorical(values)) {
                ArrayList<String> categories = new ArrayList<>(values.keySet());
                Collections.sort(categories); // go ahead and put in alphabetical order
                // Two categories are coded 0/1; any other count is coded 1..n.
                double offset = (categories.size() == 2) ? 0.0 : 1.0;
                for (int i = 0; i < categories.size(); i++) {
                    values.put(categories.get(i), i + offset);
                }
            } else {
                // Replacing the value of an existing key is not a structural change,
                // so it is safe while iterating the key set.
                for (String value : values.keySet()) {
                    values.put(value, Double.parseDouble(value));
                }
            }
        }

        // Pass 3: copy the numeric code of each record's raw value into its data map.
        for (SurvivalInfo si : DataT) {
            for (String key : si.unknownDataType.keySet()) {
                String value = si.unknownDataType.get(key);
                Double d = valueMap.get(key).get(value);
                si.data.put(key, d);
            }
        }
        for (SurvivalInfo si : DataT) {
            si.unknownDataType.clear();
        }
    }

    /**
     * To test for interactions use two variables and create a third variable where the
     * two are multiplied together. The product is added to every record as a continuous
     * variable named {@code variable1 + ":" + variable2}.
     *
     * @param variable1        name of the first variable
     * @param variable2        name of the second variable
     * @param survivalInfoList records to extend in place
     * @return a list holding {@code variable1}, {@code variable2} and the interaction
     *         variable name, in that order
     * @throws IllegalArgumentException if a record has no value for one of the variables
     */
    public static ArrayList<String> addInteraction(String variable1, String variable2, ArrayList<SurvivalInfo> survivalInfoList) {
        String interaction = variable1 + ":" + variable2;

        ArrayList<String> variables = new ArrayList<>();
        variables.add(variable1);
        variables.add(variable2);
        variables.add(interaction);

        for (SurvivalInfo si : survivalInfoList) {
            Double value1 = si.getVariable(variable1);
            Double value2 = si.getVariable(variable2);
            // Fail with a readable message instead of an unboxing NullPointerException.
            if (value1 == null || value2 == null) {
                String missing = (value1 == null) ? variable1 : variable2;
                throw new IllegalArgumentException("Variable " + missing + " not found in " + si.toString());
            }
            Double product = value1 * value2;
            si.addContinuousVariable(interaction, product);
        }
        return variables;
    }

    /**
     * Need to allow a range of values similar to cut in R: bins a continuous variable
     * and adds one 0/1 indicator variable per occupied bin to every record.
     * <p>
     * The boundaries produce the bins {@code [<=r0]}, {@code [r0+1-r1]}, ...,
     * {@code [>rLast]}; a value belongs to the first bin whose upper boundary is not
     * smaller than it. Indicator variables are named {@code groupName + "_" + binLabel}
     * and are only created for bins that contain at least one record. The list of
     * created names is also printed to {@code System.out}.
     *
     * @param range            bin boundaries, at least one; presumably ascending (not checked)
     * @param variable         name of the continuous variable to bin
     * @param groupName        prefix of the generated indicator variable names
     * @param survivalInfoList records to extend in place
     * @throws IllegalArgumentException if {@code range} is {@code null} or empty
     * @throws Exception if a record has no value for {@code variable}, or a value
     *                   cannot be placed in any bin
     */
    public static void groupByRange(double[] range, String variable, String groupName, ArrayList<SurvivalInfo> survivalInfoList) throws Exception {
        if (range == null || range.length == 0) {
            throw new IllegalArgumentException("range must contain at least one boundary");
        }

        // One label per bin: range.length boundaries give range.length + 1 bins. The
        // open-ended top bin is always added, also when there is a single boundary.
        ArrayList<String> labels = new ArrayList<>();
        labels.add("[<=" + range[0] + "]");
        for (int i = 1; i < range.length; i++) {
            labels.add("[" + (range[i - 1] + 1) + "-" + range[i] + "]");
        }
        labels.add("[>" + range[range.length - 1] + "]");

        // Need to find the categories so we can set 1 and 0 and not include ranges
        // with no values. The label of each record is remembered for the second pass.
        ArrayList<String> assignedLabels = new ArrayList<>(survivalInfoList.size());
        ArrayList<String> validLabels = new ArrayList<>();
        for (SurvivalInfo si : survivalInfoList) {
            Double value = si.getContinuousVariable(variable);
            if (value == null) {
                throw new Exception("Variable " + variable + " not found in " + si.toString());
            }
            String groupLabel = groupName + "_" + labels.get(getRangeIndex(range, value));
            assignedLabels.add(groupLabel);
            if (!validLabels.contains(groupLabel)) {
                validLabels.add(groupLabel);
            }
        }
        Collections.sort(validLabels);
        System.out.println("Valid Lables:" + validLabels);

        // Every record gets all indicator variables: 1.0 for its own bin, 0.0 otherwise.
        for (int i = 0; i < survivalInfoList.size(); i++) {
            SurvivalInfo si = survivalInfoList.get(i);
            String inLabel = assignedLabels.get(i);
            for (String gl : validLabels) {
                if (gl.equals(inLabel)) {
                    si.addContinuousVariable(gl, 1.0);
                } else {
                    si.addContinuousVariable(gl, 0.0);
                }
            }
        }
    }

    /**
     * Returns the group categories of {@code groupName} as reported by the first record
     * of the list.
     *
     * @param groupName        group name passed on to the first record
     * @param survivalInfoList records to inspect; only the first one is consulted
     * @return the categories reported by the first record, or an empty list when
     *         {@code survivalInfoList} is empty
     */
    public static ArrayList<String> getGroupCategories(String groupName, ArrayList<SurvivalInfo> survivalInfoList) {
        if (survivalInfoList.isEmpty()) {
            return new ArrayList<>();
        }
        return survivalInfoList.get(0).getGroupCategories(groupName);
    }

    /**
     * Finds the bin a value falls into.
     *
     * @param range bin boundaries
     * @param value value to place
     * @return the index of the first boundary that is not smaller than {@code value},
     *         or {@code range.length} when {@code value} is greater than the last boundary
     * @throws Exception if no bin matches, which happens for an empty {@code range} or
     *                   when the comparisons all fail (for example a NaN value)
     */
    private static int getRangeIndex(double[] range, double value) throws Exception {
        for (int i = 0; i < range.length; i++) {
            if (value <= range[i]) {
                return i;
            }
        }
        if (range.length > 0 && value > range[range.length - 1]) {
            return range.length;
        }
        throw new Exception("Value " + value + " not found in range ");
    }
}