// org.biojava.nbio.survival.cox.CoxHelper Maven / Gradle / Ivy
// The newest version!
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.survival.cox;
import org.biojava.nbio.survival.data.WorkSheet;
import java.util.ArrayList;
/**
 * Convenience entry points for running a Cox regression, starting either from a tab delimited
 * file or from an already loaded {@link WorkSheet}, and returning the result as a {@link CoxInfo}.
 * The workflow mirrors the equivalent analysis in R. Because the underlying calculation can be
 * adjusted with weighting, strata, clustering etc., this helper hides that complexity for the
 * typical use case.
 *
 * @author Scooter Willis
 */
public class CoxHelper {

    /**
     * Reads a tab delimited file into a {@link WorkSheet} and runs the Cox regression on it.
     *
     * @param datafile The tab delimited file containing survival data and variables. The first column needs to be a unique index
     * @param timeColumn The column representing the event/censor time
     * @param statusColumn The column representing an event=1 and censor=0
     * @param weightColumn For case-cohort data sets may require weighting to reflect the entire cohort; may be null or empty
     * @param strataColumn A column representing strata data; may be null or empty
     * @param clusterColumn If robust variance calculation is required the cluster column will group samples by the value in this column; may be null or empty
     * @param variables The variables to be used in the cox regression analysis. For interactions use variable1:variable2
     * @param useStrata Boolean to indicate if strata column should be used
     * @param useWeights Boolean to indicate if weight column should be used
     * @return the regression result, or {@code null} if the analysis of the loaded worksheet failed
     * @throws Exception propagated from {@code WorkSheet.readCSV} when the file cannot be loaded
     */
    public static CoxInfo process(String datafile, String timeColumn, String statusColumn, String weightColumn, String strataColumn, String clusterColumn, ArrayList<String> variables, boolean useStrata, boolean useWeights) throws Exception {
        WorkSheet worksheet = WorkSheet.readCSV(datafile, '\t');
        return process(worksheet, timeColumn, statusColumn, weightColumn, strataColumn, clusterColumn, variables, useStrata, useWeights);
    }

    /**
     * Builds one {@link SurvivalInfo} per worksheet row and runs the Cox regression on them.
     * <p>
     * Rows whose weight is less than or equal to zero are skipped, but they still consume an
     * order index so that the order of the remaining samples matches their row position.
     * The weight and strata values are read whenever the corresponding column name is supplied,
     * independently of {@code useWeights} / {@code useStrata}; those flags are only passed on to
     * the regression. Supplying a cluster column switches on both the robust and the cluster
     * options of the regression.
     * <p>
     * Bob Gray's correction for weighted strata ({@code CoxCC.process}) is not applied here.
     *
     * @param worksheet The worksheet holding one sample per row
     * @param timeColumn The column representing the event/censor time
     * @param statusColumn The column representing an event=1 and censor=0
     * @param weightColumn For case-cohort data sets may require weighting to reflect the entire cohort; may be null or empty
     * @param strataColumn A column representing strata data; may be null or empty
     * @param clusterColumn If robust variance calculation is required the cluster column will group samples by the value in this column; may be null or empty
     * @param variables The variables to be used in the cox regression analysis. For interactions use variable1:variable2
     * @param useStrata Boolean to indicate if strata column should be used
     * @param useWeights Boolean to indicate if weight column should be used
     * @return the regression result, or {@code null} if any exception occurred (the stack trace is printed)
     */
    public static CoxInfo process(WorkSheet worksheet, String timeColumn, String statusColumn, String weightColumn, String strataColumn, String clusterColumn, ArrayList<String> variables, boolean useStrata, boolean useWeights) {
        try {
            // The column names do not change per row, so decide once which optional columns are in play.
            final boolean hasWeightColumn = hasText(weightColumn);
            final boolean hasStrataColumn = hasText(strataColumn);
            final boolean hasClusterColumn = hasText(clusterColumn);

            ArrayList<SurvivalInfo> survivalInfoList = new ArrayList<>();
            int order = 0;
            for (String row : worksheet.getRows()) {
                // Every row advances the order index, including rows that are skipped below.
                order++;

                double time = worksheet.getCellDouble(row, timeColumn);
                double status = worksheet.getCellDouble(row, statusColumn);
                double weight = 1.0;
                if (hasWeightColumn) {
                    weight = worksheet.getCellDouble(row, weightColumn);
                }
                int strata = 0;
                if (hasStrataColumn) {
                    strata = worksheet.getCellDouble(row, strataColumn).intValue();
                }
                int censor = (int) status;

                // Samples without a positive weight do not take part in the regression.
                if (weight <= 0) {
                    continue;
                }

                SurvivalInfo si = new SurvivalInfo(time, censor);
                si.setOrder(order);
                si.setWeight(weight);
                si.setStrata(strata);
                for (String column : variables) {
                    // Interaction terms (variable1:variable2) are not worksheet columns, so nothing is read for them.
                    if (column.contains(":")) {
                        continue;
                    }
                    String value = worksheet.getCell(row, column);
                    si.addUnknownDataTypeVariable(column, value);
                }
                if (hasClusterColumn) {
                    si.setClusterValue(worksheet.getCell(row, clusterColumn));
                }
                survivalInfoList.add(si);
            }

            // A cluster column implies both the cluster and the robust options.
            boolean cluster = hasClusterColumn;
            boolean robust = hasClusterColumn;

            CoxR cox = new CoxR();
            return cox.process(variables, survivalInfoList, useStrata, useWeights, robust, cluster);
        } catch (Exception e) {
            // Existing contract: report the failure and signal it to the caller with null.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Returns true when the given column name is neither null nor empty.
     */
    private static boolean hasText(String columnName) {
        return columnName != null && columnName.length() > 0;
    }

    /**
     * Example run against a hard-coded local data file: fits the model, prints it, applies
     * {@code CoxCC.process} and dumps the result.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            String datafile = "/Users/Scooter/scripps/ngs/DataSets/E2197/misc/ecoglabtransfer/500790/2013.05.10.12.28.58.313/clindasl0228.txt";
            ArrayList<String> variables = new ArrayList<>();
            variables.add("nndpos");
            variables.add("meno");

            CoxInfo ci = CoxHelper.process(datafile, "ttr", "recind", "wt", "sstrat", "Seq", variables, false, true);
            if (ci == null) {
                // process() has already printed the cause; do not hand a null result to CoxCC.
                System.err.println("Cox regression failed for " + datafile);
                return;
            }
            System.out.println(ci);
            System.out.println();
            CoxCC.process(ci);
            ci.dump();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}