All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.survival.kaplanmeier.figure.SurvFitKM Maven / Gradle / Ivy

The newest version!
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.survival.kaplanmeier.figure;

import org.biojava.nbio.survival.cox.StrataInfo;
import org.biojava.nbio.survival.cox.SurvFitInfo;
import org.biojava.nbio.survival.cox.SurvivalInfo;
import org.biojava.nbio.survival.data.WorkSheet;

import javax.swing.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;

/**
 * Ported from survfitKM.S When combining multiple entries with same time not
 * sure how the weighting adds up
 *
 * @author Scooter Willis 
 */
public class SurvFitKM {

	/**
	 *
	 */
	public enum Method {

		/**
		 *
		 */
		kaplanMeier,
		/**
		 *
		 */
		flemingHarrington,
		/**
		 *
		 */
		fh2;
	}

	/**
	 *
	 */
	public enum Error {

		/**
		 *
		 */
		greenwood,
		/**
		 *
		 */
		tsiatis;
	}

	/**
	 *
	 */
	public enum ConfType {

		/**
		 *
		 */
		log,
		/**
		 *
		 */
		log_log,
		/**
		 *
		 */
		plain,
		/**
		 *
		 */
		none;
	}

	/**
	 *
	 */
	public enum ConfLower {

		/**
		 *
		 */
		usual,
		/**
		 *
		 */
		peto,
		/**
		 *
		 */
		modified;
	}

	/**
	 *
	 * @param survivalData
	 * @param useWeights
	 * @return
	 * @throws Exception
	 */
	public SurvFitInfo process(LinkedHashMap> survivalData, boolean useWeights) throws Exception {
		ArrayList survivalInfoList = new ArrayList<>();
		int i = 0;
		for (String strata : survivalData.keySet()) {
			ArrayList csList = survivalData.get(strata);
			for (CensorStatus cs : csList) {
				SurvivalInfo si = new SurvivalInfo(cs.time, Integer.parseInt(cs.censored));
				si.setOrder(i);
				i++;
				si.setWeight(cs.weight);
				si.addUnknownDataTypeVariable("STRATA", strata);
				si.addUnknownDataTypeVariable("VALUE", cs.value + "");
				survivalInfoList.add(si);
			}
		}

		return process("STRATA", survivalInfoList, useWeights);

	}

	/**
	 *
	 * @param datafile
	 * @param timeColumn
	 * @param statusColumn
	 * @param weightColumn
	 * @param variableColumn
	 * @param useWeights
	 * @return
	 * @throws Exception
	 */
	public SurvFitInfo process(String datafile, String timeColumn, String statusColumn, String weightColumn, String variableColumn, boolean useWeights) throws Exception {
		WorkSheet worksheet = WorkSheet.readCSV(datafile, '\t');
		ArrayList survivalInfoList = new ArrayList<>();

		int i = 1;
		for (String row : worksheet.getRows()) {
			double time = worksheet.getCellDouble(row, timeColumn);

			double c = worksheet.getCellDouble(row, statusColumn);
			double weight = 1.0;
			if (weightColumn != null && weightColumn.length() > 0) {
				weight = worksheet.getCellDouble(row, weightColumn);

			}
			int strata = 0;

			int censor = (int) c;

			if (weight <= 0) {
				//   System.out.println("Weight <= 0 Sample=" + row + " weight=" + weight);
				i++;
				continue;
			}



			SurvivalInfo si = new SurvivalInfo(time, censor);
			si.setOrder(i);
			si.setWeight(weight);
			si.setStrata(strata);


			String value = worksheet.getCell(row, variableColumn);
			si.addUnknownDataTypeVariable(variableColumn, value);



			survivalInfoList.add(si);
			i++;
		}



		return process(variableColumn, survivalInfoList, useWeights);
	}

	/**
	 *
	 * @param variable
	 * @param dataT
	 * @param useWeighted
	 * @return
	 * @throws Exception
	 */
	public SurvFitInfo process(String variable, ArrayList dataT, boolean useWeighted) throws Exception {
		return this.process(variable, dataT, Method.kaplanMeier, Error.greenwood, true, .95, ConfType.log, ConfLower.usual, null, null, useWeighted);
	}


	public LinkedHashMap  processStrataInfo(String variable, ArrayList dataT, SurvFitKM.Method method, SurvFitKM.Error error, boolean seFit, double confInt, ConfType confType, ConfLower confLower, Double startTime, Double newTime, boolean useWeighted) throws Exception{
				Collections.sort(dataT);
		if (startTime == null && newTime != null) {
			startTime = newTime;
		}
		//int ny = 2; // setup for right censored versus counting
		if (startTime != null) {
			throw new Exception("Filter on startTime not implemented");
		}
		int n = dataT.size();


		LinkedHashMap levels = new LinkedHashMap<>();
		LinkedHashMap> strataHashMap = new LinkedHashMap<>();

		for (int i = 0; i < n; i++) {
			SurvivalInfo si = dataT.get(i);
			String value = si.getUnknownDataTypeVariable(variable);
			Integer count = levels.get(value);
			if (count == null) {
				count = 0;
			}
			count++;
			levels.put(value, count);
			ArrayList strataList = strataHashMap.get(value);
			if (strataList == null) {
				strataList = new ArrayList<>();
				strataHashMap.put(value, strataList);
			}
			strataList.add(si);

		}

		//int nstrat = levels.size();

		LinkedHashMap strataInfoHashMap = new LinkedHashMap<>();

		for (String strata : strataHashMap.keySet()) {

			ArrayList strataList = strataHashMap.get(strata);
			StrataInfo strataInfo = new StrataInfo();
			strataInfoHashMap.put(strata, strataInfo);


			Double previousTime = null;
			for (SurvivalInfo si : strataList) {
				double w = 1.0;
				if (useWeighted) {
					w = si.getWeight();
				}

				if (previousTime == null || si.getTime() != previousTime) {
					strataInfo.getTime().add(si.getTime());
					if (si.getStatus() == 0) {
						strataInfo.getStatus().add(0);
						strataInfo.getNcens().add(w);
						strataInfo.getNevent().add(0.0);
					} else {
						strataInfo.getNcens().add(0.0);
						strataInfo.getNevent().add(w);
						strataInfo.getStatus().add(1);
					}
					strataInfo.getNrisk().add(0.0);
					strataInfo.getStderr().add(0.0);
					strataInfo.getWeight().add(w);
				} else {
					//we have the same time so add to previous entry
					int index = strataInfo.getTime().size() - 1;
					if (si.getStatus() == 0) {
						double nw = strataInfo.getNcens().get(index) + w;
						strataInfo.getNcens().remove(index);
						strataInfo.getNcens().add(nw);
						// strataInfo.nevent.add(0.0);
					} else {
						// strataInfo.ncens.add(0.0);
						double nw = strataInfo.getNevent().get(index) + w;
						strataInfo.getNevent().remove(index);
						strataInfo.getNevent().add(nw);

					}
					double nw = strataInfo.getWeight().get(index) + w;
					strataInfo.getWeight().remove(index);
					strataInfo.getWeight().add(nw);
				}
				previousTime = si.getTime();
				//  strataInfo.status.add(si.status);



				Integer ndead = strataInfo.getNdead().get(si.getTime());
				if (ndead == null) {
					ndead = 0;
				}
				if (si.getStatus() == 1) {
					ndead++;
				}
				strataInfo.getNdead().put(si.getTime(), ndead);

			}



			int j = strataInfo.getWeight().size() - 1;
			double cw = 0.0;
			for (int i = strataInfo.getWeight().size() - 1; i >= 0; i--) {
				double c = strataInfo.getWeight().get(i);
				cw = cw + c;
				strataInfo.getNrisk().set(j, cw);
				j--;
			}
			if (method == Method.kaplanMeier) {

				for (int i = 0; i < strataInfo.getNrisk().size(); i++) {
					double t = (strataInfo.getNrisk().get(i) - strataInfo.getNevent().get(i)) / strataInfo.getNrisk().get(i);
					if (i == 0) {
						strataInfo.getSurv().add(t);
					} else {
						strataInfo.getSurv().add(t * strataInfo.getSurv().get(i - 1));
					}
				}
			} else if (method == Method.flemingHarrington) {
				for (int i = 0; i < strataInfo.getNrisk().size(); i++) {
					double hazard = (strataInfo.getNevent().get(i)) / strataInfo.getNrisk().get(i);
					if (i == 0) {
						strataInfo.getSurv().add(Math.exp(-1.0 * hazard));
					} else {
						strataInfo.getSurv().add(Math.exp(-1.0 * (hazard + strataInfo.getSurv().get(i - 1))));
					}
				}
			} else if (method == Method.fh2) {
				throw new Exception("Method.fh2 not supported. Need to implement survfit4.c ");
			}

			if (seFit) {
				if (error == Error.greenwood) {
					for (int i = 0; i < strataInfo.getNrisk().size(); i++) {
						double t = (strataInfo.getNevent().get(i)) / (strataInfo.getNrisk().get(i) * (strataInfo.getNrisk().get(i) - strataInfo.getNevent().get(i)));
						if (i == 0) {
							strataInfo.getVarhaz().add(t);
						} else {
							strataInfo.getVarhaz().add(t + strataInfo.getVarhaz().get(i - 1));
						}
					}
				} else if (method == Method.kaplanMeier || method == Method.flemingHarrington) {
					for (int i = 0; i < strataInfo.getNrisk().size(); i++) {
						double t = (strataInfo.getNevent().get(i)) / (strataInfo.getNrisk().get(i) * strataInfo.getNrisk().get(i));
						if (i == 0) {
							strataInfo.getVarhaz().add(t);
						} else {
							strataInfo.getVarhaz().add(t + strataInfo.getVarhaz().get(i - 1));
						}
					}

				} else {
					//varhaz[[i]] <- cumsum(nevent* tsum$sum2)
					throw new Exception("Method.fh2 not supported. Need to implement survfit4.c ");
				}
				strataInfo.getStderr().clear();
				for (int i = 0; i < strataInfo.getNrisk().size(); i++) {

					strataInfo.getStderr().add(Math.sqrt(strataInfo.getVarhaz().get(i)));
				}


			}

		}

		ArrayList events = new ArrayList<>();
		ArrayList nrisk = new ArrayList<>();
		for (StrataInfo strataInfo : strataInfoHashMap.values()) {
			boolean firsttime = true;
			for (int j = 0; j < strataInfo.getNevent().size(); j++) {
				Double d = strataInfo.getNevent().get(j);
				if (d > 0 || firsttime) {
					firsttime = false;
					events.add(true);
					nrisk.add(strataInfo.getNrisk().get(j));
				} else {
					events.add(false);
				}
			}

		}
		ArrayList zz = new ArrayList<>();
		for (int i = 0; i < events.size(); i++) {
			if (events.get(i)) {
				zz.add(i + 1);
			}
		}
		zz.add(events.size() + 1);
		ArrayList diffzz = new ArrayList<>();
		for (int i = 0; i < zz.size() - 1; i++) {
			diffzz.add(zz.get(i + 1) - zz.get(i));
		}
		//System.out.println(diffzz);
		ArrayList nlag = new ArrayList<>();
		for (int j = 0; j < nrisk.size(); j++) {
			int count = diffzz.get(j);
			for (int c = 0; c < count; c++) {
				nlag.add(nrisk.get(j));
			}
		}

		// System.out.println(nlag);
		// System.out.println("nlag.size=" + nlag.size());
		if (confLower == ConfLower.usual) {
			for (StrataInfo strataInfo : strataInfoHashMap.values()) {
				strataInfo.setStdlow(strataInfo.getStderr());
			}
		} else if (confLower == ConfLower.peto) {
			for (StrataInfo strataInfo : strataInfoHashMap.values()) {
				for (int j = 0; j < strataInfo.getSurv().size(); j++) {
					double v = Math.sqrt((1.0 - strataInfo.getSurv().get(j)) / strataInfo.getNrisk().get(j));
					strataInfo.getStdlow().add(v);
				}
			}
		} else if (confLower == ConfLower.modified) {
			int i = 0;
			for (StrataInfo strataInfo : strataInfoHashMap.values()) {
				for (int j = 0; j < strataInfo.getSurv().size(); j++) {
					double v = strataInfo.getStderr().get(j) * Math.sqrt(nlag.get(i) / strataInfo.getNrisk().get(j));
					strataInfo.getStdlow().add(v);
					i++;
				}
			}
		}

		//zval <- qnorm(1- (1-conf.int)/2, 0,1)
		double zvalue = 1.959964;

		if (confType == ConfType.plain) {
			for (StrataInfo strataInfo : strataInfoHashMap.values()) {
				for (int j = 0; j < strataInfo.getSurv().size(); j++) {
					double upper = strataInfo.getSurv().get(j) + zvalue * strataInfo.getStderr().get(j) * strataInfo.getSurv().get(j);
					double lower = strataInfo.getSurv().get(j) - zvalue * strataInfo.getStdlow().get(j) * strataInfo.getSurv().get(j);
					strataInfo.getLower().add(lower);
					strataInfo.getUpper().add(upper);
				}
			}

		} else if (confType == ConfType.log) {
			for (StrataInfo strataInfo : strataInfoHashMap.values()) {
				for (int j = 0; j < strataInfo.getSurv().size(); j++) {
					double surv = strataInfo.getSurv().get(j);
					if (surv == 0) {
						strataInfo.getLower().add(Double.NaN);
						strataInfo.getUpper().add(Double.NaN);
					} else {
						double upper = Math.exp(Math.log(surv) + zvalue * strataInfo.getStderr().get(j));
						double lower = Math.exp(Math.log(surv) - zvalue * strataInfo.getStdlow().get(j));
						strataInfo.getLower().add(lower);
						strataInfo.getUpper().add(upper);
					}
				}
			}

		} else if (confType == ConfType.log_log) {
			throw new Exception("ConfType log-log currently not supported");
		}


//        if (false) {
//            for (String strata : strataInfoHashMap.keySet()) {
//                StrataInfo strataInfo = strataInfoHashMap.get(strata);
//                System.out.println(strataInfo.toString());
//                System.out.println();
//            }
//            System.out.println();
//        }

		return strataInfoHashMap;
	}

	/**
	 *
	 * @param variable
	 * @param dataT
	 * @param method
	 * @param error
	 * @param seFit
	 * @param confInt
	 * @param confType
	 * @param confLower
	 * @param startTime
	 * @param newTime
	 * @param useWeighted
	 * @return
	 * @throws Exception
	 */
	public SurvFitInfo process(String variable, ArrayList dataT, SurvFitKM.Method method, SurvFitKM.Error error, boolean seFit, double confInt, ConfType confType, ConfLower confLower, Double startTime, Double newTime, boolean useWeighted) throws Exception {
		SurvFitInfo si = new SurvFitInfo();

		LinkedHashMap strataInfoHashMap = this.processStrataInfo(variable, dataT, method, error, seFit, confInt, confType, confLower, startTime, newTime, useWeighted);
		si.setStrataInfoHashMap(strataInfoHashMap);
		LinkedHashMap unweightedStrataInfoHashMap = this.processStrataInfo(variable, dataT, method, error, seFit, confInt, confType, confLower, startTime, newTime, false);
		si.setUnweightedStrataInfoHashMap(unweightedStrataInfoHashMap);
		si.setWeighted(useWeighted);
		return si;
	}

	/**
	 * @param args the command line arguments
	 */
	public static void main(String[] args) {
		// TODO code application logic here
		try {
			String datafile = "/Users/Scooter/scripps/ngs/BLJ/E2197/Predictive Signatures/V$HSF_Q6-E2197 TTR.txt";

			SurvFitKM survFitKM = new SurvFitKM();

			SurvFitInfo si = survFitKM.process(datafile, "TIME", "STATUS", "WEIGHT", "MEAN", true);



			if (true) {

				KaplanMeierFigure kaplanMeierFigure = new KaplanMeierFigure();





				JFrame application = new JFrame();
				application.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
				application.add(kaplanMeierFigure);
				kaplanMeierFigure.setSize(500, 400);

				application.setSize(500, 400);         // window is 500 pixels wide, 400 high
				application.setVisible(true);

				ArrayList titles = new ArrayList<>();
				titles.add("Line 1");
				titles.add("line 2");
				kaplanMeierFigure.setSurvivalData(titles, si, null);

				ArrayList figureInfo = new ArrayList<>();
				//   figureInfo.add("HR=2.1 95% CI(1.8-2.5)");
				//   figureInfo.add("p-value=.001");
				kaplanMeierFigure.setFigureLineInfo(figureInfo);

				kaplanMeierFigure.savePNG("/Users/Scooter/Downloads/test.png");



			}



		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy