org.broadinstitute.hellbender.tools.copynumber.plotting.PlotDenoisedCopyRatios.R Maven / Gradle / Ivy
The newest version!
#NOTE: the Java wrapper for this script first sources CNVPlottingLibrary.R
options(error = quote({dump.frames(dumpto = "plotting_dump", to.file = TRUE); q(status = 1)})) # Useful for debugging
library(optparse)
library(data.table)
option_list = list(
make_option(c("--sample_name"), dest="sample_name", action="store"),
make_option(c("--standardized_copy_ratios_file"), dest="standardized_copy_ratios_file", action="store"),
make_option(c("--denoised_copy_ratios_file"), dest="denoised_copy_ratios_file", action="store"),
make_option(c("--contig_names"), dest="contig_names", action="store"), #string with elements separated by "CONTIG_DELIMITER"
make_option(c("--contig_lengths"), dest="contig_lengths", action="store"), #string with elements separated by "CONTIG_DELIMITER"
make_option(c("--maximum_copy_ratio"), dest="maximum_copy_ratio", action="store", type="double"),
make_option(c("--point_size_copy_ratio"), dest="point_size_copy_ratio", action="store", type="double"),
make_option(c("--output_dir"), dest="output_dir", action="store"),
make_option(c("--output_prefix"), dest="output_prefix", action="store"))
opt = parse_args(OptionParser(option_list=option_list))
sample_name = opt[["sample_name"]]
standardized_copy_ratios_file = opt[["standardized_copy_ratios_file"]]
denoised_copy_ratios_file = opt[["denoised_copy_ratios_file"]]
contig_names_string = opt[["contig_names"]]
contig_lengths_string = opt[["contig_lengths"]]
maximum_copy_ratio = opt[["maximum_copy_ratio"]]
point_size_copy_ratio = opt[["point_size_copy_ratio"]]
output_dir = opt[["output_dir"]]
output_prefix = opt[["output_prefix"]]
#check that input files exist; if not, quit with error code that GATK will pick up
if (!all(file.exists(c(standardized_copy_ratios_file, denoised_copy_ratios_file)))) {
quit(save="no", status=1, runLast=FALSE)
}
contig_names = as.list(strsplit(contig_names_string, "CONTIG_DELIMITER")[[1]])
contig_lengths = as.list(strsplit(contig_lengths_string, "CONTIG_DELIMITER")[[1]])
contig_ends = cumsum(contig_lengths)
contig_starts = c(0, head(contig_ends, -1))
CalculateMedianAbsoluteDeviation = function(dat) {
return(median(abs(diff(dat))))
}
#plotting is extracted to a function for debugging purposes
WriteDenoisingPlots = function(sample_name, standardized_copy_ratios_file, denoised_copy_ratios_file, contig_names, output_dir, output_prefix) {
standardized_copy_ratios_df = ReadTSV(standardized_copy_ratios_file)
denoised_copy_ratios_df = ReadTSV(denoised_copy_ratios_file)
#transform to linear copy ratio
standardized_copy_ratios_df[["COPY_RATIO"]] = 2^standardized_copy_ratios_df[["LOG2_COPY_RATIO"]]
denoised_copy_ratios_df[["COPY_RATIO"]] = 2^denoised_copy_ratios_df[["LOG2_COPY_RATIO"]]
#determine copy-ratio midpoints
standardized_copy_ratios_df[["MIDDLE"]] = round((standardized_copy_ratios_df[["START"]] + standardized_copy_ratios_df[["END"]]) / 2)
denoised_copy_ratios_df[["MIDDLE"]] = round((denoised_copy_ratios_df[["START"]] + denoised_copy_ratios_df[["END"]]) / 2)
#write the MAD files
standardizedMAD = CalculateMedianAbsoluteDeviation(standardized_copy_ratios_df[["COPY_RATIO"]])
denoisedMAD = CalculateMedianAbsoluteDeviation(denoised_copy_ratios_df[["COPY_RATIO"]])
write.table(round(standardizedMAD, 3), file.path(output_dir, paste(output_prefix, ".standardizedMAD.txt", sep="")), col.names=FALSE, row.names=FALSE)
write.table(round(denoisedMAD, 3), file.path(output_dir, paste(output_prefix, ".denoisedMAD.txt", sep="")), col.names=FALSE, row.names=FALSE)
write.table(round(standardizedMAD - denoisedMAD, 3), file.path(output_dir, paste(output_prefix, ".deltaMAD.txt", sep="")), col.names=FALSE, row.names=FALSE)
write.table(round((standardizedMAD - denoisedMAD) / standardizedMAD, 3), file.path(output_dir, paste(output_prefix, ".scaledDeltaMAD.txt", sep="")), col.names=FALSE, row.names=FALSE)
#plot standardized and denoised copy ratio on top of each other
pre_color_blue = "#3B5DFF"
post_color_green = "#4FC601"
#plot up to maximum_copy_ratio (or full range, if maximum_copy_ratio = Infinity)
denoising_plot_file = file.path(output_dir, paste(output_prefix, ".denoised.png", sep=""))
png(denoising_plot_file, 12, 7, units="in", type="cairo", res=300, bg="white")
par(mfrow=c(2, 1), cex=0.75, las=1)
maximum_standardized_copy_ratio = if(is.finite(maximum_copy_ratio)) maximum_copy_ratio else 1.05 * max(standardized_copy_ratios_df[["COPY_RATIO"]])
SetUpPlot(sample_name, "standardized copy ratio", 0, maximum_standardized_copy_ratio, paste("median absolute deviation = ", round(standardizedMAD, 3), sep=""), contig_names, contig_starts, contig_ends, FALSE)
PlotCopyRatios(standardized_copy_ratios_df, pre_color_blue, contig_names, contig_starts, point_size_copy_ratio)
maximum_denoised_copy_ratio = if(is.finite(maximum_copy_ratio)) maximum_copy_ratio else 1.05 * max(denoised_copy_ratios_df[["COPY_RATIO"]])
SetUpPlot(sample_name, "denoised copy ratio", 0, maximum_denoised_copy_ratio, paste("median absolute deviation = ", round(denoisedMAD, 3), sep=""), contig_names, contig_starts, contig_ends, TRUE)
PlotCopyRatios(denoised_copy_ratios_df, post_color_green, contig_names, contig_starts, point_size_copy_ratio)
dev.off()
#check for created files and quit with error code if not found
if (!all(file.exists(c(denoising_plot_file)))) {
quit(save="no", status=1, runLast=FALSE)
}
}
WriteDenoisingPlots(sample_name, standardized_copy_ratios_file, denoised_copy_ratios_file, contig_names, output_dir, output_prefix) © 2015 - 2025 Weber Informatics LLC | Privacy Policy