org.broadinstitute.pgen.PgenWriter Maven / Gradle / Ivy
The newest version!
/**
* Copyright (c) 2023, Broad Institute, Inc. All rights reserved.
*/
package org.broadinstitute.pgen;
import com.github.luben.zstd.ZstdOutputStream;
import htsjdk.io.HtsPath;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* An [HTSJDK](https://github.com/samtools/htsjdk) [VariantContextWriter]
* (https://github.com/samtools/htsjdk/blob/master/src/main/java/htsjdk/variant/variantcontext/writer/VariantContextWriter.java)
* that writes [VariantContext](https://github.com/samtools/htsjdk/blob/master/src/main/java/htsjdk/variant/variantcontext/VariantContext.java)
* objects to a file in the [plink2](https://www.cog-genomics.org/plink/2.0) in the plink2 PGEN format. See the plink2 PGEN
* [spec](https://github.com/chrchang/plink-ng/tree/master/pgen_spec) for more information about the PGEN file format.
*
* The writer implementation uses an underlying native component, which is built from a combination of local source files,
* plus some source files taken directly from the plink2 (that are used by plink2 to build the pgenlib target). Only
* some platforms are supported (macos and linux intel platforms).
*/
public class PgenWriter implements VariantContextWriter {
private static Log logger = Log.getInstance(PgenWriter.class);
/**
* variant count is not known up front, as long as the corresponding file mode param used is either {@link #PgenWriteMode.PGEN_FILE_MODE_WRITE_SEPARATE_INDEX}
* or {@link #PgenWriteMode.PGEN_FILE_MODE_WRITE_AND_COPY} (the write mode must not be {@link #PgenWriteMode.PGEN_FILE_MODE_BACKWARD_SEEK}, which requires an accurate
* upfront variant count).
*/
public static long VARIANT_COUNT_UNKNOWN = 0x7ffffffd; // plink2::kPglMaxVariantCt
/**
* Sentinel used by plink2 for missing data.
*/
public static final int PLINK2_NO_CALL_VALUE = -9;
/**
* The maximum number of alternate alleles that plink2/PGEN can handle (this is determined/defined by plink2)
*/
public static final int PLINK2_MAX_ALTERNATE_ALLELES = 254; // plink2::kPglMaxAltAlleleCt
public static String PGEN_EXTENSION = ".pgen";
public static String PGEN_INDEX_EXTENSION = ".pgen.pgi";
public static String PVAR_EXTENSION = ".pvar.zst";
public static String PSAM_EXTENSION = ".psam";
/**
* Enum for representing the plink2 PGEN file write modes. See plink2::PgenWriteMode.
*/
public enum PgenWriteMode {
/**
* write the pgen in single pass; requires backward seeks when writing; can only be used when an accurate upfront variant count
* is provided to the {@link PgenWriter}
*/
PGEN_FILE_MODE_BACKWARD_SEEK(0), // plink2::PgenWriteMode::kPgenWriteBackwardSeek
/**
* write a separate .pgi index file
*/
PGEN_FILE_MODE_WRITE_SEPARATE_INDEX(1), // plink2::PgenWriteMode::kPgenWriteSeparateIndex
/**
* the final real .pgen is only created at the end, by writing the index and then appending the body of the first
* temporary .pgen (which is then deleted).
*/
PGEN_FILE_MODE_WRITE_AND_COPY(2); // plink2::PgenWriteMode::kPgenWriteAndCopy
private final int mode;
private PgenWriteMode(final int mode) { this.mode = mode; }
public int value() { return this.mode; }
};
/**
* Enum for supported write mode flags.
*/
public enum PgenWriteFlag {
// This enum, and the corresponding enum values must be kept in sync with the corresponding constants
// in pgenlib::PgenWriteFlags.
PRESERVE_PHASING(0x1), // pgenlib::kWriteFlagPreservePhasing
MULTI_ALLELIC(0x2); // pgenlib::kWriteFlagMultiAllelic
private final int flag;
private PgenWriteFlag(final int flag) { this.flag = flag; }
public int value() { return this.flag; }
/**
* Convert an EnumSet into the corresponding pgenlib bitwise/integer flags.
*/
private static int toIntFlags(final EnumSet flagsSet) {
return (flagsSet.contains(PRESERVE_PHASING) ? PRESERVE_PHASING.value() : 0) |
(flagsSet.contains(MULTI_ALLELIC) ? MULTI_ALLELIC.value() : 0);
}
}
/**
* Enum for representing the subset of plink2 chromosome coding schemes that are supported by this writer.
* In plink2, the chromosome coding scheme is used when writing various output formats
* (see https://www.cog-genomics.org/plink/2.0/data#irreg_output).
* The codes are used here to determine the names of the haploid sex chromosomes in the incoming data set, for purposes
* of correctly handling haploid calls.
*/
public enum PgenChromosomeCode {
/**
* Autosomes numeric, X/Y single-character, MT two-character, XY/PAR1/PAR2 as usual.
* Aligns with the b37 reference. This is the default coding used by PLINK 2.
*/
PLINK_CHROMOSOME_CODE_MT("MT", "X", "Y", "MT"),
/**
* PAR1/PAR2 as usual, other chromosomes are 'chr' followed by a numeric code.
* Aligns with the hg38 reference.
*/
PLINK_CHROMOSOME_CODE_CHRM("chrM", "chrX", "chrY", "chrM");
private final String codeString; // the corresponding chromosome code as recognized by the plink2 command line
private final String xChromosomeName; // the chromosome name for the X chromosome for this chromosome code
private final String yChromosomeName; // the chromosome name for the Y chromosome for this chromosome code
private final String mChromosomeName; // the chromosome name for the mitochondrial chromosome for this chromosome code
private PgenChromosomeCode(final String codeString, final String xChromosomeName, final String yChromosomeName, final String mChromosomeName) {
this.codeString = codeString;
this.xChromosomeName = xChromosomeName;
this.yChromosomeName = yChromosomeName;
this.mChromosomeName = mChromosomeName;
}
public String value() { return this.codeString; }
public String getXChromosomeName() { return xChromosomeName; };
public String getYChromosomeName() { return yChromosomeName; };
public String getMChromosomeName() { return mChromosomeName; };
};
private static final byte PHASED_CODE = (byte) 1;
private static final byte UNPHASED_CODE = (byte) 0;
private static final int HAPLOID_PLOIDY = 1;
private static final int DIPLOID_PLOIDY = 2;
private final int maxAltAlleles;
private final boolean lenientPloidyValidation;
private final List sampleNames;
private final String xChromosomeName;
private final String yChromosomeName;
private final String mChromosomeName;
private HtsPath pVarFile;
private HtsPath pSamFile;
private HtsPath logFile;
private VariantContextWriter pVarWriter;
private BufferedWriter logFileWriter;
private long pgenContextHandle;
private ByteBuffer alleleBuffer;
private ByteBuffer phasingBuffer;
private long expectedVariantCount = 0L;
private long droppedVariantCount = 0L;
private long droppedSampleCount = 0L;
// ******************** Native JNI methods ********************
private static native long openPgen(String file, int pgenWriteModeInt, int writeFlags, long numberOfVariants, int numberOfSamples, int maxAltAlleles);
private static native boolean closePgen(long pgenContextHandle, long numDroppedVariants);
private static native long getPgenVariantCount(long pgenContextHandle);
private static native boolean appendAlleles(long pgenContextHandle, ByteBuffer alleles, ByteBuffer phasing, int alleleCount);
private static native ByteBuffer createBuffer(int length);
private static native boolean destroyByteBuffer(ByteBuffer buffer);
// ******************** End Native JNI methods ********************
// If this java property is set/exists, the native PGEN writer component will be loaded from java.libary.path,
// otherwise it is assumed to be included as a resource at the top level of a jar on the classpath.
private static final String LOAD_PGEN_FROM_LIBRARY_PATH = "LOAD_PGEN_FROM_LIBRARY_PATH";
static {
if (System.getProperty(LOAD_PGEN_FROM_LIBRARY_PATH) != null) {
// for local testing within the IDE
System.loadLibrary("pgen");
} else {
// otherwise, load it from a jar file on the classpath
NativeLibraryUtils.loadLibraryFromClasspath(
NativeLibraryUtils.runningOnMac() ?
"/libpgen.dylib" :
"/libpgen.so"
);
}
}
/**
* Create a PGEN writer for writing VariantContexts to a Plink2 PGEN file. The writer creates a set of related PGEN files (.pgen and
* .pvar/.psam files). Depending on the {@code #PgenWriteMode} specified, may also create a .pgen.pgi file.
*
* Supports only diploid sites with fewer than {@link #PLINK2_MAX_ALTERNATE_ALLELES} (this value can be further limited using
* {@code maxAltAlleles}. Sites that exceed the maximum number of alternate alleles are silently dropped (but will be written to the
* log file if one is provided). If no log file is provided, any sample that does not have a conforming ploidy (haploid or diploid for
* sex chromosomes, or stricly diploid for autosomes) will cause an exception to be thrown. Failure to provide a log file does not
* change the behavior of
*
* @param pgenFileName the name of the PGEN file to be created (must end in .pgen)
* @param vcfHeader a valid VCF header to use to create the PGEN
* @param pgenWriteMode the PGEN write mode to use (see {@code PgenWriteMode})
* @param writeFlags the write flags to use - see {@code PgenWriteFlag}. If phase information is present for the source genotypes, include
* the {@link PgenWriteFlag#PRESERVE_PHASING} flag. If multi allelic variants are present, include the {@link PgenWriteFlag#MULTI_ALLELIC} flag.
* @param chromosomeCode the plink2 chromosome coding scheme to use - see {@link PgenChromosomeCode}
* @param lenientPloidyValidation PGEN requires individual sample to be diploid (except for sex chromsomes, which may be haploid - these are accepted
* and recoded for pgen as heterozygous/diploid). By default, any ploidy failure will result in an exception to be thrown. Use tru for this value to
* tolerate ploidy failures (samples will be recoded as missing, and logged if a pg file is provided).
* @param numberOfVariants the number of variants to be written. if the number is unknown, use the sentinel value {@link PgenWriter#VARIANT_COUNT_UNKNOWN},
* but doing so precludes the use of the PGEN write mode {@link PgenWriteMode#PGEN_FILE_MODE_BACKWARD_SEEK}
* @param maxAltAlleles the maximum number of alternate alleles to consider; site with more alterate alleles than this value will be
* silently dropped
* @parm logFile any variants or samples that are dropped are logged to this file. may be null.
**/
public PgenWriter(
final HtsPath pgenFileName,
final VCFHeader vcfHeader,
final PgenWriteMode pgenWriteMode,
final EnumSet writeFlags,
final PgenChromosomeCode chromosomeCode,
final boolean lenientPloidyValidation,
final long numberOfVariants,
final int maxAltAlleles,
final String logFile) {
if (!pgenFileName.hasExtension(PGEN_EXTENSION)) {
throw new PgenException(
String.format("Invalid PGEN file name: %s. PGEN files must use the .pgen extension", pgenFileName.getRawInputString()));
}
if (!pgenFileName.getScheme().equals("file")) {
throw new PgenException(String.format("Invalid PGEN file name: %s. PGEN files must be local files", pgenFileName));
}
if (maxAltAlleles > PLINK2_MAX_ALTERNATE_ALLELES) {
throw new PgenException(
String.format("Requested max alternate alleles of (%d) exceeds the supported PGEN max of (%d)",
maxAltAlleles,
PLINK2_MAX_ALTERNATE_ALLELES));
}
this.maxAltAlleles = maxAltAlleles;
this.expectedVariantCount = numberOfVariants;
this.sampleNames = vcfHeader.getGenotypeSamples();
switch(chromosomeCode) {
// at the moment, the only difference between the two supported codes is the name of the mitochondrial chromosome, but capture the
// x and y chromosome names for completeness
case PLINK_CHROMOSOME_CODE_MT:
case PLINK_CHROMOSOME_CODE_CHRM:
this.xChromosomeName = chromosomeCode.getXChromosomeName();
this.yChromosomeName = chromosomeCode.getYChromosomeName();
this.mChromosomeName = chromosomeCode.getMChromosomeName();
break;
default:
throw new PgenException(
String.format("Unrecognized chromosome code name (%s)", chromosomeCode));
}
if (logFile != null) {
this.logFile = new HtsPath(logFile);
try {
logFileWriter = Files.newBufferedWriter(this.logFile.toPath());
} catch (IOException e) {
throw new RuntimeIOException(String.format("Error opening dropped variants log file %s", logFile), e);
}
}
this.lenientPloidyValidation = lenientPloidyValidation;
pgenContextHandle = openPgen(
pgenFileName.getRawInputString(),
pgenWriteMode.value(),
PgenWriteFlag.toIntFlags(writeFlags),
numberOfVariants,
vcfHeader.getNGenotypeSamples(),
maxAltAlleles);
if (pgenContextHandle == 0) {
//openPgen threw an async Java exception
return;
}
alleleBuffer = createBuffer(vcfHeader.getNGenotypeSamples() * DIPLOID_PLOIDY * 4); //samples * ploidy * bytes in int32_t (sizeof AlleleCode)
if (alleleBuffer == null) {
//createBuffer threw an async Java exception
return;
}
alleleBuffer.order(ByteOrder.LITTLE_ENDIAN);
phasingBuffer = createBuffer(vcfHeader.getNGenotypeSamples());
if (phasingBuffer == null) {
//createBuffer threw an async Java exception
return;
}
phasingBuffer.order(ByteOrder.LITTLE_ENDIAN);
// create the .pvar, and write the entire psam
pVarFile = createPVAR(pgenFileName, vcfHeader);
pSamFile = writePSAM(pgenFileName, vcfHeader);
}
@Override
public void writeHeader(final VCFHeader header) {
throw new UnsupportedOperationException("PGEN writer does not support independent header write.");
}
@Override
public void setHeader(final VCFHeader header) {
throw new UnsupportedOperationException("PGEN writer does not support independent setHeader");
}
@Override
public void close() {
pVarWriter.close();
pVarWriter = null;
if (logFileWriter != null) {
try {
logFileWriter.close();
} catch (IOException e) {
throw new RuntimeIOException(String.format("Error closing dropped variants log file %s", logFile), e);
}
}
// closePgen returns false if it had to throw an async Java exception, so test for that, and if it failed,
// don't do anything else that might throw.
//
// Tell the writer how many variants we dropped (due to exceeding the # of alternate alleles) so it
// doesn't throw if the number written doesn't match the number expected (which is provided when the
// writer is opened)
if (closePgen(pgenContextHandle, droppedVariantCount)) {
pgenContextHandle = 0;
//destroyByteBuffer might return false if for some reason it has to throw an async Java exception, but
// we don't need to test for that here since we're only nulling out a variable on return
destroyByteBuffer(alleleBuffer);
alleleBuffer = null;
destroyByteBuffer(phasingBuffer);
phasingBuffer = null;
}
}
@Override
public boolean checkError() {
return false;
}
@Override
public void add(final VariantContext vc) {
if (vc.getNAlleles() > maxAltAlleles) {
droppedVariantCount++;
if (logFileWriter != null) {
try {
logFileWriter.write(String.format("Dropped variant at: %s/%d - too many alleles (%d)\n", vc.getContig(), vc.getStart(), vc.getNAlleles()));
} catch (IOException e) {
throw new RuntimeIOException(String.format("Error writing to dropped variants log file %s", logFile), e);
}
}
return;
}
alleleBuffer.clear();
phasingBuffer.clear();
final Map alleleMap = buildAlleleMap(vc);
// Because there may be missing genotyopes, it is significantly simpler to have the primary iteration be
// through the sample names from the header, rather than through the genotypes.
// Note: As it stands, this code does not detect or reject the case where there are one or more genotypes
// in the VC that have a sample name that is not in the header (we could certainly keep track of that and
// throw, but is it worth the expense ?), or that the order of the genotypes does not match that of the
// header.
for (final String sampleName : sampleNames) {
final Genotype g = vc.getGenotype(sampleName);
if (g != null) {
final int ploidy = g.getPloidy();
if (ploidy == HAPLOID_PLOIDY && (vc.getContig().equals(xChromosomeName) || vc.getContig().equals(yChromosomeName))) {
// we have a haploid X or Y, and need to convert it to diploid to satisfy plink
final List alleles = g.getAlleles();
if (alleles.size() != 1) {
throw new PgenException(
String.format("A genotype with haploid ploidy (%d) does not have one allele (%d) at variant (%s)",
ploidy,
alleles.size(),
vc.toStringWithoutGenotypes()));
}
final Allele allele = alleles.get(0);
final Integer alleleCode = alleleMap.get(allele);
if (alleleCode == null) {
// do we need this test ? VariantContext doesn't seem to allow such a thing to be created
throw new PgenException(
String.format("Allele %s not found in allele map for variant %s", allele.toString(), vc.toStringWithoutGenotypes()));
}
updateAlleleBuffer(vc, g, allele, alleleCode);
updateAlleleBuffer(vc, g, allele, alleleCode);
updatePhasingBuffer(vc, g, g.isPhased() ? PHASED_CODE : UNPHASED_CODE);
} else if (ploidy != DIPLOID_PLOIDY) {
if (lenientPloidyValidation) {
// if lenient, fill in unphased diploid no-call values for any genotype with questionable ploidy
updateAlleleBuffer(vc, g, null, PLINK2_NO_CALL_VALUE);
updateAlleleBuffer(vc, g, null, PLINK2_NO_CALL_VALUE);
updatePhasingBuffer(vc, null, UNPHASED_CODE);
if (logFileWriter != null) {
try {
logFileWriter.write(String.format("Coding non-diploid sample %s as missing at contig/start: %s %d",
g.getSampleName(),
vc.getContig(),
vc.getStart()));
droppedSampleCount++;
} catch (IOException e) {
throw new RuntimeIOException(String.format("Error writing to dropped variants log file %s", logFile), e);
}
}
} else {
throw new PgenException(
String.format("PGEN only supports diploid calls, but a non-diploid sample (%s) with ploidy (%d) was found at variant (%s)",
g.getSampleName(),
ploidy,
vc.toStringWithoutGenotypes()));
}
} else {
for (final Allele allele : g.getAlleles()) {
final Integer alleleCode = alleleMap.get(allele);
if (alleleCode == null) {
// do we need this test ? VariantContext doesn't seem to allow such a thing to be created
throw new PgenException(
String.format("Allele %s not found in allele map for variant %s", allele.toString(), vc.toStringWithoutGenotypes()));
}
updateAlleleBuffer(vc, g, allele, alleleCode);
}
updatePhasingBuffer(vc, g, g.isPhased() ? PHASED_CODE : UNPHASED_CODE);
}
} else {
// fill in unphased diploid no-call values for the missing genotype
updateAlleleBuffer(vc, null, null, PLINK2_NO_CALL_VALUE);
updateAlleleBuffer(vc, null, null, PLINK2_NO_CALL_VALUE);
updatePhasingBuffer(vc, null, UNPHASED_CODE);
}
}
if (alleleBuffer.position() != alleleBuffer.limit()) {
throw new IllegalStateException(
String.format("Allele buffer is not completely filled, position is %d but expected %d.",
alleleBuffer.position(),
alleleBuffer.limit()));
} else if (phasingBuffer.position() != phasingBuffer.limit()) {
throw new IllegalStateException(
String.format("Phase buffer is not completely filled, position is %d but expected %d.",
phasingBuffer.position(),
phasingBuffer.limit()));
}
alleleBuffer.rewind();
phasingBuffer.rewind();
final boolean appendRet = appendAlleles(pgenContextHandle, alleleBuffer, phasingBuffer, alleleMap.size() - 1);
if (appendRet) { // only add to the pvar if appendAlleles succeeded
pVarWriter.add(vc);
}
}
/**
* @return the number of variants dropped because they exceeded the max alternate allele count. dropped variants are not written
* to the .pvar file, but are written to the log file if one was provided.
*/
public long getDroppedVariantCount() { return droppedVariantCount; }
/**
* @return the number of times a sample was dropped because it did not satisfy the ploidy requirements (note that the same sample
* may be dropped multiple times, once for each site that it fails to satisfy the ploidy requirements for). dropped samples are
* written to the log file if one was provided.
*/
public long getDroppedSampleCount() { return droppedSampleCount; }
/**
* @return the number of variants actually written to the PGEN
*
* Delegates to the pgenlib code to get the actual number recorded by the pgen library code.
*/
public long getWrittenVariantCount() { return getPgenVariantCount(pgenContextHandle); }
/**
* given a Path, return the absolute path of the file, without the trailing extension
*/
// Visible for testing
public static String getAbsoluteFileNameWithoutExtension(final Path targetPath, final String extension) {
final String targetAbsolutePath = targetPath.toAbsolutePath().toString();
return targetAbsolutePath.substring(0, targetAbsolutePath.lastIndexOf(extension));
}
/**
* Create a .pvar companion file for {@code pgenFile}.
*/
private HtsPath createPVAR(final HtsPath pgenFile, final VCFHeader vcfHeader) {
final String pgenFilePrefix = getAbsoluteFileNameWithoutExtension(pgenFile.toPath(), PGEN_EXTENSION);
final HtsPath pVarFile = new HtsPath(pgenFile.toPath()
.resolveSibling(pgenFilePrefix + PgenWriter.PVAR_EXTENSION)
.toAbsolutePath().toString());
final OutputStream pVarOutputStream = pVarFile.getOutputStream();
ZstdOutputStream zstdStream;
try {
zstdStream = new ZstdOutputStream(pVarOutputStream);
} catch (final IOException e) {
throw new RuntimeIOException(String.format("Error creating the zstd output stream for the .pvar file %s", pVarFile.getRawInputString()), e);
}
// technically we're writing a PVAR, not a VCF, but we can do so by composing the VCFWriter with a ZstdOutputStream
pVarWriter = new VariantContextWriterBuilder()
.clearOptions()
.setOptions(EnumSet.of(Options.DO_NOT_WRITE_GENOTYPES, Options.ALLOW_MISSING_FIELDS_IN_HEADER))
.setOutputStream(zstdStream)
.build();
// Ideally there would be a way to record the provenance/origin of a PGEN file right in the file itself, so we can identify
// files written by this writer, but there isn't. So instead add a "source=..." VCF header line to the .pvar, similar to the
// "##source=PLINKv2.00" line plink2 adds when it writes a .pvar. Attempt to use the jar's implementation version from the
// manifest, if it's available.
final String implementationVersion = this.getClass().getPackage().getImplementationVersion();
vcfHeader.addMetaDataLine(new VCFHeaderLine(
"source",
String.format("\"Broad Institute PGEN/PVAR writer version=%s\"",
implementationVersion == null ?
"no version found in jar manifest" :
implementationVersion)));
pVarWriter.writeHeader(vcfHeader);
return pVarFile;
}
/**
* Creates writes a .psam companion file for {@code pgenFile}.
*/
private HtsPath writePSAM(final HtsPath pgenFile, final VCFHeader vcfHeader) {
final String PSAM_HEADER_LINE = "#IID\tSEX\n";
final String PSAM_DETAIL_LINE = "\tN/A\n";
// create, write, and close the .psam up front, so we don't have to retain the header until the end
final String pgenFilePrefix = getAbsoluteFileNameWithoutExtension(pgenFile.toPath(), PGEN_EXTENSION);
final HtsPath pSamFile = new HtsPath(pgenFile.toPath()
.resolveSibling(pgenFilePrefix + PgenWriter.PSAM_EXTENSION)
.toAbsolutePath().toString());
try (final BufferedWriter psamWriter = Files.newBufferedWriter(pSamFile.toPath())) {
psamWriter.append(PSAM_HEADER_LINE);
// Sample name order matters here. If you use plink2 to create a VCF from a PGEN file set, it appears to use the order of the samples in
// the .psam as the basis for linking the genotypes in the PGEN back to the VCF samples. So if we don't preserve the order in the .psam,
// the genotypes in the roundtripped VCF won't match the original VCF, and will be incorrect.
for (final String sampleName : vcfHeader.getGenotypeSamples()) {
psamWriter.write(sampleName);
psamWriter.write(PSAM_DETAIL_LINE);
}
} catch (final IOException e) {
throw new RuntimeIOException(String.format("Error writing the .psam file %s", pSamFile.getRawInputString()), e);
}
return pSamFile;
}
private void updateAlleleBuffer(final VariantContext vc, final Genotype genotype, final Allele allele, final Integer alleleCode) {
try {
alleleBuffer.putInt(alleleCode);
} catch (final BufferOverflowException e) {
throw new RuntimeException(
String.format(
"Allele buffer overflow at position: %d code: %d for variant: %s, genotype: %s allele: %s",
alleleBuffer.position(),
alleleCode,
vc.toStringWithoutGenotypes(),
genotype == null ? "genotype missing" : genotype.toString(),
allele == null ? "no allele present" : allele.toString()),
e);
}
}
private void updatePhasingBuffer(final VariantContext vc, final Genotype genotype, final byte phaseCode) {
try {
phasingBuffer.put(phaseCode);
} catch (final BufferOverflowException e) {
throw new RuntimeException(
String.format(
"Phase buffer overflow at position: %d code: %d for variant: %s, genotype: %s",
alleleBuffer.position(),
phaseCode,
vc.toStringWithoutGenotypes(),
genotype == null ? "genotype missing" : genotype.toString()),
e);
}
}
private static Map buildAlleleMap(final VariantContext vc) {
final Map alleleMap = new HashMap<>(vc.getAlleles().size() + 1);
alleleMap.put(Allele.NO_CALL, PLINK2_NO_CALL_VALUE); // convenience for lookup
final List alleles = vc.getAlleles();
for (int i = 0; i < alleles.size(); i++) {
alleleMap.put(alleles.get(i), i);
}
return alleleMap;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy