All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.dna.snp.bit.DynamicBitStorage Maven / Gradle / Ivy

package net.maizegenetics.dna.snp.bit;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.Weigher;
import net.maizegenetics.dna.WHICH_ALLELE;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import net.maizegenetics.dna.snp.genotypecall.GenotypeCallTable;
import net.maizegenetics.util.BitSet;
import net.maizegenetics.util.UnmodifiableBitSet;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;

/**
 * Provides rapid conversion routines and caching from byte encoding of
 * nucleotides to bit encoding. Only two alleles are supported for each scope
 * (e.g. Major and minor, or Reference and Alternate).
 * 

*

* The cache is designed to support multiple scopes, but currently scope must be * passed in at construction. *

*

* It is not clear that site or taxa optimization is needed. The code should be * highly parallelizable as long as the gets are not for adjacent sites. * * @author Ed Buckler */ public class DynamicBitStorage implements BitStorage { private GenotypeCallTable myGenotype; private final WHICH_ALLELE myWhichAllele; private final byte[] myPrefAllele; private final int myTaxaCount; private final int mySiteCount; private static final int SBoff = 58; private enum SB { TAXA(0), SITE(1); public final int index; SB(int index) { this.index = index; } }; Weigher weighByLength = new Weigher() { @Override public int weigh(Long key, BitSet value) { return value.getNumWords() * 8; } //words are longs so *8 converts to bytes }; private LoadingCache bitCache; private CacheLoader bitLoader = new CacheLoader() { @Override public BitSet load(Long key) { if (getDirectionFromKey(key) == SB.TAXA) { int taxon = getSiteOrTaxonFromKey(key); if ((myPrefAllele.length == 1) && (myPrefAllele[0] == GenotypeTable.UNKNOWN_DIPLOID_ALLELE)) { return UnmodifiableBitSet.getInstance(GenotypeTableUtils.calcBitUnknownPresenceFromGenotype(myGenotype.genotypeAllSites(taxon))); } else { return UnmodifiableBitSet.getInstance(GenotypeTableUtils.calcBitPresenceFromGenotype(myGenotype.genotypeAllSites(taxon), myPrefAllele)); //allele comp } } else { ArrayList toFill = new ArrayList<>(); toFill.add(key); try { bitCache.putAll(loadAll(toFill)); return bitCache.get(key); } catch (Exception e) { e.printStackTrace(); return null; } } } @Override public Map loadAll(Iterable keys) throws Exception { long key = keys.iterator().next(); //This pivoting code is needed if myGenotype is store in taxa direction //It runs about 7 times faster than getting base sequentially across taxa. HashMap result = new HashMap(64); int site = getSiteOrTaxonFromKey(key); int length = (mySiteCount - site < 64) ? mySiteCount - site : 64; byte[][] genotypeTBlock = new byte[length][myTaxaCount]; for (int t = 0; t < myTaxaCount; t++) { for (int s = 0; s < genotypeTBlock.length; s++) { genotypeTBlock[s][t] = myGenotype.genotype(t, site + s); } } if ((myPrefAllele.length == 1) && (myPrefAllele[0] == GenotypeTable.UNKNOWN_DIPLOID_ALLELE)) { for (int i = 0; i < length; i++) { BitSet bs = UnmodifiableBitSet.getInstance(GenotypeTableUtils.calcBitUnknownPresenceFromGenotype(genotypeTBlock[i])); result.put(getKey(SB.SITE, site + i), bs); } } else { for (int i = 0; i < length; i++) { byte a1 = myPrefAllele[site + i]; BitSet bs = UnmodifiableBitSet.getInstance(GenotypeTableUtils.calcBitPresenceFromGenotype(genotypeTBlock[i], a1)); result.put(getKey(SB.SITE, site + i), bs); } } return result; } }; private long getKey(SB direction, int siteOrTaxon) { return ((long) direction.index << SBoff) | (long) siteOrTaxon; } private int getSiteOrTaxonFromKey(long key) { return (int) ((key << 32) >>> 32); } private SB getDirectionFromKey(long key) { if (key >>> SBoff == SB.TAXA.index) { return SB.TAXA; } return SB.SITE; } @Override public BitSet allelePresenceForAllSites(int taxon) { try { return bitCache.get(getKey(SB.TAXA, taxon)); } catch (ExecutionException e) { e.printStackTrace(); return null; } } @Override public BitSet allelePresenceForAllTaxa(int site) { try { return bitCache.get(getKey(SB.SITE, site)); } catch (ExecutionException e) { e.printStackTrace(); return null; } } @Override public long[] allelePresenceForSitesBlock(int taxon, int startBlock, int endBlock) { BitSet result = allelePresenceForAllSites(taxon); if (result == null) { return new long[0]; } return result.getBits(startBlock, endBlock - 1); //BitSet is inclusive, while this method is exclusive. } @Override public BitSet haplotypeAllelePresenceForAllSites(int taxon, boolean firstParent) { throw new UnsupportedOperationException("Not supported yet."); } @Override public BitSet haplotypeAllelePresenceForAllTaxa(int site, boolean firstParent) { throw new UnsupportedOperationException("Not supported yet."); } @Override public long[] haplotypeAllelePresenceForSitesBlock(int taxon, boolean firstParent, int startBlock, int endBlock) { throw new UnsupportedOperationException("Not supported yet."); } public DynamicBitStorage(GenotypeCallTable genotype, WHICH_ALLELE allele, byte[] prefAllele) { myGenotype = genotype; myWhichAllele = allele; mySiteCount = myGenotype.numberOfSites(); myTaxaCount = myGenotype.numberOfTaxa(); myPrefAllele = Arrays.copyOf(prefAllele, prefAllele.length); bitCache = CacheBuilder.newBuilder() .maximumWeight(2_000_000_000L) .weigher(weighByLength) .build(bitLoader); } public static DynamicBitStorage getUnknownInstance(GenotypeCallTable genotype) { return new DynamicBitStorage(genotype, WHICH_ALLELE.Unknown, new byte[]{GenotypeTable.UNKNOWN_DIPLOID_ALLELE}); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy