All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.stats.linearmodels.ModelEffectUtils Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

There is a newer version: 5.2.94
Show newest version
package net.maizegenetics.stats.linearmodels;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import net.maizegenetics.analysis.modelfitter.AddPlusDomModelEffect;
import net.maizegenetics.dna.snp.GenotypeTable;
import net.maizegenetics.dna.snp.GenotypeTableUtils;
import net.maizegenetics.dna.snp.NucleotideAlignmentConstants;
import net.maizegenetics.matrixalgebra.Matrix.DoubleMatrix;

public class ModelEffectUtils {
	private static final List homGeno = Arrays.asList(
			NucleotideAlignmentConstants.getNucleotideDiploidByte("A"),
			NucleotideAlignmentConstants.getNucleotideDiploidByte("C"),
			NucleotideAlignmentConstants.getNucleotideDiploidByte("G"),
			NucleotideAlignmentConstants.getNucleotideDiploidByte("T"),
			NucleotideAlignmentConstants.getNucleotideDiploidByte("Z"),
			NucleotideAlignmentConstants.getNucleotideDiploidByte("+"),
			NucleotideAlignmentConstants.getNucleotideDiploidByte("-"));
	
	private ModelEffectUtils() {}
	
	public static DoubleMatrix getXtY(ModelEffect X, ModelEffect Y) {
		if (X instanceof FactorModelEffect) {
			FactorModelEffect fme = (FactorModelEffect) X;
			if (Y instanceof FactorModelEffect) {
				return fme.getXtX2((FactorModelEffect) Y);
			} else if (Y instanceof CovariateModelEffect) {
				return fme.getXty(((CovariateModelEffect) Y).getCovariate());
			} else if (Y instanceof NestedCovariateModelEffect) {
				return fme.getX().mult(Y.getX(), true, false);
			} else if (Y instanceof AddPlusDomModelEffect) {
				return fme.getX().crossproduct(Y.getX());
			}
			
		} else if (X instanceof CovariateModelEffect) {
			double[] cov = ((CovariateModelEffect) X).getCovariate();
			if (Y instanceof FactorModelEffect) {
				return getXtY(Y,X).transpose();
			} else if (Y instanceof CovariateModelEffect) {
				return Y.getXty(cov);
			} else if (Y instanceof NestedCovariateModelEffect) {
				return Y.getXty(cov).transpose();
			} else if (Y instanceof AddPlusDomModelEffect) {
				return Y.getXty(cov).transpose();
			}
			
		} else if (X instanceof NestedCovariateModelEffect) {
			if (Y instanceof FactorModelEffect) {
				return X.getX().mult(Y.getX(), true, false);
			} else if (Y instanceof CovariateModelEffect) {
				return X.getXty(((CovariateModelEffect) Y).getCovariate());
			} else if (Y instanceof NestedCovariateModelEffect) {
				return ((NestedCovariateModelEffect) X).getXtX2((NestedCovariateModelEffect) Y);
			} else if (Y instanceof AddPlusDomModelEffect) {
				return X.getX().crossproduct(Y.getX());
			}
			
		} else if (X instanceof AddPlusDomModelEffect) {
			return X.getX().crossproduct(Y.getX());
		}

		return null;
	}
	
    public static int[] getIntegerLevels(Object[] originalLevels) {
    	int nLevels = originalLevels.length;
    	int[] intLevels = new int[nLevels];
    	HashMap levelMap = new HashMap();
    	for (int i = 0; i < nLevels; i++) {
    		Integer ndx = levelMap.get(originalLevels[i]);
    		if (ndx == null) {
    			ndx = new Integer(levelMap.size());
    			levelMap.put(originalLevels[i], ndx);
    		}
    		intLevels[i] = ndx.intValue();
    	}
    	return intLevels;
    }
    
    public static  int[] getIntegerLevels(T[] originalLevels, ArrayList ids) {
    	int nLevels = originalLevels.length;
    	int[] intLevels = new int[nLevels];
    	HashMap levelMap = new HashMap();
    	for (int i = 0; i < nLevels; i++) {
    		Integer ndx = levelMap.get(originalLevels[i]);
    		if (ndx == null) {
    			ndx = new Integer(levelMap.size());
    			levelMap.put(originalLevels[i], ndx);
    		}
    		intLevels[i] = ndx.intValue();
    	}
    	
    	if (ids != null) {
        	TreeSet> sortedEntries = new TreeSet>(new Comparator>(){

    			@Override
    			public int compare(Entry arg0, Entry arg1) {
    				return arg0.getValue().compareTo(arg1.getValue());
    			}
        		
        	});
        	
        	sortedEntries.addAll(levelMap.entrySet());
        	for (Entry entry:sortedEntries) {
        		ids.add(entry.getKey());
        	}
    	}
    	
    	return intLevels;
    }
    
    public static int[] getIntegerLevelsSortedByGenotype(String[] originalLevels, ArrayList ids) {
    	int nLevels = originalLevels.length;
    	int[] intLevels = new int[nLevels];
    	
    	String nukes = "ACGT";
    	HashSet idSet = Arrays.stream(originalLevels).collect(Collectors.toCollection(HashSet::new));
    	ids.addAll(idSet);
    	Collections.sort(ids, (a,b) -> {
    		String id1 = a.toString();
    		String id2 = b.toString();
    		if (nukes.contains(id1)) {
    			if (nukes.contains(id2)) return id1.compareTo(id2);
    			else return -1;
    		} else {
    			if (nukes.contains(id2)) return 1;
    			else return id1.compareTo(id2);
    		}
    	});
    	
    	HashMap levelMap = new HashMap<>();
    	for (int i = 0; i < ids.size(); i++) {
    		levelMap.put(ids.get(i), i);
    	}
    	for (int i = 0; i < nLevels; i++) intLevels[i] = levelMap.get(originalLevels[i]);
    	return intLevels;
    }
    
    /**
     * @param genotypes		a byte array of genotypes
     * @param ids			an empty array list that will hold the Bytes corresponding to each level
     * @return				these genotypes translated into factor levels. On return, ids will hold the Byte labels for each level.
     * 						Levels will be sorted with homozygotes first then heterozygotes.
     */
    public static int[] getIntegerLevels(byte[] genotypes, ArrayList ids) {
    	int n = genotypes.length;
    	
    	//create a tree set that sorts genotypes with homozygotes first
    	TreeSet genotypeSet = new TreeSet<>((a,b) -> {
    		if (homGeno.contains(a)) {
    			if (homGeno.contains(b)) return a.compareTo(b);
    			return -1;
    		} else {
    			if (homGeno.contains(b)) return 1;
    			else return a.compareTo(b);
    		}
    	});
    	
    	for (byte b : genotypes) genotypeSet.add(b);
    	ids.addAll(genotypeSet);
    	HashMap genoMap = new HashMap<>();
    	for (int i = 0; i < ids.size(); i++) genoMap.put(ids.get(i), i);
    	
    	return IntStream.range(0,n).map(i -> genoMap.get(genotypes[i]).intValue()).toArray();
    }

    public static byte[] genotypesToUnphasedSorted(byte[] genotypes) {
        byte[] unphasedSorted = new byte[genotypes.length];
        for (int g = 0; g < genotypes.length; g++) 
            unphasedSorted[g] = GenotypeTableUtils.getUnphasedSortedDiploidValue(genotypes[g]);
        return unphasedSorted;
    }

    public static  int[] getIntegerLevels(ArrayList originalLevels, ArrayList ids) {
    	int[] intLevels = new int[originalLevels.size()];
    	HashMap levelMap = new HashMap();
    	int count = 0;
    	for (T level:originalLevels) {
    		Integer ndx = levelMap.get(level);
    		if (ndx == null) {
    			ndx = new Integer(levelMap.size());
    			levelMap.put(level, ndx);
    		}
    		intLevels[count++] = ndx.intValue();
    	}

    	if (ids != null) {
        	TreeSet> sortedEntries = new TreeSet>(new Comparator>(){

    			@Override
    			public int compare(Entry arg0, Entry arg1) {
    				return arg0.getValue().compareTo(arg1.getValue());
    			}
        		
        	});
        	
        	sortedEntries.addAll(levelMap.entrySet());
        	for (Entry entry:sortedEntries) {
        		ids.add(entry.getKey());
        	}
    	}
    	return intLevels;
    }
    
    public static  int[] getIntegerLevels(ArrayList originalLevels) {
    	return getIntegerLevels(originalLevels, null);
    }
    
    public static int[] getIntegerLevels(int[] originalLevels) {
        TreeSet originalSet = Arrays.stream(originalLevels).collect(TreeSet::new, TreeSet::add, TreeSet::addAll);
        int norig = originalLevels.length;
        if (originalSet.size() == norig) return originalLevels;
        
        int[] index = new int[norig];
        Arrays.fill(index, -1);
        int count = 0;
        for (Integer level : originalSet) index[level] = count++;
        int[] newLevels = new int[norig];
        for (int i = 0; i < norig; i++) newLevels[i] = index[originalLevels[i]];
        return newLevels;
    }
    
    public static double[] getNumericCodingForAdditiveModel(Object[] marker, String allele) {
    	int nmarkers = marker.length;
    	if (allele.equals(GenotypeTable.UNKNOWN_ALLELE_STR)) return new double[nmarkers];
    	
    	String firstMarker = ((String) marker[0]);
    	double[] values = new double[nmarkers];
    	
    	if (firstMarker.contains(":")) {
        	Pattern colon = Pattern.compile(":");
        	for (int m = 0; m < nmarkers; m++) {
        		String markerval = (String) marker[m];
        		String[] markerAlleles = colon.split(markerval);
        		if (markerAlleles[0].equals(allele)) values[m]++;
        		if (markerAlleles[1].equals(allele)) values[m]++;
        	}
    	} else {
    		Pattern nuc = Pattern.compile("[RSYWKM0]");
    		for (int m = 0; m < nmarkers; m++) {
    			String markerval = (String) marker[m];
    			if (markerval.equals(allele)) values[m] = 2;
    			else if (nuc.matcher((String) marker[m]).matches()) values[m] = 1;
    			else values[m] = 0;
    		}
    	}
    	return values;
    }
    
    /**
     * @param marker	the genotypes for a taxon at all sites as bytes
     * @param allele	the allele that should be coded as 1. all others will be 0
     * @return			a vector of values (0,1,2) that encode genotypes equal to the dosage of allele
     */
    public static double[] getNumericCodingForAdditiveModel(byte[] marker, byte allele) {
    	int nmarkers = marker.length;
    	double[] values = new double[nmarkers];
    	
    	for (int m = 0; m < nmarkers; m++) {
    		byte[] markerval = GenotypeTableUtils.getDiploidValues(marker[m]);
    		for (byte alleleval : markerval) if (alleleval == allele) values[m] += 1.0; 
    	}
    	return values;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy