All downloads are free. Search and download functionality uses the official Maven repository.

net.maizegenetics.taxa.IdentifierSynonymizer Maven / Gradle / Ivy

package net.maizegenetics.taxa;


import net.maizegenetics.taxa.Taxon.Builder;
import net.maizegenetics.util.GeneralAnnotation;
import net.maizegenetics.util.TableReport;
import net.maizegenetics.util.AbstractTableReport;

import java.io.PrintWriter;
import java.io.Serializable;
import java.util.*;

import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;

import com.google.common.collect.ImmutableMultimap;


/**
 * User: Ed
 * Date: Mar 30, 2005
 * Time: 1:39:47 PM
 */
public class IdentifierSynonymizer extends AbstractTableReport implements Serializable, TableReport {

    /** Synonym-annotated copies of the alternate taxa lists, one per input list, in input order. */
    ArrayList<TaxaList> taxaListSynonymized = new ArrayList<>();
    /** Working copy of the first synonymized list; edits are staged here until saved. */
    TaxaList tempTaxaList = null;
    /** Reference taxa whose names are the candidates for matching. */
    private TaxaList referenceIDGroup;
    /** Number of alternate taxa for which exactly one best reference name was found. */
    private int matchCount = 0;
    /** Number of alternate taxa for which the best reference name was not unique. */
    private int unmatchCount = 0;
    /** Selector for the distance measure used by {@code getScore} (0-7). */
    private int technique = 0;
    /** Delimiter handed to {@code getScore} when {@code technique} is 7. */
    private String delimiter = "";
    /** Lower bound used when rescaling distances to the range reported to callers. */
    private double globalMin = Double.POSITIVE_INFINITY;
    /** Upper bound used when rescaling distances to the range reported to callers. */
    private double globalMax = Double.NEGATIVE_INFINITY;

    /**
     * Synonymizes the alternate taxa sets against the preferred taxa using the
     * default technique and delimiter.
     *
     * @param preferredTaxa reference taxa whose names are the match targets
     * @param alternateTaxaSets taxa lists whose names should be matched
     */
    public IdentifierSynonymizer(TaxaList preferredTaxa, TaxaList[] alternateTaxaSets) {
        this.init2(preferredTaxa, alternateTaxaSets);
    }
    /**
     * Synonymizes the alternate taxa sets against the preferred taxa using the
     * given distance technique.
     *
     * @param preferredTaxa reference taxa whose names are the match targets
     * @param alternateTaxaSets taxa lists whose names should be matched
     * @param technique distance measure selector understood by {@code getScore}
     */
    public IdentifierSynonymizer(TaxaList preferredTaxa, TaxaList[] alternateTaxaSets,int technique) {
        // The technique must be in place before init2 starts scoring names.
        this.technique = technique;
        this.init2(preferredTaxa, alternateTaxaSets);
    }
    /**
     * Synonymizes the alternate taxa sets against the preferred taxa using the
     * given distance technique and delimiter.
     *
     * @param preferredTaxa reference taxa whose names are the match targets
     * @param alternateTaxaSets taxa lists whose names should be matched
     * @param technique distance measure selector understood by {@code getScore}
     * @param delimiter delimiter passed to {@code getScore} when technique is 7
     */
    public IdentifierSynonymizer(TaxaList preferredTaxa, TaxaList[] alternateTaxaSets,int technique,String delimiter) {
        // Both settings must be in place before init2 starts scoring names.
        this.delimiter = delimiter;
        this.technique = technique;
        this.init2(preferredTaxa, alternateTaxaSets);
    }

    /**
     * Synonymizes a single alternate taxa list against the preferred taxa using
     * the default technique and delimiter.
     *
     * @param preferredTaxa reference taxa whose names are the match targets
     * @param alternateTaxa taxa list whose names should be matched
     */
    public IdentifierSynonymizer(TaxaList preferredTaxa, TaxaList alternateTaxa) {
        // Wrap the single list so it can share the array-based initialisation.
        init2(preferredTaxa, new TaxaList[] {alternateTaxa});
    }


    /**
     * Matches every taxon of every alternate list against the reference taxa and
     * stores a synonym-annotated copy of each alternate list.
     * <p>
     * Pass one asks {@code findBestMatch} for the best reference name(s) of each
     * alternate name. Exactly one candidate counts as a unique match and is
     * recorded as a synonym unless it equals the name itself; any other number
     * of candidates counts as a multiple match and all of them are recorded.
     * Pass two rebuilds each alternate list, copying every non-synonym
     * annotation and attaching the recorded synonyms.
     * <p>
     * Finally the working list is reset and the synonym table is printed to
     * standard output.
     * <p>
     * TODO: stream implementation; intended to become {@code init} when finished.
     *
     * @param referenceTaxa taxa whose names are the match targets
     * @param alternateTaxaSets taxa lists whose names should be matched
     */
    private void init2(TaxaList referenceTaxa, TaxaList[] alternateTaxaSets) {
        referenceIDGroup = referenceTaxa;
        ImmutableMultimap.Builder<String, String> changeSynBuild = new ImmutableMultimap.Builder<>();

        // Pass one: collect the best reference name(s) for every alternate name.
        for (TaxaList altTaxaList : alternateTaxaSets) {
            for (Taxon altTaxon : altTaxaList) {
                String altName = altTaxon.getName();
                ArrayList<String> theBest = findBestMatch(altName, referenceTaxa);
                if (theBest.size() == 1) {
                    String bs = theBest.get(0);
                    // An identical name needs no synonym but still counts as matched.
                    if (!bs.equals(altName)) {
                        changeSynBuild.put(altName, bs);
                    }
                    matchCount++;
                } else {
                    for (String candidate : theBest) {
                        changeSynBuild.put(altName, candidate);
                    }
                    unmatchCount++;
                }
            }
        }
        ImmutableMultimap<String, String> changeSyn = changeSynBuild.build();

        // Pass two: rebuild each alternate list with the synonyms attached.
        for (TaxaList altTaxaList : alternateTaxaSets) {
            TaxaListBuilder tlb = new TaxaListBuilder();
            for (Taxon taxon : altTaxaList) {
                GeneralAnnotation ga = taxon.getAnnotation();
                Taxon.Builder tb = new Taxon.Builder(taxon.getName());
                Set<String> keys = ga.getAnnotationKeys();
                // Copy everything except old synonyms, which are replaced below.
                for (String key : keys) {
                    if (!key.equals(Taxon.SynonymKey)) {
                        for (String value : ga.getTextAnnotation(key)) {
                            tb.addAnno(key, value);
                        }
                    }
                }
                // get() yields an empty collection for names without synonyms.
                for (String entry : changeSyn.get(taxon.getName())) {
                    tb.addAnno(Taxon.SynonymKey, entry);
                }
                tlb.add(tb.build());
            }
            taxaListSynonymized.add(tlb.build());
        }
        resetTempTaxaList();
        System.out.println(toString());
    }
   
    /**
     * Returns the synonymized copy of the first alternate taxa list.
     *
     * @return element 0 of the synonymized lists
     */
    public TaxaList getTaxaList() {
        final int firstList = 0;
        return taxaListSynonymized.get(firstList);
    }

    /**
     * Returns the selector of the distance measure in use.
     *
     * @return the technique code
     */
    public int getTechnique() {
        return this.technique;
    }
    /**
     * Overrides the upper bound used when rescaling distances.
     *
     * @param max new upper bound
     */
    public void setGlobalMax(double max) {
        globalMax = max;
    }
    private ArrayList findBestMatch(String unmatchedString) {
        ArrayList bestMatches = new ArrayList<>();
        double maxScore = -1;
        double minScore = Double.POSITIVE_INFINITY;
        double sm;
        int levelOfRestriction = 0;
        boolean ignoreCase = true, ignoreWhite = false, ignorePunc = false;
        while ((bestMatches.size() != 1) && (levelOfRestriction < 4)) {
            switch (levelOfRestriction) {
                case 1:
                    ignoreCase = true;
                    break;
                case 2:
                    ignoreWhite = true;
                    break;
                case 3:
                    ignorePunc = true;
                    break;
            }
            /*
            for (int i = 0; i < referenceIDGroup.numberOfTaxa(); i++) {
                sm = getScore(referenceIDGroup.taxaName(i), unmatchedString, ignoreCase, ignoreWhite, ignorePunc,technique);
                //sm = scoreMatch(referenceIDGroup.taxaName(i), unmatchedString, ignoreCase, ignoreWhite, ignorePunc);
                if (sm > maxScore) {
                    bestMatches.clear();
                    bestMatches.add(referenceIDGroup.taxaName(i));
                    maxScore = sm;
                } else if (sm == maxScore) {
                    bestMatches.add(referenceIDGroup.taxaName(i));
                }
            }*/
            
            for (int i = 0; i < referenceIDGroup.numberOfTaxa(); i++) {
                if(technique==7) {
                    sm = getScore(referenceIDGroup.taxaName(i), unmatchedString, ignoreCase, ignoreWhite, ignorePunc,technique,delimiter);
                    
                }
                else {
                    sm = getScore(referenceIDGroup.taxaName(i), unmatchedString, ignoreCase, ignoreWhite, ignorePunc,technique);
                }
                
                //sm = scoreMatch(referenceIDGroup.taxaName(i), unmatchedString, ignoreCase, ignoreWhite, ignorePunc);
                if (sm < minScore) {
                    bestMatches.clear();
                    bestMatches.add(referenceIDGroup.taxaName(i));
                    minScore = sm;
                    if(minScoreglobalMax) {
                globalMax = minScore;
            }
            
            levelOfRestriction++;
        }
        return bestMatches;
    }
    public ArrayList findBestMatch(String taxaName, TaxaList referenceTaxa) {
        ArrayList bestMatches = new ArrayList<>();
        double maxScore = -1;
        double minScore = Double.POSITIVE_INFINITY;
        double sm;
        int levelOfRestriction = 0;
        boolean ignoreCase = true, ignoreWhite = false, ignorePunc = false;
        while ((bestMatches.size() != 1) && (levelOfRestriction < 4)) {
            switch (levelOfRestriction) {
                case 1:
                    ignoreCase = true;
                    break;
                case 2:
                    ignoreWhite = true;
                    break;
                case 3:
                    ignorePunc = true;
                    break;
            }
            for(Taxon refTaxa:referenceTaxa) {
                if(technique==7) {
                    sm = getScore(refTaxa.getName(),taxaName,ignoreCase,ignoreWhite,ignorePunc,technique,delimiter);
                }
                else {
                    sm = getScore(refTaxa.getName(), taxaName, ignoreCase, ignoreWhite, ignorePunc,technique);
                }
                if (sm < minScore) {
                    bestMatches.clear();
                    bestMatches.add(refTaxa.getName());
                    minScore = sm;
                    if(minScoreglobalMax) {
                globalMax = minScore;
            }
            
            levelOfRestriction++;
        }   
        System.out.println("GlobalMin"+globalMin);
        System.out.println("GlobalMax"+globalMax);
        return bestMatches;
    }
    /**
     * Returns all reference names ordered by their rescaled distance to the
     * given name.
     * <p>
     * Distances are rescaled with {@code globalMin}/{@code globalMax} and used
     * as keys of a sorted map. A small index-dependent offset is subtracted
     * from each key to keep equal scores apart; two names whose keys still
     * coincide overwrite each other.
     *
     * @param unmatchedString name to compare against every reference name
     * @param levelOfRestriction above 0 ignores case, above 1 also whitespace,
     *                           above 2 also punctuation
     * @return reference names in ascending key order
     */
    public ArrayList<String> findOrderedMatches(String unmatchedString, int levelOfRestriction) {
        SortedMap<Double, String> theSortMap = new TreeMap<>();
        boolean ignoreCase = levelOfRestriction > 0;
        boolean ignoreWhite = levelOfRestriction > 1;
        boolean ignorePunc = levelOfRestriction > 2;
        for (int i = 0; i < referenceIDGroup.numberOfTaxa(); i++) {
            String referenceName = referenceIDGroup.taxaName(i);
            double sm;
            if (technique == 7) {
                sm = getScore(referenceName, unmatchedString, ignoreCase, ignoreWhite, ignorePunc, technique, delimiter);
            } else {
                sm = getScore(referenceName, unmatchedString, ignoreCase, ignoreWhite, ignorePunc, technique);
            }
            // Arithmetic kept exactly as before so key ordering is unchanged.
            sm = 1.0 - ((sm - globalMin) / (globalMax - globalMin));
            theSortMap.put(1 - sm - ((double) i / 100000.0), referenceName);
        }
        return new ArrayList<>(theSortMap.values());
    }

    /**
     * Computes the distance between two names with the selected technique;
     * lower means more similar.
     * <p>
     * Identical strings score 0.0 without consulting any technique, and an
     * unrecognised technique code also yields 0.0. Similarity-based measures
     * (0 and 5) are inverted so that high similarity becomes low distance.
     *
     * @param s1 first name
     * @param s2 second name
     * @param ignoreCase forwarded to the selected measure
     * @param ignoreWhite forwarded to the selected measure, except the DTW
     *                    techniques (2 and 3), which always pass {@code true}
     * @param ignorePunc forwarded to the selected measure
     * @param technique 0 dice, 1 string edit, 2 DTW with hamming, 3 DTW with
     *                  keyboard distance, 4 hamming with soundex, 5 dice with
     *                  metaphone, 6 edit distance with metaphone
     * @return the distance between the two names
     */
    public static double getScore(String s1, String s2, boolean ignoreCase, boolean ignoreWhite, boolean ignorePunc, int technique) {
        if (s1.equals(s2)) {
            return 0.0;
        }
        switch (technique) {
            case 0:
                return 1.0 - scoreMatch(s1, s2, ignoreCase, ignoreWhite, ignorePunc);
            case 1:
                return editDistanceScoreMatch(s1, s2, ignoreCase, ignoreWhite, ignorePunc);
            case 2:
                return dtwDist(s1, s2, "hamming", ignoreCase, true, ignorePunc);
            case 3:
                return dtwDist(s1, s2, "key", ignoreCase, true, ignorePunc);
            case 4:
                return hammingDistSoundex(s1, s2, ignoreCase, ignoreWhite, ignorePunc);
            case 5:
                return 1 - diceWithMetaphone(s1, s2, ignoreCase, ignoreWhite, ignorePunc);
            case 6:
                return editWithMetaphone(s1, s2, ignoreCase, ignoreWhite, ignorePunc);
            default:
                return 0.0;
        }
    }
    /**
     * Computes the distance between two names with the selected technique,
     * additionally supporting the delimiter-based technique 7.
     * <p>
     * Every technique other than 7 is delegated to the six-argument overload,
     * so the two overloads cannot drift apart. Identical strings score 0.0.
     *
     * @param s1 first name
     * @param s2 second name
     * @param ignoreCase forwarded to the selected measure
     * @param ignoreWhite forwarded to the selected measure
     * @param ignorePunc forwarded to the selected measure
     * @param technique distance measure selector; 7 selects the delimiter measure
     * @param delimiter delimiter used by technique 7; ignored otherwise
     * @return the distance between the two names
     */
    public static double getScore(String s1, String s2, boolean ignoreCase, boolean ignoreWhite, boolean ignorePunc, int technique,String delimiter) {
        if (technique != 7) {
            return getScore(s1, s2, ignoreCase, ignoreWhite, ignorePunc, technique);
        }
        if (s1.equals(s2)) {
            return 0.0;
        }
        // The delimiter measure is a similarity; invert it into a distance.
        return 1 - delimiterDistance(s1, s2, ignoreCase, ignoreWhite, ignorePunc, delimiter);
    }
    
    // NOTE(review): this span is not valid Java as it stands. The loop headers
    // below break off after "i" and run straight into unrelated declarations,
    // so text appears to have been lost and several methods are fused together.
    // Restore it from the upstream source before relying on anything here.
    //
    // Visible intent of hammingDistSoundex: both names are encoded with
    // soundex2(..., true, true, true); the ignoreCase/ignoreWhite/ignorePunc
    // arguments are not used in the visible lines.
    public static double hammingDistSoundex(String s1, String s2, boolean ignoreCase, boolean ignoreWhite, boolean ignorePunc) {
        s1 = soundex2(s1, true, true, true);
        s2 = soundex2(s2, true, true, true);
        int sum = 0;
        // NOTE(review): loop header truncated; what follows seems to belong to a different method.
        for(int i = 0; i parsed = new ArrayList();
        String current = "";
        boolean digitMode = false;
        if(Character.isDigit(s1.charAt(0))) {
            current+=s1.charAt(0);
            digitMode = true;
        }
        // NOTE(review): loop header truncated again; the map below is used with
        // firstChar/secondChar, which are not declared in the visible text.
        for(int i = 0; i map = new HashMap<>();
        // Each character maps to a two-element coordinate pair; shifted and
        // unshifted symbols share a pair (presumably {row, column} on a US
        // QWERTY keyboard - confirm).
        map.put('`', new Integer[]{0,0});
        map.put('~', new Integer[]{0,0});
        map.put('1', new Integer[]{0,1});
        map.put('!', new Integer[]{0,1});
        map.put('2', new Integer[]{0,2});
        map.put('@', new Integer[]{0,2});
        map.put('3', new Integer[]{0,3});
        map.put('#', new Integer[]{0,3});
        map.put('4', new Integer[]{0,4});
        map.put('$', new Integer[]{0,4});
        map.put('5', new Integer[]{0,5});
        map.put('%', new Integer[]{0,5});
        map.put('6', new Integer[]{0,6});
        map.put('^', new Integer[]{0,6});
        map.put('7', new Integer[]{0,7});
        map.put('&', new Integer[]{0,7});
        map.put('8', new Integer[]{0,8});
        map.put('*', new Integer[]{0,8});
        map.put('9', new Integer[]{0,9});
        map.put('(', new Integer[]{0,9});
        map.put('0', new Integer[]{0,10});
        map.put(')', new Integer[]{0,10});
        map.put('-', new Integer[]{0,11});
        map.put('_', new Integer[]{0,11});
        map.put('=', new Integer[]{0,12});
        map.put('+', new Integer[]{0,12});
        
        map.put('q', new Integer[]{1,1});
        map.put('Q', new Integer[]{1,1});
        map.put('w', new Integer[]{1,2});
        map.put('W', new Integer[]{1,2});
        map.put('e', new Integer[]{1,3});
        map.put('E', new Integer[]{1,3});
        map.put('r', new Integer[]{1,4});
        map.put('R', new Integer[]{1,4});
        map.put('t', new Integer[]{1,5});
        map.put('T', new Integer[]{1,5});
        map.put('y', new Integer[]{1,6});
        map.put('Y', new Integer[]{1,6});
        map.put('u', new Integer[]{1,7});
        map.put('U', new Integer[]{1,7});
        map.put('i', new Integer[]{1,8});
        map.put('I', new Integer[]{1,8});
        map.put('o', new Integer[]{1,9});
        map.put('O', new Integer[]{1,9});
        map.put('p', new Integer[]{1,10});
        map.put('P', new Integer[]{1,10});
        map.put('[', new Integer[]{1,11});
        map.put('{', new Integer[]{1,11});
        map.put(']', new Integer[]{1,12});
        map.put('}', new Integer[]{1,12});
        map.put('\\', new Integer[]{1,13});
        map.put('|', new Integer[]{1,13});
        
        map.put('a', new Integer[]{2,1});
        map.put('A', new Integer[]{2,1});
        map.put('s', new Integer[]{2,2});
        map.put('S', new Integer[]{2,2});
        map.put('d', new Integer[]{2,3});
        map.put('D', new Integer[]{2,3});
        map.put('f', new Integer[]{2,4});
        map.put('F', new Integer[]{2,4});
        map.put('g', new Integer[]{2,5});
        map.put('G', new Integer[]{2,5});
        map.put('h', new Integer[]{2,6});
        map.put('H', new Integer[]{2,6});
        map.put('j', new Integer[]{2,7});
        map.put('J', new Integer[]{2,7});
        map.put('k', new Integer[]{2,8});
        map.put('K', new Integer[]{2,8});
        map.put('l', new Integer[]{2,9});
        map.put('L', new Integer[]{2,9});
        map.put(';', new Integer[]{2,10});
        map.put(':', new Integer[]{2,10});
        map.put('\'', new Integer[]{2,11});
        map.put('"', new Integer[]{2,11});
        
        map.put('z', new Integer[]{3,1});
        map.put('Z', new Integer[]{3,1});
        map.put('x', new Integer[]{3,2});
        map.put('X', new Integer[]{3,2});
        map.put('c', new Integer[]{3,3});
        map.put('C', new Integer[]{3,3});
        map.put('v', new Integer[]{3,4});
        map.put('V', new Integer[]{3,4});
        map.put('b', new Integer[]{3,5});
        map.put('B', new Integer[]{3,5});
        map.put('n', new Integer[]{3,6});
        map.put('N', new Integer[]{3,6});
        map.put('m', new Integer[]{3,7});
        map.put('M', new Integer[]{3,7});
        map.put(',', new Integer[]{3,8});
        map.put('<', new Integer[]{3,8});
        map.put('.', new Integer[]{3,9});
        map.put('>', new Integer[]{3,9});
        map.put('/', new Integer[]{3,10});
        map.put('?', new Integer[]{3,10});
        
       // NOTE(review): get() returns null for any character not listed above
       // (a space, for example), which makes the subtraction below throw a
       // NullPointerException.
       Integer[] coords1 = map.get(firstChar);
       Integer[] coords2 = map.get(secondChar);
       
       //calculate manhattan distance between the characters
       return Math.abs(coords1[0] - coords2[0]) + Math.abs(coords1[1] - coords2[1]);
        
    }
    
    /**
     * Hamming distance between two single characters.
     *
     * @param firstChar first character
     * @param secondChar second character
     * @return 0 when the characters are equal, 1 otherwise
     */
    private static int hammingDist(char firstChar, char secondChar) {
        return (firstChar == secondChar) ? 0 : 1;
    }
   
    /*
     * Simple implementation of Dynamic Time Warping distance
     * 
     * Both inputs are normalised with cleanName before a cost matrix of size
     * (len1 + 1) x (len2 + 1) is allocated. Callers in this file pass "hamming"
     * or "key" as distMeas (presumably selecting the per-character distance -
     * confirm).
     *
     * NOTE(review): the body is not valid Java as it stands. Both loop headers
     * break off after "i" and continue with code that seems to belong to other
     * methods ("errorCount", "distance", "currDist" are never declared in the
     * visible text). Restore it from the upstream source before editing.
     */
    private static double dtwDist(String str1, String str2, String distMeas,boolean ignoreCase, boolean ignoreWhite, boolean ignorePunc) {
        str1 = cleanName(str1,ignoreCase,ignoreWhite,ignorePunc);
        str2 = cleanName(str2,ignoreCase,ignoreWhite,ignorePunc);
        double[][] costMat = new double[str1.length()+1][str2.length()+1];
        //Initialize arrays
        // NOTE(review): truncated; the remainder of this line takes the shorter of the two lengths.
        for(int i = 0; istr2.length())?str2.length():str1.length();
        int errorCount = 0;
        // NOTE(review): truncated; the visible tail keeps the larger of distance and currDist.
        for(int i = 0; idistance) {
                distance = currDist;
            }
        }
        
        return distance;
    }
    
    /**
     * Splits a string into its overlapping two-character substrings.
     *
     * @param str string to split
     * @return the adjacent letter pairs in order of appearance; empty when the
     *         string has fewer than two characters
     */
    private static ArrayList<String> letterPairs(String str) {
        ArrayList<String> allPairs = new ArrayList<>(Math.max(0, str.length() - 1));
        for (int i = 0; i + 1 < str.length(); i++) {
            allPairs.add(str.substring(i, i + 2));
        }
        return allPairs;
    }

    /**
     * Normalises a name before comparison.
     *
     * @param s name to normalise
     * @param ignoreCase upper-case the name (locale-independent)
     * @param ignoreWhite remove all whitespace characters
     * @param ignorePunc remove every character that is not an ASCII letter or digit
     * @return the normalised name
     */
    private static String cleanName(String s, boolean ignoreCase, boolean ignoreWhite, boolean ignorePunc) {
        if (ignoreCase) {
            // Locale.ROOT keeps the result independent of the default locale.
            s = s.toUpperCase(Locale.ROOT);
        }
        if (ignoreWhite) {
            // Strings are immutable: the result must be assigned back, otherwise
            // the whitespace is silently kept.
            s = s.replaceAll("\\s", "");
        }
        if (ignorePunc) {
            s = s.replaceAll("[^a-zA-Z0-9]", "");
        }
        return s;
    }

    /**
     * Not implemented.
     * <p>
     * The previous body wrapped the argument in an array and then called this
     * same method again with the unwrapped element, so every call recursed
     * until the stack overflowed. It now fails immediately with a descriptive
     * exception instead.
     *
     * @param alternateIdGroups taxa list whose identifiers were meant to be changed
     * @throws UnsupportedOperationException always
     */
    public void changeAlignmentIdentifiers(TaxaList alternateIdGroups) {
        throw new UnsupportedOperationException(
                "changeAlignmentIdentifiers(TaxaList) is not implemented");
    }

    
    /**
     * Renders every synonymized taxa list as text: a header, then per list a
     * "SynonymFile n:" line followed by one line per taxon with its synonyms.
     *
     * @return the synonym table as text
     */
    @Override
    public String toString() {
        // StringBuilder avoids re-copying the whole text on every append.
        StringBuilder sb = new StringBuilder("Synonym Table\n");
        int counter = 0;
        for (TaxaList tl : taxaListSynonymized) {
            sb.append("SynonymFile ").append(counter).append(":\n");
            for (Taxon tx : tl) {
                sb.append("Name: ").append(tx.getName()).append(", Synonyms: ");
                for (String syn : tx.getAnnotation().getTextAnnotation(Taxon.SynonymKey)) {
                    sb.append(syn).append(", ");
                }
                sb.append("\n");
            }
            counter++;
        }
        return sb.toString();
    }

    public void deleteByThreshold(double threshold) {
        ImmutableMultimap.Builder removeBuilder = new ImmutableMultimap.Builder();
        //Go through taxa list
        for(Taxon tx:tempTaxaList) {
            String taxonName = tx.getName();
            //Go through the list of synonyms
            for(String synName:tx.getAnnotation().getTextAnnotation(Taxon.SynonymKey)) {
                double score = 0.0;
                if(technique==7) {
                    score = getScore(taxonName,synName,true,false,false,technique,delimiter);
                }
                else {
                    score = getScore(taxonName,synName,true,false,false,technique);
                }
                //double score = getScore(taxonName,synName,true,false,false,technique);
                
                if(technique==4) {
                    globalMax = 4.0;
                }
                score = 1.0-((score - globalMin)/(globalMax-globalMin));
                //If Score is less than thresh add to remove list
                if(score removeMap = removeBuilder.build();
        TaxaListBuilder tlb = new TaxaListBuilder();
        for(Taxon tx:tempTaxaList) {
            GeneralAnnotation ga = tx.getAnnotation();
            Taxon.Builder tb = new Taxon.Builder(tx.getName());
            Set keys = ga.getAnnotationKeys();
            //Copy Keys
            for(String key:keys) {
                if(!key.equals(Taxon.SynonymKey)) {
                    String[] values = ga.getTextAnnotation(key);
                    for(String value:values) {
                        tb.addAnno(key, value);
                    }
                }
            }
            //If removeMap has key for a given Taxon
            if(removeMap.keySet().contains(tx.getName())) {
                //Loop through and ignore any that are in removeMap
                for(String value:ga.getTextAnnotation(Taxon.SynonymKey)) {
                    if(!removeMap.get(tx.getName()).contains(value)) {
                        tb.addAnno(Taxon.SynonymKey, value);
                    }
                }
            }
            else {
                //Loop through add add all the synonyms
                for(String value:ga.getTextAnnotation(Taxon.SynonymKey)) {
                    tb.addAnno(Taxon.SynonymKey, value);
                }
            }
            //Build Taxon and Add Taxon to new list builder tlb
            tlb.add(tb.build());
        }
        tempTaxaList = tlb.build();
    }

    /**
     * Returns the string form of every reference taxon, in list order.
     *
     * @return one entry per reference taxon
     */
    public Object[] getRealNames() {
        final int taxaCount = referenceIDGroup.numberOfTaxa();
        Object[] names = new Object[taxaCount];
        int idx = 0;
        while (idx < taxaCount) {
            names[idx] = referenceIDGroup.get(idx).toString();
            idx++;
        }
        return names;
    }

    /**
     * Writes a three-line summary: a title, the number of unique matches and
     * the number of multiple matches.
     *
     * @param out destination for the summary
     */
    public void report(PrintWriter out) {
        final String uniqueLine = matchCount + " unique matches";
        final String multipleLine = unmatchCount + " multiple matches:";
        out.println("Synonym Table");
        out.println(uniqueLine);
        out.println(multipleLine);
    }

    /**
     * Returns the headings of the three table columns.
     *
     * @return taxon name, synonyms and match score headings, in that order
     */
    public Object[] getTableColumnNames() {
        return new String[] {"TaxaName", "Synonyms", "MatchScore"};
    }

    
    /**
     * Returns the table row for one taxon of the working taxa list.
     * <p>
     * Column 0 is the taxon name, column 1 its synonyms joined by commas and
     * column 2 the match score between the name and its first synonym, as
     * text. A taxon whose first synonym is missing or empty gets an empty
     * synonym column and a score of "1.0". When technique 4 is active,
     * {@code globalMax} is set to 4.0 before scoring.
     *
     * @param rowLong row number; narrowed to {@code int}
     *
     * @return three-element row
     */
    public Object[] getRow(long rowLong) {
        int row = (int) rowLong;
        Object[] data = new Object[3];

        Taxon tx = tempTaxaList.get(row);
        data[0] = tx.getName();

        String[] synonyms = tx.getAnnotation().getTextAnnotation(Taxon.SynonymKey);
        String firstMatch = (synonyms.length == 0) ? "" : synonyms[0];

        if (firstMatch.equals("")) {
            data[1] = "";
            data[2] = "1.0";
            return data;
        }

        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < synonyms.length; i++) {
            if (i > 0) {
                joined.append(',');
            }
            joined.append(synonyms[i]);
        }
        data[1] = joined.toString();

        if (technique == 4) {
            globalMax = 4.0;
        }

        String nameText = "" + data[0];
        if (technique == 0) {
            data[2] = "" + scoreMatch(nameText, firstMatch, true, false, false);
        } else if (technique == 5) {
            data[2] = "" + (1.0 - getScore(nameText, firstMatch, true, false, false, technique));
        } else if (technique == 7) {
            data[2] = "" + (1.0 - getScore(nameText, firstMatch, true, false, false, technique, delimiter));
        } else {
            // A negative rescaled score is retried with looser comparison
            // (ignore whitespace, then punctuation) and finally clamped to 0.
            double rescaled = rescaledRowScore(nameText, firstMatch, false, false);
            if (rescaled < 0.0) {
                rescaled = rescaledRowScore(nameText, firstMatch, true, false);
            }
            if (rescaled < 0.0) {
                rescaled = rescaledRowScore(nameText, firstMatch, true, true);
            }
            if (rescaled < 0.0) {
                rescaled = 0.0;
            }
            data[2] = "" + rescaled;
        }
        return data;
    }

    /** Scores a pair with the current technique (case ignored) and rescales it with globalMin/globalMax. */
    private double rescaledRowScore(String name, String synonym, boolean ignoreWhite, boolean ignorePunc) {
        double distance = getScore(name, synonym, true, ignoreWhite, ignorePunc, technique);
        return 1.0 - ((distance - globalMin) / (globalMax - globalMin));
    }
    
    

    /**
     * Discards staged edits by rebuilding the working taxa list from the first
     * synonymized list.
     */
    public void resetTempTaxaList() {
        TaxaListBuilder copyBuilder = new TaxaListBuilder();
        TaxaList saved = taxaListSynonymized.get(0);
        for (Taxon taxon : saved) {
            copyBuilder.add(taxon);
        }
        tempTaxaList = copyBuilder.build();
    }
    /**
     * Commits the staged edits: the working taxa list replaces the first
     * synonymized list, and a fresh working copy is then built from it.
     */
    public void saveTempTaxaList() {
        this.taxaListSynonymized.set(0, this.tempTaxaList);
        this.resetTempTaxaList();
    }

    /**
     * Removes all synonyms from the taxon at the given row of the working taxa
     * list; every other taxon and every other annotation is carried over as is.
     * A row number outside the list leaves the content unchanged.
     *
     * @param rowNumber zero-based position of the taxon to strip
     */
    public void removeSynonyms(int rowNumber) {
        TaxaListBuilder rebuilt = new TaxaListBuilder();
        int currentRow = 0;
        for (Taxon taxon : tempTaxaList) {
            if (currentRow++ != rowNumber) {
                // Untouched rows are carried over unchanged.
                rebuilt.add(taxon);
                continue;
            }
            GeneralAnnotation annotation = taxon.getAnnotation();
            Taxon.Builder stripped = new Taxon.Builder(taxon.getName());
            for (String key : annotation.getAnnotationKeys()) {
                if (key.equals(Taxon.SynonymKey)) {
                    continue;
                }
                for (String value : annotation.getTextAnnotation(key)) {
                    stripped.addAnno(key, value);
                }
            }
            rebuilt.add(stripped.build());
        }
        tempTaxaList = rebuilt.build();
    }
    /**
     * Replaces the synonyms of the taxon at the given row of the working taxa
     * list with a single new synonym; every other taxon and every other
     * annotation is carried over as is. A row number outside the list leaves
     * the content unchanged.
     *
     * @param rowNumber zero-based position of the taxon to update
     * @param newName the synonym to store for that taxon
     */
    public void updateSynonym(int rowNumber, String newName) {
        TaxaListBuilder rebuilt = new TaxaListBuilder();
        int currentRow = 0;
        for (Taxon taxon : tempTaxaList) {
            if (currentRow++ != rowNumber) {
                // Untouched rows are carried over unchanged.
                rebuilt.add(taxon);
                continue;
            }
            GeneralAnnotation annotation = taxon.getAnnotation();
            Taxon.Builder updated = new Taxon.Builder(taxon.getName());
            for (String key : annotation.getAnnotationKeys()) {
                if (key.equals(Taxon.SynonymKey)) {
                    continue;
                }
                for (String value : annotation.getTextAnnotation(key)) {
                    updated.addAnno(key, value);
                }
            }
            updated.addAnno(Taxon.SynonymKey, newName);
            rebuilt.add(updated.build());
        }
        tempTaxaList = rebuilt.build();
    }
    
    
    /**
     * Removes every taxon with the given name from the working taxa list.
     *
     * @param name exact name of the taxa to drop
     */
    public void deleteElements(String name) {
        TaxaListBuilder kept = new TaxaListBuilder();
        for (Taxon taxon : tempTaxaList) {
            if (taxon.getName().equals(name)) {
                continue;
            }
            kept.add(taxon);
        }
        tempTaxaList = kept.build();
    }

    /**
     * Checks whether any synonymized list contains a repeated first synonym.
     * <p>
     * Within each list, every taxon contributes its first synonym, or its own
     * name when it has no synonyms. A first synonym that was already
     * contributed earlier in the same list counts as a duplicate.
     *
     * @return {@code true} as soon as a repeated first synonym is found,
     *         {@code false} otherwise
     */
    public boolean checkSynForDups() {
        for (TaxaList tl : taxaListSynonymized) {
            Set<String> viewedTaxa = new HashSet<>();
            for (Taxon tx : tl) {
                String[] values = tx.getAnnotation().getTextAnnotation(Taxon.SynonymKey);
                // The null check must come first; dereferencing before it
                // defeats the purpose of the check.
                if (values == null || values.length == 0) {
                    viewedTaxa.add(tx.getName());
                } else if (!viewedTaxa.add(values[0])) {
                    // add() reports false when the value was already present.
                    return true;
                }
            }
        }
        return false;
    }
    
    public ArrayList swapSynonyms() {
        ArrayList newTaxaListArray = new ArrayList<>();
        for(TaxaList tl:taxaListSynonymized) {
            TaxaListBuilder tlb = new TaxaListBuilder();
            for(Taxon tx:tl) {
                GeneralAnnotation ga = tx.getAnnotation();
                Taxon.Builder tb = null;
                Set keys = ga.getAnnotationKeys();
                if(ga.getTextAnnotation(Taxon.SynonymKey).length==0) {
                    tb = new Taxon.Builder(tx);
                }
                else {
                    tb = new Taxon.Builder(ga.getTextAnnotation(Taxon.SynonymKey)[0]);
                    //Copy Keys
                    for(String key:keys) {
                        if(!key.equals(Taxon.SynonymKey)) {
                            String[] values = ga.getTextAnnotation(key);
                            for(String value:values) {
                                tb.addAnno(key, value);
                            }
                        }
                    }
                    String[] synVals = ga.getTextAnnotation(Taxon.SynonymKey);
                    tb.addAnno(Taxon.SynonymKey, tx.getName());
                    for(int i = 1; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy