All Downloads are FREE. Search and download functionalities are using the official Maven repository.

lphy.base.evolution.alignment.MetaDataAlignment Maven / Gradle / Ivy

Go to download

The standard library of LPhy, which contains the required generative distributions and basic functions.

The newest version!
package lphy.base.evolution.alignment;

import jebl.evolution.sequences.SequenceType;
import lphy.base.evolution.Taxa;
import lphy.base.evolution.Taxon;
import lphy.core.logger.LoggerUtils;
import lphy.core.model.NarrativeName;
import lphy.core.model.annotation.GeneratorCategory;
import lphy.core.model.annotation.MethodInfo;
import lphy.core.model.annotation.TypeInfo;

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * The metadata parsed from a nexus file and stored in {@link Taxon},
 * and the charsets.
 * @author Walter Xie
 */
@TypeInfo(description = "An alignment containing metadata and parsed from a nexus file.",
        examples = {"twoPartitionCoalescentNex.lphy","https://linguaphylo.github.io/tutorials/time-stamped-data/"})
public class MetaDataAlignment extends SimpleAlignment implements NarrativeName {

    // if null, then no charset in the nexus file
    protected Map> charsetMap;

    // ages/dates will be parsed and go to Taxon.
    protected double minAge;
    protected double maxAge;
    // SCALE, TODO fix to year at the moment
    protected ChronoUnit chronoUnit = ChronoUnit.YEARS;

    // default to forward
    protected AgeDirection ageDirection = AgeDirection.forward;
    protected String ageRegxStr;

    protected String spRegxStr;

    //*** age direction ***//

    public enum AgeDirection {
        forward,  // virus
        backward, // fossils
        dates,    // forward
        ages      // backward
    }


    public MetaDataAlignment(Taxa taxa, int nchar, SequenceType sequenceType) {
        super(taxa, nchar, sequenceType);
    }


    //*** ages ***//

    /**
     * Parse age/date string in Map, and assign to {@link Taxa}.
     * @param ageStringMap  Taxon name <=> age/date string,
     * @param ageDirectionStr  {@link AgeDirection}
     */
    public void assignAges(final Map ageStringMap, final String ageDirectionStr) {

        this.ageDirection = getAgeDirection(ageDirectionStr);

        //*** string processing ***//

        String[] datesStr = Objects.requireNonNull(ageStringMap).values().toArray(String[]::new);
        // check if double, or date format (return null)
        double[] vals = parseDateString(datesStr);

        if (vals == null) {// if it is date uuuu-MM-dd
            assert AgeDirection.forward.equals(ageDirection) || AgeDirection.dates.equals(ageDirection);
            // only forward in time using dates
            //TODO this method is hard coded to get years only.
            // For other units, it requires conversion after this method.
            vals = convertDateToAge(datesStr, chronoUnit);
        }
        // find min max for forward or backward
        maxAge = vals[0];
        minAge = vals[0];
        for (int i = 1; i < vals.length; i++) {
            if (vals[i] > maxAge) maxAge = vals[i];
            else if (vals[i] < minAge) minAge = vals[i];
        }

        //*** assign ages ***//

        // same order as String[] datesStr
        String[] taxaNames = ageStringMap.keySet().toArray(String[]::new);

        if (ageStringMap.size() != ntaxa())
            throw new IllegalArgumentException("Invalid ages/dates map : size " + ageStringMap.size() +
                    " !=  taxa " + ntaxa());

        for (int i = 0; i < taxaNames.length; i++) {
            String taxonName = taxaNames[i];
            // make sure use the correct taxon
            int indexOfTaxon = indexOfTaxon(taxonName);
            if (indexOfTaxon < 0)
                throw new RuntimeException("Cannot locate taxon name " + taxonName +
                        " from ages/dates map in getAlignment() taxa " + Arrays.toString(getTaxaNames()));

            if (AgeDirection.forward.equals(ageDirection) || AgeDirection.dates.equals(ageDirection)) {
                // like virus
                getTaxon(indexOfTaxon).setAge(maxAge - vals[i]);
            } else if (AgeDirection.backward.equals(ageDirection)|| AgeDirection.ages.equals(ageDirection)) {
                // like fossils
                getTaxon(indexOfTaxon).setAge(vals[i] - minAge);
            } else {
                throw new IllegalArgumentException("Not recognised age direction to convert dates or ages : " + ageDirection);
            }

        }

    }

    /**
     * TreeMap of Taxon name <=> age/date string,
     * which can be alternatively obtained from the nexus file.
     * @param ageRegxStr  Java regular expression to extract dates from taxa names.
     * @param ageDirectionStr  {@link AgeDirection}
     */
    public void setAgesParsedFromTaxaName(final String ageRegxStr, final String ageDirectionStr) {
        this.ageRegxStr = ageRegxStr;

        Map ageStringMap = new TreeMap<>();
        // guess dates
        final Pattern regx = Pattern.compile(ageRegxStr);

        for (String taxonName : getTaxaNames()) {
            // TODO take nth element given separator
            String ageStr = getAttrFirstMatch(taxonName, regx);
            ageStringMap.put(taxonName, ageStr);
        }

        assignAges(ageStringMap, ageDirectionStr);
    }

    public void setSpeciesParsedFromTaxaName(String spRegxStr) {
        this.spRegxStr = spRegxStr;
        // guess species
        final Pattern regx = Pattern.compile(spRegxStr);

        for (Taxon taxon : getTaxonArray()) {
            String taxonName = taxon.getName();
            String spStr = getAttrFirstMatch(taxonName, regx);
            taxon.setSpecies(Objects.requireNonNull(spStr));
        }
    }

    //*** ChronoUnit ***//

    public ChronoUnit getChronoUnit() {
        return chronoUnit;
    }

    public void setChronoUnit(ChronoUnit chronoUnit) {
        this.chronoUnit = chronoUnit;
    }

    //*** charsets ***//

    @MethodInfo(description="return a partition alignment. " +
            "If the string doesn't match charset's syntax, then check if the string matches " +
            "a defined name in the nexus file. Otherwise it is an error. " +
            "The string is referred to one partition at a call, but can be multiple blocks, " +
            "such as a dummy example: d.charset([\"2-457\\3\", \"660-896\\3\", \"1-.\\3\"]).",
            narrativeName = "character set",
            category = GeneratorCategory.TAXA_ALIGNMENT,
            examples = {"twoPartitionCoalescentNex.lphy","https://linguaphylo.github.io/tutorials/time-stamped-data/"})
    public Alignment charset(String str) {
        List charSetBlocks = new ArrayList<>();
        //*** charsets or part names ***//
        if (CharSetBlock.Utils.isValid(str)) {
            // is charset
            charSetBlocks = CharSetBlock.Utils.getCharSetBlocks(str);
        } else if (hasCharsets(charsetMap)) {
            // There is the partition name in the nexus file
            // IllegalArgumentException if str not exist
            charSetBlocks = getCharSet(str, charsetMap);
        }
        if (charSetBlocks.size() < 1)
            throw new IllegalArgumentException("Not recognised string " + str + " assign to charset !");
        return AlignmentUtils.getCharSetAlignment(charSetBlocks, this);
    }

//    @MethodInfo(description="return a trait alignment, which contains the set of traits
" + // "extracted from taxa names in this alignment.
" + // "The sepStr is the substring to split the taxa names,
" + // "where Java regular expression escape characters will be given no special meaning.
" + // "The i (>=0) is the index to extract the trait value." ) // public Alignment extractTrait(String sepStr, Integer i) { // String[] taxaNames = this.getTaxaNames(); // String[] traitVal = new String[taxaNames.length]; // // for (int t = 0; t < taxaNames.length; t++) { // String[] parts = taxaNames[t].split(Pattern.quote(sepStr)); // if (parts.length > i) // traitVal[t] = parts[i]; // else // throw new IllegalArgumentException("Cannot find " + i + // "th element after splitting name " + taxaNames[t] + " by substring " + sepStr); // } // // no sorting demes // Set uniqTraitVal = new LinkedHashSet<>(Arrays.asList(traitVal)); // List uniqueDemes = new ArrayList<>(uniqTraitVal); // // state names are sorted unique demes // Standard standard = new Standard(uniqueDemes); // SimpleAlignment traitAl = new SimpleAlignment(this.getTaxa(), 1, standard); // // fill in trait values, traitVal and taxaNames have to maintain the same order // for (int t = 0; t < traitVal.length; t++) { // int demeIndex = standard.getStateNameIndex(traitVal[t]); // traitAl.setState(t, 0, demeIndex); // } // return traitAl; // } public void setCharsetMap(Map> charsetMap) { this.charsetMap = charsetMap; } public Map> getCharsetMap() { return charsetMap; } public AgeDirection getAgeDirection() { return ageDirection; } //*** summary ***// /** * @return a summary of loading nexus file. */ @Override public String toString() { StringBuilder sb = new StringBuilder(super.toString()); if (getCharsetMap() != null) sb.append(", ").append( getCharsetMap().size() ). append(" charset").append(getCharsetMap().size() > 1 ? "s" : ""); if (isUltrametric()) sb.append(", age direction is ").append( getAgeDirection() ); return sb.toString(); } @Override public String getNarrativeName() { return "metadata alignment"; } // @Override // public JComponent getComponent(Value value) { // StringBuilder sb = new StringBuilder(super.toString()); // if (getCharsetMap() != null) { // sb.append("\n").append( getCharsetMap().toString() ); // } // if (hasAges()) { // sb.append("\nageDirection = ").append( getAgeDirection() ); // // wrap map string by comma // sb.append("\nages = ").append( Arrays.toString( getAges() ) ); // } // // JTextArea textArea = new JTextArea(sb.toString()); // textArea.setEditable(false); // // return textArea; // } //****** private ******// // default to forward private AgeDirection getAgeDirection(String ageDirectionStr){ if (ageDirectionStr == null) { ageDirectionStr = AgeDirection.forward.toString(); LoggerUtils.log.warning("Tip calibration type is not defined, set to " + ageDirectionStr + " as default."); } return AgeDirection.valueOf(ageDirectionStr.toLowerCase()); } /** * @param taxonName * @param regx * @return extracted attribute from a taxon name using regx */ private String getAttrFirstMatch(final String taxonName, final Pattern regx) { Matcher matcher = regx.matcher(taxonName); if (matcher.find()) return matcher.group(1); throw new IllegalArgumentException("Cannot extract attributes from " + taxonName + " using " + regx); } // return null, if cannot parseDouble, // which assumes the string is a date in uuuu-MM-dd format private double[] parseDateString(final String[] datesStr) { double[] vals = new double[Objects.requireNonNull(datesStr).length]; // parse the age value for (int i = 0; i < datesStr.length; i++) { try { vals[i] = Double.parseDouble(datesStr[i]); } catch (NumberFormatException e) { // the val is Date not Number LoggerUtils.log.warning("Warning: the value (" + datesStr[i] + ") is not numeric, so guess it is a date by uuuu-MM-dd format"); return null; } } return vals; } // convert uuuu-MM-dd to the unit of years in decimal currently private double[] convertDateToAge(final String[] datesStr, ChronoUnit unit) { final String formatter = "uuuu-MM-dd"; DateTimeFormatter f = DateTimeFormatter.ofPattern(formatter); if (!unit.equals(ChronoUnit.YEARS)) throw new UnsupportedOperationException("Only support year as unit for parsing a date '" + formatter + "', but the current unit is " + unit + " !"); double[] vals = new double[Objects.requireNonNull(datesStr).length]; for (int i = 0; i < datesStr.length; i++) { try { LocalDate date = LocalDate.parse(datesStr[i], f); // decimal year, e.g. 1999.55 vals[i] = date.getYear() + (date.getDayOfYear() - 1.0) / (date.isLeapYear() ? 366.0 : 365.0); } catch (DateTimeParseException e) { throw new RuntimeException("Cannot parse the date string by " + formatter + " ! " + datesStr[i]); } } return vals; } /** * @param charsetMap obtained from NexusImporter * @return true, if the nexus file defines "charset". */ private boolean hasCharsets(Map> charsetMap) { return ! (charsetMap == null || charsetMap.size() == 0); } /** * @param partName the charset name defined in the nexus file. * @param charsetMap obtained from NexusImporter * @return the List matching to the charset name defined in the nexus file. */ private List getCharSet(String partName, Map> charsetMap) { List blocks = Objects.requireNonNull(charsetMap).get(partName); if (blocks == null) throw new IllegalArgumentException("Charset name " + partName + " not exist !"); return blocks; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy