All downloads are free. The search and download functionality uses the official Maven repository.

net.maizegenetics.dna.map.Chromosome Maven / Gradle / Ivy

package net.maizegenetics.dna.map;

import net.maizegenetics.util.GeneralAnnotation;
import net.maizegenetics.util.GeneralAnnotationStorage;

import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Defines the chromosome structure and length. The name and length recorded for
 * each chromosome.
 *
 * @author Terry Casstevens and Ed Buckler
 */
public class Chromosome implements Comparable<Chromosome> {

    /** Matches the run of digits, if any, at the very start of a normalized name. */
    private static final Pattern DIGITS = Pattern.compile("^\\d+");

    /** Minimum width to which a leading digit run is zero-padded in the compare string. */
    private static final int COMPARE_PAD_WIDTH = 5;

    /** Size above which the canonical-instance cache is emptied. */
    private static final int MAX_CANONICAL_ENTRIES = 1000;

    // Kept non-final so existing code that assigns this field continues to link;
    // treat it as a constant.
    public static Chromosome UNKNOWN = new Chromosome("Unknown");

    /** Normalized name (see {@link #parseName(String)}). */
    private final String myName;
    /** Integer value of {@link #myName}, or Integer.MAX_VALUE when it is not an integer. */
    private final int myChromosomeNumber;
    /** Name with its leading digit run zero-padded; used for string ordering. */
    private final String myCompareString;
    /** Length in base pairs as supplied by the caller (-1 from the name-only constructor). */
    private final int myLength;
    /** Caller-supplied annotations; may be null. */
    private final GeneralAnnotation myGA;
    /** Hash code computed once at construction from the normalized name. */
    private final int hashCode;

    // Many equal Chromosome objects tend to be created, so equal instances are
    // interned here and callers can share a single reference.
    private static final ConcurrentMap<Chromosome, Chromosome> CHR_HASH = new ConcurrentHashMap<>(50);

    // Cache of chromosomes created from a name only, keyed by the name exactly as passed in.
    private static final ConcurrentMap<String, Chromosome> CHROMOSOME_NAME_ONLY = new ConcurrentHashMap<>(25);

    /**
     * Returns the cached instance equal to the given chromosome, caching the
     * given one if no equal instance is present. The cache is emptied once it
     * holds more than {@value #MAX_CANONICAL_ENTRIES} entries, so the returned
     * reference is only canonical between clears.
     *
     * @param chr chromosome to canonicalize
     *
     * @return the cached equal instance, or {@code chr} itself if it was just cached
     */
    public static Chromosome getCanonicalChromosome(Chromosome chr) {
        if (CHR_HASH.size() > MAX_CANONICAL_ENTRIES) {
            CHR_HASH.clear();
        }
        Chromosome canon = CHR_HASH.putIfAbsent(chr, chr);
        return (canon == null) ? chr : canon;
    }

    /**
     * Creates Chromosome instance with specified name. Returns single instance given same name multiple times.
     * The cache key is the name exactly as passed in, before normalization.
     *
     * @param name chromosome name
     *
     * @return Chromosome
     */
    public static Chromosome instance(String name) {
        return CHROMOSOME_NAME_ONLY.computeIfAbsent(name, Chromosome::new);
    }

    /**
     * Creates Chromosome instance named by the decimal form of the given number.
     * Returns single instance given same number multiple times.
     *
     * @param name chromosome number
     *
     * @return Chromosome
     */
    public static Chromosome instance(int name) {
        return instance(String.valueOf(name));
    }

    /**
     * @param name Name of the chromosome
     * @param length Length of chromosome in base pairs
     * @param features Map of features about the chromosome
     *
     * @throws IllegalArgumentException if name is null or empty
     */
    public Chromosome(String name, int length, GeneralAnnotation features) {
        if (name == null || name.isEmpty()) {
            throw new IllegalArgumentException("Chromosome: name can't be null or empty.");
        }
        myName = parseName(name);
        myCompareString = buildCompareString(myName);
        myLength = length;
        myChromosomeNumber = parseChromosomeNumber(myName);
        myGA = features;
        hashCode = calcHashCode();
    }

    /**
     * Creates a chromosome from a name only. The length is set to -1, and any text
     * after the first space of the name is stored as a "Description" annotation.
     *
     * @param name Name of the chromosome, optionally followed by a space and a description
     *
     * @throws IllegalArgumentException if name is null or empty
     *
     * @deprecated use {@link #instance(String)}
     */
    @Deprecated
    public Chromosome(String name) {
        this(name, -1, parseAnnotationFromName(name));
    }

    /**
     * Returns the normalized chromosome name.
     */
    public String getName() {
        return myName;
    }

    /**
     * Returns the integer value of the chromosome (if name is not a number then
     * Integer.MAX_VALUE is returned)
     */
    public int getChromosomeNumber() {
        return myChromosomeNumber;
    }

    /**
     * Returns the length given at construction (-1 when created from a name only).
     */
    public int getLength() {
        return myLength;
    }

    /**
     * Returns the annotations given at construction; may be null.
     */
    public GeneralAnnotation getAnnotation() {
        return myGA;
    }

    @Override
    public String toString() {
        return getName();
    }

    @Override
    public int hashCode() {
        return hashCode;
    }

    private int calcHashCode() {
        return 79 * 7 + myName.hashCode();
    }

    /**
     * Two chromosomes are equal when their hash codes match and
     * {@link #compareTo(Chromosome)} reports them as equal. Length and
     * annotations do not take part in the comparison.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof Chromosome)) {
            return false;
        }
        // Cheap rejection: the hash is precomputed from the normalized name.
        if (hashCode != obj.hashCode()) {
            return false;
        }
        return compareTo((Chromosome) obj) == 0;
    }

    /**
     * Compares chromosomes numerically if both are numbers.  Otherwise compares as strings.
     *
     * @param o other chromosome
     *
     * @return a negative integer, zero, or a positive integer as this object is less than, equal to, or greater than
     * the specified object.
     */
    @Override
    public int compareTo(Chromosome o) {
        if (this == o) {
            return 0;
        }
        if (myChromosomeNumber != Integer.MAX_VALUE && o.myChromosomeNumber != Integer.MAX_VALUE) {
            return Integer.compare(myChromosomeNumber, o.myChromosomeNumber);
        }
        return myCompareString.compareTo(o.myCompareString);
    }

    /**
     * Builds the string used for ordering: when the name starts with digits, zeros
     * are prepended so that the leading digit run is at least
     * {@value #COMPARE_PAD_WIDTH} characters wide; otherwise the name is returned
     * unchanged.
     *
     * @param normalizedName name as returned by {@link #parseName(String)}
     *
     * @return the padded compare string
     */
    private static String buildCompareString(String normalizedName) {
        Matcher matcher = DIGITS.matcher(normalizedName);
        if (!matcher.find()) {
            return normalizedName;
        }
        StringBuilder builder = new StringBuilder();
        // matcher.end() is the length of the leading digit run.
        for (int i = matcher.end(); i < COMPARE_PAD_WIDTH; i++) {
            builder.append('0');
        }
        return builder.append(normalizedName).toString();
    }

    /**
     * Parses the normalized name as an integer.
     *
     * @param normalizedName name as returned by {@link #parseName(String)}
     *
     * @return the integer value, or Integer.MAX_VALUE if the name is not an integer
     */
    private static int parseChromosomeNumber(String normalizedName) {
        try {
            return Integer.parseInt(normalizedName);
        } catch (NumberFormatException ignored) {
            // Not a plain integer: Integer.MAX_VALUE marks the name as non-numeric.
            return Integer.MAX_VALUE;
        }
    }

    /**
     * Takes a string, trims it, makes all upper case, removes leading
     * CHROMOSOME and/or CHR, and drops everything from the first space on
     * (when that space is not the first character).
     *
     * @param name name of chromosome
     *
     * @return the normalized name, minus a leading "chr" or "chromosome"
     */
    private static String parseName(String name) {
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        String parsedName = name.trim().toUpperCase(Locale.ROOT);
        if (parsedName.startsWith("CHROMOSOME")) {
            parsedName = parsedName.substring("CHROMOSOME".length());
        }
        if (parsedName.startsWith("CHR")) {
            parsedName = parsedName.substring("CHR".length());
        }
        int spaceIndex = parsedName.indexOf(' ');
        if (spaceIndex > 0) {
            parsedName = parsedName.substring(0, spaceIndex);
        }
        return parsedName;
    }

    /**
     * Takes a chromosome name, looks for the first space, returns
     * the data beyond as an annotation.  This takes care of lines in
     * a fasta file that look like this:
     * >3 This is a description
     *
     * @param name - the string chromosome passed in; may be null
     *
     * @return Annotations built from the string beyond the name, or null if the
     * name is null or has no space after its first character
     */
    private static GeneralAnnotation parseAnnotationFromName(String name) {
        if (name == null) {
            // Let the main constructor report the null name as an IllegalArgumentException.
            return null;
        }
        int spaceIndex = name.indexOf(' ');
        if (spaceIndex <= 0) {
            return null;
        }
        String currChrDesc = name.substring(spaceIndex + 1);
        return GeneralAnnotationStorage.getBuilder().addAnnotation("Description", currChrDesc).build();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy