All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.genesys.taxonomy.checker.TaxonomyChecker Maven / Gradle / Ivy

/*
 * Copyright 2016 Global Crop Diversity Trust
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.genesys.taxonomy.checker;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;

import javax.annotation.Nonnull;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.taxonomy.gringlobal.model.IGrinSpecies;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Objects;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;

/**
 * Suggest fixes to scientific names based on an in-memory list of valid taxonomies.
 */
public class TaxonomyChecker {

	/** Genus name reported when the genus is blank or one of the {@link #UNKNOWN_GENUS_ALT} placeholders. */
	private static final String UNKNOWN_GENUS = "Unknown";
	/** Input values that are recognized as "genus not known". */
	private static final String[] UNKNOWN_GENUS_ALT = new String[] { UNKNOWN_GENUS, "Unidentified" };

	/** Species epithet reported when the species is blank or one of the {@link #UNKNOWN_SPECIES_ALT} placeholders. */
	private static final String UNKNOWN_SPECIES = "sp.";
	/** Input values that are recognized as "species not known". */
	private static final String[] UNKNOWN_SPECIES_ALT = new String[] { UNKNOWN_SPECIES, "spp.", "sp", "spp" };

	/** Logger of this class. */
	private static final Logger LOG = LoggerFactory.getLogger(TaxonomyChecker.class);

	/** The taxonomy database every lookup is delegated to; assigned through the setter. */
	private TaxonomyDatabase database;

	/**
	 * Sets the taxonomy database that all lookups of this checker delegate to.
	 * The other methods of this class dereference the database without a null check,
	 * so it has to be set before they are called.
	 *
	 * @param database the new taxonomy database
	 */
	public void setTaxonomyDatabase(TaxonomyDatabase database) {
		this.database = database;
	}

	/**
	 * Find suggestions for GENUS.
	 * <ul>
	 * <li>A blank genus, or one of the "unknown genus" placeholders, yields a single-element list with {@code "Unknown"}.</li>
	 * <li>A genus that the database contains is returned unchanged as the only element.</li>
	 * <li>Any other genus yields whatever similar genera the database reports.</li>
	 * </ul>
	 *
	 * @param genus the genus to check, may be null or blank
	 * @param maxSize maximum number of similar genera requested from the database
	 * @return a single-element list when the genus is accepted as is (or is unknown), otherwise the suggestions of the
	 *         database for the unrecognized genus
	 */
	public List<String> suggestGenus(String genus, int maxSize) {
		if (StringUtils.isBlank(genus) || ArrayUtils.contains(UNKNOWN_GENUS_ALT, genus)) {
			return Arrays.asList(UNKNOWN_GENUS);
		}
		if (database.containsGenus(genus)) {
			LOG.trace("Database contains genus={}", genus);
			return Arrays.asList(genus);
		}
		LOG.debug("Database does not contain genus={}", genus);
		return database.findSimilarGenus(genus, maxSize);
	}

	/**
	 * Find suggestions for SPECIES without a species authority.
	 * Delegates to the four-argument variant with a {@code null} species authority.
	 *
	 * @param genus the genus
	 * @param species the species
	 * @param maxSize maximum number of suggestions to return
	 * @return suggested species names, see the four-argument variant for the individual cases
	 */
	public List<String> suggestSpecies(String genus, String species, int maxSize) {
		return suggestSpecies(genus, species, null, maxSize);
	}

	/**
	 * Find suggestions for SPECIES.
	 * <ul>
	 * <li>An "unknown genus" placeholder, a blank species or an "unknown species" placeholder yields a single-element list
	 * with {@code "sp."}.</li>
	 * <li>A species that the database contains for the genus is returned unchanged as the only element.</li>
	 * <li>When only the genus is known, similar species within that genus are returned.</li>
	 * <li>When the genus is not known either, up to two suggested genera are tried with up to two similar species each.
	 * Suggestions that belong to a different genus are formatted as {@code "(Genus) species"}.</li>
	 * </ul>
	 *
	 * @param genus the genus
	 * @param species the species
	 * @param spAuthor species authority, may be null
	 * @param maxSize maximum number of suggestions to return
	 * @return suggested species names, possibly empty when there are no suggestions
	 */
	public List<String> suggestSpecies(String genus, String species, String spAuthor, int maxSize) {
		LOG.debug("Suggesting species for genus={} species={}", genus, species);

		if (ArrayUtils.contains(UNKNOWN_GENUS_ALT, genus)) {
			return Arrays.asList(UNKNOWN_SPECIES);
		}
		if (StringUtils.isBlank(species) || ArrayUtils.contains(UNKNOWN_SPECIES_ALT, species)) {
			return List.of(UNKNOWN_SPECIES);
		}

		if (database.containsSpecies(genus, species)) {
			return Arrays.asList(species);
		}

		if (database.containsGenus(genus)) {
			// Genus exists
			LOG.debug("Database contains genus={}", genus);
			return database.findSimilarSpecies(genus, species, spAuthor, maxSize);
		}
		LOG.debug("Database does not contain genus={}", genus);

		// Genus not listed in the database, go through suggestions
		final List<String> genusSuggestions = suggestGenus(genus, 2);
		final List<String> suggestions = new ArrayList<>();
		for (final String suggestedGenus : genusSuggestions) {
			LOG.debug("Suggesting with suggested genus={}", suggestedGenus);
			final boolean sameGenus = suggestedGenus.equalsIgnoreCase(genus);
			for (final String suggestion : database.findSimilarSpecies(suggestedGenus, species, spAuthor, 2)) {
				// Flag suggestions that also require a change of genus
				suggestions.add(sameGenus ? suggestion : "(" + suggestedGenus + ") " + suggestion);
			}
		}

		// The loop above can collect up to 2 x 2 entries regardless of maxSize: honor the documented limit.
		// Non-positive values are left alone, their meaning is decided by the database and is not visible here.
		if (maxSize > 0 && suggestions.size() > maxSize) {
			return new ArrayList<>(suggestions.subList(0, maxSize));
		}
		return suggestions;
	}

	/**
	 * Look up the species authority for genus + species when the caller has no authority of its own.
	 * Forwards to the three-argument variant with a {@code null} species authority.
	 *
	 * @param genus the genus
	 * @param species the species
	 * @return result of the three-argument variant: {@code null} when the database does not contain the species
	 */
	public List getSpeciesAuthority(String genus, String species) {
		final String noAuthor = null;
		return getSpeciesAuthority(genus, species, noAuthor);
	}

	/**
	 * Look up the species authority for genus + species.
	 * The database is only queried for authorities when it contains the genus + species pair.
	 *
	 * @param genus the genus
	 * @param species the species
	 * @param spAuthor species authority passed on to the database lookup, may be null
	 * @return what the database reports for the species, or {@code null} (not an empty list) when the database
	 *         does not contain the species
	 */
	public List getSpeciesAuthority(String genus, String species, String spAuthor) {
		final boolean knownSpecies = database.containsSpecies(genus, species);
		return knownSpecies ? database.getSpeciesAuthority(genus, species, spAuthor) : null;
	}

	/**
	 * Find suggestions for SUBTAXA without authorities.
	 * Delegates to the six-argument variant with {@code null} for both the species and the subtaxa authority.
	 *
	 * @param genus must be valid genus in the database
	 * @param species species must be valid species within genus
	 * @param subtaxa current subtaxa
	 * @param maxSize maximum number of suggestions to return
	 * @return suggested subtaxa, see the six-argument variant for the individual cases
	 */
	public List<String> suggestSubtaxa(String genus, String species, String subtaxa, int maxSize) {
		return suggestSubtaxa(genus, species, null, subtaxa, null, maxSize);
	}

	/**
	 * Find suggestions for SUBTAXA.
	 * <ul>
	 * <li>A blank subtaxa, or a genus + species (+ authority) that the database does not contain, yields an empty list.</li>
	 * <li>A subtaxa that the database contains is returned unchanged as the only element.</li>
	 * <li>Otherwise the similar subtaxa reported by the database are returned.</li>
	 * </ul>
	 *
	 * @param genus must be valid genus in the database
	 * @param species species must be valid species within genus
	 * @param spAuthor species authority, may be null
	 * @param subtaxa current subtaxa
	 * @param subtAuthor species authority at the lowest classification level, may be null
	 * @param maxSize maximum number of suggestions to return
	 * @return suggested subtaxa, or an empty (immutable) list when nothing can be suggested
	 */
	public List<String> suggestSubtaxa(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
		if (StringUtils.isBlank(subtaxa) || !database.containsSpecies(genus, species, spAuthor)) {
			return List.of();
		}
		if (database.containsSubtaxa(genus, species, spAuthor, subtaxa, subtAuthor)) {
			return List.of(subtaxa);
		}
		return database.findSimilarSubtaxa(genus, species, spAuthor, subtaxa, subtAuthor, maxSize);
	}

	/**
	 * Look up the authority for genus + species + subtaxa.
	 * The database is only queried for authorities when it contains the genus + species + subtaxa combination.
	 *
	 * @param genus the genus
	 * @param species the species
	 * @param spAuthor species authority, passed on to the authority lookup only
	 * @param subtaxa subtaxa to check
	 * @param subtAuthor species authority at the lowest classification level, passed on to the authority lookup only
	 * @return what the database reports for the subtaxa, or {@code null} (not an empty list) when the database
	 *         does not contain the subtaxa
	 */
	public List getSubtaxaAuthority(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
		// NOTE(review): the existence check is made without the authorities, unlike suggestSubtaxa - confirm this is intended
		final boolean knownSubtaxa = database.containsSubtaxa(genus, species, subtaxa);
		return knownSubtaxa ? database.getSubtaxaAuthority(genus, species, spAuthor, subtaxa, subtAuthor) : null;
	}

	/**
	 * Ask the database for the taxa matching the given names and authorities.
	 * This is a plain pass-through to the database.
	 *
	 * @param genus valid genus
	 * @param species valid species
	 * @param spAuthor species authority
	 * @param subtaxa subtaxa to check
	 * @param subtAuthor species authority at the lowest classification level
	 * @param maxSize maximum number of suggestions to return
	 * @return list of suggestions as returned by the database; declared as ordered by preference and never null
	 * @since 3.10
	 */
	@Nonnull
	public List findTaxa(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
		final List matches = database.findTaxa(genus, species, spAuthor, subtaxa, subtAuthor, maxSize);
		return matches;
	}

	/**
	 * Ask the database for the record matching the given names and authorities.
	 * This is a plain pass-through to the database.
	 *
	 * @param genus valid genus
	 * @param species valid species
	 * @param spAuthor species authority
	 * @param subtaxa subtaxa to check
	 * @param subtAuthor species authority at the lowest classification level
	 * @return the record returned by the database for an exact match
	 * @since 3.10
	 */
	public Taxon getTaxon(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
		final Taxon match = database.getTaxon(genus, species, spAuthor, subtaxa, subtAuthor);
		return match;
	}

	/**
	 * Ask the database for records similar to the given names and authorities.
	 * This is a plain pass-through to the database.
	 *
	 * @param genus valid genus
	 * @param species valid species
	 * @param spAuthor species authority
	 * @param subtaxa subtaxa to check
	 * @param subtAuthor species authority at the lowest classification level
	 * @param maxSize maximum number of suggestions to return
	 * @return list of suggestions as returned by the database; declared as ordered by preference and never null
	 * @since 3.10
	 */
	@Nonnull
	public List findSimilar(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
		final List similar = database.findSimilar(genus, species, spAuthor, subtaxa, subtAuthor, maxSize);
		return similar;
	}

	/**
	 * Get the string similarity score of two strings as computed by the database.
	 *
	 * @param string1 string A
	 * @param string2 string B
	 * @return the score reported by the database
	 */
	public double similarityScore(final String string1, final String string2) {
		final double score = database.similarityScore(string1, string2);
		return score;
	}

	/**
	 * Taxon record: genus, species and subtaxa with their authorities, the GRIN species id and,
	 * when the source record points to a different current taxonomy record, that record as {@link #currentTaxon}.
	 *
	 * @since 3.10
	 */
	@Data
	@Slf4j
	public static class Taxon {
		private String genus;
		private String species;
		private String spAuthor;
		private String subtaxa;
		private String subtAuthor;
		private Long grinSpeciesId;

		/** Current taxon of this record; not part of {@code toString}, {@code equals} and {@code hashCode}. */
		@ToString.Exclude
		@EqualsAndHashCode.Exclude
		private Taxon currentTaxon;

		/**
		 * Blank
		 */
		public Taxon() {
		}

		/**
		 * Create {@code Taxon} from data.
		 * The GRIN species id and the current taxon are left unset.
		 * 
		 * @param genus genus name
		 * @param species specific epithet
		 * @param spAuthor species authority
		 * @param subtaxa subtaxon
		 * @param subtAuthor species authority at the most detailed taxonomic level
		 */
		public Taxon(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
			this.genus = genus;
			this.species = species;
			this.spAuthor = spAuthor;
			this.subtaxa = subtaxa;
			this.subtAuthor = subtAuthor;
		}

		/**
		 * Create {@code Taxon} from {@link IGrinSpecies}.
		 * The species name {@code "spp."} is normalized to {@code "sp."}.
		 * Populates {@link #currentTaxon} when the source record references a current taxonomy species id
		 * that differs from its own species id. Loading the current taxon is best effort: when it fails,
		 * a warning is logged and {@link #currentTaxon} stays {@code null}.
		 *
		 * @param grinSpecies source species
		 * @param speciesProvider species provider (id -> IGrinSpecies)
		 * @param genusNameProvider genus name provider (id -> string)
		 */
		public Taxon(IGrinSpecies grinSpecies, Function<Long, IGrinSpecies> speciesProvider, Function<Long, String> genusNameProvider) {
			this.genus = genusNameProvider.apply(grinSpecies.getGenusId());
			assert genus != null;
			this.grinSpeciesId = grinSpecies.getSpeciesId();
			this.species = grinSpecies.getSpeciesName();
			if (StringUtils.equals("spp.", this.species)) {
				this.species = "sp.";
			}
			this.spAuthor = grinSpecies.getSpeciesAuthority();
			this.subtaxa = grinSpecies.getSubtaxa();
			this.subtAuthor = grinSpecies.getSubtaxaAuthority();
			if (grinSpecies.getCurrentTaxonomySpeciesId() != null && !Objects.equal(grinSpecies.getSpeciesId(), grinSpecies.getCurrentTaxonomySpeciesId())) {
				try {
					this.currentTaxon = new Taxon(speciesProvider.apply(grinSpecies.getCurrentTaxonomySpeciesId()), speciesProvider, genusNameProvider);
				} catch (Throwable e) {
					// Deliberately broad: a failed lookup, a failed assertion or runaway recursion in the nested
					// constructor must not prevent this record from being created. The cause is logged, not dropped.
					log.warn("Could not load current taxon for id={}", grinSpecies.getCurrentTaxonomySpeciesId(), e);
				}
			}
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy