// org.genesys.taxonomy.checker.TaxonomyChecker Maven / Gradle / Ivy
/*
* Copyright 2016 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.taxonomy.checker;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.taxonomy.gringlobal.model.IGrinSpecies;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Objects;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
/**
* Suggest fixes to scientific names based on an in-memory list of valid taxonomies.
*/
public class TaxonomyChecker {
/** Logger of the checker. */
private static final Logger LOG = LoggerFactory.getLogger(TaxonomyChecker.class);

/** Genus name returned by {@code suggestGenus} when the input genus is blank or a placeholder. */
private static final String UNKNOWN_GENUS = "Unknown";

/** Genus values that are handled as "genus not identified". */
private static final String[] UNKNOWN_GENUS_ALT = { UNKNOWN_GENUS, "Unidentified" };

/** Species epithet returned by {@code suggestSpecies} when genus or species is not identified. */
private static final String UNKNOWN_SPECIES = "sp.";

/** Species values that are handled as "species not identified". */
private static final String[] UNKNOWN_SPECIES_ALT = { UNKNOWN_SPECIES, "spp.", "sp", "spp" };

/** Taxonomy database used by every lookup; assigned through {@code setTaxonomyDatabase}. */
private TaxonomyDatabase database;
/**
 * Assigns the taxonomy database that the lookups of this checker delegate to.
 *
 * @param database the taxonomy database to use from now on
 */
public void setTaxonomyDatabase(final TaxonomyDatabase database) {
    this.database = database;
}
/**
 * Find suggestions for GENUS.
 *
 * <ul>
 * <li>A blank genus, or one listed in {@code UNKNOWN_GENUS_ALT}, yields the single entry
 * {@code "Unknown"}.</li>
 * <li>A genus contained in the database is returned unchanged as the single entry.</li>
 * <li>Any other genus yields whatever the database similarity lookup returns.</li>
 * </ul>
 *
 * @param genus the genus to check, may be {@code null} or blank
 * @param maxSize limit handed to the database similarity lookup
 * @return the accepted genus as a one-element list, or the similar genera reported by the database
 */
public List<String> suggestGenus(String genus, int maxSize) {
    if (StringUtils.isBlank(genus) || ArrayUtils.contains(UNKNOWN_GENUS_ALT, genus)) {
        return Arrays.asList(UNKNOWN_GENUS);
    }
    if (database.containsGenus(genus)) {
        LOG.trace("Database contains genus={}", genus);
        return Arrays.asList(genus);
    }
    // Not a known genus: fall back to similarity search
    LOG.debug("Database does not contain genus={}", genus);
    return database.findSimilarGenus(genus, maxSize);
}
/**
 * Find suggestions for SPECIES without providing a species authority.
 * Same as calling the four-argument variant with a {@code null} {@code spAuthor}.
 *
 * @param genus the genus
 * @param species the species
 * @param maxSize maximum number of suggestions to return
 * @return the accepted species as a one-element list, or suggested fixes for species
 */
public List<String> suggestSpecies(String genus, String species, int maxSize) {
    return suggestSpecies(genus, species, null, maxSize);
}
/**
 * Find suggestions for SPECIES.
 *
 * <p>Checks are applied in this order:</p>
 * <ol>
 * <li>genus listed in {@code UNKNOWN_GENUS_ALT}: returns {@code ["sp."]};</li>
 * <li>species blank or listed in {@code UNKNOWN_SPECIES_ALT}: returns {@code ["sp."]};</li>
 * <li>database contains genus + species: returns the species unchanged as the single entry;</li>
 * <li>database contains the genus: returns similar species of that genus (limited by {@code maxSize});</li>
 * <li>otherwise: similar species are looked up for each genus suggested by {@link #suggestGenus}.
 * A suggestion that belongs to a different genus is prefixed with {@code "(Genus) "}.</li>
 * </ol>
 *
 * @param genus the genus
 * @param species the species
 * @param spAuthor species authority, passed on to the database similarity lookup
 * @param maxSize maximum number of suggestions requested from the database when the genus is listed
 * @return the accepted species as a one-element list, or suggested fixes for species
 */
public List<String> suggestSpecies(String genus, String species, String spAuthor, int maxSize) {
    LOG.debug("Suggesting species for genus={} species={}", genus, species);

    if (ArrayUtils.contains(UNKNOWN_GENUS_ALT, genus)) {
        return Arrays.asList(UNKNOWN_SPECIES);
    }
    if (StringUtils.isBlank(species) || ArrayUtils.contains(UNKNOWN_SPECIES_ALT, species)) {
        return List.of(UNKNOWN_SPECIES);
    }
    if (database.containsSpecies(genus, species)) {
        return Arrays.asList(species);
    }
    if (database.containsGenus(genus)) {
        // Genus exists
        LOG.debug("Database contains genus={}", genus);
        return database.findSimilarSpecies(genus, species, spAuthor, maxSize);
    }
    LOG.debug("Database does not contain genus={}", genus);

    // Genus not listed in the database, go through suggestions.
    // NOTE(review): this fallback asks for at most 2 genera and 2 species per genus and does
    // not take maxSize into account -- confirm that this is intended.
    final List<String> suggestions = new ArrayList<>();
    final List<String> candidateGenera = suggestGenus(genus, 2);
    for (String suggestedGenus : candidateGenera) {
        LOG.debug("Suggesting with suggested genus={}", suggestedGenus);
        final List<String> similarSpecies = database.findSimilarSpecies(suggestedGenus, species, spAuthor, 2);
        suggestions.addAll(similarSpecies.stream()
            .map(suggestion -> suggestedGenus.equalsIgnoreCase(genus) ? suggestion : "(" + suggestedGenus + ") " + suggestion)
            .collect(Collectors.toList()));
    }
    return suggestions;
}
/**
 * Look up the species authority for genus + species when no authority is known.
 * Delegates to the three-argument variant with a {@code null} authority.
 *
 * @param genus the genus
 * @param species the species
 * @return result of the three-argument variant, which is {@code null} when the database
 *         does not contain the genus + species
 */
public List getSpeciesAuthority(String genus, String species) {
    final String noAuthority = null;
    return getSpeciesAuthority(genus, species, noAuthority);
}
/**
 * Look up the species authority for genus + species.
 * The existence check uses genus and species only; {@code spAuthor} is passed to the
 * authority lookup itself.
 *
 * @param genus the genus
 * @param species the species
 * @param spAuthor species authority
 * @return what the database reports as species authority, or {@code null} when the database
 *         does not contain the genus + species
 */
public List getSpeciesAuthority(String genus, String species, String spAuthor) {
    if (database.containsSpecies(genus, species)) {
        return database.getSpeciesAuthority(genus, species, spAuthor);
    }
    // Species is not listed for this genus
    return null;
}
/**
 * Find suggestions for SUBTAXA without providing authorities.
 * Same as calling the six-argument variant with {@code null} for {@code spAuthor}
 * and {@code subtAuthor}.
 *
 * @param genus the genus
 * @param species the species within genus
 * @param subtaxa current subtaxa; a blank value yields an empty list
 * @param maxSize maximum number of suggestions to return
 * @return the accepted subtaxa as a one-element list, suggested fixes for subtaxa,
 *         or an empty list
 */
public List<String> suggestSubtaxa(String genus, String species, String subtaxa, int maxSize) {
    return suggestSubtaxa(genus, species, null, subtaxa, null, maxSize);
}
/**
 * Find suggestions for SUBTAXA.
 *
 * <ul>
 * <li>Blank {@code subtaxa}, or genus + species + spAuthor not contained in the database:
 * returns an empty list.</li>
 * <li>The database contains the subtaxa: returns it unchanged as the single entry.</li>
 * <li>Otherwise returns what the database similarity lookup reports.</li>
 * </ul>
 *
 * @param genus the genus
 * @param species the species within genus
 * @param spAuthor species authority
 * @param subtaxa current subtaxa; a blank value yields an empty list
 * @param subtAuthor authority at the lowest classification level
 * @param maxSize maximum number of suggestions requested from the database
 * @return the accepted subtaxa as a one-element list, suggested fixes for subtaxa,
 *         or an empty list
 */
public List<String> suggestSubtaxa(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
    if (StringUtils.isBlank(subtaxa) || !database.containsSpecies(genus, species, spAuthor)) {
        // Nothing to check, or the species itself is not listed
        return List.of();
    }
    if (database.containsSubtaxa(genus, species, spAuthor, subtaxa, subtAuthor)) {
        return List.of(subtaxa);
    }
    return database.findSimilarSubtaxa(genus, species, spAuthor, subtaxa, subtAuthor, maxSize);
}
/**
 * Look up the authority for genus + species + subtaxa.
 * The existence check uses genus, species and subtaxa only; both authorities are passed
 * to the authority lookup itself.
 *
 * @param genus the genus
 * @param species the species
 * @param spAuthor species authority
 * @param subtaxa subtaxa to check
 * @param subtAuthor authority at the lowest classification level
 * @return what the database reports as subtaxa authority, or {@code null} when the database
 *         does not contain the genus + species + subtaxa
 */
public List getSubtaxaAuthority(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
    final boolean listed = database.containsSubtaxa(genus, species, subtaxa);
    return listed ? database.getSubtaxaAuthority(genus, species, spAuthor, subtaxa, subtAuthor) : null;
}
/**
 * Find matching taxa. The call is handed to the taxonomy database and its result is
 * returned unchanged.
 *
 * @param genus genus name
 * @param species species name
 * @param spAuthor species authority
 * @param subtaxa subtaxa to check
 * @param subtAuthor authority at the lowest classification level
 * @param maxSize maximum number of suggestions to return
 * @return matches as reported by the database; declared {@code @Nonnull}
 * @since 3.10
 */
public @Nonnull List findTaxa(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
    final List matches = database.findTaxa(genus, species, spAuthor, subtaxa, subtAuthor, maxSize);
    return matches;
}
/**
 * Get the taxon for the given names and authorities. The call is handed to the taxonomy
 * database and its result is returned unchanged.
 *
 * @param genus genus name
 * @param species species name
 * @param spAuthor species authority
 * @param subtaxa subtaxa to check
 * @param subtAuthor authority at the lowest classification level
 * @return the record returned by the database lookup
 * @since 3.10
 */
public Taxon getTaxon(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
    final Taxon exactMatch = database.getTaxon(genus, species, spAuthor, subtaxa, subtAuthor);
    return exactMatch;
}
/**
 * Find similar taxa. The call is handed to the taxonomy database and its result is
 * returned unchanged.
 *
 * @param genus genus name
 * @param species species name
 * @param spAuthor species authority
 * @param subtaxa subtaxa to check
 * @param subtAuthor authority at the lowest classification level
 * @param maxSize maximum number of suggestions to return
 * @return similar records as reported by the database; declared {@code @Nonnull}
 * @since 3.10
 */
public @Nonnull List findSimilar(String genus, String species, String spAuthor, String subtaxa, String subtAuthor, int maxSize) {
    final List similar = database.findSimilar(genus, species, spAuthor, subtaxa, subtAuthor, maxSize);
    return similar;
}
/**
 * Get the string similarity score of two strings as computed by the taxonomy database.
 *
 * @param string1 string A
 * @param string2 string B
 * @return the score reported by the database for the two strings
 */
public double similarityScore(final String string1, final String string2) {
    final double score = database.similarityScore(string1, string2);
    return score;
}
/**
 * Taxon record: genus, species and subtaxa with their authorities, optionally linked to
 * the GRIN species id and to the current taxon.
 *
 * <p>{@code currentTaxon} is excluded from the generated {@code toString}, {@code equals}
 * and {@code hashCode}.</p>
 *
 * @since 3.10
 */
@Data
@Slf4j
public static class Taxon {
    private String genus;
    private String species;
    private String spAuthor;
    private String subtaxa;
    private String subtAuthor;
    private Long grinSpeciesId;

    /** Taxon that the source record points to as current; stays {@code null} unless resolved. */
    @ToString.Exclude
    @EqualsAndHashCode.Exclude
    private Taxon currentTaxon;

    /**
     * Blank
     */
    public Taxon() {
    }

    /**
     * Create {@code Taxon} from data.
     *
     * @param genus genus name
     * @param species specific epithet
     * @param spAuthor species authority
     * @param subtaxa subtaxon
     * @param subtAuthor species authority at the most detailed taxonomic level
     */
    public Taxon(String genus, String species, String spAuthor, String subtaxa, String subtAuthor) {
        this.genus = genus;
        this.species = species;
        this.spAuthor = spAuthor;
        this.subtaxa = subtaxa;
        this.subtAuthor = subtAuthor;
    }

    /**
     * Create {@code Taxon} from {@link IGrinSpecies}.
     * Populates {@link #currentTaxon} when the source record refers to a current taxonomy
     * species id that differs from its own species id. A failure to resolve that record is
     * logged and leaves {@link #currentTaxon} unset.
     *
     * <p>The species name {@code "spp."} is stored as {@code "sp."}.</p>
     *
     * @param grinSpecies source species
     * @param speciesProvider species provider (maps species id to {@code IGrinSpecies})
     * @param genusNameProvider genus name provider (maps genus id to genus name)
     */
    // NOTE(review): the Long key type of both providers is assumed from the Long
    // grinSpeciesId field -- confirm against IGrinSpecies.
    public Taxon(IGrinSpecies grinSpecies, Function<Long, IGrinSpecies> speciesProvider, Function<Long, String> genusNameProvider) {
        this.genus = genusNameProvider.apply(grinSpecies.getGenusId());
        // Only checked when assertions are enabled
        assert (genus != null);
        this.grinSpeciesId = grinSpecies.getSpeciesId();
        this.species = grinSpecies.getSpeciesName();
        if (StringUtils.equals("spp.", this.species)) {
            this.species = "sp.";
        }
        this.spAuthor = grinSpecies.getSpeciesAuthority();
        this.subtaxa = grinSpecies.getSubtaxa();
        this.subtAuthor = grinSpecies.getSubtaxaAuthority();

        if (grinSpecies.getCurrentTaxonomySpeciesId() != null && !Objects.equal(grinSpecies.getSpeciesId(), grinSpecies.getCurrentTaxonomySpeciesId())) {
            try {
                this.currentTaxon = new Taxon(speciesProvider.apply(grinSpecies.getCurrentTaxonomySpeciesId()), speciesProvider, genusNameProvider);
            } catch (Throwable e) {
                // Best effort. Throwable is kept on purpose: the nested constructor call can
                // fail with an Error as well (e.g. AssertionError from the assert above).
                // The cause is handed to the logger so the failure can be diagnosed.
                log.warn("Could not load current taxon for id={}", grinSpecies.getCurrentTaxonomySpeciesId(), e);
            }
        }
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy