
org.genesys2.gringlobal.taxonomy.component.InMemoryTaxonomyDatabase Maven / Gradle / Ivy
The newest version!
/*
* Copyright 2015 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys2.gringlobal.taxonomy.component;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.genesys2.gringlobal.taxonomy.model.SpeciesRow;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* {@link List} based in-memory "database". NOT THREAD-SAFE!
*/
public class InMemoryTaxonomyDatabase implements TaxonomyDatabase {
/** The Constant LOG. */
private final static Logger LOG = LoggerFactory.getLogger(InMemoryTaxonomyDatabase.class);
/** The genus id lookup. */
private Map> genusIdLookup = new HashMap<>();
/** The species lookup. */
private Map> speciesLookup = new HashMap<>();
/** The species rows. */
private int speciesRows;
/**
* Add a genus to the database.
*
* @param genusId the genus id
* @param genus the genus
*/
public void registerGenus(Long genusId, String genus) {
if (!genusIdLookup.containsKey(genus)) {
genusIdLookup.put(genus, new ArrayList<>(1));
}
genusIdLookup.get(genus).add(genusId);
speciesLookup.put(genusId, new ArrayList<>(1));
}
/**
* Add species to the database.
*
* @param speciesRow the species row
* @throws TaxonomyException the taxonomy exception
*/
public void registerSpecies(SpeciesRow speciesRow) throws TaxonomyException {
List genusSpecies = speciesLookup.get(speciesRow.getGenusId());
if (genusSpecies == null)
throw new TaxonomyException("No genus with specified genusId");
genusSpecies.add(speciesRow);
speciesRows++;
}
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return "InMemory Taxonomy Database: " + genusIdLookup.size() + " genera" + " and " + speciesRows + " species";
}
/**
* Case insensitive search for genus.
*
* @param genus the genus
* @return true, if successful
*/
@Override
public boolean containsGenus(String genus) {
return genusIdLookup.containsKey(genus);
// .keySet().stream().anyMatch(g -> g.equalsIgnoreCase(genus));
}
/* (non-Javadoc)
* @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#findSimilarGenus(java.lang.String, int)
*/
@Override
public List findSimilarGenus(String genus, int maxSize) {
if (containsGenus(genus)) {
LOG.trace("Database contains genus={}", genus);
return Collections.emptyList();
}
BestScore bestScore = new BestScore();
return genusIdLookup.keySet().parallelStream().map(candidate -> new Suggestion(candidate, similarityScore(genus, candidate, 100)))
// .peek(InMemoryTaxonomyDatabase::print)
.filter(scored -> scored.getScore() >= 0 && scored.getScore() < 99).sequential()
// .peek(InMemoryTaxonomyDatabase::print)
.peek(scored -> bestScore.update(scored.getScore())).sorted(Comparator.comparing(Suggestion::getScore))
// .peek(InMemoryTaxonomyDatabase::print)
.filter(scored -> scored.getScore() <= bestScore.getBestScore() * 1.5)
// .filter(scored -> scored.getScore() - bestScore.getBestScore() <= 2)
.peek(InMemoryTaxonomyDatabase::print).map(Suggestion::getSuggestion).distinct().limit(maxSize).collect(Collectors.toList());
}
/**
* Prints the.
*
* @param the generic type
* @param suggestion the suggestion
*/
public static void print(Suggestion suggestion) {
if (LOG.isTraceEnabled())
LOG.trace("Score={} suggestion={}", suggestion.getScore(), suggestion.getSuggestion());
}
/**
* Prints the.
*
* @param suggestion the suggestion
*/
public static void print(Object suggestion) {
if (LOG.isTraceEnabled())
LOG.trace(suggestion == null ? "NULL" : suggestion.getClass() + "=" + suggestion.toString());
}
/**
* Gets the all genus species.
*
* @param genus the genus
* @return the all genus species
*/
protected List getAllGenusSpecies(String genus) {
if (!genusIdLookup.containsKey(genus)) {
return Collections.emptyList();
}
return genusIdLookup.get(genus).stream()
// .peek(InMemoryTaxonomyDatabase::print)
.map(genusId -> speciesLookup.get(genusId)).reduce(new ArrayList(1), (all, genusSpecies) -> {
all.addAll(genusSpecies);
return all;
});
}
/* (non-Javadoc)
* @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#containsSpecies(java.lang.String, java.lang.String)
*/
@Override
public boolean containsSpecies(String genus, String species) {
LOG.trace("Does database contain genus={} species={}", genus, species);
if (!genusIdLookup.containsKey(genus)) {
return false;
}
return getAllGenusSpecies(genus).stream().anyMatch(speciesRow -> species.equals(speciesRow.getSpeciesName()));
}
/* (non-Javadoc)
* @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#findSimilarSpecies(java.lang.String, java.lang.String, int)
*/
@Override
public List findSimilarSpecies(String genus, String species, int maxSize) {
LOG.debug("Searching similar species for genus={} species={}", genus, species);
if ("Unknown".equals(genus)) {
// Don't make suggestions
return Collections.emptyList();
}
List genusId = genusIdLookup.get(genus);
if (genusId == null) {
throw new UnsupportedOperationException("Genus does not exist in database. Genus=" + genus);
}
BestScore bestScore = new BestScore();
// System.err.println("doo");
return getAllGenusSpecies(genus).stream().map(SpeciesRow::getSpeciesName).distinct()
// .peek(InMemoryTaxonomyDatabase::print)
.map(candidate -> new Suggestion(candidate, similarityScore(species, candidate, 5))).filter(scored -> scored.getScore() >= 0 && scored.getScore() < 4)
.sorted(Comparator.comparing(Suggestion::getScore))
// .peek(InMemoryTaxonomyDatabase::print)
.peek(scored -> bestScore.update(scored.getScore())).filter(scored -> scored.getScore() <= bestScore.getBestScore() * 1.5)
// .filter(scored -> scored.getScore() - bestScore.getBestScore() <= 2)
.peek(InMemoryTaxonomyDatabase::print).map(Suggestion::getSuggestion).distinct().limit(maxSize).collect(Collectors.toList());
}
/**
* StringUtils.getLevenshteinDistance
*
* @param original the original
* @param candidate the candidate
* @param threshold the threshold
* @return the float
*/
private float similarityScore(String original, String candidate, int threshold) {
return StringUtils.getLevenshteinDistance(original.toLowerCase(), candidate.toLowerCase(), threshold);
// (float) StringUtils.getJaroWinklerDistance(original, candidate)
}
/* (non-Javadoc)
* @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#getSpeciesAuthority(java.lang.String, java.lang.String)
*/
@Override
public String getSpeciesAuthority(String genus, String species) {
List genusId = genusIdLookup.get(genus);
if (genusId == null) {
return null;
}
return getAllGenusSpecies(genus).stream().filter(speciesRow -> species.equals(speciesRow.getSpeciesName()))
.peek(speciesRow -> LOG.trace("Species authority {}", speciesRow.getSpeciesAuthority())).findFirst().map(speciesRow -> speciesRow.getSpeciesAuthority())
.orElse(null);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy