All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.genesys2.gringlobal.taxonomy.component.InMemoryTaxonomyDatabase Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015 Global Crop Diversity Trust
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.genesys2.gringlobal.taxonomy.component;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.genesys2.gringlobal.taxonomy.model.SpeciesRow;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link List} based in-memory "database". NOT THREAD-SAFE!
 */
public class InMemoryTaxonomyDatabase implements TaxonomyDatabase {

	/** The Constant LOG. */
	private final static Logger LOG = LoggerFactory.getLogger(InMemoryTaxonomyDatabase.class);

	/** The genus id lookup. */
	private Map> genusIdLookup = new HashMap<>();
	
	/** The species lookup. */
	private Map> speciesLookup = new HashMap<>();
	
	/** The species rows. */
	private int speciesRows;

	/**
	 * Add a genus to the database.
	 *
	 * @param genusId the genus id
	 * @param genus the genus
	 */
	public void registerGenus(Long genusId, String genus) {
		if (!genusIdLookup.containsKey(genus)) {
			genusIdLookup.put(genus, new ArrayList<>(1));
		}
		genusIdLookup.get(genus).add(genusId);
		speciesLookup.put(genusId, new ArrayList<>(1));
	}

	/**
	 * Add species to the database.
	 *
	 * @param speciesRow the species row
	 * @throws TaxonomyException the taxonomy exception
	 */
	public void registerSpecies(SpeciesRow speciesRow) throws TaxonomyException {
		List genusSpecies = speciesLookup.get(speciesRow.getGenusId());
		if (genusSpecies == null)
			throw new TaxonomyException("No genus with specified genusId");

		genusSpecies.add(speciesRow);
		speciesRows++;
	}

	/* (non-Javadoc)
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString() {
		return "InMemory Taxonomy Database: " + genusIdLookup.size() + " genera" + " and " + speciesRows + " species";
	}

	/**
	 * Case insensitive search for genus.
	 *
	 * @param genus the genus
	 * @return true, if successful
	 */
	@Override
	public boolean containsGenus(String genus) {
		return genusIdLookup.containsKey(genus);
		// .keySet().stream().anyMatch(g -> g.equalsIgnoreCase(genus));
	}

	/* (non-Javadoc)
	 * @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#findSimilarGenus(java.lang.String, int)
	 */
	@Override
	public List findSimilarGenus(String genus, int maxSize) {
		if (containsGenus(genus)) {
			LOG.trace("Database contains genus={}", genus);
			return Collections.emptyList();
		}

		BestScore bestScore = new BestScore();

		return genusIdLookup.keySet().parallelStream().map(candidate -> new Suggestion(candidate, similarityScore(genus, candidate, 100)))
				// .peek(InMemoryTaxonomyDatabase::print)
				.filter(scored -> scored.getScore() >= 0 && scored.getScore() < 99).sequential()
				// .peek(InMemoryTaxonomyDatabase::print)
				.peek(scored -> bestScore.update(scored.getScore())).sorted(Comparator.comparing(Suggestion::getScore))
				// .peek(InMemoryTaxonomyDatabase::print)
				.filter(scored -> scored.getScore() <= bestScore.getBestScore() * 1.5)
				// .filter(scored -> scored.getScore() - bestScore.getBestScore() <= 2)
				.peek(InMemoryTaxonomyDatabase::print).map(Suggestion::getSuggestion).distinct().limit(maxSize).collect(Collectors.toList());
	}

	/**
	 * Prints the.
	 *
	 * @param  the generic type
	 * @param suggestion the suggestion
	 */
	public static  void print(Suggestion suggestion) {
		if (LOG.isTraceEnabled())
			LOG.trace("Score={} suggestion={}", suggestion.getScore(), suggestion.getSuggestion());
	}

	/**
	 * Prints the.
	 *
	 * @param suggestion the suggestion
	 */
	public static void print(Object suggestion) {
		if (LOG.isTraceEnabled())
			LOG.trace(suggestion == null ? "NULL" : suggestion.getClass() + "=" + suggestion.toString());
	}

	/**
	 * Gets the all genus species.
	 *
	 * @param genus the genus
	 * @return the all genus species
	 */
	protected List getAllGenusSpecies(String genus) {
		if (!genusIdLookup.containsKey(genus)) {
			return Collections.emptyList();
		}

		return genusIdLookup.get(genus).stream()
				// .peek(InMemoryTaxonomyDatabase::print)
				.map(genusId -> speciesLookup.get(genusId)).reduce(new ArrayList(1), (all, genusSpecies) -> {
					all.addAll(genusSpecies);
					return all;
				});
	}

	/* (non-Javadoc)
	 * @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#containsSpecies(java.lang.String, java.lang.String)
	 */
	@Override
	public boolean containsSpecies(String genus, String species) {
		LOG.trace("Does database contain genus={} species={}", genus, species);

		if (!genusIdLookup.containsKey(genus)) {
			return false;
		}

		return getAllGenusSpecies(genus).stream().anyMatch(speciesRow -> species.equals(speciesRow.getSpeciesName()));
	}

	/* (non-Javadoc)
	 * @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#findSimilarSpecies(java.lang.String, java.lang.String, int)
	 */
	@Override
	public List findSimilarSpecies(String genus, String species, int maxSize) {
		LOG.debug("Searching similar species for genus={} species={}", genus, species);
		if ("Unknown".equals(genus)) {
			// Don't make suggestions
			return Collections.emptyList();
		}
		List genusId = genusIdLookup.get(genus);
		if (genusId == null) {
			throw new UnsupportedOperationException("Genus does not exist in database. Genus=" + genus);
		}

		BestScore bestScore = new BestScore();
		// System.err.println("doo");
		return getAllGenusSpecies(genus).stream().map(SpeciesRow::getSpeciesName).distinct()
				// .peek(InMemoryTaxonomyDatabase::print)
				.map(candidate -> new Suggestion(candidate, similarityScore(species, candidate, 5))).filter(scored -> scored.getScore() >= 0 && scored.getScore() < 4)
				.sorted(Comparator.comparing(Suggestion::getScore))
				// .peek(InMemoryTaxonomyDatabase::print)
				.peek(scored -> bestScore.update(scored.getScore())).filter(scored -> scored.getScore() <= bestScore.getBestScore() * 1.5)
				// .filter(scored -> scored.getScore() - bestScore.getBestScore() <= 2)
				.peek(InMemoryTaxonomyDatabase::print).map(Suggestion::getSuggestion).distinct().limit(maxSize).collect(Collectors.toList());
	}

	/**
	 * StringUtils.getLevenshteinDistance
	 *
	 * @param original the original
	 * @param candidate the candidate
	 * @param threshold the threshold
	 * @return the float
	 */
	private float similarityScore(String original, String candidate, int threshold) {
		return StringUtils.getLevenshteinDistance(original.toLowerCase(), candidate.toLowerCase(), threshold);
		// (float) StringUtils.getJaroWinklerDistance(original, candidate)
	}

	/* (non-Javadoc)
	 * @see org.genesys2.gringlobal.taxonomy.component.TaxonomyDatabase#getSpeciesAuthority(java.lang.String, java.lang.String)
	 */
	@Override
	public String getSpeciesAuthority(String genus, String species) {
		List genusId = genusIdLookup.get(genus);
		if (genusId == null) {
			return null;
		}

		return getAllGenusSpecies(genus).stream().filter(speciesRow -> species.equals(speciesRow.getSpeciesName()))
				.peek(speciesRow -> LOG.trace("Species authority {}", speciesRow.getSpeciesAuthority())).findFirst().map(speciesRow -> speciesRow.getSpeciesAuthority())
				.orElse(null);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy