
// org.molgenis.data.mapper.service.impl.UnitResolverImpl (Maven / Gradle / Ivy)
// The newest version!
package org.molgenis.data.mapper.service.impl;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.molgenis.data.mapper.service.UnitResolver;
import org.molgenis.data.mapper.utils.UnitHelper;
import org.molgenis.data.meta.model.Attribute;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.data.semanticsearch.string.NGramDistanceAlgorithm;
import org.molgenis.ontology.core.model.Ontology;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.core.service.OntologyService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import javax.measure.quantity.Quantity;
import javax.measure.unit.Unit;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static java.util.Objects.requireNonNull;
public class UnitResolverImpl implements UnitResolver
{
private static final Logger LOG = LoggerFactory.getLogger(UnitResolverImpl.class);
static final String UNIT_ONTOLOGY_IRI = "http://purl.obolibrary.org/obo/uo.owl";
private final OntologyService ontologyService;
@Autowired
public UnitResolverImpl(OntologyService ontologyService)
{
this.ontologyService = requireNonNull(ontologyService);
}
@Override
public Unit extends Quantity> resolveUnit(Attribute attr, EntityType entityType)
{
Set tokens = tokenize(attr.getLabel(), attr.getDescription());
// Option 1: Check if a term matches a unit
Unit extends Quantity> unit = null;
if (!tokens.isEmpty())
{
for (String term : tokens)
{
try
{
unit = Unit.valueOf(term);
break;
}
catch (IllegalArgumentException e)
{
// noop
}
}
if (isUnitEmpty(unit))
{
// Option 2: Search unit ontology for a match
OntologyTerm unitOntologyTerm = resolveUnitOntologyTerm(
tokens.stream().map(this::convertNumberToOntologyTermStyle).collect(Collectors.toSet()));
if (unitOntologyTerm != null)
{
// try label + synonym labels until hit
for (String synonymLabel : unitOntologyTerm.getSynonyms())
{
try
{
unit = Unit.valueOf(synonymLabel);
break;
}
catch (IllegalArgumentException e)
{
// noop
}
}
}
}
}
if (isUnitEmpty(unit))
{
unit = null;
}
return unit;
}
private OntologyTerm resolveUnitOntologyTerm(Set tokens)
{
OntologyTerm unitOntologyTerm;
Ontology unitOntology = ontologyService.getOntology(UNIT_ONTOLOGY_IRI);
if (unitOntology != null)
{
if (!tokens.isEmpty())
{
List ontologyIds = Arrays.asList(unitOntology.getId());
List ontologyTerms = ontologyService
.findExcatOntologyTerms(ontologyIds, tokens, Integer.MAX_VALUE);
if (ontologyTerms != null && !ontologyTerms.isEmpty())
{
if (ontologyTerms.size() == 1)
{
unitOntologyTerm = ontologyTerms.get(0);
}
else
{
// multiple unit ontology terms detected, pick first
unitOntologyTerm = ontologyTerms.get(0);
}
}
else
{
unitOntologyTerm = null;
}
}
else
{
unitOntologyTerm = null;
}
}
else
{
LOG.warn("Unit resolver is missing required unit ontology [" + UNIT_ONTOLOGY_IRI + "]");
unitOntologyTerm = null;
}
return unitOntologyTerm;
}
String convertNumberToOntologyTermStyle(String term)
{
term = UnitHelper.superscriptToNumber(term.replaceAll("\\^", StringUtils.EMPTY));
Pattern pattern = Pattern.compile("\\w+(\\d+)");
Matcher matcher = pattern.matcher(term);
if (matcher.find())
{
String group = matcher.group(1);
String modifiedPart = group.trim();
modifiedPart = "^[" + modifiedPart + "]";
term = term.replaceAll(group, modifiedPart);
}
return QueryParser.escape(term);
}
Set tokenize(String... terms)
{
Set tokens = new HashSet<>();
if (terms != null && terms.length > 0)
{
Sets.newHashSet(terms).stream().filter(StringUtils::isNotBlank).map(StringUtils::lowerCase)
.map(this::replaceIllegalChars).forEach(term -> tokens
.addAll(Sets.newHashSet(term.split("\\s+")).stream().filter(this::notPureNumberExpression)
.map(UnitHelper::numberToSuperscript).collect(Collectors.toSet())));
tokens.removeAll(NGramDistanceAlgorithm.STOPWORDSLIST);
}
return tokens;
}
boolean isUnitEmpty(Unit extends Quantity> unit)
{
return unit == null || (unit != null && StringUtils.isBlank(unit.toString()));
}
boolean notPureNumberExpression(String str)
{
return !str.matches("\\d+");
}
String replaceIllegalChars(String term)
{
return UnitHelper.superscriptToNumber(term).replaceAll("[^a-zA-Z0-9 /\\^]", " ");
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy