
// io.github.qudtlib.support.parse.UnitParser Maven / Gradle / Ivy
package io.github.qudtlib.support.parse;
import static java.util.function.Predicate.not;
import io.github.qudtlib.Qudt;
import io.github.qudtlib.exception.IncompleteDataException;
import io.github.qudtlib.model.DerivedUnitSearchMode;
import io.github.qudtlib.model.FactorUnits;
import io.github.qudtlib.model.QuantityKind;
import io.github.qudtlib.model.Unit;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Parses a textual unit expression into the set of {@link Unit}s it may denote, optionally
 * constrained by a {@link QuantityKind}.
 *
 * <p>The parser performs a best-first search over {@link State}s: the most promising state is
 * expanded via {@link State#nextTransition()} until one or more states report a complete parse
 * that can be resolved to at least one unit.
 */
public class UnitParser {
    /** The raw text to parse. */
    String input;

    /** Optional quantity kind used to narrow down the candidates; may be {@code null}. */
    QuantityKind quantityKind;

    /**
     * Creates a parser that is not constrained by a quantity kind.
     *
     * @param input the text to parse
     */
    public UnitParser(String input) {
        this(input, null);
    }

    /**
     * Creates a parser for the given input.
     *
     * @param input the text to parse
     * @param quantityKind quantity kind the result should fit, or {@code null} for no constraint
     */
    public UnitParser(String input, QuantityKind quantityKind) {
        this.input = input;
        this.quantityKind = quantityKind;
    }

    /**
     * Parses the input and returns the matching units.
     *
     * <p>States are expanded in priority order. As soon as an expansion yields completely parsed
     * states, their units are looked up, narrowed by the quantity kind (if any), then by relaxed
     * and exact symbol matching. The first non-empty result set is returned.
     *
     * @return the matching units, or an empty set if no unit could be found
     */
    public Set<Unit> parse() {
        // Priority: lowest badness first, then most parsed units, then the largest amount of
        // remaining + leftover input. The hash code only serves as a tie-breaker so that distinct
        // states with otherwise equal keys are not collapsed by the TreeSet.
        Comparator<State> byPriority =
                Comparator.comparing(State::badness)
                        .thenComparing(s -> -s.getParsedUnits().size())
                        .thenComparing(
                                s ->
                                        -(s.getRemainingInput().length()
                                                + Optional.ofNullable(s.getLeftoverInput())
                                                        .map(String::length)
                                                        .orElse(0)))
                        .thenComparing(Object::hashCode);
        NavigableSet<State> states = new TreeSet<>(byPriority);
        states.add(
                new State(
                        this.input,
                        StateTransition.UNIT,
                        StateTransition.WHITESPACE,
                        StateTransition.DIVIDER,
                        StateTransition.ONE));

        while (!states.isEmpty()) {
            State currentState = states.pollFirst();
            List<State> finishedStates = new ArrayList<>();
            for (State successor : currentState.nextTransition()) {
                if (successor.isParseComplete()) {
                    finishedStates.add(successor);
                } else {
                    states.add(successor);
                }
            }
            if (finishedStates.isEmpty()) {
                continue;
            }
            Set<Unit> results = collectCandidateUnits(finishedStates);
            if (results.isEmpty()) {
                continue;
            }
            results = narrowByQuantityKind(results);
            if (results.size() > 1) {
                results = this.findBetterMatches(results);
            }
            if (results.size() > 1) {
                results = this.retainOnlyExactMatchesIfPresent(results);
            }
            if (!results.isEmpty()) {
                return results;
            }
            // nothing usable came out of these finished states: keep searching
        }
        return Set.of();
    }

    /** Looks up the units denoted by the given completely parsed states. */
    private Set<Unit> collectCandidateUnits(List<State> finishedStates) {
        Set<Unit> candidates = new HashSet<>();
        for (State finishedState : finishedStates) {
            List<ParsedUnit> parsedUnits = finishedState.getParsedUnits();
            if (parsedUnits.isEmpty()) {
                continue;
            }
            FactorUnits factorUnits =
                    new FactorUnits(
                            parsedUnits.stream()
                                    .map(ParsedUnit::getFactorUnit)
                                    .collect(Collectors.toUnmodifiableList()));
            if (canFilterByDimensionVector()) {
                try {
                    if (this.quantityKind
                            .getDimensionVector()
                            .get()
                            .equals(factorUnits.getDimensionVector())) {
                        findUnits(factorUnits, parsedUnits).forEach(candidates::add);
                    }
                } catch (IncompleteDataException ignored) {
                    // ignore: unit will not be found
                }
            } else {
                // cannot filter by dim vector, add everything
                findUnits(factorUnits, parsedUnits).forEach(candidates::add);
            }
        }
        return candidates;
    }

    /** Tells whether the configured quantity kind offers a dimension vector to filter by. */
    private boolean canFilterByDimensionVector() {
        return this.quantityKind != null
                && !this.quantityKind.isDeprecated()
                && !this.quantityKind.equals(Qudt.QuantityKinds.Unknown)
                && this.quantityKind.getDimensionVector().isPresent();
    }

    /**
     * Restricts the candidates to the applicable units of the quantity kind; if none of them is
     * applicable, falls back to the candidates whose dimension vector equals that of the quantity
     * kind (candidates without a dimension vector are kept).
     */
    private Set<Unit> narrowByQuantityKind(Set<Unit> results) {
        if (this.quantityKind == null) {
            return results;
        }
        Set<Unit> applicable = new HashSet<>(results);
        applicable.retainAll(this.quantityKind.getApplicableUnits());
        if (!applicable.isEmpty()) {
            return applicable;
        }
        return results.stream()
                .filter(
                        u ->
                                u.getDimensionVector()
                                        .map(
                                                dv ->
                                                        dv.equals(
                                                                this.quantityKind
                                                                        .getDimensionVector()
                                                                        .orElse(null)))
                                        .orElse(true))
                .collect(Collectors.toSet());
    }

    /**
     * Returns only those units whose symbol or UCUM code equals the input verbatim, or all given
     * units if there is no such unit.
     */
    private Set<Unit> retainOnlyExactMatchesIfPresent(Set<Unit> results) {
        Set<Unit> exactMatches =
                results.stream()
                        .filter(
                                u ->
                                        this.input.equals(u.getSymbol().orElse("[no symbol]"))
                                                || this.input.equals(
                                                        u.getUcumCode().orElse("[no ucumCode]")))
                        .collect(Collectors.toSet());
        return exactMatches.isEmpty() ? results : exactMatches;
    }

    /** Finds the units derived from the given factor units that consist of exactly the parsed units. */
    private Stream<Unit> findUnits(FactorUnits factorUnits, List<ParsedUnit> parsedUnits) {
        return Qudt.unitsFromFactorUnits(DerivedUnitSearchMode.ALL, factorUnits.getFactorUnits())
                .stream()
                .filter(u -> containsAllParsedUnits(u, parsedUnits));
    }

    /** Checks whether the factor units of {@code u} equal those of the parsed units. */
    private boolean containsAllParsedUnits(Unit u, List<ParsedUnit> parsedUnits) {
        FactorUnits required =
                new FactorUnits(parsedUnits.stream().map(ParsedUnit::getFactorUnit).toList());
        FactorUnits toCheck = u.getFactorUnits();
        return FactorUnits.ofUnit(u).equals(required) || toCheck.equals(required);
    }

    /**
     * Tries to reduce an ambiguous result: prefers units that are a relaxed match for the input
     * and, if that is still ambiguous and a quantity kind is set, units that list exactly that
     * quantity kind. Returns the given units unchanged if no criterion selects anything.
     */
    private Set<Unit> findBetterMatches(Set<Unit> results) {
        Set<Unit> better =
                results.stream()
                        .filter(u -> UnitTransition.isRelaxedMatchForUnit(input, u))
                        .collect(Collectors.toSet());
        if (better.size() > 1 && this.quantityKind != null) {
            // NOTE(review): this filters 'results', not 'better', so units that are no relaxed
            // match can be returned here - confirm whether that is intended.
            Set<Unit> onlyWithExactQuantitykind =
                    results.stream()
                            .filter(u -> u.getQuantityKinds().contains(this.quantityKind))
                            .collect(Collectors.toSet());
            if (!onlyWithExactQuantitykind.isEmpty()) {
                return onlyWithExactQuantitykind;
            }
        }
        return better.isEmpty() ? results : better;
    }

    @Override
    public String toString() {
        // quantityKind is null when the single-argument constructor was used
        return "UnitParser{"
                + "input='"
                + input
                + '\''
                + ", quantityKind="
                + (quantityKind == null
                        ? "null"
                        : Qudt.NAMESPACES.quantityKind.abbreviate(quantityKind.getIri()))
                + '}';
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy