All downloads are free. Search and download functionality uses the official Maven repository.

io.spokestack.spokestack.nlu.tensorflow.parsers.IntegerParser Maven / Gradle / Ivy

package io.spokestack.spokestack.nlu.tensorflow.parsers;

import io.spokestack.spokestack.nlu.tensorflow.SlotParser;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * This parser converts string representations of integers into integers. These
 * strings may be digits or cardinal or ordinal number names. Strict hyphenation
 * of two-digit numbers is optional.
 *
 * 

* Only positive values are recognized. *

* *

* A "range" property is expected in the slot's metadata; this property * represents a half-open interval of (start, end] for the range of values the * parser should accept. Regardless of the end of the interval, the maximum * value that will be parsed is Java's {@link Integer#MAX_VALUE}; values greater * than this will result in an exception. *

*/ public final class IntegerParser implements SlotParser { private static final Map WORD_TO_NUM = new HashMap<>(); private static final Map MULTIPLIERS = new HashMap<>(); private static final Pattern DIGIT_SPLIT_RE = Pattern.compile("[-,()\\s]"); /** * Create a new integer parser. */ public IntegerParser() { initMaps(); } private void initMaps() { WORD_TO_NUM.put("oh", 0); WORD_TO_NUM.put("owe", 0); WORD_TO_NUM.put("zero", 0); WORD_TO_NUM.put("won", 1); WORD_TO_NUM.put("one", 1); WORD_TO_NUM.put("first", 1); WORD_TO_NUM.put("to", 2); WORD_TO_NUM.put("too", 2); WORD_TO_NUM.put("two", 2); WORD_TO_NUM.put("second", 2); WORD_TO_NUM.put("three", 3); WORD_TO_NUM.put("third", 3); WORD_TO_NUM.put("for", 4); WORD_TO_NUM.put("fore", 4); WORD_TO_NUM.put("four", 4); WORD_TO_NUM.put("five", 5); WORD_TO_NUM.put("fif", 5); WORD_TO_NUM.put("sicks", 6); WORD_TO_NUM.put("sics", 6); WORD_TO_NUM.put("six", 6); WORD_TO_NUM.put("seven", 7); WORD_TO_NUM.put("ate", 8); WORD_TO_NUM.put("eight", 8); WORD_TO_NUM.put("eighth", 8); WORD_TO_NUM.put("nine", 9); WORD_TO_NUM.put("ninth", 9); WORD_TO_NUM.put("tin", 10); WORD_TO_NUM.put("ten", 10); WORD_TO_NUM.put("eleven", 11); WORD_TO_NUM.put("twelve", 12); WORD_TO_NUM.put("twelf", 12); WORD_TO_NUM.put("thirteen", 13); WORD_TO_NUM.put("fourteen", 14); WORD_TO_NUM.put("fifteen", 15); WORD_TO_NUM.put("sixteen", 16); WORD_TO_NUM.put("seventeen", 17); WORD_TO_NUM.put("eighteen", 18); WORD_TO_NUM.put("nineteen", 19); WORD_TO_NUM.put("twenty", 20); WORD_TO_NUM.put("twentie", 20); WORD_TO_NUM.put("thirty", 30); WORD_TO_NUM.put("thirtie", 30); WORD_TO_NUM.put("forty", 40); WORD_TO_NUM.put("fortie", 40); WORD_TO_NUM.put("fifty", 50); WORD_TO_NUM.put("fiftie", 50); WORD_TO_NUM.put("sixty", 60); WORD_TO_NUM.put("sixtie", 60); WORD_TO_NUM.put("seventy", 70); WORD_TO_NUM.put("seventie", 70); WORD_TO_NUM.put("eighty", 80); WORD_TO_NUM.put("eightie", 80); WORD_TO_NUM.put("ninety", 90); WORD_TO_NUM.put("ninetie", 90); MULTIPLIERS.put("hundred", 100); 
MULTIPLIERS.put("thousand", 1000); MULTIPLIERS.put("million", 1000000); MULTIPLIERS.put("billion", 1000000000); WORD_TO_NUM.putAll(MULTIPLIERS); } @SuppressWarnings("unchecked") @Override public Object parse(Map metadata, String rawValue) { List range = null; Object rawRange = metadata.get("range"); if (rawRange != null) { range = (List) rawRange; } String normalized = rawValue.toLowerCase().trim(); List parsedInts = new ArrayList<>(); String[] tokens = DIGIT_SPLIT_RE.split(normalized); for (String token : tokens) { try { int parsed = Integer.parseInt(token); parsedInts.add(parsed); } catch (NumberFormatException nfe) { List reduced = parseReduce(token, parsedInts); if (reduced == null) { return null; } } } int result = sum(parsedInts); if (isInRange(result, range)) { return result; } return null; } private List parseReduce(String numStr, List soFar) { String toParse = numStr; if (toParse.endsWith("th")) { toParse = toParse.substring(0, toParse.length() - 2); } if (!WORD_TO_NUM.containsKey(toParse)) { // invalid number, but don't throw an error return null; } if (MULTIPLIERS.containsKey(toParse)) { List sum = collapse(MULTIPLIERS.get(toParse), soFar); soFar.clear(); soFar.addAll(sum); } else { soFar.add(WORD_TO_NUM.get(toParse)); } return soFar; } private List collapse(int multiplier, List soFar) { List collapsed = new ArrayList<>(); int sum = 0; for (Integer num : soFar) { if (num > multiplier) { collapsed.add(num); } else { sum += num; } } sum = (sum > 0) ? sum : 1; collapsed.add(sum * multiplier); return collapsed; } private Integer sum(List parsed) { int sum = 0; for (Integer num : parsed) { sum += num; } return sum; } private boolean isInRange(int val, List range) { return range == null || (val > 0 && range.get(0) < range.get(1) && val >= range.get(0) && val < range.get(1)); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy