All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nlpcraft.model.utils.NCTokenUtils Maven / Gradle / Ivy

There is a newer version: 0.8.2
Show newest version
/*
 * "Commons Clause" License, https://commonsclause.com/
 *
 * The Software is provided to you by the Licensor under the License,
 * as defined below, subject to the following condition.
 *
 * Without limiting other conditions in the License, the grant of rights
 * under the License will not include, and the License does not grant to
 * you, the right to Sell the Software.
 *
 * For purposes of the foregoing, "Sell" means practicing any or all of
 * the rights granted to you under the License to provide to third parties,
 * for a fee or other consideration (including without limitation fees for
 * hosting or consulting/support services related to the Software), a
 * product or service whose value derives, entirely or substantially, from
 * the functionality of the Software. Any license notice or attribution
 * required by the License must also include this Commons Clause License
 * Condition notice.
 *
 * Software:    NLPCraft
 * License:     Apache 2.0, https://www.apache.org/licenses/LICENSE-2.0
 * Licensor:    Copyright (C) 2018 DataLingvo, Inc. https://www.datalingvo.com
 *
 *     _   ____      ______           ______
 *    / | / / /___  / ____/________ _/ __/ /_
 *   /  |/ / / __ \/ /   / ___/ __ `/ /_/ __/
 *  / /|  / / /_/ / /___/ /  / /_/ / __/ /_
 * /_/ |_/_/ .___/\____/_/   \__,_/_/  \__/
 *        /_/
 */

package org.nlpcraft.model.utils;

import org.nlpcraft.model.*;
import org.nlpcraft.model.impl.*;
import java.sql.*;
import java.util.*;
import java.util.function.*;

/**
 * Convenient API for {@link NCToken} {@link NCToken#getMetadata() metadata}. Most of the methods in
 * this class provide static, conveniently named and typed accessors to token {@link NCToken#getMetadata() metadata}
 * properties. Note that some methods are valid only for specific IDs of {@link NCToken} - consult
 * each method's documentation for details. Note also that the methods from this class can be statically
 * imported into the scope for easier usage.
 * 

* Note that all tokens (from all providers) have a common set of metadata properties.
* By default NLPCraft provides built-in tokens whose IDs start with {@code nlpcraft:}. Additionally the following
* 3rd party token providers are supported: OpenNLP, Stanford CoreNLP and Google Natural Language APIs.
* The table below lists the token IDs of each provider:
*

* * * * * * * * * * * * * * * * *
* NLPCraft | OpenNLP | Stanford CoreNLP | Google Natural Language
* nlpcraft:geo
* nlpcraft:date
* nlpcraft:num
* nlpcraft:function
* nlpcraft:coordinate
*
* opennlp:location
* opennlp:money
* opennlp:person
* opennlp:organization
* opennlp:date
* opennlp:time
* opennlp:percentage *
* stanford:person
* stanford:location
* stanford:organization
* stanford:misc
* stanford:money
* stanford:number
* stanford:ordinal
* stanford:percent
* stanford:date
* stanford:time
* stanford:duration
* stanford:set
* stanford:url
* stanford:city
* stanford:state_or_province
* stanford:nationality
* stanford:religion
* stanford:title
* stanford:ideology
* stanford:criminal_charge
* stanford:cause_of_death *
* google:unknown
* google:person
* google:location
* google:organization
* google:event
* google:work_of_art
* google:consumer_good
* google:other
* google:phone_number
* google:address
* google:date
* google:number
* google:price *
*

* * The following table lists properties that are common across all tokens, from any token * provider: *

* * * * * * *
Common Properties
* {@link #isGeo(NCToken)}
* {@link #isDate(NCToken)}
* {@link #isNumeric(NCToken)}
* {@link #isFunction(NCToken)}
* {@link #isNlp(NCToken)}
* {@link #isContiguous(NCToken)}
* {@link #isAdjective(NCToken)}
* {@link #isAdverb(NCToken)}
* {@link #isBracketed(NCToken)}
* {@link #isEnglish(NCToken)}
* {@link #isFreeWord(NCToken)}
* {@link #isSwearWord(NCToken)}
* {@link #isSynthetic(NCToken)}
* {@link #isKnownWord(NCToken)}
* {@link #isStopWord(NCToken)}
* {@link #isQuoted(NCToken)}
* {@link #isPreposition(NCToken)}
* {@link #isNoun(NCToken)}
* {@link #isPronoun(NCToken)}
* {@link #getPosTag(NCToken)}
* {@link #getNormalizedText(NCToken)}
* {@link #getOriginalText(NCToken)}
* {@link #getTokenIndex(NCToken)}
* {@link #getCharLength(NCToken)}
* {@link #getLemma(NCToken)}
* {@link #getStem(NCToken)}
* {@link #getSparsity(NCToken)}
* {@link #getPosDescription(NCToken)}
* {@link #getUnid(NCToken)}
* {@link #getWordIndexes(NCToken)}
* {@link #getWordLength(NCToken)}
* {@link #isDirectSynonym(NCToken)} * {@link #getMinIndex(NCToken)} * {@link #getMaxIndex(NCToken)} *
*

* * The following table lists properties for {@code nlpcraft:date} tokens: *

* * * * * * *
Properties for nlpcraft:date tokens
* {@link #isDateAfter(NCToken, long)}
* {@link #isDateBefore(NCToken, long)}
* {@link #isDateIntersect(NCToken, long, long)}
* {@link #isDateWithin(NCToken, long)}
* {@link #getDateFrom(NCToken)}
* {@link #getDateTo(NCToken)}
* {@link #prepareDateSql(NCToken, String)} *
*

* * The following table lists properties for {@code nlpcraft:geo} tokens: *

* * * * * * *
Properties for nlpcraft:geo tokens
* {@link #isGeoCity(NCToken)}
* {@link #isGeoCountry(NCToken)}
* {@link #isGeoContinent(NCToken)}
* {@link #isGeoMetro(NCToken)}
* {@link #isGeoRegion(NCToken)}
* {@link #isGeoSubcontinent(NCToken)}
* {@link #getGeoCity(NCToken)}
* {@link #getGeoCountry(NCToken)}
* {@link #getGeoContinent(NCToken)}
* {@link #getGeoMetro(NCToken)}
* {@link #getGeoRegion(NCToken)}
* {@link #getGeoLatitude(NCToken)}
* {@link #getGeoLongitude(NCToken)}
* {@link #getGeoSubcontinent(NCToken)} *
* * The following table lists properties for {@code nlpcraft:num} tokens: * * * * * * * *
Properties for nlpcraft:num tokens
* {@link #isNumEqualCondition(NCToken)}
* {@link #isNumFractional(NCToken)}
* {@link #isNumFromInclusive(NCToken)}
* {@link #isNumFromNegativeInfCondition(NCToken)}
* {@link #isNumNotEqualCondition(NCToken)}
* {@link #isNumRangeCondition(NCToken)}
* {@link #isNumToPositiveInfCondition(NCToken)}
* {@link #isNumToInclusive(NCToken)}
* {@link #testNum(NCToken, int)}
* {@link #testNum(NCToken, byte)}
* {@link #testNum(NCToken, short)}
* {@link #testNum(NCToken, long)}
* {@link #testNum(NCToken, short)}
* {@link #testNum(NCToken, float, float)}
* {@link #testNum(NCToken, double, double)}
* {@link #getNumFrom(NCToken)}
* {@link #getNumTo(NCToken)}
* {@link #getNumIndex(NCToken)}
* {@link #getNumUnit(NCToken)}
* {@link #getNumUnitType(NCToken)}
* {@link #prepareNumSql(NCToken, String)} *
* * The following table lists properties for {@code nlpcraft:function} tokens: * * * * * * * *
Properties for nlpcraft:function tokens
* {@link #isLimitFun(NCToken)}
* {@link #isSortFun(NCToken)}
* {@link #isMaxFun(NCToken)}
* {@link #isMinFun(NCToken)}
* {@link #isAvgFun(NCToken)}
* {@link #isGroupFun(NCToken)}
* {@link #isCompareFun(NCToken)}
* {@link #isCorrelationFun(NCToken)}
* {@link #isAscendingFun(NCToken)}
* {@link #getFunIndexes(NCToken)}
* {@link #getFunLimit(NCToken)} *
* * The following table lists properties for {@code nlpcraft:coordinate} tokens: * * * * * * * *
Properties for nlpcraft:coordinate tokens
* {@link #getCoordinateLatitude(NCToken)}
* {@link #getCoordinateLongitude(NCToken)} *
* * The following table lists properties for any {@code stanford:} tokens: * * * * * * * *
Properties for stanford: tokens
* {@link #getStanfordConfidence(NCToken)}
* {@link #getStanfordNne(NCToken)} *
* * The following table lists properties for any {@code opennlp:} tokens: * * * * * * * *
Properties for opennlp: tokens
* {@link #getOpenNlpProbability(NCToken)} *
* * The following table lists properties for any {@code google:} tokens: * * * * * * * *
Properties for google: tokens
* {@link #getGoogleSalience(NCToken)}
* {@link #getGoogleMentionsBeginOffsets(NCToken)}
* {@link #getGoogleMentionsContents(NCToken)}
* {@link #getGoogleMentionsTypes(NCToken)}
* {@link #getGoogleMeta(NCToken)} *
* There are also methods you can use to check token's provider origin: *
    *
  • {@link #fromGoogle(NCToken)}
  • *
  • {@link #fromStanfordCoreNlp(NCToken)}
  • *
  • {@link #fromOpenNlp(NCToken)}
  • *
  • {@link #fromNlpCraft(NCToken)}
  • *
*/
public class NCTokenUtils {
    /**
     * Ensures that given token carries exactly the expected ID.
     *
     * @param tok Token to verify.
     * @param id Expected token ID.
     * @throws IllegalArgumentException Thrown if token's ID is not equal to the expected one.
     */
    private static void checkId(NCToken tok, String id) {
        final String actual = tok.getId();

        if (actual.equals(id))
            return;

        throw new IllegalArgumentException(
            String.format("Token is of the wrong type [expected=%s, actual=%s]", id, actual));
    }

    /**
     * Ensures that ID of given token starts with the expected provider prefix.
     *
     * @param tok Token to verify.
     * @param provider Expected provider prefix of the token ID, e.g. {@code google:}.
     * @throws IllegalArgumentException Thrown if token's ID doesn't start with given prefix.
     */
    private static void checkProvider(NCToken tok, String provider) {
        final String actual = tok.getId();

        if (actual.startsWith(provider))
            return;

        throw new IllegalArgumentException(
            String.format("Token is of the wrong origin [expected=%s, actual=%s]", provider, actual));
    }

    /**
     * Tests whether ID of given token starts with {@code google:} prefix, i.e. the token
     * comes from Google Natural Language provider.
     *
     * @param tok A token.
     * @return {@code true} if token ID starts with {@code google:}, {@code false} otherwise.
     */
    public static boolean fromGoogle(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.startsWith("google:");
    }

    /**
     * Tests whether ID of given token starts with {@code stanford:} prefix, i.e. the token
     * comes from Stanford CoreNLP provider.
     *
     * @param tok A token.
     * @return {@code true} if token ID starts with {@code stanford:}, {@code false} otherwise.
     */
    public static boolean fromStanfordCoreNlp(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.startsWith("stanford:");
    }

    /**
     * Tests whether ID of given token starts with {@code opennlp:} prefix, i.e. the token
     * comes from OpenNLP provider.
     *
     * @param tok A token.
     * @return {@code true} if token ID starts with {@code opennlp:}, {@code false} otherwise.
     */
    public static boolean fromOpenNlp(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.startsWith("opennlp:");
    }

    /**
     * Tests whether ID of given token starts with {@code nlpcraft:} prefix, i.e. the token
     * comes from built-in NLPCraft provider.
     *
     * @param tok A token.
     * @return {@code true} if token ID starts with {@code nlpcraft:}, {@code false} otherwise.
     */
    public static boolean fromNlpCraft(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.startsWith("nlpcraft:");
    }

    /**
     * Whether or not this token has {@code nlpcraft:geo} {@link NCToken#getId() ID}.
     *
     * @param tok A token.
     * @return Whether or not this token has {@code nlpcraft:geo} {@link NCToken#getId() ID}.
*/
    public static boolean isGeo(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.equals("nlpcraft:geo");
    }

    /**
     * Tests whether given token has {@code nlpcraft:coordinate} {@link NCToken#getId() ID}.
     *
     * @param tok A token.
     * @return {@code true} if token ID is equal to {@code nlpcraft:coordinate}, {@code false} otherwise.
     */
    public static boolean isCoordinate(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.equals("nlpcraft:coordinate");
    }

    /**
     * Tests whether given token has {@code nlpcraft:nlp} {@link NCToken#getId() ID}.
     *
     * @param tok A token.
     * @return {@code true} if token ID is equal to {@code nlpcraft:nlp}, {@code false} otherwise.
     */
    public static boolean isNlp(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.equals("nlpcraft:nlp");
    }

    /**
     * Tests whether given token has {@code nlpcraft:date} {@link NCToken#getId() ID}.
     *
     * @param tok A token.
     * @return {@code true} if token ID is equal to {@code nlpcraft:date}, {@code false} otherwise.
     */
    public static boolean isDate(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.equals("nlpcraft:date");
    }

    /**
     * Tests whether given token has {@code nlpcraft:num} {@link NCToken#getId() ID}.
     *
     * @param tok A token.
     * @return {@code true} if token ID is equal to {@code nlpcraft:num}, {@code false} otherwise.
     */
    public static boolean isNumeric(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.equals("nlpcraft:num");
    }

    /**
     * Tests whether given token has {@code nlpcraft:function} {@link NCToken#getId() ID}.
     *
     * @param tok A token.
     * @return {@code true} if token ID is equal to {@code nlpcraft:function}, {@code false} otherwise.
     */
    public static boolean isFunction(NCToken tok) {
        assert tok != null;

        final String id = tok.getId();

        return id.equals("nlpcraft:function");
    }

    /**
     * Prepares SQL WHERE clause adapter for JDBC prepared statement based on condition in given {@code nlpcraft:num} token.
     *
     * @param tok A token.
     * @param col SQL column name.
     * @param <T> Type of parameters.
* @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return SQL adapter for JDBC prepared statement. */ static public NCTokenSqlAdapter prepareNumSql(NCToken tok, String col) { assert tok != null; checkId(tok, "nlpcraft:num"); boolean isFractional = isNumFractional(tok); Function get = (d) -> { Number n; if (isFractional) n = d; else { Long l = d.longValue(); if (Byte.MAX_VALUE >= l && Byte.MIN_VALUE <= l) n = l.byteValue(); else if (Short.MAX_VALUE >= l && Short.MIN_VALUE <= l) n = l.shortValue(); else if (Integer.MAX_VALUE >= l && Integer.MIN_VALUE <= l) n = l.intValue(); else n = l; } return (T)n; }; Supplier moreThanFrom = () -> isNumFromInclusive(tok) ? col + " >= ?" : col + " > ?"; Supplier lessThanTo = () -> isNumToInclusive(tok) ? col + " <= ?" : col + " < ?"; if (isNumEqualCondition(tok)) return new NCSqlAdapterImpl<>(col + " = ?", get.apply(getNumFrom(tok))); else if (isNumNotEqualCondition(tok)) return new NCSqlAdapterImpl<>(col + " <> ?", get.apply(getNumFrom(tok))); else if (isNumFromNegativeInfCondition(tok)) return new NCSqlAdapterImpl<>(lessThanTo.get(), get.apply(getNumFrom(tok))); else if (isNumToPositiveInfCondition(tok)) return new NCSqlAdapterImpl<>(moreThanFrom.get(), get.apply(getNumFrom(tok))); else return new NCSqlAdapterImpl<>( moreThanFrom.get() + " AND " + lessThanTo.get(), get.apply(getNumFrom(tok)), get.apply(getNumTo(tok)) ); } /** * Compares given value against the numeric condition in given {@code nlpcraft:num} token. * * @param tok A token. * @param delta Precision delta for comparison. * @param v Value to compare with. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return {@code True} if the value satisfies condition in given {@code nlpcraft:num} token. 
*/ static public boolean testNum(NCToken tok, double v, double delta) { return testCondition(tok, v, delta); } /** * Compares given value against the numeric condition in given {@code nlpcraft:num} token. * * @param tok A token. * @param delta Precision delta for comparison. * @param v Value to compare with. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return {@code True} if the value satisfies condition in given {@code nlpcraft:num} token. */ static public boolean testNum(NCToken tok, float v, float delta) { return testCondition(tok, v, delta); } /** * Compares given value against the numeric condition in given {@code nlpcraft:num} token. * * @param tok A token. * @param v Value to compare with. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return {@code True} if the value satisfies condition in given {@code nlpcraft:num} token. */ static public boolean testNum(NCToken tok, long v) { return testCondition(tok, v); } /** * Compares given value against the numeric condition in given {@code nlpcraft:num} token. * * @param tok A token. * @param v Value to compare with. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return {@code True} if the value satisfies condition in given {@code nlpcraft:num} token. */ static public boolean testNum(NCToken tok, int v) { return testCondition(tok, v); } /** * Compares given value against the numeric condition in given {@code nlpcraft:num} token. * * @param tok A token. * @param v Value to compare with. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return {@code True} if the value satisfies condition in given {@code nlpcraft:num} token. */ static public boolean testNum(NCToken tok, byte v) { return testCondition(tok, v); } /** * Compares given value against the numeric condition in given {@code nlpcraft:num} token. 
* * @param tok A token. * @param v Value to compare with. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return {@code True} if the value satisfies condition in given {@code nlpcraft:num} token. */ static public boolean testNum(NCToken tok, short v) { return testCondition(tok, v); } /** * * @param tok * @param v * @return */ static private boolean testCondition(NCToken tok, long v) { assert tok != null; checkId(tok, "nlpcraft:num"); if (isNumEqualCondition(tok)) return (long)getNumFrom(tok) == v; else if (isNumNotEqualCondition(tok)) return (long)getNumFrom(tok) != v; return handleRange(tok, () -> isNumFromInclusive(tok) ? v >= getNumFrom(tok) : v > getNumFrom(tok), () -> isNumToInclusive(tok) ? v <= getNumTo(tok) : v < getNumTo(tok) ); } /** * * @param tok * @param v * @param delta * @return */ private static boolean testCondition(NCToken tok, double v, double delta) { assert tok != null; checkId(tok, "nlpcraft:num"); if (isNumEqualCondition(tok)) return Math.abs(getNumFrom(tok) - v) <= delta; else if (isNumNotEqualCondition(tok)) return Math.abs(getNumFrom(tok) - v) > delta; return handleRange(tok, () -> isNumFromInclusive(tok) ? v > getNumFrom(tok) || Math.abs(getNumFrom(tok) - v) <= delta : v > getNumFrom(tok), () -> isNumToInclusive(tok) ? v < getNumTo(tok) || Math.abs(getNumTo(tok) - v) <= delta : v < getNumTo(tok) ); } static private boolean handleRange(NCToken tok, Supplier moreThanFrom, Supplier lessThanTo) { if (isNumFromNegativeInfCondition(tok)) return lessThanTo.get(); else if (isNumToPositiveInfCondition(tok)) return moreThanFrom.get(); else return moreThanFrom.get() && lessThanTo.get(); } /** * Gets Google Natural Language salience for given {@code google:} token. *

* Value is read from {@code SALIENCE} token {@link NCToken#getMetadata() metadata} property.
     *
     * @param tok A token.
     * @throws IllegalArgumentException Thrown if given token is not from Google Natural Language provider.
     * @return Google Natural Language salience value for given token.
     */
    public static double getGoogleSalience(NCToken tok) {
        assert tok != null;

        checkProvider(tok, "google:");

        final double salience = tok.getMetadata().getDouble("SALIENCE");

        return salience;
    }

    /**
     * Gets Google Natural Language meta data for given {@code google:} token.
     *

* Value is read from {@code META} token {@link NCToken#getMetadata() metadata} property.
     *
     * @param tok A token.
     * @throws IllegalArgumentException Thrown if given token is not from Google Natural Language provider.
     * @return Google Natural Language meta data value for given token.
     */
    public static Map getGoogleMeta(NCToken tok) {
        assert tok != null;

        checkProvider(tok, "google:");

        // NOTE(review): raw Map is returned - key/value types are not visible here, confirm and parameterize.
        final Object meta = tok.getMetadata().get("META");

        return (Map)meta;
    }

    /**
     * Gets Google Natural Language mentions contents for given {@code google:} token.
     *

* Value is read from {@code MENTIONSCONTENTS} token {@link NCToken#getMetadata() metadata} property.
     *
     * @param tok A token.
     * @throws IllegalArgumentException Thrown if given token is not from Google Natural Language provider.
     * @return Google Natural Language mentions contents for given token.
     */
    public static List getGoogleMentionsContents(NCToken tok) {
        assert tok != null;

        checkProvider(tok, "google:");

        // NOTE(review): raw List is returned - element type is not visible here, confirm and parameterize.
        final Object contents = tok.getMetadata().get("MENTIONSCONTENTS");

        return (List)contents;
    }

    /**
     * Gets Google Natural Language mentions types for given {@code google:} token.
     *

* Value is read from {@code MENTIONSTYPES} token {@link NCToken#getMetadata() metadata} property.
     *
     * @param tok A token.
     * @throws IllegalArgumentException Thrown if given token is not from Google Natural Language provider.
     * @return Google Natural Language mentions types for given token.
     */
    public static List getGoogleMentionsTypes(NCToken tok) {
        assert tok != null;

        checkProvider(tok, "google:");

        // NOTE(review): raw List is returned - element type is not visible here, confirm and parameterize.
        final Object types = tok.getMetadata().get("MENTIONSTYPES");

        return (List)types;
    }

    /**
     * Gets Google Natural Language mentions begin offsets for given {@code google:} token.
     *

* Value is read from {@code MENTIONSBEGINOFFSETS} token {@link NCToken#getMetadata() metadata} property.
     *
     * @param tok A token.
     * @throws IllegalArgumentException Thrown if given token is not from Google Natural Language provider.
     * @return Google Natural Language mentions begin offsets for given token.
     */
    public static List getGoogleMentionsBeginOffsets(NCToken tok) {
        assert tok != null;

        checkProvider(tok, "google:");

        // NOTE(review): raw List is returned - element type is not visible here, confirm and parameterize.
        final Object offsets = tok.getMetadata().get("MENTIONSBEGINOFFSETS");

        return (List)offsets;
    }

    /**
     * Gets OpenNLP probability value for given {@code opennlp:} token.
     *

* Value is read from {@code PROBABILITY} token {@link NCToken#getMetadata() metadata} property.
     *
     * @param tok A token.
     * @throws IllegalArgumentException Thrown if given token is not from OpenNLP provider.
     * @return OpenNLP probability value for given token.
     */
    public static double getOpenNlpProbability(NCToken tok) {
        assert tok != null;

        checkProvider(tok, "opennlp:");

        final double probability = tok.getMetadata().getDouble("PROBABILITY");

        return probability;
    }

    /**
     * Gets Stanford CoreNLP confidence value for given {@code stanford:} token.
     *

* Value is read from {@code CONFIDENCE} token {@link NCToken#getMetadata() metadata} property.
     *
     * @param tok A token.
     * @throws IllegalArgumentException Thrown if given token is not from Stanford CoreNLP provider.
     * @return Stanford CoreNLP confidence value for given token.
     */
    public static double getStanfordConfidence(NCToken tok) {
        assert tok != null;

        checkProvider(tok, "stanford:");

        final double confidence = tok.getMetadata().getDouble("CONFIDENCE");

        return confidence;
    }

    /**
     * Gets optional Stanford CoreNLP Normalized Named Entity (NNE) value for given {@code stanford:} token.
     *

* Value is read from {@code NNE} token {@link NCToken#getMetadata() metadata} property.
     *
     * @param tok A token.
     * @throws IllegalArgumentException Thrown if given token is not from Stanford CoreNLP provider.
     * @return Optional NNE value or {@code null}.
     */
    public static String getStanfordNne(NCToken tok) {
        assert tok != null;

        checkProvider(tok, "stanford:");

        final String nne = tok.getMetadata().getString("NNE");

        return nne;
    }

    /**
     * Whether given {@code nlpcraft:num} token represents an equality condition or single numeric value.
     *

* Corresponds to {@code NUM_ISEQUALCONDITION} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Whether given {@code nlpcraft:num} token represents a equality condition. */ static public boolean isNumEqualCondition(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getBoolean("NUM_ISEQUALCONDITION"); } /** * Whether given {@code nlpcraft:num} token represents a not-equality condition. *

* Corresponds to {@code NUM_ISNOTEQUALCONDITION} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Whether given {@code nlpcraft:num} token represents a not-equality condition. */ static public boolean isNumNotEqualCondition(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getBoolean("NUM_ISNOTEQUALCONDITION"); } /** * Whether given {@code nlpcraft:num} token represents a range to negative infinity. *

* Corresponds to {@code NUM_ISFROMNEGATIVEINFINITY} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Whether given {@code nlpcraft:num} token represents a range to negative infinity. */ static public boolean isNumFromNegativeInfCondition(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getBoolean("NUM_ISFROMNEGATIVEINFINITY"); } /** * Whether given {@code nlpcraft:num} token represents a range to positive infinity. *

* Corresponds to {@code NUM_ISTOPOSITIVEINFINITY} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Whether given {@code nlpcraft:num} token represents a range to positive infinity. */ static public boolean isNumToPositiveInfCondition(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getBoolean("NUM_ISTOPOSITIVEINFINITY"); } /** * Whether given {@code nlpcraft:num} token represents a range condition. *

* Corresponds to {@code NUM_ISRANGECONDITION} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Whether or not end of the numeric range is inclusive for given {@code nlpcraft:num} token. */ static public boolean isNumRangeCondition(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getBoolean("NUM_ISRANGECONDITION"); } /** * Whether or not end of the numeric range is inclusive for given {@code nlpcraft:num} token. *

* Corresponds to {@code NUM_TOINCL} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Whether or not end of the numeric range is inclusive for given {@code nlpcraft:num} token. */ static public boolean isNumToInclusive(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getBoolean("NUM_TOINCL"); } /** * Whether or not start of the numeric range is inclusive for given {@code nlpcraft:num} token. *

* Corresponds to {@code NUM_FROMINCL} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Whether or not start of the numeric range is inclusive for given {@code nlpcraft:num} token. */ static public boolean isNumFromInclusive(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getBoolean("NUM_FROMINCL"); } /** * Gets the start of the numeric range that satisfies the condition of given {@code nlpcraft:num} token. * Note that this method and {@link #getNumTo(NCToken)} can return the same value in * which case given {@code nlpcraft:num} token represents a single value and {@link #isNumEqualCondition(NCToken)} * will return {@code true}. *

* Corresponds to {@code NUM_FROM} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Start of the numeric range that satisfies the condition of given {@code nlpcraft:num} token. */ static public double getNumFrom(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getDouble("NUM_FROM"); } /** * Gets the end of numeric range that satisfies the condition of given {@code nlpcraft:num} token. * Note that {@link #getNumFrom(NCToken)} and this method can return the same value in * which case given {@code nlpcraft:num} token represents a single value and {@link #isNumEqualCondition(NCToken)} * will return {@code true}. *

* Corresponds to {@code NUM_TO} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return End of numeric range that satisfies the condition of given {@code nlpcraft:num} token. */ static public double getNumTo(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getDouble("NUM_TO"); } /** * Whether this token's value (single numeric value of a range) is a whole or a fractional number * for given {@code nlpcraft:num} token. *

* Corresponds to {@code NUM_ISFRACTIONAL} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Whether this {@code nlpcraft:num} token's value is a whole or a fractional number. */ static public boolean isNumFractional(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getBoolean("NUM_ISFRACTIONAL"); } /** * Gets optional unit for this {@code nlpcraft:num} token, e.g. "mm", "cm", "ft". *

* Corresponds to {@code NUM_UNIT} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Optional unit for this {@code nlpcraft:num} token or {@code null}. */ static public String getNumUnit(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getString("NUM_UNIT"); } /** * Gets optional unit type for this {@code nlpcraft:num} token, e.g. "length", "force", "mass". *

* Corresponds to {@code NUM_UNITTYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Optional unit type for this {@code nlpcraft:num} token or {@code null}. */ static public String getNumUnitType(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return tok.getMetadata().getString("NUM_UNITTYPE"); } /** * Gets optional index of another token in the sentence that this {@code nlpcraft:num} token is referring to. * If index could not be determined this token refers to a free word or a stopword. *

* Corresponds to {@code NUM_INDEX} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:num} ID. * @return Index of reference token for this {@code nlpcraft:num} token. */ static public Optional getNumIndex(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:num"); return Optional.ofNullable((Integer)tok.getMetadata().get("NUM_INDEX")); } /** * Gets start timestamp of the date range {@code nlpcraft:date} token. *

* Corresponds to {@code DATE_FROM} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:date} ID. * @return Start timestamp of the date range {@code nlpcraft:date} token. */ static public long getDateFrom(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:date"); return tok.getMetadata().getLong("DATE_FROM"); } /** * Gets end timestamp of the date range {@code nlpcraft:date} token. *

* Corresponds to {@code DATE_TO} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:date} ID. * @return End timestamp of the date range {@code nlpcraft:date} token. */ static public long getDateTo(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:date"); return tok.getMetadata().getLong("DATE_TO"); } /** * Tests if given timestamp is before the date range for given {@code nlpcraft:date} token. * * @param tok A token. * @param tstamp Timestamp to test. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:date} ID. * @return {@code True} if given timestamp is before the date range for given token, {@code false} otherwise. */ static public boolean isDateBefore(NCToken tok, long tstamp) { assert tok != null; checkId(tok, "nlpcraft:date"); return tstamp < getDateFrom(tok); } /** * Tests if given timestamp is after the date range for given {@code nlpcraft:date} token. * * @param tok A token. * @param tstamp Timestamp to test. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:date} ID. * @return {@code True} if given timestamp is after the date range for given token, {@code false} otherwise. */ static public boolean isDateAfter(NCToken tok, long tstamp) { assert tok != null; checkId(tok, "nlpcraft:date"); return tstamp > getDateTo(tok); } /** * Tests if given timestamp is within (inclusively) the date range for given {@code nlpcraft:date} token. * * @param tok A token. * @param tstamp Timestamp to test. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:date} ID. * @return {@code True} if given timestamp is within the date range for given token, {@code false} otherwise. 
*/
    static public boolean isDateWithin(NCToken tok, long tstamp) {
        assert tok != null;

        checkId(tok, "nlpcraft:date");

        // Both bounds are inclusive: this matches the documented contract, the strict '>' used by
        // isDateAfter() and the SQL BETWEEN clause built by prepareDateSql().
        return tstamp >= getDateFrom(tok) && tstamp <= getDateTo(tok);
    }

    /**
     * Tests if given from and to timestamps intersect (inclusively) with the date range for
     * given {@code nlpcraft:date} token. Ranges that merely touch at a boundary timestamp are
     * considered intersecting.
     *
     * @param tok A token; must not be {@code null}.
     * @param from From timestamp to check.
     * @param to To timestamp to check.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:date} ID.
     * @return {@code True} if given timestamp range intersects the date range for given token,
     *      {@code false} otherwise.
     */
    static public boolean isDateIntersect(NCToken tok, long from, long to) {
        assert tok != null;

        checkId(tok, "nlpcraft:date");

        // Inclusive on both ends, consistent with isDateWithin().
        return from <= getDateTo(tok) && to >= getDateFrom(tok);
    }

    /**
     * Creates SQL WHERE clause adapter for given token and SQL column name that can be conveniently
     * used in JDBC prepared statements. The clause has the form {@code <col> BETWEEN ? AND ?} and its
     * two parameters are the token's start and end timestamps, in that order.
     * <p>
     * Note that the column name is inserted into the clause verbatim while only the two timestamps
     * are passed as bind parameters, so the column name must come from a trusted source.
     *
     * @param tok A token; must not be {@code null}.
     * @param col Column name.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:date} ID.
     * @return SQL adapter.
     */
    static public NCTokenSqlAdapter prepareDateSql(NCToken tok, String col) {
        assert tok != null;

        checkId(tok, "nlpcraft:date");

        final String clause = String.format("%s BETWEEN ? AND ?", col);
        final List<Timestamp> params = Arrays.asList(
            new Timestamp(getDateFrom(tok)),
            new Timestamp(getDateTo(tok))
        );

        // NOTE(review): the overridden return type is left raw, as in the original, because the
        // declaration of NCTokenSqlAdapter is not visible from here.
        return new NCTokenSqlAdapter() {
            @Override
            public String getClause() {
                return clause;
            }

            @Override
            public List getClauseParameters() {
                return params;
            }
        };
    }

    /**
     * Gets subcontinent for given {@code nlpcraft:geo} token.
     *

* Backed by the {@code GEO_SUBCONTINENT} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID.
     * @return Subcontinent.
     */
    static public String getGeoSubcontinent(NCToken tok) {
        assert tok != null;

        checkId(tok, "nlpcraft:geo");

        final String subcontinent = tok.getMetadata().getString("GEO_SUBCONTINENT");

        return subcontinent;
    }

    /**
     * Gets continent for given {@code nlpcraft:geo} token.
     *

* Backed by the {@code GEO_CONTINENT} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID.
     * @return Continent.
     */
    static public String getGeoContinent(NCToken tok) {
        assert tok != null;

        checkId(tok, "nlpcraft:geo");

        final String continent = tok.getMetadata().getString("GEO_CONTINENT");

        return continent;
    }

    /**
     * Gets country for given {@code nlpcraft:geo} token.
     *

* Backed by the {@code GEO_COUNTRY} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID.
     * @return Country.
     */
    static public String getGeoCountry(NCToken tok) {
        assert tok != null;

        checkId(tok, "nlpcraft:geo");

        final String country = tok.getMetadata().getString("GEO_COUNTRY");

        return country;
    }

    /**
     * Gets metro area code for given {@code nlpcraft:geo} token.
     *

* Backed by the {@code GEO_METRO} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID.
     * @return Metro area code.
     */
    static public String getGeoMetro(NCToken tok) {
        assert tok != null;

        checkId(tok, "nlpcraft:geo");

        final String metro = tok.getMetadata().getString("GEO_METRO");

        return metro;
    }

    /**
     * Gets optional latitude of the given {@code nlpcraft:geo} token.
     *

* Corresponds to {@code GEO_LATITUDE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID. * @return Optional latitude of the given {@code nlpcraft:geo} token. */ static public Optional getGeoLatitude(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:geo"); return Optional.ofNullable((Double)tok.getMetadata().get("GEO_LATITUDE")); } /** * Gets optional longitude of the given {@code nlpcraft:geo} token. *

* Corresponds to {@code GEO_LONGITUDE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID. * @return Optional longitude of the given {@code nlpcraft:geo} token. */ static public Optional getGeoLongitude(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:geo"); return Optional.ofNullable((Double)tok.getMetadata().get("GEO_LONGITUDE")); } /** * Gets region for given {@code nlpcraft:geo} token. *

* Backed by the {@code GEO_REGION} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID.
     * @return Region.
     */
    static public String getGeoRegion(NCToken tok) {
        assert tok != null;

        checkId(tok, "nlpcraft:geo");

        final String region = tok.getMetadata().getString("GEO_REGION");

        return region;
    }

    /**
     * Gets city for given {@code nlpcraft:geo} token.
     *

* Backed by the {@code GEO_CITY} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID.
     * @return City name.
     */
    static public String getGeoCity(NCToken tok) {
        assert tok != null;

        checkId(tok, "nlpcraft:geo");

        final String city = tok.getMetadata().getString("GEO_CITY");

        return city;
    }

    /**
     * Tests if given {@code nlpcraft:geo} token represents a {@code subcontinent}.
     *

* Corresponds to {@code GEO_KIND} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID. * @return {@code True} if given token represents a {@code subcontinent}. * */ static public boolean isGeoSubcontinent(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:geo"); return tok.getMetadata().getString("GEO_KIND").equals("SUBCONTINENT"); } /** * Tests if given {@code nlpcraft:geo} token represents a {@code continent}. *

* Corresponds to {@code GEO_KIND} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID. * @return {@code True} if given token represents a {@code continent}. * */ static public boolean isGeoContinent(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:geo"); return tok.getMetadata().getString("GEO_KIND").equals("CONTINENT"); } /** * Tests if given {@code nlpcraft:geo} token represents a {@code country}. *

* Corresponds to {@code GEO_KIND} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID. * @return {@code True} if given token represents a {@code country}. * */ static public boolean isGeoCountry(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:geo"); return tok.getMetadata().getString("GEO_KIND").equals("COUNTRY"); } /** * Tests if given {@code nlpcraft:geo} token represents a {@code city}. *

* Corresponds to {@code GEO_KIND} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID. * @return {@code True} if given token represents a {@code city}. * */ static public boolean isGeoCity(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:geo"); return tok.getMetadata().getString("GEO_KIND").equals("CITY"); } /** * Tests if given {@code nlpcraft:geo} token represents a {@code metro}. *

* Corresponds to {@code GEO_KIND} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID. * @return {@code True} if given token represents a {@code metro}. * */ static public boolean isGeoMetro(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:geo"); return tok.getMetadata().getString("GEO_KIND").equals("METRO"); } /** * Tests if given {@code nlpcraft:geo} token represents a {@code region}. *

* Corresponds to {@code GEO_KIND} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:geo} ID. * @return {@code True} if given token represents a {@code region}. * */ static public boolean isGeoRegion(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:geo"); return tok.getMetadata().getString("GEO_KIND").equals("REGION"); } /** * Tests if given {@code nlpcraft:function} token represents {@code SUM} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code SUM} function of some elements. */ static public boolean isSumFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("SUM"); } /** * Tests if given {@code nlpcraft:function} token represents {@code MAX} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code MAX} function of some elements. */ static public boolean isMaxFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("MAX"); } /** * Tests if given {@code nlpcraft:function} token represents {@code MIN} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code MIN} function of some elements. */ static public boolean isMinFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("MIN"); } /** * Tests if given {@code nlpcraft:function} token represents {@code AVG} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code AVG} function of some elements. */ static public boolean isAvgFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("AVG"); } /** * Tests if given {@code nlpcraft:function} token represents {@code SORT} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code SORT} function of some elements. */ static public boolean isSortFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("SORT"); } /** * Tests if given {@code nlpcraft:function} token represents {@code LIMIT} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code LIMIT} function of some elements. */ static public boolean isLimitFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("LIMIT"); } /** * Tests if given {@code nlpcraft:function} token represents {@code GROUP} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code GROUP} function of some elements. */ static public boolean isGroupFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("GROUP"); } /** * Tests if given {@code nlpcraft:function} token represents {@code CORRELATION} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code CORRELATION} function of some elements. */ static public boolean isCorrelationFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("CORRELATION"); } /** * Tests if given {@code nlpcraft:function} token represents {@code COMPARE} function of some elements. *

* Corresponds to {@code FUNCTION_TYPE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return {@code True} if given token represents {@code COMPARE} function of some elements. */ static public boolean isCompareFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return tok.getMetadata().getString("FUNCTION_TYPE").equals("COMPARE"); } /** * Gets limit value if given {@code nlpcraft:function} token represents {@code LIMIT} function. *

* Corresponds to {@code FUNCTION_LIMIT} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID or * it isn't of {@code LIMIT} type. * @return limit value if given {@code nlpcraft:function} token represents {@code LIMIT} function. * @see #isLimitFun(NCToken) */ static public double getFunLimit(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); if (!isLimitFun(tok)) throw new IllegalArgumentException("'nlpcraft:function' token is not of 'LIMIT' type: " + tok); return tok.getMetadata().getDouble("FUNCTION_LIMIT"); } /** * Optional value of whether this limit or sort {@code nlpcraft:function} token is ascending or descending. *

* Corresponds to {@code FUNCTION_ASC} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID or * it isn't of {@code LIMIT} or {@code SORT} type. * @return Limit or sort function direction flag. */ static public Optional isAscendingFun(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); if (!isLimitFun(tok) && !isSortFun(tok)) throw new IllegalArgumentException("'nlpcraft:function' token is not of 'LIMIT' or 'SORT' type: " + tok); return Optional.ofNullable((Boolean)tok.getMetadata().get("FUNCTION_ASC")); } /** * Gets indexes of the element(s) given @code nlpcraft:function} token is referencing. Returns potentially empty list. *

* Corresponds to {@code FUNCTION_INDEXES} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:function} ID. * @return Indexes of the element(s) given token is referencing. */ static public List getFunIndexes(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:function"); return (List)tok.getMetadata().get("FUNCTION_INDEXES"); } /** * Tests whether or not given token represents a free word. A free word is a token that was detected * neither as a user defined token nor as one of the semantic system tokens, i.e. it has * token ID {@code nlpcraft:nlp}. *

* Backed by the {@code NLP_FREEWORD} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Free word flag.
     */
    static public boolean isFreeWord(NCToken tok) {
        assert tok != null;

        final boolean freeWord = tok.getMetadata().getBoolean("NLP_FREEWORD");

        return freeWord;
    }

    /**
     * Gets internal globally unique system ID of the given token.
     *

* Backed by the {@code NLP_UNID} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Globally unique system ID of the token.
     */
    static public String getUnid(NCToken tok) {
        assert tok != null;

        final String unid = tok.getMetadata().getString("NLP_UNID");

        return unid;
    }

    /**
     * Tests whether or not the given token is a stopword. Stopwords are some extremely common words which
     * add little value in helping understanding user input and are excluded from the processing
     * entirely. For example, words like {@code a, the, can, of, about, over}, etc. are typical
     * stopwords in English. NLPCraft has built-in set of stopwords while user models can
     * specify additional stopwords.
     *

* Backed by the {@code NLP_STOPWORD} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Stopword flag.
     */
    static public boolean isStopWord(NCToken tok) {
        assert tok != null;

        final boolean stopWord = tok.getMetadata().getBoolean("NLP_STOPWORD");

        return stopWord;
    }

    /**
     * Tests whether or not the given token is a swear word. NLPCraft has built-in list of common English swear words.
     *

* Backed by the {@code NLP_SWEAR} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Swear word flag.
     */
    static public boolean isSwearWord(NCToken tok) {
        assert tok != null;

        final boolean swearWord = tok.getMetadata().getBoolean("NLP_SWEAR");

        return swearWord;
    }

    /**
     * Gets numeric value of how sparse the given token is. Sparsity zero means that all individual words in the token
     * follow each other.
     *

* Backed by the {@code NLP_SPARSITY} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Sparsity value of this token.
     */
    static public int getSparsity(NCToken tok) {
        assert tok != null;

        final int sparsity = tok.getMetadata().getInteger("NLP_SPARSITY");

        return sparsity;
    }

    /**
     * Gets index of the first word in the given token. Note that token may not be contiguous.
     *

* Backed by the {@code NLP_MININDEX} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Index of the first word in this token.
     */
    static public int getMinIndex(NCToken tok) {
        assert tok != null;

        final int minIndex = tok.getMetadata().getInteger("NLP_MININDEX");

        return minIndex;
    }

    /**
     * Whether or not this token was matched on direct (not permutated) synonym.
     *

* Backed by the {@code NLP_DIRECT} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Whether or not this token was matched on direct (not permutated) synonym.
     */
    static public boolean isDirectSynonym(NCToken tok) {
        assert tok != null;

        final boolean direct = tok.getMetadata().getBoolean("NLP_DIRECT");

        return direct;
    }

    /**
     * Gets index of the last word in the given token. Note that token may not be contiguous.
     *

* Backed by the {@code NLP_MAXINDEX} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Index of the last word in this token.
     */
    static public int getMaxIndex(NCToken tok) {
        assert tok != null;

        final int maxIndex = tok.getMetadata().getInteger("NLP_MAXINDEX");

        return maxIndex;
    }

    /**
     * Gets number of individual words in the given token.
     *

* Backed by the {@code NLP_WORDLENGTH} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Number of individual words in this token.
     */
    static public int getWordLength(NCToken tok) {
        assert tok != null;

        final int wordCount = tok.getMetadata().getInteger("NLP_WORDLENGTH");

        return wordCount;
    }

    /**
     * Tests whether or not the given token has zero sparsity.
     *

* Backed by the {@code NLP_CONTIGUOUS} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Contiguous flag.
     */
    static public boolean isContiguous(NCToken tok) {
        assert tok != null;

        final boolean contiguous = tok.getMetadata().getBoolean("NLP_CONTIGUOUS");

        return contiguous;
    }

    /**
     * Tests whether the given token represents an English word. Note that this only checks that token's text
     * consists of characters of English alphabet, i.e. the text doesn't have to be necessary
     * a known valid English word.
     *

* Backed by the {@code NLP_ENGLISH} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Whether this token represents an English word.
     */
    static public boolean isEnglish(NCToken tok) {
        assert tok != null;

        final boolean english = tok.getMetadata().getBoolean("NLP_ENGLISH");

        return english;
    }

    /**
     * Gets list of word indexes in the given token. Always has at least one element in it.
     *

* Corresponds to {@code NLP_WORDINDEXES} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @return List of word indexes in this token. */ static public List getWordIndexes(NCToken tok) { assert tok != null; return (List)tok.getMetadata().get("NLP_WORDINDEXES"); } /** * Tests whether given token's POS tag is a synthetic one (for multiword token). * * @param tok A token. * @return {@code True} if POS tag is a synthetic one. */ static public boolean isSynthetic(NCToken tok) { return getPosTag(tok).equals("---"); } /** * Tests whether POS tag for given token is one of {@code NN}, {@code NNS}, {@code NNP}, or {@code NNPS}. * * @param tok A token. * @return {@code True} if POS tag is one of {@code NN}, {@code NNS}, {@code NNP}, or {@code NNPS}. */ static public boolean isNoun(NCToken tok) { String pos = getPosTag(tok); return pos.equals("NN") || pos.equals("NNS") || pos.equals("NNP") || pos.equals("NNPS"); } /** * Tests whether POS tag for given token is one of {@code PRP} or {@code PRP$}. * * @param tok A token. * @return {@code True} if POS tag is one of {@code PRP} or {@code PRP$}. */ static public boolean isPronoun(NCToken tok) { String pos = getPosTag(tok); return pos.equals("PRP") || pos.equals("PRP$"); } /** * Tests whether POS tag for given token is one of {@code JJ}, {@code JJR}, or {@code JJS}. * * @param tok A token. * @return {@code True} if POS tag is one of {@code JJ}, {@code JJR}, or {@code JJS}. */ static public boolean isAdjective(NCToken tok) { String pos = getPosTag(tok); return pos.equals("JJ") || pos.equals("JJR") || pos.equals("JJS"); } /** * Tests whether POS tag for given token is one of {@code VB}, {@code VBD}, {@code VBG}, * {@code VBN}, {@code VBP}, or {@code VBZ}. * * @param tok A token. * @return {@code True} if POS tag is one of {@code VB}, {@code VBD}, {@code VBG}, * {@code VBN}, {@code VBP}, or {@code VBZ}. 
*/
    static public boolean isVerb(NCToken tok) {
        switch (getPosTag(tok)) {
            case "VB":
            case "VBD":
            case "VBG":
            case "VBN":
            case "VBP":
            case "VBZ":
                return true;

            default:
                return false;
        }
    }

    /**
     * Tests whether POS tag for given token is one of {@code RB}, {@code RBR}, {@code RBS} or {@code WRB}.
     *
     * @param tok A token.
     * @return {@code True} if POS tag is one of {@code RB}, {@code RBR}, {@code RBS} or {@code WRB}.
     */
    static public boolean isAdverb(NCToken tok) {
        switch (getPosTag(tok)) {
            case "RB":
            case "RBR":
            case "RBS":
            case "WRB":
                return true;

            default:
                return false;
        }
    }

    /**
     * Tests whether POS tag for given token is {@code IN}.
     *
     * @param tok A token.
     * @return {@code True} if POS tag is {@code IN}.
     */
    static public boolean isPreposition(NCToken tok) {
        return getPosTag(tok).equals("IN");
    }

    /**
     * Tests whether POS tag for given token is one of {@code DT}, {@code PDT}, or {@code WDT}.
     *
     * @param tok A token.
     * @return {@code True} if POS tag is one of {@code DT}, {@code PDT}, or {@code WDT}.
     */
    static public boolean isDeterminer(NCToken tok) {
        switch (getPosTag(tok)) {
            case "DT":
            case "PDT":
            case "WDT":
                return true;

            default:
                return false;
        }
    }

    /**
     * Tests whether or not this token is surrounded by any of {@code '[', ']', '{', '}', '(', ')'} brackets.
     *

* Backed by the {@code NLP_BRACKETED} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Bracketing flag.
     */
    static public boolean isBracketed(NCToken tok) {
        assert tok != null;

        final boolean bracketed = tok.getMetadata().getBoolean("NLP_BRACKETED");

        return bracketed;
    }

    /**
     * Gets description of Penn Treebank POS tag.
     * Learn more at http://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
     *

* Backed by the {@code NLP_POSDESC} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Description of Penn Treebank POS tag.
     */
    static public String getPosDescription(NCToken tok) {
        assert tok != null;

        final String posDesc = tok.getMetadata().getString("NLP_POSDESC");

        return posDesc;
    }

    /**
     * Tests whether or not this token is found in Princeton WordNet database.
     *

* Backed by the {@code NLP_DICT} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Princeton WordNet database inclusion flag.
     */
    static public boolean isKnownWord(NCToken tok) {
        assert tok != null;

        final boolean inDictionary = tok.getMetadata().getBoolean("NLP_DICT");

        return inDictionary;
    }

    /**
     * Gets index of the given token in the sentence.
     *

* Backed by the {@code NLP_INDEX} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Index of the given token in the sentence.
     */
    static public int getTokenIndex(NCToken tok) {
        assert tok != null;

        final int index = tok.getMetadata().getInteger("NLP_INDEX");

        return index;
    }

    /**
     * Gets lemma of the given token. Lemma is a canonical form of this token. Note that stemming and lemmatization
     * allow to reduce inflectional forms and sometimes derivationally related forms of a word to a
     * common base form. Lemmatization refers to the use of a vocabulary and morphological analysis
     * of words, normally aiming to remove inflectional endings only and to return the base or dictionary
     * form of a word, which is known as the lemma. Learn
     * more at https://nlp.stanford.edu/IR-book/html/htmledition/stemming-and-lemmatization-1.html
     *

* Backed by the {@code NLP_LEMMA} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Given token lemma.
     */
    static public String getLemma(NCToken tok) {
        assert tok != null;

        final String lemma = tok.getMetadata().getString("NLP_LEMMA");

        return lemma;
    }

    /**
     * Gets stem of the given token. Note that stemming and lemmatization allow to reduce inflectional forms
     * and sometimes derivationally related forms of a word to a common base form. Unlike lemma,
     * stemming is a basic heuristic process that chops off the ends of words in the hope of achieving
     * this goal correctly most of the time, and often includes the removal of derivational affixes.
     * Learn more at https://nlp.stanford.edu/IR-book/html/htmledition/stemming-and-lemmatization-1.html
     *

* Backed by the {@code NLP_STEM} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Given token stem.
     */
    static public String getStem(NCToken tok) {
        assert tok != null;

        final String stem = tok.getMetadata().getString("NLP_STEM");

        return stem;
    }

    /**
     * Gets character length of the given token.
     *

* Corresponds to {@code NLP_CHARLENGTH} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @return Character length of the given token. */ static int getCharLength(NCToken tok) { assert tok != null; return tok.getMetadata().getInteger("NLP_CHARLENGTH"); } /** * Gets original user input text for given token. *

* Backed by the {@code NLP_ORIGTEXT} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Original token text.
     */
    static public String getOriginalText(NCToken tok) {
        assert tok != null;

        final String origText = tok.getMetadata().getString("NLP_ORIGTEXT");

        return origText;
    }

    /**
     * Gets normalized user input text for given token.
     *

* Backed by the {@code NLP_NORMTEXT} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Normalized token text.
     */
    static public String getNormalizedText(NCToken tok) {
        assert tok != null;

        final String normText = tok.getMetadata().getString("NLP_NORMTEXT");

        return normText;
    }

    /**
     * Gets Penn Treebank POS tag for given token. Note that additionally to standard Penn Treebank POS
     * tags NLPCraft introduced {@code '---'} synthetic tag to indicate a POS tag for multiword tokens.
     * Learn more at http://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
     *

* Backed by the {@code NLP_POS} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Penn Treebank POS tag for given token.
     */
    static public String getPosTag(NCToken tok) {
        assert tok != null;

        final String posTag = tok.getMetadata().getString("NLP_POS");

        return posTag;
    }

    /**
     * Tests whether or not given token is surrounded by single or double quotes.
     *

* Backed by the {@code NLP_QUOTED} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @return Quoted flag.
     */
    static public boolean isQuoted(NCToken tok) {
        assert tok != null;

        final boolean quoted = tok.getMetadata().getBoolean("NLP_QUOTED");

        return quoted;
    }

    /**
     * Gets coordinate latitude for this {@code nlpcraft:coordinate} token.
     *

* Corresponds to {@code COORDINATE_LATITUDE} token {@link NCToken#getMetadata() metadata} property. * * @param tok A token. * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:coordinate} ID. * @return Coordinate latitude for this {@code nlpcraft:coordinate} token. */ static public Double getCoordinateLatitude(NCToken tok) { assert tok != null; checkId(tok, "nlpcraft:coordinate"); return tok.getMetadata().getDouble("COORDINATE_LATITUDE"); } /** * Gets coordinate longitude for this {@code nlpcraft:coordinate} token. *

* Backed by the {@code COORDINATE_LONGITUDE} property of the token {@link NCToken#getMetadata() metadata}.
     *
     * @param tok A token; must not be {@code null}.
     * @throws IllegalArgumentException Thrown if given token doesn't have {@code nlpcraft:coordinate} ID.
     * @return Coordinate longitude for this {@code nlpcraft:coordinate} token.
     */
    static public Double getCoordinateLongitude(NCToken tok) {
        assert tok != null;

        checkId(tok, "nlpcraft:coordinate");

        final Double longitude = tok.getMetadata().getDouble("COORDINATE_LONGITUDE");

        return longitude;
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy