![JAR search and dependency download from the Maven repository](/logo.png)
org.apache.asterix.optimizer.base.FuzzyUtils Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.optimizer.base;
import java.util.ArrayList;
import java.util.Locale;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.asterix.aql.util.FunctionUtils;
import org.apache.asterix.metadata.declared.AqlMetadataProvider;
import org.apache.asterix.om.base.AFloat;
import org.apache.asterix.om.base.AInt32;
import org.apache.asterix.om.base.IAObject;
import org.apache.asterix.om.functions.AsterixBuiltinFunctions;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
public class FuzzyUtils {
private final static String DEFAULT_SIM_FUNCTION = "jaccard";
private final static float JACCARD_DEFAULT_SIM_THRESHOLD = .8f;
private final static int EDIT_DISTANCE_DEFAULT_SIM_THRESHOLD = 1;
private final static String SIM_FUNCTION_PROP_NAME = "simfunction";
private final static String SIM_THRESHOLD_PROP_NAME = "simthreshold";
public final static String JACCARD_FUNCTION_NAME = "jaccard";
public final static String EDIT_DISTANCE_FUNCTION_NAME = "edit-distance";
public static FunctionIdentifier getTokenizer(ATypeTag inputTag) {
switch (inputTag) {
case STRING:
return AsterixBuiltinFunctions.COUNTHASHED_WORD_TOKENS;
case UNORDEREDLIST:
case ORDEREDLIST:
case ANY:
return null;
default:
throw new NotImplementedException("No tokenizer for type " + inputTag);
}
}
public static IAObject getSimThreshold(AqlMetadataProvider metadata, String simFuncName) {
String simThresholValue = metadata.getPropertyValue(SIM_THRESHOLD_PROP_NAME);
IAObject ret = null;
if (simFuncName.equals(JACCARD_FUNCTION_NAME)) {
if (simThresholValue != null) {
float jaccThresh = Float.parseFloat(simThresholValue);
ret = new AFloat(jaccThresh);
} else {
ret = new AFloat(JACCARD_DEFAULT_SIM_THRESHOLD);
}
} else if (simFuncName.equals(EDIT_DISTANCE_FUNCTION_NAME)) {
if (simThresholValue != null) {
int edThresh = Integer.parseInt(simThresholValue);
ret = new AInt32(edThresh);
} else {
ret = new AFloat(EDIT_DISTANCE_DEFAULT_SIM_THRESHOLD);
}
}
return ret;
}
public static FunctionIdentifier getFunctionIdentifier(String simFuncName) {
if (simFuncName.equals(JACCARD_FUNCTION_NAME)) {
return AsterixBuiltinFunctions.SIMILARITY_JACCARD;
} else if (simFuncName.equals(EDIT_DISTANCE_FUNCTION_NAME)) {
return AsterixBuiltinFunctions.EDIT_DISTANCE;
}
return null;
}
public static ScalarFunctionCallExpression getComparisonExpr(String simFuncName,
ArrayList> cmpArgs) {
if (simFuncName.equals(JACCARD_FUNCTION_NAME)) {
return new ScalarFunctionCallExpression(FunctionUtils.getFunctionInfo(AlgebricksBuiltinFunctions.GE),
cmpArgs);
} else if (simFuncName.equals(EDIT_DISTANCE_FUNCTION_NAME)) {
return new ScalarFunctionCallExpression(FunctionUtils.getFunctionInfo(AlgebricksBuiltinFunctions.LE),
cmpArgs);
}
return null;
}
public static float getSimThreshold(AqlMetadataProvider metadata) {
float simThreshold = JACCARD_DEFAULT_SIM_THRESHOLD;
String simThresholValue = metadata.getPropertyValue(SIM_THRESHOLD_PROP_NAME);
if (simThresholValue != null) {
simThreshold = Float.parseFloat(simThresholValue);
}
return simThreshold;
}
// TODO: The default function depend on the input types.
public static String getSimFunction(AqlMetadataProvider metadata) {
String simFunction = metadata.getPropertyValue(SIM_FUNCTION_PROP_NAME);
if (simFunction == null) {
simFunction = DEFAULT_SIM_FUNCTION;
}
simFunction = simFunction.toLowerCase();
return simFunction;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy