All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.phoenix.expression.function.CollationKeyFunction Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.phoenix.expression.function;

import java.io.DataInput;
import java.io.IOException;
import java.sql.SQLException;
import java.text.Collator;
import java.util.List;
import java.util.Locale;

import org.apache.commons.lang3.BooleanUtils;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.phoenix.expression.Expression;
import org.apache.phoenix.parse.FunctionParseNode;
import org.apache.phoenix.schema.tuple.Tuple;
import org.apache.phoenix.schema.types.PBoolean;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.schema.types.PInteger;
import org.apache.phoenix.schema.types.PVarbinary;
import org.apache.phoenix.schema.types.PVarchar;
import org.apache.phoenix.util.VarBinaryFormatter;
import org.apache.phoenix.util.i18n.LinguisticSort;
import org.apache.phoenix.util.i18n.LocaleUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * A Phoenix Function that calculates a collation key for an input string based
 * on a caller-provided locale and collator strength and decomposition settings.
 *
 * The locale should be specified as xx_yy_variant where xx is the ISO 639-1
 * 2-letter language code, yy is the ISO 3166 2-letter country code. Both
 * countryCode and variant are optional. For example, zh_TW_STROKE, zh_TW and zh
 * are all valid locale representations. Note the language code, country code
 * and variant are used as arguments to the constructor of java.util.Locale.
 *
 * This function originally used the open-source i18n-util package to obtain the
 * collators it needs from the provided locale. As i18n-util is not maintained
 * anymore, the relevant parts from it were copied into Phoenix.
 * See: https://issues.apache.org/jira/browse/PHOENIX-6818
 *
 * The LinguisticSort implementation from i18n-util encapsulates sort-related
 * functionality for a substantive list of locales. For each locale, it provides
 * a collator and an Oracle-specific database function that can be used to sort
 * strings according to the natural language rules of that locale.
 *
 * This function uses the collator returned by LinguisticSort.getCollator to
 * produce a collation key for its input string. A user can expect that the
 * sorting semantics of this function for a given locale is equivalent to the
 * sorting behaviour of an Oracle query that is constructed using the Oracle
 * functions returned by LinguisticSort for that locale.
 *
 * The optional third argument to the function is a boolean that specifies
 * whether to use the upper-case collator (case-insensitive) returned by
 * LinguisticSort.getUpperCaseCollator.
 *
 * The optional fourth and fifth arguments are used to set respectively the
 * strength and decomposition of the collator returned by LinguisticSort using
 * the setStrength and setDecomposition methods of java.text.Collator.
 *
 */
@FunctionParseNode.BuiltInFunction(name = CollationKeyFunction.NAME, args = {
		// input string
		@FunctionParseNode.Argument(allowedTypes = { PVarchar.class }),
		// ISO Code for Locale
		@FunctionParseNode.Argument(allowedTypes = { PVarchar.class }, isConstant = true),
		// whether to use special upper case collator
		@FunctionParseNode.Argument(allowedTypes = { PBoolean.class }, defaultValue = "false", isConstant = true),
		// collator strength
		@FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true),
		// collator decomposition
		@FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true) })
public class CollationKeyFunction extends ScalarFunction {

	private static final Logger LOGGER = LoggerFactory.getLogger(CollationKeyFunction.class);

	/** SQL name under which this built-in function is registered. */
	public static final String NAME = "COLLATION_KEY";

	/**
	 * Collator derived from the constant arguments (children 1 to 4). It is not
	 * final because it is (re)built by {@link #initialize()} both on
	 * construction and after {@link #readFields(DataInput)}.
	 */
	private Collator collator;

	/**
	 * No-argument constructor; the collator stays unset until
	 * {@link #readFields(DataInput)} is called.
	 */
	public CollationKeyFunction() {
	}

	/**
	 * Creates the function from its argument expressions and builds the collator
	 * from the constant arguments.
	 *
	 * @param children argument expressions, in the order declared on the
	 *                 {@code BuiltInFunction} annotation of this class
	 * @throws SQLException declared for the superclass constructor
	 */
	public CollationKeyFunction(List<Expression> children) throws SQLException {
		super(children);
		initialize();
	}

	/**
	 * Restores the state handled by the superclass and then rebuilds the
	 * collator, which is derived from the children rather than read from
	 * {@code input}.
	 *
	 * @param input source of the serialized state
	 * @throws IOException if the superclass fails to read its fields
	 */
	@Override
	public void readFields(DataInput input) throws IOException {
		super.readFields(input);
		initialize();
	}

	/**
	 * Evaluates the first child as a VARCHAR and replaces {@code ptr} with the
	 * collation key bytes of the resulting string.
	 *
	 * @param tuple row being evaluated
	 * @param ptr   receives the collation key bytes; when the input string is
	 *              {@code null} it is left as the child expression set it
	 * @return {@code false} if the child expression could not be evaluated,
	 *         {@code true} otherwise
	 */
	@Override
	public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) {
		Expression expression = getChildren().get(0);
		if (!expression.evaluate(tuple, ptr)) {
			return false;
		}
		String inputString = (String) PVarchar.INSTANCE.toObject(ptr, expression.getSortOrder());
		LOGGER.trace("CollationKey inputString: {}", inputString);

		if (inputString == null) {
			// Nothing to collate: report success and leave ptr untouched.
			return true;
		}

		byte[] collationKeyByteArray = collator.getCollationKey(inputString).toByteArray();

		// Guarded so the key is only formatted when it will actually be logged.
		if (LOGGER.isTraceEnabled()) {
			LOGGER.trace("CollationKey bytes: {}",
					VarBinaryFormatter.INSTANCE.format(collationKeyByteArray));
		}

		ptr.set(collationKeyByteArray);
		return true;
	}

	/**
	 * Builds {@link #collator} from the constant arguments: the locale ISO code
	 * (child 1), the upper-case collator flag (child 2) and the optional
	 * strength (child 3) and decomposition (child 4) overrides.
	 */
	private void initialize() {
		// getLiteralValue is inherited; it is not defined in this class.
		String localeISOCode = getLiteralValue(1, String.class);
		Boolean useSpecialUpperCaseCollator = getLiteralValue(2, Boolean.class);
		Integer collatorStrength = getLiteralValue(3, Integer.class);
		Integer collatorDecomposition = getLiteralValue(4, Integer.class);

		LOGGER.trace("Input (literal) arguments:localeISOCode: {}, useSpecialUpperCaseCollator: {}"
				+ ", collatorStrength: {}, collatorDecomposition: {}",
				localeISOCode, useSpecialUpperCaseCollator, collatorStrength, collatorDecomposition);

		Locale locale = LocaleUtils.get().getLocaleByIsoCode(localeISOCode);

		if (LOGGER.isTraceEnabled()) {
			// The language tag is passed as an argument, never as a format string.
			LOGGER.trace("Locale: {}", locale.toLanguageTag());
		}

		LinguisticSort linguisticSort = LinguisticSort.get(locale);

		// A null flag is treated the same as false.
		boolean upperCase = BooleanUtils.isTrue(useSpecialUpperCaseCollator);
		collator = upperCase ? linguisticSort.getUpperCaseCollator(false)
				: linguisticSort.getCollator();

		// Strength and decomposition are only overridden when explicitly supplied.
		if (collatorStrength != null) {
			collator.setStrength(collatorStrength);
		}

		if (collatorDecomposition != null) {
			collator.setDecomposition(collatorDecomposition);
		}

		if (LOGGER.isTraceEnabled()) {
			LOGGER.trace("Collator: [strength: {}, decomposition: {}], Special-Upper-Case: {}",
					collator.getStrength(), collator.getDecomposition(), upperCase);
		}
	}

	/**
	 * @return the VARBINARY type, since the result is the raw collation key bytes
	 */
	@Override
	public PDataType getDataType() {
		return PVarbinary.INSTANCE;
	}

	/**
	 * @return {@link #NAME}
	 */
	@Override
	public String getName() {
		return NAME;
	}

	/**
	 * @return always {@code false}
	 */
	@Override
	public boolean isThreadSafe() {
		// ICU4J Collators are not thread-safe unless they are frozen.
		// TODO: Look into calling freeze() on them to be able return true here.
		// NOTE(review): the collator field is typed java.text.Collator, which has
		// no freeze() method; confirm whether the comment above is still accurate.
		return false;
	}

	/**
	 * @return whether the input string expression (child 0) is nullable
	 */
	@Override
	public boolean isNullable() {
		return getChildren().get(0).isNullable();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy