org.apache.hadoop.hive.ql.udf.UDFSubstr Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hive-apache Show documentation
Show all versions of hive-apache Show documentation
Shaded version of Apache Hive for Trino
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.udf;
import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStart;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStartLen;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
/**
 * UDFSubstr.
 *
 * <p>Implements {@code substr}/{@code substring} for string ({@link Text}) and binary
 * ({@link BytesWritable}) inputs.
 *
 * <p>Positions are 1-based. A position of 0 is treated like 1, and a negative position
 * counts backwards from the end of the input. A position that lies outside the input, or a
 * non-positive length, yields an empty result. A {@code null} argument yields {@code null}.
 *
 * <p>For string inputs, positions and lengths are measured in characters (Unicode code
 * points), so a character that Java stores as a surrogate pair is never split.
 *
 * <p>The {@link Text} overloads return an instance-owned object that is overwritten by the
 * next call on the same instance.
 */
@Description(name = "substr,substring",
    value = "_FUNC_(str, pos[, len]) - returns the substring of str that"
    + " starts at pos and is of length len or" +
    "_FUNC_(bin, pos[, len]) - returns the slice of byte array that"
    + " starts at pos and is of length len",
    extended = "pos is a 1-based index. If pos<0 the starting position is"
    + " determined by counting backwards from the end of str.\n"
    + "Example:\n "
    + " > SELECT _FUNC_('Facebook', 5) FROM src LIMIT 1;\n"
    + " 'book'\n"
    + " > SELECT _FUNC_('Facebook', -5) FROM src LIMIT 1;\n"
    + " 'ebook'\n"
    + " > SELECT _FUNC_('Facebook', 5, 1) FROM src LIMIT 1;\n"
    + " 'b'")
@VectorizedExpressions({StringSubstrColStart.class, StringSubstrColStartLen.class})
public class UDFSubstr extends UDF {

  /** Scratch pair {start, end} (0-based, end exclusive) filled in by {@link #makeIndex}. */
  private final int[] index;

  /** Reused result holder for the {@link Text} overloads. */
  private final Text r;

  /** Length passed by the two-argument overloads, meaning "up to the end of the input". */
  private final IntWritable maxValue = new IntWritable(Integer.MAX_VALUE);

  public UDFSubstr() {
    index = new int[2];
    r = new Text();
  }

  /**
   * Returns the substring of {@code t} starting at {@code pos} with at most {@code len}
   * characters.
   *
   * @param t the input string
   * @param pos 1-based start position; negative values count from the end, 0 acts like 1
   * @param len maximum number of characters to return
   * @return {@code null} if any argument is {@code null}; otherwise the reused result
   *     object, which is empty when {@code len <= 0} or {@code pos} lies outside the input
   */
  public Text evaluate(Text t, IntWritable pos, IntWritable len) {
    if ((t == null) || (pos == null) || (len == null)) {
      return null;
    }

    r.clear();
    if (len.get() <= 0) {
      return r;
    }

    String s = t.toString();
    // Work in code points rather than UTF-16 code units: s.length()/s.substring() would count
    // a supplementary character as two positions and could cut a surrogate pair in half.
    int charCount = s.codePointCount(0, s.length());
    int[] range = makeIndex(pos.get(), len.get(), charCount);
    if (range == null) {
      return r;
    }

    // Translate the code-point range back into UTF-16 offsets for substring().
    int from = s.offsetByCodePoints(0, range[0]);
    int to = s.offsetByCodePoints(from, range[1] - range[0]);
    r.set(s.substring(from, to));
    return r;
  }

  /**
   * Converts a 1-based (possibly negative) position and a length into a 0-based
   * {@code [start, end)} range clamped to the input.
   *
   * @param pos 1-based start position; negative counts from the end, 0 acts like 1
   * @param len requested length; callers only pass positive values
   * @param inputLen number of elements in the input
   * @return the shared {@code index} array holding {start, end}, or {@code null} when
   *     {@code pos} lies outside the input
   */
  private int[] makeIndex(int pos, int len, int inputLen) {
    // Compare against both bounds explicitly instead of using Math.abs(pos):
    // Math.abs(Integer.MIN_VALUE) is negative, which would slip past an abs-based guard
    // and produce a negative start index.
    if (pos > inputLen || pos < -inputLen) {
      return null;
    }

    int start;
    if (pos > 0) {
      start = pos - 1;
    } else if (pos < 0) {
      start = inputLen + pos;
    } else {
      start = 0;
    }

    // Checking the remaining size first keeps start + len from overflowing.
    int end;
    if ((inputLen - start) < len) {
      end = inputLen;
    } else {
      end = start + len;
    }

    index[0] = start;
    index[1] = end;
    return index;
  }

  /**
   * Returns the substring of {@code s} from {@code pos} to the end of the string.
   *
   * @param s the input string
   * @param pos 1-based start position; negative values count from the end, 0 acts like 1
   * @return see {@link #evaluate(Text, IntWritable, IntWritable)}
   */
  public Text evaluate(Text s, IntWritable pos) {
    return evaluate(s, pos, maxValue);
  }

  /**
   * Returns the slice of {@code bw} starting at {@code pos} with at most {@code len} bytes.
   *
   * @param bw the input bytes; only the first {@code bw.getLength()} bytes are considered
   * @param pos 1-based start position; negative values count from the end, 0 acts like 1
   * @param len maximum number of bytes to return
   * @return {@code null} if any argument is {@code null}; otherwise a new
   *     {@link BytesWritable}, which is empty when {@code len <= 0} or {@code pos} lies
   *     outside the input
   */
  public BytesWritable evaluate(BytesWritable bw, IntWritable pos, IntWritable len) {
    if ((bw == null) || (pos == null) || (len == null)) {
      return null;
    }

    if (len.get() <= 0) {
      return new BytesWritable();
    }

    int[] range = makeIndex(pos.get(), len.get(), bw.getLength());
    if (range == null) {
      return new BytesWritable();
    }

    return new BytesWritable(Arrays.copyOfRange(bw.getBytes(), range[0], range[1]));
  }

  /**
   * Returns the slice of {@code bw} from {@code pos} to the end of the input.
   *
   * @param bw the input bytes
   * @param pos 1-based start position; negative values count from the end, 0 acts like 1
   * @return see {@link #evaluate(BytesWritable, IntWritable, IntWritable)}
   */
  public BytesWritable evaluate(BytesWritable bw, IntWritable pos) {
    return evaluate(bw, pos, maxValue);
  }
}