org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hive-exec Show documentation
Show all versions of hive-exec Show documentation
Hive is a data warehouse infrastructure built on top of Hadoop; see
http://wiki.apache.org/hadoop/Hive
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.udf.generic;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
 * IN_FILE(str, filename) returns true if 'str' appears in the file specified
 * by 'filename'. A string is considered to be in the file if that string
 * appears as a line in the file.
 *
 * If either argument is NULL then NULL is returned.
 *
 * The lines of the file are read on the first evaluate() call that has two
 * non-NULL arguments and are then kept in memory by this instance.
 */
@Description(name = "in_file",
    value = "_FUNC_(str, filename) - Returns true if str appears in the file")
public class GenericUDFInFile extends GenericUDF {

  /** Distinct lines of the file; null until a load has completed successfully. */
  HashSet<String> set;

  /** Inspector for the first argument (the string to look up). */
  ObjectInspector strObjectInspector;

  /** Inspector for the second argument (the constant file name). */
  ObjectInspector fileObjectInspector;

  /**
   * Validates the argument list and remembers the argument inspectors.
   *
   * @param arguments inspectors for the call's arguments; exactly two are
   *        accepted, both must map to java.lang.String, and the second must
   *        be a constant
   * @return the Java boolean object inspector, i.e. the result type
   * @throws UDFArgumentException if the number, type or constness of the
   *         arguments is not acceptable
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments)
      throws UDFArgumentException {
    if (arguments.length != 2) {
      throw new UDFArgumentLengthException(
          "IN_FILE() accepts exactly 2 arguments.");
    }

    for (int i = 0; i < arguments.length; i++) {
      if (!String.class.equals(
          PrimitiveObjectInspectorUtils.
              getJavaPrimitiveClassFromObjectInspector(arguments[i]))) {
        throw new UDFArgumentTypeException(i, "The "
            + GenericUDFUtils.getOrdinal(i + 1)
            + " argument of function IN_FILE must be a string but "
            + arguments[i].toString() + " was given.");
      }
    }

    strObjectInspector = arguments[0];
    fileObjectInspector = arguments[1];

    // The file name has to be known up front (see getRequiredFiles()), so a
    // per-row file name is rejected here.
    if (!ObjectInspectorUtils.isConstantObjectInspector(fileObjectInspector)) {
      throw new UDFArgumentTypeException(1,
          "The second argument of IN_FILE() must be a constant string but " +
          fileObjectInspector.toString() + " was given.");
    }

    return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
  }

  /**
   * Reports the constant file name given as the second argument.
   *
   * @return a one-element array holding the file name
   */
  @Override
  public String[] getRequiredFiles() {
    return new String[] {
        ObjectInspectorUtils.getWritableConstantValue(fileObjectInspector)
            .toString()
    };
  }

  /**
   * Tests whether the first argument equals one of the lines of the file.
   * The file is loaded on the first call that reaches the lookup.
   *
   * @param arguments the deferred string and file name values
   * @return Boolean.TRUE / Boolean.FALSE, or null if either argument is NULL
   * @throws HiveException if the file cannot be opened or read
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // Fetch each deferred value only once; the second argument is not touched
    // when the first one is already NULL.
    Object strArg = arguments[0].get();
    if (strArg == null) {
      return null;
    }
    Object fileArg = arguments[1].get();
    if (fileArg == null) {
      return null;
    }

    String str = (String) ObjectInspectorUtils.copyToStandardJavaObject(
        strArg, strObjectInspector);

    if (set == null) {
      String fileName = (String) ObjectInspectorUtils.copyToStandardJavaObject(
          fileArg, fileObjectInspector);
      loadFromLocalFile(fileName);
    }

    return Boolean.valueOf(set.contains(str));
  }

  /**
   * Opens the file, loads its lines through load(InputStream) and always
   * closes the stream afterwards.
   */
  private void loadFromLocalFile(String fileName) throws HiveException {
    InputStream in;
    try {
      // Only the last path component is used, so the file is opened relative
      // to the current working directory.
      // NOTE(review): presumably the file reported by getRequiredFiles() is
      // made available there at run time - confirm.
      in = new FileInputStream((new File(fileName)).getName());
    } catch (FileNotFoundException e) {
      throw new HiveException(e);
    }

    try {
      load(in);
    } finally {
      try {
        in.close();
      } catch (IOException ignored) {
        // A failure to close a stream that was only read from cannot affect
        // the loaded data, and throwing here would mask an exception that
        // load() may already be propagating.
      }
    }
  }

  /**
   * Load the file from an InputStream. Each line becomes one entry of the
   * set; the set is replaced only after the whole stream has been read, so a
   * failed load never leaves a partially filled set behind.
   *
   * The stream is not closed by this method; that is left to the caller.
   *
   * @param is The InputStream contains the file data.
   * @throws HiveException if reading from the stream fails
   */
  public void load(InputStream is) throws HiveException {
    // NOTE(review): the reader uses the platform default charset - confirm
    // that this matches the encoding of the files passed to IN_FILE().
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(is));

    HashSet<String> lines = new HashSet<String>();

    try {
      String line;
      while ((line = reader.readLine()) != null) {
        lines.add(line);
      }
    } catch (Exception e) {
      // Kept broad on purpose: every failure is reported as a HiveException,
      // as before.
      throw new HiveException(e);
    }

    set = lines;
  }

  /**
   * Renders this call for display.
   *
   * @param children display strings of the two arguments
   * @return "in_file(" + first + ", " + second + ")"
   */
  @Override
  public String getDisplayString(String[] children) {
    assert (children.length == 2);
    return "in_file(" + children[0] + ", " + children[1] + ")";
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy