// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.util;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.io.Text;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.builtin.PigStreaming;
/**
* This util class provides methods that are shared by storage class
* {@link PigStorage} and streaming class {@link PigStreaming}
*
*/
public final class StorageUtil {
private static final String UTF8 = "UTF-8";
/**
 * Transform a String into a byte representing the field delimiter.
 *
 * <p>The argument is first passed through {@code parseSingleQuotedString}
 * and the result is interpreted as one of:
 * <ul>
 * <li>a single character, narrowed to a byte as-is;</li>
 * <li>{@code \t}, meaning tab;</li>
 * <li>{@code \x} followed by hexadecimal digits;</li>
 * <li>a backslash followed by {@code u} and digits. NOTE: these digits are
 *     parsed as a <em>decimal</em> number, not hexadecimal; this is
 *     long-standing behavior that callers may rely on, so it is kept.</li>
 * </ul>
 *
 * @param delimiter a string that may be in single-quoted form
 * @return the field delimiter in byte form
 * @throws IllegalArgumentException if the delimiter is null, empty, longer
 *         than one character without being an escape sequence, an unknown
 *         escape sequence, or an escape whose digits cannot be parsed
 *         (surfaced as {@link NumberFormatException})
 */
public static byte parseFieldDel(String delimiter) {
    if (delimiter == null) {
        throw new IllegalArgumentException("Null delimiter");
    }
    delimiter = parseSingleQuotedString(delimiter);

    // An empty delimiter used to fall through to charAt(0) and fail with
    // StringIndexOutOfBoundsException; reject it explicitly instead.
    if (delimiter.length() == 0) {
        throw new IllegalArgumentException("Empty delimiter");
    }

    // Plain single character (this includes a lone backslash).
    if (delimiter.length() == 1) {
        return (byte) delimiter.charAt(0);
    }

    // Anything longer must be a backslash escape.
    if (delimiter.charAt(0) != '\\') {
        throw new IllegalArgumentException("Delimeter must be a " +
                "single character " + delimiter);
    }

    switch (delimiter.charAt(1)) {
    case 't':
        return (byte) '\t';
    case 'x':
        // Hexadecimal byte value.
        return Integer.valueOf(delimiter.substring(2), 16).byteValue();
    case 'u':
        // Digits are read as decimal (see the method documentation).
        return Integer.valueOf(delimiter.substring(2)).byteValue();
    default:
        throw new IllegalArgumentException("Unknown delimiter " +
                delimiter);
    }
}
/**
 * Serialize an object to an {@link OutputStream} in the
 * field-delimited form.
 *
 * <p>Output by data type:
 * <ul>
 * <li>null: nothing is written;</li>
 * <li>boolean, integer, long, float, double: the value's
 *     {@code toString()} text, encoded as UTF-8;</li>
 * <li>bytearray: the raw bytes;</li>
 * <li>chararray: the string encoded as UTF-8;</li>
 * <li>map: {@code [key#value,key#value]};</li>
 * <li>tuple: {@code (field,field)};</li>
 * <li>bag: <code>{element,element}</code>.</li>
 * </ul>
 * Keys, values, tuple fields and bag elements are serialized by calling
 * this method recursively.
 *
 * @param out an OutputStream object
 * @param field an object to be serialized
 * @throws IOException if serialization fails; this includes an
 *         {@link ExecException} with error code 2108 when the data type
 *         of the field is not one of those handled here.
 */
public static void putField(OutputStream out, Object field)
        throws IOException {
    switch (DataType.findType(field)) {
    case DataType.NULL:
        break; // just leave it empty

    case DataType.BOOLEAN:
    case DataType.INTEGER:
    case DataType.LONG:
    case DataType.FLOAT:
    case DataType.DOUBLE:
        // Encode explicitly as UTF-8, like every other branch, instead of
        // depending on the platform default charset.
        out.write(field.toString().getBytes(UTF8));
        break;

    case DataType.BYTEARRAY: {
        byte[] raw = ((DataByteArray) field).get();
        out.write(raw, 0, raw.length);
        break;
    }

    case DataType.CHARARRAY:
        out.write(((String) field).getBytes(UTF8));
        break;

    case DataType.MAP: {
        boolean firstEntry = true;
        out.write('[');
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) field).entrySet()) {
            if (!firstEntry) {
                out.write(',');
            }
            firstEntry = false;
            putField(out, entry.getKey());
            out.write('#');
            putField(out, entry.getValue());
        }
        out.write(']');
        break;
    }

    case DataType.TUPLE: {
        Tuple tuple = (Tuple) field;
        out.write('(');
        for (int i = 0; i < tuple.size(); ++i) {
            if (i > 0) {
                out.write(',');
            }
            putField(out, tuple.get(i));
        }
        out.write(')');
        break;
    }

    case DataType.BAG: {
        boolean firstElement = true;
        out.write('{');
        Iterator<?> elements = ((DataBag) field).iterator();
        while (elements.hasNext()) {
            if (!firstElement) {
                out.write(',');
            }
            firstElement = false;
            putField(out, elements.next());
        }
        out.write('}');
        break;
    }

    default: {
        int errCode = 2108;
        String msg = "Could not determine data type of field: " + field;
        throw new ExecException(msg, errCode, PigException.BUG);
    }
    }
}
/**
* Transform a line of Text to a Tuple
*
* @param val a line of text
* @param fieldDel the field delimiter
* @return tuple constructed from the text
*/
public static Tuple textToTuple(Text val, byte fieldDel) {
byte[] buf = val.getBytes();
int len = val.getLength();
int start = 0;
ArrayList