// org.apache.hadoop.hive.serde2.lazy.LazyString (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.serde2.lazy;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector;
import org.apache.hadoop.io.Text;
/**
 * LazyObject for storing a value of String.
 *
 * <p>The value is held in a Hadoop {@link Text}. When the object inspector
 * reports that the field is escaped, {@link #init(ByteArrayRef, int, int)}
 * drops each escape byte and keeps the byte that follows it; otherwise the
 * input bytes are copied unchanged.
 */
public class LazyString extends LazyPrimitive<LazyStringObjectInspector, Text> {

  /**
   * Creates a LazyString whose value is a new, empty {@link Text}.
   *
   * @param oi object inspector handed to the superclass; {@code init} asks it
   *           whether the data is escaped and which byte is the escape char
   */
  public LazyString(LazyStringObjectInspector oi) {
    super(oi);
    data = new Text();
  }

  /**
   * Copy constructor: the new instance gets a new {@link Text} constructed
   * from the source object's data.
   *
   * @param copy the LazyString to copy from
   */
  public LazyString(LazyString copy) {
    super(copy);
    data = new Text(copy.data);
  }

  /**
   * Loads the value from {@code length} bytes of {@code bytes} beginning at
   * {@code start}, removing escape bytes if the object inspector says the
   * data is escaped.
   *
   * @param bytes  reference to the byte array holding the serialized value
   * @param start  offset of the first byte of the value
   * @param length number of input bytes belonging to the value
   */
  @Override
  public void init(ByteArrayRef bytes, int start, int length) {
    if (!oi.isEscaped()) {
      // If the data is not escaped, simply copy the data.
      data.set(bytes.getData(), start, length);
      return;
    }

    byte escapeChar = oi.getEscapeChar();
    byte[] inputBytes = bytes.getData();

    // First calculate the length of the output string.
    int outputLength = unescapedLength(inputBytes, start, length, escapeChar);

    // Copy the data over, so that the internal state of Text will be set to
    // the required outputLength.
    data.set(inputBytes, start, outputLength);

    // We need to copy the data byte by byte only in case
    // "outputLength < length" (which means there is at least one escaped
    // byte).
    if (outputLength < length) {
      int written = copyUnescaped(inputBytes, start, length, escapeChar,
          data.getBytes());
      assert (written == outputLength);
    }
  }

  /**
   * Counts how many bytes remain once escape bytes are removed. An escape
   * byte together with the byte after it counts as one; an escape byte in the
   * last position counts as one on its own.
   */
  private static int unescapedLength(byte[] input, int start, int length,
      byte escapeChar) {
    int count = 0;
    for (int i = 0; i < length; i++) {
      if (input[start + i] == escapeChar) {
        // The escape byte and the byte it escapes yield a single output byte.
        i++;
      }
      count++;
    }
    return count;
  }

  /**
   * Writes the unescaped form of the input range to the front of
   * {@code output} and returns the number of bytes written. An escape byte in
   * the last position has nothing to escape and is copied as-is.
   */
  private static int copyUnescaped(byte[] input, int start, int length,
      byte escapeChar, byte[] output) {
    int k = 0;
    for (int i = 0; i < length; i++) {
      byte b = input[start + i];
      if (b == escapeChar && i < length - 1) {
        // Skip the escape byte and take the next byte literally.
        i++;
        b = input[start + i];
      }
      output[k++] = b;
    }
    return k;
  }
}