// jp.co.yahoo.yosegi.hive.YosegiSerde
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package jp.co.yahoo.yosegi.hive;
import jp.co.yahoo.yosegi.hive.io.ParserWritable;
import jp.co.yahoo.yosegi.message.parser.hive.HiveMessageReader;
import jp.co.yahoo.yosegi.message.parser.hive.HiveStructParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
public class YosegiSerde extends AbstractSerDe {
private static final Logger LOG = LoggerFactory.getLogger(YosegiSerde.class);
private final HiveMessageReader messageReader = new HiveMessageReader();
private final Map filedIndexMap = new HashMap();
private HiveStructParser parser;
private ObjectInspector inspector;
private StructTypeInfo getAllReadTypeInfo(
final String columnNameProperty , final String columnTypeProperty ) {
ArrayList fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
ArrayList columnNames = new ArrayList();
if ( columnNameProperty != null && 0 < columnNameProperty.length() ) {
String[] columnNameArray = columnNameProperty.split(",");
for ( int i = 0 ; i < columnNameArray.length ; i++ ) {
columnNames.add( columnNameArray[i] );
filedIndexMap.put( columnNameArray[i] , i );
}
}
StructTypeInfo rootType = new StructTypeInfo();
rootType.setAllStructFieldNames( columnNames );
rootType.setAllStructFieldTypeInfos( fieldTypes );
return rootType;
}
private StructTypeInfo getColumnProjectionTypeInfo(
final String columnNameProperty ,
final String columnTypeProperty ,
final String projectionColumnNames ) {
Set columnNameSet = new HashSet();
for ( String columnName : projectionColumnNames.split(",") ) {
columnNameSet.add( columnName );
}
ArrayList fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty );
String[] splitNames = columnNameProperty.split(",");
ArrayList projectionColumnNameList = new ArrayList();
ArrayList projectionFieldTypeList = new ArrayList();
for ( int i = 0 ; i < fieldTypes.size() ; i++ ) {
if ( columnNameSet.contains( splitNames[i] ) ) {
projectionColumnNameList.add( splitNames[i] );
projectionFieldTypeList.add( fieldTypes.get(i) );
}
filedIndexMap.put( splitNames[i] , i );
}
StructTypeInfo rootType = new StructTypeInfo();
rootType.setAllStructFieldNames( projectionColumnNameList );
rootType.setAllStructFieldTypeInfos( projectionFieldTypeList );
return rootType;
}
@Override
public void initialize(
final Configuration conf, final Properties table ) throws SerDeException {
initialize( conf , table , table );
}
@Override
public void initialize(
final Configuration conf ,
final Properties table ,
final Properties part ) throws SerDeException {
LOG.info( table.toString() );
if ( part != null ) {
LOG.info( part.toString() );
}
String columnNameProperty = table.getProperty(serdeConstants.LIST_COLUMNS);
String columnTypeProperty = table.getProperty(serdeConstants.LIST_COLUMN_TYPES);
conf.unset( "spread.reader.expand.column" );
Iterator> jobConfIterator = conf.iterator();
while ( jobConfIterator.hasNext() ) {
Map.Entry keyValue = jobConfIterator.next();
if ( keyValue.getKey().startsWith( "spread.reader.flatten.column" ) ) {
conf.unset( keyValue.getKey() );
}
}
if ( table.containsKey( "yosegi.expand" ) ) {
conf.set("spread.reader.expand.column", table.getProperty( "yosegi.expand" ));
}
Iterator iterator = table.stringPropertyNames().iterator();
while ( iterator.hasNext() ) {
String keyName = iterator.next();
if ( keyName.startsWith( "yosegi.flatten" ) ) {
String yosegiKeyName = keyName.replace(
"yosegi.flatten" , "spread.reader.flatten.column" );
conf.set( yosegiKeyName , table.getProperty( keyName ) );
}
}
String projectionColumnNames =
conf.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , "" );
StructTypeInfo rootType;
if ( projectionColumnNames.isEmpty() ) {
rootType = getAllReadTypeInfo( columnNameProperty , columnTypeProperty );
} else {
rootType = getColumnProjectionTypeInfo(
columnNameProperty , columnTypeProperty , projectionColumnNames );
}
inspector = YosegiObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( rootType );
}
@Override
public Object deserialize( final Writable writable ) throws SerDeException {
return writable;
}
@Override
public ObjectInspector getObjectInspector() throws SerDeException {
return inspector;
}
@Override
public Class extends Writable> getSerializedClass() {
return ParserWritable.class;
}
@Override
public Writable serialize(
final Object obj, final ObjectInspector objInspector ) throws SerDeException {
ParserWritable parserWritable = new ParserWritable();
try {
if ( parser == null ) {
parser = (HiveStructParser)( messageReader.create( objInspector ) );
parser.setFieldIndexMap( filedIndexMap );
}
parser.setObject( obj );
parserWritable.set( parser );
} catch ( IOException ex ) {
throw new SerDeException( ex );
}
return parserWritable;
}
@Override
public SerDeStats getSerDeStats() {
return null;
}
}