All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jp.co.yahoo.yosegi.hive.YosegiSerde Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jp.co.yahoo.yosegi.hive; import jp.co.yahoo.yosegi.hive.io.ParserWritable; import jp.co.yahoo.yosegi.message.parser.hive.HiveMessageReader; import jp.co.yahoo.yosegi.message.parser.hive.HiveStructParser; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.Writable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Properties; import java.util.Set; public class YosegiSerde extends AbstractSerDe { private static final Logger LOG = LoggerFactory.getLogger(YosegiSerde.class); private final HiveMessageReader messageReader = new HiveMessageReader(); private final Map filedIndexMap = new HashMap(); private HiveStructParser parser; private ObjectInspector inspector; private StructTypeInfo getAllReadTypeInfo( final String columnNameProperty , final String columnTypeProperty ) { ArrayList fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty ); ArrayList columnNames = new ArrayList(); if ( columnNameProperty != null && 0 < columnNameProperty.length() ) { String[] columnNameArray = columnNameProperty.split(","); for ( int i = 0 ; i < columnNameArray.length ; i++ ) { columnNames.add( columnNameArray[i] ); filedIndexMap.put( columnNameArray[i] , i ); } } StructTypeInfo rootType = new StructTypeInfo(); rootType.setAllStructFieldNames( columnNames ); rootType.setAllStructFieldTypeInfos( fieldTypes ); return rootType; } private StructTypeInfo getColumnProjectionTypeInfo( final String columnNameProperty , final String columnTypeProperty , final String projectionColumnNames ) { Set columnNameSet = new HashSet(); for ( String columnName : projectionColumnNames.split(",") ) { columnNameSet.add( columnName ); } ArrayList fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString( columnTypeProperty ); String[] splitNames = columnNameProperty.split(","); ArrayList projectionColumnNameList = new ArrayList(); ArrayList projectionFieldTypeList = new ArrayList(); for ( int i = 0 ; i < fieldTypes.size() ; i++ ) { if ( columnNameSet.contains( splitNames[i] ) ) { projectionColumnNameList.add( splitNames[i] ); projectionFieldTypeList.add( fieldTypes.get(i) ); } filedIndexMap.put( splitNames[i] , i ); } StructTypeInfo rootType = new StructTypeInfo(); rootType.setAllStructFieldNames( projectionColumnNameList ); rootType.setAllStructFieldTypeInfos( projectionFieldTypeList ); return rootType; } @Override public void initialize( final Configuration conf, final Properties table ) throws SerDeException { initialize( conf , table , table ); } @Override public void initialize( final Configuration conf , final Properties table , final Properties part ) throws SerDeException { LOG.info( table.toString() ); if ( part != null ) { LOG.info( part.toString() ); } String columnNameProperty = table.getProperty(serdeConstants.LIST_COLUMNS); String columnTypeProperty = table.getProperty(serdeConstants.LIST_COLUMN_TYPES); conf.unset( "spread.reader.expand.column" ); Iterator> jobConfIterator = conf.iterator(); while ( jobConfIterator.hasNext() ) { Map.Entry keyValue = jobConfIterator.next(); if ( keyValue.getKey().startsWith( "spread.reader.flatten.column" ) ) { conf.unset( keyValue.getKey() ); } } if ( table.containsKey( "yosegi.expand" ) ) { conf.set("spread.reader.expand.column", table.getProperty( "yosegi.expand" )); } Iterator iterator = table.stringPropertyNames().iterator(); while ( iterator.hasNext() ) { String keyName = iterator.next(); if ( keyName.startsWith( "yosegi.flatten" ) ) { String yosegiKeyName = keyName.replace( "yosegi.flatten" , "spread.reader.flatten.column" ); conf.set( yosegiKeyName , table.getProperty( keyName ) ); } } String projectionColumnNames = conf.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , "" ); StructTypeInfo rootType; if ( projectionColumnNames.isEmpty() ) { rootType = getAllReadTypeInfo( columnNameProperty , columnTypeProperty ); } else { rootType = getColumnProjectionTypeInfo( columnNameProperty , columnTypeProperty , projectionColumnNames ); } inspector = YosegiObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( rootType ); } @Override public Object deserialize( final Writable writable ) throws SerDeException { return writable; } @Override public ObjectInspector getObjectInspector() throws SerDeException { return inspector; } @Override public Class getSerializedClass() { return ParserWritable.class; } @Override public Writable serialize( final Object obj, final ObjectInspector objInspector ) throws SerDeException { ParserWritable parserWritable = new ParserWritable(); try { if ( parser == null ) { parser = (HiveStructParser)( messageReader.create( objInspector ) ); parser.setFieldIndexMap( filedIndexMap ); } parser.setObject( obj ); parserWritable.set( parser ); } catch ( IOException ex ) { throw new SerDeException( ex ); } return parserWritable; } @Override public SerDeStats getSerDeStats() { return null; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy