/*
 * Hibernate Search, full-text search for your domain model
 *
 * License: GNU Lesser General Public License (LGPL), version 2.1 or later
 * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
 */
package org.hibernate.search.indexes.serialization.avro.impl;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.avro.Protocol;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.util.Utf8;

import org.hibernate.search.bridge.spi.ConversionContext;
import org.hibernate.search.bridge.util.impl.ContextualExceptionBridgeHelper;
import org.hibernate.search.indexes.serialization.spi.Deserializer;
import org.hibernate.search.indexes.serialization.spi.LuceneWorksBuilder;
import org.hibernate.search.indexes.serialization.spi.SerializableIndex;
import org.hibernate.search.indexes.serialization.spi.SerializableStore;
import org.hibernate.search.indexes.serialization.spi.SerializableTermVector;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

/**
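 * Deserializes an Avro-encoded stream of Lucene works and replays each
 * operation on the given {@link LuceneWorksBuilder}.
 *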
 * @author Emmanuel Bernard 
 */
public class AvroDeserializer implements Deserializer {

	private static final Log log = LoggerFactory.make();
	private final KnownProtocols protocols;
	private List<Utf8> classReferences;

	public AvroDeserializer(KnownProtocols protocols) {
		this.protocols = protocols;
	}

	@Override
	public void deserialize(byte[] data, LuceneWorksBuilder hydrator) {
		final ByteArrayInputStream inputStream = new ByteArrayInputStream( data );
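		// The first two bytes of the stream carry the major/minor version of the
		// serialization protocol, used to resolve the matching Avro protocol.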
		final int majorVersion = inputStream.read();
		final int minorVersion = inputStream.read();
		final Protocol protocol = protocols.getProtocol( majorVersion, minorVersion );

		Decoder decoder = DecoderFactory.get().binaryDecoder( inputStream, null );
		GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>( protocol.getType( "Message" ) );
		GenericRecord result;
		try {
			result = reader.read( null, decoder );
		}
		catch (IOException e) {
			throw log.unableToDeserializeAvroStream( e );
		}

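		// Entity class names are serialized once and referenced by index from the operations.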
		classReferences = asListOfString( result, "classReferences" );
		final List<GenericRecord> operations = asListOfGenericRecords( result, "operations" );
		final ConversionContext conversionContext = new ContextualExceptionBridgeHelper();
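		// Operations are dispatched on the name of their Avro schema.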
		for ( GenericRecord operation : operations ) {
			String schema = operation.getSchema().getName();
			if ( "OptimizeAll".equals( schema ) ) {
				hydrator.addOptimizeAll();
			}
			else if ( "PurgeAll".equals( schema ) ) {
				hydrator.addPurgeAllLuceneWork( asClass( operation, "class" ) );
			}
			else if ( "Flush".equals( schema ) ) {
				hydrator.addFlush();
			}
			else if ( "Delete".equals( schema ) ) {
				processId( operation, hydrator );
				hydrator.addDeleteLuceneWork(
						asClass( operation, "class" ), conversionContext
				);
			}
			else if ( "Add".equals( schema ) ) {
				buildLuceneDocument( asGenericRecord( operation, "document" ), hydrator );
				Map<String, String> analyzers = getAnalyzers( operation );
				processId( operation, hydrator );
				hydrator.addAddLuceneWork(
						asClass( operation, "class" ),
						analyzers,
						conversionContext
				);
			}
			else if ( "Update".equals( schema ) ) {
				buildLuceneDocument( asGenericRecord( operation, "document" ), hydrator );
				Map<String, String> analyzers = getAnalyzers( operation );
				processId( operation, hydrator );
				hydrator.addUpdateLuceneWork(
						asClass( operation, "class" ),
						analyzers,
						conversionContext
				);
			}
			else {
				throw log.cannotDeserializeOperation( schema );
			}
		}
	}

	private String asClass(GenericRecord operation, String attribute) {
		Integer index = (Integer) operation.get( attribute );
		return classReferences.get( index ).toString();
	}

	private List<Utf8> asListOfString(GenericRecord result, String attribute) {
		return (List<Utf8>) result.get( attribute );
	}

	private void processId(GenericRecord operation, LuceneWorksBuilder hydrator) {
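		// The id value is either raw bytes holding a Java-serialized object,
		// an Avro string (Utf8), or a value Avro could encode natively.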
		GenericRecord id = (GenericRecord) operation.get( "id" );
		Object value = id.get( "value" );
		if ( value instanceof ByteBuffer ) {
			hydrator.addIdAsJavaSerialized( asByteArray( (ByteBuffer) value ) );
		}
		else if ( value instanceof Utf8 ) {
			hydrator.addId( value.toString() );
		}
		else {
			//the rest are serialized objects
			hydrator.addId( (Serializable) value );
		}
	}

	private Map<String, String> getAnalyzers(GenericRecord operation) {
		Map<?, ?> analyzersWithUtf8 = (Map<?, ?>) operation.get( "fieldToAnalyzerMap" );
		if ( analyzersWithUtf8 == null ) {
			return null;
		}
		Map<String, String> analyzers = new HashMap<String, String>( analyzersWithUtf8.size() );
		for ( Map.Entry<?, ?> entry : analyzersWithUtf8.entrySet() ) {
			analyzers.put( entry.getKey().toString(), entry.getValue().toString() );
		}
		return analyzers;
	}

	private void buildLuceneDocument(GenericRecord document, LuceneWorksBuilder hydrator) {
		hydrator.defineDocument();
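		// Each fieldable record is dispatched on the name of its Avro schema.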
		List<GenericRecord> fieldables = asListOfGenericRecords( document, "fieldables" );
		for ( GenericRecord field : fieldables ) {
			String schema = field.getSchema().getName();
			if ( "CustomFieldable".equals( schema ) ) {
				hydrator.addFieldable( asByteArray( field, "instance" ) );
			}
			else if ( "NumericIntField".equals( schema ) ) {
				hydrator.addIntNumericField(
							asInt( field, "value" ),
							asString( field, "name" ),
							asInt( field, "precisionStep" ),
							asStore( field ),
							asBoolean( field, "indexed" ),
							asFloat( field, "boost" ),
							asBoolean( field, "omitNorms" ),
							asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "NumericFloatField".equals( schema ) ) {
				hydrator.addFloatNumericField(
							asFloat( field, "value" ),
							asString( field, "name" ),
							asInt( field, "precisionStep" ),
							asStore( field ),
							asBoolean( field, "indexed" ),
							asFloat( field, "boost" ),
							asBoolean( field, "omitNorms" ),
							asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "NumericLongField".equals( schema ) ) {
				hydrator.addLongNumericField(
							asLong( field, "value" ),
							asString( field, "name" ),
							asInt( field, "precisionStep" ),
							asStore( field ),
							asBoolean( field, "indexed" ),
							asFloat( field, "boost" ),
							asBoolean( field, "omitNorms" ),
							asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "NumericDoubleField".equals( schema ) ) {
				hydrator.addDoubleNumericField(
							asDouble( field, "value" ),
							asString( field, "name" ),
							asInt( field, "precisionStep" ),
							asStore( field ),
							asBoolean( field, "indexed" ),
							asFloat( field, "boost" ),
							asBoolean( field, "omitNorms" ),
							asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "BinaryField".equals( schema ) ) {
				hydrator.addFieldWithBinaryData(
							asString( field, "name" ),
							asByteArray( field, "value" ),
							asInt( field, "offset" ),
							asInt( field, "length" ),
							asFloat( field, "boost" ),
							asBoolean( field, "omitNorms" ),
							asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "StringField".equals( schema ) ) {
				hydrator.addFieldWithStringData(
						asString( field, "name" ),
						asString( field, "value" ),
						asStore( field ),
						asIndex( field ),
						asTermVector( field ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "TokenStreamField".equals( schema ) ) {
				buildAttributes( field, "value", hydrator );
				hydrator.addFieldWithTokenStreamData(
						asString( field, "name" ),
						asTermVector( field ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "ReaderField".equals( schema ) ) {
				hydrator.addFieldWithSerializableReaderData(
						asString( field, "name" ),
						asByteArray( field, "value" ),
						asTermVector( field ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else {
				throw log.cannotDeserializeField( schema );
			}
		}
	}

	private void buildAttributes(GenericRecord record, String field, LuceneWorksBuilder hydrator) {
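		// The attribute field holds a list of tokens, each token being a list of attributes.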
		List<List<?>> tokens = (List<List<?>>) record.get( field );
		for ( List<?> token : tokens ) {
			for ( Object attribute : token ) {
				buildAttribute( attribute, hydrator );
			}
			hydrator.addToken();
		}
	}

	private void buildAttribute(Object element, LuceneWorksBuilder hydrator) {
		if ( element instanceof GenericRecord ) {
			GenericRecord record = (GenericRecord) element;
			String name = record.getSchema().getName();
			if ( "TokenTrackingAttribute".equals( name ) ) {
				hydrator.addTokenTrackingAttribute( (List<Integer>) record.get( "positions" ) );
			}
			else if ( "CharTermAttribute".equals( name ) ) {
				hydrator.addCharTermAttribute( (CharSequence) record.get( "sequence" ) );
			}
			else if ( "PayloadAttribute".equals( name ) ) {
				hydrator.addPayloadAttribute( asByteArray( record, "payload") );
			}
			else if ( "KeywordAttribute".equals( name ) ) {
				hydrator.addKeywordAttribute( asBoolean( record, "isKeyword") );
			}
			else if ( "PositionIncrementAttribute".equals( name ) ) {
				hydrator.addPositionIncrementAttribute( asInt( record, "positionIncrement") );
			}
			else if ( "FlagsAttribute".equals( name ) ) {
				hydrator.addFlagsAttribute( asInt( record, "flags") );
			}
			else if ( "TypeAttribute".equals( name ) ) {
				hydrator.addTypeAttribute( asString( record, "type") );
			}
			else if ( "OffsetAttribute".equals( name ) ) {
				hydrator.addOffsetAttribute( asInt( record, "startOffset"), asInt( record, "endOffset" ) );
			}
			else {
				log.unknownAttributeSerializedRepresentation( name );
			}
		}
		else if ( element instanceof ByteBuffer ) {
			hydrator.addSerializedAttribute( asByteArray( (ByteBuffer) element ) );
		}
		else {
			log.unknownAttributeSerializedRepresentation( element.getClass().getName() );
		}
	}

	private GenericRecord asGenericRecord(GenericRecord operation, String field) {
		return (GenericRecord) operation.get( field );
	}

	private List<GenericRecord> asListOfGenericRecords(GenericRecord result, String field) {
		return (List<GenericRecord>) result.get( field );
	}

	private float asFloat(GenericRecord record, String field) {
		return ( (Float) record.get( field ) ).floatValue();
	}

	private int asInt(GenericRecord record, String field) {
		return ( (Integer) record.get( field ) ).intValue();
	}

	private long asLong(GenericRecord record, String field) {
		return ( (Long) record.get( field ) ).longValue();
	}

	private double asDouble(GenericRecord record, String field) {
		return ( (Double) record.get( field ) ).doubleValue();
	}

	private String asString(GenericRecord record, String field) {
		return record.get( field ).toString();
	}

	private boolean asBoolean(GenericRecord record, String field) {
		return ( (Boolean) record.get( field ) ).booleanValue();
	}

	private SerializableStore asStore(GenericRecord field) {
		String string = field.get( "store" ).toString();
		return SerializableStore.valueOf( string );
	}

	private SerializableIndex asIndex(GenericRecord field) {
		String string = field.get( "index" ).toString();
		return SerializableIndex.valueOf( string );
	}

	private SerializableTermVector asTermVector(GenericRecord field) {
		String string = field.get( "termVector" ).toString();
		return SerializableTermVector.valueOf( string );
	}

	private byte[] asByteArray(GenericRecord operation, String field) {
		ByteBuffer buffer = (ByteBuffer) operation.get( field );
		return asByteArray( buffer );
	}

	private byte[] asByteArray(ByteBuffer buffer) {
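		// Copies the remaining bytes; note that this advances the buffer's position.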
		byte[] copy = new byte[buffer.remaining()];
		buffer.get( copy );
		return copy;
	}
}