All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.EntityReader.MapReduce.TermEntityValue Maven / Gradle / Ivy

The newest version!
package io.github.repir.EntityReader.MapReduce;

import io.github.repir.tools.io.buffer.BufferDelayedWriter;
import io.github.repir.tools.io.buffer.BufferReaderWriter;
import io.github.repir.tools.io.EOCException;
import io.github.repir.tools.lib.Log;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;

/**
 * During extraction of a Repository (phase 2), the Mapper extracts the source
 * entities to TermEntityKey,TermEntityValue pairs. The mapper segments the
 * collection into partitions that are reduced separately. In a Reducer, all
 * {@link ReduciblePartitionedFeature}s are created by calling their {@link ReducibleFeature#reduceInput( 
 * TermEntityKey, java.lang.Iterable)}
 * <
 * p/>
 * During the PRELOAD phases, {@link EntityStoredFeatures} are consructed, which
 * can only have one value per Entity per Feature! Although this value can be of
 * a complex type, e.g. DocLiteral (also used for collectionid), DocTF,
 * DocForward.
 * 

* In the CHANNEL phase, features that extend {@link AutoTermDocumentFeature} * are constructed, for which the data is sorted first by Term and then by * Document. * * @author jer */ public class TermEntityValue extends BytesWritable { public static Log log = new Log(TermEntityValue.class); public BufferReaderWriter reader = new BufferReaderWriter(); public BufferDelayedWriter writer = new BufferDelayedWriter(); public TermEntityValue() { } @Override public void readFields(DataInput in) throws IOException { try { int length = in.readInt(); byte b[] = new byte[length]; in.readFully(b); reader.setBuffer(b); } catch (EOCException ex) { throw new IOException(ex); } } @Override public void write(DataOutput out) throws IOException { byte b[] = writer.getAsByteBlock(); out.write(b); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy