
// io.github.repir.Repository.EntityStoredFeature Maven / Gradle / Ivy
// The newest version!
package io.github.repir.Repository;
import java.util.HashMap;
import io.github.repir.tools.io.struct.StructuredFileIntID;
import io.github.repir.tools.extract.Content;
import io.github.repir.EntityReader.MapReduce.TermEntityKey;
import io.github.repir.EntityReader.MapReduce.TermEntityValue;
import io.github.repir.Retriever.Document;
import java.io.IOException;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * An abstract feature that can store a value per Document in the Repository.
 * This value can be accessed with an internal DocumentID passed through
 * {@link EntityStoredFeature#read(io.github.repir.Retriever.Document) }.
 *
 * <p>Type parameters, as described by the original documentation:
 * <ul>
 * <li>a StructuredFileIntID file to store its values, allowing the stored
 * record to be accessed through an internal integer ID;</li>
 * <li>the datatype stored (referred to as {@code C} in the method
 * signatures below).</li>
 * </ul>
 *
 * <p>NOTE(review): the class header below declares no type parameters even
 * though the body uses a type named {@code C}, and the type-parameter tags of
 * the original Javadoc had lost their names. Presumably the generic
 * declaration was dropped when this file was extracted; confirm against the
 * upstream source before compiling.
 *
 * @author jer
 */
public abstract class EntityStoredFeature extends StoredReportableFeature implements ReduciblePartitionedFeature {

    /**
     * Optional cache of values that were read, keyed by document id.
     * {@code null} means caching is disabled; {@link #cacheResults()} enables it.
     */
    public HashMap cache;

    /**
     * Passes the repository and field straight through to the superclass.
     *
     * @param repository the repository handed to the superclass constructor
     * @param field the field name handed to the superclass constructor
     */
    public EntityStoredFeature(Repository repository, String field) {
        super(repository, field);
    }

    /**
     * Creates the map output key for one entity: a term-doc key built from the
     * partition, feature and document name (with 0 as the third argument),
     * tagged with type {@code ENTITYFEATURE}.
     *
     * @param partition partition number stored in the key
     * @param feature feature number stored in the key
     * @param docname document name stored in the key
     * @param entity not used by this implementation
     * @return the newly created key
     */
    @Override
    public TermEntityKey createMapOutputKey(int partition, int feature, String docname, Content entity) {
        TermEntityKey t = TermEntityKey.createTermDocKey(partition, feature, 0, docname);
        t.type = TermEntityKey.Type.ENTITYFEATURE;
        return t;
    }

    /**
     * Returns the content string of the attribute named by
     * {@code entityAttribute()} in the given entity.
     *
     * @param entity the content to look the attribute up in
     * @return the attribute's content string
     */
    public String extract(Content entity) {
        return entity.get(entityAttribute()).getContentStr();
    }

    /**
     * Fills the map output value for the given document.
     *
     * @param writer the value object to fill
     * @param doc the content the value is taken from
     */
    abstract public void setMapOutputValue(TermEntityValue writer, Content doc);

    /** Key emitted by the most recent {@link #writeMap} call. */
    TermEntityKey outkey;

    /** Value object that is reused for every {@link #writeMap} call. */
    TermEntityValue outvalue = new TermEntityValue();

    /**
     * Emits one key/value pair for the entity to the mapper context: the key
     * comes from {@link #createMapOutputKey} and the value is filled in by
     * {@link #setMapOutputValue}.
     *
     * @param context the mapper context that receives the pair
     * @param partition partition number used for the key
     * @param feature feature number used for the key
     * @param docname document name used for the key
     * @param entity the entity the value is taken from
     * @throws IOException if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void writeMap(Mapper.Context context, int partition, int feature, String docname, Content entity) throws IOException, InterruptedException {
        outkey = createMapOutputKey(partition, feature, docname, entity);
        setMapOutputValue(outvalue, entity);
        context.write(outkey, outvalue);
    }

    /** Closes the feature's file for writing. */
    @Override
    public void finishReduce() {
        getFile().closeWrite();
    }

    /**
     * Selects the partition, applies the buffer size to the feature's file and
     * opens that file for writing.
     *
     * @param partition the partition to write
     * @param buffersize buffer size passed to the file
     */
    @Override
    public void startReduce(int partition, int buffersize) {
        setPartition(partition);
        getFile().setBufferSize(buffersize);
        getFile().openWrite();
    }

    /** @return the feature's current value */
    public abstract C getValue();

    /** @param value the value to make the feature's current value */
    public abstract void setValue(C value);

    /** @param value the value to write */
    public abstract void write(C value);

    /**
     * Switches to another partition. When the partition actually changes the
     * cache is dropped (set to {@code null}), which also disables caching
     * until {@link #cacheResults()} is called again.
     *
     * @param partition the partition to switch to
     */
    @Override
    public void setPartition(int partition) {
        if (this.partition != partition) {
            super.setPartition(partition);
            cache = null;
        }
    }

    /** Enables caching in {@link #read(Document)} by installing a new, empty cache. */
    public void cacheResults() {
        cache = new HashMap();
    }

    /**
     * Reads the value for the given document, switching partition first when
     * the document belongs to a different one. With caching enabled, a value
     * found in the cache is applied through {@link #setValue}; otherwise the
     * value is read through the superclass and then stored in the cache.
     * Any exception is logged and not rethrown.
     *
     * @param d the document whose value is read
     */
    @Override
    public void read(Document d) {
        try {
            if (partition != d.partition) {
                // setPartition() discards the cache, so whether caching was
                // enabled must be captured BEFORE the switch; testing the cache
                // afterwards would always see null and silently stop caching.
                boolean cachingEnabled = cache != null;
                setPartition(d.partition);
                openRead();
                if (cachingEnabled) {
                    // start with an empty cache for the new partition
                    cacheResults();
                }
            }
            if (cache == null) {
                super.read(d.docid);
            } else {
                C value = cache.get(d.docid);
                if (value == null) {
                    // cache miss: read the value and remember it
                    super.read(d.docid);
                    cache.put(d.docid, getValue());
                } else {
                    setValue(value);
                }
            }
        } catch (Exception ex) {
            log.exception(ex, "Could not read value for document %d", d.docid);
        }
    }

    /** @return the length reported by the feature's file */
    @Override
    public long getLength() {
        return getFile().getLength();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy