All downloads are free. Search and download functionality uses the official Maven repository.

io.github.repir.Repository.EntityStoredFeature Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository;

import java.util.HashMap;
import io.github.repir.tools.io.struct.StructuredFileIntID;
import io.github.repir.tools.extract.Content;
import io.github.repir.EntityReader.MapReduce.TermEntityKey;
import io.github.repir.EntityReader.MapReduce.TermEntityValue;
import io.github.repir.Retriever.Document;
import java.io.IOException;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * An abstract feature that can store a value per Document in the Repository.
 * This value can be accessed with an internal DocumentID passed through
 * {@link EntityStoredFeature#read(io.github.repir.Retriever.Document) }
 * <p>
 * Optionally, values that were read can be kept in an in-memory cache per
 * partition, see {@link #cacheResults()}.
 *
 * @author jer
 * @param <F> a StructuredFileIntID file to store its values, allowing
 * the stored Record to be accessed through an internal integer ID
 * @param <C> The datatype stored
 */
// NOTE(review): the type arguments handed to StoredReportableFeature are assumed
// to be <F, C>; confirm against that class's declaration.
public abstract class EntityStoredFeature<F extends StructuredFileIntID, C> extends StoredReportableFeature<F, C> implements ReduciblePartitionedFeature {

   /**
    * Values already read for the current partition, keyed by document id, or
    * null when caching is disabled. Reset whenever the partition changes.
    */
   public HashMap<Integer, C> cache;

   /** Key most recently written by {@link #writeMap}. */
   TermEntityKey outkey;
   /** Value object that is reused for every {@link #writeMap} call. */
   TermEntityValue outvalue = new TermEntityValue();

   public EntityStoredFeature(Repository repository, String field) {
      super(repository, field);
   }

   /**
    * Creates the map output key for a document: a term-doc key for the given
    * partition, feature and document name, typed as ENTITYFEATURE.
    *
    * @param partition partition the key is created for
    * @param feature feature number passed on to the key
    * @param docname name of the document
    * @param entity not used by this implementation
    * @return the newly created key
    */
   @Override
   public TermEntityKey createMapOutputKey(int partition, int feature, String docname, Content entity) {
      TermEntityKey t = TermEntityKey.createTermDocKey(partition, feature, 0, docname);
      t.type = TermEntityKey.Type.ENTITYFEATURE;
      return t;
   }

   /**
    * @param entity the entity to read from
    * @return the content string of the attribute named by entityAttribute()
    */
   public String extract(Content entity) {
      return entity.get(entityAttribute()).getContentStr();
   }

   /**
    * Fills the map output value for a document.
    *
    * @param writer the value object to fill
    * @param doc the entity to take the value from
    */
   abstract public void setMapOutputValue(TermEntityValue writer, Content doc);

   /**
    * Emits one key/value pair for the entity: the key comes from
    * {@link #createMapOutputKey}, the value from {@link #setMapOutputValue}.
    */
   @Override
   public void writeMap(Mapper.Context context, int partition, int feature, String docname, Content entity) throws IOException, InterruptedException {
      outkey = createMapOutputKey(partition, feature, docname, entity);
      setMapOutputValue(outvalue, entity);
      context.write(outkey, outvalue);
   }

   /** Closes the feature's file for writing. */
   @Override
   public void finishReduce() {
      getFile().closeWrite();
   }

   /**
    * Switches to the given partition and opens the feature's file for writing.
    *
    * @param partition partition that will be written
    * @param buffersize buffer size to set on the file before it is opened
    */
   @Override
   public void startReduce(int partition, int buffersize) {
      setPartition(partition);
      getFile().setBufferSize(buffersize);
      getFile().openWrite();
   }

   /** @return the value currently held by this feature */
   public abstract C getValue();

   /** @param value the value this feature should hold */
   public abstract void setValue(C value);

   /** @param value the value to write */
   public abstract void write(C value);

   /**
    * Switches to another partition. When the partition actually changes, any
    * cached values are dropped (and caching is thereby disabled), since they
    * belong to the previous partition.
    */
   @Override
   public void setPartition(int partition) {
      if (this.partition != partition) {
         super.setPartition(partition);
         cache = null;
      }
   }

   /**
    * Enables caching by installing a new, empty cache; any values cached
    * before are discarded.
    */
   public void cacheResults() {
      cache = new HashMap<Integer, C>();
   }

   /**
    * Reads the value for the given document, switching partition when the
    * document lives in a different partition than the current one. When
    * caching is enabled, a previously read value is restored through
    * {@link #setValue} instead of being read again. Any exception is logged
    * and not propagated.
    * <p>
    * Note: a cached value of null is indistinguishable from a miss and is
    * read again on every call.
    *
    * @param d the document whose partition and docid identify the value
    */
   @Override
   public void read(Document d) {
      try {
         if (partition != d.partition) {
            // setPartition() drops the cache on a switch, so whether caching
            // was enabled has to be remembered before calling it; otherwise
            // caching would silently stop at the first partition change.
            boolean caching = cache != null;
            setPartition(d.partition);
            openRead();
            if (caching) {
               cacheResults();
            }
         }
         if (cache == null) {
            super.read(d.docid);
         } else {
            C value = cache.get(d.docid);
            if (value == null) {
               super.read(d.docid);
               cache.put(d.docid, getValue());
            } else {
               setValue(value);
            }
         }
      } catch (Exception ex) {
         log.exception(ex, "Could not read value for document %d", d.docid);
      }
   }

   /** @return the length reported by the feature's file */
   @Override
   public long getLength() {
      return getFile().getLength();
   }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy