All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.Repository.DocLiteral Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository;

import io.github.repir.Retriever.Document;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.io.struct.StructuredFileShortJumpTable;
import io.github.repir.tools.io.struct.StructuredDataStream;
import io.github.repir.EntityReader.MapReduce.TermEntityKey;
import io.github.repir.EntityReader.MapReduce.TermEntityValue;
import io.github.repir.tools.lib.Log;
import io.github.repir.tools.extract.Content;
import io.github.repir.Repository.DocLiteral.File;
import io.github.repir.tools.io.EOCException;

/**
 * Can store one literal String per Document, e.g. collection ID, title, url.
 * @see EntityStoredFeature
 * @author jer
 */
public class DocLiteral
   extends EntityStoredFeature 
   implements ReduciblePartitionedFeature, ReportableFeature, ResidentFeature  {

   public static Log log = new Log(DocLiteral.class);

   protected DocLiteral(Repository repository, String field) {
      super(repository, field);
   }

   public static DocLiteral get(Repository repository, String field) {
       String label = canonicalName(DocLiteral.class, field);
       DocLiteral termid = (DocLiteral)repository.getStoredFeature(label);
       if (termid == null) {
          termid = new DocLiteral(repository, field);
          repository.storeFeature(label, termid);
       }
       return termid;
   }
   
   @Override
   public void setMapOutputValue(TermEntityValue value, Content entity) {
      value.writer.write(extract(entity));
   }

   @Override
   public void writeReduce(TermEntityKey key, Iterable values) {
      try {
         TermEntityValue value = values.iterator().next();
         String literal = value.reader.readString();
         file.literal.write(literal);
      } catch (EOCException ex) {
         log.exception(ex, "reduceInput( %s, %s ) file %s", key, values, file);
      }
   }

   @Override
   public void encode(Document d, int reportid) {
      //log.info("encode %s doc %d reportid %d value %s", this.getCanonicalName(), d.docid, reportid, d.getReportedFeature(reportid));
      String literal = (String) d.getReportedFeature(reportid);
      bdw.write(literal);
      d.setReportedFeature(reportid, bdw.getBytes());
   }

   @Override
   public void decode(Document d, int reportid) {
      reader.setBuffer((byte[]) d.getReportedFeature(reportid));
      d.setReportedFeature(reportid, reader.readString());
   }

   @Override
   public void report(Document doc, int reportid) {
      //log.info("report %s doc %d reportid %d value %s", this.getCanonicalName(), doc.docid, reportid, getValue());
      doc.setReportedFeature(reportid, getValue());
   }

   @Override
   public String valueReported(Document doc, int reportid) {
      return (String) doc.getReportedFeature(reportid);
   }
   
   @Override
   public String getValue() {
      return file.literal.value;
   }

   @Override
   public void write(String value) {
      file.literal.write(value);
   }

   @Override
   public File createFile(Datafile datafile) {
      return new File(datafile);
   }

   /**
    * Finds the internal document id for a literal. This search reads the whole
    * feature sequentially, as there is no direct access to finding literal
    * values.
    * 

* @param literal * @return */ public int findLiteral(String literal) { getFile().openRead(); file.setBufferSize(4096 * 25000); file.setOffset(0); int id = 0; while (file.nextRecord()) { if (literal.equals(file.literal.value)) { return id; } id++; } file.setBufferSize(4096); return -1; } @Override public void setValue(String value) { getFile().literal.value = value; } @Override public void readResident() { getFile().loadMem(); } @Override public boolean isReadResident() { return getFile().isLoadedInMem(); } public static class File extends StructuredFileShortJumpTable { public StringField literal = this.addString("directterm"); public File(Datafile df) { super(df); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy