All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.Repository.Pig.PigFeature Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository.Pig;

import io.github.repir.tools.extract.Content;
import io.github.repir.EntityReader.MapReduce.TermEntityKey;
import io.github.repir.EntityReader.MapReduce.TermEntityValue;
import io.github.repir.Repository.*;
import io.github.repir.Retriever.Document;
import io.github.repir.tools.search.ByteSearch;
import io.github.repir.tools.search.ByteSearchPosition;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.io.struct.StructuredFileIntID;
import io.github.repir.tools.io.struct.StructuredTextFile.DataNode;
import io.github.repir.tools.io.struct.StructuredTextFile.FolderNode;
import io.github.repir.tools.io.struct.StructuredTextFile.Node;
import io.github.repir.tools.io.struct.StructuredTextPig;
import io.github.repir.tools.io.struct.StructuredTextPigTuple;
import io.github.repir.tools.lib.ClassTools;
import io.github.repir.tools.lib.PrintTools;
import io.github.repir.tools.lib.StrTools;
import java.util.HashMap;

/**
 * An abstract feature that can store a value per Document in the Repository.
 * This value can be accessed with an internal DocumentID passed through
 * {@link EntityStoredFeature#read(io.github.repir.Retriever.Document) }
 * @author jer
 * @param  a StructuredFileIntID file to store it's values, allowing 
 * the stored Record to be accessed through an internal integer ID
 * @param  The datatype stored
 */
public abstract class PigFeature> extends StoredFeature {

   public F file;
   
   public PigFeature(Repository repository) {
      super(repository);
   }

   public PigFeature(Repository repository, String field) {
      super(repository, field);
   }

   @Override
   public F getFile() {
      if (file == null) {
         file = createFile(getStoredFeatureFile());
      }
      return file;
   }
   
   @Override
   public Datafile getStoredFeatureFile() {
      Datafile datafile;
      String name = getCanonicalName();
      name = name.replaceFirst(":", ".");
      String path = repository.configuredString(name.toLowerCase() + ".path");
      if (path != null && path.length() > 0)
         datafile = new Datafile( repository.getFS(), path);
      else
         datafile = repository.getBaseDir().getFile(PrintTools.sprintf("pig/%s.%s", repository.getPrefix(), getFileNameSuffix()));
      return datafile;
   }
   
   private static final ByteSearch dot = ByteSearch.create("\\."); 
   
   public String loadScript() {
      StringBuilder sb = new StringBuilder();
      String filename = getFile().getDatafile().getFilename();
      ByteSearchPosition pos = dot.findLastPos(filename);
      sb.append("LOAD '");
      sb.append( getFile().getDatafile().getCanonicalPath() ).append("' AS ");
      sb.append(loadFolder( file.getRoot()));
      sb.append(";\n");
      return sb.toString();
   }
   
   public String loadLocalScript() {
      StringBuilder sb = new StringBuilder();
      String filename = getFile().getDatafile().getFilename();
      ByteSearchPosition pos = dot.findLastPos(filename);
      sb.append("LOAD '");
      sb.append("data/").append( getFile().getDatafile().getFilename() ).append("' AS ");
      sb.append(loadFolder( file.getRoot()));
      sb.append(";\n");
      return sb.toString();
   }
   
   public String loadFolder(FolderNode folder) {
      StringBuilder sb = new StringBuilder();
      sb.append("(");
      boolean first = true;
      for (Node n : folder.orderedfields) {
         if (first)
            first = false;
         else
            sb.append(", ");
         if (n instanceof DataNode) {
            sb.append(n.label);
            Class c = ClassTools.getGenericType(n);
            if (c.equals(Integer.class)) {
               sb.append(":int");
            } else if (c.equals(Long.class)) {
               sb.append(":long");
            } else if (c.equals(Double.class)) {
               sb.append(":double");
            } else if (c.equals(String.class)) {
               sb.append(":chararray");
            } else if (c.equals(Boolean.class)) {
               sb.append(":chararray");
            }
         } else {
            sb.append(n.label).append(":{");
            sb.append(loadFolder((FolderNode)n)).append("}");
         }
      }
      sb.append(")");
      return sb.toString();
   }
   
   public abstract F createFile(Datafile datafile);

   @Override
   public void openRead() {
      getFile().openRead();
   }

   @Override
   public void closeRead() {
      getFile().closeRead();
      file = null;
   }

   public void openAppend() {
      if (getFile().lock())
         getFile().openAppend();
      else 
         log.fatal("Could not lock file %s", getFile().getDatafile().getCanonicalPath());
   }
   
   public void openWrite() {
      if (getFile().lock())
         getFile().openWrite();
      else 
         log.fatal("Could not lock file %s", getFile().getDatafile().getCanonicalPath());
   }
   
   public void closeWrite() {
      getFile().closeWrite();
      getFile().unlock();
   }
   
   public abstract C getValue();

   public abstract void write(C value);
   
   
   public void setBufferSize(int size) {
      getFile().getDatafile().setBufferSize(size);
   }
   
   public void reuse() {}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy