All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.Repository.TermCF Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.io.struct.StructuredFileByteJumpTable;
import io.github.repir.tools.io.struct.StructuredDataStream;
import io.github.repir.Repository.TermCF.File;
import io.github.repir.tools.lib.Const;
import io.github.repir.tools.lib.Log;

/**
 * Stores the collection frequency of terms, which can be accessed
 * by {@link #readValue(int)} using the termID. 
 * @author jer
 */
public class TermCF extends StoredUnreportableFeature implements DictionaryFeature {

   public static Log log = new Log(TermCF.class);
   public HashMap cache = new HashMap();

   private TermCF(Repository repository) {
      super(repository);
      readCache();
   }

   public static TermCF get(Repository repository) {
       String label = canonicalName(TermCF.class);
       TermCF termcf = (TermCF)repository.getStoredFeature(label);
       if (termcf == null) {
          termcf = new TermCF(repository);
          repository.storeFeature(label, termcf);
       }
       return termcf;
   }
   
   public void readCache() {
      ArrayList termids = repository.configuredIntList("repository.cachedtermids");
      if (termids.size() > 0) {
         ArrayList cf = repository.configuredLongList("repository.cachedtermtfs");
         for (int i = 0; i < termids.size() && i < cf.size(); i++) {
            if (cf.get(i) != Const.NULLLONG) {
               cache.put(termids.get(i), cf.get(i));
            }
         }
      }
   }

   public void writeCache() {
      ArrayList termids = repository.configuredIntList("repository.cachedtermids");
      for (Integer s : cache.keySet())
         if (!termids.contains(s))
            termids.add(s);
      if (termids.size() > 0) {
         ArrayList cf = new ArrayList();
         NEXT:
         for (Integer i : termids) {
            Long v = cache.get(i);
            if (v == null)
               v = Const.NULLLONG;
            cf.add(v);
         }
         repository.getConf().setIntList("repository.cachedtermids", termids);
         repository.getConf().setLongList("repository.cachedtermtfs", cf);
      }
   }
   
   public void dontCache() {
      cache = null;
   }

   public void loadMem() {
      openRead();
      file.loadMem();
   }

   public void unloadMem() {
      getFile().unloadMem();
   }

   @Override
   public File createFile(Datafile datafile) {
      return new File(datafile);
   }

   public Long getValue() {
      return file.cf.value;
   }

   public long readValue(int id) {
      Long value;
      if (cache != null) {
         value = cache.get(id);
         if (value != null)
            return value;
      }
      getFile().read(id);
      value = getValue();
      if (cache != null)
         cache.put(id, value);
      return value;
   }
   
   @Override
   public void reduceInput(int id, String term, long cf, long df) {
      file.cf.write(cf);
   }

   @Override
   public void startReduce(long corpustermfreq, int corpusdocumentfrequency) {
      openWrite();
   }

   @Override
   public void finishReduce() {
      closeWrite();
   }

   public static class File extends StructuredFileByteJumpTable {

      public StructuredDataStream.CLongField cf = this.addCLong("cf");

      public File(Datafile df) {
         super(df);
      }
   }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy