All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.Repository.TermDF Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository;

import java.util.ArrayList;
import java.util.HashMap;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.io.struct.StructuredFileByteJumpTable;
import io.github.repir.Repository.TermDF.File;
import io.github.repir.tools.lib.Const;
import io.github.repir.tools.lib.Log;

/**
 * Stores the document frequency of terms in the Vocabulary, which can be accessed
 * by {@link #readValue(int)} using the termID.
 * <p>
 * Values that were read are kept in {@link #cache} (termID to document frequency)
 * unless caching was switched off with {@link #dontCache()}. The cache can be
 * seeded from, and persisted to, the configuration keys
 * {@code repository.cachedtermids} and {@code repository.cachedtermdfs} through
 * {@link #readCache()} and {@link #writeCache()}.
 * <p>
 * The type argument on the superclass types the inherited {@code file} field as
 * {@link File}, which the direct {@code file.df} accesses in this class require.
 *
 * @author jer
 */
public class TermDF extends StoredUnreportableFeature<File> implements DictionaryFeature {

   public static Log log = new Log(TermDF.class);
   int keyid = 0;
   /** Maps termID to document frequency; {@code null} after {@link #dontCache()}. */
   public HashMap<Integer, Long> cache = new HashMap<Integer, Long>();

   /**
    * Creates the feature for the given repository and seeds the cache from the
    * configuration. Instances are obtained through {@link #get(Repository)}.
    *
    * @param repository the repository this feature belongs to
    */
   private TermDF(Repository repository) {
      super(repository);
      readCache();
   }

   /**
    * Returns the TermDF instance registered in the repository under the canonical
    * name of this class, creating and registering a new one when none is stored yet.
    *
    * @param repository the repository to look the feature up in
    * @return the stored or newly created TermDF
    */
   public static TermDF get(Repository repository) {
      String label = canonicalName(TermDF.class);
      TermDF termdf = (TermDF) repository.getStoredFeature(label);
      if (termdf == null) {
         termdf = new TermDF(repository);
         repository.storeFeature(label, termdf);
      }
      return termdf;
   }

   /**
    * Fills the cache from the parallel configuration lists
    * {@code repository.cachedtermids} and {@code repository.cachedtermdfs}.
    * Positions whose document frequency equals {@code Const.NULLLONG} are skipped,
    * and only positions present in both lists are considered. Does nothing when
    * caching was disabled with {@link #dontCache()}.
    */
   public void readCache() {
      if (cache == null) {
         return;
      }
      ArrayList<Integer> termids = repository.configuredIntList("repository.cachedtermids");
      if (termids.size() > 0) {
         ArrayList<Long> df = repository.configuredLongList("repository.cachedtermdfs");
         for (int i = 0; i < termids.size() && i < df.size(); i++) {
            // unbox first so the sentinel test is always a numeric comparison
            long value = df.get(i);
            if (value != Const.NULLLONG) {
               cache.put(termids.get(i), value);
            }
         }
      }
   }

   /**
    * Stores the cache in the configuration as two parallel lists: the termIDs under
    * {@code repository.cachedtermids} and their document frequencies under
    * {@code repository.cachedtermdfs}. TermIDs that are already configured but have
    * no cached value are written with {@code Const.NULLLONG}. Nothing is written
    * when there are no termIDs at all, or when caching was disabled with
    * {@link #dontCache()}.
    */
   public void writeCache() {
      if (cache == null) {
         return;
      }
      ArrayList<Integer> termids = repository.configuredIntList("repository.cachedtermids");
      for (Integer termid : cache.keySet()) {
         if (!termids.contains(termid)) {
            termids.add(termid);
         }
      }
      if (termids.size() > 0) {
         ArrayList<Long> df = new ArrayList<Long>();
         for (Integer termid : termids) {
            Long value = cache.get(termid);
            if (value == null) {
               // keeps both lists aligned for termIDs without a cached value
               value = Const.NULLLONG;
            }
            df.add(value);
         }
         repository.getConf().setIntList("repository.cachedtermids", termids);
         repository.getConf().setLongList("repository.cachedtermdfs", df);
      }
   }

   /**
    * Disables caching by discarding the cache; subsequent calls to
    * {@link #readValue(int)} always read from the file.
    */
   public void dontCache() {
      cache = null;
   }

   /**
    * Writes a document frequency to the {@code df} field of the file.
    *
    * @param df the document frequency to write
    */
   public void write(Long df) {
      file.df.write(df);
   }

   /**
    * Returns the document frequency for a termID. A cached value is returned when
    * available; otherwise the file is opened for reading if needed, the record for
    * {@code id} is read, and the result is added to the cache when caching is enabled.
    *
    * @param id the termID
    * @return the document frequency of the term
    */
   public long readValue(int id) {
      Long value;
      if (cache != null) {
         value = cache.get(id);
         if (value != null) {
            return value;
         }
      }

      if (file == null) {
         openRead();
      }
      file.read(id);
      value = getValue();
      if (cache != null) {
         cache.put(id, value);
      }
      return value;
   }

   /**
    * @return the value currently held by the {@code df} field of the file
    */
   public Long getValue() {
      return file.df.value;
   }

   /**
    * Opens the file for reading and calls {@code loadMem()} on it.
    */
   public void loadMem() {
      openRead();
      file.loadMem();
   }

   /**
    * Calls {@code unloadMem()} on the file; does nothing when no file is set.
    */
   public void unloadMem() {
      if (file != null) {
         file.unloadMem();
      }
   }

   /**
    * @param datafile the datafile backing the structured file
    * @return a new {@link File} on the given datafile
    */
   @Override
   public File createFile(Datafile datafile) {
      return new File(datafile);
   }

   /**
    * Writes the document frequency of the supplied term; the other arguments are
    * not used by this feature.
    */
   @Override
   public void reduceInput(int id, String term, long cf, long df) {
      write(df);
   }

   /**
    * Opens the file for writing; the corpus statistics are not used by this feature.
    */
   @Override
   public void startReduce(long corpustermfreq, int corpusdocumentfrequency) {
      openWrite();
   }

   /**
    * Closes the file that was opened for writing.
    */
   @Override
   public void finishReduce() {
      closeWrite();
   }

   /**
    * Structured file with a single field named {@code df} that holds the
    * document frequency.
    */
   public static class File extends StructuredFileByteJumpTable {

      public CLongField df = this.addCLong("df");

      public File(Datafile df) {
         super(df);
      }
   }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy