fr.boreal.io.csv.encoding.dictionary.DictionaryHandler Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of integraal-io Show documentation
Show all versions of integraal-io Show documentation
Inputs and Outputs for integraal objects
The newest version!
package fr.boreal.io.csv.encoding.dictionary;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import fr.boreal.model.logicalElements.api.Atom;
import fr.boreal.model.logicalElements.api.Term;
import fr.boreal.model.logicalElements.impl.AtomImpl;
import fr.boreal.model.logicalElements.impl.ConstantImpl;
/**
 * Handles an encoding dictionary with parameterized criteria.
 *
 * @author Florent Tornil
 */
public class DictionaryHandler {

    /** File the dictionary entries are appended to. */
    private final File dictionaryFile;

    /** In-memory dictionary mapping a term label to its encoding (an integer rendered as a string). */
    private final Map<String, String> dictionary = new HashMap<>();

    /** File the optimistic encodings are logged to; {@code null} when built with the single-argument constructor. */
    private final File repareFile;

    /** Label length above which optimistic encoding is used; {@code -1} disables it. */
    private final int optimisticThreshold;

    /** Next unused encoding value; encodings are handed out sequentially starting at 1. */
    private int nextEncoding = 1;

    /**
     * Constructor with pessimistic encoding: every label is looked up in, and
     * stored into, the in-memory dictionary.
     *
     * @param dictionaryFile file to store the dictionary
     */
    public DictionaryHandler(File dictionaryFile) {
        this(dictionaryFile, -1, null);
    }

    /**
     * Constructor with optimistic encoding.
     *
     * If a term is of length &gt; optimisticThreshold, it is encoded with a new
     * encoding without checking nor storing it into the in-memory dictionary.
     *
     * @param dictionaryFile file to store the dictionary
     * @param optimisticThreshold threshold at which to use optimistic encoding. Use -1 to disable it
     * @param repairFile file to store the optimistic (possible) collisions
     */
    public DictionaryHandler(File dictionaryFile, int optimisticThreshold, File repairFile) {
        this.dictionaryFile = dictionaryFile;
        this.optimisticThreshold = optimisticThreshold;
        this.repareFile = repairFile;
    }

    /**
     * Encodes the given Atom.
     *
     * Each term label is replaced by a constant whose label is the encoding.
     * Labels longer than the optimistic threshold (when enabled) always get a
     * fresh encoding, which is logged to the repair file and appended to the
     * dictionary file immediately. Other labels reuse the encoding stored in
     * the in-memory dictionary, or get a fresh one that is stored there.
     *
     * @param initial the initial Atom
     * @return the encoded Atom, built on the same predicate as {@code initial}
     */
    public Atom encode(Atom initial) {
        List<Term> encodedTerms = new ArrayList<>(initial.getPredicate().arity());
        // Evaluated once: the number of terms does not change while encoding.
        int termCount = initial.getTerms().length;
        for (int i = 0; i < termCount; i++) {
            String label = initial.getTerm(i).label();
            String encoding;
            if (this.optimisticThreshold > -1 && label.length() > this.optimisticThreshold) {
                // Optimistic path: no lookup, no in-memory storage.
                encoding = this.takeNextEncoding();
                this.appendRepairEntry(initial, i, label, encoding);
                this.storeDirectlyOnDisc(label, encoding);
            } else {
                // Pessimistic path: reuse the known encoding if there is one.
                encoding = this.dictionary.get(label);
                if (encoding == null) {
                    encoding = this.takeNextEncoding();
                    this.dictionary.put(label, encoding);
                }
            }
            encodedTerms.add(new ConstantImpl(encoding));
        }
        return new AtomImpl(initial.getPredicate(), encodedTerms);
    }

    /**
     * Writes the content of this dictionary on the disk.
     *
     * Entries are appended to the dictionary file, one {@code label,C,encoding}
     * line per entry. The in-memory dictionary is left untouched, so calling
     * this method again appends all the in-memory entries once more.
     * An {@link IOException} is not propagated; its stack trace is printed.
     */
    public void flushOnDisc() {
        try (FileWriter dictionaryWriter = new FileWriter(this.dictionaryFile, true)) {
            for (Entry<String, String> entry : this.dictionary.entrySet()) {
                dictionaryWriter.write(entry.getKey() + ",C," + entry.getValue() + "\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * @return the dictionaryFile
     */
    public File getDictionaryFile() {
        return dictionaryFile;
    }

    /**
     * @return the repairFile, or {@code null} if none was given at construction
     */
    public File getRepareFile() {
        return repareFile;
    }

    /**
     * Returns the next unused encoding and advances the counter.
     *
     * @return the encoding, as the decimal string of the counter value
     */
    private String takeNextEncoding() {
        String encoding = Integer.toString(this.nextEncoding);
        this.nextEncoding++;
        return encoding;
    }

    /**
     * Appends one line describing an optimistic encoding to the repair file.
     *
     * The line is
     * {@code labelLength,label,encoding,predicateLabel,termPosition,predicateArity}.
     * An {@link IOException} is not propagated; its stack trace is printed.
     *
     * @param atom the atom being encoded
     * @param position index of the encoded term in the atom
     * @param label label of the encoded term
     * @param encoding encoding given to the term
     */
    private void appendRepairEntry(Atom atom, int position, String label, String encoding) {
        try (FileWriter repairWriter = new FileWriter(this.repareFile, true)) {
            repairWriter.write(label.length() + "," +
                    label + "," +
                    encoding + "," +
                    atom.getPredicate().label() + "," +
                    position + "," +
                    atom.getPredicate().arity() + "\n");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the encoding directly on the disk, appending one
     * {@code label,C,encoding} line to the dictionary file.
     * An {@link IOException} is not propagated; its stack trace is printed.
     *
     * @param label key of the encoding
     * @param encoding value of the encoding
     */
    private void storeDirectlyOnDisc(String label, String encoding) {
        try (FileWriter dictionaryWriter = new FileWriter(this.dictionaryFile, true)) {
            dictionaryWriter.write(label + ",C," + encoding + "\n");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}