All downloads are free. The search and download features use the official Maven repository.

fr.boreal.io.csv.encoding.dictionary.DictionaryHandler Maven / Gradle / Ivy

The newest version!
package fr.boreal.io.csv.encoding.dictionary;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import fr.boreal.model.logicalElements.api.Atom;
import fr.boreal.model.logicalElements.api.Term;
import fr.boreal.model.logicalElements.impl.AtomImpl;
import fr.boreal.model.logicalElements.impl.ConstantImpl;

/**
 * Handles an encoding dictionary with parameterized criteria.
 * <p>
 * Each term label of an encoded atom is replaced by the decimal representation of an
 * integer drawn from a counter that starts at 1. Two strategies are available:
 * <ul>
 * <li><b>pessimistic</b>: the label/encoding pair is kept in an in-memory dictionary, so
 * the same label always receives the same encoding; the dictionary is written to the
 * dictionary file by {@link #flushOnDisc()};</li>
 * <li><b>optimistic</b>: labels longer than the configured threshold always receive a
 * fresh encoding without any lookup; the pair is appended immediately to the dictionary
 * file and a record is appended to the repair file.</li>
 * </ul>
 * I/O failures are reported with {@code printStackTrace()} and do not interrupt encoding.
 *
 * @author Florent Tornil
 */
public class DictionaryHandler {

	private final File dictionaryFile;
	/** In-memory dictionary: term label to encoding. */
	private final Map<String, String> dictionary = new HashMap<>();

	/** File receiving the optimistic records; {@code null} when built with the pessimistic constructor. */
	private File repairFile;
	/** Label length above which optimistic encoding is used; -1 disables it. */
	private int optimisticThreshold = -1;

	/** Next integer to hand out as an encoding. */
	private int nextEncoding = 1;

	/**
	 * Constructor with pessimistic encoding
	 * 
	 * @param dictionaryFile file to store the dictionary 
	 */
	public DictionaryHandler(File dictionaryFile) {
		this.dictionaryFile = dictionaryFile;
	}

	/**
	 * Constructor with optimistic encoding
	 * 
	 * If a term is of length &gt; optimisticThreshold, it is encoded with a new encoding
	 * without checking nor storing it into the dictionary
	 * 
	 * @param dictionaryFile file to store the dictionary
	 * @param repairFile file to store the optimistic (possible) collisions
	 * @param optimisticThreshold threshold at which to use optimistic encoding. Use -1 to disable it
	 */
	public DictionaryHandler(File dictionaryFile, int optimisticThreshold, File repairFile) {
		this.dictionaryFile = dictionaryFile;
		this.optimisticThreshold = optimisticThreshold;
		this.repairFile = repairFile;
	}

	/**
	 * Encodes the given Atom
	 * <p>
	 * The returned atom has the same predicate as the initial one and one constant per
	 * term of the initial atom, whose label is the encoding of the initial term's label.
	 * 
	 * @param initial the initial Atom
	 * @return the encoded Atom
	 */
	public Atom encode(Atom initial) {
		List<Term> encodedTerms = new ArrayList<>(initial.getPredicate().arity());
		int termCount = initial.getTerms().length;
		for (int i = 0; i < termCount; i++) {
			String label = initial.getTerm(i).label();
			String encoding;
			if (this.optimisticThreshold > -1 && label.length() > this.optimisticThreshold) {
				// Optimistic path: always a fresh encoding, never looked up nor kept in memory.
				encoding = this.takeNextEncoding();
				// Repair record: label length, label, encoding, predicate label, term position, predicate arity.
				this.appendLine(this.repairFile, label.length() + "," + label + "," + encoding + ","
						+ initial.getPredicate().label() + "," + i + "," + initial.getPredicate().arity());
				this.storeDirectlyOnDisc(label, encoding);
			} else {
				// Pessimistic path: reuse the known encoding of this label, or register a new one.
				encoding = this.dictionary.get(label);
				if (encoding == null) {
					encoding = this.takeNextEncoding();
					this.dictionary.put(label, encoding);
				}
			}
			encodedTerms.add(new ConstantImpl(encoding));
		}
		return new AtomImpl(initial.getPredicate(), encodedTerms);
	}

	/**
	 * Writes the content of this dictionary on the disk
	 * <p>
	 * Entries are appended to the dictionary file as {@code label,C,encoding} lines. The
	 * in-memory dictionary is left untouched, so calling this method again appends the
	 * same entries again.
	 */
	public void flushOnDisc() {
		try (FileWriter dictionaryWriter = new FileWriter(this.dictionaryFile, true)) {
			for (Entry<String, String> entry : this.dictionary.entrySet()) {
				dictionaryWriter.write(entry.getKey() + ",C," + entry.getValue() + "\n");
			}
		} catch (IOException e) {
			// Best effort: report the failure without interrupting the caller.
			e.printStackTrace();
		}
	}

	/**
	 * @return the dictionaryFile
	 */
	public File getDictionaryFile() {
		return dictionaryFile;
	}

	/**
	 * @return the repairFile
	 */
	public File getRepareFile() {
		return repairFile;
	}

	/**
	 * Writes the encoding directly on the disk
	 * @param label key of the encoding
	 * @param encoding value of the encoding
	 */
	private void storeDirectlyOnDisc(String label, String encoding) {
		this.appendLine(this.dictionaryFile, label + ",C," + encoding);
	}

	/**
	 * Returns the current value of the encoding counter as a string and advances the counter.
	 * @return the encoding to use for a new label
	 */
	private String takeNextEncoding() {
		String encoding = Integer.toString(this.nextEncoding);
		this.nextEncoding++;
		return encoding;
	}

	/**
	 * Appends the given line, followed by a line feed, at the end of the given file.
	 * @param target file to append to
	 * @param line content to append, without line terminator
	 */
	private void appendLine(File target, String line) {
		try (FileWriter writer = new FileWriter(target, true)) {
			writer.write(line + "\n");
		} catch (IOException e) {
			// Best effort: report the failure without interrupting the encoding.
			e.printStackTrace();
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy