fr.boreal.io.csv.encoding.RLSEncoder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of integraal-io Show documentation
Show all versions of integraal-io Show documentation
Inputs and Outputs for integraal objects
The newest version!
package fr.boreal.io.csv.encoding;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import fr.boreal.io.api.DataEncoder;
import fr.boreal.io.csv.CSVConstants;
import fr.boreal.io.csv.RLSCSVsParser;
import fr.boreal.io.csv.encoding.dictionary.DictionaryHandler;
import fr.boreal.model.logicalElements.api.Atom;
/**
 * Encodes with integers the CSV files referenced by an RLS configuration file.
 *
 * The output is a triple with:
 * <ul>
 * <li>the RLS file linking to the encoded CSVs,</li>
 * <li>the dictionary file,</li>
 * <li>the repare file (if using an optimistic encoding).</li>
 * </ul>
 *
 * All files are written under a single folder located in the directory given
 * by the {@code java.io.tmpdir} system property. The folder path is computed
 * once, when the class is initialised, and is therefore shared by every
 * instance of this class.
 *
 * @author Florent Tornil
 */
public class RLSEncoder implements DataEncoder {

    static final Logger LOG = LoggerFactory.getLogger(RLSEncoder.class);

    /**
     * Default threshold to use optimistic encoding
     */
    public static final int DEFAULT_TRESHOLD = 1000;

    /** Folder receiving every file produced by the encoding; shared by all instances. */
    private static final Path encodingFolderPath = Path.of(System.getProperty("java.io.tmpdir"), "encoding",
            Long.toString(System.currentTimeMillis()));

    private final DictionaryHandler dictionaryHandler;
    private final File rlsFile;

    /**
     * Constructor which sets default optimistic encoding and the default CSV
     * settings from {@link CSVConstants}.
     */
    public RLSEncoder() {
        this(DEFAULT_TRESHOLD, CSVConstants.CSVSEPARATOR, CSVConstants.CSVPREFIX, CSVConstants.CSVHEADERSIZE);
    }

    /**
     * Constructor which sets default optimistic encoding
     *
     * @param separator  separator character of the CSV file
     * @param prefix     prefix of the predicate name
     * @param headerSize size of the header of the CSV file
     */
    public RLSEncoder(char separator, String prefix, int headerSize) {
        this(DEFAULT_TRESHOLD, separator, prefix, headerSize);
    }

    /**
     * Constructor with the whole dictionary handler object.
     *
     * Creates the encoding folder if needed and points the RLS output file to
     * {@code RLS.rls} inside it.
     *
     * NOTE(review): {@code separator}, {@code prefix} and {@code headerSize} are
     * accepted but never read by this class; confirm whether they should be
     * forwarded to the parser.
     *
     * @param dictionaryHandler encoding dictionary handler
     * @param separator         separator character of the CSV file
     * @param prefix            prefix of the predicate name
     * @param headerSize        size of the header of the CSV file
     */
    public RLSEncoder(DictionaryHandler dictionaryHandler, char separator, String prefix, int headerSize) {
        this.dictionaryHandler = dictionaryHandler;
        encodingFolderPath.toFile().mkdirs();
        this.rlsFile = Path.of(encodingFolderPath.toString(), "RLS.rls").toFile();
    }

    /**
     * Constructor with only the threshold for optimistic encoding
     *
     * Refer to DictionaryHandler for more information on the threshold.
     * The dictionary is stored in {@code dictionary.csv} and the repare file in
     * {@code repare.csv}, both inside the encoding folder.
     *
     * @param optimisticThreshold threshold for optimistic encoding
     * @param separator           separator character of the CSV file
     * @param prefix              prefix of the predicate name
     * @param headerSize          size of the header of the CSV file
     */
    public RLSEncoder(int optimisticThreshold, char separator, String prefix, int headerSize) {
        this(new DictionaryHandler(Path.of(encodingFolderPath.toString(), "dictionary.csv").toFile(),
                optimisticThreshold, Path.of(encodingFolderPath.toString(), "repare.csv").toFile()), separator, prefix,
                headerSize);
    }

    /**
     * Encodes every atom produced by parsing the given input and appends it to
     * one CSV file per predicate inside the encoding folder.
     *
     * The first time a predicate is met (its CSV file does not exist yet), a
     * matching {@code @source} declaration is appended to the RLS file. Write
     * failures are logged and the encoding carries on with the next atom.
     *
     * @param input the input handed to {@link RLSCSVsParser}
     * @return the paths of the RLS file, the dictionary file and the repare file
     */
    @Override
    public EncodedRLS encode(String input) {
        try (RLSCSVsParser rlsParser = new RLSCSVsParser(input, false)) {
            while (rlsParser.hasNext()) {
                Atom encodedAtom = this.dictionaryHandler.encode(rlsParser.next());
                File encodingFile = Path
                        .of(encodingFolderPath.toString(), encodedAtom.getPredicate().toString() + ".csv").toFile();
                // The declaration must be written before the row, because writing
                // the row creates the file whose absence marks a new predicate.
                if (!encodingFile.exists()) {
                    this.appendSourceDeclaration(encodedAtom, encodingFile);
                }
                this.appendEncodedRow(encodedAtom, encodingFile);
            }
        }
        this.dictionaryHandler.flushOnDisc();
        return new EncodedRLS(this.rlsFile.getPath(), this.dictionaryHandler.getDictionaryFile().getPath(),
                this.dictionaryHandler.getRepareFile().getPath());
    }

    /**
     * Appends to the RLS file the {@code @source} declaration linking the
     * predicate of the given atom to its encoded CSV file.
     */
    private void appendSourceDeclaration(Atom encodedAtom, File encodingFile) {
        String predicateName = encodedAtom.getPredicate().toString();
        try (FileWriter rlsWriter = new FileWriter(this.rlsFile, true)) {
            rlsWriter.write("@source " + predicateName + "[" + encodedAtom.getPredicate().arity() + "]"
                    + ": load-csv(\"" + encodingFile.getPath() + "\") .\n");
        } catch (IOException e) {
            LOG.error("Unable to declare the source of predicate {} in RLS file {}", predicateName,
                    this.rlsFile.getPath(), e);
        }
    }

    /**
     * Appends the terms of the given atom, separated by commas, as one line of
     * the given CSV file.
     */
    private void appendEncodedRow(Atom encodedAtom, File encodingFile) {
        StringBuilder row = new StringBuilder();
        for (int i = 0; i < encodedAtom.getPredicate().arity(); i++) {
            if (i != 0) {
                row.append(",");
            }
            row.append(encodedAtom.getTerm(i));
        }
        row.append("\n");
        try (FileWriter encodingWriter = new FileWriter(encodingFile, true)) {
            encodingWriter.write(row.toString());
        } catch (IOException e) {
            LOG.error("Unable to write an encoded atom to file {}", encodingFile.getPath(), e);
        }
    }

    /**
     * Deletes the encoding folder and everything it contains.
     *
     * The folder is shared by all instances of this class, so this removes the
     * files produced by every one of them. Failures are logged, never thrown.
     */
    public void deleteAllTempFiles() {
        if (!Files.exists(encodingFolderPath)) {
            LOG.debug("No temporary file to delete, {} does not exist", encodingFolderPath);
            return;
        }
        // Files.walk keeps directories open until the stream is closed.
        try (var paths = Files.walk(encodingFolderPath)) {
            // Reverse order visits children before their parent directory.
            paths.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(file -> {
                LOG.debug("Deleting file: {}", file.getPath());
                if (!file.delete()) {
                    LOG.warn("Could not delete temporary file: {}", file.getPath());
                }
            });
        } catch (IOException e) {
            LOG.error("An error occurred while trying to delete temporary files", e);
        }
    }
}