All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fr.boreal.io.csv.encoding.RLSEncoder Maven / Gradle / Ivy

The newest version!
package fr.boreal.io.csv.encoding;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import fr.boreal.io.api.DataEncoder;
import fr.boreal.io.csv.CSVConstants;
import fr.boreal.io.csv.RLSCSVsParser;
import fr.boreal.io.csv.encoding.dictionary.DictionaryHandler;
import fr.boreal.model.logicalElements.api.Atom;

/**
 * Encodes with integers the CSV files referenced by an RLS configuration file.
 * <p>
 * The output is a triple made of:
 * <ul>
 * <li>the RLS file linking to the encoded CSVs,</li>
 * <li>the dictionary file,</li>
 * <li>the repare file (if using an optimistic encoding).</li>
 * </ul>
 * <p>
 * All generated files are written to a single temporary folder
 * ({@code <java.io.tmpdir>/encoding/<timestamp>}) whose path is computed once
 * when the class is loaded and is therefore shared by every instance of this
 * class.
 *
 * @author Florent Tornil
 */
public class RLSEncoder implements DataEncoder {

	static final Logger LOG = LoggerFactory.getLogger(RLSEncoder.class);

	/**
	 * Default threshold to use optimistic encoding
	 */
	public static final int DEFAULT_TRESHOLD = 1000;

	/**
	 * Folder receiving every generated file; shared by all instances.
	 */
	private static final Path encodingFolderPath = Path.of(System.getProperty("java.io.tmpdir"), "encoding",
			Long.toString(System.currentTimeMillis()));

	private final DictionaryHandler dictionaryHandler;
	private final File rlsFile;

	/**
	 * Constructor which uses the default optimistic encoding threshold and the
	 * default CSV settings from {@link CSVConstants}.
	 */
	public RLSEncoder() {
		this(DEFAULT_TRESHOLD, CSVConstants.CSVSEPARATOR, CSVConstants.CSVPREFIX, CSVConstants.CSVHEADERSIZE);
	}

	/**
	 * Constructor which uses the default optimistic encoding threshold
	 * ({@link #DEFAULT_TRESHOLD}).
	 * 
	 * @param separator  separator character of the CSV file
	 * @param prefix     prefix of the predicate name
	 * @param headerSize size of the header of the CSV file
	 */
	public RLSEncoder(char separator, String prefix, int headerSize) {
		this(DEFAULT_TRESHOLD, separator, prefix, headerSize);
	}

	/**
	 * Constructor with the whole dictionary handler object.
	 * <p>
	 * Creates the encoding folder if needed and sets the RLS output file to
	 * {@code RLS.rls} inside it.
	 * <p>
	 * NOTE: {@code separator}, {@code prefix} and {@code headerSize} are accepted
	 * for API compatibility but are currently not used by this class.
	 * 
	 * @param dictionaryHandler encoding dictionary handler
	 * @param separator         separator character of the CSV file (unused)
	 * @param prefix            prefix of the predicate name (unused)
	 * @param headerSize        size of the header of the CSV file (unused)
	 */
	public RLSEncoder(DictionaryHandler dictionaryHandler, char separator, String prefix, int headerSize) {
		this.dictionaryHandler = dictionaryHandler;

		encodingFolderPath.toFile().mkdirs();
		this.rlsFile = Path.of(encodingFolderPath.toString(), "RLS.rls").toFile();
	}

	/**
	 * Constructor with only the threshold for optimistic encoding.
	 * <p>
	 * The dictionary handler is created with {@code dictionary.csv} and
	 * {@code repare.csv} located in the encoding folder.
	 * 
	 * Refer to DictionaryHandler for more information on the threshold
	 * 
	 * @param optimisticThreshold threshold for optimistic encoding
	 * @param separator           separator character of the CSV file
	 * @param prefix              prefix of the predicate name
	 * @param headerSize          size of the header of the CSV file
	 */
	public RLSEncoder(int optimisticThreshold, char separator, String prefix, int headerSize) {
		this(new DictionaryHandler(Path.of(encodingFolderPath.toString(), "dictionary.csv").toFile(),
				optimisticThreshold, Path.of(encodingFolderPath.toString(), "repare.csv").toFile()), separator, prefix,
				headerSize);
	}

	/**
	 * Encodes every atom produced by an {@link RLSCSVsParser} built on the given
	 * input.
	 * <p>
	 * Each encoded atom is appended as one comma-separated line to
	 * {@code <predicate>.csv} in the encoding folder. The first time a predicate's
	 * CSV file is about to be created, a matching {@code @source} declaration is
	 * appended to the RLS file. I/O failures on these files are logged and the
	 * encoding continues with the next atom.
	 * 
	 * @param input the RLS input handed to {@link RLSCSVsParser} (presumably the
	 *              path of the RLS configuration file -- confirm against the
	 *              parser)
	 * @return the paths of the RLS file, the dictionary file and the repare file
	 */
	@Override
	public EncodedRLS encode(String input) {
		// The folder may have been removed by deleteAllTempFiles() since construction.
		encodingFolderPath.toFile().mkdirs();

		try (RLSCSVsParser rlsParser = new RLSCSVsParser(input, false)) {
			while (rlsParser.hasNext()) {
				Atom encodedAtom = this.dictionaryHandler.encode(rlsParser.next());
				File encodingFile = Path
						.of(encodingFolderPath.toString(), encodedAtom.getPredicate().toString() + ".csv").toFile();

				// A missing CSV file means this predicate has not been declared yet.
				if (!encodingFile.exists()) {
					this.appendSourceDeclaration(encodedAtom, encodingFile);
				}
				this.appendEncodedAtom(encodedAtom, encodingFile);
			}
		}

		this.dictionaryHandler.flushOnDisc();
		return new EncodedRLS(this.rlsFile.getPath(), this.dictionaryHandler.getDictionaryFile().getPath(),
				this.dictionaryHandler.getRepareFile().getPath());
	}

	/**
	 * Appends to the RLS file the {@code @source} declaration linking the atom's
	 * predicate to its encoded CSV file.
	 */
	private void appendSourceDeclaration(Atom encodedAtom, File encodingFile) {
		try (FileWriter rlsWriter = new FileWriter(this.rlsFile, true)) {
			rlsWriter.write("@source " + encodedAtom.getPredicate().toString() + "["
					+ encodedAtom.getPredicate().arity() + "]" + ": load-csv(\"" + encodingFile.getPath()
					+ "\") .\n");
		} catch (IOException e) {
			LOG.error("Could not write the source declaration to {}", this.rlsFile.getPath(), e);
		}
	}

	/**
	 * Appends the terms of the encoded atom as one comma-separated line to the
	 * given CSV file.
	 */
	private void appendEncodedAtom(Atom encodedAtom, File encodingFile) {
		try (FileWriter encodingWriter = new FileWriter(encodingFile, true)) {
			StringBuilder sb = new StringBuilder();
			for (int i = 0; i < encodedAtom.getPredicate().arity(); i++) {
				if (i != 0) {
					sb.append(",");
				}
				sb.append(encodedAtom.getTerm(i));
			}
			sb.append("\n");
			encodingWriter.write(sb.toString());
		} catch (IOException e) {
			LOG.error("Could not write the encoded atom to {}", encodingFile.getPath(), e);
		}
	}

	/**
	 * Deletes the encoding folder and everything it contains.
	 * <p>
	 * Entries are deleted in reverse path order so that the content of a directory
	 * is removed before the directory itself. Does nothing if the folder does not
	 * exist. Failures are logged, not thrown.
	 */
	public void deleteAllTempFiles() {
		if (Files.notExists(encodingFolderPath)) {
			return;
		}
		// Files.walk holds open directory handles: the stream must be closed.
		try (var paths = Files.walk(encodingFolderPath)) {
			paths.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(file -> {
				LOG.debug("Deleting file: {}", file.getPath());
				if (!file.delete()) {
					LOG.warn("Could not delete temporary file: {}", file.getPath());
				}
			});
		} catch (IOException e) {
			LOG.error("An error occurred while trying to delete temporary files", e);
		}
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy