All downloads are free. The search and download features use the official Maven repository.

fr.boreal.io.csv.CSVParser Maven / Gradle / Ivy

The newest version!
package fr.boreal.io.csv;

import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;

import fr.boreal.io.api.Parser;
import fr.boreal.io.dlgp.ParserResult;
import fr.boreal.model.logicalElements.api.Atom;
import fr.boreal.model.logicalElements.api.Predicate;
import fr.boreal.model.logicalElements.api.Term;
import fr.boreal.model.logicalElements.factory.api.PredicateFactory;
import fr.boreal.model.logicalElements.factory.api.TermFactory;
import fr.boreal.model.logicalElements.factory.impl.SameObjectPredicateFactory;
import fr.boreal.model.logicalElements.factory.impl.SameObjectTermFactory;
import fr.boreal.model.logicalElements.impl.AtomImpl;
import fr.lirmm.boreal.util.stream.ArrayBlockingStream;

/**
 * @author Florent Tornil
 * 

* This class parses a single CSV file into atoms. *

* Each line of the file represents an atom and every atom of the file * have the same predicate either given to the constructor or deduced * from the file name. * * Please note that all the terms are seen as constants. */ public class CSVParser implements Parser, AutoCloseable { private final ArrayBlockingStream buffer = new ArrayBlockingStream<>(512); private static final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor(); /** * Parses the given CSV file using default values * @param filePath path of the csv file to parse */ public CSVParser(String filePath) { this(new File(filePath), CSVConstants.CSVSEPARATOR, CSVConstants.CSVPREFIX, CSVConstants.CSVHEADERSIZE); } /** * Parses the given CSV file using the given parsing arguments * Uses the filename as predicate label * @param file csv file to parse * @param separator csv separator * @param prefix (rdf) prefix * @param headerSize size of the csv header */ public CSVParser(File file, char separator, String prefix, int headerSize) { executor.submit(new Producer(file, buffer, separator, prefix, headerSize)); } /** * Parses the given CSV file using the given predicate to create atoms * @param predicateName label of the predicate * @param arity arity of the predicate * @param file csv file to parse * @param separator csv separator * @param prefix (rdf) prefix * @param headerSize size of the csv header */ public CSVParser(String predicateName, int arity, File file, char separator, String prefix, int headerSize) { new Thread(new Producer(predicateName, arity, file, buffer, separator, prefix, headerSize)).start(); } /** * Parses the given CSV file using the given predicate to create atoms * @param predicateName label of the predicate * @param arity arity of the predicate * @param file csv file to parse * */ public CSVParser(String predicateName, int arity, File file) { new Thread(new Producer(predicateName, arity, file, buffer, CSVConstants.CSVSEPARATOR, CSVConstants.CSVPREFIX, 
CSVConstants.CSVHEADERSIZE)).start(); } @Override public boolean hasNext() { return buffer.hasNext(); } @Override public Atom next() { return buffer.next(); } @Override public void close() { this.buffer.close(); executor.shutdownNow(); } @Override public ParserResult parse() { Collection atoms = new ArrayList<>(); while(this.hasNext()) { atoms.add(this.next()); } return new ParserResult(atoms, List.of(), List.of(), List.of()); } // // Private class Producer // static class Producer implements Runnable { private final PredicateFactory pf = SameObjectPredicateFactory.instance(); private final TermFactory tf = SameObjectTermFactory.instance(); private final File file; private final ArrayBlockingStream buffer; private final char separator; private final String prefix; private final int headerSize; private Predicate predicate = null; public Producer(File file, ArrayBlockingStream buffer, char separator, String prefix, int headerSize) { this.file = file; this.buffer = buffer; this.separator = separator; this.prefix = prefix; this.headerSize = headerSize; } public Producer(String predicateName, int arity, File file, ArrayBlockingStream buffer, char separator, String prefix, int headerSize) { this(file, buffer, separator, prefix, headerSize); this.predicate = this.pf.createOrGetPredicate(predicateName, arity); } @Override public void run() { com.opencsv.CSVParser csvParser = new CSVParserBuilder().withSeparator(this.separator).build(); try (CSVReader csvReader = new CSVReaderBuilder(new FileReader(this.file)) .withCSVParser(csvParser) // custom CSV parser .withSkipLines(this.headerSize) // skip the headerSize first line, header info .build()) { while (true) { String[] fileContentLine = csvReader.readNext(); if (fileContentLine == null) { break; } if (this.predicate == null) { // If the predicate isn't known, we deduce it from the csv filename and first // line size int arity = fileContentLine.length; String predicateName = prefix + 
this.file.getName().split("\\.")[0].toLowerCase(); this.predicate = this.pf.createOrGetPredicate(predicateName, arity); } List terms = new ArrayList<>(this.predicate.arity()); for (String value : fileContentLine) { Term t = this.tf.createOrGetConstant(value); terms.add(t); } Atom a = new AtomImpl(this.predicate, terms); this.buffer.write(a); } this.buffer.close(); } catch (Exception e) { e.printStackTrace(); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy