Please wait. This may take a few minutes ...
Downloading a project requires significant resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download or modify it as often as you want.
org.openprovenance.prov.interop.Inputer Maven / Gradle / Ivy
package org.openprovenance.prov.interop;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.openprovenance.prov.model.DateTimeOption;
import org.openprovenance.prov.model.Document;
import org.openprovenance.prov.model.ProvFactory;
import org.openprovenance.prov.notation.ProvDeserialiser;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import static org.openprovenance.prov.interop.Formats.ProvFormat.*;
public class Inputer implements InteropMediaType {
/** Logger used by this class. */
static Logger logger = LogManager.getLogger(Inputer.class);
/** Framework used to resolve formats, configuration and deserializer maps. */
private final InteropFramework interopFramework;
/** Separator between the columns of an index file line. */
static String SEPARATOR = ",";
/** Factory passed to the PROV-N deserialiser. */
private final ProvFactory pFactory;
/** Per-format deserialiser suppliers that take their settings from the framework configuration. */
final Map<Formats.ProvFormat, DeserializerFunction> deserializerMap;
/** Per-format deserialiser factories that take an explicit date-time option and time zone. */
final Map<Formats.ProvFormat, DeserializerFunction2> deserializerMap2;
/** Order in which formats are attempted when the type of a file is unknown. */
List<Formats.ProvFormat> preferredOrder = List.of(PROVN, JSONLD, JSON, PROVX);
/**
 * Creates an Inputer bound to the given framework and factory, and builds both
 * deserializer maps.
 *
 * @param interopFramework framework consulted for formats and configuration
 * @param pFactory         factory passed to the PROV-N deserialiser
 */
public Inputer(InteropFramework interopFramework, ProvFactory pFactory) {
    this.pFactory = pFactory;
    this.interopFramework = interopFramework;
    // The maps only capture the two fields above inside lambdas, so they are built last.
    this.deserializerMap = createDeserializerMap();
    this.deserializerMap2 = createDeserializerMap2();
}
/**
 * Builds the map from each supported format to a supplier of a new deserialiser.
 * Each supplier reads the date-time option and time zone from the framework
 * configuration when it is applied, not when the map is built.
 *
 * @return a mutable map with entries for PROVN, PROVX, JSONLD and JSON
 */
private Map<Formats.ProvFormat, DeserializerFunction> createDeserializerMap() {
    // Direct put calls give every lambda an explicit target type and avoid the
    // 10-entry limit of Map.of.
    Map<Formats.ProvFormat, DeserializerFunction> deserializers = new HashMap<>();
    deserializers.put(PROVN,
            () -> new ProvDeserialiser(pFactory, interopFramework.getConfig().dateTime, interopFramework.getConfig().timeZone));
    deserializers.put(PROVX,
            () -> new org.openprovenance.prov.core.xml.serialization.ProvDeserialiser(interopFramework.getConfig().dateTime, interopFramework.getConfig().timeZone));
    deserializers.put(JSONLD,
            () -> new org.openprovenance.prov.core.jsonld11.serialization.ProvDeserialiser(new ObjectMapper(), interopFramework.getConfig().dateTime, interopFramework.getConfig().timeZone));
    deserializers.put(JSON,
            () -> new org.openprovenance.prov.core.json.serialization.ProvDeserialiser(new ObjectMapper(), interopFramework.getConfig().dateTime, interopFramework.getConfig().timeZone));
    return deserializers;
}
/**
 * Builds the map from each supported format to a factory that creates a new
 * deserialiser for a caller-supplied date-time option and time zone.
 *
 * @return a mutable map with entries for PROVN, PROVX, JSONLD and JSON
 */
public final Map<Formats.ProvFormat, DeserializerFunction2> createDeserializerMap2() {
    // Direct put calls give every lambda an explicit target type and avoid the
    // 10-entry limit of Map.of.
    Map<Formats.ProvFormat, DeserializerFunction2> deserializers = new HashMap<>();
    deserializers.put(PROVN,
            (DateTimeOption dateTime, TimeZone timeZone) -> new ProvDeserialiser(pFactory, dateTime, timeZone));
    deserializers.put(PROVX,
            (DateTimeOption dateTime, TimeZone timeZone) -> new org.openprovenance.prov.core.xml.serialization.ProvDeserialiser(dateTime, timeZone));
    deserializers.put(JSONLD,
            (DateTimeOption dateTime, TimeZone timeZone) -> new org.openprovenance.prov.core.jsonld11.serialization.ProvDeserialiser(new ObjectMapper(), dateTime, timeZone));
    deserializers.put(JSON,
            (DateTimeOption dateTime, TimeZone timeZone) -> new org.openprovenance.prov.core.json.serialization.ProvDeserialiser(new ObjectMapper(), dateTime, timeZone));
    return deserializers;
}
/**
 * Deserialises a document from a stream using the deserialiser that the framework
 * registers for the given format.
 *
 * @param is     stream to read; it is not closed by this method itself
 * @param format format of the stream content
 * @return the deserialised document
 * @throws IOException      if the deserialiser fails to read the stream
 * @throws InteropException if no deserialiser is registered for {@code format}
 */
Document deserialiseDocument(InputStream is, Formats.ProvFormat format) throws IOException {
    DeserializerFunction deserializer = interopFramework.getDeserializerMap().get(format);
    logger.debug("deserializer {} {}", format, deserializer);
    if (deserializer == null) {
        // Fail with a meaningful message instead of a NullPointerException.
        throw new InteropException("No deserializer available for format: " + format);
    }
    return deserializer.apply().deserialiseDocument(is);
}
/**
 * Deserialises a document from a stream using the deserialiser that the framework
 * registers for the given format, created for an explicit date-time option and time zone.
 *
 * @param is             stream to read; it is not closed by this method itself
 * @param format         format of the stream content
 * @param dateTimeOption date-time option passed to the deserialiser factory
 * @param timeZone       time zone passed to the deserialiser factory
 * @return the deserialised document
 * @throws IOException      if the deserialiser fails to read the stream
 * @throws InteropException if no deserialiser is registered for {@code format}
 */
Document deserialiseDocument(InputStream is, Formats.ProvFormat format, DateTimeOption dateTimeOption, TimeZone timeZone) throws IOException {
    DeserializerFunction2 deserializer = interopFramework.getDeserializerMap2().get(format);
    logger.debug("deserializer {} {}", format, deserializer);
    if (deserializer == null) {
        // Fail with a meaningful message instead of a NullPointerException.
        throw new InteropException("No deserializer available for format: " + format);
    }
    return deserializer.apply(dateTimeOption, timeZone).deserialiseDocument(is);
}
/**
 * Reads a document from a file, or from standard input when {@code filename} is "-".
 * The format is taken from {@code format} when given, otherwise it is derived from
 * the file name.
 *
 * @param filename path of the file to read, or "-" for standard input
 * @param format   name of the format, or null to derive it from {@code filename}
 * @return the deserialised document
 * @throws IOException      if the input cannot be opened or read
 * @throws InteropException if the format is unknown or cannot be determined
 */
Document readDocument(String filename, String format) throws IOException {
    boolean fromStdin = Objects.equals(filename, "-");
    Formats.ProvFormat informat;
    if (format != null) {
        informat = interopFramework.getTypeForFormat(format);
        if (informat == null) {
            throw new InteropException("Unknown format: " + format);
        }
    } else {
        informat = interopFramework.getTypeForFile(filename);
        if (informat == null) {
            // Standard input gets its own message; previously that check was unreachable.
            throw new InteropException(fromStdin
                    ? "File format for standard input not specified"
                    : "Unknown file format for: " + filename);
        }
    }
    if (fromStdin) {
        // System.in is deliberately left open: it belongs to the process.
        return deserialiseDocument(System.in, informat);
    }
    try (InputStream in = Files.newInputStream(Paths.get(filename))) {
        return deserialiseDocument(in, informat);
    }
}
/**
 * Reads the document described by an index entry.
 *
 * @param something entry giving the kind of resource, its location and its format
 * @return the deserialised document; null for a URL entry when
 *         {@code readDocumentFromURL} returns null
 * @throws IOException if a file entry cannot be opened or read
 */
Document readDocument(ToRead something) throws IOException {
    switch (something.kind) {
        case FILE:
            // Close the file stream once deserialisation has finished or failed.
            try (InputStream in = Files.newInputStream(Paths.get(something.url))) {
                return deserialiseDocument(in, something.format);
            }
        case URL:
            // The entry's format is not passed on; readDocumentFromURL determines it itself.
            return readDocumentFromURL(something.url);
        default:
            return null;
    }
}
/**
 * Reads a document from a file whose format is derived from the file name, using a
 * deserialiser created for the given date-time option and time zone.
 *
 * @param filename       path of the file to read
 * @param dateTimeOption date-time option passed to the deserialiser factory
 * @param timeZone       time zone passed to the deserialiser factory
 * @return the deserialised document
 * @throws InteropException if the format cannot be derived from the file name, or
 *                          wrapping any IOException raised while reading
 */
Document readDocumentFromFile(String filename, DateTimeOption dateTimeOption, TimeZone timeZone) {
    Formats.ProvFormat format = interopFramework.getTypeForFile(filename);
    if (format == null) {
        // This is the read path, so the message refers to an input file.
        throw new InteropException("Unknown input file format: " + filename);
    }
    try (InputStream in = Files.newInputStream(Paths.get(filename))) {
        return deserialiseDocument(in, format, dateTimeOption, timeZone);
    } catch (IOException e) {
        throw new InteropException(e);
    }
}
/**
 * Reads a document from a file whose format is derived from the file name.
 *
 * @param filename path of the file to read
 * @return the deserialised document
 * @throws InteropException if the format cannot be derived from the file name, or
 *                          wrapping any IOException raised while reading
 */
Document readDocumentFromFile(String filename) {
    Formats.ProvFormat format = interopFramework.getTypeForFile(filename);
    if (format == null) {
        // This is the read path, so the message refers to an input file.
        throw new InteropException("Unknown input file format: " + filename);
    }
    try (InputStream in = Files.newInputStream(Paths.get(filename))) {
        return deserialiseDocument(in, format);
    } catch (IOException e) {
        throw new InteropException(e);
    }
}
/**
 * Reads a document from a file of unknown format by trying each format of
 * {@code preferredOrder} in turn and returning the first successful result.
 *
 * @param filename path of the file to read
 * @return the document produced by the first deserialiser that does not raise an IOException
 * @throws UnsupportedOperationException if every attempt raised an IOException; the
 *                                       last failure is attached as the cause
 */
Document readDocumentFromFileWithUnknownType(String filename) {
    IOException lastFailure = null;
    for (Formats.ProvFormat format : preferredOrder) {
        // A new stream per attempt, closed whether or not the attempt succeeds.
        try (InputStream in = Files.newInputStream(Paths.get(filename))) {
            return interopFramework.getDeserializerMap().get(format).apply().deserialiseDocument(in);
        } catch (IOException e) {
            // we fail, let's continue with the next one
            lastFailure = e;
        }
    }
    System.out.println("Unparseable format " + filename);
    throw new UnsupportedOperationException("Unparseable format " + filename, lastFailure);
}
/**
 * Reads a document from a file of unknown format by trying each format of
 * {@code preferredOrder} in turn, with deserialisers created for the given date-time
 * option and time zone, and returning the first successful result.
 *
 * @param filename       path of the file to read
 * @param dateTimeOption date-time option passed to each deserialiser factory
 * @param timeZone       time zone passed to each deserialiser factory
 * @return the document produced by the first deserialiser that does not raise an IOException
 * @throws UnsupportedOperationException if every attempt raised an IOException; the
 *                                       last failure is attached as the cause
 */
Document readDocumentFromFileWithUnknownType(String filename, DateTimeOption dateTimeOption, TimeZone timeZone) {
    IOException lastFailure = null;
    for (Formats.ProvFormat format : preferredOrder) {
        // A new stream per attempt, closed whether or not the attempt succeeds.
        try (InputStream in = Files.newInputStream(Paths.get(filename))) {
            return interopFramework.getDeserializerMap2().get(format).apply(dateTimeOption, timeZone).deserialiseDocument(in);
        } catch (IOException e) {
            // we fail, let's continue with the next one
            lastFailure = e;
        }
    }
    System.out.println("Unparseable format " + filename);
    throw new UnsupportedOperationException("Unparseable format " + filename, lastFailure);
}
/**
 * Reads a document from a URL. Uses the Content-type header field to determine the
 * mime-type of the resource, and therefore the parser to read the document. When the
 * header is missing or maps to no known format, the format is looked up from the URL
 * string via {@code getTypeForFile}.
 *
 * @param url a URL
 * @return a Document, or null when {@code connectWithRedirect} yields no connection
 * @throws InteropException if the format cannot be determined, or wrapping any
 *                          IOException raised while connecting or reading
 */
Document readDocumentFromURL(String url) {
    try {
        URL theURL = new URL(url);
        URLConnection conn = interopFramework.connectWithRedirect(theURL);
        if (conn == null) {
            return null;
        }
        Formats.ProvFormat format = null;
        String contentType = conn.getContentType();
        logger.debug("Content-type: {}", contentType);
        if (contentType != null) {
            // Need to trim optional parameters
            // Content-Type: text/plain; charset=UTF-8
            int end = contentType.indexOf(';');
            String actualContentType = (end < 0 ? contentType : contentType.substring(0, end)).trim();
            logger.debug("Found Content-type: {}", actualContentType);
            // TODO: might be worth skipping if text/plain as that seems to be the
            // default returned by unconfigured web servers
            format = interopFramework.getMimeTypeRevMap().get(actualContentType);
        }
        logger.debug("Format after Content-type: {}", format);
        if (format == null) {
            format = interopFramework.getTypeForFile(theURL.toString());
        }
        logger.debug("Format after extension: {}", format);
        if (format == null) {
            // Neither the header nor the URL identified a format: report it
            // instead of failing later with a NullPointerException.
            throw new InteropException("Unknown format for URL: " + url);
        }
        try (InputStream contentStream = conn.getInputStream()) {
            return deserialiseDocument(contentStream, format);
        }
    } catch (IOException e) {
        throw new InteropException(e);
    }
}
/**
 * Reads an index file from disk.
 *
 * @param fin index file to read
 * @return the entries parsed from the file, in file order
 * @throws IOException if the file cannot be opened or read
 */
List<ToRead> readIndexFile(File fin) throws IOException {
    // Close the file even when parsing throws.
    try (FileInputStream fis = new FileInputStream(fin)) {
        return readIndexFile(fis);
    }
}
/**
 * Parses an index of documents to read, one entry per line. A line with three or
 * more {@code SEPARATOR}-delimited columns is read as kind, path, format (the kind
 * is URL when the first column is exactly "URL", otherwise FILE). A line with a
 * single column is a file name whose format is derived from the name. Blank lines
 * are skipped; any other line is ignored with a warning.
 *
 * @param is stream holding the index; it is closed when this method returns
 * @return the parsed entries, in input order
 * @throws IOException if the stream cannot be read
 */
List<ToRead> readIndexFile(InputStream is) throws IOException {
    List<ToRead> entries = new ArrayList<>();
    // NOTE(review): the reader uses the platform default charset, as before — confirm
    // whether index files should be read as UTF-8.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.trim().isEmpty()) {
                // A blank line would otherwise yield an entry with an empty file name.
                continue;
            }
            String[] parts = line.split(SEPARATOR);
            if (parts.length >= 3) {
                FileKind kind = parts[0].trim().equals("URL") ? FileKind.URL : FileKind.FILE;
                String path = parts[1].trim();
                Formats.ProvFormat format = interopFramework.getTypeForFormat(parts[2].trim());
                entries.add(new ToRead(kind, path, format));
            } else if (parts.length == 1) {
                String filename = parts[0].trim();
                entries.add(new ToRead(FileKind.FILE, filename, interopFramework.getTypeForFile(filename)));
            } else {
                // Still ignored, but no longer silently.
                logger.warn("Ignoring malformed index line: {}", line);
            }
        }
    }
    return entries;
}
/** Kind of resource named by an index entry: a local file or a URL. */
enum FileKind {
    FILE,
    URL
}
/** One index entry: the kind of resource, its location, and its format. */
static class ToRead {
    FileKind kind;
    String url;
    Formats.ProvFormat format;

    ToRead(FileKind kind, String url, Formats.ProvFormat format) {
        this.kind = kind;
        this.url = url;
        this.format = format;
    }

    /** Renders the entry as {@code [kind,url,format]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("[");
        text.append(kind).append(",");
        text.append(url).append(",");
        text.append(format).append("]");
        return text.toString();
    }
}
}