/*
 * org.nlp2rdf.parser.NIFParser
 *
 * Part of "nif", a small library for the NLP Interchange Format (NIF).
 */
package org.nlp2rdf.parser;
import com.hp.hpl.jena.rdf.model.*;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.vocabulary.RDF;
import org.nlp2rdf.nif20.NIF20Format;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Reads a NIF (NLP Interchange Format) document serialized as Turtle and exposes
 * its content either as a {@link Document} (see
 * {@link #getDocumentFromNIFString(String)}) or merged into an existing Jena
 * {@link Model} (see {@link #merge(Model)}).
 *
 * <p>Authors: Milan Dojchinovski (http://dojchinovski.mk), Nilesh Chakraborty.
 */
public class NIFParser {

    /** Syntax name handed to {@code Model.read}; the input is always read as Turtle. */
    private static final String TURTLE = "TTL";

    /** The raw NIF document (Turtle text) this parser instance works on. */
    private final String nif;

    /**
     * Creates a parser for the given NIF document.
     *
     * @param nif the NIF document as Turtle text; it is not parsed or validated
     *            until one of the instance methods needs it
     */
    public NIFParser(String nif) {
        this.nif = nif;
    }

    /**
     * Builds a {@link Document} from a NIF string.
     *
     * <p>Every resource typed as {@code NIF20Format.NIF_PROPERTY_PHRASE} becomes an
     * {@link EntityMention} carrying its anchor text, begin/end index, reference
     * context and the namespace of its URI. The document text is taken from the
     * {@code NIF20Format.NIF_PROPERTY_ISSTRING} value of the first resource typed as
     * {@code NIF20Format.NIF_PROPERTY_CONTEXT}.
     *
     * <p>Each property is dereferenced without a null check, so the input is
     * expected to carry all of them on every phrase and on the context.
     *
     * @param nifString the NIF document as Turtle text
     * @return the document holding all mentions and the context text
     * @throws NullPointerException   if {@code nifString} is {@code null}
     * @throws NoSuchElementException if the input declares no context resource
     */
    public static Document getDocumentFromNIFString(String nifString) {
        Model model = parse(nifString);

        // Looked up once instead of on every loop iteration.
        Property anchorOf = model.getProperty(NIF20Format.NIF_PROPERTY_ANCHOR_OF);
        Property referenceContextOf = model.getProperty(NIF20Format.NIF_PROPERTY_REFERENCE_CONTEXT);
        Property beginIndexOf = model.getProperty(NIF20Format.NIF_PROPERTY_BEGININDEX);
        Property endIndexOf = model.getProperty(NIF20Format.NIF_PROPERTY_ENDINDEX);

        // Declared as ArrayList rather than List because the parameter type of
        // Document's constructor is not visible from this file.
        ArrayList<EntityMention> mentions = new ArrayList<>();

        StmtIterator phrases = model.listStatements(
                null, RDF.type, model.getResource(NIF20Format.NIF_PROPERTY_PHRASE));
        try {
            while (phrases.hasNext()) {
                Resource entityRes = phrases.nextStatement().getSubject();

                EntityMention em = new EntityMention();
                em.setMention(entityRes.getProperty(anchorOf).getObject().asLiteral().getString());
                em.setBeginIndex(entityRes.getProperty(beginIndexOf).getObject().asLiteral().getInt());
                em.setEndIndex(entityRes.getProperty(endIndexOf).getObject().asLiteral().getInt());
                em.setContext(entityRes.getNameSpace());
                em.setReferenceContext(entityRes.getProperty(referenceContextOf).getObject().toString());
                mentions.add(em);
            }
        } finally {
            phrases.close();
        }

        StmtIterator contexts = model.listStatements(
                null, RDF.type, model.getResource(NIF20Format.NIF_PROPERTY_CONTEXT));
        try {
            if (!contexts.hasNext()) {
                throw new NoSuchElementException(
                        "NIF input contains no resource of type " + NIF20Format.NIF_PROPERTY_CONTEXT);
            }
            // Only the first context is used; the iterator is closed below because
            // it is not run to completion.
            Resource contextRes = contexts.nextStatement().getSubject();
            String text = contextRes
                    .getProperty(model.getProperty(NIF20Format.NIF_PROPERTY_ISSTRING))
                    .getObject().asLiteral().getString();
            return new Document(mentions, text);
        } finally {
            contexts.close();
        }
    }

    /**
     * Parses Turtle text into a fresh in-memory model.
     *
     * <p>The text is encoded as UTF-8 explicitly so the result does not depend on
     * the platform default charset.
     */
    private static Model parse(String turtle) {
        Objects.requireNonNull(turtle, "NIF input must not be null");
        Model model = ModelFactory.createDefaultModel();
        model.read(new ByteArrayInputStream(turtle.getBytes(StandardCharsets.UTF_8)), null, TURTLE);
        return model;
    }

    /** Parses this instance's NIF text into a new model. */
    private Model init() {
        return parse(nif);
    }

    /**
     * Returns the namespace prefix mapping declared in the NIF document.
     *
     * @return prefix-to-URI map as reported by the parsed model
     */
    public Map<String, String> getPrefixes() {
        return init().getNsPrefixMap();
    }

    /** Collects every statement of {@code source} into a list. */
    private static List<Statement> getStatements(Model source) {
        List<Statement> result = new ArrayList<>();
        StmtIterator iter = source.listStatements();
        try {
            while (iter.hasNext()) {
                result.add(iter.nextStatement());
            }
        } finally {
            iter.close();
        }
        return result;
    }

    /** Copies every namespace prefix of {@code source} into {@code target}. */
    private static void mergePrefixes(Model target, Model source) {
        for (Map.Entry<String, String> prefix : source.getNsPrefixMap().entrySet()) {
            target.setNsPrefix(prefix.getKey(), prefix.getValue());
        }
    }

    /**
     * Adds the prefixes and statements of this parser's NIF document to the given
     * model.
     *
     * @param model the model to extend; when {@code null} nothing is parsed and
     *              {@code null} is returned
     * @return the same {@code model} instance that was passed in
     */
    public Model merge(Model model) {
        if (model == null) {
            return null;
        }
        // Parse once and reuse the result for both prefixes and statements.
        Model parsed = init();
        mergePrefixes(model, parsed);
        model.add(getStatements(parsed));
        return model;
    }
}