
// eu.fbk.twm.utils.DBpediaOntology Maven / Gradle / Ivy
// The newest version!
package eu.fbk.twm.utils;
import java.io.File;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.w3c.dom.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
public class DBpediaOntology {
/** Relation name -> domain type, written by setDomainType and read by getDomainType. */
HashMap<String, String> singleDomainOntology = new HashMap<>();
/** Relation name -> range type, written by setRangeType and read by getRangeType. */
HashMap<String, String> singleRangeOntology = new HashMap<>();
// Returned as-is by getCompleteOntology; no code visible in this file adds entries to it.
// NOTE(review): the type arguments of this declaration are garbled ("HashMap>") and cannot be
// recovered from the visible code — restore them from the upstream source.
HashMap> completeOntology = new HashMap>();
HashSet nodes = new HashSet();
HashMap> properties = new HashMap>();
HashMap indexedNodes = new HashMap();
// When true, property names are lower-cased before being stored by readOntologyType.
private boolean lowerProp = false;
/**
 * @return whether property names are lower-cased when the ontology is read
 */
public boolean isLowerProp() {
return lowerProp;
}
/**
 * Sets the lower-casing flag. Only the flag is changed; nothing already loaded is re-read here.
 *
 * @param lowerProp true to lower-case property names on the next readOntologyType call
 */
public void setLowerProp(boolean lowerProp) {
this.lowerProp = lowerProp;
}
/** Class name -> number of nodes on the path from the class up to its root; filled by loadDepths. */
HashMap<String, Integer> depths = new HashMap<>();
/** Set once loadDepths has filled {@code depths}. */
boolean loadedDephts = false;
// Captures, as group 1, everything after "dbpedia.org/<segment>/" where <segment> is made of
// lower-case letters, digits, '_' or '-'.
// NOTE(review): the dots in the pattern are unescaped, so each one matches any character.
public static Pattern genericDBpediaPattern = Pattern.compile("^http....?.?.?dbpedia.org/[a-z0-9_-]+/(.*)$");
// Captures, as group 1, everything after "xmlns.com/foaf/<version>/" where <version> is digits and dots.
public static Pattern foafPattern = Pattern.compile("^http....?.?.?xmlns.com/foaf/[0-9\\.]+/(.*)$");
// Path of the OWL file; assigned by the constructor and parsed by readOntologyType.
String ontologyFile;
/**
 * Looks up the attribute map of a single property.
 *
 * @param propName property name exactly as it was stored (lower-cased when lowerProp was set
 *                 during loading)
 * @return the live map with the keys "name", "domain", "range" and "type", or {@code null}
 *         when no property is registered under that name
 */
public HashMap<String, String> getProperty(String propName) {
    return properties.get(propName);
}
public HashMap> getProperties() {
return properties;
}
/**
 * Returns all class nodes of the ontology.
 *
 * @return the live (not copied) node set
 */
public HashSet<DBpediaOntologyNode> getNodes() {
    return nodes;
}
/**
 * @return true once loadDepths has run and the depth table is filled
 */
public boolean isLoadedDephts() {
return loadedDephts;
}
/**
 * Returns the depth of a class, i.e. the number of nodes on the path from the class up to its
 * root (a class without a parent has depth 1). The depth table is built on first use.
 *
 * @param c class name
 * @return the depth, or -1 when the depth table has no entry for {@code c}
 */
public int getDepth(String c) {
    if (!loadedDephts) {
        loadDepths();
    }
    Integer depth = depths.get(c);
    // A missing entry used to be unboxed directly, which threw a NullPointerException.
    return depth == null ? -1 : depth;
}
/**
 * Fills the depth table with, for every named node, the length of its ancestor chain
 * (the node itself included), then marks the table as loaded.
 */
public void loadDepths() {
    for (DBpediaOntologyNode n : nodes) {
        if (n.className == null) {
            continue;
        }
        // Walk up from the node itself: looking it up again by name costs a full scan of the
        // node set per node and can fail even though the node is right here.
        List<DBpediaOntologyNode> history = getHistoryFromNode(n, null, 0);
        if (history == null) {
            continue;
        }
        depths.put(n.className, history.size());
    }
    loadedDephts = true;
}
// public int compareDephts(String c1, String c2) {
// if (!loadedDephts) {
// loadDepths();
// }
// int dim1 = depths.get(c1);
// }
/**
 * Expands a set of class names with all of their ancestors.
 *
 * Each entry may hold several class names separated by '/'; every part is resolved on its own.
 * Parts that do not name a known class are skipped.
 *
 * @param tmpClasses class names to expand; may be {@code null}
 * @return a new set with the class names of every resolved node and of its ancestors;
 *         empty when {@code tmpClasses} is {@code null}
 */
public HashSet<String> completeClasses(HashSet<String> tmpClasses) {
    HashSet<String> classes = new HashSet<>();
    if (tmpClasses == null) {
        return classes;
    }
    for (String c : tmpClasses) {
        if (c == null) {
            continue;
        }
        for (String s : c.split("/")) {
            // Named "history" so the local no longer hides the nodes field.
            List<DBpediaOntologyNode> history = getHistoryFromName(s);
            if (history == null) {
                continue;
            }
            for (DBpediaOntologyNode n : history) {
                classes.add(n.className);
            }
        }
    }
    return classes;
}
/**
 * Removes a node and, recursively, all of its descendants from the node set.
 * Only the node set is touched; parent/child links are left as they are.
 *
 * @param node root of the subtree to drop
 */
protected void removeNode(DBpediaOntologyNode node) {
    // Post-order: every descendant leaves the set before its ancestor does.
    for (DBpediaOntologyNode descendant : node.children) {
        removeNode(descendant);
    }
    nodes.remove(node);
}
/**
 * Collects a node followed by its ancestors, nearest first, by following the parent links.
 *
 * The walk ends at the first node without a parent, at the first node whose class name equals
 * {@code stopClass} (that node is not included), or as soon as a node is met a second time.
 * The original recursive version decremented {@code limit} while testing {@code limit > 10},
 * so its depth guard could never fire and a cyclic parent chain recursed without end.
 *
 * @param node      starting node; {@code null} yields an empty list
 * @param stopClass class name at which to stop, or {@code null} to walk up to the root
 * @param limit     kept for compatibility: a value above 10 makes the method return {@code null}
 * @return the chain as a new list, or {@code null} when {@code limit} is above 10
 */
private ArrayList<DBpediaOntologyNode> getHistoryFromNode(DBpediaOntologyNode node, String stopClass, int limit) {
    if (limit > 10) {
        return null;
    }
    ArrayList<DBpediaOntologyNode> ret = new ArrayList<>();
    // Identity-based, so the cycle check does not depend on how nodes implement equals/hashCode.
    Set<DBpediaOntologyNode> visited =
            Collections.newSetFromMap(new IdentityHashMap<DBpediaOntologyNode, Boolean>());
    DBpediaOntologyNode current = node;
    while (current != null && visited.add(current)) {
        // stopClass is the receiver so that a node without a class name cannot cause an NPE.
        if (stopClass != null && stopClass.equals(current.className)) {
            break;
        }
        ret.add(current);
        current = current.parent;
    }
    return ret;
}
/**
 * Compares two lists at the last index of the shorter one.
 *
 * Both lists are expected in reversed order (the caller's convention, see the original
 * "Lists must be reversed!" remark).
 *
 * @param first  first list
 * @param second second list
 * @return 1 when the compared elements are equal and {@code second} is longer than
 *         {@code first}; -1 when they are equal and {@code second} is not longer; 0 when they
 *         differ or when either list is {@code null} or empty
 */
public static Integer isInside(ArrayList<?> first, ArrayList<?> second) {
    // An empty list used to end in get(-1) and an IndexOutOfBoundsException.
    if (first == null || second == null || first.isEmpty() || second.isEmpty()) {
        return 0;
    }
    int last = Math.min(first.size(), second.size()) - 1;
    if (!Objects.equals(first.get(last), second.get(last))) {
        return 0;
    }
    return second.size() > first.size() ? 1 : -1;
}
/**
 * Returns the named class followed by all of its ancestors, nearest first.
 *
 * @param name class name
 * @return the chain, or {@code null} when no node is found for {@code name}
 */
public ArrayList<DBpediaOntologyNode> getHistoryFromName(String name) {
    return getHistoryFromName(name, null, 0);
}

/**
 * Returns the named class followed by its ancestors, stopping before {@code stopClass}.
 *
 * @param name      class name
 * @param stopClass class name at which the walk stops (not included), or {@code null}
 * @return the chain, or {@code null} when no node is found for {@code name}
 */
public ArrayList<DBpediaOntologyNode> getHistoryFromName(String name, String stopClass) {
    return getHistoryFromName(name, stopClass, 0);
}

/**
 * Resolves {@code name} to a node and delegates to getHistoryFromNode.
 *
 * @param limit passed through unchanged to getHistoryFromNode
 */
private ArrayList<DBpediaOntologyNode> getHistoryFromName(String name, String stopClass, int limit) {
    DBpediaOntologyNode thisNode = getNodeByName(name);
    if (thisNode == null) {
        return null;
    }
    return getHistoryFromNode(thisNode, stopClass, limit);
}
/**
 * Finds the node for a class name.
 *
 * A node whose own class name equals {@code name} always wins; only when there is none are the
 * equivalent-class names of the nodes consulted. The original compared each node's class name
 * with its own equivalents instead of with {@code name}, so aliases never matched the request,
 * and it threw a NullPointerException on nodes that have no class name.
 *
 * @param name class name or equivalent-class name
 * @return the matching node, or {@code null} when there is none or {@code name} is {@code null}
 */
public DBpediaOntologyNode getNodeByName(String name) {
    if (name == null) {
        return null;
    }
    for (DBpediaOntologyNode node : nodes) {
        // name is the receiver because a node's className may be null.
        if (name.equals(node.className)) {
            return node;
        }
    }
    for (DBpediaOntologyNode node : nodes) {
        if (node.equivalentClasses == null) {
            continue;
        }
        for (String altName : node.equivalentClasses) {
            if (name.equals(altName)) {
                return node;
            }
        }
    }
    return null;
}
/**
 * Returns the nodes that declare no super class.
 *
 * @return a new set holding every node whose {@code superClass} is {@code null}
 */
public HashSet<DBpediaOntologyNode> getRootNodes() {
    HashSet<DBpediaOntologyNode> roots = new HashSet<>();
    for (DBpediaOntologyNode node : nodes) {
        if (node.superClass == null) {
            roots.add(node);
        }
    }
    return roots;
}
/**
 * Returns the nodes that have no children.
 *
 * @return a new set holding every node whose {@code children} collection is empty
 */
public HashSet<DBpediaOntologyNode> getLeafNodes() {
    HashSet<DBpediaOntologyNode> leaves = new HashSet<>();
    for (DBpediaOntologyNode node : nodes) {
        if (node.children.isEmpty()) {
            leaves.add(node);
        }
    }
    return leaves;
}
/**
 * Strips everything up to and including the last occurrence of a marker.
 *
 * @param s         text to clean; may be {@code null}
 * @param toBeFound marker to look for
 * @return the text after the last occurrence of {@code toBeFound}, or {@code null} when
 *         {@code s} is {@code null} or does not contain the marker
 */
public static String cleanName(String s, String toBeFound) {
    if (s == null) {
        return null;
    }
    int markerStart = s.lastIndexOf(toBeFound);
    return markerStart == -1 ? null : s.substring(markerStart + toBeFound.length());
}

/**
 * Strips the DBpedia ontology namespace from a URI.
 *
 * @param s URI to clean; may be {@code null}
 * @return the part after "http://dbpedia.org/ontology/", or {@code null} when the namespace
 *         does not occur in {@code s}
 */
public static String cleanName(String s) {
    return cleanName(s, "http://dbpedia.org/ontology/");
}
/**
 * Shortens a DBpedia or FOAF URI to its local name.
 *
 * @param s URI to shorten
 * @return group 1 of genericDBpediaPattern when that pattern matches; otherwise "foaf:" plus
 *         group 1 of foafPattern when that one matches; otherwise {@code s} unchanged
 */
public static String cleanGenericName(String s) {
    Matcher dbpedia = genericDBpediaPattern.matcher(s);
    if (dbpedia.find()) {
        return dbpedia.group(1);
    }
    Matcher foaf = foafPattern.matcher(s);
    return foaf.find() ? "foaf:" + foaf.group(1) : s;
}
/**
 * Loads an ontology, keeping property names in their original case.
 *
 * @param ontologyFile path of the OWL file
 */
public DBpediaOntology(String ontologyFile) {
    this(ontologyFile, false);
}

/**
 * Loads an ontology. When the file does not exist, a message is written to standard error and
 * the JVM is terminated with exit status 1.
 *
 * @param ontologyFile path of the OWL file
 * @param lower        true to lower-case property names while loading
 */
public DBpediaOntology(String ontologyFile, boolean lower) {
    this.ontologyFile = ontologyFile;
    this.lowerProp = lower;
    if (!new File(ontologyFile).exists()) {
        System.err.println("Ontology file does not exist");
        System.exit(1);
    }
    readOntologyType();
}
/**
 * Parses the OWL file named by {@code ontologyFile} and fills the property table, the node set
 * and the class-name index, then rebuilds the class tree.
 *
 * Datatype properties are read first, object properties second (an object property therefore
 * replaces a datatype property of the same name), classes last. Any exception raised while
 * loading is printed to standard error and loading stops; whatever was read up to that point
 * stays in place.
 *
 * NOTE(review): the XML parser is created with its default settings, so DTDs and external
 * entities in the file are processed. Only load ontology files from a trusted source.
 */
public void readOntologyType() {
    try {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = builder.parse(new File(ontologyFile));
        doc.getDocumentElement().normalize();

        // Datatype ranges live in the XML Schema namespace, object ranges in the DBpedia one.
        readProperties(doc, "owl:DatatypeProperty", "http://www.w3.org/2001/XMLSchema#", "data");
        readProperties(doc, "owl:ObjectProperty", "http://dbpedia.org/ontology/", "object");
        readClasses(doc);
        attachPropertiesToDomains();
        updateTree();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/** Registers every property declared with the given tag in the property table. */
private void readProperties(Document doc, String tagName, String rangePrefix, String type) {
    NodeList declarations = doc.getElementsByTagName(tagName);
    for (int i = 0; i < declarations.getLength(); i++) {
        Node declaration = declarations.item(i);
        if (declaration.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        String propertyName = cleanName(((Element) declaration).getAttribute("rdf:about"));
        // Only plain names from the DBpedia ontology namespace are kept.
        if (propertyName == null || propertyName.contains("/")) {
            continue;
        }

        String domain = null;
        String range = null;
        NodeList children = declaration.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            String childName = child.getNodeName();
            boolean isDomain = "rdfs:domain".equals(childName);
            boolean isRange = "rdfs:range".equals(childName);
            if (!isDomain && !isRange) {
                continue;
            }
            // An element without rdf:resource used to raise an NPE that aborted the whole load.
            String resource = attributeValue(child, "rdf:resource");
            if (resource == null) {
                continue;
            }
            if (isDomain) {
                domain = resource;
            } else {
                range = resource;
            }
        }

        if (lowerProp) {
            // Locale.ROOT keeps the result independent of the default locale (e.g. Turkish 'I').
            propertyName = propertyName.toLowerCase(Locale.ROOT);
        }
        HashMap<String, String> property = new HashMap<>();
        property.put("name", propertyName);
        property.put("domain", cleanName(domain));
        property.put("range", cleanName(range, rangePrefix));
        property.put("type", type);
        properties.put(propertyName, property);
    }
}

/** Creates one node per owl:Class element and adds it to the node set and the name index. */
private void readClasses(Document doc) {
    NodeList declarations = doc.getElementsByTagName("owl:Class");
    for (int i = 0; i < declarations.getLength(); i++) {
        Node declaration = declarations.item(i);
        if (declaration.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        String className = cleanName(((Element) declaration).getAttribute("rdf:about"));
        HashMap<String, String> labels = new HashMap<>();
        HashSet<String> equivalentClasses = new HashSet<>();
        String superClass = null;

        NodeList children = declaration.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            // equals, not ==: node names are not guaranteed to be the same String instances
            // as these literals.
            String childName = child.getNodeName();
            if ("rdfs:subClassOf".equals(childName)) {
                String parentName = cleanName(attributeValue(child, "rdf:resource"));
                if (parentName != null) {
                    superClass = parentName;
                }
            } else if ("owl:equivalentClass".equals(childName)) {
                String equivalent = cleanName(attributeValue(child, "rdf:resource"));
                if (equivalent != null) {
                    equivalentClasses.add(equivalent);
                }
            } else if ("rdfs:label".equals(childName)) {
                String lang = attributeValue(child, "xml:lang");
                // A label without a language tag has no key to be stored under.
                if (lang != null) {
                    labels.put(lang, child.getTextContent());
                }
            }
        }

        DBpediaOntologyNode n = new DBpediaOntologyNode(className, labels, equivalentClasses, superClass);
        nodes.add(n);
        indexedNodes.put(className, n);
    }
}

/** Adds every property that has a domain to the node indexed under that domain, if any. */
private void attachPropertiesToDomains() {
    for (HashMap<String, String> property : properties.values()) {
        String domain = property.get("domain");
        if (domain == null) {
            continue;
        }
        DBpediaOntologyNode owner = indexedNodes.get(domain);
        // A domain with no matching class is skipped explicitly instead of via a caught exception.
        if (owner != null) {
            owner.properties.add(property);
        }
    }
}

/** Returns the value of an attribute, or null when the node does not carry it. */
private static String attributeValue(Node node, String attributeName) {
    NamedNodeMap attributes = node.getAttributes();
    if (attributes == null) {
        return null;
    }
    Node attribute = attributes.getNamedItem(attributeName);
    return attribute == null ? null : attribute.getNodeValue();
}
/**
 * Rebuilds the parent/child links of all nodes from their {@code superClass} names and then
 * adds to every node the properties of each node on its ancestor chain.
 *
 * A super class is resolved by class name first and by equivalent-class name second. Nodes
 * whose super class cannot be resolved stay without a parent. The original relied on a
 * swallowed exception for that case and scanned the whole node set twice per node.
 */
protected void updateTree() {
    for (DBpediaOntologyNode node : nodes) {
        node.parent = null;
        node.children = new HashSet<>();
    }

    // One pass to index the nodes by name; own class names take priority over aliases.
    Map<String, DBpediaOntologyNode> byName = new HashMap<>();
    for (DBpediaOntologyNode node : nodes) {
        if (node.className != null) {
            byName.put(node.className, node);
        }
    }
    for (DBpediaOntologyNode node : nodes) {
        if (node.equivalentClasses == null) {
            continue;
        }
        for (String altName : node.equivalentClasses) {
            if (!byName.containsKey(altName)) {
                byName.put(altName, node);
            }
        }
    }

    for (DBpediaOntologyNode node : nodes) {
        if (node.superClass == null) {
            continue;
        }
        DBpediaOntologyNode parent = byName.get(node.superClass);
        if (parent != null) {
            node.parent = parent;
            parent.children.add(node);
        }
    }

    for (DBpediaOntologyNode node : nodes) {
        // Start from the node itself rather than looking it up again by name.
        List<DBpediaOntologyNode> lineage = getHistoryFromNode(node, null, 0);
        if (lineage == null) {
            continue;
        }
        for (DBpediaOntologyNode n : lineage) {
            node.properties.addAll(n.properties);
        }
    }
}
/**
 * Records the domain type of a relation, replacing any earlier value.
 *
 * @param relationName relation name
 * @param domainType   domain type to store
 */
public void setDomainType(String relationName, String domainType) {
    // put already replaces an existing mapping, so no separate removal is needed.
    singleDomainOntology.put(relationName, domainType);
}
/**
 * @param relationName relation name
 * @return the domain type stored by setDomainType for this relation, or null when none is stored
 */
public String getDomainType(String relationName) {
return singleDomainOntology.get(relationName);
}
/**
 * Returns the completeOntology map itself (not a copy).
 */
// NOTE(review): the return type is garbled ("HashMap>"); its type arguments cannot be recovered
// from the visible code — restore them from the upstream source.
public HashMap> getCompleteOntology() {
return completeOntology;
}
/**
 * Records the range type of a relation, replacing any earlier value.
 *
 * @param relationName relation name
 * @param rangeType    range type to store
 */
public void setRangeType(String relationName, String rangeType) {
    // put already replaces an existing mapping, so no separate removal is needed.
    singleRangeOntology.put(relationName, rangeType);
}
/**
 * @param relationName relation name
 * @return the range type stored by setRangeType for this relation, or null when none is stored
 */
public String getRangeType(String relationName) {
return singleRangeOntology.get(relationName);
}
/**
 * Concatenates the recursive string form of every root node.
 *
 * @return the text produced by toStringRecursive for each root, in the iteration order of the
 *         root set
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    for (DBpediaOntologyNode root : getRootNodes()) {
        text.append(root.toStringRecursive());
    }
    return text.toString();
}
/**
 * Command-line entry point: loads the ontology file given as the first argument.
 * Without arguments it prints a usage message and exits with status 1.
 *
 * @param args {@code args[0]} is the path of the ontology file
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.out.println("");
        System.out.println("USAGE:");
        System.out.println("");
        // The usage text used to name a class from a different package.
        System.out.println("java -mx6G eu.fbk.twm.utils.DBpediaOntology\n" +
                " ontology-file\n" +
                "");
        System.out.println("");
        System.exit(1);
    }
    // Loading is the only effect; iterate over the nodes of the returned instance here when
    // the class names and their properties need to be dumped for debugging.
    new DBpediaOntology(args[0]);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy