
eu.fbk.dkm.pikes.resources.Sumo Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pikes-resources Show documentation
Show all versions of pikes-resources Show documentation
A collection of Java classes for accessing and querying a number of NLP resources.
The newest version!
package eu.fbk.dkm.pikes.resources;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import com.google.common.base.Charsets;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Resources;
import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;
import eu.fbk.utils.vocab.SUMO;
public final class Sumo {
public static final String NAMESPACE = "http://www.ontologyportal.org/SUMO.owl#";
private static final Map URI_INDEX;
private static final Map SYNSET_INDEX;
static {
try (final BufferedReader reader = Resources.asCharSource(
NomBank.class.getResource("Sumo.tsv"), Charsets.UTF_8).openBufferedStream()) {
final Map uriIndex = Maps.newHashMap();
final Map nameIndex = Maps.newHashMap();
final Map synsetIndex = Maps.newHashMap();
String line;
while ((line = reader.readLine()) != null) {
final String[] tokens = Arrays.copyOf(line.split("\t"), 4);
final String name = tokens[0].intern();
final List parents = tokens[1] == null ? ImmutableList.of() : Splitter
.on('|').trimResults().omitEmptyStrings().splitToList(tokens[1]);
final List children = tokens[2] == null ? ImmutableList.of() : Splitter
.on('|').trimResults().omitEmptyStrings().splitToList(tokens[2]);
final List synsets = tokens[3] == null ? ImmutableList.of() : Splitter
.on('|').trimResults().omitEmptyStrings().splitToList(tokens[3]);
final URI[][] uriArrays = new URI[3][];
final List> stringLists = ImmutableList.of(ImmutableList.of(name),
parents, children);
for (int i = 0; i < 3; ++i) {
final List stringList = stringLists.get(i);
final URI[] uriArray = new URI[stringList.size()];
uriArrays[i] = uriArray;
for (int j = 0; j < stringList.size(); ++j) {
final String uriString = (SUMO.NAMESPACE + stringList.get(j).trim())
.intern();
URI uri = uriIndex.get(uriString);
if (uri == null) {
uri = new URIImpl(uriString);
uriIndex.put(uriString, uri);
}
uriArray[j] = uri;
}
}
final URI conceptURI = uriArrays[0][0];
final String[] synsetsArray = new String[synsets.size()];
for (int i = 0; i < synsets.size(); ++i) {
synsetsArray[i] = synsets.get(i).trim().intern();
}
final Concept concept = new Concept(conceptURI, uriArrays[1], uriArrays[2],
synsetsArray);
nameIndex.put(conceptURI, concept);
for (final String synset : synsets) {
synsetIndex.put(synset, concept);
}
}
URI_INDEX = ImmutableMap.copyOf(nameIndex);
SYNSET_INDEX = ImmutableMap.copyOf(synsetIndex);
} catch (final IOException ex) {
throw new Error("Cannot load PropBank data", ex);
}
}
@Nullable
public static URI synsetToConcept(@Nullable final String synsetID) {
if (synsetID == null) {
return null;
}
final Concept concept = SYNSET_INDEX.get(synsetID.toLowerCase());
return concept == null ? null : concept.uri;
}
public static Set synsetsToConcepts(@Nullable final Iterable synsetIDs) {
final Set conceptURIs = Sets.newHashSet();
for (final String synsetID : synsetIDs) {
final URI conceptURI = Sumo.synsetToConcept(synsetID);
if (conceptURI != null) {
conceptURIs.add(conceptURI);
}
}
return filterAncestors(conceptURIs);
}
public static Set conceptToSynsets(@Nullable final URI conceptURI) {
if (conceptURI == null) {
return null;
}
final Concept concept = URI_INDEX.get(conceptURI);
return concept == null ? ImmutableSet.of() : ImmutableSet.copyOf(concept.synsets);
}
public static Set filterAncestors(@Nullable final Iterable extends URI> conceptURIs) {
final Set result = Sets.newHashSet(conceptURIs);
outer: for (final URI uri1 : conceptURIs) {
for (final URI uri2 : conceptURIs) {
if (!uri1.equals(uri2) && isSubClassOf(uri1, uri2)) {
continue outer;
}
}
result.add(uri1);
}
return result;
}
public static Set getSubClasses(final URI parentURI) {
final Set result = Sets.newHashSet();
final List queue = Lists.newLinkedList();
queue.add(parentURI);
while (!queue.isEmpty()) {
final Concept concept = URI_INDEX.get(queue.remove(0));
if (concept != null) {
for (final URI uri : concept.children) {
if (result.add(uri)) {
queue.add(uri);
}
}
}
}
return result;
}
public static Set getSuperClasses(final URI childURI) {
final Set result = Sets.newHashSet();
final List queue = Lists.newLinkedList();
queue.add(childURI);
while (!queue.isEmpty()) {
final Concept concept = URI_INDEX.get(queue.remove(0));
if (concept != null) {
for (final URI uri : concept.parents) {
if (result.add(uri)) {
queue.add(uri);
}
}
}
}
return result;
}
public static boolean isSubClassOf(final URI childURI, final URI parentURI) {
final Concept child = URI_INDEX.get(childURI);
if (child == null) {
return false;
}
if (childURI.equals(parentURI)) {
return true;
}
for (final URI uri : child.parents) {
if (isSubClassOf(uri, parentURI)) {
return true;
}
}
return false;
}
private static final class Concept {
public final URI uri;
public final URI[] parents;
public final URI[] children;
public final String[] synsets;
Concept(final URI uri, final URI[] parents, final URI[] children, final String[] synsets) {
this.uri = uri;
this.parents = parents;
this.children = children;
this.synsets = synsets;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy