// it.unipi.di.acube.batframework.systemPlugins.AgdistisAnnotator (Maven / Gradle / Ivy)
package it.unipi.di.acube.batframework.systemPlugins;
import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.data.Mention;
import it.unipi.di.acube.batframework.problems.D2WSystem;
import it.unipi.di.acube.batframework.utils.AnnotationException;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;
import java.io.*;
import java.net.*;
import java.util.*;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
/**
 * D2W annotator backed by AGDISTIS, which uses HITS on the DBpedia graph.
 *
 * @see <a href="https://github.com/AKSW/AGDISTIS">AGDISTIS on GitHub</a>
 */
public class AgdistisAnnotator implements D2WSystem {
private long calib = -1;
private long lastTime = -1;
private final String host;
private final int port;
private final WikipediaApiInterface wikiApi;
public AgdistisAnnotator(String host, int port, WikipediaApiInterface wikiApi) {
this.host = host;
this.port = port;
this.wikiApi = wikiApi;
}
public AgdistisAnnotator(WikipediaApiInterface wikiApi) {
this("139.18.2.164", 8080, wikiApi);
}
@Override
public String getName() {
return "Agdistis";
}
@Override
public long getLastAnnotationTime() {
if (calib == -1)
calib = TimingCalibrator.getOffset(this);
return lastTime - calib > 0 ? lastTime - calib : 0;
}
@Override
public HashSet solveD2W(String text, HashSet mentions) throws AnnotationException {
String textWithMentions = createTextWithMentions(text, mentions);
try {
return getAnnotations(textWithMentions);
} catch (IOException | JSONException e) {
throw new AnnotationException(e.getMessage());
}
}
public HashSet getAnnotations(String textWithMentions) throws IOException, JSONException {
URL agdistisUrl = new URL("http://" + host + ":" + port + "/AGDISTIS");
String parameters = "type=agdistis&text=" + URLEncoder.encode(textWithMentions, "UTF-8");
HttpURLConnection slConnection = (HttpURLConnection) agdistisUrl.openConnection();
slConnection.setDoOutput(true);
slConnection.setDoInput(true);
slConnection.setRequestMethod("POST");
slConnection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
slConnection.setRequestProperty("charset", "utf-8");
slConnection.setRequestProperty("Content-Length", "" + Integer.toString(parameters.getBytes().length));
slConnection.setUseCaches(false);
DataOutputStream wr = new DataOutputStream(slConnection.getOutputStream());
wr.writeBytes(parameters);
wr.flush();
wr.close();
HashSet annotations = parseJsonStream(slConnection);
return annotations;
}
private HashSet parseJsonStream(HttpURLConnection conn) throws IOException, JSONException {
HashSet annotations = new HashSet<>();
Scanner s = new Scanner(conn.getInputStream()).useDelimiter("\\A");
String resultStr = s.hasNext() ? s.next() : "";
JSONArray namedEntities = new JSONArray(resultStr);
for (int i=0; i posPoints) {
return namedEntityUri.substring(posSlash + 1);
} else if (posPoints < posSlash) {
return namedEntityUri.substring(posPoints + 1);
} else {
return namedEntityUri;
}
}
static String createTextWithMentions(String text, HashSet mentionsSet) {
// Example: 'The University of Leipzig in Barack Obama .'
List mentions = new ArrayList<>(mentionsSet);
Collections.sort(mentions, new Comparator() {
@Override
public int compare(Mention left, Mention right) {
return Integer.compare(left.getPosition(), right.getPosition());
}
});
StringBuilder textBuilder = new StringBuilder();
int lastPos = 0;
for (int i = 0; i < mentions.size(); i++) {
Mention m = mentions.get(i);
int begin = m.getPosition();
int end = m.getPosition() + m.getLength();
if (begin < lastPos) {
// we have two overlapping mentions --> take the larger one
Mention prev = mentions.get(i - 1);
assert (m.overlaps(prev));
System.err.printf("\"%s\" at pos %d overlaps with \"%s\" at pos %d%n", getMentionLabel(m, text),
m.getPosition(), getMentionLabel(prev, text), prev.getPosition());
if (m.getLength() > prev.getLength()) {
// current is larger --> replace previous with current
textBuilder.delete(textBuilder.length() - prev.getLength(), textBuilder.length());
lastPos -= prev.getLength();
} else
// previous is larger or equal --> skip current
continue;
}
String before = text.substring(lastPos, begin);
String label = text.substring(begin, end);
lastPos = end;
textBuilder.append(before).append("" + label + " ");
}
String lastSnippet = text.substring(lastPos, text.length());
textBuilder.append(lastSnippet);
return textBuilder.toString();
}
private static String getMentionLabel(Mention m, String text) {
return text.substring(m.getPosition(), m.getPosition() + m.getLength());
}
}