All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Downloading a project requires considerable server resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
it.unipi.di.acube.batframework.systemPlugins.AIDADefaultAnnotator Maven / Gradle / Ivy
package it.unipi.di.acube.batframework.systemPlugins;
import java.io.*;
import java.net.*;
import java.util.*;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.lang.StringEscapeUtils;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.xml.sax.SAXException;
import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.data.Mention;
import it.unipi.di.acube.batframework.data.ScoredAnnotation;
import it.unipi.di.acube.batframework.data.ScoredTag;
import it.unipi.di.acube.batframework.data.Tag;
import it.unipi.di.acube.batframework.problems.MentionSpotter;
import it.unipi.di.acube.batframework.problems.Sa2WSystem;
import it.unipi.di.acube.batframework.utils.AnnotationException;
import it.unipi.di.acube.batframework.utils.ProblemReduction;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;
/**
 * Annotator plugin that delegates entity annotation to a remote AIDA web
 * service reachable over HTTP.
 *
 * <p>
 * The text is POSTed to the configured URL together with the chosen
 * disambiguation technique; the JSON answer is parsed and the entity names it
 * contains are resolved to concept ids through the supplied
 * {@link WikipediaApiInterface}.
 */
public class AIDADefaultAnnotator implements Sa2WSystem, MentionSpotter {
    /**
     * Duration, in milliseconds, of the last successful AIDA query issued by
     * {@link #solveSa2W(String)}; 0 until the first successful query.
     */
    private long lastTime = 0;
    private final String url;
    private final WikipediaApiInterface api;
    private final String tech;

    /**
     * @param url
     *            endpoint of the AIDA web service.
     * @param tech
     *            value sent as the {@code tech} form field of every request.
     * @param api
     *            interface used to translate entity titles into concept ids.
     */
    public AIDADefaultAnnotator(String url, String tech,
            WikipediaApiInterface api) {
        this.url = url;
        this.api = api;
        this.tech = tech;
    }

    /** Solves A2W by reducing the result of {@link #solveSa2W(String)}. */
    @Override
    public HashSet<Annotation> solveA2W(String text) throws AnnotationException {
        return ProblemReduction.Sa2WToA2W(solveSa2W(text));
    }

    /** Solves C2W by reducing the result of {@link #solveA2W(String)}. */
    @Override
    public HashSet<Tag> solveC2W(String text) throws AnnotationException {
        return ProblemReduction.A2WToC2W(solveA2W(text));
    }

    /** @return a display name that includes the configured technique. */
    @Override
    public String getName() {
        return String.format("AIDA - (%s)", tech);
    }

    /**
     * @return the duration in milliseconds of the last successful query made
     *         by {@link #solveSa2W(String)}.
     */
    @Override
    public long getLastAnnotationTime() {
        return lastTime;
    }

    /**
     * Disambiguates the given mentions. Each mention is wrapped in
     * {@code [[ ]]} markers, the marked-up text is annotated through
     * {@link #solveSa2W(String)}, and every input mention is mapped to the
     * annotation found at the same position and length, or to concept
     * {@code -1} when none is found.
     *
     * @param text
     *            the document text.
     * @param mentions
     *            the mentions to disambiguate; they are processed in their
     *            natural sort order.
     * @return one annotation per input mention.
     * @throws AnnotationException
     *             if the AIDA query fails.
     */
    @Override
    public HashSet<Annotation> solveD2W(String text, HashSet<Mention> mentions)
            throws AnnotationException {
        List<Mention> mentionsList = new ArrayList<Mention>(mentions);
        Collections.sort(mentionsList);

        StringBuilder spotString = new StringBuilder();
        int lastChar = 0;
        for (Mention m : mentionsList) {
            int end = m.getPosition() + m.getLength();
            String surface = text.substring(m.getPosition(), end);
            System.out.println(m.toString() + " " + surface);
            // Copy the text between the previous mention and this one, then
            // the mention itself surrounded by the [[ ]] markers.
            spotString.append(text, lastChar, m.getPosition());
            spotString.append("[[").append(surface).append("]]");
            lastChar = end;
        }
        spotString.append(text.substring(lastChar));

        HashSet<ScoredAnnotation> resScored = solveSa2W(spotString.toString());
        HashSet<Annotation> res = new HashSet<>();
        // NOTE(review): returned offsets are compared with positions in the
        // original (un-bracketed) text; presumably AIDA reports offsets
        // relative to the text without the [[ ]] markers -- confirm.
        for (Mention m : mentionsList) {
            boolean found = false;
            for (ScoredAnnotation a : resScored) {
                if (a.getLength() == m.getLength()
                        && a.getPosition() == m.getPosition()) {
                    res.add(new Annotation(a.getPosition(), a.getLength(),
                            a.getConcept()));
                    found = true;
                    break;
                }
            }
            if (!found)
                res.add(new Annotation(m.getPosition(), m.getLength(), -1));
        }
        return res;
    }

    /** Solves Sc2W by reducing the result of {@link #solveSa2W(String)}. */
    @Override
    public HashSet<ScoredTag> solveSc2W(String text) throws AnnotationException {
        return ProblemReduction.Sa2WToSc2W(solveSa2W(text));
    }

    /**
     * Annotates the text through AIDA. Mentions whose {@code bestEntity} is
     * null are skipped; for the others the entity name is resolved to a
     * concept id and paired with its disambiguation score.
     *
     * @param text
     *            the text to annotate.
     * @return the scored annotations, empty if AIDA reported a disambiguation
     *         failure.
     * @throws AnnotationException
     *             if the query fails or the answer cannot be parsed.
     */
    @Override
    public HashSet<ScoredAnnotation> solveSa2W(String text)
            throws AnnotationException {
        String getParameters = "";
        // Measure with a local start time so that a failed query does not
        // leave an absolute timestamp in lastTime.
        long start = System.currentTimeMillis();
        JSONObject obj = queryAida(getParameters, text);
        lastTime = System.currentTimeMillis() - start;

        if (obj == null)
            return new HashSet<>();

        Vector<Integer> startPositions = new Vector<Integer>();
        Vector<Integer> lengths = new Vector<Integer>();
        Vector<String> titles = new Vector<String>();
        Vector<Float> scores = new Vector<Float>();
        try {
            JSONArray jsMentions = obj.getJSONArray("mentions");
            for (int i = 0; i < jsMentions.length(); i++) {
                JSONObject jsMention = jsMentions.getJSONObject(i);
                if (jsMention.isNull("bestEntity"))
                    continue;
                startPositions.add(jsMention.getInt("offset"));
                lengths.add(jsMention.getInt("length"));
                JSONObject bestEntity = jsMention.getJSONObject("bestEntity");
                titles.add(StringEscapeUtils.unescapeJava(bestEntity
                        .getString("name")));
                scores.add((float) bestEntity.getDouble("disambiguationScore"));
            }
        } catch (JSONException e) {
            throw new AnnotationException(e.getMessage());
        }

        for (String title : titles)
            System.out.println(title);

        HashSet<ScoredAnnotation> res = new HashSet<ScoredAnnotation>();
        try {
            // Resolve all titles in one go before looking up the single ids.
            api.prefetchTitles(titles);
            for (int i = 0; i < startPositions.size(); i++) {
                res.add(new ScoredAnnotation(startPositions.get(i),
                        lengths.get(i), api.getIdByTitle(titles.get(i)),
                        scores.get(i)));
            }
        } catch (XPathExpressionException | IOException
                | ParserConfigurationException | SAXException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
        return res;
    }

    /**
     * Returns the mentions spotted by AIDA in the text, regardless of whether
     * an entity was assigned to them.
     *
     * @param text
     *            the text to spot.
     * @return the spotted mentions, empty if AIDA reported a disambiguation
     *         failure.
     * @throws AnnotationException
     *             if the query fails or the answer cannot be parsed.
     */
    @Override
    public HashSet<Mention> getSpottedMentions(String text) {
        HashSet<Mention> res = new HashSet<Mention>();
        JSONObject obj = queryAida("", text);
        // queryJson yields null on a disambiguation failure; treat it like
        // solveSa2W does instead of dereferencing it.
        if (obj == null)
            return res;
        try {
            JSONArray jsMentions = obj.getJSONArray("mentions");
            for (int i = 0; i < jsMentions.length(); i++) {
                JSONObject jsMention = jsMentions.getJSONObject(i);
                // Same offset convention as solveSa2W: the value is used as is.
                int pos = jsMention.getInt("offset");
                int len = jsMention.getInt("length");
                res.add(new Mention(pos, len));
            }
        } catch (JSONException e) {
            throw new AnnotationException(e.getMessage());
        }
        return res;
    }

    /**
     * Runs {@link #queryJson(String, String, String)} against the configured
     * URL, logging any failure and converting it into an
     * {@link AnnotationException}.
     */
    private JSONObject queryAida(String getParameters, String text)
            throws AnnotationException {
        try {
            return queryJson(getParameters, text, url);
        } catch (Exception e) {
            System.out
                    .print("Got error while querying AIDA API with GET parameters: "
                            + getParameters + " with text: " + text);
            e.printStackTrace();
            throw new AnnotationException(
                    "An error occurred while querying AIDA API. Message: "
                            + e.getMessage());
        }
    }

    /**
     * POSTs the text and the configured technique to the service as an
     * {@code application/x-www-form-urlencoded} body and parses the answer.
     *
     * @param getParameters
     *            query string appended to the URL after a {@code ?}.
     * @param text
     *            the text to annotate; it is URL-encoded before being sent.
     * @param url
     *            the service endpoint.
     * @return the parsed JSON answer, or {@code null} if the service answered
     *         {@code "ERROR: Failed Disambiguating"}.
     * @throws Exception
     *             on any connection, I/O or JSON parsing problem.
     */
    private JSONObject queryJson(String getParameters, String text, String url)
            throws Exception {
        // Form fields are separated by '&' in a form-urlencoded body.
        String postParameters = String.format("text=%s&tech=%s",
                URLEncoder.encode(text, "UTF-8"), tech);
        // Encode once so that Content-Length matches the bytes actually sent.
        byte[] body = postParameters.getBytes("UTF-8");

        URL webApi = new URL(String.format("%s?%s", url, getParameters));
        HttpURLConnection slConnection = (HttpURLConnection) webApi
                .openConnection();
        // A read timeout of 0 means wait indefinitely for the answer.
        slConnection.setReadTimeout(0);
        slConnection.setDoOutput(true);
        slConnection.setDoInput(true);
        slConnection.setRequestMethod("POST");
        slConnection.setRequestProperty("Content-Type",
                "application/x-www-form-urlencoded");
        slConnection.setRequestProperty("charset", "utf-8");
        slConnection.setRequestProperty("Content-Length",
                Integer.toString(body.length));
        slConnection.setUseCaches(false);

        try (OutputStream out = slConnection.getOutputStream()) {
            out.write(body);
            out.flush();
        }

        String resultStr;
        try (java.util.Scanner s = new java.util.Scanner(
                slConnection.getInputStream(), "UTF-8")) {
            // "\\A" matches only at the start of input, so the single token
            // is the whole response body.
            s.useDelimiter("\\A");
            resultStr = s.hasNext() ? s.next() : "";
        }

        if (resultStr.equals("ERROR: Failed Disambiguating"))
            return null;
        return new JSONObject(resultStr);
    }
}