All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
it.unipi.di.acube.batframework.systemPlugins.TagmeAnnotator Maven / Gradle / Ivy
/**
* (C) Copyright 2012-2013 A-cube lab - Università di Pisa - Dipartimento di Informatica.
* BAT-Framework is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
* BAT-Framework is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with BAT-Framework. If not, see .
*/
package it.unipi.di.acube.batframework.systemPlugins;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.*;
import java.util.*;
import javax.xml.parsers.*;
import javax.xml.xpath.*;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import it.unipi.di.acube.batframework.data.*;
import it.unipi.di.acube.batframework.problems.*;
import it.unipi.di.acube.batframework.utils.*;
public class TagmeAnnotator implements Sa2WSystem {
private long lastTime = -1;
private String key;
private String url;
private float epsilon = -1;
private float minComm = -1;
private float minLink = -1;
public TagmeAnnotator(String configFile, float epsilon,
float minCommonness, float minLink)
throws ParserConfigurationException, FileNotFoundException,
SAXException, IOException, XPathExpressionException {
this(configFile);
this.epsilon = epsilon;
this.minComm = minCommonness;
this.minLink = minLink;
}
public TagmeAnnotator(String url, String key, float epsilon,
float minCommonness, float minLink)
throws ParserConfigurationException, FileNotFoundException,
SAXException, IOException, XPathExpressionException {
this.url = url;
this.key = key;
this.epsilon = epsilon;
this.minComm = minCommonness;
this.minLink = minLink;
}
public TagmeAnnotator(String url, String key) {
this.url = url;
this.key = key;
}
public TagmeAnnotator(String configFile)
throws ParserConfigurationException, FileNotFoundException,
SAXException, IOException, XPathExpressionException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(new FileInputStream(configFile));
url = getConfigValue("access", "url", doc);
key = getConfigValue("access", "key", doc);
if (url.equals("") || key.equals(""))
throw new AnnotationException("Configuration file " + configFile
+ " has missing value 'url' or 'key'.");
if (key.equals("KEY"))
throw new AnnotationException(
"Configuration file "
+ configFile
+ " has dummy default key value 'KEY'. Please replace with an actual key.");
}
private String getConfigValue(String setting, String name, Document doc)
throws XPathExpressionException {
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
XPathExpression userExpr = xpath.compile("tagme/setting[@name=\""
+ setting + "\"]/param[@name=\"" + name + "\"]/@value");
return userExpr.evaluate(doc);
}
@Override
public HashSet solveA2W(String text) throws AnnotationException {
return ProblemReduction.Sa2WToA2W(solveSa2W(text), Float.MIN_VALUE);
}
@Override
public HashSet solveC2W(String text) throws AnnotationException {
return ProblemReduction.A2WToC2W(solveA2W(text));
}
@Override
public HashSet solveSc2W(String text) throws AnnotationException {
return ProblemReduction.Sa2WToSc2W(this.solveSa2W(text));
}
@Override
public HashSet solveD2W(String text, HashSet mentions) {
return ProblemReduction.Sa2WToD2W(solveSa2W(text), mentions,
Float.MIN_VALUE);
}
@Override
public String getName() {
return "TagMe 2";
}
@Override
public HashSet solveSa2W(String text)
throws AnnotationException {
// System.out.println(text.length()+ " "+text.substring(0,
// Math.min(text.length(), 15)));
// TODO: workaround for a bug in tagme. should be deleted afterwards.
String newText = "";
for (int i = 0; i < text.length(); i++)
newText += (text.charAt(i) > 127 ? ' ' : text.charAt(i));
text = newText;
// avoid crashes for empty documents
int j = 0;
while (j < text.length() && text.charAt(j) == ' ')
j++;
if (j == text.length())
return new HashSet();
HashSet res;
String params = null;
try {
res = new HashSet();
params = "key=" + this.key;
params += "&lang=en";
if (epsilon >= 0)
params += "&epsilon=" + epsilon;
if (minComm >= 0)
params += "&min_comm=" + minComm;
if (minLink >= 0)
params += "&min_lp=" + minLink;
params += "&text=" + URLEncoder.encode(text, "UTF-8");
URL wikiApi = new URL(url);
HttpURLConnection slConnection = (HttpURLConnection) wikiApi
.openConnection();
slConnection.setRequestProperty("accept", "text/xml");
slConnection.setDoOutput(true);
slConnection.setDoInput(true);
slConnection.setRequestMethod("POST");
slConnection.setRequestProperty("charset", "utf-8");
slConnection.setRequestProperty("Content-Length",
Integer.toString(params.getBytes().length));
slConnection.setUseCaches(false);
slConnection.setReadTimeout(0);
DataOutputStream wr = new DataOutputStream(
slConnection.getOutputStream());
wr.writeBytes(params);
wr.flush();
wr.close();
Scanner s = new Scanner(slConnection.getInputStream());
Scanner s2 = s.useDelimiter("\\A");
String resultStr = s2.hasNext() ? s2.next() : "";
s.close();
JSONObject obj = new JSONObject(resultStr);
lastTime = obj.getLong("time");
JSONArray jsAnnotations = obj.getJSONArray("annotations");
for (int i=0; i" + id
+ " (" + rho + ")");
res.add(new ScoredAnnotation(start, end - start, id,
(float) rho));
}
} catch (Exception e) {
e.printStackTrace();
throw new AnnotationException(
"An error occurred while querying TagMe API. Message: "
+ e.getMessage() + " Parameters:" + params);
}
return res;
}
@Override
public long getLastAnnotationTime() {
return lastTime;
}
}