All downloads are free. Search and download functionality uses the official Maven repository.

it.unipi.di.acube.batframework.systemPlugins.AIDADefaultAnnotator Maven / Gradle / Ivy

package it.unipi.di.acube.batframework.systemPlugins;

import java.io.*;
import java.net.*;
import java.util.*;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.lang.StringEscapeUtils;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.xml.sax.SAXException;

import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.data.Mention;
import it.unipi.di.acube.batframework.data.ScoredAnnotation;
import it.unipi.di.acube.batframework.data.ScoredTag;
import it.unipi.di.acube.batframework.data.Tag;
import it.unipi.di.acube.batframework.problems.MentionSpotter;
import it.unipi.di.acube.batframework.problems.Sa2WSystem;
import it.unipi.di.acube.batframework.utils.AnnotationException;
import it.unipi.di.acube.batframework.utils.ProblemReduction;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;

/**
 * BAT-framework plugin that sends text to an AIDA web service with an HTTP
 * POST request and converts the JSON reply into annotations and mentions.
 *
 * <p>
 * Entity names found in the reply are resolved to numeric ids through the
 * {@link WikipediaApiInterface} passed to the constructor.
 */
public class AIDADefaultAnnotator implements Sa2WSystem, MentionSpotter {
	/** Charset name used for the request body and for decoding the reply. */
	private static final String UTF_8 = "UTF-8";
	/** Reply body that is treated as "no result" instead of being parsed as JSON. */
	private static final String FAILURE_REPLY = "ERROR: Failed Disambiguating";

	private final String url;
	private final WikipediaApiInterface api;
	private final String tech;
	/** Milliseconds spent in the last service call issued by {@link #solveSa2W(String)}. */
	private long lastTime = 0;

	/**
	 * @param url
	 *            address of the AIDA web service the requests are posted to.
	 * @param tech
	 *            value sent as the {@code tech} form field; also shown by
	 *            {@link #getName()}.
	 * @param api
	 *            used to turn entity titles into ids.
	 */
	public AIDADefaultAnnotator(String url, String tech,
			WikipediaApiInterface api) {
		this.url = url;
		this.api = api;
		this.tech = tech;
	}

	/** Solves A2W by reducing the result of {@link #solveSa2W(String)}. */
	@Override
	public HashSet<Annotation> solveA2W(String text) throws AnnotationException {
		return ProblemReduction.Sa2WToA2W(solveSa2W(text));
	}

	/** Solves C2W by reducing the result of {@link #solveA2W(String)}. */
	@Override
	public HashSet<Tag> solveC2W(String text) throws AnnotationException {
		return ProblemReduction.A2WToC2W(solveA2W(text));
	}

	/** @return the display name, {@code "AIDA - (<tech>)"}. */
	@Override
	public String getName() {
		return String.format("AIDA - (%s)", tech);
	}

	/** @return duration in milliseconds of the last service call made by {@link #solveSa2W(String)}. */
	@Override
	public long getLastAnnotationTime() {
		return lastTime;
	}

	/**
	 * Disambiguates the given mentions. Each mention is wrapped in
	 * {@code [[...]]} inside a copy of the text, the marked-up text is sent to
	 * the service, and every mention is paired with the returned annotation
	 * that has the same position and length.
	 *
	 * @param text
	 *            the document text.
	 * @param mentions
	 *            the mentions to disambiguate; positions refer to {@code text}.
	 * @return one annotation per mention; the concept is {@code -1} when no
	 *         returned annotation has the same position and length.
	 * @throws AnnotationException
	 *             if the service call or the parsing of its reply fails.
	 */
	@Override
	public HashSet<Annotation> solveD2W(String text, HashSet<Mention> mentions)
			throws AnnotationException {
		List<Mention> mentionsList = new ArrayList<>(mentions);
		Collections.sort(mentionsList);

		// Copy the text, surrounding every mention with [[ ]].
		StringBuilder spotString = new StringBuilder();
		int lastChar = 0;
		for (Mention m : mentionsList) {
			int mentionEnd = m.getPosition() + m.getLength();
			spotString.append(text.substring(lastChar, m.getPosition()));
			spotString.append("[[");
			spotString.append(text.substring(m.getPosition(), mentionEnd));
			spotString.append("]]");
			lastChar = mentionEnd;
		}
		spotString.append(text.substring(lastChar));

		// NOTE(review): the annotations are computed on the bracketed string
		// but matched against offsets in the original text; this only lines up
		// if the service reports offsets that ignore the brackets - confirm.
		HashSet<ScoredAnnotation> resScored = solveSa2W(spotString.toString());
		HashSet<Annotation> res = new HashSet<>();
		for (Mention m : mentionsList) {
			Annotation match = null;
			for (ScoredAnnotation a : resScored) {
				if (a.getLength() == m.getLength()
						&& a.getPosition() == m.getPosition()) {
					match = new Annotation(a.getPosition(), a.getLength(),
							a.getConcept());
					break;
				}
			}
			if (match == null)
				match = new Annotation(m.getPosition(), m.getLength(), -1);
			res.add(match);
		}
		return res;
	}

	/** Solves Sc2W by reducing the result of {@link #solveSa2W(String)}. */
	@Override
	public HashSet<ScoredTag> solveSc2W(String text) throws AnnotationException {
		return ProblemReduction.Sa2WToSc2W(solveSa2W(text));
	}

	/**
	 * Sends the text to the service and builds one scored annotation for every
	 * returned mention whose {@code bestEntity} is not null.
	 *
	 * @param text
	 *            the text to annotate.
	 * @return the scored annotations; empty when the service reported a
	 *         disambiguation failure.
	 * @throws AnnotationException
	 *             if the service call fails or the reply lacks an expected
	 *             JSON field.
	 */
	@Override
	public HashSet<ScoredAnnotation> solveSa2W(String text)
			throws AnnotationException {
		JSONObject obj;
		long start = System.currentTimeMillis();
		try {
			obj = queryService(text);
		} finally {
			// Record a duration even when the call fails, so the field never
			// holds an absolute timestamp.
			lastTime = System.currentTimeMillis() - start;
		}

		if (obj == null)
			return new HashSet<>();

		// Parallel lists: index i of each list describes the same mention.
		List<Integer> startPositions = new ArrayList<>();
		List<Integer> lengths = new ArrayList<>();
		List<String> titles = new ArrayList<>();
		List<Float> scores = new ArrayList<>();

		try {
			JSONArray jsMentions = obj.getJSONArray("mentions");
			for (int i = 0; i < jsMentions.length(); i++) {
				JSONObject jsMention = jsMentions.getJSONObject(i);
				if (jsMention.isNull("bestEntity"))
					continue;
				JSONObject bestEntity = jsMention.getJSONObject("bestEntity");
				startPositions.add(jsMention.getInt("offset"));
				lengths.add(jsMention.getInt("length"));
				titles.add(StringEscapeUtils.unescapeJava(bestEntity
						.getString("name")));
				scores.add((float) bestEntity.getDouble("disambiguationScore"));
			}
		} catch (JSONException e) {
			throw new AnnotationException(e.getMessage());
		}

		HashSet<ScoredAnnotation> res = new HashSet<>();
		try {
			api.prefetchTitles(titles);
			for (int i = 0; i < startPositions.size(); i++) {
				res.add(new ScoredAnnotation(startPositions.get(i), lengths
						.get(i), api.getIdByTitle(titles.get(i)),
						scores.get(i)));
			}
		} catch (XPathExpressionException | IOException
				| ParserConfigurationException | SAXException e) {
			throw new RuntimeException(e);
		}

		return res;
	}

	/**
	 * Sends the text to the service and returns every mention listed in the
	 * reply, whether or not it has a {@code bestEntity}.
	 *
	 * @param text
	 *            the text to spot mentions in.
	 * @return the spotted mentions; empty when the service reported a
	 *         disambiguation failure.
	 * @throws AnnotationException
	 *             if the service call fails or the reply lacks an expected
	 *             JSON field.
	 */
	@Override
	public HashSet<Mention> getSpottedMentions(String text) {
		HashSet<Mention> res = new HashSet<>();
		JSONObject obj = queryService(text);
		if (obj == null)
			return res;

		try {
			JSONArray jsMentions = obj.getJSONArray("mentions");
			for (int i = 0; i < jsMentions.length(); i++) {
				JSONObject jsMention = jsMentions.getJSONObject(i);
				// NOTE(review): solveSa2W uses "offset" unchanged while this
				// method subtracts 1 - confirm which convention is correct.
				int pos = jsMention.getInt("offset") - 1;
				int len = jsMention.getInt("length");
				res.add(new Mention(pos, len));
			}
		} catch (JSONException e) {
			throw new AnnotationException(e.getMessage());
		}
		return res;
	}

	/**
	 * Calls {@link #queryJson(String, String, String)} with no GET parameters
	 * and turns any failure into an {@link AnnotationException}.
	 *
	 * @return the parsed reply, or {@code null} when the service reported a
	 *         disambiguation failure.
	 */
	private JSONObject queryService(String text) {
		String getParameters = "";
		try {
			return queryJson(getParameters, text, url);
		} catch (Exception e) {
			System.out
					.print("Got error while querying AIDA API with GET parameters: "
							+ getParameters + " with text: " + text);
			e.printStackTrace();
			throw new AnnotationException(
					"An error occurred while querying AIDA API. Message: "
							+ e.getMessage());
		}
	}

	/**
	 * Posts {@code text} and the configured {@code tech} value to the service
	 * as a URL-encoded form and parses the reply as JSON.
	 *
	 * @param getParameters
	 *            query string appended to {@code url} after a {@code ?}.
	 * @param text
	 *            the text to send; it is URL-encoded before sending.
	 * @param url
	 *            address of the service.
	 * @return the parsed reply, or {@code null} if the reply body equals
	 *         {@code "ERROR: Failed Disambiguating"}.
	 * @throws Exception
	 *             on connection, I/O or JSON parsing errors.
	 */
	private JSONObject queryJson(String getParameters, String text, String url)
			throws Exception {
		// Form fields are separated by '&' in application/x-www-form-urlencoded.
		String postParameters = String.format("text=%s&tech=%s",
				URLEncoder.encode(text, UTF_8), tech);
		byte[] body = postParameters.getBytes(UTF_8);

		URL webApi = new URL(String.format("%s?%s", url, getParameters));
		HttpURLConnection slConnection = (HttpURLConnection) webApi
				.openConnection();
		slConnection.setReadTimeout(0); // 0 = wait for the reply without a timeout
		slConnection.setDoOutput(true);
		slConnection.setDoInput(true);
		slConnection.setRequestMethod("POST");
		slConnection.setRequestProperty("Content-Type",
				"application/x-www-form-urlencoded");
		slConnection.setRequestProperty("charset", "utf-8");
		slConnection.setRequestProperty("Content-Length",
				Integer.toString(body.length));
		slConnection.setUseCaches(false);

		try (OutputStream out = slConnection.getOutputStream()) {
			out.write(body);
			out.flush();
		}

		String resultStr;
		try (Scanner s = new Scanner(slConnection.getInputStream(), UTF_8)) {
			// "\\A" matches only at the start of input, so next() returns the
			// whole reply as a single token.
			s.useDelimiter("\\A");
			resultStr = s.hasNext() ? s.next() : "";
		}

		if (FAILURE_REPLY.equals(resultStr))
			return null;

		return new JSONObject(resultStr);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy