All downloads are free. The search and download features use the official Maven repository.

it.unipi.di.acube.batframework.systemPlugins.AgdistisAnnotator Maven / Gradle / Ivy

package it.unipi.di.acube.batframework.systemPlugins;

import it.unipi.di.acube.batframework.data.Annotation;
import it.unipi.di.acube.batframework.data.Mention;
import it.unipi.di.acube.batframework.problems.D2WSystem;
import it.unipi.di.acube.batframework.utils.AnnotationException;
import it.unipi.di.acube.batframework.utils.WikipediaApiInterface;

import java.io.*;
import java.net.*;
import java.util.*;

import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;

/**
 * D2W annotator that uses HITS on the DBpedia graph.
 * 
 * @see <a href="https://github.com/AKSW/AGDISTIS">AGDISTIS on GitHub</a>
 */
public class AgdistisAnnotator implements D2WSystem {

	private long calib = -1;
	private long lastTime = -1;

	private final String host;
	private final int port;
	private final WikipediaApiInterface wikiApi;

	/**
	 * Creates an annotator bound to an AGDISTIS service at the given address.
	 *
	 * @param host    host name or IP address used to build the service URL
	 * @param port    port used to build the service URL
	 * @param wikiApi Wikipedia API helper kept in a field of this annotator
	 */
	public AgdistisAnnotator(String host, int port, WikipediaApiInterface wikiApi) {
		this.wikiApi = wikiApi;
		this.port = port;
		this.host = host;
	}

	public AgdistisAnnotator(WikipediaApiInterface wikiApi) {
		this("139.18.2.164", 8080, wikiApi);
	}

	/**
	 * Returns the fixed display name of this annotator.
	 *
	 * @return the string {@code "Agdistis"}
	 */
	@Override
	public String getName() {
		final String systemName = "Agdistis";
		return systemName;
	}

	/**
	 * Returns the stored last annotation time minus the calibration offset, floored at zero.
	 * <p>
	 * The offset is requested from {@code TimingCalibrator} while {@code calib} still holds
	 * its initial value of -1 and is then kept in the field.
	 *
	 * @return the calibrated time, or 0 if the difference is not positive
	 */
	@Override
	public long getLastAnnotationTime() {
		if (calib == -1) {
			calib = TimingCalibrator.getOffset(this);
		}
		long corrected = lastTime - calib;
		if (corrected > 0) {
			return corrected;
		}
		return 0;
	}

	/**
	 * Marks the given mentions in {@code text} and asks the AGDISTIS service to annotate them.
	 *
	 * @param text     the plain text containing the mentions
	 * @param mentions the mentions to disambiguate
	 * @return the set produced by {@link #getAnnotations(String)} for the marked-up text
	 * @throws AnnotationException if the request fails with an {@link IOException} or the
	 *                             response cannot be parsed as JSON; the original exception
	 *                             is attached as the cause
	 */
	@Override
	public HashSet solveD2W(String text, HashSet mentions) throws AnnotationException {
		String textWithMentions = createTextWithMentions(text, mentions);
		try {
			return getAnnotations(textWithMentions);
		} catch (IOException | JSONException e) {
			// Keep the message callers already see, but attach the underlying exception so
			// its type and stack trace are not lost when the failure is reported.
			AnnotationException failure = new AnnotationException(e.getMessage());
			failure.initCause(e);
			throw failure;
		}
	}

	/**
	 * Posts the marked-up text to the AGDISTIS endpoint and parses the JSON reply.
	 * <p>
	 * The request is a form-encoded POST to {@code http://host:port/AGDISTIS} with the
	 * parameters {@code type=agdistis} and {@code text=<url-encoded text>}.
	 *
	 * @param textWithMentions text in which the mentions are already marked up
	 * @return the set built by {@code parseJsonStream} from the service response
	 * @throws IOException   if connecting, sending, or reading fails
	 * @throws JSONException if the response is not valid JSON
	 */
	public HashSet getAnnotations(String textWithMentions) throws IOException, JSONException {
		URL agdistisUrl = new URL("http://" + host + ":" + port + "/AGDISTIS");
		String parameters = "type=agdistis&text=" + URLEncoder.encode(textWithMentions, "UTF-8");
		// Encode once with an explicit charset so the declared length always matches the
		// bytes that are actually sent, independent of the platform default charset.
		byte[] payload = parameters.getBytes("UTF-8");

		HttpURLConnection slConnection = (HttpURLConnection) agdistisUrl.openConnection();
		slConnection.setDoOutput(true);
		slConnection.setDoInput(true);
		slConnection.setRequestMethod("POST");
		slConnection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
		slConnection.setRequestProperty("charset", "utf-8");
		slConnection.setRequestProperty("Content-Length", Integer.toString(payload.length));
		slConnection.setUseCaches(false);

		// try-with-resources closes the request stream even when writing fails.
		try (OutputStream out = slConnection.getOutputStream()) {
			out.write(payload);
			out.flush();
		}

		return parseJsonStream(slConnection);
	}

	/**
	 * Reads the complete response body of {@code conn} into a string and parses it as a
	 * JSON array of named entities.
	 * <p>
	 * NOTE(review): the text of this method is damaged. The {@code for} header below breaks
	 * off after {@code i} and continues with the tail of an {@code if} condition that uses
	 * {@code posSlash}, {@code posPoints} and {@code namedEntityUri}, none of which are
	 * declared in the visible code. Presumably the rest of the loop, the return of
	 * {@code annotations} and the header of a separate URI-shortening helper were lost;
	 * restore them from the upstream source before relying on this method.
	 */
	private HashSet parseJsonStream(HttpURLConnection conn) throws IOException, JSONException {
		HashSet annotations = new HashSet<>();

		// The delimiter "\\A" matches only at the start of input, so the first token is the
		// entire stream; an empty response yields the empty string.
		Scanner s = new Scanner(conn.getInputStream()).useDelimiter("\\A");
		String resultStr = s.hasNext() ? s.next() : "";

		JSONArray namedEntities = new JSONArray(resultStr);
		// NOTE(review): truncated line -- loop condition, loop body and what appears to be
		// the start of another method are missing between "i" and "posPoints".
		for (int i=0; i posPoints) {
			// Returns the part of the URI after the position stored in posSlash.
			return namedEntityUri.substring(posSlash + 1);
		// NOTE(review): confirm this condition against upstream once the missing header is
		// restored; its intended counterpart cannot be checked from the visible text.
		} else if (posPoints < posSlash) {
			// Returns the part of the URI after the position stored in posPoints.
			return namedEntityUri.substring(posPoints + 1);
		} else {
			// Neither branch applied: the URI is returned unchanged.
			return namedEntityUri;
		}
	}

	/**
	 * Builds the AGDISTIS input text by wrapping every kept mention in entity tags.
	 * <p>
	 * Example: {@code The <entity>University of Leipzig</entity> in <entity>Barack Obama</entity>.}
	 * <p>
	 * Mentions are processed in order of their position. When a mention starts inside the
	 * mention emitted last, only the longer of the two is kept (on equal length the earlier
	 * one wins) and a notice is printed to {@code System.err}.
	 *
	 * @param text        the plain text the mentions refer to
	 * @param mentionsSet mentions to mark up; their positions and lengths index into {@code text}
	 * @return {@code text} with each kept mention enclosed in {@code <entity>} tags
	 */
	static String createTextWithMentions(String text, HashSet<Mention> mentionsSet) {
		List<Mention> mentions = new ArrayList<Mention>(mentionsSet);
		Collections.sort(mentions, new Comparator<Mention>() {
			@Override
			public int compare(Mention left, Mention right) {
				return Integer.compare(left.getPosition(), right.getPosition());
			}
		});

		StringBuilder textBuilder = new StringBuilder();
		int lastPos = 0;

		// State needed to undo the most recently emitted mention. Tracking the emitted
		// mention (rather than the previous list element, which may have been skipped)
		// and the exact buffer length keeps the rollback correct with the added markup.
		Mention lastKept = null;
		int builderLengthBeforeLastKept = 0;
		int lastPosBeforeLastKept = 0;

		for (Mention m : mentions) {
			int begin = m.getPosition();
			int end = begin + m.getLength();

			if (lastKept != null && begin < lastPos) {
				// we have two overlapping mentions --> take the larger one
				assert (m.overlaps(lastKept));
				System.err.printf("\"%s\" at pos %d overlaps with \"%s\" at pos %d%n", getMentionLabel(m, text),
						m.getPosition(), getMentionLabel(lastKept, text), lastKept.getPosition());
				if (m.getLength() > lastKept.getLength()) {
					// current is larger --> drop everything emitted for the previous one
					textBuilder.setLength(builderLengthBeforeLastKept);
					lastPos = lastPosBeforeLastKept;
				} else {
					// previous is larger or equal --> skip current
					continue;
				}
			}

			builderLengthBeforeLastKept = textBuilder.length();
			lastPosBeforeLastKept = lastPos;

			// plain text since the last emitted mention, then the tagged mention itself
			textBuilder.append(text, lastPos, begin);
			textBuilder.append("<entity>").append(text, begin, end).append("</entity>");

			lastPos = end;
			lastKept = m;
		}

		// remainder of the text after the last emitted mention
		textBuilder.append(text, lastPos, text.length());

		return textBuilder.toString();
	}

	/**
	 * Extracts the surface form of a mention from the text it refers to.
	 *
	 * @param m    the mention whose position and length select the substring
	 * @param text the text the mention indexes into
	 * @return the characters of {@code text} covered by {@code m}
	 */
	private static String getMentionLabel(Mention m, String text) {
		int start = m.getPosition();
		int stop = start + m.getLength();
		return text.substring(start, stop);
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy