de.citec.scie.web.exporter.generic.NestedWriter Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of webservice Show documentation
Module providing the webservice interface based on the Jetty embedded webserver and the FreeMarker template engine. Defines a simple format for providing textual annotations and produces output in HTML or JSON. This module has no dependencies on the other SCIE modules (except for the PDF text extractor) or the UIMA framework and thus can be used in any context where text is annotated by an algorithm and should be presented to an end user.
The newest version!
/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.scie.web.exporter.generic;

import de.citec.scie.web.analysis.AbstractAnalysisResult;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.Stack;
import java.util.TreeMap;

/**
 * This generic class can be used to transform an AbstractAnalysisResult into a
 * nested format such as XML or HTML -- the actual format is determined by an
 * implementation of the NestedWriterFormater interface, which is responsible
 * for escaping characters and for writing opening and closing tags.
 *
 * @author Andreas Stöckel -- [email protected]
 */
public class NestedWriter {

	/**
	 * Just an alias for an ArrayList of integers to make the code a little
	 * easier to understand.
	 */
	private static class AnnotationGroup extends ArrayList {
	};

	/**
	 * The AnnotationIndex class represents a list of annotations belonging to
	 * the same group. The AnnotationIndex class groups these annotations by
	 * their begin and end position, placing elements with the same begin and
	 * end position in the same AnnotationGroup.
	 */
	private class AnnotationIndex {

		/**
		 * Internally used map from begin positions to a map of end positions
		 * and ElementGroup instances.
		 */
		private final Map> begins
				= new HashMap<>();

		/**
		 * Inserts a new annotation with the given index into the index.
		 *
		 * @param idx is the annotation index in the result set.
		 */
		public void insertAnnotation(int idx) {
			// Fetch the begin and end position
			final int begin = result.getAnnotationBegin(idx);
			final int end = result.getAnnotationEnd(idx);

			// Fetch the end position map
			SortedMap ends = begins.get(begin);
			if (ends == null) {
				ends = new TreeMap<>(Collections.reverseOrder());
				begins.put(begin, ends);
			}

			// Fetch the collection for elements ending at this index and insert
			// this element
			AnnotationGroup eg = ends.get(end);
			if (eg == null) {
				eg = new AnnotationGroup();
				ends.put(end, eg);
			}
			eg.add(idx);
		}

		/**
		 * Returns all annotation groups starting at the given position.
		 *
		 * @param pos the start position.
		 * @return a map containing the annotation groups starting at the given
		 * position. The key corresponds to the end position. Larger end
		 * positions are first (to allow a more optional tree structure to be
		 * built).
		 */
		public SortedMap getAnnotations(int pos) {
			return begins.get(pos);
		}
	}

	/**
	 * Map mapping from group ids to the corresponding index.
	 */
	private final Map groups = new HashMap<>();

	/**
	 * Reference to the abstract analysis result.
	 */
	private final AbstractAnalysisResult result;

	/**
	 * Formater which performs the actual work of generating the output document
	 * for a certain language.
	 */
	private final NestedWriterFormater formater;

	/**
	 * Underlying document text.
	 */
	private final String text;

	/**
	 * Constructor of the NestedWriter class.
	 *
	 * @param result reference to the analysis result.
	 * @param formater reference to the formater producing the output.
	 */
	public NestedWriter(AbstractAnalysisResult result,
			NestedWriterFormater formater) {
		this.result = result;
		this.formater = formater;
		this.text = result.getText();
		buildGroup();
	}

	/**
	 * Function used internally to build the groups map.
	 */
	private void buildGroup() {
		for (int i = 0; i < result.getAnnotationCount(); i++) {
			// Skip annotations which are longer than n = 1000
			// characters -- this removes ugly long (but not wrong!)
			// annotations from the output
			final int begin = result.getAnnotationBegin(i);
			final int end = result.getAnnotationEnd(i);
			if (end - begin > 1000) {
				continue;
			}

			// Create an annotation group for this group id
			int gid = result.getAnnotationGroupId(i);
			AnnotationIndex group = groups.get(gid);
			if (group == null) {
				group = new AnnotationIndex();
				groups.put(gid, group);
			}

			// Insert this element into the annotation group
			group.insertAnnotation(i);
		}
	}

	/**
	 * Functions which performs the actual writing.
	 *
	 * @return a string containing the output document.
	 */
	public String write() {
		final StringBuilder res = new StringBuilder();
		final Stack spans = new Stack<>();
		final Map> endMap = new TreeMap<>();
		for (int pos = 0; pos < text.length(); pos++) {
			// Check whether an annotation group here, if yes, rewind the
			// stack until the closed group is found, then rebuild the spans
			if (endMap.containsKey(pos)) {
				Set skippedGroups = new HashSet<>();
				for (AnnotationGroup closedGroup : endMap.get(pos)) {
					// Skip groups which already have been closed
					if (skippedGroups.contains(closedGroup)) {
						continue;
					}

					Stack closedSpans = new Stack<>();
					while (!spans.empty()) {
						final AnnotationGroup a = spans.pop();
						final int gid = result.getAnnotationGroupId(a.get(0));
						final boolean last = closedGroup == a;
						res.append(formater.closeTag(a, gid, last));
						if (last) {
							break;
						}
						closedSpans.push(a);
					}

					// Reopen the closed spans
					while (!closedSpans.empty()) {
						final AnnotationGroup a = closedSpans.pop();
						final int idx0 = a.get(0);
						final int end = result.getAnnotationEnd(idx0);
						if (end > pos) {
							final int gid = result.getAnnotationGroupId(idx0);
							res.append(formater.openTag(a, gid, false));
							spans.push(a);
						} else {
							// Don't reopen this group if it is going to be
							// closed in the outer for loop
							skippedGroups.add(a);
						}
					}
				}

				// Remove this position from the end map
				endMap.remove(pos);
			}

			// Open all spans that start at this position
			for (AnnotationIndex group : groups.values()) {
				SortedMap as = group.getAnnotations(pos);
				if (as == null) {
					continue;
				}
				for (Map.Entry es : as.entrySet()) {
					// Fetch the annotation group, open the corresponding tag,
					// add the annotation group to the stack
					final AnnotationGroup a = es.getValue();
					final int gid = result.getAnnotationGroupId(a.get(0));
					res.append(formater.openTag(a, gid, true));
					spans.add(a);

					// Add the group and its end position to the endMap
					final int end = es.getKey();
					Collection endingGroups = endMap.get(end);
					if (endingGroups == null) {
						endingGroups = new ArrayList<>();
						endMap.put(end, endingGroups);
					}
					endingGroups.add(a);
				}
			}

			// Insert the current character
			res.append(formater.escapeCharacter(text.charAt(pos)));
		}
		return res.toString();
	}

}