All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.scie.web.ServerHandler Maven / Gradle / Ivy

Go to download

Module providing the webservice interface based on the Jetty embedded webserver and the FreeMarker template engine. Defines a simple format for providing textual annotations and produces output in HTML or JSON. This module has no dependencies on the other SCIE modules (except for the PDF text extractor) or on the UIMA framework and thus can be used in any context where text is annotated by an algorithm and should be presented to an end user.

The newest version!
/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package de.citec.scie.web;

import de.citec.scie.pdf.PDFStructuredTextExtractor;
import de.citec.scie.web.analysis.AbstractAnalysis;
import de.citec.scie.web.analysis.AbstractAnalysisResult;
import de.citec.scie.web.analysis.AnalysisException;
import de.citec.scie.web.exporter.html.HtmlExporter;
import de.citec.scie.web.exporter.html.HtmlFormater;
import de.citec.scie.web.exporter.json.JsonExporter;
import de.citec.scie.web.utils.FileType;
import de.citec.scie.web.utils.FileUtils;
import freemarker.template.Template;
import freemarker.template.TemplateException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.Part;
import org.eclipse.jetty.server.Request;
import org.eclipse.jetty.server.handler.AbstractHandler;

/**
 * Jetty request handler of the SCIE web service.
 *
 * <p>Every request is dispatched by its target path to exactly one of the
 * following: a static class path resource, a documentation file, the
 * document analysis api or a FreeMarker template. Which category a target
 * belongs to is decided by {@link ServerConfig}.</p>
 *
 * @author andreas
 */
public class ServerHandler extends AbstractHandler {

	/**
	 * File name used for exports if the client did not submit a file name
	 * along with the uploaded document.
	 */
	private static final String DEFAULT_FILENAME = "document";

	/**
	 * Analysis that is run on uploaded documents. May be null, in which case
	 * api requests are answered with an internal server error.
	 */
	private final AbstractAnalysis analysis;

	/**
	 * Creates a new handler.
	 *
	 * @param analysis is the analysis that should be performed on documents
	 * sent to the api.
	 */
	public ServerHandler(AbstractAnalysis analysis) {
		this.analysis = analysis;
	}

	/**
	 * Dispatches the request to the static, documentation, api or template
	 * handling code. Targets which match none of these categories are
	 * answered with "404 Not Found".
	 *
	 * @param target is the requested path.
	 * @param baseRequest is the Jetty request, marked as handled once a
	 * response has been produced.
	 * @param request is the servlet request.
	 * @param response is the servlet response.
	 * @throws IOException if reading a resource or writing the response fails.
	 * @throws ServletException if the servlet container reports an error.
	 */
	@Override
	public void handle(String target, Request baseRequest,
			HttpServletRequest request, HttpServletResponse response)
			throws IOException, ServletException {
		// Do not handle any target containing a ".." (might be exploitable)
		if (target.contains("..")) {
			response.setStatus(HttpServletResponse.SC_FORBIDDEN);
			return;
		}

		// Set the correct content type
		response.setContentType(ServerConfig.getContentType(target));

		// Try to handle a static request (either style files or script files)
		if (ServerConfig.isStatic(target)) {
			handleStatic(target, baseRequest, request, response);
			return;
		}

		// Try to handle a request to the documentation
		if (ServerConfig.isDocu(target)) {
			final InputStream docu;
			try {
				docu = new FileInputStream(ServerConfig.getDocuFile(target));
			} catch (IOException ex) {
				// The documentation file cannot be opened, treat this the
				// same way as a missing static resource
				response.setStatus(HttpServletResponse.SC_NOT_FOUND);
				return;
			}
			handleStatic(docu, baseRequest, request, response);

			// The request is done, do not fall through to the checks below
			return;
		}

		// Try to handle an api request
		if (ServerConfig.isAPI(target, request.getMethod())) {
			handleApi(baseRequest, request, response);
			return;
		}

		// Try to handle templates
		if (ServerConfig.isTemplate(target)) {
			// Store the HOST variable as needed by the api.ftl template. The
			// host header may be missing, fall back to the server name and
			// port in this case.
			String host = request.getHeader("host");
			if (host == null) {
				host = request.getServerName() + ":" + request.getServerPort();
			}
			final Map<String, String> data = new HashMap<>();
			data.put("host", buildHost(host));

			// Assemble the template name: replace the extension of the target
			// (if there is any) by ".ftl"
			final String template;
			if (target.equals("/")) {
				template = "/index.ftl";
			} else {
				final int dot = target.lastIndexOf('.');
				template = (dot < 0 ? target : target.substring(0, dot)) + ".ftl";
			}
			handleTemplate("/public" + template, response, data);
			baseRequest.setHandled(true);
			return;
		}

		// Default: The requested resource does not exist
		response.setStatus(HttpServletResponse.SC_NOT_FOUND);
	}

	/**
	 * Serves the resource "static" + target, resolved relative to this class.
	 * Sets the status to "404 Not Found" (without marking the request as
	 * handled) if the resource does not exist.
	 */
	private void handleStatic(String target, Request baseRequest,
			HttpServletRequest request, HttpServletResponse response)
			throws IOException, ServletException {
		// Try to read the resource
		final InputStream res = ServerHandler.class.getResourceAsStream("static" + target);
		if (res == null) {
			response.setStatus(HttpServletResponse.SC_NOT_FOUND);
			return;
		}
		handleStatic(res, baseRequest, request, response);
	}

	/**
	 * Copies the given stream into the response body and marks the request as
	 * handled. Both the given stream and the response output stream are
	 * closed, even if copying fails.
	 *
	 * @param is is the stream containing the content that should be sent.
	 */
	private void handleStatic(InputStream is, Request baseRequest,
			HttpServletRequest request, HttpServletResponse response)
			throws IOException, ServletException {
		// Set the cache control header for static content (clients do not load
		// these resources within every page impression)
		response.setHeader("Cache-Control", "max-age=86400");

		// The status has to be set before the body is written
		response.setStatus(HttpServletResponse.SC_OK);

		// Just output the resource, read() returns -1 at the end of the stream
		final byte[] buf = new byte[1024 * 10];
		try (InputStream in = is; OutputStream out = response.getOutputStream()) {
			int len;
			while ((len = in.read(buf)) != -1) {
				out.write(buf, 0, len);
			}
		}
		baseRequest.setHandled(true);
	}

	/**
	 * Sets the "Content-Disposition" header which asks the client to store the
	 * response as a file named fn + "." + ext.
	 *
	 * @param response is the response the header is set on.
	 * @param fn is the file name without the new extension. If null or empty,
	 * a default name is used.
	 * @param ext is the extension that should be appended.
	 */
	private static void setContentDisposition(HttpServletResponse response,
			String fn, String ext) {
		final String name = (fn == null || fn.isEmpty()) ? DEFAULT_FILENAME : fn;

		// The name is sent as quoted string: quotes and backslashes have to be
		// escaped by a backslash. Control characters (including line breaks)
		// are dropped as they must not occur inside a header value.
		final StringBuilder escaped = new StringBuilder(name.length() + 8);
		for (int i = 0; i < name.length(); i++) {
			final char c = name.charAt(i);
			if (c < 0x20 || c == 0x7f) {
				continue;
			}
			if (c == '"' || c == '\\') {
				escaped.append('\\');
			}
			escaped.append(c);
		}
		response.setHeader("Content-Disposition",
				"attachment; filename=\"" + escaped + "." + ext + "\"");
	}

	/**
	 * Reads a boolean request parameter.
	 *
	 * @param params is the parameter map of the request.
	 * @param key is the name of the parameter.
	 * @return true if the parameter was given exactly once with the value "1"
	 * or "true" (ignoring case), false in any other case.
	 */
	private static boolean readFlag(Map<String, String[]> params,
			String key) {
		final String value = readString(params, key, null);
		return value != null
				&& (value.equals("1") || value.equalsIgnoreCase("true"));
	}

	/**
	 * Reads a string request parameter.
	 *
	 * @param params is the parameter map of the request.
	 * @param key is the name of the parameter.
	 * @param defaultValue is returned if the parameter is missing, was given
	 * more than once or has no value.
	 * @return the value of the parameter or the default value.
	 */
	private static String readString(Map<String, String[]> params, String key,
			String defaultValue) {
		final String[] val = params.get(key);
		if (val != null && val.length == 1 && val[0] != null) {
			return val[0];
		}
		return defaultValue;
	}

	/**
	 * Joins all error messages to a single line which can be sent to api
	 * clients that do not use the web frontend.
	 */
	private static String joinMessages(Map<String, String> errors) {
		final StringBuilder sb = new StringBuilder();
		for (String message : errors.values()) {
			if (sb.length() > 0) {
				sb.append("; ");
			}
			sb.append(message);
		}
		return sb.toString();
	}

	/**
	 * Handles a request to the api: validates the request parameters, runs
	 * the analysis on the uploaded document and writes the result in the
	 * requested format.
	 *
	 * <p>If the request is invalid or the analysis fails, the upload form is
	 * rendered again with the error messages in web frontend mode. All other
	 * clients receive a "400 Bad Request" error containing the messages. The
	 * uploaded parts are deleted in any case.</p>
	 */
	private void handleApi(Request baseRequest, HttpServletRequest request,
			HttpServletResponse response) throws IOException, ServletException {

		// Only post requests are allowed when calling the api
		if (!"POST".equals(request.getMethod())) {
			response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Only POST requests are allowed on /api");
			return;
		}

		// Enable multipart handling
		final String contentType = request.getContentType();
		if (contentType == null || !contentType.startsWith("multipart/form-data")) {
			response.sendError(HttpServletResponse.SC_BAD_REQUEST, "The request must be encoded using multipart/form-data");
			return;
		}
		baseRequest.setAttribute(Request.__MULTIPART_CONFIG_ELEMENT,
				ServerConfig.MULTI_PART_CONFIG);

		// Read all important parameters
		try {
			final Part part = request.getPart("document");
			final Map<String, String[]> params = request.getParameterMap();
			final boolean webfrontend = readFlag(params, "webfrontend");

			// The confirmations are only required in web frontend mode
			final boolean confirmUpload = readFlag(params, "confirm_upload")
					|| !webfrontend;
			final boolean confirmTerms = readFlag(params, "confirm_terms")
					|| !webfrontend;
			final boolean hasDocument = part != null && part.getSize() > 0;

			// Use a fixed locale, the format names must not depend on the
			// default locale of the server
			final String format = readString(params, "format", "json")
					.toLowerCase(Locale.ROOT);
			final boolean validFormat = ServerConfig.isValidFormat(format);

			// Collect all errors
			final Map<String, String> errors = new HashMap<>();
			if (!confirmUpload) {
				errors.put("error_confirm_upload", ServerConfig.ERR_CONFIRM);
			}
			if (!confirmTerms) {
				errors.put("error_confirm_terms", ServerConfig.ERR_CONFIRM);
			}
			if (!hasDocument) {
				errors.put("error_document", ServerConfig.ERR_NO_DOCUMENT);
			}
			if (!validFormat) {
				errors.put("error_format", ServerConfig.ERR_FORMAT);
			}

			// Try to work on the document if there are no errors
			if (errors.isEmpty() && part != null) {
				// Make sure an analysis is specified
				if (analysis == null) {
					response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
							"No analysis instance specified");
					return;
				}

				// Try to register the current request, issue an error message
				// if there are too many requests from a single ip address.
				final String ip = request.getRemoteAddr();
				if (!ServerConfig.registerRequestAndWait(ip)) {
					response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,
							"Too many requests from your IP");
					return;
				}

				try {
					// Analyze the document input stream, abort if the document
					// is neither a PDF nor a text file. The stream used for
					// the detection is closed right away, the analysis reads
					// the document from a fresh stream.
					FileType t;
					try (InputStream probe = part.getInputStream()) {
						t = FileUtils.detectFiletype(probe);
					}
					if (t == FileType.Unknown) {
						throw new AnalysisException(ServerConfig.ERR_UNSUPPORTED);
					}

					// Run the given analysis
					try (InputStream is = getDocumentInputStream(part, t)) {
						// The input file has been converted to plain text if
						// the analysis does not support pdf
						if (!analysis.supportsPDF()) {
							t = FileType.TXT;
						}

						// Perform the actual analysis
						final AbstractAnalysisResult result = analysis.analyze(is, t);

						// Write the output, the submitted file name may be
						// missing
						final String submitted = part.getSubmittedFileName();
						final String fn = (submitted == null || submitted.isEmpty())
								? DEFAULT_FILENAME : submitted;
						switch (format) {
							case "html_web":
								handleHtmlExport(fn, "/results.ftl", response, result);
								break;
							case "html":
								setContentDisposition(response, fn, "html");
								handleHtmlExport(fn, "/results_standalone.ftl", response, result);
								break;
							case "html_plain":
								setContentDisposition(response, fn, "html");
								handleHtmlExport(fn, "/results_standalone_plain.ftl", response, result);
								break;
							case "json":
								setContentDisposition(response, fn, "json");
								handleJsonExport(fn, response, result);
								break;
							default:
								// The format was accepted by the validation
								// but there is no exporter for it. Report
								// this instead of sending an empty response.
								errors.put("error_format", ServerConfig.ERR_FORMAT);
								break;
						}
					}
				} catch (AnalysisException ex) {
					errors.put("error_document", "Error: " + ex.getMessage());
				} finally {
					// Make sure the ip is unregistered no matter what
					ServerConfig.unregisterRequest(ip);
				}
			}

			// Handle any error
			if (!errors.isEmpty()) {
				if (webfrontend) {
					// In webfrontend mode simply display all error messages.
					// Insert the correctly sent values into the template.
					final Map<String, String> data = new HashMap<>(errors);
					if (validFormat) {
						data.put("format_" + format + "_selected", "selected");
					}
					if (confirmUpload) {
						data.put("confirm_upload_checked", "checked");
					}
					if (confirmTerms) {
						data.put("confirm_terms_checked", "checked");
					}

					// Build the template
					handleTemplate("/public/upload.ftl", response, data);
				} else {
					// Other clients must not receive an empty success
					// response, report the errors explicitly
					response.sendError(HttpServletResponse.SC_BAD_REQUEST,
							joinMessages(errors));
				}
			}

			// The status has already been set by the code which produced the
			// output. It is not reset here, as this would hide failures
			// reported by handleTemplate.
			baseRequest.setHandled(true);
		} finally {
			// No matter what happens, make sure the uploaded documents get
			// deleted
			for (Part uploaded : request.getParts()) {
				uploaded.delete();
			}
		}
	}

	/**
	 * Renders the given template into the response. The status is set to
	 * "200 OK" on success, to "404 Not Found" if an IOException occurs while
	 * loading or writing the template and to "500 Internal Server Error" if
	 * processing the template fails.
	 *
	 * @param templateName is the name of the template relative to the
	 * "templates" directory, starting with a slash.
	 * @param response is the response the output is written to.
	 * @param data contains the variables passed to the template, may be null.
	 * Some generic entries are added to the given map.
	 */
	private void handleTemplate(String templateName,
			HttpServletResponse response, Map<String, String> data) throws IOException,
			ServletException {
		// Create an empty hashmap if data was not given
		if (data == null) {
			data = new HashMap<>();
		}

		// Add some generic messages to the template
		data.put("msg_err_confirm", ServerConfig.ERR_CONFIRM);
		data.put("msg_err_no_document", ServerConfig.ERR_NO_DOCUMENT);

		// Preselect the confirmation checkboxes for development
		if (ServerConfig.CONFIRMATION_PRESELECTION) {
			data.put("confirm_upload_checked", "checked");
			data.put("confirm_terms_checked", "checked");
		}

		// Try to load the template and write the output into the response stream
		try {
			final Template template = ServerConfig.getTemplate("templates" + templateName);
			template.process(data, response.getWriter());
		} catch (IOException ex) {
			response.setStatus(HttpServletResponse.SC_NOT_FOUND);
			return;
		} catch (TemplateException ex) {
			response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
			return;
		}

		// Everything is OK, the request was handled
		response.setStatus(HttpServletResponse.SC_OK);
	}

	/**
	 * Exports the analysis result as HTML and as JSON and embeds both into
	 * the given template.
	 *
	 * @param fn is the name of the analysed file, it is escaped before being
	 * passed to the template.
	 * @param template is the name of the result template.
	 * @param response is the response the page is written to.
	 * @param result is the analysis result that should be exported.
	 */
	private void handleHtmlExport(String fn, String template,
			HttpServletResponse response, AbstractAnalysisResult result)
			throws IOException, ServletException {
		final StringBuilder html = new StringBuilder();
		new HtmlExporter().export(html, result);

		final StringBuilder json = new StringBuilder();
		new JsonExporter(false).export(json, result);

		final Map<String, String> data = new HashMap<>();
		data.put("analysis_filename", HtmlFormater.escapeString(fn));
		data.put("analysis_result", html.toString());
		data.put("analysis_json", json.toString());

		response.setContentType(ServerConfig.getContentType(".html"));
		handleTemplate(template, response, data);
	}

	/**
	 * Exports the analysis result as JSON and writes it to the response.
	 *
	 * @param fn is the name of the analysed file (currently not used).
	 * @param response is the response the JSON document is written to.
	 * @param result is the analysis result that should be exported.
	 */
	private void handleJsonExport(String fn, HttpServletResponse response,
			AbstractAnalysisResult result)
			throws IOException, ServletException {
		final StringBuilder json = new StringBuilder();
		new JsonExporter(true).export(json, result);
		response.setContentType(ServerConfig.getContentType(".json"));
		response.setStatus(HttpServletResponse.SC_OK);
		response.getWriter().append(json);
	}

	/**
	 * Returns the stream the analysis should read the document from. PDF
	 * documents are passed through the PDF text extractor if the analysis
	 * does not support PDF documents itself.
	 *
	 * @param part is the uploaded document.
	 * @param t is the detected type of the document.
	 * @return a stream which has to be closed by the caller.
	 * @throws AnalysisException if the document cannot be read.
	 */
	private InputStream getDocumentInputStream(Part part, FileType t) throws AnalysisException {
		try {
			// Convert the PDF to text if the analysis does not directly support
			// pdf documents
			if (t == FileType.PDF && !analysis.supportsPDF()) {
				try (InputStream is = part.getInputStream()) {
					return PDFStructuredTextExtractor.importAsInputStream(is);
				}
			}
			return part.getInputStream();
		} catch (IOException ex) {
			throw new AnalysisException("Error while retrieving document input stream", ex);
		}
	}

	/**
	 * Builds the escaped base url "http://HOST/" from the given host string.
	 * A "http://" or "https://" prefix and everything starting with the first
	 * slash after the host name are removed.
	 *
	 * @param host is the value of the host header, null is treated like an
	 * empty string.
	 * @return the base url, always using the "http://" scheme.
	 */
	protected static String buildHost(String host) {
		final String h = (host == null) ? "" : host;

		// Fetch the start index
		int b = h.indexOf("http://") + 7;
		if (b < 7) {
			b = h.indexOf("https://") + 8;
			if (b < 8) {
				b = 0;
			}
		}

		// Fetch the end index
		int e = h.indexOf("/", b);
		if (e < 0) {
			e = h.length();
		}

		// Escape the characters to prevent XSS
		final StringBuilder sb = new StringBuilder();
		for (int i = b; i < e; i++) {
			sb.append(HtmlFormater.escapeCharacterStatic(h.charAt(i)));
		}
		return "http://" + sb.toString() + "/";
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy