All downloads are free. The search and download functionality uses the official Maven repository.

com.findwise.hydra.local.RemotePipeline Maven / Gradle / Ivy

There is a newer version: 0.5.0
Show newest version
package com.findwise.hydra.local;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.util.EntityUtils;

import com.findwise.hydra.DocumentFile;
import com.findwise.hydra.DocumentID;
import com.findwise.hydra.JsonException;
import com.findwise.hydra.SerializationUtils;
import com.findwise.tools.HttpConnection;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;

public class RemotePipeline {
	private static final Logger internalLogger = LoggerFactory.getLogger("internal");
	private static final Logger logger = LoggerFactory.getLogger(RemotePipeline.class);

	public static final String GET_DOCUMENT_URL = "getDocument";
	public static final String WRITE_DOCUMENT_URL = "writeDocument";
	public static final String RELEASE_DOCUMENT_URL = "releaseDocument";
	public static final String PROCESSED_DOCUMENT_URL = "processedDocument";
	public static final String PENDING_DOCUMENT_URL = "pendingDocument";
	public static final String DISCARDED_DOCUMENT_URL = "discardedDocument";
	public static final String GET_PROPERTIES_URL = "getProperties";
	public static final String FAILED_DOCUMENT_URL = "failedDocument";
	public static final String FILE_URL = "documentFile";

	public static final String STAGE_PARAM = "stage";
	public static final String NORELEASE_PARAM = "norelease";
	public static final String PARTIAL_PARAM = "partial";
	public static final String DOCID_PARAM = "docid";
	public static final String FILENAME_PARAM = "filename";

	public static final int DEFAULT_PORT = 12001;
	public static final String DEFAULT_HOST = "127.0.0.1";

	private boolean performanceLogging = false;

	private HttpConnection core;

	private String getUrl;
	private String writeUrl;
	private String processedUrl;
	private String failedUrl;
	private String pendingUrl;
	private String discardedUrl;
	private String propertyUrl;
	private String fileUrl;

	private String stageName;

	private LocalDocument currentDocument;

	/**
	 * Calls RemotePipeline(String, int, String) with default values for
	 * hostName (RemotePipeline.DEFAULT_HOST) and port (RemotePipeline.DEFAULT_PORT).
	 *
	 * @param stageName
	 */
	public RemotePipeline(String stageName) {
		this(DEFAULT_HOST, DEFAULT_PORT, stageName);
	}

	public RemotePipeline(String hostName, int port, String stageName) {
		this.stageName = stageName;
		getUrl = "/" + GET_DOCUMENT_URL + "?" + STAGE_PARAM + "=" + stageName;
		writeUrl = "/" + WRITE_DOCUMENT_URL + "?" + STAGE_PARAM + "=" + stageName;
		processedUrl = "/" + PROCESSED_DOCUMENT_URL + "?" + STAGE_PARAM + "=" + stageName;
		failedUrl = "/" + FAILED_DOCUMENT_URL + "?" + STAGE_PARAM + "=" + stageName;
		pendingUrl = "/" + PENDING_DOCUMENT_URL + "?" + STAGE_PARAM + "=" + stageName;
		discardedUrl = "/" + DISCARDED_DOCUMENT_URL + "?" + STAGE_PARAM + "=" + stageName;
		propertyUrl = "/" + GET_PROPERTIES_URL + "?" + STAGE_PARAM + "=" + stageName;
		fileUrl = "/" + FILE_URL + "?" + STAGE_PARAM + "=" + stageName;

		core = new HttpConnection(hostName, port);
	}

	/**
	 * Non-recurring, use this in all known cases except for in an output node.
	 * 

* The fetched document will be tagged with the name of the stage which is * used to execute getDocument. */ public LocalDocument getDocument(LocalQuery query) throws IOException { HttpResponse response; long start = System.currentTimeMillis(); response = core.post(getUrl, query.toJson()); long startSerialize = System.currentTimeMillis(); long startJson = 0L; LocalDocument ld = null; if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { String s = EntityUtils.toString(response.getEntity()); try { startJson = System.currentTimeMillis(); ld = new LocalDocument(s); } catch (JsonException e) { throw new IOException(e); } internalLogger.debug("Received document with ID " + ld.getID()); currentDocument = ld; } else if (response.getStatusLine().getStatusCode() == HttpStatus.SC_NOT_FOUND) { internalLogger.debug("No document found matching query"); EntityUtils.consume(response.getEntity()); } else { logUnexpected("getDocument()", response); } if (isPerformanceLogging()) { long end = System.currentTimeMillis(); Object docId = ld != null ? ld.getID() : null; logger.info(String.format("type=performance event=query stage_name=%s doc_id=\"%s\" start=%d fetch=%d entitystring=%d serialize=%d end=%d total=%d", stageName, docId, start, startSerialize - start, startJson - startSerialize, end - startJson, end, end - start)); } return ld; } private static void logUnexpected(String apiMethod, HttpResponse response) throws IOException { internalLogger.error(apiMethod + " gave an unexpected response: " + response.getStatusLine() + ", Message: " + EntityUtils.toString(response.getEntity())); } /** * Writes all outstanding updates to the last document fetched from the pipeline. 
*/ public boolean saveCurrentDocument() throws IOException, JsonException { if (currentDocument == null) { internalLogger.error("There is no document to write."); return false; } boolean x = save(currentDocument); if (x) { currentDocument = null; } return x; } /** * Writes an entire document to the pipeline. Use is discouraged, try using save(..) whenever possible. */ public boolean saveFull(LocalDocument d) throws IOException, JsonException { boolean res = save(d, false); if (res) { d.markSynced(); } return res; } /** * Writes all outstanding updates to the document since it was initialized. */ public boolean save(LocalDocument d) throws IOException, JsonException { boolean res = save(d, true); if (res) { d.markSynced(); } return res; } private boolean save(LocalDocument d, boolean partialUpdate) throws IOException, JsonException { boolean hasId = d.getID() != null; String s; long start = System.currentTimeMillis(); if (partialUpdate) { s = d.modifiedFieldsToJson(); } else { s = d.toJson(); } long startPost = System.currentTimeMillis(); HttpResponse response = core.post(getWriteUrl(partialUpdate), s); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { if (!hasId) { LocalDocument updated = new LocalDocument(EntityUtils.toString(response.getEntity())); d.putAll(updated); } else { EntityUtils.consume(response.getEntity()); } if (isPerformanceLogging()) { long end = System.currentTimeMillis(); DocumentID docId = d.getID(); logger.info(String.format("type=performance event=update stage_name=%s doc_id=\"%s\" start=%d serialize=%d post=%d end=%d total=%d", stageName, docId, start, startPost - start, end - startPost, end, end - start)); } return true; } logUnexpected("save(partial=" + partialUpdate + ")", response); return false; } public boolean markPending(LocalDocument d) throws IOException { HttpResponse response = core.post(pendingUrl, d.contentFieldsToJson(null)); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { 
EntityUtils.consume(response.getEntity()); return true; } logUnexpected("markPending()", response); return false; } public boolean markFailed(LocalDocument d) throws IOException { HttpResponse response = core.post(failedUrl, d.modifiedFieldsToJson()); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { EntityUtils.consume(response.getEntity()); return true; } logUnexpected("markFailed()", response); return false; } public boolean markFailed(LocalDocument d, Throwable t) throws IOException { d.addError(stageName, t); return markFailed(d); } public boolean markProcessed(LocalDocument d) throws IOException { HttpResponse response = core.post(processedUrl, d.modifiedFieldsToJson()); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { EntityUtils.consume(response.getEntity()); return true; } logUnexpected("markProcessed()", response); return false; } public boolean markDiscarded(LocalDocument d) throws IOException { HttpResponse response = core.post(discardedUrl, d.modifiedFieldsToJson()); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { EntityUtils.consume(response.getEntity()); return true; } logUnexpected("markDiscarded()", response); return false; } private String getWriteUrl(boolean partialUpdate) { String s = writeUrl; s += "&" + NORELEASE_PARAM + "=0"; if (partialUpdate) { s += "&" + PARTIAL_PARAM + "=1"; } else { s += "&" + PARTIAL_PARAM + "=0"; } return s; } public Map getProperties() throws IOException { HttpResponse response = core.get(propertyUrl); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { Map map; try { map = SerializationUtils.fromJson(EntityUtils.toString(response.getEntity())); } catch (JsonException e) { throw new IOException(e); } internalLogger.debug("Successfully retrieved propertyMap with " + map.size() + " entries"); return map; } else if (response.getStatusLine().getStatusCode() == HttpStatus.SC_NOT_FOUND) { internalLogger.debug("No document found matching query"); 
EntityUtils.consume(response.getEntity()); return null; } else { logUnexpected("getProperties()", response); return null; } } private String getFileUrl(DocumentFile df) throws UnsupportedEncodingException { return getFileUrl(df.getFileName(), df.getDocumentId()); } private String getFileUrl(String fileName, DocumentID docid) throws UnsupportedEncodingException { return fileUrl + "&" + RemotePipeline.FILENAME_PARAM + "=" + fileName + "&" + RemotePipeline.DOCID_PARAM + "=" + URLEncoder.encode(docid.toJSON(), "UTF-8"); } public DocumentFile getFile(String fileName, DocumentID docid) throws IOException { HttpResponse response = core.get(getFileUrl(fileName, docid)); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { Object o; try { o = SerializationUtils.toObject(EntityUtils.toString(response.getEntity())); } catch (JsonException e) { throw new IOException(e); } if (!(o instanceof Map)) { return null; } @SuppressWarnings("unchecked") Map map = (Map) o; Date d = (Date) map.get("uploadDate"); String encoding = (String) map.get("encoding"); String mimetype = (String) map.get("mimetype"); String savedByStage = (String) map.get("savedByStage"); InputStream is; if (encoding == null) { is = new ByteArrayInputStream(Base64.decodeBase64(((String) map.get("stream")).getBytes("UTF-8"))); } else { is = new ByteArrayInputStream(Base64.decodeBase64(((String) map.get("stream")).getBytes(encoding))); } DocumentFile df = new DocumentFile(docid, fileName, is, savedByStage, d); df.setEncoding(encoding); df.setMimetype(mimetype); return df; } else { logUnexpected("getFile()", response); return null; } } public boolean saveFile(DocumentFile df) throws IOException { HttpResponse response = core.post(getFileUrl(df), SerializationUtils.toJson(df)); int code = response.getStatusLine().getStatusCode(); if (code == HttpStatus.SC_OK || code == HttpStatus.SC_NO_CONTENT) { EntityUtils.consume(response.getEntity()); return true; } else { logUnexpected("saveFile()", response); return 
false; } } public boolean deleteFile(String fileName, DocumentID docid) throws IOException { HttpResponse response = core.delete(getFileUrl(fileName, docid)); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { EntityUtils.consume(response.getEntity()); return true; } else { logUnexpected("deleteFile()", response); return false; } } @SuppressWarnings("unchecked") public List getFileNames(DocumentID docid) throws IOException { HttpResponse response = core.get(fileUrl + "&" + RemotePipeline.DOCID_PARAM + "=" + URLEncoder.encode(docid.toJSON(), "UTF-8")); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { try { return (List) SerializationUtils.toObject(EntityUtils.toString(response.getEntity())); } catch (JsonException e) { throw new IOException(e); } } else { logUnexpected("getFileNames()", response); return null; } } public List> getFiles(DocumentID docid) throws IOException { List fileNames = getFileNames(docid); List> files = new ArrayList>(); for (String fileName : fileNames) { files.add(getFile(fileName, docid)); } return files; } public String getStageName() { return stageName; } public void setPerformanceLogging(boolean performanceLogging) { this.performanceLogging = performanceLogging; } public boolean isPerformanceLogging() { return performanceLogging; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy