All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.jaeksoft.searchlib.crawler.rest.RestCrawlItem Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class, search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTFul API you will be able to integrate quickly and easily advanced full-text search capabilities in your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2010-2013 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.crawler.rest;

import java.io.UnsupportedEncodingException;

import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import com.jaeksoft.searchlib.analysis.LanguageEnum;
import com.jaeksoft.searchlib.crawler.common.process.FieldMapCrawlItem;
import com.jaeksoft.searchlib.crawler.web.database.CredentialItem;
import com.jaeksoft.searchlib.crawler.web.spider.HttpDownloader;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.Variables;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;

/**
 * Configuration of one REST crawl.
 * <p>
 * An item holds the URL to request and its HTTP method, an optional sequence
 * (parameter name, from/to bounds and increment), the credential, the language,
 * the buffer size, the document path, the field map inherited from
 * {@link FieldMapCrawlItem}, and the settings of an optional callback request.
 * Items can be loaded from a DOM node and written back with
 * {@link #writeXml(XmlWriter)}.
 */
// NOTE(review): the superclass is referenced without type arguments here while
// copyTo(RestCrawlItem) and compareTo(RestCrawlItem) carry @Override; type
// arguments may have been lost when this file was extracted -- confirm against
// the FieldMapCrawlItem declaration.
public class RestCrawlItem extends FieldMapCrawlItem {

	/** Name of the crawl item; also used for ordering and {@link #toString()}. */
	private String name;

	/** URL requested by the crawl. */
	private String url;

	/** Name of the sequence parameter, if any. */
	private String sequenceParameter;

	/** Start of the sequence (inclusive, per the field name); may be null. */
	private Integer sequenceFromInclusive;

	/** End of the sequence (exclusive, per the field name); may be null. */
	private Integer sequenceToExclusive;

	/** Step of the sequence; may be null. */
	private Integer sequenceIncrement;

	/** HTTP method used for the crawl request. */
	private HttpDownloader.Method method;

	/** Credential attached to the crawl request. */
	private CredentialItem credential;

	/** Language associated with the crawl. */
	private LanguageEnum lang;

	/** Buffer size; defaults to 100. */
	private int bufferSize;

	/** Document path expression, stored XML-escaped in the configuration. */
	private String pathDocument;

	/** HTTP method used for the callback request. */
	private HttpDownloader.Method callbackMethod;

	/** URL of the callback request. */
	private String callbackUrl;

	/** Callback invocation mode. */
	private CallbackMode callbackMode;

	/** Name of the query parameter used by the callback. */
	private String callbackQueryParameter;

	/** Payload of the callback, stored as the text of the callback XML node. */
	private String callbackPayload;

	/**
	 * Callback invocation modes.
	 */
	public enum CallbackMode {
		NO_CALL, ONE_CALL_PER_DOCUMENT, ONE_CALL_FOR_ALL_DOCUMENTS;

		/**
		 * Finds a mode by its constant name, ignoring case.
		 * 
		 * @param name
		 *            the name to look up; may be null
		 * @return the matching mode, or {@link #NO_CALL} when no constant
		 *         matches
		 */
		public static CallbackMode find(String name) {
			for (CallbackMode mode : values()) {
				if (mode.name().equalsIgnoreCase(name)) {
					return mode;
				}
			}
			return NO_CALL;
		}
	}

	/**
	 * Creates an item with default settings: GET for both the crawl and the
	 * callback, an empty credential, an undefined language, a buffer size of
	 * 100 and no callback ({@link CallbackMode#NO_CALL}).
	 * 
	 * @param crawlMaster
	 *            the crawl master handed to the superclass
	 */
	public RestCrawlItem(RestCrawlMaster crawlMaster) {
		super(crawlMaster, new RestFieldMap());
		name = null;
		url = null;
		method = HttpDownloader.Method.GET;
		credential = new CredentialItem();
		sequenceParameter = null;
		sequenceFromInclusive = null;
		sequenceToExclusive = null;
		sequenceIncrement = null;
		pathDocument = null;
		lang = LanguageEnum.UNDEFINED;
		bufferSize = 100;
		callbackMethod = HttpDownloader.Method.GET;
		callbackUrl = null;
		callbackMode = CallbackMode.NO_CALL;
		callbackQueryParameter = null;
		callbackPayload = null;
	}

	/**
	 * Creates an item with default settings and the given name.
	 * 
	 * @param crawlMaster
	 *            the crawl master handed to the superclass
	 * @param name
	 *            the name of the item
	 */
	public RestCrawlItem(RestCrawlMaster crawlMaster, String name) {
		this(crawlMaster);
		this.name = name;
	}

	/**
	 * Copy constructor: creates a default item bound to the same crawl master
	 * as {@code crawl}, then copies every setting of {@code crawl} into it.
	 * 
	 * @param crawl
	 *            the item to copy
	 */
	protected RestCrawlItem(RestCrawlItem crawl) {
		this((RestCrawlMaster) crawl.threadMaster);
		crawl.copyTo(this);
	}

	/**
	 * @return a new item holding a copy of this item's settings
	 */
	public RestCrawlItem duplicate() {
		return new RestCrawlItem(this);
	}

	/**
	 * Passes the url, the sequence parameter and the document path through
	 * {@link Variables#replace}, and applies the variables to the credential.
	 * Does nothing when {@code variables} is null.
	 * 
	 * @param variables
	 *            the variables to apply; may be null
	 */
	public void apply(Variables variables) {
		if (variables == null) {
			return;
		}
		url = variables.replace(url);
		sequenceParameter = variables.replace(sequenceParameter);
		pathDocument = variables.replace(pathDocument);
		credential.apply(variables);
	}

	/**
	 * Copies every setting of this item into {@code crawl}. The credential is
	 * copied into the target's existing credential instance rather than shared.
	 * 
	 * @param crawl
	 *            the item receiving the settings
	 */
	@Override
	public void copyTo(RestCrawlItem crawl) {
		super.copyTo(crawl);
		crawl.setName(this.getName());
		crawl.url = this.url;
		crawl.method = this.method;
		crawl.pathDocument = this.pathDocument;
		this.credential.copyTo(crawl.credential);
		crawl.lang = this.lang;
		crawl.bufferSize = this.bufferSize;
		crawl.callbackMethod = this.callbackMethod;
		crawl.callbackUrl = this.callbackUrl;
		crawl.callbackMode = this.callbackMode;
		crawl.callbackQueryParameter = this.callbackQueryParameter;
		crawl.callbackPayload = this.callbackPayload;
		crawl.sequenceParameter = this.sequenceParameter;
		crawl.sequenceFromInclusive = this.sequenceFromInclusive;
		crawl.sequenceToExclusive = this.sequenceToExclusive;
		crawl.sequenceIncrement = this.sequenceIncrement;
	}

	/**
	 * @return the url
	 */
	public String getUrl() {
		return url;
	}

	/**
	 * @param url
	 *            the url to set
	 */
	public void setUrl(String url) {
		this.url = url;
	}

	/**
	 * @return the sequenceParameter
	 */
	public String getSequenceParameter() {
		return sequenceParameter;
	}

	/**
	 * @param sequenceParameter
	 *            the sequenceParameter to set
	 */
	public void setSequenceParameter(String sequenceParameter) {
		this.sequenceParameter = sequenceParameter;
	}

	/**
	 * @return the sequenceFromInclusive, or null when not set
	 */
	public Integer getSequenceFromInclusive() {
		return sequenceFromInclusive;
	}

	/**
	 * @param sequenceFromInclusive
	 *            the sequenceFromInclusive to set; may be null
	 */
	public void setSequenceFromInclusive(Integer sequenceFromInclusive) {
		this.sequenceFromInclusive = sequenceFromInclusive;
	}

	/**
	 * @return the sequenceToExclusive, or null when not set
	 */
	public Integer getSequenceToExclusive() {
		return sequenceToExclusive;
	}

	/**
	 * @param sequenceToExclusive
	 *            the sequenceToExclusive to set; may be null
	 */
	public void setSequenceToExclusive(Integer sequenceToExclusive) {
		this.sequenceToExclusive = sequenceToExclusive;
	}

	/**
	 * @return the sequenceIncrement, or null when not set
	 */
	public Integer getSequenceIncrement() {
		return sequenceIncrement;
	}

	/**
	 * @param sequenceIncrement
	 *            the sequenceIncrement to set; may be null
	 */
	public void setSequenceIncrement(Integer sequenceIncrement) {
		this.sequenceIncrement = sequenceIncrement;
	}

	/**
	 * @return the credential (the live instance, not a copy)
	 */
	public CredentialItem getCredential() {
		return credential;
	}

	/**
	 * @param lang
	 *            the lang to set
	 */
	public void setLang(LanguageEnum lang) {
		this.lang = lang;
	}

	/**
	 * @return the lang
	 */
	public LanguageEnum getLang() {
		return lang;
	}

	/**
	 * @return the fieldMap, cast to {@link RestFieldMap}
	 */
	@Override
	public RestFieldMap getFieldMap() {
		return (RestFieldMap) super.getFieldMap();
	}

	/**
	 * @return the bufferSize
	 */
	public int getBufferSize() {
		return bufferSize;
	}

	/**
	 * @param bufferSize
	 *            the bufferSize to set
	 */
	public void setBufferSize(int bufferSize) {
		this.bufferSize = bufferSize;
	}

	// Element and attribute names of the XML representation read by the
	// Node-based constructor and written by writeXml.
	protected final static String REST_CRAWL_NODE_NAME = "restCrawl";
	protected final static String REST_CRAWL_ATTR_NAME = "name";
	protected final static String REST_CRAWL_NODE_CREDENTIAL = "credential";
	protected final static String REST_CRAWL_ATTR_URL = "url";
	protected final static String REST_CRAWL_ATTR_SEQUENCE_PARAMETER = "seqParameter";
	protected final static String REST_CRAWL_ATTR_SEQUENCE_FROM = "seqFrom";
	protected final static String REST_CRAWL_ATTR_SEQUENCE_TO = "seqTo";
	protected final static String REST_CRAWL_ATTR_SEQUENCE_INCREMENT = "seqIncrement";
	protected final static String REST_CRAWL_ATTR_LANG = "lang";
	protected final static String REST_CRAWL_ATTR_METHOD = "method";
	protected final static String REST_CRAWL_ATTR_BUFFER_SIZE = "bufferSize";
	protected final static String REST_CRAWL_NODE_NAME_MAP = "map";
	protected final static String REST_CRAWL_NODE_DOC_PATH = "documentPath";
	protected final static String REST_CRAWL_NODE_CALLBACK = "callback";
	protected final static String REST_CRAWL_CALLBACK_ATTR_METHOD = "method";
	protected final static String REST_CRAWL_CALLBACK_ATTR_URL = "url";
	protected final static String REST_CRAWL_CALLBACK_ATTR_MODE = "mode";
	protected final static String REST_CRAWL_CALLBACK_ATTR_QUERY_PARAM = "queryParam";

	/**
	 * Loads an item from its XML representation.
	 * <p>
	 * Attributes are read from {@code item}; the field map, the document path,
	 * the credential and the callback settings are read from child nodes when
	 * those nodes are present, otherwise the defaults of
	 * {@link #RestCrawlItem(RestCrawlMaster)} are kept.
	 * 
	 * @param crawlMaster
	 *            the crawl master handed to the superclass
	 * @param xpp
	 *            the XPath parser used to locate child nodes
	 * @param item
	 *            the node holding the item
	 * @throws XPathExpressionException
	 *             if a child node lookup fails
	 */
	public RestCrawlItem(RestCrawlMaster crawlMaster, XPathParser xpp, Node item) throws XPathExpressionException {
		this(crawlMaster);
		setName(XPathParser.getAttributeString(item, REST_CRAWL_ATTR_NAME));
		setUrl(XPathParser.getAttributeString(item, REST_CRAWL_ATTR_URL));
		setSequenceParameter(XPathParser.getAttributeString(item, REST_CRAWL_ATTR_SEQUENCE_PARAMETER));
		setSequenceFromInclusive(DomUtils.getAttributeInteger(item, REST_CRAWL_ATTR_SEQUENCE_FROM, null));
		setSequenceToExclusive(DomUtils.getAttributeInteger(item, REST_CRAWL_ATTR_SEQUENCE_TO, null));
		// writeXml stores the increment, so it must be read back here;
		// otherwise the value is lost on every save/reload cycle.
		setSequenceIncrement(DomUtils.getAttributeInteger(item, REST_CRAWL_ATTR_SEQUENCE_INCREMENT, null));
		setMethod(HttpDownloader.Method.find(DomUtils.getAttributeText(item, REST_CRAWL_ATTR_METHOD),
				HttpDownloader.Method.GET));
		setLang(LanguageEnum.findByCode(XPathParser.getAttributeString(item, REST_CRAWL_ATTR_LANG)));
		// NOTE(review): what getAttributeValue returns for a missing attribute
		// is not visible here; confirm it cannot replace the default of 100
		// with an unusable buffer size.
		setBufferSize(XPathParser.getAttributeValue(item, REST_CRAWL_ATTR_BUFFER_SIZE));
		Node mapNode = xpp.getNode(item, REST_CRAWL_NODE_NAME_MAP);
		if (mapNode != null) {
			getFieldMap().load(mapNode);
		}
		Node pathNode = xpp.getNode(item, REST_CRAWL_NODE_DOC_PATH);
		if (pathNode != null) {
			// The path is escaped by writeXml, hence the unescape on load.
			setPathDocument(StringEscapeUtils.unescapeXml(pathNode.getTextContent()));
		}
		Node credNode = xpp.getNode(item, REST_CRAWL_NODE_CREDENTIAL);
		if (credNode != null) {
			credential = CredentialItem.fromXml(credNode);
		}
		Node callBackNode = DomUtils.getFirstNode(item, REST_CRAWL_NODE_CALLBACK);
		if (callBackNode != null) {
			setCallbackMethod(
					HttpDownloader.Method.find(DomUtils.getAttributeText(callBackNode, REST_CRAWL_CALLBACK_ATTR_METHOD),
							HttpDownloader.Method.GET));
			setCallbackUrl(DomUtils.getAttributeText(callBackNode, REST_CRAWL_CALLBACK_ATTR_URL));
			setCallbackMode(CallbackMode.find(DomUtils.getAttributeText(callBackNode, REST_CRAWL_CALLBACK_ATTR_MODE)));
			setCallbackQueryParameter(DomUtils.getAttributeText(callBackNode, REST_CRAWL_CALLBACK_ATTR_QUERY_PARAM));
			setCallbackPayload(callBackNode.getTextContent());
		}
	}

	/**
	 * Writes this item as a {@code restCrawl} element containing the field
	 * map, the callback element (always written, with the payload as text when
	 * it is not empty), the XML-escaped document path and the credential.
	 * 
	 * @param xmlWriter
	 *            the writer receiving the XML
	 * @throws SAXException
	 *             if the writer reports an error
	 * @throws UnsupportedEncodingException
	 *             if the writer reports an unsupported encoding
	 */
	public void writeXml(XmlWriter xmlWriter) throws SAXException, UnsupportedEncodingException {
		xmlWriter.startElement(REST_CRAWL_NODE_NAME, REST_CRAWL_ATTR_NAME, getName(), REST_CRAWL_ATTR_URL, getUrl(),
				REST_CRAWL_ATTR_SEQUENCE_PARAMETER, getSequenceParameter(), REST_CRAWL_ATTR_SEQUENCE_FROM,
				sequenceFromInclusive == null ? null : sequenceFromInclusive.toString(), REST_CRAWL_ATTR_SEQUENCE_TO,
				sequenceToExclusive == null ? null : sequenceToExclusive.toString(), REST_CRAWL_ATTR_SEQUENCE_INCREMENT,
				sequenceIncrement == null ? null : sequenceIncrement.toString(), REST_CRAWL_ATTR_METHOD, method.name(),
				REST_CRAWL_ATTR_LANG, getLang().getCode(), REST_CRAWL_ATTR_BUFFER_SIZE,
				Integer.toString(getBufferSize()));
		xmlWriter.startElement(REST_CRAWL_NODE_NAME_MAP);
		getFieldMap().store(xmlWriter);
		xmlWriter.endElement();
		xmlWriter.startElement(REST_CRAWL_NODE_CALLBACK, REST_CRAWL_CALLBACK_ATTR_METHOD, callbackMethod.name(),
				REST_CRAWL_CALLBACK_ATTR_MODE, callbackMode.name(), REST_CRAWL_CALLBACK_ATTR_URL, callbackUrl,
				REST_CRAWL_CALLBACK_ATTR_QUERY_PARAM, callbackQueryParameter);
		if (!StringUtils.isEmpty(callbackPayload)) {
			xmlWriter.textNode(callbackPayload);
		}
		xmlWriter.endElement();
		xmlWriter.writeSubTextNodeIfAny(REST_CRAWL_NODE_DOC_PATH, xmlWriter.escapeXml(getPathDocument()));
		credential.writeXml(xmlWriter);
		xmlWriter.endElement();
	}

	/**
	 * @return the name
	 */
	public String getName() {
		return name;
	}

	/**
	 * @param name
	 *            the name to set
	 */
	public void setName(String name) {
		this.name = name;
	}

	/**
	 * Orders items by name. An item whose name is null (the default for a
	 * freshly created item) sorts before any named item, and two unnamed items
	 * compare as equal, instead of failing with a NullPointerException.
	 * 
	 * @param o
	 *            the item to compare with; must not be null
	 * @return a negative, zero or positive value as this item's name is less
	 *         than, equal to or greater than the other item's name
	 */
	@Override
	public int compareTo(RestCrawlItem o) {
		String thisName = getName();
		String otherName = o.getName();
		if (thisName == null) {
			return otherName == null ? 0 : -1;
		}
		if (otherName == null) {
			return 1;
		}
		return thisName.compareTo(otherName);
	}

	/**
	 * @return the name of the item (null when no name is set)
	 */
	@Override
	public String toString() {
		return getName();
	}

	/**
	 * @return the pathDocument
	 */
	public String getPathDocument() {
		return pathDocument;
	}

	/**
	 * @param pathDocument
	 *            the pathDocument to set
	 */
	public void setPathDocument(String pathDocument) {
		this.pathDocument = pathDocument;
	}

	/**
	 * @return the method
	 */
	public HttpDownloader.Method getMethod() {
		return method;
	}

	/**
	 * @param method
	 *            the method to set
	 */
	public void setMethod(HttpDownloader.Method method) {
		this.method = method;
	}

	/**
	 * @return the callbackMethod
	 */
	public HttpDownloader.Method getCallbackMethod() {
		return callbackMethod;
	}

	/**
	 * @param callbackMethod
	 *            the callbackMethod to set
	 */
	public void setCallbackMethod(HttpDownloader.Method callbackMethod) {
		this.callbackMethod = callbackMethod;
	}

	/**
	 * @return the callbackUrl
	 */
	public String getCallbackUrl() {
		return callbackUrl;
	}

	/**
	 * @param callbackUrl
	 *            the callbackUrl to set
	 */
	public void setCallbackUrl(String callbackUrl) {
		this.callbackUrl = callbackUrl;
	}

	/**
	 * @return the callbackMode
	 */
	public CallbackMode getCallbackMode() {
		return callbackMode;
	}

	/**
	 * @param callbackMode
	 *            the callbackMode to set
	 */
	public void setCallbackMode(CallbackMode callbackMode) {
		this.callbackMode = callbackMode;
	}

	/**
	 * @return the callbackQueryParameter
	 */
	public String getCallbackQueryParameter() {
		return callbackQueryParameter;
	}

	/**
	 * @param callbackQueryParameter
	 *            the callbackQueryParameter to set
	 */
	public void setCallbackQueryParameter(String callbackQueryParameter) {
		this.callbackQueryParameter = callbackQueryParameter;
	}

	/**
	 * @return the callbackPayload
	 */
	public String getCallbackPayload() {
		return callbackPayload;
	}

	/**
	 * @param callbackPayload
	 *            the callbackPayload to set
	 */
	public void setCallbackPayload(String callbackPayload) {
		this.callbackPayload = callbackPayload;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy