com.jaeksoft.searchlib.crawler.rest.RestCrawlItem Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class, search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTFul API you will be able to integrate quickly and easily advanced full-text search capabilities in your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2010-2013 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.crawler.rest;
import java.io.UnsupportedEncodingException;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import com.jaeksoft.searchlib.analysis.LanguageEnum;
import com.jaeksoft.searchlib.crawler.common.process.FieldMapCrawlItem;
import com.jaeksoft.searchlib.crawler.web.database.CredentialItem;
import com.jaeksoft.searchlib.crawler.web.spider.HttpDownloader;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.Variables;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
/**
 * Settings of one REST crawl: the URL and HTTP method to call, optional
 * sequence settings, a credential, a language, a buffer size, the path of
 * the documents in the response, a field map and an optional callback.
 * <p>
 * An item can be created empty, duplicated from another item, loaded from an
 * XML node and written back with {@link #writeXml(XmlWriter)}. Every
 * attribute written by {@code writeXml} is read back by the XML constructor.
 * <p>
 * NOTE(review): the {@code extends} clause carries no type arguments here
 * although {@code copyTo} and {@code compareTo} are typed overrides; the
 * type arguments may have been lost when this file was extracted - confirm
 * against the project sources.
 */
public class RestCrawlItem extends FieldMapCrawlItem {

    private String name;
    private String url;
    private String sequenceParameter;
    private Integer sequenceFromInclusive;
    private Integer sequenceToExclusive;
    private Integer sequenceIncrement;
    private HttpDownloader.Method method;
    private CredentialItem credential;
    private LanguageEnum lang;
    private int bufferSize;
    private String pathDocument;
    private HttpDownloader.Method callbackMethod;
    private String callbackUrl;
    private CallbackMode callbackMode;
    private String callbackQueryParameter;
    private String callbackPayload;

    /**
     * The available callback modes.
     */
    public static enum CallbackMode {

        NO_CALL, ONE_CALL_PER_DOCUMENT, ONE_CALL_FOR_ALL_DOCUMENTS;

        /**
         * Looks up a mode by its constant name, ignoring case.
         *
         * @param name
         *            the name to look for, may be null
         * @return the matching mode, or {@link #NO_CALL} when nothing matches
         */
        public final static CallbackMode find(String name) {
            for (CallbackMode candidate : values()) {
                if (candidate.name().equalsIgnoreCase(name)) {
                    return candidate;
                }
            }
            return NO_CALL;
        }
    }

    /**
     * Creates an unnamed item with default settings: GET for both the crawl
     * and the callback method, an empty credential, an undefined language, a
     * buffer size of 100, no callback, and every other property null.
     *
     * @param crawlMaster
     *            the crawl master passed to the parent class
     */
    public RestCrawlItem(RestCrawlMaster crawlMaster) {
        super(crawlMaster, new RestFieldMap());
        name = null;
        url = null;
        method = HttpDownloader.Method.GET;
        credential = new CredentialItem();
        sequenceParameter = null;
        sequenceFromInclusive = null;
        sequenceToExclusive = null;
        sequenceIncrement = null;
        pathDocument = null;
        lang = LanguageEnum.UNDEFINED;
        bufferSize = 100;
        callbackMethod = HttpDownloader.Method.GET;
        callbackUrl = null;
        callbackMode = CallbackMode.NO_CALL;
        callbackQueryParameter = null;
        callbackPayload = null;
    }

    /**
     * Creates an item with default settings and the given name.
     *
     * @param crawlMaster
     *            the crawl master passed to the parent class
     * @param name
     *            the name of the item
     */
    public RestCrawlItem(RestCrawlMaster crawlMaster, String name) {
        this(crawlMaster);
        this.name = name;
    }

    /**
     * Copy constructor: creates an item attached to the same crawl master as
     * {@code crawl} and copies its settings with {@link #copyTo}.
     *
     * @param crawl
     *            the item to copy
     */
    protected RestCrawlItem(RestCrawlItem crawl) {
        this((RestCrawlMaster) crawl.threadMaster);
        crawl.copyTo(this);
    }

    /**
     * @return a new item holding a copy of the settings of this one
     */
    public RestCrawlItem duplicate() {
        return new RestCrawlItem(this);
    }

    /**
     * Applies the variable replacement to the URL, the sequence parameter,
     * the document path and the credential. Does nothing when
     * {@code variables} is null.
     * <p>
     * NOTE(review): the callback URL, query parameter and payload are not
     * passed through the replacement - confirm that this is intended.
     *
     * @param variables
     *            the variables to apply, may be null
     */
    public void apply(Variables variables) {
        if (variables == null) {
            return;
        }
        url = variables.replace(url);
        sequenceParameter = variables.replace(sequenceParameter);
        pathDocument = variables.replace(pathDocument);
        credential.apply(variables);
    }

    /**
     * Copies every setting of this item into {@code crawl}. The credential is
     * copied into the target's own credential instance rather than shared.
     *
     * @param crawl
     *            the item receiving the settings
     */
    @Override
    public void copyTo(RestCrawlItem crawl) {
        super.copyTo(crawl);
        crawl.setName(this.getName());
        crawl.url = this.url;
        crawl.method = this.method;
        crawl.pathDocument = this.pathDocument;
        this.credential.copyTo(crawl.credential);
        crawl.lang = this.lang;
        crawl.bufferSize = this.bufferSize;
        crawl.callbackMethod = this.callbackMethod;
        crawl.callbackUrl = this.callbackUrl;
        crawl.callbackMode = this.callbackMode;
        crawl.callbackQueryParameter = this.callbackQueryParameter;
        crawl.callbackPayload = this.callbackPayload;
        crawl.sequenceParameter = this.sequenceParameter;
        crawl.sequenceFromInclusive = this.sequenceFromInclusive;
        crawl.sequenceToExclusive = this.sequenceToExclusive;
        crawl.sequenceIncrement = this.sequenceIncrement;
    }

    /**
     * @return the url
     */
    public String getUrl() {
        return url;
    }

    /**
     * @param url
     *            the url to set
     */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * @return the sequenceParameter
     */
    public String getSequenceParameter() {
        return sequenceParameter;
    }

    /**
     * @param sequenceParameter
     *            the sequenceParameter to set
     */
    public void setSequenceParameter(String sequenceParameter) {
        this.sequenceParameter = sequenceParameter;
    }

    /**
     * @return the sequenceFromInclusive, may be null
     */
    public Integer getSequenceFromInclusive() {
        return sequenceFromInclusive;
    }

    /**
     * @param sequenceFromInclusive
     *            the sequenceFromInclusive to set
     */
    public void setSequenceFromInclusive(Integer sequenceFromInclusive) {
        this.sequenceFromInclusive = sequenceFromInclusive;
    }

    /**
     * @return the sequenceToExclusive, may be null
     */
    public Integer getSequenceToExclusive() {
        return sequenceToExclusive;
    }

    /**
     * @param sequenceToExclusive
     *            the sequenceToExclusive to set
     */
    public void setSequenceToExclusive(Integer sequenceToExclusive) {
        this.sequenceToExclusive = sequenceToExclusive;
    }

    /**
     * @return the sequenceIncrement, may be null
     */
    public Integer getSequenceIncrement() {
        return sequenceIncrement;
    }

    /**
     * @param sequenceIncrement
     *            the sequenceIncrement to set
     */
    public void setSequenceIncrement(Integer sequenceIncrement) {
        this.sequenceIncrement = sequenceIncrement;
    }

    /**
     * @return the credential
     */
    public CredentialItem getCredential() {
        return credential;
    }

    /**
     * @param lang
     *            the lang to set
     */
    public void setLang(LanguageEnum lang) {
        this.lang = lang;
    }

    /**
     * @return the lang
     */
    public LanguageEnum getLang() {
        return lang;
    }

    /**
     * @return the fieldMap, cast to the REST specific type
     */
    @Override
    public RestFieldMap getFieldMap() {
        return (RestFieldMap) super.getFieldMap();
    }

    /**
     * @return the bufferSize
     */
    public int getBufferSize() {
        return bufferSize;
    }

    /**
     * @param bufferSize
     *            the bufferSize to set
     */
    public void setBufferSize(int bufferSize) {
        this.bufferSize = bufferSize;
    }

    // Element and attribute names of the XML representation.
    protected final static String REST_CRAWL_NODE_NAME = "restCrawl";
    protected final static String REST_CRAWL_ATTR_NAME = "name";
    protected final static String REST_CRAWL_NODE_CREDENTIAL = "credential";
    protected final static String REST_CRAWL_ATTR_URL = "url";
    protected final static String REST_CRAWL_ATTR_SEQUENCE_PARAMETER = "seqParameter";
    protected final static String REST_CRAWL_ATTR_SEQUENCE_FROM = "seqFrom";
    protected final static String REST_CRAWL_ATTR_SEQUENCE_TO = "seqTo";
    protected final static String REST_CRAWL_ATTR_SEQUENCE_INCREMENT = "seqIncrement";
    protected final static String REST_CRAWL_ATTR_LANG = "lang";
    protected final static String REST_CRAWL_ATTR_METHOD = "method";
    protected final static String REST_CRAWL_ATTR_BUFFER_SIZE = "bufferSize";
    protected final static String REST_CRAWL_NODE_NAME_MAP = "map";
    protected final static String REST_CRAWL_NODE_DOC_PATH = "documentPath";
    protected final static String REST_CRAWL_NODE_CALLBACK = "callback";
    protected final static String REST_CRAWL_CALLBACK_ATTR_METHOD = "method";
    protected final static String REST_CRAWL_CALLBACK_ATTR_URL = "url";
    protected final static String REST_CRAWL_CALLBACK_ATTR_MODE = "mode";
    protected final static String REST_CRAWL_CALLBACK_ATTR_QUERY_PARAM = "queryParam";

    /**
     * Loads an item from its XML representation, the counterpart of
     * {@link #writeXml(XmlWriter)}. Optional child nodes (map, document path,
     * credential, callback) are only applied when present; otherwise the
     * defaults set by {@link #RestCrawlItem(RestCrawlMaster)} remain.
     * <p>
     * NOTE(review): the buffer size is always overwritten with the result of
     * {@code XPathParser.getAttributeValue}; what that helper returns for a
     * missing attribute is not visible here - confirm that the default of 100
     * is not lost for documents lacking the attribute.
     *
     * @param crawlMaster
     *            the crawl master passed to the parent class
     * @param xpp
     *            the parser used to locate the child nodes
     * @param item
     *            the XML node describing the crawl
     * @throws XPathExpressionException
     *             if a child node lookup fails
     */
    public RestCrawlItem(RestCrawlMaster crawlMaster, XPathParser xpp, Node item) throws XPathExpressionException {
        this(crawlMaster);
        setName(XPathParser.getAttributeString(item, REST_CRAWL_ATTR_NAME));
        setUrl(XPathParser.getAttributeString(item, REST_CRAWL_ATTR_URL));
        setSequenceParameter(XPathParser.getAttributeString(item, REST_CRAWL_ATTR_SEQUENCE_PARAMETER));
        setSequenceFromInclusive(DomUtils.getAttributeInteger(item, REST_CRAWL_ATTR_SEQUENCE_FROM, null));
        setSequenceToExclusive(DomUtils.getAttributeInteger(item, REST_CRAWL_ATTR_SEQUENCE_TO, null));
        // The increment is written by writeXml, so it must be read back here;
        // otherwise it is silently dropped on every save/reload cycle.
        setSequenceIncrement(DomUtils.getAttributeInteger(item, REST_CRAWL_ATTR_SEQUENCE_INCREMENT, null));
        setMethod(HttpDownloader.Method.find(DomUtils.getAttributeText(item, REST_CRAWL_ATTR_METHOD),
                HttpDownloader.Method.GET));
        setLang(LanguageEnum.findByCode(XPathParser.getAttributeString(item, REST_CRAWL_ATTR_LANG)));
        setBufferSize(XPathParser.getAttributeValue(item, REST_CRAWL_ATTR_BUFFER_SIZE));

        Node mapNode = xpp.getNode(item, REST_CRAWL_NODE_NAME_MAP);
        if (mapNode != null) {
            getFieldMap().load(mapNode);
        }

        // The document path is stored XML-escaped by writeXml.
        Node pathNode = xpp.getNode(item, REST_CRAWL_NODE_DOC_PATH);
        if (pathNode != null) {
            setPathDocument(StringEscapeUtils.unescapeXml(pathNode.getTextContent()));
        }

        Node credNode = xpp.getNode(item, REST_CRAWL_NODE_CREDENTIAL);
        if (credNode != null) {
            credential = CredentialItem.fromXml(credNode);
        }

        Node callBackNode = DomUtils.getFirstNode(item, REST_CRAWL_NODE_CALLBACK);
        if (callBackNode != null) {
            setCallbackMethod(
                    HttpDownloader.Method.find(DomUtils.getAttributeText(callBackNode, REST_CRAWL_CALLBACK_ATTR_METHOD),
                            HttpDownloader.Method.GET));
            setCallbackUrl(DomUtils.getAttributeText(callBackNode, REST_CRAWL_CALLBACK_ATTR_URL));
            setCallbackMode(CallbackMode.find(DomUtils.getAttributeText(callBackNode, REST_CRAWL_CALLBACK_ATTR_MODE)));
            setCallbackQueryParameter(DomUtils.getAttributeText(callBackNode, REST_CRAWL_CALLBACK_ATTR_QUERY_PARAM));
            // The payload is the text content of the callback element.
            setCallbackPayload(callBackNode.getTextContent());
        }
    }

    /**
     * Writes this item as a {@code restCrawl} element containing, in order,
     * the field map, the callback element (with the payload as text when it
     * is not empty), the escaped document path and the credential.
     *
     * @param xmlWriter
     *            the writer receiving the XML
     * @throws SAXException
     *             propagated from the writer
     * @throws UnsupportedEncodingException
     *             propagated from the writer
     */
    public void writeXml(XmlWriter xmlWriter) throws SAXException, UnsupportedEncodingException {
        xmlWriter.startElement(REST_CRAWL_NODE_NAME, REST_CRAWL_ATTR_NAME, getName(), REST_CRAWL_ATTR_URL, getUrl(),
                REST_CRAWL_ATTR_SEQUENCE_PARAMETER, getSequenceParameter(), REST_CRAWL_ATTR_SEQUENCE_FROM,
                toStringOrNull(sequenceFromInclusive), REST_CRAWL_ATTR_SEQUENCE_TO,
                toStringOrNull(sequenceToExclusive), REST_CRAWL_ATTR_SEQUENCE_INCREMENT,
                toStringOrNull(sequenceIncrement), REST_CRAWL_ATTR_METHOD, method.name(), REST_CRAWL_ATTR_LANG,
                getLang().getCode(), REST_CRAWL_ATTR_BUFFER_SIZE, Integer.toString(getBufferSize()));

        xmlWriter.startElement(REST_CRAWL_NODE_NAME_MAP);
        getFieldMap().store(xmlWriter);
        xmlWriter.endElement();

        xmlWriter.startElement(REST_CRAWL_NODE_CALLBACK, REST_CRAWL_CALLBACK_ATTR_METHOD, callbackMethod.name(),
                REST_CRAWL_CALLBACK_ATTR_MODE, callbackMode.name(), REST_CRAWL_CALLBACK_ATTR_URL, callbackUrl,
                REST_CRAWL_CALLBACK_ATTR_QUERY_PARAM, callbackQueryParameter);
        if (!StringUtils.isEmpty(callbackPayload)) {
            xmlWriter.textNode(callbackPayload);
        }
        xmlWriter.endElement();

        xmlWriter.writeSubTextNodeIfAny(REST_CRAWL_NODE_DOC_PATH, xmlWriter.escapeXml(getPathDocument()));
        credential.writeXml(xmlWriter);
        xmlWriter.endElement();
    }

    /**
     * @param value
     *            the value to convert, may be null
     * @return the decimal text of the value, or null when the value is null
     */
    private static String toStringOrNull(Integer value) {
        return value == null ? null : value.toString();
    }

    /**
     * @return the name
     */
    public String getName() {
        return name;
    }

    /**
     * @param name
     *            the name to set
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * Orders items by name. The name is null until it is set, so null names
     * are tolerated and sorted before any non-null name instead of raising a
     * NullPointerException.
     *
     * @param o
     *            the item to compare with
     * @return a negative, zero or positive value following the name order
     */
    @Override
    public int compareTo(RestCrawlItem o) {
        final String ownName = getName();
        final String otherName = o.getName();
        if (ownName == null) {
            return otherName == null ? 0 : -1;
        }
        if (otherName == null) {
            return 1;
        }
        return ownName.compareTo(otherName);
    }

    /**
     * @return the name of the item, which may be null
     */
    @Override
    public String toString() {
        return getName();
    }

    /**
     * @return the pathDocument
     */
    public String getPathDocument() {
        return pathDocument;
    }

    /**
     * @param pathDocument
     *            the pathDocument to set
     */
    public void setPathDocument(String pathDocument) {
        this.pathDocument = pathDocument;
    }

    /**
     * @return the method
     */
    public HttpDownloader.Method getMethod() {
        return method;
    }

    /**
     * @param method
     *            the method to set
     */
    public void setMethod(HttpDownloader.Method method) {
        this.method = method;
    }

    /**
     * @return the callbackMethod
     */
    public HttpDownloader.Method getCallbackMethod() {
        return callbackMethod;
    }

    /**
     * @param callbackMethod
     *            the callbackMethod to set
     */
    public void setCallbackMethod(HttpDownloader.Method callbackMethod) {
        this.callbackMethod = callbackMethod;
    }

    /**
     * @return the callbackUrl
     */
    public String getCallbackUrl() {
        return callbackUrl;
    }

    /**
     * @param callbackUrl
     *            the callbackUrl to set
     */
    public void setCallbackUrl(String callbackUrl) {
        this.callbackUrl = callbackUrl;
    }

    /**
     * @return the callbackMode
     */
    public CallbackMode getCallbackMode() {
        return callbackMode;
    }

    /**
     * @param callbackMode
     *            the callbackMode to set
     */
    public void setCallbackMode(CallbackMode callbackMode) {
        this.callbackMode = callbackMode;
    }

    /**
     * @return the callbackQueryParameter
     */
    public String getCallbackQueryParameter() {
        return callbackQueryParameter;
    }

    /**
     * @param callbackQueryParameter
     *            the callbackQueryParameter to set
     */
    public void setCallbackQueryParameter(String callbackQueryParameter) {
        this.callbackQueryParameter = callbackQueryParameter;
    }

    /**
     * @return the callbackPayload
     */
    public String getCallbackPayload() {
        return callbackPayload;
    }

    /**
     * @param callbackPayload
     *            the callbackPayload to set
     */
    public void setCallbackPayload(String callbackPayload) {
        this.callbackPayload = callbackPayload;
    }
}