com.jaeksoft.searchlib.Client Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class, search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTFul API you will be able to integrate quickly and easily advanced full-text search capabilities in your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
/**
 * License Agreement for OpenSearchServer
 * 
 * Copyright (C) 2008-2013 Emmanuel Keller / Jaeksoft
 * 

 * http://www.open-search-server.com
 * 

 * This file is part of OpenSearchServer.
 * 

 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 

 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with OpenSearchServer.
 * If not, see .
 **/

package com.jaeksoft.searchlib;

import com.jaeksoft.searchlib.analysis.LanguageEnum;
import com.jaeksoft.searchlib.config.Config;
import com.jaeksoft.searchlib.crawler.web.database.CredentialItem;
import com.jaeksoft.searchlib.crawler.web.spider.HttpDownloader;
import com.jaeksoft.searchlib.index.IndexDocument;
import com.jaeksoft.searchlib.index.IndexStatistics;
import com.jaeksoft.searchlib.request.AbstractRequest;
import com.jaeksoft.searchlib.request.DocumentsRequest;
import com.jaeksoft.searchlib.result.AbstractResult;
import com.jaeksoft.searchlib.schema.Schema;
import com.jaeksoft.searchlib.schema.SchemaField;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.IOUtils;
import com.jaeksoft.searchlib.util.InfoCallback;
import com.jaeksoft.searchlib.util.Timer;
import com.jaeksoft.searchlib.webservice.query.document.IndexDocumentResult;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.w3c.dom.Node;

import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPathExpressionException;
import java.io.*;
import java.net.URISyntaxException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Client extends Config {

	public Client(File initFileOrDir, boolean createIndexIfNotExists, boolean disableCrawler,
			String silentReplicationUrl) throws SearchLibException {
		super(initFileOrDir, null, createIndexIfNotExists, disableCrawler, silentReplicationUrl);
	}

	public Client(File initFileOrDir, String resourceName, boolean createIndexIfNotExists) throws SearchLibException {
		super(initFileOrDir, resourceName, createIndexIfNotExists, false, null);
	}

	public Client(File initFile) throws SearchLibException {
		this(initFile, false, false, null);
	}

	/**
	 * Insert or update a document in the index. If an unique key is defined in
	 * the schema, the document is updated if it already exists.
	 *
	 * @param document the document to udpate
	 * @return true if the document has been updated
	 * @throws IOException        inherited error
	 * @throws SearchLibException inherited error
	 */
	public boolean updateDocument(IndexDocument document) throws SearchLibException, IOException {
		Timer timer = new Timer("Update document " + document.toString());
		try {
			checkMaxStorageLimit();
			checkMaxDocumentLimit();
			Schema schema = getSchema();
			document.prepareCopyOf(schema);
			return getIndexAbstract().updateDocument(schema, document);
		} finally {
			getStatisticsList().addUpdate(timer);
		}
	}

	public int updateIndexDocuments(Collection indexDocuments)
			throws SearchLibException, IOException {
		Timer timer = new Timer("Update " + indexDocuments.size() + " documents");
		try {
			checkMaxStorageLimit();
			checkMaxDocumentLimit();
			Schema schema = getSchema();
			return getIndexAbstract().updateIndexDocuments(schema, indexDocuments);
		} finally {
			getStatisticsList().addUpdate(timer);
		}
	}

	public int updateDocuments(Collection documents) throws IOException, SearchLibException {
		Timer timer = new Timer("Update " + documents.size() + " documents");
		try {
			checkMaxStorageLimit();
			checkMaxDocumentLimit();
			Schema schema = getSchema();
			for (IndexDocument document : documents)
				document.prepareCopyOf(schema);
			return getIndexAbstract().updateDocuments(schema, documents);
		} finally {
			getStatisticsList().addUpdate(timer);
		}
	}

	private final int updateDocList(int totalCount, int docCount, Collection docList,
			InfoCallback infoCallBack)
			throws NoSuchAlgorithmException, IOException, URISyntaxException, SearchLibException,
			InstantiationException, IllegalAccessException, ClassNotFoundException {
		checkMaxStorageLimit();
		checkMaxDocumentLimit();
		docCount += updateDocuments(docList);
		StringBuilder sb = new StringBuilder();
		sb.append(docCount);
		if (totalCount > 0) {
			sb.append(" / ");
			sb.append(totalCount);
		}
		sb.append(" document(s) updated.");
		if (infoCallBack != null)
			infoCallBack.setInfo(sb.toString());
		else
			Logging.info(sb.toString());
		docList.clear();
		return docCount;
	}

	public int updateXmlDocuments(Node document, int bufferSize, CredentialItem urlDefaultCredential,
			HttpDownloader httpDownloader, InfoCallback infoCallBack)
			throws XPathExpressionException, NoSuchAlgorithmException, IOException, URISyntaxException,
			SearchLibException, InstantiationException, IllegalAccessException, ClassNotFoundException {
		List nodeList = DomUtils.getNodes(document, "index", "document");
		Collection docList = new ArrayList(bufferSize);
		int docCount = 0;
		final int totalCount = nodeList.size();
		for (Node node : nodeList) {
			docList.add(new IndexDocument(this, getParserSelector(), node, urlDefaultCredential, httpDownloader));
			if (docList.size() == bufferSize)
				docCount = updateDocList(totalCount, docCount, docList, infoCallBack);
		}
		if (docList.size() > 0)
			docCount = updateDocList(totalCount, docCount, docList, infoCallBack);
		return docCount;
	}

	public int updateTextDocuments(StreamSource streamSource, String charset, Integer bufferSize, String capturePattern,
			Integer langPosition, List fieldList, InfoCallback infoCallBack)
			throws SearchLibException, IOException, NoSuchAlgorithmException, URISyntaxException,
			InstantiationException, IllegalAccessException, ClassNotFoundException {
		if (capturePattern == null)
			throw new SearchLibException("No capture pattern");
		if (fieldList == null || fieldList.size() == 0)
			throw new SearchLibException("empty field list");
		String[] fields = fieldList.toArray(new String[fieldList.size()]);
		Matcher matcher = Pattern.compile(capturePattern).matcher("");
		BufferedReader br = null;
		Reader reader = null;
		SchemaField uniqueSchemaField = getSchema().getFieldList().getUniqueField();
		String uniqueField = uniqueSchemaField != null ? uniqueSchemaField.getName() : null;
		if (charset == null)
			charset = "UTF-8";
		if (bufferSize == null)
			bufferSize = 50;
		try {
			Collection docList = new ArrayList(bufferSize);
			reader = streamSource.getReader();
			if (reader == null)
				reader = new InputStreamReader(streamSource.getInputStream(), charset);
			br = reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);
			String line;
			int docCount = 0;
			IndexDocument lastDocument = null;
			String lastUniqueValue = null;
			while ((line = br.readLine()) != null) {
				matcher.reset(line);
				if (!matcher.matches())
					continue;
				LanguageEnum lang = LanguageEnum.UNDEFINED;
				int matcherGroupCount = matcher.groupCount();
				if (langPosition != null && matcherGroupCount >= langPosition)
					lang = LanguageEnum.findByNameOrCode(matcher.group(langPosition));
				IndexDocument document = new IndexDocument(lang);
				int i = matcherGroupCount < fields.length ? matcherGroupCount : fields.length;
				String uniqueValue = null;
				while (i > 0) {
					String value = matcher.group(i--);
					String f = fields[i];
					document.add(f, value, 1.0F);
					if (f.equals(uniqueField))
						uniqueValue = value;
				}
				// Consecutive documents with same uniqueKey value are merged
				// (multivalued)
				if (uniqueField != null && lastDocument != null && uniqueValue != null && uniqueValue
						.equals(lastUniqueValue)) {
					lastDocument.addIfNotAlreadyHere(document);
					continue;
				}
				docList.add(document);
				if (docList.size() == bufferSize)
					docCount = updateDocList(0, docCount, docList, infoCallBack);
				lastUniqueValue = uniqueValue;
				lastDocument = document;
			}
			if (docList.size() > 0)
				docCount = updateDocList(0, docCount, docList, infoCallBack);
			return docCount;
		} finally {
			if (br != null)
				if (br != reader)
					IOUtils.close(br);
		}
	}

	private final int deleteUniqueKeyList(int totalCount, int docCount, Collection deleteList,
			InfoCallback infoCallBack) throws SearchLibException {
		docCount += deleteDocuments(getSchema().getUniqueField(), deleteList);
		StringBuilder sb = new StringBuilder();
		sb.append(docCount);
		sb.append(" / ");
		sb.append(totalCount);
		sb.append(" XML document(s) deleted.");
		if (infoCallBack != null)
			infoCallBack.setInfo(sb.toString());
		else
			Logging.info(sb.toString());
		deleteList.clear();
		return docCount;
	}

	public int deleteXmlDocuments(Node xmlDoc, int bufferSize, InfoCallback infoCallBack)
			throws XPathExpressionException, NoSuchAlgorithmException, IOException, URISyntaxException,
			SearchLibException, InstantiationException, IllegalAccessException, ClassNotFoundException {
		List deleteNodeList = DomUtils.getNodes(xmlDoc, "index", "delete");
		Collection deleteList = new ArrayList(bufferSize);
		int deleteCount = 0;
		final int totalCount = deleteNodeList.size();
		for (Node deleteNode : deleteNodeList) {
			List uniqueKeyNodeList = DomUtils.getNodes(deleteNode, "uniquekey");
			for (Node uniqueKeyNode : uniqueKeyNodeList) {
				deleteList.add(uniqueKeyNode.getTextContent());
				if (deleteList.size() == bufferSize)
					deleteCount = deleteUniqueKeyList(totalCount, deleteCount, deleteList, infoCallBack);
			}
		}
		if (deleteList.size() > 0)
			deleteCount = deleteUniqueKeyList(totalCount, deleteCount, deleteList, infoCallBack);
		return deleteCount;
	}

	private void checkField(String field) throws SearchLibException {
		if (StringUtils.isEmpty(field))
			throw new SearchLibException("No field has been given.");
		if (getSchema().getField(field) == null)
			throw new SearchLibException("The field " + field + " does not exist.");
	}

	public int deleteDocuments(String field, Collection values) throws SearchLibException {
		checkField(field);
		return deleteDocuments(new DocumentsRequest(this, field, values, false));
	}

	public int deleteDocument(String field, String value) throws SearchLibException {
		List values = new ArrayList(1);
		values.add(value);
		return deleteDocuments(field, values);
	}

	public int deleteDocuments(AbstractRequest request) throws SearchLibException {
		Timer timer = new Timer("Delete by query documents");
		try {
			return getIndexAbstract().deleteDocuments(request);
		} finally {
			getStatisticsList().addDelete(timer);
		}
	}

	public void deleteAll() throws SearchLibException {
		Timer timer = new Timer("DeleteAll");
		try {
			getIndexAbstract().deleteAll();
		} finally {
			getStatisticsList().addDelete(timer);
		}
	}

	public void reload() throws SearchLibException {
		Timer timer = new Timer("Reload");
		try {
			getIndexAbstract().reload();
		} finally {
			getStatisticsList().addReload(timer);
		}
	}

	public void setOnline(boolean online) throws SearchLibException {
		if (online == getIndexAbstract().isOnline())
			return;
		getIndexAbstract().setOnline(online);
	}

	public boolean isOnline() {
		return getIndexAbstract().isOnline();
	}

	public AbstractResult request(AbstractRequest request) throws SearchLibException {
		Timer timer = null;
		AbstractResult result = null;
		SearchLibException exception = null;
		try {
			request.init(this);
			timer = new Timer(request.getNameType());
			result = getIndexAbstract().request(request);
			return result;
		} catch (SearchLibException e) {
			exception = e;
			throw e;
		} catch (Exception e) {
			exception = new SearchLibException(e);
			throw exception;
		} finally {
			if (timer != null) {
				timer.getDuration();
				if (exception != null)
					timer.setError(exception);
				getStatisticsList().addSearch(timer);
				getLogReportManager().log(request, timer, result);
			}
		}
	}

	public String explain(AbstractRequest request, int docId, boolean bHtml) throws SearchLibException {
		return getIndexAbstract().explain(request, docId, bHtml);
	}

	protected final void checkMaxDocumentLimit() throws SearchLibException, IOException {
		ClientFactory.INSTANCE.properties.checkMaxDocumentLimit();
	}

	protected void checkMaxStorageLimit() throws SearchLibException {
		ClientFactory.INSTANCE.properties.checkMaxStorageLimit();
	}

	public IndexStatistics getStatistics() throws IOException, SearchLibException {
		return getIndexAbstract().getStatistics();
	}

	public TermEnum getTermEnum(Term term) throws SearchLibException {
		return getIndexAbstract().getTermEnum(term);
	}

	private final static String REPL_CHECK_FILENAME = "repl.check";

	public boolean isTrueReplicate() {
		return new File(this.getDirectory(), REPL_CHECK_FILENAME).exists();
	}

	public void writeReplCheck() throws IOException {
		new File(this.getDirectory(), REPL_CHECK_FILENAME).createNewFile();
	}

	public void removeReplCheck() {
		new File(this.getDirectory(), REPL_CHECK_FILENAME).delete();
	}

}