All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.jaeksoft.searchlib.autocompletion.AutoCompletionBuildThread Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class search engine. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you can quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2012-2015 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.autocompletion;

import java.io.IOException;
import java.lang.Thread.State;
import java.net.URISyntaxException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.roaringbitmap.RoaringBitmap;

import com.jaeksoft.searchlib.Client;
import com.jaeksoft.searchlib.Logging;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.index.IndexDocument;
import com.jaeksoft.searchlib.process.ThreadAbstract;
import com.jaeksoft.searchlib.request.AbstractRequest;
import com.jaeksoft.searchlib.request.AbstractSearchRequest;
import com.jaeksoft.searchlib.result.AbstractResultSearch;
import com.jaeksoft.searchlib.result.collector.DocIdInterface;
import com.jaeksoft.searchlib.util.IOUtils;
import com.jaeksoft.searchlib.util.InfoCallback;

/**
 * Thread that rebuilds the auto-completion index from the terms of a source
 * index.
 * <p>
 * Each run first empties the auto-completion index, then walks the terms of
 * the configured fields in the source index and writes one document per term
 * (fields {@code term}, {@code cluster} and, when available, {@code freq}).
 * When a search request name is configured, only the terms that occur in at
 * least one document matched by that request are kept.
 * <p>
 * The configuration is supplied through
 * {@link #init(Collection, String, int)}.
 */
public class AutoCompletionBuildThread extends ThreadAbstract {

	/** Number of buffered documents that triggers a flush until init() says otherwise. */
	private static final int DEFAULT_BUFFER_SIZE = 50;

	private volatile Client sourceClient;
	private volatile Client autoCompClient;
	private volatile String searchRequest;
	private volatile String[] fieldNames;
	private volatile TermEnum termEnum;
	private volatile int bufferSize;

	/**
	 * Creates a build thread with no fields and no search request configured.
	 * 
	 * @param sourceClient
	 *            the client of the index the terms are read from
	 * @param autoCompClient
	 *            the client of the auto-completion index that is rebuilt
	 * @param infoCallBack
	 *            passed to the super class; used by this class to report
	 *            progress when it is not null
	 */
	protected AutoCompletionBuildThread(Client sourceClient, Client autoCompClient, InfoCallback infoCallBack) {
		super(sourceClient, null, null, infoCallBack);
		this.sourceClient = sourceClient;
		this.autoCompClient = autoCompClient;
		this.fieldNames = null;
		this.searchRequest = null;
		this.termEnum = null;
		this.bufferSize = DEFAULT_BUFFER_SIZE;
	}

	/**
	 * @return the name of the current thread state, or {@code "STOPPED"} when
	 *         no state is available
	 */
	public String getStatus() {
		State state = getThreadState();
		if (state == null)
			return "STOPPED";
		return state.toString();
	}

	/**
	 * @return the number of documents reported by the statistics of the
	 *         auto-completion index
	 */
	public int getIndexNumDocs() throws IOException, SearchLibException {
		return autoCompClient.getStatistics().getNumDocs();
	}

	/**
	 * Sends the buffered documents to the auto-completion index and empties
	 * the buffer. Does nothing when the buffer is empty.
	 * 
	 * @param docCount
	 *            the number of terms indexed so far
	 * @param buffer
	 *            the pending documents; cleared once they have been sent
	 * @return {@code docCount} increased by the value returned by the update
	 */
	final private int indexBuffer(int docCount, List<IndexDocument> buffer)
			throws SearchLibException, NoSuchAlgorithmException, IOException, URISyntaxException,
			InstantiationException, IllegalAccessException, ClassNotFoundException {
		if (buffer.isEmpty())
			return docCount;
		docCount += autoCompClient.updateDocuments(buffer);
		buffer.clear();
		if (infoCallback != null)
			infoCallback.setInfo(docCount + " term(s) indexed");
		return docCount;
	}

	/**
	 * Builds the document for one term, appends it to the buffer and flushes
	 * the buffer once it has reached the configured size.
	 * 
	 * @param term
	 *            the term text, stored in both the "term" and "cluster" fields
	 * @param freq
	 *            the document frequency, stored in "freq"; skipped when null
	 * @param buffer
	 *            the pending documents
	 * @param docCount
	 *            the number of terms indexed so far
	 * @return the updated number of indexed terms
	 */
	private int indexTerm(String term, Integer freq, List<IndexDocument> buffer, int docCount)
			throws NoSuchAlgorithmException, SearchLibException, IOException, URISyntaxException,
			InstantiationException, IllegalAccessException, ClassNotFoundException {
		IndexDocument indexDocument = new IndexDocument();
		indexDocument.addString("term", term);
		indexDocument.addString("cluster", term);
		if (freq != null)
			indexDocument.addString("freq", Integer.toString(freq));
		buffer.add(indexDocument);
		// ">=" rather than "==": with a buffer size of zero or less an
		// equality test never fires and the buffer would grow until the end
		// of the run.
		if (buffer.size() >= bufferSize)
			docCount = indexBuffer(docCount, buffer);
		return docCount;
	}

	/**
	 * Indexes every term of the configured fields.
	 * 
	 * @param buffer
	 *            the pending documents
	 * @param docCount
	 *            the number of terms indexed so far
	 * @return the updated number of indexed terms; unchanged when no field is
	 *         configured
	 */
	private int buildTermEnum(List<IndexDocument> buffer, int docCount)
			throws SearchLibException, NoSuchAlgorithmException, IOException, URISyntaxException,
			InstantiationException, IllegalAccessException, ClassNotFoundException {
		// Read the volatile field once so the null check and the loop agree.
		final String[] fields = fieldNames;
		if (fields == null)
			return docCount;
		for (String fieldName : fields) {
			termEnum = sourceClient.getTermEnum(new Term(fieldName, ""));
			try {
				Term term = null;
				while ((term = termEnum.term()) != null) {
					// The enumeration is not limited to the requested field:
					// stop as soon as a term of another field shows up.
					if (!fieldName.equals(term.field()))
						break;
					if (isAborted())
						break;
					docCount = indexTerm(term.text(), termEnum.docFreq(), buffer, docCount);
					termEnum.next();
				}
			} finally {
				IOUtils.close(termEnum);
			}
			// Do not open an enumeration for the remaining fields once aborted.
			if (isAborted())
				break;
		}
		return docCount;
	}

	/**
	 * Indexes the terms of the configured fields that occur in at least one
	 * document matched by the configured search request.
	 * 
	 * @param buffer
	 *            the pending documents
	 * @param docCount
	 *            the number of terms indexed so far
	 * @return the updated number of indexed terms; unchanged when no field is
	 *         configured or when the request yields no document
	 * @throws SearchLibException
	 *             if the request is unknown or is not a search request
	 */
	private int buildSearchRequest(List<IndexDocument> buffer, int docCount)
			throws SearchLibException, IOException, NoSuchAlgorithmException, URISyntaxException,
			InstantiationException, IllegalAccessException, ClassNotFoundException {
		final String[] fields = fieldNames;
		if (fields == null)
			return docCount;
		final String requestName = searchRequest;
		AbstractRequest request = sourceClient.getNewRequest(requestName);
		if (request == null)
			throw new SearchLibException("Request not found " + requestName);
		if (!(request instanceof AbstractSearchRequest))
			throw new SearchLibException("The request " + requestName + " is not a Search request ");
		AbstractSearchRequest searchReq = (AbstractSearchRequest) request;
		// No rows requested: only the result's document ids are used below.
		searchReq.setRows(0);
		AbstractResultSearch result = (AbstractResultSearch) sourceClient.request(request);
		if (result == null)
			return docCount;
		DocIdInterface docIds = result.getDocs();
		if (docIds == null)
			return docCount;
		RoaringBitmap bitSet = docIds.getBitSet();
		if (bitSet == null || bitSet.isEmpty())
			return docCount;
		for (String fieldName : fields) {
			termEnum = sourceClient.getTermEnum(new Term(fieldName, ""));
			try {
				Term term = null;
				while ((term = termEnum.term()) != null) {
					if (isAborted())
						break;
					if (!fieldName.equals(term.field()))
						break;
					// Keep the term only if one of its documents is part of
					// the search result.
					boolean add = false;
					TermDocs termDocs = sourceClient.getIndex().getTermDocs(term);
					try {
						while (!add && termDocs.next())
							add = bitSet.contains(termDocs.doc());
					} finally {
						// One TermDocs is opened per term; release it here so
						// they do not pile up over the whole enumeration.
						IOUtils.close(termDocs);
					}
					if (add)
						docCount = indexTerm(term.text(), termEnum.docFreq(), buffer, docCount);
					termEnum.next();
				}
			} finally {
				IOUtils.close(termEnum);
			}
			if (isAborted())
				break;
		}
		return docCount;
	}

	/**
	 * Empties the auto-completion index and rebuilds it, using the search
	 * request when one is configured and the plain term enumeration
	 * otherwise. Documents still buffered at the end are flushed.
	 */
	@Override
	public void runner() throws Exception {
		autoCompClient.deleteAll();
		List<IndexDocument> buffer = new ArrayList<IndexDocument>();
		int docCount = 0;
		final String requestName = searchRequest;
		if (requestName != null && requestName.length() > 0)
			docCount = buildSearchRequest(buffer, docCount);
		else
			docCount = buildTermEnum(buffer, docCount);
		// Flush whatever is left below the buffer size.
		indexBuffer(docCount, buffer);
	}

	/**
	 * Closes the term enumeration still referenced by this thread, if any. A
	 * failure to close is logged as a warning and not propagated.
	 */
	@Override
	public void release() {
		if (termEnum != null) {
			try {
				termEnum.close();
			} catch (IOException e) {
				Logging.warn(e);
			}
			termEnum = null;
		}
	}

	/**
	 * Sets the configuration used by the next run.
	 * 
	 * @param fieldNames
	 *            the fields whose terms are indexed; null means no field, in
	 *            which case a run indexes nothing
	 * @param searchRequest
	 *            the name of the search request restricting the terms; null
	 *            or empty to index every term
	 * @param bufferSize
	 *            the number of buffered documents that triggers a flush
	 */
	public void init(Collection<String> fieldNames, String searchRequest, int bufferSize) {
		this.fieldNames = fieldNames == null ? null : fieldNames.toArray(new String[fieldNames.size()]);
		this.searchRequest = searchRequest;
		this.bufferSize = bufferSize;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy