All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.jaeksoft.searchlib.crawler.common.database.AbstractManager Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class, search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTFul API you will be able to integrate quickly and easily advanced full-text search capabilities in your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2012-2013 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.crawler.common.database;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.jaeksoft.searchlib.Client;
import com.jaeksoft.searchlib.Logging;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.crawler.FieldMap;
import com.jaeksoft.searchlib.join.JoinItem;
import com.jaeksoft.searchlib.join.JoinItem.OuterCollector;
import com.jaeksoft.searchlib.request.AbstractSearchRequest;
import com.jaeksoft.searchlib.result.AbstractResultSearch;
import com.jaeksoft.searchlib.scheduler.TaskAbstract;
import com.jaeksoft.searchlib.scheduler.TaskLog;
import com.jaeksoft.searchlib.schema.Indexed;
import com.jaeksoft.searchlib.schema.SchemaField;
import com.jaeksoft.searchlib.schema.SchemaFieldList;
import com.jaeksoft.searchlib.util.map.SourceField;
import com.jaeksoft.searchlib.util.map.TargetField;

public abstract class AbstractManager {

	protected Client targetClient;
	protected Client dbClient;

	private Set taskToCheck;
	private TaskLog currentTaskLog;

	protected AbstractManager() {
		currentTaskLog = null;
		targetClient = null;
		dbClient = null;
		taskToCheck = new HashSet(0);
	}

	final protected void init(Client targetClient, Client dbClient) {
		this.targetClient = targetClient;
		this.dbClient = dbClient;
	}

	final public void free() {
		if (dbClient != null) {
			dbClient.close();
			dbClient = null;
		}
	}

	final public Client getDbClient() {
		return dbClient;
	}

	public void setCurrentTaskLog(TaskLog taskLog) throws SearchLibException {
		synchronized (this) {
			if (currentTaskLog != null)
				throw new SearchLibException("A task is already running: " + currentTaskLog.getTask().getName());
			if (taskLog != null)
				synchronized (taskToCheck) {
					taskToCheck.add(taskLog.getTask());
				}
			currentTaskLog = taskLog;
		}
	}

	protected final String findIndexedFieldOfTargetIndex(FieldMap fieldMap, String sourceField)
			throws SearchLibException {

		List mappedPath = fieldMap.getLinks(new SourceField(sourceField));

		if (mappedPath == null || mappedPath.isEmpty())
			return null;

		SchemaFieldList targetSchemaFieldList = targetClient.getSchema().getFieldList();
		SchemaField targetUniqueField = targetSchemaFieldList.getUniqueField();
		for (TargetField targetField : mappedPath) {
			SchemaField field = targetSchemaFieldList.get(targetField.getName());
			if (field.getIndexed() != Indexed.YES)
				continue;
			if (targetUniqueField != null)
				if (field.getName().equals(targetUniqueField.getName()))
					return field.getName();
			if (field.getIndexAnalyzer() == null)
				return field.getName();
		}
		return null;
	}

	public void resetCurrentTaskLog() {
		synchronized (this) {
			currentTaskLog = null;
		}
	}

	public TaskLog getCurrentTaskLog() {
		synchronized (this) {
			return currentTaskLog;
		}
	}

	public boolean isCurrentTaskLogExists() {
		return getCurrentTaskLog() != null;
	}

	public boolean isNoCurrentTaskLogExists() {
		return !isCurrentTaskLogExists();
	}

	public boolean waitForTask(TaskAbstract taskAbstract, long secTimeOut) throws InterruptedException {
		long timeOut = System.currentTimeMillis() + 1000 * secTimeOut;
		while (timeOut > System.currentTimeMillis()) {
			synchronized (taskToCheck) {
				if (taskToCheck.remove(taskAbstract))
					return true;
			}
			Thread.sleep(1000);
		}
		Logging.warn("Wait for task " + taskAbstract.getName());
		return false;
	}

	public static Date getPastDate(long fetchInterval, String intervalUnit) {
		return new TimeInterval(intervalUnit, fetchInterval).getPastDate(System.currentTimeMillis());
	}

	private class SynchronizedOuterCollector implements OuterCollector {

		private long total;

		private final String field;

		private final int buffer;

		private final List deletionList;

		private final TaskLog taskLog;

		private SynchronizedOuterCollector(String field, int buffer, TaskLog taskLog) {
			this.total = 0;
			this.field = field;
			this.buffer = buffer;
			this.taskLog = taskLog;
			this.deletionList = new ArrayList(buffer);
		}

		@Override
		final public void collect(final int id, final String value) {
			deletionList.add(value);
			if (deletionList.size() >= buffer)
				delete();
		}

		final public void delete() {
			if (deletionList.isEmpty())
				return;
			try {
				total += targetClient.deleteDocuments(field, deletionList);
			} catch (SearchLibException e) {
				taskLog.setError(e);
			}
			taskLog.setInfo(total + " document(s) deleted");
			deletionList.clear();
		}

	}

	protected long synchronizeIndex(AbstractSearchRequest searchRequest, String targetField, String dbField,
			int bufferSize, TaskLog taskLog) throws SearchLibException {
		setCurrentTaskLog(taskLog);
		try {
			if (targetField == null)
				throw new SearchLibException("The primary field is not mapped");
			SynchronizedOuterCollector outerCollector = new SynchronizedOuterCollector(targetField, bufferSize,
					taskLog);
			searchRequest = (AbstractSearchRequest) searchRequest.duplicate();
			JoinItem joinItem = new JoinItem();
			joinItem.setIndexName(targetClient.getIndexName());
			joinItem.setForeignField(dbField);
			joinItem.setLocalField(targetField);
			joinItem.setOuterCollector(outerCollector);
			searchRequest.getJoinList().add(joinItem);
			AbstractResultSearch result = (AbstractResultSearch) dbClient.request(searchRequest);
			outerCollector.delete();
			if (taskLog != null) {
				taskLog.setInfo("URLs: (Found / Deleted: " + result.getNumFound() + " / " + outerCollector.total);
			}
			return outerCollector.total;
		} catch (InstantiationException e) {
			throw new SearchLibException(e);
		} catch (IllegalAccessException e) {
			throw new SearchLibException(e);
		} finally {
			resetCurrentTaskLog();
		}
	}

	final public long getSize() throws SearchLibException {
		try {
			return dbClient.getStatistics().getNumDocs();
		} catch (IOException e) {
			throw new SearchLibException(e);
		}
	}

	final public void deleteAll(TaskLog taskLog) throws SearchLibException {
		setCurrentTaskLog(taskLog);
		try {
			dbClient.deleteAll();
		} finally {
			resetCurrentTaskLog();
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy