All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.jaeksoft.searchlib.learning.Learner Maven / Gradle / Ivy

/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2013 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.learning;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.io.FileUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

import com.jaeksoft.searchlib.Client;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.crawler.FieldMap;
import com.jaeksoft.searchlib.index.IndexDocument;
import com.jaeksoft.searchlib.query.ParseException;
import com.jaeksoft.searchlib.util.DomUtils;
import com.jaeksoft.searchlib.util.InfoCallback;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;

public class Learner implements InfoCallback {

	private final static String LEARNER_ITEM_ROOT_NODE_NAME = "learner";
	private final static String LEARNER_ITEM_ROOT_ATTR_NAME = "name";
	private final static String LEARNER_ITEM_ROOT_ATTR_ACTIVE = "active";
	private final static String LEARNER_ITEM_ROOT_ATTR_BUFFER = "buffer";
	private final static String LEARNER_ITEM_ROOT_ATTR_MAX_RANK = "maxRank";
	private final static String LEARNER_ITEM_ROOT_ATTR_MIN_SCORE = "minScore";
	private final static String LEARNER_ITEM_ROOT_ATTR_SEARCH_REQUEST = "searchRequest";
	private final static String LEARNER_ITEM_MAP_SRC_NODE_NAME = "sourceFields";
	private final static String LEARNER_ITEM_MAP_TGT_NODE_NAME = "targetFields";

	private final ReadWriteLock rwl = new ReadWriteLock();

	private String name;

	private String searchRequest;

	private final FieldMap sourceFieldMap;

	private final FieldMap targetFieldMap;

	private boolean active;

	private int maxRank;

	private double minScore;

	private int buffer;

	private LearnerInterface learnerInstance;

	private Client client;

	private final ReadWriteLock rwlStatusLock = new ReadWriteLock();

	public static enum RunningStatus {
		Learning, Error, Idle;
	}

	private RunningStatus runningStatus;

	private String lastRunInfo;

	public Learner(Client client) {
		this.client = client;
		this.name = null;
		active = false;
		learnerInstance = null;
		searchRequest = null;
		sourceFieldMap = new FieldMap();
		targetFieldMap = new FieldMap();
		maxRank = 1;
		minScore = 0;
		buffer = 1000;
		runningStatus = RunningStatus.Idle;
		lastRunInfo = null;
	}

	public Learner(Learner source) {
		this(source.client);
		source.copyTo(this);
	}

	public void copyTo(Learner target) {
		rwl.r.lock();
		try {
			target.rwl.w.lock();
			try {
				target.client = client;
				target.name = name;
				target.active = active;
				target.searchRequest = searchRequest;
				target.learnerInstance = learnerInstance;
				sourceFieldMap.copyTo(target.sourceFieldMap);
				targetFieldMap.copyTo(target.targetFieldMap);
				target.maxRank = maxRank;
				target.minScore = minScore;
				target.setRunningStatus(this.getRunningStatus(),
						this.getLastRunInfo());
			} finally {
				target.rwl.w.unlock();
			}
		} finally {
			rwl.r.unlock();
		}
	}

	protected Learner(Client client, File file)
			throws ParserConfigurationException, SAXException, IOException,
			XPathExpressionException, SearchLibException {
		this(client);
		if (!file.exists())
			return;
		Document document = DomUtils.readXml(new StreamSource(file), false);
		Node rootNode = DomUtils.getFirstNode(document,
				LEARNER_ITEM_ROOT_NODE_NAME);
		if (rootNode == null)
			return;
		setName(XPathParser.getAttributeString(rootNode,
				LEARNER_ITEM_ROOT_ATTR_NAME));
		setActive("yes".equalsIgnoreCase(XPathParser.getAttributeString(
				rootNode, LEARNER_ITEM_ROOT_ATTR_ACTIVE)));
		setSearchRequest(XPathParser.getAttributeString(rootNode,
				LEARNER_ITEM_ROOT_ATTR_SEARCH_REQUEST));
		setMinScore(XPathParser.getAttributeDouble(rootNode,
				LEARNER_ITEM_ROOT_ATTR_MIN_SCORE));
		setMaxRank(XPathParser.getAttributeValue(rootNode,
				LEARNER_ITEM_ROOT_ATTR_MAX_RANK));
		setBuffer(XPathParser.getAttributeValue(rootNode,
				LEARNER_ITEM_ROOT_ATTR_BUFFER));
		sourceFieldMap.load(DomUtils.getFirstNode(rootNode,
				LEARNER_ITEM_MAP_SRC_NODE_NAME));
		targetFieldMap.load(DomUtils.getFirstNode(rootNode,
				LEARNER_ITEM_MAP_TGT_NODE_NAME));
	}

	/**
	 * 
	 * @return
	 */
	public FieldMap getSourceFieldMap() {
		rwl.r.lock();
		try {
			return sourceFieldMap;
		} finally {
			rwl.r.unlock();
		}
	}

	public FieldMap getTargetFieldMap() {
		rwl.r.lock();
		try {
			return targetFieldMap;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param name
	 *            the name to set
	 */
	public void setName(String name) {
		rwl.w.lock();
		try {
			this.name = name;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the name
	 */
	public String getName() {
		rwl.r.lock();
		try {
			return name;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param active
	 *            the active to set
	 */
	public void setActive(boolean active) {
		rwl.w.lock();
		try {
			this.active = active;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the active
	 */
	public boolean isActive() {
		rwl.r.lock();
		try {
			return active;
		} finally {
			rwl.r.unlock();
		}
	}

	public void writeXml(XmlWriter xmlWriter) throws SAXException {
		rwl.r.lock();
		try {
			xmlWriter.startElement(LEARNER_ITEM_ROOT_NODE_NAME,
					LEARNER_ITEM_ROOT_ATTR_NAME, name,
					LEARNER_ITEM_ROOT_ATTR_SEARCH_REQUEST, searchRequest,
					LEARNER_ITEM_ROOT_ATTR_ACTIVE, active ? "yes" : "no",
					LEARNER_ITEM_ROOT_ATTR_MAX_RANK, Integer.toString(maxRank),
					LEARNER_ITEM_ROOT_ATTR_MIN_SCORE,
					Double.toString(minScore), LEARNER_ITEM_ROOT_ATTR_BUFFER,
					Integer.toString(buffer));
			xmlWriter.startElement(LEARNER_ITEM_MAP_SRC_NODE_NAME);
			sourceFieldMap.store(xmlWriter);
			xmlWriter.endElement();
			xmlWriter.startElement(LEARNER_ITEM_MAP_TGT_NODE_NAME);
			targetFieldMap.store(xmlWriter);
			xmlWriter.endElement();
			xmlWriter.endElement();
		} finally {
			rwl.r.unlock();
		}
	}

	public String getSearchRequest() {
		rwl.r.lock();
		try {
			return searchRequest;
		} finally {
			rwl.r.unlock();
		}
	}

	public void setSearchRequest(String searchRequest) {
		rwl.w.lock();
		try {
			this.searchRequest = searchRequest;
		} finally {
			rwl.w.unlock();
		}
	}

	private final File getInstancesDataFile() {
		if (name == null)
			return null;
		return new File(client.getLearnerDirectory(), name + ".data");
	}

	public LearnerInterface getInstance() throws SearchLibException {
		rwl.r.lock();
		try {
			if (learnerInstance != null) {
				learnerInstance.init(getInstancesDataFile());
				return learnerInstance;
			}
		} finally {
			rwl.r.unlock();
		}
		rwl.w.lock();
		try {
			if (learnerInstance != null)
				return learnerInstance;
			if (client == null)
				return null;
			learnerInstance = new StandardLearner();
			learnerInstance.init(getInstancesDataFile());
			return learnerInstance;
		} finally {
			rwl.w.unlock();
		}
	}

	public void learn(Collection documents)
			throws SearchLibException {
		LearnerInterface instance = getInstance();
		rwl.r.lock();
		try {
			instance.learn(client, searchRequest, documents, sourceFieldMap);
		} catch (IOException e) {
			throw new SearchLibException(e);
		} finally {
			rwl.r.unlock();
		}
	}

	public void remove(String field, Collection values)
			throws SearchLibException {
		LearnerInterface instance = getInstance();
		rwl.r.lock();
		try {
			instance.remove(client, searchRequest, field, values,
					sourceFieldMap);
		} finally {
			rwl.r.unlock();
		}

	}

	// TODO Implement classify while indexing
	public void classify(IndexDocument document) throws SearchLibException {
		LearnerInterface instance = getInstance();
		rwl.r.lock();
		try {
		} finally {
			rwl.r.unlock();
		}
	}

	private LearnerResultItem[] populateCustoms(LearnerResultItem[] results)
			throws SearchLibException, ParseException {
		LearnerInterface instance = getInstance();
		if (results == null)
			return results;
		LearnerResultItem[] newResults = new LearnerResultItem[results.length];
		int i = 0;
		for (LearnerResultItem result : results)
			newResults[i++] = new LearnerResultItem(result,
					instance.getCustoms(result.getName()));
		return newResults;
	}

	public LearnerResultItem[] classify(Client client, String text,
			Integer max_rank, Double min_score) throws SearchLibException {
		LearnerInterface instance = getInstance();
		rwl.r.lock();
		try {
			if (max_rank == null)
				max_rank = maxRank;
			if (min_score == null)
				min_score = minScore;
			List list = new ArrayList(0);
			instance.classify(text, sourceFieldMap, max_rank, min_score, list);
			LearnerResultItem[] results = LearnerResultItem.sortArray(list);
			results = LearnerResultItem.maxRank(results, max_rank);
			if (sourceFieldMap.isMapped("custom"))
				results = populateCustoms(results);
			return results;
		} catch (IOException e) {
			throw new SearchLibException(e);
		} catch (ParseException e) {
			throw new SearchLibException(e);
		} finally {
			rwl.r.unlock();
		}
	}

	public LearnerResultItem[] similar(Client client, String text,
			Integer max_rank, Double min_score) throws SearchLibException {
		LearnerInterface instance = getInstance();
		rwl.r.lock();
		try {
			if (max_rank == null)
				max_rank = maxRank;
			if (min_score == null)
				min_score = minScore;
			List list = new ArrayList(0);
			instance.similar(text, sourceFieldMap, max_rank, min_score, list);
			LearnerResultItem[] results = LearnerResultItem.sortArray(list);
			results = LearnerResultItem.maxRank(results, max_rank);
			if (sourceFieldMap.isMapped("custom"))
				results = populateCustoms(results);
			return results;
		} catch (IOException e) {
			throw new SearchLibException(e);
		} catch (ParseException e) {
			throw new SearchLibException(e);
		} finally {
			rwl.r.unlock();
		}
	}

	public void learn(InfoCallback callback) throws SearchLibException {
		LearnerInterface instance = getInstance();
		rwl.r.lock();
		try {
			if (isRunning())
				throw new SearchLibException("The learner is already running: "
						+ name);
			if (callback == null)
				callback = this;
			setRunningStatus(RunningStatus.Learning, "");
			instance.learn(client, searchRequest, sourceFieldMap, buffer,
					callback);
			setRunningStatus(RunningStatus.Idle, callback.getInfo());
		} catch (IOException e) {
			setRunningStatus(RunningStatus.Error, e.getMessage());
			throw new SearchLibException(e);
		} catch (SearchLibException e) {
			setRunningStatus(RunningStatus.Error, e.getMessage());
			throw e;
		} finally {
			rwl.r.unlock();
		}
	}

	public void reset() throws SearchLibException, IOException {
		if (isRunning())
			throw new SearchLibException(
					"Cannot reset the learner while running: " + name);
		LearnerInterface instance = getInstance();
		instance.reset();
		File f = getInstancesDataFile();
		if (f.exists())
			if (f.isFile())
				f.delete();
			else if (f.isDirectory())
				FileUtils.deleteDirectory(f);
	}

	public long getDocumentCount() throws SearchLibException {
		LearnerInterface instance = getInstance();
		rwl.r.lock();
		try {
			return instance.getDocumentCount();
		} catch (IOException e) {
			throw new SearchLibException(e);
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @return the maxRank
	 */
	public int getMaxRank() {
		rwl.r.lock();
		try {
			return maxRank;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param maxRank
	 *            the maxRank to set
	 */
	public void setMaxRank(int maxRank) {
		rwl.w.lock();
		try {
			this.maxRank = maxRank;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the minScore
	 */
	public double getMinScore() {
		rwl.r.lock();
		try {
			return minScore;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param minScore
	 *            the minScore to set
	 */
	public void setMinScore(double minScore) {
		rwl.w.lock();
		try {
			this.minScore = minScore;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the buffer
	 */
	public double getBuffer() {
		rwl.r.lock();
		try {
			return buffer;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param buffer
	 *            the buffer to set
	 */
	public void setBuffer(int buffer) {
		rwl.w.lock();
		try {
			if (buffer <= 0)
				buffer = 1;
			this.buffer = buffer;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the running status
	 */
	public RunningStatus getRunningStatus() {
		rwlStatusLock.r.lock();
		try {
			return runningStatus;
		} finally {
			rwlStatusLock.r.unlock();
		}
	}

	/**
	 * @param RunningStatus
	 *            the RunningStatus to set
	 */
	protected void setRunningStatus(RunningStatus runningStatus,
			String lastRunInfo) {
		rwlStatusLock.w.lock();
		try {
			if (runningStatus != null)
				this.runningStatus = runningStatus;
			if (lastRunInfo != null)
				this.lastRunInfo = lastRunInfo;
		} finally {
			rwlStatusLock.w.unlock();
		}
	}

	/**
	 * @return the lastRunInfo
	 */
	public String getLastRunInfo() {
		rwlStatusLock.r.lock();
		try {
			return lastRunInfo;
		} finally {
			rwlStatusLock.r.unlock();
		}
	}

	@Override
	public String getInfo() {
		return getLastRunInfo();
	}

	@Override
	public void setInfo(String info) {
		setRunningStatus(null, info);
	}

	public boolean isRunning() {
		return getRunningStatus() == RunningStatus.Learning;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy