All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.jaeksoft.searchlib.snippet.SnippetQueries Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class, search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTFul API you will be able to integrate quickly and easily advanced full-text search capabilities in your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2013-2014 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see .
 **/

package com.jaeksoft.searchlib.snippet;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import com.jaeksoft.searchlib.snippet.SnippetVectors.SnippetVector;
import com.jaeksoft.searchlib.util.Timer;

class SnippetQueries {

	private final String field;
	private final Map termMap;
	private final List termList;
	private final Set termQuerySet;
	private final Set termPhraseSet;
	private final List termSequenceList;
	final String[] terms;

	SnippetQueries(final Query query, final String field) {
		this.field = field;
		termQuerySet = new TreeSet();
		termPhraseSet = new TreeSet();
		termMap = new TreeMap();
		termList = new ArrayList();
		termSequenceList = new ArrayList(2);
		parse(query);
		terms = termList.toArray(new String[termList.size()]);
	}

	private final int checkTerm(final String term) {
		Integer pos = termMap.get(term);
		if (pos != null)
			return pos;
		pos = termList.size();
		termMap.put(term, pos);
		termList.add(term);
		return pos;
	}

	private final void parse(final TermQuery query) {
		Term term = query.getTerm();
		if (!field.equals(term.field()))
			return;
		int pos = checkTerm(term.text());
		termQuerySet.add(pos);
	}

	private final static class TermSequence {

		private final int slop;
		private final int[] terms;

		private TermSequence(final List termPosSequence, final int slop) {
			int i = 0;
			terms = new int[termPosSequence.size()];
			for (Integer termPos : termPosSequence)
				terms[i++] = termPos;
			this.slop = slop;
		}

	}

	private final void parse(final PhraseQuery query) {
		Term[] terms = query.getTerms();
		if (terms == null)
			return;

		List termPosSequence = new ArrayList(terms.length);
		for (Term term : terms) {
			if (!field.equals(term.field()))
				continue;
			int pos = checkTerm(term.text());
			termPosSequence.add(pos);
			termPhraseSet.add(pos);
		}
		// Term sequences with one term are not phrase queries
		if (termPosSequence.size() <= 1)
			return;
		termSequenceList
				.add(new TermSequence(termPosSequence, query.getSlop()));
	}

	private final void parse(final BooleanQuery query) {
		BooleanClause[] clauses = query.getClauses();
		if (clauses == null)
			return;
		for (BooleanClause clause : clauses) {
			switch (clause.getOccur()) {
			case MUST:
			case SHOULD:
				parse(clause.getQuery());
				break;
			default:
				break;
			}
		}
	}

	private final void parse(final Query query) {
		if (query == null)
			return;
		if (query instanceof BooleanQuery)
			parse((BooleanQuery) query);
		else if (query instanceof TermQuery)
			parse((TermQuery) query);
		else if (query instanceof PhraseQuery)
			parse((PhraseQuery) query);
	}

	private final void checkTermQueries(
			final Collection vectors, final long expiration) {
		if (termQuerySet.isEmpty())
			return;
		for (SnippetVector vector : vectors) {
			if (!vector.query) {
				if (termQuerySet.contains(vector.term))
					vector.query = true;
				if (expiration != 0)
					if (System.currentTimeMillis() > expiration)
						return;
			}
		}
	}

	private static class SequenceCollector {

		private enum Result {
			WRONG, CONTINUE, FULL
		};

		private final TermSequence termSequence;
		private final SnippetVector[] vectors;
		private int foundPos;
		private int nextPosition;
		private int nextTerm;

		private SequenceCollector(final TermSequence termSequence,
				final SnippetVector vector) {
			this.termSequence = termSequence;
			vectors = new SnippetVector[termSequence.terms.length];
			foundPos = 0;
			addVector(vector);
		}

		private final Result addVector(final SnippetVector vector) {
			vectors[foundPos++] = vector;
			nextPosition = vector.position + termSequence.slop + 1;
			if (foundPos == vectors.length)
				return Result.FULL;
			nextTerm = termSequence.terms[foundPos];
			return Result.CONTINUE;
		}

		private final Result collect(final SnippetVector vector) {
			if (vector.position > nextPosition)
				return Result.WRONG;
			if (vector.term != nextTerm)
				return Result.CONTINUE;
			if (addVector(vector) != Result.FULL)
				return Result.CONTINUE;
			for (SnippetVector v : vectors)
				v.query = true;
			return Result.FULL;
		}
	}

	private final void checkPhraseQueries(
			final Collection vectors, final long expiration) {
		if (termSequenceList.isEmpty())
			return;
		Set collectors = new HashSet();
		List toRemove = new ArrayList();
		for (SnippetVector vector : vectors) {
			if (!(termPhraseSet.contains(vector.term)))
				continue;
			for (TermSequence termSequence : termSequenceList) {
				if (termSequence.terms[0] == vector.term)
					collectors.add(new SequenceCollector(termSequence, vector));
			}
			for (SequenceCollector collector : collectors) {
				switch (collector.collect(vector)) {
				case CONTINUE:
					break;
				case WRONG:
				case FULL:
					toRemove.add(collector);
					break;
				}
			}
			if (!toRemove.isEmpty()) {
				collectors.removeAll(toRemove);
				toRemove.clear();
			}
			if (expiration != 0)
				if (System.currentTimeMillis() > expiration)
					return;
		}
	}

	final void checkQueries(final Collection vectors,
			final Timer parentTimer, final long expiration) {
		if (vectors == null)
			return;
		Timer t = new Timer(parentTimer, "checkTermQueries");
		checkTermQueries(vectors, expiration);
		t.end(null);
		t = new Timer(parentTimer, "checkPhraseQueries");
		checkPhraseQueries(vectors, expiration);
		t.end(null);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy