All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.jaeksoft.searchlib.facet.Facet Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you will be able to quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2008-2015 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.facet;

import it.unimi.dsi.fastutil.Arrays;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermFreqVector;
import org.roaringbitmap.RoaringBitmap;

import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.facet.FacetCounter.FacetSorter;
import com.jaeksoft.searchlib.index.FieldCacheIndex;
import com.jaeksoft.searchlib.index.ReaderAbstract;
import com.jaeksoft.searchlib.result.collector.DocIdInterface;
import com.jaeksoft.searchlib.schema.SchemaField;
import com.jaeksoft.searchlib.schema.TermVector;
import com.jaeksoft.searchlib.util.Timer;

public class Facet implements Iterable> {

	protected FacetField facetField;
	private Map facetMap;
	protected transient List> list = null;

	public Facet() {
		list = null;
		facetMap = new HashMap();
	}

	public Facet(FacetField facetField) {
		this();
		this.facetField = facetField;
	}

	private Facet(FacetField facetField, String[] terms, int[] counts) {
		this(facetField);
		int i = 0;
		int minCount = facetField.getMinCount();
		for (int count : counts) {
			String term = terms[i];
			if (term != null && count >= minCount)
				facetMap.put(term, new FacetCounter(count));
			i++;
		}
	}

	private Facet(FacetField facetField, Map facetMap) {
		this(facetField);
		int minCount = facetField.getMinCount();
		for (Map.Entry entry : facetMap.entrySet()) {
			String term = entry.getKey();
			FacetCounter counter = entry.getValue();
			if (term != null && counter.count >= minCount)
				this.facetMap.put(term, counter);
		}
	}

	public FacetField getFacetField() {
		return this.facetField;
	}

	protected void sum(Facet facet) {
		if (facet == null)
			return;
		for (Map.Entry entry : facet) {
			String term = entry.getKey();
			if (term == null)
				continue;
			FacetCounter value = entry.getValue();
			FacetCounter count = facetMap.get(term);
			if (count == null)
				facetMap.put(term, new FacetCounter(value));
			else
				count.add(value);
		}
	}

	public List> getList() {
		synchronized (this) {
			if (list != null)
				return list;
			list = new ArrayList>(
					facetMap.entrySet());
			list = limitOrderBy(facetField, list);
			return list;
		}
	}

	private Map.Entry get(int i) {
		return getList().get(i);
	}

	@Override
	public Iterator> iterator() {
		return getList().iterator();
	}

	public int getTermCount() {
		return getList().size();
	}

	public String getTerm(int i) {
		return get(i).getKey();
	}

	public long getCount(int i) {
		return get(i).getValue().count;
	}

	final static protected Facet facetMultivalued(ReaderAbstract reader,
			SchemaField schemaField, DocIdInterface docIdInterface,
			FacetField facetField, Timer timer) throws IOException,
			SearchLibException {
		String fieldName = facetField.getName();
		if (schemaField.getTermVector() == TermVector.NO) {
			FieldCacheIndex stringIndex = reader.getStringIndex(fieldName);
			int[] countIndex = computeMultivaluedTD(reader, fieldName,
					stringIndex, docIdInterface);
			return new Facet(facetField, stringIndex.lookup, countIndex);
		} else {
			Map facetMap = computeMultivaluedTFV(reader,
					fieldName, docIdInterface);
			return new Facet(facetField, facetMap);
		}
	}

	final static protected Facet facetSingleValue(ReaderAbstract reader,
			DocIdInterface collector, FacetField facetField, Timer timer)
			throws IOException {
		String fieldName = facetField.getName();
		FieldCacheIndex stringIndex = reader.getStringIndex(fieldName);
		int[] countIndex = computeSinglevalued(stringIndex, collector);
		return new Facet(facetField, stringIndex.lookup, countIndex);
	}

	final private static int[] computeMultivaluedTD(ReaderAbstract reader,
			String fieldName, FieldCacheIndex stringIndex,
			DocIdInterface docIdInterface) throws IOException,
			SearchLibException {
		int[] countIndex = new int[stringIndex.lookup.length];
		int indexPos = 0;
		if (docIdInterface.getSize() == 0)
			return countIndex;
		int[] docs = new int[100];
		int[] freqs = new int[100];
		RoaringBitmap bitset = docIdInterface.getBitSet();
		Term oTerm = new Term(fieldName);
		for (String term : stringIndex.lookup) {
			if (term != null) {
				Term t = oTerm.createTerm(term);
				TermDocs termDocs = reader.getTermDocs(t);
				int l;
				while ((l = termDocs.read(docs, freqs)) > 0)
					for (int i = 0; i < l; i++)
						if (freqs[i] > 0)
							if (bitset.contains(docs[i]))
								countIndex[indexPos]++;
				termDocs.close();
			}
			indexPos++;
		}
		return countIndex;
	}

	final private static Map computeMultivaluedTFV(
			ReaderAbstract reader, String fieldName,
			DocIdInterface docIdInterface) throws IOException,
			SearchLibException {
		Map termMap = new HashMap();
		if (docIdInterface.getSize() == 0)
			return termMap;
		for (int docId : docIdInterface.getIds()) {
			TermFreqVector tfv = reader.getTermFreqVector(docId, fieldName);
			if (tfv == null)
				continue;
			String[] terms = tfv.getTerms();
			int[] freqs = tfv.getTermFrequencies();
			if (terms == null || freqs == null)
				continue;
			int i = 0;
			for (String term : terms) {
				if (freqs[i++] > 0) {
					FacetCounter facetItem = termMap.get(term);
					if (facetItem == null)
						termMap.put(term, new FacetCounter(1));
					else
						facetItem.increment();
				}
			}
		}
		return termMap;
	}

	final private static int[] computeSinglevalued(FieldCacheIndex stringIndex,
			DocIdInterface collector) throws IOException {
		int[] countArray = new int[stringIndex.lookup.length];
		int[] order = stringIndex.order;
		int i = collector.getSize();
		for (int id : collector.getIds()) {
			if (i == 0)
				break;
			countArray[order[id]]++;
			i--;
		}
		return countArray;
	}

	final private static List> limitOrderBy(
			FacetField facetField, List> list) {
		FacetSorter facetSorter = FacetSorter.getSorter(list,
				facetField.getOrderBy());
		if (facetSorter == null)
			return list;
		Arrays.quickSort(0, list.size(), facetSorter, facetSorter);
		Integer limit = facetField.getLimit();
		if (limit == null)
			return list;
		if (list.size() <= limit)
			return list;
		return list.subList(0, limit);

	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy