All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.big.mg4j.search.AbstractCompositeDocumentIterator Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2006-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.index.IndexIterator;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.util.Interval;

import java.io.IOException;

/** An abstract iterator on documents, based on a list of component iterators.
 * 
 * 

The {@linkplain #AbstractCompositeDocumentIterator(DocumentIterator[]) constructor} caches * into {@link #documentIterator} the component iterators, and sets up a number of protected * fields that can be useful to implementors. It also provide abstract member classes that make it * easier to implement interval iterators. * *

Note that this class implements both {@link #accept(DocumentIteratorVisitor)} * and {@link #acceptOnTruePaths(DocumentIteratorVisitor)} with a series of recursive * calls on all component iterator. If you desire a different behaviour * for {@link #acceptOnTruePaths(DocumentIteratorVisitor)} (see, e.g., * {@link it.unimi.dsi.big.mg4j.search.AbstractUnionDocumentIterator}, please override it. */ public abstract class AbstractCompositeDocumentIterator extends AbstractDocumentIterator implements DocumentIterator { /** The number of component iterators. */ public final int n; /** The component document iterators. */ protected final DocumentIterator[] documentIterator; /** A cached copy of {@link #documentIterator}, if all * underlying iterators are {@linkplain IndexIterator index iterators}; null, otherwise. */ protected final IndexIterator[] indexIterator; /** The set of indices involved in this iterator. */ protected final ReferenceArraySet indices = new ReferenceArraySet(); /** If not null, the sole index involved in this iterator. */ protected final Index soleIndex; /** Creates a new composite document iterator using a given list of component document iterators and * a specified index. * * @param index an index that will constitute the only index for which this iterator will return intervals, * or null to require the computation of the set of indices as the union of the indices * of all component iterators. * @param documentIterator the component iterators. */ protected AbstractCompositeDocumentIterator( final Index index, final DocumentIterator... documentIterator ) { this.documentIterator = documentIterator; this.n = documentIterator.length; if ( index == null ) { /* Now, for each index involved we build a corresponding interval iterator. * Note that the set indices() may contain indices from empty subqueries, too. */ for( int i = n; i-- != 0; ) indices.addAll( documentIterator[ i ].indices() ); soleIndex = indices.size() == 1 ? 
indices.iterator().next() : null; } else { soleIndex = index; indices.add( index ); } int i = n; while( i-- != 0 ) if ( ! ( documentIterator[ i ] instanceof IndexIterator ) ) break; if ( i == -1 ) { indexIterator = new IndexIterator[ n ]; System.arraycopy( documentIterator, 0, indexIterator, 0, n ); } else indexIterator = null; } /** Creates a new composite document iterator using a given list of component document iterators. * * @param documentIterator the component iterators. */ protected AbstractCompositeDocumentIterator( final DocumentIterator... documentIterator ) { this( null, documentIterator ); } public T accept( final DocumentIteratorVisitor visitor ) throws IOException { if ( ! visitor.visitPre( this ) ) return null; final T[] a = visitor.newArray( n ); if ( a == null ) { for( int i = 0; i < n; i++ ) if ( documentIterator[ i ] != null && documentIterator[ i ].accept( visitor ) == null ) return null; } else { for( int i = 0; i < n; i++ ) if ( documentIterator[ i ] != null && ( a[ i ] = documentIterator[ i ].accept( visitor ) ) == null ) return null; } return visitor.visitPost( this, a ); } public T acceptOnTruePaths( final DocumentIteratorVisitor visitor ) throws IOException { if ( ! 
visitor.visitPre( this ) ) return null; final T[] a = visitor.newArray( n ); if ( a == null ) { for( int i = 0; i < n; i++ ) if ( documentIterator[ i ] != null && documentIterator[ i ].acceptOnTruePaths( visitor ) == null ) return null; } else { for( int i = 0; i < n; i++ ) if ( documentIterator[ i ] != null && ( a[ i ] = documentIterator[ i ].acceptOnTruePaths( visitor ) ) == null ) return null; } return visitor.visitPost( this, a ); } public ReferenceSet indices() { return indices; } public IntervalIterator intervalIterator() throws IOException { if ( soleIndex == null ) throw new IllegalStateException(); return intervalIterator( soleIndex ); } public void dispose() throws IOException { for( DocumentIterator d: documentIterator ) d.dispose(); } public String toString() { StringBuilder res = new StringBuilder(); res.append( this.getClass().getSimpleName() ).append( "(" ); for ( int i = 0; i < n; i++ ) res.append( i > 0 ? "," : "" ).append( documentIterator[ i ] ); res.append( ")" ); if ( weight != 1 ) res.append( '{' ).append( weight ).append( '}' ); return res.toString(); } /** An abstract interval iterator. Provide mainly storage for the {@linkplain #intervalIterator component interval iterators}, * place for {@linkplain #curr the last interval returned by each iterator} and {@link #toString()}. */ protected abstract static class AbstractCompositeIntervalIterator extends AbstractIntervalIterator { /** The underlying iterators. */ protected IntervalIterator[] intervalIterator; /** The last interval returned by each iterator. */ protected Interval[] curr; public AbstractCompositeIntervalIterator( final int n ) { // We just set up some internal data, but we perform no initialisation. curr = new Interval[ n ]; intervalIterator = new IntervalIterator[ n ]; } public String toString() { MutableString res = new MutableString(); res.append( this.getClass().getName() ).append( "(" ).delete( 0, res.lastIndexOf( '.' 
) + 1 ); for ( int i = 0; i < intervalIterator.length; i++ ) res.append( i > 0 ? "," : "" ).append( intervalIterator[ i ] ); return res.append( ")" ).toString(); } } /** An abstract {@link IndexIterator}-based interval iterator. The difference with {@link AbstractCompositeIntervalIterator} * is that this class assumes that all document iterators are actually index iterators. * The algorithms in this (very common) case can be significantly simplified, obtaining * a large gain in performance. */ protected abstract static class AbstractCompositeIndexIntervalIterator extends AbstractIntervalIterator { /** The position arrays returned by each index iterator. */ protected int[][] position; /** The index of current element of {@link #position} for each index iterator. */ protected int[] currPos; /** At any time, curr[ i ] contains position[ i ][ currPos[i ] ]. */ protected int[] curr; /** The number of elements of {@link #position} for each index iterator. */ protected int[] count; public AbstractCompositeIndexIntervalIterator( final int n ) { // We just set up some internal data, but we perform no initialisation. position = new int[ n ][]; count = new int[ n ]; currPos = new int[ n ]; curr = new int[ n ]; } public String toString() { MutableString res = new MutableString(); res.append( this.getClass().getName() ).append( "(" ).delete( 0, res.lastIndexOf( '.' ) + 1 ); for ( int i = 0; i < position.length; i++ ) res.append( i > 0 ? "," : "" ).append( position[ i ] != null ? IntArrayList.wrap( position[ i ], count[ i ] ) : "{}" ); return res.append( ")" ).toString(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy