All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.big.mg4j.search.RemappingDocumentIterator Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2009-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.query.nodes.Remap;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;

import java.io.IOException;
import java.util.Map;

/** A decorator that remaps interval iterator requests.
 *
 * <p>Sometimes it is necessary to combine with positional operators
 * (e.g., {@link ConsecutiveDocumentIterator}) intervals from different indices.
 * By wrapping with an instance of this class a {@link DocumentIterator}, the requests
 * for intervals will be remapped following a map given at construction time.
 *
 * <p>This class distinguishes between <em>internal indices</em>, which are those actually
 * provided by the underlying {@link DocumentIterator}, and <em>external indices</em>, which
 * are those exposed by {@link #indices()}, and with which {@link #intervalIterator(Index)}
 * should be called.
 * The map provided at construction time should remap external indices to internal
 * indices (note the inversion w.r.t. {@link Remap}).
 * In many cases, a {@linkplain Reference2ReferenceMaps#singleton(Object, Object) singleton map}
 * will be appropriate.
 *
 * @author Sebastiano Vigna
 * @since 2.2
 */
public class RemappingDocumentIterator implements DocumentIterator {
	/** The underlying document iterator. */
	private final DocumentIterator documentIterator;
	/** If not <code>null</code>, the sole external index involved in this iterator. */
	private final Index soleIndex;
	/** The set of external indices. */
	private final ReferenceSet<Index> indices;
	/** A map from external to internal indices. */
	private final Reference2ReferenceMap<? extends Index, ? extends Index> indexInverseRemapping;
	/** A map from external indices to the iterators already returned for the current document. The key set may
	 * not contain an index because the related iterator has never been requested. */
	private final Reference2ReferenceArrayMap<Index, IntervalIterator> currentIterators;
	/** An unmodifiable wrapper around {@link #currentIterators}. */
	private final Reference2ReferenceMap<Index, IntervalIterator> unmodifiableCurrentIterators;

	/** Creates a new remapping document iterator wrapping a given document iterator and remapping interval-iterator requests
	 * through a given mapping from external to internal indices.
	 *
	 * @param documentIterator the underlying document iterator.
	 * @param indexInverseRemapping the mapping from external to internal indices.
	 * @throws IllegalArgumentException if an external index (a key of the map) already belongs to the
	 * underlying iterator, or if an internal index (a value of the map) does not belong to it.
	 */
	public RemappingDocumentIterator( final DocumentIterator documentIterator, final Reference2ReferenceMap<? extends Index, ? extends Index> indexInverseRemapping ) {
		this.documentIterator = documentIterator;
		this.indexInverseRemapping = indexInverseRemapping;
		// Number of internal indices; used to size the per-document cache and the external index set.
		final int n = documentIterator.indices().size();

		this.currentIterators = new Reference2ReferenceArrayMap<Index, IntervalIterator>( new Index[ n ], new IntervalIterator[ n ] );
		this.unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );
		indices = new ReferenceArraySet<Index>( n );

		// Internal indices that are hidden behind an external name.
		final ReferenceArraySet<Index> nonIndices = new ReferenceArraySet<Index>();

		for( Map.Entry<? extends Index, ? extends Index> e : indexInverseRemapping.entrySet() ) {
			if ( documentIterator.indices().contains( e.getKey() ) ) {
				throw new IllegalArgumentException( "You cannot remap index " + e.getValue() + " to index " + e.getKey() + " as the latter already belongs to the document iterator" );
			}
			if ( ! documentIterator.indices().contains( e.getValue() ) ) {
				throw new IllegalArgumentException( "You cannot remap index " + e.getValue() + " to index " + e.getKey() + " as the former does not belong to the document iterator" );
			}
			nonIndices.add( e.getValue() );
			indices.add( e.getKey() );
		}

		// Internal indices that were not remapped are exposed under their own identity.
		for( Index index : documentIterator.indices() ) {
			if ( ! nonIndices.contains( index ) ) {
				indices.add( index );
			}
		}

		soleIndex = n == 1 ? indices.iterator().next() : null;
	}

	/** Returns the current document of the underlying iterator. */
	public long document() {
		return documentIterator.document();
	}

	/** Translates an external index into the corresponding internal index.
	 *
	 * @param index an external index.
	 * @return the internal index it is mapped to, or <code>index</code> itself if the map has no entry for it.
	 */
	private Index remapIndex( final Index index ) {
		final Index result = indexInverseRemapping.get( index );
		return result == null ? index : result;
	}

	/** Returns the set of external indices. */
	public ReferenceSet<Index> indices() {
		return indices;
	}

	/** Returns the interval iterator for an external index, asking the underlying iterator for the
	 * corresponding internal index and caching the answer until the current document changes.
	 *
	 * @param index an external index.
	 * @return the interval iterator for <code>index</code>, or {@link IntervalIterators#FALSE} if
	 * <code>index</code> is not one of {@link #indices()}.
	 */
	public IntervalIterator intervalIterator( final Index index ) throws IOException {
		if ( ! indices.contains( index ) ) {
			return IntervalIterators.FALSE;
		}
		IntervalIterator intervalIterator = currentIterators.get( index );
		if ( intervalIterator == null ) {
			intervalIterator = documentIterator.intervalIterator( remapIndex( index ) );
			currentIterators.put( index, intervalIterator );
		}
		return intervalIterator;
	}

	/** Returns the interval iterator for the sole external index.
	 *
	 * @throws IllegalStateException if this iterator does not have exactly one index.
	 */
	public IntervalIterator intervalIterator() throws IOException {
		if ( soleIndex == null ) {
			throw new IllegalStateException();
		}
		return intervalIterator( soleIndex );
	}

	/** Returns an unmodifiable map from every external index to its interval iterator for the current document. */
	public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException {
		// Requesting each iterator populates the cache that backs the returned view.
		for( Index i : indices ) {
			intervalIterator( i );
		}
		return unmodifiableCurrentIterators;
	}

	/** Advances the underlying iterator, discarding the interval iterators cached for the previous document. */
	public long nextDocument() throws IOException {
		currentIterators.clear();
		return documentIterator.nextDocument();
	}

	public boolean mayHaveNext() {
		return documentIterator.mayHaveNext();
	}

	/** Advances to the next document and returns its pointer as an <code>int</code>.
	 *
	 * <p>This method used to call itself unconditionally, so every invocation ended in a
	 * {@link StackOverflowError}. It now goes through {@link #nextDocument()} (which also clears the
	 * cached interval iterators) and narrows the result.
	 *
	 * @return the value returned by {@link #nextDocument()}, as an <code>int</code>.
	 * @throws IllegalStateException if the value returned by {@link #nextDocument()} does not fit an <code>int</code>.
	 * @throws RuntimeException wrapping any {@link IOException} thrown by {@link #nextDocument()}.
	 */
	public int nextInt() {
		final long next;
		try {
			next = nextDocument();
		}
		catch ( IOException e ) {
			// Same wrapping convention as iterator(): this signature cannot throw checked exceptions.
			throw new RuntimeException( e );
		}
		if ( next < Integer.MIN_VALUE || next > Integer.MAX_VALUE ) {
			throw new IllegalStateException( "Document pointer " + next + " does not fit an int" );
		}
		return (int)next;
	}

	/** Skips to the first document whose pointer is at least <code>n</code>.
	 *
	 * <p>If the underlying iterator is already positioned at or beyond <code>n</code>, nothing moves and
	 * the cached interval iterators are kept.
	 */
	@Override
	public long skipTo( final long n ) throws IOException {
		if ( documentIterator.document() >= n ) {
			return documentIterator.document();
		}
		currentIterators.clear();
		return documentIterator.skipTo( n );
	}

	public void dispose() throws IOException {
		documentIterator.dispose();
	}

	/** Forwards the visitor to the underlying iterator. */
	public <T> T accept( DocumentIteratorVisitor<T> visitor ) throws IOException {
		return documentIterator.accept( visitor );
	}

	/** Forwards the visitor to the underlying iterator. */
	public <T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException {
		return documentIterator.acceptOnTruePaths( visitor );
	}

	/** Returns {@link #intervalIterator()}, wrapping any {@link IOException} in a {@link RuntimeException}. */
	public IntervalIterator iterator() {
		try {
			return intervalIterator();
		}
		catch ( IOException e ) {
			throw new RuntimeException( e );
		}
	}

	/** Returns the weight of the underlying iterator. */
	public double weight() {
		return documentIterator.weight();
	}

	/** Sets the weight of the underlying iterator.
	 *
	 * @param weight the new weight.
	 * @return this iterator.
	 */
	public DocumentIterator weight( final double weight ) {
		documentIterator.weight( weight );
		// Return the decorator rather than whatever the wrapped iterator returns, so that
		// fluent callers keep the index remapping.
		return this;
	}

	public String toString() {
		return this.getClass().getSimpleName() + "(" + documentIterator + ", " + indexInverseRemapping + ")";
	}
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy