/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org 
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is FatUtils.java.
 *
 * The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *   Craig Macdonald 
 */

package org.terrier.matching;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.io.Writable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.terrier.matching.daat.FatCandidateResultSet;
import org.terrier.querying.IndexRef;
import org.terrier.structures.CollectionStatistics;
import org.terrier.structures.DocumentIndex;
import org.terrier.structures.EntryStatistics;
import org.terrier.structures.FieldEntryStatistics;
import org.terrier.structures.FieldLexiconEntry;
import org.terrier.structures.Index;
import org.terrier.structures.Lexicon;
import org.terrier.structures.LexiconEntry;
import org.terrier.structures.MetaIndex;
import org.terrier.structures.Pointer;
import org.terrier.structures.PostingIndex;
import org.terrier.structures.SimpleNgramEntryStatistics;
import org.terrier.structures.collections.MapEntry;
import org.terrier.structures.postings.BasicPostingImpl;
import org.terrier.structures.postings.BlockFieldPostingImpl;
import org.terrier.structures.postings.BlockPosting;
import org.terrier.structures.postings.BlockPostingImpl;
import org.terrier.structures.postings.FieldPosting;
import org.terrier.structures.postings.FieldPostingImpl;
import org.terrier.structures.postings.IterablePosting;
import org.terrier.structures.postings.IterablePostingImpl;
import org.terrier.structures.postings.WritablePosting;
import org.terrier.utility.ApplicationSetup;
import org.terrier.utility.ArrayUtils;
import org.terrier.utility.Files;
import org.terrier.utility.io.DebuggingDataInput;
import org.terrier.utility.io.DebuggingDataOutput;
import org.terrier.utility.io.WrappedIOException;

/** Various utilities for dealing with {@link FatResultSet}s.
 * @author Craig Macdonald
 * @since 4.0
 */
public class FatUtils {

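	// VERSION is the format byte written by write(); readFields() additionally
	// accepts the older versions 2-5. Setting DEBUG wraps the streams in
	// DebuggingDataInput/DebuggingDataOutput to trace each primitive value.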
	private static final byte VERSION = 6;
	private static final boolean DEBUG = false;
	
	static Logger logger = LoggerFactory.getLogger(FatUtils.class);
	
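	/** Round-trips the given FatResultSet through an in-memory buffer, returning
	 * a freshly-deserialised FatCandidateResultSet. Serves as a deep copy, and as
	 * a check that write() and readFields() are symmetric. */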
	public static FatResultSet recreate(FatResultSet frs) throws IOException
	{
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		DataOutputStream dos = new DataOutputStream(baos);
		frs.write(dos);
		FatResultSet rtr = new FatCandidateResultSet();
		rtr.readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
		return rtr;
	}
	
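	/** Populates frs from in. The first byte on the stream names the
	 * serialisation version; formats 2 to 6 are supported. */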
	public static void readFields(FatResultSet frs, DataInput in) throws IOException
	{
		if (DEBUG)
			in = new DebuggingDataInput(in);
		
		try{
			byte version = in.readByte();
			switch (version) {
				case 2: readFieldsV2(frs, in); break;
				case 3: readFieldsV3(frs, in); break;
				case 4: readFieldsV4(frs, in); break;
				case 5: readFieldsV6(frs, in, false); break;
				case 6: readFieldsV6(frs, in, true); break;
				default: throw new IOException("Version mismatch, version " + version +" is not supported");
			}			
		}catch (EOFException eofe) {
			logger.error("EOF within FatUtils.read()", eofe);
			throw eofe;
		}catch (IOException ioe) {
			logger.error("EOF within FatUtils.read()", ioe);
			throw ioe;			
		}
	}

	protected static void readFieldsV2(FatResultSet frs, DataInput in)
			throws IOException 
	{
		int i =-1;
		int resultSize = -1;
		int j = -1;
		int termCount = -1;
		
		try{
			CollectionStatistics collStats = new CollectionStatistics();
			collStats.readFields(in);
			frs.setCollectionStatistics(collStats);
			
			final boolean fields = collStats.getNumberOfFields() > 0;
			final int fieldCount = collStats.getNumberOfFields();
			
			//read number of query terms
			termCount = in.readInt();
			if (termCount == 0)
			{
				frs.setResultSize(0);
				final int[] docids 		= new int[0];
				final double[] scores 		= new double[0];
				final short[] occurrences = new short[0];
				final WritablePosting[][] postings = new WritablePosting[0][];				
				frs.setScores(scores);
				frs.setDocids(docids);
				frs.setPostings(postings);
				frs.setOccurrences(occurrences);
		
				frs.setEntryStatistics(new EntryStatistics[0]);
            	frs.setKeyFrequencies(new double[0]);
            	frs.setQueryTerms(new String[0]);
				
				logger.warn("No found terms for this query");
				return;
			}
			
			//read the classes to use
			String statsClassName = in.readUTF();
			
			//hack for some older fat result versions
			if (statsClassName.equals("org.terrier.structures.FieldIndex$FieldIndexLexiconEntry"))
				statsClassName = FieldLexiconEntry.class.getName();
	
			Class<? extends EntryStatistics> statisticsClass = ApplicationSetup.getClass(statsClassName).asSubclass(EntryStatistics.class);
			Class<? extends WritablePosting> postingClass = ApplicationSetup.getClass(in.readUTF()).asSubclass(WritablePosting.class);
			
			
			//read terms and entry statistics
			final EntryStatistics[] entryStats = new EntryStatistics[termCount];
			final String[] queryTerms = new String[termCount];
			final double[] keyFrequencies = new double[termCount];
			for(j=0;j<termCount;j++)
			{
				queryTerms[j] = in.readUTF();
				keyFrequencies[j] = in.readDouble();
				EntryStatistics le = fields
					? statisticsClass.getConstructor(Integer.TYPE).newInstance(fieldCount)
					: statisticsClass.newInstance();
				((Writable)le).readFields(in);
				entryStats[j] = le;
			}
			
			frs.setEntryStatistics(entryStats);
			frs.setKeyFrequencies(keyFrequencies);
			frs.setQueryTerms(queryTerms);
			
			//read the number of documents
			resultSize = in.readInt();
			//size the arrays
			final int[] docids 		= new int[resultSize];
			final double[] scores 		= new double[resultSize];
			final short[] occurrences = new short[resultSize];
			final WritablePosting[][] postings = new WritablePosting[resultSize][];
			
			//for each document
			for (i = 0; i < resultSize; i++)
			{
				//read: docid, score, occurrences
				docids[i] = in.readInt();
				scores[i] = in.readDouble();
				occurrences[i] = in.readShort();
				final int docLen = in.readInt();
				final int[] fieldLens;
				if (fieldCount > 0)
				{
					fieldLens = new int[fieldCount];
					for(int fi=0;fi<fieldCount;fi++)
						fieldLens[fi] = in.readInt();
				}
				else
				{
					fieldLens = null;
				}
				//read the postings for each term
				postings[i] = new WritablePosting[termCount];
				for(j=0;j<termCount;j++)
				{
					if (in.readBoolean())
					{
						WritablePosting wp = postingClass.newInstance();
						wp.readFields(in);
						wp.setDocumentLength(docLen);
						if (fields)
							((FieldPosting)wp).setFieldLengths(fieldLens);
						postings[i][j] = wp;
					}
				}
			}
			frs.setResultSize(resultSize);
			frs.setScores(scores);
			frs.setDocids(docids);
			frs.setPostings(postings);
			frs.setOccurrences(occurrences);
		} catch (IOException ioe) {
			throw new WrappedIOException("IOException, was reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount, ioe);
		} catch (Exception e) {
			throw new WrappedIOException("Problem reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount, e);
		}
	}
	
	protected static void readFieldsV3(FatResultSet frs, DataInput in)
			throws IOException 
	{
		int i =-1;
		int resultSize = -1;
		int j = -1;
		int termCount = -1;
		int lastDocid = -1;
		
		try{
			CollectionStatistics collStats = new CollectionStatistics();
			collStats.readFields(in);
			frs.setCollectionStatistics(collStats);
			
			final int fieldCount = collStats.getNumberOfFields();
			
			//read number of query terms
			termCount = in.readInt();
			if (termCount == 0)
			{
				frs.setResultSize(0);
				frs.setScores(new double[0]);
				frs.setDocids(new int[0]);
				frs.setPostings(new WritablePosting[0][]);
				frs.setOccurrences(new short[0]);
				frs.setEntryStatistics(new EntryStatistics[0]);
				frs.setKeyFrequencies(new double[0]);
				frs.setQueryTerms(new String[0]);
				
				logger.warn("No terms found for this query");
				return;
			}
			
			@SuppressWarnings("unchecked")
			Class<? extends WritablePosting> postingClass[] = new Class[termCount];			
			
			//read terms and entry statistics
			final EntryStatistics[] entryStats = new EntryStatistics[termCount];
			final String[] queryTerms = new String[termCount];
			final double[] keyFrequencies = new double[termCount];
			final boolean[] fields = new boolean[termCount];
			final boolean[] blocks = new boolean[termCount];
			for(j=0;j<termCount;j++)
			{
				queryTerms[j] = in.readUTF();
				fields[j] = in.readBoolean();
				blocks[j] = in.readBoolean();
				postingClass[j] = ApplicationSetup.getClass(in.readUTF()).asSubclass(WritablePosting.class);
				Class<? extends EntryStatistics> statisticsClass = ApplicationSetup.getClass(in.readUTF()).asSubclass(EntryStatistics.class);
				keyFrequencies[j] = in.readDouble();
				logger.debug(queryTerms[j] + " f=" +fields[j]  + " b="+blocks[j] +" postings="+postingClass[j] + 
					" es="+statisticsClass.getSimpleName() /*+
					" es.isAssignableFrom(FieldEntryStatistics.class)="+statisticsClass.isAssignableFrom(FieldEntryStatistics.class) + 
					" FieldEntryStatistics.class.isAssignableFrom(es)="+FieldEntryStatistics.class.isAssignableFrom(statisticsClass)*/);
				EntryStatistics le = fields[j] || /* HACK */ FieldEntryStatistics.class.isAssignableFrom(statisticsClass)
					? statisticsClass.getConstructor(Integer.TYPE).newInstance(fieldCount)
					: statisticsClass.newInstance();
				((Writable)le).readFields(in);
				if (queryTerms[j].contains("#uw") || queryTerms[j].contains("#1"))
				{
					if (queryTerms[j].contains("#uw12")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(12);
                    }else if (queryTerms[j].contains("#uw8")){
						le = new SimpleNgramEntryStatistics(le);
						((SimpleNgramEntryStatistics)le).setWindowSize(8);
					}else if (queryTerms[j].contains("#uw4")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(4);
                    }else if (queryTerms[j].contains("#1")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(2);
                    }
				}
				entryStats[j] = le;
			}
			
			frs.setEntryStatistics(entryStats);
			frs.setKeyFrequencies(keyFrequencies);
			frs.setQueryTerms(queryTerms);
			
			
			//read the number of documents
			resultSize = in.readInt();
			//size the arrays
			final int[] docids 		= new int[resultSize];
			final double[] scores 		= new double[resultSize];
			final short[] occurrences = new short[resultSize];
			final WritablePosting[][] postings = new WritablePosting[resultSize][];
			
			//for each document
			for (i = 0; i < resultSize; i++)
			{
				//read: docid, scores, occurrences
				lastDocid = docids[i] = in.readInt();
				scores[i] = in.readDouble();
				occurrences[i] = in.readShort();
				final int docLen = in.readInt();
				final int[] fieldLens;
				if (fieldCount > 0)
				{
					fieldLens = new int[fieldCount];
					for(int fi=0;fi<fieldCount;fi++)
						fieldLens[fi] = in.readInt();
				}
				else
				{
					fieldLens = null;
				}
				//read the postings for each term
				postings[i] = new WritablePosting[termCount];
				for(j=0;j<termCount;j++)
				{
					if (in.readBoolean())
					{
						WritablePosting wp = postingClass[j].newInstance();
						wp.readFields(in);
						wp.setDocumentLength(docLen);
						if (fields[j])
							((FieldPosting)wp).setFieldLengths(fieldLens);
						postings[i][j] = wp;
					}
				}
			}
			frs.setResultSize(resultSize);
			frs.setScores(scores);
			frs.setDocids(docids);
			frs.setPostings(postings);
			frs.setOccurrences(occurrences);
		} catch (IOException ioe) {
			throw new WrappedIOException("IOException (reset to start perhaps?), was reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, ioe);
		} catch (Exception e) {
			throw new WrappedIOException("Problem reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, e);
		}
	}
	
	protected static void readFieldsV4(FatResultSet frs, DataInput in)
			throws IOException 
	{
		int i =-1;
		int resultSize = -1;
		int j = -1;
		int termCount = -1;
		int lastDocid = -1;
		
		try{
			CollectionStatistics collStats = new CollectionStatistics();
			collStats.readFields(in);
			frs.setCollectionStatistics(collStats);
			
			final int fieldCount = collStats.getNumberOfFields();
			
			//read number of query terms
			termCount = in.readInt();
			if (termCount == 0)
			{
				frs.setResultSize(0);
				final int[] docids 		= new int[0];
				final double[] scores 		= new double[0];
				final short[] occurrences = new short[0];
				@SuppressWarnings("unchecked")
				final Set<String>[] tags = new Set[0];
				final WritablePosting[][] postings = new WritablePosting[0][];				
				frs.setScores(scores);
				frs.setDocids(docids);
				frs.setPostings(postings);
				frs.setOccurrences(occurrences);
				frs.setTags(tags);
				frs.setEntryStatistics(new EntryStatistics[0]);
                frs.setKeyFrequencies(new double[0]);
                frs.setQueryTerms(new String[0]);
				
				logger.warn("No found terms for this query");
				return;
			}
			
			
			@SuppressWarnings("unchecked")
			Class<? extends WritablePosting> postingClass[] = new Class[termCount];			
			
			//read terms and entry statistics
			final EntryStatistics[] entryStats = new EntryStatistics[termCount];
			final String[] queryTerms = new String[termCount];
			final String[] tags = new String[termCount];
			final double[] keyFrequencies = new double[termCount];
			final boolean[] fields = new boolean[termCount];
			final boolean[] blocks = new boolean[termCount];
			for(j=0;j<termCount;j++)
			{
				queryTerms[j] = in.readUTF();
				tags[j] = in.readUTF();
				fields[j] = in.readBoolean();
				blocks[j] = in.readBoolean();
				postingClass[j] = ApplicationSetup.getClass(in.readUTF()).asSubclass(WritablePosting.class);
				Class<? extends EntryStatistics> statisticsClass = ApplicationSetup.getClass(in.readUTF()).asSubclass(EntryStatistics.class);
				keyFrequencies[j] = in.readDouble();
				logger.debug(queryTerms[j] + " f=" +fields[j]  + " b="+blocks[j] +" postings="+postingClass[j] + 
					" es="+statisticsClass.getSimpleName() /*+
					" es.isAssignableFrom(FieldEntryStatistics.class)="+statisticsClass.isAssignableFrom(FieldEntryStatistics.class) + 
					" FieldEntryStatistics.class.isAssignableFrom(es)="+FieldEntryStatistics.class.isAssignableFrom(statisticsClass)*/);
				EntryStatistics le = fields[j] || /* HACK */ FieldEntryStatistics.class.isAssignableFrom(statisticsClass)
					? statisticsClass.getConstructor(Integer.TYPE).newInstance(fieldCount)
					: statisticsClass.newInstance();
				((Writable)le).readFields(in);
				if (queryTerms[j].contains("#uw") || queryTerms[j].contains("#1"))
				{
					if (queryTerms[j].contains("#uw12")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(12);
                    }else if (queryTerms[j].contains("#uw8")){
						le = new SimpleNgramEntryStatistics(le);
						((SimpleNgramEntryStatistics)le).setWindowSize(8);
					}else if (queryTerms[j].contains("#uw4")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(4);
                    }else if (queryTerms[j].contains("#1")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(2);
                    }
				}
				entryStats[j] = le;
			}
			
			frs.setEntryStatistics(entryStats);
			frs.setKeyFrequencies(keyFrequencies);
			frs.setQueryTerms(queryTerms);
			
			
			//read the number of documents
			resultSize = in.readInt();
			//size the arrays
			final int[] docids 		= new int[resultSize];
			final double[] scores 		= new double[resultSize];
			final short[] occurrences = new short[resultSize];
			final WritablePosting[][] postings = new WritablePosting[resultSize][];
			
			//for each document
			for (i = 0; i < resultSize; i++)
			{
				//read: docid, scores, occurrences
				lastDocid = docids[i] = in.readInt();
				scores[i] = in.readDouble();
				occurrences[i] = in.readShort();
				final int docLen = in.readInt();
				final int[] fieldLens;
				if (fieldCount > 0)
				{
					fieldLens = new int[fieldCount];
					for(int fi=0;fi<fieldCount;fi++)
						fieldLens[fi] = in.readInt();
				}
				else
				{
					fieldLens = null;
				}
				//read the postings for each term
				postings[i] = new WritablePosting[termCount];
				for(j=0;j<termCount;j++)
				{
					if (in.readBoolean())
					{
						WritablePosting wp = postingClass[j].newInstance();
						wp.readFields(in);
						wp.setDocumentLength(docLen);
						if (fields[j])
							((FieldPosting)wp).setFieldLengths(fieldLens);
						postings[i][j] = wp;
					}
				}
			}
			frs.setResultSize(resultSize);
			frs.setScores(scores);
			frs.setDocids(docids);
			frs.setPostings(postings);
			frs.setOccurrences(occurrences);
			
			//convert the single stored tag per term into a set of tags
			@SuppressWarnings("unchecked")
			final Set<String>[] finalTags = new Set[tags.length];
			for(int ti=0;ti<tags.length;ti++)
			{
				finalTags[ti] = new HashSet<String>();
				finalTags[ti].add(tags[ti]);
			}
			frs.setTags(finalTags);
		} catch (IOException ioe) {
			throw new WrappedIOException("IOException (reset to start perhaps?), was reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, ioe);
		} catch (Exception e) {
			throw new WrappedIOException("Problem reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, e);
		}
		
	}
	
	protected static void readFieldsV5(FatResultSet frs, DataInput in)
			throws IOException {
		readFieldsV6(frs, in, false);
	}
	
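	/** Reads the V5/V6 formats: per-term tags and posting classes, followed by
	 * the per-document matrix of postings. The v6 flag selects how the leading
	 * CollectionStatistics were serialised. */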
	protected static void readFieldsV6(FatResultSet frs, DataInput in, boolean v6)
			throws IOException 
	{
		int i =-1;
		int resultSize = -1;
		int j = -1;
		int termCount = -1;
		int lastDocid = -1;
		
		try{
			CollectionStatistics collStats = new CollectionStatistics();
			if (v6)
				collStats.readFields(in);
			else
				collStats.readFieldsV5(in);
			frs.setCollectionStatistics(collStats);
			
			final int fieldCount = collStats.getNumberOfFields();
						
			
			//read number of query terms
			termCount = in.readInt();
			if (termCount == 0)
			{
				frs.setResultSize(0);
				final int[] docids 		= new int[0];
				final double[] scores 		= new double[0];
				final short[] occurrences = new short[0];
				@SuppressWarnings("unchecked")
				final Set<String>[] tags = new Set[0];
				final WritablePosting[][] postings = new WritablePosting[0][];				
				frs.setScores(scores);
				frs.setDocids(docids);
				frs.setPostings(postings);
				frs.setOccurrences(occurrences);
				frs.setTags(tags);
				frs.setEntryStatistics(new EntryStatistics[0]);
                frs.setKeyFrequencies(new double[0]);
                frs.setQueryTerms(new String[0]);
				
				logger.warn("No found terms for this query");
				return;
			}
			
			
			@SuppressWarnings("unchecked")
			Class<? extends WritablePosting> postingClass[] = new Class[termCount];			
			
			//read terms and entry statistics
			final EntryStatistics[] entryStats = new EntryStatistics[termCount];
			final String[] queryTerms = new String[termCount];
			@SuppressWarnings("unchecked")
			final Set<String>[] tags = new Set[termCount];
			final double[] keyFrequencies = new double[termCount];
			final boolean[] fields = new boolean[termCount];
			final boolean[] blocks = new boolean[termCount];
			for(j=0;j<termCount;j++)
			{
				queryTerms[j] = in.readUTF();
				if (in.readBoolean())
				{
					final int tagCount = in.readInt();
					tags[j] = new HashSet<String>();
					for(int ti=0;ti<tagCount;ti++)
						tags[j].add(in.readUTF());
				}
				fields[j] = in.readBoolean();
				blocks[j] = in.readBoolean();
				if (in.readBoolean())
					postingClass[j] = ApplicationSetup.getClass(in.readUTF()).asSubclass(WritablePosting.class);
				Class<? extends EntryStatistics> statisticsClass = ApplicationSetup.getClass(in.readUTF()).asSubclass(EntryStatistics.class);
				keyFrequencies[j] = in.readDouble();
				logger.debug(queryTerms[j] + " f=" +fields[j]  + " b="+blocks[j] +" postings="+postingClass[j] + 
					" es="+statisticsClass.getSimpleName() /*+
					" es.isAssignableFrom(FieldEntryStatistics.class)="+statisticsClass.isAssignableFrom(FieldEntryStatistics.class) + 
					" FieldEntryStatistics.class.isAssignableFrom(es)="+FieldEntryStatistics.class.isAssignableFrom(statisticsClass)*/);
				EntryStatistics le = fields[j] || /* HACK */ FieldEntryStatistics.class.isAssignableFrom(statisticsClass)
					? statisticsClass.getConstructor(Integer.TYPE).newInstance(fieldCount)
					: statisticsClass.newInstance();
				((Writable)le).readFields(in);
				if (queryTerms[j].contains("#uw") || queryTerms[j].contains("#1"))
				{
					if (queryTerms[j].contains("#uw12")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(12);
                    }else if (queryTerms[j].contains("#uw8")){
						le = new SimpleNgramEntryStatistics(le);
						((SimpleNgramEntryStatistics)le).setWindowSize(8);
					}else if (queryTerms[j].contains("#uw4")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(4);
                    }else if (queryTerms[j].contains("#1")){
                        le = new SimpleNgramEntryStatistics(le);
                        ((SimpleNgramEntryStatistics)le).setWindowSize(2);
                    }
				}
				entryStats[j] = le;
			}
			
			frs.setEntryStatistics(entryStats);
			frs.setKeyFrequencies(keyFrequencies);
			frs.setQueryTerms(queryTerms);
			
			
			//read the number of documents
			resultSize = in.readInt();
			//size the arrays
			final int[] docids 		= new int[resultSize];
			final double[] scores 		= new double[resultSize];
			final short[] occurrences = new short[resultSize];
			final WritablePosting[][] postings = new WritablePosting[resultSize][];
			
			//for each document
			for (i = 0; i < resultSize; i++)
			{
				//read: docid, scores, occurrences
				lastDocid = docids[i] = in.readInt();
				scores[i] = in.readDouble();
				occurrences[i] = in.readShort();
				final int docLen = in.readInt();
				final int[] fieldLens;
				if (fieldCount > 0)
				{
					fieldLens = new int[fieldCount];
					for(int fi=0;fi<fieldCount;fi++)
						fieldLens[fi] = in.readInt();
				}
				else
				{
					fieldLens = null;
				}
				//read the postings for each term
				postings[i] = new WritablePosting[termCount];
				for(j=0;j<termCount;j++)
				{
					if (in.readBoolean())
					{
						WritablePosting wp = postingClass[j].newInstance();
						wp.readFields(in);
						wp.setDocumentLength(docLen);
						if (fields[j])
							((FieldPosting)wp).setFieldLengths(fieldLens);
						postings[i][j] = wp;
					}
				}
			}
			frs.setResultSize(resultSize);
			frs.setScores(scores);
			frs.setDocids(docids);
			frs.setPostings(postings);
			frs.setOccurrences(occurrences);
			frs.setTags(tags);
		} catch (IOException ioe) {
			throw new WrappedIOException("IOException (reset to start perhaps?), was reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, ioe);
		} catch (Exception e) {
			throw new WrappedIOException("Problem reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, e);
		}
	}
	
	/** Writes frs to out in the current (V6) format, beginning with the VERSION byte. */
	public static void write(FatResultSet frs, DataOutput out) throws IOException
	{
		if (DEBUG)
			out = new DebuggingDataOutput(out);
		out.writeByte(VERSION);
		
		final CollectionStatistics collStats = frs.getCollectionStatistics();
		final String[] queryTerms = frs.getQueryTerms();
		final EntryStatistics[] entryStats = frs.getEntryStatistics();
		final Set<String>[] tags = frs.getTags();
		final double[] keyFrequency = frs.getKeyFrequencies();
		final WritablePosting[][] postings = frs.getPostings();
		final int[] docids = frs.getDocids();
		final double[] scores = frs.getScores();
		final short[] occurrences = frs.getOccurrences();
		
		
		collStats.write(out);
		final int fieldCount = collStats.getNumberOfFields();		
		final int queryTermCount = queryTerms.length;
		final boolean fields[] = new boolean[queryTermCount];
		final boolean blocks[] = new boolean[queryTermCount];
		
		assert tags != null;
		
		//write out the number of query terms
		out.writeInt(queryTermCount);
		if(queryTermCount == 0)
			return;
		
		
		//write out query terms
		//write out the entry statistics
		for (int i = 0; i < queryTermCount; i++){
			out.writeUTF(queryTerms[i]);
			out.writeBoolean(tags[i] != null);
			if (tags[i] != null)
			{
				out.writeInt(tags[i].size());
				for(String t : tags[i])
					out.writeUTF(t);
			}
			WritablePosting firstPostingForTerm = firstPosting(postings, i);
			fields[i] = firstPostingForTerm instanceof FieldPosting;
			blocks[i] = firstPostingForTerm instanceof BlockPosting;
			out.writeBoolean(fields[i]);
			out.writeBoolean(blocks[i]);
			
			//HACK: MultiQueryTerm USED TO cause problems as it can return a FieldEntryStatistics where none is possible.???
			//if (! fields[i])
			//{
			//	entryStats[i] = new BasicLexiconEntry(entryStats[i].getTermId(), entryStats[i].getDocumentFrequency(), entryStats[i].getFrequency());
			//}
		
			out.writeBoolean(firstPostingForTerm != null);
			if (firstPostingForTerm != null)
			{
				//write out the classes			
				out.writeUTF(firstPostingForTerm.getClass().getName());
				//if we don't have a FieldPosting list, we should not have a FieldEntryStatistics 
				assert fields[i] || ! (entryStats[i] instanceof FieldEntryStatistics);
			}
			out.writeUTF(entryStats[i].getClass().getName());
			
			out.writeDouble(keyFrequency[i]);
			((Writable)entryStats[i]).write(out);
		}
		
		//write out the number of documents
		out.writeInt(docids.length);
		int i = 0;
		//for each document
		long notNullPostings = 0;
		for (i = 0; i < docids.length; i ++) {
			//write out the docid to out 
			out.writeInt(docids[i]);
			//write out the score
			out.writeDouble(scores[i]);
			//write out the occurrences
			out.writeShort(occurrences[i]);
			
			//write out the document length, and possible field lengths			
			WritablePosting firstPosting = firstPosting(postings[i]);
			assert firstPosting != null : "Docid " + docids[i] + " with score " + scores[i] + " has no matching postings";
			out.writeInt(firstPosting.getDocumentLength());			
			if (fieldCount > 0)
			{
				final int[] fieldLengths = ((FieldPosting)firstPosting).getFieldLengths();
				assert fieldLengths.length == fieldCount;
				for(int fi=0;fi<fieldCount;fi++)
					out.writeInt(fieldLengths[fi]);
			}
			
			//write out the postings for each term
			for (int ti = 0; ti < queryTermCount; ti++)
			{
				final WritablePosting p = postings[i][ti];
				out.writeBoolean(p != null);
				if (p != null)
				{
					notNullPostings++;
					p.write(out);
				}
			}
		}
		logger.debug("Wrote " + notNullPostings + " non-null postings for " + docids.length + " documents");
	}
	
	/** Returns a one-line summary of frs: its document and query term counts. */
	public static String getInfo(FatResultSet frs)
	{
		return frs.getDocids().length + " documents, " + frs.getQueryTerms().length + " query terms";
	}
	
	/** Prints the contents of frs (documents, scores and postings) to stdout. */
	public static void dump(FatResultSet frs)
	{
		final String[] queryTerms = frs.getQueryTerms();
		final WritablePosting[][] postings = frs.getPostings();
		final int[] docids = frs.getDocids();
		final double[] scores = frs.getScores();
		final short[] occurrences = frs.getOccurrences();
		for (int di = 0; di < docids.length; di++)
		{
			System.out.println(docids[di] + " score=" + scores[di] + " occurrences=" + occurrences[di]);
			for (int ti = 0; ti < queryTerms.length; ti++)
				System.out.println("\t" + queryTerms[ti] + "=" + postings[di][ti]);
		}
	}
	
	/** Builds a skeleton read-only Index view of frs, exposing a Lexicon and an
	 * inverted PostingIndex over the matrix of postings that it holds. */
	public static Index makeIndex(FatResultSet frs)
	{
		final String[] queryTerms = frs.getQueryTerms();
		final EntryStatistics[] entryStats = frs.getEntryStatistics();
		final WritablePosting[][] postings = frs.getPostings();
		final CollectionStatistics collStats = frs.getCollectionStatistics();
		final Map<String,EntryStatistics> statsMap = new HashMap<String,EntryStatistics>();		
		
		final boolean fields = frs.getCollectionStatistics().getNumberOfFields() > 0;
		final int fieldCount = frs.getCollectionStatistics().getNumberOfFields();
		final boolean blocks = firstPosting(postings) instanceof BlockPosting;
		
		//make maps based on the terms
		for (int i=0;i<queryTerms.length;i++)
		{
			if (firstPosting(postings, i) != null)
			{
				((LexiconEntry)entryStats[i]).setTermId(i);
				statsMap.put(queryTerms[i], entryStats[i]);
				//System.err.println("term " + queryTerms[i] + " => " + i);
			}
			}
			else
			{
				//TODO: optimise this by knowing at creation/loading of resultset.
				logger.warn("Ignoring term " + queryTerms[i] + " as it has no non-null postings in the FatResultSet");
			}
		}
		//order the per-document posting rows by ascending docid
		for (int di=0;di<postings.length;di++)
			assert firstPosting(postings[di]) != null;
		Arrays.sort(postings, new Comparator<WritablePosting[]>()
		{			
			@Override
			public int compare(WritablePosting[] p1, WritablePosting[] p2) {
				final int x = firstPosting(p1).getId();
				final int y = firstPosting(p2).getId();
				return (x < y) ? -1 : ((x == y) ? 0 : 1);
			}			
		});
		
		final Lexicon<String> lex = new Lexicon<String>() {

			@Override
			public LexiconEntry getLexiconEntry(String term) {
				return (LexiconEntry) statsMap.get(term);
			}

			@Override
			public Entry<String, LexiconEntry> getLexiconEntry(int termid) {
				throw new UnsupportedOperationException();
			}
			@Override
			public Entry<String, LexiconEntry> getIthLexiconEntry(int index) {
				throw new UnsupportedOperationException();
			}
			@Override
			public void close() throws IOException {}
			@Override
			public Iterator<Entry<String, LexiconEntry>> iterator() {
				throw new UnsupportedOperationException();
			}
			@Override
			public int numberOfEntries() {
				throw new UnsupportedOperationException();
			}

			@Override
			public Iterator<Entry<String, LexiconEntry>> getLexiconEntryRange(
					String from, String to) {
				throw new UnsupportedOperationException();
			}
		};
		
		final PostingIndex<Pointer> inv = new PostingIndex<Pointer>()
		{
			@Override
			public void close() throws IOException {}

			@Override
			public IterablePosting getPostings(Pointer lEntry)
					throws IOException 
			{
				final int term = ((LexiconEntry)lEntry).getTermId();
				//System.err.println("read: " + lEntry + " => " + term);
				if (blocks && fields)
					return new BFIterablePostingFromWritablePostingSlice(postings, term);
				else if (fields)
					return new FIterablePostingFromWritablePostingSlice(postings, term);
				else if (blocks)
					return new BIterablePostingFromWritablePostingSlice(postings, term);
				else 
					return new IterablePostingFromWritablePostingSlice(postings, term);
			}			
		};
		
		return new Index(){

			@Override
			public PostingIndex<?> getInvertedIndex() {
				return inv;
			}

			@Override
			public Lexicon<String> getLexicon() {
				return lex;
			}

			@Override
			public CollectionStatistics getCollectionStatistics() {
				return collStats;
			}

			@Override
			public PostingIndex<?> getDirectIndex() {
				return null;
			}

			@Override
			public DocumentIndex getDocumentIndex() {
				return null;
			}

			@Override
			public MetaIndex getMetaIndex() {
				return null;
			}

			@Override
			public String toString() {
				return this.getClass().getSimpleName();
			}

			@Override
			public IndexRef getIndexRef() {
				return makeDirectIndexRef(this);
			}
			
		};
	}
	
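	/** Exposes one column ("slice") of a document x term matrix of WritablePostings
	 * as an IterablePosting, skipping documents where the term has no posting. */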
	static class IterablePostingFromWritablePostingSlice extends IterablePostingImpl
	{		
		
		final WritablePosting[][] postings; //document, term
		final int slice;
		WritablePosting current;
		int index;
		
		public IterablePostingFromWritablePostingSlice(WritablePosting[][] postings, int slice)
		{
			this.index = -1;
			this.postings = postings;
			this.slice = slice;
		}
		
		@Override
		public int next() throws IOException {
			index++;
			if (index >= postings.length)
				return EOL;
			current = postings[index][slice];
			while(current == null)
			{
				index++;
				if (index >= postings.length)
					return EOL;
				current = postings[index][slice];
			}
			return current.getId();
		}

		@Override
		public boolean endOfPostings() {
			return index >= postings.length;
		}

		@Override
		public int getId() {
			return  current.getId();
		}

		@Override
		public int getFrequency() {
			return current.getFrequency();
		}

		@Override
		public int getDocumentLength() {
			return current.getDocumentLength();
		}

		@Override
		public WritablePosting asWritablePosting() {
			return current.asWritablePosting();
		}

		@Override
		public void close() throws IOException {}
		
	}
	
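	/** As IterablePostingFromWritablePostingSlice, but additionally exposes
	 * position (block) information from the underlying postings. */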
	static class BIterablePostingFromWritablePostingSlice extends IterablePostingFromWritablePostingSlice implements BlockPosting
	{
		public BIterablePostingFromWritablePostingSlice(
				WritablePosting[][] postings, int slice) {
			super(postings, slice);
		}

		@Override
		public int[] getPositions() {
			return ((BlockPosting)current).getPositions(); 
		}		
	}
	
	
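	/** As IterablePostingFromWritablePostingSlice, but additionally exposes
	 * per-field frequencies and field lengths from the underlying postings. */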
	static class FIterablePostingFromWritablePostingSlice extends IterablePostingFromWritablePostingSlice implements FieldPosting
	{
		public FIterablePostingFromWritablePostingSlice(
				WritablePosting[][] postings, int slice) {
			super(postings, slice);
		}

		@Override
		public int[] getFieldFrequencies() {
			return ((FieldPosting)current).getFieldFrequencies();
		}

		@Override
		public int[] getFieldLengths() {
			return ((FieldPosting)current).getFieldLengths();
		}

		@Override
		public void setFieldLengths(int[] newLengths) {
			((FieldPosting)current).setFieldLengths(newLengths);
		}	
	}
	
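	/** Slice posting iterator exposing both block (position) and field
	 * information from the underlying postings. */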
	static class BFIterablePostingFromWritablePostingSlice extends BIterablePostingFromWritablePostingSlice implements FieldPosting
	{

		public BFIterablePostingFromWritablePostingSlice(
				WritablePosting[][] postings, int slice) {
			super(postings, slice);
		}

		@Override
		public int[] getFieldFrequencies() {
			return ((FieldPosting)current).getFieldFrequencies();
		}

		@Override
		public int[] getFieldLengths() {
			return ((FieldPosting)current).getFieldLengths();
		}

		@Override
		public void setFieldLengths(int[] newLengths) {
			((FieldPosting)current).setFieldLengths(newLengths);
		}		
		
		/** Makes a human readable form of this posting */
		@Override
		public String toString()
		{
			return "(" + getId() + "," + getFrequency() + ",F[" + ArrayUtils.join(getFieldFrequencies(), ",")
				+ "],B[" + ArrayUtils.join(getPositions(), ",") + "])";
		}
	}

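	/** Returns the first non-null posting anywhere in the document x term matrix,
	 * or null if every entry is null. */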
	protected static WritablePosting firstPosting(WritablePosting[][] postings)
	{
		for(int i=0;i<postings.length;i++)
		{
			final WritablePosting p = firstPosting(postings[i]);
			if (p != null)
				return p;
		}
		return null;
	}
	
	/** Returns the first non-null posting in one document's row of postings,
	 * or null if the row contains only nulls. */
	protected static WritablePosting firstPosting(WritablePosting[] postings)
	{
		for(int i=0;i<postings.length;i++)
			if (postings[i] != null)
				return postings[i];
		return null;
	}
	
	/** Returns the first non-null posting for the term with the given index,
	 * scanning down the documents, or null if the term matched no documents. */
	protected static WritablePosting firstPosting(WritablePosting[][] postings, int term)
	{
		for(int i=0;i<postings.length;i++)
			if (postings[i][term] != null)
				return postings[i][term];
		return null;
	}
	
	static interface CloseableIterator<T> extends Closeable, Iterator<T> {};
	
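	/** Iterates over the (query id, FatResultSet) pairs stored in the named file.
	 * next() returns null once the underlying stream is exhausted. A minimal
	 * usage sketch (the file name is illustrative):
	 * <pre>
	 * Iterator<Map.Entry<String,FatResultSet>> it = FatUtils.readFatResultSet("results.fat.gz");
	 * while (it.hasNext()) {
	 *   Map.Entry<String,FatResultSet> e = it.next();
	 *   if (e == null) break;
	 *   System.out.println(e.getKey() + " " + e.getValue().getResultSize() + " documents");
	 * }
	 * </pre> */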
	public static Iterator<Map.Entry<String,FatResultSet>> readFatResultSet(String filename) throws IOException
	{
		final DataInputStream dis = new DataInputStream(Files.openFileStream(filename));
		return new CloseableIterator<Map.Entry<String,FatResultSet>> () {

			boolean more = true;
			
			@Override
			public boolean hasNext() {
				return more;
			}

			@Override
			public Entry<String,FatResultSet> next() {
				try{
					String qid = dis.readUTF();
					FatResultSet frs = new FatCandidateResultSet();
					frs.readFields(dis);
					return new MapEntry<String,FatResultSet>(qid, frs);
				} catch (IOException e) {
					more = false;
					return null;
				}
			}

			@Override
			public void remove() {
				throw new UnsupportedOperationException();
			}

			@Override
			public void close() throws IOException {
				dis.close();
			}
			
		};
	}
	
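	/** Command-line entry point: with --info, prints a one-line summary per query;
	 * with --dump, prints the full contents of each FatResultSet in the given file. */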
	public static void main(String[] args) throws IOException
	{
		if (args.length != 2)
		{
			System.err.println("Usage: " +FatUtils.class.getName() + " {--info|--dump} results.fat.gz");
			return;
			
		}
		final boolean dump = args[0].equals("--dump");
		FatResultSet frs = new FatCandidateResultSet();
		DataInputStream dis = new DataInputStream(Files.openFileStream(args[1]));
		int queryCount = 0;
		while(true)
		{
			try
			{
				String qid = dis.readUTF();
				System.err.println("Now reading query " + qid);
				queryCount++;
				frs.readFields(dis);
				if (! dump)
					System.out.println(qid + " " + getInfo(frs));
				else
				{
					System.out.println("Query " + qid);
					dump(frs);
					System.out.println();
				}
			}
			catch (EOFException e) {
				break;
			}
		}
		System.out.println("Total " + queryCount + " queries");
	}
	
}



