
/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is FatUtils.java.
*
* The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Craig Macdonald
*/
package org.terrier.matching;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.io.Writable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.terrier.matching.daat.FatCandidateResultSet;
import org.terrier.querying.IndexRef;
import org.terrier.structures.CollectionStatistics;
import org.terrier.structures.DocumentIndex;
import org.terrier.structures.EntryStatistics;
import org.terrier.structures.FieldEntryStatistics;
import org.terrier.structures.FieldLexiconEntry;
import org.terrier.structures.Index;
import org.terrier.structures.Lexicon;
import org.terrier.structures.LexiconEntry;
import org.terrier.structures.MetaIndex;
import org.terrier.structures.Pointer;
import org.terrier.structures.PostingIndex;
import org.terrier.structures.SimpleNgramEntryStatistics;
import org.terrier.structures.collections.MapEntry;
import org.terrier.structures.postings.BasicPostingImpl;
import org.terrier.structures.postings.BlockFieldPostingImpl;
import org.terrier.structures.postings.BlockPosting;
import org.terrier.structures.postings.BlockPostingImpl;
import org.terrier.structures.postings.FieldPosting;
import org.terrier.structures.postings.FieldPostingImpl;
import org.terrier.structures.postings.IterablePosting;
import org.terrier.structures.postings.IterablePostingImpl;
import org.terrier.structures.postings.WritablePosting;
import org.terrier.utility.ApplicationSetup;
import org.terrier.utility.ArrayUtils;
import org.terrier.utility.Files;
import org.terrier.utility.io.DebuggingDataInput;
import org.terrier.utility.io.DebuggingDataOutput;
import org.terrier.utility.io.WrappedIOException;
/** Various utilities for dealing with {@link FatResultSet}s.
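* <p>A typical round trip, as a minimal sketch (the file name, the FatResultSet
* instance <code>frs</code> and the use of <code>Files.writeFileStream</code> as the
* counterpart of <code>Files.openFileStream</code> are illustrative assumptions,
* not part of this class):
* <pre>
* DataOutputStream dos = new DataOutputStream(Files.writeFileStream("results.fat.gz"));
* dos.writeUTF("q1"); //the query id precedes each result set on disk
* FatUtils.write(frs, dos);
* dos.close();
* //later: iterate over all (qid, FatResultSet) pairs in the file
* Iterator&lt;Map.Entry&lt;String,FatResultSet&gt;&gt; it = FatUtils.readFatResultSet("results.fat.gz");
* </pre>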
* @author Craig Macdonald
* @since 4.0
*/
public class FatUtils {
private static final byte VERSION = 6;
private static final boolean DEBUG = false;
static Logger logger = LoggerFactory.getLogger(FatUtils.class);
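/** Round-trips the specified FatResultSet through its serialized form, returning
* a deep copy. Handy for checking that write() and readFields() are symmetric. */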
public static FatResultSet recreate(FatResultSet frs) throws IOException
{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
frs.write(dos);
FatResultSet rtr = new FatCandidateResultSet();
rtr.readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
return rtr;
}
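/** Populates the specified FatResultSet from its serialized form. The first byte
* on the stream is a layout version number; versions 2 through 6 are dispatched
* to the corresponding readFieldsV* method below. */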
public static void readFields(FatResultSet frs, DataInput in) throws IOException
{
if (DEBUG)
in = new DebuggingDataInput(in);
try{
byte version = in.readByte();
switch (version) {
case 2: readFieldsV2(frs, in); break;
case 3: readFieldsV3(frs, in); break;
case 4: readFieldsV4(frs, in); break;
case 5: readFieldsV6(frs, in, false); break;
case 6: readFieldsV6(frs, in, true); break;
default: throw new IOException("Version mismatch, version " + version +" is not supported");
}
}catch (EOFException eofe) {
logger.error("EOF within FatUtils.read()", eofe);
throw eofe;
}catch (IOException ioe) {
logger.error("EOF within FatUtils.read()", ioe);
throw ioe;
}
}
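/* Version 2 layout: CollectionStatistics; the number of query terms; one
EntryStatistics class name and one WritablePosting class name shared by all
terms; per term, the term, its key frequency and its entry statistics; the
number of documents; then per document, the docid, score, occurrence bits,
document (and field) lengths, and one optional posting per term. */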
protected static void readFieldsV2(FatResultSet frs, DataInput in)
throws IOException
{
int i =-1;
int resultSize = -1;
int j = -1;
int termCount = -1;
try{
CollectionStatistics collStats = new CollectionStatistics();
collStats.readFields(in);
frs.setCollectionStatistics(collStats);
final boolean fields = collStats.getNumberOfFields() > 0;
final int fieldCount = collStats.getNumberOfFields();
//read number of query terms
termCount = in.readInt();
if (termCount == 0)
{
frs.setResultSize(0);
final int[] docids = new int[0];
final double[] scores = new double[0];
final short[] occurrences = new short[0];
final WritablePosting[][] postings = new WritablePosting[0][];
frs.setScores(scores);
frs.setDocids(docids);
frs.setPostings(postings);
frs.setOccurrences(occurrences);
frs.setEntryStatistics(new EntryStatistics[0]);
frs.setKeyFrequencies(new double[0]);
frs.setQueryTerms(new String[0]);
logger.warn("No found terms for this query");
return;
}
//read the classes to use
String statsClassName = in.readUTF();
//hack for some older fat result versions
if (statsClassName.equals("org.terrier.structures.FieldIndex$FieldIndexLexiconEntry"))
statsClassName = FieldLexiconEntry.class.getName();
Class<? extends EntryStatistics> statisticsClass = ApplicationSetup.getClass(statsClassName).asSubclass(EntryStatistics.class);
Class<? extends WritablePosting> postingClass = ApplicationSetup.getClass(in.readUTF()).asSubclass(WritablePosting.class);
//read terms and entry statistics
final EntryStatistics[] entryStats = new EntryStatistics[termCount];
final String[] queryTerms = new String[termCount];
final double[] keyFrequencies = new double[termCount];
for(j=0;j<termCount;j++)
{
queryTerms[j] = in.readUTF();
keyFrequencies[j] = in.readDouble();
//(reconstructed: the remainder of this method was lost in the source listing; it mirrors readFieldsV6 below)
EntryStatistics le = fields
? statisticsClass.getConstructor(Integer.TYPE).newInstance(fieldCount)
: statisticsClass.newInstance();
((Writable)le).readFields(in);
entryStats[j] = le;
}
frs.setEntryStatistics(entryStats);
frs.setKeyFrequencies(keyFrequencies);
frs.setQueryTerms(queryTerms);
//read the number of documents
resultSize = in.readInt();
//size the arrays
final int[] docids = new int[resultSize];
final double[] scores = new double[resultSize];
final short[] occurrences = new short[resultSize];
final WritablePosting[][] postings = new WritablePosting[resultSize][];
//for each document
for (i = 0; i < resultSize; i++)
{
//read: docid, scores, occurrences
docids[i] = in.readInt();
scores[i] = in.readDouble();
occurrences[i] = in.readShort();
final int docLen = in.readInt();
final int[] fieldLens;
if (fields)
{
fieldLens = new int[fieldCount];
for(int fi=0;fi<fieldCount;fi++)
fieldLens[fi] = in.readInt();
}
else
{
fieldLens = null;
}
postings[i] = new WritablePosting[termCount];
//for each term
for(j=0;j<termCount;j++)
{
//read: whether this term matched this document
if (in.readBoolean())
{
final WritablePosting p = postings[i][j] = postingClass.newInstance();
p.readFields(in);
p.setDocumentLength(docLen);
if (fields)
((FieldPosting)p).setFieldLengths(fieldLens);
}
}
}
frs.setScores(scores);
frs.setDocids(docids);
frs.setPostings(postings);
frs.setOccurrences(occurrences);
frs.setResultSize(resultSize);
} catch (IOException ioe) {
throw new WrappedIOException("IOException (reset to start perhaps?), was reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount, ioe);
} catch (Exception e) {
throw new WrappedIOException("Problem reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount, e);
}
}
protected static void readFieldsV3(FatResultSet frs, DataInput in)
throws IOException
{
int i =-1;
int resultSize = -1;
int j = -1;
int termCount = -1;
int lastDocid = -1;
try{
CollectionStatistics collStats = new CollectionStatistics();
collStats.readFields(in);
frs.setCollectionStatistics(collStats);
final int fieldCount = collStats.getNumberOfFields();
//read number of query terms
termCount = in.readInt();
if (termCount == 0)
{
frs.setResultSize(0);
frs.setScores(new double[0]);
frs.setDocids(new int[0]);
frs.setPostings(new WritablePosting[0][]);
frs.setOccurrences(new short[0]);
frs.setEntryStatistics(new EntryStatistics[0]);
frs.setKeyFrequencies(new double[0]);
frs.setQueryTerms(new String[0]);
logger.warn("No found terms for this query");
return;
}
@SuppressWarnings("unchecked")
Class<? extends WritablePosting> postingClass[] = new Class[termCount];
//read terms and entry statistics
final EntryStatistics[] entryStats = new EntryStatistics[termCount];
final String[] queryTerms = new String[termCount];
final double[] keyFrequencies = new double[termCount];
final boolean[] fields = new boolean[termCount];
final boolean[] blocks = new boolean[termCount];
for(j=0;j<termCount;j++)
{
queryTerms[j] = in.readUTF();
fields[j] = in.readBoolean();
blocks[j] = in.readBoolean();
//(reconstructed: this span was lost in the source listing; it follows the order used by write() below)
if (in.readBoolean())
postingClass[j] = ApplicationSetup.getClass(in.readUTF()).asSubclass(WritablePosting.class);
Class<? extends EntryStatistics> statisticsClass = ApplicationSetup.getClass(in.readUTF()).asSubclass(EntryStatistics.class);
keyFrequencies[j] = in.readDouble();
logger.debug(queryTerms[j] + " f=" +fields[j] + " b="+blocks[j] +" postings="+postingClass[j] +
" es="+statisticsClass.getSimpleName() /*+
" es.isAssignableFrom(FieldEntryStatistics.class)="+statisticsClass.isAssignableFrom(FieldEntryStatistics.class) +
" FieldEntryStatistics.class.isAssignableFrom(es)="+FieldEntryStatistics.class.isAssignableFrom(statisticsClass)*/);
EntryStatistics le = fields[j] || /* HACK */ FieldEntryStatistics.class.isAssignableFrom(statisticsClass)
? statisticsClass.getConstructor(Integer.TYPE).newInstance(fieldCount)
: statisticsClass.newInstance();
((Writable)le).readFields(in);
if (queryTerms[j].contains("#uw") || queryTerms[j].contains("#1"))
{
if (queryTerms[j].contains("#uw12")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(12);
}else if (queryTerms[j].contains("#uw8")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(8);
}else if (queryTerms[j].contains("#uw4")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(4);
}else if (queryTerms[j].contains("#1")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(2);
}
}
entryStats[j] = le;
}
frs.setEntryStatistics(entryStats);
frs.setKeyFrequencies(keyFrequencies);
frs.setQueryTerms(queryTerms);
//read the number of documents
resultSize = in.readInt();
//size the arrays
final int[] docids = new int[resultSize];
final double[] scores = new double[resultSize];
final short[] occurrences = new short[resultSize];
final WritablePosting[][] postings = new WritablePosting[resultSize][];
//for each document
for (i = 0; i < resultSize; i++)
{
//read: docid, scores, occurrences
lastDocid = docids[i] = in.readInt();
scores[i] = in.readDouble();
occurrences[i] = in.readShort();
final int docLen = in.readInt();
final int[] fieldLens;
if (fieldCount > 0)
{
fieldLens = new int[fieldCount];
for(int fi=0;fi<fieldCount;fi++)
fieldLens[fi] = in.readInt();
}
else
{
fieldLens = null;
}
//(reconstructed: the remainder of readFieldsV3 was lost in the source listing; it mirrors readFieldsV6 below)
postings[i] = new WritablePosting[termCount];
//for each term
for(j=0;j<termCount;j++)
{
//read: whether this term matched this document
if (in.readBoolean())
{
final WritablePosting p = postings[i][j] = postingClass[j].newInstance();
p.readFields(in);
p.setDocumentLength(docLen);
if (fields[j])
((FieldPosting)p).setFieldLengths(fieldLens);
}
}
}
frs.setScores(scores);
frs.setDocids(docids);
frs.setPostings(postings);
frs.setOccurrences(occurrences);
frs.setResultSize(resultSize);
} catch (IOException ioe) {
throw new WrappedIOException("IOException (reset to start perhaps?), was reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, ioe);
} catch (Exception e) {
throw new WrappedIOException("Problem reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, e);
}
}
protected static void readFieldsV4(FatResultSet frs, DataInput in)
throws IOException
{
int i =-1;
int resultSize = -1;
int j = -1;
int termCount = -1;
int lastDocid = -1;
try{
CollectionStatistics collStats = new CollectionStatistics();
collStats.readFields(in);
frs.setCollectionStatistics(collStats);
final int fieldCount = collStats.getNumberOfFields();
//read number of query terms
termCount = in.readInt();
if (termCount == 0)
{
frs.setResultSize(0);
final int[] docids = new int[0];
final double[] scores = new double[0];
final short[] occurrences = new short[0];
@SuppressWarnings("unchecked")
final Set<String>[] tags = new Set[0];
final WritablePosting[][] postings = new WritablePosting[0][];
frs.setScores(scores);
frs.setDocids(docids);
frs.setPostings(postings);
frs.setOccurrences(occurrences);
frs.setTags(tags);
frs.setEntryStatistics(new EntryStatistics[0]);
frs.setKeyFrequencies(new double[0]);
frs.setQueryTerms(new String[0]);
logger.warn("No found terms for this query");
return;
}
@SuppressWarnings("unchecked")
Class<? extends WritablePosting> postingClass[] = new Class[termCount];
//read terms and entry statistics
final EntryStatistics[] entryStats = new EntryStatistics[termCount];
final String[] queryTerms = new String[termCount];
final String[] tags = new String[termCount];
final double[] keyFrequencies = new double[termCount];
final boolean[] fields = new boolean[termCount];
final boolean[] blocks = new boolean[termCount];
for(j=0;j<termCount;j++)
{
queryTerms[j] = in.readUTF();
//(reconstructed: this span was lost in the source listing; V4 stores a single tag string per term)
tags[j] = in.readUTF();
fields[j] = in.readBoolean();
blocks[j] = in.readBoolean();
if (in.readBoolean())
postingClass[j] = ApplicationSetup.getClass(in.readUTF()).asSubclass(WritablePosting.class);
Class<? extends EntryStatistics> statisticsClass = ApplicationSetup.getClass(in.readUTF()).asSubclass(EntryStatistics.class);
keyFrequencies[j] = in.readDouble();
logger.debug(queryTerms[j] + " f=" +fields[j] + " b="+blocks[j] +" postings="+postingClass[j] +
" es="+statisticsClass.getSimpleName() /*+
" es.isAssignableFrom(FieldEntryStatistics.class)="+statisticsClass.isAssignableFrom(FieldEntryStatistics.class) +
" FieldEntryStatistics.class.isAssignableFrom(es)="+FieldEntryStatistics.class.isAssignableFrom(statisticsClass)*/);
EntryStatistics le = fields[j] || /* HACK */ FieldEntryStatistics.class.isAssignableFrom(statisticsClass)
? statisticsClass.getConstructor(Integer.TYPE).newInstance(fieldCount)
: statisticsClass.newInstance();
((Writable)le).readFields(in);
if (queryTerms[j].contains("#uw") || queryTerms[j].contains("#1"))
{
if (queryTerms[j].contains("#uw12")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(12);
}else if (queryTerms[j].contains("#uw8")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(8);
}else if (queryTerms[j].contains("#uw4")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(4);
}else if (queryTerms[j].contains("#1")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(2);
}
}
entryStats[j] = le;
}
frs.setEntryStatistics(entryStats);
frs.setKeyFrequencies(keyFrequencies);
frs.setQueryTerms(queryTerms);
//read the number of documents
resultSize = in.readInt();
//size the arrays
final int[] docids = new int[resultSize];
final double[] scores = new double[resultSize];
final short[] occurrences = new short[resultSize];
final WritablePosting[][] postings = new WritablePosting[resultSize][];
//for each document
for (i = 0; i < resultSize; i++)
{
//read: docid, scores, occurrences
lastDocid = docids[i] = in.readInt();
scores[i] = in.readDouble();
occurrences[i] = in.readShort();
final int docLen = in.readInt();
final int[] fieldLens;
if (fieldCount > 0)
{
fieldLens = new int[fieldCount];
for(int fi=0;fi<fieldCount;fi++)
fieldLens[fi] = in.readInt();
}
else
{
fieldLens = null;
}
//(reconstructed: the remainder of this loop was lost in the source listing; it mirrors readFieldsV6 below)
postings[i] = new WritablePosting[termCount];
//for each term
for(j=0;j<termCount;j++)
{
//read: whether this term matched this document
if (in.readBoolean())
{
final WritablePosting p = postings[i][j] = postingClass[j].newInstance();
p.readFields(in);
p.setDocumentLength(docLen);
if (fields[j])
((FieldPosting)p).setFieldLengths(fieldLens);
}
}
}
frs.setScores(scores);
frs.setDocids(docids);
frs.setPostings(postings);
frs.setOccurrences(occurrences);
frs.setResultSize(resultSize);
//wrap each per-term tag string in the set form expected by the FatResultSet
@SuppressWarnings("unchecked")
final Set<String>[] finalTags = new Set[tags.length];
for(int ti=0;ti<tags.length;ti++)
{
finalTags[ti] = new HashSet<String>();
finalTags[ti].add(tags[ti]);
}
frs.setTags(finalTags);
} catch (IOException ioe) {
throw new WrappedIOException("IOException (reset to start perhaps?), was reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, ioe);
} catch (Exception e) {
throw new WrappedIOException("Problem reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, e);
}
}
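/* Versions 5 and 6 share one reader, branching only on how the
CollectionStatistics are deserialized (see readFieldsV6 below). Note that
readFields above dispatches version 5 straight to readFieldsV6, so this
wrapper is effectively unused. */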
protected static void readFieldsV5(FatResultSet frs, DataInput in, boolean v6)
throws IOException {
readFieldsV6(frs, in, false);
}
protected static void readFieldsV6(FatResultSet frs, DataInput in, boolean v6)
throws IOException
{
int i =-1;
int resultSize = -1;
int j = -1;
int termCount = -1;
int lastDocid = -1;
try{
CollectionStatistics collStats = new CollectionStatistics();
if (v6)
collStats.readFields(in);
else
collStats.readFieldsV5(in);
frs.setCollectionStatistics(collStats);
final int fieldCount = collStats.getNumberOfFields();
//read number of query terms
termCount = in.readInt();
if (termCount == 0)
{
frs.setResultSize(0);
final int[] docids = new int[0];
final double[] scores = new double[0];
final short[] occurrences = new short[0];
@SuppressWarnings("unchecked")
final Set<String>[] tags = new Set[0];
final WritablePosting[][] postings = new WritablePosting[0][];
frs.setScores(scores);
frs.setDocids(docids);
frs.setPostings(postings);
frs.setOccurrences(occurrences);
frs.setTags(tags);
frs.setEntryStatistics(new EntryStatistics[0]);
frs.setKeyFrequencies(new double[0]);
frs.setQueryTerms(new String[0]);
logger.warn("No found terms for this query");
return;
}
@SuppressWarnings("unchecked")
Class<? extends WritablePosting> postingClass[] = new Class[termCount];
//read terms and entry statistics
final EntryStatistics[] entryStats = new EntryStatistics[termCount];
final String[] queryTerms = new String[termCount];
@SuppressWarnings("unchecked")
final Set<String>[] tags = new Set[termCount];
final double[] keyFrequencies = new double[termCount];
final boolean[] fields = new boolean[termCount];
final boolean[] blocks = new boolean[termCount];
for(j=0;j<termCount;j++)
{
queryTerms[j] = in.readUTF();
//(reconstructed: this span was lost in the source listing; it mirrors the order used by write() below)
if (in.readBoolean())
{
tags[j] = new HashSet<String>();
final int tagCount = in.readInt();
for(int ti=0;ti<tagCount;ti++)
tags[j].add(in.readUTF());
}
fields[j] = in.readBoolean();
blocks[j] = in.readBoolean();
if (in.readBoolean())
postingClass[j] = ApplicationSetup.getClass(in.readUTF()).asSubclass(WritablePosting.class);
Class<? extends EntryStatistics> statisticsClass = ApplicationSetup.getClass(in.readUTF()).asSubclass(EntryStatistics.class);
keyFrequencies[j] = in.readDouble();
logger.debug(queryTerms[j] + " f=" +fields[j] + " b="+blocks[j] +" postings="+postingClass[j] +
" es="+statisticsClass.getSimpleName() /*+
" es.isAssignableFrom(FieldEntryStatistics.class)="+statisticsClass.isAssignableFrom(FieldEntryStatistics.class) +
" FieldEntryStatistics.class.isAssignableFrom(es)="+FieldEntryStatistics.class.isAssignableFrom(statisticsClass)*/);
EntryStatistics le = fields[j] || /* HACK */ FieldEntryStatistics.class.isAssignableFrom(statisticsClass)
? statisticsClass.getConstructor(Integer.TYPE).newInstance(fieldCount)
: statisticsClass.newInstance();
((Writable)le).readFields(in);
if (queryTerms[j].contains("#uw") || queryTerms[j].contains("#1"))
{
if (queryTerms[j].contains("#uw12")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(12);
}else if (queryTerms[j].contains("#uw8")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(8);
}else if (queryTerms[j].contains("#uw4")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(4);
}else if (queryTerms[j].contains("#1")){
le = new SimpleNgramEntryStatistics(le);
((SimpleNgramEntryStatistics)le).setWindowSize(2);
}
}
entryStats[j] = le;
}
frs.setEntryStatistics(entryStats);
frs.setKeyFrequencies(keyFrequencies);
frs.setQueryTerms(queryTerms);
//read the number of documents
resultSize = in.readInt();
//size the arrays
final int[] docids = new int[resultSize];
final double[] scores = new double[resultSize];
final short[] occurrences = new short[resultSize];
final WritablePosting[][] postings = new WritablePosting[resultSize][];
//for each document
for (i = 0; i < resultSize; i++)
{
//read: docid, scores, occurrences
lastDocid = docids[i] = in.readInt();
scores[i] = in.readDouble();
occurrences[i] = in.readShort();
final int docLen = in.readInt();
final int[] fieldLens;
if (fieldCount > 0)
{
fieldLens = new int[fieldCount];
for(int fi=0;fi<fieldCount;fi++)
fieldLens[fi] = in.readInt();
}
else
{
fieldLens = null;
}
//(reconstructed: this span was lost in the source listing; it mirrors the reading pattern of the earlier versions)
postings[i] = new WritablePosting[termCount];
//for each term
for(j=0;j<termCount;j++)
{
//read: whether this term matched this document
if (in.readBoolean())
{
final WritablePosting p = postings[i][j] = postingClass[j].newInstance();
p.readFields(in);
p.setDocumentLength(docLen);
if (fields[j])
((FieldPosting)p).setFieldLengths(fieldLens);
}
}
}
frs.setScores(scores);
frs.setDocids(docids);
frs.setPostings(postings);
frs.setOccurrences(occurrences);
frs.setResultSize(resultSize);
frs.setTags(tags);
} catch (IOException ioe) {
throw new WrappedIOException("IOException (reset to start perhaps?), was reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, ioe);
} catch (Exception e) {
throw new WrappedIOException("Problem reading document at rank " + i + " of " + resultSize + ", term " + j + " of " + termCount + " docid="+lastDocid, e);
}
}
/** Writes the specified FatResultSet to the specified output, using the current
* (version 6) layout. */
public static void write(FatResultSet frs, DataOutput out) throws IOException
{
if (DEBUG)
out = new DebuggingDataOutput(out);
//(reconstructed: the head of this method was lost in the source listing)
out.writeByte(VERSION);
final CollectionStatistics collStats = frs.getCollectionStatistics();
final String[] queryTerms = frs.getQueryTerms();
final EntryStatistics[] entryStats = frs.getEntryStatistics();
final Set<String>[] tags = frs.getTags();
final double[] keyFrequency = frs.getKeyFrequencies();
final WritablePosting[][] postings = frs.getPostings();
final int[] docids = frs.getDocids();
final double[] scores = frs.getScores();
final short[] occurrences = frs.getOccurrences();
collStats.write(out);
final int fieldCount = collStats.getNumberOfFields();
final int queryTermCount = queryTerms.length;
final boolean fields[] = new boolean[queryTermCount];
final boolean blocks[] = new boolean[queryTermCount];
assert tags != null;
//write out the number of query terms
out.writeInt(queryTermCount);
if(queryTermCount == 0)
return;
//write out query terms
//write out the entry statistics
for (int i = 0; i < queryTermCount; i++){
out.writeUTF(queryTerms[i]);
out.writeBoolean(tags[i] != null);
if (tags[i] != null)
{
out.writeInt(tags[i].size());
for(String t : tags[i])
out.writeUTF(t);
}
WritablePosting firstPostingForTerm = firstPosting(postings, i);
fields[i] = firstPostingForTerm instanceof FieldPosting;
blocks[i] = firstPostingForTerm instanceof BlockPosting;
out.writeBoolean(fields[i]);
out.writeBoolean(blocks[i]);
//HACK: MultiQueryTerm USED TO cause problems as it can return a FieldEntryStatistics where none is possible.???
//if (! fields[i])
//{
// entryStats[i] = new BasicLexiconEntry(entryStats[i].getTermId(), entryStats[i].getDocumentFrequency(), entryStats[i].getFrequency());
//}
out.writeBoolean(firstPostingForTerm != null);
if (firstPostingForTerm != null)
{
//write out the classes
out.writeUTF(firstPostingForTerm.getClass().getName());
//if we don't have a FieldPosting list, we should not have a FieldEntryStatistics
assert fields[i] || ! (entryStats[i] instanceof FieldEntryStatistics);
}
out.writeUTF(entryStats[i].getClass().getName());
out.writeDouble(keyFrequency[i]);
((Writable)entryStats[i]).write(out);
}
//write out the number of documents
out.writeInt(docids.length);
int i = 0;
//for each document
long notNullPostings = 0;
for (i = 0; i < docids.length; i ++) {
//write out the docid to out
out.writeInt(docids[i]);
//write out the score
out.writeDouble(scores[i]);
//write out the occurrences
out.writeShort(occurrences[i]);
//write out the document length, and possible field lengths
WritablePosting firstPosting = firstPosting(postings[i]);
assert firstPosting != null : "Docid " + docids[i] + " with score " + scores[i] + " has no matching postings";
out.writeInt(firstPosting.getDocumentLength());
if (fieldCount > 0)
{
final int[] fieldLengths = ((FieldPosting)firstPosting).getFieldLengths();
assert fieldLengths.length == fieldCount;
for(int fi=0;fi<fieldCount;fi++)
out.writeInt(fieldLengths[fi]);
}
//(reconstructed: the tail of this method was lost in the source listing)
//for each term, write a presence flag, then the posting itself if this term matched this document
for (int ti = 0; ti < queryTermCount; ti++)
{
final WritablePosting p = postings[i][ti];
out.writeBoolean(p != null);
if (p != null)
{
p.write(out);
notNullPostings++;
}
}
}
logger.debug("Wrote out " + docids.length + " documents, with " + notNullPostings + " non-null postings");
}
/** Creates an Index backed only by the contents of the specified FatResultSet:
* its lexicon knows only the query terms, and its inverted index serves slices
* of the postings matrix. */
public static Index makeIndex(final FatResultSet frs)
{
final String[] queryTerms = frs.getQueryTerms();
final WritablePosting[][] postings = frs.getPostings();
final CollectionStatistics collStats = frs.getCollectionStatistics();
//(reconstructed: the head of this method was lost in the source listing)
final Map<String,EntryStatistics> statsMap = new HashMap<String,EntryStatistics>();
final boolean fields = frs.getCollectionStatistics().getNumberOfFields() > 0;
final int fieldCount = frs.getCollectionStatistics().getNumberOfFields();
final boolean blocks = firstPosting(postings) instanceof BlockPosting;
//make maps based on the terms
for (int i=0;i<queryTerms.length;i++)
{
if (firstPosting(postings, i) != null)
{
//(reconstructed: this span was lost in the source listing)
//the termid is overridden to denote the column of this term in the postings matrix
final LexiconEntry le = (LexiconEntry) frs.getEntryStatistics()[i];
le.setTermId(i);
statsMap.put(queryTerms[i], le);
logger.debug("makeIndex: " + queryTerms[i] + " => " + i);
}
}
else
{
//TODO: optimise this by knowing at creation/loading of resultset.
logger.warn("Ignoring term " + queryTerms[i] + " as it has no non-null postings in the FatResultSet");
}
}
//check that each document row has at least one matching posting
//(reconstructed: this span was lost in the source listing)
for (int di=0;di<postings.length;di++)
assert firstPosting(postings[di]) != null;
//rows of the postings matrix must be sorted by ascending docid for use as an inverted index
Arrays.sort(postings, new Comparator<WritablePosting[]>()
{
@Override
public int compare(WritablePosting[] p1, WritablePosting[] p2) {
final int x = firstPosting(p1).getId();
final int y = firstPosting(p2).getId();
return (x < y) ? -1 : ((x == y) ? 0 : 1);
}
});
final Lexicon<String> lex = new Lexicon<String>() {
@Override
public LexiconEntry getLexiconEntry(String term) {
return (LexiconEntry) statsMap.get(term);
}
@Override
public Entry<String, LexiconEntry> getLexiconEntry(int termid) {
throw new UnsupportedOperationException();
}
@Override
public Entry<String, LexiconEntry> getIthLexiconEntry(int index) {
throw new UnsupportedOperationException();
}
@Override
public void close() throws IOException {}
@Override
public Iterator<Entry<String, LexiconEntry>> iterator() {
throw new UnsupportedOperationException();
}
@Override
public int numberOfEntries() {
throw new UnsupportedOperationException();
}
@Override
public Iterator<Entry<String, LexiconEntry>> getLexiconEntryRange(
String from, String to) {
throw new UnsupportedOperationException();
}
};
final PostingIndex<Pointer> inv = new PostingIndex<Pointer>()
{
@Override
public void close() throws IOException {}
@Override
public IterablePosting getPostings(Pointer lEntry)
throws IOException
{
final int term = ((LexiconEntry)lEntry).getTermId();
//System.err.println("read: " + lEntry + " => " + term);
if (blocks && fields)
return new BFIterablePostingFromWritablePostingSlice(postings, term);
else if (fields)
return new FIterablePostingFromWritablePostingSlice(postings, term);
else if (blocks)
return new BIterablePostingFromWritablePostingSlice(postings, term);
else
return new IterablePostingFromWritablePostingSlice(postings, term);
}
};
return new Index(){
@Override
public PostingIndex<?> getInvertedIndex() {
return inv;
}
@Override
public Lexicon<String> getLexicon() {
return lex;
}
@Override
public CollectionStatistics getCollectionStatistics() {
return collStats;
}
@Override
public PostingIndex<?> getDirectIndex() {
return null;
}
@Override
public DocumentIndex getDocumentIndex() {
return null;
}
@Override
public MetaIndex getMetaIndex() {
return null;
}
@Override
public String toString() {
return this.getClass().getSimpleName();
}
@Override
public IndexRef getIndexRef() {
return makeDirectIndexRef(this);
}
};
}
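/* The postings of a FatResultSet form a document-by-term matrix. The classes
below expose one column ("slice") of that matrix as an IterablePosting,
skipping the null cells of documents that a term did not match; the B/F/BF
variants add block (position) and/or field support by delegating to the
current WritablePosting. A minimal sketch of reading postings back out of the
fat index (the term "terrier" and a previously obtained FatResultSet frs are
illustrative assumptions):

Index fatIndex = FatUtils.makeIndex(frs);
LexiconEntry le = fatIndex.getLexicon().getLexiconEntry("terrier");
IterablePosting ip = fatIndex.getInvertedIndex().getPostings(le);
while (ip.next() != IterablePosting.EOL)
System.out.println(ip.getId() + " tf=" + ip.getFrequency());
*/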
static class IterablePostingFromWritablePostingSlice extends IterablePostingImpl
{
final WritablePosting[][] postings; //document, term
final int slice;
WritablePosting current;
int index;
public IterablePostingFromWritablePostingSlice(WritablePosting[][] postings, int slice)
{
this.index = -1;
this.postings = postings;
this.slice = slice;
}
@Override
public int next() throws IOException {
index++;
if (index >= postings.length)
return EOL;
current = postings[index][slice];
while(current == null)
{
index++;
if (index >= postings.length)
return EOL;
current = postings[index][slice];
}
return current.getId();
}
@Override
public boolean endOfPostings() {
return index >= postings.length;
}
@Override
public int getId() {
return current.getId();
}
@Override
public int getFrequency() {
return current.getFrequency();
}
@Override
public int getDocumentLength() {
return current.getDocumentLength();
}
@Override
public WritablePosting asWritablePosting() {
return current.asWritablePosting();
}
@Override
public void close() throws IOException {}
}
static class BIterablePostingFromWritablePostingSlice extends IterablePostingFromWritablePostingSlice implements BlockPosting
{
public BIterablePostingFromWritablePostingSlice(
WritablePosting[][] postings, int slice) {
super(postings, slice);
}
@Override
public int[] getPositions() {
return ((BlockPosting)current).getPositions();
}
}
static class FIterablePostingFromWritablePostingSlice extends IterablePostingFromWritablePostingSlice implements FieldPosting
{
public FIterablePostingFromWritablePostingSlice(
WritablePosting[][] postings, int slice) {
super(postings, slice);
}
@Override
public int[] getFieldFrequencies() {
return ((FieldPosting)current).getFieldFrequencies();
}
@Override
public int[] getFieldLengths() {
return ((FieldPosting)current).getFieldLengths();
}
@Override
public void setFieldLengths(int[] newLengths) {
((FieldPosting)current).setFieldLengths(newLengths);
}
}
static class BFIterablePostingFromWritablePostingSlice extends BIterablePostingFromWritablePostingSlice implements FieldPosting
{
public BFIterablePostingFromWritablePostingSlice(
WritablePosting[][] postings, int slice) {
super(postings, slice);
}
@Override
public int[] getFieldFrequencies() {
return ((FieldPosting)current).getFieldFrequencies();
}
@Override
public int[] getFieldLengths() {
return ((FieldPosting)current).getFieldLengths();
}
@Override
public void setFieldLengths(int[] newLengths) {
((FieldPosting)current).setFieldLengths(newLengths);
}
/** Makes a human readable form of this posting */
@Override
public String toString()
{
return "(" + getId() + "," + getFrequency() + ",F[" + ArrayUtils.join(getFieldFrequencies(), ",")
+ "],B[" + ArrayUtils.join(getPositions(), ",") + "])";
}
}
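/** Returns the first non-null posting anywhere in the postings matrix, or null
* if every cell is null. Used above to sniff whether the postings carry block
* and/or field information. */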
protected static WritablePosting firstPosting(WritablePosting[][] postings)
{
for(int i=0;i<postings.length;i++)
{
final WritablePosting p = firstPosting(postings[i]);
if (p != null)
return p;
}
return null;
}
/** Returns the first non-null posting in one document's row of the matrix, or null.
* (reconstructed: these overloads were lost in the source listing; their behaviour is implied by their uses above) */
protected static WritablePosting firstPosting(WritablePosting[] row)
{
for(int j=0;j<row.length;j++)
if (row[j] != null)
return row[j];
return null;
}
/** Returns the first non-null posting in one term's column of the matrix, or null. */
protected static WritablePosting firstPosting(WritablePosting[][] postings, int term)
{
for(int i=0;i<postings.length;i++)
if (postings[i][term] != null)
return postings[i][term];
return null;
}
/** Returns a one-line summary of the specified FatResultSet.
* (reconstructed sketch: the original body was lost in the source listing) */
static String getInfo(FatResultSet frs)
{
return frs.getResultSize() + " documents, " + frs.getQueryTerms().length + " query terms";
}
/** Prints the contents of the specified FatResultSet to stdout.
* (reconstructed sketch: the original body was lost in the source listing) */
static void dump(FatResultSet frs)
{
final int[] docids = frs.getDocids();
final double[] scores = frs.getScores();
final WritablePosting[][] postings = frs.getPostings();
for(int i=0;i<docids.length;i++)
System.out.println(docids[i] + " " + scores[i] + " " + Arrays.toString(postings[i]));
}
static interface CloseableIterator<E> extends Closeable, Iterator<E>{};
public static Iterator<Map.Entry<String,FatResultSet>> readFatResultSet(String filename) throws IOException
{
final DataInputStream dis = new DataInputStream(Files.openFileStream(filename));
return new CloseableIterator<Map.Entry<String,FatResultSet>>() {
boolean more = true;
@Override
public boolean hasNext() {
return more;
}
@Override
public Entry<String,FatResultSet> next() {
try{
String qid = dis.readUTF();
FatResultSet frs = new FatCandidateResultSet();
frs.readFields(dis);
return new MapEntry<String,FatResultSet>(qid, frs);
} catch (IOException e) {
more = false;
return null;
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public void close() throws IOException {
dis.close();
}
};
}
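/** Command line utility: for each query in the named fat file, prints either a
* one-line summary (--info) or the full contents (--dump) of its FatResultSet. */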
public static void main(String[] args) throws IOException
{
if (args.length != 2)
{
System.err.println("Usage: " +FatUtils.class.getName() + " {--info|--dump} results.fat.gz");
return;
}
final boolean dump = args[0].equals("--dump");
FatResultSet frs = new FatCandidateResultSet();
DataInputStream dis = new DataInputStream(Files.openFileStream(args[1]));
int queryCount = 0;
while(true)
{
try
{
String qid = dis.readUTF();
System.err.println("Now reading query " + qid);
queryCount++;
frs.readFields(dis);
if (! dump)
System.out.println(qid + " " + getInfo(frs));
else
{
System.out.println("Query " + qid);
dump(frs);
System.out.println();
}
}
catch (EOFException e) {
break;
}
}
System.out.println("Total " + queryCount + " queries");
}
}