
org.terrier.matching.FatFeaturedScoringMatching Maven / Gradle / Ivy
/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is FatFeaturedScoringMatching.java.
*
* The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Craig Macdonald
*/
package org.terrier.matching;
import java.io.IOException;
import java.util.Arrays;
import org.terrier.learning.FeaturedQueryResultSet;
import org.terrier.learning.FeaturedResultSet;
import org.terrier.matching.models.WeightingModel;
import org.terrier.structures.Index;
import org.terrier.structures.postings.BlockFieldPostingImpl;
import org.terrier.structures.postings.FieldPosting;
import org.terrier.structures.postings.WritablePosting;
/** Makes a {@link FeaturedResultSet} by applying a list of features. The input from a parent matching class is a {@link FatResultSet}.
*
* Feature names have a particular format:
*
* - WMODEL: defines a weighting model for all matching query terms (or other operator), i.e. a query dependent feature.
* - WMODELt: defines a weighting model for all matching single terms, i.e. a query dependent feature.
* - WMODELp1: defines a weighting model for all matching #1 proximity ops (query dependent).
* - WMODELuw8: defines a weighting model for all matching #uw8 proxity op (query dependent).
* - WMODEL$tag: defines a weighting model for all the matching op tagged "tag" - see #tag() in the MatchOp ql (query dependent).
* - QI: defined a weighting model called once for each matching document, i.e. a query independent feature.
* - DSM: applies a document score modifier.
* - SAMPLE the scoring method used by the parent {@link Matching} class becomes a feature.
*
*
*
*
* Properties:
*
* - fat.featured.scoring.matching.features - a semicolon delimited list of features OR the word FILE
* to load the feature list from a file.
* - fat.featured.scoring.matching.features.file - a filename containing a newline delimited list of feature.
*
*
* See also: "About Learning Models with Multiple Query Dependent Features. Craig Macdonald, Rodrygo L.T. Santos, Iadh Ounis and Ben He. Transactions on Information Systems. 31(3). 2013. [PDF]
* @author Craig Macdonald
* @since 4.0
*
*/
public class FatFeaturedScoringMatching extends FeaturedScoringMatching {
public FatFeaturedScoringMatching(Index _index, Matching _parent, String[] _featureNames) throws Exception
{
super(_index, _parent, _featureNames, FatScoringMatching.class);
}
public FatFeaturedScoringMatching(Index _index, Matching _parent) throws Exception
{
super(_index, _parent, FatScoringMatching.class);
}
public ResultSet doMatch(String queryNumber, MatchingQueryTerms queryTerms, final ResultSet res, boolean keepInputScores)
throws IOException
{
final FatResultSet fat = (FatResultSet)res;
final int numFields = fat.getCollectionStatistics().getNumberOfFields();
final int numResults = fat.getResultSize();
final FeaturedQueryResultSet rtr = new FeaturedQueryResultSet(fat);
int featureCount = 0;
if (fat.getResultSize() == 0)
{
rtr.scores = new double[0];
rtr.docids = new int[0];
rtr.occurrences = new short[0];
return rtr;
}
if (sampleFeature)
{
rtr.putFeatureScores("SAMPLE", fat.getScores());
featureCount++;
}
//for each WMODEL feature
for(int fid=0;fid 0)
{
//order is as per fat resultset
WritablePosting[][] postings = fat.getPostings();
int[] docids = fat.getDocids();
for(int fid=0;fid 0)
{
final Index fatIndex = FatUtils.makeIndex(fat);
final MatchingQueryTerms mqtLocal = queryTerms.clone(); //new MatchingQueryTerms(queryNumber);
mqtLocal.setDefaultTermWeightingModel(queryTerms.defaultWeightingModel);
mqtLocal.clear();
int ti = 0;
for(String t : fat.getQueryTerms())
{
mqtLocal.setTermProperty(t, fat.getKeyFrequencies()[ti]);
mqtLocal.setTermProperty(t, fat.getEntryStatistics()[ti]);
ti++;
}
featureCount += applyDSMs(fatIndex, queryNumber, mqtLocal, numResults, fat.getDocids(), fat.getOccurrences(), rtr);
}
if (keepInputScores) {
System.arraycopy(fat.getScores(), 0, rtr.getScores(), 0, fat.getResultSize());
}
//labels
final String[] labels = new String[rtr.getResultSize()];
Arrays.fill(labels, "-1");
rtr.setLabels(labels);
//metadata
if (fat.hasMetaItems("docno"))
{
rtr.addMetaItems("docno", fat.getMetaItems("docno"));
}
if (fat.hasMetaItems("label"))
rtr.setLabels(fat.getMetaItems("label"));
logger.info("Finished decorating " + queryNumber + " with " + featureCount + " features");
return rtr;
}
@Override
public ResultSet match(String queryNumber, MatchingQueryTerms queryTerms)
throws IOException
{
final FatResultSet fat = (FatResultSet) parent.match(queryNumber, queryTerms);
if (fat == null)
{
logger.warn("I got NO ResultSet from parent " + parent.getInfo() );
return new FeaturedQueryResultSet(0);
}
return doMatch(queryNumber, queryTerms, fat, true);
}
}