
// org.terrier.matching.FatScoringMatching Maven / Gradle / Ivy
// The newest version!
/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is FatScoringMatching.java.
*
* The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Craig Macdonald
*/
package org.terrier.matching;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import java.util.function.Predicate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.tuple.Pair;
import org.terrier.matching.dsms.DocumentScoreModifier;
import org.terrier.matching.models.WeightingModel;
import org.terrier.matching.models.WeightingModelFactory;
import org.terrier.structures.CollectionStatistics;
import org.terrier.structures.EntryStatistics;
import org.terrier.structures.Index;
import org.terrier.structures.postings.FieldPosting;
import org.terrier.structures.postings.WritablePosting;
import org.terrier.utility.ApplicationSetup;
/** Scores a FatResultSet into a normal ResultSet for a given weighting model
* @since 4.0
* @author Craig Macdonald
*/
public class FatScoringMatching extends AbstractScoringMatching {
/** SLF4J logger shared by all instances of this class. */
protected static final Logger logger = LoggerFactory.getLogger(FatScoringMatching.class);
/**
 * check for weighting models giving NaN scores.
 * NOTE(review): hard-coded to {@code true} and not referenced in the portion of this
 * file visible here; the NaN/Infinity check in the scoring loop is gated on
 * {@code logger.isDebugEnabled()} instead - confirm whether this flag is still used.
 */
static final boolean DEBUG = true;
/**
 * Value of the {@code fat.scoring.only.mqt} property (default {@code "false"}),
 * parsed with {@link Boolean#parseBoolean(String)} once when the class is initialised.
 * NOTE(review): where this flag is consulted is not visible in this view of the file.
 */
protected static final boolean SCORE_ONLY_FROM_MQT = Boolean.parseBoolean(ApplicationSetup.getProperty("fat.scoring.only.mqt", "false"));
/**
 * Creates a scoring matching with an explicit weighting model and a filter predicate.
 * All four arguments are forwarded unchanged to the superclass constructor.
 * <p>
 * NOTE(review): the generic arguments of {@code _filter} were lost in this copy of the
 * file (only {@code Predicate>>} remained). They are restored here as
 * {@code Predicate<Pair<String,Set<String>>>}, which matches the remaining {@code >>}
 * and the file's otherwise unused {@code Pair}/{@code Set} imports - confirm against
 * the matching constructor of {@code AbstractScoringMatching}.
 *
 * @param _index  index passed to the superclass
 * @param _parent matching whose results are rescored (see {@code match})
 * @param _wm     weighting model passed to the superclass
 * @param _filter predicate passed to the superclass; presumably selects which query
 *                terms are scored - verify against the superclass
 */
public FatScoringMatching(Index _index, Matching _parent, WeightingModel _wm, Predicate<Pair<String,Set<String>>> _filter)
{
    super(_index, _parent, _wm, _filter);
}
/**
 * Creates a scoring matching with an explicit weighting model.
 * The three arguments are handed straight to the superclass constructor.
 *
 * @param _index  index passed to the superclass
 * @param _parent matching whose results are rescored (see {@code match})
 * @param _wm     weighting model passed to the superclass
 */
public FatScoringMatching(Index _index, Matching _parent, WeightingModel _wm) {
    super(_index, _parent, _wm);
}
public FatScoringMatching(Index _index, Matching _parent)
{
super(_index, _parent, ApplicationSetup.getProperty("fat.scoring.matching.model", ApplicationSetup.getProperty("trec.model", "BM25")).equals("FromMQT")
? null
: WeightingModelFactory.newInstance(
ApplicationSetup.getProperty("fat.scoring.matching.model",
ApplicationSetup.getProperty("trec.model", "BM25"))
)
);
}
/**
 * Identifies this matching implementation.
 *
 * @return the fixed string {@code "FatScoringMatching"}
 */
@Override
public String getInfo()
{
    return "FatScoringMatching";
}
/*
 * NOTE(review): this span is damaged in this copy of the file. The line starting with
 * "CHECKFIELDS:" breaks off in the middle of its loop header, so the remainder of
 * containsFieldPostings and the header and opening statements of the method that
 * follows it are missing (that method is presumably doMatch, which match() calls -
 * confirm). Everything from "final WritablePosting p" onwards is the tail of that
 * second method. The code below is left untouched; restore the missing text from the
 * upstream source before compiling.
 */
/**
 * Takes a two-dimensional array of postings, indexed as {@code postings[di][ti]} in the
 * surviving code, and returns a boolean.
 * NOTE(review): judging by the name and the {@code _fields} flag it reports whether the
 * postings are {@code FieldPosting}s, but the body that decides this is missing here.
 */
protected static boolean containsFieldPostings(WritablePosting[][] postings)
{
boolean _fields = false;
// NOTE(review): truncated line - see the note at the top of this span.
CHECKFIELDS: for(int di=0;di 0;
// Score the posting of term ti in document di with that term's weighting model.
final WritablePosting p = postings[di][ti];
final double s = wms[ti].score(p);
// Only when debug logging is enabled: report NaN or infinite scores together with
// the posting, key frequency, entry statistics and document length.
if (logger.isDebugEnabled() && (Double.isNaN(s) || Double.isInfinite(s)))
{
logger.debug(wms[ti].getInfo() + " was "+s+": posting=(" + p.toString() + ") for term " + ti + " ks=" + keyFreqs[ti] + " es="+ entryStats[ti] + " l=" + p.getDocumentLength());
if (p instanceof FieldPosting)
logger.debug("lf="+ Arrays.toString(((FieldPosting)p).getFieldLengths()));
}
// A NaN or infinite term score is still added to the document's score.
score += s;
}
}
// Store the accumulated score for document di and count strictly positive scores.
scores[di] = score;
if (score > 0.0d)
gt0++;
}
// NOTE(review): the two-argument constructor passes a null weighting model when the
// configured model is "FromMQT"; confirm wm cannot be null when this line is reached.
logger.info("Rescoring found " + gt0 + " docs with +ve score using " + wm.getInfo());
//make a new resultset
ResultSet outputRS = new QueryResultSet(docids, scores, occurs);
// Carry the "docno" meta items over from the input result set when it has them.
if (fInputRS.hasMetaItems("docno"))
outputRS.addMetaItems("docno", fInputRS.getMetaItems("docno"));
if (sort)
outputRS.sort(numDocs);
int numOfDocModifiers = documentModifiers.size();
int NumberOfQueryDSMs = 0;
// Score modifiers attached to the query; the array may be null, treated as none.
DocumentScoreModifier[] dsms = queryTerms.getDocumentScoreModifiers();
if (dsms != null)
NumberOfQueryDSMs = dsms.length;
// Apply the query's modifiers from last to first; re-sort only when a modifier
// returns true and sorting was requested.
for (int t = NumberOfQueryDSMs-1; t >= 0; t--) {
if (dsms[t].modifyScores(index, queryTerms, outputRS) && sort)
outputRS.sort();
}
/*application dependent modification of scores
of documents for a query, based on a static set by the client code
sorting the result set after applying each DSM*/
// These modifiers run in list order, with the same conditional re-sort.
for (int t = 0; t < numOfDocModifiers; t++) {
if (documentModifiers.get(t).modifyScores(index, queryTerms, outputRS) && sort)
outputRS.sort();
}
return outputRS;
}
/**
 * Runs the parent matching for the query and passes its result set to
 * {@code doMatch}, returning whatever {@code doMatch} returns.
 *
 * @param queryNumber identifier of the query, forwarded to both calls
 * @param queryTerms  query terms, forwarded to both calls
 * @return the result set returned by {@code doMatch}
 * @throws IOException declared by this method; may originate from either call
 */
@Override
public ResultSet match(String queryNumber, MatchingQueryTerms queryTerms) throws IOException {
    return this.doMatch(
        queryNumber,
        queryTerms,
        this.parent.match(queryNumber, queryTerms));
}
/**
 * Not supported by this class; every call fails.
 *
 * @param cs ignored
 * @throws UnsupportedOperationException always
 */
@Override
public void setCollectionStatistics(CollectionStatistics cs) {
    // Say which operation and class rejected the call, rather than throwing with a null message.
    throw new UnsupportedOperationException(
        "setCollectionStatistics is not supported by " + getClass().getName());
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy