All downloads are free. Search and download functionality uses the official Maven repository.

org.terrier.matching.FatScoringMatching Maven / Gradle / Ivy

The newest version!
/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org 
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is FatScoringMatching.java.
 *
 * The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *   Craig Macdonald 
 */

package org.terrier.matching;

import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import java.util.function.Predicate;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.lang3.tuple.Pair;
import org.terrier.matching.dsms.DocumentScoreModifier;
import org.terrier.matching.models.WeightingModel;
import org.terrier.matching.models.WeightingModelFactory;
import org.terrier.structures.CollectionStatistics;
import org.terrier.structures.EntryStatistics;
import org.terrier.structures.Index;
import org.terrier.structures.postings.FieldPosting;
import org.terrier.structures.postings.WritablePosting;
import org.terrier.utility.ApplicationSetup;
/** Scores a FatResultSet into a normal ResultSet for a given weighting model
 * @since 4.0
 * @author Craig Macdonald
 */ 
public class FatScoringMatching extends AbstractScoringMatching {
	
	/** SLF4J logger shared by all instances of this class. */
	protected static final Logger logger = LoggerFactory.getLogger(FatScoringMatching.class);

	/**
	 * Debug switch, hard-wired to {@code true}; per the original note it relates to
	 * checking for weighting models giving NaN scores.
	 * NOTE(review): no read of this flag is visible in this file as extracted - the
	 * NaN/infinite score logging that is visible is guarded by
	 * {@code logger.isDebugEnabled()} instead. Confirm whether it is still used.
	 */
	static final boolean DEBUG = true;
	
	/**
	 * Value of the {@code fat.scoring.only.mqt} property, parsed as a boolean once
	 * when the class is initialised; {@code false} when the property is not set.
	 * NOTE(review): the code that consults this flag is not visible here - confirm its effect.
	 */
	protected static final boolean SCORE_ONLY_FROM_MQT = Boolean.parseBoolean(ApplicationSetup.getProperty("fat.scoring.only.mqt", "false"));
	
	/**
	 * Creates a scoring matching by forwarding all four arguments, unchanged,
	 * to the superclass constructor.
	 *
	 * @param _index the index handed to the superclass
	 * @param _parent the upstream matching handed to the superclass
	 * @param _wm the weighting model handed to the superclass
	 * @param _filter predicate over (String, Set of String) pairs handed to the superclass;
	 *        NOTE(review): presumably selects which query terms are scored -
	 *        confirm against AbstractScoringMatching
	 */
	public FatScoringMatching(Index _index, Matching _parent, WeightingModel _wm, Predicate<Pair<String,Set<String>>> _filter)
	{
		super(_index, _parent, _wm, _filter);
	}
	
	/**
	 * Creates a scoring matching with an explicitly supplied weighting model.
	 * All arguments are passed straight through to the superclass constructor.
	 *
	 * @param _index the index handed to the superclass
	 * @param _parent the upstream matching handed to the superclass
	 * @param _wm the weighting model handed to the superclass
	 */
	public FatScoringMatching(Index _index, Matching _parent, WeightingModel _wm) {
		super(
			_index,
			_parent,
			_wm
		);
	}
	
	public FatScoringMatching(Index _index, Matching _parent)
	{
		super(_index, _parent, ApplicationSetup.getProperty("fat.scoring.matching.model", ApplicationSetup.getProperty("trec.model", "BM25")).equals("FromMQT")
				? null
				: WeightingModelFactory.newInstance(
						ApplicationSetup.getProperty("fat.scoring.matching.model", 
						ApplicationSetup.getProperty("trec.model", "BM25"))
				)
			);
	}
	
	/**
	 * Returns the fixed descriptive name of this matching implementation.
	 *
	 * @return the literal string {@code "FatScoringMatching"}
	 */
	@Override
	public String getInfo()
	{
		final String info = "FatScoringMatching";
		return info;
	}

	/*
	 * NOTE(review): this span is damaged. The loop header on the CHECKFIELDS line
	 * breaks off after "di" and resumes at "0;", so the remainder of
	 * containsFieldPostings and the beginning of the following scoring method
	 * (its signature and the setup of docids, scores, occurs, wms, keyFreqs,
	 * entryStats, gt0, etc.) are not present. Restore the missing text from the
	 * upstream Terrier source before relying on this code. The comments below
	 * describe only the statements that are visible.
	 */
	protected static boolean containsFieldPostings(WritablePosting[][] postings)
	{
		boolean _fields = false;
		// NOTE(review): text is missing from the next line (see note above)
		CHECKFIELDS: for(int di=0;di 0;
					// posting for document index di and term index ti
					final WritablePosting p = postings[di][ti];
					// score that posting with the weighting model held for term ti
					final double s = wms[ti].score(p);
					// report NaN or infinite scores, but only when debug logging is enabled
					if (logger.isDebugEnabled() && (Double.isNaN(s) || Double.isInfinite(s)))
					{
						logger.debug(wms[ti].getInfo() + " was "+s+": posting=(" +  p.toString() + ") for term " + ti + " ks=" + keyFreqs[ti] + " es="+ entryStats[ti] + " l=" + p.getDocumentLength());
						// for field postings, additionally log the field lengths
						if (p instanceof FieldPosting)
							logger.debug("lf="+ Arrays.toString(((FieldPosting)p).getFieldLengths()));
					}
					// the score is accumulated even when it is NaN or infinite
					score += s;
				}
			}
			// record the accumulated score for this document
			scores[di] = score;
			// count documents that ended up with a strictly positive score
			if (score > 0.0d)
				gt0++;
		}
		logger.info("Rescoring found " + gt0 + " docs with +ve score using " + wm.getInfo());
		//make a new resultset
		ResultSet outputRS = new QueryResultSet(docids, scores, occurs);
		// carry the "docno" meta items over from the input result set when it has them
		if (fInputRS.hasMetaItems("docno"))
			outputRS.addMetaItems("docno", fInputRS.getMetaItems("docno"));
		if (sort)
			outputRS.sort(numDocs);
		int numOfDocModifiers = documentModifiers.size();
		int NumberOfQueryDSMs = 0;
		// document score modifiers attached to the query; may be null
		DocumentScoreModifier[] dsms = queryTerms.getDocumentScoreModifiers();
		if (dsms != null)
			NumberOfQueryDSMs = dsms.length;

		// apply the query's modifiers from last to first; re-sort only when a
		// modifier returns true and sorting was requested
		for (int t = NumberOfQueryDSMs-1; t >= 0; t--) {
			if (dsms[t].modifyScores(index, queryTerms, outputRS) && sort)
				outputRS.sort();
		}
		
		/* Application-dependent modification of document scores for a query,
		using the modifiers in documentModifiers, applied in list order.
		The result set is re-sorted after each modifier that returns true,
		provided sorting was requested. */
		for (int t = 0; t < numOfDocModifiers; t++) {
			if (documentModifiers.get(t).modifyScores(index, queryTerms, outputRS) && sort)
				outputRS.sort();
		}
		return outputRS;
	}
	
	/**
	 * Runs the parent matching for the query and hands its result set to
	 * {@code doMatch} for rescoring.
	 *
	 * @param queryNumber identifier of the query, passed to both the parent and {@code doMatch}
	 * @param queryTerms the query terms, passed to both the parent and {@code doMatch}
	 * @return whatever {@code doMatch} returns for the parent's result set
	 * @throws IOException declared by this method; not raised directly by the code in this body
	 */
	@Override
	public ResultSet match(String queryNumber, MatchingQueryTerms queryTerms) throws IOException {
		// obtain the upstream results first, then rescore them
		final ResultSet parentResults = parent.match(queryNumber, queryTerms);
		return doMatch(queryNumber, queryTerms, parentResults);
	}

	/**
	 * Not supported by this class: always throws.
	 *
	 * @param cs ignored
	 * @throws UnsupportedOperationException always
	 */
	@Override
	public void setCollectionStatistics(CollectionStatistics cs) {
		// give callers a message that says which operation was rejected and by whom
		throw new UnsupportedOperationException(
				"setCollectionStatistics is not supported by FatScoringMatching");
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy