All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.terrier.matching.MatchingQueryTerms Maven / Gradle / Ivy

The newest version!
/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org 
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is MatchingQueryTerms.java.
 *
 * The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *   Vassilis Plachouras  (original author)
 *   Craig Macdonald 
 */
package org.terrier.matching;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import com.google.common.collect.Lists;

import org.terrier.matching.MatchingQueryTerms.MatchingTerm;
import org.terrier.matching.dsms.DocumentScoreModifier;
import org.terrier.matching.matchops.Operator;
import org.terrier.matching.matchops.SingleTermOp;
import org.terrier.matching.models.WeightingModel;
import org.terrier.querying.Request;
import org.terrier.querying.parser.Query;
import org.terrier.structures.EntryStatistics;
import org.terrier.structures.collections.MapEntry;
/**
 * Models a query used for matching documents. It is created
 * by creating an instance of this class, and then passing it as
 * an argument to the method obtainQueryTerms of a Query. It contains
 * the query terms, their weights, optionally the corresponding term 
 * identifiers and the associated term score modifiers. It also stores
 * the document score modifiers for the query.  
 * @author Vassilis Plachouras, Craig Macdonald.
  */
public class MatchingQueryTerms 
extends ArrayList 
implements Serializable,Cloneable
{
	
	public static class MatchingTerm 
	extends MapEntry
	implements Map.Entry{

		public MatchingTerm(Operator _key, QueryTermProperties _value) {
			super(_key, _value);
		}
	}

	private static final long serialVersionUID = -9l;
	/** The weight and the modifiers associated with a query term.*/
	public static class QueryTermProperties implements Serializable, Cloneable
	{
		private static final long serialVersionUID = 6327392687128896557L;
		
		/** the index at which this term was inserted */
		int index;
		
		//tri-valued logic: if null, then no requirement; if true then required, 
		//if false, then not required
		public Boolean required = null;

		/** The weight of a query term. This is usually how many times the term occurred
		  * in the query, but sometime may be altered if a weight has been specified on the
		  * query term: eg QueryExpansion will do this, as will manually specifying a weight
		  * on the unparsed query (example term1 term2^3). */
		public double weight;
		
		/** Info about the query term.*/
		public EntryStatistics stats;
		
		/** The term score modifiers associated with a particular query term.*/
		public List termModels = new ArrayList();
		
		public Set tags = new HashSet<>();
		
		public QueryTermProperties(int _index) {
			this.index = _index;
			//weight = 1.0d;
		}

		/** 
		 * An constructor for setting the term code 
		 * of a query term.
		 * @param _stats the statistics of the query term
		 */
		public QueryTermProperties(int _index, EntryStatistics _stats) {
			this(_index);
			stats = _stats;
		}
		
		/** 
		 * A constructor for setting the weight of a term.
		 * @param w double the weight of a query term. 
		 */
		public QueryTermProperties(int _index, double w) {
			this(_index);
			weight = w;
		}
		/**
		 * A constructor for setting a term score modifier for a term.
		 * @param model WeightingModel modifier associated with a query term.
		 */
		public QueryTermProperties(int _index, WeightingModel model) {
			this(_index);
			termModels.add(model);
		}		
		
		/**
		 * A constructor for setting the weight and a 
		 * term score modifier for a term.
		 * @param w double the weight of a query term. 
		 * @param model WeightingModel modifier associated with a query term.
		 */
		public QueryTermProperties(int _index, double w, WeightingModel model) {
			this(_index);
			weight = w;
			termModels.add(model);
		}
		
		/** 
		 * A constructor for setting the weight of a term
		 * and its term code.
		 * @param w double the weight of a query term. 
		 * @param _stats statistics of the query term
		 */
		public QueryTermProperties(int _index, double w, EntryStatistics _stats) {
			this(_index);
			weight = w;
			stats = _stats;
		}
		
		/**
		 * A constructor for setting a term score modifier for a term 
		 * and its term code.
		 * @param model WeightingModel modifier associated with a query term.
		 * @param _stats statistics of the query term
		 */
		public QueryTermProperties(int _index, WeightingModel model, EntryStatistics _stats) {
			this(_index);
			termModels.add(model);
			stats = _stats;
		}
		
		/**
		 * A constructor for setting a weight, a term score modifier 
		 * and the term code for a query term.
		 * @param w double the weight of a query term.
		 * @param model WeightingModel modifier associated with a query term.
		 * @param _stats statistics of the query term
		 */
		public QueryTermProperties(int _index, double w, WeightingModel model, EntryStatistics _stats) {
			this(_index);
			weight = w;
			termModels.add(model);
			stats = _stats;
		}

		public double getWeight() {
			return weight;
		}

		public Set getTags() {
			return tags;
		}
		
		public Boolean getRequired() {
			return required;
		}

		public void setWeight(double weight) {
			this.weight = weight;
		}

		public void setTag(String tag) {
			this.tags.add(tag);
		}
		
		public void setRequired(boolean b) {
			required = b;
		}

		@Override
		public QueryTermProperties clone()
		{
			QueryTermProperties newO = new QueryTermProperties(index, weight, stats);
			for (WeightingModel model : termModels)
				newO.termModels.add((WeightingModel)(model.clone()));
			newO.tags = new HashSet<>(tags);
			newO.required = required;
			return newO;
		}

		@Override
		public int hashCode()
		{
			int hashCodeValue = stats.hashCode();
			hashCodeValue += (new Double(weight)).hashCode();
			for (WeightingModel model : termModels)
			{
				hashCodeValue += model.hashCode();
			}
			return hashCodeValue;
		}
		
		@Override
		public String toString()
		{
			return "{ req=" + this.required + ", w=" + this.weight + ", stats=" + this.stats + ", models=" + this.termModels.toString() + " tags="+tags+"}";
		}

		@Override
		public boolean equals(Object _o) {
			if (! (_o instanceof QueryTermProperties))
				return false;
			QueryTermProperties o = (QueryTermProperties) _o;
			//we only check requirements, weighting models and tags
			return  (o.getRequired() == this.getRequired()) 
					&& o.termModels.equals(this.termModels)
					&& o.tags.equals(this.tags);

		}

		
		
	}
	
	static class StringQueryTermPropertiesByIndexComparator implements Comparator>, Serializable
	{
		private static final long serialVersionUID = 1L;
		public int compare(Entry o1, Entry o2)
		{
			return o1.getValue().index - o2.getValue().index;
		}		
	}
	static final Comparator> BY_INDEX_COMPARATOR = new StringQueryTermPropertiesByIndexComparator();
	
	
	/** The query ID, if provided */
	protected String queryId = null;
	
	protected Request rq = null;

	/** number of documents to request matching provides. -1 means default. */
	protected int numDocsRequest = -1;
	
	protected Set defaultTags = new HashSet<>(Arrays.asList(BaseMatching.BASE_MATCHING_TAG));
		
	/** default weighting model for all terms */
	protected WeightingModel defaultWeightingModel;
	
	/** 
	 * The document score modifiers associated with the query terms.
	 * It should contain the phrase score modifiers for example.
	 */
	protected ArrayList docScoreModifiers = new ArrayList();
	
	/** The original query as it came from the parser, in case any TSMs or DSMs
	 * wish to refer to it
	 */
	protected Query query; 

	public MatchingQueryTerms(Collection coll)
	{
		super(coll);
	}
	
	/** Generate a MatchingQueryTerms object. Query id will be null. */
	public MatchingQueryTerms()
	{}

	/** Generate a MatchingQueryTerms object, with the specified query id.
	  * @param qid A string representation of the query id */
	public MatchingQueryTerms(String qid)
	{
		queryId = qid;
	}
	/** Generate a MatchingQueryTerms object, with the specified query id, and request
	  * @param qid A string representation of the query id
	  * @param _rq A request for matching */
	public MatchingQueryTerms(String qid, Request _rq)
	{
		queryId = qid;
		this.rq = _rq;
	}
	/** Returns the request.
	  * @return Request*/
	public Request getRequest()
	{
		return this.rq;
	}

	/** Sets the request. */
	public void setRequest(Request r)
	{
		this.rq = r;
	}

	public int getMatchingRequestSize() {
		return numDocsRequest;
	}

	public void setMatchingRequestSize(int numDocs) {
		numDocsRequest = numDocs;
	}
	
	/**
	 * Adds a document score modifier for the query.
	 * @param dsm DocumentScoreModifier a document score modifier for 
	 *        the query.
	 */
	public void addDocumentScoreModifier(DocumentScoreModifier dsm) {
		docScoreModifiers.add(dsm);
	}
	
	/**
	 * Returns the document score modifiers used for the query.
	 * @return DocumentScoreModifiers[] an array of the registered
	 *         document score modifiers for the query. If there are 
	 *         no document score modifiers, then it returns null.
	 */
	public DocumentScoreModifier[] getDocumentScoreModifiers() {
		if (docScoreModifiers.size()>0)
			return docScoreModifiers.toArray(tmpDSM);
		return null;
	}
	
	/** Allows the manager to set the query that was used to
	 * query the system.
	 * @param q The Query, duh
	 */
	public void setQuery(Query q)
	{
		query = q;
	}

	/** Returns guess what?
	 * @return the query
	 */
	public Query getQuery()
	{
		return query;
	}

	/** Returns the query id specified when constructing this object.
	  * @return String query id, or null if none was specified. */
	public String getQueryId()
	{
		return queryId;
	}

	/** Sets the query id */
	public void setQueryId(String newId)
	{
		queryId = newId;
	}
	
	
	
	/**
	 * This method normalises the term weights by dividing each term weight
	 * by the maximum of the terms.
	 */
	public void normaliseTermWeights(){
		// obtain the maximum term weight
		double maxWeight = 0d;
		
		for(Map.Entry e : this)
		{
			maxWeight = Math.max(maxWeight, e.getValue().weight);
		}
		// normalise
		for(Map.Entry e : this)
		{
			e.getValue().weight /= maxWeight;
		}

	}
	
	/**
	 * This method resets query term statistics to allow for a single instance
	 * of MatchingQueryTerms to be reused for matching against different indices.
	 */
	public void resetTermProperties() {
		for(Map.Entry e : this) {
			e.setValue(null);
		}
	}
	
	
	public void setTermProperty(Operator term, EntryStatistics e) {
		QueryTermProperties properties = (QueryTermProperties)this.get(term);
		if (properties == null) {
			this.add( new MatchingTerm(term, properties = new QueryTermProperties(0, e)));
			properties.tags.addAll(this.defaultTags);
		} else {
			properties.stats = e;
		}
	}
	
	/**
	 * Sets a term's statistics for the given query term.
	 * @param term String the term for which to override the statitics
	 * @param e EntryStatistics the term score modifier to apply for the given term.
	 */
	public void setTermProperty(String term, EntryStatistics e) {
		setTermProperty(new SingleTermOp(term), e);
	}
	
	/**
	 * Sets a term score modifier for the given query term.
	 * @param term String the term for which to add a term score modifier.
	 * @param tsm TermScoreModifier the term score modifier to apply for the given term.
	 */
	public void setTermProperty(Operator term, WeightingModel tsm) {
		QueryTermProperties properties = (QueryTermProperties)this.get(term);
		if (properties == null) {
		//	this.put(term, new QueryTermProperties(termAdditionIndex++, tsm));
		} else {
			properties.termModels.add(tsm);
		}
	}
	
	/**
	 * Sets the weight and a term score modifier for the given query term.
	 * @param term String the term for which we set the properties.
	 * @param weight int the weight of the query term.
	 * @param tsm TermScoreModifier the term score modifier applied for the query term.
	 */
	public void setTermProperty(Operator term, double weight, WeightingModel tsm) {
		QueryTermProperties properties = (QueryTermProperties)this.get(term);
		if (properties == null) {
			
			this.add(new MatchingTerm(term, properties = new QueryTermProperties(0 /*termAdditionIndex++*/, weight, tsm)));
			properties.tags.addAll(this.defaultTags);
		} else {
			//properties.weight = weight;
			//TODO adjust the weights?
			properties.termModels.add(tsm);
		}
	}
	
	protected QueryTermProperties combine(QueryTermProperties existingProps, QueryTermProperties newProps) {
		
		if (existingProps.required != newProps.required)
			return null;
		if (! existingProps.termModels.equals(newProps.termModels))
			return null;
		
		existingProps.weight += newProps.weight;
		return existingProps;
	}
	
	@Override
	public boolean add(MatchingTerm e) {
		QueryTermProperties existing = this.get(e.getKey());
		if (existing != null && combine(existing, e.getValue()) != null)
		{
			//do nothing, we have combined
			return true;
		}
		return super.add(e);
		
	}

	private boolean superAdd(MatchingTerm e) {
		return super.add(e);
	}

	/**
	 * Returns the associated weight of the given query term.
	 * @param term String the query term for which the weight is returned.
	 * @return double the weight of the given query term. If the term is not part
	 *         of the query, then it returns 0.
	 */
	public double getTermWeight(Operator term) {
		final QueryTermProperties tp = this.get(term);
		if (tp!=null)
			return tp.weight;
		return 0.0d;
	}
	
	public double getTermWeight(String term) {
		Map.Entry ee = this.get(term);
		if (ee != null)
			return ee.getValue().weight;
		return 0d;
	}
	
	/**
	 * Returns the associated weights of the given query terms.
	 * @return double The weights of the given terms in a double array.
	 */
	public double[] getTermWeights(){
		double[] tws = new double[this.size()];
		int i=0;
		for(Map.Entry e : this)
			tws[i++] = e.getValue().weight;
		return tws;
	}
	
	/**
	 * Returns the assocciated code of the given query term.
	 * @param term String the query term for which the weight is returned.
	 * @return EntryStatistics the statistics of the term, or null if the
	 * term does not appear in the query.
	 */
	public EntryStatistics getStatistics(Operator term) {
		final QueryTermProperties tp = this.get(term);
		if (tp == null)
			return null;
		return tp.stats;
	}
	
	public EntryStatistics getStatistics(String term) {
		final Map.Entry ee = this.get(term);
		if (ee == null)
			return null;
		return ee.getValue().stats;
	}
	
	public QueryTermProperties get(Operator term) {
		//TODO: this is slow
		for( Map.Entry e : this)
		{
			if (e.getKey().equals(term))
				return e.getValue();
		}
		return null;
	}
	
	public Map.Entry get(String singleTerm) {
		//TODO: this is slow
		for( Map.Entry e : this)
		{
			if (e.getKey().toString().equals(singleTerm))
				return e;
			//if (e.getKey() instanceof SingleQueryTerm)
			//	if (((SingleQueryTerm)e.getKey()).toString().equals(singleTerm) )
			//		return e;
		}
		return null;
	}
	
	
	public void setTermProperty(String term, double weight) {
		Map.Entry e = get(term);
		if (e == null)
		{
			QueryTermProperties ev = new QueryTermProperties(0, weight);
			ev.tags.addAll(this.defaultTags);
			this.add(new MatchingTerm(new SingleTermOp(term), ev));
		}
		else
			e.getValue().weight = weight;
	}
	
	
	
	/** Returns the query terms, as they were added to this object. 
	 * @return Query terms in order that they were added to the query. Empty array if object has no query terms added.
	 */
	public Operator[] getMatchingTerms() {
		
		List l = Lists.newArrayList();
		for( Map.Entry e : this)
		{
			l.add(e.getKey());
		}
		return l.toArray(new Operator[l.size()]);
	}
	
	/** Returns the query terms, as they were added to this object. 
	 * @return Query terms in order that they were added to the query. Empty array if object has no query terms added.
	 */
	public String[] getTerms() {
		
		List l = Lists.newArrayList();
		for( Map.Entry e : this)
		{
			l.add(e.getKey().toString());
		}
		return l.toArray(new String[l.size()]);
	}
	
//	public Set getMatchingTags()
//	{
//		return matchOnTags;
//	}
	
	//Set matchOnTags = new HashSet();
	public List getMatchingTerms(String tag)
	{
//		if (matchOnTags.size() == 0)
//			return this;
		return this.stream()
				.filter(kv -> kv.getValue().getTags().contains(tag) )
				.collect(Collectors.toList());
	}
	
	/** 
	* Adds a term to the query with a given weight. If the term already exists,
	* the existing weight is overwritten.
	* @param term String the term to add.
	* @param weight double the weight of the added term.
	*/
	public void setTermProperty(Operator term, double weight) {
		QueryTermProperties properties = this.get(term);
		if (properties == null) {
		//	termProperties.put(term, new QueryTermProperties(termAdditionIndex++, weight));
		} else {
			properties.weight = weight;
		}
	}
	
	/**
	 * Returns the number of unique terms in the query.
	 * @return int the number of unique terms in the query.
	 */
	@Deprecated
	public int length() {
		return this.size();
	}

	/** Performs a deep clone of this object, and all objects it contains. This allows a MQT to be copied,
	  * and changed without affecting the original object. */
	public MatchingQueryTerms clone()
	{
		MatchingQueryTerms newMQT = new MatchingQueryTerms(this.queryId);
		
		//copy queryID, Strings are immutable
		//clone query term properties
		for (Map.Entry e : this)
		{
			newMQT.superAdd(new MatchingTerm(e.getKey().clone(), e.getValue().clone()));
		}
		for (DocumentScoreModifier dsm : docScoreModifiers)
		{
			newMQT.docScoreModifiers.add( (DocumentScoreModifier)(dsm.clone()));
		}
		//clone query
		if (this.query != null)
			newMQT.query = (Query)this.query.clone();
		if (this.defaultWeightingModel != null)
			newMQT.defaultWeightingModel = this.defaultWeightingModel.clone();
		//dont clone to avoid recursive cloning 
		newMQT.rq = this.rq;
		newMQT.queryId = this.queryId;
		return newMQT;
	}
	
	/** Remove a term from the list of terms to be matched
	 * @since 3.6 
	 */
	public boolean removeTerm(Operator term)
	{
		Iterator i = this.iterator();
		boolean rtr = false;
		while(i.hasNext())
		{
			if (i.next().getKey().equals(term))
			{
				rtr = true;
				i.remove();
			}
		}
		return rtr;
	}
	
	/* 
	 * The following attributes are used for creating arrays of the correct type.
	 */
	private static final DocumentScoreModifier[] tmpDSM = new DocumentScoreModifier[0];
	//private static final String[] tmpString = new String[0];
	//private static final WeightingModel[] tmpModels = new WeightingModel[0];
	

//	/** Returns the weighting models to be used for a given term. This will always include the default
//	 * weighting model */
//	public WeightingModel[] getTermWeightingModels(QueryTerm term) {
//		QueryTermProperties qtp = this.get(term);
//		if (qtp == null)
//			return tmpModels;
//		if (qtp.termModels.size() != 0)
//		{
//			final ArrayList n = new ArrayList(qtp.termModels);
//			n.add(0, defaultWeightingModel.clone());
//			return n.toArray(tmpModels);
//		}
//		return new WeightingModel[]{defaultWeightingModel.clone()};
//	}

	/** Set the default weighting model to be used for terms that do NOT have an explicit WeightingModel set. */
	public void setDefaultTermWeightingModel(WeightingModel weightingModel) {
		defaultWeightingModel = weightingModel;
		for(Map.Entry e : this)
		{
			if (e.getValue().termModels.size() == 0)
				e.getValue().termModels.add(weightingModel.clone());
		}
	}

	public void addTermPropertyWeight(String term, double d) {
		double w = getTermWeight(term);
		setTermProperty(term, d+w);
	}
	
	@Override
	public String toString()
	{
		StringBuilder s = new StringBuilder();
		for(Map.Entry e : this)
		{
			s.append(e.getKey().toString());
			s.append(' ');
			s.append(e.getValue().toString());
			s.append(' ');
		}
		return s.toString();		
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy