All downloads are free. Search and download functionality uses the official Maven repository.

org.terrier.querying.DependenceModelPreProcess Maven / Gradle / Ivy

The newest version!
/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org 
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is DependenceModelPreProcess.java.
 *
 * The Original Code is Copyright (C) 2017-2020 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *  Craig Macdonald
 */
package org.terrier.querying;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.terrier.matching.BaseMatching;
import org.terrier.matching.MatchingQueryTerms;
import org.terrier.matching.MatchingQueryTerms.MatchingTerm;
import org.terrier.matching.matchops.Operator;
import org.terrier.matching.matchops.PhraseOp;
import org.terrier.matching.matchops.SingleTermOp;
import org.terrier.matching.matchops.SynonymOp;
import org.terrier.matching.matchops.UnorderedWindowOp;
import org.terrier.matching.models.WeightingModel;
import org.terrier.matching.models.dependence.pBiL;
import org.terrier.querying.parser.Query.QTPBuilder;
import org.terrier.utility.ApplicationSetup;

import com.google.common.collect.Sets;

@ProcessPhaseRequisites(ManagerRequisite.MQT)
public class DependenceModelPreProcess implements MQTRewritingProcess{
	
	static final String DEFAULT_DEPENDENCE_WEIGHTING_MODEL = pBiL.class.getName();
	public static final String CONTROL_MODEL = "dependencemodel";
	public static final String CONTROL_MODEL_PARAM = "dependencemodelparam";
	public static final String DEPENDENCE_TAG = "sdm";
	
	@SuppressWarnings("unchecked")
	static Set> ALLOWED_OP_TYPES = Sets.newHashSet(SingleTermOp.class, SynonymOp.class);
	
	Double param = null;
	String defaultModel;

	public DependenceModelPreProcess() {
		this(DEFAULT_DEPENDENCE_WEIGHTING_MODEL);
	}
	
	public DependenceModelPreProcess(String _defaultModel) {
		defaultModel = _defaultModel;
	} 
	
	protected void initialise(SearchRequest q) {}
	
	@Override
	public void process(Manager manager, SearchRequest q) {
		initialise(q);
		String modelName = q.getControl(CONTROL_MODEL);
		if (modelName == null || modelName.length() == 0)
			modelName = defaultModel;
		
		String paramValue = q.getControl(CONTROL_MODEL_PARAM);
		param = paramValue != null && paramValue.length() > 0 ? Double.parseDouble(paramValue) : null;	
		this.process(((Request)q).getMatchingQueryTerms(), modelName);
	}
	
	WeightingModel getModel(String name, int ngramLength) {
		if (! name.contains("."))
			name = "org.terrier.matching.models.dependence." + name;
		WeightingModel rtr = null;
		try{
			Class clz = ApplicationSetup.getClass(name).asSubclass(WeightingModel.class);
			//TODO: this is a hack - we should have an interface for specific dependence models
			if (clz.getPackage().toString().contains("dependence"))
				rtr = clz.getConstructor(Integer.TYPE).newInstance(ngramLength);
			else
				rtr = clz.newInstance();
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
		if (param != null)
			rtr.setParameter(param);
		return rtr;
	}
	
	public void process(MatchingQueryTerms mqt, String modelName)
	{
		assert mqt != null;
		List queryTerms = new ArrayList<>();
		for(MatchingTerm e : mqt)
		{
			if (! ALLOWED_OP_TYPES.contains( e.getKey().getClass()) )
			{
				continue;
			}
			Operator o = e.getKey().clone();
			if (o instanceof SingleTermOp && ((SingleTermOp)o).getField() != null)
			{
				System.err.println("WARN: The query had fields for op "+o+" but proximity cannot have fields.");
				((SingleTermOp)o).setField(null);
			}
			queryTerms.add(o);
		}
		
		if (queryTerms.size() < 2)
			return;
		
		List newEntries = SD(modelName, queryTerms);
		
		//finally add the new entries
		mqt.addAll(newEntries);
	}

	protected List SD(String modelName, List queryTerms) {
		List newEntries = new ArrayList<>();
		
		//#1
		for(int i=0;i allTerms = queryTerms;
		if (allTerms.size() > 12)
			allTerms = allTerms.subList(0, 11);
		QTPBuilder qtp = QTPBuilder.of(new UnorderedWindowOp(allTerms.toArray(new Operator[allTerms.size()]), 12));
		qtp.setWeight(0.1d);
		qtp.addWeightingModel(getModel(modelName,12));
		qtp.setTag(DEPENDENCE_TAG).setTag(BaseMatching.BASE_MATCHING_TAG);;
		newEntries.add(qtp.build());
		return newEntries;
	}

	public boolean expandQuery(MatchingQueryTerms mqt, Request rq) throws IOException {
		int count = mqt.size();
		this.process(mqt, defaultModel);
		return (mqt.size() != count);
	}

	@Override
	public String getInfo() {
		return this.getClass().getSimpleName();
	}
	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy