All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.solr.handler.component.QueryElevationComponent Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.component;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.DOMUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.VersionedFile;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.apache.solr.request.SolrQueryRequest;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * A component to elevate some documents to the top of the result set.
 * 
 * @version $Id: QueryElevationComponent.java 949888 2010-05-31 23:24:40Z hossman $
 * @since solr 1.3
 */
public class QueryElevationComponent extends SearchComponent implements SolrCoreAware
{
  private static Logger log = LoggerFactory.getLogger(QueryElevationComponent.class);
  
  // Constants used in solrconfig.xml
  static final String FIELD_TYPE = "queryFieldType";
  static final String CONFIG_FILE = "config-file";
  static final String FORCE_ELEVATION = "forceElevation";
  static final String EXCLUDE = "exclude";
  
  // Runtime param -- should be in common?
  static final String ENABLE = "enableElevation";
    
  private SolrParams initArgs = null;
  private Analyzer analyzer = null;
  private String idField = null;
  boolean forceElevation = false;
  
  // For each IndexReader, keep a query->elevation map
  // When the configuration is loaded from the data directory.
  // The key is null if loaded from the config directory, and
  // is never re-loaded.
  final Map> elevationCache = 
    new WeakHashMap>();

  class ElevationObj {
    final String text;
    final String analyzed;
    final BooleanClause[] exclude;
    final BooleanQuery include;
    final Map priority;
    
    // use singletons so hashCode/equals on Sort will just work
    final FieldComparatorSource comparatorSource;

    ElevationObj( String qstr, List elevate, List exclude ) throws IOException
    {
      this.text = qstr;
      this.analyzed = getAnalyzedQuery( this.text );
      
      this.include = new BooleanQuery();
      this.include.setBoost( 0 );
      this.priority = new HashMap();
      int max = elevate.size()+5;
      for( String id : elevate ) {
        TermQuery tq = new TermQuery( new Term( idField, id ) );
        include.add( tq, BooleanClause.Occur.SHOULD );
        this.priority.put( id, max-- );
      }
      
      if( exclude == null || exclude.isEmpty() ) {
        this.exclude = null;
      }
      else {
        this.exclude = new BooleanClause[exclude.size()];
        for( int i=0; i searchHolder = null;
          try {
            searchHolder = core.getNewestSearcher(false);
            IndexReader reader = searchHolder.get().getReader();
            getElevationMap( reader, core );
          } finally {
            if (searchHolder != null) searchHolder.decref();
          }
        }
      }
    }
    catch( Exception ex ) {
      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
          "Error initializing QueryElevationComponent.", ex );
    }
  }

  /**
   * Returns the query-&gt;elevation map to use for the given reader, loading and
   * caching it on first use.
   * <p>
   * If {@link #elevationCache} holds a map under the <code>null</code> key, that
   * map is returned for every reader.  Otherwise the map cached for
   * <code>reader</code> is returned; when none is cached yet, the latest version
   * of the file named by the {@link #CONFIG_FILE} init argument is read from the
   * core's data directory, parsed, and stored in the cache under
   * <code>reader</code>.
   * <p>
   * The whole lookup/load runs while holding the lock on {@link #elevationCache}.
   *
   * @param reader the index reader whose elevation map is wanted (cache key)
   * @param core   the core supplying the data directory and resource loader
   * @return the elevation map keyed by analyzed query text
   * @throws SolrException if the {@link #CONFIG_FILE} init argument is missing
   * @throws Exception if the elevation file cannot be opened or parsed
   */
  Map<String, ElevationObj> getElevationMap( IndexReader reader, SolrCore core ) throws Exception
  {
    synchronized( elevationCache ) {
      // A map under the null key takes precedence over any per-reader map.
      Map<String, ElevationObj> map = elevationCache.get( null );
      if (map != null) return map;

      map = elevationCache.get( reader );
      if( map == null ) {
        String f = initArgs.get( CONFIG_FILE );
        if( f == null ) {
          throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
                  "QueryElevationComponent must specify argument: "+CONFIG_FILE );
        }
        log.info( "Loading QueryElevation from data dir: "+f );

        InputStream is = VersionedFile.getLatestFile( core.getDataDir(), f );
        try {
          Config cfg = new Config( core.getResourceLoader(), f, is, null );
          map = loadElevationMap( cfg );
        } finally {
          // Release the file handle ourselves instead of relying on the XML
          // parser to do it; closing an already-closed stream has no effect.
          if( is != null ) is.close();
        }
        elevationCache.put( reader, map );
      }
      return map;
    }
  }
  
  private Map loadElevationMap( Config cfg ) throws IOException
  {
    XPath xpath = XPathFactory.newInstance().newXPath();
    Map map = new HashMap();
    NodeList nodes = (NodeList)cfg.evaluate( "elevate/query", XPathConstants.NODESET );
    for (int i=0; i' child" );
      }

      ArrayList include = new ArrayList();
      ArrayList exclude = new ArrayList();
      for (int j=0; j elev = elevationCache.get( reader );
    if( elev == null ) {
      elev = new HashMap();
      elevationCache.put( reader, elev );
    }
    ElevationObj obj = new ElevationObj( query, Arrays.asList(ids), Arrays.asList(ex) );
    elev.put( obj.analyzed, obj );
  }
  
  /**
   * Normalizes a query string with the configured analyzer so it can be used
   * as a lookup key.
   * <p>
   * The text of every token the analyzer produces is concatenated, in order
   * and with no separator.  When no analyzer is configured the input is
   * returned unchanged.
   *
   * @param query the raw query string
   * @return the concatenated token text, or <code>query</code> itself if
   *         {@link #analyzer} is <code>null</code>
   * @throws IOException if the token stream fails
   */
  String getAnalyzedQuery( String query ) throws IOException
  {
    if( analyzer == null ) {
      return query;
    }
    TokenStream stream = analyzer.reusableTokenStream( "", new StringReader( query ) );
    stream.reset();

    StringBuilder joined = new StringBuilder();
    for( Token tok = stream.next(); tok != null; tok = stream.next() ) {
      // append the term characters straight from the token's buffer
      joined.append( tok.termBuffer(), 0, tok.termLength() );
    }
    return joined.toString();
  }

  //---------------------------------------------------------------------------------
  // SearchComponent
  //---------------------------------------------------------------------------------
  
  /**
   * Rewrites the request's query and sort when an elevation entry exists for
   * the analyzed query string.
   * <p>
   * Does nothing if the {@link #ENABLE} request parameter is false, or if the
   * request has no query or query string.  Otherwise:
   * <ul>
   *   <li>the query is replaced by a BooleanQuery combining the original query
   *       and the entry's include query as SHOULD clauses, plus the entry's
   *       exclude clauses, if any;</li>
   *   <li>if no sort is set, the sort becomes "elevation comparator, then
   *       score"; if a sort is set, an elevation sort field is inserted in
   *       front of every score sort field, and, when forcing is on and the
   *       first sort field is not score, also at the very front;</li>
   *   <li>when debugging is on, a "queryBoosting" debug section is added with
   *       the analyzed query and the ids of the elevated documents
   *       (<code>null</code> when nothing matched).</li>
   * </ul>
   *
   * @param rb the response builder carrying the request, query and sort spec
   * @throws IOException if analyzing the query string fails
   * @throws SolrException if the elevation map cannot be loaded
   */
  @Override
  public void prepare(ResponseBuilder rb) throws IOException
  {
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    // A runtime param can skip elevation entirely
    if( !params.getBool( ENABLE, true ) ) {
      return;
    }

    // A runtime parameter can alter the config value for forceElevation
    boolean force = params.getBool( FORCE_ELEVATION, forceElevation );
    
    Query query = rb.getQuery();
    String qstr = rb.getQueryString();
    if( query == null || qstr == null) {
      return;
    }

    // Elevation entries are keyed by the analyzed form of the query string
    qstr = getAnalyzedQuery(qstr);
    IndexReader reader = req.getSearcher().getReader();
    ElevationObj booster = null;
    try {
      Map<String, ElevationObj> elevations = getElevationMap( reader, req.getCore() );
      booster = elevations.get( qstr );
    }
    catch( Exception ex ) {
      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
          "Error loading elevation", ex );      
    }
    
    if( booster != null ) {
      // Change the query to insert forced documents
      BooleanQuery newq = new BooleanQuery( true );
      newq.add( query, BooleanClause.Occur.SHOULD );
      newq.add( booster.include, BooleanClause.Occur.SHOULD );
      if( booster.exclude != null ) {
        for( BooleanClause bq : booster.exclude ) {
          newq.add( bq );
        }
      }
      rb.setQuery( newq );
      
      // if the sort is 'score desc' use a custom sorting method to 
      // insert documents in their proper place 
      SortSpec sortSpec = rb.getSortSpec();
      if( sortSpec.getSort() == null ) {
        sortSpec.setSort( new Sort( new SortField[] {
            new SortField(idField, booster.comparatorSource, false ),
            new SortField(null, SortField.SCORE, false)
        }));
      }
      else {
        // Check if the sort is based on score
        boolean modify = false;
        SortField[] current = sortSpec.getSort().getSort();
        // Typed list so that toArray(SortField[]) yields a SortField[]
        List<SortField> sorts = new ArrayList<SortField>( current.length + 1 );
        // Perhaps force it to always sort by score
        if( force && current[0].getType() != SortField.SCORE ) {
          sorts.add( new SortField(idField, booster.comparatorSource, false ) );
          modify = true;
        }
        for( SortField sf : current ) {
          if( sf.getType() == SortField.SCORE ) {
            // elevation goes right before the score field, in the same direction
            sorts.add( new SortField(idField, booster.comparatorSource, sf.getReverse() ) );
            modify = true;
          }
          sorts.add( sf );
        }
        // Leave the caller's Sort object untouched when nothing was inserted
        if( modify ) {
          sortSpec.setSort( new Sort( sorts.toArray( new SortField[sorts.size()] ) ) );
        }
      }
    }
    
    // Add debugging information
    if( rb.isDebug() ) {
      List<String> match = null;
      if( booster != null ) {
        // Extract the elevated terms into a list
        match = new ArrayList<String>(booster.priority.size());
        for( Object o : booster.include.clauses() ) {
          TermQuery tq = (TermQuery)((BooleanClause)o).getQuery();
          match.add( tq.getTerm().text() );
        }
      }
      
      SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>();
      dbg.add( "q", qstr );
      dbg.add( "match", match );
      rb.addDebugInfo( "queryBoosting", dbg );
    }
  }

  /**
   * Intentionally a no-op: this component does its work by modifying the
   * input query, so nothing is left to do at process time.
   *
   * @param rb the response builder (unused)
   */
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    // Do nothing -- the real work is modifying the input query
  }
    
  //---------------------------------------------------------------------------------
  // SolrInfoMBean
  //---------------------------------------------------------------------------------

  /**
   * Returns a short, fixed, human-readable description of this component.
   */
  @Override
  public String getDescription() {
    return "Query Boosting -- boost particular documents for a given query";
  }

  /**
   * Returns the revision keyword string recorded for this source file.
   */
  @Override
  public String getVersion() {
    return "$Revision: 949888 $";
  }

  /**
   * Returns the id keyword string (file name, revision, date, author)
   * recorded for this source file.
   */
  @Override
  public String getSourceId() {
    return "$Id: QueryElevationComponent.java 949888 2010-05-31 23:24:40Z hossman $";
  }

  /**
   * Returns the URL keyword string giving the repository location recorded
   * for this source file.
   */
  @Override
  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.4/src/java/org/apache/solr/handler/component/QueryElevationComponent.java $";
  }

  /**
   * Returns the documentation links for this component: a one-element array
   * holding the wiki page URL.
   *
   * @throws RuntimeException wrapping the MalformedURLException if the
   *         hard-coded URL cannot be parsed
   */
  @Override
  public URL[] getDocs() {
    URL wikiPage;
    try {
      wikiPage = new URL("http://wiki.apache.org/solr/QueryElevationComponent");
    }
    catch (MalformedURLException e) {
      throw new RuntimeException( e );
    }
    return new URL[] { wikiPage };
  }
}

class ElevationComparatorSource extends FieldComparatorSource {
  private final Map priority;

  public ElevationComparatorSource( final Map boosts) {
    this.priority = boosts;
  }

  public FieldComparator newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
    return new FieldComparator() {
      
      FieldCache.StringIndex idIndex;
      private final int[] values = new int[numHits];
      int bottomVal;

      public int compare(int slot1, int slot2) {
        return values[slot2] - values[slot1];  // values will be small enough that there is no overflow concern
      }

      public void setBottom(int slot) {
        bottomVal = values[slot];
      }

      private int docVal(int doc) throws IOException {
        String id = idIndex.lookup[idIndex.order[doc]];
        Integer prio = priority.get(id);
        return prio == null ? 0 : prio.intValue();
      }

      public int compareBottom(int doc) throws IOException {
        return docVal(doc) - bottomVal;
      }

      public void copy(int slot, int doc) throws IOException {
        values[slot] = docVal(doc);
      }

      public void setNextReader(IndexReader reader, int docBase) throws IOException {
        idIndex = FieldCache.DEFAULT.getStringIndex(reader, fieldname);
      }

      public Comparable value(int slot) {
        return values[slot];
      }
    };
  }
}