// org.apache.solr.update.processor.RegexpBoostProcessor (from the solr-core artifact)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A processor which will match content of "inputField" against regular expressions found in
* "boostFilename", and if it matches will return the corresponding boost value from the file and
* output this to "boostField" as a double value. If more than one pattern matches, the boosts from
* each are multiplied.
*
* <p>A typical use case may be to match a URL against patterns to boost or deboost web documents
* based on the URL itself:
*
* <pre>
* # Format of each line: &lt;pattern&gt;&lt;TAB&gt;&lt;boost&gt;
* # Example:
* https?://my.domain.com/temp.* 0.2
* </pre>
*
* <p>All three of inputField, boostField and boostFilename are mandatory parameters.
*/
public class RegexpBoostProcessor extends UpdateRequestProcessor {
// Names of the processor's configuration parameters: the field whose content is matched, the
// field that receives the resulting boost, and the file holding the pattern/boost pairs.
protected static final String INPUT_FIELD_PARAM = "inputField";
protected static final String BOOST_FIELD_PARAM = "boostField";
protected static final String BOOST_FILENAME_PARAM = "boostFilename";
// Initial values assigned to inputFieldname and boostFieldname below.
private static final String DEFAULT_INPUT_FIELDNAME = "url";
private static final String DEFAULT_BOOST_FIELDNAME = "urlboost";
// Class logger; MethodHandles.lookup().lookupClass() resolves to this class.
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// Starts out true. NOTE(review): where this flag is read or cleared is outside this view —
// presumably it disables the processor on bad configuration; confirm.
private boolean enabled = true;
// Field names in effect for this instance; they start at the defaults declared above.
private String inputFieldname = DEFAULT_INPUT_FIELDNAME;
private String boostFieldname = DEFAULT_BOOST_FIELDNAME;
// No initial value here. NOTE(review): presumably set from the boostFilename parameter — confirm.
private String boostFilename;
// NOTE(review): declared as a raw List; the element type is not visible in this excerpt (generic
// type arguments may have been lost in extraction) — confirm against the upstream source.
private List boostEntries = new ArrayList<>();
// NOTE(review): presumably the key under which parsed boost entries are kept in a shared cache
// map — its usage is not visible here; confirm.
private static final String BOOST_ENTRIES_CACHE_KEY = "boost-entries";
RegexpBoostProcessor(
SolrParams parameters,
SolrQueryRequest request,
SolrQueryResponse response,
UpdateRequestProcessor nextProcessor,
final Map