org.apache.solr.update.processor.CloneFieldUpdateProcessorFactory Maven / Gradle / Ivy
Show all versions of solr-core Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector;
import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Clones the values found in any matching source
field into
* a configured dest
field.
*
* The source
field(s) can be configured as either:
*
*
* - One or more
<str>
* - An
<arr>
of <str>
* - A
<lst>
containing {@link FieldMutatingUpdateProcessorFactory FieldMutatingUpdateProcessorFactory style selector arguments}
*
*
* The dest
field can be a single <str>
* containing the literal name of a destination field, or it may be a <lst>
specifying a
* regex pattern
and a replacement
string. If the pattern + replacement option
* is used the pattern will be matched against all fields matched by the source selector, and the replacement
* string (including any capture groups specified from the pattern) will be evaluated using
* {@link Matcher#replaceAll(String)} to generate the literal name of the destination field.
*
*
* If the resolved dest
field already exists in the document, then the
* values from the source
fields will be added to it. The
* "boost" value associated with the dest
will not be changed,
* and any boost specified on the source
fields will be ignored.
* (If the dest
field did not exist prior to this processor, the
* newly created dest
field will have the default boost of 1.0)
*
*
* In the example below:
*
*
* - The
category
field will be cloned into the category_s
field
* - Both the
authors
and editors
fields will be cloned into the
* contributors
field
*
* - Any field with a name ending in
_price
-- except for
* list_price
-- will be cloned into the all_prices field
*
* - Any field name beginning with feat and ending in s (i.e. feats or features)
* will be cloned into a field prefixed with key_ and not ending in s. (i.e. key_feat or key_feature)
*
*
*
*
*
* <updateRequestProcessorChain name="multiple-clones">
* <processor class="solr.CloneFieldUpdateProcessorFactory">
* <str name="source">category</str>
* <str name="dest">category_s</str>
* </processor>
* <processor class="solr.CloneFieldUpdateProcessorFactory">
* <arr name="source">
* <str>authors</str>
* <str>editors</str>
* </arr>
* <str name="dest">contributors</str>
* </processor>
* <processor class="solr.CloneFieldUpdateProcessorFactory">
* <lst name="source">
* <str name="fieldRegex">.*_price$</str>
* <lst name="exclude">
* <str name="fieldName">list_price</str>
* </lst>
* </lst>
* <str name="dest">all_prices</str>
* </processor>
* <processor class="solr.processor.CloneFieldUpdateProcessorFactory">
* <lst name="source">
* <str name="fieldRegex">^feat(.*)s$</str>
* </lst>
* <lst name="dest">
* <str name="pattern">^feat(.*)s$</str>
* <str name="replacement">key_feat$1</str>
* </lst>
* </processor>
* </updateRequestProcessorChain>
*
*
*
* In common case situations where you wish to use a single regular expression as both a
* fieldRegex
selector and a destination pattern
, a "short hand" syntax
* is supported for convenience: The pattern
and replacement
may be specified
* at the top level, omitting source
and dest
declarations completely, and
* the pattern
will be used to construct an equivalent source
selector internally.
*
*
* For example, both of the following configurations are equivalent:
*
*
* <!-- full syntax -->
* <processor class="solr.processor.CloneFieldUpdateProcessorFactory">
* <lst name="source">
* <str name="fieldRegex">^feat(.*)s$</str>
* </lst>
* <lst name="dest">
* <str name="pattern">^feat(.*)s$</str>
* <str name="replacement">key_feat$1</str>
* </lst>
* </processor>
*
* <!-- syntactic sugar syntax -->
* <processor class="solr.processor.CloneFieldUpdateProcessorFactory">
* <str name="pattern">^feat(.*)s$</str>
* <str name="replacement">key_feat$1</str>
* </processor>
*
*
*
* When cloning multiple fields (or a single multivalued field) into a single valued field, one of the
* {@link FieldValueSubsetUpdateProcessorFactory} implementations configured after the
* CloneFieldUpdateProcessorFactory
can be useful to reduce the list of values down to a
* single value.
*
*
* @see FieldValueSubsetUpdateProcessorFactory
* @since 4.0.0
*/
public class CloneFieldUpdateProcessorFactory
extends UpdateRequestProcessorFactory implements SolrCoreAware {
/** Logger for this class. */
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/** Name of the init param that selects the source field(s). */
public static final String SOURCE_PARAM = "source";
/** Name of the init param that identifies the destination field. */
public static final String DEST_PARAM = "dest";
/** Name of the init param holding the regex pattern. */
public static final String PATTERN_PARAM = "pattern";
/** Name of the init param holding the replacement string. */
public static final String REPLACEMENT_PARAM = "replacement";
/**
* Inclusion rules for the source fields. When the short hand syntax is used,
* initSimpleRegexReplacement replaces this with a selector whose only rule is the
* configured pattern.
*/
private SelectorParams srcInclusions = new SelectorParams();
/**
* Exclusion rules for the source fields.
* NOTE(review): declared as a raw Collection here; presumably the elements are
* SelectorParams -- confirm against the code that populates it.
*/
private Collection srcExclusions
= new ArrayList<>();
/**
* Selector applied to source field names. Starts out null; getSourceSelector() throws
* if it is still null when requested.
*/
private FieldNameSelector srcSelector = null;
/**
* If pattern is null, this is a literal field name. If pattern is non-null then this
* is a replacement string that may contain meta-characters (ie: capture group identifiers)
* @see #pattern
*/
private String dest = null;
/** @see #dest */
private Pattern pattern = null;
/**
* Returns the selector used to pick the source fields.
*
* @return the selector; never null
* @throws SolrException with code SERVER_ERROR if the selector has not been set yet
*/
@SuppressWarnings("WeakerAccess")
protected final FieldNameSelector getSourceSelector() {
  if (null == srcSelector) {
    // Nothing has assigned the selector yet, so there is nothing usable to hand back.
    throw new SolrException(SERVER_ERROR, "selector was never initialized, "+
        " inform(SolrCore) never called???");
  }
  return srcSelector;
}
/**
* Configures this factory from its init params.
*
* Two mutually exclusive syntaxes are recognized: the full syntax (both "source" and
* "dest" present) and the short hand syntax (both "pattern" and "replacement" present).
* The detection done here is deliberately loose; the per-syntax helpers perform the
* strict validation. Any param still left in {@code args} after the helper returns is
* rejected as unexpected.
*
* @param args the init params
* @throws SolrException with code SERVER_ERROR if neither syntax is detected or if
*         unexpected params remain
*/
@SuppressWarnings("unchecked")
@Override
public void init(@SuppressWarnings({"rawtypes"})NamedList args) {
  final boolean fullSyntax =
      args.indexOf(SOURCE_PARAM, 0) >= 0 && args.indexOf(DEST_PARAM, 0) >= 0;

  if (fullSyntax) {
    initSourceSelectorSyntax(args);
  } else {
    final boolean shortHandSyntax =
        args.indexOf(PATTERN_PARAM, 0) >= 0 && args.indexOf(REPLACEMENT_PARAM, 0) >= 0;
    if (!shortHandSyntax) {
      throw new SolrException(SERVER_ERROR, "A combination of either '" + SOURCE_PARAM + "' + '"+
          DEST_PARAM + "', or '" + REPLACEMENT_PARAM + "' + '" +
          PATTERN_PARAM + "' init params are mandatory");
    }
    initSimpleRegexReplacement(args);
  }

  // Whatever is still present at this point is a param nobody claimed.
  if (args.size() > 0) {
    throw new SolrException(SERVER_ERROR,
        "Unexpected init param(s): '" +
        args.getName(0) + "'");
  }

  super.init(args);
}
/**
* Init helper for the "short hand" syntax, where a single regex is given at the top level
* as "pattern" together with a "replacement", and no "source" / "dest" params are used.
*
* The pattern becomes both the destination pattern and the only inclusion rule of the
* source selector; the replacement becomes the destination. Both params are removed from
* {@code args}.
*
* @param args the init params; expected to contain "pattern" and "replacement" only
* @throws SolrException with code SERVER_ERROR if "source" or "dest" is also present, if
*         either value is null or not a String, if any other param is present, or if the
*         pattern is not a valid regex
*/
@SuppressWarnings("unchecked")
private void initSimpleRegexReplacement(@SuppressWarnings({"rawtypes"})NamedList args) {
  // The caller already saw both pattern and replacement. Check for the full-syntax params
  // explicitly so the user gets a better message than the generic "unexpected" one.
  if (0 <= args.indexOf(SOURCE_PARAM, 0) || 0 <= args.indexOf(DEST_PARAM, 0) ) {
    throw new SolrException(SERVER_ERROR,"Short hand syntax must not be mixed with full syntax. Found " +
        PATTERN_PARAM + " and " + REPLACEMENT_PARAM + " but also found " + SOURCE_PARAM + " or " + DEST_PARAM);
  }

  Object patt = args.remove(PATTERN_PARAM);
  Object replacement = args.remove(REPLACEMENT_PARAM);

  // The names were present, but the values themselves may still be null.
  if (null == patt || null == replacement) {
    throw new SolrException(SERVER_ERROR, "Init params '" + PATTERN_PARAM + "' and '" +
        REPLACEMENT_PARAM + "' are both mandatory if '" + SOURCE_PARAM + "' and '"+
        DEST_PARAM + "' are not both specified");
  }

  // The short hand form allows no other options at the top level.
  if (0 != args.size()) {
    throw new SolrException(SERVER_ERROR, "Init params '" + REPLACEMENT_PARAM + "' and '" +
        PATTERN_PARAM + "' must be children of '" + DEST_PARAM +
        "' to be combined with other options.");
  }

  // Name the expected config element in the messages so the user knows what to write.
  if (!(replacement instanceof String)) {
    throw new SolrException(SERVER_ERROR, "Init param '" + REPLACEMENT_PARAM + "' must be a string (i.e. <str>)");
  }
  if (!(patt instanceof String)) {
    throw new SolrException(SERVER_ERROR, "Init param '" + PATTERN_PARAM + "' must be a string (i.e. <str>)");
  }

  // Compile before touching any field so an invalid regex leaves this factory unchanged.
  final Pattern compiled;
  try {
    compiled = Pattern.compile(patt.toString());
  } catch (PatternSyntaxException pe) {
    throw new SolrException(SERVER_ERROR, "Init param " + PATTERN_PARAM +
        " is not a valid regex pattern: " + patt, pe);
  }

  dest = replacement.toString();
  this.pattern = compiled;

  // The same regex doubles as the only source inclusion rule.
  srcInclusions = new SelectorParams();
  srcInclusions.fieldRegex = Collections.singletonList(this.pattern);
}
/**
* init helper method that should only be called when we know for certain that both the
* "source" and "dest" init params do exist.
*/
@SuppressWarnings("unchecked")
private void initSourceSelectorSyntax(@SuppressWarnings({"rawtypes"})NamedList args) {
// Full and complete syntax where source and dest are mandatory.
//
// source may be a single string or a selector.
// dest may be a single string or list containing pattern and replacement
//
// source != null && dest != null
// if we got here we know we had source and dest, now check for the other two so that we can give a better
// message than "unexpected"
if (0 <= args.indexOf(PATTERN_PARAM, 0) || 0 <= args.indexOf(REPLACEMENT_PARAM, 0) ) {
throw new SolrException(SERVER_ERROR,"Short hand syntax must not be mixed with full syntax. Found " +
SOURCE_PARAM + " and " + DEST_PARAM + " but also found " + PATTERN_PARAM + " or " + REPLACEMENT_PARAM);
}
Object d = args.remove(DEST_PARAM);
assert null != d;
List