All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.update.processor.AddSchemaFieldsUpdateProcessorFactory Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.update.processor;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.ManagedIndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector;
import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
import static org.apache.solr.core.ConfigSetProperties.IMMUTABLE_CONFIGSET_ARG;


/**
 * 

* This processor will dynamically add fields to the schema if an input document contains * one or more fields that don't match any field or dynamic field in the schema. *

*

* By default, this processor selects all fields that don't match a schema field or * dynamic field. The "fieldName" and "fieldRegex" selectors may be specified to further * restrict the selected fields, but the other selectors ("typeName", "typeClass", and * "fieldNameMatchesSchemaField") may not be specified. *

*

* This processor is configured to map from each field's values' class(es) to the schema * field type that will be used when adding the new field to the schema. All new fields * are then added to the schema in a single batch. If schema addition fails for any * field, addition is re-attempted only for those that don’t match any schema * field. This process is repeated, either until all new fields are successfully added, * or until there are no new fields (presumably because the fields that were new when * this processor started its work were subsequently added by a different update * request, possibly on a different node). *

*

* This processor takes as configuration a sequence of zero or more "typeMapping"-s from * one or more "valueClass"-s, specified as either an <arr> of * <str>, or multiple <str> with the same name, * to an existing schema "fieldType". *

*

* If more than one "valueClass" is specified in a "typeMapping", field values with any * of the specified "valueClass"-s will be mapped to the specified target "fieldType". * The "typeMapping"-s are attempted in the specified order; if a field value's class * is not specified in a "valueClass", the next "typeMapping" is attempted. If no * "typeMapping" succeeds, then either the "typeMapping" configured with * <bool name="default">true</bool> is used, or if none is so * configured, the lt;str name="defaultFieldType">...</str> is * used. *

*

* Zero or more "copyField" directives may be included with each "typeMapping", using a * <lst>. The copy field source is automatically set to the new field * name; "dest" must specify the destination field or dynamic field in a * <str>; and "maxChars" may optionally be specified in an * <int>. *

*

* Example configuration: *

* *
 * <updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" name="add-schema-fields">
 *   <lst name="typeMapping">
 *     <str name="valueClass">java.lang.String</str>
 *     <str name="fieldType">text_general</str>
 *     <lst name="copyField">
 *       <str name="dest">*_str</str>
 *       <int name="maxChars">256</int>
 *     </lst>
 *     <!-- Use as default mapping instead of defaultFieldType -->
 *     <bool name="default">true</bool>
 *   </lst>
 *   <lst name="typeMapping">
 *     <str name="valueClass">java.lang.Boolean</str>
 *     <str name="fieldType">booleans</str>
 *   </lst>
 *   <lst name="typeMapping">
 *     <str name="valueClass">java.util.Date</str>
 *     <str name="fieldType">pdates</str>
 *   </lst>
 *   <lst name="typeMapping">
 *     <str name="valueClass">java.lang.Long</str>
 *     <str name="valueClass">java.lang.Integer</str>
 *     <str name="fieldType">plongs</str>
 *   </lst>
 *   <lst name="typeMapping">
 *     <str name="valueClass">java.lang.Number</str>
 *     <str name="fieldType">pdoubles</str>
 *   </lst>
 * </updateProcessor>
* @since 4.4.0 */ public class AddSchemaFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware, UpdateRequestProcessorFactory.RunAlways { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final String TYPE_MAPPING_PARAM = "typeMapping"; private static final String VALUE_CLASS_PARAM = "valueClass"; private static final String FIELD_TYPE_PARAM = "fieldType"; private static final String DEFAULT_FIELD_TYPE_PARAM = "defaultFieldType"; private static final String COPY_FIELD_PARAM = "copyField"; private static final String DEST_PARAM = "dest"; private static final String MAX_CHARS_PARAM = "maxChars"; private static final String IS_DEFAULT_PARAM = "default"; private List typeMappings = Collections.emptyList(); private SelectorParams inclusions = new SelectorParams(); private Collection exclusions = new ArrayList<>(); private SolrResourceLoader solrResourceLoader = null; private String defaultFieldType; @Override public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { return new AddSchemaFieldsUpdateProcessor(next); } @Override public void init(NamedList args) { inclusions = FieldMutatingUpdateProcessorFactory.parseSelectorParams(args); validateSelectorParams(inclusions); inclusions.fieldNameMatchesSchemaField = false; // Explicitly (non-configurably) require unknown field names exclusions = FieldMutatingUpdateProcessorFactory.parseSelectorExclusionParams(args); for (SelectorParams exclusion : exclusions) { validateSelectorParams(exclusion); } Object defaultFieldTypeParam = args.remove(DEFAULT_FIELD_TYPE_PARAM); if (null != defaultFieldTypeParam) { if ( ! (defaultFieldTypeParam instanceof CharSequence)) { throw new SolrException(SERVER_ERROR, "Init param '" + DEFAULT_FIELD_TYPE_PARAM + "' must be a "); } defaultFieldType = defaultFieldTypeParam.toString(); } typeMappings = parseTypeMappings(args); if (null == defaultFieldType && typeMappings.stream().noneMatch(TypeMapping::isDefault)) { throw new SolrException(SERVER_ERROR, "Must specify either '" + DEFAULT_FIELD_TYPE_PARAM + "' or declare one typeMapping as default."); } super.init(args); } @Override public void inform(SolrCore core) { solrResourceLoader = core.getResourceLoader(); for (TypeMapping typeMapping : typeMappings) { typeMapping.populateValueClasses(core); } } private static List parseTypeMappings(NamedList args) { List typeMappings = new ArrayList<>(); List typeMappingsParams = args.getAll(TYPE_MAPPING_PARAM); for (Object typeMappingObj : typeMappingsParams) { if (null == typeMappingObj) { throw new SolrException(SERVER_ERROR, "'" + TYPE_MAPPING_PARAM + "' init param cannot be null"); } if ( ! (typeMappingObj instanceof NamedList) ) { throw new SolrException(SERVER_ERROR, "'" + TYPE_MAPPING_PARAM + "' init param must be a "); } NamedList typeMappingNamedList = (NamedList)typeMappingObj; Object fieldTypeObj = typeMappingNamedList.remove(FIELD_TYPE_PARAM); if (null == fieldTypeObj) { throw new SolrException(SERVER_ERROR, "Each '" + TYPE_MAPPING_PARAM + "' must contain a '" + FIELD_TYPE_PARAM + "' "); } if ( ! (fieldTypeObj instanceof CharSequence)) { throw new SolrException(SERVER_ERROR, "'" + FIELD_TYPE_PARAM + "' init param must be a "); } if (null != typeMappingNamedList.get(FIELD_TYPE_PARAM)) { throw new SolrException(SERVER_ERROR, "Each '" + TYPE_MAPPING_PARAM + "' may contain only one '" + FIELD_TYPE_PARAM + "' "); } String fieldType = fieldTypeObj.toString(); Collection valueClasses = typeMappingNamedList.removeConfigArgs(VALUE_CLASS_PARAM); if (valueClasses.isEmpty()) { throw new SolrException(SERVER_ERROR, "Each '" + TYPE_MAPPING_PARAM + "' must contain at least one '" + VALUE_CLASS_PARAM + "' "); } // isDefault (optional) Boolean isDefault = false; Object isDefaultObj = typeMappingNamedList.remove(IS_DEFAULT_PARAM); if (null != isDefaultObj) { if ( ! (isDefaultObj instanceof Boolean)) { throw new SolrException(SERVER_ERROR, "'" + IS_DEFAULT_PARAM + "' init param must be a "); } if (null != typeMappingNamedList.get(IS_DEFAULT_PARAM)) { throw new SolrException(SERVER_ERROR, "Each '" + COPY_FIELD_PARAM + "' may contain only one '" + IS_DEFAULT_PARAM + "' "); } isDefault = Boolean.parseBoolean(isDefaultObj.toString()); } Collection copyFieldDefs = new ArrayList<>(); while (typeMappingNamedList.get(COPY_FIELD_PARAM) != null) { Object copyFieldObj = typeMappingNamedList.remove(COPY_FIELD_PARAM); if ( ! (copyFieldObj instanceof NamedList)) { throw new SolrException(SERVER_ERROR, "'" + COPY_FIELD_PARAM + "' init param must be a "); } NamedList copyFieldNamedList = (NamedList)copyFieldObj; // dest Object destObj = copyFieldNamedList.remove(DEST_PARAM); if (null == destObj) { throw new SolrException(SERVER_ERROR, "Each '" + COPY_FIELD_PARAM + "' must contain a '" + DEST_PARAM + "' "); } if ( ! (destObj instanceof CharSequence)) { throw new SolrException(SERVER_ERROR, "'" + COPY_FIELD_PARAM + "' init param must be a "); } if (null != copyFieldNamedList.get(COPY_FIELD_PARAM)) { throw new SolrException(SERVER_ERROR, "Each '" + COPY_FIELD_PARAM + "' may contain only one '" + COPY_FIELD_PARAM + "' "); } String dest = destObj.toString(); // maxChars (optional) Integer maxChars = 0; Object maxCharsObj = copyFieldNamedList.remove(MAX_CHARS_PARAM); if (null != maxCharsObj) { if ( ! (maxCharsObj instanceof Integer)) { throw new SolrException(SERVER_ERROR, "'" + MAX_CHARS_PARAM + "' init param must be a "); } if (null != copyFieldNamedList.get(MAX_CHARS_PARAM)) { throw new SolrException(SERVER_ERROR, "Each '" + COPY_FIELD_PARAM + "' may contain only one '" + MAX_CHARS_PARAM + "' "); } maxChars = Integer.parseInt(maxCharsObj.toString()); } copyFieldDefs.add(new CopyFieldDef(dest, maxChars)); } typeMappings.add(new TypeMapping(fieldType, valueClasses, isDefault, copyFieldDefs)); if (0 != typeMappingNamedList.size()) { throw new SolrException(SERVER_ERROR, "Unexpected '" + TYPE_MAPPING_PARAM + "' init sub-param(s): '" + typeMappingNamedList.toString() + "'"); } args.remove(TYPE_MAPPING_PARAM); } return typeMappings; } private void validateSelectorParams(SelectorParams params) { if ( ! params.typeName.isEmpty()) { throw new SolrException(SERVER_ERROR, "'typeName' init param is not allowed in this processor"); } if ( ! params.typeClass.isEmpty()) { throw new SolrException(SERVER_ERROR, "'typeClass' init param is not allowed in this processor"); } if (null != params.fieldNameMatchesSchemaField) { throw new SolrException(SERVER_ERROR, "'fieldNameMatchesSchemaField' init param is not allowed in this processor"); } } private static class TypeMapping { public String fieldTypeName; public Collection valueClassNames; public Collection copyFieldDefs; public Set> valueClasses; public Boolean isDefault; public TypeMapping(String fieldTypeName, Collection valueClassNames, boolean isDefault, Collection copyFieldDefs) { this.fieldTypeName = fieldTypeName; this.valueClassNames = valueClassNames; this.isDefault = isDefault; this.copyFieldDefs = copyFieldDefs; // this.valueClasses population is delayed until the schema is available } public void populateValueClasses(SolrCore core) { IndexSchema schema = core.getLatestSchema(); ClassLoader loader = core.getResourceLoader().getClassLoader(); if (null == schema.getFieldTypeByName(fieldTypeName)) { throw new SolrException(SERVER_ERROR, "fieldType '" + fieldTypeName + "' not found in the schema"); } valueClasses = new HashSet<>(); for (String valueClassName : valueClassNames) { try { valueClasses.add(loader.loadClass(valueClassName)); } catch (ClassNotFoundException e) { throw new SolrException(SERVER_ERROR, "valueClass '" + valueClassName + "' not found for fieldType '" + fieldTypeName + "'"); } } } public boolean isDefault() { return isDefault; } } private static class CopyFieldDef { private final String destGlob; private final Integer maxChars; public CopyFieldDef(String destGlob, Integer maxChars) { this.destGlob = destGlob; this.maxChars = maxChars; if (destGlob.contains("*") && (!destGlob.startsWith("*") && !destGlob.endsWith("*"))) { throw new SolrException(SERVER_ERROR, "dest '" + destGlob + "' is invalid. Must either be a plain field name or start or end with '*'"); } } public Integer getMaxChars() { return maxChars; } public String getDest(String srcFieldName) { if (!destGlob.contains("*")) { return destGlob; } else if (destGlob.startsWith("*")) { return srcFieldName + destGlob.substring(1); } else { return destGlob.substring(0,destGlob.length()-1) + srcFieldName; } } } private class AddSchemaFieldsUpdateProcessor extends UpdateRequestProcessor { public AddSchemaFieldsUpdateProcessor(UpdateRequestProcessor next) { super(next); } @Override public void processAdd(AddUpdateCommand cmd) throws IOException { if ( ! cmd.getReq().getSchema().isMutable()) { final String message = "This IndexSchema is not mutable."; throw new SolrException(BAD_REQUEST, message); } final SolrInputDocument doc = cmd.getSolrInputDocument(); final SolrCore core = cmd.getReq().getCore(); // use the cmd's schema rather than the latest, because the schema // can be updated during processing. Using the cmd's schema guarantees // this will be detected and the cmd's schema updated. IndexSchema oldSchema = cmd.getReq().getSchema(); for (;;) { List newFields = new ArrayList<>(); // Group copyField defs per field and then per maxChar, to adapt to IndexSchema API Map>> newCopyFields = new HashMap<>(); // build a selector each time through the loop b/c the schema we are // processing may have changed FieldNameSelector selector = buildSelector(oldSchema); Map> unknownFields = new HashMap<>(); getUnknownFields(selector, doc, unknownFields); for (final Map.Entry> entry : unknownFields.entrySet()) { String fieldName = entry.getKey(); String fieldTypeName = defaultFieldType; TypeMapping typeMapping = mapValueClassesToFieldType(entry.getValue()); if (typeMapping != null) { fieldTypeName = typeMapping.fieldTypeName; if (!typeMapping.copyFieldDefs.isEmpty()) { newCopyFields.put(fieldName, typeMapping.copyFieldDefs.stream().collect(Collectors.groupingBy(CopyFieldDef::getMaxChars))); } } newFields.add(oldSchema.newField(fieldName, fieldTypeName, Collections.emptyMap())); } if (newFields.isEmpty() && newCopyFields.isEmpty()) { // nothing to do - no fields will be added - exit from the retry loop log.debug("No fields or copyFields to add to the schema."); break; } else if ( isImmutableConfigSet(core) ) { final String message = "This ConfigSet is immutable."; throw new SolrException(BAD_REQUEST, message); } if (log.isDebugEnabled()) { StringBuilder builder = new StringBuilder(); builder.append("\nFields to be added to the schema: ["); boolean isFirst = true; for (SchemaField field : newFields) { builder.append(isFirst ? "" : ","); isFirst = false; builder.append(field.getName()); builder.append("{type=").append(field.getType().getTypeName()).append("}"); } builder.append("]"); builder.append("\nCopyFields to be added to the schema: ["); isFirst = true; for (Map.Entry>> entry : newCopyFields.entrySet()) { String fieldName = entry.getKey(); builder.append(isFirst ? "" : ","); isFirst = false; builder.append("source=").append(fieldName).append("{"); for (List copyFieldDefList : entry.getValue().values()) { for (CopyFieldDef copyFieldDef : copyFieldDefList) { builder.append("{dest=").append(copyFieldDef.getDest(fieldName)); builder.append(", maxChars=").append(copyFieldDef.getMaxChars()).append("}"); } } builder.append("}"); } builder.append("]"); log.debug(builder.toString()); } // Need to hold the lock during the entire attempt to ensure that // the schema on the request is the latest synchronized (oldSchema.getSchemaUpdateLock()) { try { IndexSchema newSchema = oldSchema.addFields(newFields, Collections.emptyMap(), false); // Add copyFields for (Map.Entry>> entry : newCopyFields.entrySet()) { String srcField = entry.getKey(); for (Integer maxChars : entry.getValue().keySet()) { newSchema = newSchema.addCopyFields(srcField, entry.getValue().get(maxChars).stream().map(f -> f.getDest(srcField)).collect(Collectors.toList()), maxChars); } } if (null != newSchema) { ((ManagedIndexSchema)newSchema).persistManagedSchema(false); core.setLatestSchema(newSchema); cmd.getReq().updateSchemaToLatest(); log.debug("Successfully added field(s) and copyField(s) to the schema."); break; // success - exit from the retry loop } else { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Failed to add fields and/or copyFields."); } } catch (ManagedIndexSchema.FieldExistsException e) { log.error("At least one field to be added already exists in the schema - retrying."); oldSchema = core.getLatestSchema(); cmd.getReq().updateSchemaToLatest(); } catch (ManagedIndexSchema.SchemaChangedInZkException e) { log.debug("Schema changed while processing request - retrying."); oldSchema = core.getLatestSchema(); cmd.getReq().updateSchemaToLatest(); } } } super.processAdd(cmd); } /** * Recursively find unknown fields in the given doc and its child documents, if any. */ private void getUnknownFields (FieldNameSelector selector, SolrInputDocument doc, Map> unknownFields) { for (final String fieldName : doc.getFieldNames()) { //We do a assert and a null check because even after SOLR-12710 is addressed //older SolrJ versions can send null values causing an NPE assert fieldName != null; if (fieldName != null) { if (selector.shouldMutate(fieldName)) { // returns false if the field already exists in the current schema List solrInputFields = unknownFields.get(fieldName); if (null == solrInputFields) { solrInputFields = new ArrayList<>(); unknownFields.put(fieldName, solrInputFields); } solrInputFields.add(doc.getField(fieldName)); } } } List childDocs = doc.getChildDocuments(); if (null != childDocs) { for (SolrInputDocument childDoc : childDocs) { getUnknownFields(selector, childDoc, unknownFields); } } } /** * Maps all given field values' classes to a typeMapping object * * @param fields one or more (same-named) field values from one or more documents */ private TypeMapping mapValueClassesToFieldType(List fields) { NEXT_TYPE_MAPPING: for (TypeMapping typeMapping : typeMappings) { for (SolrInputField field : fields) { //We do a assert and a null check because even after SOLR-12710 is addressed //older SolrJ versions can send null values causing an NPE assert field.getValues() != null; if (field.getValues() != null) { NEXT_FIELD_VALUE: for (Object fieldValue : field.getValues()) { for (Class valueClass : typeMapping.valueClasses) { if (valueClass.isInstance(fieldValue)) { continue NEXT_FIELD_VALUE; } } // This fieldValue is not an instance of any of the mapped valueClass-s, // so mapping fails - go try the next type mapping. continue NEXT_TYPE_MAPPING; } } } // Success! Each of this field's values is an instance of a mapped valueClass return typeMapping; } // At least one of this field's values is not an instance of any of the mapped valueClass-s // Return the typeMapping marked as default, if we have one, else return null to use fallback type List defaultMappings = typeMappings.stream().filter(TypeMapping::isDefault).collect(Collectors.toList()); if (defaultMappings.size() > 1) { throw new SolrException(SERVER_ERROR, "Only one typeMapping can be default"); } else if (defaultMappings.size() == 1) { return defaultMappings.get(0); } else { return null; } } private FieldNameSelector buildSelector(IndexSchema schema) { FieldNameSelector selector = FieldMutatingUpdateProcessor.createFieldNameSelector (solrResourceLoader, schema, inclusions, fieldName -> null == schema.getFieldTypeNoEx(fieldName)); for (SelectorParams exc : exclusions) { selector = FieldMutatingUpdateProcessor.wrap(selector, FieldMutatingUpdateProcessor.createFieldNameSelector (solrResourceLoader, schema, exc, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS)); } return selector; } private boolean isImmutableConfigSet(SolrCore core) { NamedList args = core.getConfigSetProperties(); Object immutable = args != null ? args.get(IMMUTABLE_CONFIGSET_ARG) : null; return immutable != null ? Boolean.parseBoolean(immutable.toString()) : false; } } }