All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.rest.schema.analysis.ManagedSynonymFilterFactory Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.rest.schema.analysis;

import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.FlattenGraphFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.rest.BaseSolrResource;
import org.apache.solr.rest.ManagedResource;
import org.apache.solr.rest.ManagedResourceStorage.StorageIO;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * TokenFilterFactory and ManagedResource implementation for doing CRUD on synonyms using the REST
 * API.
 *
 * @deprecated Use {@link ManagedSynonymGraphFilterFactory} instead, but be sure to also use {@link
 *     FlattenGraphFilterFactory} at index time (not at search time) as well.
 * @since 4.8.0
 * @lucene.spi {@value #NAME}
 */
@Deprecated
public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory {

  /** SPI name */
  public static final String NAME = "managedSynonym";

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public static final String SYNONYM_MAPPINGS = "synonymMappings";
  public static final String IGNORE_CASE_INIT_ARG = "ignoreCase";

  /**
   * Used internally to preserve the case of synonym mappings regardless of the ignoreCase setting.
   */
  private static class CasePreservedSynonymMappings {
    Map> mappings = new TreeMap<>();

    /**
     * Provides a view of the mappings for a given term; specifically, if ignoreCase is true, then
     * the returned "view" contains the mappings for all known cases of the term, if it is false,
     * then only the mappings for the specific case is returned.
     */
    Set getMappings(boolean ignoreCase, String key) {
      Set synMappings = null;
      if (ignoreCase) {
        // TODO: should we return the mapped values in all lower-case here?
        if (mappings.size() == 1) {
          // if only one in the map (which is common) just return it directly
          return mappings.values().iterator().next();
        }

        synMappings = new TreeSet<>();
        for (Set next : mappings.values()) synMappings.addAll(next);
      } else {
        synMappings = mappings.get(key);
      }
      return synMappings;
    }

    @Override
    public String toString() {
      return mappings.toString();
    }
  }

  /**
   * ManagedResource implementation for synonyms, which are so specialized that it makes sense to
   * implement this class as an inner class as it has little application outside the
   * SynonymFilterFactory use cases.
   */
  public static class SynonymManager extends ManagedResource
      implements ManagedResource.ChildResourceSupport {
    protected Map synonymMappings;

    public SynonymManager(String resourceId, SolrResourceLoader loader, StorageIO storageIO)
        throws SolrException {
      super(resourceId, loader, storageIO);
    }

    @SuppressWarnings("unchecked")
    @Override
    protected void onManagedDataLoadedFromStorage(NamedList managedInitArgs, Object managedData)
        throws SolrException {
      NamedList initArgs = (NamedList) managedInitArgs;

      String format = (String) initArgs.get("format");
      if (format != null && !"solr".equals(format)) {
        throw new SolrException(
            ErrorCode.BAD_REQUEST, "Invalid format " + format + "! Only 'solr' is supported.");
      }

      // the default behavior is to not ignore case,
      // so if not supplied, then install the default
      if (initArgs.get(IGNORE_CASE_INIT_ARG) == null) {
        initArgs.add(IGNORE_CASE_INIT_ARG, Boolean.FALSE);
      }

      boolean ignoreCase = getIgnoreCase(managedInitArgs);
      synonymMappings = new TreeMap<>();
      if (managedData != null) {
        Map storedSyns = (Map) managedData;
        for (Map.Entry entry : storedSyns.entrySet()) {
          String key = entry.getKey();

          String caseKey = applyCaseSetting(ignoreCase, key);
          CasePreservedSynonymMappings cpsm = synonymMappings.get(caseKey);
          if (cpsm == null) {
            cpsm = new CasePreservedSynonymMappings();
            synonymMappings.put(caseKey, cpsm);
          }

          // give the nature of our JSON parsing solution, we really have
          // no guarantees on what is in the file
          Object mapping = entry.getValue();
          if (!(mapping instanceof List)) {
            throw new SolrException(
                ErrorCode.SERVER_ERROR,
                "Invalid synonym file format! Expected a list of synonyms for "
                    + key
                    + " but got "
                    + mapping.getClass().getName());
          }

          Set sortedVals = new TreeSet<>((List) entry.getValue());
          cpsm.mappings.put(key, sortedVals);
        }
      }
      if (log.isInfoEnabled()) {
        log.info("Loaded {} synonym mappings for {}", synonymMappings.size(), getResourceId());
      }
    }

    @SuppressWarnings("unchecked")
    @Override
    protected Object applyUpdatesToManagedData(Object updates) {
      boolean ignoreCase = getIgnoreCase();
      boolean madeChanges = false;
      if (updates instanceof List) {
        madeChanges = applyListUpdates((List) updates, ignoreCase);
      } else if (updates instanceof Map) {
        madeChanges = applyMapUpdates((Map) updates, ignoreCase);
      } else {
        throw new SolrException(
            ErrorCode.BAD_REQUEST,
            "Unsupported data format ("
                + updates.getClass().getName()
                + "); expected a JSON object (Map or List)!");
      }
      return madeChanges ? getStoredView() : null;
    }

    protected boolean applyListUpdates(List jsonList, boolean ignoreCase) {
      boolean madeChanges = false;
      for (String term : jsonList) {
        // find the mappings using the case aware key
        String origTerm = term;
        term = applyCaseSetting(ignoreCase, term);
        CasePreservedSynonymMappings cpsm = synonymMappings.get(term);
        if (cpsm == null) cpsm = new CasePreservedSynonymMappings();

        Set treeTerms = new TreeSet<>(jsonList);
        cpsm.mappings.put(origTerm, treeTerms);
        madeChanges = true;
        // only add the cpsm to the synonymMappings if it has valid data
        if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) {
          synonymMappings.put(term, cpsm);
        }
      }
      return madeChanges;
    }

    protected boolean applyMapUpdates(Map jsonMap, boolean ignoreCase) {
      boolean madeChanges = false;

      for (String term : jsonMap.keySet()) {

        String origTerm = term;
        term = applyCaseSetting(ignoreCase, term);

        // find the mappings using the case aware key
        CasePreservedSynonymMappings cpsm = synonymMappings.get(term);
        if (cpsm == null) cpsm = new CasePreservedSynonymMappings();

        Set output = cpsm.mappings.get(origTerm);

        Object val = jsonMap.get(origTerm); // IMPORTANT: use the original
        if (val instanceof String) {
          String strVal = (String) val;

          if (output == null) {
            output = new TreeSet<>();
            cpsm.mappings.put(origTerm, output);
          }

          if (output.add(strVal)) {
            madeChanges = true;
          }
        } else if (val instanceof List) {
          @SuppressWarnings({"unchecked"})
          List vals = (List) val;

          if (output == null) {
            output = new TreeSet<>();
            cpsm.mappings.put(origTerm, output);
          }

          for (String nextVal : vals) {
            if (output.add(nextVal)) {
              madeChanges = true;
            }
          }

        } else {
          throw new SolrException(
              ErrorCode.BAD_REQUEST,
              "Unsupported value "
                  + val
                  + " for "
                  + term
                  + "; expected single value or a JSON array!");
        }

        // only add the cpsm to the synonymMappings if it has valid data
        if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) {
          synonymMappings.put(term, cpsm);
        }
      }

      return madeChanges;
    }

    /**
     * Returns a Map of how we store and load data managed by this resource, which is different than
     * how it is managed at runtime in order to support the ignoreCase setting.
     */
    protected Map> getStoredView() {
      Map> storedView = new TreeMap<>();
      for (CasePreservedSynonymMappings cpsm : synonymMappings.values()) {
        for (Map.Entry> entry : cpsm.mappings.entrySet()) {
          storedView.put(entry.getKey(), entry.getValue());
        }
      }
      return storedView;
    }

    protected String applyCaseSetting(boolean ignoreCase, String str) {
      return (ignoreCase && str != null) ? str.toLowerCase(Locale.ROOT) : str;
    }

    public boolean getIgnoreCase() {
      return getIgnoreCase(managedInitArgs);
    }

    public boolean getIgnoreCase(NamedList initArgs) {
      Boolean ignoreCase = initArgs.getBooleanArg(IGNORE_CASE_INIT_ARG);
      // ignoreCase = false by default
      return null == ignoreCase ? false : ignoreCase;
    }

    @Override
    public void doGet(BaseSolrResource endpoint, String childId) {
      SolrQueryResponse response = endpoint.getSolrResponse();
      if (childId != null) {
        boolean ignoreCase = getIgnoreCase();
        String key = applyCaseSetting(ignoreCase, childId);

        // if ignoreCase==true, then we get the mappings using the lower-cased key
        // and then return a union of all case-sensitive keys, if false, then
        // we only return the mappings for the exact case requested
        CasePreservedSynonymMappings cpsm = synonymMappings.get(key);
        Set mappings = (cpsm != null) ? cpsm.getMappings(ignoreCase, childId) : null;
        if (mappings == null)
          throw new SolrException(
              ErrorCode.NOT_FOUND,
              String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));

        response.add(childId, mappings);
      } else {
        response.add(SYNONYM_MAPPINGS, buildMapToStore(getStoredView()));
      }
    }

    @Override
    public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) {
      boolean ignoreCase = getIgnoreCase();
      String key = applyCaseSetting(ignoreCase, childId);

      CasePreservedSynonymMappings cpsm = synonymMappings.get(key);
      if (cpsm == null)
        throw new SolrException(
            ErrorCode.NOT_FOUND,
            String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));

      if (ignoreCase) {
        // delete all mappings regardless of case
        synonymMappings.remove(key);
      } else {
        // just delete the mappings for the specific case-sensitive key
        if (cpsm.mappings.containsKey(childId)) {
          cpsm.mappings.remove(childId);

          if (cpsm.mappings.isEmpty()) synonymMappings.remove(key);
        } else {
          throw new SolrException(
              ErrorCode.NOT_FOUND,
              String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));
        }
      }

      // store the updated data (using the stored view)
      storeManagedData(getStoredView());

      log.info("Removed synonym mappings for: {}", childId);
    }
  }

  /**
   * Custom SynonymMap.Parser implementation that provides synonym mappings from the managed JSON in
   * this class during SynonymMap building.
   */
  private static class ManagedSynonymParser extends SynonymMap.Parser {

    SynonymManager synonymManager;

    public ManagedSynonymParser(SynonymManager synonymManager, boolean dedup, Analyzer analyzer) {
      super(dedup, analyzer);
      this.synonymManager = synonymManager;
    }

    /** Add the managed synonyms and their mappings into the SynonymMap builder. */
    @Override
    public void parse(Reader in) throws IOException, ParseException {
      boolean ignoreCase = synonymManager.getIgnoreCase();
      for (CasePreservedSynonymMappings cpsm : synonymManager.synonymMappings.values()) {
        for (Map.Entry> entry : cpsm.mappings.entrySet()) {
          for (String mapping : entry.getValue()) {
            // apply the case setting to match the behavior of the SynonymMap builder
            CharsRef casedTerm =
                analyze(
                    synonymManager.applyCaseSetting(ignoreCase, entry.getKey()),
                    new CharsRefBuilder());
            CharsRef casedMapping =
                analyze(
                    synonymManager.applyCaseSetting(ignoreCase, mapping), new CharsRefBuilder());
            add(casedTerm, casedMapping, false);
          }
        }
      }
    }
  }

  protected SynonymFilterFactory delegate;

  /**
   * Creates the factory from the filter's configuration arguments, which are handed to the
   * superclass unchanged.
   *
   * @param args the configuration arguments for this filter
   */
  public ManagedSynonymFilterFactory(Map<String, String> args) {
    super(args);
  }

  /**
   * Default ctor for compatibility with SPI.
   *
   * <p>Always throws; instances must be created with {@link #ManagedSynonymFilterFactory(Map)}.
   */
  public ManagedSynonymFilterFactory() {
    throw defaultCtorException();
  }

  /** Builds the REST path of the managed synonym resource identified by this factory's handle. */
  @Override
  public String getResourceId() {
    final String synonymsRoot = "/schema/analysis/synonyms/";
    return synonymsRoot + handle;
  }

  /** Returns {@link SynonymManager}, the ManagedResource implementation backing this factory. */
  @Override
  protected Class<? extends ManagedResource> getManagedResourceImplClass() {
    return SynonymManager.class;
  }

  /**
   * Called once, during core initialization, to initialize any analysis components that depend on
   * the data managed by this resource. It is important that the analysis component is only
   * initialized once during core initialization so that text analysis is consistent, especially in
   * a distributed environment, as we don't want one server applying a different set of synonyms
   * than other servers.
   *
   * <p>Builds the {@link SynonymFilterFactory} delegate from the init args (forcing {@code
   * synonyms}, {@code expand=false} and {@code format=solr}) and overrides its synonym loading so
   * that the mappings come from the managed resource instead of a file.
   *
   * @param initArgs the init args; the three forced arguments are appended to this list
   * @param res the managed resource, which must be a {@link SynonymManager}
   * @throws SolrException (SERVER_ERROR) if the delegate fails with an IOException while loading
   */
  @SuppressWarnings("unchecked")
  @Override
  public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res)
      throws SolrException {
    NamedList<Object> args = (NamedList<Object>) initArgs;
    args.add("synonyms", getResourceId());
    args.add("expand", "false");
    args.add("format", "solr");

    // later entries win, so the three forced arguments above override any supplied ones
    Map<String, String> filtArgs = new HashMap<>();
    for (Map.Entry<String, Object> entry : args) {
      Object value = entry.getValue();
      // a NamedList may carry null values; they have no string form to pass on
      if (value != null) {
        filtArgs.put(entry.getKey(), value.toString());
      }
    }
    // create the actual filter factory that pulls the synonym mappings
    // from synonymMappings using a custom parser implementation
    delegate =
        new SynonymFilterFactory(filtArgs) {
          @Override
          protected SynonymMap loadSynonyms(
              ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
              throws IOException, ParseException {

            ManagedSynonymParser parser =
                new ManagedSynonymParser((SynonymManager) res, dedup, analyzer);
            // null is safe here because there's no actual parsing done against a input Reader
            parser.parse(null);
            return parser.build();
          }
        };
    try {
      delegate.inform(res.getResourceLoader());
    } catch (IOException e) {
      throw new SolrException(ErrorCode.SERVER_ERROR, e);
    }
  }

  /**
   * Hands the stream to the SynonymFilterFactory delegate.
   *
   * @param input the token stream to filter
   * @return the stream produced by the delegate
   * @throws IllegalStateException if the delegate has not been initialized yet
   */
  @Override
  public TokenStream create(TokenStream input) {
    if (delegate != null) {
      return delegate.create(input);
    }

    final String factoryName = this.getClass().getName();
    throw new IllegalStateException(
        factoryName
            + " not initialized correctly! The SynonymFilterFactory delegate was not initialized.");
  }
}