All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.handler.clustering.EngineContext Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.clustering;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.lang.invoke.MethodHandles;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.ServiceLoader;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.apache.solr.core.SolrCore;
import org.carrot2.clustering.ClusteringAlgorithm;
import org.carrot2.clustering.ClusteringAlgorithmProvider;
import org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm;
import org.carrot2.clustering.lingo.LingoClusteringAlgorithm;
import org.carrot2.clustering.stc.STCClusteringAlgorithm;
import org.carrot2.language.LanguageComponents;
import org.carrot2.language.LanguageComponentsLoader;
import org.carrot2.language.LoadedLanguages;
import org.carrot2.util.ChainedResourceLookup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Clustering engine context: algorithms, preloaded language resources and initial validation. */
final class EngineContext {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  private final LinkedHashMap languages;
  private final Map algorithmProviders;

  private static final Map aliasedNames;

  static {
    aliasedNames = new HashMap<>();
    aliasedNames.put(LingoClusteringAlgorithm.class.getName(), LingoClusteringAlgorithm.NAME);
    aliasedNames.put(STCClusteringAlgorithm.class.getName(), STCClusteringAlgorithm.NAME);
    aliasedNames.put(
        BisectingKMeansClusteringAlgorithm.class.getName(),
        BisectingKMeansClusteringAlgorithm.NAME);
  }

  EngineContext(String resourcesPath, SolrCore core) {
    LanguageComponentsLoader loader = LanguageComponents.loader();

    List resourceLocations = new ArrayList<>();

    Path configDir = core.getResourceLoader().getConfigPath();
    if (resourcesPath != null && !resourcesPath.trim().isEmpty()) {
      configDir = configDir.resolve(resourcesPath);
      resourceLocations.add(configDir);
    }

    if (!resourceLocations.isEmpty()) {
      log.info("Clustering algorithm resources first looked up relative to: {}", resourceLocations);

      loader.withResourceLookup(
          (provider) ->
              new ChainedResourceLookup(
                  Arrays.asList(
                      new PathResourceLookup(resourceLocations),
                      provider.defaultResourceLookup())));
    } else {
      log.info("Resources read from defaults (JARs).");
    }

    ClassLoader classLoader = getClass().getClassLoader();
    algorithmProviders =
        ServiceLoader.load(ClusteringAlgorithmProvider.class, classLoader).stream()
            .map(ServiceLoader.Provider::get)
            .collect(Collectors.toMap(ClusteringAlgorithmProvider::name, e -> e));

    // Only load the resources of algorithms we're interested in.
    loader.limitToAlgorithms(
        algorithmProviders.values().stream()
            .map(Supplier::get)
            .toArray(ClusteringAlgorithm[]::new));

    languages = new LinkedHashMap<>();
    try {
      LoadedLanguages loadedLanguages = loader.load();
      for (String lang : loadedLanguages.languages()) {
        languages.put(lang, loadedLanguages.language(lang));
      }
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }

    // Debug info about loaded languages.
    for (String lang : languages.keySet()) {
      if (log.isTraceEnabled()) {
        log.trace(
            "Loaded language '{}' with components:\n  - {}",
            lang,
            languages.get(lang).components().stream()
                .map(Class::getSimpleName)
                .collect(Collectors.joining("\n  - ")));
      }
    }

    // Remove algorithms for which there are no languages that are supported.
    algorithmProviders
        .entrySet()
        .removeIf(e -> !isAlgorithmAvailable(e.getValue(), languages.values()));

    algorithmProviders.forEach(
        (name, prov) -> {
          String supportedLanguages =
              languages.values().stream()
                  .filter(lc -> prov.get().supports(lc))
                  .map(LanguageComponents::language)
                  .collect(Collectors.joining(", "));

          log.info(
              "Clustering algorithm {} loaded with support for the following languages: {}",
              name,
              supportedLanguages);
        });
  }

  ClusteringAlgorithm getAlgorithm(String algorithmName) {
    if (!algorithmProviders.containsKey(algorithmName) && aliasedNames.containsKey(algorithmName)) {
      algorithmName = aliasedNames.get(algorithmName);
    }

    ClusteringAlgorithmProvider provider = algorithmProviders.get(algorithmName);
    return provider == null ? null : provider.get();
  }

  LanguageComponents getLanguage(String language) {
    return languages.get(language);
  }

  boolean isLanguageSupported(String language) {
    return languages.containsKey(language);
  }

  private boolean isAlgorithmAvailable(
      ClusteringAlgorithmProvider provider, Collection languages) {
    ClusteringAlgorithm algorithm = provider.get();
    Optional first = languages.stream().filter(algorithm::supports).findFirst();
    if (first.isEmpty()) {
      log.warn("Algorithm does not support any of the available languages: {}", provider.name());
      return false;
    } else {
      return true;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy