All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.carrot2.language.LanguageComponentsLoader Maven / Gradle / Ivy

There is a newer version: 4.6.0
Show newest version
/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2021, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * https://www.carrot2.org/carrot2.LICENSE
 */
package org.carrot2.language;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.ServiceLoader;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.carrot2.clustering.ClusteringAlgorithm;
import org.carrot2.util.ResourceLookup;

public final class LanguageComponentsLoader {
  private Set languageRestrictions;
  private Function resourceLookupModifier;
  private ClusteringAlgorithm[] algorithmRestriction;

  public LoadedLanguages load() throws IOException {
    return load(loadProvidersFromSpi(defaultSpiClassloader()));
  }

  public LoadedLanguages load(Map> languageProviders)
      throws IOException {
    sanityCheck(languageProviders);

    // Apply restrictions.
    if (languageRestrictions != null) {
      languageProviders.keySet().retainAll(languageRestrictions);
    }

    Function>, Set>> componentFilters = s -> s;
    if (algorithmRestriction != null) {
      Set> components =
          Arrays.stream(algorithmRestriction)
              .flatMap(
                  algorithm ->
                      Stream.concat(
                          algorithm.optionalLanguageComponents().stream(),
                          algorithm.requiredLanguageComponents().stream()))
              .collect(Collectors.toSet());
      componentFilters =
          fullSet -> {
            HashSet> needed = new HashSet<>(fullSet);
            needed.retainAll(components);
            return needed;
          };
    }

    // Preload components.
    Map, Supplier>> preloadedSuppliers = new LinkedHashMap<>();
    for (String language : languageProviders.keySet()) {
      LinkedHashMap, Supplier> componentSuppliers = new LinkedHashMap<>();
      for (LanguageComponentsProvider provider : languageProviders.get(language)) {
        ResourceLookup rl;
        if (resourceLookupModifier != null) {
          rl = resourceLookupModifier.apply(provider);
        } else {
          rl = provider.defaultResourceLookup();
        }

        Set> requiredTypes = componentFilters.apply(provider.componentTypes());
        if (!requiredTypes.isEmpty()) {
          componentSuppliers.putAll(provider.load(language, rl, requiredTypes));
        }
      }

      if (!componentSuppliers.isEmpty()) {
        preloadedSuppliers.put(language, componentSuppliers);
      }
    }

    // If we only have one language to support, remove any loaded unsupported languages.
    // We can't do this in general because languages A and B may support a mutually
    // exclusive subset of languages.
    if (algorithmRestriction != null && algorithmRestriction.length == 1) {
      var algorithm = algorithmRestriction[0];
      preloadedSuppliers
          .entrySet()
          .removeIf(
              e -> {
                return !algorithm.supports(new LanguageComponents(e.getKey(), e.getValue()));
              });
    }

    return new LoadedLanguages(preloadedSuppliers);
  }

  /** Limits the loaded components to just those required by the given list of languages. */
  public LanguageComponentsLoader limitToLanguages(String... languages) {
    if (this.languageRestrictions != null) {
      throw new RuntimeException("Method can be set once.");
    }
    this.languageRestrictions = new HashSet<>(Arrays.asList(languages));
    return this;
  }

  /**
   * Limits the loaded components to just those required by the given set of algorithms.
   *
   * 

Note that there is no guarantee that all algorithms will have all the required components: * the loaded set may contain a subset of the required components of each algorithm. This method * exists to prevent unnecessary resources from being resolved and loaded. * * @see ClusteringAlgorithm#supports(LanguageComponents) */ public LanguageComponentsLoader limitToAlgorithms(ClusteringAlgorithm... algorithms) { if (this.algorithmRestriction != null) { throw new RuntimeException("Method can be set once."); } this.algorithmRestriction = algorithms; return this; } public LanguageComponentsLoader withResourceLookup( Function resourceLookupModifier) { if (this.resourceLookupModifier != null) { throw new RuntimeException("Method can be set once."); } this.resourceLookupModifier = resourceLookupModifier; return this; } private void sanityCheck(Map> languageProviders) { languageProviders.forEach( (language, providers) -> { Map, LanguageComponentsProvider> byComponent = new HashMap<>(); providers.forEach( provider -> { for (Class componentType : provider.componentTypes()) { LanguageComponentsProvider previous = byComponent.put(componentType, provider); if (previous != null) { throw new RuntimeException( String.format( Locale.ROOT, "Language '%s' has multiple providers of component '%s': %s", language, componentType.getSimpleName(), Stream.of(previous, provider) .map(s -> s.getClass().getName()) .collect(Collectors.joining(", ")))); } } }); }); } private ClassLoader defaultSpiClassloader() { return getClass().getClassLoader(); } public static Map> loadProvidersFromSpi( ClassLoader... classloaders) { Map> providers = new TreeMap<>(); BiPredicate sameProvider = (p1, p2) -> { return Objects.equals(p1.name(), p2.name()) && Objects.equals(p1.getClass(), p2.getClass()) && Objects.equals(p1.componentTypes(), p2.componentTypes()) && Objects.equals(p1.languages(), p2.languages()); }; for (ClassLoader classLoader : classloaders) { for (LanguageComponentsProvider candidate : ServiceLoader.load(LanguageComponentsProvider.class, classLoader)) { for (String language : candidate.languages()) { List existingProviders = providers.computeIfAbsent(language, key -> new ArrayList<>()); if (existingProviders.stream() .noneMatch(existing -> sameProvider.test(existing, candidate))) { existingProviders.add(candidate); } } } } return providers; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy