org.carrot2.elasticsearch.ControllerSingleton Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch-carrot2 Show documentation
Show all versions of elasticsearch-carrot2 Show documentation
Search results clustering plugin for Elasticsearch (Carrot2-based)
package org.carrot2.elasticsearch;
import org.apache.logging.log4j.Logger;
import org.carrot2.core.Controller;
import org.carrot2.core.ControllerFactory;
import org.carrot2.core.ProcessingComponentDescriptor;
import org.carrot2.core.ProcessingComponentSuite;
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
import org.carrot2.util.resource.ClassLoaderLocator;
import org.carrot2.util.resource.DirLocator;
import org.carrot2.util.resource.FileResource;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.ResourceLookup;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.node.Node;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.carrot2.elasticsearch.ClusteringPlugin.DEFAULT_COMPONENT_SIZE_PROPERTY_NAME;
import static org.carrot2.elasticsearch.ClusteringPlugin.DEFAULT_RESOURCES_PROPERTY_NAME;
import static org.carrot2.elasticsearch.ClusteringPlugin.DEFAULT_SUITE_PROPERTY_NAME;
/**
* Holds the {@link Controller} singleton initialized and ready throughout
* the {@link Node}'s lifecycle.
*/
public class ControllerSingleton extends AbstractLifecycleComponent {
private final Environment environment;
private Controller controller;
private List algorithms;
private Logger logger;
@Inject
public ControllerSingleton(Settings settings) {
super(settings);
this.environment = new Environment(settings);
this.logger = Loggers.getLogger("plugin.carrot2");
}
@SuppressForbidden(reason = "C2 integration (File API)")
@Override
protected void doStart() throws ElasticsearchException {
try {
Settings.Builder builder = Settings.builder();
Path pluginConfigPath = environment.configFile().resolve(ClusteringPlugin.PLUGIN_NAME);
if (!Files.isDirectory(pluginConfigPath)) {
throw new ElasticsearchException("Missing config files: {}", pluginConfigPath);
} else {
logger.info("Configuration files at: {}", pluginConfigPath.toAbsolutePath());
}
for (String configName : new String [] {
"config.yml",
"config.yaml",
"config.json",
"config.properties"
}) {
try {
Path resolved = pluginConfigPath.resolve(configName);
if (resolved != null && Files.exists(resolved)) {
builder.loadFromPath(resolved);
}
} catch (NoClassDefFoundError e) {
logger.warn("Could not parse: " + configName, e);
}
}
Settings c2Settings = builder.build();
// Parse suite descriptors with loggers turned off (shut them up a bit).
final Path suitePath = pluginConfigPath.resolve(c2Settings.get(DEFAULT_SUITE_PROPERTY_NAME));
if (!Files.isRegularFile(suitePath)) {
throw new ElasticsearchException(
"Could not find algorithm suite: " + suitePath.toAbsolutePath().normalize());
}
final ResourceLookup suiteLookup = new ResourceLookup(new DirLocator(suitePath.getParent()));
final IResource suiteResource =
suiteLookup.getFirst(suitePath.getFileName().toString());
final List failed = new ArrayList<>();
final ProcessingComponentSuite suite = AccessController.doPrivileged(
(PrivilegedExceptionAction) () ->
getProcessingComponentSuite(suiteResource, suiteLookup, failed));
algorithms = new ArrayList<>();
for (ProcessingComponentDescriptor descriptor : suite.getAlgorithms()) {
algorithms.add(descriptor.getId());
}
algorithms = Collections.unmodifiableList(algorithms);
if (!algorithms.isEmpty()) {
logger.info("Available clustering components: {}", String.join(", ", algorithms));
}
if (!failed.isEmpty()) {
logger.info("Unavailable clustering components: {}", String.join(", ", failed));
}
final Path resourcesPath = pluginConfigPath.resolve(c2Settings.get(DEFAULT_RESOURCES_PROPERTY_NAME, "."))
.toAbsolutePath()
.normalize();
logger.info("Lexical resources dir: {}", resourcesPath);
final ResourceLookup resourceLookup = new ResourceLookup(
new DirLocator(resourcesPath),
new ClassLoaderLocator(ControllerSingleton.class.getClassLoader()));
// Change the default resource lookup to include the configured location.
Map c2SettingsAsMap = new HashMap<>();
DefaultLexicalDataFactoryDescriptor.attributeBuilder(c2SettingsAsMap)
.resourceLookup(resourceLookup);
c2SettingsAsMap.putAll(c2Settings.getAsMap());
// Set up the license for Lingo3G, if it's available.
Path lingo3gLicense = scanForLingo3GLicense(environment, pluginConfigPath);
if (lingo3gLicense != null && Files.isReadable(lingo3gLicense)) {
c2SettingsAsMap.put("license", new FileResource(lingo3gLicense));
} else if (algorithms.contains("lingo3g")) {
logger.warn("Lingo3G is on classpath, but no licenses have been found. Check out the documentation.");
}
// Create component pool.
Integer poolSize = c2Settings.getAsInt(DEFAULT_COMPONENT_SIZE_PROPERTY_NAME, 0);
if (poolSize > 0) {
controller = ControllerFactory.createPooling(poolSize);
} else {
controller = ControllerFactory.createPooling();
}
controller.init(c2SettingsAsMap, suite.getComponentConfigurations());
} catch (Exception e) {
throw new ElasticsearchException(
"Could not start Carrot2 controller.", e);
}
if (algorithms == null || algorithms.isEmpty()) {
throw new ElasticsearchException("No registered/ available clustering algorithms? Check the logs, it's odd.");
}
}
/**
* Because we're running with a security manager (most likely), we need to scan for Lingo3G
* license in ES configuration directories.
*/
private Path scanForLingo3GLicense(Environment environment, Path pluginConfigPath) {
List licenses = new ArrayList<>();
for (Path candidate : new Path [] {
pluginConfigPath.resolve("license.xml"),
pluginConfigPath.resolve(".license.xml"),
environment.configFile().resolve("license.xml"),
environment.configFile().resolve(".license.xml")
}) {
logger.debug("Lingo3G license location scan: {} {}.",
candidate.toAbsolutePath().normalize(),
Files.isRegularFile(candidate) ? "(found)" : "(not found)");
if (Files.isRegularFile(candidate)) {
licenses.add(candidate);
}
}
if (licenses.size() > 1) {
throw new ElasticsearchException("There should be exactly one Lingo3G license on scan paths: {}", licenses);
}
if (licenses.size() == 1) {
return licenses.iterator().next();
} else {
return null;
}
}
private ProcessingComponentSuite getProcessingComponentSuite(IResource suiteResource,
ResourceLookup suiteLookup,
List failedComponents) throws Exception {
ProcessingComponentSuite suite1 = ProcessingComponentSuite.deserialize(suiteResource, suiteLookup);
for (ProcessingComponentDescriptor desc : suite1.removeUnavailableComponents()) {
failedComponents.add(desc.getId());
if (isNoClassDefFound(desc.getInitializationFailure())) {
logger.debug("Algorithm not available on classpath: " + desc.getId());
} else {
logger.warn("Algorithm initialization failed: " + desc.getId(),
desc.getInitializationFailure());
}
}
return suite1;
}
/** */
protected boolean isNoClassDefFound(Throwable initializationFailure) {
if (initializationFailure != null) {
return initializationFailure.getCause() instanceof ClassNotFoundException;
}
return false;
}
public Controller getController() {
return controller;
}
/**
* Return a list of available algorithm component identifiers.
*/
public List getAlgorithms() {
return algorithms;
}
@Override
protected void doStop() throws ElasticsearchException {
final Controller c = controller;
controller = null;
if (c != null) {
AccessController.doPrivileged((PrivilegedAction