
fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClientManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fscrawler Show documentation
Show all versions of fscrawler Show documentation
FS Crawler offers a simple way to index local files into elasticsearch.
/*
* Licensed to David Pilato (the "Author") under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Author licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package fr.pilato.elasticsearch.crawler.fs.client;
import fr.pilato.elasticsearch.crawler.fs.meta.settings.FsSettings;
import fr.pilato.elasticsearch.crawler.fs.util.FsCrawlerUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.Version;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.node.Node;
import org.elasticsearch.threadpool.ThreadPool;
import java.io.IOException;
import java.nio.file.Path;
import java.util.concurrent.TimeUnit;
import static fr.pilato.elasticsearch.crawler.fs.util.FsCrawlerUtil.INDEX_SETTINGS_FILE;
import static fr.pilato.elasticsearch.crawler.fs.util.FsCrawlerUtil.INDEX_SETTINGS_FOLDER_FILE;
public class ElasticsearchClientManager {
private final Logger logger = LogManager.getLogger(ElasticsearchClientManager.class);
private final Path config;
private final FsSettings settings;
private ElasticsearchClient client = null;
private BulkProcessor bulkProcessorDoc = null;
private BulkProcessor bulkProcessorFolder = null;
private ThreadPool threadPool;
public ElasticsearchClientManager(Path config, FsSettings settings) {
this.config = config;
this.settings = settings;
}
public ElasticsearchClient client() {
if (client == null) {
throw new RuntimeException("You must call start() before client()");
}
return client;
}
public BulkProcessor bulkProcessorDoc() {
if (bulkProcessorDoc == null) {
throw new RuntimeException("You must call start() before bulkProcessorDoc()");
}
return bulkProcessorDoc;
}
/**
* We can probably remove that bulk processor as we now support ingest pipeline per request
* @return a BulkProcessor instance
*/
@Deprecated
public BulkProcessor bulkProcessorFolder() {
if (bulkProcessorFolder == null) {
throw new RuntimeException("You must call start() before bulkProcessorFolder()");
}
return bulkProcessorFolder;
}
public void start() throws Exception {
if (client != null) {
// The client has already been initialized. Let's skip this again
return;
}
try {
// Create an elasticsearch client
client = new ElasticsearchClient(ElasticsearchClient.buildRestClient(settings.getElasticsearch()));
// We set what will be elasticsearch behavior as it depends on the cluster version
client.setElasticsearchBehavior();
} catch (Exception e) {
logger.warn("failed to create elasticsearch client, disabling crawler...");
throw e;
}
// Check that we don't try using an ingest pipeline with a non compatible version
if (settings.getElasticsearch().getPipeline() != null && !client.isIngestSupported()) {
throw new RuntimeException("You defined pipeline:" + settings.getElasticsearch().getPipeline() +
", but your elasticsearch cluster does not support this feature.");
}
threadPool = new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "high-level-client").build());
BulkProcessor.Listener listener = new BulkProcessor.Listener() {
@Override public void beforeBulk(long executionId, BulkRequest request) { }
@Override public void afterBulk(long executionId, BulkRequest request, BulkResponse response) { }
@Override public void afterBulk(long executionId, BulkRequest request, Throwable failure) { }
};
bulkProcessorDoc = new BulkProcessor.Builder(client::bulkAsync, listener, threadPool)
.setBulkActions(settings.getElasticsearch().getBulkSize())
.setFlushInterval(TimeValue.timeValueMillis(settings.getElasticsearch().getFlushInterval().millis()))
// TODO fix when elasticsearch will support global pipelines
// .setPipeline(settings.getElasticsearch().getPipeline())
.build();
bulkProcessorFolder = new BulkProcessor.Builder(client::bulkAsync, listener, threadPool)
.setBulkActions(settings.getElasticsearch().getBulkSize())
.setFlushInterval(TimeValue.timeValueMillis(settings.getElasticsearch().getFlushInterval().millis()))
.build();
}
public void createIndices(FsSettings settings) throws Exception {
String elasticsearchVersion;
Path jobMappingDir = config.resolve(settings.getName()).resolve("_mappings");
// Let's read the current version of elasticsearch cluster
Version version = client.info().getVersion();
logger.debug("FS crawler connected to an elasticsearch [{}] node.", version.toString());
elasticsearchVersion = Byte.toString(version.major);
// If needed, we create the new settings for this files index
if (settings.getFs().isAddAsInnerObject() == false || (!settings.getFs().isJsonSupport() && !settings.getFs().isXmlSupport())) {
createIndex(jobMappingDir, elasticsearchVersion, INDEX_SETTINGS_FILE, settings.getElasticsearch().getIndex());
} else {
client.createIndex(settings.getElasticsearch().getIndex(), true, null);
}
// If needed, we create the new settings for this folder index
if (settings.getFs().isIndexFolders()) {
createIndex(jobMappingDir, elasticsearchVersion, INDEX_SETTINGS_FOLDER_FILE, settings.getElasticsearch().getIndexFolder());
} else {
client.createIndex(settings.getElasticsearch().getIndexFolder(), true, null);
}
}
private void createIndex(Path jobMappingDir, String elasticsearchVersion, String indexSettingsFile, String indexName) throws Exception {
try {
// If needed, we create the new settings for this files index
String indexSettings =
FsCrawlerUtil.readJsonFile(jobMappingDir, config, elasticsearchVersion, indexSettingsFile);
client.createIndex(indexName, true, indexSettings);
} catch (Exception e) {
logger.warn("failed to create index [{}], disabling crawler...", indexName);
throw e;
}
}
public void close() {
logger.debug("Closing Elasticsearch client manager");
if (threadPool != null) {
threadPool.shutdownNow();
}
if (bulkProcessorDoc != null) {
bulkProcessorDoc.close();
}
if (bulkProcessorFolder != null) {
bulkProcessorFolder.close();
}
if (client != null) {
try {
client.shutdown();
} catch (IOException e) {
logger.warn("Can not close elasticsearch client", e);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy