All downloads are free. Search and download functionality uses the official Maven repository.

org.unlaxer.jaddress.elaticesearch.ElasticSearchUtil Maven / Gradle / Ivy

package org.unlaxer.jaddress.elaticesearch;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.unlaxer.jaddress.ElaticeSearchAccessor;
import org.unlaxer.jaddress.GradleStructure;
import org.unlaxer.jaddress.Indexes;
import org.unlaxer.jaddress.ProjectContext;
import org.unlaxer.jaddress.SubProjects;

public class ElasticSearchUtil {

	/** Logger obtained for the runtime class of this instance. */
	Logger logger = LoggerFactory.getLogger(getClass());

	/**
	 * Creates a new utility instance.
	 */
	public ElasticSearchUtil() {
		// Nothing to initialise; the implicit superclass constructor call is sufficient.
	}

	/**
	 * Sends an analyze request through the indices client and returns the response.
	 *
	 * <p>A new {@link ElaticeSearchAccessor} and client are opened for the call and
	 * released by try-with-resources when it finishes.
	 *
	 * @param request the analyze request to execute
	 * @return the response returned by the indices client
	 * @throws UncheckedIOException if the call, or closing the resources, fails with an {@link IOException}
	 */
	public AnalyzeResponse analyze(AnalyzeRequest request) {
		try (ElaticeSearchAccessor elaticeSearchAccessor = new ElaticeSearchAccessor();
				RestHighLevelClient client = elaticeSearchAccessor.get()) {

			// The client is closed by try-with-resources; an explicit close() here
			// would close it a second time.
			return client.indices().analyze(request, RequestOptions.DEFAULT);
		} catch (IOException e) {
			throw new UncheckedIOException(e);
		}
	}

	public void createIndex(Map> fields) {
		deleteIndexIfExist();

		try (ElaticeSearchAccessor elaticeSearchAccessor = new ElaticeSearchAccessor();
				RestHighLevelClient client = elaticeSearchAccessor.get()) {

			CreateIndexRequest request = new CreateIndexRequest(Indexes.ad_address.name());
			request.settings(getJsonSettings(), XContentType.JSON);
			request.mapping(getMapping(fields));

			client.indices().create(request, RequestOptions.DEFAULT);
		} catch (IOException e) {
			throw new UncheckedIOException(e);
		}
	}

	/**
	 * Reads the index settings JSON from {@code ElasticSearch_Analysis.json}, resolved
	 * through {@link ProjectContext#getPath} for the main sub-project's main resources.
	 *
	 * @return the full content of the settings file
	 * @throws UncheckedIOException if the file cannot be read
	 */
	public String getJsonSettings() {
		final Path settingsFile = ProjectContext.getPath(
				SubProjects.main, GradleStructure.mainResources, "ElasticSearch_Analysis.json");
		try {
			return Files.readString(settingsFile);
		} catch (IOException readFailure) {
			throw new UncheckedIOException(readFailure);
		}
	}


	public XContentBuilder getMapping(Map> fields) throws IOException {
		XContentBuilder builder = XContentFactory.jsonBuilder();
		builder.startObject();
		{
			builder.startObject("properties");
			{
				addField(builder, fields);
			}
			builder.endObject();
		}
		builder.endObject();

		return builder;
	}

	private void addField(XContentBuilder builder, Map> fields) throws IOException {
		for (Entry> field : fields.entrySet()) {
			builder.startObject(field.getKey());
			{
				builder.field("type", "text");
				builder.startObject("fields");
				{
					for (Analyzer analyzer : field.getValue()) {
						builder.startObject(analyzer.getField());
						{
							builder.field("type", "text");
							builder.field("analyzer", analyzer.getAnalyzer());
							builder.field("search_analyzer", analyzer.getAnalyzer());
//							builder.field("search_analyzer", "default");
						}
						builder.endObject();
					}
				}
				builder.endObject();
			}
			builder.endObject();
		}
	}


	/**
	 * Executes a search request with the default request options.
	 *
	 * <p>A new accessor and client are opened for the call and closed afterwards.
	 *
	 * @param searchRequest the request to execute
	 * @return the response returned by the client
	 * @throws UncheckedIOException if the search, or closing the resources, fails with an {@link IOException}
	 */
	public SearchResponse search(SearchRequest searchRequest) {
		try (ElaticeSearchAccessor accessor = new ElaticeSearchAccessor();
				RestHighLevelClient esClient = accessor.get()) {
			return esClient.search(searchRequest, RequestOptions.DEFAULT);
		} catch (IOException ioFailure) {
			throw new UncheckedIOException(ioFailure);
		}
	}

	/**
	 * Checks whether the {@code ad_address} index exists.
	 *
	 * <p>An {@link IOException} during the check is logged and reported as
	 * "does not exist", so callers never see an exception from this method.
	 *
	 * @return {@code true} if the indices client reports the index as existing;
	 *         {@code false} if it does not exist or the check failed
	 */
	private boolean existsIndex() {
		String indexName = Indexes.ad_address.name();
		try (ElaticeSearchAccessor elaticeSearchAccessor = new ElaticeSearchAccessor();
				RestHighLevelClient client = elaticeSearchAccessor.get()) {

			GetIndexRequest request = new GetIndexRequest(indexName);
			return client.indices().exists(request, RequestOptions.DEFAULT);
		} catch (IOException e) {
			// Kept best-effort, but routed through the logger instead of stderr.
			logger.error("Failed to check whether index {} exists", indexName, e);
			return false;
		}
	}

	/**
	 * Deletes the {@code ad_address} index when {@code existsIndex()} reports it as present.
	 *
	 * <p>The outcome is logged at info level. An {@link IOException} during deletion
	 * is logged and not rethrown.
	 */
	public void deleteIndexIfExist() {
		String indexName = Indexes.ad_address.name();

		// Check before opening a client here: existsIndex() opens its own client,
		// so checking inside the try block would keep two clients open at once.
		if (!existsIndex()) {
			logger.info("{} not exist", indexName);
			return;
		}

		try (ElaticeSearchAccessor elaticeSearchAccessor = new ElaticeSearchAccessor();
				RestHighLevelClient client = elaticeSearchAccessor.get()) {

			DeleteIndexRequest deleteRequest = new DeleteIndexRequest(indexName);
			AcknowledgedResponse deleteIndexResponse = client.indices().delete(deleteRequest,
					RequestOptions.DEFAULT);
			logger.info("AcknowledgedResponse={}", deleteIndexResponse.isAcknowledged());
		} catch (IOException e) {
			// Kept best-effort, but routed through the logger instead of stderr.
			logger.error("Failed to delete index {}", indexName, e);
		}
	}

	/**
	 * Bulk-indexes the lines of a text file into the {@code ad_address} index.
	 *
	 * <p>Each line is turned into a {@code RequestData2}, whose {@code id} and
	 * {@code map} become the id and source of an index request. The requests are
	 * handed to a {@link BulkProcessor} that sends them asynchronously. After the
	 * file is read, the method waits up to 30 seconds for the processor to close.
	 *
	 * <p>An {@link IOException} while reading the file is logged, and requests
	 * already queued are still flushed. If the wait is interrupted, the thread's
	 * interrupt flag is restored.
	 *
	 * @param path the file to read, one document per line
	 * @throws UncheckedIOException if closing the Elasticsearch resources fails with an {@link IOException}
	 */
	public void indexing(Path path) {
		BulkProcessor.Listener listener = new BulkProcessor.Listener() {
			@Override
			public void beforeBulk(long executionId, BulkRequest request) {
				int numberOfActions = request.numberOfActions();
				logger.debug("Executing bulk [{}] with {} requests", executionId, numberOfActions);
			}

			@Override
			public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
				if (response.hasFailures()) {
					logger.warn("Bulk [{}] executed with failures", executionId);
				} else {
					logger.debug("Bulk [{}] completed in {} milliseconds", executionId, response.getTook().getMillis());
				}
			}

			@Override
			public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
				logger.error("Failed to execute bulk", failure);
			}
		};

		try (ElaticeSearchAccessor elaticeSearchAccessor = new ElaticeSearchAccessor();
				RestHighLevelClient client = elaticeSearchAccessor.get()) {

			BulkProcessor bulkProcessor = BulkProcessor
					.builder((request, bulkListener) -> client.bulkAsync(request, RequestOptions.DEFAULT, bulkListener),
							listener)
					.build();

			try (BufferedReader br = Files.newBufferedReader(path)) {
				String text;

				while ((text = br.readLine()) != null) {
					RequestData2 data = new RequestData2(text);

					IndexRequest indexRequest = Indexes.ad_address.createIndexRequest();
					indexRequest.id(data.id);
					indexRequest.source(data.map);

					bulkProcessor.add(indexRequest);
				}
			} catch (IOException e) {
				// Best effort: report the read failure and still flush what was queued.
				logger.error("Failed to read {}", path, e);
			}

			boolean terminated = bulkProcessor.awaitClose(30L, TimeUnit.SECONDS);
			logger.info("terminated:{}", terminated);

		} catch (IOException e) {
			throw new UncheckedIOException(e);
		} catch (InterruptedException e) {
			// Restore the flag so callers further up can observe the interruption.
			Thread.currentThread().interrupt();
			logger.warn("Interrupted while waiting for the bulk processor to close", e);
		}
	}

	//////////////////////////////////////////////////////////////////////////////////////////////////////////////
//	public void test(Path path) {
//	try (BufferedReader br = Files.newBufferedReader(path)) {
//		String text;
//		while ((text = br.readLine()) != null) {
//			new TsvData(text).debugPrint();
//		}
//	} catch (IOException e) {
//		e.printStackTrace();
//	}
//}

	/**
	 * Builds a search request for the {@code ad_address} index from a query
	 * condition and executes it with the default request options.
	 *
	 * @param queryCondition supplies the search source via {@code toSearchSourceBuilder()}
	 * @return the response returned by the client
	 * @throws UncheckedIOException if the search, or closing the resources, fails with an {@link IOException}
	 */
	@Deprecated
	public SearchResponse _search(QueryCondition queryCondition) {
		final SearchRequest request = Indexes.ad_address.createSearchRequest()
				.source(queryCondition.toSearchSourceBuilder());

		try (ElaticeSearchAccessor accessor = new ElaticeSearchAccessor();
				RestHighLevelClient esClient = accessor.get()) {
			return esClient.search(request, RequestOptions.DEFAULT);
		} catch (IOException ioFailure) {
			throw new UncheckedIOException(ioFailure);
		}
	}


	/**
	 * Builds the index settings programmatically: an {@code analysis} object holding
	 * five custom analyzers (named by {@code Analyzer#getAnalyzer()} of
	 * {@code katakana}, {@code romaji}, {@code kuromoji2}, {@code icu2} and
	 * {@code kuromoji3}) and two {@code kuromoji_readingform} token filters.
	 *
	 * @return the builder holding the completed settings object
	 * @throws IOException if the builder fails while writing
	 */
	@Deprecated
	public XContentBuilder getSettings() throws IOException {
		// TODO:builder use api

		final XContentBuilder json = XContentFactory.jsonBuilder();

		json.startObject();
		json.startObject("analysis");

		// ---- analyzers ----
		json.startObject("analyzer");

		openCustomAnalyzer(json, Analyzer.katakana.getAnalyzer(), "kuromoji_tokenizer");
		json.array("char_filter", new Object[] { "icu_normalizer" });
		json.array("filter", new Object[] { "katakana_readingform", "kuromoji_number" });
		json.endObject();

		openCustomAnalyzer(json, Analyzer.romaji.getAnalyzer(), "kuromoji_tokenizer");
		json.array("char_filter", new Object[] { "icu_normalizer" });
		json.array("filter", new Object[] { "romaji_readingform", "kuromoji_number" });
		json.endObject();

		// This analyzer declares neither char_filter nor filter.
		// NOTE(review): "mode" is written on the analyzer object itself - confirm
		// that this is where it is meant to go.
		openCustomAnalyzer(json, Analyzer.kuromoji2.getAnalyzer(), "kuromoji_tokenizer");
		json.field("mode", "normal");
		json.endObject();

		openCustomAnalyzer(json, Analyzer.icu2.getAnalyzer(), "icu_tokenizer");
		json.array("char_filter", new Object[] { "icu_normalizer" });
		json.endObject();

		openCustomAnalyzer(json, Analyzer.kuromoji3.getAnalyzer(), "kuromoji_tokenizer");
		json.array("char_filter", new Object[] { "icu_normalizer", "kuromoji_iteration_mark" });
		json.array("filter", new Object[] {
				"kuromoji_baseform",
				"kuromoji_part_of_speech",
				"ja_stop",
				"kuromoji_number",
				"kuromoji_stemmer" });
		json.endObject();

		json.endObject(); // analyzer

		// ---- token filters referenced by the katakana / romaji analyzers ----
		json.startObject("filter");
		writeReadingFormFilter(json, "katakana_readingform", "false");
		writeReadingFormFilter(json, "romaji_readingform", "true");
		json.endObject(); // filter

		json.endObject(); // analysis
		json.endObject(); // root

		return json;
	}

	/**
	 * Opens an analyzer object and writes its {@code type} ("custom") and
	 * {@code tokenizer}; the caller adds further fields and closes the object.
	 */
	private static void openCustomAnalyzer(XContentBuilder json, String analyzerName, String tokenizer)
			throws IOException {
		json.startObject(analyzerName);
		json.field("type", "custom");
		json.field("tokenizer", tokenizer);
	}

	/**
	 * Writes a complete {@code kuromoji_readingform} filter object with the given
	 * {@code use_romaji} value (written as a string).
	 */
	private static void writeReadingFormFilter(XContentBuilder json, String filterName, String useRomaji)
			throws IOException {
		json.startObject(filterName);
		json.field("type", "kuromoji_readingform");
		json.field("use_romaji", useRomaji);
		json.endObject();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy