
// index.EsIndexer (Maven / Gradle / Ivy), the newest version.
package index;
import client.ClientFactory;
import com.alibaba.fastjson.JSONObject;
import config.Global;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.exists.types.TypesExistsRequest;
import org.elasticsearch.action.admin.indices.exists.types.TypesExistsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.bulk.BackoffPolicy;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.*;
import java.util.concurrent.TimeUnit;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
/**
* unclewang
* 2018/5/2 10:21
*/
public class EsIndexer {
private Settings.Builder settings = Settings.builder()
.put("index.number_of_shards", Global.getEsNumOfShards())
.put("index.number_of_replicas", Global.getEsNumOfReplicas());
protected static Client client = ClientFactory.get();
protected String indice;
protected String type;
protected String filepath;
public EsIndexer(String indice, String type, String filepath) {
this.indice = indice;
this.type = type;
this.filepath = filepath;
}
public void index() throws IOException, InterruptedException {
checkIndex(indice, type);
doIndex();
}
private void doIndex() throws InterruptedException {
BulkProcessor bulkProcessor = getBulkProcessor();
File file = new File(filepath);
if (file.exists()) {
processFile(file, bulkProcessor);
bulkProcessor.flush();
bulkProcessor.awaitClose(5, TimeUnit.MINUTES);
} else {
System.out.println(filepath + " doesn't exist!");
}
}
private void processFile(File file, BulkProcessor bulkProcessor) {
try {
if (file.isFile()) {
// 需要处理的是单个文件
System.out.println("@@indexing@@, [" + file.getCanonicalPath() + "]");
BufferedReader reader = new BufferedReader(new FileReader(file));
String record = null;
while ((record = reader.readLine()) != null) {
JSONObject json = JSONObject.parseObject(record);
bulkProcessor.add(upsert(json.getString("id"), record));
}
} else {
for (File f : file.listFiles()) {
processFile(f, bulkProcessor);
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 创建索引map
*/
protected void map() throws IOException {
PutMappingResponse response = client.admin().indices().preparePutMapping(indice)
.setType(type)
.setSource(
jsonBuilder()
.startObject()
.startObject("properties")
.startObject("id").field("type", "keyword").field("store", true).endObject()
.startObject("title").field("type", "text").field("store", true).endObject()
.startObject("authors").field("type", "nested")
.startObject("properties")
.startObject("name").field("type", "text").field("store", true).endObject()
.startObject("org").field("type", "text").endObject()
.endObject()
.endObject()
.startObject("venue").field("type", "text").endObject()
.startObject("year").field("type", "integer").field("store", true).endObject()
.startObject("keywords").field("type", "text").field("store", true).endObject()
.startObject("fos").field("type", "text").field("store", true)
.startObject("fields").startObject("raw").field("type", "keyword").endObject().endObject()
.endObject()
.startObject("n_citation").field("type", "integer").field("store", true).endObject()
.startObject("references").field("type", "keyword").endObject()
.startObject("page_stat").field("type", "integer").endObject()
.startObject("page_end").field("type", "integer").endObject()
.startObject("doc_type").field("type", "keyword").field("store", true).endObject()
.startObject("lang").field("type", "keyword").field("store", true).endObject()
.startObject("publisher").field("type", "text").endObject()
.startObject("volume").field("type", "text").endObject()
.startObject("issue").field("type", "text").endObject()
.startObject("issn").field("type", "keyword").endObject()
.startObject("isbn").field("type", "keyword").endObject()
.startObject("doi").field("type", "keyword").endObject()
.startObject("pdf").field("type", "keyword").endObject()
.startObject("url").field("type", "keyword").endObject()
.startObject("abstract").field("type", "text").endObject()
.endObject()
.endObject()).get();
System.out.println("@@put mapping@@, ack:" + response.isAcknowledged());
}
/**
* 检验索引是否存在
* 不存在才创建
*/
private void checkIndex(String indice, String type) throws IOException {
if (!isExistsIndex(indice)) {
createIndex(indice);
}
if (!isExistsType(indice, type)) {
map();
}
}
/**
* 给索引添加别名
*/
private boolean addAlias(String indice, String alias) {
IndicesAliasesResponse rsp = client.admin().indices().prepareAliases().addAlias(indice, alias).get();
return rsp.isAcknowledged();
}
/**
* 删除索引对应的别名
*/
private boolean removeAlias(String indice, String alias) {
IndicesAliasesResponse rsp = client.admin().indices().prepareAliases().removeAlias(indice, alias).get();
return rsp.isAcknowledged();
}
/**
* 删除索引
*/
private boolean deleteIndex(String indice) {
return client.admin().indices().prepareDelete(indice).get().isAcknowledged();
}
/**
* 判断指定的索引名是否存在
*/
private boolean isExistsIndex(String indexName) {
IndicesExistsResponse response =
client.admin().indices().exists(
new IndicesExistsRequest().indices(indexName)).actionGet();
return response.isExists();
}
/**
* 判断指定的索引的类型是否存在
*/
private boolean isExistsType(String indice, String type) {
TypesExistsResponse response =
client.admin().indices()
.typesExists(new TypesExistsRequest(new String[]{indice}, type))
.actionGet();
return response.isExists();
}
/**
* 创建索引
*/
private boolean createIndex(String indice) {
CreateIndexResponse rsp = client.admin().indices().prepareCreate(indice)
.setSettings(settings)
.get();
return rsp.isAcknowledged();
}
protected UpdateRequest upsert(String id, String source) {
return new UpdateRequest(indice, type, id).doc(source, XContentType.JSON).upsert(source, XContentType.JSON);
}
/**
* 获取 BulkProcessor
*/
protected static BulkProcessor getBulkProcessor() {
return BulkProcessor.builder(
client,
new BulkProcessor.Listener() {
@Override
public void beforeBulk(long executionId, BulkRequest request) {
}
@Override
public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
System.out.println("@@bulk success@@, execId:" + executionId + ", bulk size:" + response.getItems().length);
}
@Override
public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
System.out.println("@@bulk error@@, execId:" + executionId + ", failure:" + failure.getMessage());
}
})
.setBulkActions(Global.getEsBulkActions())
.setBulkSize(new ByteSizeValue(Global.getEsBulkSize(), ByteSizeUnit.MB))
.setFlushInterval(TimeValue.timeValueSeconds(5))
.setConcurrentRequests(5)
.setBackoffPolicy(
BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3))
.build();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy