org.elasticsearch.plugin.nlpcn.HashJoinElasticExecutor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch-sql Show documentation
Show all versions of elasticsearch-sql Show documentation
Query elasticsearch using SQL
package org.elasticsearch.plugin.nlpcn;
import com.alibaba.druid.sql.ast.statement.SQLJoinTableSource;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.nlpcn.es4sql.domain.Field;
import org.nlpcn.es4sql.domain.Select;
import org.nlpcn.es4sql.domain.Where;
import org.nlpcn.es4sql.exception.SqlParseException;
import org.nlpcn.es4sql.query.join.HashJoinElasticRequestBuilder;
import org.nlpcn.es4sql.query.join.TableInJoinRequestBuilder;
import org.nlpcn.es4sql.query.maker.QueryMaker;
import java.io.IOException;
import java.util.*;
/**
* Created by Eliran on 22/8/2015.
*/
public class HashJoinElasticExecutor extends ElasticJoinExecutor {
/** Join request: supplies both table requests, the join type and the limits. */
private HashJoinElasticRequestBuilder requestBuilder;
/** Client used to issue the follow-up scroll requests. */
private Client client;
/** Copied from the request builder in the constructor; consulted by needToOptimize. */
private boolean useQueryTermsFilterOptimization = false;
/** Hard cap on the number of first-table hits collected while scrolling. */
private final int MAX_RESULTS_FOR_FIRST_TABLE = 100000;
/** Hash of first-table hits, organised per comparison id and comparison key. */
HashJoinComparisonStructure hashJoinComparisonStructure;
/** Combined ids ("firstTableId|secondTableId") that were already emitted as a joined row. */
private Set<String> alreadyMatched;
public HashJoinElasticExecutor(Client client, HashJoinElasticRequestBuilder requestBuilder) {
super(requestBuilder);
this.client = client;
this.requestBuilder = requestBuilder;
this.useQueryTermsFilterOptimization = requestBuilder.isUseTermFiltersOptimization();
this.hashJoinComparisonStructure = new HashJoinComparisonStructure(requestBuilder.getT1ToT2FieldsComparison());
this.alreadyMatched = new HashSet<>();
}
/**
 * Executes the hash join.
 *
 * <ol>
 *   <li>Fetches the first table and hashes its hits per comparison key.</li>
 *   <li>Optionally narrows the second-table request with a terms filter.</li>
 *   <li>Fetches the second table and combines every hit that matches the hash.</li>
 *   <li>For a LEFT OUTER JOIN that is still below the total limit, delegates to
 *       {@code addUnmatchedResults} to append the unmatched first-table hits.</li>
 *   <li>If the first-table select is ordered, sorts the combined hits by doc id.</li>
 * </ol>
 *
 * @return the combined hits
 * @throws IOException       declared by the original signature
 * @throws SqlParseException if updating the second-table request fails
 */
public List<InternalSearchHit> innerRun() throws IOException, SqlParseException {
    Map<String, Map<String, List<Object>>> optimizationTermsFilterStructure =
            initOptimizationStructure();

    updateFirstTableLimitIfNeeded();
    TableInJoinRequestBuilder firstTableRequest = requestBuilder.getFirstTable();
    createKeyToResultsAndFillOptimizationStructure(optimizationTermsFilterStructure, firstTableRequest);

    TableInJoinRequestBuilder secondTableRequest = requestBuilder.getSecondTable();
    if (needToOptimize(optimizationTermsFilterStructure)) {
        updateRequestWithTermsFilter(optimizationTermsFilterStructure, secondTableRequest);
    }

    List<InternalSearchHit> combinedResult = createCombinedResults(secondTableRequest);

    int currentNumOfResults = combinedResult.size();
    int totalLimit = requestBuilder.getTotalLimit();
    if (requestBuilder.getJoinType() == SQLJoinTableSource.JoinType.LEFT_OUTER_JOIN && currentNumOfResults < totalLimit) {
        String t1Alias = requestBuilder.getFirstTable().getAlias();
        String t2Alias = requestBuilder.getSecondTable().getAlias();
        //todo: for each till Limit
        addUnmatchedResults(combinedResult, this.hashJoinComparisonStructure.getAllSearchHits(),
                requestBuilder.getSecondTable().getReturnedFields(),
                currentNumOfResults, totalLimit,
                t1Alias,
                t2Alias);
    }

    if (firstTableRequest.getOriginalSelect().isOrderdSelect()) {
        // Combined hits built in this class reuse the doc id given to the first-table hit
        // when it was hashed, so ordering by doc id follows the first-table fetch order.
        Collections.sort(combinedResult, new Comparator<InternalSearchHit>() {
            @Override
            public int compare(InternalSearchHit o1, InternalSearchHit o2) {
                // Integer.compare instead of subtraction: cannot overflow.
                return Integer.compare(o1.docId(), o2.docId());
            }
        });
    }
    return combinedResult;
}
private Map>> initOptimizationStructure() {
Map>> optimizationTermsFilterStructure = new HashMap<>();
for(String comparisonId: this.hashJoinComparisonStructure.getComparisons().keySet()){
optimizationTermsFilterStructure.put(comparisonId,new HashMap>());
}
return optimizationTermsFilterStructure;
}
/**
 * For a LEFT OUTER JOIN, caps the first table's hint limit at the total limit:
 * the hint is set to the total limit when it is missing or larger than it.
 * Other join types are left untouched.
 */
private void updateFirstTableLimitIfNeeded() {
    boolean leftOuterJoin = requestBuilder.getJoinType() == SQLJoinTableSource.JoinType.LEFT_OUTER_JOIN;
    if (!leftOuterJoin) {
        return;
    }
    Integer hinted = requestBuilder.getFirstTable().getHintLimit();
    int cap = requestBuilder.getTotalLimit();
    if (hinted != null && hinted <= cap) {
        // An explicit hint that already fits under the total limit is kept.
        return;
    }
    requestBuilder.getFirstTable().setHintLimit(cap);
}
private List createCombinedResults( TableInJoinRequestBuilder secondTableRequest) {
List combinedResult = new ArrayList<>();
int resultIds = 0;
int totalLimit = this.requestBuilder.getTotalLimit();
Integer hintLimit = secondTableRequest.getHintLimit();
SearchResponse searchResponse;
boolean finishedScrolling;
if (hintLimit != null && hintLimit < MAX_RESULTS_ON_ONE_FETCH) {
searchResponse = secondTableRequest.getRequestBuilder().setSize(hintLimit).get();
finishedScrolling = true;
} else {
searchResponse = secondTableRequest.getRequestBuilder()
.setScroll(new TimeValue(60000))
.setSize(MAX_RESULTS_ON_ONE_FETCH).get();
//es5.0 no need to scroll again!
// searchResponse = client.prepareSearchScroll(searchResponse.getScrollId()).setScroll(new TimeValue(600000)).get();
finishedScrolling = false;
}
updateMetaSearchResults(searchResponse);
boolean limitReached = false;
int fetchedSoFarFromSecondTable = 0;
while (!limitReached) {
SearchHit[] secondTableHits = searchResponse.getHits().getHits();
fetchedSoFarFromSecondTable += secondTableHits.length;
for (SearchHit secondTableHit : secondTableHits) {
if (limitReached) break;
//todo: need to run on comparisons. for each comparison check if exists and add.
HashMap>> comparisons = this.hashJoinComparisonStructure.getComparisons();
for (Map.Entry>> comparison : comparisons.entrySet()) {
String comparisonID = comparison.getKey();
List> t1ToT2FieldsComparison = comparison.getValue();
String key = getComparisonKey(t1ToT2FieldsComparison, secondTableHit, false, null);
SearchHitsResult searchHitsResult = this.hashJoinComparisonStructure.searchForMatchingSearchHits(comparisonID, key);
if (searchHitsResult != null && searchHitsResult.getSearchHits().size() > 0) {
searchHitsResult.setMatchedWithOtherTable(true);
List searchHits = searchHitsResult.getSearchHits();
for (InternalSearchHit matchingHit : searchHits) {
String combinedId = matchingHit.id() + "|" + secondTableHit.getId();
//in order to prevent same matching when using OR on hashJoins.
if(this.alreadyMatched.contains(combinedId)){
continue;
}
else {
this.alreadyMatched.add(combinedId);
}
Map copiedSource = new HashMap();
copyMaps(copiedSource,secondTableHit.sourceAsMap());
onlyReturnedFields(copiedSource, secondTableRequest.getReturnedFields(),secondTableRequest.getOriginalSelect().isSelectAll());
InternalSearchHit searchHit = new InternalSearchHit(matchingHit.docId(), combinedId, new Text(matchingHit.getType() + "|" + secondTableHit.getType()), matchingHit.getFields());
searchHit.sourceRef(matchingHit.getSourceRef());
searchHit.sourceAsMap().clear();
searchHit.sourceAsMap().putAll(matchingHit.sourceAsMap());
String t1Alias = requestBuilder.getFirstTable().getAlias();
String t2Alias = requestBuilder.getSecondTable().getAlias();
mergeSourceAndAddAliases(copiedSource, searchHit, t1Alias, t2Alias);
combinedResult.add(searchHit);
resultIds++;
if (resultIds >= totalLimit) {
limitReached = true;
break;
}
}
}
}
}
if (!finishedScrolling) {
if (secondTableHits.length > 0 && (hintLimit == null || fetchedSoFarFromSecondTable >= hintLimit)) {
searchResponse = client.prepareSearchScroll(searchResponse.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
} else break;
} else {
break;
}
}
return combinedResult;
}
/**
 * Copies every entry of {@code from} into {@code into}, overwriting entries that share a key.
 *
 * @param into map that receives the entries
 * @param from map whose entries are copied; it is not modified
 */
private void copyMaps(Map<String, Object> into, Map<String, Object> from) {
    into.putAll(from);
}
private void createKeyToResultsAndFillOptimizationStructure(Map>> optimizationTermsFilterStructure, TableInJoinRequestBuilder firstTableRequest) {
List firstTableHits = fetchAllHits(firstTableRequest);
int resultIds = 1;
for (SearchHit hit : firstTableHits) {
HashMap>> comparisons = this.hashJoinComparisonStructure.getComparisons();
for (Map.Entry>> comparison : comparisons.entrySet()) {
String comparisonID = comparison.getKey();
List> t1ToT2FieldsComparison = comparison.getValue();
String key = getComparisonKey(t1ToT2FieldsComparison, hit, true, optimizationTermsFilterStructure.get(comparisonID));
//int docid , id
InternalSearchHit searchHit = new InternalSearchHit(resultIds, hit.id(), new Text(hit.getType()), hit.getFields());
searchHit.sourceRef(hit.getSourceRef());
onlyReturnedFields(searchHit.sourceAsMap(), firstTableRequest.getReturnedFields(),firstTableRequest.getOriginalSelect().isSelectAll());
resultIds++;
this.hashJoinComparisonStructure.insertIntoComparisonHash(comparisonID, key, searchHit);
}
}
}
/**
 * Fetches the hits of one table of the join.
 *
 * <p>If the table has a hint limit below {@code MAX_RESULTS_ON_ONE_FETCH}, a single search
 * sized to that limit is issued; otherwise the work is delegated to {@code scrollTillLimit}.
 *
 * @param tableInJoinRequest request of the table to fetch
 * @return the fetched hits; the single-search branch returns a fixed-size list backed by
 *         the response's hit array
 */
private List<SearchHit> fetchAllHits(TableInJoinRequestBuilder tableInJoinRequest) {
    Integer hintLimit = tableInJoinRequest.getHintLimit();
    if (hintLimit == null || hintLimit >= MAX_RESULTS_ON_ONE_FETCH) {
        return scrollTillLimit(tableInJoinRequest, hintLimit);
    }
    // Named so that it does not shadow the requestBuilder field.
    SearchRequestBuilder tableSearch = tableInJoinRequest.getRequestBuilder();
    tableSearch.setSize(hintLimit);
    SearchResponse searchResponse = tableSearch.get();
    updateMetaSearchResults(searchResponse);
    return Arrays.asList(searchResponse.getHits().getHits());
}
/**
 * Scrolls through a table and collects hits page by page.
 *
 * <p>Collection stops when a page is empty, when at least {@code hintLimit} hits were
 * collected, or when {@code MAX_RESULTS_FOR_FIRST_TABLE} hits were collected (in which case a
 * message is printed). Whole pages are added, so the result may hold more than
 * {@code hintLimit} hits.
 *
 * @param tableInJoinRequest request of the table to scroll
 * @param hintLimit          number of hits wanted; {@code null} means
 *                           {@code MAX_RESULTS_FOR_FIRST_TABLE}
 * @return the collected hits
 */
private List<SearchHit> scrollTillLimit(TableInJoinRequestBuilder tableInJoinRequest, Integer hintLimit) {
    SearchResponse scrollResp = scrollOneTimeWithMax(client, tableInJoinRequest);
    updateMetaSearchResults(scrollResp);

    // Resolved once into a primitive instead of reassigning the boxed parameter.
    int effectiveLimit = (hintLimit == null) ? MAX_RESULTS_FOR_FIRST_TABLE : hintLimit;

    List<SearchHit> hitsWithScan = new ArrayList<>();
    int currentNumOfResults = 0;
    SearchHit[] hits = scrollResp.getHits().getHits();

    while (hits.length != 0 && currentNumOfResults < effectiveLimit) {
        currentNumOfResults += hits.length;
        Collections.addAll(hitsWithScan, hits);
        if (currentNumOfResults >= MAX_RESULTS_FOR_FIRST_TABLE) {
            //todo: log or exception?
            System.out.println("too many results for first table, stoping at:" + currentNumOfResults);
            break;
        }
        if (currentNumOfResults >= effectiveLimit) {
            // Enough hits collected: skip the scroll round-trip whose page would be discarded.
            break;
        }
        scrollResp = client.prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
        hits = scrollResp.getHits().getHits();
    }
    return hitsWithScan;
}
private boolean needToOptimize(Map>> optimizationTermsFilterStructure) {
if(! useQueryTermsFilterOptimization && optimizationTermsFilterStructure != null && optimizationTermsFilterStructure.size() > 0)
return false;
boolean allEmpty = true;
for(Map> optimization : optimizationTermsFilterStructure.values()){
if(optimization.size() > 0){
allEmpty = false;
break;
}
}
return !allEmpty;
}
private void updateRequestWithTermsFilter(Map>> optimizationTermsFilterStructure, TableInJoinRequestBuilder secondTableRequest) throws SqlParseException {
Select select = secondTableRequest.getOriginalSelect();
BoolQueryBuilder orQuery = QueryBuilders.boolQuery();
for(Map> optimization : optimizationTermsFilterStructure.values()) {
BoolQueryBuilder andQuery = QueryBuilders.boolQuery();
for (Map.Entry> keyToValues : optimization.entrySet()) {
String fieldName = keyToValues.getKey();
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy