Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.elasticsearch.plugin.nlpcn.executors.CSVResultsExtractor Maven / Gradle / Ivy
package org.elasticsearch.plugin.nlpcn.executors;
import com.google.common.base.Joiner;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregation;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
import org.elasticsearch.search.aggregations.metrics.geobounds.GeoBounds;
import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles;
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStats;
import org.elasticsearch.search.aggregations.metrics.tophits.TopHits;
import org.nlpcn.es4sql.Util;
import java.util.*;
/**
* Created by Eliran on 27/12/2015.
*/
public class CSVResultsExtractor {
private final boolean includeType;
private final boolean includeScore;
private final boolean indcludeId;
private int currentLineIndex;
public CSVResultsExtractor(boolean includeScore, boolean includeType, boolean includeId) {
this.includeScore = includeScore;
this.includeType = includeType;
this.indcludeId = includeId;
this.currentLineIndex = 0;
}
public CSVResult extractResults(Object queryResult, boolean flat, String separator) throws CsvExtractorException {
if(queryResult instanceof SearchHits){
SearchHit[] hits = ((SearchHits) queryResult).getHits();
List> docsAsMap = new ArrayList<>();
List headers = createHeadersAndFillDocsMap(flat, hits, docsAsMap);
List csvLines = createCSVLinesFromDocs(flat, separator, docsAsMap, headers);
return new CSVResult(headers,csvLines);
}
if(queryResult instanceof Aggregations){
List headers = new ArrayList<>();
List> lines = new ArrayList<>();
lines.add(new ArrayList());
handleAggregations((Aggregations) queryResult, headers, lines);
List csvLines = new ArrayList<>();
for(List simpleLine : lines){
csvLines.add(Joiner.on(separator).join(simpleLine));
}
//todo: need to handle more options for aggregations:
//Aggregations that inhrit from base
//ScriptedMetric
return new CSVResult(headers,csvLines);
}
return null;
}
private void handleAggregations(Aggregations aggregations, List headers, List> lines) throws CsvExtractorException {
if(allNumericAggregations(aggregations)){
lines.get(this.currentLineIndex).addAll(fillHeaderAndCreateLineForNumericAggregations(aggregations, headers));
return;
}
//aggregations with size one only supported when not metrics.
List aggregationList = aggregations.asList();
if(aggregationList.size() > 1){
throw new CsvExtractorException("currently support only one aggregation at same level (Except for numeric metrics)");
}
Aggregation aggregation = aggregationList.get(0);
//we want to skip singleBucketAggregations (nested,reverse_nested,filters)
if(aggregation instanceof SingleBucketAggregation){
Aggregations singleBucketAggs = ((SingleBucketAggregation) aggregation).getAggregations();
handleAggregations(singleBucketAggs, headers, lines);
return;
}
if(aggregation instanceof NumericMetricsAggregation){
handleNumericMetricAggregation(headers, lines.get(currentLineIndex), aggregation);
return;
}
if(aggregation instanceof GeoBounds){
handleGeoBoundsAggregation(headers, lines, (GeoBounds) aggregation);
return;
}
if(aggregation instanceof TopHits){
//todo: handle this . it returns hits... maby back to normal?
//todo: read about this usages
// TopHits topHitsAggregation = (TopHits) aggregation;
}
if(aggregation instanceof MultiBucketsAggregation){
MultiBucketsAggregation bucketsAggregation = (MultiBucketsAggregation) aggregation;
String name = bucketsAggregation.getName();
//checking because it can comes from sub aggregation again
if(!headers.contains(name)){
headers.add(name);
}
Collection extends MultiBucketsAggregation.Bucket> buckets = bucketsAggregation.getBuckets();
//clone current line.
List currentLine = lines.get(this.currentLineIndex);
List clonedLine = new ArrayList<>(currentLine);
//call handle_Agg with current_line++
boolean firstLine = true;
for (MultiBucketsAggregation.Bucket bucket : buckets) {
//each bucket need to add new line with current line copied => except for first line
String key = bucket.getKeyAsString();
if(firstLine){
firstLine = false;
}
else {
currentLineIndex++;
currentLine = new ArrayList(clonedLine);
lines.add(currentLine);
}
currentLine.add(key);
handleAggregations(bucket.getAggregations(),headers,lines);
}
}
}
private void handleGeoBoundsAggregation(List headers, List> lines, GeoBounds geoBoundsAggregation) {
String geoBoundAggName = geoBoundsAggregation.getName();
headers.add(geoBoundAggName+".topLeft.lon");
headers.add(geoBoundAggName+".topLeft.lat");
headers.add(geoBoundAggName+".bottomRight.lon");
headers.add(geoBoundAggName+".bottomRight.lat");
List line = lines.get(this.currentLineIndex);
line.add(String.valueOf(geoBoundsAggregation.topLeft().getLon()));
line.add(String.valueOf(geoBoundsAggregation.topLeft().getLat()));
line.add(String.valueOf(geoBoundsAggregation.bottomRight().getLon()));
line.add(String.valueOf(geoBoundsAggregation.bottomRight().getLat()));
lines.add(line);
}
private List fillHeaderAndCreateLineForNumericAggregations(Aggregations aggregations, List header) throws CsvExtractorException {
List line = new ArrayList<>();
List aggregationList = aggregations.asList();
for(Aggregation aggregation : aggregationList){
handleNumericMetricAggregation(header, line, aggregation);
}
return line;
}
private void handleNumericMetricAggregation(List header, List line, Aggregation aggregation) throws CsvExtractorException {
String name = aggregation.getName();
if(aggregation instanceof NumericMetricsAggregation.SingleValue){
if(!header.contains(name)){
header.add(name);
}
NumericMetricsAggregation.SingleValue agg = (NumericMetricsAggregation.SingleValue) aggregation;
line.add(!Double.isInfinite(agg.value()) ? agg.getValueAsString() : "null");
}
//todo:Numeric MultiValue - Stats,ExtendedStats,Percentile...
else if(aggregation instanceof NumericMetricsAggregation.MultiValue){
if(aggregation instanceof Stats) {
String[] statsHeaders = new String[]{"count", "sum", "avg", "min", "max"};
boolean isExtendedStats = aggregation instanceof ExtendedStats;
if(isExtendedStats){
String[] extendedHeaders = new String[]{"sumOfSquares", "variance", "stdDeviation"};
statsHeaders = Util.concatStringsArrays(statsHeaders,extendedHeaders);
}
mergeHeadersWithPrefix(header, name, statsHeaders);
Stats stats = (Stats) aggregation;
line.add(String.valueOf(stats.getCount()));
line.add(stats.getSumAsString());
line.add(stats.getAvgAsString());
line.add(stats.getMinAsString());
line.add(stats.getMaxAsString());
if(isExtendedStats){
ExtendedStats extendedStats = (ExtendedStats) aggregation;
line.add(extendedStats.getSumOfSquaresAsString());
line.add(extendedStats.getVarianceAsString());
line.add(extendedStats.getStdDeviationAsString());
}
}
else if( aggregation instanceof Percentiles){
String[] percentileHeaders = new String[]{"1.0", "5.0", "25.0", "50.0", "75.0", "95.0", "99.0"};
mergeHeadersWithPrefix(header, name, percentileHeaders);
Percentiles percentiles = (Percentiles) aggregation;
line.add(percentiles.percentileAsString(1.0));
line.add(percentiles.percentileAsString(5.0));
line.add(percentiles.percentileAsString(25.0));
line.add(percentiles.percentileAsString(50.0));
line.add(percentiles.percentileAsString(75));
line.add(percentiles.percentileAsString(95.0));
line.add(percentiles.percentileAsString(99.0));
}
else {
throw new CsvExtractorException("unknown NumericMetricsAggregation.MultiValue:" + aggregation.getClass());
}
}
else {
throw new CsvExtractorException("unknown NumericMetricsAggregation" + aggregation.getClass());
}
}
private void mergeHeadersWithPrefix(List header, String prefix, String[] newHeaders) {
for (int i = 0; i < newHeaders.length; i++) {
String newHeader = newHeaders[i];
if(prefix != null && !prefix.equals("")) {
newHeader = prefix + "." + newHeader;
}
if (!header.contains(newHeader)) {
header.add(newHeader);
}
}
}
private boolean allNumericAggregations(Aggregations aggregations) {
List aggregationList = aggregations.asList();
for(Aggregation aggregation : aggregationList){
if(!(aggregation instanceof NumericMetricsAggregation)){
return false;
}
}
return true;
}
private Aggregation skipAggregations(Aggregation firstAggregation) {
while(firstAggregation instanceof SingleBucketAggregation){
firstAggregation = getFirstAggregation(((SingleBucketAggregation) firstAggregation).getAggregations());
}
return firstAggregation;
}
private Aggregation getFirstAggregation(Aggregations aggregations){
return aggregations.asList().get(0);
}
private List createCSVLinesFromDocs(boolean flat, String separator, List> docsAsMap, List headers) {
List csvLines = new ArrayList<>();
for(Map doc : docsAsMap){
String line = "";
for(String header : headers){
line += findFieldValue(header, doc, flat, separator);
}
csvLines.add(line.substring(0, line.lastIndexOf(separator)));
}
return csvLines;
}
private List createHeadersAndFillDocsMap(boolean flat, SearchHit[] hits, List> docsAsMap) {
Set csvHeaders = new HashSet<>();
for(SearchHit hit : hits){
Map doc = hit.getSourceAsMap();
Map fields = hit.getFields();
for(DocumentField searchHitField : fields.values()){
doc.put(searchHitField.getName(),searchHitField.getValue());
}
mergeHeaders(csvHeaders, doc, flat);
if(this.indcludeId){
doc.put("_id", hit.getId());
}
if(this.includeScore){
doc.put("_score", hit.getScore());
}
if(this.includeType){
doc.put("_type",hit.getType());
}
docsAsMap.add(doc);
}
ArrayList headersList = new ArrayList<>(csvHeaders);
if (this.indcludeId){
headersList.add("_id");
}
if (this.includeScore){
headersList.add("_score");
}
if (this.includeType){
headersList.add("_type");
}
return headersList;
}
private String findFieldValue(String header, Map doc, boolean flat, String separator) {
if(flat && header.contains(".")){
String[] split = header.split("\\.");
Object innerDoc = doc;
for(String innerField : split){
if(!(innerDoc instanceof Map)){
return separator;
}
innerDoc = ((Map)innerDoc).get(innerField);
if(innerDoc == null){
return separator;
}
}
return innerDoc.toString() + separator;
}
else {
if(doc.containsKey(header)){
return String.valueOf(doc.get(header)) + separator;
}
}
return separator;
}
private void mergeHeaders(Set headers, Map doc, boolean flat) {
if (!flat) {
headers.addAll(doc.keySet());
return;
}
mergeFieldNamesRecursive(headers, doc, "");
}
private void mergeFieldNamesRecursive(Set headers, Map doc, String prefix) {
for(Map.Entry field : doc.entrySet()){
Object value = field.getValue();
if(value instanceof Map){
mergeFieldNamesRecursive(headers,(Map) value,prefix+field.getKey()+".");
}
else {
headers.add(prefix+field.getKey());
}
}
}
}