// com.caseystella.analytics.extractor.DataPointExtractor Maven / Gradle / Ivy
/**
* Copyright (C) 2016 Hurence ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.caseystella.analytics.extractor;
import com.caseystella.analytics.DataPoint;
import com.google.common.base.Joiner;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Builds {@link DataPoint}s out of a raw key/value pair.
 *
 * <p>The key and the value are each turned into a field map by the converters
 * named in the {@link DataPointExtractorConfig}; the two maps are merged (value
 * entries win on a name clash) and one {@link DataPoint} is produced per
 * configured measurement.
 */
public class DataPointExtractor implements Extractor {
    /** Extraction settings; stays {@code null} until a config is supplied. */
    DataPointExtractorConfig config = null;

    /** Creates an unconfigured extractor; supply a config with {@link #withConfig} before extracting. */
    public DataPointExtractor() {
    }

    /**
     * Creates an extractor that uses the given configuration.
     *
     * @param config the extraction settings
     */
    public DataPointExtractor(DataPointExtractorConfig config) {
        this.config = config;
    }

    /**
     * Replaces the configuration of this extractor.
     *
     * @param config the extraction settings
     * @return this extractor, to allow chaining
     */
    public DataPointExtractor withConfig(DataPointExtractorConfig config) {
        this.config = config;
        return this;
    }

    /**
     * Converts one key/value pair into zero or more data points.
     *
     * @param key             raw key bytes, handed to the configured key converter
     * @param value           raw value bytes, handed to the configured value converter
     * @param failOnMalformed when {@code true}, a {@link RuntimeException} raised while
     *                        building a data point is rethrown; when {@code false} the
     *                        offending measurement is skipped
     * @return the data points that could be built; empty when neither converter
     *         yielded any field
     * @throws IllegalStateException if no configuration has been supplied
     * @throws RuntimeException      if {@code failOnMalformed} is set and a measurement
     *                               cannot be built (for example a required field is missing)
     */
    @Override
    public Iterable<DataPoint> extract(byte[] key, byte[] value, boolean failOnMalformed) {
        if (config == null) {
            throw new IllegalStateException("No configuration set; call withConfig(...) before extract(...)");
        }

        Map<String, Object> unionMap = new HashMap<>();
        Map<String, Object> keyMap = config.getKeyConverter().convert(key, config.getKeyConverterConfig());
        Map<String, Object> valueMap = config.getValueConverter().convert(value, config.getValueConverterConfig());
        if (keyMap != null) {
            unionMap.putAll(keyMap);
        }
        // Value fields are merged second, so they override key fields of the same name.
        if (valueMap != null) {
            unionMap.putAll(valueMap);
        }

        List<DataPoint> ret = new ArrayList<>();
        if (unionMap.isEmpty()) {
            return ret;
        }
        for (DataPointExtractorConfig.Measurement measurement : config.getMeasurements()) {
            try {
                ret.add(toDataPoint(measurement, unionMap));
            } catch (RuntimeException re) {
                if (failOnMalformed) {
                    throw re;
                }
                // Best effort: this measurement is dropped, the remaining ones are still tried.
            }
        }
        return ret;
    }

    /** Builds the data point described by one measurement from the merged field map. */
    private static DataPoint toDataPoint(DataPointExtractorConfig.Measurement measurement,
                                         Map<String, Object> fields) {
        DataPoint dp = new DataPoint();
        dp.setSource(resolveSource(measurement, fields));

        Object tsObj = requireField(fields, measurement.getTimestampField());
        dp.setTimestamp(measurement.getTimestampConverter().convert(tsObj, measurement.getTimestampConverterConfig()));

        Object measurementObj = requireField(fields, measurement.getMeasurementField());
        dp.setValue(measurement.getMeasurementConverter().convert(measurementObj, measurement.getMeasurementConverterConfig()));

        dp.setMetadata(buildMetadata(measurement, fields));
        return dp;
    }

    /** Returns the fixed source if one is configured, else the source-field values joined with '.'. */
    private static String resolveSource(DataPointExtractorConfig.Measurement measurement,
                                        Map<String, Object> fields) {
        if (measurement.getSource() != null) {
            return measurement.getSource();
        }
        List<String> sources = new ArrayList<>();
        for (String sourceField : measurement.getSourceFields()) {
            sources.add(requireField(fields, sourceField).toString());
        }
        return Joiner.on(".").join(sources);
    }

    /** Looks up a mandatory field, failing with a descriptive message when it is absent or null. */
    private static Object requireField(Map<String, Object> fields, String name) {
        Object found = fields.get(name);
        if (found == null) {
            throw new RuntimeException("Unable to find " + name + " in " + fields);
        }
        return found;
    }

    /**
     * Collects the metadata of a data point: the configured metadata fields when
     * any are listed, otherwise every field except the measurement and timestamp
     * fields. Null values and values that sanitise to the empty string are left out.
     */
    private static Map<String, String> buildMetadata(DataPointExtractorConfig.Measurement measurement,
                                                     Map<String, Object> fields) {
        Map<String, String> metadata = new HashMap<>();
        if (measurement.getMetadataFields() != null && !measurement.getMetadataFields().isEmpty()) {
            for (String field : measurement.getMetadataFields()) {
                putSanitized(metadata, field, fields.get(field));
            }
        } else {
            for (Map.Entry<String, Object> kv : fields.entrySet()) {
                String name = kv.getKey();
                if (!name.equals(measurement.getMeasurementField()) && !name.equals(measurement.getTimestampField())) {
                    putSanitized(metadata, name, kv.getValue());
                }
            }
        }
        return metadata;
    }

    /** Stores the sanitised text of {@code raw} under {@code name} unless it is null or ends up empty. */
    private static void putSanitized(Map<String, String> metadata, String name, Object raw) {
        if (raw == null) {
            // A missing/null metadata value must not invalidate the whole data point.
            return;
        }
        String cleaned = sanitize(raw.toString());
        if (!cleaned.isEmpty()) {
            metadata.put(name, cleaned);
        }
    }

    /**
     * Normalises a metadata value: spaces and '/' become '_', '&amp;' becomes "and",
     * and commas, brackets, braces, parentheses, question marks and quotes are removed.
     */
    private static String sanitize(String text) {
        return text.replace(' ', '_')
                   .replace("&", "and")
                   .replace(",", "")
                   .replace("(", "")
                   .replace(")", "")
                   .replace("[", "")
                   .replace("]", "")
                   .replace("{", "")
                   .replace("}", "")
                   .replace("?", "")
                   .replace("\'", "")
                   .replace("\"", "")
                   .replace("/", "_");
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy