All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.locationtech.geowave.examples.stats.WordCountStatistic Maven / Gradle / Ivy

The newest version!
/**
 * Copyright (c) 2013-2022 Contributors to the Eclipse Foundation
 *
 * 

See the NOTICE file distributed with this work for additional information regarding copyright * ownership. All rights reserved. This program and the accompanying materials are made available * under the terms of the Apache License, Version 2.0 which accompanies this distribution and is * available at http://www.apache.org/licenses/LICENSE-2.0.txt */ package org.locationtech.geowave.examples.stats; import java.nio.ByteBuffer; import org.locationtech.geowave.core.index.Mergeable; import org.locationtech.geowave.core.index.VarintUtils; import org.locationtech.geowave.core.store.api.DataTypeAdapter; import org.locationtech.geowave.core.store.api.FieldStatistic; import org.locationtech.geowave.core.store.api.StatisticValue; import org.locationtech.geowave.core.store.entities.GeoWaveRow; import org.locationtech.geowave.core.store.statistics.StatisticsDeleteCallback; import org.locationtech.geowave.core.store.statistics.StatisticsIngestCallback; import org.locationtech.geowave.core.store.statistics.field.FieldStatisticType; import com.beust.jcommander.Parameter; public class WordCountStatistic extends FieldStatistic { public static final FieldStatisticType STATS_TYPE = new FieldStatisticType<>("WORD_COUNT"); private static final String WHITESPACE_REGEX = "\\s+"; /** * Statistics support JCommander parameters so that they can be configured when adding the * statistic via the CLI. In this case, the minimum word length for the statistic would be * configurable via the `--minWordLength ` option when adding this statistic. */ @Parameter( names = "--minWordLength", required = true, description = "The minimum word length to count.") private int minWordLength = 0; public WordCountStatistic() { super(STATS_TYPE); } public WordCountStatistic(final String typeName, final String fieldName) { super(STATS_TYPE, typeName, fieldName); } /** * Add a programmatic setter for min word length. */ public void setMinWordLength(final int length) { this.minWordLength = length; } /** * Provides a description of the statistic that will be displayed in the CLI when describing * available statistics. */ @Override public String getDescription() { return "Provides a count of all words of a string field."; } /** * Returns `true` for every class this statistic is compatible with. In our case, only `String` * types will be supported since we are doing a word count. */ @Override public boolean isCompatibleWith(final Class fieldClass) { return String.class.isAssignableFrom(fieldClass); } /** * Constructs an empty statistic value for this statistic. The state of the value should be as if * no entries have been ingested. */ @Override public WordCountValue createEmpty() { return new WordCountValue(this); } /** * The `byteLength`, `writeBytes`, and `readBytes` functions only need to be overriden if you are * adding additional configuration parameters or need to store additional information needed for * the statistic to function properly. In this example, we have added a minimum word length * parameter, so we need to store that when the statistic is serialized and deserialized. */ @Override protected int byteLength() { return super.byteLength() + Integer.BYTES; } @Override protected void writeBytes(ByteBuffer buffer) { super.writeBytes(buffer); buffer.putInt(minWordLength); } @Override protected void readBytes(ByteBuffer buffer) { super.readBytes(buffer); minWordLength = buffer.getInt(); } /** * Every statistic has a corresponding statistic value. This class is responsible for determining * what happens when entries are ingested or deleted, as well as when two values need to be * merged. If a value can be updated on ingest, `StatisticsIngestCallback` should be implemented. * If the value can be updated on delete, `StatisticsDeleteCallback` should be implemented. Some * statistics, such as bounding box statistics cannot be updated on delete because there isn't * enough information to know if the bounding box should shrink when an entry is deleted. In that * case, only the ingest callback would be implemented. */ public static class WordCountValue extends StatisticValue implements StatisticsIngestCallback, StatisticsDeleteCallback { private long count = 0; public WordCountValue() { this(null); } private WordCountValue(final WordCountStatistic statistic) { super(statistic); } public long getCount() { return count; } /** * Merge this value with another. */ @Override public void merge(final Mergeable merge) { if ((merge != null) && (merge instanceof WordCountValue)) { final WordCountValue other = (WordCountValue) merge; count += other.count; } } /** * Get the field value from the adapter, and if it's not null, count the number of words that * exceed the minimum length and add it to the total. */ @Override public void entryIngested( final DataTypeAdapter adapter, final T entry, final GeoWaveRow... rows) { final WordCountStatistic stat = ((WordCountStatistic) getStatistic()); final Object o = adapter.getFieldValue(entry, stat.getFieldName()); if (o == null) { return; } final String str = (String) o; final String[] split = str.split(WHITESPACE_REGEX); for (String word : split) { if (word.length() >= stat.minWordLength) { count++; } } } /** * Get the field value from the adapter, and if it's not null, count the number of words that * exceed the minimum length and subtract it from the total. */ @Override public void entryDeleted( final DataTypeAdapter adapter, final T entry, final GeoWaveRow... rows) { final WordCountStatistic stat = ((WordCountStatistic) getStatistic()); final Object o = adapter.getFieldValue(entry, stat.getFieldName()); if (o == null) { return; } final String str = (String) o; final String[] split = str.split(WHITESPACE_REGEX); for (String word : split) { if (word.length() >= stat.minWordLength) { count++; } } } /** * Return the actual value of the statistic. */ @Override public Long getValue() { return getCount(); } /** * Serialize the statistic value to binary. */ @Override public byte[] toBinary() { final ByteBuffer buffer = ByteBuffer.allocate(VarintUtils.unsignedLongByteLength(count)); VarintUtils.writeUnsignedLong(count, buffer); return buffer.array(); } /** * Deserialize the statistic value from binary. */ @Override public void fromBinary(final byte[] bytes) { final ByteBuffer buffer = ByteBuffer.wrap(bytes); count = VarintUtils.readUnsignedLong(buffer); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy