// uk.gov.gchq.gaffer.operation.impl.SampleElementsForSplitPoints (Maven / Gradle / Ivy)
/*
* Copyright 2016-2020 Crown Copyright
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.gov.gchq.gaffer.operation.impl;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import com.fasterxml.jackson.core.type.TypeReference;
import uk.gov.gchq.gaffer.data.element.Element;
import uk.gov.gchq.gaffer.operation.Operation;
import uk.gov.gchq.gaffer.operation.io.InputOutput;
import uk.gov.gchq.gaffer.operation.io.MultiInput;
import uk.gov.gchq.gaffer.operation.serialisation.TypeReferenceImpl;
import uk.gov.gchq.koryphe.Since;
import uk.gov.gchq.koryphe.Summary;
import uk.gov.gchq.koryphe.ValidationResult;
import java.util.List;
import java.util.Map;
/**
*
* The {@code SampleElementsForSplitPoints} operation is for sampling an {@link Iterable}
* of {@link Element}s and generating split points. The operation returns a
* {@link List} of the split points. The split points are normally UTF strings
* but this may differ for different Stores.
*
*
* By default, all elements in the iterable will used to generate splits.
* If you wish to only sample the iterable you can set the proportionToSample
* field to a value between 0 and 1.
*
*
* You can manually set the number of splits using the numSplits field. If
* you don't set it then the Gaffer Store should calculate a number of splits
* for you.
*
*
* If you want to only use the first few elements in the iterable then you
* can chain this operation after a {@link Limit} Operation.
*
*
* Depending on the Store you run this operation against there may be a limit
* to the number of elements you are allowed to include in the sample.
*
*
* @param the type of splits
* @see SampleElementsForSplitPoints.Builder
*/
@JsonPropertyOrder(value = {"class", "input"}, alphabetic = true)
@Since("1.1.1")
@Summary("Samples an iterable of elements and generates split points")
public class SampleElementsForSplitPoints implements
Operation,
InputOutput, List>,
MultiInput {
private Iterable extends Element> input;
private Integer numSplits;
private float proportionToSample = 1f;
private Map options;
@Override
public ValidationResult validate() {
final ValidationResult result = InputOutput.super.validate();
if (null != numSplits && numSplits < 1) {
result.addError("numSplits must be null or greater than 0");
}
if (proportionToSample > 1 || proportionToSample < 0) {
result.addError("proportionToSample must within range: [0, 1]");
}
return result;
}
@Override
public SampleElementsForSplitPoints shallowClone() {
return new SampleElementsForSplitPoints.Builder()
.input(input)
.numSplits(numSplits)
.proportionToSample(proportionToSample)
.options(options)
.build();
}
@Override
public Iterable extends Element> getInput() {
return input;
}
@Override
public void setInput(final Iterable extends Element> input) {
this.input = input;
}
@Override
public Map getOptions() {
return options;
}
@Override
public void setOptions(final Map options) {
this.options = options;
}
public Integer getNumSplits() {
return numSplits;
}
public void setNumSplits(final Integer numSplits) {
this.numSplits = numSplits;
}
public float getProportionToSample() {
return proportionToSample;
}
public void setProportionToSample(final float proportionToSample) {
this.proportionToSample = proportionToSample;
}
@Override
public TypeReference> getOutputTypeReference() {
return new TypeReferenceImpl.List<>();
}
public static class Builder extends Operation.BaseBuilder, Builder>
implements InputOutput.Builder, Iterable extends Element>, List, Builder>,
MultiInput.Builder, Element, Builder> {
public Builder() {
super(new SampleElementsForSplitPoints<>());
}
public Builder numSplits(final Integer numSplits) {
_getOp().setNumSplits(numSplits);
return this;
}
public Builder proportionToSample(final float proportionToSample) {
_getOp().setProportionToSample(proportionToSample);
return _self();
}
}
}