All Downloads are FREE. The search and download functionality uses the official Maven repository.

uk.gov.gchq.gaffer.operation.impl.SampleElementsForSplitPoints Maven / Gradle / Ivy

There is a newer version: 2.3.1
Show newest version
/*
 * Copyright 2016-2020 Crown Copyright
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package uk.gov.gchq.gaffer.operation.impl;

import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import com.fasterxml.jackson.core.type.TypeReference;

import uk.gov.gchq.gaffer.data.element.Element;
import uk.gov.gchq.gaffer.operation.Operation;
import uk.gov.gchq.gaffer.operation.io.InputOutput;
import uk.gov.gchq.gaffer.operation.io.MultiInput;
import uk.gov.gchq.gaffer.operation.serialisation.TypeReferenceImpl;
import uk.gov.gchq.koryphe.Since;
import uk.gov.gchq.koryphe.Summary;
import uk.gov.gchq.koryphe.ValidationResult;

import java.util.List;
import java.util.Map;


/**
 * 

* The {@code SampleElementsForSplitPoints} operation is for sampling an {@link Iterable} * of {@link Element}s and generating split points. The operation returns a * {@link List} of the split points. The split points are normally UTF strings * but this may differ for different Stores. *

*

* By default, all elements in the iterable will used to generate splits. * If you wish to only sample the iterable you can set the proportionToSample * field to a value between 0 and 1. *

*

* You can manually set the number of splits using the numSplits field. If * you don't set it then the Gaffer Store should calculate a number of splits * for you. *

*

* If you want to only use the first few elements in the iterable then you * can chain this operation after a {@link Limit} Operation. *

*

* Depending on the Store you run this operation against there may be a limit * to the number of elements you are allowed to include in the sample. *

* * @param the type of splits * @see SampleElementsForSplitPoints.Builder */ @JsonPropertyOrder(value = {"class", "input"}, alphabetic = true) @Since("1.1.1") @Summary("Samples an iterable of elements and generates split points") public class SampleElementsForSplitPoints implements Operation, InputOutput, List>, MultiInput { private Iterable input; private Integer numSplits; private float proportionToSample = 1f; private Map options; @Override public ValidationResult validate() { final ValidationResult result = InputOutput.super.validate(); if (null != numSplits && numSplits < 1) { result.addError("numSplits must be null or greater than 0"); } if (proportionToSample > 1 || proportionToSample < 0) { result.addError("proportionToSample must within range: [0, 1]"); } return result; } @Override public SampleElementsForSplitPoints shallowClone() { return new SampleElementsForSplitPoints.Builder() .input(input) .numSplits(numSplits) .proportionToSample(proportionToSample) .options(options) .build(); } @Override public Iterable getInput() { return input; } @Override public void setInput(final Iterable input) { this.input = input; } @Override public Map getOptions() { return options; } @Override public void setOptions(final Map options) { this.options = options; } public Integer getNumSplits() { return numSplits; } public void setNumSplits(final Integer numSplits) { this.numSplits = numSplits; } public float getProportionToSample() { return proportionToSample; } public void setProportionToSample(final float proportionToSample) { this.proportionToSample = proportionToSample; } @Override public TypeReference> getOutputTypeReference() { return new TypeReferenceImpl.List<>(); } public static class Builder extends Operation.BaseBuilder, Builder> implements InputOutput.Builder, Iterable, List, Builder>, MultiInput.Builder, Element, Builder> { public Builder() { super(new SampleElementsForSplitPoints<>()); } public Builder numSplits(final Integer numSplits) { 
_getOp().setNumSplits(numSplits); return this; } public Builder proportionToSample(final float proportionToSample) { _getOp().setProportionToSample(proportionToSample); return _self(); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy