All downloads are free. Search and download functionality uses the official Maven repository.

tech.ytsaurus.client.operations.ReduceSpec Maven / Gradle / Ivy

The newest version!
package tech.ytsaurus.client.operations;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

import javax.annotation.Nullable;

import tech.ytsaurus.client.TransactionalClient;
import tech.ytsaurus.core.DataSize;
import tech.ytsaurus.core.cypress.YPath;
import tech.ytsaurus.lang.NonNullApi;
import tech.ytsaurus.lang.NonNullFields;
import tech.ytsaurus.ysontree.YTreeBuilder;


/**
 * Spec of the reduce operation.
 *
 * @see 
 * reduce documentation
 * 
 */
@NonNullApi
@NonNullFields
public class ReduceSpec extends SimpleUserOperationSpecBase implements Spec {
    private final UserJobSpec reducerSpec;
    private final List reduceBy;
    private final List joinBy;

    private final @Nullable
    JobIo jobIo;
    private final boolean enableKeyGuarantee;

    /**
     * Construct reduce spec from input and output tables, `reduceBy` list of columns
     * and command with other options set by defaults.
     */
    public ReduceSpec(
            List inputTables,
            List outputTables,
            List reduceBy,
            String command) {
        this(inputTables, outputTables, reduceBy, new CommandSpec(command));
    }

    /**
     * Construct reduce spec from input and output tables, `reduceBy` list of columns
     * and reducer with other options set by defaults.
     */
    public ReduceSpec(
            List inputTables,
            List outputTables,
            List reduceBy,
            Reducer reducer) {
        this(inputTables, outputTables, reduceBy, new ReducerSpec(reducer));
    }

    /**
     * Construct reduce spec from input and output tables, `reduceBy` list of columns
     * and reducer spec with other options set by defaults.
     */
    public ReduceSpec(
            List inputTables,
            List outputTables,
            List reduceBy,
            UserJobSpec reducerSpec) {
        this(
                null,
                null,
                inputTables,
                outputTables,
                reduceBy,
                reducerSpec
        );
    }

    public ReduceSpec(
            @Nullable Integer jobCount,
            @Nullable DataSize maxDataSizePerJob,
            List inputTables,
            List outputTables,
            List reduceBy,
            UserJobSpec reducerSpec) {
        this(
                builder()
                        .setJobCount(jobCount)
                        .setMaxDataSizePerJob(maxDataSizePerJob)
                        .setInputTables(inputTables)
                        .setOutputTables(outputTables)
                        .setReduceBy(reduceBy)
                        .setReducerSpec(reducerSpec)
        );
    }

    protected ReduceSpec(BuilderBase builder) {
        super(builder);
        if (builder.reducerSpec == null) {
            throw new RuntimeException("reducer is not set");
        }
        reducerSpec = builder.reducerSpec;

        if (builder.joinBy.isEmpty() && builder.reduceBy.isEmpty()) {
            throw new RuntimeException("Neither reduceBy nor joinBy is set");
        }
        reduceBy = builder.reduceBy;
        joinBy = builder.joinBy;

        if (reducerSpec instanceof MapperOrReducerSpec) {
            MapperOrReducerSpec mapperOrReducerSpec = (MapperOrReducerSpec) reducerSpec;
            jobIo = mapperOrReducerSpec.createJobIo(builder.jobIo);
            if (mapperOrReducerSpec.mapperOrReducer.outputType().getClass() == EntityTableEntryType.class) {
                var outputTableSchema = ((EntityTableEntryType) mapperOrReducerSpec
                        .mapperOrReducer.outputType()).getTableSchema();
                getOutputTables().replaceAll(yPath -> yPath.withSchema(outputTableSchema.toYTree()));
            }
        } else {
            jobIo = builder.jobIo;
        }
        enableKeyGuarantee = builder.enableKeyGuarantee;
    }

    /**
     * @see Builder#setJobIo(JobIo)
     */
    public Optional getJobIo() {
        return Optional.ofNullable(jobIo);
    }

    /**
     * @see Builder#setReduceBy(List)
     */
    public List getReduceBy() {
        return reduceBy;
    }

    /**
     * @see Builder#setReducerSpec(UserJobSpec)
     */
    public UserJobSpec getReducerSpec() {
        return reducerSpec;
    }

    /**
     * @see Builder#setJoinBy(List)
     */
    public List getJoinBy() {
        return joinBy;
    }

    /**
     * Create yson reduce spec to transfer to YT.
     */
    @Override
    public YTreeBuilder prepare(YTreeBuilder builder, TransactionalClient yt,
                                SpecPreparationContext specPreparationContext) {
        SpecUtils.createOutputTables(
                yt,
                specPreparationContext.getTransactionalOptions().orElse(null),
                getOutputTables(),
                getOutputTableAttributes()
        );

        var formatContext = FormatContext.builder()
                .setInputTableCount(getInputTables().size())
                .setOutputTableCount(getOutputTables().size())
                .build();
        return builder.beginMap()
                .apply(b -> SpecUtils.addMapperOrReducerTitle(b, reducerSpec))
                .key("reducer").apply(b -> reducerSpec.prepare(b, yt, specPreparationContext, formatContext))
                .key("reduce_by").value(reduceBy)
                .when(!joinBy.isEmpty(), b -> b.key("join_by").value(joinBy))
                .when(jobIo != null, b -> b.key("job_io").value(jobIo.prepare()))
                .when(!enableKeyGuarantee, b -> b.key("enable_key_guarantee").value(false))
                .apply(b -> dumpToSpec(b, specPreparationContext))
                .endMap();
    }

    /**
     * Construct empty builder for reduce spec.
     */
    public static BuilderBase builder() {
        return new Builder();
    }

    /**
     * Builder for {@link ReduceSpec}
     */
    protected static class Builder extends BuilderBase {
        @Override
        protected Builder self() {
            return this;
        }
    }

    /**
     * BuilderBase was taken out because there is another client
     * which we need to support too and which use the same ReduceSpec class.
     */
    @NonNullApi
    @NonNullFields
    public abstract static class BuilderBase> extends SimpleUserOperationSpecBase.Builder {
        @Nullable
        private UserJobSpec reducerSpec;
        @Nullable
        private JobIo jobIo;
        private List reduceBy = new ArrayList<>();
        private List joinBy = new ArrayList<>();
        private boolean enableKeyGuarantee = true;

        protected BuilderBase() {
        }

        /**
         * Construct {@link ReduceSpec} instance.
         */
        public ReduceSpec build() {
            return new ReduceSpec(this);
        }

        /**
         * Set reducer spec.
         *
         * @see ReducerSpec
         * @see CommandSpec
         */
        public T setReducerSpec(UserJobSpec reducerSpec) {
            this.reducerSpec = reducerSpec;
            return self();
        }

        /**
         * Set reducer command.
         * Create CommandSpec as user job spec from command with other options set by defaults.
         */
        public T setReducerCommand(String command) {
            return setReducerSpec(new CommandSpec(command));
        }

        /**
         * Set a list of columns by which reduce is carried out;
         */
        public T setReduceBy(List reduceBy) {
            this.reduceBy = new ArrayList<>(reduceBy);
            return self();
        }

        /**
         * Set a list of columns by which reduce is carried out;
         */
        public T setReduceBy(String... reduceBy) {
            return setReduceBy(Arrays.asList(reduceBy));
        }

        public T setJoinBy(List joinBy) {
            this.joinBy = new ArrayList<>(joinBy);
            return self();
        }

        public T setJoinBy(String... joinBy) {
            return setJoinBy(Arrays.asList(joinBy));
        }

        /**
         * Set job I/O options.
         *
         * @see JobIo
         */
        public T setJobIo(@Nullable JobIo jobIo) {
            this.jobIo = jobIo;
            return self();
        }

        /**
         * When this option is set to false and joinBy is specified,
         * Reduce operation behaves like JoinReduce.
         * 

* Documentation. */ public T setEnableKeyGuarantee(boolean enableKeyGuarantee) { this.enableKeyGuarantee = enableKeyGuarantee; return self(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy