All downloads are free. Search and download functionality uses the official Maven repository.

tech.ytsaurus.client.operations.SortSpec Maven / Gradle / Ivy

The newest version!
package tech.ytsaurus.client.operations;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;

import javax.annotation.Nullable;

import tech.ytsaurus.client.TransactionalClient;
import tech.ytsaurus.core.DataSize;
import tech.ytsaurus.core.cypress.YPath;
import tech.ytsaurus.core.tables.SortColumn;
import tech.ytsaurus.lang.NonNullApi;
import tech.ytsaurus.lang.NonNullFields;
import tech.ytsaurus.ysontree.YTreeBuilder;


/**
 * Immutable sort spec.
 *
 * @see 
 * sort documentation
 * 
 */
@NonNullApi
@NonNullFields
public class SortSpec extends SystemOperationSpecBase implements Spec {
    private final List sortBy;

    @Nullable
    private final Integer partitionCount;
    @Nullable
    private final Integer partitionJobCount;
    @Nullable
    private final DataSize dataSizePerSortJob;
    @Nullable
    private final DataSize dataSizePerSortedMergeJob;
    @Nullable
    private final JobIo mergeJobIo;

    /**
     * Create sort spec from input tables, output table and list of columns.
     */
    public SortSpec(
            List inputTables,
            YPath outputTable,
            List sortBy) {
        this(
                SortSpec.builder()
                        .setInputTables(inputTables)
                        .setOutputTable(outputTable)
                        .setSortBy(sortBy)
        );
    }

    protected > SortSpec(BuilderBase builder) {
        super(builder);
        if (builder.sortBy.isEmpty()) {
            throw new RuntimeException("sortBy is not specified");
        }
        sortBy = builder.sortBy;

        partitionCount = builder.partitionCount;
        partitionJobCount = builder.partitionJobCount;
        dataSizePerSortJob = builder.dataSizePerSortJob;
        dataSizePerSortedMergeJob = builder.dataSizePerSortedMergeJob;
        mergeJobIo = builder.mergeJobIo;
    }

    /**
     * @see Builder#setPartitionCount(Integer)
     */
    public Optional getPartitionCount() {
        return Optional.ofNullable(partitionCount);
    }

    /**
     * @see Builder#setPartitionJobCount(Integer)
     */
    public Optional getPartitionJobCount() {
        return Optional.ofNullable(partitionJobCount);
    }

    /**
     * @see Builder#setDataSizePerSortJob(DataSize)
     */
    public Optional getDataSizePerSortJob() {
        return Optional.ofNullable(dataSizePerSortJob);
    }

    /**
     * @see Builder#setDataSizePerSortedMergeJob(DataSize)
     */
    public Optional getDataSizePerSortedMergeJob() {
        return Optional.ofNullable(dataSizePerSortedMergeJob);
    }

    /**
     * @see Builder#setMergeJobIo(JobIo)
     */
    public Optional getMergeJobIo() {
        return Optional.ofNullable(mergeJobIo);
    }

    /**
     * @see Builder#setSortBy(String...)
     */
    public List getSortBy() {
        return sortBy.stream().map(SortColumn::getName).collect(Collectors.toList());
    }

    /**
     * @see Builder#setSortByColumns(List)
     */
    public List getSortByColumns() {
        return sortBy;
    }

    /**
     * Convert to yson.
     */
    @Override
    public YTreeBuilder prepare(YTreeBuilder builder, TransactionalClient yt,
                                SpecPreparationContext specPreparationContext) {
        SpecUtils.createOutputTables(
                yt,
                specPreparationContext.getTransactionalOptions().orElse(null),
                List.of(getOutputTable()),
                getOutputTableAttributes()
        );

        return builder.beginMap()
                .when(partitionCount != null, b -> b.key("partition_count").value(partitionCount))
                .when(partitionJobCount != null, b -> b.key("partition_job_count").value(partitionJobCount))
                .when(dataSizePerSortJob != null, b -> b.key("data_size_per_sort_job")
                        .value(Objects.requireNonNull(dataSizePerSortJob).toBytes()))
                .when(dataSizePerSortedMergeJob != null, b -> b.key("data_size_per_sorted_merge_job")
                        .value(Objects.requireNonNull(dataSizePerSortedMergeJob).toBytes()))
                .key("sort_by").value(sortBy, (b, t) -> t.toTree(b))
                .when(mergeJobIo != null,
                        b -> b.key("merge_job_io").value(Objects.requireNonNull(mergeJobIo).prepare()))
                .apply(b -> toTree(b, specPreparationContext))
                .endMap();
    }

    /**
     * Create empty builder.
     */
    public static BuilderBase builder() {
        return new Builder();
    }

    /**
     * Builder of {@link SortSpec}.
     */
    protected static class Builder extends BuilderBase {
        @Override
        protected Builder self() {
            return this;
        }
    }

    // BuilderBase was taken out because there is another client
    // which we need to support too and which use the same SortSpec class.
    @NonNullApi
    @NonNullFields
    public abstract static class BuilderBase> extends SystemOperationSpecBase.Builder {
        private List sortBy = new ArrayList<>();
        @Nullable
        private Integer partitionCount;
        @Nullable
        private Integer partitionJobCount;
        @Nullable
        private DataSize dataSizePerSortJob;
        @Nullable
        private DataSize dataSizePerSortedMergeJob;
        @Nullable
        private JobIo mergeJobIo;

        /**
         * Create instance of {@link SortSpec}.
         */
        public SortSpec build() {
            return new SortSpec(this);
        }

        /**
         * Set list of columns to sort by. It is required parameter.
         */
        public T setSortByColumns(List sortBy) {
            this.sortBy = new ArrayList<>(sortBy);
            return self();
        }

        /**
         * Set list of columns to sort by. It is required parameter.
         */
        public T setSortByColumns(SortColumn... sortBy) {
            return setSortByColumns(Arrays.asList(sortBy));
        }

        /**
         * Set list of columns to sort by. It is required parameter.
         */
        public T setSortBy(Collection sortBy) {
            return setSortByColumns(SortColumn.convert(sortBy));
        }

        /**
         * Set list of columns to sort by. It is required parameter.
         */
        public T setSortBy(String... sortBy) {
            return setSortBy(Arrays.asList(sortBy));
        }

        /**
         * Set how many partitions should be made in the sort. It is advisory.
         */
        public T setPartitionCount(@Nullable Integer partitionCount) {
            this.partitionCount = partitionCount;
            return self();
        }

        /**
         * Set how many partition jobs should be run. It is advisory.
         */
        public T setPartitionJobCount(@Nullable Integer partitionJobCount) {
            this.partitionJobCount = partitionJobCount;
            return self();
        }

        /**
         * Set recommended amount of input data for one sort job.
         */
        public T setDataSizePerSortJob(@Nullable DataSize dataSizePerSortJob) {
            this.dataSizePerSortJob = dataSizePerSortJob;
            return self();
        }

        /**
         * Set recommended amount of input data for one sorted merge job.
         */
        public T setDataSizePerSortedMergeJob(@Nullable DataSize dataSizePerSortedMergeJob) {
            this.dataSizePerSortedMergeJob = dataSizePerSortedMergeJob;
            return self();
        }

        /**
         * Set job I/O options.
         */
        public T setMergeJobIo(@Nullable JobIo mergeJobIo) {
            this.mergeJobIo = mergeJobIo;
            return self();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy