All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.presto.spark.execution.PrestoSparkDiskPageInput Maven / Gradle / Ivy

There is a newer version: 0.290
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.spark.execution;

import com.facebook.airlift.log.Logger;
import com.facebook.presto.common.Page;
import com.facebook.presto.execution.StageId;
import com.facebook.presto.spark.classloader_interface.PrestoSparkStorageHandle;
import com.facebook.presto.spark.classloader_interface.PrestoSparkTaskOutput;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.page.PagesSerde;
import com.facebook.presto.spi.page.SerializedPage;
import com.facebook.presto.spi.plan.PlanNodeId;
import com.facebook.presto.spi.storage.TempDataOperationContext;
import com.facebook.presto.spi.storage.TempStorage;
import com.facebook.presto.spi.storage.TempStorageHandle;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.InputStreamSliceInput;

import javax.annotation.concurrent.GuardedBy;

import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.zip.CRC32;

import static com.facebook.presto.spark.SparkErrorCode.STORAGE_ERROR;
import static com.facebook.presto.spi.page.PagesSerdeUtil.readSerializedPages;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Collections.shuffle;
import static java.util.Objects.requireNonNull;

public class PrestoSparkDiskPageInput
        implements PrestoSparkPageInput
{
    private static final Logger log = Logger.get(PrestoSparkDiskPageInput.class);

    private final PagesSerde pagesSerde;
    private final TempStorage tempStorage;
    private final TempDataOperationContext tempDataOperationContext;
    private final PrestoSparkBroadcastTableCacheManager prestoSparkBroadcastTableCacheManager;
    private final StageId stageId;
    private final PlanNodeId planNodeId;
    private final List> broadcastTableFilesInfo;

    @GuardedBy("this")
    private List> pageIterators;
    @GuardedBy("this")
    private int currentIteratorIndex;

    public PrestoSparkDiskPageInput(
            PagesSerde pagesSerde,
            TempStorage tempStorage,
            TempDataOperationContext tempDataOperationContext,
            PrestoSparkBroadcastTableCacheManager prestoSparkBroadcastTableCacheManager,
            StageId stageId,
            PlanNodeId planNodeId,
            List> broadcastTableFilesInfo)
    {
        this.pagesSerde = requireNonNull(pagesSerde, "pagesSerde is null");
        this.tempStorage = requireNonNull(tempStorage, "tempStorage is null");
        this.tempDataOperationContext = requireNonNull(tempDataOperationContext, "tempDataOperationContext is null");
        this.prestoSparkBroadcastTableCacheManager = requireNonNull(prestoSparkBroadcastTableCacheManager, "prestoSparkBroadcastTableCacheManager is null");
        this.stageId = requireNonNull(stageId, "stageId is null");
        this.planNodeId = requireNonNull(planNodeId, "planNodeId is null");
        this.broadcastTableFilesInfo = requireNonNull(broadcastTableFilesInfo, "broadcastTableFilesInfo is null");
    }

    @Override
    public Page getNextPage()
    {
        Page page = null;
        synchronized (this) {
            while (page == null) {
                if (currentIteratorIndex >= getPageIterators().size()) {
                    return null;
                }
                Iterator currentIterator = getPageIterators().get(currentIteratorIndex);
                if (currentIterator.hasNext()) {
                    page = currentIterator.next();
                }
                else {
                    currentIteratorIndex++;
                }
            }
        }
        return page;
    }

    private List> getPageIterators()
    {
        if (pageIterators == null) {
            pageIterators = getPages(broadcastTableFilesInfo, tempStorage, tempDataOperationContext, prestoSparkBroadcastTableCacheManager, stageId, planNodeId);
        }
        return pageIterators;
    }

    private List> getPages(
            List> broadcastTableFilesInfo,
            TempStorage tempStorage,
            TempDataOperationContext tempDataOperationContext,
            PrestoSparkBroadcastTableCacheManager prestoSparkBroadcastTableCacheManager,
            StageId stageId,
            PlanNodeId planNodeId)
    {
        // Try to get table from cache
        List> pages = prestoSparkBroadcastTableCacheManager.getCachedBroadcastTable(stageId, planNodeId);
        if (pages == null) {
            pages = broadcastTableFilesInfo.stream()
                    .map(tableFiles -> {
                        List serializedPages = loadBroadcastTable(tableFiles, tempStorage, tempDataOperationContext);
                        return serializedPages.stream().map(serializedPage -> pagesSerde.deserialize(serializedPage)).collect(toImmutableList());
                    }).collect(toImmutableList());

            // Cache deserialized pages
            prestoSparkBroadcastTableCacheManager.cache(stageId, planNodeId, pages);
        }

        return pages.stream().map(List::iterator).collect(toImmutableList());
    }

    private List loadBroadcastTable(
            List broadcastTaskFilesInfo,
            TempStorage tempStorage,
            TempDataOperationContext tempDataOperationContext)
    {
        try {
            CRC32 checksum = new CRC32();
            ImmutableList.Builder pages = ImmutableList.builder();
            List broadcastTaskFilesInfoCopy = new ArrayList<>(broadcastTaskFilesInfo);
            shuffle(broadcastTaskFilesInfoCopy);
            for (PrestoSparkTaskOutput taskFileInfo : broadcastTaskFilesInfoCopy) {
                checksum.reset();
                PrestoSparkStorageHandle prestoSparkStorageHandle = (PrestoSparkStorageHandle) taskFileInfo;
                TempStorageHandle tempStorageHandle = tempStorage.deserialize(prestoSparkStorageHandle.getSerializedStorageHandle());
                log.info("Reading path: " + tempStorageHandle.toString());
                try (InputStream inputStream = tempStorage.open(tempDataOperationContext, tempStorageHandle);
                        InputStreamSliceInput inputStreamSliceInput = new InputStreamSliceInput(inputStream)) {
                    Iterator pagesIterator = readSerializedPages(inputStreamSliceInput);
                    while (pagesIterator.hasNext()) {
                        SerializedPage page = pagesIterator.next();
                        checksum.update(page.getSlice().byteArray(), page.getSlice().byteArrayOffset(), page.getSlice().length());
                        pages.add(page);
                    }
                }

                if (checksum.getValue() != prestoSparkStorageHandle.getChecksum()) {
                    throw new PrestoException(STORAGE_ERROR, "Disk page checksum does not match. " +
                            "Data seems to be corrupted on disk for file " + tempStorageHandle.toString());
                }
            }
            return pages.build();
        }
        catch (UncheckedIOException | IOException e) {
            throw new PrestoException(STORAGE_ERROR, "Unable to read data from disk: ", e);
        }
    }

    public long getRetainedSizeInBytes()
    {
        return prestoSparkBroadcastTableCacheManager.getBroadcastTableSizeInBytes(stageId, planNodeId);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy