All Downloads are FREE. Search and download functionalities are using the official Maven repository.

alluxio.master.lineage.LineageMaster Maven / Gradle / Ivy

There is a newer version: 313
Show newest version
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.master.lineage;

import alluxio.AlluxioURI;
import alluxio.Configuration;
import alluxio.Constants;
import alluxio.exception.AccessControlException;
import alluxio.exception.AlluxioException;
import alluxio.exception.BlockInfoException;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.FileAlreadyExistsException;
import alluxio.exception.FileDoesNotExistException;
import alluxio.exception.InvalidPathException;
import alluxio.exception.LineageDeletionException;
import alluxio.exception.LineageDoesNotExistException;
import alluxio.heartbeat.HeartbeatContext;
import alluxio.heartbeat.HeartbeatThread;
import alluxio.job.CommandLineJob;
import alluxio.job.Job;
import alluxio.master.AbstractMaster;
import alluxio.master.MasterContext;
import alluxio.master.file.FileSystemMaster;
import alluxio.master.file.options.CreateFileOptions;
import alluxio.master.journal.Journal;
import alluxio.master.journal.JournalOutputStream;
import alluxio.master.journal.JournalProtoUtils;
import alluxio.master.lineage.checkpoint.CheckpointPlan;
import alluxio.master.lineage.checkpoint.CheckpointSchedulingExcecutor;
import alluxio.master.lineage.meta.Lineage;
import alluxio.master.lineage.meta.LineageIdGenerator;
import alluxio.master.lineage.meta.LineageStore;
import alluxio.master.lineage.meta.LineageStoreView;
import alluxio.master.lineage.recompute.RecomputeExecutor;
import alluxio.master.lineage.recompute.RecomputePlanner;
import alluxio.proto.journal.Journal.JournalEntry;
import alluxio.proto.journal.Lineage.DeleteLineageEntry;
import alluxio.proto.journal.Lineage.LineageEntry;
import alluxio.proto.journal.Lineage.LineageIdGeneratorEntry;
import alluxio.thrift.LineageMasterClientService;
import alluxio.util.IdUtils;
import alluxio.util.io.PathUtils;
import alluxio.wire.FileInfo;
import alluxio.wire.LineageInfo;

import com.google.common.base.Preconditions;
import com.google.protobuf.Message;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.apache.thrift.TProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Future;

import javax.annotation.concurrent.NotThreadSafe;

/**
 * The lineage master stores the lineage metadata in Alluxio, and it contains the components that
 * manage all lineage-related activities.
 */
@NotThreadSafe
public final class LineageMaster extends AbstractMaster {
  private static final Logger LOG = LoggerFactory.getLogger(Constants.LOGGER_TYPE);

  private final Configuration mConfiguration;
  private final LineageStore mLineageStore;
  private final FileSystemMaster mFileSystemMaster;
  private final LineageIdGenerator mLineageIdGenerator;

  /**
   * The service that checkpoints lineages. We store it here so that it can be accessed from tests.
   */
  @SuppressFBWarnings("URF_UNREAD_FIELD")
  private Future mCheckpointExecutionService;
  /**
   * The service that recomputes lineages. We store it here so that it can be accessed from tests.
   */
  @SuppressFBWarnings("URF_UNREAD_FIELD")
  private Future mRecomputeExecutionService;

  /**
   * @param baseDirectory the base journal directory
   * @return the journal directory for this master
   */
  public static String getJournalDirectory(String baseDirectory) {
    return PathUtils.concatPath(baseDirectory, Constants.LINEAGE_MASTER_NAME);
  }

  /**
   * Creates a new instance of {@link LineageMaster}.
   *
   * @param fileSystemMaster the file system master
   * @param journal the journal
   */
  public LineageMaster(FileSystemMaster fileSystemMaster, Journal journal) {
    super(journal, 2);

    mConfiguration = MasterContext.getConf();
    mFileSystemMaster = Preconditions.checkNotNull(fileSystemMaster);
    mLineageIdGenerator = new LineageIdGenerator();
    mLineageStore = new LineageStore(mLineageIdGenerator);
  }

  @Override
  public Map getServices() {
    Map services = new HashMap<>();
    services.put(Constants.LINEAGE_MASTER_CLIENT_SERVICE_NAME,
        new LineageMasterClientService.Processor<>(new LineageMasterClientServiceHandler(this)));
    return services;
  }

  @Override
  public String getName() {
    return Constants.LINEAGE_MASTER_NAME;
  }

  @Override
  public void processJournalEntry(JournalEntry entry) throws IOException {
    Message innerEntry = JournalProtoUtils.unwrap(entry);
    if (innerEntry instanceof LineageEntry) {
      mLineageStore.addLineageFromJournal((LineageEntry) innerEntry);
    } else if (innerEntry instanceof LineageIdGeneratorEntry) {
      mLineageIdGenerator.initFromJournalEntry((LineageIdGeneratorEntry) innerEntry);
    } else if (innerEntry instanceof DeleteLineageEntry) {
      deleteLineageFromEntry((DeleteLineageEntry) innerEntry);
    } else {
      throw new IOException(ExceptionMessage.UNEXPECTED_JOURNAL_ENTRY.getMessage(innerEntry));
    }
  }

  @Override
  public void start(boolean isLeader) throws IOException {
    super.start(isLeader);
    if (isLeader) {
      mCheckpointExecutionService = getExecutorService()
          .submit(new HeartbeatThread(HeartbeatContext.MASTER_CHECKPOINT_SCHEDULING,
              new CheckpointSchedulingExcecutor(this, mFileSystemMaster),
              mConfiguration.getInt(Constants.MASTER_LINEAGE_CHECKPOINT_INTERVAL_MS)));
      mRecomputeExecutionService = getExecutorService()
          .submit(new HeartbeatThread(HeartbeatContext.MASTER_FILE_RECOMPUTATION,
              new RecomputeExecutor(new RecomputePlanner(mLineageStore, mFileSystemMaster),
                  mFileSystemMaster),
              mConfiguration.getInt(Constants.MASTER_LINEAGE_RECOMPUTE_INTERVAL_MS)));
    }
  }

  @Override
  public synchronized void streamToJournalCheckpoint(JournalOutputStream outputStream)
      throws IOException {
    mLineageStore.streamToJournalCheckpoint(outputStream);
    outputStream.writeEntry(mLineageIdGenerator.toJournalEntry());
  }

  /**
   * @return a lineage store view wrapping the contained lineage store
   */
  public LineageStoreView getLineageStoreView() {
    return new LineageStoreView(mLineageStore);
  }

  /**
   * Creates a lineage. It creates a new file for each output file.
   *
   * @param inputFiles the input files
   * @param outputFiles the output files
   * @param job the job
   * @return the id of the created lineage
   * @throws InvalidPathException if the path to the input file is invalid
   * @throws FileAlreadyExistsException if the output file already exists
   * @throws BlockInfoException if fails to create the output file
   * @throws IOException if the creation of a file fails
   * @throws AccessControlException if the permission check fails
   * @throws FileDoesNotExistException if any of the input files do not exist
   */
  public synchronized long createLineage(List inputFiles, List outputFiles,
      Job job) throws InvalidPathException, FileAlreadyExistsException, BlockInfoException,
      IOException, AccessControlException, FileDoesNotExistException {
    List inputAlluxioFiles = new ArrayList<>();
    for (AlluxioURI inputFile : inputFiles) {
      long fileId;
      fileId = mFileSystemMaster.getFileId(inputFile);
      if (fileId == IdUtils.INVALID_FILE_ID) {
        throw new FileDoesNotExistException(
            ExceptionMessage.LINEAGE_INPUT_FILE_NOT_EXIST.getMessage(inputFile));
      }
      inputAlluxioFiles.add(fileId);
    }
    // create output files
    List outputAlluxioFiles = new ArrayList<>();
    for (AlluxioURI outputFile : outputFiles) {
      long fileId;
      // TODO(yupeng): delete the placeholder files if the creation fails.
      // Create the file initialized with block size 1KB as placeholder.
      CreateFileOptions options =
          CreateFileOptions.defaults().setRecursive(true).setBlockSizeBytes(Constants.KB);
      fileId = mFileSystemMaster.createFile(outputFile, options);
      outputAlluxioFiles.add(fileId);
    }

    LOG.info("Create lineage of input:{}, output:{}, job:{}", inputAlluxioFiles, outputAlluxioFiles,
        job);
    long lineageId = mLineageStore.createLineage(inputAlluxioFiles, outputAlluxioFiles, job);

    writeJournalEntry(mLineageIdGenerator.toJournalEntry());
    writeJournalEntry(mLineageStore.getLineage(lineageId).toJournalEntry());
    flushJournal();
    return lineageId;
  }

  /**
   * Deletes a lineage.
   *
   * @param lineageId id the of lineage
   * @param cascade the flag if to delete all the downstream lineages
   * @return true if the lineage is deleted, false otherwise
   * @throws LineageDoesNotExistException the lineage does not exist
   * @throws LineageDeletionException the lineage deletion fails
   */
  public synchronized boolean deleteLineage(long lineageId, boolean cascade)
      throws LineageDoesNotExistException, LineageDeletionException {
    deleteLineageInternal(lineageId, cascade);
    DeleteLineageEntry deleteLineage = DeleteLineageEntry.newBuilder()
        .setLineageId(lineageId)
        .setCascade(cascade)
        .build();
    writeJournalEntry(JournalEntry.newBuilder().setDeleteLineage(deleteLineage).build());
    flushJournal();
    return true;
  }

  private boolean deleteLineageInternal(long lineageId, boolean cascade)
      throws LineageDoesNotExistException, LineageDeletionException {
    Lineage lineage = mLineageStore.getLineage(lineageId);
    LineageDoesNotExistException.check(lineage != null, ExceptionMessage.LINEAGE_DOES_NOT_EXIST,
        lineageId);

    // there should not be child lineage if not cascade
    if (!cascade && !mLineageStore.getChildren(lineage).isEmpty()) {
      throw new LineageDeletionException(
          ExceptionMessage.DELETE_LINEAGE_WITH_CHILDREN.getMessage(lineageId));
    }

    LOG.info("Delete lineage {}", lineageId);
    mLineageStore.deleteLineage(lineageId);
    return true;
  }

  private void deleteLineageFromEntry(DeleteLineageEntry entry) {
    try {
      deleteLineageInternal(entry.getLineageId(), entry.getCascade());
    } catch (LineageDoesNotExistException e) {
      LOG.error("Failed to delete lineage {}", entry.getLineageId(), e);
    } catch (LineageDeletionException e) {
      LOG.error("Failed to delete lineage {}", entry.getLineageId(), e);
    }
  }

  /**
   * Reinitializes the file when the file is lost or not completed.
   *
   * @param path the path to the file
   * @param blockSizeBytes the block size
   * @param ttl the TTL
   * @return the id of the reinitialized file when the file is lost or not completed, -1 otherwise
   * @throws InvalidPathException the file path is invalid
   * @throws LineageDoesNotExistException when the file does not exist
   * @throws AccessControlException if permission checking fails
   * @throws FileDoesNotExistException if the path does not exist
   */
  public synchronized long reinitializeFile(String path, long blockSizeBytes, long ttl)
      throws InvalidPathException, LineageDoesNotExistException, AccessControlException,
      FileDoesNotExistException {
    long fileId = mFileSystemMaster.getFileId(new AlluxioURI(path));
    FileInfo fileInfo;
    try {
      fileInfo = mFileSystemMaster.getFileInfo(fileId);
      if (!fileInfo.isCompleted() || mFileSystemMaster.getLostFiles().contains(fileId)) {
        LOG.info("Recreate the file {} with block size of {} bytes", path, blockSizeBytes);
        return mFileSystemMaster.reinitializeFile(new AlluxioURI(path), blockSizeBytes, ttl);
      }
    } catch (FileDoesNotExistException e) {
      throw new LineageDoesNotExistException(
          ExceptionMessage.MISSING_REINITIALIZE_FILE.getMessage(path));
    }
    return -1;
  }

  /**
   * @return the list of all the {@link LineageInfo}s
   * @throws LineageDoesNotExistException if the lineage does not exist
   * @throws FileDoesNotExistException if any associated file does not exist
   */
  public synchronized List getLineageInfoList()
      throws LineageDoesNotExistException, FileDoesNotExistException {
    List lineages = new ArrayList<>();

    for (Lineage lineage : mLineageStore.getAllInTopologicalOrder()) {
      LineageInfo info = new LineageInfo();
      List parents = new ArrayList<>();
      for (Lineage parent : mLineageStore.getParents(lineage)) {
        parents.add(parent.getId());
      }
      info.setParents(parents);
      List children = new ArrayList<>();
      for (Lineage child : mLineageStore.getChildren(lineage)) {
        children.add(child.getId());
      }
      info.setChildren(children);
      info.setId(lineage.getId());
      List inputFiles = new ArrayList<>();
      for (long inputFileId : lineage.getInputFiles()) {
        inputFiles.add(mFileSystemMaster.getPath(inputFileId).toString());
      }
      info.setInputFiles(inputFiles);
      List outputFiles = new ArrayList<>();
      for (long outputFileId : lineage.getOutputFiles()) {
        outputFiles.add(mFileSystemMaster.getPath(outputFileId).toString());
      }
      info.setOutputFiles(outputFiles);
      info.setCreationTimeMs(lineage.getCreationTime());
      info.setJob(((CommandLineJob) lineage.getJob()).generateCommandLineJobInfo());

      lineages.add(info);
    }
    return lineages;
  }

  /**
   * Schedules persistence for the output files of the given checkpoint plan.
   *
   * @param plan the plan for checkpointing
   */
  public synchronized void scheduleCheckpoint(CheckpointPlan plan) {
    for (long lineageId : plan.getLineagesToCheckpoint()) {
      Lineage lineage = mLineageStore.getLineage(lineageId);
      // schedule the lineage file for persistence
      for (long file : lineage.getOutputFiles()) {
        try {
          mFileSystemMaster.scheduleAsyncPersistence(mFileSystemMaster.getPath(file));
        } catch (AlluxioException e) {
          LOG.error("Failed to persist the file {}.", file, e);
        }
      }
    }
  }

  /**
   * Polls the files to send to the given worker for checkpoint.
   *
   * @param path the path to the file
   * @throws FileDoesNotExistException if the file does not exist
   * @throws AccessControlException if permission checking fails
   * @throws InvalidPathException if the path is invalid
   */
  public synchronized void reportLostFile(String path) throws FileDoesNotExistException,
      AccessControlException, InvalidPathException {
    long fileId = mFileSystemMaster.getFileId(new AlluxioURI(path));
    mFileSystemMaster.reportLostFile(fileId);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy