All Downloads are FREE. Search and download functionality uses the official Maven repository.

eu.stratosphere.pact.runtime.cache.FileCache Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.pact.runtime.cache;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

import eu.stratosphere.api.common.cache.DistributedCache;
import eu.stratosphere.configuration.ConfigConstants;
import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.core.fs.FSDataInputStream;
import eu.stratosphere.core.fs.FSDataOutputStream;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.fs.local.LocalFileSystem;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.taskmanager.runtime.ExecutorThreadFactory;
import eu.stratosphere.nephele.util.IOUtils;

/**
 * FileCache is used to create the local tmp file for the registered cache file when a task is deployed. Also when the
 * task is unregistered, it will remove the local tmp file. Given that another task from the same job may be registered
 * shortly after, there exists a 5 second delay	before clearing the local tmp file.
 */
public class FileCache {

	private LocalFileSystem lfs = new LocalFileSystem();

	private Map, Integer> count = new HashMap, Integer>();

	private final ScheduledExecutorService executorService = Executors.newScheduledThreadPool(10, ExecutorThreadFactory.INSTANCE);

	/**
	 * If the file doesn't exists locally, it will copy the file to the temp directory.
	 */
	public FutureTask createTmpFile(String name, String filePath, JobID jobID) {

		synchronized (count) {
			Pair key = new ImmutablePair(jobID,name);
			if (count.containsKey(key)) {
				count.put(key, count.get(key) + 1);
			} else {
				count.put(key, 1);
			}
		}
		CopyProcess cp = new CopyProcess(name, filePath, jobID);
		FutureTask copyTask = new FutureTask(cp);
		executorService.submit(copyTask);
		return copyTask;
	}

	/**
	 * Leave a 5 seconds delay to clear the local file.
	 */
	public void deleteTmpFile(String name, JobID jobID) {
		DeleteProcess dp = new DeleteProcess(name, jobID, count.get(new ImmutablePair(jobID,name)));
		executorService.schedule(dp, 5000L, TimeUnit.MILLISECONDS);
	}

	public Path getTempDir(JobID jobID, String name) {
		return new Path(GlobalConfiguration.getString(ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
			ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH), DistributedCache.TMP_PREFIX + jobID.toString() + "_" +  name);
	}

	public void shutdown() {
		if (this.executorService != null) {
			this.executorService.shutdown();
			try {
				this.executorService.awaitTermination(5000L, TimeUnit.MILLISECONDS);
			} catch (InterruptedException e) {
				throw new RuntimeException("Error shutting down the file cache", e);
			}
		}
	}

	/**
	 * Asynchronous file copy process
	 */
	private class CopyProcess implements Callable {
		private JobID jobID;
		private String name;
		private String filePath;

		public CopyProcess(String name, String filePath, JobID jobID) {
			this.name = name;
			this.filePath = filePath;
			this.jobID = jobID;
		}
		public Path call()  {
			Path tmp = getTempDir(jobID, name);
			try {
				if (!lfs.exists(tmp)) {
					FSDataOutputStream lfsOutput = lfs.create(tmp, false);
					Path distributedPath = new Path(filePath);
					FileSystem fs = distributedPath.getFileSystem();
					FSDataInputStream fsInput = fs.open(distributedPath);
					IOUtils.copyBytes(fsInput, lfsOutput);
				}
			} catch (IOException e1) {
				throw new RuntimeException("Error copying a file from hdfs to the local fs", e1);
			}
			return tmp;
		}
	}
	/**
	 * If no task is using this file after 5 seconds, clear it.
	 */
	private class DeleteProcess implements Runnable {
		private String name;
		private JobID jobID;
		private int oldCount;

		public DeleteProcess(String name, JobID jobID, int c) {
			this.name = name;
			this.jobID = jobID;
			this.oldCount = c;
		}

		public void run() {
			synchronized (count) {
				if (count.get(new ImmutablePair(jobID, name)) != oldCount) {
					return;
				}
			}
			Path tmp = getTempDir(jobID, name);
			try {
				if (lfs.exists(tmp)) {
					lfs.delete(tmp, true);
				}
			} catch (IOException e1) {
				throw new RuntimeException("Error deleting the file", e1);
			}
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy