// eu.stratosphere.pact.runtime.cache.FileCache (Maven / Gradle / Ivy artifact listing)
/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.pact.runtime.cache;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import eu.stratosphere.api.common.cache.DistributedCache;
import eu.stratosphere.configuration.ConfigConstants;
import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.core.fs.FSDataInputStream;
import eu.stratosphere.core.fs.FSDataOutputStream;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.fs.local.LocalFileSystem;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.taskmanager.runtime.ExecutorThreadFactory;
import eu.stratosphere.nephele.util.IOUtils;
/**
* FileCache is used to create the local tmp file for the registered cache file when a task is deployed. Also when the
* task is unregistered, it will remove the local tmp file. Given that another task from the same job may be registered
* shortly after, there exists a 5 second delay before clearing the local tmp file.
*/
public class FileCache {
private LocalFileSystem lfs = new LocalFileSystem();
private Map, Integer> count = new HashMap, Integer>();
private final ScheduledExecutorService executorService = Executors.newScheduledThreadPool(10, ExecutorThreadFactory.INSTANCE);
/**
* If the file doesn't exists locally, it will copy the file to the temp directory.
*/
public FutureTask createTmpFile(String name, String filePath, JobID jobID) {
synchronized (count) {
Pair key = new ImmutablePair(jobID,name);
if (count.containsKey(key)) {
count.put(key, count.get(key) + 1);
} else {
count.put(key, 1);
}
}
CopyProcess cp = new CopyProcess(name, filePath, jobID);
FutureTask copyTask = new FutureTask(cp);
executorService.submit(copyTask);
return copyTask;
}
/**
* Leave a 5 seconds delay to clear the local file.
*/
public void deleteTmpFile(String name, JobID jobID) {
DeleteProcess dp = new DeleteProcess(name, jobID, count.get(new ImmutablePair(jobID,name)));
executorService.schedule(dp, 5000L, TimeUnit.MILLISECONDS);
}
public Path getTempDir(JobID jobID, String name) {
return new Path(GlobalConfiguration.getString(ConfigConstants.TASK_MANAGER_TMP_DIR_KEY,
ConfigConstants.DEFAULT_TASK_MANAGER_TMP_PATH), DistributedCache.TMP_PREFIX + jobID.toString() + "_" + name);
}
public void shutdown() {
if (this.executorService != null) {
this.executorService.shutdown();
try {
this.executorService.awaitTermination(5000L, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
throw new RuntimeException("Error shutting down the file cache", e);
}
}
}
/**
* Asynchronous file copy process
*/
private class CopyProcess implements Callable {
private JobID jobID;
private String name;
private String filePath;
public CopyProcess(String name, String filePath, JobID jobID) {
this.name = name;
this.filePath = filePath;
this.jobID = jobID;
}
public Path call() {
Path tmp = getTempDir(jobID, name);
try {
if (!lfs.exists(tmp)) {
FSDataOutputStream lfsOutput = lfs.create(tmp, false);
Path distributedPath = new Path(filePath);
FileSystem fs = distributedPath.getFileSystem();
FSDataInputStream fsInput = fs.open(distributedPath);
IOUtils.copyBytes(fsInput, lfsOutput);
}
} catch (IOException e1) {
throw new RuntimeException("Error copying a file from hdfs to the local fs", e1);
}
return tmp;
}
}
/**
* If no task is using this file after 5 seconds, clear it.
*/
private class DeleteProcess implements Runnable {
private String name;
private JobID jobID;
private int oldCount;
public DeleteProcess(String name, JobID jobID, int c) {
this.name = name;
this.jobID = jobID;
this.oldCount = c;
}
public void run() {
synchronized (count) {
if (count.get(new ImmutablePair(jobID, name)) != oldCount) {
return;
}
}
Path tmp = getTempDir(jobID, name);
try {
if (lfs.exists(tmp)) {
lfs.delete(tmp, true);
}
} catch (IOException e1) {
throw new RuntimeException("Error deleting the file", e1);
}
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy