All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.execution.librarycache;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;

import org.apache.flink.runtime.blob.BlobKey;
import org.apache.flink.runtime.blob.BlobService;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.api.common.JobID;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * For each job graph that is submitted to the system the library cache manager maintains
 * a set of libraries (typically JAR files) which the job requires to run. The library cache manager
 * caches library files in order to avoid unnecessary retransmission of data.
 *
 * <p>Bookkeeping happens on two levels: every registered job keeps the set of execution attempts
 * that currently hold a reference to it, and every BLOB key keeps a counter that is increased when
 * a job entry using it is created and decreased when that entry is removed. BLOBs whose counter is
 * zero are deleted by the periodic cleanup, which is this object itself ({@link #run()}) scheduled
 * on an internal daemon {@link Timer}.
 *
 * <p>All accesses to the internal maps are guarded by a single lock object.
 */
public final class BlobLibraryCacheManager extends TimerTask implements LibraryCacheManager {

	private static final Logger LOG = LoggerFactory.getLogger(BlobLibraryCacheManager.class);

	/** Placeholder attempt id under which a job as a whole (rather than a task) holds its reference */
	private static final ExecutionAttemptID JOB_ATTEMPT_ID = new ExecutionAttemptID(-1, -1);

	// --------------------------------------------------------------------------------------------

	/** The global lock to synchronize operations */
	private final Object lockObject = new Object();

	/** Registered entries per job */
	private final Map<JobID, LibraryCacheEntry> cacheEntries = new HashMap<>();

	/** Map to store the number of references to a specific file */
	private final Map<BlobKey, Integer> blobKeyReferenceCounters = new HashMap<>();

	/** The blob service to download libraries */
	private final BlobService blobService;

	/** Timer that periodically triggers {@link #run()} */
	private final Timer cleanupTimer;

	// --------------------------------------------------------------------------------------------

	/**
	 * Creates the library cache manager and schedules its periodic cleanup.
	 *
	 * @param blobService the service used to resolve and delete BLOBs, must not be null
	 * @param cleanupInterval initial delay and period of the cleanup task, passed unchanged to
	 *                        {@link Timer#schedule(TimerTask, long, long)} (milliseconds)
	 */
	public BlobLibraryCacheManager(BlobService blobService, long cleanupInterval) {
		this.blobService = Preconditions.checkNotNull(blobService, "The blob service must not be null.");

		// Initializing the clean up task (daemon timer, so it does not keep the JVM alive)
		this.cleanupTimer = new Timer(true);
		this.cleanupTimer.schedule(this, cleanupInterval, cleanupInterval);
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Registers the job itself as a reference holder, using a fixed placeholder attempt id.
	 *
	 * @see #registerTask(JobID, ExecutionAttemptID, Collection, Collection)
	 */
	@Override
	public void registerJob(JobID id, Collection<BlobKey> requiredJarFiles, Collection<URL> requiredClasspaths)
			throws IOException {
		registerTask(id, JOB_ATTEMPT_ID, requiredJarFiles, requiredClasspaths);
	}

	/**
	 * Adds the given execution attempt as a reference holder for the job's libraries.
	 *
	 * <p>If the job has no entry yet, the URLs of all required BLOBs are fetched from the blob
	 * service, their reference counters are increased and a new entry with a user code class
	 * loader over the BLOB URLs followed by the classpath URLs is created. If the job already
	 * has an entry, the attempt is only added to it.
	 *
	 * @param jobId the job to register for, must not be null
	 * @param task the execution attempt holding the reference, must not be null
	 * @param requiredJarFiles keys of the required BLOBs; {@code null} is treated as empty
	 * @param requiredClasspaths additional classpath URLs; {@code null} is treated as empty
	 * @throws IOException if a library URL could not be obtained; reference counters increased
	 *                     by this call are rolled back before the exception is thrown
	 * @throws IllegalStateException if the job is already registered with a set of libraries
	 *                               that does not contain all of {@code requiredJarFiles}
	 */
	@Override
	public void registerTask(JobID jobId, ExecutionAttemptID task, Collection<BlobKey> requiredJarFiles,
			Collection<URL> requiredClasspaths) throws IOException {
		Preconditions.checkNotNull(jobId, "The JobId must not be null.");
		Preconditions.checkNotNull(task, "The task execution id must not be null.");

		if (requiredJarFiles == null) {
			requiredJarFiles = Collections.emptySet();
		}
		if (requiredClasspaths == null) {
			requiredClasspaths = Collections.emptySet();
		}

		synchronized (lockObject) {
			LibraryCacheEntry entry = cacheEntries.get(jobId);

			if (entry == null) {
				// create a new entry in the library cache

				// De-duplicate the keys (keeping the given order for the class path). The entry
				// stores the libraries as a set and releases each key exactly once on removal,
				// so acquiring a duplicated key more than once would leave a counter that
				// never drops to zero.
				Set<BlobKey> uniqueJarFiles = new LinkedHashSet<>(requiredJarFiles);
				BlobKey[] keys = uniqueJarFiles.toArray(new BlobKey[uniqueJarFiles.size()]);
				URL[] urls = new URL[keys.length + requiredClasspaths.size()];

				// 'count' is the number of keys whose reference was successfully acquired
				int count = 0;
				try {
					for (; count < keys.length; count++) {
						BlobKey blobKey = keys[count];
						urls[count] = registerReferenceToBlobKeyAndGetURL(blobKey);
					}
				}
				catch (Throwable t) {
					// undo the reference count increases
					try {
						for (int i = 0; i < count; i++) {
							unregisterReferenceToBlobKey(keys[i]);
						}
					}
					catch (Throwable tt) {
						LOG.error("Error while updating library reference counters.", tt);
					}

					// rethrow or wrap
					ExceptionUtils.tryRethrowIOException(t);
					throw new IOException("Library cache could not register the user code libraries.", t);
				}

				// add classpaths
				for (URL url : requiredClasspaths) {
					urls[count] = url;
					count++;
				}

				cacheEntries.put(jobId, new LibraryCacheEntry(uniqueJarFiles, urls, task));
			}
			else {
				entry.register(task, requiredJarFiles);
			}
		}
	}

	/**
	 * Removes the reference that was added through {@code registerJob}.
	 *
	 * @see #unregisterTask(JobID, ExecutionAttemptID)
	 */
	@Override
	public void unregisterJob(JobID id) {
		unregisterTask(id, JOB_ATTEMPT_ID);
	}

	/**
	 * Removes the given execution attempt from the job's reference holders. When no holder
	 * remains, the job's entry is dropped, its class loader is closed and the reference counter
	 * of each of its libraries is decreased. Unknown jobs are ignored.
	 *
	 * @param jobId the job to unregister from, must not be null
	 * @param task the execution attempt whose reference is released, must not be null
	 */
	@Override
	public void unregisterTask(JobID jobId, ExecutionAttemptID task) {
		Preconditions.checkNotNull(jobId, "The JobId must not be null.");
		Preconditions.checkNotNull(task, "The task execution id must not be null.");

		synchronized (lockObject) {
			LibraryCacheEntry entry = cacheEntries.get(jobId);

			if (entry != null) {
				if (entry.unregister(task)) {
					cacheEntries.remove(jobId);

					entry.releaseClassLoader();

					for (BlobKey key : entry.getLibraries()) {
						unregisterReferenceToBlobKey(key);
					}
				}
			}
			// else has already been unregistered
		}
	}

	/**
	 * Returns the user code class loader of a registered job.
	 *
	 * @param id the job id, must not be null
	 * @return the class loader created when the job's entry was registered
	 * @throws IllegalArgumentException if {@code id} is null
	 * @throws IllegalStateException if no entry is registered for the job
	 */
	@Override
	public ClassLoader getClassLoader(JobID id) {
		if (id == null) {
			throw new IllegalArgumentException("The JobId must not be null.");
		}

		synchronized (lockObject) {
			LibraryCacheEntry entry = cacheEntries.get(id);
			if (entry != null) {
				return entry.getClassLoader();
			} else {
				throw new IllegalStateException("No libraries are registered for job " + id);
			}
		}
	}

	/**
	 * Returns a {@link File} for the path component of the URL that the blob service reports
	 * for the given key.
	 *
	 * @throws IOException if the blob service cannot provide the URL
	 */
	@Override
	public File getFile(BlobKey blobKey) throws IOException {
		return new File(blobService.getURL(blobKey).getFile());
	}

	/** Returns the port reported by the underlying blob service. */
	public int getBlobServerPort() {
		return blobService.getPort();
	}

	/**
	 * Stops the periodic cleanup, runs one last cleanup pass and shuts down the blob service.
	 *
	 * @throws IOException if shutting down the blob service fails
	 */
	@Override
	public void shutdown() throws IOException {
		// Cancel first: no further periodic runs are started, and the timer thread is released
		// even if shutting down the blob service below throws.
		cleanupTimer.cancel();

		try {
			run();
		} catch (Throwable t) {
			LOG.warn("Failed to run clean up task before shutdown", t);
		}

		blobService.shutdown();
	}

	/**
	 * Cleans up blobs which are not referenced anymore. A failure to delete one blob is logged
	 * and does not stop the cleanup of the remaining ones; the failed key keeps its counter
	 * entry and is therefore tried again on the next run.
	 */
	@Override
	public void run() {
		synchronized (lockObject) {
			Iterator<Map.Entry<BlobKey, Integer>> entryIter = blobKeyReferenceCounters.entrySet().iterator();

			while (entryIter.hasNext()) {
				Map.Entry<BlobKey, Integer> entry = entryIter.next();
				BlobKey key = entry.getKey();
				int references = entry.getValue();

				try {
					if (references <= 0) {
						blobService.delete(key);
						// only reached if the delete did not throw
						entryIter.remove();
					}
				} catch (Throwable t) {
					LOG.warn("Could not delete file with blob key " + key, t);
				}
			}
		}
	}

	/**
	 * Returns the number of execution attempts (including the job placeholder, if registered)
	 * that currently reference the given job, or 0 if the job has no entry.
	 */
	public int getNumberOfReferenceHolders(JobID jobId) {
		synchronized (lockObject) {
			LibraryCacheEntry entry = cacheEntries.get(jobId);
			return entry == null ? 0 : entry.getNumberOfReferenceHolders();
		}
	}

	/**
	 * Returns the number of BLOB keys that currently have a reference counter entry. This
	 * includes keys whose counter is zero and that have not been cleaned up yet.
	 */
	int getNumberOfCachedLibraries() {
		// the map is not thread-safe and is mutated under the lock elsewhere
		synchronized (lockObject) {
			return blobKeyReferenceCounters.size();
		}
	}

	/**
	 * Fetches the URL for the given key and then increases the key's reference counter.
	 * Callers must hold {@code lockObject}.
	 *
	 * @throws IOException if the blob service cannot provide the URL; the counter is untouched
	 */
	private URL registerReferenceToBlobKeyAndGetURL(BlobKey key) throws IOException {
		// it is important that we fetch the URL before increasing the counter.
		// in case the URL cannot be created (failed to fetch the BLOB), we have no stale counter
		try {
			URL url = blobService.getURL(key);

			Integer references = blobKeyReferenceCounters.get(key);
			int newReferences = references == null ? 1 : references + 1;
			blobKeyReferenceCounters.put(key, newReferences);

			return url;
		}
		catch (IOException e) {
			throw new IOException("Cannot get library with hash " + key, e);
		}
	}

	/**
	 * Decreases the reference counter of the given key, never going below zero.
	 * Callers must hold {@code lockObject}.
	 */
	private void unregisterReferenceToBlobKey(BlobKey key) {
		Integer references = blobKeyReferenceCounters.get(key);

		// an unknown key still gets a zero entry, so that the cleanup timer removes any
		// present libraries
		int newReferences = references == null ? 0 : Math.max(references - 1, 0);
		blobKeyReferenceCounters.put(key, newReferences);
	}


	// --------------------------------------------------------------------------------------------

	/**
	 * An entry in the per-job library cache. Tracks which execution attempts
	 * still reference the libraries. Once none reference it any more, the
	 * libraries can be cleaned up.
	 *
	 * <p>The entry does no synchronization of its own; the enclosing manager accesses it
	 * while holding its lock.
	 */
	private static class LibraryCacheEntry {

		private final FlinkUserCodeClassLoader classLoader;

		private final Set<ExecutionAttemptID> referenceHolders;

		private final Set<BlobKey> libraries;


		/**
		 * @param libraries keys of the libraries of the job (copied into a set)
		 * @param libraryURLs URLs handed to the user code class loader
		 * @param initialReference the first execution attempt referencing this entry
		 */
		public LibraryCacheEntry(Collection<BlobKey> libraries, URL[] libraryURLs, ExecutionAttemptID initialReference) {
			this.classLoader = new FlinkUserCodeClassLoader(libraryURLs);
			this.libraries = new HashSet<>(libraries);
			this.referenceHolders = new HashSet<>();
			this.referenceHolders.add(initialReference);
		}


		public ClassLoader getClassLoader() {
			return classLoader;
		}

		public Set<BlobKey> getLibraries() {
			return libraries;
		}

		/**
		 * Adds another reference holder.
		 *
		 * @throws IllegalStateException if {@code keys} contains a library that this entry
		 *                               was not created with
		 */
		public void register(ExecutionAttemptID task, Collection<BlobKey> keys) {
			if (!libraries.containsAll(keys)) {
				throw new IllegalStateException(
						"The library registration references a different set of libraries than previous registrations for this job.");
			}

			this.referenceHolders.add(task);
		}

		/**
		 * Removes a reference holder.
		 *
		 * @return true if no reference holder is left afterwards
		 */
		public boolean unregister(ExecutionAttemptID task) {
			referenceHolders.remove(task);
			return referenceHolders.isEmpty();
		}

		public int getNumberOfReferenceHolders() {
			return referenceHolders.size();
		}

		/**
		 * Closes the class loader, presumably so that file handles it holds on the library
		 * files are released before the libraries get deleted. A failure to close is logged
		 * and otherwise ignored.
		 */
		void releaseClassLoader() {
			try {
				classLoader.close();
			} catch (IOException e) {
				// pass the exception along so the cause of the failure is not lost
				LOG.warn("Failed to release user code class loader for " + Arrays.toString(libraries.toArray()), e);
			}
		}
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy