
hudson.model.Computer Maven / Gradle / Ivy
Show all versions of hudson-core Show documentation
/*******************************************************************************
*
* Copyright (c) 2004-2012 Oracle Corporation.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*
* Kohsuke Kawaguchi, Winston Prakash, Seiji Sogabe, Stephen Connolly, Thomas J. Black, Tom Huybrechts
*
*******************************************************************************/
package hudson.model;
import hudson.EnvVars;
import hudson.Util;
import hudson.cli.declarative.CLIMethod;
import hudson.console.AnnotatedLargeText;
import hudson.model.Descriptor.FormException;
import hudson.model.queue.WorkUnit;
import hudson.node_monitors.NodeMonitor;
import hudson.remoting.Channel;
import hudson.remoting.VirtualChannel;
import hudson.remoting.Callable;
import hudson.security.*;
import hudson.slaves.ComputerLauncher;
import hudson.slaves.RetentionStrategy;
import hudson.slaves.WorkspaceList;
import hudson.slaves.OfflineCause;
import hudson.slaves.OfflineCause.ByCLI;
import hudson.tasks.BuildWrapper;
import hudson.tasks.Publisher;
import hudson.util.BuildHistoryList;
import hudson.util.DaemonThreadFactory;
import hudson.util.ExceptionCatchingThreadFactory;
import hudson.util.RemotingDiagnostics;
import hudson.util.RemotingDiagnostics.HeapDump;
import hudson.util.RunList;
import hudson.util.Futures;
import org.kohsuke.stapler.StaplerRequest;
import org.kohsuke.stapler.StaplerResponse;
import org.kohsuke.stapler.QueryParameter;
import org.kohsuke.stapler.HttpResponses;
import org.kohsuke.stapler.HttpResponse;
import org.kohsuke.stapler.HttpRedirect;
import org.kohsuke.stapler.export.Exported;
import org.kohsuke.stapler.export.ExportedBean;
import org.kohsuke.args4j.Option;
import javax.servlet.ServletException;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Enumeration;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ExecutionException;
import java.util.logging.LogRecord;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.nio.charset.Charset;
import java.net.InetAddress;
import java.net.NetworkInterface;
import java.net.Inet4Address;
import java.util.Collection;
import org.eclipse.hudson.security.HudsonSecurityEntitiesHolder;
import org.eclipse.hudson.security.HudsonSecurityManager;
import org.eclipse.hudson.security.team.Team;
import org.eclipse.hudson.security.team.TeamManager;
/**
* Represents the running state of a remote computer that holds
* {@link Executor}s.
*
* {@link Executor}s on one {@link Computer} are transparently
* interchangeable (that is the definition of {@link Computer}.)
*
* <p>
* This object is related to {@link Node} but they have some significant
* differences. {@link Computer} primarily works as a holder of
* {@link Executor}s, so if a {@link Node} is configured (probably temporarily)
* with 0 executors, you won't have a {@link Computer} object for it.
*
* Also, even if you remove a {@link Node}, it takes time for the corresponding
* {@link Computer} to be removed, if some builds are already in progress on
* that node. Or when the node configuration is changed, unaffected
* {@link Computer} object remains intact, while all the {@link Node} objects
* will go away.
*
* <p>
* This object also serves UI (since {@link Node} is an interface and can't
* have related side pages.)
*
* @author Kohsuke Kawaguchi
*/
@ExportedBean
public /*transient*/ abstract class Computer extends Actionable implements AccessControlled, ExecutorListener {
/**
* Parameter in request for deleting the slave.
*/
private static final String DELETE_MODE_KEY = "deleteMode";
/**
* Key for delete mode when the jobs are stopping before the slave will
* delete
*/
private static final String DELETE_MODE_STOP_KEY = "0";
/**
* Regular executors of this computer. Grown by
* {@link #setNumExecutors(int)} and shrunk by
* {@link #removeExecutor(Executor)}; its size may temporarily exceed
* {@link #numExecutors}.
*/
private final CopyOnWriteArrayList executors = new CopyOnWriteArrayList();
// TODO:
/**
* One-off executors. They make {@link #isIdle()} return false while present
* but are not counted by {@link #countExecutors()}.
*/
private final CopyOnWriteArrayList oneOffExecutors = new CopyOnWriteArrayList();
/**
* Configured number of executors, as last passed to
* {@link #setNumExecutors(int)}.
*/
private int numExecutors;
/**
* Contains info about reason behind computer being offline.
*/
protected volatile OfflineCause offlineCause;
/**
* Time in ms since epoch recorded by {@link #connect(boolean)}; reset to 0
* by the default disconnect implementations.
*/
private long connectTime = 0;
/**
* True if Hudson shouldn't start new builds on this node.
*/
private boolean temporarilyOffline;
/**
* {@link Node} object may be created and deleted independently from this
* object.
*
* Set by {@link #setNode(Node)}: the node's name for a {@link Slave},
* {@code null} otherwise.
*/
protected String nodeName;
/**
* @see #getHostName()
*/
private volatile String cachedHostName;
// NOTE(review): presumably tells whether cachedHostName has been computed;
// the code using it is outside this section - confirm.
private volatile boolean hostNameCached;
/**
* Workspace allocation coordinator returned by {@link #getWorkspaceList()}.
*/
private final WorkspaceList workspaceList = new WorkspaceList();
/**
* Creates the computer for the given node and initializes its name and
* executors through {@link #setNode(Node)}.
*
* @param node the node this computer represents; expected to be configured
* with a non-zero number of executors (checked only when assertions are
* enabled).
*/
public Computer(Node node) {
assert node.getNumExecutors() != 0 : "Computer created with 0 executors";
setNode(node);
}
/**
 * Locates the file that receives the log of the remote agent.
 *
 * @return the file {@code slave-<nodeName>.log} directly under the Hudson
 *         root directory.
 */
protected File getLogFile() {
    final String logFileName = "slave-" + nodeName + ".log";
    return new File(Hudson.getInstance().getRootDir(), logFileName);
}
/**
 * Returns the coordinator of workspace allocation for this computer.
 *
 * @return the single {@link WorkspaceList} owned by this computer.
 */
public WorkspaceList getWorkspaceList() {
    return this.workspaceList;
}
/**
 * Reads the slave log into a string.
 *
 * @return the content of {@link #getLogFile()} as loaded by
 *         {@link Util#loadFile(File)}.
 * @throws IOException if loading the log file fails.
 */
public String getLog() throws IOException {
    final File logFile = getLogFile();
    return Util.loadFile(logFile);
}
/**
 * Exposes the slave log as an {@link AnnotatedLargeText} for URL binding.
 *
 * @return a new text object over {@link #getLogFile()}, using the default
 *         charset of this JVM and this computer as its context.
 */
public AnnotatedLargeText getLogText() {
    final File logFile = getLogFile();
    final Charset charset = Charset.defaultCharset();
    return new AnnotatedLargeText(logFile, charset, false, this);
}
/**
 * Obtains the access control list for this computer from the authorization
 * strategy of the current Hudson security manager.
 *
 * @return the {@link ACL} the authorization strategy provides for this
 *         computer.
 */
public ACL getACL() {
    return HudsonSecurityEntitiesHolder.getHudsonSecurityManager()
            .getAuthorizationStrategy()
            .getACL(this);
}
/**
 * Delegates the permission check to {@link #getACL()}.
 *
 * @param permission the permission to check against this computer's ACL.
 */
public void checkPermission(Permission permission) {
    final ACL acl = getACL();
    acl.checkPermission(permission);
}
/**
 * Asks {@link #getACL()} whether the given permission is granted.
 *
 * @param permission the permission to test.
 * @return whatever the ACL of this computer answers for the permission.
 */
public boolean hasPermission(Permission permission) {
    final ACL acl = getACL();
    return acl.hasPermission(permission);
}
/**
 * Reports why this computer is offline, whether the offline state is
 * temporary or not.
 *
 * @return the recorded cause, or null if the system was put offline
 *         without a cause being given.
 */
@Exported
public OfflineCause getOfflineCause() {
    return this.offlineCause;
}
/**
* Gets the channel that can be used to run a program on this computer.
*
* {@link #isOffline()} treats a {@code null} channel as "offline".
*
* @return never null when {@link #isOffline()}==false.
*/
public abstract VirtualChannel getChannel();
/**
* Gets the default charset of this computer.
*
* @return never null when {@link #isOffline()}==false.
*/
public abstract Charset getDefaultCharset();
/**
* Gets the logs recorded by this slave.
*
* @return the recorded log entries.
* @throws IOException declared for implementations to report I/O failures.
* @throws InterruptedException declared for implementations that can be
* interrupted while collecting the records.
*/
public abstract List getLogRecords() throws IOException, InterruptedException;
/**
* If {@link #getChannel()}==null, attempts to relaunch the slave agent.
*
* This is the request-serving counterpart of {@link #connect(boolean)}.
*
* @param req the request being served.
* @param rsp the response to write to.
*/
public abstract void doLaunchSlaveAgent(StaplerRequest req, StaplerResponse rsp) throws IOException, ServletException;
/**
 * Launches this computer by forcing a (re)connect; equivalent to
 * {@code connect(true)}. The resulting future is discarded.
 *
 * @deprecated since 2009-01-06. Use {@link #connect(boolean)}
 */
@Deprecated
public final void launch() {
    connect(true);
}
/**
* Do the same as
* {@link #doLaunchSlaveAgent(StaplerRequest, StaplerResponse)} but outside
* the context of serving a request.
*
* If already connected or if this computer doesn't support proactive
* launching, no-op. This method may return immediately while the launch
* operation happens asynchronously.
*
* @see #disconnect()
*
* @param forceReconnect If true and a connect activity is already in
* progress, it will be cancelled and the new one will be started. If false,
* and a connect activity is already in progress, this method will do
* nothing and just return the pending connection operation.
* @return A {@link Future} representing pending completion of the task. The
* 'completion' includes both a successful completion and a non-successful
* completion (such distinction typically doesn't make much sense because as
* soon as {@link Computer} is connected it can be disconnected by some
* other threads.)
*/
public final Future> connect(boolean forceReconnect) {
connectTime = System.currentTimeMillis();
return _connect(forceReconnect);
}
/**
* Allows implementing-classes to provide an implementation for the connect
* method.
*
*
If already connected or if this computer doesn't support proactive
* launching, no-op. This method may return immediately while the launch
* operation happens asynchronously.
*
* @see #disconnect()
*
* @param forceReconnect If true and a connect activity is already in
* progress, it will be cancelled and the new one will be started. If false,
* and a connect activity is already in progress, this method will do
* nothing and just return the pending connection operation.
* @return A {@link Future} representing pending completion of the task. The
* 'completion' includes both a successful completion and a non-successful
* completion (such distinction typically doesn't make much sense because as
* soon as {@link Computer} is connected it can be disconnected by some
* other threads.)
*/
protected abstract Future> _connect(boolean forceReconnect);
/**
 * CLI command ({@code connect-node}) that reconnects this node and waits
 * for the connect operation to finish.
 *
 * @param force whether a pending connect operation is cancelled and
 *        retried from scratch.
 * @throws ExecutionException if waiting on the connect future fails.
 * @throws InterruptedException if the wait is interrupted.
 */
@CLIMethod(name = "connect-node")
public void cliConnect(@Option(name = "-f", usage = "Cancel any currently pending connect operation and retry from scratch") boolean force) throws ExecutionException, InterruptedException {
    checkPermission(Hudson.ADMINISTER);
    final Future<?> pendingConnect = connect(force);
    pendingConnect.get();
}
/**
 * Reports when this computer last connected.
 *
 * @return the time in ms since epoch recorded by the last
 *         {@link #connect(boolean)} call; 0 initially and after the default
 *         disconnect implementations have run.
 */
public final long getConnectTime() {
    return this.connectTime;
}
/**
* Disconnect this computer.
*
* If this is the master, no-op. This method may return immediately while
* the launch operation happens asynchronously.
*
* @param cause Object that identifies the reason the node was disconnected.
*
* @return {@link Future} to track the asynchronous disconnect operation.
* @see #connect(boolean)
* @since 1.320
*/
public Future> disconnect(OfflineCause cause) {
offlineCause = cause;
if (Util.isOverridden(Computer.class, getClass(), "disconnect")) {
return disconnect(); // legacy subtypes that extend disconnect().
}
connectTime = 0;
return Futures.precomputed(null);
}
/**
* Equivalent to {@code disconnect(null)}
*
* @deprecated as of 1.320. Use {@link #disconnect(OfflineCause)} and
* specify the cause.
*/
public Future> disconnect() {
if (Util.isOverridden(Computer.class, getClass(), "disconnect", OfflineCause.class)) // if the subtype already derives disconnect(OfflineCause), delegate to it
{
return disconnect(null);
}
connectTime = 0;
return Futures.precomputed(null);
}
/**
 * CLI command ({@code disconnect-node}) that disconnects this node and
 * waits for the disconnect operation to finish.
 *
 * @param cause note explaining the disconnection, wrapped in a
 *        {@link ByCLI} cause.
 * @throws ExecutionException if waiting on the disconnect future fails.
 * @throws InterruptedException if the wait is interrupted.
 */
@CLIMethod(name = "disconnect-node")
public void cliDisconnect(@Option(name = "-m", usage = "Record the note about why you are disconnecting this node") String cause) throws ExecutionException, InterruptedException {
    checkPermission(Hudson.ADMINISTER);
    final OfflineCause reason = new ByCLI(cause);
    final Future<?> pendingDisconnect = disconnect(reason);
    pendingDisconnect.get();
}
/**
 * CLI command ({@code offline-node}) that marks this node temporarily
 * offline.
 *
 * @param cause note explaining why the node goes offline, wrapped in a
 *        {@link ByCLI} cause.
 */
@CLIMethod(name = "offline-node")
public void cliOffline(@Option(name = "-m", usage = "Record the note about why you are disconnecting this node") String cause) throws ExecutionException, InterruptedException {
    checkPermission(CONFIGURE);
    final OfflineCause reason = new ByCLI(cause);
    setTemporarilyOffline(true, reason);
}
/**
 * CLI command ({@code online-node}) that clears the temporarily-offline
 * mark of this node, together with its offline cause.
 */
@CLIMethod(name = "online-node")
public void cliOnline() throws ExecutionException, InterruptedException {
    checkPermission(CONFIGURE);
    final OfflineCause noCause = null;
    setTemporarilyOffline(false, noCause);
}
/**
 * Number of {@link Executor}s that are configured for this computer.
 *
 * <p>
 * When this value is decreased, it is temporarily possible for
 * {@link #executors} to have a larger number than this.
 *
 * @return the configured executor count.
 */
// ugly name to let EL access this
@Exported
public int getNumExecutors() {
    return this.numExecutors;
}
/**
 * Returns {@link Node#getNodeName() the name of the node} as stored in
 * {@link #nodeName}.
 *
 * @return the stored node name, which may be null.
 */
public String getName() {
    return this.nodeName;
}
/**
 * Returns the {@link Node} that this computer represents.
 *
 * A null {@link #nodeName} stands for the Hudson instance itself; any
 * other name is looked up among the nodes of the Hudson instance.
 *
 * @return null if the configuration has changed and the node is removed,
 * yet the corresponding {@link Computer} is not yet gone.
 */
public Node getNode() {
    final Hudson hudson = Hudson.getInstance();
    return (nodeName == null) ? hudson : hudson.getNode(nodeName);
}
/**
 * Returns the load statistics of the self label of this computer's node.
 *
 * @return the statistics, or null when {@link #getNode()} returns null
 *         (the node was removed while this computer still exists).
 */
@Exported
public LoadStatistics getLoadStatistics() {
    final Node node = getNode();
    if (node == null) {
        // The node may already be gone; avoid a NullPointerException.
        return null;
    }
    return node.getSelfLabel().loadStatistics;
}
/**
 * Builds a timeline widget over the build history of every {@link Job}
 * known to the Hudson instance.
 *
 * @return a new {@link BuildTimelineWidget}.
 */
public BuildTimelineWidget getTimeline() {
    final Hudson hudson = Hudson.getInstance();
    Collection everyJob = hudson.getAllItems(Job.class);
    return new BuildTimelineWidget(BuildHistoryList.newBuildHistoryList(everyJob));
}
/**
* {@inheritDoc}
*
* This base implementation intentionally does nothing.
*/
public void taskAccepted(Executor executor, Queue.Task task) {
// dummy implementation
}
/**
* {@inheritDoc}
*
* This base implementation intentionally does nothing.
*/
public void taskCompleted(Executor executor, Queue.Task task, long durationMS) {
// dummy implementation
}
/**
* {@inheritDoc}
*
* This base implementation intentionally does nothing.
*/
public void taskCompletedWithProblems(Executor executor, Queue.Task task, long durationMS, Throwable problems) {
// dummy implementation
}
/**
 * Tells whether this computer is offline.
 *
 * @return true if the computer is marked temporarily offline or has no
 *         channel.
 */
@Exported
public boolean isOffline() {
    if (temporarilyOffline) {
        return true;
    }
    return getChannel() == null;
}
/**
 * Negation of {@link #isOffline()}.
 *
 * @return true if this computer is not offline.
 */
public final boolean isOnline() {
    final boolean offline = isOffline();
    return !offline;
}
/**
 * Determines whether manual launching of the slave is allowed at this
 * point in time; the decision is delegated to the retention strategy.
 *
 * @return {@code true} if manual launching of the slave is allowed at this
 * point in time.
 */
@Exported
public boolean isManualLaunchAllowed() {
    final RetentionStrategy strategy = getRetentionStrategy();
    return strategy.isManualLaunchAllowed(this);
}
/**
* Is a {@link #connect(boolean)} operation in progress?
*
* @return true while a connect operation is pending.
*/
public abstract boolean isConnecting();
/**
 * Tells whether this computer is supposed to be launched via JNLP.
 *
 * @return always {@code false} in this base implementation.
 * @deprecated since 2008-05-18. See {@linkplain #isLaunchSupported()} and
 * {@linkplain ComputerLauncher}
 */
@Exported
@Deprecated
public boolean isJnlpAgent() {
    return false;
}
/**
 * Tells whether Hudson can launch this computer proactively and
 * automatically.
 *
 * For example, JNLP slaves return {@code false} from this, because the
 * launch process needs to be initiated from the slave side.
 *
 * @return always {@code true} in this base implementation.
 */
@Exported
public boolean isLaunchSupported() {
    return true;
}
/**
 * Returns true if this node is marked temporarily offline by the user.
 *
 * <p>
 * In contrast, {@link #isOffline()} represents the actual
 * online/offline state. For example, this method may return false while
 * {@link #isOffline()} returns true if the slave agent failed to launch.
 *
 * @return the temporarily-offline flag.
 * @deprecated You should almost always want {@link #isOffline()}. This
 * method is marked as deprecated to warn people when they accidentally call
 * this method.
 */
@Exported
@Deprecated
public boolean isTemporarilyOffline() {
    return temporarilyOffline;
}
/**
 * Marks or unmarks the computer as temporarily offline without recording a
 * cause; equivalent to {@code setTemporarilyOffline(temporarilyOffline, null)}.
 *
 * @param temporarilyOffline the new temporarily-offline state.
 * @deprecated as of 1.320. Use
 * {@link #setTemporarilyOffline(boolean, OfflineCause)}
 */
@Deprecated
public void setTemporarilyOffline(boolean temporarilyOffline) {
    setTemporarilyOffline(temporarilyOffline, null);
}
/**
 * Marks the computer as temporarily offline. This retains the underlying
 * {@link Channel} connection, but prevent builds from executing.
 *
 * The offline cause is cleared when the computer is brought back online,
 * is propagated to the node when the node still exists, and a queue
 * maintenance is scheduled afterwards.
 *
 * @param temporarilyOffline the new temporarily-offline state.
 * @param cause If the first argument is true, specify the reason why the
 * node is being put offline.
 */
public void setTemporarilyOffline(boolean temporarilyOffline, OfflineCause cause) {
    if (temporarilyOffline) {
        offlineCause = cause;
    } else {
        offlineCause = null;
    }
    this.temporarilyOffline = temporarilyOffline;

    final Node owner = getNode();
    if (owner != null) {
        owner.setOfflineCause(offlineCause);
    }

    Hudson.getInstance().getQueue().scheduleMaintenance();
}
/**
 * Picks the icon file name that reflects the online/offline state.
 *
 * @return {@code "computer-x.png"} when offline, {@code "computer.png"}
 *         otherwise.
 */
@Exported
public String getIcon() {
    return isOffline() ? "computer-x.png" : "computer.png";
}
/**
 * Picks the alternative text that accompanies the icon.
 *
 * @return {@code "[offline]"} when offline, {@code "[online]"} otherwise.
 */
public String getIconAltText() {
    return isOffline() ? "[offline]" : "[online]";
}
/**
 * Returns the name shown for this computer, which in this base class is
 * the stored node name.
 *
 * @return the value of {@link #nodeName}, which may be null.
 */
@Exported
public String getDisplayName() {
    return this.nodeName;
}
/**
 * Returns the caption for this computer, produced from the node name by
 * the {@code Computer_Caption} message.
 *
 * @return the formatted caption.
 */
public String getCaption() {
    final String name = nodeName;
    return Messages.Computer_Caption(name);
}
/**
 * Returns the URL of this computer.
 *
 * @return {@code "computer/" + getDisplayName() + "/"}.
 */
public String getUrl() {
    // NOTE(review): the display name is concatenated as-is, without any
    // URL encoding - confirm callers do not need it encoded.
    final String displayName = getDisplayName();
    return "computer/" + displayName + "/";
}
/**
 * Returns projects that are tied on this node.
 *
 * @return the tied jobs of the node's self label, or an empty list when
 *         {@link #getNode()} returns null (the node was removed while this
 *         computer still exists).
 */
public List getTiedJobs() {
    final Node node = getNode();
    if (node == null) {
        // The node may already be gone; avoid a NullPointerException.
        return new ArrayList<AbstractProject>();
    }
    return node.getSelfLabel().getTiedJobs();
}
/**
 * Returns the builds of all jobs, narrowed down to this computer's node
 * through {@link RunList#node(Node)}.
 *
 * @return the filtered run list.
 */
public RunList getBuilds() {
    final RunList allBuilds = new RunList(Hudson.getInstance().getAllItems(Job.class));
    return allBuilds.node(getNode());
}
/**
 * Returns the jobs tied to this computer that are currently building or
 * are contained in the queue.
 *
 * @return a new list; empty when there are no tied jobs or none of them is
 *         building or queued.
 */
@Exported
public List getRunningJobs() {
    List<AbstractProject> jobs = new ArrayList<AbstractProject>();
    Queue queue = Hudson.getInstance().getQueue();
    // Fetch the tied jobs once so the null check and the iteration see the
    // same list.
    List<AbstractProject> tiedJobs = getTiedJobs();
    if (tiedJobs != null) {
        for (AbstractProject project : tiedJobs) {
            if (project.isBuilding() || queue.contains(project)) {
                jobs.add(project);
            }
        }
    }
    return jobs;
}
/**
 * Called to notify {@link Computer} that its corresponding {@link Node}
 * configuration is updated.
 *
 * Stores the node name (only for a {@link Slave}, null otherwise), adopts
 * the node's executor count, and re-applies the offline cause to the node
 * while this computer is temporarily offline.
 *
 * @param node the updated node; must not be null.
 */
protected void setNode(Node node) {
    assert node != null;
    this.nodeName = (node instanceof Slave) ? node.getNodeName() : null;
    setNumExecutors(node.getNumExecutors());
    if (!temporarilyOffline) {
        return;
    }
    node.setOfflineCause(offlineCause);
}
/**
 * Called by {@link Hudson#updateComputerList()} to notify {@link Computer}
 * that it will be discarded. Sets the configured executor count to zero.
 */
protected void kill() {
    final int noExecutors = 0;
    setNumExecutors(noExecutors);
}
/**
 * Changes the configured number of executors.
 *
 * When the number shrinks, every idle executor is interrupted so that it
 * can potentially terminate; when it grows, new executors are started and
 * registered until the pool reaches the configured size.
 *
 * @param n the new configured number of executors.
 */
private synchronized void setNumExecutors(int n) {
    if (numExecutors == n) {
        return; // no-op
    }
    final int delta = n - numExecutors;
    this.numExecutors = n;

    if (delta >= 0) {
        // if the number is increased, add new ones
        while (executors.size() < numExecutors) {
            Executor added = new Executor(this, executors.size());
            added.start();
            executors.add(added);
        }
        return;
    }

    // send signal to all idle executors to potentially kill them off
    for (Executor candidate : executors) {
        if (candidate.isIdle()) {
            candidate.interrupt();
        }
    }
}
/**
 * Getter-style alias of {@link #countIdle()}.
 *
 * @return the number of idle executors.
 * @since 2.1.0
 */
public int getIdleCount() {
    final int idle = countIdle();
    return idle;
}
/**
 * Returns the number of idle {@link Executor}s that can start working
 * immediately.
 *
 * @return how many entries of {@link #executors} report being idle.
 */
public int countIdle() {
    int idle = 0;
    for (Executor executor : executors) {
        if (!executor.isIdle()) {
            continue;
        }
        idle++;
    }
    return idle;
}
/**
 * Returns the number of {@link Executor}s that are doing some work right
 * now.
 *
 * @return {@link #countExecutors()} minus {@link #countIdle()}.
 */
public final int countBusy() {
    final int total = countExecutors();
    final int idle = countIdle();
    return total - idle;
}
/**
 * Getter-style alias of {@link #countBusy()}.
 *
 * @return the number of busy executors.
 * @since 2.1.0
 */
public int getBusyCount() {
    final int busy = countBusy();
    return busy;
}
/**
 * Returns the current size of the executor pool for this computer. This
 * number may temporarily differ from {@link #getNumExecutors()} if there
 * are busy tasks when the configured size is decreased. OneOffExecutors are
 * not included in this count.
 *
 * @return the size of {@link #executors}.
 */
public final int countExecutors() {
    final int poolSize = executors.size();
    return poolSize;
}
/**
 * Getter-style alias of {@link #countExecutors()}.
 *
 * @return the current size of the executor pool.
 * @since 2.1.0
 */
public int getExecutorsCount() {
    final int poolSize = countExecutors();
    return poolSize;
}
/**
 * Returns how many one-off executors this computer currently holds.
 *
 * @return the size of {@link #oneOffExecutors}.
 * @since 2.1.0
 */
public int getOneOffExecutorsCount() {
    final int oneOffCount = oneOffExecutors.size();
    return oneOffCount;
}
/**
 * Gets a snapshot of all {@link Executor}s.
 *
 * @return a new list copied from {@link #executors}; changes made to it do
 *         not affect this computer.
 */
@Exported
public List getExecutors() {
    List snapshot = new ArrayList(executors);
    return snapshot;
}
/**
 * Gets a snapshot of all {@link OneOffExecutor}s.
 *
 * @return a new list copied from {@link #oneOffExecutors}; changes made to
 *         it do not affect this computer.
 */
@Exported
public List getOneOffExecutors() {
    List snapshot = new ArrayList(oneOffExecutors);
    return snapshot;
}
/**
 * Returns true if all the executors of this computer are idle.
 *
 * The presence of any one-off executor makes the computer non-idle.
 *
 * @return true only when there is no one-off executor and every regular
 *         executor is idle.
 */
@Exported
public final boolean isIdle() {
    final boolean hasOneOffExecutors = !oneOffExecutors.isEmpty();
    if (hasOneOffExecutors) {
        return false;
    }
    for (Executor executor : executors) {
        final boolean busy = !executor.isIdle();
        if (busy) {
            return false;
        }
    }
    return true;
}
/**
 * Returns true if at least one regular executor of this computer is idle.
 *
 * @return false when there are no executors or all of them are busy.
 */
public final boolean isPartiallyIdle() {
    boolean anyIdle = false;
    for (Executor executor : executors) {
        if (executor.isIdle()) {
            anyIdle = true;
            break;
        }
    }
    return anyIdle;
}
/**
 * Returns the time when this computer last became idle.
 *
 * If this computer is already idle, the return value will point to the
 * time in the past since when this computer has been idle.
 *
 * <p>
 * If this computer is busy, the return value will point to the time in
 * the future where this computer will be expected to become free.
 *
 * @return the largest idle-start time over all one-off and regular
 *         executors, or {@link Long#MIN_VALUE} when there are none.
 */
public final long getIdleStartMilliseconds() {
    long latest = Long.MIN_VALUE;
    for (Executor oneOff : oneOffExecutors) {
        final long idleStart = oneOff.getIdleStartMilliseconds();
        if (idleStart > latest) {
            latest = idleStart;
        }
    }
    for (Executor regular : executors) {
        final long idleStart = regular.getIdleStartMilliseconds();
        if (idleStart > latest) {
            latest = idleStart;
        }
    }
    return latest;
}
/**
 * Returns the time when this computer first became in demand.
 *
 * @return the smallest {@code buildableStartMilliseconds} among the
 *         queue's buildable items for this computer, or
 *         {@link Long#MAX_VALUE} when there are none.
 */
public final long getDemandStartMilliseconds() {
    long earliest = Long.MAX_VALUE;
    for (Queue.BuildableItem buildable : Hudson.getInstance().getQueue().getBuildableItems(this)) {
        if (buildable.buildableStartMilliseconds < earliest) {
            earliest = buildable.buildableStartMilliseconds;
        }
    }
    return earliest;
}
/**
 * Called by {@link Executor} to kill excessive executors from this
 * computer.
 *
 * Once no remaining executor is alive, this computer asks the Hudson
 * instance to remove it.
 *
 * @param e the executor to take out of the pool.
 */
/*package*/ synchronized void removeExecutor(Executor e) {
    executors.remove(e);
    if (isAlive()) {
        return;
    }
    Hudson.getInstance().removeComputer(this);
}
/**
 * Returns true if any of the executors are functioning.
 *
 * Note that if an executor dies, we'll leave it in {@link #executors} until
 * the administrator yanks it out, so that we can see why it died.
 *
 * @return false when there are no executors or none of them is alive.
 */
private boolean isAlive() {
    boolean anyAlive = false;
    for (Executor executor : executors) {
        if (executor.isAlive()) {
            anyAlive = true;
            break;
        }
    }
    return anyAlive;
}
/**
 * Interrupts every regular {@link Executor} of this computer. One-off
 * executors are not touched.
 */
public void interrupt() {
    for (Executor executor : executors) {
        executor.interrupt();
    }
}
/**
 * Returns the URL of this computer used for search.
 *
 * @return {@code "computer/"} followed by the stored node name.
 */
public String getSearchUrl() {
    final String name = nodeName;
    return "computer/" + name;
}
/**
* {@link RetentionStrategy} associated with this computer.
*
* {@link #isManualLaunchAllowed()} delegates its decision to this strategy.
*
* @return never null. This method return
* {@code RetentionStrategy<? super T>} where {@code T=this.getClass()}.
*/
public abstract RetentionStrategy getRetentionStrategy();
/**
 * Expose monitoring data for the remote API.
 *
 * @return a new map from the class name of each {@link NodeMonitor} to the
 *         data that monitor reports for this computer.
 */
@Exported(inline = true)
public Map getMonitorData() {
    Map monitorData = new HashMap();
    for (NodeMonitor nodeMonitor : NodeMonitor.getAll()) {
        final String monitorKey = nodeMonitor.getClass().getName();
        final Object monitorValue = nodeMonitor.data(this);
        monitorData.put(monitorKey, monitorValue);
    }
    return monitorData;
}
/**
* Gets the system properties of the JVM on this computer. If this is the
* master, it returns the system property of the master computer.
*/
public Map