All downloads are free. The search and download functionality uses the official Maven repository.

com.spotify.helios.system.AgentZooKeeperDownTolerationTest Maven / Gradle / Ivy

There is a newer version: 0.9.9
Show newest version
/*
 * Copyright (c) 2014 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.helios.system;

import com.spotify.docker.client.DockerClient;
import com.spotify.docker.client.messages.Container;
import com.spotify.helios.Polling;
import com.spotify.helios.agent.AgentMain;
import com.spotify.helios.client.HeliosClient;
import com.spotify.helios.common.descriptors.Deployment;
import com.spotify.helios.common.descriptors.Job;
import com.spotify.helios.common.descriptors.JobId;
import com.spotify.helios.common.descriptors.JobStatus;
import com.spotify.helios.common.descriptors.TaskStatus;
import com.spotify.helios.common.protocol.CreateJobResponse;
import com.spotify.helios.common.protocol.JobDeployResponse;

import org.junit.Test;

import java.util.List;
import java.util.Objects;
import java.util.concurrent.Callable;

import static com.spotify.helios.common.descriptors.Goal.START;
import static com.spotify.helios.common.descriptors.HostStatus.Status.UP;
import static com.spotify.helios.common.descriptors.TaskStatus.State.RUNNING;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

/**
 * System test covering agent behaviour while ZooKeeper is stopped.
 *
 * <p>The test deploys a job, stops ZooKeeper, and then checks that:
 * <ul>
 *   <li>the job's container is still running after ZooKeeper is stopped,</li>
 *   <li>the same container is still running after the agent is restarted,</li>
 *   <li>a new container appears after the running one is killed, both while the agent is up
 *       and while the agent is down and subsequently restarted,</li>
 *   <li>once ZooKeeper is started again, the host is reported as UP and the task status
 *       reports the id of the most recently spawned container.</li>
 * </ul>
 */
public class AgentZooKeeperDownTolerationTest extends SystemTestBase {

  @Test
  public void test() throws Exception {
    startDefaultMaster();

    final DockerClient dockerClient = getNewDockerClient();

    final HeliosClient client = defaultClient();

    final AgentMain agent1 = startDefaultAgent(testHost());

    // Create a job
    final Job job = Job.newBuilder()
        .setName(testJobName)
        .setVersion(testJobVersion)
        .setImage(BUSYBOX)
        .setCommand(IDLE_COMMAND)
        .setCreatingUser(TEST_USER)
        .build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());

    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());

    // Wait for the job to run
    final TaskStatus firstTaskStatus = awaitJobState(client, testHost(), jobId, RUNNING,
                                                     LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, firstTaskStatus.getJob());
    assertNotNull(dockerClient.inspectContainer(firstTaskStatus.getContainerId()));

    // Stop zookeeper
    zk().stop();

    // Wait for a while and make sure that the container is still running
    Thread.sleep(5000);
    assertTrue(
        dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());

    // Stop the agent
    agent1.stopAsync().awaitTerminated();

    // Start the agent again
    final AgentMain agent2 = startDefaultAgent(testHost());

    // Wait for a while and make sure that the same container is still running
    Thread.sleep(5000);
    assertTrue(
        dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());

    // Kill the container
    dockerClient.killContainer(firstTaskStatus.getContainerId());
    assertFalse(
        dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());

    // Wait for a while and make sure that a new container was spawned
    final String firstRestartedContainerId = awaitSingleContainerId(dockerClient);

    // Stop the agent
    agent2.stopAsync().awaitTerminated();

    // Kill the container
    dockerClient.killContainer(firstRestartedContainerId);
    assertFalse(dockerClient.inspectContainer(firstRestartedContainerId).state().running());

    // Start the agent again
    startDefaultAgent(testHost());

    // Wait for a while and make sure that a new container was spawned
    final String secondRestartedContainerId = awaitSingleContainerId(dockerClient);
    assertTrue(dockerClient.inspectContainer(secondRestartedContainerId).state().running());

    // Start zookeeper
    zk().start();

    // Verify that the agent is listed as up
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Wait for the new container id to be reflected in the task status
    Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
      @Override
      public TaskStatus call() throws Exception {
        final JobStatus jobStatus = client.jobStatus(jobId).get();
        final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(testHost());
        return taskStatus != null && Objects.equals(taskStatus.getContainerId(),
                                                    secondRestartedContainerId)
               ? taskStatus : null;
      }
    });
  }

  /**
   * Polls until {@code listContainers(dockerClient, testTag)} yields exactly one container and
   * returns that container's id.
   *
   * @param dockerClient the docker client used to list containers
   * @return the id of the single listed container
   * @throws Exception if polling or listing the containers fails
   */
  private String awaitSingleContainerId(final DockerClient dockerClient) throws Exception {
    return Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<String>() {
      @Override
      public String call() throws Exception {
        final List<Container> containers = listContainers(dockerClient, testTag);
        // null signals "not there yet"; presumably Polling.await keeps retrying on null
        // until the timeout -- confirm against Polling.
        return containers.size() == 1 ? containers.get(0).id() : null;
      }
    });
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy