com.edmunds.rest.databricks.service.JobServiceImpl

A simple Java REST client for interacting with the Databricks REST API: https://docs.databricks.com/api/latest/index.html

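For orientation, obtaining and using this service typically looks like the sketch below. This is a minimal example, not taken from this file: it assumes the library's DatabricksServiceFactory with token authentication (per the project README); the token, host, and job id are placeholders, and builder method names may vary between versions.

import com.edmunds.rest.databricks.DTO.RunNowDTO;
import com.edmunds.rest.databricks.DatabricksServiceFactory;
import com.edmunds.rest.databricks.service.JobService;

public class JobServiceExample {

  public static void main(String[] args) throws Exception {
    // Build a service factory against a workspace; token and host are placeholders.
    DatabricksServiceFactory serviceFactory = DatabricksServiceFactory.Builder
        .createTokenAuthentication("my-personal-access-token", "my-workspace.cloud.databricks.com")
        .build();

    // JobService is the interface implemented by JobServiceImpl below.
    JobService jobService = serviceFactory.getJobService();

    // List all jobs, then trigger job 123 and print the run id of the new run.
    System.out.println(jobService.listAllJobs());
    RunNowDTO run = jobService.runJobNow(123L);
    System.out.println(run.getRunId());
  }
}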
/*
 * Copyright 2018 Edmunds.com, Inc.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package com.edmunds.rest.databricks.service;

import com.edmunds.rest.databricks.DTO.JobsDTO;
import com.edmunds.rest.databricks.DTO.RunMetadataDTO;
import com.edmunds.rest.databricks.DTO.RunNowDTO;
import com.edmunds.rest.databricks.DTO.RunsDTO;
import com.edmunds.rest.databricks.DTO.jobs.JobDTO;
import com.edmunds.rest.databricks.DTO.jobs.JobSettingsDTO;
import com.edmunds.rest.databricks.DTO.jobs.RunDTO;
import com.edmunds.rest.databricks.DTO.jobs.RunParametersDTO;
import com.edmunds.rest.databricks.DatabricksRestException;
import com.edmunds.rest.databricks.RequestMethod;
import com.edmunds.rest.databricks.restclient.DatabricksRestClient;
import com.fasterxml.jackson.core.type.TypeReference;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 * The implementation of JobService.
 */
@Deprecated
public class JobServiceImpl extends DatabricksService implements JobService {

  private static final Logger log = LogManager.getLogger(JobServiceImpl.class);

  public JobServiceImpl(final DatabricksRestClient client) {
    super(client);
  }

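  /**
   * Creates a job via POST /jobs/create and returns the new job id.
   */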
  @Override
  public long createJob(JobSettingsDTO jobSettingsDTO) throws IOException, DatabricksRestException {
    // Round-trip the settings DTO through JSON so the REST client gets a plain Map payload.
    String marshalled = this.mapper.writeValueAsString(jobSettingsDTO);
    Map<String, Object> data = this.mapper
        .readValue(marshalled, new TypeReference<Map<String, Object>>() {
        });
    byte[] responseBody = client.performQuery(RequestMethod.POST, "/jobs/create", data);
    Map<String, Long> response = this.mapper
        .readValue(responseBody, new TypeReference<Map<String, Long>>() {
        });
    return response.get("job_id");
  }

  @Override
  public void deleteJob(long jobId) throws IOException, DatabricksRestException {
    Map<String, Object> data = new HashMap<>();
    data.put("job_id", jobId);
    client.performQuery(RequestMethod.POST, "/jobs/delete", data);
  }

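  /**
   * Deletes the job with the given name, or logs and returns if no such job exists.
   */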
  @Override
  public void deleteJob(String jobName) throws IOException, DatabricksRestException {
    JobDTO jobDTO = getJobByName(jobName);
    if (jobDTO != null) {
      log.info("Deleting: " + getJobLink(jobDTO.getJobId()));
      deleteJob(jobDTO.getJobId());
    } else {
      log.info("No jobs found for " + jobName);
    }
  }

  @Override
  public JobDTO getJob(long jobId) throws IOException, DatabricksRestException {
    Map<String, Object> data = new HashMap<>();
    data.put("job_id", jobId);
    byte[] responseBody = client.performQuery(RequestMethod.GET, "/jobs/get", data);
    return this.mapper.readValue(responseBody, JobDTO.class);
  }

  @Override
  public List<JobDTO> getJobsByName(String jobName) throws IOException, DatabricksRestException {
    return getJobsByRegex(Pattern.compile(jobName));
  }

  @Override
  public JobDTO getJobByName(String jobName) throws IOException, DatabricksRestException {
    return getJobByName(jobName, true);
  }

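  /**
   * Returns the job with the given name, or null if none exists. When several jobs share
   * the name, either throws (failOnMultipleJobs) or returns the one with the lowest job id.
   */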
  @Override
  public JobDTO getJobByName(String jobName, boolean failOnMultipleJobs)
      throws IOException, DatabricksRestException {
    List<JobDTO> jobs = getJobsByName(jobName);
    if (jobs.size() > 1) {
      String errorMessage = String
          .format("[%s] job ids found for name: [%s]. Please delete duplicate jobs, or "
              + "rename conflicting jobs:\n%s\n", jobs.size(), jobName, jobs);
      if (failOnMultipleJobs) {
        throw new IllegalStateException(errorMessage);
      } else {
        log.error(errorMessage);
        log.error("returning the job with the lowest jobId");
        //NOTE: job ids are no longer sequential, so the lowest id is not necessarily the oldest job
        return jobs.stream().min(Comparator.comparing(JobDTO::getJobId)).get();
      }
    }
    if (jobs.isEmpty()) {
      return null;
    }
    return jobs.get(0);
  }

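  /**
   * Returns all jobs whose names fully match the given regex.
   */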
  @Override
  public List<JobDTO> getJobsByRegex(Pattern regex) throws IOException, DatabricksRestException {
    if (regex == null) {
      throw new IllegalArgumentException("Job name regex must not be null.");
    }

    return Arrays.stream(listAllJobs().getJobs())
            .filter(x -> regex.matcher(x.getSettings().getName()).matches())
            .collect(Collectors.toList());
  }

  @Override
  public JobsDTO listAllJobs() throws DatabricksRestException, IOException {
    byte[] responseBody = client.performQuery(RequestMethod.GET, "/jobs/list", null);
    return this.mapper.readValue(responseBody, JobsDTO.class);
  }

  @Override
  public String getJobLink(long jobId) {
    return String.format("https://%s/#job/%s", client.getHost(), jobId);
  }

  @Override
  public RunNowDTO runJobNow(long jobId) throws DatabricksRestException, IOException {
    return runJobNow(jobId, (RunParametersDTO) null);
  }

  @Override
  public RunNowDTO runJobNow(long jobId, Map<String, String> notebookParams)
      throws DatabricksRestException,
      IOException {
    RunParametersDTO parametersDTO = new RunParametersDTO();
    parametersDTO.setNotebookParams(notebookParams);
    return runJobNow(jobId, parametersDTO);
  }

  //CHECKSTYLE:OFF
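  /**
   * Triggers a run via POST /jobs/run-now. At most one parameter type (jar, notebook,
   * python, or spark-submit) from the supplied RunParametersDTO is sent with the request.
   */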
  @Override
  public RunNowDTO runJobNow(long jobId, RunParametersDTO params) throws DatabricksRestException,
      IOException {
    Map<String, Object> data = new HashMap<>();
    data.put("job_id", jobId);
    if (params == null) {
      // skip param setting

    } else if (params.getJarParams() != null) {
      data.put("jar_params", params.getJarParams());

    } else if (params.getNotebookParams() != null) {
      data.put("notebook_params", params.getNotebookParams());

    } else if (params.getPythonParams() != null) {
      data.put("python_params", params.getPythonParams());

    } else if (params.getSparkSubmitParams() != null) {
      data.put("spark_submit_params", params.getSparkSubmitParams());

    }

    byte[] responseBody = client.performQuery(RequestMethod.POST, "/jobs/run-now", data);
    return this.mapper.readValue(responseBody, RunNowDTO.class);
  }


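  /**
   * Lists runs via GET /jobs/runs/list. Every argument is an optional filter; null values
   * are omitted from the request.
   */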
  @Override
  public RunsDTO listRuns(Long jobId, Boolean activeOnly, Integer offset, Integer limit) throws
      DatabricksRestException, IOException {
    Map<String, Object> data = new HashMap<>();

    if (jobId != null) {
      data.put("job_id", jobId);
    }
    if (activeOnly != null) {
      data.put("active_only", activeOnly);
    }
    //TODO we need a way to do completed_only
    if (offset != null) {
      data.put("offset", offset);
    }
    if (limit != null) {
      data.put("limit", limit);
    }

    byte[] responseBody = client.performQuery(RequestMethod.GET, "/jobs/runs/list", data);
    return this.mapper.readValue(responseBody, RunsDTO.class);
  }

  @Override
  public RunDTO getRun(long runId) throws DatabricksRestException, IOException {
    Map<String, Object> data = new HashMap<>();
    data.put("run_id", runId);

    byte[] responseBody = client.performQuery(RequestMethod.GET, "/jobs/runs/get", data);
    return this.mapper.readValue(responseBody, RunDTO.class);
  }

  @Override
  public void cancelRun(long runId) throws IOException, DatabricksRestException {
    Map<String, Object> data = new HashMap<>();
    data.put("run_id", runId);

    client.performQuery(RequestMethod.POST, "/jobs/runs/cancel", data);
  }

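  /**
   * Overwrites all settings of the given job via POST /jobs/reset.
   */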
  @Override
  public void reset(long jobId, JobSettingsDTO jobSettings)
      throws IOException, DatabricksRestException {
    Map<String, Object> data = new HashMap<>();
    data.put("job_id", jobId);
    data.put("new_settings", jobSettings);

    client.performQuery(RequestMethod.POST, "/jobs/reset", data);
  }

  @Override
  public String buildRunJobRestUrl(long jobId, long numberInJob) {
    return "https://" + client.getHost() + "/#job/" + jobId + "/run/" + numberInJob;
  }

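  /**
   * Creates the job if no job with the settings' name exists; otherwise resets each
   * matching job to these settings. Duplicate names either fail or are all updated,
   * depending on failOnDuplicateJobNames.
   */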
  @Override
  public void upsertJob(JobSettingsDTO jobSettingsDTO, boolean failOnDuplicateJobNames)
      throws IOException, DatabricksRestException {
    String jobName = jobSettingsDTO.getName();
    List<JobDTO> jobs = getJobsByName(jobName);
    if (jobs.size() > 1) {
      String errorMessage = String.format(
          "[%s] job ids found for name: [%s]. Please consider deleting duplicate jobs, "
              + "or renaming "
              + "conflicting jobs:\n%s\n",
          jobs.size(), jobName, jobs);

      if (failOnDuplicateJobNames) {
        throw new IllegalArgumentException(errorMessage);
      } else {
        log.error(errorMessage + "UPDATING ALL MATCHING JOBS");
      }
    }
    if (jobs.isEmpty()) {
      long newJobId = createJob(jobSettingsDTO);
      log.info(String.format("Created job, url: %s", getJobLink(newJobId)));
    } else {
      for (JobDTO job : jobs) {
        reset(job.getJobId(), jobSettingsDTO);
        log.info(String.format("Updated job, url: %s", getJobLink(job.getJobId())));
      }
    }
  }

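  /**
   * Submits a one-time run via POST /jobs/runs/submit, without creating a persistent job.
   */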
  @Override
  public RunNowDTO runSubmit(JobSettingsDTO jobSettings) throws IOException, DatabricksRestException {
    String marshalled = this.mapper.writeValueAsString(jobSettings);
    Map<String, Object> data = this.mapper
        .readValue(marshalled, new TypeReference<Map<String, Object>>() {
        });
    byte[] responseBody = client.performQuery(RequestMethod.POST, "/jobs/runs/submit", data);
    return this.mapper.readValue(responseBody, RunNowDTO.class);
  }

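  /**
   * Fetches the output of a run via GET /jobs/runs/get-output and returns the notebook
   * result.
   */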
  @Override
  public String getOutput(long runId)
      throws IOException, DatabricksRestException {
    Map<String, Object> data = new HashMap<>();
    data.put("run_id", runId);

    byte[] responseBody = client.performQuery(RequestMethod.GET, "/jobs/runs/get-output", data);
    RunMetadataDTO metadata = this.mapper.readValue(responseBody, RunMetadataDTO.class);
    return metadata.getNotebookOutput().getResult();
  }
}



