All Downloads are FREE. Search and download functionalities are using the official Maven repository.

au.csiro.pathling.update.ImportExecutor Maven / Gradle / Ivy

There is a newer version: 7.0.1
Show newest version
/*
 * Copyright 2023 Commonwealth Scientific and Industrial Research
 * Organisation (CSIRO) ABN 41 687 119 230.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package au.csiro.pathling.update;

import static au.csiro.pathling.utilities.Preconditions.checkUserInput;

import au.csiro.pathling.encoders.FhirEncoders;
import au.csiro.pathling.encoders.UnsupportedResourceError;
import au.csiro.pathling.errors.InvalidUserInputError;
import au.csiro.pathling.errors.SecurityError;
import au.csiro.pathling.fhir.FhirContextFactory;
import au.csiro.pathling.io.AccessRules;
import au.csiro.pathling.io.Database;
import au.csiro.pathling.io.FileSystemPersistence;
import au.csiro.pathling.io.ImportMode;
import ca.uhn.fhir.rest.annotation.ResourceParam;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
import org.hl7.fhir.instance.model.api.IBaseResource;
import org.hl7.fhir.r4.model.CodeType;
import org.hl7.fhir.r4.model.Enumerations.ResourceType;
import org.hl7.fhir.r4.model.OperationOutcome;
import org.hl7.fhir.r4.model.OperationOutcome.IssueSeverity;
import org.hl7.fhir.r4.model.OperationOutcome.IssueType;
import org.hl7.fhir.r4.model.OperationOutcome.OperationOutcomeIssueComponent;
import org.hl7.fhir.r4.model.Parameters;
import org.hl7.fhir.r4.model.Parameters.ParametersParameterComponent;
import org.hl7.fhir.r4.model.UrlType;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Component;

/**
 * Encapsulates the execution of an import operation.
 *
 * @author John Grimes
 * @see Import
 */
@Component
@Profile({"core", "import"})
@Slf4j
public class ImportExecutor {

  @Nonnull
  private final SparkSession spark;

  @Nonnull
  private final Database database;

  @Nonnull
  private final FhirEncoders fhirEncoders;

  @Nonnull
  private final FhirContextFactory fhirContextFactory;

  @Nonnull
  private final Optional accessRules;

  /**
   * @param spark a {@link SparkSession} for resolving Spark queries
   * @param database a {@link Database} for writing resources
   * @param fhirEncoders a {@link FhirEncoders} object for converting data back into HAPI FHIR
   * @param fhirContextFactory a {@link FhirContextFactory} for constructing FhirContext objects in
   * the context of parallel processing
   * @param accessRules a {@link AccessRules} for validating access to URLs
   */
  public ImportExecutor(@Nonnull final SparkSession spark,
      @Nonnull final Database database,
      @Nonnull final FhirEncoders fhirEncoders,
      @Nonnull final FhirContextFactory fhirContextFactory,
      @Nonnull final Optional accessRules) {
    this.spark = spark;
    this.database = database;
    this.fhirEncoders = fhirEncoders;
    this.fhirContextFactory = fhirContextFactory;
    this.accessRules = accessRules;
  }

  /**
   * Executes an import request.
   *
   * @param inParams a FHIR {@link Parameters} object describing the import request
   * @return a FHIR {@link OperationOutcome} resource describing the result
   */
  @Nonnull
  public OperationOutcome execute(@Nonnull @ResourceParam final Parameters inParams) {
    // Parse and validate the JSON request.
    final List sourceParams = inParams.getParameter().stream()
        .filter(param -> "source".equals(param.getName())).collect(Collectors.toList());
    if (sourceParams.isEmpty()) {
      throw new InvalidUserInputError("Must provide at least one source parameter");
    }
    log.info("Received $import request");

    // For each input within the request, read the resources of the declared type and create
    // the corresponding table in the warehouse.
    for (final ParametersParameterComponent sourceParam : sourceParams) {
      final ParametersParameterComponent resourceTypeParam = sourceParam.getPart().stream()
          .filter(param -> "resourceType".equals(param.getName()))
          .findFirst()
          .orElseThrow(
              () -> new InvalidUserInputError("Must provide resourceType for each source"));
      final ParametersParameterComponent urlParam = sourceParam.getPart().stream()
          .filter(param -> "url".equals(param.getName()))
          .findFirst()
          .orElseThrow(
              () -> new InvalidUserInputError("Must provide url for each source"));
      // The mode parameter defaults to 'overwrite'.
      final ImportMode importMode = sourceParam.getPart().stream()
          .filter(param -> "mode".equals(param.getName()) &&
              param.getValue() instanceof CodeType)
          .findFirst()
          .map(param -> ImportMode.fromCode(
              ((CodeType) param.getValue()).asStringValue()))
          .orElse(ImportMode.OVERWRITE);
      final String resourceCode = ((CodeType) resourceTypeParam.getValue()).getCode();
      final ResourceType resourceType = ResourceType.fromCode(resourceCode);

      // Get an encoder based on the declared resource type within the source parameter.
      final ExpressionEncoder fhirEncoder;
      try {
        fhirEncoder = fhirEncoders.of(resourceType.toCode());
      } catch (final UnsupportedResourceError e) {
        throw new InvalidUserInputError("Unsupported resource type: " + resourceCode);
      }

      // Read the resources from the source URL into a dataset of strings.
      final Dataset jsonStrings = readStringsFromUrl(urlParam);

      // Parse each line into a HAPI FHIR object, then encode to a Spark dataset.
      final Dataset resources = jsonStrings.map(jsonToResourceConverter(),
          fhirEncoder);

      log.info("Importing {} resources (mode: {})", resourceType.toCode(), importMode.getCode());
      if (importMode == ImportMode.OVERWRITE) {
        database.overwrite(resourceType, resources.toDF());
      } else {
        database.merge(resourceType, resources.toDF());
      }
    }

    // We return 200, as this operation is currently synchronous.
    log.info("Import complete");

    // Construct a response.
    final OperationOutcome opOutcome = new OperationOutcome();
    final OperationOutcomeIssueComponent issue = new OperationOutcomeIssueComponent();
    issue.setSeverity(IssueSeverity.INFORMATION);
    issue.setCode(IssueType.INFORMATIONAL);
    issue.setDiagnostics("Data import completed successfully");
    opOutcome.getIssue().add(issue);
    return opOutcome;
  }

  @Nonnull
  private Dataset readStringsFromUrl(@Nonnull final ParametersParameterComponent urlParam) {
    final String url = ((UrlType) urlParam.getValue()).getValueAsString();
    final String decodedUrl = URLDecoder.decode(url, StandardCharsets.UTF_8);
    final String convertedUrl = FileSystemPersistence.convertS3ToS3aUrl(decodedUrl);
    final Dataset jsonStrings;
    try {
      // Check that the user is authorized to execute the operation.
      accessRules.ifPresent(ar -> ar.checkCanImportFrom(convertedUrl));
      final FilterFunction nonBlanks = s -> !s.isBlank();
      jsonStrings = spark.read().textFile(convertedUrl).filter(nonBlanks);
    } catch (final SecurityError e) {
      throw new InvalidUserInputError("Not allowed to import from URL: " + convertedUrl, e);
    } catch (final Exception e) {
      throw new InvalidUserInputError("Error reading from URL: " + convertedUrl, e);
    }
    return jsonStrings;
  }

  @Nonnull
  private MapFunction jsonToResourceConverter() {
    final FhirContextFactory localFhirContextFactory = this.fhirContextFactory;
    return (json) -> {
      final IBaseResource resource = localFhirContextFactory.build().newJsonParser()
          .parseResource(json);
      // All imported resources must have an ID set.
      checkUserInput(!resource.getIdElement().isEmpty(), "Encountered a resource with no ID");
      return resource;
    };
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy