All downloads are free. Search and download functionality uses the official Maven repository.

au.csiro.pathling.library.io.source.DataSourceBuilder Maven / Gradle / Ivy

There is a newer version: 7.0.1
Show newest version
/*
 * Copyright 2023 Commonwealth Scientific and Industrial Research
 * Organisation (CSIRO) ABN 41 687 119 230.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package au.csiro.pathling.library.io.source;

import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toSet;

import au.csiro.pathling.library.PathlingContext;
import java.util.Set;
import java.util.function.Function;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.hl7.fhir.r4.model.Enumerations.ResourceType;

/**
 * A factory for creating various different data sources capable of preparing FHIR data for query.
 *
 * @author Piotr Szul
 * @author John Grimes
 */
public class DataSourceBuilder {

  @Nonnull
  private final PathlingContext context;

  public DataSourceBuilder(@Nonnull final PathlingContext context) {
    this.context = context;

  }

  /**
   * Creates a new data source from a directory containing NDJSON encoded FHIR resource data, with
   * filenames containing the resource type the file contains, e.g. "Patient.ndjson" should contain
   * only Patient resources.
   * 

* The filename can also optionally contain a qualifier after the resource type, to allow for * resources of the same type to be organised into different files, e.g. * "Observation.Chart.ndjson" and "Observation.Lab.ndjson". * * @param ndjsonDir the URI of directory containing NDJSON files * @return the new data source */ @Nonnull public NdjsonSource ndjson(@Nullable final String ndjsonDir) { return new NdjsonSource(context, requireNonNull(ndjsonDir)); } /** * Creates a new data source from a directory containing NDJSON encoded FHIR resource data, with * filenames containing the resource type the file contains, e.g. "Patient.ndjson" should contain * only Patient resources. *

* The filename can also optionally contain a qualifier after the resource type, to allow for * resources of the same type to be organised into different files, e.g. * "Observation.Chart.ndjson" and "Observation.Lab.ndjson". *

* A file extension is also provided, which overrides the default ".ndjson" extension and serves * as a filter for the files to be included in the data source. * * @param path the URI of directory containing NDJSON files * @param extension the file extension to expect * @return the new data source */ @Nonnull public NdjsonSource ndjson(@Nullable final String path, @Nullable final String extension) { return new NdjsonSource(context, requireNonNull(path), requireNonNull(extension)); } /** * Creates a new data source from a directory containing NDJSON encoded FHIR resource data, with * filenames determined by the provided function. *

* A file extension is also provided, which overrides the default ".ndjson" extension and serves * as a filter for the files to be included in the data source. * * @param path the URI of directory containing NDJSON files * @param extension the file extension to expect * @param fileNameMapper a function that maps a filename to a list of resource types * @return the new data source */ @Nonnull public NdjsonSource ndjson(@Nullable final String path, @Nullable final String extension, @Nullable final Function> fileNameMapper) { return new NdjsonSource(context, requireNonNull(path), requireNonNull(extension), requireNonNull(fileNameMapper)); } /** * Creates a new data source from a directory containing FHIR Bundles. Takes an argument that * specifies the resource types that should be extracted from the bundles and added to the data * source. *

* If the MIME type is "application/fhir+xml", then the bundles are expected to be in XML format, * and the file extensions are expected to be ".xml". If the MIME type is "application/fhir+json", * then the bundles are expected to be in JSON format, and the file extensions are expected to be * ".json". * * @param path the URI of the directory containing the bundles * @param resourceTypes the resource types to extract from the bundles * @param mimeType the MIME type of the bundles * @return the new data source */ @Nonnull public BundlesSource bundles(@Nullable final String path, @Nullable final Set resourceTypes, @Nullable final String mimeType) { final Set resourceTypeEnums = requireNonNull(resourceTypes).stream() .map(ResourceType::fromCode) .collect(toSet()); return new BundlesSource(context, requireNonNull(path), requireNonNull(mimeType), resourceTypeEnums); } /** * Creates a new data source from Spark datasets. * * @return a {@link DatasetSource}, which can then be populated with datasets that are mapped to * the resource types that they contain, using the {@link DatasetSource#dataset} method. */ @Nonnull public DatasetSource datasets() { return new DatasetSource(context); } /** * Creates a new data source form a directory containing Parquet-encoded FHIR resource data, with * filenames representing the resource type the file/directory contains, e.g. 'Patient.parquet' * should contain Patient resources. * * @param path the URI of the directory containing the Parquet files/directories * @return the new data source */ @Nonnull public ParquetSource parquet(@Nullable final String path) { return new ParquetSource(context, requireNonNull(path)); } /** * Creates a new data source from a Delta warehouse. 
* * @param path the location of the Delta warehouse * @return the new data source */ @Nonnull public DeltaSource delta(@Nullable final String path) { return new DeltaSource(context, requireNonNull(path)); } /** * Creates a new data source from a tables registered within the catalog. The table names are * assumed to be the same as the resource types they contain. * * @return the new data source */ @Nonnull public CatalogSource tables() { return new CatalogSource(context); } /** * Creates a new data source from a specified set of tables registered within the catalog. The * table names are assumed to be the same as the resource types they contain. The schema from * which the tables are read is specified. * * @param schema the schema from which the tables are read * @return the new data source */ @Nonnull public CatalogSource tables(@Nullable final String schema) { return new CatalogSource(context, requireNonNull(schema)); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy