All downloads are free. The search and download functionality uses the official Maven repository.

au.csiro.pathling.library.io.source.ParquetSource Maven / Gradle / Ivy

There is a newer version: 7.0.1
Show newest version
/*
 * Copyright 2023 Commonwealth Scientific and Industrial Research
 * Organisation (CSIRO) ABN 41 687 119 230.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package au.csiro.pathling.library.io.source;

import au.csiro.pathling.library.PathlingContext;
import java.util.Collections;
import javax.annotation.Nonnull;

/**
 * A class for making FHIR data in Parquet format available for query. It is assumed that the schema
 * of the Parquet files aligns with that of the Pathling FHIR encoders.
 *
 * @author John Grimes
 * @author Piotr Szul
 */
public class ParquetSource extends FileSource {

  /**
   * Single name used for both the Spark reader format and the expected file extension.
   */
  private static final String PARQUET = "parquet";

  /**
   * Builds a source that reads Parquet data by delegating to the {@link FileSource} constructor
   * with Parquet-specific settings.
   *
   * @param context the Pathling context; its Spark session supplies the Parquet reader
   * @param path the path passed through unchanged to {@link FileSource}
   */
  public ParquetSource(@Nonnull final PathlingContext context, @Nonnull final String path) {
    super(context, path,
        // Each file name is taken to be exactly one resource type.
        Collections::singleton,
        // Files are expected to carry the "parquet" extension.
        PARQUET,
        context.getSpark().read().format(PARQUET),
        // Pass the data through untouched: it is assumed to have been written by the Pathling
        // FHIR encoders already, so no further transformation is applied.
        (dataset, type) -> dataset);
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy