All downloads are free. Search and download functionality uses the official Maven repository.

com.spotify.parquet.tensorflow.TensorflowExampleParquetInputFormat Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023 Spotify AB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.spotify.parquet.tensorflow;

import org.apache.hadoop.mapreduce.Job;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.parquet.hadoop.util.ContextUtil;
import org.tensorflow.metadata.v0.Schema;
import org.tensorflow.proto.example.Example;

/**
 * Parquet input format typed to TensorFlow {@link Example} records.
 *
 * <p>The static helpers on this class take a Hadoop {@link Job}, resolve its configuration via
 * {@link ContextUtil#getConfiguration}, and hand the setting to {@code
 * TensorflowExampleReadSupport}.
 */
public class TensorflowExampleParquetInputFormat extends ParquetInputFormat<Example> {

  /**
   * Set the subset of columns to read (projection pushdown). Specified as a TensorFlow schema, the
   * requested projection is converted into a Parquet schema for Parquet column projection.
   *
   * <p>This is useful if the full schema is large and you only want to read a few columns, since
   * it saves time by not reading unused columns.
   *
   * <p>If a requested projection is set, then the TensorFlow schema used for reading must be
   * compatible with the projection. Use {@link
   * #setExampleReadSchema(org.apache.hadoop.mapreduce.Job, org.tensorflow.metadata.v0.Schema)} to
   * set a read schema, if needed.
   *
   * @param job a job
   * @param requestedProjection the requested projection schema
   * @see #setExampleReadSchema(org.apache.hadoop.mapreduce.Job, org.tensorflow.metadata.v0.Schema)
   * @see TensorflowExampleParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job,
   *     org.tensorflow.metadata.v0.Schema)
   */
  public static void setRequestedProjection(Job job, Schema requestedProjection) {
    // The setting is applied to the configuration that backs the job.
    TensorflowExampleReadSupport.setRequestedProjection(
        ContextUtil.getConfiguration(job), requestedProjection);
  }

  /**
   * Override the TensorFlow schema to use for reading. If not set, the TensorFlow schema used for
   * writing is used.
   *
   * @param job a job
   * @param tfReadSchema the requested schema
   * @see #setRequestedProjection(org.apache.hadoop.mapreduce.Job,
   *     org.tensorflow.metadata.v0.Schema)
   * @see TensorflowExampleParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job,
   *     org.tensorflow.metadata.v0.Schema)
   */
  public static void setExampleReadSchema(Job job, Schema tfReadSchema) {
    // The setting is applied to the configuration that backs the job.
    TensorflowExampleReadSupport.setExampleReadSchema(
        ContextUtil.getConfiguration(job), tfReadSchema);
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy