All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.druid.data.input.InputFormat Maven / Gradle / Ivy

There is a newer version: 30.0.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.data.input;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonSubTypes.Type;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import org.apache.druid.data.input.impl.CsvInputFormat;
import org.apache.druid.data.input.impl.DelimitedInputFormat;
import org.apache.druid.data.input.impl.JsonInputFormat;
import org.apache.druid.data.input.impl.NestedInputFormat;
import org.apache.druid.data.input.impl.RegexInputFormat;
import org.apache.druid.data.input.impl.SplittableInputSource;
import org.apache.druid.guice.annotations.UnstableApi;
import org.apache.druid.utils.CompressionUtils;

import java.io.File;

/**
 * Abstraction over the file format of input data.
 * An InputFormat produces an {@link InputEntityReader}, which reads the data and parses it
 * into {@link InputRow}s. The reader it produces is consumed by {@link InputSourceReader}.
 * <p>
 * Nested input formats such as JSON are covered by {@link NestedInputFormat}.
 */
@UnstableApi
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = InputFormat.TYPE_PROPERTY)
@JsonSubTypes({
    @Type(name = CsvInputFormat.TYPE_KEY, value = CsvInputFormat.class),
    @Type(name = JsonInputFormat.TYPE_KEY, value = JsonInputFormat.class),
    @Type(name = RegexInputFormat.TYPE_KEY, value = RegexInputFormat.class),
    @Type(name = DelimitedInputFormat.TYPE_KEY, value = DelimitedInputFormat.class)
})
public interface InputFormat
{
  /**
   * Name of the JSON property that carries the subtype name used to pick the concrete format.
   */
  String TYPE_PROPERTY = "type";

  /**
   * Trait indicating that a file of this format can be split into multiple {@link InputSplit}s.
   * <p>
   * Nothing calls this method at the moment; it is meant to be taken into account by
   * {@link SplittableInputSource#createSplits} in the future.
   */
  @SuppressWarnings("unused")
  @JsonIgnore
  boolean isSplittable();

  /**
   * Creates the {@link InputEntityReader} for a single input entity.
   *
   * @param inputRowSchema     schema handed to the reader
   * @param source             the entity to read from
   * @param temporaryDirectory directory handed to the reader; presumably scratch space for
   *                           intermediate files (confirm against implementations)
   *
   * @return a reader over {@code source}
   */
  InputEntityReader createReader(
      InputRowSchema inputRowSchema,
      InputEntity source,
      File temporaryDirectory
  );

  /**
   * Computes the weighted size of a given input object of the underlying input format type,
   * weighted for its cost during ingestion. The weight depends on the format type and on the
   * compression type ({@link CompressionUtils.Format}), if any is used. Uncompressed newline
   * delimited json is the baseline with scale factor 1: the byte weight such an input object
   * contributes towards ingestion is its file size taken as is, 1:1.
   * <p>
   * This default implementation returns {@code size} unchanged.
   *
   * @param path The path of the input object. Used to tell whether any compression is used.
   * @param size The size of the input object in bytes.
   *
   * @return The weighted size of the input object.
   */
  default long getWeightedSize(String path, long size)
  {
    return size;
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy