/*
* Copyright 2018-2019 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package za.co.absa.cobrix.spark.cobol.reader.parameters

import za.co.absa.cobrix.cobol.parser.decoders.StringTrimmingPolicy
import za.co.absa.cobrix.cobol.parser.decoders.StringTrimmingPolicy.StringTrimmingPolicy
import za.co.absa.cobrix.spark.cobol.schema.SchemaRetentionPolicy
import za.co.absa.cobrix.spark.cobol.schema.SchemaRetentionPolicy.SchemaRetentionPolicy

/**
* These are properties for customizing the mainframe binary data reader.
*
* @param isEbcdic If true the input data file encoding is EBCDIC, otherwise it is ASCII
* @param ebcdicCodePage Specifies what code page to use for EBCDIC to ASCII/Unicode conversions
* @param ebcdicCodePageClass An optional custom code page conversion class provided by a user
* @param lengthFieldName A name of a field that contains the record length. Optional. If not set, the copybook record length will be used.
* @param isRecordSequence Do input files have 4-byte record length headers?
* @param isRdwBigEndian Is RDW big endian? It may depend on flavor of mainframe and/or mainframe to PC transfer method
* @param isRdwPartRecLength Does the RDW count itself as part of the record length?
* @param rdwAdjustment Controls a mismatch between RDW and record length
* @param isIndexGenerationNeeded Is indexing of the input file requested before processing?
* @param inputSplitRecords The number of records to include in each partition. Note that mainframe records may have variable size, so inputSplitSizeMB is the recommended option
* @param inputSplitSizeMB A partition size to target. In certain circumstances the actual size may differ, but the library will make its best effort to target that size
* @param hdfsDefaultBlockSize Default HDFS block size for the HDFS filesystem used. This value is used as the default split size if inputSplitSizeMB is not specified
* @param startOffset An offset to the start of the record in each binary data block.
* @param endOffset An offset from the end of the record to the end of the binary data block.
* @param fileStartOffset A number of bytes to skip at the beginning of each file
* @param fileEndOffset A number of bytes to skip at the end of each file
* @param generateRecordId If true, a record id field will be prepended to each record.
* @param schemaPolicy Specifies a policy to transform the input schema. The default policy is to keep the schema exactly as it is in the copybook.
* @param stringTrimmingPolicy Specifies if and how strings should be trimmed when parsed.
* @param multisegment Parameters specific to reading multisegment files
* @param dropGroupFillers If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields
* @param nonTerminals A list of non-terminals (GROUPS) to combine and parse as primitive fields
* @param recordHeaderParser A parser used to parse data field record headers
* @param rhpAdditionalInfo An optional additional option string passed to a custom record header parser
*/
case class ReaderParameters(
    isEbcdic: Boolean = true,
    ebcdicCodePage: String,
    ebcdicCodePageClass: Option[String],
    lengthFieldName: Option[String] = None,
    isRecordSequence: Boolean = false,
    isRdwBigEndian: Boolean = false,
    isRdwPartRecLength: Boolean = false,
    rdwAdjustment: Int = 0,
    isIndexGenerationNeeded: Boolean = false,
    inputSplitRecords: Option[Int] = None,
    inputSplitSizeMB: Option[Int] = None,
    hdfsDefaultBlockSize: Option[Int] = None,
    startOffset: Int = 0,
    endOffset: Int = 0,
    fileStartOffset: Int = 0,
    fileEndOffset: Int = 0,
    generateRecordId: Boolean = false,
    schemaPolicy: SchemaRetentionPolicy = SchemaRetentionPolicy.KeepOriginal,
    stringTrimmingPolicy: StringTrimmingPolicy = StringTrimmingPolicy.TrimBoth,
    multisegment: Option[MultisegmentParameters],
    dropGroupFillers: Boolean,
    nonTerminals: Seq[String],
    recordHeaderParser: Option[String],
    rhpAdditionalInfo: Option[String]
)
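
// Illustrative sketch (not part of the original source): a minimal way to construct
// ReaderParameters for a fixed-length EBCDIC file. Only parameters without defaults
// are supplied; the "common" code page string and the other values shown are
// assumptions chosen for demonstration, not prescribed by this file.
//
// val exampleParams = ReaderParameters(
//   ebcdicCodePage = "common",
//   ebcdicCodePageClass = None,
//   multisegment = None,
//   dropGroupFillers = false,
//   nonTerminals = Seq.empty,
//   recordHeaderParser = None,
//   rhpAdditionalInfo = None
// )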