// za.co.absa.cobrix.spark.cobol.source.parameters.CobolParameters.scala
/*
 * Copyright 2018-2019 ABSA Group Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package za.co.absa.cobrix.spark.cobol.source.parameters

import za.co.absa.cobrix.spark.cobol.reader.parameters.MultisegmentParameters
import za.co.absa.cobrix.spark.cobol.schema.SchemaRetentionPolicy.SchemaRetentionPolicy
import za.co.absa.cobrix.cobol.parser.decoders.StringTrimmingPolicy.StringTrimmingPolicy

/**
  * Container for the parameters of a job.
  *
  * The copybook has to be supplied through one of `copybookPath`, `multiCopybookPath` or `copybookContent`.
  *
  * @param copybookPath           Path to the copybook in a given file system.
  * @param multiCopybookPath      Paths to the copybooks.
  * @param copybookContent        The actual content of the copybook. Either this, `copybookPath`, or `multiCopybookPath` must be specified.
  * @param sourcePath             Path to the Cobol file to be parsed.
  * @param isEbcdic               `true` when the input data file is encoded in EBCDIC, `false` when it is ASCII.
  * @param ebcdicCodePage         The code page to use for EBCDIC to ASCII/Unicode conversions.
  * @param ebcdicCodePageClass    An optional user-provided custom code page conversion class.
  * @param recordStartOffset      The number of bytes to skip at the beginning of a record before parsing it according to a copybook.
  * @param recordEndOffset        The number of bytes to skip at the end of each record.
  * @param variableLengthParams   Specifications for the consumption of variable-length Cobol records.
  * @param schemaRetentionPolicy  A copybook usually has a root group struct element that acts like a rowtag in XML. This policy tells whether it is retained in the Spark schema or collapsed.
  * @param stringTrimmingPolicy   Whether and how strings should be trimmed when parsed.
  * @param multisegmentParams     Parameters for reading multisegment mainframe files.
  * @param dropGroupFillers       When `true` the parser drops all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields.
  * @param nonTerminals           The non-terminals (GROUPS) to combine and parse as primitive fields.
  * @param debugIgnoreFileSize    When `true` the fixed length file reader won't check file size divisibility. Useful for debugging binary file / copybook mismatches.
  */
case class CobolParameters(
  copybookPath: Option[String],
  multiCopybookPath: Seq[String],
  copybookContent: Option[String],
  sourcePath: Option[String],
  isEbcdic: Boolean,
  ebcdicCodePage: String,
  ebcdicCodePageClass: Option[String],
  recordStartOffset: Int,
  recordEndOffset: Int,
  variableLengthParams: Option[VariableLengthParameters],
  schemaRetentionPolicy: SchemaRetentionPolicy,
  stringTrimmingPolicy: StringTrimmingPolicy,
  multisegmentParams: Option[MultisegmentParameters],
  dropGroupFillers: Boolean,
  nonTerminals: Seq[String],
  debugIgnoreFileSize: Boolean
)