All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.univocity.parsers.fixed.FixedWidthParserSettings Maven / Gradle / Ivy

Go to download

uniVocity's open source parsers for processing different text formats using a consistent API

There is a newer version: 2.9.1
Show newest version
/*******************************************************************************
 * Copyright 2014 Univocity Software Pty Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.univocity.parsers.fixed;

import com.univocity.parsers.annotations.*;
import com.univocity.parsers.annotations.helpers.*;
import com.univocity.parsers.common.*;
import com.univocity.parsers.common.input.*;

import java.util.*;

/**
 * This is the configuration class used by the Fixed-Width parser ({@link FixedWidthParser})
 *
 * 

In addition to the configuration options provided by {@link CommonParserSettings}, the FixedWidthParserSettings include: * *

    *
  • skipTrailingCharsUntilNewline (defaults to false): Indicates whether or not any trailing characters beyond the record's length should be skipped until the newline is reached *

    For example, if the record length is 5, but the row contains "12345678\n", then portion containing "678" will be discarded and not considered part of the next record

  • *
  • recordEndsOnNewline (defaults to false): Indicates whether or not a record is considered parsed when a newline is reached. *

    For example, if recordEndsOnNewline is set to true, then given a record of length 4, and the input "12\n3456", the parser will identify [12] and [3456] *

    If recordEndsOnNewline is set to false, then given a record of length 4, and the input "12\n3456", the parser will identify a multi-line record [12\n3] and [456 ]

  • *
* *

The FixedWidthParserSettings need a definition of the field lengths of each record in the input. This must provided using an instance of {@link FixedWidthFields}. * * @author Univocity Software Pty Ltd - [email protected] * @see com.univocity.parsers.fixed.FixedWidthParser * @see com.univocity.parsers.fixed.FixedWidthFormat * @see FixedWidthFields * @see com.univocity.parsers.common.CommonParserSettings */ public class FixedWidthParserSettings extends CommonParserSettings { protected boolean skipTrailingCharsUntilNewline = false; protected boolean recordEndsOnNewline = false; private boolean useDefaultPaddingForHeaders = true; private FixedWidthFields fieldLengths; private Map lookaheadFormats = new HashMap(); private Map lookbehindFormats = new HashMap(); /** * You can only create an instance of this class by providing a definition of the field lengths of each record in the input. *

This must provided using an instance of {@link FixedWidthFields}. * * @param fieldLengths the instance of {@link FixedWidthFields} which provides the lengths of each field in the fixed-width records to be parsed * * @see FixedWidthFields */ public FixedWidthParserSettings(FixedWidthFields fieldLengths) { if (fieldLengths == null) { throw new IllegalArgumentException("Field lengths cannot be null"); } this.fieldLengths = fieldLengths; String[] names = fieldLengths.getFieldNames(); if (names != null) { setHeaders(names); } } /** * Creates a basic configuration object for the Fixed-Width parser with no field length configuration. * This constructor is intended to be used when the record length varies depending of the input row. * Refer to {@link #addFormatForLookahead(String, FixedWidthFields)}, {@link #addFormatForLookbehind(String, FixedWidthFields)} */ public FixedWidthParserSettings() { fieldLengths = null; } /** * Returns the sequence of lengths to be read by the parser to form a record. * * @return the sequence of lengths to be read by the parser to form a record. */ int[] getFieldLengths() { if (fieldLengths == null) { return null; } return fieldLengths.getFieldLengths(); } int[] getAllLengths() { if (fieldLengths == null) { return null; } return fieldLengths.getAllLengths(); } /** * Returns the sequence of paddings used by each field of each record. * * @return the sequence of paddings used by each field of each record. */ char[] getFieldPaddings() { if (fieldLengths == null) { return null; } return fieldLengths.getFieldPaddings(this.getFormat()); } /** * Returns the sequence of fields to ignore. * * @return the sequence of fields to ignore. */ boolean[] getFieldsToIgnore() { if (fieldLengths == null) { return null; } return fieldLengths.getFieldsToIgnore(); } /** * Returns the sequence of alignments to consider for each field of each record. * * @return the sequence of alignments to consider for each field of each record. */ FieldAlignment[] getFieldAlignments() { if (fieldLengths == null) { return null; } return fieldLengths.getFieldAlignments(); } /** * Indicates whether or not any trailing characters beyond the record's length should be skipped until the newline is reached (defaults to false) *

For example, if the record length is 5, but the row contains "12345678\n", then the portion containing "678\n" will be discarded and not considered part of the next record * * @return returns true if any trailing characters beyond the record's length should be skipped until the newline is reached, false otherwise */ public boolean getSkipTrailingCharsUntilNewline() { return skipTrailingCharsUntilNewline; } /** * Defines whether or not any trailing characters beyond the record's length should be skipped until the newline is reached (defaults to false) *

For example, if the record length is 5, but the row contains "12345678\n", then the portion containing "678\n" will be discarded and not considered part of the next record * * @param skipTrailingCharsUntilNewline a flag indicating if any trailing characters beyond the record's length should be skipped until the newline is reached */ public void setSkipTrailingCharsUntilNewline(boolean skipTrailingCharsUntilNewline) { this.skipTrailingCharsUntilNewline = skipTrailingCharsUntilNewline; } /** * Indicates whether or not a record is considered parsed when a newline is reached. Examples: *

    *
  • Consider two records of length 4, and the input 12\n3456
  • *
  • When {@link FixedWidthParserSettings#recordEndsOnNewline} is set to true: the first value will be read as 12 and the second 3456
  • *
  • When {@link FixedWidthParserSettings#recordEndsOnNewline} is set to false: the first value will be read as 12\n3 and the second 456
  • *
*

Defaults to false * * @return true if a record should be considered parsed when a newline is reached; false otherwise */ public boolean getRecordEndsOnNewline() { return recordEndsOnNewline; } /** * Defines whether or not a record is considered parsed when a newline is reached. Examples: *

    *
  • Consider two records of length 4, and the input 12\n3456
  • *
  • When {@link FixedWidthParserSettings#recordEndsOnNewline} is set to true: the first value will be read as 12 and the second 3456
  • *
  • When {@link FixedWidthParserSettings#recordEndsOnNewline} is set to false: the first value will be read as 12\n3 and the second 456
  • *
* * @param recordEndsOnNewline a flag indicating whether or not a record is considered parsed when a newline is reached */ public void setRecordEndsOnNewline(boolean recordEndsOnNewline) { this.recordEndsOnNewline = recordEndsOnNewline; } /** * Returns the default FixedWidthFormat configured to handle Fixed-Width inputs * * @return and instance of FixedWidthFormat configured to handle Fixed-Width inputs */ @Override protected FixedWidthFormat createDefaultFormat() { return new FixedWidthFormat(); } /** * Returns an instance of CharAppender with the configured limit of maximum characters per column and, default value used to represent a null value (when the String parsed from the input is empty), and the padding character to handle unwritten positions * *

This overrides the parent implementation to create a CharAppender capable of handling padding characters that represent unwritten positions. * * @return an instance of CharAppender with the configured limit of maximum characters per column and, default value used to represent a null value (when the String parsed from the input is empty), and the padding character to handle unwritten positions */ @Override protected CharAppender newCharAppender() { return new DefaultCharAppender(getMaxCharsPerColumn(), getNullValue(), getWhitespaceRangeStart()); } /** * The maximum number of characters allowed for any given value being written/read. Used to avoid OutOfMemoryErrors (defaults to a minimum of 4096 characters). * *

This overrides the parent implementation and calculates the absolute minimum number of characters required to store the values of a record *

If the sum of all field lengths is greater than the configured maximum number of characters per column, the calculated amount will be returned. * * @return The maximum number of characters allowed for any given value being written/read */ @Override public int getMaxCharsPerColumn() { int max = super.getMaxCharsPerColumn(); int minimum = 0; for (int length : calculateMaxFieldLengths()) { //adding 2 to give room for line breaks in every record (e.g. "\r\n"). minimum += length + 2; } return max > minimum ? max : minimum; } /** * Returns the hard limit of how many columns a record can have (defaults to a maximum of 512). * You need this to avoid OutOfMemory errors in case of inputs that might be inconsistent with the format you are dealing with. * *

This overrides the parent implementation and calculates the absolute minimum number of columns required to store the values of a record *

If the sum of all fields is greater than the configured maximum number columns, the calculated amount will be returned. * * @return The maximum number of columns a record can have. */ @Override public int getMaxColumns() { int max = super.getMaxColumns(); int minimum = calculateMaxFieldLengths().length; return max > minimum ? max : minimum; } private int[] calculateMaxFieldLengths() { return Lookup.calculateMaxFieldLengths(fieldLengths, lookaheadFormats, lookbehindFormats); } Lookup[] getLookaheadFormats() { return Lookup.getLookupFormats(lookaheadFormats, getFormat()); } Lookup[] getLookbehindFormats() { return Lookup.getLookupFormats(lookbehindFormats, getFormat()); } /** * Defines the format of records identified by a lookahead symbol. * * @param lookahead the lookahead value that when found in the input, * will notify the parser to switch to a new record format, with different field lengths * @param lengths the field lengths of the record format identified by the given lookahead symbol. */ public void addFormatForLookahead(String lookahead, FixedWidthFields lengths) { Lookup.registerLookahead(lookahead, lengths, lookaheadFormats); } /** * Defines the format of records identified by a lookbehind symbol. * * @param lookbehind the lookbehind value that when found in the previous input row, * will notify the parser to switch to a new record format, with different field lengths * @param lengths the field lengths of the record format identified by the given lookbehind symbol. */ public void addFormatForLookbehind(String lookbehind, FixedWidthFields lengths) { Lookup.registerLookbehind(lookbehind, lengths, lookbehindFormats); } /** * Indicates whether headers should be parsed using the default padding specified in {@link FixedWidthFormat#getPadding()} * instead of any custom padding associated with a given field (in {@link FixedWidthFields#setPadding(char, int...)}) * Defaults to {@code true} * * @return {@code true} if the default padding is to be used when reading headers, otherwise {@code false} */ public boolean getUseDefaultPaddingForHeaders() { return useDefaultPaddingForHeaders; } /** * Defines whether headers should be parsed using the default padding specified in {@link FixedWidthFormat#getPadding()} * instead of any custom padding associated with a given field (in {@link FixedWidthFields#setPadding(char, int...)}) * * @param useDefaultPaddingForHeaders flag indicating whether the default padding is to be used when parsing headers */ public void setUseDefaultPaddingForHeaders(boolean useDefaultPaddingForHeaders) { this.useDefaultPaddingForHeaders = useDefaultPaddingForHeaders; } @Override protected void configureFromAnnotations(Class beanClass) { if (fieldLengths == null) { try { fieldLengths = FixedWidthFields.forParsing(beanClass); Headers headerAnnotation = AnnotationHelper.findHeadersAnnotation(beanClass); if (headerExtractionEnabled == null && headerAnnotation != null) { setHeaderExtractionEnabled(headerAnnotation.extract()); } } catch (IllegalArgumentException e) { throw e; } catch (Exception ex) { //ignore. } } if (headerExtractionEnabled == null) { setHeaderExtractionEnabled(false); } super.configureFromAnnotations(beanClass); if (!isHeaderExtractionEnabled()) { FixedWidthFields.setHeadersIfPossible(fieldLengths, this); } } @Override protected void addConfiguration(Map out) { super.addConfiguration(out); out.put("Skip trailing characters until new line", skipTrailingCharsUntilNewline); out.put("Record ends on new line", recordEndsOnNewline); out.put("Field lengths", fieldLengths == null ? "" : fieldLengths.toString()); out.put("Lookahead formats", lookaheadFormats); out.put("Lookbehind formats", lookbehindFormats); } /** * Clones this configuration object to reuse all user-provided settings, including the fixed-width field configuration. * * @return a copy of all configurations applied to the current instance. */ @Override public final FixedWidthParserSettings clone() { return (FixedWidthParserSettings) super.clone(); } /** * Clones this configuration object to reuse most user-provided settings. This includes the fixed-width field configuration, * but doesn't include other input-specific settings. This method is meant to be used internally only. * * @return a copy of all configurations applied to the current instance. * * @deprecated doesn't really make sense for fixed-width. Use alternative method {@link #clone(FixedWidthFields)}. */ @Deprecated protected final FixedWidthParserSettings clone(boolean clearInputSpecificSettings) { return clone(clearInputSpecificSettings, fieldLengths == null ? null : fieldLengths.clone()); } /** * Clones this configuration object to reuse most user-provided settings. Properties that are specific to a given * input (such as header names and selection of fields) will be reset to their defaults. * * To obtain a full copy, use {@link #clone()}. * * @param fields the fixed-width field configuration to be used by the cloned settings object. * * @return a copy of the general configurations applied to the current instance. */ public final FixedWidthParserSettings clone(FixedWidthFields fields) { return clone(true, fields); } private FixedWidthParserSettings clone(boolean clearInputSpecificSettings, FixedWidthFields fields) { FixedWidthParserSettings out = (FixedWidthParserSettings) super.clone(clearInputSpecificSettings); out.fieldLengths = fields; if (clearInputSpecificSettings) { out.lookaheadFormats = new HashMap(); out.lookbehindFormats = new HashMap(); } else { out.lookaheadFormats = new HashMap(this.lookaheadFormats); out.lookbehindFormats = new HashMap(this.lookbehindFormats); } return out; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy