org.pentaho.di.trans.steps.fixedinput.FixedFileInputField Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kettle-engine Show documentation
Show all versions of kettle-engine Show documentation
Container pom for Pentaho Data Integration modules
The newest version!
/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.fixedinput;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Objects;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaFactory;
import org.pentaho.di.core.row.value.ValueMetaString;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.core.xml.XMLInterface;
import org.w3c.dom.Node;
/**
 * Describes one field (column) of a fixed-width input file: its name, value type, width, length, precision,
 * format mask, trim type and number symbols.
 *
 * <p>
 * A field can be restored from an XML node and serialized back with {@link #getXML()}. Given a set of sample
 * strings ({@link #setSamples(String[])}), {@link #guessType()} tries to work out whether the field holds dates,
 * numbers or plain strings.
 *
 * <p>
 * Two fields are considered equal when their names match, ignoring case.
 *
 * <p>
 * NOTE(review): the class is declared {@link Cloneable} but defines no public {@code clone()} method here.
 */
public class FixedFileInputField implements Cloneable, XMLInterface {
  /** Name of the XML element that wraps one serialized field. */
  public static final String XML_TAG = "field";

  /** Date patterns tried, in order of preference, by {@link #guessType()}. */
  private static final String[] DATE_FORMATS = new String[] {
    "yyyy/MM/dd HH:mm:ss.SSS", "yyyy/MM/dd HH:mm:ss", "dd/MM/yyyy", "dd-MM-yyyy", "yyyy/MM/dd", "yyyy-MM-dd",
    "yyyyMMdd", "ddMMyyyy", "d-M-yyyy", "d/M/yyyy", "d-M-yy", "d/M/yy", };

  /** Number patterns tried, in order of preference, by {@link #guessType()}. */
  private static final String[] NUMBER_FORMATS = new String[] {
    "", "#", Const.DEFAULT_NUMBER_FORMAT, "0.00", "0000000000000", "###,###,###.#######",
    "###############.###############", "#####.###############%", };

  /** Years outside this range make a parsed date implausible for type guessing. */
  private static final int MIN_PLAUSIBLE_YEAR = 1800;
  private static final int MAX_PLAUSIBLE_YEAR = 2200;

  private String name;
  private int type;
  private int width;
  private int length;
  private int precision;
  private String format;
  private int trimType;
  private String decimal;
  private String grouping;
  private String currency;
  /** Sample values used only by {@link #guessType()}; not serialized by {@link #getXML()}. */
  private String[] samples;

  /**
   * Restores a field from its XML representation.
   *
   * <p>
   * Width, length and precision are converted with {@code Const.toInt( ..., -1 )}.
   *
   * @param fnode
   *          the XML node holding the field's child tags
   */
  public FixedFileInputField( Node fnode ) {
    name = XMLHandler.getTagValue( fnode, "name" );
    type = ValueMetaFactory.getIdForValueMeta( XMLHandler.getTagValue( fnode, "type" ) );
    format = XMLHandler.getTagValue( fnode, "format" );
    trimType = ValueMetaString.getTrimTypeByCode( XMLHandler.getTagValue( fnode, "trim_type" ) );
    currency = XMLHandler.getTagValue( fnode, "currency" );
    decimal = XMLHandler.getTagValue( fnode, "decimal" );
    grouping = XMLHandler.getTagValue( fnode, "group" );
    width = Const.toInt( XMLHandler.getTagValue( fnode, "width" ), -1 );
    length = Const.toInt( XMLHandler.getTagValue( fnode, "length" ), -1 );
    precision = Const.toInt( XMLHandler.getTagValue( fnode, "precision" ), -1 );
  }

  /**
   * Creates a String field with length and precision set to -1 and no trimming.
   */
  public FixedFileInputField() {
    type = ValueMetaInterface.TYPE_STRING;
    length = -1;
    precision = -1;
    trimType = ValueMetaInterface.TRIM_TYPE_NONE;
  }

  /**
   * For testing only.
   */
  protected FixedFileInputField( String name ) {
    this();
    this.name = name;
  }

  /**
   * Compares two fields by name, ignoring case.
   *
   * @param obj
   *          the object to compare with; {@code null} and objects of other types are never equal
   * @return {@code true} when both names are {@code null} or match ignoring case
   */
  @Override
  public boolean equals( Object obj ) {
    if ( this == obj ) {
      return true;
    }
    if ( !( obj instanceof FixedFileInputField ) ) {
      return false;
    }
    String otherName = ( (FixedFileInputField) obj ).name;
    return name == null ? otherName == null : name.equalsIgnoreCase( otherName );
  }

  /**
   * Hashes the case-folded name so that fields that are {@link #equals(Object) equal} (case-insensitively) always
   * share the same hash code.
   */
  @Override
  public int hashCode() {
    return Objects.hashCode( foldCase( name ) );
  }

  /**
   * Folds every code point the same way {@link String#equalsIgnoreCase(String)} compares them (upper-case first,
   * then lower-case), so two strings that are equal ignoring case fold to the same text.
   */
  private static String foldCase( String text ) {
    if ( text == null ) {
      return null;
    }
    StringBuilder folded = new StringBuilder( text.length() );
    for ( int i = 0; i < text.length(); ) {
      int codePoint = text.codePointAt( i );
      folded.appendCodePoint( Character.toLowerCase( Character.toUpperCase( codePoint ) ) );
      i += Character.charCount( codePoint );
    }
    return folded.toString();
  }

  /**
   * Serializes this field as an XML fragment wrapped in a {@link #XML_TAG} element.
   *
   * @return the XML text for this field
   */
  public String getXML() {
    StringBuilder retval = new StringBuilder( 300 );
    retval.append( " " ).append( XMLHandler.openTag( XML_TAG ) ).append( Const.CR );
    retval.append( " " ).append( XMLHandler.addTagValue( "name", name ) );
    retval.append( " " ).append( XMLHandler.addTagValue( "type",
      ValueMetaFactory.getValueMetaName( type ) ) );
    retval.append( " " ).append( XMLHandler.addTagValue( "format", format ) );
    retval.append( " " ).append(
      XMLHandler.addTagValue( "trim_type", ValueMetaString.getTrimTypeCode( trimType ) ) );
    retval.append( " " ).append( XMLHandler.addTagValue( "currency", currency ) );
    retval.append( " " ).append( XMLHandler.addTagValue( "decimal", decimal ) );
    retval.append( " " ).append( XMLHandler.addTagValue( "group", grouping ) );
    retval.append( " " ).append( XMLHandler.addTagValue( "width", width ) );
    retval.append( " " ).append( XMLHandler.addTagValue( "length", length ) );
    retval.append( " " ).append( XMLHandler.addTagValue( "precision", precision ) );
    retval.append( " " ).append( XMLHandler.closeTag( XML_TAG ) ).append( Const.CR );
    return retval.toString();
  }

  /**
   * @return the name
   */
  public String getName() {
    return name;
  }

  /**
   * @param name
   *          the name to set
   */
  public void setName( String name ) {
    this.name = name;
  }

  /**
   * @return the type
   */
  public int getType() {
    return type;
  }

  /**
   * @param type
   *          the type to set
   */
  public void setType( int type ) {
    this.type = type;
  }

  /**
   * @return the width
   */
  public int getWidth() {
    return width;
  }

  /**
   * @param width
   *          the width to set
   */
  public void setWidth( int width ) {
    this.width = width;
  }

  /**
   * @return the length
   */
  public int getLength() {
    return length;
  }

  /**
   * @param length
   *          the length to set
   */
  public void setLength( int length ) {
    this.length = length;
  }

  /**
   * @return the precision
   */
  public int getPrecision() {
    return precision;
  }

  /**
   * @param precision
   *          the precision to set
   */
  public void setPrecision( int precision ) {
    this.precision = precision;
  }

  /**
   * @return the format
   */
  public String getFormat() {
    return format;
  }

  /**
   * @param format
   *          the format to set
   */
  public void setFormat( String format ) {
    this.format = format;
  }

  /**
   * @return the decimal
   */
  public String getDecimal() {
    return decimal;
  }

  /**
   * @param decimal
   *          the decimal to set
   */
  public void setDecimal( String decimal ) {
    this.decimal = decimal;
  }

  /**
   * @return the grouping
   */
  public String getGrouping() {
    return grouping;
  }

  /**
   * @param grouping
   *          the grouping to set
   */
  public void setGrouping( String grouping ) {
    this.grouping = grouping;
  }

  /**
   * @return the currency
   */
  public String getCurrency() {
    return currency;
  }

  /**
   * @param currency
   *          the currency to set
   */
  public void setCurrency( String currency ) {
    this.currency = currency;
  }

  /**
   * @param samples
   *          the sample values to base {@link #guessType()} on (the array is stored, not copied)
   */
  public void setSamples( String[] samples ) {
    this.samples = samples;
  }

  /**
   * @return the samples
   */
  public String[] getSamples() {
    return samples;
  }

  /**
   * Guesses the field settings from the samples; currently this only calls {@link #guessType()}.
   */
  public void guess() {
    guessType();
  }

  /**
   * Guesses the value type from the samples.
   *
   * <ul>
   * <li>No samples ({@code null}): the type becomes String and nothing else is touched.</li>
   * <li>Every sample parses with at least one common date pattern: the type becomes Date and the format is the
   * first pattern that fits all samples.</li>
   * <li>Every sample parses with at least one common number pattern: the type becomes Number; format, precision,
   * decimal and grouping symbols are set accordingly.</li>
   * <li>Otherwise (including an empty sample array or a {@code null}/blank sample): the type becomes String and
   * format, precision, decimal, grouping and currency are reset.</li>
   * </ul>
   */
  public void guessType() {
    // Start with a string...
    type = ValueMetaInterface.TYPE_STRING;

    // If we have no samples, we assume a String...
    if ( samples == null ) {
      return;
    }

    if ( guessDate() || guessNumber() ) {
      return;
    }

    //
    // Assume it's a string...
    //
    type = ValueMetaInterface.TYPE_STRING;
    format = "";
    precision = -1;
    decimal = "";
    grouping = "";
    currency = "";
  }

  /**
   * Tries to interpret all samples as dates; on success sets the type and format.
   *
   * @return {@code true} when at least one date pattern fits every sample
   */
  private boolean guessDate() {
    // Without any sample we can't really decide what the type is, so we're certainly not going to take a date.
    if ( samples.length == 0 ) {
      return false;
    }

    SimpleDateFormat daf = new SimpleDateFormat();
    daf.setLenient( false );
    Calendar cal = Calendar.getInstance();

    boolean[] candidate = new boolean[DATE_FORMATS.length];
    for ( int x = 0; x < candidate.length; x++ ) {
      candidate[x] = true;
    }

    for ( int i = 0; i < samples.length; i++ ) {
      String sample = samples[i];
      // A null or blank sample rules out every date pattern.
      if ( sample == null || Const.onlySpaces( sample ) || sample.length() == 0 ) {
        return false;
      }
      for ( int x = 0; x < DATE_FORMATS.length; x++ ) {
        if ( candidate[x] && !parsesAsPlausibleDate( daf, cal, DATE_FORMATS[x], sample ) ) {
          candidate[x] = false; // Don't try it again in the future.
        }
      }
    }

    int first = firstCandidate( candidate );
    if ( first < 0 ) {
      return false;
    }
    type = ValueMetaInterface.TYPE_DATE;
    format = DATE_FORMATS[first];
    return true;
  }

  /**
   * Checks whether the sample parses strictly with the pattern and lands in a plausible year.
   */
  private static boolean parsesAsPlausibleDate( SimpleDateFormat daf, Calendar cal, String pattern,
    String sample ) {
    try {
      daf.applyPattern( pattern );
      Date date = daf.parse( sample );
      cal.setTime( date );
      int year = cal.get( Calendar.YEAR );
      return year >= MIN_PLAUSIBLE_YEAR && year <= MAX_PLAUSIBLE_YEAR;
    } catch ( Exception e ) {
      // Best-effort guess: any failure simply means this pattern doesn't fit.
      return false;
    }
  }

  /**
   * Tries to interpret all samples as numbers; on success sets type, format and precision. The decimal and
   * grouping symbols are updated along the way (the caller resets them when no number is detected).
   *
   * @return {@code true} when at least one number pattern fits every sample
   */
  private boolean guessNumber() {
    DecimalFormat df = (DecimalFormat) NumberFormat.getInstance();
    DecimalFormatSymbols dfs = new DecimalFormatSymbols();

    // Set decimal symbols to default
    decimal = "" + dfs.getDecimalSeparator();
    grouping = "" + dfs.getGroupingSeparator();

    boolean[] candidate = new boolean[NUMBER_FORMATS.length];
    int[] maxprecision = new int[NUMBER_FORMATS.length];
    for ( int x = 0; x < candidate.length; x++ ) {
      candidate[x] = true;
      maxprecision[x] = -1;
    }

    for ( int i = 0; i < samples.length; i++ ) {
      String field = samples[i];
      // A missing sample can't be a number (and must not blow up with a NullPointerException).
      if ( field == null ) {
        return false;
      }

      boolean containsDot = false;
      boolean containsComma = false;
      for ( int x = 0; x < field.length(); x++ ) {
        char ch = field.charAt( x );
        boolean allowed = Character.isDigit( ch ) || ch == '.' || ch == ',' || ( ch == '-' && x == 0 )
          || ch == 'E' || ch == 'e'; // exponential
        if ( !allowed ) {
          return false;
        }
        containsDot |= ch == '.';
        containsComma |= ch == ',';
      }

      if ( containsDot && ( !containsComma || field.indexOf( '.' ) > field.indexOf( ',' ) ) ) {
        // American style 174.5 or 1,174.5: the dot shows up after the comma, so it is the decimal point
        applySeparators( dfs, '.', ',' );
      } else if ( containsComma ) {
        // European style 174,5 or 1.174,5
        applySeparators( dfs, ',', '.' );
      }

      // Try long integers first: only possible without any separator. The outcome does not depend on the
      // number pattern, so it is determined once per sample.
      boolean islong = false;
      if ( !containsDot && !containsComma ) {
        try {
          Long.parseLong( field );
          islong = true;
        } catch ( NumberFormatException e ) {
          islong = false;
        }
      }

      // Try the remaining possible number formats!
      for ( int x = 0; x < NUMBER_FORMATS.length; x++ ) {
        if ( !candidate[x] ) {
          continue;
        }
        int prec = 0;
        if ( !islong ) {
          // Try the double
          try {
            df.setDecimalFormatSymbols( dfs );
            df.applyPattern( NUMBER_FORMATS[x] );
            prec = guessPrecision( df.parse( field ).doubleValue() );
          } catch ( Exception e ) {
            candidate[x] = false; // Don't try it again in the future.
            continue;
          }
        }
        if ( prec > maxprecision[x] ) {
          maxprecision[x] = prec;
        }
      }
    }

    // If there are no samples at all we can't take a number as type.
    if ( samples.length == 0 ) {
      return false;
    }
    int first = firstCandidate( candidate );
    if ( first < 0 ) {
      return false;
    }
    type = ValueMetaInterface.TYPE_NUMBER;
    format = NUMBER_FORMATS[first];
    precision = maxprecision[first];
    return true;
  }

  /**
   * Records the given decimal and grouping symbols both on the parser symbols and on this field.
   */
  private void applySeparators( DecimalFormatSymbols dfs, char decimalSeparator, char groupingSeparator ) {
    dfs.setDecimalSeparator( decimalSeparator );
    decimal = String.valueOf( decimalSeparator );
    dfs.setGroupingSeparator( groupingSeparator );
    grouping = String.valueOf( groupingSeparator );
  }

  /**
   * @return the index of the first {@code true} entry, or -1 when there is none
   */
  private static int firstCandidate( boolean[] candidate ) {
    for ( int i = 0; i < candidate.length; i++ ) {
      if ( candidate[i] ) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Guesses the number of digits behind the decimal point of a value.
   *
   * <p>
   * The value is scaled by increasing powers of ten until it lies within 0.00005 of a whole number. The distance
   * is measured to the <em>nearest</em> whole number so that binary rounding errors on either side (7.99995 as well
   * as 8.00005) are treated as 8.
   *
   * @param d
   *          the value to inspect
   * @return the guessed precision (0 to 3), or -1 when it can't be determined within that range
   */
  public static final int guessPrecision( double d ) {
    int maxprec = 4;
    double maxdiff = 0.00005;

    // Make sure that 7.99995 == 8.00000
    // This is usually a rounding error!
    if ( distanceToNearestInteger( d ) < maxdiff ) {
      return 0; // nothing behind decimal point...
    }

    // remainder: 12.345678 --> 0.345678
    for ( int i = 1; i < maxprec; i++ ) { // cap off precision at a reasonable maximum
      double scaled = d * Math.pow( 10.0, i );
      if ( distanceToNearestInteger( scaled ) < maxdiff ) {
        return i;
      }
    }

    // Unknown length!
    return -1;
  }

  /**
   * @return the absolute distance between the value and the closest whole number (NaN for NaN or infinite input)
   */
  private static double distanceToNearestInteger( double value ) {
    return Math.abs( Math.rint( value ) - value );
  }

  /**
   * @return the trim type
   */
  public int getTrimType() {
    return trimType;
  }

  /**
   * @param trimType
   *          the trim type to set
   */
  public void setTrimType( int trimType ) {
    this.trimType = trimType;
  }
}