All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pentaho.di.job.entries.ftp.MVSFileParser Maven / Gradle / Ivy

The newest version!
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.job.entries.ftp;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.StringTokenizer;

import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.i18n.BaseMessages;

import com.enterprisedt.net.ftp.FTPFile;
import com.enterprisedt.net.ftp.FTPFileParser;

/**
 * MVS Folder Listing Parser. The purpose of this parser is to be able to handle responses from an MVS z/OS mainframe
 * FTP server.
 *
 * Many places on the 'net were consulted for input to this parser. Importantly, this information from the
 * comp.os.os2.networking.tcp-ip group:
 *
 * http://groups.google.com/group/comp.os.os2.networking.tcp-ip/msg/25acc89563f1e93e
 * http://groups.google.com/group/comp.
 * os.os2.networking.tcp-ip/browse_frm/thread/11af1ba1bc6b0edd?hl=en&lr&ie=UTF-8&oe=UTF
 * -8&rnum=6&prev=/groups?q%3DMVS%2BPartitioned
 * %2Bdata%2Bset%2Bdirectory%26hl%3Den%26lr%3D%26ie%3DUTF-8%26oe%3DUTF-8%26selm
 * %3D4e7k0p%2524t1v%2540blackice.winternet.com%26rnum%3D6&pli=1
 * http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/F1AA2032/1.5.15?SHELF=&DT=20001127174124
 *
 * Implementation Details:
 * 1- This supports folders and partitioned data sets only. This does not support JCL or HFS.
 * 2- You must treat partitioned data sets (Dsorg PO/PO-E) like folders and CD to them.
 * 3- Dsorg=PS is a downloadable file, as are all the contents of a Partitioned Data Set.
 * 4- When downloading from a folder, the Recfm must start with V or F.
 *
 * Note - the location for this is completely up for debate. I modeled this after the ftpsget/FTPSConnection and how
 * ftpsput reaches up and into the ftpsget package to get it. However, I think a better solution is to have an
 * entry/common. James and I agreed (in Matt's absence) to model the behavior after something already existing rather
 * than introduce a new folder (like entry/common or entry/util).
 *
 * @author mbatchelor September 2010
 *
 */

public class MVSFileParser extends FTPFileParser {

  // Typed as Class<?> rather than the raw Class type to avoid raw-type warnings; the value is unchanged.
  private static Class<?> PKG = MVSFileParser.class; // for i18n purposes, needed by Translator2!!

  /*** DO NOT TRANSLATE THESE ***/
  private static final String PARSER_KEY = "MVS"; // Value returned by toString()
  private static final String HEADER_VOLUME = "Volume"; // First header token of a folder (directory) listing
  private static final String HEADER_NAME = "Name"; // First header token of a partitioned data set listing
  private static final String LINE_TYPE_ARCIVE = "ARCIVE"; // *** NOT MISSPELLED ***
  private static final String ENTRY_FILE_TYPE = "PS"; // Dsorg value that marks a folder entry as a plain file
  private static final String LINE_TYPE_MIGRATED = "Migrated"; // First token of a line for a migrated data set
  /*** ^^^ DO NOT TRANSLATE THESE ^^^ ***/
  private static final int FOLDER_HEADER_TYPE_IDX = 0; // Index of the header token that identifies the listing type
  private static final int FOLDER_LISTING_LENGTH_NORMAL = 10; // Token count of a regular folder listing line
  // Second header token count accepted by isValidFormat.
  // NOTE(review): the sample PDS header ("Name VV.MM Created Changed Size Init Mod Id") has 8 tokens, so this value
  // appears to be what lets PDS headers through despite the ARCIVE name - confirm.
  private static final int FOLDER_LISTING_LENGTH_ARCIVE = 8;

  private String dateFormatString; // String used to parse file dates
  private String alternateFormatString; // Alternate form of date string in case month/day are switched
  private SimpleDateFormat dateFormat; // The DateFormat object to parse dates with
  private SimpleDateFormat dateTimeFormat; // The DateFormat object to parse "last modified" date+time with.

  private boolean partitionedDataset = false; // If true, It's a partitioned data set listing

  private LogChannelInterface log; // Destination for the debug/error messages emitted by this parser

  /**
   * Creates a parser that reports its debug and error messages through the supplied log channel.
   *
   * @param log
   *          the log channel to write to; it is stored as-is and dereferenced without null checks by the parsing
   *          methods
   */
  public MVSFileParser( LogChannelInterface log ) {
    this.log = log;
  }

  /************************ Abstract Class Implementations *************************/

  /**
   * Decides whether this parser can handle the given directory listing, and remembers whether the listing is a folder
   * listing or a partitioned data set (PDS) listing for later calls to {@link #parse(String)}.
   *
   * Directory listing format:
   *
   * <pre>
   * Volume Unit    Referred Ext Used Recfm Lrecl BlkSz Dsorg Dsname
   * BALP4B 3390   2010/09/09  6   57  FB      80   800  PO  BMS
   * BALP8E 3390   2010/09/07  1    2  FB      80   800  PO  BMS.BACKUP
   * ARCIVE Not Direct Access Device KJ.IOP998.ERROR.PL.UNITTEST
   * USS018 3308   2010/01/15  1   15  VB     259  8000  PS  NFS.DOC
   * Migrated                                                OAQPS.INTERIM.CNTYIM.V1.DATA
   * </pre>
   *
   * Partitioned Dataset listing format:
   *
   * <pre>
   * Name      VV.MM   Created       Changed      Size  Init   Mod   Id
   * A         01.03 2007/10/22 2009/05/27 20:18    30     3     0 TR6JAM
   * AAA       01.01 2007/06/01 2009/01/27 03:50   183    11     0 TR6AAJ
   * AAJSUSU   01.00 2005/08/29 2005/08/29 15:11    20    20     0 TR6MGM
   * ADERESSO  01.01 2007/03/15 2007/03/15 16:38    45    45     0 TR6CCU
   * </pre>
   *
   * Note: the date format needs to be deciphered, since on other sites a line looks like this:
   *
   * <pre>
   * BALP4B 3390   09/12/95  6   57  FB      80   800  PO  BMS
   * </pre>
   *
   * @param listing
   *          the raw lines of the listing; the first line is expected to be the header
   * @return true if the header identifies a folder or PDS listing and the remaining lines validate for that type
   */
  @Override
  public boolean isValidFormat( String[] listing ) {

    if ( log.isDebug() ) {
      log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Checking.Parser" ) );
    }
    if ( listing.length == 0 ) {
      return false;
    }

    // The first line of an MVS listing is its header.
    final String[] headerTokens = splitMVSLine( listing[0] );
    final boolean plausibleWidth =
      headerTokens.length == FOLDER_LISTING_LENGTH_NORMAL || headerTokens.length == FOLDER_LISTING_LENGTH_ARCIVE;
    if ( !plausibleWidth ) {
      return false;
    }

    final String listingType = headerTokens[FOLDER_HEADER_TYPE_IDX];
    if ( HEADER_VOLUME.equals( listingType ) ) {
      // A directory listing rather than a PDS listing.
      this.partitionedDataset = false;
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.INFO.Detected.Dir" ) );
      }
      return isValidDirectoryFormat( listing );
    }
    if ( HEADER_NAME.equals( listingType ) ) {
      // Looks like a PDS listing; the line-by-line check confirms it.
      this.partitionedDataset = true;
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.INFO.Detected.PDS" ) );
      }
      return isValidPDSFormat( listing );
    }
    return false;
  }

  /**
   * Parses one raw line of the listing, delegating to the PDS or the folder line parser depending on the listing type
   * detected by {@link #isValidFormat(String[])}.
   *
   * @param raw
   *          the unparsed listing line
   * @return the parsed entry, or null when the delegate decides the line should be skipped
   * @throws ParseException
   *           if the PDS line parser cannot parse the line
   */
  @Override
  public FTPFile parse( String raw ) throws ParseException {
    final String[] tokens = splitMVSLine( raw );
    // The PDS branch is where the real date parsing happens; the folder branch fakes its dates.
    return this.partitionedDataset ? parsePDSLine( tokens, raw ) : parseFolder( tokens, raw );
  }

  /**
   * Ignores the supplied locale. In theory the locale could be used to work out the date/time format, but that would
   * need time on a server to confirm; for now the parser figures out the date format by itself and only logs (at
   * debug level) that the locale was ignored.
   *
   * @param arg0
   *          the locale offered by the caller; unused
   */
  @Override
  public void setLocale( Locale arg0 ) {
    if ( !log.isDebug() ) {
      return;
    }
    log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Ignore.Locale" ) );
  }

  /**
   * Returns the parser name ("MVS").
   *
   * Per the original author's note, an extensibility oversight in the third-party FTP library means this name is
   * unfortunately not used to match the parser against the server.
   *
   * @return the fixed parser key
   */
  @Override
  public String toString() {
    return PARSER_KEY;
  }

  /************************ Worker Methods *************************/

  /**
   * Parses a Partitioned Dataset entry and returns an FTPFile object.
   *
   * The line is expected to carry at least the member name (token 0), the created date (token 2) and the changed
   * date and time (tokens 3 and 4). Malformed lines are reported as a {@link ParseException} rather than escaping as
   * an {@link ArrayIndexOutOfBoundsException} or {@link NullPointerException}.
   *
   * @param aLine
   *          Split line
   * @param raw
   *          Unparsed raw string
   * @return FTPFile unless it's the header row, in which case null is returned.
   * @throws ParseException
   *           if the line is blank or too short, if no date format is known yet, or if a date cannot be parsed
   */
  protected FTPFile parsePDSLine( String[] aLine, String raw ) throws ParseException {
    if ( aLine == null || aLine.length == 0 ) {
      throw new ParseException( "Empty PDS listing line", 0 );
    }
    if ( aLine[0].equals( HEADER_NAME ) ) {
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Skip.Header" ) );
      }
      return null;
    }
    // Tokens 0 (name), 2 (created), 3 (changed date) and 4 (changed time) are read below.
    final int minTokens = 5;
    if ( aLine.length < minTokens ) {
      throw new ParseException( "Unexpected PDS listing line: " + raw, 0 );
    }
    if ( dateFormatString == null ) {
      throw new ParseException( "No date format available to parse PDS listing line: " + raw, 0 );
    }
    // (Re)build the formatters when they are missing or were built from a different pattern, e.g. because the
    // format string was set or switched after they had been cached.
    if ( dateFormat == null || !dateFormatString.equals( dateFormat.toPattern() ) ) {
      dateFormat = new SimpleDateFormat( dateFormatString );
    }
    final String dateTimePattern = dateFormatString + " HH:mm";
    if ( dateTimeFormat == null || !dateTimePattern.equals( dateTimeFormat.toPattern() ) ) {
      dateTimeFormat = new SimpleDateFormat( dateTimePattern );
    }

    FTPFile rtn = new FTPFile( raw );
    rtn.setName( aLine[0] );
    rtn.setCreated( dateFormat.parse( aLine[2] ) );
    String modDateTime = aLine[3] + ' ' + aLine[4];
    rtn.setLastModified( dateTimeFormat.parse( modDateTime ) );
    rtn.setDir( false );
    return rtn;
  }

  /**
   * Parses a line from a folder listing.
   *
   * Note: Returns NULL if it's the header line, if it is ARCIVE or Migrated, if the record format doesn't start with
   * 'F' or 'V', and if the dsorg doesn't start with 'P'. Blank lines and lines with fewer tokens than a regular
   * folder line are also skipped (NULL) instead of failing with an {@link ArrayIndexOutOfBoundsException}; short
   * lines are reported through the error log.
   *
   * @param aLine
   *          Line split apart
   * @param raw
   *          Raw line from the transport
   * @return FTPFile for the line unless it is expressly excluded
   */
  protected FTPFile parseFolder( String[] aLine, String raw ) {
    if ( aLine == null || aLine.length == 0 ) {
      // Blank line: nothing to report and nothing that can safely be indexed.
      return null;
    }
    if ( aLine[0].equals( HEADER_VOLUME ) ) {
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Skip.Header" ) );
      }
      return null;
    }
    // Directory format
    if ( aLine[0].equals( LINE_TYPE_ARCIVE ) ) { // It's on tape somewhere
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Skip.ARCIVE" ) );
      }
      return null;
    }
    if ( aLine[0].equals( LINE_TYPE_MIGRATED ) ) { // It's been moved.
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Skip.Migrated" ) );
      }
      return null;
    }
    // Everything below reads fixed columns (4 = Used, 5 = Recfm, 8 = Dsorg, 9 = Dsname), so make sure they exist.
    if ( aLine.length < FOLDER_LISTING_LENGTH_NORMAL ) {
      log.logError( BaseMessages.getString( PKG, "MVSFileParser.ERROR.Invalid.Folder.Line", raw ) );
      return null;
    }
    final char recfmType = aLine[5].charAt( 0 );
    if ( recfmType != 'F' && recfmType != 'V' ) {
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Skip.recf" ) );
      }
      return null;
    }
    if ( aLine[8].charAt( 0 ) != 'P' ) { // Only handle PO, PS, or PO-E
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Skip.dso" ) );
      }
      return null;
    }
    // OK, I think I can handle this.
    FTPFile rtn = new FTPFile( raw );
    rtn.setName( aLine[9] );
    // Fake out dates - these are all newly created files / folders. Both dates are taken from the same instant so
    // they cannot differ by a clock tick; separate Date objects are used because Date is mutable.
    final long now = System.currentTimeMillis();
    rtn.setCreated( new Date( now ) );
    rtn.setLastModified( new Date( now ) );
    if ( aLine[8].equals( ENTRY_FILE_TYPE ) ) {
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Found.File", aLine[9] ) );
      }
      // This is a file...
      rtn.setDir( false );
      long l = -1;
      try {
        l = Long.parseLong( aLine[4] );
      } catch ( NumberFormatException ignored ) {
        // Deliberately ignored: a non-numeric "Used" column just leaves the size at -1.
      }
      rtn.setSize( l );
    } else {
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Found.Folder", aLine[9] ) );
      }
      rtn.setDir( true );
    }
    // Left as a note in case last time accessed becomes important: for directory items, column 2 (Referred) is
    // just the last time accessed and could be parsed with dateFormat.
    return rtn;
  }

  /************************ Utility Methods *************************/

  /**
   * Splits a listing line into its whitespace-separated, trimmed tokens. A StringTokenizer is used instead of
   * String.split because it copes well with runs of several contiguous white-space characters. This never returns
   * null: a null input yields an empty array, as does a line without tokens.
   *
   * @param raw
   *          The string to tokenize from the MainFrame
   * @return String array of all the elements from the parse.
   */
  protected String[] splitMVSLine( String raw ) {
    if ( raw == null ) {
      return new String[0];
    }
    final StringTokenizer tokenizer = new StringTokenizer( raw );
    final String[] tokens = new String[tokenizer.countTokens()];
    for ( int idx = 0; idx < tokens.length; idx++ ) {
      tokens[idx] = tokenizer.nextToken().trim();
    }
    return tokens;
  }

  /**
   * Returns true if this seems to be a recognized MVS folder (not PDS) listing.
   *
   * Every line after the header must be one of:
   * <ul>
   * <li>a two-token "Migrated" line,</li>
   * <li>an "ARCIVE" line (data set is on tape), or</li>
   * <li>a regular line with exactly {@code FOLDER_LISTING_LENGTH_NORMAL} tokens whose third token is a date.</li>
   * </ul>
   * Only regular lines carry a date column, so only those are used to guess or validate the date format. Migrated
   * and ARCIVE lines are accepted without looking at a date, and blank lines are rejected rather than indexed.
   *
   * @param listing
   *          the raw listing lines; line 0 is the header and is not examined here
   * @return true if by all appearances this is a listing of an MVS folder
   */
  protected boolean isValidDirectoryFormat( String[] listing ) {
    for ( int i = 1; i < listing.length; i++ ) {
      String[] aLine = splitMVSLine( listing[i] );
      if ( aLine.length == 0 ) {
        // Blank line: not a recognizable folder entry.
        log.logError( BaseMessages.getString( PKG, "MVSFileParser.ERROR.Invalid.Folder.Line", listing[i] ) );
        return false;
      }
      if ( ( aLine.length == 2 ) && ( aLine[0].equals( LINE_TYPE_MIGRATED ) ) ) {
        if ( log.isDebug() ) {
          log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Detected.Migrated" ) );
        }
        continue; // no date column on a Migrated line
      }
      if ( aLine[0].equals( LINE_TYPE_ARCIVE ) ) {
        continue; // on tape; the remaining tokens are a message, not listing columns
      }
      if ( aLine.length != FOLDER_LISTING_LENGTH_NORMAL ) {
        log.logError( BaseMessages.getString( PKG, "MVSFileParser.ERROR.Invalid.Folder.Line", listing[i] ) );
        return false;
      }
      if ( dateFormatString != null ) {
        // validate date
        if ( !checkDateFormat( aLine[2] ) ) {
          return false;
        }
      } else {
        // Try to work out the date format from this line's date.
        guessDateFormat( aLine[2] );
      }
    }
    return true;
  }

  /**
   * Returns true if this seems to be a recognized MVS PDS listing (not folder).
   *
   * Each line after the header must split into exactly nine tokens (nine because the "Changed" column contributes
   * two: a date and a time). While no date format is known the created date (token 2) is used to guess one;
   * afterwards the changed date (token 3) of each line is validated against it.
   *
   * @param listing
   *          the raw listing lines; line 0 is the header and is not examined here
   * @return true if by all appearances this is a listing of the contents of a PDS
   */
  protected boolean isValidPDSFormat( String[] listing ) {
    int row = 1;
    while ( row < listing.length ) {
      final String line = listing[row++];
      final String[] fields = splitMVSLine( line );
      if ( fields.length != 9 ) {
        log.logError( BaseMessages.getString( PKG, "MVSFileParser.ERROR.Invalid.PDS.Line", line ) );
        return false;
      }
      if ( dateFormatString == null ) {
        guessDateFormat( fields[2] );
        continue;
      }
      if ( !checkDateFormat( fields[3] ) ) {
        return false;
      }
    }
    return true;
  }

  /**
   * Tries the current date format on the given date string to make sure it knows how to parse the dates. If the
   * parse fails, the alternate format (if one is available) replaces the current one and is tried instead. For
   * example, if the first three files have these dates: 2010/03/04 2010/07/09 2010/23/06
   *
   * For the first two, either yyyy/MM/dd or yyyy/dd/MM would work. When the parse on 2010/23/06 fails, it will try
   * the alternate, succeed, and carry on.
   *
   * Parsing is done strictly (non-lenient). SimpleDateFormat is lenient by default, and a lenient yyyy/MM/dd parser
   * accepts 2010/23/06 by rolling month 23 over into the following year, so without strict parsing the switch to the
   * alternate format described above would never happen.
   *
   * The weakness of this approach is if all files have valid interchangeable day/month on all dates. In that case,
   * all would be detected as yyyy/MM/dd which may be incorrect. If this is a problem, the correct fix is to set the
   * date format on the parser, or play with the Locale and see if that can be used to figure out what the real format
   * from the server is.
   *
   * @param dateStr
   *          the date token to validate
   * @return true if the (possibly switched) date format parses the string; false otherwise
   */
  protected boolean checkDateFormat( String dateStr ) {
    if ( dateFormat == null ) {
      // The format string may have been supplied without a formatter having been built for it yet.
      if ( dateFormatString == null ) {
        log.logError( BaseMessages.getString( PKG, "MVSFileParser.ERROR.Date.Parse.Fail", dateStr ) );
        return false;
      }
      dateFormat = new SimpleDateFormat( dateFormatString );
    }
    dateFormat.setLenient( false );
    try {
      dateFormat.parse( dateStr );
      return true;
    } catch ( ParseException ex ) {
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Date.Parse.Error" ) );
      }
    }

    if ( alternateFormatString == null ) {
      log.logError( BaseMessages.getString( PKG, "MVSFileParser.ERROR.Date.Parse.Fail", dateStr ) );
      return false;
    }

    if ( log.isDebug() ) {
      log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Date.Parse.Choose.Alt" ) );
    }
    // Switch to the alternate for good; it is a one-shot fallback.
    dateFormatString = alternateFormatString;
    alternateFormatString = null;
    dateFormat = new SimpleDateFormat( dateFormatString );
    dateFormat.setLenient( false );
    // Any cached date+time formatter was derived from the old pattern; drop it so it gets rebuilt.
    dateTimeFormat = null;
    try {
      dateFormat.parse( dateStr );
    } catch ( ParseException ex2 ) {
      log.logError( BaseMessages.getString( PKG, "MVSFileParser.ERROR.Date.Parse.Fail", dateStr ) );
      return false;
    }
    return true;
  }

  /**
   * This method will look at the incoming date string and try to figure out the format of the date. Googling on the
   * internet showed several possible looks to the date:
   *
   * dd/MM/yy yy/MM/dd MM/dd/yy yyyy/MM/dd yyyy/dd/MM
   *
   * I never saw samples showing dd/MM/yyyy but I suppose it's possible.
   *
   * How the guess works: the string must consist of exactly three numeric parts separated by '/'. A four character
   * part is a "yyyy" year, a value above 31 is a "yy" year, and a value above 12 (but not above 31) must be the day.
   * If the day is known, the remaining part is the month and the format is unambiguous. Otherwise month-before-day
   * is chosen and the day-before-month variant is remembered as the alternate format, to be tried if a later date
   * fails to parse. If no year can be identified (e.g. 01/02/05) nothing is decided.
   *
   * Strings that do not have this shape (wrong number of parts, non-numeric parts, null) are logged at debug level
   * and leave the parser state untouched instead of raising a runtime exception. The formatter that is created is
   * non-lenient so that a later date with an out-of-range month actually fails to parse.
   *
   * @param dateStr
   *          a sample date token taken from the listing
   */
  protected void guessDateFormat( String dateStr ) {
    if ( log.isDebug() ) {
      log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Guess.Date" ) );
    }
    String[] dateSplit = ( dateStr == null ) ? new String[0] : dateStr.split( "/" );
    if ( dateSplit.length != 3 ) {
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Guess.Date.Unparsable", dateStr ) );
      }
      return;
    }
    String yrFmt = null;
    int yrPos = -1;
    int dayPos = -1;
    // quick look for either yyyy/xx/xx or xx/xx/yyyy
    for ( int i = 0; i < dateSplit.length; i++ ) {
      int aDigit;
      try {
        aDigit = Integer.parseInt( dateSplit[i] );
      } catch ( NumberFormatException ex ) {
        // Not a date at all (e.g. a word); give up without touching the current state.
        if ( log.isDebug() ) {
          log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Guess.Date.Unparsable", dateStr ) );
        }
        return;
      }
      if ( dateSplit[i].length() == 4 ) {
        yrFmt = "yyyy";
        yrPos = i;
      } else if ( aDigit > 31 ) {
        // found 2-digit year
        yrFmt = "yy";
        yrPos = i;
      } else if ( aDigit > 12 ) {
        // definitely found a # <=31,
        dayPos = i;
      }
    }
    if ( yrFmt == null ) {
      // looks like something like 01/02/05 - where's the year?
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Guess.Date.Year.Ambiguous" ) );
      }
      return;
    }

    String[] parts = new String[3];
    parts[yrPos] = yrFmt;
    if ( dayPos >= 0 ) {
      // OK, we know everything: the one remaining slot is the month.
      parts[dayPos] = "dd";
      this.dateFormatString = fillDatePattern( parts, "MM", "MM" );
      this.alternateFormatString = null;
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Guess.Date.Obvious" ) );
      }
    } else {
      // OK, we have something like 2010/01/01 - I can't
      // tell month from day. So, we'll guess. If it doesn't work on a later
      // date, we'll flip it (the alternate).
      this.dateFormatString = fillDatePattern( parts, "MM", "dd" );
      this.alternateFormatString = fillDatePattern( parts, "dd", "MM" );
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Guess.Date.Ambiguous" ) );
      }
    }

    this.dateFormat = new SimpleDateFormat( dateFormatString );
    this.dateFormat.setLenient( false );
    // A date+time formatter cached for a previous pattern must not outlive that pattern.
    this.dateTimeFormat = null;
    if ( log.isDebug() ) {
      log.logDebug( BaseMessages
        .getString( PKG, "MVSFileParser.DEBUG.Guess.Date.Decided", this.dateFormatString ) );
    }
    try {
      dateFormat.parse( dateStr );
    } catch ( ParseException ex ) {
      if ( log.isDebug() ) {
        log.logDebug( BaseMessages.getString( PKG, "MVSFileParser.DEBUG.Guess.Date.Unparsable", dateStr ) );
      }
    }
  }

  /**
   * Joins the given pattern parts with '/', substituting the fill values (in order) for parts that are still null.
   *
   * @param parts
   *          pattern parts by position; null marks a slot that is not decided yet
   * @param firstFill
   *          pattern letters for the first undecided slot
   * @param secondFill
   *          pattern letters for every further undecided slot
   * @return the assembled date pattern, e.g. yyyy/MM/dd
   */
  private static String fillDatePattern( String[] parts, String firstFill, String secondFill ) {
    StringBuilder pattern = new StringBuilder();
    String nextFill = firstFill;
    for ( int i = 0; i < parts.length; i++ ) {
      if ( i > 0 ) {
        pattern.append( '/' );
      }
      if ( parts[i] != null ) {
        pattern.append( parts[i] );
      } else {
        pattern.append( nextFill );
        nextFill = secondFill;
      }
    }
    return pattern.toString();
  }

  /*************************** Getters and Setters **************************/

  /**
   * Tells which kind of listing was detected by the last call to isValidFormat.
   *
   * @return true if the listing is a partitioned data set (PDS) listing, false if it is a folder listing (also the
   *         initial value)
   */
  public boolean isPartitionedDataset() {
    return partitionedDataset;
  }

  /**
   * Returns the pattern currently used to parse the dates in the listing.
   *
   * @return the date pattern; null while no format has been set or guessed
   */
  public String getDateFormatString() {
    return dateFormatString;
  }

  /**
   * Provides ability to pre-specify the format that the parser will use to parse dates.
   *
   * The formatter used for parsing is rebuilt from the new pattern, so the pre-specified format is usable right away.
   * A cached date+time formatter and any alternate format left over from an earlier guess are discarded, because
   * they belong to the previous pattern. Passing null clears the format so that it is guessed from the next listing.
   *
   * @param value
   *          the string to set.
   * @throws IllegalArgumentException
   *           if the value is not a valid SimpleDateFormat pattern
   */
  public void setDateFormatString( String value ) {
    // Build the formatter first so that an invalid pattern leaves the current state untouched.
    SimpleDateFormat newFormat = ( value == null ) ? null : new SimpleDateFormat( value );
    this.dateFormatString = value;
    this.dateFormat = newFormat;
    this.dateTimeFormat = null;
    this.alternateFormatString = null;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy