All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pentaho.di.job.entries.folderscompare.JobEntryFoldersCompare Maven / Gradle / Ivy

The newest version!
/*! ******************************************************************************
 *
 * Pentaho Data Integration
 *
 * Copyright (C) 2002-2017 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.job.entries.folderscompare;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelectInfo;
import org.apache.commons.vfs2.FileSelector;
import org.apache.commons.vfs2.FileType;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.CheckResultInterface;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entry.JobEntryBase;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.job.entry.validator.AbstractFileValidator;
import org.pentaho.di.job.entry.validator.AndValidator;
import org.pentaho.di.job.entry.validator.JobEntryValidatorUtils;
import org.pentaho.di.job.entry.validator.ValidatorContext;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.metastore.api.IMetaStore;
import org.w3c.dom.Node;

/**
 * This defines a 'folder compare' job entry. It will compare 2 folders, and will either follow the true flow upon the
 * files being the same or the false flow otherwise.
 *
 * @author Samatar Hassan
 * @since 25-11-2007
 *
 */
public class JobEntryFoldersCompare extends JobEntryBase implements Cloneable, JobEntryInterface {
  private static Class PKG = JobEntryFoldersCompare.class; // for i18n purposes, needed by Translator2!!

  // First location to compare; variables are resolved via getRealFilename1() before use.
  private String filename1;
  // Second location to compare; variables are resolved via getRealFilename2() before use.
  private String filename2;
  // Regular expression applied to base file names when compareonly is "specify".
  private String wildcard;
  // Selection mode read by TextFileSelector: "all", "only_files", "only_folders" or "specify".
  private String compareonly;
  // When true, entries below the first level of the compared folders are selected as well.
  private boolean includesubfolders;
  // When true, files present in both folders are also compared byte by byte.
  private boolean comparefilecontent;
  // When true, files present in both folders must also have the same size.
  private boolean comparefilesize;

  /**
   * Creates a folders-compare entry with the given name and default settings: both locations and the
   * wildcard unset, every optional check switched off and the selection mode set to "all".
   *
   * @param n
   *          the name handed to the base job entry
   */
  public JobEntryFoldersCompare( String n ) {
    super( n, "" );

    // Locations and filter start out unset
    this.filename1 = null;
    this.filename2 = null;
    this.wildcard = null;

    // Selection mode and optional checks
    this.compareonly = "all";
    this.includesubfolders = false;
    this.comparefilecontent = false;
    this.comparefilesize = false;
  }

  /**
   * Sets the selection mode. The file selector of this entry recognises "all", "only_files",
   * "only_folders" and "specify".
   *
   * @param comparevalue
   *          the selection mode to store
   */
  public void setCompareOnly( String comparevalue ) {
    this.compareonly = comparevalue;
  }

  /**
   * @return the selection mode currently stored in this entry
   */
  public String getCompareOnly() {
    return compareonly;
  }

  /**
   * Creates an entry with an empty name by delegating to the single-argument constructor.
   */
  public JobEntryFoldersCompare() {
    this( "" );
  }

  /**
   * Returns the copy produced by the superclass, cast to this entry type.
   *
   * @return the cloned job entry
   */
  public Object clone() {
    return (JobEntryFoldersCompare) super.clone();
  }

  /**
   * Builds the XML fragment describing this entry: the output of the superclass followed by one tag per
   * setting. As a side effect both locations are registered with the named cluster embed manager of the
   * parent job, when one is available.
   *
   * @return the XML representation of this entry
   */
  public String getXML() {
    StringBuilder retval = new StringBuilder( 200 ); // 133 chars in just spaces and tag names alone

    retval.append( super.getXML() );
    retval.append( "      " ).append( XMLHandler.addTagValue( "include_subfolders", includesubfolders ) );
    retval.append( "      " ).append( XMLHandler.addTagValue( "compare_filecontent", comparefilecontent ) );
    retval.append( "      " ).append( XMLHandler.addTagValue( "compare_filesize", comparefilesize ) );

    retval.append( "      " ).append( XMLHandler.addTagValue( "compareonly", compareonly ) );
    retval.append( "      " ).append( XMLHandler.addTagValue( "wildcard", wildcard ) );
    retval.append( "      " ).append( XMLHandler.addTagValue( "filename1", filename1 ) );
    retval.append( "      " ).append( XMLHandler.addTagValue( "filename2", filename2 ) );

    // execute() treats the embed manager as possibly absent, so apply the same guard here instead of
    // failing the serialization with a NullPointerException.
    if ( parentJobMeta != null && parentJobMeta.getNamedClusterEmbedManager() != null ) {
      parentJobMeta.getNamedClusterEmbedManager().registerUrl( filename1 );
      parentJobMeta.getNamedClusterEmbedManager().registerUrl( filename2 );
    }
    return retval.toString();
  }

  /**
   * Reads the settings of this entry from an XML node. Boolean settings are true only when the tag value
   * is "Y" (case-insensitive).
   *
   * @param entrynode
   *          the XML node holding the entry definition
   * @param databases
   *          passed on to the superclass
   * @param slaveServers
   *          passed on to the superclass
   * @param rep
   *          not used by this method
   * @param metaStore
   *          not used by this method
   * @throws KettleXMLException
   *           when the superclass fails to load; the original exception is kept as the cause
   */
  public void loadXML( Node entrynode, List databases, List slaveServers,
    Repository rep, IMetaStore metaStore ) throws KettleXMLException {
    try {
      super.loadXML( entrynode, databases, slaveServers );
      includesubfolders = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "include_subfolders" ) );
      comparefilecontent = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "compare_filecontent" ) );
      comparefilesize = "Y".equalsIgnoreCase( XMLHandler.getTagValue( entrynode, "compare_filesize" ) );

      compareonly = XMLHandler.getTagValue( entrynode, "compareonly" );
      wildcard = XMLHandler.getTagValue( entrynode, "wildcard" );
      filename1 = XMLHandler.getTagValue( entrynode, "filename1" );
      filename2 = XMLHandler.getTagValue( entrynode, "filename2" );
    } catch ( KettleXMLException xe ) {
      // Chain the original exception so its stack trace is not lost when the error is reported.
      throw new KettleXMLException( BaseMessages.getString( PKG, "JobFoldersCompare.Meta.UnableLoadXML", xe
        .getMessage() ), xe );
    }
  }

  /**
   * Reads the settings of this entry from a repository.
   *
   * @param rep
   *          the repository to read the attributes from
   * @param metaStore
   *          not used by this method
   * @param id_jobentry
   *          the id of the job entry whose attributes are read
   * @param databases
   *          not used by this method
   * @param slaveServers
   *          not used by this method
   * @throws KettleException
   *           when an attribute cannot be read; the original exception is kept as the cause
   */
  public void loadRep( Repository rep, IMetaStore metaStore, ObjectId id_jobentry, List databases,
    List slaveServers ) throws KettleException {
    try {
      includesubfolders = rep.getJobEntryAttributeBoolean( id_jobentry, "include_subfolders" );
      comparefilecontent = rep.getJobEntryAttributeBoolean( id_jobentry, "compare_filecontent" );
      comparefilesize = rep.getJobEntryAttributeBoolean( id_jobentry, "compare_filesize" );

      compareonly = rep.getJobEntryAttributeString( id_jobentry, "compareonly" );
      wildcard = rep.getJobEntryAttributeString( id_jobentry, "wildcard" );
      filename1 = rep.getJobEntryAttributeString( id_jobentry, "filename1" );
      filename2 = rep.getJobEntryAttributeString( id_jobentry, "filename2" );
    } catch ( KettleException dbe ) {
      // Chain the original exception so its stack trace is not lost when the error is reported.
      throw new KettleException( BaseMessages.getString( PKG, "JobFoldersCompare.Meta.UnableLoadRep", ""
        + id_jobentry, dbe.getMessage() ), dbe );
    }
  }

  /**
   * Writes the settings of this entry to a repository, one attribute per setting.
   *
   * @param rep
   *          the repository to write the attributes to
   * @param metaStore
   *          not used by this method
   * @param id_job
   *          the id of the job this entry belongs to
   * @throws KettleException
   *           when an attribute cannot be saved; the original exception is kept as the cause
   */
  public void saveRep( Repository rep, IMetaStore metaStore, ObjectId id_job ) throws KettleException {
    try {
      rep.saveJobEntryAttribute( id_job, getObjectId(), "include_subfolders", includesubfolders );
      rep.saveJobEntryAttribute( id_job, getObjectId(), "compare_filecontent", comparefilecontent );
      rep.saveJobEntryAttribute( id_job, getObjectId(), "compare_filesize", comparefilesize );

      rep.saveJobEntryAttribute( id_job, getObjectId(), "compareonly", compareonly );
      rep.saveJobEntryAttribute( id_job, getObjectId(), "wildcard", wildcard );
      rep.saveJobEntryAttribute( id_job, getObjectId(), "filename1", filename1 );
      rep.saveJobEntryAttribute( id_job, getObjectId(), "filename2", filename2 );
    } catch ( KettleDatabaseException dbe ) {
      // Chain the original exception so its stack trace is not lost when the error is reported.
      throw new KettleException( BaseMessages.getString(
        PKG, "JobFoldersCompare.Meta.UnableSaveRep", "" + id_job, dbe.getMessage() ), dbe );
    }
  }

  /**
   * @param includeSubfolders
   *          true to also select entries below the first level of the compared folders
   */
  public void setIncludeSubfolders( boolean includeSubfolders ) {
    this.includesubfolders = includeSubfolders;
  }

  /**
   * @return true when entries below the first level of the compared folders are selected as well
   */
  public boolean isIncludeSubfolders() {
    return includesubfolders;
  }

  /**
   * @param comparefilecontent
   *          true to compare the content of files found in both folders
   */
  public void setCompareFileContent( boolean comparefilecontent ) {
    this.comparefilecontent = comparefilecontent;
  }

  /**
   * @return true when the content of files found in both folders is compared
   */
  public boolean isCompareFileContent() {
    return comparefilecontent;
  }

  /**
   * @param comparefilesize
   *          true to compare the size of files found in both folders
   */
  public void setCompareFileSize( boolean comparefilesize ) {
    this.comparefilesize = comparefilesize;
  }

  /**
   * @return true when the size of files found in both folders is compared
   */
  public boolean isCompareFileSize() {
    return comparefilesize;
  }

  /**
   * @return the configured wildcard after passing it through {@code environmentSubstitute}
   */
  public String getRealWildcard() {
    String rawWildcard = getWildcard();
    return environmentSubstitute( rawWildcard );
  }

  /**
   * @return the first configured location after passing it through {@code environmentSubstitute}
   */
  public String getRealFilename1() {
    String rawFilename = getFilename1();
    return environmentSubstitute( rawFilename );
  }

  /**
   * @return the second configured location after passing it through {@code environmentSubstitute}
   */
  public String getRealFilename2() {
    String rawFilename = getFilename2();
    return environmentSubstitute( rawFilename );
  }

  /**
   * Check whether 2 files have the same contents by reading both streams byte by byte.
   *
   * @param file1
   *          first file to compare
   * @param file2
   *          second file to compare
   * @return true if files are equal, false if they are not
   *
   * @throws KettleFileException
   *           upon IO problems
   */
  protected boolean equalFileContents( FileObject file1, FileObject file2 ) throws KettleFileException {
    DataInputStream in1 = null;
    DataInputStream in2 = null;
    try {
      in1 =
        new DataInputStream( new BufferedInputStream( KettleVFS.getInputStream(
          KettleVFS.getFilename( file1 ), this ) ) );
      in2 =
        new DataInputStream( new BufferedInputStream( KettleVFS.getInputStream(
          KettleVFS.getFilename( file2 ), this ) ) );

      // Really read the contents and do comparisons.
      // read() returns -1 only at the real end of the stream. available() must not be used for that
      // purpose: it is an estimate of what can be read without blocking and may be 0 before the end
      // is reached, which would end the comparison early.
      int byte1;
      int byte2;
      do {
        byte1 = in1.read();
        byte2 = in2.read();
        if ( byte1 != byte2 ) {
          // Either the bytes differ or exactly one of the streams has ended
          return false;
        }
      } while ( byte1 != -1 );

      // Both streams ended at the same position without a difference
      return true;
    } catch ( IOException e ) {
      throw new KettleFileException( e );
    } finally {
      if ( in1 != null ) {
        try {
          in1.close();
        } catch ( Exception ignored ) {
          // Nothing to see here... and the second stream must still be closed
        }
      }
      if ( in2 != null ) {
        try {
          in2.close();
        } catch ( Exception ignored ) {
          // We can't do anything else here...
        }
      }
    }
  }

  /**
   * Compares the two configured locations and stores the outcome in the given result.
   * <p>
   * Two files are compared by content. Two folders are compared entry by entry: both must yield the same
   * number of selected entries, every entry of the first folder must exist in the second one with the
   * same type, and, when enabled, files must have the same size and content. The result is false when the
   * locations have different types, when one of them does not exist, or when an error occurs; in the last
   * two cases the error count is set to 1.
   *
   * @param previousResult
   *          the result object that is updated and returned
   * @param nr
   *          not used by this method
   * @return {@code previousResult}, with its result flag set to the outcome of the comparison
   */
  public Result execute( Result previousResult, int nr ) {
    Result result = previousResult;
    result.setResult( false );

    String realFilename1 = getRealFilename1();
    String realFilename2 = getRealFilename2();

    FileObject folder1 = null;
    FileObject folder2 = null;

    try {
      if ( filename1 == null || filename2 == null ) {
        logError( BaseMessages.getString( PKG, "JobFoldersCompare.Log.Need2Files" ) );
        return result;
      }

      // Set Embedded NamedCluster MetaStore Provider Key so that it can be passed to VFS.
      // parentJobMeta is checked as well: getXML() already treats it as possibly null.
      if ( parentJobMeta != null && parentJobMeta.getNamedClusterEmbedManager() != null ) {
        parentJobMeta.getNamedClusterEmbedManager()
          .passEmbeddedMetastoreKey( this, parentJobMeta.getEmbeddedMetastoreProviderKey() );
      }

      // Get Folders/Files to compare
      folder1 = KettleVFS.getFileObject( realFilename1, this );
      folder2 = KettleVFS.getFileObject( realFilename2, this );

      boolean folder1Exists = folder1.exists();
      boolean folder2Exists = folder2.exists();
      if ( !folder1Exists || !folder2Exists ) {
        if ( !folder1Exists ) {
          logError( BaseMessages.getString( PKG, "JobFileCompare.Log.FileNotExist", realFilename1 ) );
        }
        if ( !folder2Exists ) {
          logError( BaseMessages.getString( PKG, "JobFileCompare.Log.FileNotExist", realFilename2 ) );
        }
        result.setResult( false );
        result.setNrErrors( 1 );
        return result;
      }

      FileType type1 = folder1.getType();
      FileType type2 = folder2.getType();
      if ( !type1.equals( type2 ) ) {
        // pb...we try to compare file with folder !!!
        logError( BaseMessages.getString( PKG, "JobFoldersCompare.Log.CanNotCompareFilesFolders" ) );
        logFileTypeError( realFilename1, type1 );
        logFileTypeError( realFilename2, type2 );
      } else if ( type1 == FileType.FILE ) {
        // simply compare 2 files ..
        result.setResult( equalFileContents( folder1, folder2 ) );
      } else if ( type1 == FileType.FOLDER ) {
        // We compare 2 folders ...
        result.setResult( folderContentsMatch( folder1, folder2, realFilename1, realFilename2 ) );
      }
      // else: File type unknown !! The result stays false.
    } catch ( Exception e ) {
      result.setResult( false );
      result.setNrErrors( 1 );
      // Report both locations (the first one used to be replaced by a second copy of realFilename2)
      logError( BaseMessages.getString(
        PKG, "JobFoldersCompare.Log.ErrorComparing", realFilename1, realFilename2, e.getMessage() ) );
    } finally {
      closeFileQuietly( folder1 );
      closeFileQuietly( folder2 );
    }

    return result;
  }

  /**
   * Compares the entries selected in two folders.
   *
   * @return true when both folders yield the same number of entries and every entry of the first folder
   *         has a matching entry in the second one
   */
  private boolean folderContentsMatch( FileObject folder1, FileObject folder2, String realFilename1,
    String realFilename2 ) throws IOException, KettleFileException {
    FileObject[] list1 = folder1.findFiles( new TextFileSelector( folder1.toString() ) );
    FileObject[] list2 = folder2.findFiles( new TextFileSelector( folder2.toString() ) );

    if ( log.isDetailed() ) {
      logDetailed( BaseMessages.getString(
        PKG, "JobFoldersCompare.Log.FolderContains", realFilename1, "" + list1.length ) );
      logDetailed( BaseMessages.getString(
        PKG, "JobFoldersCompare.Log.FolderContains", realFilename2, "" + list2.length ) );
    }

    if ( list1.length != list2.length ) {
      // The 2 folders don't have the same files number
      if ( log.isDetailed() ) {
        logDetailed( BaseMessages.getString(
          PKG, "JobFoldersCompare.Log.FoldersDifferentFiles", realFilename1, realFilename2 ) );
      }
      return false;
    }

    Map<String, String> collection1 = indexByRelativeName( folder1, list1 );
    Map<String, String> collection2 = indexByRelativeName( folder2, list2 );

    // Let's now fetch Folder1
    // and for each entry, we will search it in Folder2
    // if the entry exists..we will compare file entry (file or folder?)
    // if the 2 entry are file (not folder), we will compare content
    boolean ok = true;
    Set<Map.Entry<String, String>> entrees = collection1.entrySet();
    Iterator<Map.Entry<String, String>> iterateur = entrees.iterator();
    while ( iterateur.hasNext() ) {
      Map.Entry<String, String> entree = iterateur.next();
      String counterpart = collection2.get( entree.getKey() );

      if ( counterpart == null ) {
        ok = false;
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString(
            PKG, "JobFoldersCompare.Log.FileCanNotBeFoundIn", entree.getKey(), realFilename2 ) );
        }
        continue;
      }

      if ( log.isDebug() ) {
        logDebug( BaseMessages.getString(
          PKG, "JobFoldersCompare.Log.FileIsFoundIn", entree.getKey(), realFilename2 ) );
      }

      // Content is only compared while no difference has been found yet; once the outcome is known
      // to be false the remaining entries are still checked for type and size so they get logged.
      if ( !entriesMatch( entree.getValue(), counterpart, ok ) ) {
        ok = false;
      }
    }

    return ok;
  }

  /**
   * Maps every selected entry to its textual location, keyed by its path relative to the base folder.
   * The relative path (rather than the bare file name) is used as key so that entries with the same name
   * in different sub folders do not overwrite each other when sub folders are included.
   */
  private Map<String, String> indexByRelativeName( FileObject baseFolder, FileObject[] files )
    throws IOException {
    Map<String, String> index = new HashMap<String, String>();
    for ( int i = 0; i < files.length; i++ ) {
      index.put( baseFolder.getName().getRelativeName( files[i].getName() ), files[i].toString() );
    }
    return index;
  }

  /**
   * Compares one entry of the first folder with its counterpart in the second folder. Both file objects
   * are closed before returning so that nothing stays open while the remaining entries are processed.
   *
   * @param checkContent
   *          false to skip the content comparison even when it is enabled for this entry
   * @return true when both entries have the same type and, for files, pass the enabled size and content
   *         checks
   */
  private boolean entriesMatch( String location1, String location2, boolean checkContent )
    throws IOException, KettleFileException {
    FileObject filefolder1 = null;
    FileObject filefolder2 = null;
    try {
      filefolder1 = KettleVFS.getFileObject( location1, this );
      filefolder2 = KettleVFS.getFileObject( location2, this );

      FileType type1 = filefolder1.getType();
      FileType type2 = filefolder2.getType();

      if ( !type2.equals( type1 ) ) {
        // The file1 exist in the folder2..but they don't have the same type
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString(
            PKG, "JobFoldersCompare.Log.FilesNotSameType", filefolder1.toString(), filefolder2.toString() ) );
        }
        logFileTypeError( filefolder1.toString(), type1 );
        logFileTypeError( filefolder2.toString(), type2 );
        return false;
      }

      if ( type2 != FileType.FILE ) {
        // Same type and not a file: nothing more to compare
        return true;
      }

      boolean same = true;

      // Let's compare file size
      if ( comparefilesize ) {
        long filefolder1_size = filefolder1.getContent().getSize();
        long filefolder2_size = filefolder2.getContent().getSize();
        if ( filefolder1_size != filefolder2_size ) {
          same = false;
          if ( log.isDetailed() ) {
            logDetailed( BaseMessages.getString(
              PKG, "JobFoldersCompare.Log.FilesNotSameSize", filefolder1.toString(), filefolder2.toString() ) );
            logDetailed( BaseMessages.getString(
              PKG, "JobFoldersCompare.Log.SizeFileIs", filefolder1.toString(), "" + filefolder1_size ) );
            logDetailed( BaseMessages.getString(
              PKG, "JobFoldersCompare.Log.SizeFileIs", filefolder2.toString(), "" + filefolder2_size ) );
          }
        }
      }

      // Let's compare files content..
      if ( same && checkContent && comparefilecontent && !equalFileContents( filefolder1, filefolder2 ) ) {
        same = false;
        if ( log.isDetailed() ) {
          logDetailed( BaseMessages.getString(
            PKG, "JobFoldersCompare.Log.FilesNotSameContent", filefolder1.toString(), filefolder2.toString() ) );
        }
      }

      return same;
    } finally {
      closeFileQuietly( filefolder1 );
      closeFileQuietly( filefolder2 );
    }
  }

  /**
   * Logs, as an error, whether the named location is a file, a folder or of an unknown type.
   */
  private void logFileTypeError( String name, FileType type ) {
    if ( type == FileType.FILE ) {
      logError( BaseMessages.getString( PKG, "JobFoldersCompare.Log.IsAFile", name ) );
    } else if ( type == FileType.FOLDER ) {
      logError( BaseMessages.getString( PKG, "JobFoldersCompare.Log.IsAFolder", name ) );
    } else {
      logError( BaseMessages.getString( PKG, "JobFoldersCompare.Log.IsUnknownFileType", name ) );
    }
  }

  /**
   * Closes a file object, ignoring a null argument and any error raised while closing, so that a failure
   * on one object does not prevent the next one from being closed.
   */
  private void closeFileQuietly( FileObject file ) {
    if ( file != null ) {
      try {
        file.close();
      } catch ( IOException ignored ) {
        // Ignore errors
      }
    }
  }

  /**
   * File selector used to collect the entries of a folder that take part in the comparison. The base
   * folder itself is never selected; entries below the first level are only selected when sub folders
   * are included; all other entries are selected according to the selection mode of the enclosing entry.
   */
  private class TextFileSelector implements FileSelector {
    String source_folder = null;

    public TextFileSelector( String sourcefolderin ) {
      if ( !Utils.isEmpty( sourcefolderin ) ) {
        source_folder = sourcefolderin;
      }

    }

    /**
     * @return true when the visited entry must be part of the comparison; false when it is filtered out
     *         or when an error occurs while inspecting it (the error is logged)
     */
    public boolean includeFile( FileSelectInfo info ) {
      boolean returncode = false;
      try {
        FileObject file = info.getFile();
        if ( !file.toString().equals( source_folder ) ) {
          // Pass over the Base folder itself
          boolean inBaseFolder = file.getParent().equals( info.getBaseFolder() );

          // Not in the Base Folder...Only if include sub folders
          if ( inBaseFolder || includesubfolders ) {
            returncode = matchesCompareMode( file );
          }
        }
      } catch ( Exception e ) {

        logError( "Error while finding files ... in ["
          + info.getFile().toString() + "]. Exception :" + e.getMessage() );
        returncode = false;
      }
      return returncode;
    }

    /**
     * Applies the selection mode to one entry. The mode is checked first so that the wildcard is only
     * evaluated in "specify" mode. A missing mode is treated like "all", the default set by the
     * constructor of the enclosing entry, instead of failing on every entry.
     */
    private boolean matchesCompareMode( FileObject file ) throws IOException {
      String mode = compareonly == null ? "all" : compareonly;

      if ( "all".equals( mode ) ) {
        return true;
      }
      if ( "only_files".equals( mode ) ) {
        return file.getType() == FileType.FILE;
      }
      if ( "only_folders".equals( mode ) ) {
        return file.getType() == FileType.FOLDER;
      }
      if ( "specify".equals( mode ) ) {
        return GetFileWildcard( file.getName().getBaseName() );
      }
      return false;
    }

    /**
     * Descends below the base folder (depth 0) only when sub folders are included. Deeper entries are
     * rejected by includeFile anyway in that case, so skipping them only avoids a needless tree walk.
     */
    public boolean traverseDescendents( FileSelectInfo info ) {
      return includesubfolders || info.getDepth() == 0;
    }
  }

  /**********************************************************
   * Checks a file name against the configured wildcard, which is interpreted as a regular expression
   * that must match the whole name. Variables in the wildcard are resolved first.
   *
   * @param selectedfile
   *          the file name to test
   * @return True if the selectedfile matches the wildcard, or if no wildcard is configured
   **********************************************************/
  private boolean GetFileWildcard( String selectedfile ) {
    // Use the resolved wildcard: the raw field may still contain unresolved variables
    String realWildcard = getRealWildcard();
    if ( Utils.isEmpty( realWildcard ) ) {
      return true;
    }

    // See if the file matches the regular expression!
    Pattern pattern = Pattern.compile( realWildcard );
    Matcher matcher = pattern.matcher( selectedfile );
    return matcher.matches();
  }

  /**
   * @return always true
   */
  public boolean evaluates() {
    return true;
  }

  /**
   * @param wildcard
   *          the regular expression used to filter file names in "specify" mode
   */
  public void setWildcard( String wildcard ) {
    this.wildcard = wildcard;
  }

  /**
   * @return the wildcard exactly as configured, without variable substitution
   */
  public String getWildcard() {
    return wildcard;
  }

  /**
   * @param filename
   *          the first location to compare
   */
  public void setFilename1( String filename ) {
    this.filename1 = filename;
  }

  /**
   * @return the first location exactly as configured, without variable substitution
   */
  public String getFilename1() {
    return filename1;
  }

  /**
   * @param filename
   *          the second location to compare
   */
  public void setFilename2( String filename ) {
    this.filename2 = filename;
  }

  /**
   * @return the second location exactly as configured, without variable substitution
   */
  public String getFilename2() {
    return filename2;
  }

  /**
   * Validates both configured locations with the combined "not null" and "file exists" validators and
   * adds the findings to the given remarks.
   *
   * @param remarks
   *          the list that receives the validation remarks
   * @param jobMeta
   *          not used by this method
   * @param space
   *          not used by this method; the variables of this entry are used instead
   * @param repository
   *          not used by this method
   * @param metaStore
   *          not used by this method
   */
  public void check( List remarks, JobMeta jobMeta, VariableSpace space,
    Repository repository, IMetaStore metaStore ) {
    ValidatorContext validation = new ValidatorContext();
    AbstractFileValidator.putVariableSpace( validation, getVariables() );
    AndValidator.putValidators(
      validation, JobEntryValidatorUtils.notNullValidator(), JobEntryValidatorUtils.fileExistsValidator() );

    // Same validation for each of the two locations, in declaration order
    String[] properties = { "filename1", "filename2" };
    for ( int i = 0; i < properties.length; i++ ) {
      JobEntryValidatorUtils.andValidator().validate( this, properties[i], remarks, validation );
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy