All downloads are free. Search and download functionalities use the official Maven repository.

src.org.jets3t.service.utils.FileComparer Maven / Gradle / Ivy

/*
 * jets3t : Java Extra-Tasty S3 Toolkit (for Amazon S3 online storage service)
 * This is a java.net project, see https://jets3t.dev.java.net/
 * 
 * Copyright 2006 James Murty
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. 
 */
package org.jets3t.service.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jets3t.service.Constants;
import org.jets3t.service.Jets3tProperties;
import org.jets3t.service.S3ObjectsChunk;
import org.jets3t.service.S3Service;
import org.jets3t.service.S3ServiceException;
import org.jets3t.service.io.BytesProgressWatcher;
import org.jets3t.service.io.ProgressMonitoredInputStream;
import org.jets3t.service.model.S3Bucket;
import org.jets3t.service.model.S3Object;
import org.jets3t.service.multithread.GetObjectHeadsEvent;
import org.jets3t.service.multithread.ListObjectsEvent;
import org.jets3t.service.multithread.S3ServiceEventAdaptor;
import org.jets3t.service.multithread.S3ServiceEventListener;
import org.jets3t.service.multithread.S3ServiceMulti;

/**
 * File comparison utility to compare files on the local computer with objects present in an S3
 * account and determine whether there are any differences. This utility contains methods to
 * build maps of the contents of the local file system or S3 account for comparison, and 
 * buildDiscrepancyLists methods to find differences in these maps.
 * 

* File comparisons are based primarily on MD5 hashes of the files' contents. If a local file does * not match an object in S3 with the same name, this utility determine which of the items is * newer by comparing the last modified dates. * * @author James Murty */ public class FileComparer { private static final Log log = LogFactory.getLog(FileComparer.class); private Jets3tProperties jets3tProperties = null; /** * Constructs the class. * * @param jets3tProperties * the object containing the properties that will be applied in this class. */ public FileComparer(Jets3tProperties jets3tProperties) { this.jets3tProperties = jets3tProperties; } /** * @param jets3tProperties * the object containing the properties that will be applied in the instance. * @return * a FileComparer instance. */ public static FileComparer getInstance(Jets3tProperties jets3tProperties) { return new FileComparer(jets3tProperties); } /** * @return * a FileComparer instance initialized with the default JetS3tProperties * object. */ public static FileComparer getInstance() { return new FileComparer( Jets3tProperties.getInstance(Constants.JETS3T_PROPERTIES_FILENAME)); } /** * If a .jets3t-ignore file is present in the given directory, the file is read * and all the paths contained in it are coverted to regular expression Pattern objects. * * @param directory * a directory that may contain a .jets3t-ignore file. If this parameter is null * or is actually a file and not a directory, an empty list will be returned. * * @return * a list of Pattern objects representing the paths in the ignore file. If there is no ignore * file, or if it has no contents, the list returned will be empty. 
*/ protected List buildIgnoreRegexpList(File directory) { ArrayList ignorePatternList = new ArrayList(); if (directory == null || !directory.isDirectory()) { return ignorePatternList; } File jets3tIgnoreFile = new File(directory, Constants.JETS3T_IGNORE_FILENAME); if (jets3tIgnoreFile.exists() && jets3tIgnoreFile.canRead()) { if (log.isDebugEnabled()) { log.debug("Found ignore file: " + jets3tIgnoreFile.getPath()); } try { String ignorePaths = ServiceUtils.readInputStreamToString( new FileInputStream(jets3tIgnoreFile), null); StringTokenizer st = new StringTokenizer(ignorePaths.trim(), "\n"); while (st.hasMoreTokens()) { String ignorePath = st.nextToken(); // Convert path to RegExp. String ignoreRegexp = ignorePath; ignoreRegexp = ignoreRegexp.replaceAll("\\.", "\\\\."); ignoreRegexp = ignoreRegexp.replaceAll("\\*", ".*"); ignoreRegexp = ignoreRegexp.replaceAll("\\?", "."); Pattern pattern = Pattern.compile(ignoreRegexp); if (log.isDebugEnabled()) { log.debug("Ignore path '" + ignorePath + "' has become the regexp: " + pattern.pattern()); } ignorePatternList.add(pattern); } } catch (IOException e) { if (log.isErrorEnabled()) { log.error("Failed to read contents of ignore file '" + jets3tIgnoreFile.getPath() + "'", e); } } } if (jets3tProperties.getBoolProperty("filecomparer.skip-upload-of-md5-files", false)) { Pattern pattern = Pattern.compile(".*\\.md5"); if (log.isDebugEnabled()) { log.debug("Skipping upload of pre-computed MD5 files with path '*.md5' using the regexp: " + pattern.pattern()); } ignorePatternList.add(pattern); } return ignorePatternList; } /** * Determines whether a file should be ignored, based on whether it matches a regular expression * Pattern in the provided ignore list. * * @param ignorePatternList * a list of Pattern objects representing the file names to ignore. * @param file * a file that will either be ignored or not, depending on whether it matches an ignore Pattern. * * @return * true if the file should be ignored, false otherwise. 
*/ protected boolean isIgnored(List ignorePatternList, File file) { Iterator patternIter = ignorePatternList.iterator(); while (patternIter.hasNext()) { Pattern pattern = (Pattern) patternIter.next(); if (pattern.matcher(file.getName()).matches()) { if (log.isDebugEnabled()) { log.debug("Ignoring " + (file.isDirectory() ? "directory" : "file") + " matching pattern '" + pattern.pattern() + "': " + file.getName()); } return true; } } return false; } /** * Builds a File Map containing the given files. If any of the given files are actually * directories, the contents of the directory are included. *

* File keys are delimited with '/' characters. *

* Any file or directory matching a path in a .jets3t-ignore file will be ignored. * * @param files * the set of files/directories to include in the file map. * @param includeDirectories * If true all directories, including empty ones, will be included in the Map. These directories * will be mere place-holder objects with the content type {@link Mimetypes#MIMETYPE_JETS3T_DIRECTORY}. * If this variable is false directory objects will not be included in the Map, and it will not * be possible to store empty directories in S3. * * @return * a Map of file path keys to File objects. */ public Map buildFileMap(File[] files, boolean includeDirectories) { // Build map of files proposed for upload or download. HashMap fileMap = new HashMap(); List ignorePatternList = null; List ignorePatternListForCurrentDir = null; for (int i = 0; i < files.length; i++) { if (files[i].getParentFile() == null) { // For direct references to a file or dir, look for a .jets3t-ignore file // in the current directory - only do this once for the current dir. if (ignorePatternListForCurrentDir == null) { ignorePatternListForCurrentDir = buildIgnoreRegexpList(new File(".")); } ignorePatternList = ignorePatternListForCurrentDir; } else { ignorePatternList = buildIgnoreRegexpList(files[i].getParentFile()); } if (!isIgnored(ignorePatternList, files[i])) { if (!files[i].exists()) { continue; } if (!files[i].isDirectory() || includeDirectories) { fileMap.put(files[i].getName(), files[i]); } if (files[i].isDirectory()) { buildFileMapImpl(files[i], files[i].getName() + Constants.FILE_PATH_DELIM, fileMap, includeDirectories); } } } return fileMap; } /** * Builds a File Map containing all the files and directories inside the given root directory, * where the map's key for each file is the relative path to the file. *

* File keys are delimited with '/' characters. *

* Any file or directory matching a path in a .jets3t-ignore file will be ignored. * * @see #buildDiscrepancyLists(Map, Map) * @see #buildS3ObjectMap(S3Service, S3Bucket, String, S3Object[], boolean, S3ServiceEventListener) * * @param rootDirectory * The root directory containing the files/directories of interest. The root directory is not * included in the result map. * @param fileKeyPrefix * A prefix added to each file path key in the map, e.g. the name of the root directory the * files belong to. If provided, a '/' suffix is always added to the end of the prefix. If null * or empty, no prefix is used. * @param includeDirectories * If true all directories, including empty ones, will be included in the Map. These directories * will be mere place-holder objects with the content type {@link Mimetypes#MIMETYPE_JETS3T_DIRECTORY}. * If this variable is false directory objects will not be included in the Map, and it will not * be possible to store empty directories in S3. * * @return A Map of file path keys to File objects. */ public Map buildFileMap(File rootDirectory, String fileKeyPrefix, boolean includeDirectories) { HashMap fileMap = new HashMap(); List ignorePatternList = buildIgnoreRegexpList(rootDirectory); if (!isIgnored(ignorePatternList, rootDirectory)) { if (fileKeyPrefix == null || fileKeyPrefix.length() == 0) { fileKeyPrefix = ""; } else { if (!fileKeyPrefix.endsWith(Constants.FILE_PATH_DELIM)) { fileKeyPrefix += Constants.FILE_PATH_DELIM; } } buildFileMapImpl(rootDirectory, fileKeyPrefix, fileMap, includeDirectories); } return fileMap; } /** * Recursively builds a File Map containing all the files and directories inside the given directory, * where the map's key for each file is the relative path to the file. *

* File keys are delimited with '/' characters. *

* Any file or directory matching a path in a .jets3t-ignore file will be ignored. * * @param directory * The directory containing the files/directories of interest. The directory is not * included in the result map. * @param fileKeyPrefix * A prefix added to each file path key in the map, e.g. the name of the root directory the * files belong to. This prefix must end with a '/' character. * @param fileMap * a map of path keys to File objects, that this method adds items to. * @param includeDirectories * If true all directories, including empty ones, will be included in the Map. These directories * will be mere place-holder objects with the content type {@link Mimetypes#MIMETYPE_JETS3T_DIRECTORY}. * If this variable is false directory objects will not be included in the Map, and it will not * be possible to store empty directories in S3. */ protected void buildFileMapImpl(File directory, String fileKeyPrefix, Map fileMap, boolean includeDirectories) { List ignorePatternList = buildIgnoreRegexpList(directory); File children[] = directory.listFiles(); for (int i = 0; children != null && i < children.length; i++) { if (!isIgnored(ignorePatternList, children[i])) { if (!children[i].isDirectory() || includeDirectories) { fileMap.put(fileKeyPrefix + children[i].getName(), children[i]); } if (children[i].isDirectory()) { buildFileMapImpl(children[i], fileKeyPrefix + children[i].getName() + "/", fileMap, includeDirectories); } } } } /** * Lists the objects in a bucket using a partitioning technique to divide * the object namespace into separate partitions that can be listed by * multiple simultaneous threads. This method divides the object namespace * using the given delimiter, traverses this space up to the specified * depth to identify prefix names for multiple "partitions", and * then lists the objects in each partition. It returns the complete list * of objects in the bucket path. *

* This partitioning technique will work best for buckets with many objects * that are divided into a number of virtual subdirectories of roughly equal * size. * * @param s3Service * the service object that will be used to perform listing requests. * @param bucketName * the name of the bucket whose contents will be listed. * @param targetPath * a root path within the bucket to be listed. If this parameter is null, all * the bucket's objects will be listed. Otherwise, only the objects below the * virtual path specified will be listed. * @param delimiter * the delimiter string used to identify virtual subdirectory partitions * in a bucket. If this parameter is null, or it has a value that is not * present in your object names, no partitioning will take place. * @param toDepth * the number of delimiter levels this method will traverse to identify * subdirectory partions. If this value is zero, no partitioning will take * place. * * @return * the list of objects under the target path in the bucket. * * @throws S3ServiceException */ public S3Object[] listObjectsThreaded(S3Service s3Service, final String bucketName, String targetPath, final String delimiter, int toDepth) throws S3ServiceException { final List allObjects = Collections.synchronizedList(new ArrayList()); final List lastCommonPrefixes = Collections.synchronizedList(new ArrayList()); final S3ServiceException s3ServiceExceptions[] = new S3ServiceException[1]; /* * Create a S3ServiceMulti object with an event listener that responds to * ListObjectsEvent notifications and populates a complete object listing. 
*/ final S3ServiceMulti s3Multi = new S3ServiceMulti(s3Service, new S3ServiceEventAdaptor() { public void s3ServiceEventPerformed(ListObjectsEvent event) { if (ListObjectsEvent.EVENT_IN_PROGRESS == event.getEventCode()) { Iterator chunkIter = event.getChunkList().iterator(); while (chunkIter.hasNext()) { S3ObjectsChunk chunk = (S3ObjectsChunk) chunkIter.next(); if (log.isDebugEnabled()) { log.debug("Listed " + chunk.getObjects().length + " objects and " + chunk.getCommonPrefixes().length + " common prefixes in bucket '" + bucketName + "' using prefix=" + chunk.getPrefix() + ", delimiter=" + chunk.getDelimiter()); } allObjects.addAll(Arrays.asList(chunk.getObjects())); lastCommonPrefixes.addAll(Arrays.asList(chunk.getCommonPrefixes())); } } else if (ListObjectsEvent.EVENT_ERROR == event.getEventCode()) { s3ServiceExceptions[0] = new S3ServiceException( "Failed to list all objects in S3 bucket", event.getErrorCause()); } } }); // The first listing partition we use as a starting point is the target path. String[] prefixesToList = new String[] {targetPath}; int currentDepth = 0; while (currentDepth <= toDepth && prefixesToList.length > 0) { if (log.isDebugEnabled()) { log.debug("Listing objects in '" + bucketName + "' using " + prefixesToList.length + " prefixes: " + Arrays.asList(prefixesToList)); } // Initialize the variables that will be used, or populated, by the // multi-threaded listing. lastCommonPrefixes.clear(); final String[] finalPrefixes = prefixesToList; final String finalDelimiter = (currentDepth < toDepth ? delimiter : null); /* * Perform a multi-threaded listing, where each prefix string * will be used as a unique partition to be listed in a separate thread. */ (new Thread() { public void run() { s3Multi.listObjects(bucketName, finalPrefixes, finalDelimiter, Constants.DEFAULT_OBJECT_LIST_CHUNK_SIZE); }; }).run(); // Throw any exceptions that occur inside the threads. 
if (s3ServiceExceptions[0] != null) { throw s3ServiceExceptions[0]; } // We use the common prefix paths identified in the last listing // iteration, if any, to identify partitions for follow-up listings. prefixesToList = (String[]) lastCommonPrefixes .toArray(new String[lastCommonPrefixes.size()]); currentDepth++; } return (S3Object[]) allObjects.toArray(new S3Object[allObjects.size()]); } /** * Lists the objects in a bucket using a partitioning technique to divide * the object namespace into separate partitions that can be listed by * multiple simultaneous threads. This method divides the object namespace * using the given delimiter, traverses this space up to the specified * depth to identify prefix names for multiple "partitions", and * then lists the objects in each partition. It returns the complete list * of objects in the bucket path. *

* This partitioning technique will work best for buckets with many objects * that are divided into a number of virtual subdirectories of roughly equal * size. *

* The delimiter and depth properties that define how this method will * partition the bucket's namespace are set in the jets3t.properties file * with the setting: * filecomparer.bucket-listing.<bucketname>=<delim>,<depth>
* For example: filecomparer.bucket-listing.my-bucket=/,2 * * @param s3Service * the service object that will be used to perform listing requests. * @param bucketName * the name of the bucket whose contents will be listed. * @param targetPath * a root path within the bucket to be listed. If this parameter is null, all * the bucket's objects will be listed. Otherwise, only the objects below the * virtual path specified will be listed. * * @return * the list of objects under the target path in the bucket. * * @throws S3ServiceException */ public S3Object[] listObjectsThreaded(S3Service s3Service, final String bucketName, String targetPath) throws S3ServiceException { String delimiter = null; int toDepth = 0; // Find bucket-specific listing properties, if any. String bucketListingProperties = jets3tProperties.getStringProperty( "filecomparer.bucket-listing." + bucketName, null); if (bucketListingProperties != null) { String splits[] = bucketListingProperties.split(","); if (splits.length != 2) { throw new S3ServiceException( "Invalid setting for bucket listing property " + "filecomparer.bucket-listing." + bucketName + ": '" + bucketListingProperties + "'"); } delimiter = splits[0].trim(); toDepth = Integer.parseInt(splits[1]); } return listObjectsThreaded(s3Service, bucketName, targetPath, delimiter, toDepth); } /** * Builds an S3 Object Map containing all the objects within the given target path, * where the map's key for each object is the relative path to the object. * * @see #buildDiscrepancyLists(Map, Map) * @see #buildFileMap(File, String, boolean) * * @param s3Service * @param bucket * @param targetPath * @param skipMetadata * @param s3ServiceEventListener * @return * mapping of keys/S3Objects * @throws S3ServiceException */ public Map buildS3ObjectMap(S3Service s3Service, S3Bucket bucket, String targetPath, boolean skipMetadata, S3ServiceEventListener s3ServiceEventListener) throws S3ServiceException { String prefix = (targetPath.length() > 0 ? 
targetPath : null); S3Object[] s3ObjectsIncomplete = this.listObjectsThreaded( s3Service, bucket.getName(), prefix); return buildS3ObjectMap(s3Service, bucket, targetPath, s3ObjectsIncomplete, skipMetadata, s3ServiceEventListener); } /** * Builds an S3 Object Map containing a partial set of objects within the given target path, * where the map's key for each object is the relative path to the object. *

* If the method is asked to perform a complete listing, it will use the * {@link #listObjectsThreaded(S3Service, String, String)} method to list the objects * in the bucket, potentially taking advantage of any bucket name partitioning * settings you have applied. *

* If the method is asked to perform only a partial listing, no bucket name * partitioning will be applied. * * @see #buildDiscrepancyLists(Map, Map) * @see #buildFileMap(File, String, boolean) * * @param s3Service * @param bucket * @param targetPath * @param priorLastKey * the prior last key value returned by a prior invocation of this method, if any. * @param completeListing * if true, this method will perform a complete listing of an S3 target. * If false, the method will list a partial set of objects commencing from the * given prior last key. * * @return * an object containing a mapping of key names to S3Objects, and the prior last * key (if any) that should be used to perform follow-up method calls. * @throws S3ServiceException */ public PartialObjectListing buildS3ObjectMapPartial(S3Service s3Service, S3Bucket bucket, String targetPath, String priorLastKey, boolean completeListing, boolean skipMetadata, S3ServiceEventListener s3ServiceEventListener) throws S3ServiceException { String prefix = (targetPath.length() > 0 ? targetPath : null); S3Object[] objects = null; String resultPriorLastKey = null; if (completeListing) { objects = listObjectsThreaded(s3Service, bucket.getName(), prefix); } else { S3ObjectsChunk chunk = s3Service.listObjectsChunked(bucket.getName(), prefix, null, Constants.DEFAULT_OBJECT_LIST_CHUNK_SIZE, priorLastKey, completeListing); objects = chunk.getObjects(); resultPriorLastKey = chunk.getPriorLastKey(); } Map objectsMap = buildS3ObjectMap(s3Service, bucket, targetPath, objects, skipMetadata, s3ServiceEventListener); return new PartialObjectListing(objectsMap, resultPriorLastKey); } /** * Builds an S3 Object Map containing all the given objects, by retrieving HEAD details about * all the objects and using {@link #populateS3ObjectMap(String, S3Object[])} to product an object/key * map. 
* * @see #buildDiscrepancyLists(Map, Map) * @see #buildFileMap(File, String, boolean) * * @param s3Service * @param bucket * @param targetPath * @param skipMetadata * @param s3ObjectsIncomplete * @return * mapping of keys/S3Objects * @throws S3ServiceException */ public Map buildS3ObjectMap(S3Service s3Service, S3Bucket bucket, String targetPath, S3Object[] s3ObjectsIncomplete, boolean skipMetadata, S3ServiceEventListener s3ServiceEventListener) throws S3ServiceException { S3Object[] s3Objects = null; if (skipMetadata) { s3Objects = s3ObjectsIncomplete; } else { // Retrieve the complete information about all objects listed via GetObjectsHeads. final ArrayList s3ObjectsCompleteList = new ArrayList(s3ObjectsIncomplete.length); final S3ServiceException s3ServiceExceptions[] = new S3ServiceException[1]; S3ServiceMulti s3ServiceMulti = new S3ServiceMulti(s3Service, new S3ServiceEventAdaptor() { public void s3ServiceEventPerformed(GetObjectHeadsEvent event) { if (GetObjectHeadsEvent.EVENT_IN_PROGRESS == event.getEventCode()) { S3Object[] finishedObjects = event.getCompletedObjects(); if (finishedObjects.length > 0) { s3ObjectsCompleteList.addAll(Arrays.asList(finishedObjects)); } } else if (GetObjectHeadsEvent.EVENT_ERROR == event.getEventCode()) { s3ServiceExceptions[0] = new S3ServiceException( "Failed to retrieve detailed information about all S3 objects", event.getErrorCause()); } } }); if (s3ServiceEventListener != null) { s3ServiceMulti.addServiceEventListener(s3ServiceEventListener); } s3ServiceMulti.getObjectsHeads(bucket, s3ObjectsIncomplete); if (s3ServiceExceptions[0] != null) { throw s3ServiceExceptions[0]; } s3Objects = (S3Object[]) s3ObjectsCompleteList .toArray(new S3Object[s3ObjectsCompleteList.size()]); } return populateS3ObjectMap(targetPath, s3Objects); } /** * Builds a map of key/object pairs each object is associated with a key based on its location * in the S3 target path. 
* * @param targetPath * @param s3Objects * @return * a map of key/S3Object pairs. */ public Map populateS3ObjectMap(String targetPath, S3Object[] s3Objects) { HashMap map = new HashMap(); for (int i = 0; i < s3Objects.length; i++) { String relativeKey = s3Objects[i].getKey(); if (targetPath.length() > 0) { relativeKey = relativeKey.substring(targetPath.length()); int slashIndex = relativeKey.indexOf(Constants.FILE_PATH_DELIM); if (slashIndex == 0) { relativeKey = relativeKey.substring(slashIndex + 1, relativeKey.length()); } else { // This object is the result of a prefix search, not an explicit directory. // Base the relative key on the last full subdirectory in the // target path if available... slashIndex = targetPath.lastIndexOf(Constants.FILE_PATH_DELIM); if (slashIndex >= 0) { relativeKey = s3Objects[i].getKey().substring(slashIndex + 1); } // ...otherwise, use the full object key name. else { relativeKey = s3Objects[i].getKey(); } } } if (relativeKey.length() > 0) { map.put(relativeKey, s3Objects[i]); } } return map; } /** * Compares the contents of a directory on the local file system with the contents of an * S3 resource. This comparison is performed on a map of files and a map of S3 objects previously * generated using other methods in this class. * * @param filesMap * a map of keys/Files built using the method {@link #buildFileMap(File, String, boolean)} * @param s3ObjectsMap * a map of keys/S3Objects built using the method * {@link #buildS3ObjectMap(S3Service, S3Bucket, String, S3Object[], boolean, S3ServiceEventListener)} * @return * an object containing the results of the file comparison. 
* * @throws NoSuchAlgorithmException * @throws FileNotFoundException * @throws IOException * @throws ParseException */ public FileComparerResults buildDiscrepancyLists(Map filesMap, Map s3ObjectsMap) throws NoSuchAlgorithmException, FileNotFoundException, IOException, ParseException { return buildDiscrepancyLists(filesMap, s3ObjectsMap, null); } /** * Compares the contents of a directory on the local file system with the contents of an * S3 resource. This comparison is performed on a map of files and a map of S3 objects previously * generated using other methods in this class. * * @param filesMap * a map of keys/Files built using the method {@link #buildFileMap(File, String, boolean)} * @param s3ObjectsMap * a map of keys/S3Objects built using the method * {@link #buildS3ObjectMap(S3Service, S3Bucket, String, boolean, S3ServiceEventListener)} * @param progressWatcher * watches the progress of file hash generation. * @return * an object containing the results of the file comparison. * * @throws NoSuchAlgorithmException * @throws FileNotFoundException * @throws IOException * @throws ParseException */ public FileComparerResults buildDiscrepancyLists(Map filesMap, Map s3ObjectsMap, BytesProgressWatcher progressWatcher) throws NoSuchAlgorithmException, FileNotFoundException, IOException, ParseException { Set onlyOnServerKeys = new HashSet(); Set updatedOnServerKeys = new HashSet(); Set updatedOnClientKeys = new HashSet(); Set alreadySynchronisedKeys = new HashSet(); Set onlyOnClientKeys = new HashSet(); // Check files on server against local client files. 
Iterator s3ObjectsMapIter = s3ObjectsMap.entrySet().iterator(); while (s3ObjectsMapIter.hasNext()) { Map.Entry entry = (Map.Entry) s3ObjectsMapIter.next(); String keyPath = (String) entry.getKey(); S3Object s3Object = (S3Object) entry.getValue(); // A special-case check to identify objects created by Panic's // Transmit application that serve as directory placehoders - // a similar concept to the placeholders JetS3t uses but sadly // these look different. if (keyPath.endsWith("/") && s3Object.getContentLength() == 0 && "binary/octet-stream".equals(s3Object.getContentType())) { boolean ignorePanicDirPlaceholders = jets3tProperties.getBoolProperty( "filecomparer.ignore-panic-dir-placeholders", false); if (ignorePanicDirPlaceholders) { if (log.isDebugEnabled()) { log.debug("Ignoring object that looks like a directory " + "placeholder created by Panic's Transmit application: " + keyPath); } alreadySynchronisedKeys.add(keyPath); continue; } else { if (log.isWarnEnabled()) { log.warn("Identified an object that looks like a directory " + "placeholder created by Panic's Transmit application. " + "If this object was indeed created by Transmit, it will not " + "be handled properly unless the JetS3t property " + "\"filecomparer.ignore-panic-dir-placeholders\" is set to " + "true. " + s3Object); } } } // Check whether local file is already on server if (filesMap.containsKey(keyPath)) { // File has been backed up in the past, is it still up-to-date? File file = (File) filesMap.get(keyPath); if (file.isDirectory()) { // We don't care about directory date changes, as long as it's present. alreadySynchronisedKeys.add(keyPath); } else { // Compare file hashes. 
boolean useMd5Files = jets3tProperties .getBoolProperty("filecomparer.use-md5-files", false); boolean generateMd5Files = jets3tProperties .getBoolProperty("filecomparer.generate-md5-files", false); byte[] computedHash = null; // Check whether a pre-computed MD5 hash file is available File computedHashFile = new File(file.getPath() + ".md5"); if (useMd5Files && computedHashFile.canRead() && computedHashFile.lastModified() > file.lastModified()) { try { // A pre-computed MD5 hash file is available, try to read this hash value BufferedReader br = new BufferedReader(new FileReader(computedHashFile)); computedHash = ServiceUtils.fromHex(br.readLine().split("\\s")[0]); br.close(); } catch (Exception e) { if (log.isWarnEnabled()) { log.warn("Unable to read hash from computed MD5 file", e); } } } if (computedHash == null) { // A pre-computed hash file was not available, or could not be read. // Calculate the hash value anew. InputStream hashInputStream = null; if (progressWatcher != null) { hashInputStream = new ProgressMonitoredInputStream( // Report on MD5 hash progress. new FileInputStream(file), progressWatcher); } else { hashInputStream = new FileInputStream(file); } computedHash = ServiceUtils.computeMD5Hash(hashInputStream); } String fileHashAsBase64 = ServiceUtils.toBase64(computedHash); if (generateMd5Files && !file.getName().endsWith(".md5") && (!computedHashFile.exists() || computedHashFile.lastModified() < file.lastModified())) { // Create or update a pre-computed MD5 hash file. try { FileWriter fw = new FileWriter(computedHashFile); fw.write(ServiceUtils.toHex(computedHash)); fw.close(); } catch (Exception e) { if (log.isWarnEnabled()) { log.warn("Unable to write computed MD5 hash to a file", e); } } } // Get the S3 object's Base64 hash. String objectHash = null; if (s3Object.containsMetadata(S3Object.METADATA_HEADER_ORIGINAL_HASH_MD5)) { // Use the object's *original* hash, as it is an encoded version of a local file. 
objectHash = (String) s3Object.getMetadata( S3Object.METADATA_HEADER_ORIGINAL_HASH_MD5); if (log.isDebugEnabled()) { log.debug("Object in S3 is encoded, using the object's original hash value for: " + s3Object.getKey()); } } else { // The object wasn't altered when uploaded, so use its current hash. objectHash = s3Object.getMd5HashAsBase64(); } if (fileHashAsBase64.equals(objectHash)) { // Hashes match so file is already synchronised. alreadySynchronisedKeys.add(keyPath); } else { // File is out-of-synch. Check which version has the latest date. Date s3ObjectLastModified = null; String metadataLocalFileDate = (String) s3Object.getMetadata( Constants.METADATA_JETS3T_LOCAL_FILE_DATE); // Try to retrieve the date using the deprecated metadata name if (metadataLocalFileDate == null) { metadataLocalFileDate = (String) s3Object.getMetadata( Constants.METADATA_JETS3T_LOCAL_FILE_DATE_DEPRECATED); } if (metadataLocalFileDate == null) { // This is risky as local file times and S3 times don't match! if (log.isWarnEnabled()) { log.warn("Using S3 last modified date as file date. This is not reliable " + "as the time according to S3 can differ from your local system time. " + "Please use the metadata item " + Constants.METADATA_JETS3T_LOCAL_FILE_DATE); } s3ObjectLastModified = s3Object.getLastModifiedDate(); } else { s3ObjectLastModified = ServiceUtils .parseIso8601Date(metadataLocalFileDate); } if (s3ObjectLastModified.getTime() > file.lastModified()) { updatedOnServerKeys.add(keyPath); } else if (s3ObjectLastModified.getTime() < file.lastModified()) { updatedOnClientKeys.add(keyPath); } else { // Local file date and S3 object date values match exactly, yet the // local file has a different hash. This shouldn't ever happen, but // sometimes does with Excel files. 
if (jets3tProperties.getBoolProperty( "filecomparer.assume-local-latest-in-mismatch", false)) { if (log.isWarnEnabled()) { log.warn("Backed-up S3Object " + s3Object.getKey() + " and local file " + file.getName() + " have the same date but different hash values. " + "Assuming local file is the latest version."); } updatedOnClientKeys.add(keyPath); } else { throw new IOException("Backed-up S3Object " + s3Object.getKey() + " and local file " + file.getName() + " have the same date but different hash values. " + "This shouldn't happen!"); } } } } } else { // File is not in local file system, so it's only on the S3 // server. onlyOnServerKeys.add(keyPath); } } // Any local files not already put into another list only exist locally. onlyOnClientKeys.addAll(filesMap.keySet()); onlyOnClientKeys.removeAll(updatedOnClientKeys); onlyOnClientKeys.removeAll(alreadySynchronisedKeys); onlyOnClientKeys.removeAll(updatedOnServerKeys); return new FileComparerResults(onlyOnServerKeys, updatedOnServerKeys, updatedOnClientKeys, onlyOnClientKeys, alreadySynchronisedKeys); } public class PartialObjectListing { private Map objectsMap = null; private String priorLastKey = null; public PartialObjectListing(Map objectsMap, String priorLastKey) { this.objectsMap = objectsMap; this.priorLastKey = priorLastKey; } public Map getObjectsMap() { return objectsMap; } public String getPriorLastKey() { return priorLastKey; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy