All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.ratis.util.MD5FileUtil Maven / Gradle / Ivy

There is a newer version: 3.1.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.ratis.util;

import org.apache.ratis.io.MD5Hash;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.StandardOpenOption;
import java.security.MessageDigest;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for computing the MD5 checksum of a data file and for reading,
 * writing and verifying the companion {@code .md5} file stored next to it.
 *
 * <p>A companion file holds a single line of the form
 * {@code <32 lowercase hex digits> <space or '*'><file name>}, see {@link #LINE_REGEX}.
 */
public abstract class MD5FileUtil {
  public static final Logger LOG = LoggerFactory.getLogger(MD5FileUtil.class);

  // TODO: we should provide something like Hadoop's checksum fs for the local filesystem
  // so that individual state machines do not have to deal with checksumming/corruption prevention.
  // Keep the checksum and data in the same block format instead of individual files.

  /** Suffix appended to a data file name to obtain the name of its checksum file. */
  public static final String MD5_SUFFIX = ".md5";
  /** Group 1 is the 32-digit lowercase hex digest; group 2 is the referenced file name. */
  private static final String LINE_REGEX = "([0-9a-f]{32}) [ *](.+)";
  private static final Pattern LINE_PATTERN = Pattern.compile(LINE_REGEX);

  /**
   * Match the given checksum line against {@link #LINE_PATTERN}.
   *
   * @param md5 the line to match, may be null
   * @return a matcher on which {@code matches()} has already succeeded,
   *         or null if the line is null or does not match the entire pattern
   */
  static Matcher getMatcher(String md5) {
    return Optional.ofNullable(md5)
        .map(LINE_PATTERN::matcher)
        .filter(Matcher::matches)
        .orElse(null);
  }

  /**
   * Build the message fragment used when a line does not match {@link #LINE_REGEX}.
   *
   * @param line the offending text, quoted verbatim in the result
   */
  static String getDoesNotMatchString(String line) {
    return "\"" + line + "\" does not match the pattern " + LINE_REGEX;
  }

  /**
   * Verify that the previously saved md5 for the given file matches
   * expectedMd5.
   *
   * @param dataFile the data file whose stored checksum is checked
   * @param expectedMD5 the checksum the stored value must equal
   * @throws IOException if the stored checksum is missing, unreadable, malformed,
   *                     or different from {@code expectedMD5}
   */
  public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5)
      throws IOException {
    final MD5Hash storedHash = readStoredMd5ForFile(dataFile);
    // storedHash is null when no .md5 file exists; that also counts as a mismatch.
    if (!expectedMD5.equals(storedHash)) {
      throw new IOException(
          "File " + dataFile + " did not match stored MD5 checksum (stored: "
              + storedHash + ", computed: " + expectedMD5 + ")");
    }
  }

  /**
   * Read the first line of the given file.
   *
   * @return the trimmed first line, or null if the file has no line at all
   * @throws IOException wrapping the original failure together with the file name
   */
  private static String readFirstLine(File f) throws IOException {
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
        FileUtils.newInputStream(f), StandardCharsets.UTF_8))) {
      return Optional.ofNullable(reader.readLine()).map(String::trim).orElse(null);
    } catch (IOException ioe) {
      throw new IOException("Failed to read file: " + f, ioe);
    }
  }

  /**
   * Read the md5 checksum stored alongside the given data file.
   *
   * @param dataFile the file containing data
   * @return the checksum stored in dataFile.md5, or null if that file does not exist
   * @throws IOException if the md5 file cannot be read, its first line does not match
   *                     {@link #LINE_REGEX}, or it references a file with a different name
   */
  public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException {
    final File md5File = getDigestFileForFile(dataFile);
    if (!md5File.exists()) {
      return null;
    }

    final String md5 = readFirstLine(md5File);
    final Matcher matcher = Optional.ofNullable(getMatcher(md5)).orElseThrow(() -> new IOException(
        "Invalid MD5 file " + md5File + ": the content " + getDoesNotMatchString(md5)));
    final String storedHash = matcher.group(1);
    final File referencedFile = new File(matcher.group(2));

    // Sanity check: Make sure that the file referenced in the .md5 file at
    // least has the same name as the file we expect
    if (!referencedFile.getName().equals(dataFile.getName())) {
      throw new IOException(
          "MD5 file at " + md5File + " references file named " +
              referencedFile.getName() + " but we expected it to reference " +
              dataFile);
    }
    return new MD5Hash(storedHash);
  }

  /**
   * Read dataFile and compute its MD5 checksum.
   *
   * <p>The file is fed to the digester in chunks of at most
   * {@code SizeInBytes.ONE_MB.getSizeInt()} bytes, each mapped read-only.
   *
   * @param dataFile the file to digest
   * @return the MD5 of the whole file content
   * @throws IOException if the file cannot be opened or mapped
   */
  public static MD5Hash computeMd5ForFile(File dataFile) throws IOException {
    final int bufferSize = SizeInBytes.ONE_MB.getSizeInt();
    final MessageDigest digester = MD5Hash.getDigester();
    try (FileChannel in = FileUtils.newFileChannel(dataFile, StandardOpenOption.READ)) {
      final long fileSize = in.size();
      // The offset must be a long: an int would wrap to a negative value once the
      // file position reaches 2 GiB, making FileChannel.map reject the position.
      for (long offset = 0; offset < fileSize; ) {
        final int readSize = Math.toIntExact(Math.min(fileSize - offset, bufferSize));
        digester.update(in.map(FileChannel.MapMode.READ_ONLY, offset, readSize));
        offset += readSize;
      }
    }
    return new MD5Hash(digester.digest());
  }

  /**
   * Compute the MD5 checksum of dataFile and save it to the companion md5 file.
   *
   * @param dataFile the file to digest
   * @return the computed checksum
   * @throws IllegalStateException if computing or saving the checksum fails with an
   *                               {@link IOException}, which is kept as the cause
   */
  public static MD5Hash computeAndSaveMd5ForFile(File dataFile) {
    final MD5Hash md5;
    try {
      md5 = computeMd5ForFile(dataFile);
    } catch (IOException e) {
      throw new IllegalStateException("Failed to compute MD5 for file " + dataFile, e);
    }
    try {
      saveMD5File(dataFile, md5);
    } catch (IOException e) {
      throw new IllegalStateException("Failed to save MD5 " + md5 + " for file " + dataFile, e);
    }
    return md5;
  }

  /**
   * Save the ".md5" file that lists the md5sum of another file.
   *
   * @param dataFile the original file whose md5 was computed
   * @param digest the computed digest
   * @throws IOException if writing the md5 file fails
   */
  public static void saveMD5File(File dataFile, MD5Hash digest)
      throws IOException {
    final String digestString = StringUtils.bytes2HexString(digest.getDigest());
    saveMD5File(dataFile, digestString);
  }

  /**
   * Write the line {@code <digestString> *<dataFile name>} to the companion md5 file.
   *
   * @param dataFile the original file whose md5 was computed
   * @param digestString the digest as a hex string
   * @throws IllegalArgumentException if the resulting line does not match {@link #LINE_REGEX}
   * @throws IOException if writing the md5 file fails
   */
  private static void saveMD5File(File dataFile, String digestString)
      throws IOException {
    final String md5Line = digestString + " *" + dataFile.getName() + "\n";
    // Validate the whole line (trimmed, as a reader would see it) before writing,
    // so that a file which cannot be parsed back is never produced.
    if (getMatcher(md5Line.trim()) == null) {
      throw new IllegalArgumentException("Invalid md5 string: " + getDoesNotMatchString(digestString));
    }

    final File md5File = getDigestFileForFile(dataFile);
    try (AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File)) {
      afos.write(md5Line.getBytes(StandardCharsets.UTF_8));
    }

    LOG.debug("Saved MD5 {} to {}", digestString, md5File);
  }

  /**
   * @return a reference to the file with .md5 suffix that will
   * contain the md5 checksum for the given data file.
   */
  public static File getDigestFileForFile(File file) {
    return new File(file.getParentFile(), file.getName() + MD5_SUFFIX);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy