All downloads are free. Search and download functionality uses the official Maven repository.

com.senzing.io.IOUtilities Maven / Gradle / Ivy

package com.senzing.io;

import java.io.*;
import java.io.File;
import java.nio.charset.Charset;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.attribute.BasicFileAttributes;

import org.mozilla.universalchardet.UniversalDetector;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;

import static java.nio.file.FileVisitResult.*;

/**
 * Static I/O utility functions.
 */
public class IOUtilities {
  /**
   * The canonical name of the UTF-8 character encoding.
   */
  public static final String UTF_8 = "UTF-8";

  /**
   * The {@link Charset} instance representing the UTF-8 character encoding.
   */
  public static final Charset UTF_8_CHARSET
      = java.nio.charset.StandardCharsets.UTF_8;

  /**
   * Private default constructor.  Being private, it prevents code outside
   * this class from creating instances.
   */
  private IOUtilities() {
    // intentionally empty: there is no instance state to initialize
  }

  /**
   * Consumes bytes from the specified {@link InputStream}, treating each byte
   * as a single character, up to and including the next linefeed character
   * (or up to EOF if no linefeed is found).
   *
   * The linefeed itself is not part of the result and the result has leading
   * and trailing whitespace trimmed (which also drops a trailing carriage
   * return).
   *
   * @param inputStream The input stream to read from.
   *
   * @return The trimmed line that was read, or <code>null</code> if EOF was
   *         hit before any byte could be read.
   *
   * @throws IOException If an I/O exception occurs.
   */
  public static String readAsciiLine(InputStream inputStream)
      throws IOException
  {
    final StringBuilder line = new StringBuilder();
    boolean sawAnyByte = false;

    while (true) {
      final int next = inputStream.read();

      // a negative value signals EOF
      if (next < 0) {
        break;
      }
      sawAnyByte = true;

      // the linefeed terminates the line and is not appended
      if (next == '\n') {
        break;
      }
      line.append((char) next);
    }

    // nothing at all was read, so the stream was already at EOF
    if (!sawAnyByte) {
      return null;
    }
    return line.toString().trim();
  }

  /**
   * Quietly closes the specified {@link AutoCloseable}.  A
   * <code>null</code> argument is tolerated and any exception raised while
   * closing is suppressed.
   *
   * @param closeable The {@link AutoCloseable} to close, possibly
   *                  <code>null</code>.
   */
  public static void close(AutoCloseable closeable) {
    // nothing to close
    if (closeable == null) {
      return;
    }

    try {
      closeable.close();
    } catch (Exception ignored) {
      // best-effort close: failures are deliberately suppressed
    }
  }

  /**
   * Loads the entire contents of the specified file as text, decoding it
   * with the named character encoding (or the system default encoding when
   * the name is <code>null</code>).  A leading byte order mark is skipped
   * for UTF encodings and any NUL characters found in the text are dropped.
   *
   * @param file The {@link File} whose contents should be read.
   * @param charEncoding The character encoding for the text in the file, or
   *                     <code>null</code> for the system default.
   * @return The {@link String} representing the contents of the file.
   * @throws IOException If an I/O failure occurs.
   */
  public static String readTextFileAsString(File file, String charEncoding)
    throws IOException
  {
    final Charset charset;
    if (charEncoding != null) {
      charset = Charset.forName(charEncoding);
    } else {
      charset = Charset.defaultCharset();
    }

    try (FileInputStream fileStream = new FileInputStream(file);
         InputStreamReader decoder = new InputStreamReader(fileStream, charset);
         Reader bomFree = bomSkippingReader(decoder, charset.name());
         BufferedReader buffered = new BufferedReader(bomFree))
    {
      // pre-size the builder from the byte length, clamped to the int range
      final int capacity
          = (int) Math.min(file.length(), (long) Integer.MAX_VALUE);
      final StringBuilder text = new StringBuilder(capacity);

      int ch;
      while ((ch = buffered.read()) >= 0) {
        // NUL characters are not carried over into the result
        if (ch != 0) {
          text.append((char) ch);
        }
      }
      return text.toString();
    }
  }

  /**
   * Drains the specified {@link Reader} to EOF and returns everything that
   * was read as a single {@link String}, omitting any NUL characters.  The
   * specified {@link Reader} is closed before this function returns.
   *
   * @param reader The {@link Reader} whose contents should be read.
   * @return The {@link String} representing the text from the {@link Reader}.
   * @throws IOException If an I/O failure occurs.
   */
  public static String readFully(Reader reader) throws IOException
  {
    try (BufferedReader buffered = new BufferedReader(reader))
    {
      final StringBuilder text = new StringBuilder();

      int ch;
      while ((ch = buffered.read()) >= 0) {
        // NUL characters are not carried over into the result
        if (ch != 0) {
          text.append((char) ch);
        }
      }
      return text.toString();
    }
  }

  /**
   * Reads data from the specified {@link InputStream} assuming the data
   * represents characters and attempts via several methods to guess the
   * character encoding of those characters.  If no character encoding can
   * be determined with confidence then this returns null.
   * <p>
   * Bytes are pulled from the stream in 50-byte chunks until EOF, until the
   * {@link UniversalDetector} reports that it is done, or until roughly
   * 10 MiB have been consumed.  Every byte that is read is also retained in
   * memory so that it can be handed to the ICU {@link CharsetDetector}.
   * The stream is consumed but is not closed by this method.
   *
   * @param is The {@link InputStream} to read from.
   * @return The name of the character encoding that was guessed, or null
   *         if the ICU detector produced no candidate matches.
   * @throws IOException If an I/O failure occurs.
   */
  public static String detectCharacterEncoding(InputStream is)
      throws IOException
  {
    UniversalDetector detector = new UniversalDetector(null);
    int readCount = 0;
    int totalReadCount = 0;
    byte[] buffer = new byte[50];
    int maxReadCount = 10*1024*1024;
    boolean allAscii = true;
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    // feed the detector chunk by chunk while keeping a copy of every byte;
    // the size limit is checked before each read, so the total may exceed
    // the limit by less than one buffer
    while ((totalReadCount < maxReadCount)
           && !detector.isDone()
           && ((readCount = is.read(buffer)) >= 0))
    {
      totalReadCount += readCount;
      detector.handleData(buffer, 0, readCount);
      baos.write(buffer, 0, readCount);
    }
    detector.dataEnd();
    String encoding = detector.getDetectedCharset();
    // NOTE(review): the early return on this result is disabled, and the
    // value is only consulted in a fallback below that looks unreachable
    //if (encoding != null) return encoding;

    // check if all ascii (a negative byte value means the high bit is set)
    byte[] bytes = baos.toByteArray();
    for (byte b : bytes) {
      if (((int)b) < 0) {
        allAscii = false;
        break;
      }
    }

    // ask ICU for all of its candidate matches over the buffered bytes
    CharsetDetector cd = new CharsetDetector();
    cd.setText(bytes);
    CharsetMatch[] matches = cd.detectAll();
    if (matches == null) return null;
    if (matches.length == 0) return null;

    // NOTE(review): the loop below appears to assume the matches are ordered
    // from highest to lowest confidence -- confirm against the ICU docs
    CharsetMatch bestMatch = null;
    for (CharsetMatch match : matches) {
      // get the first match as the best candidate
      if (bestMatch == null) {
        bestMatch = match;

        if (match.getName().toUpperCase().startsWith("UTF")) {
          // if it starts with UTF, then we can have no better match
          break;
        } else {
          // if not UTF, then check if the next one has equal confidence
          continue;
        }
      }
      // check confidence of this one versus the best match
      int c1 = bestMatch.getConfidence();
      int c2 = match.getConfidence();

      // if lower confidence then we are done
      if (c2 < c1) break;

      // prefer a UTF candidate whose confidence is not lower than the
      // current best match
      if (match.getName().toUpperCase().startsWith("UTF")) {
        bestMatch = match;
        break;
      }
    }

    // NOTE(review): with a non-empty matches array the first loop iteration
    // always assigns bestMatch, so the three null checks below look
    // unreachable -- confirm whether these fallbacks were meant to replace
    // the early "return null" statements above
    if (bestMatch == null && encoding != null) return encoding;
    if (bestMatch == null && allAscii) return "UTF-8";
    if (bestMatch == null) return null;

    // low-confidence guess on pure ASCII data is reported as UTF-8
    if (bestMatch.getConfidence() < 50 && allAscii) return "UTF-8";
    return bestMatch.getName();
  }

  /**
   * Using the specified character encoding, this method wraps the
   * specified {@link Reader} in a new {@link Reader} that skips the
   * "byte order mark" (BOM) character at the beginning of the text for
   * UTF character encodings (e.g.: "UTF-8", "UTF-16" or "UTF-32").  If the
   * specified character encoding is not a "UTF" character encoding, or if
   * the source is already at EOF, then the source {@link Reader} is simply
   * returned as-is.
   * <p>
   * The encoding name is matched case-insensitively on its first three
   * characters.  For UTF encodings one character is read from the source
   * in order to peek at it.
   *
   * @param src The source {@link Reader}.
   * @param encoding The character encoding.
   * @return The {@link Reader} to read from in order to skip the
   *         byte-order mark.
   * @throws IOException If an I/O failure occurs.
   * @throws NullPointerException If either parameter is null.
   */
  public static Reader bomSkippingReader(Reader src, String encoding)
      throws IOException, NullPointerException
  {
    // check if encoding is null (illegal)
    if (encoding == null) {
      throw new NullPointerException(
          "Cannot skip byte order mark without specifying the encoding.");
    }

    // check if the source is null (illegal) -- without this check a null
    // source would be silently returned for non-UTF encodings
    if (src == null) {
      throw new NullPointerException(
          "Cannot skip byte order mark without specifying the source reader.");
    }

    // case-insensitive prefix check that does not depend on the default
    // locale and does not allocate an upper-cased copy of the name
    if (!encoding.regionMatches(true, 0, "UTF", 0, 3)) {
      // if not UTF encoding then there should not be a BOM to skip
      return src;
    }

    // create a pushback reader and peek at the first character
    PushbackReader result = new PushbackReader(src, 1);
    int first = result.read();

    // check if already at EOF
    if (first == -1) {
      // nothing was consumed, so the source can be returned directly
      return src;
    }

    // check if we do NOT have a byte order mark
    if (first != 0xFEFF) {
      // push the character back on to the stream so it can be read
      result.unread(first);
    }

    // return the pushback reader (positioned after the BOM, if any)
    return result;
  }

  /**
   * Ensures that the specified directory exists, creating it (along with
   * any missing parent directories) when it is absent.
   *
   * @param dir The {@link File} representing the directory.
   *
   * @return <code>true</code> if the directory was created and
   *         <code>false</code> if the directory already existed.
   *
   * @throws IOException If a failure occurs creating the directory or if
   *                     the named directory exists but is not a directory.
   */
  public static boolean createDirectoryIfMissing(File dir) throws IOException {
    if (dir.exists()) {
      // already present: acceptable only if it really is a directory
      if (dir.isDirectory()) {
        return false;
      }
      throw new IOException(
          "The named directory file exists, but is not a directory: " + dir);
    }

    if (dir.mkdirs()) {
      return true;
    }
    throw new IOException("Failed to create directory: " + dir);
  }

  /**
   * Recursively deletes the specified directory and returns the number of
   * files in the directory that failed to be deleted.
   * <p>
   * Only regular entries handed to <code>visitFile</code> are counted; a
   * directory that cannot be removed, or an entry that cannot be visited,
   * does not increase the count.  Failures to visit an entry are reported on
   * the standard error stream and the walk continues.
   *
   * @param dir The {@link File} representing the directory.
   *
   * @return The number of files in the directory that could not be deleted.
   *
   * @throws IOException If a serious failure occurs.
   */
  public static int recursiveDeleteDirectory(File dir) throws IOException {
    // single-element array so the anonymous visitor can update the counter
    final int[] failedCount = { 0 };

    // the visitor must be parameterized: with a raw FileVisitor the
    // Path-typed methods below would not override the interface methods
    Files.walkFileTree(
        dir.toPath(), new FileVisitor<java.nio.file.Path>() {
          @Override
          public FileVisitResult preVisitDirectory(
              java.nio.file.Path path, BasicFileAttributes attrs)
          {
            return CONTINUE;
          }

          @Override
          public FileVisitResult postVisitDirectory(
              java.nio.file.Path path, IOException e)
          {
            // leave the directory in place if iterating over it failed
            if (e != null) return CONTINUE;

            // the contents were visited already, so try to remove it now
            path.toFile().delete();
            return CONTINUE;
          }

          @Override
          public FileVisitResult visitFile(
              java.nio.file.Path path, BasicFileAttributes attrs)
          {
            boolean result = path.toFile().delete();
            if (!result) {
              failedCount[0]++;
            }
            return CONTINUE;
          }

          @Override
          public FileVisitResult visitFileFailed(
              java.nio.file.Path path, IOException e)
          {
            // report the problem, but keep deleting whatever else we can
            e.printStackTrace();
            return CONTINUE;
          }
        });

    // final attempt on the root in case the walk left it behind
    try {
      Files.deleteIfExists(dir.toPath());
    } catch (Exception ignore) {
      ignore.printStackTrace();
    }
    return failedCount[0];
  }

  /**
   * Determines whether two (2) files differ, without regard to their
   * modification timestamps.
   *
   * @param file1 The first file.
   * @param file2 The second file.
   * @return <code>true</code> if the files differ, otherwise
   *         <code>false</code>.
   *
   * @throws IOException If an I/O exception occurs.
   */
  public static boolean checkFilesDiffer(File file1, File file2)
      throws IOException
  {
    return checkFilesDiffer(file1, file2, false);
  }

  /**
   * Determines whether two (2) files differ.  Two references to the same
   * path, or two files that both do not exist, are considered the same.
   * Otherwise the files differ if only one exists, if their lengths differ,
   * if their timestamps differ (when timestamps are significant) or if
   * their contents differ.
   *
   * @param file1 The first file.
   * @param file2 The second file.
   * @param timestampSignificant <code>true</code> if timestamps need to be
   *                             the same, otherwise <code>false</code>.
   * @return <code>true</code> if the files differ, otherwise
   *         <code>false</code>.
   *
   * @throws IOException If an I/O exception occurs.
   */
  public static boolean checkFilesDiffer(File     file1,
                                         File     file2,
                                         boolean  timestampSignificant)
      throws IOException
  {
    // the very same path can never differ from itself
    if (file1.equals(file2)) {
      return false;
    }

    final boolean firstExists = file1.exists();
    final boolean secondExists = file2.exists();

    // two missing files are the same, one missing file is a difference
    if (!firstExists && !secondExists) {
      return false;
    }
    if (firstExists != secondExists) {
      return true;
    }

    // cheap metadata comparisons before reading any content
    if (file1.length() != file2.length()) {
      return true;
    }
    if (timestampSignificant
        && file1.lastModified() != file2.lastModified())
    {
      return true;
    }

    try (FileInputStream raw1 = new FileInputStream(file1);
         FileInputStream raw2 = new FileInputStream(file2);
         BufferedInputStream in1 = new BufferedInputStream(raw1);
         BufferedInputStream in2 = new BufferedInputStream(raw2))
    {
      while (true) {
        final int b1 = in1.read();
        final int b2 = in2.read();

        // first mismatch (including one stream ending early) decides it
        if (b1 != b2) {
          return true;
        }

        // both streams ended together with no mismatch: identical
        if (b1 == -1) {
          return false;
        }
      }
    }
  }

  /**
   * Creates the specified file if it does not exist, otherwise it updates
   * the modified time for the specified file to the current time.
   *
   * @param file The {@link File} to be touched.
   *
   * @return The last modified time of the file in milliseconds.
   *
   * @throws IOException If the file cannot be created or if its modified
   *                     time cannot be updated.
   */
  public static long touchFile(File file) throws IOException {
    if (file.exists()) {
      // setLastModified() reports failure via its return value rather than
      // by throwing, so it must be checked to avoid a silent no-op
      if (!file.setLastModified(System.currentTimeMillis())) {
        throw new IOException(
            "Failed to update the last modified time of file: " + file);
      }
    } else {
      // a false result only means the file appeared in the meantime, which
      // still leaves a freshly created file in place
      file.createNewFile();
    }
    return file.lastModified();
  }

  /**
   * Produces a view of the specified {@link InputStream} whose
   * {@link InputStream#close()} method does nothing, leaving the backing
   * {@link InputStream} open.  The caller remains responsible for closing
   * the original input stream and therefore must keep a reference to it.
   *
   * @param inputStream The backing input stream to wrap.
   *
   * @return An {@link InputStream} that reads from the specified {@link
   *         InputStream} but does not close it when closed.
   */
  public static InputStream nonClosingWrapper(InputStream inputStream) {
    final InputStream shielded = new NonClosingInputStream(inputStream);
    return shielded;
  }

  /**
   * Produces a view of the specified {@link OutputStream} whose
   * {@link OutputStream#close()} method flushes the backing
   * {@link OutputStream} instead of closing it.  The caller remains
   * responsible for closing the original output stream and therefore must
   * keep a reference to it.
   *
   * @param outputStream The backing output stream to wrap.
   *
   * @return An {@link OutputStream} that writes to the specified {@link
   *         OutputStream} but does not close it when closed.
   */
  public static OutputStream nonClosingWrapper(OutputStream outputStream) {
    final OutputStream shielded = new NonClosingOutputStream(outputStream);
    return shielded;
  }

  /**
   * Produces a view of the specified {@link Reader} whose
   * {@link Reader#close()} method does nothing, leaving the backing
   * {@link Reader} open.  The caller remains responsible for closing the
   * original reader and therefore must keep a reference to it.
   *
   * @param reader The backing reader to wrap.
   *
   * @return A {@link Reader} that reads from the specified {@link Reader}
   *         but does not close it when closed.
   */
  public static Reader nonClosingWrapper(Reader reader) {
    final Reader shielded = new NonClosingReader(reader);
    return shielded;
  }

  /**
   * Produces a view of the specified {@link Writer} whose
   * {@link Writer#close()} method flushes the backing {@link Writer}
   * instead of closing it.  The caller remains responsible for closing the
   * original writer and therefore must keep a reference to it.
   *
   * @param writer The backing writer to wrap.
   *
   * @return A {@link Writer} that writes to the specified {@link Writer}
   *         but does not close it when closed.
   */
  public static Writer nonClosingWrapper(Writer writer) {
    final Writer shielded = new NonClosingWriter(writer);
    return shielded;
  }

  /**
   * A {@link FilterInputStream} whose {@link #close()} is a no-op so that
   * the backing stream stays open.
   */
  private static class NonClosingInputStream extends FilterInputStream {
    /**
     * Wraps the specified stream.
     *
     * @param delegate The backing stream that must not be closed.
     */
    private NonClosingInputStream(InputStream delegate) {
      super(delegate);
    }

    /**
     * Does nothing, leaving the backing stream open.
     */
    @Override
    public void close() {
      // intentionally left blank
    }
  }

  /**
   * Extends {@link FilterOutputStream} to prevent closing of the backing
   * stream.  Closing this stream flushes the backing stream instead.
   */
  private static class NonClosingOutputStream extends FilterOutputStream {
    /**
     * Wraps the specified stream.
     *
     * @param backingStream The backing stream that must not be closed.
     */
    private NonClosingOutputStream(OutputStream backingStream) {
      super(backingStream);
    }

    /**
     * Hands the whole byte range to the backing stream in a single call.
     * The inherited {@link FilterOutputStream} implementation would
     * instead forward the range one byte at a time.
     *
     * @param b The buffer holding the bytes to write.
     * @param off The offset of the first byte to write.
     * @param len The number of bytes to write.
     * @throws IOException If the backing stream fails to write.
     */
    @Override
    public void write(byte[] b, int off, int len) throws IOException {
      this.out.write(b, off, len);
    }

    /**
     * Flushes the backing stream rather than closing it.  A failure to
     * flush is suppressed.
     */
    @Override
    public void close() {
      try {
        this.out.flush();
      } catch (IOException ignored) {
        // best-effort flush: the backing stream stays open and usable
      }
    }
  }

  /**
   * A {@link FilterReader} whose {@link #close()} is a no-op so that the
   * backing reader stays open.
   */
  private static class NonClosingReader extends FilterReader {
    /**
     * Wraps the specified reader.
     *
     * @param delegate The backing reader that must not be closed.
     */
    private NonClosingReader(Reader delegate) {
      super(delegate);
    }

    /**
     * Does nothing, leaving the backing reader open.
     */
    @Override
    public void close() {
      // intentionally left blank
    }
  }

  /**
   * A {@link FilterWriter} that flushes the backing writer on
   * {@link #close()} instead of closing it.
   */
  private static class NonClosingWriter extends FilterWriter {
    /**
     * Wraps the specified writer.
     *
     * @param delegate The backing writer that must not be closed.
     */
    private NonClosingWriter(Writer delegate) {
      super(delegate);
    }

    /**
     * Flushes the backing writer rather than closing it.  A failure to
     * flush is suppressed.
     */
    @Override
    public void close() {
      try {
        this.out.flush();
      } catch (IOException ignored) {
        // best-effort flush: the backing writer stays open and usable
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy