All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.jena.atlas.io.IO Maven / Gradle / Ivy

There is a newer version: 5.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.atlas.io;

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.jena.atlas.RuntimeIOException;
import org.apache.jena.atlas.lib.IRILib;
import org.apache.jena.atlas.lib.StrUtils;

public class IO
{
    public static final int EOF = -1;
    public static final int UNSET = -2;

    /** Open an input stream to a file.
     * 

* If the filename is null or "-", return System.in * If the filename ends in .gz, wrap in GZIPInputStream *

* Throws {@link RuntimeIOException} on failure to open. */ static public InputStream openFile(String filename) { try { return openFileEx(filename); } catch (IOException ex) { IO.exception(ex); return null; } } /** * Open an input stream to a file and buffer it. If the filename is null or "-", * return System.in If the filename ends in .gz, wrap in GZIPInputStream. * If using this {@code InputStream} with an {@code InputStreamReader} * (e.g. to get UTF-8), there is no need to buffer the {@code InputStream}. * Instead, buffer the {@code Reader}. *

* Throws {@link RuntimeIOException} on failure to open. */ static public InputStream openFileBuffered(String filename) { InputStream in = openFile(filename); return ensureBuffered(in); } private static final String ext_gz = "gz"; private static final String ext_bz2 = "bz2"; private static final String ext_sz = "sz"; /** Open an input stream to a file; do not mask IOExceptions. * If the filename is null or "-", return System.in * If the filename ends in .gz, wrap in GZIPInputStream * If the filename ends in .bz2, wrap in BZip2CompressorInputStream * @param filename * @throws FileNotFoundException * @throws IOException */ static public InputStream openFileEx(String filename) throws IOException, FileNotFoundException { if ( filename == null || filename.equals("-") ) return System.in; if ( filename.startsWith("file:") ) { filename = filename.substring("file:".length()); filename = IRILib.decodeHex(filename); } InputStream in = new FileInputStream(filename); String ext = getExtension(filename); // Input is a file stream. // https://commons.apache.org/proper/commons-compress/examples.html#Buffering : // """ // The stream classes all wrap around streams provided by the calling // code and they work on them directly without any additional // buffering. On the other hand most of them will benefit from // buffering so it is highly recommended that users wrap their stream // in Buffered(In|Out)putStreams before using the Commons Compress // API. // """ // GZip and Snappy have internal buffering. // BZip2 does not. switch ( ext ) { case "": return in; case ext_gz: // Makes a small improvement (<5%) to use 8K. return new GZIPInputStream(in, 8*1024); case ext_bz2: // Make a huge improvement. x10 faster. in = IO.ensureBuffered(in); return new BZip2CompressorInputStream(in, true); case ext_sz: return new SnappyCompressorInputStream(in); } return in; } // ---- Extracted from Apache CommonsIO : FilenameUtils (2.8.0) because of the drive letter handling. 
private static final int NOT_FOUND = -1; private static final String EMPTY_STRING = ""; private static final String EXTENSION_SEPARATOR = "."; private static final char UNIX_SEPARATOR = '/'; private static final char WINDOWS_SEPARATOR = '\\'; private static int indexOfLastSeparator(final String fileName) { if (fileName == null) { return NOT_FOUND; } final int lastUnixPos = fileName.lastIndexOf(UNIX_SEPARATOR); final int lastWindowsPos = fileName.lastIndexOf(WINDOWS_SEPARATOR); return Math.max(lastUnixPos, lastWindowsPos); } private static int indexOfExtension(final String fileName) throws IllegalArgumentException { if (fileName == null) { return NOT_FOUND; } // if (isSystemWindows()) { // // Special handling for NTFS ADS: Don't accept colon in the fileName. // final int offset = fileName.indexOf(':', getAdsCriticalOffset(fileName)); // if (offset != -1) { // throw new IllegalArgumentException("NTFS ADS separator (':') in file name is forbidden."); // } // } final int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); final int lastSeparator = indexOfLastSeparator(fileName); return lastSeparator > extensionPos ? NOT_FOUND : extensionPos; } private static String getExtension(final String fileName) { if (fileName == null) { return null; } final int index = indexOfExtension(fileName); if (index == -1) { return ""; } return fileName.substring(index + 1); } // ---- Apache CommonsIO : FilenameUtils /** * The filename without any compression extension, or the original filename. * It tests for compression types handled by {@link #openFileEx}. */ static public String filenameNoCompression(String filename) { String ext = getExtension(filename); switch ( ext ) { case EMPTY_STRING: return filename; case ext_gz: case ext_bz2: case ext_sz: // +1 for the "." return filename.substring(0, filename.length()-(ext.length()+1)); } return filename; } /** Open a UTF8 Reader for a file. 
* If the filename is null or "-", use System.in * If the filename ends in .gz, use GZIPInputStream */ static public Reader openFileUTF8(String filename) { return openFileReader(filename, StandardCharsets.UTF_8); } /** Open an ASCII Reader for a file. * If the filename is null or "-", use System.in * If the filename ends in .gz, use GZIPInputStream */ static public Reader openFileASCII(String filename) { return openFileReader(filename, StandardCharsets.US_ASCII); } private static Reader openFileReader(String filename, Charset charset) { InputStream in = openFile(filename); return new InputStreamReader(in, charset); } /** Create an unbuffered reader that uses UTF-8 encoding */ static public Reader asUTF8(InputStream in) { return new InputStreamReader(in, StandardCharsets.UTF_8); } /** Create a unbuffered reader that uses ASCII encoding */ static public Reader asASCII(InputStream in) { //return new InputStreamReader(in, StandardCharsets.US_ASCII); CharsetDecoder dec = StandardCharsets.US_ASCII.newDecoder(); // Make into an error - the default is REPLACE (insert unicode U+FFFD) dec.onMalformedInput(CodingErrorAction.REPORT); return new InputStreamReader(in, dec); } /** Create an buffered reader that uses UTF-8 encoding */ static public BufferedReader asBufferedUTF8(InputStream in) { // Always buffered - for readLine. return new BufferedReader(asUTF8(in), BUFSIZE_IN / 2); } /** Create a writer that uses UTF-8 encoding */ static public Writer asUTF8(OutputStream out) { return new OutputStreamWriter(out, StandardCharsets.UTF_8); } /** Create a writer that uses ASCII encoding */ static public Writer asASCII(OutputStream out) { return new OutputStreamWriter(out, StandardCharsets.US_ASCII); } /** Create a writer that uses UTF-8 encoding and is buffered. */ static public Writer asBufferedUTF8(OutputStream out) { Writer w = new OutputStreamWriter(out, StandardCharsets.UTF_8); return ensureBuffered(w); } /** * Open a file for output - may include adding gzip processing. *

* Throws {@link RuntimeIOException} on failure to open. */ static public OutputStream openOutputFile(String filename) { try { return openOutputFileEx(filename); } catch (IOException ex) { IO.exception(ex); return null; } } /** Open an input stream to a file; do not mask IOExceptions. * If the filename ends in .gz, wrap in GZIPOutputStream * @param filename * @throws FileNotFoundException If the output can't be opened. * @throws IOException for bad gzip encoded data */ static public OutputStream openOutputFileEx(String filename) throws FileNotFoundException,IOException { if ( filename == null || filename.equals("-") ) return System.out; if ( filename.startsWith("file:") ) { filename = filename.substring("file:".length()); filename = IRILib.decodeHex(filename); } OutputStream out = new FileOutputStream(filename); String ext = getExtension(filename); switch ( ext ) { case "": return out; case "gz": return new GZIPOutputStream(out); case "bz2": return new BZip2CompressorOutputStream(out); case "sz": throw new UnsupportedOperationException("Snappy output"); } return out; } /** Wrap in a general writer interface */ static public AWriter wrap(Writer w) { return Writer2.wrap(w); } /** Wrap in a general writer interface */ static public AWriter wrapUTF8(OutputStream out) { return wrap(asUTF8(out)); } /** Wrap in a general writer interface */ static public AWriter wrapASCII(OutputStream out) { return wrap(asASCII(out)); } /** Create a print writer that uses UTF-8 encoding */ static public PrintWriter asPrintWriterUTF8(OutputStream out) { return new PrintWriter(asUTF8(out)); } public static boolean isEmptyDirectory(String directory) { Path path = Path.of(directory); try(DirectoryStream dirStream = Files.newDirectoryStream(path)) { return !dirStream.iterator().hasNext(); } catch (NotDirectoryException ex) { return false ; } catch (IOException ex) { IO.exception(ex); return false; } } public static boolean exists(String fsname) { Path path = Path.of(fsname); return 
Files.exists(path); } public static boolean isDirectory(String directory) { Path path = Path.of(directory); return Files.isDirectory(path); } public static void close(org.apache.jena.atlas.lib.Closeable resource) { resource.close(); } public static void closeSilent(org.apache.jena.atlas.lib.Closeable resource) { try { resource.close(); } catch (Exception ex) { } } public static void close(java.io.Closeable resource) { if ( resource == null ) return; try { resource.close(); } catch (IOException ex) { exception(ex); } } public static void closeSilent(java.io.Closeable resource) { if ( resource == null ) return; try { resource.close(); } catch (IOException ex) { } } public static void close(AWriter resource) { if ( resource == null ) return; resource.close(); } public static void closeSilent(AWriter resource) { if ( resource == null ) return; try { resource.close(); } catch (Exception ex) { } } public static void close(IndentedWriter resource) { if ( resource == null ) return; resource.close(); } public static void closeSilent(IndentedWriter resource) { if ( resource == null ) return; try { resource.close(); } catch (Exception ex) { } } /** Throw a RuntimeIOException - this function is guaranteed not to return normally */ public static void exception(String message) { throw new RuntimeIOException(message); } /** Throw a RuntimeIOException - this function is guaranteed not to return normally */ public static void exception(IOException ex) { throw new RuntimeIOException(ex); } /** Throw a RuntimeIOException - this function is guaranteed not to return normally */ public static void exception(String msg, IOException ex) { throw new RuntimeIOException(msg, ex); } public static void flush(OutputStream out) { if ( out == null ) return; try { out.flush(); } catch (IOException ex) { exception(ex); } } public static void flush(Writer out) { if ( out == null ) return; try { out.flush(); } catch (IOException ex) { exception(ex); } } public static void flush(AWriter out) { if ( 
out == null ) return; out.flush(); } private static final int BUFSIZE_IN = 128*1024 ; private static final int BUFSIZE_OUT = 128*1024; ; private static final int WHOLE_FILE_BUFFER_SIZE = 32*1024; public static InputStream ensureBuffered(InputStream input) { if ( input instanceof BufferedInputStream ) return input; if ( input instanceof ByteArrayInputStream ) return input; return new BufferedInputStream(input, BUFSIZE_IN); } public static Reader ensureBuffered(Reader input) { if ( input instanceof BufferedReader ) return input; if ( input instanceof StringReader ) return input; return new BufferedReader(input, BUFSIZE_IN / 2); } public static OutputStream ensureBuffered(OutputStream output) { if ( output instanceof BufferedOutputStream ) return output; if ( output instanceof ByteArrayOutputStream ) return output; return new BufferedOutputStream(output, BUFSIZE_OUT); } public static Writer ensureBuffered(Writer output) { if ( output instanceof BufferedWriter ) return output; if ( output instanceof StringWriter ) return output; return new BufferedWriter(output, BUFSIZE_OUT / 2); } public static byte[] readWholeFile(InputStream in) { try(ByteArrayOutputStream out = new ByteArrayOutputStream(WHOLE_FILE_BUFFER_SIZE)) { byte buff[] = new byte[WHOLE_FILE_BUFFER_SIZE]; while (true) { int l = in.read(buff); if ( l <= 0 ) break; out.write(buff, 0, l); } return out.toByteArray(); } catch (IOException ex) { exception(ex); return null; } } /** Read a whole file as UTF-8 * @param filename * @return String */ public static String readWholeFileAsUTF8(String filename) { try ( InputStream in = new FileInputStream(filename) ) { return readWholeFileAsUTF8(in); } catch (IOException ex) { IO.exception(ex); return null; } } /** Read a whole stream as UTF-8 * * @param in InputStream to be read * @return String */ public static String readWholeFileAsUTF8(InputStream in) { // Don't buffer - we're going to read in large chunks anyway try ( Reader r = asUTF8(in) ) { return 
readWholeFileAsUTF8(r); } catch (IOException ex) { IO.exception(ex); return null; } } /** Fully reads the next up to maxWidth + 1 characters from the stream and returns them as a string. * If the extra character is read then the apprevMarker in appended to the result in its place. * Closing the stream is the caller's responsibility. */ public static String abbreviate(InputStream in, Charset charset, int maxWidth, String abbrevMarker) throws IOException { return abbreviate(new InputStreamReader(in, charset), maxWidth, abbrevMarker); } /** Fully reads the next up to maxWidth + 1 characters from the reader and returns them as a string. * If the extra character is read then the apprevMarker in appended to the result in its place. * Closing the stream is the caller's responsibility. */ public static String abbreviate(Reader reader, int maxWidth, String abbrevMarker) throws IOException { char[] buffer = new char[maxWidth + 1]; int n = IOUtils.read(reader, buffer); StringBuilder sb = new StringBuilder(); sb.append(buffer, 0, Math.min(n, maxWidth)); if (n > maxWidth) { sb.append(abbrevMarker); } return sb.toString(); } /** Read a whole file as UTF-8 * * @param r * @return String The whole file * @throws IOException */ // Private worker as we are trying to force UTF-8. private static String readWholeFileAsUTF8(Reader r) throws IOException { final int WHOLE_FILE_BUFFER_SIZE = 32*1024; try(StringWriter sw = new StringWriter(WHOLE_FILE_BUFFER_SIZE)) { char buff[] = new char[WHOLE_FILE_BUFFER_SIZE]; for (;;) { int l = r.read(buff); if (l < 0) break; sw.write(buff, 0, l); } return sw.toString(); } } /** Write a string to a file as UTF-8. The file is closed after the operation. 
* @param filename * @param content String to be written * @throws IOException */ public static void writeStringAsUTF8(String filename, String content) throws IOException { try ( OutputStream out = IO.openOutputFileEx(filename) ) { writeStringAsUTF8(out, content); out.flush(); } } /** Write a string into an {@link OutputStream} as UTF-8. * * @param out OutputStream destination. * @param content String to be written * @throws IOException */ public static void writeStringAsUTF8(OutputStream out, String content) throws IOException { Writer w = new OutputStreamWriter(out, StandardCharsets.UTF_8); w.write(content); w.flush(); // Not close. } /** String to ByteBuffer as UTF-8 bytes */ public static ByteBuffer stringToByteBuffer(String str) { byte[] b = StrUtils.asUTF8bytes(str); return ByteBuffer.wrap(b); } /** ByteBuffer to String */ public static String byteBufferToString(ByteBuffer bb) { byte[] b = new byte[bb.remaining()]; bb.get(b); return StrUtils.fromUTF8bytes(b); } public static String uniqueFilename(String directory, String base, String ext) { File d = new File(directory); if ( !d.exists() ) throw new IllegalArgumentException("Not found: " + directory); try { String fn0 = d.getCanonicalPath() + File.separator + base; String fn = fn0; int x = 1; while (true) { if ( ext != null ) fn = fn + "."+ext; File f = new File(fn); if ( ! f.exists() ) return fn; fn = fn0 + "-" + (x++); } } catch (IOException e) { IO.exception(e); return null; } } /** Delete everything from a {@code Path} start point, including the path itself. * This function works on files or directories. * This function does not follow symbolic links. */ public static void deleteAll(Path start) { // Walks down the tree and delete directories on the way backup. 
try { Files.walkFileTree(start, new SimpleFileVisitor() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { Files.delete(file); return FileVisitResult.CONTINUE; } @Override public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException { if (e == null) { Files.delete(dir); return FileVisitResult.CONTINUE; } else { throw e; } } }); } catch (IOException ex) { IO.exception(ex); return; } } // Do nothing buffer. Never read from this, it may be corrupt because it is shared. private static int SKIP_BUFFER_LEN = 64*1024; private static byte[] SKIP_BUFFER = null; /** Skip to the end of the InputStream, discarding input. */ public static void skipToEnd(InputStream input) { if ( SKIP_BUFFER == null ) // No harm in concurrent assignment. SKIP_BUFFER = new byte[SKIP_BUFFER_LEN]; try { for(;;) { // InputStream.skip does not guarantee to go to end of file. // Actually read it to be sure. long rLen = input.read(SKIP_BUFFER, 0, SKIP_BUFFER_LEN); if (rLen < 0) // EOF break; } } catch (IOException ex) {} } // Do nothing buffer. Never read from this, it may be corrupt because it is shared. private static int SKIP_BUFFER_LEN_R = 16*1024; private static char[] SKIP_BUFFER_R = null; /** Skip to the end of the Reader, discarding input. */ public static void skipToEnd(Reader input) { if ( SKIP_BUFFER_R == null ) // No harm in concurrent assignment. SKIP_BUFFER_R = new char[SKIP_BUFFER_LEN_R]; try { for(;;) { // Skip does not guarantee to go to end of file. long rLen = input.read(SKIP_BUFFER_R, 0, SKIP_BUFFER_LEN_R); if (rLen < 0) // EOF break; } } catch (IOException ex) {} } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy