// gate.util.Files Maven / Gradle / Ivy
/*
* Files.java
*
* Copyright (c) 1995-2012, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 2, June 1991 (in the distribution as file licence.html,
* and also available at http://gate.ac.uk/gate/licence.html).
*
* $Id: Files.java 17600 2014-03-08 18:47:11Z markagreenwood $
*/
package gate.util;
import gate.Gate;
import gate.corpora.DocumentXmlUtils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
/** Some utilities for use with Files and with resources.
*
* Note that there is a terminology conflict between the use
* of "resources" here and gate.Resource and its inheritors.
*
* Java "resources" are files that live on the CLASSPATH or in a Jar
* file that are not .class files. For example: a
* .gif file that is used by a GUI, or one of the XML files
* used for testing GATE's document format facilities. This class
* allows you to access these files in various ways (as streams, as
* byte arrays, etc.).
*
* GATE resources are components (Java Beans) that provide all of the
* natural language processing capabilities of a GATE-based system, and
* the language data that such systems analyse and produce. For
* example: parsers, lexicons, generators, corpora.
*
* Where we say "resource" in this class we mean Java resource; elsewhere
* in the system we almost always mean GATE resource.
*/
public class Files {
/**
 * Debug flag. When {@code true}, diagnostic messages guarded by this flag
 * (for example the encoding fall-back notice in updateXmlElement) are printed.
 */
private static final boolean DEBUG = false;
/**
 * Counter intended for generating temporary resource names.
 * NOTE(review): no use of this field is visible in this part of the file;
 * confirm whether it is still needed.
 */
static long resourceIndex = 0;
/**
 * Location on the classpath under which the GATE resources are found.
 * It is prepended to the names passed to the getGateResource* methods.
 */
protected static final String resourcePath = "/gate/resources";
/**
 * Returns the classpath location under which the GATE resources live.
 *
 * @return the value of the {@code resourcePath} constant
 */
public static String getResourcePath() {
  final String gateResourceRoot = resourcePath;
  return gateResourceRoot;
}
/**
 * Returns the last component of a file path, e.g. {@code file.txt} for
 * {@code d:/tmp/file.txt}.
 *
 * Both {@code '/'} and {@code '\'} are treated as separators, because on
 * Windows the file separator is {@code '\'} while a path coming from a URL
 * is separated by {@code '/'}. The component returned is whatever follows
 * the right-most separator of either kind, so paths that mix the two
 * (e.g. {@code d:/tmp\file.txt}) are handled as well.
 *
 * @param path the path to inspect; may be {@code null} or empty
 * @return the text after the last separator; the whole path if it contains
 *         no separator; the empty string if {@code path} is {@code null},
 *         empty, or ends with a separator
 */
public static String getLastPathComponent(String path) {
  if (path == null || path.length() == 0) {
    return "";
  }
  // take the right-most separator of either kind; -1 (no separator at all)
  // makes substring(0) return the path unchanged
  int index = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
  return path.substring(index + 1);
} // getLastPathComponent()
/**
 * Reads a text file, identified by its path name, into a String.
 *
 * @param fileName path of the file to read
 * @return the contents of the file
 * @throws IOException if the file cannot be opened or read
 */
public static String getString(String fileName) throws IOException {
  final File textFile = new File(fileName);
  return getString(textFile);
} // getString(fileName)
/**
 * Reads the whole of a text file into a String, decoding the bytes with the
 * platform default charset.
 *
 * The stream is always closed, even when reading fails, and reading is
 * repeated until the expected number of bytes has arrived, because a single
 * {@code read} call is allowed to return fewer bytes than requested.
 *
 * @param textFile the file to read
 * @return the contents of the file
 * @throws IOException if the file cannot be opened or read, or if it is too
 *         large to be held in a single byte array
 */
public static String getString(File textFile) throws IOException {
  FileInputStream fis = new FileInputStream(textFile);
  try {
    long length = textFile.length();
    if (length > Integer.MAX_VALUE) {
      throw new IOException("File too large to read into memory: " + textFile);
    }
    int len = (int) length;
    byte[] textBytes = new byte[len];
    int offset = 0;
    while (offset < len) {
      int count = fis.read(textBytes, offset, len - offset);
      if (count == -1) break; // file is shorter than reported
      offset += count;
    }
    // only decode what was actually read
    return new String(textBytes, 0, offset);
  } finally {
    fis.close();
  }
} // getString(File)
/**
 * Reads the whole of a binary file into a byte array.
 *
 * The stream is always closed, even when reading fails, and reading is
 * repeated until the expected number of bytes has arrived, because a single
 * {@code read} call is allowed to return fewer bytes than requested.
 *
 * @param binaryFile the file to read
 * @return the bytes of the file; if the file turns out to be shorter than
 *         its reported length, only the bytes actually read are returned
 * @throws IOException if the file cannot be opened or read, or if it is too
 *         large to be held in a single byte array
 */
public static byte[] getByteArray(File binaryFile) throws IOException {
  FileInputStream fis = new FileInputStream(binaryFile);
  try {
    long length = binaryFile.length();
    if (length > Integer.MAX_VALUE) {
      throw new IOException("File too large to read into memory: " + binaryFile);
    }
    int len = (int) length;
    byte[] bytes = new byte[len];
    int offset = 0;
    while (offset < len) {
      int count = fis.read(bytes, offset, len - offset);
      if (count == -1) break; // file is shorter than reported
      offset += count;
    }
    return offset == len ? bytes : Arrays.copyOf(bytes, offset);
  } finally {
    fis.close();
  }
} // getByteArray(File)
/**
 * Loads a resource through the GATE ClassLoader and returns it as a String,
 * without specifying an encoding.
 *
 * @param resourceName the resource to read
 * @return whatever the two-argument overload returns for a {@code null}
 *         encoding
 * @throws IOException if reading the resource fails
 */
public static String getResourceAsString(String resourceName)
    throws IOException {
  final String unspecifiedEncoding = null;
  return getResourceAsString(resourceName, unspecifiedEncoding);
}
/**
 * Loads a resource through the GATE ClassLoader and returns it as a String.
 *
 * The resource is read through a {@code BomStrippingInputStreamReader}. The
 * reader (or, if the reader could not be created, the underlying stream) is
 * always closed, even when reading fails.
 *
 * @param resourceName the resource to read
 * @param encoding the encoding passed to the reader; may be {@code null},
 *        in which case the reader is created without an explicit encoding
 * @return the contents of the resource, or {@code null} if
 *         {@code getResourceAsStream} returned no stream for the name
 * @throws IOException if the reader cannot be created (e.g. unsupported
 *         encoding) or reading fails
 */
public static String
    getResourceAsString(String resourceName, String encoding)
    throws IOException {
  InputStream resourceStream = getResourceAsStream(resourceName);
  if (resourceStream == null) return null;

  BufferedReader resourceReader = null;
  try {
    if (encoding == null) {
      resourceReader = new BomStrippingInputStreamReader(resourceStream);
    } else {
      resourceReader =
          new BomStrippingInputStreamReader(resourceStream, encoding);
    }

    StringBuilder resourceBuffer = new StringBuilder();
    final int size = 1024;
    char[] charArray = new char[size];
    int charsRead;
    // once read() has returned -1 the reader is exhausted, so one loop is
    // enough to drain it
    while ((charsRead = resourceReader.read(charArray, 0, size)) != -1) {
      resourceBuffer.append(charArray, 0, charsRead);
    }
    return resourceBuffer.toString();
  } finally {
    if (resourceReader != null) {
      IOUtils.closeQuietly(resourceReader);
    } else {
      // reader construction failed: release the raw stream instead
      IOUtils.closeQuietly(resourceStream);
    }
  }
} // getResourceAsString(String, String)
/**
 * Loads a resource from the GATE resources directory and returns it as a
 * String.
 *
 * The resource name is relative to {@code resourcePath}; e.g. for a
 * resource stored as {@code gate/resources/jape/Test11.jape} this method
 * should be passed the name {@code jape/Test11.jape}.
 *
 * @param resourceName name of the resource, relative to the GATE resources
 *        directory
 * @return the contents of the resource
 * @throws IOException if the resource is not on the classpath or cannot be
 *         read
 */
public static String getGateResourceAsString(String resourceName)
    throws IOException {
  final InputStream in = getGateResourceAsStream(resourceName);
  if (in != null) {
    try {
      final String contents = IOUtils.toString(in);
      return contents;
    } finally {
      // release the stream whether or not the read succeeded
      in.close();
    }
  }
  throw new IOException("No such resource on classpath: " + resourceName);
} // getGateResourceAsString(String)
/**
 * Copies an InputStream into a new temporary file created in the default
 * temporary directory. The file name is generated by
 * {@link File#createTempFile(String, String)} with prefix
 * {@code gateResource} and suffix {@code .tmp}, and the file is registered
 * for deletion when the JVM exits.
 *
 * Both the output stream and {@code contentStream} are closed quietly before
 * this method returns, whether or not the copy succeeded.
 *
 * @param contentStream the data to write; if {@code null} the file is left
 *        empty
 * @return the temporary file
 * @throws IOException if the file cannot be created or the copy fails
 */
public static File writeTempFile(InputStream contentStream)
    throws IOException {
  File tempFile = null;
  FileOutputStream out = null;
  try {
    tempFile = File.createTempFile("gateResource", ".tmp");
    out = new FileOutputStream(tempFile);
    tempFile.deleteOnExit();
    if (contentStream != null) {
      final byte[] chunk = new byte[1024];
      for (int count = contentStream.read(chunk, 0, chunk.length);
           count != -1;
           count = contentStream.read(chunk, 0, chunk.length)) {
        out.write(chunk, 0, count);
      }
    }
  } finally {
    IOUtils.closeQuietly(out);
    IOUtils.closeQuietly(contentStream);
  }
  return tempFile;
} // writeTempFile()
/**
 * Writes {@code aString} into a temporary file located inside the default
 * temporary directory defined by the JVM, using the given encoding. The
 * file name is generated by {@link File#createTempFile(String, String)}
 * with prefix {@code gateResource} and suffix {@code .tmp}, and the file is
 * registered for deletion when the JVM exits.
 *
 * The underlying file stream is always closed, including when the encoding
 * is not supported or writing fails.
 *
 * @param aString the String to be written. If it is {@code null} the file
 *        will be empty.
 * @param anEncoding the encoding to be used. If it is {@code null} the
 *        platform default encoding is used.
 * @return the temporary file containing the string
 * @throws UnsupportedEncodingException if {@code anEncoding} is not a
 *         supported encoding
 * @throws IOException if the file cannot be created or written
 */
public static File writeTempFile(String aString, String anEncoding) throws
    UnsupportedEncodingException, IOException {
  File resourceFile = File.createTempFile("gateResource", ".tmp");
  resourceFile.deleteOnExit();

  if (aString == null) return resourceFile;

  FileOutputStream fileStream = new FileOutputStream(resourceFile);
  try {
    OutputStreamWriter writer;
    if (anEncoding == null) {
      // use the platform default encoding
      writer = new OutputStreamWriter(fileStream);
    } else {
      writer = new OutputStreamWriter(fileStream, anEncoding);
    }
    writer.write(aString);
    // closing the writer flushes it, so write errors surface here
    writer.close();
  } finally {
    // harmless if the writer already closed the stream; releases the file
    // handle when the writer could not be created or the write failed
    fileStream.close();
  }
  return resourceFile;
} // writeTempFile()
/**
 * Writes {@code aString} into a temporary file located inside the default
 * temporary directory defined by the JVM, without specifying an encoding
 * (the two-argument overload is called with a {@code null} encoding).
 *
 * @param aString the String to be written. If it is {@code null} the file
 *        will be empty.
 * @return the temporary file containing the string
 * @throws IOException if the file cannot be created or written
 */
public static File writeTempFile(String aString) throws IOException {
  final String unspecifiedEncoding = null;
  return writeTempFile(aString, unspecifiedEncoding);
} // writeTempFile()
/**
 * Loads a resource through the GATE ClassLoader and returns its contents as
 * a byte array.
 *
 * The value returned by {@code read} is kept as an {@code int} count and
 * compared with -1 before any byte is interpreted, so resources containing
 * the byte value 0xFF are read completely. The stream is always closed,
 * even when reading fails.
 *
 * @param resourceName the resource to read
 * @return the bytes of the resource
 * @throws IOException if the resource cannot be found or read
 */
public static byte[] getResourceAsByteArray(String resourceName)
    throws IOException, IndexOutOfBoundsException, ArrayStoreException {
  InputStream resourceStream = getResourceAsStream(resourceName);
  if (resourceStream == null) {
    throw new IOException("No such resource on classpath: " + resourceName);
  }
  try {
    byte[] buf = new byte[1024];
    int used = 0;
    int count;
    // read in chunks into the free tail of buf, doubling it when full so
    // that there is always room for the next read
    while ((count = resourceStream.read(buf, used, buf.length - used)) != -1) {
      used += count;
      if (used == buf.length) {
        buf = Arrays.copyOf(buf, buf.length * 2);
      }
    }
    // trim to the number of bytes actually read
    return Arrays.copyOf(buf, used);
  } finally {
    resourceStream.close();
  }
} // getResourceAsByteArray(String)
/**
 * Loads a resource from the GATE resources directory and returns its
 * contents as a byte array.
 *
 * The resource name is relative to {@code resourcePath}; e.g. for a
 * resource stored as {@code gate/resources/jape/Test11.jape} this method
 * should be passed the name {@code jape/Test11.jape}.
 *
 * The value returned by {@code read} is kept as an {@code int} count and
 * compared with -1 before any byte is interpreted, so resources containing
 * the byte value 0xFF are read completely. The stream is always closed,
 * even when reading fails.
 *
 * @param resourceName name of the resource, relative to the GATE resources
 *        directory
 * @return the bytes of the resource
 * @throws IOException if the resource cannot be found or read
 */
public static byte[] getGateResourceAsByteArray(String resourceName)
    throws IOException, IndexOutOfBoundsException, ArrayStoreException {
  InputStream resourceStream = getGateResourceAsStream(resourceName);
  if (resourceStream == null) {
    throw new IOException("No such resource on classpath: " + resourceName);
  }
  try {
    byte[] buf = new byte[1024];
    int used = 0;
    int count;
    // read in chunks into the free tail of buf, doubling it when full so
    // that there is always room for the next read
    while ((count = resourceStream.read(buf, used, buf.length - used)) != -1) {
      used += count;
      if (used == buf.length) {
        buf = Arrays.copyOf(buf, buf.length * 2);
      }
    }
    // trim to the number of bytes actually read
    return Arrays.copyOf(buf, used);
  } finally {
    resourceStream.close();
  }
} // getGateResourceAsByteArray(String)
/**
 * Opens a resource through the GATE ClassLoader as an InputStream.
 *
 * A single leading {@code '/'} is stripped from the name before lookup. The
 * check uses {@code startsWith}, so an empty name no longer fails with a
 * {@code StringIndexOutOfBoundsException}.
 *
 * @param resourceName the resource to open; must not be {@code null}
 * @return the stream returned by the class loader's
 *         {@code getResourceAsStream}, which is {@code null} when the
 *         resource cannot be found
 * @throws IOException declared for callers; not thrown by the code in this
 *         method itself
 */
public static InputStream getResourceAsStream(String resourceName)
    throws IOException {
  // Strip any leading '/'
  if (resourceName.startsWith("/")) {
    resourceName = resourceName.substring(1);
  }

  ClassLoader gcl = Gate.getClassLoader();
  if (gcl == null) {
    // if the GATE ClassLoader has not been initialised yet (i.e. this
    // method was called before Gate.init) then fall back to the current
    // classloader
    return Files.class.getClassLoader().getResourceAsStream(resourceName);
  }
  // if we can, get the resource through the GATE ClassLoader to allow
  // loading of resources from plugin JARs as well as gate.jar
  return gcl.getResourceAsStream(resourceName);
} // getResourceAsStream(String)
/**
 * Opens a resource from the GATE resources directory as an InputStream.
 *
 * The resource name is relative to {@code resourcePath}; e.g. for a
 * resource stored as {@code gate/resources/jape/Test11.jape} this method
 * should be passed the name {@code jape/Test11.jape}. A name that already
 * starts with {@code '/'} or {@code '\'} is appended to the resource path
 * as is; otherwise a {@code '/'} is inserted between the two.
 *
 * @param resourceName name of the resource, relative to the GATE resources
 *        directory
 * @return whatever {@code getResourceAsStream} returns for the full name
 * @throws IOException if {@code getResourceAsStream} throws it
 */
public static InputStream getGateResourceAsStream(String resourceName)
    throws IOException {
  final boolean hasLeadingSeparator =
      resourceName.startsWith("/") || resourceName.startsWith("\\");
  final String fullName = hasLeadingSeparator
      ? resourcePath + resourceName
      : resourcePath + "/" + resourceName;
  return getResourceAsStream(fullName);
} // getGateResourceAsStream(String)
/**
 * Looks up a resource through the GATE ClassLoader. The return value is a
 * {@link java.net.URL} that can be used to retrieve the contents of the
 * resource.
 *
 * A single leading {@code '/'} is stripped from the name before lookup. The
 * check uses {@code startsWith}, so an empty name no longer fails with a
 * {@code StringIndexOutOfBoundsException}.
 *
 * @param resourceName the resource to locate; must not be {@code null}
 * @return the URL returned by the class loader's {@code getResource}, which
 *         is {@code null} when the resource cannot be found
 */
public static URL getResource(String resourceName) {
  // Strip any leading '/'
  if (resourceName.startsWith("/")) {
    resourceName = resourceName.substring(1);
  }

  ClassLoader gcl = Gate.getClassLoader();
  if (gcl == null) {
    // if the GATE ClassLoader has not been initialised yet (i.e. this
    // method was called before Gate.init) then fall back to the current
    // classloader
    return Files.class.getClassLoader().getResource(resourceName);
  }
  // if we can, get the resource through the GATE ClassLoader to allow
  // loading of resources from plugin JARs as well as gate.jar
  return gcl.getResource(resourceName);
}
/**
 * Looks up a resource in the GATE resources directory. The return value is
 * a {@link java.net.URL} that can be used to retrieve the contents of the
 * resource.
 *
 * The resource name is relative to {@code resourcePath}; e.g. for a
 * resource stored as {@code gate/resources/jape/Test11.jape} this method
 * should be passed the name {@code jape/Test11.jape}. A name that already
 * starts with {@code '/'} or {@code '\'} is appended to the resource path
 * as is; otherwise a {@code '/'} is inserted between the two.
 *
 * @param resourceName name of the resource, relative to the GATE resources
 *        directory
 * @return whatever {@code getResource} returns for the full name
 */
public static URL getGateResource(String resourceName) {
  final boolean hasLeadingSeparator =
      resourceName.startsWith("/") || resourceName.startsWith("\\");
  final String fullName = hasLeadingSeparator
      ? resourcePath + resourceName
      : resourcePath + "/" + resourceName;
  return getResource(fullName);
}
/**
 * Returns the set of file paths directly under a directory that match a
 * regular expression.
 *
 * If {@code pathFile} is a directory, each entry name is appended to
 * {@code pathFile} with a {@code '/'} and the resulting path must match the
 * whole of {@code regex}. The search is not recursive. If {@code pathFile}
 * is a regular file, the path itself is tested against the expression.
 *
 * @param regex regular expression for the full path, i.e. one that begins
 *        with {@code pathFile}
 * @param pathFile directory path where to search for files (or a single
 *        file path)
 * @return the set of paths under {@code pathFile} that match {@code regex};
 *         empty if nothing matches, if the path does not exist, or if the
 *         directory cannot be listed
 * @throws NullPointerException if {@code pathFile} is {@code null}
 * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a
 *         valid regular expression
 */
public static Set<String> Find(String regex, String pathFile) {
  Set<String> matchingPaths = new HashSet<String>();
  File file = new File(pathFile);
  Pattern pattern = Pattern.compile("^" + regex);

  if (file.isDirectory()) {
    String[] entries = file.list();
    // list() returns null when the directory cannot be read
    if (entries != null) {
      for (String entry : entries) {
        String finalPath = pathFile + "/" + entry;
        if (pattern.matcher(finalPath).matches()) {
          matchingPaths.add(finalPath);
        }
      }
    }
  } else if (file.isFile()) {
    if (pattern.matcher(pathFile).matches()) {
      matchingPaths.add(pathFile);
    }
  }
  return matchingPaths;
} // Find
/** Recursively remove a directory even if it contains other files
* or directories. Returns true when the directory and all its
* contents are successfully removed, else false.
*/
public static boolean rmdir(File dir) {
if(dir == null || ! dir.isDirectory()) // only delete directories
return false;
// list all the members of the dir
String[] members = dir.list();
// return value indicating success or failure
boolean succeeded = true;
// for each member, if is dir then recursively delete; if file then delete
for(int i = 0; i newAttrs
) throws IOException {
String line = null;
String nl = Strings.getNl();
StringBuffer newXml = new StringBuffer();
// read the whole source
while( ( line = xml.readLine() ) != null ) {
newXml.append(line);
newXml.append(nl);
}
// find the location of the element
int start = newXml.toString().indexOf("<" + elementName);
if(start == -1) return newXml.toString();
int end = newXml.toString().indexOf(">", start);
if(end == -1) return newXml.toString();
// check if the old element is empty (ends in "/>") or not
boolean isEmpty = false;
if(newXml.toString().charAt(end - 1) == '/') isEmpty = true;
// create the new element string with the new attributes
StringBuffer newElement = new StringBuffer();
newElement.append("<");
newElement.append(elementName);
// add in the new attributes
Iterator> iter = newAttrs.entrySet().iterator();
while(iter.hasNext()) {
Map.Entry entry = iter.next();
String key = entry.getKey();
String value = entry.getValue();
newElement.append(" ");
newElement.append(DocumentXmlUtils.combinedNormalisation(key));
newElement.append("=\"");
newElement.append(DocumentXmlUtils.combinedNormalisation(value));
newElement.append("\"" + nl);
}
// terminate the element
if(isEmpty) newElement.append("/");
newElement.append(">");
// replace the old string
newXml.replace(start, end + 1, newElement.toString());
return newXml.toString();
} // updateXmlElement(Reader...)
/**
* This method updates an XML element in an XML file
* with a new set of attributes. If the element is not found the XML
* file is unchanged. The attributes keys and values must all be Strings.
* We first try to read the file using UTF-8 encoding. If an error occurs we
* fall back to the platform default encoding (for backwards-compatibility
* reasons) and try again. The file is written back in UTF-8, with an
* updated encoding declaration.
*
* @param xmlFile An XML file.
* @param elementName The name of the element to update.
* @param newAttrs The new attributes to place on the element.
* @return A string of the whole XML file, with the element updated (the
* file is also overwritten).
*/
public static String updateXmlElement(
File xmlFile, String elementName, Map newAttrs
) throws IOException {
String newXml = null;
BufferedReader utfFileReader = null;
BufferedReader platformFileReader = null;
Charset utfCharset = Charset.forName("UTF-8");
try {
FileInputStream fis = new FileInputStream(xmlFile);
// try reading with UTF-8, make sure any errors throw an exception
CharsetDecoder decoder = utfCharset.newDecoder()
.onUnmappableCharacter(CodingErrorAction.REPORT)
.onMalformedInput(CodingErrorAction.REPORT);
utfFileReader = new BomStrippingInputStreamReader(fis, decoder);
newXml = updateXmlElement(utfFileReader, elementName, newAttrs);
}
catch(CharacterCodingException cce) {
// File not readable as UTF-8, so try the platform default encoding
if(utfFileReader != null) {
utfFileReader.close();
utfFileReader = null;
}
if(DEBUG) {
Err.prln("updateXmlElement: could not read " + xmlFile + " as UTF-8, "
+ "trying platform default");
}
platformFileReader = new BufferedReader(new FileReader(xmlFile));
newXml = updateXmlElement(platformFileReader, elementName, newAttrs);
}
finally {
if(utfFileReader != null) {
utfFileReader.close();
}
if(platformFileReader != null) {
platformFileReader.close();
}
}
// write the updated file in UTF-8, fixing the encoding declaration
newXml = newXml.replaceFirst(
"\\A<\\?xml (.*)encoding=(?:\"[^\"]*\"|'[^']*')",
"java.io.File
. First tries to parse
* the URL's toExternalForm as a URI and create the File object from that
* URI. If this fails, just uses the path part of the URL. This handles
* URLs that contain spaces or other unusual characters, both as literals and
* when encoded as (e.g.) %20.
*
* @exception IllegalArgumentException if the URL is not convertable into a
* File.
*/
/**
 * Converts a URL into a {@link java.io.File}.
 *
 * First tries to parse the URL's external form as a URI and create the File
 * from that URI. If the external form is not a valid URI (for example
 * because it contains a literal space), a URI is built from the URL's
 * protocol and path instead, which quotes such characters. This handles
 * URLs that contain spaces or other unusual characters, both as literals
 * and when encoded as (e.g.) %20.
 *
 * @param theURL the URL to convert
 * @return the corresponding File
 * @throws IllegalArgumentException if the URL is not convertible into a
 *         File; when the fall-back URI could not be built, the underlying
 *         {@link URISyntaxException} is attached as the cause
 */
public static File fileFromURL(URL theURL) throws IllegalArgumentException {
  try {
    URI uri = new URI(theURL.toExternalForm());
    return new File(uri);
  }
  catch(URISyntaxException use) {
    try {
      // the multi-argument constructor quotes illegal characters in the path
      URI uri = new URI(theURL.getProtocol(), null, theURL.getPath(), null, null);
      return new File(uri);
    }
    catch(URISyntaxException use2) {
      // keep the parse failure as the cause so the reason is not lost
      throw new IllegalArgumentException(
          "Cannot convert " + theURL + " to a file path", use2);
    }
  }
}
/**
 * Same as {@link java.io.File#listFiles(java.io.FileFilter)} but recursive
 * on directories.
 *
 * Each directory's entries are obtained with the filter applied, so a
 * sub-directory is only descended into if it is itself accepted by the
 * filter. Every accepted entry is followed in the result by the accepted
 * contents of that entry, if it is a directory.
 *
 * @param directory file path to start the search; it will not be included
 *        in the results
 * @param filter filter to apply to the search
 * @return an array of files (including directories) contained inside
 *         {@code directory}. The array will be empty if the directory is
 *         empty. Returns {@code null} if {@code directory}, or any accepted
 *         sub-directory, does not denote a directory or cannot be listed
 *         because an I/O error occurs.
 */
public static File[] listFilesRecursively(File directory, FileFilter filter) {
  // typed list, so that toArray(File[]) yields a File[]
  List<File> filesList = new ArrayList<File>();
  File[] filesRootArray = directory.listFiles(filter);
  if (filesRootArray == null) { return null; }
  for (File file : filesRootArray) {
    filesList.add(file);
    if (file.isDirectory()) {
      File[] filesDeepArray = listFilesRecursively(file, filter);
      // a failure anywhere below makes the whole listing fail
      if (filesDeepArray == null) { return null; }
      filesList.addAll(Arrays.asList(filesDeepArray));
    }
  }
  return filesList.toArray(new File[filesList.size()]);
}
} // class Files