All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gate.util.Files Maven / Gradle / Ivy

/*
 *  Files.java
 *
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  $Id: Files.java 17600 2014-03-08 18:47:11Z markagreenwood $
 */

package gate.util;

import gate.Gate;
import gate.corpora.DocumentXmlUtils;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;


/** Some utilities for use with Files and with resources.
  * 

* Note that there is a terminology conflict between the use * of "resources" here and gate.Resource and its inheritors. *

* Java "resources" are files that live on the CLASSPATH or in a Jar * file that are not .class files. For example: a * .gif file that is used by a GUI, or one of the XML files * used for testing GATE's document format facilities. This class * allows you to access these files in various ways (as streams, as * byte arrays, etc.). *

* GATE resources are components (Java Beans) that provide all of the * natural language processing capabilities of a GATE-based system, and * the language data that such systems analsyse and produce. For * example: parsers, lexicons, generators, corpora. *

* Where we say "resource" in this class we mean Java resource; elsewhere * in the system we almost always mean GATE resource. */ public class Files { /** Debug flag */ private static final boolean DEBUG = false; /** Used to generate temporary resources names*/ static long resourceIndex = 0; /**Where on the classpath the gate resources are to be found*/ protected static final String resourcePath = "/gate/resources"; /**Gets the path for the gate resources within the classpath*/ public static String getResourcePath(){ return resourcePath; } /** It returns the last component in a file path. * It takes E.g: d:/tmp/file.txt and returns file.txt */ public static String getLastPathComponent(String path){ if(path == null || path.length() == 0) return ""; //we should look both for "/" and "\" as on windows the file separator is"\" //but a path coming from an URL will be separated by "/" int index = path.lastIndexOf('/'); if(index == -1) index = path.lastIndexOf('\\'); if(index == -1) return path; else return path.substring(index + 1); }// getLastPathComponent() /** Get a string representing the contents of a text file. */ public static String getString(String fileName) throws IOException { return getString(new File(fileName)); } // getString(fileName) /** Get a string representing the contents of a text file. */ public static String getString(File textFile) throws IOException { FileInputStream fis = new FileInputStream(textFile); int len = (int) textFile.length(); byte[] textBytes = new byte[len]; fis.read(textBytes, 0, len); fis.close(); return new String(textBytes); } // getString(File) /** Get a byte array representing the contents of a binary file. 
*/ public static byte[] getByteArray(File binaryFile) throws IOException { FileInputStream fis = new FileInputStream(binaryFile); int len = (int) binaryFile.length(); byte[] bytes = new byte[len]; fis.read(bytes, 0, len); fis.close(); return bytes; } // getByteArray(File) /** Get a resource from the GATE ClassLoader as a String. * @param resourceName The resource to input. */ public static String getResourceAsString(String resourceName) throws IOException { return getResourceAsString(resourceName, null); } /** Get a resource from the GATE ClassLoader as a String. * @param encoding The encoding of the reader used to input the file * (may be null in which case the default encoding is used). * @param resourceName The resource to input. */ public static String getResourceAsString(String resourceName, String encoding) throws IOException { InputStream resourceStream = getResourceAsStream(resourceName); if(resourceStream == null) return null; BufferedReader resourceReader; if(encoding == null) { resourceReader = new BomStrippingInputStreamReader(resourceStream); } else { resourceReader = new BomStrippingInputStreamReader(resourceStream, encoding); } StringBuffer resourceBuffer = new StringBuffer(); int i; int charsRead = 0; final int size = 1024; char[] charArray = new char[size]; while( (charsRead = resourceReader.read(charArray,0,size)) != -1 ) resourceBuffer.append (charArray,0,charsRead); while( (i = resourceReader.read()) != -1 ) resourceBuffer.append((char) i); resourceReader.close(); return resourceBuffer.toString(); } // getResourceAsString(String) /** Get a resource from the GATE resources directory as a String. * The resource name should be relative to resourcePath which * is equal with gate/resources; e.g. * for a resource stored as gate/resources/jape/Test11.jape, * this method should be passed the name jape/Test11.jape. 
*/ public static String getGateResourceAsString(String resourceName) throws IOException { InputStream resourceStream = getGateResourceAsStream(resourceName); if (resourceStream == null) throw new IOException("No such resource on classpath: " + resourceName); try { return IOUtils.toString(resourceStream); } finally { resourceStream.close(); } } // getGateResourceAsString(String) /** * Writes a temporary file into the default temporary directory, * form an InputStream a unique ID is generated and associated automaticaly * with the file name... */ public static File writeTempFile(InputStream contentStream) throws IOException { File resourceFile = null; FileOutputStream resourceFileOutputStream = null; try { // create a temporary file name resourceFile = File.createTempFile("gateResource", ".tmp"); resourceFileOutputStream = new FileOutputStream(resourceFile); resourceFile.deleteOnExit(); if(contentStream == null) return resourceFile; int bytesRead = 0; final int readSize = 1024; byte[] bytes = new byte[readSize]; while((bytesRead = contentStream.read(bytes, 0, readSize)) != -1) resourceFileOutputStream.write(bytes, 0, bytesRead); } finally { IOUtils.closeQuietly(resourceFileOutputStream); IOUtils.closeQuietly(contentStream); } return resourceFile; }// writeTempFile() /** * Writes aString into a temporary file located inside * the default temporary directory defined by JVM, using the specific * anEncoding. * An unique ID is generated and associated automaticaly with the file name. * @param aString the String to be written. If is null then the file will be * empty. * @param anEncoding the encoding to be used. If is null then the default * encoding will be used. * @return the tmp file containing the string. 
*/ public static File writeTempFile(String aString, String anEncoding) throws UnsupportedEncodingException, IOException{ File resourceFile = null; OutputStreamWriter writer = null; // Create a temporary file name resourceFile = File.createTempFile ("gateResource", ".tmp"); resourceFile.deleteOnExit (); if (aString == null) return resourceFile; // Prepare the writer if (anEncoding == null){ // Use default encoding writer = new OutputStreamWriter(new FileOutputStream(resourceFile)); }else { // Use the specified encoding writer = new OutputStreamWriter( new FileOutputStream(resourceFile),anEncoding); }// End if // This Action is added only when a gate.Document is created. // So, is for sure that the resource is a gate.Document writer.write(aString); writer.flush(); writer.close(); return resourceFile; }// writeTempFile() /** * Writes aString into a temporary file located inside * the default temporary directory defined by JVM, using the default * encoding. * An unique ID is generated and associated automaticaly with the file name. * @param aString the String to be written. If is null then the file will be * empty. * @return the tmp file containing the string. */ public static File writeTempFile(String aString) throws IOException{ return writeTempFile(aString,null); }// writeTempFile() /** Get a resource from the GATE ClassLoader as a byte array. 
*/ public static byte[] getResourceAsByteArray(String resourceName) throws IOException, IndexOutOfBoundsException, ArrayStoreException { InputStream resourceInputStream = getResourceAsStream(resourceName); BufferedInputStream resourceStream = new BufferedInputStream(resourceInputStream); byte b; final int bufSize = 1024; byte[] buf = new byte[bufSize]; int i = 0; // get the whole resource into buf (expanding the array as needed) while( (b = (byte) resourceStream.read()) != -1 ) { if(i == buf.length) { byte[] newBuf = new byte[buf.length * 2]; System.arraycopy (buf,0,newBuf,0,i); buf = newBuf; } buf[i++] = b; } // close the resource stream resourceStream.close(); // copy the contents of buf to an array of the correct size byte[] bytes = new byte[i]; // copy from buf to bytes System.arraycopy (buf,0,bytes,0,i); return bytes; } // getResourceAsByteArray(String) /** Get a resource from the GATE resources directory as a byte array. * The resource name should be relative to resourcePath which * is equal with gate/resources; e.g. * for a resource stored as gate/resources/jape/Test11.jape, * this method should be passed the name jape/Test11.jape. 
*/ public static byte[] getGateResourceAsByteArray(String resourceName) throws IOException, IndexOutOfBoundsException, ArrayStoreException { InputStream resourceInputStream = getGateResourceAsStream(resourceName); BufferedInputStream resourceStream = new BufferedInputStream(resourceInputStream); byte b; final int bufSize = 1024; byte[] buf = new byte[bufSize]; int i = 0; // get the whole resource into buf (expanding the array as needed) while( (b = (byte) resourceStream.read()) != -1 ) { if(i == buf.length) { byte[] newBuf = new byte[buf.length * 2]; System.arraycopy (buf,0,newBuf,0,i); buf = newBuf; } buf[i++] = b; } // close the resource stream resourceStream.close(); // copy the contents of buf to an array of the correct size byte[] bytes = new byte[i]; // copy from buf to bytes System.arraycopy (buf,0,bytes,0,i); return bytes; } // getResourceGateAsByteArray(String) /** Get a resource from the GATE ClassLoader as an InputStream. */ public static InputStream getResourceAsStream(String resourceName) throws IOException { // Strip any leading '/' if(resourceName.charAt(0) == '/') { resourceName = resourceName.substring(1); } ClassLoader gcl = Gate.getClassLoader(); if(gcl == null) { // if the GATE ClassLoader has not been initialised yet (i.e. this // method was called before Gate.init) then fall back to the current // classloader return Files.class.getClassLoader().getResourceAsStream(resourceName); } else { // if we can, get the resource through the GATE ClassLoader to allow // loading of resources from plugin JARs as well as gate.jar return gcl.getResourceAsStream(resourceName); } //return ClassLoader.getSystemResourceAsStream(resourceName); } // getResourceAsStream(String) /** Get a resource from the GATE resources directory as an InputStream. * The resource name should be relative to resourcePath which * is equal with gate/resources; e.g. * for a resource stored as gate/resources/jape/Test11.jape, * this method should be passed the name jape/Test11.jape. 
*/ public static InputStream getGateResourceAsStream(String resourceName) throws IOException { if(resourceName.startsWith("/") || resourceName.startsWith("\\") ) return getResourceAsStream(resourcePath + resourceName); else return getResourceAsStream(resourcePath + "/" + resourceName); } // getResourceAsStream(String) /** * Get a resource from the GATE ClassLoader. The return value is a * {@link java.net.URL} that can be used to retrieve the contents of the * resource. */ public static URL getResource(String resourceName) { // Strip any leading '/' if(resourceName.charAt(0) == '/') { resourceName = resourceName.substring(1); } ClassLoader gcl = Gate.getClassLoader(); if(gcl == null) { // if the GATE ClassLoader has not been initialised yet (i.e. this // method was called before Gate.init) then fall back to the current // classloader return Files.class.getClassLoader().getResource(resourceName); } else { // if we can, get the resource through the GATE ClassLoader to allow // loading of resources from plugin JARs as well as gate.jar return gcl.getResource(resourceName); } } /** * Get a resource from the GATE resources directory. The return value is a * {@link java.net.URL} that can be used to retrieve the contents of the * resource. * The resource name should be relative to resourcePath which * is equal with gate/resources; e.g. * for a resource stored as gate/resources/jape/Test11.jape, * this method should be passed the name jape/Test11.jape. */ public static URL getGateResource(String resourceName) { if(resourceName.startsWith("/") || resourceName.startsWith("\\") ) return getResource(resourcePath + resourceName); else return getResource(resourcePath + "/" + resourceName); } /** * This method takes a regular expression and a directory name and returns * the set of Files that match the pattern under that directory. 
* Only the direct children of the directory are examined, and a path is
   * kept only when the whole path matches the expression.
   *
   * @param regex regular expression path that begins with pathFile
   * @param pathFile directory path where to search for files
   * @return set of file paths under pathFile that matches
   * regex; empty if pathFile is null, does not exist, or cannot be listed
   */
  public static Set Find(String regex, String pathFile) {
    Set<String> regexfinal = new HashSet<String>();
    Pattern pattern = Pattern.compile("^" + regex);

    // a null path cannot match anything; the previous code logged the
    // NullPointerException and then failed with a second one
    if(pathFile == null) return regexfinal;

    File file = new File(pathFile);
    if(file.isDirectory()) {
      // list() returns null when the directory cannot be read
      String[] tab = file.list();
      if(tab != null) {
        for(int i = 0; i < tab.length; i++) {
          String finalPath = pathFile + "/" + tab[i];
          if(pattern.matcher(finalPath).matches()) {
            regexfinal.add(finalPath);
          }
        }
      }
    } else if(file.isFile()) {
      if(pattern.matcher(pathFile).matches()) {
        regexfinal.add(pathFile);
      }
    }

    return regexfinal;
  } //find

  /** Recursively remove a directory even if it contains other files
    * or directories. Returns true when the directory and all its
    * contents are successfully removed, else false.
    */
  public static boolean rmdir(File dir) {
    if(dir == null || !
dir.isDirectory()) // only delete directories return false; // list all the members of the dir String[] members = dir.list(); // return value indicating success or failure boolean succeeded = true; // for each member, if is dir then recursively delete; if file then delete for(int i = 0; i newAttrs ) throws IOException { String line = null; String nl = Strings.getNl(); StringBuffer newXml = new StringBuffer(); // read the whole source while( ( line = xml.readLine() ) != null ) { newXml.append(line); newXml.append(nl); } // find the location of the element int start = newXml.toString().indexOf("<" + elementName); if(start == -1) return newXml.toString(); int end = newXml.toString().indexOf(">", start); if(end == -1) return newXml.toString(); // check if the old element is empty (ends in "/>") or not boolean isEmpty = false; if(newXml.toString().charAt(end - 1) == '/') isEmpty = true; // create the new element string with the new attributes StringBuffer newElement = new StringBuffer(); newElement.append("<"); newElement.append(elementName); // add in the new attributes Iterator> iter = newAttrs.entrySet().iterator(); while(iter.hasNext()) { Map.Entry entry = iter.next(); String key = entry.getKey(); String value = entry.getValue(); newElement.append(" "); newElement.append(DocumentXmlUtils.combinedNormalisation(key)); newElement.append("=\""); newElement.append(DocumentXmlUtils.combinedNormalisation(value)); newElement.append("\"" + nl); } // terminate the element if(isEmpty) newElement.append("/"); newElement.append(">"); // replace the old string newXml.replace(start, end + 1, newElement.toString()); return newXml.toString(); } // updateXmlElement(Reader...) /** * This method updates an XML element in an XML file * with a new set of attributes. If the element is not found the XML * file is unchanged. The attributes keys and values must all be Strings. * We first try to read the file using UTF-8 encoding. 
If an error occurs we * fall back to the platform default encoding (for backwards-compatibility * reasons) and try again. The file is written back in UTF-8, with an * updated encoding declaration. * * @param xmlFile An XML file. * @param elementName The name of the element to update. * @param newAttrs The new attributes to place on the element. * @return A string of the whole XML file, with the element updated (the * file is also overwritten). */ public static String updateXmlElement( File xmlFile, String elementName, Map newAttrs ) throws IOException { String newXml = null; BufferedReader utfFileReader = null; BufferedReader platformFileReader = null; Charset utfCharset = Charset.forName("UTF-8"); try { FileInputStream fis = new FileInputStream(xmlFile); // try reading with UTF-8, make sure any errors throw an exception CharsetDecoder decoder = utfCharset.newDecoder() .onUnmappableCharacter(CodingErrorAction.REPORT) .onMalformedInput(CodingErrorAction.REPORT); utfFileReader = new BomStrippingInputStreamReader(fis, decoder); newXml = updateXmlElement(utfFileReader, elementName, newAttrs); } catch(CharacterCodingException cce) { // File not readable as UTF-8, so try the platform default encoding if(utfFileReader != null) { utfFileReader.close(); utfFileReader = null; } if(DEBUG) { Err.prln("updateXmlElement: could not read " + xmlFile + " as UTF-8, " + "trying platform default"); } platformFileReader = new BufferedReader(new FileReader(xmlFile)); newXml = updateXmlElement(platformFileReader, elementName, newAttrs); } finally { if(utfFileReader != null) { utfFileReader.close(); } if(platformFileReader != null) { platformFileReader.close(); } } // write the updated file in UTF-8, fixing the encoding declaration newXml = newXml.replaceFirst( "\\A<\\?xml (.*)encoding=(?:\"[^\"]*\"|'[^']*')", "java.io.File. First tries to parse * the URL's toExternalForm as a URI and create the File object from that * URI. If this fails, just uses the path part of the URL. 
This handles * URLs that contain spaces or other unusual characters, both as literals and * when encoded as (e.g.) %20. * * @exception IllegalArgumentException if the URL is not convertable into a * File. */ public static File fileFromURL(URL theURL) throws IllegalArgumentException { try { URI uri = new URI(theURL.toExternalForm()); return new File(uri); } catch(URISyntaxException use) { try { URI uri = new URI(theURL.getProtocol(), null, theURL.getPath(), null, null); return new File(uri); } catch(URISyntaxException use2) { throw new IllegalArgumentException("Cannot convert " + theURL + " to a file path"); } } } /** * Same as {@link java.io.File#listFiles(java.io.FileFilter)} * but recursive on directories. * @param directory file path to start the search, will not be include * in the results * @param filter filter apply to the search * @return an array of files (including directories) contained inside * directory. The array will be empty if the directory is * empty. Returns null if this abstract pathname does not denote a * directory, or if an I/O error occurs. */ public static File[] listFilesRecursively(File directory, FileFilter filter) { List filesList = new ArrayList(); File[] filesRootArray = directory.listFiles(filter); if (filesRootArray == null) { return null; } for (File file : filesRootArray) { filesList.add(file); if (file.isDirectory()) { File[] filesDeepArray = listFilesRecursively(file, filter); if (filesDeepArray == null) { return null; } filesList.addAll(Arrays.asList(filesDeepArray)); } } return filesList.toArray(new File[filesList.size()]); } } // class Files





© 2015 - 2024 Weber Informatics LLC | Privacy Policy