All downloads are free. Search and download functionality uses the official Maven repository.

eu.fbk.twm.utils.GenericFileUtils Maven / Gradle / Ivy

The newest version!
package eu.fbk.twm.utils;


import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;

import java.io.*;

import java.util.*;
import java.util.regex.*;


/**
 * Created with IntelliJ IDEA.
 * User: aprosio
 * Date: 2/1/13
 * Time: 4:53 PM
 * To change this template use File | Settings | File Templates.
 */
public class GenericFileUtils {
	/**
	 * Define a static logger variable so that it references the
	 * Logger instance named ExtractorParameters.
	 */
	static Logger logger = Logger.getLogger(GenericFileUtils.class.getName());

	public static String tail(File file) {
		RandomAccessFile fileHandler = null;
		try {
			fileHandler = new RandomAccessFile(file, "r");
			long fileLength = file.length() - 1;
			StringBuilder sb = new StringBuilder();

			for (long filePointer = fileLength; filePointer != -1; filePointer--) {
				fileHandler.seek(filePointer);
				int readByte = fileHandler.readByte();

				if (readByte == 0xA) {
					if (filePointer == fileLength) {
						continue;
					}
					else {
						break;
					}
				}
				else if (readByte == 0xD) {
					if (filePointer == fileLength - 1) {
						continue;
					}
					else {
						break;
					}
				}

				sb.append((char) readByte);
			}

			String lastLine = sb.reverse().toString();
			return lastLine;
		} catch (java.io.FileNotFoundException e) {
			e.printStackTrace();
			return null;
		} catch (java.io.IOException e) {
			e.printStackTrace();
			return null;
		} finally {
			try {
				fileHandler.close();
			} catch (Exception ignored) {

			}
		}
	}

	public static Object loadObjectFromDisk(String fileName) {
		Object ret = null;
		try {
			FileInputStream fiStream = new FileInputStream(new File(fileName));
			ObjectInputStream objectInputStreamFr = new ObjectInputStream(fiStream);
			ret = objectInputStreamFr.readObject();
			objectInputStreamFr.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
		return ret;
	}

	public static void saveObjectToDisk(Object o, String fileName) {
		try {
			ByteArrayOutputStream baos = new ByteArrayOutputStream();
			ObjectOutputStream oos = new ObjectOutputStream(baos);
			oos.writeObject(o);
			oos.close();
			FileOutputStream foStream = new FileOutputStream(new File(fileName));
			baos.writeTo(foStream);
			foStream.close();
			baos.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	public static String checkWriteableFolder(String dir, boolean clean) {
		if (!dir.endsWith(System.getProperty("file.separator"))) {
			dir += System.getProperty("file.separator");
		}

		File d = new File(dir);
		if (!d.exists()) {
			if (!d.mkdirs()) {
				return null;
			}
		}
		else {
			if (clean) {
				String a2[] = d.list();
				if (a2 != null) {
					for (int j = 0; j < a2.length; j++) {
						String fileName = dir + a2[j];
						File f = new File(fileName);
						f.delete();
					}
				}
			}
		}

		return dir;
	}

	public static ArrayList listFilesInFolder(File folder, boolean recursive) throws IOException {
		ArrayList ret = new ArrayList();

		if (!folder.exists()) {
			throw new IOException("File/folder " + folder + " does not exist");
		}
		if (!folder.isDirectory()) {
			throw new IOException("File/folder " + folder + " is not a directory");
		}

		File[] listOfFiles = folder.listFiles();
		if (listOfFiles == null) {
			return ret;
		}
		Arrays.sort(listOfFiles, Collections.reverseOrder());

		ArrayList files = new ArrayList();
		ArrayList dirs = new ArrayList();

		for (File f : listOfFiles) {
			if (f.isDirectory()) {
				dirs.add(f);
			}
			else {
				files.add(f);
			}
		}

		for (File f : dirs) {
			ret.add(f);
			if (recursive) {
				ret.addAll(listFilesInFolder(f, recursive));
			}
		}
		for (File f : files) {
			ret.add(f);
		}

		return ret;
	}

	public static ArrayList listFilesInFolder(String folder, boolean recursive) throws IOException {
		return listFilesInFolder(new File(folder), recursive);
	}

	public static HashMap searchForFilesInTheSameFolder(String folder, Pattern p1, Pattern... ps) throws IOException {
		Pattern[] patterns = new Pattern[ps.length + 1];
		patterns[0] = p1;
		System.arraycopy(ps, 0, patterns, 1, ps.length);

		ArrayList files = listFilesInFolder(folder, true);
		// System.out.println(files);
		HashMap> found = new HashMap>();
		for (File f : files) {
			String thisFolder = f.getParent();
			if (found.get(thisFolder) == null) {
				found.put(thisFolder, new HashMap());
			}
			String thisFile = f.getName();
			for (int i = 0; i < patterns.length; i++) {
				if (patterns[i].matcher(thisFile).find()) {
					found.get(thisFolder).put(i, f);
					// System.out.println("Found " + thisFile + " in " + thisFolder);
					if (found.get(thisFolder).size() >= patterns.length) {
						// ArrayList ret = new ArrayList();
						HashMap ret = new HashMap();
						for (Integer index : found.get(thisFolder).keySet()) {
							ret.put(patterns[index], found.get(thisFolder).get(index));
							// ret.add(found.get(thisFolder).get(index));
						}
						return ret;
					}
				}
			}
		}

		return null;
	}

	public static Map searchForFilesInTheSameFolder(String folder, String p1, String... ps) throws IOException {
		//logger.debug("searchForFilesInTheSameFolder " + folder);
		Pattern[] patterns = new Pattern[ps.length + 1];
		patterns[0] = Pattern.compile(p1);
		for (int i = 0; i < ps.length; i++) {
			patterns[i + 1] = Pattern.compile(ps[i]);
		}

		//System.arraycopy(ps, 0, patterns, 1, ps.length);
		ArrayList files = listFilesInFolder(folder, true);
		//logger.debug(files);
		HashMap> found = new HashMap>();
		for (File f : files) {
			String thisFolder = f.getParent();
			if (found.get(thisFolder) == null) {
				found.put(thisFolder, new HashMap());
			}
			String thisFile = f.getName();
			for (int i = 0; i < patterns.length; i++) {
				if (patterns[i].matcher(thisFile).find()) {
					found.get(thisFolder).put(i, f);
					// System.out.println("Found " + thisFile + " in " + thisFolder);
					if (found.get(thisFolder).size() >= patterns.length) {
						// ArrayList ret = new ArrayList();
						HashMap ret = new HashMap();
						for (Integer index : found.get(thisFolder).keySet()) {
							ret.put(patterns[index].pattern(), found.get(thisFolder).get(index).getAbsolutePath());
							// ret.add(found.get(thisFolder).get(index));
						}
						return ret;
					}
				}

			}
		}
		checkPatterns(folder, p1,ps);
		//logger.warn(found);
		return null;
	}

	public static Map checkPatterns(String folder, String p1, String... ps) throws IOException {
		logger.debug("checking for missing files in " + folder + "...");
		Pattern[] patterns = new Pattern[ps.length + 1];
		patterns[0] = Pattern.compile(p1);
		for (int i = 0; i < ps.length; i++) {
			patterns[i + 1] = Pattern.compile(ps[i]);
		}
		File[] files = new File(folder).listFiles();
		Set set = new HashSet();
		for (File f : files) {

			for (int i = 0; i < patterns.length; i++) {
				if (patterns[i].matcher(f.getName()).find()) {
					//logger.debug(i + "\t" + patterns[i] + "\t" + f.getName());
					set.add(i);
				}
			}
		}
		for (int i = 0; i < patterns.length; i++) {
			if (!set.contains(i)) {
				logger.error(i + "\t" + patterns[i]);
			}
		}

		//logger.warn(found);
		return null;
	}

	public static void main(String[] args) {

		String logConfig = System.getProperty("log-config");
		if (logConfig == null) {
			logConfig = "configuration/log-config.txt";
		}

		PropertyConfigurator.configure(logConfig);

		//java -cp dist/thewikimachine.jar eu.fbk.twm.utils.GenericFileUtils
		String folder = args[0];

		try {
			//HashMap f = searchForFilesInTheSameFolder(folder, Pattern.compile("^instance.*en\\.nt"), Pattern.compile("^instance.*de\\.nt"));
			//HashMap f = searchForFilesInTheSameFolder(folder, Pattern.compile("type-index"), Pattern.compile("page-form-index"), Pattern.compile("incoming-outgoing-index"), Pattern.compile("form-page-index"));
			//logger.info(f);

			//Map l = searchForFilesInTheSameFolder(folder, "type-index", "page-form-index", "incoming-outgoing-index", "form-page-index", "unigram");
			//logger.info(l);

			//Map resourceMap = GenericFileUtils.searchForFilesInTheSameFolder(folder, "type-index", "page-form-index", "incoming-outgoing-index", "form-page-index", "ngram-index", "page-freq", "form-freq", "cross-lang-index", "ngram.csv", "unigram", "one-example-per-sense-index", "incoming-outgoing-index", "page-file-source-index", "first-name-index", "person-info-index", "airpedia-class-index", "abstract-index");
			Map resourceMap = GenericFileUtils.searchForFilesInTheSameFolder(folder, "type-index", "page-form-index", "form-page-index", "ngram-index", "page-freq.csv", "form-freq", "cross-lang-index", "ngram.csv", "unigram", "one-example-per-sense-index", "page-file-source-index", "first-name-index", "person-info-index", "airpedia-class-index", "abstract-index", "page-category-index", "category-super-category-index", "page-vector-index", "incoming-outgoing-weighted-index");
			//GenericFileUtils.checkPatterns(folder, "type-index", "page-form-index", "form-page-index", "ngram-index", "page-freq.csv", "form-freq", "cross-lang-index", "ngram.csv", "unigram", "one-example-per-sense-index", "page-file-source-index", "first-name-index", "person-info-index", "airpedia-class-index", "abstract-index", "page-category-index", "category-super-category-index", "page-vector-index", "incoming-outgoing-weighted-index");
			logger.info(resourceMap);
		} catch (IOException e) {
			e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy