
// eu.fbk.twm.utils.GenericFileUtils (Maven / Gradle / Ivy)
// The newest version!
package eu.fbk.twm.utils;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import java.io.*;
import java.util.*;
import java.util.regex.*;
/**
* Created with IntelliJ IDEA.
* User: aprosio
* Date: 2/1/13
* Time: 4:53 PM
* To change this template use File | Settings | File Templates.
*/
public class GenericFileUtils {
/**
* Define a static logger variable so that it references the
* Logger instance named ExtractorParameters
.
*/
static Logger logger = Logger.getLogger(GenericFileUtils.class.getName());
public static String tail(File file) {
RandomAccessFile fileHandler = null;
try {
fileHandler = new RandomAccessFile(file, "r");
long fileLength = file.length() - 1;
StringBuilder sb = new StringBuilder();
for (long filePointer = fileLength; filePointer != -1; filePointer--) {
fileHandler.seek(filePointer);
int readByte = fileHandler.readByte();
if (readByte == 0xA) {
if (filePointer == fileLength) {
continue;
}
else {
break;
}
}
else if (readByte == 0xD) {
if (filePointer == fileLength - 1) {
continue;
}
else {
break;
}
}
sb.append((char) readByte);
}
String lastLine = sb.reverse().toString();
return lastLine;
} catch (java.io.FileNotFoundException e) {
e.printStackTrace();
return null;
} catch (java.io.IOException e) {
e.printStackTrace();
return null;
} finally {
try {
fileHandler.close();
} catch (Exception ignored) {
}
}
}
public static Object loadObjectFromDisk(String fileName) {
Object ret = null;
try {
FileInputStream fiStream = new FileInputStream(new File(fileName));
ObjectInputStream objectInputStreamFr = new ObjectInputStream(fiStream);
ret = objectInputStreamFr.readObject();
objectInputStreamFr.close();
} catch (Exception e) {
e.printStackTrace();
}
return ret;
}
public static void saveObjectToDisk(Object o, String fileName) {
try {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(baos);
oos.writeObject(o);
oos.close();
FileOutputStream foStream = new FileOutputStream(new File(fileName));
baos.writeTo(foStream);
foStream.close();
baos.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public static String checkWriteableFolder(String dir, boolean clean) {
if (!dir.endsWith(System.getProperty("file.separator"))) {
dir += System.getProperty("file.separator");
}
File d = new File(dir);
if (!d.exists()) {
if (!d.mkdirs()) {
return null;
}
}
else {
if (clean) {
String a2[] = d.list();
if (a2 != null) {
for (int j = 0; j < a2.length; j++) {
String fileName = dir + a2[j];
File f = new File(fileName);
f.delete();
}
}
}
}
return dir;
}
public static ArrayList listFilesInFolder(File folder, boolean recursive) throws IOException {
ArrayList ret = new ArrayList();
if (!folder.exists()) {
throw new IOException("File/folder " + folder + " does not exist");
}
if (!folder.isDirectory()) {
throw new IOException("File/folder " + folder + " is not a directory");
}
File[] listOfFiles = folder.listFiles();
if (listOfFiles == null) {
return ret;
}
Arrays.sort(listOfFiles, Collections.reverseOrder());
ArrayList files = new ArrayList();
ArrayList dirs = new ArrayList();
for (File f : listOfFiles) {
if (f.isDirectory()) {
dirs.add(f);
}
else {
files.add(f);
}
}
for (File f : dirs) {
ret.add(f);
if (recursive) {
ret.addAll(listFilesInFolder(f, recursive));
}
}
for (File f : files) {
ret.add(f);
}
return ret;
}
public static ArrayList listFilesInFolder(String folder, boolean recursive) throws IOException {
return listFilesInFolder(new File(folder), recursive);
}
public static HashMap searchForFilesInTheSameFolder(String folder, Pattern p1, Pattern... ps) throws IOException {
Pattern[] patterns = new Pattern[ps.length + 1];
patterns[0] = p1;
System.arraycopy(ps, 0, patterns, 1, ps.length);
ArrayList files = listFilesInFolder(folder, true);
// System.out.println(files);
HashMap> found = new HashMap>();
for (File f : files) {
String thisFolder = f.getParent();
if (found.get(thisFolder) == null) {
found.put(thisFolder, new HashMap());
}
String thisFile = f.getName();
for (int i = 0; i < patterns.length; i++) {
if (patterns[i].matcher(thisFile).find()) {
found.get(thisFolder).put(i, f);
// System.out.println("Found " + thisFile + " in " + thisFolder);
if (found.get(thisFolder).size() >= patterns.length) {
// ArrayList ret = new ArrayList();
HashMap ret = new HashMap();
for (Integer index : found.get(thisFolder).keySet()) {
ret.put(patterns[index], found.get(thisFolder).get(index));
// ret.add(found.get(thisFolder).get(index));
}
return ret;
}
}
}
}
return null;
}
public static Map searchForFilesInTheSameFolder(String folder, String p1, String... ps) throws IOException {
//logger.debug("searchForFilesInTheSameFolder " + folder);
Pattern[] patterns = new Pattern[ps.length + 1];
patterns[0] = Pattern.compile(p1);
for (int i = 0; i < ps.length; i++) {
patterns[i + 1] = Pattern.compile(ps[i]);
}
//System.arraycopy(ps, 0, patterns, 1, ps.length);
ArrayList files = listFilesInFolder(folder, true);
//logger.debug(files);
HashMap> found = new HashMap>();
for (File f : files) {
String thisFolder = f.getParent();
if (found.get(thisFolder) == null) {
found.put(thisFolder, new HashMap());
}
String thisFile = f.getName();
for (int i = 0; i < patterns.length; i++) {
if (patterns[i].matcher(thisFile).find()) {
found.get(thisFolder).put(i, f);
// System.out.println("Found " + thisFile + " in " + thisFolder);
if (found.get(thisFolder).size() >= patterns.length) {
// ArrayList ret = new ArrayList();
HashMap ret = new HashMap();
for (Integer index : found.get(thisFolder).keySet()) {
ret.put(patterns[index].pattern(), found.get(thisFolder).get(index).getAbsolutePath());
// ret.add(found.get(thisFolder).get(index));
}
return ret;
}
}
}
}
checkPatterns(folder, p1,ps);
//logger.warn(found);
return null;
}
public static Map checkPatterns(String folder, String p1, String... ps) throws IOException {
logger.debug("checking for missing files in " + folder + "...");
Pattern[] patterns = new Pattern[ps.length + 1];
patterns[0] = Pattern.compile(p1);
for (int i = 0; i < ps.length; i++) {
patterns[i + 1] = Pattern.compile(ps[i]);
}
File[] files = new File(folder).listFiles();
Set set = new HashSet();
for (File f : files) {
for (int i = 0; i < patterns.length; i++) {
if (patterns[i].matcher(f.getName()).find()) {
//logger.debug(i + "\t" + patterns[i] + "\t" + f.getName());
set.add(i);
}
}
}
for (int i = 0; i < patterns.length; i++) {
if (!set.contains(i)) {
logger.error(i + "\t" + patterns[i]);
}
}
//logger.warn(found);
return null;
}
public static void main(String[] args) {
String logConfig = System.getProperty("log-config");
if (logConfig == null) {
logConfig = "configuration/log-config.txt";
}
PropertyConfigurator.configure(logConfig);
//java -cp dist/thewikimachine.jar eu.fbk.twm.utils.GenericFileUtils
String folder = args[0];
try {
//HashMap f = searchForFilesInTheSameFolder(folder, Pattern.compile("^instance.*en\\.nt"), Pattern.compile("^instance.*de\\.nt"));
//HashMap f = searchForFilesInTheSameFolder(folder, Pattern.compile("type-index"), Pattern.compile("page-form-index"), Pattern.compile("incoming-outgoing-index"), Pattern.compile("form-page-index"));
//logger.info(f);
//Map l = searchForFilesInTheSameFolder(folder, "type-index", "page-form-index", "incoming-outgoing-index", "form-page-index", "unigram");
//logger.info(l);
//Map resourceMap = GenericFileUtils.searchForFilesInTheSameFolder(folder, "type-index", "page-form-index", "incoming-outgoing-index", "form-page-index", "ngram-index", "page-freq", "form-freq", "cross-lang-index", "ngram.csv", "unigram", "one-example-per-sense-index", "incoming-outgoing-index", "page-file-source-index", "first-name-index", "person-info-index", "airpedia-class-index", "abstract-index");
Map resourceMap = GenericFileUtils.searchForFilesInTheSameFolder(folder, "type-index", "page-form-index", "form-page-index", "ngram-index", "page-freq.csv", "form-freq", "cross-lang-index", "ngram.csv", "unigram", "one-example-per-sense-index", "page-file-source-index", "first-name-index", "person-info-index", "airpedia-class-index", "abstract-index", "page-category-index", "category-super-category-index", "page-vector-index", "incoming-outgoing-weighted-index");
//GenericFileUtils.checkPatterns(folder, "type-index", "page-form-index", "form-page-index", "ngram-index", "page-freq.csv", "form-freq", "cross-lang-index", "ngram.csv", "unigram", "one-example-per-sense-index", "page-file-source-index", "first-name-index", "person-info-index", "airpedia-class-index", "abstract-index", "page-category-index", "category-super-category-index", "page-vector-index", "incoming-outgoing-weighted-index");
logger.info(resourceMap);
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy