com.antiaction.common.datastructures.flatfilelookup.FlatfileLookupManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of common-datastructures Show documentation
Show all versions of common-datastructures Show documentation
A library for common data structures.
The newest version!
package com.antiaction.common.datastructures.flatfilelookup;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Class for looking up URL-prefixes in multiple CDX-files.
 *
 * The lookup files are discovered once, when the singleton is created, by
 * scanning the supplied root paths (see {@link #findLookupFiles(String[])}).
 * They are queried in ascending last-modified order.
 */
public class FlatfileLookupManager {

    private static final Logger logger = Logger.getLogger(FlatfileLookupManager.class.getName());

    /** The singleton handling the CDX lookup. */
    private static FlatfileLookupManager cdxIndexManager;

    /** Orders files by ascending last-modified time. */
    private static final FileComparator fileComparator = new FileComparator();

    /** Lookup objects, in the order returned by {@link #findLookupFiles(String[])}. */
    private final List<FlatfileLookupAbstract> flatfileLookupList = new LinkedList<FlatfileLookupAbstract>();

    /** Lookup objects keyed by the path of their file; this class only populates it. */
    private final Map<String, FlatfileLookupAbstract> flatfileLookupMap = new HashMap<String, FlatfileLookupAbstract>();

    /** Array copy of {@link #flatfileLookupList}, iterated by {@link #lookup(String)}. */
    private final FlatfileLookupAbstract[] lookupArr;

    /**
     * Constructor of the FlatfileLookupManager.
     *
     * Finds all lookup files below the given roots and obtains a
     * {@code FlatfileLookupAbstract} for each of them through
     * {@code FlatfileLookupCaching.getInstance(File)}.
     *
     * @param roots files and/or directories to scan for lookup files
     */
    private FlatfileLookupManager(String[] roots) {
        // Iterate instead of calling get(i): the list is a LinkedList, so
        // indexed access would make this loop quadratic.
        for (File file : findLookupFiles(roots)) {
            FlatfileLookupAbstract flatfileLookup = FlatfileLookupCaching.getInstance(file);
            flatfileLookupList.add(flatfileLookup);
            flatfileLookupMap.put(file.getPath(), flatfileLookup);
            // Debug output goes through the logger rather than stdout.
            if (logger.isLoggable(Level.FINE)) {
                logger.log(Level.FINE, file.lastModified() + " - " + file.getPath());
            }
        }
        lookupArr = flatfileLookupList.toArray(new FlatfileLookupAbstract[0]);
    }

    /**
     * Returns the singleton instance, creating it on the first call.
     *
     * Note that {@code roots} is only used by the call that creates the
     * instance; later calls return the existing instance and ignore it.
     *
     * @param roots files and/or directories to scan for lookup files
     * @return the shared manager instance
     */
    public static synchronized FlatfileLookupManager getInstance(String[] roots) {
        if (cdxIndexManager == null) {
            cdxIndexManager = new FlatfileLookupManager(roots);
        }
        return cdxIndexManager;
    }

    /**
     * Collects, from every lookup file, the consecutive lines starting with
     * the given prefix.
     *
     * A file is skipped when its {@code lock()} or {@code open()} call
     * returns {@code false}. An {@link IOException} while reading a file is
     * logged and the lines read so far are kept; a
     * {@link FileNotFoundException} is logged and the file is skipped.
     *
     * @param prefix the prefix each returned line starts with
     * @return the matching lines, file by file in the manager's file order
     */
    public List<String> lookup(String prefix) {
        List<String> results = new LinkedList<String>();
        for (FlatfileLookupAbstract lookupFile : lookupArr) {
            if (!lookupFile.lock()) {
                continue;
            }
            try {
                if (lookupFile.open()) {
                    try {
                        // Presumably positions the file at the first candidate line;
                        // the lines that follow are read until one no longer matches.
                        lookupFile.lookup(prefix);
                        String line;
                        while ((line = lookupFile.readLine()) != null && line.startsWith(prefix)) {
                            results.add(line);
                        }
                    } catch (IOException e) {
                        logger.log(Level.SEVERE, e.toString(), e);
                    } finally {
                        // Close even if an unchecked exception escapes the read loop.
                        lookupFile.close();
                    }
                }
            } catch (FileNotFoundException e) {
                // The file cannot be searched; report it and move on to the next one.
                logger.log(Level.WARNING, e.toString(), e);
            } finally {
                // Always release the lock taken above, whatever happened in between.
                lookupFile.unlock();
            }
        }
        return results;
    }

    /**
     * Finds all lookup files in or below the given roots.
     *
     * A root may be a single file or a directory. Directories are scanned
     * recursively, except that sub-directories named {@code .snapshot}
     * (compared case-insensitively) are not entered. A file is accepted when
     * its name ends with {@code .cdx} or starts with {@code wayback.index}.
     * Files are accepted regardless of their age.
     *
     * @param roots files and/or directories to scan; {@code null} and
     *        {@code null} entries are treated as "no root"
     * @return the files found, sorted by ascending last-modified time
     */
    public static List<File> findLookupFiles(String[] roots) {
        List<File> lookupFilesList = new LinkedList<File>();
        if (roots == null) {
            return lookupFilesList;
        }
        // Explicit stack instead of recursion for the directory traversal.
        Stack<File> stack = new Stack<File>();
        for (String root : roots) {
            if (root != null) {
                stack.push(new File(root));
            }
        }
        while (!stack.isEmpty()) {
            File file = stack.pop();
            if (!file.exists()) {
                continue;
            }
            if (!file.isDirectory()) {
                if (isLookupFile(file)) {
                    lookupFilesList.add(file);
                }
                continue;
            }
            File[] children = file.listFiles();
            if (children == null) {
                // listFiles() returns null when the directory cannot be listed.
                continue;
            }
            for (File child : children) {
                if (child.isDirectory()) {
                    if (!child.getName().equalsIgnoreCase(".snapshot")) {
                        stack.push(child);
                    }
                } else if (isLookupFile(child)) {
                    lookupFilesList.add(child);
                }
            }
        }
        Collections.sort(lookupFilesList, fileComparator);
        return lookupFilesList;
    }

    /** Tells whether the file name marks the file as a CDX / wayback index file. */
    private static boolean isLookupFile(File file) {
        String name = file.getName();
        return name.endsWith(".cdx") || name.startsWith("wayback.index");
    }

    /**
     * Compares files by their last-modified time, oldest first.
     */
    public static class FileComparator implements Comparator<File> {
        @Override
        public int compare(File o1, File o2) {
            long t1 = o1.lastModified();
            long t2 = o2.lastModified();
            // Explicit comparison instead of subtraction, which could overflow.
            return t1 < t2 ? -1 : (t1 > t2 ? 1 : 0);
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy