org.mapdb.Pump Maven / Gradle / Ivy
The newest version!
package org.mapdb;
import java.io.*;
import java.nio.ByteBuffer;
import java.util.*;
/**
* Data Pump moves data from one source to another.
* It can be used to import data from text file, or copy store from memory to disk.
*/
public final class Pump {
/**
 * Copies all data from the first DB into the second DB.
 * The stores behind both DBs are looked up with {@code Store.forDB} and passed on
 * to the store-level copy; once that finishes, the cache of the target engine is
 * cleared and the target DB is re-initialised.
 *
 * @param db1 DB the data is read from
 * @param db2 DB the data is written into
 */
//TODO Pump between stores is disabled for now, make this method public once enabled
static void copy(DB db1, DB db2){
    final Store source = Store.forDB(db1);
    final Store target = Store.forDB(db2);
    copy(source, target);
    //the target was modified underneath the DB, so drop cached state and reload it
    db2.engine.clearCache();
    db2.reinit();
}
/**
 * Copies all data from the first store to the second store.
 * Every recid from 1 up to {@code s1.getMaxRecid()} (inclusive) is read in raw
 * form and written to the same recid in the target; recids for which the source
 * returns {@code null} are skipped. Afterwards each recid the source reports as
 * free is deleted from the target.
 *
 * @param s1 store the records are read from
 * @param s2 store the records are written into
 */
//TODO Pump between stores is disabled for now, make this method public once enabled
static void copy(Store s1, Store s2){
    final long maxRecid = s1.getMaxRecid();
    for(long recid=1; recid<=maxRecid; recid++){
        final ByteBuffer bb = s1.getRaw(recid);
        //nothing stored under this recid in the source
        if(bb==null) continue;
        s2.updateRaw(recid, bb);
    }
    //now release unused recids
    //typed iterator so that next() yields a Long recid rather than a plain Object
    for(Iterator<Long> iter = s1.getFreeRecids(); iter.hasNext();){
        s2.delete(iter.next(), null);
    }
}
/**
* Sorts large data set by given `Comparator`. Data are sorted with in-memory cache and temporary files.
*
* Items are read from `source` into an array of `batchSize` slots. Each time the array
* fills up it is sorted and written with `serializer` into a new temporary file.
* If no temporary file was needed, the on-heap array is sorted and an iterator over it is returned.
* Otherwise iterators over the temporary files plus one over the on-heap remainder are
* handed to `sort(comparator, mergeDuplicates, iterators)` and its result is returned.
*
* NOTE(review): on the purely on-heap path (no temporary file created) `mergeDuplicates` is not consulted.
* NOTE(review): the tag "@param type of data" below names no parameter; presumably it documented
* a generic type parameter whose name was lost - confirm against the original source.
*
* @param source iterator over unsorted data
* @param mergeDuplicates should be duplicate keys merged into single one?
* @param batchSize how many items can fit into heap memory, must be greater than zero
* @param comparator used to sort data, `null` selects `BTreeMap.COMPARABLE_COMPARATOR`
* @param serializer used to store data in temporary files
* @param type of data
* @return iterator over sorted data set
* @throws IllegalArgumentException if `batchSize` is zero or negative
* @throws IOError wrapping any `IOException` raised while working with the temporary files
*/
public static Iterator sort(final Iterator source, boolean mergeDuplicates, final int batchSize,
Comparator comparator, final Serializer serializer){
if(batchSize<=0) throw new IllegalArgumentException();
//fall back to the default comparator when none was given
if(comparator==null)
comparator=BTreeMap.COMPARABLE_COMPARATOR;
//number of items currently held in presort
int counter = 0;
//on-heap buffer holding one batch
final Object[] presort = new Object[batchSize];
//temporary files, one per flushed batch
final List presortFiles = new ArrayList();
//number of items written into each temporary file, same order as presortFiles
final List presortCount2 = new ArrayList();
try{
while(source.hasNext()){
presort[counter]=source.next();
counter++;
if(counter>=batchSize){
//sort all items
Arrays.sort(presort,comparator);
//flush presort into temporary file
File f = File.createTempFile("mapdb","sort");
f.deleteOnExit();
presortFiles.add(f);
DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(f)));
for(Object e:presort){
serializer.serialize(out,e);
}
out.close();
presortCount2.add(counter);
//reset the buffer for the next batch; every slot is overwritten with the value 0
Arrays.fill(presort,0);
counter = 0;
}
}
//now all records from source are fetched
if(presortFiles.isEmpty()){
//no presort files were created, so on-heap sorting is enough
Arrays.sort(presort,0,counter,comparator);
return arrayIterator(presort,0, counter);
}
//remaining item count per temporary file
final int[] presortCount = new int[presortFiles.size()];
//NOTE(review): the next statement is syntactically incomplete in this copy of the file.
//Text between this loop header and the end of a hasNext() body appears to be missing,
//including the declarations of `iterators`, `ins` and `pos` used below - restore from the original source.
for(int i=0;i0;
}
@Override public Object next() {
try {
//read the next item from the stream at index pos
Object ret = serializer.deserialize(ins[pos],-1);
//once the last counted item was read, close the stream and remove its file
if(--presortCount[pos]==0){
ins[pos].close();
presortFiles.get(pos).delete();
}
return ret;
} catch (IOException e) {
throw new IOError(e);
}
}
@Override public void remove() {
//ignored
}
};
}
//and add iterator over data on-heap
Arrays.sort(presort,0,counter,comparator);
//the last slot is taken by the items which were never flushed to a file
iterators[iterators.length-1] = arrayIterator(presort,0,counter);
//and finally sort presorted iterators and return iterators over them
return sort(comparator, mergeDuplicates, iterators);
}catch(IOException e){
throw new IOError(e);
}finally{
//runs on every exit from this method, so before the returned iterator has been consumed
//NOTE(review): confirm that reading the temporary files still works once they were deleted here
for(File f:presortFiles) f.delete();
}
}
/**
* Merge presorted iterators into single sorted iterator.
*
* @param comparator used to compare data
* @param mergeDuplicates if duplicate keys should be merged into single one
* @param iterators array of already sorted iterators
* @param type of data
* @return sorted iterator
*/
public static Iterator sort(Comparator comparator, final boolean mergeDuplicates, final Iterator... iterators) {
final Comparator comparator2 = comparator==null?BTreeMap.COMPARABLE_COMPARATOR:comparator;
return new Iterator(){
final NavigableSet> items = new TreeSet>(
new Fun.Tuple2Comparator
© 2015 - 2025 Weber Informatics LLC | Privacy Policy