// com.uber.hoodie.common.util.collection.LazyFileIterable Maven / Gradle / Ivy
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util.collection;
import com.uber.hoodie.common.util.SpillableMapUtils;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieIOException;
import org.apache.avro.Schema;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Consumer;
import java.util.stream.Collectors;
/**
* Iterable to lazily fetch values spilled to disk.
* This class uses RandomAccessFile to randomly access the position of
* the latest value for a key spilled to disk and returns the result.
* @param
*/
public class LazyFileIterable implements Iterable {
// Used to access the value written at a specific position in the file
private RandomAccessFile readOnlyFileHandle;
// Stores the key and corresponding value's latest metadata spilled to disk
private Map inMemoryMetadataOfSpilledData;
// Schema used to de-serialize payload written to disk
private Schema schema;
// Class used to de-serialize/realize payload written to disk
private String payloadClazz;
public LazyFileIterable(RandomAccessFile file, Map map,
Schema schema, String payloadClazz) {
this.readOnlyFileHandle = file;
this.inMemoryMetadataOfSpilledData = map;
this.schema = schema;
this.payloadClazz = payloadClazz;
}
@Override
public Iterator iterator() {
try {
return new LazyFileIterator<>(readOnlyFileHandle, inMemoryMetadataOfSpilledData, schema, payloadClazz);
} catch(IOException io) {
throw new HoodieException("Unable to initialize iterator for file on disk", io);
}
}
/**
* Iterator implementation for the iterable defined above.
* @param
*/
public class LazyFileIterator implements Iterator {
private RandomAccessFile readOnlyFileHandle;
private Schema schema;
private String payloadClazz;
private Iterator> metadataIterator;
public LazyFileIterator(RandomAccessFile file, Map map,
Schema schema, String payloadClazz) throws IOException {
this.readOnlyFileHandle = file;
this.schema = schema;
this.payloadClazz = payloadClazz;
// sort the map in increasing order of offset of value so disk seek is only in one(forward) direction
this.metadataIterator = map
.entrySet()
.stream()
.sorted((Map.Entry o1, Map.Entry o2) ->
o1.getValue().getOffsetOfValue().compareTo(o2.getValue().getOffsetOfValue()))
.collect(Collectors.toList()).iterator();
}
@Override
public boolean hasNext() {
return this.metadataIterator.hasNext();
}
@Override
public T next() {
Map.Entry entry = this.metadataIterator.next();
try {
return SpillableMapUtils.readFromDisk(readOnlyFileHandle, schema,
payloadClazz, entry.getValue().getOffsetOfValue(), entry.getValue().getSizeOfValue());
} catch(IOException e) {
throw new HoodieIOException("Unable to read hoodie record from value spilled to disk", e);
}
}
@Override
public void remove() {
this.metadataIterator.remove();
}
@Override
public void forEachRemaining(Consumer super T> action) {
action.accept(next());
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy