com.marklogic.hub.util.DiskQueue (marklogic-data-hub)
Library for Creating an Operational Data Hub on MarkLogic
/*
 * Copyright (c) 2004-2018 MarkLogic Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * The use of the Apache License does not indicate that this project is
 * affiliated with the Apache Software Foundation.
 *
 * Code adapted from Bixio DiskQueue
 * https://github.com/bixo/bixo/blob/master/src/main/java/bixo/utils/DiskQueue.java
 * Original work Copyright 2009-2015 Scale Unlimited
 * Modifications copyright (c) 2016 MarkLogic Corporation
 */
package com.marklogic.hub.util;
import com.marklogic.hub.HubClientConfig;
import org.apache.commons.lang3.StringUtils;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.security.InvalidParameterException;
import java.text.MessageFormat;
import java.util.AbstractQueue;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * A queue of Strings that writes extra elements to disk, and reads them back in
 * as needed.
 *
 * This implementation is optimized for being filled once (i.e., by the iterator in
 * a reducer) and then incrementally read. It would not work well if reads and
 * writes happened simultaneously once anything had spilled to disk.
 */
public class DiskQueue extends AbstractQueue<String> implements AutoCloseable {
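    // Illustrative usage sketch (not part of the original source): fill once, then
    // drain. The names hubClientConfig, uris, and process() are assumptions.
    //
    //   try (DiskQueue queue = new DiskQueue(hubClientConfig)) {
    //       for (String uri : uris) {
    //           queue.offer(uri);      // spills to a temp file once memory is full
    //       }
    //       String next;
    //       while ((next = queue.poll()) != null) {
    //           process(next);         // refills memory from disk as it drains
    //       }
    //   } // close() deletes the backing file if one was created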
private static final Logger LOG = Logger.getLogger(DiskQueue.class.getName());
private static final float DEFAULT_REFILL_RATIO = 0.75f;
// The memoryQueue represents the head of the queue. It can also be the tail,
// if nothing has spilled over onto the disk.
    final MemoryQueue memoryQueue;
    Iterator<String> memoryIterator;
// Percentage of memory queue used/capacity that triggers a refill from disk.
private final float refillMemoryRatio;
// Number of elements in the backing store file on disk.
int fileElementCount = 0;
private final File tempDir;
private BufferedWriter fileOut;
private BufferedReader fileIn;
// When moving elements from disk to memory, we don't know whether the memory
// queue has space until the offer is rejected. So rather than trying to push
// back an element into the file, just cache it in cachedElement.
String cachedElement;
private File fileQueue;
private static int safeIntCast(float f) {
if (f > Integer.MAX_VALUE) {
return Integer.MAX_VALUE - 1;
}
return (int) f;
}
    /**
     * Construct a disk-backed queue that keeps at most
     * maxStringsInMemory elements in memory.
     *
     * @param hubClientConfig - supplies maxStringsInMemory and the collector temp directory
     */
public DiskQueue(HubClientConfig hubClientConfig) {
super();
        // Default sizing heuristic: treat each string as costing roughly 130 bytes
        // and reserve DEFAULT_REFILL_RATIO of the currently free heap.
        int maxStringsInMemory = hubClientConfig.getMaxStringsInMemory() != 0 ? hubClientConfig.getMaxStringsInMemory() :
            safeIntCast((Runtime.getRuntime().freeMemory() / (float) 130) * DEFAULT_REFILL_RATIO);
File tempDir = StringUtils.isEmpty(hubClientConfig.getCollectorTmpDir()) ? null : new File(hubClientConfig.getCollectorTmpDir());
if (maxStringsInMemory < 1) {
throw new InvalidParameterException(DiskQueue.class.getSimpleName() + " max in-memory size must be at least one");
}
if (tempDir != null && !(tempDir.exists() && tempDir.isDirectory() && tempDir.canWrite())) {
throw new InvalidParameterException(DiskQueue.class.getSimpleName() + " temporary directory must exist and be writable");
}
this.tempDir = tempDir;
        memoryQueue = new MemoryQueue(maxStringsInMemory);
refillMemoryRatio = DEFAULT_REFILL_RATIO;
}
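    // Worked example of the default sizing (illustrative numbers): with ~512 MB of
    // free heap, maxStringsInMemory ≈ (512_000_000 / 130) * 0.75 ≈ 2.9 million
    // strings held in memory before offers start spilling to the temp file.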
@Override
public void close() {
if (closeFile()) {
LOG.warning(MessageFormat.format("{0} still had open file", DiskQueue.class.getSimpleName()));
}
}
    /**
     * Make sure the file streams are closed down and the temp file
     * has been deleted.
     *
     * @return true if we had to close down the file.
     */
private boolean closeFile() {
if (fileQueue == null) {
return false;
}
closeQuietly(fileIn);
fileIn = null;
cachedElement = null;
closeQuietly(fileOut);
fileOut = null;
fileElementCount = 0;
if (!fileQueue.delete()) {
LOG.log(Level.INFO, "Unable to clean up file queue located at " + fileQueue.getAbsolutePath());
}
fileQueue = null;
return true;
}
private static boolean isEmpty(final CharSequence value) {
return value == null || value.length() == 0;
}
private static void closeQuietly(Closeable obj) {
if (obj != null) {
try {
obj.close();
} catch (IOException ex) {
// Ignore
}
}
}
private void openFile() throws IOException {
if (fileQueue == null) {
fileQueue = File.createTempFile(DiskQueue.class.getSimpleName() + "-backingstore-", null, tempDir);
fileQueue.deleteOnExit();
LOG.log(Level.INFO, "created backing store {0}", fileQueue.getAbsolutePath());
fileOut = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(fileQueue.toPath()), StandardCharsets.UTF_8));
// Flush output file, so there's something written when we open the input stream.
fileOut.flush();
            fileIn = new BufferedReader(new InputStreamReader(
                Files.newInputStream(fileQueue.toPath()), StandardCharsets.UTF_8));
}
}
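    // Note: the writer and reader share the same temp file; the reader only sees
    // lines after fileOut.flush() (done again in loadMemoryQueue before reading),
    // which is what makes this single-file pattern safe for fill-then-drain use.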
@Override
    public Iterator<String> iterator() {
return new Itr();
}
@Override
public int size() {
return memoryQueue.size() + fileElementCount + (cachedElement != null ? 1 : 0);
}
@Override
public boolean offer(String element) {
if (element == null) {
throw new NullPointerException("Element cannot be null for AbstractQueue");
}
boolean hasFileQueue = fileQueue != null;
boolean offerRejected = false;
if (!hasFileQueue) {
offerRejected = !memoryQueue.offer(element);
if (offerRejected) {
memoryIterator = memoryQueue.iterator();
}
}
// If there's anything in the file, or the queue is full, then we have to write to the file.
if (hasFileQueue || offerRejected) {
try {
openFile();
fileOut.write(element);
fileOut.newLine();
fileElementCount++;
} catch (IOException e) {
LOG.severe(MessageFormat.format("Error writing to {0} backing store", DiskQueue.class.getSimpleName()));
return false;
}
}
return true;
}
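    // Ordering note (observation, not in the original comments): once anything has
    // spilled, every later offer goes straight to the file even if memory frees up,
    // preserving FIFO order: memoryQueue holds the oldest elements, the file the newest.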
@Override
public String peek() {
loadMemoryQueue();
return memoryQueue.peek();
}
@Override
public String remove() {
loadMemoryQueue();
return memoryQueue.remove();
}
@Override
public String poll() {
loadMemoryQueue();
return memoryQueue.poll();
}
/* (non-Javadoc)
* @see java.util.AbstractQueue#clear()
*
* Implement faster clear (so AbstractQueue doesn't call poll() repeatedly)
*/
@Override
public void clear() {
memoryQueue.clear();
cachedElement = null;
closeFile();
}
    void loadMemoryQueue() {
        // The memory queue is our read buffer; only refill it once it has drained
        // below the refill threshold (refillMemoryRatio of its capacity).
        if (memoryQueue.size() / (float) memoryQueue.getCapacity() >= refillMemoryRatio) {
            return;
        }
// See if we have one saved element from the previous read request
if (cachedElement != null && memoryQueue.offer(cachedElement)) {
cachedElement = null;
}
// Now see if we have anything on disk
if (fileQueue != null) {
try {
// Since we buffer writes, we need to make sure everything has
// been written before we start reading.
fileOut.flush();
while (fileElementCount > 0) {
String nextFileElement = fileIn.readLine();
fileElementCount--;
if (!isEmpty(nextFileElement) && !memoryQueue.offer(nextFileElement)) {
                    // Memory queue is full; cache this entry and bail out.
cachedElement = nextFileElement;
memoryIterator = memoryQueue.iterator();
return;
}
}
memoryIterator = memoryQueue.iterator();
// Nothing left in the file, so close/delete it.
closeFile();
            // The file queue is empty, so we could reset the file length and the
            // read/write offsets to zero instead of closing the file; but for the
            // current fill-once, drain-once use case, closing works just fine.
} catch (IOException e) {
LOG.severe(MessageFormat.format("Error reading from {0} backing store", DiskQueue.class.getSimpleName()));
}
}
}
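    // Example (illustrative): with a capacity of 1,000 and the default ratio of 0.75,
    // a refill is attempted only when fewer than 750 elements remain in memory, so
    // disk reads are amortized across many poll()/remove() calls.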
    private class Itr implements Iterator<String> {
public Itr() {
memoryIterator = memoryQueue.iterator();
}
public boolean hasNext() {
return memoryIterator.hasNext() || fileElementCount > 0 || cachedElement != null;
}
public String next() {
String next = memoryIterator.next();
if (!memoryIterator.hasNext() && (fileElementCount > 0 || cachedElement != null)) {
memoryQueue.clear();
loadMemoryQueue();
}
return next;
}
public void remove() {
memoryIterator.remove();
}
}
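    // Caveat (observation, not in the original comments): once elements have spilled
    // to disk, this iterator is effectively destructive: exhausting the in-memory
    // portion clears the memory queue and refills it from the file, so the queue
    // cannot be re-iterated from the start afterwards.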
    private static class MemoryQueue extends AbstractQueue<String> {
        private final Deque<String> queue;
private final int capacity;
public MemoryQueue(int capacity) {
super();
this.capacity = capacity;
queue = new ArrayDeque<>(capacity);
}
@Override
public void clear() {
queue.clear();
}
@Override
        public Iterator<String> iterator() {
return queue.iterator();
}
public int getCapacity() {
return capacity;
}
@Override
public int size() {
return queue.size();
}
@Override
public boolean offer(String o) {
if (o == null) {
throw new NullPointerException();
} else if (queue.size() >= capacity) {
return false;
} else {
queue.add(o);
return true;
}
}
@Override
public String peek() {
if (queue.isEmpty()) {
return null;
} else {
return queue.peek();
}
}
@Override
public String poll() {
if (queue.isEmpty()) {
return null;
} else {
return queue.poll();
}
}
@Override
public String remove() {
return queue.remove();
}
}
}