// org.archive.io.WriterPool
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.io;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
/**
 * Pool of Writers.
 *
 * Abstract. Subclasses implement {@link #makeWriter()} to supply the concrete
 * {@link WriterPoolMember} instances the pool hands out.
 *
 * Idle writers are kept in a fair, bounded {@link ArrayBlockingQueue};
 * {@link #currentActive} (the number of live writers, idle or checked out)
 * is only read or changed while holding this pool's monitor.
 *
 * @author stack
 */
public abstract class WriterPool {
    private final Logger logger = Logger.getLogger(this.getClass().getName());

    /**
     * Used to generate unique filename sequences.
     */
    final protected AtomicInteger serialNo;

    /**
     * Default maximum active number of files in the pool.
     */
    public static final int DEFAULT_MAX_ACTIVE = 1;

    /** Assumed largest possible value of maxActive; pool will have this
     * maximum capacity, so dynamic changes beyond this number won't work. */
    protected static final int LARGEST_MAX_ACTIVE = 255;

    /**
     * Maximum time to wait on a free file before considering
     * making a new one (if not already at max)
     */
    public static final int DEFAULT_MAX_WAIT_FOR_IDLE = 500;

    /**
     * How long {@link #drainAllWriters()} releases the pool monitor between
     * attempts to collect writers that are still checked out.
     */
    private static final long DRAIN_RETRY_INTERVAL_MS = 100L;

    /**
     * File settings.
     * Keep in data structure rather than as individual values.
     */
    protected final WriterPoolSettings settings;

    /** maximum number of writers to create at a time */
    protected int maxActive;

    /** maximum ms to wait before considering creation of a writer */
    protected int maxWait;

    /** current count of active writers; only read/mutated in synchronized blocks */
    protected int currentActive = 0;

    /** round-robin queue of available writers */
    protected BlockingQueue<WriterPoolMember> availableWriters;

    /** system time when writer was last wanted (because one was not ready in time) */
    protected long lastWriterNeededTime;

    /** system time when writer was last 'rolled over' (imminent creation of new file) */
    protected long lastWriterRolloverTime;

    /**
     * Constructor.
     *
     * @param serial Used to generate unique filename sequences
     * @param settings Settings for this pool.
     * @param poolMaximumActive maximum number of writers the pool will create
     * @param poolMaximumWait maximum ms to wait for an idle writer before
     * considering creation of a new one
     */
    public WriterPool(final AtomicInteger serial,
            final WriterPoolSettings settings,
            final int poolMaximumActive, final int poolMaximumWait) {
        logger.info("Initial configuration:" +
                " prefix=" + settings.getPrefix() +
                ", template=" + settings.getTemplate() +
                ", compress=" + settings.getCompress() +
                ", maxSize=" + settings.getMaxFileSizeBytes() +
                ", maxActive=" + poolMaximumActive +
                ", maxWait=" + poolMaximumWait);
        this.settings = settings;
        this.maxActive = poolMaximumActive;
        this.maxWait = poolMaximumWait;
        // Fair queue: waiting borrowers are served in arrival order.
        availableWriters =
            new ArrayBlockingQueue<WriterPoolMember>(LARGEST_MAX_ACTIVE, true);
        this.serialNo = serial;
    }

    /**
     * Check out a {@link WriterPoolMember}.
     *
     * This method should be followed by a call to
     * {@link #returnFile(WriterPoolMember)} or
     * {@link #invalidateFile(WriterPoolMember)} else pool starts leaking.
     *
     * Blocks until a writer is either recycled from the idle queue or newly
     * created. An interrupt received while waiting does not abort the call;
     * the thread's interrupt status is restored before returning so callers
     * can still observe it.
     *
     * @return Writer checked out of a pool of files or created
     * @throws IOException Problem getting Writer from pool (Converted
     * from Exception to IOException so this pool can live as a good citizen
     * down in depths of ARCSocketFactory).
     */
    public WriterPoolMember borrowFile()
    throws IOException {
        WriterPoolMember writer = null;
        boolean interrupted = false;
        try {
            while (writer == null) {
                try {
                    writer = availableWriters.poll(maxWait, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    // Keep trying, but remember the interrupt. Re-asserting it
                    // here would make the next poll() fail immediately.
                    interrupted = true;
                }
                if (writer == null) {
                    writer = makeNewWriterIfAppropriate();
                }
            }
            return writer;
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Create a new writer instance, if still below maxActive count.
     * Remember times to help make later decision when writer should
     * be discarded.
     *
     * The active count and rollover time are only updated once
     * {@link #makeWriter()} has actually produced a writer, so a failing
     * factory call does not permanently consume a pool slot.
     *
     * @return WriterPoolMember or null if already at max
     */
    protected synchronized WriterPoolMember makeNewWriterIfAppropriate() {
        long now = System.currentTimeMillis();
        lastWriterNeededTime = now;
        if (currentActive >= maxActive) {
            return null;
        }
        WriterPoolMember created = makeWriter();
        if (created != null) {
            currentActive++;
            lastWriterRolloverTime = now;
        }
        return created;
    }

    /**
     * @return new WriterPoolMember of appropriate type
     */
    protected abstract WriterPoolMember makeWriter();

    /**
     * Discard a previously-used writer, cleanly closing it and leaving it out
     * of the pool. The active count is decremented before the close is
     * attempted, so it is reduced even when the close fails.
     *
     * @param writer writer to close and forget
     * @throws IOException if closing the writer fails
     */
    public synchronized void destroyWriter(WriterPoolMember writer) throws IOException {
        currentActive--;
        writer.close();
    }

    /**
     * Return a writer, for likely reuse unless (1) writer's current file has
     * reached its target size; and (2) there's been no demand for additional
     * writers since the last time a new writer-file was rolled-over. In that
     * case, the possibly-superfluous writer instance is discarded.
     *
     * @param writer Writer to return to the pool.
     * @throws IOException Problem returning File to pool.
     */
    public void returnFile(WriterPoolMember writer)
    throws IOException {
        synchronized (this) {
            if (writer.isOversize()) {
                // maybe retire writer rather than recycle
                if (lastWriterNeededTime <= lastWriterRolloverTime) {
                    // no timeouts waiting for recycled writer since last writer rollover
                    destroyWriter(writer);
                    return;
                }
                // reuse writer instance, causing new file to be created
                lastWriterRolloverTime = System.currentTimeMillis();
            }
        }
        if (!availableWriters.offer(writer)) {
            logger.log(Level.WARNING, "writer unreturnable to available pool; closing early");
            destroyWriter(writer);
        }
    }

    /**
     * Close and discard a writer that experienced a potentially-corrupting
     * error, then rename its file with
     * {@link WriterPoolMember#INVALID_SUFFIX} so it gets attention.
     *
     * @param f writer with problem
     * @throws IOException if the writer could not be closed; the original
     * failure is attached as the cause
     */
    public synchronized void invalidateFile(WriterPoolMember f)
    throws IOException {
        try {
            destroyWriter(f);
        } catch (Exception e) {
            // Convert exception, keeping the original as the cause.
            throw new IOException(e.getMessage(), e);
        }
        // It'll have been closed. Rename with an '.invalid' suffix so it
        // gets attention.
        File file = f.getFile();
        if (file == null) {
            // Defensive: nothing to rename if the writer reports no file.
            return;
        }
        File invalid = new File(file.getAbsoluteFile() +
                WriterPoolMember.INVALID_SUFFIX);
        if (!file.renameTo(invalid)) {
            logger.log(Level.WARNING, "unable to rename " + file + " to " + invalid);
        }
    }

    /**
     * @return Number of {@link WriterPoolMember}s checked out of pool.
     * @throws java.lang.UnsupportedOperationException
     */
    public synchronized int getNumActive()
    throws UnsupportedOperationException {
        return currentActive - getNumIdle();
    }

    /**
     * @return Number of {@link WriterPoolMember} instances still in the pool.
     * @throws java.lang.UnsupportedOperationException
     */
    public int getNumIdle()
    throws UnsupportedOperationException {
        return availableWriters.size();
    }

    /**
     * Close all {@link WriterPoolMember}s in pool. Failures closing an
     * individual writer are logged and do not stop the remaining writers
     * from being closed.
     */
    public void close() {
        Collection<WriterPoolMember> writers = drainAllWriters();
        for (WriterPoolMember writer : writers) {
            try {
                destroyWriter(writer);
            } catch (IOException e) {
                logger.log(Level.WARNING, "problem closing writer", e);
            }
        }
    }

    /**
     * @return Returns settings.
     */
    public WriterPoolSettings getSettings() {
        return this.settings;
    }

    /**
     * @return State of the pool string
     */
    protected String getPoolState() {
        return "Active " + getNumActive()
                + " of max " + maxActive
                + ", idle " + getNumIdle();
    }

    /**
     * Returns the atomic integer used to generate serial numbers
     * for files.
     *
     * @return the serial number generator
     */
    public AtomicInteger getSerialNo() {
        return serialNo;
    }

    /**
     * Drains all the active writers from {@link #availableWriters}, waiting
     * for any writers currently in use to become available.
     *
     * While waiting, the pool monitor is released periodically (via a timed
     * {@code wait}) so that threads in {@link #returnFile(WriterPoolMember)},
     * which needs the same monitor, can actually hand their writers back.
     * Blocking on the queue while holding the monitor would deadlock against
     * them.
     *
     * If the waiting thread is interrupted, the writers gathered so far are
     * returned and the thread's interrupt status is left set.
     *
     * When finished with writers, call availableWriters.addAll(...) to put them
     * back into the rotation.
     *
     * @return all the active writers
     */
    protected synchronized Collection<WriterPoolMember> drainAllWriters() {
        LinkedList<WriterPoolMember> writers = new LinkedList<WriterPoolMember>();
        availableWriters.drainTo(writers);
        while (writers.size() < currentActive) {
            try {
                WriterPoolMember w = availableWriters.poll();
                if (w != null) {
                    writers.add(w);
                } else {
                    // Gives up the monitor so returnFile()/destroyWriter()
                    // on other threads can make progress, then re-checks.
                    wait(DRAIN_RETRY_INTERVAL_MS);
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                logger.severe("caught " + e + " while waiting for writers to free up; returning only "
                        + writers.size() + " of " + currentActive + " active writers");
                break;
            }
        }
        return writers;
    }

    /**
     * Flush every writer in the pool. All writers are drained, flushed, and
     * then put back into the idle queue; they are put back even if flushing
     * fails unexpectedly.
     */
    public void flush() {
        Collection<WriterPoolMember> writers = drainAllWriters();
        try {
            for (WriterPoolMember writer : writers) {
                try {
                    writer.flush();
                } catch (IOException e) {
                    logger.log(Level.WARNING, "problem flushing writer " + writer, e);
                }
            }
        } finally {
            availableWriters.addAll(writers);
        }
    }

    /**
     * Report the file and position of every writer in the pool. All writers
     * are drained, inspected, and then put back into the idle queue; they
     * are put back even if building the JSON fails.
     *
     * @return array with one object per writer, holding its "file" and
     * "position"
     * @throws JSONException if a value cannot be stored in the JSON object
     */
    public JSONArray jsonStatus() throws JSONException {
        Collection<WriterPoolMember> writers = drainAllWriters();
        try {
            JSONArray ja = new JSONArray();
            for (WriterPoolMember w : writers) {
                JSONObject jo = new JSONObject();
                jo.put("file", w.getFile());
                jo.put("position", w.getPosition());
                ja.put(jo);
            }
            return ja;
        } finally {
            availableWriters.addAll(writers);
        }
    }
}