All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.archive.io.WriterPool Maven / Gradle / Ivy

The newest version!
/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.archive.io;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Pool of Writers.
 *
 * Abstract. Subclasses implement {@link #makeWriter()} to supply the concrete
 * {@link WriterPoolMember} instances managed by the pool.
 *
 * Writers are handed out by {@link #borrowFile()} and must be handed back
 * with {@link #returnFile(WriterPoolMember)} or
 * {@link #invalidateFile(WriterPoolMember)}. The count of live writers
 * ({@link #currentActive}) is guarded by this object's monitor; idle writers
 * wait in {@link #availableWriters}.
 *
 * @author stack
 */
public abstract class WriterPool {
    private final Logger logger = Logger.getLogger(this.getClass().getName());

    /**
     * Used to generate unique filename sequences.
     */
    final protected AtomicInteger serialNo;

    /**
     * Default maximum active number of files in the pool.
     */
    public static final int DEFAULT_MAX_ACTIVE = 1;

    /** Assumed largest possible value of maxActive; pool will have this
     * maximum capacity, so dynamic changes beyond this number won't work. */
    protected static final int LARGEST_MAX_ACTIVE = 255;

    /**
     * Maximum time to wait on a free file before considering
     * making a new one (if not already at max)
     */
    public static final int DEFAULT_MAX_WAIT_FOR_IDLE = 500;

    /**
     * File settings.
     * Keep in data structure rather than as individual values.
     */
    protected final WriterPoolSettings settings;

    /** maximum number of writers to create at a time */
    protected int maxActive;
    /** maximum ms to wait before considering creation of a writer */
    protected int maxWait;
    /** current count of active writers; only read/mutated in synchronized blocks */
    protected int currentActive = 0;
    /** round-robin queue of available writers */
    protected BlockingQueue<WriterPoolMember> availableWriters;

    /** system time when writer was last wanted (because one was not ready in time) */
    protected long lastWriterNeededTime;
    /** system time when writer was last 'rolled over' (imminent creation of new file) */
    protected long lastWriterRolloverTime;

    /**
     * Constructor
     * @param serial  Used to generate unique filename sequences
     * @param settings Settings for this pool.
     * @param poolMaximumActive maximum number of writers alive at once
     * @param poolMaximumWait maximum ms to wait for an idle writer before
     * considering creation of a new one
     */
    public WriterPool(final AtomicInteger serial,
            final WriterPoolSettings settings,
            final int poolMaximumActive, final int poolMaximumWait) {
        logger.info("Initial configuration:" +
                " prefix=" + settings.getPrefix() +
                ", template=" + settings.getTemplate() +
                ", compress=" + settings.getCompress() +
                ", maxSize=" + settings.getMaxFileSizeBytes() +
                ", maxActive=" + poolMaximumActive +
                ", maxWait=" + poolMaximumWait);
        this.settings = settings;
        this.maxActive = poolMaximumActive;
        this.maxWait = poolMaximumWait;
        // fair queue, so waiting borrowers are served in arrival order
        availableWriters =
            new ArrayBlockingQueue<WriterPoolMember>(LARGEST_MAX_ACTIVE, true);
        this.serialNo = serial;
    }

    /**
     * Check out a {@link WriterPoolMember}.
     *
     * This method should be followed by a call to
     * {@link #returnFile(WriterPoolMember)} or
     * {@link #invalidateFile(WriterPoolMember)} else pool starts leaking.
     *
     * Blocks until a writer is available or can be created. An interrupt
     * received while waiting does not abort the wait; the thread's interrupt
     * status is restored before this method returns.
     *
     * @return Writer checked out of a pool of files or created
     * @throws IOException Problem getting Writer from pool (Converted
     * from Exception to IOException so this pool can live as a good citizen
     * down in depths of ARCSocketFactory).
     */
    public WriterPoolMember borrowFile()
    throws IOException {
        WriterPoolMember writer = null;
        boolean interrupted = false;
        try {
            while (writer == null) {
                try {
                    writer = availableWriters.poll(maxWait, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    // Keep waiting, but remember the interrupt. Re-asserting
                    // it right away would make every later poll() fail at
                    // once and turn this loop into a busy spin.
                    interrupted = true;
                }
                if (writer == null) {
                    writer = makeNewWriterIfAppropriate();
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
        return writer;
    }

    /**
     * Create a new writer instance, if still below maxActive count.
     * Remember times to help make later decision when writer should
     * be discarded.
     *
     * @return WriterPoolMember or null if already at max
     */
    protected synchronized WriterPoolMember makeNewWriterIfAppropriate() {
        long now = System.currentTimeMillis();
        lastWriterNeededTime = now;
        if (currentActive >= maxActive) {
            return null;
        }
        currentActive++;
        lastWriterRolloverTime = now;
        boolean created = false;
        try {
            WriterPoolMember fresh = makeWriter();
            created = true;
            return fresh;
        } finally {
            if (!created) {
                // makeWriter() threw: undo the count so drainAllWriters()
                // does not wait for a writer that never came to exist
                currentActive--;
            }
        }
    }

    /**
     * @return new WriterPoolMember of appropriate type
     */
    protected abstract WriterPoolMember makeWriter();

    /**
     * Discard a previously-used writer, cleanly closing it and leaving it out
     * of the pool. The active count is decremented before the close is
     * attempted, so it drops even if closing fails.
     * @param writer writer to close and forget
     * @throws IOException if closing the writer fails
     */
    public synchronized void destroyWriter(WriterPoolMember writer) throws IOException {
        currentActive--;
        writer.close();
    }

    /**
     * Return a writer, for likely reuse unless (1) writer's current file has
     * reached its target size; and (2) there's been no demand for additional
     * writers since the last time a new writer-file was rolled-over. In that
     * case, the possibly-superfluous writer instance is discarded.
     * @param writer Writer to return to the pool.
     * @throws IOException Problem returning File to pool.
     */
    public void returnFile(WriterPoolMember writer)
    throws IOException {
        synchronized (this) {
            if (writer.isOversize()) {
                // maybe retire writer rather than recycle
                if (lastWriterNeededTime <= lastWriterRolloverTime) {
                    // no timeouts waiting for recycled writer since last writer rollover
                    destroyWriter(writer);
                    return;
                }
                // reuse writer instance, causing new file to be created
                lastWriterRolloverTime = System.currentTimeMillis();
            }
        }
        if (!availableWriters.offer(writer)) {
            logger.log(Level.WARNING, "writer unreturnable to available pool; closing early");
            destroyWriter(writer);
        }
    }

    /**
     * Close and discard a writer that experienced a potentially-corrupting
     * error, then rename its file with the
     * {@link WriterPoolMember#INVALID_SUFFIX} suffix so it gets attention.
     * @param f writer with problem
     * @throws IOException if the writer could not be closed; the original
     * failure is attached as the cause
     */
    public synchronized void invalidateFile(WriterPoolMember f)
    throws IOException {
        try {
            destroyWriter(f);
        } catch (Exception e) {
            // Convert exception, keeping the original as the cause.
            throw new IOException(e.getMessage(), e);
        }
        // It'll have been closed.  Rename with an '.invalid' suffix so it
        // gets attention.
        File file = f.getFile();
        if (file == null) {
            // NOTE(review): presumably a writer that never opened a file
            // reports null here - confirm against WriterPoolMember.
            return;
        }
        File invalid = new File(file.getAbsoluteFile() +
                WriterPoolMember.INVALID_SUFFIX);
        if (!file.renameTo(invalid)) {
            logger.log(Level.WARNING,
                    "unable to rename " + file + " to " + invalid);
        }
    }

    /**
     * @return Number of {@link WriterPoolMember}s checked out of pool.
     * @throws java.lang.UnsupportedOperationException
     */
    public synchronized int getNumActive()
    throws UnsupportedOperationException {
        return currentActive - getNumIdle();
    }

    /**
     * @return Number of {@link WriterPoolMember} instances still in the pool.
     * @throws java.lang.UnsupportedOperationException
     */
    public int getNumIdle()
    throws UnsupportedOperationException {
        return availableWriters.size();
    }

    /**
     * Close all {@link WriterPoolMember}s in pool. Failures to close an
     * individual writer are logged and do not stop the remaining writers
     * from being closed.
     */
    public void close() {
        Collection<WriterPoolMember> writers = drainAllWriters();
        for (WriterPoolMember writer : writers) {
            try {
                destroyWriter(writer);
            } catch (IOException e) {
                logger.log(Level.WARNING, "problem closing writer", e);
            }
        }
    }

    /**
     * @return Returns settings.
     */
    public WriterPoolSettings getSettings() {
        return this.settings;
    }

    /**
     * @return State of the pool string
     */
    protected String getPoolState() {
        StringBuilder buffer = new StringBuilder("Active ");
        buffer.append(getNumActive());
        buffer.append(" of max ");
        buffer.append(maxActive);
        buffer.append(", idle ");
        buffer.append(getNumIdle());
        return buffer.toString();
    }

    /**
     * Returns the atomic integer used to generate serial numbers
     * for files.
     *
     * @return  the serial number generator
     */
    public AtomicInteger getSerialNo() {
        return serialNo;
    }

    /**
     * Drains all the active writers from {@link #availableWriters}, blocking to
     * wait for any writers currently in use to become available.
     *
     * When finished with writers, call availableWriters.addAll(...) to put them
     * back into the rotation.
     *
     * If the wait is interrupted, the writers collected so far are returned
     * and the thread's interrupt status is restored.
     *
     * NOTE(review): this method holds the pool monitor while blocked in
     * take(), and {@link #returnFile(WriterPoolMember)} needs that same
     * monitor before it can offer a writer back. Callers should presumably
     * only drain when no writers are checked out - confirm.
     *
     * @return all the active writers
     */
    protected synchronized Collection<WriterPoolMember> drainAllWriters() {
        LinkedList<WriterPoolMember> writers = new LinkedList<WriterPoolMember>();
        availableWriters.drainTo(writers);

        while (writers.size() < currentActive) {
            try {
                WriterPoolMember w = availableWriters.take();
                writers.add(w);
            } catch (InterruptedException e) {
                logger.severe("caught " + e + " while waiting for writers to free up; returning only "
                        + writers.size() + " of " + currentActive + " active writers");
                Thread.currentThread().interrupt();
                break;
            }
        }

        return writers;
    }

    /**
     * Flush every writer in the pool. A failure to flush one writer is
     * logged and the remaining writers are still flushed. The drained
     * writers are always put back into rotation afterwards.
     */
    public void flush() {
        Collection<WriterPoolMember> writers = drainAllWriters();
        try {
            for (WriterPoolMember writer : writers) {
                try {
                    writer.flush();
                } catch (IOException e) {
                    logger.log(Level.WARNING, "problem flushing writer " + writer, e);
                }
            }
        } finally {
            // return the writers even on an unexpected failure, else the
            // pool is left empty while currentActive still counts them
            availableWriters.addAll(writers);
        }
    }

    /**
     * Report the file and position of every writer in the pool.
     *
     * @return array with one object per writer, each holding "file" and
     * "position" entries
     * @throws JSONException if the status objects cannot be built; the
     * drained writers are put back into rotation regardless
     */
    public JSONArray jsonStatus() throws JSONException {
        Collection<WriterPoolMember> writers = drainAllWriters();
        try {
            JSONArray ja = new JSONArray();
            for (WriterPoolMember w : writers) {
                JSONObject jo = new JSONObject();
                jo.put("file", w.getFile());
                jo.put("position", w.getPosition());
                ja.put(jo);
            }
            return ja;
        } finally {
            availableWriters.addAll(writers);
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy