// mq5.0-source.main.persist.disk-io.src.main.java.com.sun.messaging.jmq.io.disk.VRFile Maven / Gradle / Ivy
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright (c) 2000-2012 Oracle and/or its affiliates. All rights reserved.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common Development
* and Distribution License("CDDL") (collectively, the "License"). You
* may not use this file except in compliance with the License. You can
* obtain a copy of the License at
* https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
* or packager/legal/LICENSE.txt. See the License for the specific
* language governing permissions and limitations under the License.
*
* When distributing the software, include this License Header Notice in each
* file and include the License file at packager/legal/LICENSE.txt.
*
* GPL Classpath Exception:
* Oracle designates this particular file as subject to the "Classpath"
* exception as provided by Oracle in the GPL Version 2 section of the License
* file that accompanied this code.
*
* Modifications:
* If applicable, add the following below the License Header, with the fields
* enclosed by brackets [] replaced by your own identifying information:
* "Portions Copyright [year] [name of copyright owner]"
*
* Contributor(s):
* If you wish your version of this file to be governed by only the CDDL or
* only the GPL Version 2, indicate your decision by adding "[Contributor]
* elects to include this software in this distribution under the [CDDL or GPL
* Version 2] license." If you don't indicate a single choice of license, a
* recipient has the option to distribute your version of this file under
* either the CDDL, the GPL Version 2 or to extend the choice of license to
* its licensees as provided above. However, if you add GPL Version 2 code
* and therefore, elected the GPL Version 2 license, then the option applies
* only if the new code is made subject to such option by the copyright
* holder.
*/
/*
* @(#)VRFile.java 1.28 06/27/07
*/
package com.sun.messaging.jmq.io.disk;
import java.io.*;
import java.nio.*;
import java.nio.channels.*;
import java.util.*;
import com.sun.messaging.jmq.resources.*;
/**
* VRFile is a simple implementation of a variable sized record based file.
* The implementation is optimized for simiplicity and speed at the expense
* of diskspace.
*
* A VRFile is backed by a disk file. The file is split into records as
* records are requested by the caller. Record size is defined to be a
* multiple of a block size to increase the likelyhood of record-reuse.
* Free'd records are tracked for re-use. Overtime it is possible for
* holes to occur in the file of record sizes that are never re-used.
* A compact() method is provided to compact() the backing file -- this
* could be an expensive operation depending on the amount of data in the file.
*
* The initial design will support no record splitting or coelesceing
* (except via compact).
* Free'd buffers will simply be put on a free list and re-used
* if they are large enough to satisfy an allocate() request. This is
* based on the assumption that buffer sizes will not vary wildly for
* any given allocator.
*
*
* Specific Details:
*
*
* The backing file has the following header:
0 1 2 3
|0 1 2 3 4 5 6 7|8 9 0 1 2 3 4 5|6 7 8 9 0 1 2 3|4 5 6 7 8 9 0 1|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
0| magic # |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
4| version | reserved for later use |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
8| |
+ Application cookie +
12| |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
16| |
+ index to properties record |
20| |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* magic #: A constant that helps us validate the file is of
* the correct type
* version: Identifies the version of the file
* application cookie: 64 bits reserved for use by the application. This
* gives applications place to store version information, etc.
*
* Each allocated record has the following header:
*
0 1 2 3
|0 1 2 3 4 5 6 7|8 9 0 1 2 3 4 5|6 7 8 9 0 1 2 3|4 5 6 7 8 9 0 1|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| magic # |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| capacity |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| state | reserved for later use |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* magic #: Helps identify the start of an allocation. It is only used
* as a sanity check to help ensure we don't read bad data. It
* can also be used to search for the start of an allocation if
* a file gets corrupted.
* capacity: The size of the allocation including this header
*
state: State of the allocation. One of:
1 = Free
2 = Allocated
3 = Last
4 = Properties Record
5 = Pending (operation is pending).
*
* When an allocator is first created the backing file is created of size
* initialCapacity and the entire file is mapped. The header is written.
*
* When the first allocation request comes in the allocation size is
* rounded up to be a multiple of the block size. A slice is created from
* the MappedByteBuffer and the record header is written with a state
* of ALLOCATED. A VRecord is created and added to a list of
* allocated records and returned.
*
* When a record is free'd it's state is updated to FREE and it is
* moved from the allocated list to the free list that is sorted by capacity.
*
* When subsequent allocation requests come in the free list is searched
* first for a buffer that is of sufficient capacity. If one is found
* it is marked as ALLOCATED, moved to the allocated list and returned.
* If one is not found a new one is allocated from the backing file.
* If the file is not large enough we grow it using the growth factor.
*
* When compact() is called a new empty file is created and all records
* whose state is not free are written to it. The file is then grown to be
* of at least initialCapacity if need be. The old file is renamed to back
* it up, and the new file is renamed to become the new backing store.
* The free list is disgarded and the new file is loaded.
*
* When a file is loaded, the header is read to verify file
* type and version. Then the file is scanned and records are placed on the
* free and allocated lists as dictated by their state.
*
* Notes:
*
* - We should be able to easily extend VRecord to provide an indexed
* file where the key is always a long.
*
* The current message store also has this risk (if the system crashes
* while a message is being stored), but the consequence is loosing at
* most one message. We must be sure that if a record is corrupted we
* loose at most that one record and not the entire file. That is why
* we have magic numbers in the record headers (maybe they should be longer?)
* so the code has a chance to recover. That is also why we don't mess
* with storing any indexes, free lists, etc on disk -- that is just
* more stuff that can become corrupted.
*
*/
public abstract class VRFile {
// debug tracing, enabled with -Dvrfile.debug=true
private static boolean DEBUG = Boolean.getBoolean("vrfile.debug");
// file version 1 definitions:
public static final short FILE_VERSION_1 = 1;
public static final int FILE_HEADER_SIZE_1 = 16;
public static final short STATE_LAST_1 = 0;
public static final short STATE_FREE_1 = 1;
public static final short STATE_ALLOCATED_1 = 2;
// file header constants
public static final short FILE_VERSION = 2;
public static final int FILE_HEADER_SIZE = 24;
public static final int FILE_MAGIC_NUMBER = 0x5555AAAA;
public static final short RESERVED_SHORT = 0;
// record header constants
public static final int RECORD_HEADER_SIZE = 12;
public static final int RECORD_MAGIC_NUMBER = 0xAAAA5555;
// byte offsets of the fields within a record header (see class javadoc)
public static final int RECORD_CAPACITY_OFFSET = 4;
public static final int RECORD_STATE_OFFSET = 8;
public static final int RECORD_COOKIE_OFFSET = 10;
// negative pseudo-states used to flag bad/unscannable records;
// these never appear on disk
public static final short STATE_CUTOFF = -1;
public static final short STATE_BAD_MAGIC_NUMBER = -2;
public static final short STATE_BAD_STATE = -3;
public static final short STATE_BAD_CAPACITY_TOO_SMALL = -4;
public static final short STATE_BAD_NEXT_MAGIC_NUMBER = -5;
public static final short STATE_BAD_CAPACITY = -6;
public static final short STATE_BAD_TRUNCATED_HEADER = -7;
// on-disk record states for file version 2 (see class javadoc)
public static final short STATE_FREE = 1;
public static final short STATE_ALLOCATED = 2;
public static final short STATE_LAST = 3;
public static final short STATE_PROPERTIES = 4;
// internal, file-version-independent marker for the last record;
// adjustRecordState() maps STATE_LAST_1/STATE_LAST to this value
protected static final short _STATE_LAST = 1001;
public static final int SHORT_LEN = 2; // len of short
public static final int INT_LEN = 4; // len of int
public static final int LONG_LEN = 8; // len of long
// Default block size is 128 bytes
public final static int DEFAULT_BLOCK_SIZE = 128;
// Default initial file size is 10 MB
public final static long DEFAULT_INITIAL_FILE_SIZE = 10 * (1024 * 1024);
// smallest useful file: the file header plus one record header
public final static long MINIMUM_INITIAL_FILE_SIZE
= FILE_HEADER_SIZE + RECORD_HEADER_SIZE;
// Default growth factor is 50%
public final static float DEFAULT_GROWTH_FACTOR = 0.5f;
public final static float DEFAULT_THRESHOLD_FACTOR = 0.0f;
public final static long DEFAULT_THRESHOLD = 0;
// presumably the file size at which growth switches to thresholdFactor;
// TODO confirm exact use in the concrete subclass
protected long threshold = DEFAULT_THRESHOLD;
// growth factor after the threshold is reached
protected float thresholdFactor = DEFAULT_THRESHOLD_FACTOR;
// Block size. Every allocation is a multiple of this.
protected int blockSize = DEFAULT_BLOCK_SIZE;
// Initial file size. When a new (empty) file is created a file of this
// size will be mapped.
protected long initialFileSize = DEFAULT_INITIAL_FILE_SIZE;
// The amount the file grows by when more backing storage is needed.
// If this is 1.0 then the file size doubles. If it is 0.25 then
// the file is grown by 1/4. In general we want to minimize the
// number of MappedByteBuffers we allocate.
protected float growthFactor = DEFAULT_GROWTH_FACTOR;
// If true do all we can do ensure data is persisted to disk as
// soon as possible. This sacrifices performance for safety.
protected boolean safe = false;
// information about our backing file
protected short fileversion = 2; // will be changed if a file is loaded
protected long fileSize = 0; // current file size
protected long filePointer = 0; // ptr to end of allocated section
protected File backingFile = null; // name of backing file
// all allocated buffers
protected HashSet allocated = null;
protected long bytesAllocated = 0; // total capacity of allocated records
// free buffers: map hashed by capacity
// (capacity->LinkedList of free records)
protected TreeMap freeMap = null; // hashed by capacity
protected int numFree = 0; // number of free records
// application cookie written to the file header (see class javadoc)
protected long cookie = 0;
long fileCookie = 0; // the value of the cookie read from the file
// record header with a state of STATE_LAST marking the last record
// in the file; this record spans to the end of the file
protected byte[] lastRecordHeader = null;
// whether the file is opened and loaded
protected boolean opened = false;
// accumulated non-fatal problems found while loading/compacting
protected VRFileWarning warning = null;
// statistics
protected int hits = 0; // allocations satisfied from the free list
protected int misses = 0; // allocations that extended the backing store
// if true, minimize writes to disk -- TODO confirm semantics in subclass
protected boolean isMinimumWrites = false;
// if true, guard file operations against thread interrupts -- TODO
// confirm exact semantics in the concrete subclass
protected boolean interruptSafe = false;
/**
 * Create a VRFile backed by the given file.
 *
 * @param file            the backing file on disk
 * @param size            requested initial file size; clamped up to
 *                        MINIMUM_INITIAL_FILE_SIZE if smaller
 * @param isMinimumWrites whether to minimize writes to disk
 * @param interruptSafe   whether to guard operations against interrupts
 */
protected VRFile(File file, long size, boolean isMinimumWrites, boolean interruptSafe) {
    if (DEBUG) {
        System.out.println("backing file: "+file);
    }
    this.isMinimumWrites = isMinimumWrites;
    this.interruptSafe = interruptSafe;
    this.backingFile = file;
    this.initialFileSize = Math.max(size, MINIMUM_INITIAL_FILE_SIZE);

    // pre-build the header bytes of the "last record" marker:
    // magic number, zero capacity, STATE_LAST, reserved short
    ByteBuffer hdr = ByteBuffer.wrap(new byte[RECORD_HEADER_SIZE]);
    hdr.putInt(RECORD_MAGIC_NUMBER);
    hdr.putInt(0);
    hdr.putShort(STATE_LAST);
    hdr.putShort(RESERVED_SHORT);
    this.lastRecordHeader = hdr.array();

    this.allocated = new HashSet(1000);
    this.freeMap = new TreeMap();
}
/**
 * @return whether this file was configured to minimize disk writes
 */
protected boolean isMinimumWrites() {
    return this.isMinimumWrites;
}
/**
 * @return the file backing this VRFile on disk
 */
public File getFile() {
    return this.backingFile;
}
/**
 * Return the version of the backing file's on-disk format.
 * (The previous javadoc here wrongly described getNRecords().)
 * May only be called on an opened file; the version is read from
 * the file header when the file is loaded.
 */
public short getFileVersion() {
checkOpen();
return fileversion;
}
/**
 * Open and load the backing file.
 * If the backing file does not exist, it will be created and its
 * size set to the initial file size. Otherwise, all records
 * (allocated or free) will be loaded in memory.
 *
 * @throws IOException if the file cannot be created or read
 * @throws VRFileWarning if recoverable inconsistencies were found
 */
public abstract void open() throws IOException, VRFileWarning;
// Close the VRFile and free up any resources
public abstract void close();
/**
 * Drop all in-memory record bookkeeping and statistics and mark the
 * file as not opened. Does not touch the backing file itself.
 */
protected void reset() {
    allocated.clear();
    freeMap.clear();
    numFree = 0;
    bytesAllocated = 0;
    hits = 0;
    misses = 0;
    opened = false;
}
/**
 * Set safe mode.
 * If true, do all we can to ensure data is persisted to disk as
 * soon as possible, trading performance for safety. It is still up
 * to the caller to invoke VRecord.force() to force writes to disk.
 *
 * @param safe whether to favor safety over performance
 */
public void setSafe(boolean safe) {
    this.safe = safe;
}
/**
 * @return whether safe mode is enabled
 */
public boolean getSafe() {
    return safe;
}
/**
 * Set the block size to use. Every record will be a multiple of
 * this. Default is 128 bytes. You may change the block size at any
 * time. We round up to the block size since it is a way for us to
 * improve the likelyhood of similar sized requests reusing buffers.
 *
 * @param n the new block size in bytes; must be greater than zero
 * @throws IllegalArgumentException if n is zero or negative
 */
public void setBlockSize(int n) {
    if (n <= 0) {
        // fixed typo in the error message: "postive" -> "positive"
        throw new IllegalArgumentException(
            "Block size must be positive. Illegal block size: " + n);
    }
    blockSize = n;
}
/**
 * @return the block size every allocation is rounded up to
 */
public int getBlockSize() {
    return this.blockSize;
}
/**
 * Set the amount the file grows by when more backing storage is
 * needed. Default is 0.5 (i.e. grow by 50%).
 *
 * @param n the new growth factor; must be greater than zero
 * @throws IllegalArgumentException if n is zero or negative
 */
public void setGrowthFactor(float n) {
    if (n <= 0) {
        // fixed typo in the error message: "postive" -> "positive"
        throw new IllegalArgumentException(
            "Growth factor must be positive. Illegal growth factor: " + n);
    }
    growthFactor = n;
}
/**
 * @return the factor by which the file grows when it must be extended
 */
public float getGrowthFactor() {
    return this.growthFactor;
}
/**
 * Return the number of allocated records currently in the file.
 * The file must be open.
 */
public int getNRecords() {
    checkOpen();
    return this.allocated.size();
}
/**
 * Return the number of free records currently in the file.
 * The file must be open.
 */
public int getNFreeRecords() {
    checkOpen();
    return this.numFree;
}
/**
 * Get the number of allocated bytes (total capacity of all
 * allocated records). The file must be open.
 */
public long getBytesUsed() {
    checkOpen();
    return this.bytesAllocated;
}
/**
 * Get the number of free bytes.
 * Note that filePointer (end of the allocated section) is used
 * instead of fileSize, and the file header is excluded.
 */
public long getBytesFree() {
    checkOpen();
    // header size depends on the on-disk format version we loaded
    long headerSize;
    if (fileversion == FILE_VERSION) {
        headerSize = FILE_HEADER_SIZE;
    } else {
        headerSize = FILE_HEADER_SIZE_1;
    }
    return filePointer - getBytesUsed() - headerSize;
}
/**
 * Get the number of hits. A hit is when an allocation request is
 * satisfied by re-using a record on the free list.
 */
public int getHits() {
    return this.hits;
}
/**
 * Get the number of misses. A miss is when a new record must be
 * allocated from the backing store.
 */
public int getMisses() {
    return this.misses;
}
/**
 * Get the hit ratio. 1 means records are always re-used, 0 means
 * they never are, 0.5 means half of the allocations re-used a
 * free record. Returns 0 when no allocations have been made yet.
 */
public float getHitRatio() {
    int total = hits + misses;
    return (total == 0) ? 0 : ((float) hits) / total;
}
/**
 * Get the fragmentation ratio. This returns some measurement of
 * how badly the file is fragmented. A value of 1 means the file is
 * totally fragmented. A value of 0 means the file is not fragmented
 * at all. WARNING! This may be an expensive operation.
 * [To be honest I'm not sure how to compute this, but we need something
 * to give the caller an indication of when it is time to compact()].
 *
 * @throws UnsupportedOperationException always; not implemented yet
 */
public float getFragmentationRatio() {
throw new UnsupportedOperationException();
}
/**
 * Get the utilization ratio (allocated bytes / total usable bytes).
 * 1 means the file is 100% used, 0 means it is not used at all.
 * Returns 1 when nothing beyond the header has been touched yet.
 */
public float getUtilizationRatio() {
    // exclude the file header from the usable total
    long headerSize = (fileversion == FILE_VERSION)
        ? FILE_HEADER_SIZE : FILE_HEADER_SIZE_1;
    long total = filePointer - headerSize;
    if (total == 0) {
        return 1;
    }
    return ((float) getBytesUsed()) / total;
}
/**
 * Get a map of what the record layout of the file looks like. The
 * array represents the sequence of records. The value represents
 * the record's size (capacity). If the value is positive the record
 * is allocated, if the value is negative the record is free.
 * WARNING! This may be an expensive operation.
 *
 * @throws IOException if the backing file cannot be scanned
 */
public abstract int[] getMap() throws IOException;
/**
 * Allocate a record of at least size "size". The actual size allocated
 * will be a multiple of the block size and may be larger than requested.
 * The actual size allocated can
 * be determined by inspecting the returned record's capacity().
 *
 * @param size minimum number of bytes required
 * @throws IOException if the backing file cannot be grown
 */
public abstract VRecord allocate(int size) throws IOException;
/**
 * Find a free record whose capacity is at least "size".
 * An exact-capacity match is preferred; otherwise the free map is
 * scanned in ascending capacity order for the first non-empty list.
 *
 * @return a free record removed from the free map, or null if none
 *         is large enough
 */
protected VRecord findFreeRecord(int size) {
    Integer key = new Integer(size);

    // try an exact capacity match first
    LinkedList exact = (LinkedList) freeMap.get(key);
    if (exact != null && !exact.isEmpty()) {
        return (VRecord) exact.removeLast();
    }

    // otherwise take the first non-empty list of capacity >= size
    for (Iterator itr = freeMap.tailMap(key).entrySet().iterator();
            itr.hasNext();) {
        Map.Entry entry = (Map.Entry) itr.next();
        LinkedList candidates = (LinkedList) entry.getValue();
        if (candidates != null && !candidates.isEmpty()) {
            return (VRecord) candidates.removeLast();
        }
    }
    return null;
}
/**
 * Free a record. The record is removed from the allocated set,
 * placed on the free list, and its on-disk state is updated to FREE
 * (and forced to disk if safe mode is on).
 *
 * @param vr the record to free; must have been allocated by this file
 * @throws IllegalStateException if the record is not in the allocated set
 * @throws IOException if updating the record's on-disk state fails
 */
public synchronized void free(VRecord vr) throws IOException {
    checkOpenAndWrite();
    if (DEBUG) {
        System.out.println("free record:"+vr);
    }
    boolean in = allocated.remove(vr);
    if (!in) {
        String msg = backingFile.toString() + ":" + vr;
        throw new IllegalStateException(
            SharedResources.getResources().getString(
            SharedResources.E_UNRECOGNIZED_VRECORD,
            msg));
    }
    bytesAllocated -= vr.getCapacity();
    putFreeList(vr, false);
    try {
        vr.free();
        if (safe) {
            vr.force();
        }
    } catch (BufferOverflowException e) {
        String errmsg = "Failed to free vrecord:" + backingFile.toString()
            + ":" + vr + ":";
        IOException ioe = new IOException(errmsg + e.toString());
        // preserve the original stack trace instead of discarding it
        ioe.initCause(e);
        throw ioe;
    }
}
/**
 * Get all the allocated records in no particular order. This is
 * typically called immediately after open() to get all records
 * that contain data.
 *
 * @return a snapshot copy; callers cannot mutate our bookkeeping
 */
public synchronized Set getRecords() {
    checkOpen();
    HashSet snapshot = (HashSet) allocated.clone();
    return snapshot;
}
/**
 * Compact the file. This may be a very time consuming operation.
 * compact() may only be called on a closed file. After compact()
 * completes the file may be open()ed.
 *
 * All records whose state is not free are copied to a temp file,
 * the old file is renamed to a .bak backup, the temp file takes the
 * backing file's name, and the backup is deleted.
 *
 * Fix: both RandomAccessFile handles are now closed on every exit
 * path (previously they leaked if any read/write threw mid-copy).
 *
 * @throws IllegalStateException if the file is open or of an old version
 * @throws IOException on any read/write/rename failure
 * @throws VRFileWarning if bad records were skipped during the copy
 */
public synchronized void compact() throws IOException, VRFileWarning {
    if (opened) {
        throw new IllegalStateException(
            SharedResources.getResources().getString(
            SharedResources.E_CANNOT_COMPACT_ON_OPENED_FILE));
    }
    if (fileversion < FILE_VERSION) {
        throw new IllegalStateException(backingFile +
            "Cannot compact a file of version " + fileversion);
    }
    if (!backingFile.exists() || backingFile.length() == 0) {
        // nothing to do
        return;
    }
    int numtransferred = 0;
    int numfreeskipped = 0;
    // temp file that receives the live records
    File tempfile = new File(backingFile.getParentFile(),
        (backingFile.getName() + ".temp"));
    // backing file
    RandomAccessFile from = new RandomAccessFile(backingFile, "r");
    RandomAccessFile temp = null;
    boolean copied = false; // true once the copy loop finished cleanly
    try {
        // check file header
        byte[] barray = new byte[FILE_HEADER_SIZE];
        int num = from.read(barray);
        if (num != FILE_HEADER_SIZE) {
            throw new IOException(
                SharedResources.getResources().getString(
                SharedResources.E_UNRECOGNIZED_VRFILE_FORMAT,
                backingFile, new Integer(num)));
        }
        checkFileHeader(ByteBuffer.wrap(barray));
        temp = new RandomAccessFile(tempfile, "rw");
        // write header
        temp.write(barray);
        // write all allocated records to the temp file
        long filelength = from.length();
        long frompos = from.getFilePointer();
        boolean done = false;
        ByteBuffer recordheader =
            ByteBuffer.wrap(new byte[RECORD_HEADER_SIZE]);
        while (!done) {
            int capacity = 0;
            short state = getRecordState(from, recordheader, frompos,
                filelength);
            switch (state) {
            case STATE_ALLOCATED:
            case STATE_PROPERTIES:
                capacity = recordheader.getInt(RECORD_CAPACITY_OFFSET);
                byte buf[] = new byte[capacity];
                from.seek(frompos);
                // NOTE(review): read() may return fewer than capacity
                // bytes; behavior kept as-is -- a truncated tail is
                // detected by the record scan on the next open()
                from.read(buf);
                temp.write(buf);
                numtransferred++;
                break;
            case STATE_FREE:
                // skip the free record; just advance past its capacity
                capacity = recordheader.getInt(RECORD_CAPACITY_OFFSET);
                numfreeskipped++;
                break;
            case _STATE_LAST:
                writeLastRecordHeader(temp);
                done = true;
                break;
            case STATE_BAD_MAGIC_NUMBER:
            case STATE_BAD_NEXT_MAGIC_NUMBER:
            case STATE_BAD_STATE:
            case STATE_BAD_CAPACITY:
            case STATE_BAD_CAPACITY_TOO_SMALL:
            case STATE_BAD_TRUNCATED_HEADER:
                // skip over the bad ones and just continue with the next
                // good one if one can be found
                long nextstart = findGoodRecord(from, frompos, filelength);
                if (nextstart == filelength) {
                    // no more good record found; write last record header
                    writeLastRecordHeader(temp);
                    done = true;
                } else {
                    frompos = nextstart;
                }
                addCompactWarning(getNewWarning(), state, frompos,
                    recordheader, nextstart);
                break;
            }
            frompos += capacity;
            from.seek(frompos);
        }
        copied = true;
    } finally {
        // always release both handles; a close failure (e.g. flush
        // error) is only propagated when the copy itself succeeded,
        // so it never masks the primary exception
        IOException closeEx = null;
        if (temp != null) {
            try {
                temp.close();
            } catch (IOException e) {
                closeEx = e;
            }
        }
        try {
            from.close();
        } catch (IOException e) {
            if (closeEx == null) {
                closeEx = e;
            }
        }
        if (copied && closeEx != null) {
            throw closeEx;
        }
    }
    if (DEBUG) {
        System.out.println("compact(): size of original file is " +
            backingFile.length());
        System.out.println("compact(): size of new file is " +
            tempfile.length());
    }
    // rename backingFile to backupfile
    File backupFile = new File(backingFile.getParentFile(),
        (backingFile.getName() + ".bak"));
    if (!backingFile.renameTo(backupFile)) {
        throw new IOException(
            SharedResources.getResources().getString(
            SharedResources.E_RENAME_TO_BACKUP_FILE_FAILED,
            backingFile, backupFile));
    }
    // rename tempfile to backingFile
    if (!tempfile.renameTo(backingFile)) {
        throw new IOException(
            SharedResources.getResources().getString(
            SharedResources.E_RENAME_TO_BACKING_FILE_FAILED,
            tempfile, backingFile));
    }
    // get rid of the backupfile
    if (!backupFile.delete()) {
        throw new IOException(
            SharedResources.getResources().getString(
            SharedResources.E_DELETE_BACKUP_FILE_FAILED,
            backupFile));
    }
    // reset counts
    hits = 0;
    misses = 0;
    if (DEBUG) {
        System.out.println("compact(): number of records written is "+
            numtransferred);
        System.out.println("compact(): number of free records skipped is "+
            numfreeskipped);
    }
    // make sure warning is thrown at last so that
    // all processing can be done
    if (warning != null) {
        throw warning;
    }
}
/**
 * Normalize a raw on-disk state into a file-version-independent value:
 * the "last record" state of either file version is mapped to
 * _STATE_LAST, and out-of-range values are mapped to STATE_BAD_STATE.
 *
 * @param fversion on-disk format version the state was read from
 * @param s        raw state value read from a record header
 * @return _STATE_LAST, STATE_BAD_STATE, or the state unchanged
 */
protected short adjustRecordState(short fversion, short s) {
    if (fversion == FILE_VERSION_1) {
        // version 1: valid range is STATE_LAST_1..STATE_ALLOCATED_1
        if (s < STATE_LAST_1 || s > STATE_ALLOCATED_1) {
            return STATE_BAD_STATE;
        }
        return (s == STATE_LAST_1) ? _STATE_LAST : s;
    }
    // version 2 (and later): valid range is STATE_FREE..STATE_PROPERTIES
    if (s < STATE_FREE || s > STATE_PROPERTIES) {
        return STATE_BAD_STATE;
    }
    return (fversion == FILE_VERSION && s == STATE_LAST) ? _STATE_LAST : s;
}
// file pointer is at the record to be checked
// pos and limit are passed in to avoid invoking RandomAccessFile methods
short getRecordState(RandomAccessFile file, ByteBuffer recordheader,
long pos, long limit) throws IOException {
// read record header
int n = file.read(recordheader.array());
if (n != RECORD_HEADER_SIZE) {
return STATE_BAD_TRUNCATED_HEADER;
}
recordheader.rewind();
int magic = recordheader.getInt();
int capacity = recordheader.getInt();
short state = adjustRecordState(fileversion, recordheader.getShort());
if (magic != RECORD_MAGIC_NUMBER) {
// 1. check record magic number first
if (DEBUG) {
System.out.println(
"BAD RECORD("+pos+"): BAD MAGIC NUMER:"+magic);
}
return STATE_BAD_MAGIC_NUMBER;
} else if (state == STATE_BAD_STATE) {
// 2. check state
return state;
} else if (state == _STATE_LAST) {
// 3. if this is the last record do some sanity check
if (capacity != 0) {
if (DEBUG) {
System.out.println(
"BAD RECORD("+pos+"): LAST RECORD WOTH CAP="+capacity);
}
return STATE_BAD_CAPACITY;
} else {
return state;
}
} else if (capacity <= RECORD_HEADER_SIZE) {
// 4. do more sanity check
if (DEBUG) {
System.out.println(
"BAD RECORD("+pos+"): CAP