org.apache.jackrabbit.core.data.FileDataStore Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.core.data;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.lang.ref.WeakReference;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Simple file-based data store. Data records are stored as normal files
* named using a message digest of the contained binary stream.
*
* Configuration:
*
* <DataStore class="org.apache.jackrabbit.core.data.FileDataStore">
* <param name="{@link #setPath(String) path}" value="/data/datastore"/>
* <param name="{@link #setMinRecordLength(int) minRecordLength}" value="1024"/>
* </DataStore>
*
*
* If the directory is not set, the directory <repository home>/repository/datastore is used.
*
* A three level directory structure is used to avoid placing too many
* files in a single directory. The chosen structure is designed to scale
* up to billions of distinct records.
*
* This implementation relies on the underlying file system to support
* atomic O(1) move operations with {@link File#renameTo(File)}.
*/
public class FileDataStore extends AbstractDataStore
implements MultiDataStoreAware {
/**
* Logger instance
*/
private static Logger log = LoggerFactory.getLogger(FileDataStore.class);
/**
* The default value for the minimum object size.
*/
private static final int DEFAULT_MIN_RECORD_LENGTH = 100;
/**
* The maximum last modified time resolution of the file system.
*/
private static final int ACCESS_TIME_RESOLUTION = 2000;
/**
* Name of the directory used for temporary files.
* Must be at least 3 characters.
*/
private static final String TMP = "tmp";
/**
* The minimum modified date. If a file is accessed (read or write) with a modified date
* older than this value, the modified date is updated to the current time.
*/
private volatile long minModifiedDate;
/**
* The directory that contains all the data record files. The structure
* of content within this directory is controlled by this class.
*/
private File directory;
/**
* The name of the directory that contains all the data record files. The structure
* of content within this directory is controlled by this class.
*/
private String path;
/**
* The minimum size of an object that should be stored in this data store.
*/
private int minRecordLength = DEFAULT_MIN_RECORD_LENGTH;
/**
* All data identifiers that are currently in use are in this set until they are garbage collected.
*/
protected Map> inUse =
Collections.synchronizedMap(new WeakHashMap>());
/**
* Initialized the data store.
* If the path is not set, <repository home>/repository/datastore is used.
* This directory is automatically created if it does not yet exist.
*
* @param homeDir
*/
public void init(String homeDir) {
if (path == null) {
path = homeDir + "/repository/datastore";
}
directory = new File(path);
directory.mkdirs();
}
/**
* Get a data record for the given identifier.
*
* @param identifier the identifier
* @return the data record or null
*/
public DataRecord getRecordIfStored(DataIdentifier identifier) throws DataStoreException {
File file = getFile(identifier);
if (!file.exists()) {
return null;
}
if (minModifiedDate != 0) {
// only check when running garbage collection
synchronized (this) {
if (getLastModified(file) < minModifiedDate) {
setLastModified(file, System.currentTimeMillis() + ACCESS_TIME_RESOLUTION);
}
}
}
usesIdentifier(identifier);
return new FileDataRecord(this, identifier, file);
}
private void usesIdentifier(DataIdentifier identifier) {
inUse.put(identifier, new WeakReference(identifier));
}
/**
* Creates a new data record.
* The stream is first consumed and the contents are saved in a temporary file
* and the {@link #DIGEST} message digest of the stream is calculated. If a
* record with the same {@link #DIGEST} digest (and length) is found then it is
* returned. Otherwise the temporary file is moved in place to become
* the new data record that gets returned.
*
* @param input binary stream
* @return data record that contains the given stream
* @throws DataStoreException if the record could not be created
*/
public DataRecord addRecord(InputStream input) throws DataStoreException {
File temporary = null;
try {
temporary = newTemporaryFile();
DataIdentifier tempId = new DataIdentifier(temporary.getName());
usesIdentifier(tempId);
// Copy the stream to the temporary file and calculate the
// stream length and the message digest of the stream
long length = 0;
MessageDigest digest = MessageDigest.getInstance(DIGEST);
OutputStream output = new DigestOutputStream(
new FileOutputStream(temporary), digest);
try {
length = IOUtils.copyLarge(input, output);
} finally {
output.close();
}
DataIdentifier identifier =
new DataIdentifier(encodeHexString(digest.digest()));
File file;
synchronized (this) {
// Check if the same record already exists, or
// move the temporary file in place if needed
usesIdentifier(identifier);
file = getFile(identifier);
if (!file.exists()) {
File parent = file.getParentFile();
parent.mkdirs();
if (temporary.renameTo(file)) {
// no longer need to delete the temporary file
temporary = null;
} else {
throw new IOException(
"Can not rename " + temporary.getAbsolutePath()
+ " to " + file.getAbsolutePath()
+ " (media read only?)");
}
} else {
long now = System.currentTimeMillis();
if (getLastModified(file) < now + ACCESS_TIME_RESOLUTION) {
setLastModified(file, now + ACCESS_TIME_RESOLUTION);
}
}
if (file.length() != length) {
// Sanity checks on the record file. These should never fail,
// but better safe than sorry...
if (!file.isFile()) {
throw new IOException("Not a file: " + file);
}
throw new IOException(DIGEST + " collision: " + file);
}
}
// this will also make sure that
// tempId is not garbage collected until here
inUse.remove(tempId);
return new FileDataRecord(this, identifier, file);
} catch (NoSuchAlgorithmException e) {
throw new DataStoreException(DIGEST + " not available", e);
} catch (IOException e) {
throw new DataStoreException("Could not add record", e);
} finally {
if (temporary != null) {
temporary.delete();
}
}
}
/**
* Returns the identified file. This method implements the pattern
* used to avoid problems with too many files in a single directory.
*
* No sanity checks are performed on the given identifier.
*
* @param identifier data identifier
* @return identified file
*/
private File getFile(DataIdentifier identifier) {
usesIdentifier(identifier);
String string = identifier.toString();
File file = directory;
file = new File(file, string.substring(0, 2));
file = new File(file, string.substring(2, 4));
file = new File(file, string.substring(4, 6));
return new File(file, string);
}
/**
* Returns a unique temporary file to be used for creating a new
* data record.
*
* @return temporary file
* @throws IOException
*/
private File newTemporaryFile() throws IOException {
// the directory is already created in the init method
return File.createTempFile(TMP, null, directory);
}
public void updateModifiedDateOnAccess(long before) {
minModifiedDate = before;
}
public void deleteRecord(DataIdentifier identifier)
throws DataStoreException {
File file = getFile(identifier);
synchronized (this) {
if (file.exists()) {
if (file.delete()) {
deleteEmptyParentDirs(file);
} else {
log.warn("Failed to delete file " + file.getAbsolutePath());
}
}
}
}
private void deleteEmptyParentDirs(File file) {
File parent = file.getParentFile();
try {
// Only iterate & delete if parent directory of the blob file is child
// of the base directory and if it is empty
while (FileUtils.directoryContains(directory, parent)) {
String[] entries = parent.list();
if (entries == null) {
log.warn("Failed to list directory {}", parent.getAbsolutePath());
break;
}
if (entries.length > 0) {
break;
}
boolean deleted = parent.delete();
log.debug("Deleted parent [{}] of file [{}]: {}",
new Object[]{parent, file.getAbsolutePath(), deleted});
parent = parent.getParentFile();
}
} catch (IOException e) {
log.warn("Error in parents deletion for " + file.getAbsoluteFile(), e);
}
}
public int deleteAllOlderThan(long min) {
int count = 0;
for (File file : directory.listFiles()) {
if (file.isDirectory()) { // skip top-level files
count += deleteOlderRecursive(file, min);
}
}
return count;
}
private int deleteOlderRecursive(File file, long min) {
int count = 0;
if (file.isFile() && file.exists() && file.canWrite()) {
synchronized (this) {
long lastModified;
try {
lastModified = getLastModified(file);
} catch (DataStoreException e) {
log.warn("Failed to read modification date; file not deleted", e);
// don't delete the file, since the lastModified date is uncertain
lastModified = min;
}
if (lastModified < min) {
DataIdentifier id = new DataIdentifier(file.getName());
if (!inUse.containsKey(id)) {
if (log.isInfoEnabled()) {
log.info("Deleting old file " + file.getAbsolutePath() +
" modified: " + new Timestamp(lastModified).toString() +
" length: " + file.length());
}
if (!file.delete()) {
log.warn("Failed to delete old file " + file.getAbsolutePath());
}
count++;
}
}
}
} else if (file.isDirectory()) {
File[] list = file.listFiles();
if (list != null) {
for (File f: list) {
count += deleteOlderRecursive(f, min);
}
}
// JCR-1396: FileDataStore Garbage Collector and empty directories
// Automatic removal of empty directories (but not the root!)
synchronized (this) {
list = file.listFiles();
if (list != null && list.length == 0) {
file.delete();
}
}
}
return count;
}
private void listRecursive(List list, File file) {
File[] files = file.listFiles();
if (files != null) {
for (File f : files) {
if (f.isDirectory()) {
listRecursive(list, f);
} else {
list.add(f);
}
}
}
}
public Iterator getAllIdentifiers() {
ArrayList files = new ArrayList();
for (File file : directory.listFiles()) {
if (file.isDirectory()) { // skip top-level files
listRecursive(files, file);
}
}
ArrayList identifiers = new ArrayList();
for (File f: files) {
String name = f.getName();
identifiers.add(new DataIdentifier(name));
}
log.debug("Found " + identifiers.size() + " identifiers.");
return identifiers.iterator();
}
public void clearInUse() {
inUse.clear();
}
/**
* Get the name of the directory where this data store keeps the files.
*
* @return the full path name
*/
public String getPath() {
return path;
}
/**
* Set the name of the directory where this data store keeps the files.
*
* @param directoryName the path name
*/
public void setPath(String directoryName) {
this.path = directoryName;
}
public int getMinRecordLength() {
return minRecordLength;
}
/**
* Set the minimum object length.
*
* @param minRecordLength the length
*/
public void setMinRecordLength(int minRecordLength) {
this.minRecordLength = minRecordLength;
}
public void close() {
// nothing to do
}
//---------------------------------------------------------< protected >--
@Override
protected byte[] getOrCreateReferenceKey() throws DataStoreException {
File file = new File(directory, "reference.key");
try {
if (file.exists()) {
return FileUtils.readFileToByteArray(file);
} else {
byte[] key = super.getOrCreateReferenceKey();
FileUtils.writeByteArrayToFile(file, key);
return key;
}
} catch (IOException e) {
throw new DataStoreException(
"Unable to access reference key file " + file.getPath(), e);
}
}
//-----------------------------------------------------------< private >--
/**
* Get the last modified date of a file.
*
* @param file the file
* @return the last modified date
* @throws DataStoreException if reading fails
*/
private static long getLastModified(File file) throws DataStoreException {
long lastModified = file.lastModified();
if (lastModified == 0) {
throw new DataStoreException("Failed to read record modified date: " + file.getAbsolutePath());
}
return lastModified;
}
/**
* Set the last modified date of a file, if the file is writable.
*
* @param file the file
* @param time the new last modified date
* @throws DataStoreException if the file is writable but modifying the date fails
*/
private static void setLastModified(File file, long time) throws DataStoreException {
if (!file.setLastModified(time)) {
if (!file.canWrite()) {
// if we can't write to the file, so garbage collection will also not delete it
// (read only files or file systems)
return;
}
try {
// workaround for Windows: if the file is already open for reading
// (in this or another process), then setting the last modified date
// doesn't work - see also JCR-2872
RandomAccessFile r = new RandomAccessFile(file, "rw");
try {
r.setLength(r.length());
} finally {
r.close();
}
} catch (IOException e) {
throw new DataStoreException("An IO Exception occurred while trying to set the last modified date: " + file.getAbsolutePath(), e);
}
}
}
}