All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.jackrabbit.oak.plugins.index.lucene;

import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executor;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;

import javax.management.openmbean.CompositeDataSupport;
import javax.management.openmbean.CompositeType;
import javax.management.openmbean.OpenDataException;
import javax.management.openmbean.OpenType;
import javax.management.openmbean.SimpleType;
import javax.management.openmbean.TabularData;
import javax.management.openmbean.TabularDataSupport;
import javax.management.openmbean.TabularType;

import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.hash.Hashing;

import org.apache.commons.io.FileUtils;
import org.apache.jackrabbit.oak.commons.IOUtils;
import org.apache.jackrabbit.oak.commons.concurrent.NotifyingFutureTask;
import org.apache.jackrabbit.oak.util.PerfLogger;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoLockFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Iterables.toArray;
import static com.google.common.collect.Iterables.transform;
import static com.google.common.collect.Maps.newConcurrentMap;
import static com.google.common.collect.Maps.newHashMap;
import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount;

public class IndexCopier implements CopyOnReadStatsMBean, Closeable {
    private static final Set REMOTE_ONLY = ImmutableSet.of("segments.gen");
    private static final int MAX_FAILURE_ENTRIES = 10000;
    private static final AtomicInteger UNIQUE_COUNTER = new AtomicInteger();
    private static final String WORK_DIR_NAME = "indexWriterDir";

    private final Logger log = LoggerFactory.getLogger(getClass());
    private final PerfLogger PERF_LOGGER = new PerfLogger(LoggerFactory.getLogger(log.getName() + ".perf"));
    private final Executor executor;
    private final File indexRootDir;
    private final File indexWorkDir;

    private final AtomicInteger readerLocalReadCount = new AtomicInteger();
    private final AtomicInteger writerLocalReadCount = new AtomicInteger();
    private final AtomicInteger readerRemoteReadCount = new AtomicInteger();
    private final AtomicInteger writerRemoteReadCount = new AtomicInteger();
    private final AtomicInteger invalidFileCount = new AtomicInteger();
    private final AtomicInteger deletedFileCount = new AtomicInteger();
    private final AtomicInteger scheduledForCopyCount = new AtomicInteger();
    private final AtomicInteger copyInProgressCount = new AtomicInteger();
    private final AtomicInteger maxCopyInProgressCount = new AtomicInteger();
    private final AtomicInteger maxScheduledForCopyCount = new AtomicInteger();
    private final AtomicInteger uploadCount = new AtomicInteger();
    private final AtomicInteger downloadCount = new AtomicInteger();
    private final AtomicLong copyInProgressSize = new AtomicLong();
    private final AtomicLong downloadSize = new AtomicLong();
    private final AtomicLong uploadSize = new AtomicLong();
    private final AtomicLong garbageCollectedSize = new AtomicLong();
    private final AtomicLong skippedFromUploadSize = new AtomicLong();
    private final AtomicLong downloadTime = new AtomicLong();
    private final AtomicLong uploadTime = new AtomicLong();


    private final Map indexPathMapping = newConcurrentMap();
    private final Map> sharedWorkingSetMap = newHashMap();
    private final Map indexPathVersionMapping = newConcurrentMap();
    private final ConcurrentMap failedToDeleteFiles = newConcurrentMap();
    private final Set copyInProgressFiles = Collections.newSetFromMap(new ConcurrentHashMap());
    private final boolean prefetchEnabled;
    private volatile boolean closed;

    public IndexCopier(Executor executor, File indexRootDir) throws IOException {
        this(executor, indexRootDir, false);
    }

    public IndexCopier(Executor executor, File indexRootDir, boolean prefetchEnabled) throws IOException {
        this.executor = executor;
        this.indexRootDir = indexRootDir;
        this.prefetchEnabled = prefetchEnabled;
        this.indexWorkDir = initializerWorkDir(indexRootDir);
    }

    public Directory wrapForRead(String indexPath, IndexDefinition definition,
            Directory remote) throws IOException {
        Directory local = createLocalDirForIndexReader(indexPath, definition);
        return new CopyOnReadDirectory(remote, local, prefetchEnabled, indexPath, getSharedWorkingSet(definition));
    }

    public Directory wrapForWrite(IndexDefinition definition, Directory remote, boolean reindexMode) throws IOException {
        Directory local = createLocalDirForIndexWriter(definition);
        return new CopyOnWriteDirectory(remote, local, reindexMode,
                getIndexPathForLogging(definition), getSharedWorkingSet(definition));
    }

    @Override
    public void close() throws IOException {
        this.closed = true;
    }

    File getIndexWorkDir() {
        return indexWorkDir;
    }

    File getIndexRootDir() {
        return indexRootDir;
    }

    protected Directory createLocalDirForIndexWriter(IndexDefinition definition) throws IOException {
        String indexPath = definition.getIndexPathFromConfig();
        File indexWriterDir;
        if (indexPath == null){
            //If indexPath is not known create a unique directory for work
            indexWriterDir = new File(indexWorkDir, String.valueOf(UNIQUE_COUNTER.incrementAndGet()));
        } else {
            File indexDir = getIndexDir(indexPath);
            String newVersion = String.valueOf(definition.getReindexCount());
            indexWriterDir = getVersionedDir(indexPath, indexDir, newVersion);
        }

        //By design indexing in Oak is single threaded so Lucene locking
        //can be disabled
        Directory dir = FSDirectory.open(indexWriterDir, NoLockFactory.getNoLockFactory());

        log.debug("IndexWriter would use {}", indexWriterDir);

        if (indexPath == null) {
            dir = new DeleteOldDirOnClose(dir, indexWriterDir);
            log.debug("IndexPath [{}] not configured in index definition {}. Writer would create index " +
                    "files in temporary dir {} which would be deleted upon close. For better performance do " +
                    "configure the 'indexPath' as part of your index definition", LuceneIndexConstants.INDEX_PATH,
                    definition, indexWriterDir);
        }
        return dir;
    }

    protected Directory createLocalDirForIndexReader(String indexPath, IndexDefinition definition) throws IOException {
        File indexDir = getIndexDir(indexPath);
        String newVersion = String.valueOf(definition.getReindexCount());
        File versionedIndexDir = getVersionedDir(indexPath, indexDir, newVersion);
        Directory result = FSDirectory.open(versionedIndexDir);

        String oldVersion = indexPathVersionMapping.put(indexPath, newVersion);
        if (!newVersion.equals(oldVersion) && oldVersion != null) {
            result = new DeleteOldDirOnClose(result, new File(indexDir, oldVersion));
        }
        return result;
    }

    private File getVersionedDir(String indexPath, File indexDir, String newVersion) {
        File versionedIndexDir = new File(indexDir, newVersion);
        if (!versionedIndexDir.exists()) {
            checkState(versionedIndexDir.mkdirs(), "Cannot create directory %s", versionedIndexDir);
        }
        indexPathMapping.put(indexPath, indexDir.getAbsolutePath());
        return versionedIndexDir;
    }

    public File getIndexDir(String indexPath) {
        String subDir = Hashing.sha256().hashString(indexPath, Charsets.UTF_8).toString();
        return new File(indexRootDir, subDir);
    }

    Map getFailedToDeleteFiles() {
        return Collections.unmodifiableMap(failedToDeleteFiles);
    }

    private void failedToDelete(LocalIndexFile file){
        //Limit the size on best effort basis
        if (failedToDeleteFiles.size() < MAX_FAILURE_ENTRIES) {
            LocalIndexFile failedToDeleteFile = failedToDeleteFiles.putIfAbsent(file.getKey(), file);
            if (failedToDeleteFile == null){
                failedToDeleteFile = file;
            }
            failedToDeleteFile.incrementAttemptToDelete();
        } else {
            log.warn("Not able to delete {}. Currently more than {} file with total size {} are pending delete.",
                    file.deleteLog(), failedToDeleteFiles.size(), getGarbageSize());
        }
    }

    private void successfullyDeleted(LocalIndexFile file, boolean fileExisted){
        LocalIndexFile failedToDeleteFile = failedToDeleteFiles.remove(file.getKey());
        if (failedToDeleteFile != null){
            log.debug("Deleted : {}", failedToDeleteFile.deleteLog());
        }

        if (fileExisted){
            garbageCollectedSize.addAndGet(file.size);
            deletedFileCount.incrementAndGet();
        }
    }

    /**
     * Provide the corresponding shared state to enable COW inform COR
     * about new files it is creating while indexing. This would allow COR to ignore
     * such files while determining the deletion candidates.
     *
     * @param defn index definition for which the directory is being created
     * @return a set to maintain the state of new files being created by the COW Directory
     */
    private Set getSharedWorkingSet(IndexDefinition defn){
        String indexPath = defn.getIndexPathFromConfig();

        if (indexPath == null){
            //With indexPath null the working directory would not
            //be shared between COR and COW. So just return a new set
            return new HashSet();
        }

        Set sharedSet;
        synchronized (sharedWorkingSetMap){
            sharedSet = sharedWorkingSetMap.get(indexPath);
            if (sharedSet == null){
                sharedSet = Sets.newConcurrentHashSet();
                sharedWorkingSetMap.put(indexPath, sharedSet);
            }
        }
        return sharedSet;
    }

    /**
     * Creates the workDir. If it exists then it is cleaned
     *
     * @param indexRootDir root directory under which all indexing related files are managed
     * @return work directory. Always empty
     */
    private static File initializerWorkDir(File indexRootDir) throws IOException {
        File workDir = new File(indexRootDir, WORK_DIR_NAME);
        FileUtils.deleteDirectory(workDir);
        checkState(workDir.mkdirs(), "Cannot create directory %s", workDir);
        return workDir;
    }

    private static String getIndexPathForLogging(IndexDefinition defn){
        String indexPath = defn.getIndexPathFromConfig();
        if (indexPath == null){
            return "UNKNOWN";
        }
        return indexPath;
    }

    /**
     * Directory implementation which lazily copies the index files from a
     * remote directory in background.
     */
    class CopyOnReadDirectory extends FilterDirectory {
        private final Directory remote;
        private final Directory local;
        private final String indexPath;

        private final ConcurrentMap files = newConcurrentMap();
        /**
         * Set of fileNames bound to current local dir. It is updated with any new file
         * which gets added by this directory
         */
        private final Set localFileNames = Sets.newConcurrentHashSet();

        public CopyOnReadDirectory(Directory remote, Directory local, boolean prefetch,
                                   String indexPath, Set sharedWorkingSet) throws IOException {
            super(remote);
            this.remote = remote;
            this.local = local;
            this.indexPath = indexPath;

            this.localFileNames.addAll(Arrays.asList(local.listAll()));
            //Remove files which are being worked upon by COW
            this.localFileNames.removeAll(sharedWorkingSet);

            if (prefetch) {
                prefetchIndexFiles();
            }
        }

        @Override
        public void deleteFile(String name) throws IOException {
            throw new UnsupportedOperationException("Cannot delete in a ReadOnly directory");
        }

        @Override
        public IndexOutput createOutput(String name, IOContext context) throws IOException {
            throw new UnsupportedOperationException("Cannot write in a ReadOnly directory");
        }

        @Override
        public IndexInput openInput(String name, IOContext context) throws IOException {
            if (REMOTE_ONLY.contains(name)) {
                log.trace("[{}] opening remote only file {}", indexPath, name);
                return remote.openInput(name, context);
            }

            CORFileReference ref = files.get(name);
            if (ref != null) {
                if (ref.isLocalValid()) {
                    log.trace("[{}] opening existing local file {}", indexPath, name);
                    return files.get(name).openLocalInput(context);
                } else {
                    readerRemoteReadCount.incrementAndGet();
                    log.trace(
                            "[{}] opening existing remote file as local version is not valid {}",
                            indexPath, name);
                    return remote.openInput(name, context);
                }
            }

            //If file does not exist then just delegate to remote and not
            //schedule a copy task
            if (!remote.fileExists(name)){
                if (log.isDebugEnabled()) {
                    log.debug("[{}] Looking for non existent file {}. Current known files {}",
                            indexPath, name, Arrays.toString(remote.listAll()));
                }
                return remote.openInput(name, context);
            }

            CORFileReference toPut = new CORFileReference(name);
            CORFileReference old = files.putIfAbsent(name, toPut);
            if (old == null) {
                log.trace("[{}] scheduled local copy for {}", indexPath, name);
                copy(toPut);
            }

            //If immediate executor is used the result would be ready right away
            if (toPut.isLocalValid()) {
                log.trace("[{}] opening new local file {}", indexPath, name);
                return toPut.openLocalInput(context);
            }

            log.trace("[{}] opening new remote file {}", indexPath, name);
            readerRemoteReadCount.incrementAndGet();
            return remote.openInput(name, context);
        }

        Directory getLocal() {
            return local;
        }

        private void copy(final CORFileReference reference) {
            updateMaxScheduled(scheduledForCopyCount.incrementAndGet());
            executor.execute(new Runnable() {
                @Override
                public void run() {
                    scheduledForCopyCount.decrementAndGet();
                    copyFilesToLocal(reference, true, true);
                }
            });
        }

        private void prefetchIndexFiles() throws IOException {
            long start = PERF_LOGGER.start();
            long totalSize = 0;
            int copyCount = 0;
            List copiedFileNames = Lists.newArrayList();
            for (String name : remote.listAll()) {
                if (REMOTE_ONLY.contains(name)) {
                    continue;
                }
                CORFileReference fileRef = new CORFileReference(name);
                files.putIfAbsent(name, fileRef);
                long fileSize = copyFilesToLocal(fileRef, false, false);
                if (fileSize > 0) {
                    copyCount++;
                    totalSize += fileSize;
                    copiedFileNames.add(name);
                }
            }

            local.sync(copiedFileNames);
            PERF_LOGGER.end(start, -1, "[{}] Copied {} files totaling {}", indexPath, copyCount, humanReadableByteCount(totalSize));
        }

        private long copyFilesToLocal(CORFileReference reference, boolean sync, boolean logDuration) {
            String name = reference.name;
            boolean success = false;
            boolean copyAttempted = false;
            long fileSize = 0;
            try {
                if (!local.fileExists(name)) {
                    long perfStart = -1;
                    if (logDuration) {
                        perfStart = PERF_LOGGER.start();
                    }

                    fileSize = remote.fileLength(name);
                    LocalIndexFile file = new LocalIndexFile(local, name, fileSize, true);
                    long start = startCopy(file);
                    copyAttempted = true;

                    remote.copy(local, name, name, IOContext.READ);
                    reference.markValid();

                    if (sync) {
                        local.sync(Collections.singleton(name));
                    }

                    doneCopy(file, start);
                    if (logDuration) {
                        PERF_LOGGER.end(perfStart, 0,
                                "[{}] Copied file {} of size {}", indexPath,
                                name, humanReadableByteCount(fileSize));
                    }
                } else {
                    long localLength = local.fileLength(name);
                    long remoteLength = remote.fileLength(name);

                    //Do a simple consistency check. Ideally Lucene index files are never
                    //updated but still do a check if the copy is consistent
                    if (localLength != remoteLength) {
                        log.warn("[{}] Found local copy for {} in {} but size of local {} differs from remote {}. " +
                                        "Content would be read from remote file only",
                                indexPath, name, local, localLength, remoteLength);
                        invalidFileCount.incrementAndGet();
                    } else {
                        reference.markValid();
                        log.trace("[{}] found local copy of file {}",
                                indexPath, name);
                    }
                }
                success = true;
            } catch (IOException e) {
                //TODO In case of exception there would not be any other attempt
                //to download the file. Look into support for retry
                log.warn("[{}] Error occurred while copying file [{}] from {} to {}", indexPath, name, remote, local, e);
            } finally {
                if (copyAttempted && !success){
                    try {
                        if (local.fileExists(name)) {
                            local.deleteFile(name);
                        }
                    } catch (IOException e) {
                        log.warn("[{}] Error occurred while deleting corrupted file [{}] from [{}]", indexPath, name, local, e);
                    }
                }
            }
            return fileSize;
        }

        /**
         * On close file which are not present in remote are removed from local.
         * CopyOnReadDir is opened at different revisions of the index state
         *
         * CDir1 - V1
         * CDir2 - V2
         *
         * Its possible that two different IndexSearcher are opened at same local
         * directory but pinned to different revisions. So while removing it must
         * be ensured that any currently opened IndexSearcher does not get affected.
         * The way IndexSearchers get created in IndexTracker it ensures that new searcher
         * pinned to newer revision gets opened first and then existing ones are closed.
         *
         *
         * @throws IOException
         */
        @Override
        public void close() throws IOException {
            //Always remove old index file on close as it ensures that
            //no other IndexSearcher are opened with previous revision of Index due to
            //way IndexTracker closes IndexNode. At max there would be only two IndexNode
            //opened pinned to different revision of same Lucene index
            executor.execute(new Runnable() {
                @Override
                public void run() {
                    try{
                        removeDeletedFiles();
                    } catch (IOException e) {
                        log.warn(
                                "[{}] Error occurred while removing deleted files from Local {}, Remote {}",
                                indexPath, local, remote, e);
                    }

                    try {
                        //This would also remove old index files if current
                        //directory was based on newerRevision as local would
                        //be of type DeleteOldDirOnClose
                        local.close();
                        remote.close();
                    } catch (IOException e) {
                        log.warn(
                                "[{}] Error occurred while closing directory ",
                                indexPath, e);
                    }
                }
            });
        }

        @Override
        public String toString() {
            return String.format("[COR] Local %s, Remote %s", local, remote);
        }

        private void removeDeletedFiles() throws IOException {
            //Files present in dest but not present in source have to be deleted
            Set filesToBeDeleted = Sets.difference(
                    ImmutableSet.copyOf(localFileNames),
                    ImmutableSet.copyOf(remote.listAll())
            );

            Set failedToDelete = Sets.newHashSet();

            for (String fileName : filesToBeDeleted) {
                boolean deleted = IndexCopier.this.deleteFile(local, fileName, true);
                if (!deleted){
                    failedToDelete.add(fileName);
                }
            }

            filesToBeDeleted = new HashSet(filesToBeDeleted);
            filesToBeDeleted.removeAll(failedToDelete);
            if(!filesToBeDeleted.isEmpty()) {
                log.debug(
                        "[{}] Following files have been removed from Lucene index directory {}",
                        indexPath, filesToBeDeleted);
            }
        }

        private class CORFileReference {
            final String name;
            private volatile boolean valid;

            private CORFileReference(String name) {
                this.name = name;
            }

            boolean isLocalValid(){
                return valid;
            }

            IndexInput openLocalInput( IOContext context) throws IOException {
                readerLocalReadCount.incrementAndGet();
                return local.openInput(name, context);
            }

            void markValid(){
                this.valid = true;
                localFileNames.add(name);
            }
        }
    }

    private class CopyOnWriteDirectory extends FilterDirectory {
        /**
         * Signal for the background thread to stop processing changes.
         */
        private final Callable STOP = new Callable() {
            @Override
            public Void call() throws Exception {
                return null;
            }
        };
        private final Directory remote;
        private final Directory local;
        private final ConcurrentMap fileMap = newConcurrentMap();
        private final Set deletedFilesLocal = Sets.newConcurrentHashSet();
        private final Set skippedFiles = Sets.newConcurrentHashSet();

        private final BlockingQueue> queue = new LinkedBlockingQueue>();
        private final AtomicReference errorInCopy = new AtomicReference();
        private final CountDownLatch copyDone = new CountDownLatch(1);
        private final boolean reindexMode;
        private final String indexPathForLogging;
        private final Set sharedWorkingSet;

        /**
         * Current background task
         */
        private volatile NotifyingFutureTask currentTask =  NotifyingFutureTask.completed();

        /**
         * Completion handler: set the current task to the next task and schedules that one
         * on the background thread.
         */
        private final Runnable completionHandler = new Runnable() {
            Callable task = new Callable() {
                @Override
                public Void call() throws Exception {
                    try {
                        Callable task = queue.poll();
                        if (task != null && task != STOP) {
                            if (errorInCopy.get() != null) {
                                log.trace("[COW][{}] Skipping task {} as some exception occurred in previous run",
                                        indexPathForLogging, task);
                            } else {
                                task.call();
                            }
                            currentTask.onComplete(completionHandler);
                        }

                        //Signal that all tasks completed
                        if (task == STOP){
                            copyDone.countDown();
                        }
                    } catch (Throwable t) {
                        errorInCopy.set(t);
                        log.debug("[COW][{}] Error occurred while copying files. Further processing would " +
                                "be skipped", indexPathForLogging, t);
                        currentTask.onComplete(completionHandler);
                    }
                    return null;
                }
            };

            @Override
            public void run() {
                currentTask = new NotifyingFutureTask(task);
                try {
                    executor.execute(currentTask);
                } catch (RejectedExecutionException e){
                    checkIfClosed(false);
                    throw e;
                }
            }
        };

        public CopyOnWriteDirectory(Directory remote, Directory local, boolean reindexMode,
                                    String indexPathForLogging, Set sharedWorkingSet) throws IOException {
            super(local);
            this.remote = remote;
            this.local = local;
            this.indexPathForLogging = indexPathForLogging;
            this.reindexMode = reindexMode;
            this.sharedWorkingSet = sharedWorkingSet;
            initialize();
        }

        @Override
        public String[] listAll() throws IOException {
            return Iterables.toArray(fileMap.keySet(), String.class);
        }

        @Override
        public boolean fileExists(String name) throws IOException {
            return fileMap.containsKey(name);
        }

        @Override
        public void deleteFile(String name) throws IOException {
            log.trace("[COW][{}] Deleted file {}", indexPathForLogging, name);
            COWFileReference ref = fileMap.remove(name);
            if (ref != null) {
                ref.delete();
            }
        }

        @Override
        public long fileLength(String name) throws IOException {
            COWFileReference ref = fileMap.get(name);
            if (ref == null) {
                throw new FileNotFoundException(name);
            }
            return ref.fileLength();
        }

        @Override
        public IndexOutput createOutput(String name, IOContext context) throws IOException {
            COWFileReference ref = fileMap.remove(name);
            if (ref != null) {
                ref.delete();
            }
            ref = new COWLocalFileReference(name);
            fileMap.put(name, ref);
            sharedWorkingSet.add(name);
            return ref.createOutput(context);
        }

        @Override
        public void sync(Collection names) throws IOException {
            for (String name : names){
                COWFileReference file = fileMap.get(name);
                if (file != null){
                    file.sync();
                }
            }
        }

        @Override
        public IndexInput openInput(String name, IOContext context) throws IOException {
            COWFileReference ref = fileMap.get(name);
            if (ref == null) {
                throw new FileNotFoundException(name);
            }
            return ref.openInput(context);
        }

        @Override
        public void close() throws IOException {
            int pendingCopies = queue.size();
            addTask(STOP);

            //Wait for all pending copy task to finish
            try {
                long start = PERF_LOGGER.start();

                //Loop untill queue finished or IndexCopier
                //found to be closed. Doing it with timeout to
                //prevent any bug causing the thread to wait indefinitely
                while (!copyDone.await(10, TimeUnit.SECONDS)) {
                    if (closed) {
                        throw new IndexCopierClosedException("IndexCopier found to be closed " +
                                "while processing copy task for" + remote.toString());
                    }
                }
                PERF_LOGGER.end(start, -1, "[COW][{}] Completed pending copying task {}", indexPathForLogging, pendingCopies);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new IOException(e);
            }

            Throwable t = errorInCopy.get();
            if (t != null){
                throw new IOException("Error occurred while copying files for " + indexPathForLogging, t);
            }

            //Sanity check
            checkArgument(queue.isEmpty(), "Copy queue still " +
                    "has pending task left [%d]. %s", queue.size(), queue);

            long skippedFilesSize = getSkippedFilesSize();

            for (String fileName : deletedFilesLocal){
                deleteLocalFile(fileName);
            }

            skippedFromUploadSize.addAndGet(skippedFilesSize);

            String msg = "[COW][{}] CopyOnWrite stats : Skipped copying {} files with total size {}";
            if (reindexMode || skippedFilesSize > 10 * FileUtils.ONE_MB){
                log.info(msg, indexPathForLogging, skippedFiles.size(), humanReadableByteCount(skippedFilesSize));
            } else {
                log.debug(msg,indexPathForLogging, skippedFiles.size(), humanReadableByteCount(skippedFilesSize));
            }

            if (log.isTraceEnabled()){
                log.trace("[COW][{}] File listing - Upon completion {}", indexPathForLogging, Arrays.toString(remote.listAll()));
            }

            local.close();
            remote.close();
            sharedWorkingSet.clear();
        }

        @Override
        public String toString() {
            return String.format("[COW][%s] Local %s, Remote %s", indexPathForLogging, local, remote);
        }

        private long getSkippedFilesSize() {
            long size = 0;
            for (String name : skippedFiles){
                try{
                    if (local.fileExists(name)){
                        size += local.fileLength(name);
                    }
                } catch (Exception ignore){

                }
            }
            return size;
        }

        private void deleteLocalFile(String fileName) {
            IndexCopier.this.deleteFile(local, fileName, false);
        }

        private void initialize() throws IOException {
            for (String name : remote.listAll()) {
                fileMap.put(name, new COWRemoteFileReference(name));
            }

            if (log.isTraceEnabled()){
                log.trace("[COW][{}] File listing - At start {}", indexPathForLogging, Arrays.toString(remote.listAll()));
            }
        }

        private void addCopyTask(final String name){
            updateMaxScheduled(scheduledForCopyCount.incrementAndGet());
            addTask(new Callable() {
                @Override
                public Void call() throws Exception {
                    scheduledForCopyCount.decrementAndGet();
                    if (deletedFilesLocal.contains(name)){
                        skippedFiles.add(name);
                        log.trace("[COW][{}] Skip copying of deleted file {}", indexPathForLogging, name);
                        return null;
                    }
                    long fileSize = local.fileLength(name);
                    LocalIndexFile file = new LocalIndexFile(local, name, fileSize, false);
                    long perfStart = PERF_LOGGER.start();
                    long start = startCopy(file);

                    local.copy(remote, name, name, IOContext.DEFAULT);

                    doneCopy(file, start);
                    PERF_LOGGER.end(perfStart, 0, "[COW][{}] Copied to remote {} -- size: {}",
                        indexPathForLogging, name, IOUtils.humanReadableByteCount(fileSize));
                    return null;
                }

                @Override
                public String toString() {
                    return "Copy: " + name;
                }
            });
        }

        private void addDeleteTask(final String name){
            addTask(new Callable() {
                @Override
                public Void call() throws Exception {
                    if (!skippedFiles.contains(name)) {
                        log.trace("[COW][{}] Marking as deleted {}", indexPathForLogging, name);
                        remote.deleteFile(name);
                    }
                    return null;
                }

                @Override
                public String toString() {
                    return "Delete : " + name;
                }
            });
        }

        private void addTask(Callable task){
            checkIfClosed(true);
            queue.add(task);
            currentTask.onComplete(completionHandler);
        }

        private void checkIfClosed(boolean throwException) {
            if (closed) {
                IndexCopierClosedException e = new IndexCopierClosedException("IndexCopier found to be closed " +
                        "while processing" +remote.toString());
                errorInCopy.set(e);
                copyDone.countDown();

                if (throwException) {
                    throw e;
                }
            }
        }

        private abstract class COWFileReference {
            protected final String name;

            public COWFileReference(String name) {
                this.name = name;
            }

            public abstract long fileLength() throws IOException;

            public abstract IndexInput openInput(IOContext context) throws IOException;

            public abstract IndexOutput createOutput(IOContext context) throws IOException;

            public abstract void delete() throws IOException;

            public void sync() throws IOException {

            }
        }

        private class COWRemoteFileReference extends COWFileReference {
            private boolean validLocalCopyPresent;
            private final long length;

            public COWRemoteFileReference(String name) throws IOException {
                super(name);
                this.length = remote.fileLength(name);
            }

            @Override
            public long fileLength() throws IOException {
                return length;
            }

            @Override
            public IndexInput openInput(IOContext context) throws IOException {
                checkIfLocalValid();
                if (validLocalCopyPresent && !REMOTE_ONLY.contains(name)) {
                    writerLocalReadCount.incrementAndGet();
                    return local.openInput(name, context);
                }
                writerRemoteReadCount.incrementAndGet();
                return remote.openInput(name, context);
            }

            @Override
            public IndexOutput createOutput(IOContext context) throws IOException {
                throw new UnsupportedOperationException("Cannot create output for existing remote file " + name);
            }

            @Override
            public void delete() throws IOException {
                //Remote file should not be deleted locally as it might be
                //in use by existing opened IndexSearcher. It would anyway
                //get deleted by CopyOnRead later
                //For now just record that these need to be deleted to avoid
                //potential concurrent access of the NodeBuilder
                addDeleteTask(name);
            }

            private void checkIfLocalValid() throws IOException {
                validLocalCopyPresent = local.fileExists(name)
                        && local.fileLength(name) == remote.fileLength(name);
            }
        }

        private class COWLocalFileReference extends COWFileReference {
            public COWLocalFileReference(String name) {
                super(name);
            }

            @Override
            public long fileLength() throws IOException {
                return local.fileLength(name);
            }

            @Override
            public IndexInput openInput(IOContext context) throws IOException {
                return local.openInput(name, context);
            }

            @Override
            public IndexOutput createOutput(IOContext context) throws IOException {
                log.debug("[COW][{}] Creating output {}", indexPathForLogging, name);
                return new CopyOnCloseIndexOutput(local.createOutput(name, context));
            }

            @Override
            public void delete() throws IOException {
                addDeleteTask(name);
                deletedFilesLocal.add(name);
            }

            @Override
            public void sync() throws IOException {
                local.sync(Collections.singleton(name));
            }

            /**
             * Implementation note - As we are decorating existing implementation
             * we would need to ensure that we also override methods (non abstract)
             * which might be implemented in say FSIndexInput like setLength
             */
            private class CopyOnCloseIndexOutput extends IndexOutput {
                private final IndexOutput delegate;

                public CopyOnCloseIndexOutput(IndexOutput delegate) {
                    this.delegate = delegate;
                }

                @Override
                public void flush() throws IOException {
                    delegate.flush();
                }

                @Override
                public void close() throws IOException {
                    delegate.close();
                    //Schedule this file to be copied in background
                    addCopyTask(name);
                }

                @Override
                public long getFilePointer() {
                    return delegate.getFilePointer();
                }

                @Override
                public void seek(long pos) throws IOException {
                    delegate.seek(pos);
                }

                @Override
                public long length() throws IOException {
                    return delegate.length();
                }

                @Override
                public void writeByte(byte b) throws IOException {
                    delegate.writeByte(b);
                }

                @Override
                public void writeBytes(byte[] b, int offset, int length) throws IOException {
                    delegate.writeBytes(b, offset, length);
                }

                @Override
                public void setLength(long length) throws IOException {
                    delegate.setLength(length);
                }
            }
        }
    }

    private boolean deleteFile(Directory dir, String fileName, boolean copiedFromRemote){
        LocalIndexFile file = new LocalIndexFile(dir, fileName, getFileLength(dir, fileName), copiedFromRemote);
        boolean successFullyDeleted = false;
        try {
            boolean fileExisted = false;
            if (dir.fileExists(fileName)) {
                fileExisted = true;
                dir.deleteFile(fileName);
            }
            successfullyDeleted(file, fileExisted);
            successFullyDeleted = true;
        } catch (IOException e) {
            failedToDelete(file);
            log.debug("Error occurred while removing deleted file {} from Local {}. " +
                    "Attempt would be made to delete it on next run ", fileName, dir, e);
        }
        return successFullyDeleted;
    }

    private long startCopy(LocalIndexFile file) {
        updateMaxInProgress(copyInProgressCount.incrementAndGet());
        copyInProgressSize.addAndGet(file.size);
        copyInProgressFiles.add(file);
        return System.currentTimeMillis();
    }

    private void doneCopy(LocalIndexFile file, long start) {
        copyInProgressFiles.remove(file);
        copyInProgressCount.decrementAndGet();
        copyInProgressSize.addAndGet(-file.size);

        if(file.copyFromRemote) {
            downloadTime.addAndGet(System.currentTimeMillis() - start);
            downloadSize.addAndGet(file.size);
            downloadCount.incrementAndGet();
        } else {
            uploadSize.addAndGet(file.size);
            uploadTime.addAndGet(System.currentTimeMillis() - start);
            uploadCount.incrementAndGet();
        }

    }

    private void updateMaxScheduled(int val) {
        synchronized (maxScheduledForCopyCount){
            int current = maxScheduledForCopyCount.get();
            if (val > current){
                maxScheduledForCopyCount.set(val);
            }
        }
    }

    private void updateMaxInProgress(int val) {
        synchronized (maxCopyInProgressCount){
            int current = maxCopyInProgressCount.get();
            if (val > current){
                maxCopyInProgressCount.set(val);
            }
        }
    }

    private class DeleteOldDirOnClose extends FilterDirectory {
        private final File oldIndexDir;

        protected DeleteOldDirOnClose(Directory in, File oldIndexDir) {
            super(in);
            this.oldIndexDir = oldIndexDir;
        }

        @Override
        public void close() throws IOException {
            try {
                super.close();
            } finally {
                //Clean out the local dir irrespective of any error occurring upon
                //close in wrapped directory
                try{
                    FileUtils.deleteDirectory(oldIndexDir);
                    log.debug("Removed old index content from {} ", oldIndexDir);
                } catch (IOException e){
                    log.warn("Not able to remove old version of copied index at {}", oldIndexDir, e);
                }
            }
        }

        @Override
        public String toString() {
            return "DeleteOldDirOnClose wrapper for " + getDelegate();
        }
    }
    
    static final class LocalIndexFile {
        final File dir;
        final String name;
        final long size;
        final boolean copyFromRemote;
        private volatile int deleteAttemptCount;
        final long creationTime = System.currentTimeMillis();
        
        public LocalIndexFile(Directory dir, String fileName,
                              long size, boolean copyFromRemote){
            this.copyFromRemote = copyFromRemote;
            this.dir = getFSDir(dir);
            this.name = fileName;
            this.size = size;
        }

        public LocalIndexFile(Directory dir, String fileName){
            this(dir, fileName, getFileLength(dir, fileName), true);
        }

        public String getKey(){
            if (dir != null){
                return new File(dir, name).getAbsolutePath();
            }
            return name;
        }

        public void incrementAttemptToDelete(){
            deleteAttemptCount++;
        }

        public int getDeleteAttemptCount() {
            return deleteAttemptCount;
        }

        public String deleteLog(){
            return String.format("%s (%s, %d attempts, %d s)", name,
                    humanReadableByteCount(size), deleteAttemptCount, timeTaken());
        }

        public String copyLog(){
            return String.format("%s (%s, %1.1f%%, %s, %d s)", name,
                    humanReadableByteCount(actualSize()),
                    copyProgress(),
                    humanReadableByteCount(size), timeTaken());
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;

            LocalIndexFile localIndexFile = (LocalIndexFile) o;

            if (dir != null ? !dir.equals(localIndexFile.dir) : localIndexFile.dir != null)
                return false;
            return name.equals(localIndexFile.name);

        }

        @Override
        public int hashCode() {
            int result = dir != null ? dir.hashCode() : 0;
            result = 31 * result + name.hashCode();
            return result;
        }

        private long timeTaken(){
            return TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - creationTime);
        }

        private float copyProgress(){
            return actualSize() * 1.0f / size * 100;
        }

        private long actualSize(){
            return dir != null ? new File(dir, name).length() : 0;
        }
    }

    static File getFSDir(Directory dir) {
        if (dir instanceof FilterDirectory){
            dir = ((FilterDirectory) dir).getDelegate();
        }

        if (dir instanceof FSDirectory){
            return ((FSDirectory) dir).getDirectory();
        }

        return null;
    }

    /**
     * Get the file length in best effort basis.
     * @return actual fileLength. -1 if cannot determine
     */
    private static long getFileLength(Directory dir, String fileName){
        try{
            return dir.fileLength(fileName);
        } catch (Exception e){
            return -1;
        }
    }

    //~------------------------------------------< CopyOnReadStatsMBean >

    @Override
    public TabularData getIndexPathMapping() {
        TabularDataSupport tds;
        try{
            TabularType tt = new TabularType(IndexMappingData.class.getName(),
                    "Lucene Index Stats", IndexMappingData.TYPE, new String[]{"jcrPath"});
            tds = new TabularDataSupport(tt);
            for (Map.Entry e : indexPathMapping.entrySet()){
                String size = humanReadableByteCount(FileUtils.sizeOfDirectory(new File(e.getValue())));
                tds.put(new CompositeDataSupport(IndexMappingData.TYPE,
                        IndexMappingData.FIELD_NAMES,
                        new String[]{e.getKey(), e.getValue(), size}));
            }
        } catch (OpenDataException e){
            throw new IllegalStateException(e);
        }
        return tds;
    }

    @Override
    public boolean isPrefetchEnabled() {
        return prefetchEnabled;
    }

    @Override
    public int getReaderLocalReadCount() {
        return readerLocalReadCount.get();
    }

    @Override
    public int getReaderRemoteReadCount() {
        return readerRemoteReadCount.get();
    }

    @Override
    public int getWriterLocalReadCount() {
        return writerLocalReadCount.get();
    }

    @Override
    public int getWriterRemoteReadCount() {
        return writerRemoteReadCount.get();
    }

    public int getInvalidFileCount(){
        return invalidFileCount.get();
    }

    @Override
    public String getDownloadSize() {
        return humanReadableByteCount(downloadSize.get());
    }

    @Override
    public long getDownloadTime() {
        return downloadTime.get();
    }

    @Override
    public int getDownloadCount() {
        return downloadCount.get();
    }

    @Override
    public int getUploadCount() {
        return uploadCount.get();
    }

    @Override
    public String getUploadSize() {
        return humanReadableByteCount(uploadSize.get());
    }

    @Override
    public long getUploadTime() {
        return uploadTime.get();
    }

    @Override
    public String getLocalIndexSize() {
        return humanReadableByteCount(FileUtils.sizeOfDirectory(indexRootDir));
    }

    @Override
    public String[] getGarbageDetails() {
        return toArray(transform(failedToDeleteFiles.values(),
                new Function() {
                    @Override
                    public String apply(LocalIndexFile input) {
                        return input.deleteLog();
                    }
                }), String.class);
    }

    @Override
    public String getGarbageSize() {
        long garbageSize = 0;
        for (LocalIndexFile failedToDeleteFile : failedToDeleteFiles.values()){
            garbageSize += failedToDeleteFile.size;
        }
        return humanReadableByteCount(garbageSize);
    }

    @Override
    public int getScheduledForCopyCount() {
        return scheduledForCopyCount.get();
    }

    @Override
    public int getCopyInProgressCount() {
        return copyInProgressCount.get();
    }

    @Override
    public String getCopyInProgressSize() {
        return humanReadableByteCount(copyInProgressSize.get());
    }

    @Override
    public int getMaxCopyInProgressCount() {
        return maxCopyInProgressCount.get();
    }

    @Override
    public int getMaxScheduledForCopyCount() {
        return maxScheduledForCopyCount.get();
    }

    public String getSkippedFromUploadSize() {
        return humanReadableByteCount(skippedFromUploadSize.get());
    }

    @Override
    public String[] getCopyInProgressDetails() {
        return toArray(transform(copyInProgressFiles,
                new Function() {
                    @Override
                    public String apply(LocalIndexFile input) {
                        return input.copyLog();
                    }
                }), String.class);
    }

    @Override
    public int getDeletedFilesCount() {
        return deletedFileCount.get();
    }

    @Override
    public String getGarbageCollectedSize() {
        return humanReadableByteCount(garbageCollectedSize.get());
    }

    private static class IndexMappingData {
        static final String[] FIELD_NAMES = new String[]{
                "jcrPath",
                "fsPath",
                "size",
        };

        static final String[] FIELD_DESCRIPTIONS = new String[]{
                "JCR Path",
                "Filesystem Path",
                "Size",
        };

        static final OpenType[] FIELD_TYPES = new OpenType[]{
                SimpleType.STRING,
                SimpleType.STRING,
                SimpleType.STRING,
        };

        static final CompositeType TYPE = createCompositeType();

        static CompositeType createCompositeType() {
            try {
                return new CompositeType(
                        IndexMappingData.class.getName(),
                        "Composite data type for Index Mapping Data",
                        IndexMappingData.FIELD_NAMES,
                        IndexMappingData.FIELD_DESCRIPTIONS,
                        IndexMappingData.FIELD_TYPES);
            } catch (OpenDataException e) {
                throw new IllegalStateException(e);
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy