org.apache.jackrabbit.oak.run.DataStoreCommand

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.oak.run;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import com.google.common.io.Closer;
import com.google.common.io.Files;
import joptsimple.OptionParser;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.jackrabbit.core.data.DataRecord;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.FileIOUtils;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.concurrent.ExecutorCloser;
import org.apache.jackrabbit.oak.commons.io.BurnOnCloseFileIterator;
import org.apache.jackrabbit.oak.commons.sort.EscapeUtils;
import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
import org.apache.jackrabbit.oak.plugins.blob.SharedDataStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
import org.apache.jackrabbit.oak.plugins.index.lucene.directory.OakDirectory;
import org.apache.jackrabbit.oak.run.cli.BlobStoreOptions;
import org.apache.jackrabbit.oak.run.cli.CommonOptions;
import org.apache.jackrabbit.oak.run.cli.NodeStoreFixture;
import org.apache.jackrabbit.oak.run.cli.NodeStoreFixtureProvider;
import org.apache.jackrabbit.oak.run.cli.Options;
import org.apache.jackrabbit.oak.run.commons.Command;
import org.apache.jackrabbit.oak.run.commons.LoggingInitializer;
import org.apache.jackrabbit.oak.segment.SegmentBlobReferenceRetriever;
import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
import org.apache.jackrabbit.oak.spi.cluster.ClusterRepositoryInfo;
import org.apache.jackrabbit.oak.spi.state.AbstractChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
import static com.google.common.base.Stopwatch.createStarted;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeAsLine;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.AZURE;
import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.FAKE;
import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.FDS;
import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.S3;
import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.getService;

/**
 * Command to check data store consistency and also optionally retrieve ids
 * and references.
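 * <p>
 * Illustrative invocations (option names are assumptions inferred from
 * {@link DataStoreOptions}; consult the oak-run help output for the authoritative list):
 * <pre>
 * java -jar oak-run.jar datastore --check-consistency [NodeStore URI] [blob store options]
 * java -jar oak-run.jar datastore --collect-garbage [NodeStore URI] [blob store options]
 * </pre>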
 */
public class DataStoreCommand implements Command {
    private static final Logger log = LoggerFactory.getLogger(DataStoreCommand.class);

    public static final String NAME = "datastore";
    private static final String summary = "Provides DataStore management operations";
    private static final String DELIM = ",";

    private Options opts;
    private DataStoreOptions dataStoreOpts;

    // Orders dump lines by the blob id (the token before the delimiter), ignoring any appended path
    private static final Comparator<String> idComparator = new Comparator<String>() {
        @Override public int compare(String s1, String s2) {
            return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
        }
    };


    @Override public void execute(String... args) throws Exception {
        OptionParser parser = new OptionParser();

        opts = new Options();
        opts.setCommandName(NAME);
        opts.setSummary(summary);
        opts.setConnectionString(CommonOptions.DEFAULT_CONNECTION_STRING);
        opts.registerOptionsFactory(DataStoreOptions.FACTORY);
        opts.parseAndConfigure(parser, args);

        dataStoreOpts = opts.getOptionBean(DataStoreOptions.class);

        // Clean up before setting up the NodeStore, as the temp
        // directory might be used by the NodeStore for caches such as the persistentCache
        setupDirectories(dataStoreOpts);
        setupLogging(dataStoreOpts);

        logCliArgs(args);

        boolean success = false;
        try (Closer closer = Utils.createCloserWithShutdownHook()) {
            opts.setTempDirectory(dataStoreOpts.getWorkDir().getAbsolutePath());
            NodeStoreFixture fixture = NodeStoreFixtureProvider.create(opts);
            closer.register(fixture);

            if (!checkParameters(dataStoreOpts, opts, fixture, parser)) {
                return;
            }
            execute(fixture, dataStoreOpts, opts, closer);
            success = true;
        } catch (Throwable e) {
            log.error("Error occurred while performing datastore operation", e);
            e.printStackTrace(System.err);
        } finally {
            shutdownLogging();
        }

        if (!success) {
            System.exit(1);
        }
    }

    private static boolean checkParameters(DataStoreOptions dataStoreOpts, Options opts, NodeStoreFixture fixture,
        OptionParser parser) throws IOException {

        if (!dataStoreOpts.anyActionSelected()) {
            log.info("No actions specified");
            parser.printHelpOn(System.out);
            return false;
        } else if (fixture.getStore() == null) {
            log.info("No NodeStore specified");
            parser.printHelpOn(System.out);
            return false;
        } else if (!opts.getCommonOpts().isDocument() && fixture.getBlobStore() == null) {
            log.info("No BlobStore specified");
            parser.printHelpOn(System.out);
            return false;
        }
        return true;
    }

    private void execute(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts, Options opts, Closer closer)
        throws Exception {

        final BlobStoreOptions optionBean = opts.getOptionBean(BlobStoreOptions.class);
        try (Closer metricsCloser = Utils.createCloserWithShutdownHook()) {
            MetricsExporterFixture metricsExporterFixture =
                MetricsExporterFixtureProvider.create(dataStoreOpts, fixture.getWhiteboard());
            metricsCloser.register(metricsExporterFixture);

            if (dataStoreOpts.dumpRefs()) {
                final File referencesTemp = File.createTempFile("traverseref", null, new File(opts.getTempDirectory()));
                final BufferedWriter writer = Files.newWriter(referencesTemp, UTF_8);

                boolean threw = true;
                try {
                    BlobReferenceRetriever retriever = getRetriever(fixture, dataStoreOpts, opts);

                    retriever.collectReferences(new ReferenceCollector() {
                        @Override public void addReference(String blobId, String nodeId) {
                            try {
                                Iterator<String> idIter =
                                    ((GarbageCollectableBlobStore) fixture.getBlobStore()).resolveChunks(blobId);

                                while (idIter.hasNext()) {
                                    String id = idIter.next();
                                    final Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
                                    // If --verbose is present, convert the blob ID to a backend-friendly
                                    // format and append the path holding the reference; otherwise write
                                    // the ID to the output file as-is.
                                    String line = dataStoreOpts.isVerbose() ?
                                        VerboseIdLogger.encodeId(delimJoiner.join(id, escapeLineBreak(nodeId)),
                                            optionBean.getBlobStoreType()) :
                                        id;
                                    writeAsLine(writer, line, true);
                                }
                            } catch (Exception e) {
                                throw new RuntimeException("Error in retrieving references", e);
                            }
                        }
                    });

                    writer.flush();
                    threw = false;

                    sort(referencesTemp, idComparator);

                    File parent = new File(dataStoreOpts.getOutDir().getAbsolutePath(), "dump");
                    long startTime = System.currentTimeMillis();
                    final File references = new File(parent, "dump-ref-" + startTime);
                    FileUtils.forceMkdir(parent);

                    FileUtils.copyFile(referencesTemp, references);
                } finally {
                    Closeables.close(writer, threw);
                }
            } else if (dataStoreOpts.dumpIds()) {
                final File blobidsTemp = File.createTempFile("blobidstemp", null, new File(opts.getTempDirectory()));

                retrieveBlobIds((GarbageCollectableBlobStore) fixture.getBlobStore(), blobidsTemp);

                File parent = new File(dataStoreOpts.getOutDir().getAbsolutePath(), "dump");
                long startTime = System.currentTimeMillis();
                final File ids = new File(parent, "dump-id-" + startTime);
                FileUtils.forceMkdir(parent);

                if (dataStoreOpts.isVerbose()) {
                    verboseIds(optionBean, blobidsTemp, ids);
                } else {
                    FileUtils.copyFile(blobidsTemp, ids);
                }
            } else if (dataStoreOpts.getMetadata()) {

                List<String> data = getMetadata(fixture);
                File outDir = opts.getOptionBean(DataStoreOptions.class).getOutDir();
                outDir.mkdirs();
                FileIOUtils.writeStrings(data.iterator(), new File(outDir, "metadata"), false);
            } else {
                MarkSweepGarbageCollector collector = getCollector(fixture, dataStoreOpts, opts, closer);
                if (dataStoreOpts.checkConsistency()) {
                    long missing = collector.checkConsistency();
                    log.warn("Found {} missing blobs", missing);

                    if (dataStoreOpts.isVerbose()) {
                        new VerboseIdLogger(opts).log();
                    }
                } else if (dataStoreOpts.collectGarbage()) {
                    collector.collectGarbage(dataStoreOpts.markOnly());
                }
            }
        }
    }

    private static void setupDirectories(DataStoreOptions opts) throws IOException {
        if (opts.getOutDir().exists()) {
            FileUtils.cleanDirectory(opts.getOutDir());
        }
        FileUtils.cleanDirectory(opts.getWorkDir());
    }

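    /**
     * Collects blob-GC metadata from the shared DataStore, one line per registered
     * repository in the form {@code repositoryId|markerTimeSecs|refTimeSecs|isLocal},
     * where {@code isLocal} is "*" for the connected repository and "-" otherwise
     * (the format follows from the Joiner call below).
     */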
    private static List<String> getMetadata(NodeStoreFixture fixture) {
        String repositoryId = ClusterRepositoryInfo.getId(fixture.getStore());
        checkNotNull(repositoryId);

        SharedDataStore dataStore = (SharedDataStore) fixture.getBlobStore();
        // Get all the start markers available
        List<DataRecord> markerFiles =
            dataStore.getAllMetadataRecords(SharedDataStoreUtils.SharedStoreRecordType.MARKED_START_MARKER.getType());
        Map<String, List<DataRecord>> markers = markerFiles.stream().collect(Collectors.groupingBy(
            input -> SharedDataStoreUtils.SharedStoreRecordType.MARKED_START_MARKER
                .getIdFromName(input.getIdentifier().toString()),
            Collectors.mapping(Function.identity(), Collectors.toList())));
        log.info("Mapped markers {}", markers);

        // Get all the reference records available
        List<DataRecord> refFiles =
            dataStore.getAllMetadataRecords(SharedDataStoreUtils.SharedStoreRecordType.REFERENCES.getType());

        Map<String, DataRecord> references = refFiles.stream().collect(Collectors.toMap(
            dataRecord -> dataRecord.getIdentifier().toString()
                .substring(SharedDataStoreUtils.SharedStoreRecordType.REFERENCES.getType().length() + 1),
            Function.identity()));
        log.info("Mapped references {}", references);

        // Get all the repositories registered
        List<DataRecord> repoFiles =
            dataStore.getAllMetadataRecords(SharedDataStoreUtils.SharedStoreRecordType.REPOSITORY.getType());
        log.info("Repository files {}", repoFiles);

        List<String> records = Lists.newArrayList();
        for (DataRecord repoRec : repoFiles) {
            String id =
                SharedDataStoreUtils.SharedStoreRecordType.REPOSITORY.getIdFromName(repoRec.getIdentifier().toString());

            long markerTime = 0;
            long refTime = 0;
            if (markers.containsKey(id)) {
                List<DataRecord> refStartMarkers = markers.get(id);
                DataRecord earliestRefRecord = SharedDataStoreUtils.getEarliestRecord(refStartMarkers);
                log.info("Earliest record {}", earliestRefRecord);

                markerTime = TimeUnit.MILLISECONDS.toSeconds(earliestRefRecord.getLastModified());

                String uniqueSessionId = earliestRefRecord.getIdentifier().toString()
                    .substring(SharedDataStoreUtils.SharedStoreRecordType.MARKED_START_MARKER.getType().length() + 1);
                if (references.containsKey(uniqueSessionId)) {
                    refTime = TimeUnit.MILLISECONDS.toSeconds(references.get(uniqueSessionId).getLastModified());
                }
            }
            String isLocal = "-";
            if (id != null && id.equals(repositoryId)) {
                isLocal = "*";
            }
            records.add(Joiner.on("|").join(id, markerTime, refTime, isLocal));
        }
        log.info("Metadata retrieved {}", records);
        return records;
    }

    private static MarkSweepGarbageCollector getCollector(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts,
        Options opts, Closer closer) throws IOException {

        BlobReferenceRetriever retriever = getRetriever(fixture, dataStoreOpts, opts);

        ExecutorService service = Executors.newSingleThreadExecutor();
        closer.register(new ExecutorCloser(service));

        String repositoryId = ClusterRepositoryInfo.getId(fixture.getStore());
        checkNotNull(repositoryId);

        MarkSweepGarbageCollector collector =
            new MarkSweepGarbageCollector(retriever, (GarbageCollectableBlobStore) fixture.getBlobStore(), service,
                dataStoreOpts.getOutDir().getAbsolutePath(), dataStoreOpts.getBatchCount(),
                SECONDS.toMillis(dataStoreOpts.getBlobGcMaxAgeInSecs()), dataStoreOpts.checkConsistencyAfterGC(),
                dataStoreOpts.sweepIfRefsPastRetention(), repositoryId, fixture.getWhiteboard(),
                getService(fixture.getWhiteboard(), StatisticsProvider.class));
        collector.setTraceOutput(true);

        return collector;
    }

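    /**
     * Selects the {@link BlobReferenceRetriever}: the native document retriever for a
     * DocumentNodeStore (when no verbose root paths are given), a full node-store
     * traversal in verbose mode (which can also report the path holding each reference),
     * and the segment retriever otherwise.
     */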
    private static BlobReferenceRetriever getRetriever(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts,
        Options opts) {
        BlobReferenceRetriever retriever;
        if (opts.getCommonOpts().isDocument() && !dataStoreOpts.hasVerboseRootPaths()) {
            retriever = new DocumentBlobReferenceRetriever((DocumentNodeStore) fixture.getStore());
        } else {
            if (dataStoreOpts.isVerbose()) {
                List<String> rootPathList = dataStoreOpts.getVerboseRootPaths();
                List<String> rootPathInclusionRegex = dataStoreOpts.getVerboseInclusionRegex();
                retriever = new NodeTraverserReferenceRetriever(fixture.getStore(),
                    rootPathList.toArray(new String[rootPathList.size()]),
                    rootPathInclusionRegex.toArray(new String[rootPathInclusionRegex.size()]),
                    dataStoreOpts.isUseDirListing());
            } else {
                ReadOnlyFileStore fileStore = getService(fixture.getWhiteboard(), ReadOnlyFileStore.class);
                retriever = new SegmentBlobReferenceRetriever(fileStore);
            }
        }
        return retriever;
    }

    private static void retrieveBlobIds(GarbageCollectableBlobStore blobStore, File blob) throws Exception {

        System.out.println("Starting dump of blob ids");
        Stopwatch watch = createStarted();

        Iterator<String> blobIter = blobStore.getAllChunkIds(0);
        int count = writeStrings(blobIter, blob, false);

        sort(blob);
        System.out.println(count + " blob ids found");
        System.out.println("Finished in " + watch.elapsed(SECONDS) + " seconds");
    }

    private static void verboseIds(BlobStoreOptions blobOpts, File readFile, File writeFile) throws IOException {
        LineIterator idIterator = FileUtils.lineIterator(readFile, UTF_8.name());

        try (BurnOnCloseFileIterator<String> iterator = new BurnOnCloseFileIterator<>(idIterator, readFile,
            (Function<String, String>) input -> VerboseIdLogger.encodeId(input, blobOpts.getBlobStoreType()))) {
            writeStrings(iterator, writeFile, true, log, "Transformed to verbose ids - ");
        }
    }

    protected static void setupLogging(DataStoreOptions dataStoreOpts) throws IOException {
        new LoggingInitializer(dataStoreOpts.getWorkDir(), NAME, dataStoreOpts.isResetLoggingConfig()).init();
    }

    private static void shutdownLogging() {
        LoggingInitializer.shutdownLogging();
    }

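    /**
     * Logs the command line arguments, filtering out Azure ("az:") and Mongo ("mongodb:")
     * connection strings, which may embed credentials that should not appear in logs.
     */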
    private static void logCliArgs(String[] args) {
        String[] filteredArgs = Arrays.stream(args).filter(str -> !str.startsWith("az:") && !str.startsWith("mongodb:"))
            .toArray(String[]::new);
        log.info("Command line arguments used for datastore command [{}]", Joiner.on(' ').join(filteredArgs));
        List inputArgs = ManagementFactory.getRuntimeMXBean().getInputArguments();
        if (!inputArgs.isEmpty()) {
            log.info("System properties and vm options passed {}", inputArgs);
        }
    }

    /**
     * {@link BlobReferenceRetriever} instance which iterates over the whole node store to find
     * referenced blobs. Useful when the paths of those blobs are needed and the underlying
     * {@link NodeStore} implementation does not provide them natively.
     */
    static class NodeTraverserReferenceRetriever implements BlobReferenceRetriever {
        private final NodeStore nodeStore;
        private final String[] paths;
        private final String[] inclusionRegex;
        private boolean useDirListing;

        public NodeTraverserReferenceRetriever(NodeStore nodeStore) {
            this(nodeStore, null, null, false);
        }

        public NodeTraverserReferenceRetriever(NodeStore nodeStore,
                                               String[] paths,
                                               String[] inclusionRegex,
                                               boolean useDirListing) {
            this.nodeStore = nodeStore;
            this.paths = paths;
            this.inclusionRegex = inclusionRegex;
            this.useDirListing = useDirListing;
        }

        private void binaryProperties(NodeState state, String path, ReferenceCollector collector) {
            for (PropertyState p : state.getProperties()) {
                String propPath = path; // the node path is used as-is (not PathUtils.concat(path, p.getName()))
                if (p.getType() == Type.BINARY) {
                    String blobId = p.getValue(Type.BINARY).getContentIdentity();
                    if (blobId != null && !p.getValue(Type.BINARY).isInlined()) {
                        collector.addReference(blobId, propPath);
                    }
                } else if (p.getType() == Type.BINARIES && p.count() > 0) {
                    Iterator<Blob> iterator = p.getValue(Type.BINARIES).iterator();
                    while (iterator.hasNext()) {
                        Blob blob = iterator.next();
                        String blobId = blob.getContentIdentity();
                        if (blobId != null && !blob.isInlined()) {
                            collector.addReference(blobId, propPath);
                        }
                    }
                }
            }
        }

        private void traverseChildren(NodeState state, String path, ReferenceCollector collector) {
            binaryProperties(state, path, collector);
            for (ChildNodeEntry c : getChildNodeEntries(state)) {
                traverseChildren(c.getNodeState(), PathUtils.concat(path, c.getName()), collector);
            }
        }

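        /**
         * When {@code useDirListing} is enabled and the node carries Lucene's
         * {@link OakDirectory#PROP_DIR_LISTING} property, only the children named in that
         * listing are visited, instead of enumerating every child node.
         */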
        private Iterable<? extends ChildNodeEntry> getChildNodeEntries(NodeState state) {
            if (useDirListing) {
                PropertyState dirListing = state.getProperty(OakDirectory.PROP_DIR_LISTING);
                if (dirListing != null && dirListing.isArray()) {
                    return StreamSupport.stream(dirListing.getValue(Type.STRINGS).spliterator(), false)
                            .map(name -> new AbstractChildNodeEntry() {
                                @Override
                                public @NotNull String getName() {
                                    return name;
                                }

                                @Override
                                public @NotNull NodeState getNodeState() {
                                    return state.getChildNode(name);
                                }
                            })
                            .filter(cne -> cne.getNodeState().exists())
                            .collect(Collectors.toList());
                }
            }

            // fallback to full traversal
            return state.getChildNodeEntries();
        }

        @Override public void collectReferences(ReferenceCollector collector) throws IOException {
            log.info("Starting dump of blob references by traversing");
            if (paths == null || paths.length == 0) {
                traverseChildren(nodeStore.getRoot(), "/", collector);
            } else {
                for (String path : paths) {
                    Iterable<String> nodeList = PathUtils.elements(path);
                    NodeState state = nodeStore.getRoot();
                    for (String node : nodeList) {
                        state = state.getChildNode(node);
                    }

                    if (inclusionRegex == null || inclusionRegex.length == 0) {
                        traverseChildren(state, path, collector);
                    } else {
                        for (String regex : inclusionRegex) {
                            Map<NodeState, String> inclusionMap = new HashMap<>();
                            getInclusionListFromRegex(state, path, regex, inclusionMap);
                            if (inclusionMap.isEmpty()) {
                                System.out.println(
                                    "No valid paths found for traversal for the inclusion regex " + regex
                                        + " under the path " + path);
                                continue;
                            }
                            for (NodeState s : inclusionMap.keySet()) {
                                traverseChildren(s, inclusionMap.get(s), collector);
                            }
                        }
                    }
                }
            }
        }

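        /**
         * Expands a limited path "regex" in which each element is either a literal name
         * or {@code *} (any child), collecting the existing target nodes along with their
         * paths. For example (hypothetical content), under {@code /content} the pattern
         * {@code *}/jcr:content resolves to every {@code /content/<x>/jcr:content} node
         * that exists.
         */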
        private void getInclusionListFromRegex(NodeState rootState, String rootPath, String inclusionRegex,
            Map<NodeState, String> inclusionNodeStates) {
            Splitter delimSplitter = Splitter.on("/").trimResults().omitEmptyStrings();
            List<String> pathElementList = delimSplitter.splitToList(inclusionRegex);

            Joiner delimJoiner = Joiner.on("/").skipNulls();

            // Get the first pathElement from the regexPath
            String pathElement = pathElementList.get(0);
            // If the pathElement == *, get all child nodes and scan under them for the rest of the regex
            if ("*".equals(pathElement)) {
                for (String nodeName : rootState.getChildNodeNames()) {
                    String rootPathTemp = PathUtils.concat(rootPath, nodeName);
                    // Remove the current Path Element from the regexPath
                    // and recurse on getInclusionListFromRegex with this childNodeState and the regexPath
                    // under the current pathElement
                    String sub = delimJoiner.join(pathElementList.subList(1, pathElementList.size()));
                    getInclusionListFromRegex(rootState.getChildNode(nodeName), rootPathTemp, sub, inclusionNodeStates);
                }
            } else {
                NodeState rootStateToInclude = rootState.getChildNode(pathElement);
                if (rootStateToInclude.exists()) {
                    inclusionNodeStates.put(rootStateToInclude, PathUtils.concat(rootPath, pathElement));
                }
            }
        }
    }


    static class VerboseIdLogger {
        static final String DELIM = ",";
        static final String DASH = "-";
        static final String HASH = "#";
        static final Comparator<String> idComparator = new Comparator<String>() {
            @Override public int compare(String s1, String s2) {
                return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
            }
        };
        private final static Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
        private final static Splitter delimSplitter = Splitter.on(DELIM).trimResults().omitEmptyStrings();

        private final BlobStoreOptions optionBean;
        private final BlobStoreOptions.Type blobStoreType;
        private final File outDir;
        private final List<File> outFileList = new ArrayList<>();

        public VerboseIdLogger(Options options) {
            this.optionBean = options.getOptionBean(BlobStoreOptions.class);
            this.blobStoreType = optionBean.getBlobStoreType();
            outDir = options.getOptionBean(DataStoreOptions.class).getOutDir();

            outFileList.add(filterFiles(outDir, "marked-"));
            outFileList.add(filterFiles(outDir, "gccand-"));

            outFileList.removeAll(Collections.singleton(null));

            if (outFileList.isEmpty()) {
                throw new IllegalArgumentException("No candidate file found");
            }
        }

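        /**
         * Returns the first file whose name starts with {@code filePrefix} inside the
         * first {@code gcworkdir-} subdirectory of the output directory, or {@code null}
         * if no such file exists.
         */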
        static File filterFiles(File outDir, String filePrefix) {
            return filterFiles(outDir, "gcworkdir-", filePrefix);
        }

        @Nullable static File filterFiles(File outDir, String dirPrefix, String filePrefix) {
            List<File> subDirs = FileFilterUtils.filterList(
                FileFilterUtils.and(FileFilterUtils.prefixFileFilter(dirPrefix), FileFilterUtils.directoryFileFilter()),
                outDir.listFiles());

            if (subDirs != null && !subDirs.isEmpty()) {
                File workDir = subDirs.get(0);
                List<File> outFiles =
                    FileFilterUtils.filterList(FileFilterUtils.prefixFileFilter(filePrefix), workDir.listFiles());

                if (outFiles != null && !outFiles.isEmpty()) {
                    return outFiles.get(0);
                }
            }

            return null;
        }

        /**
         * Encode the blob id/blob ref in a format understood by the backing datastore.
         * <p>
         * Example:
         * b47b58169f121822cd4a...#123311,/a/b/c => b47b-58169f121822cd4a...,/a/b/c (dsType = S3 or Azure)
         * b47b58169f121822cd4a...#123311 => b47b-58169f121822cd4a... (dsType = S3 or Azure)
         *
         * @param line can be either of the format b47b...#12311,/a/b/c or b47b...#12311
         * @param dsType type of the backing datastore
         * @return in case of a ref dump, the concatenated encoded blob ref in a format understood
         *         by the backing datastore implementation and the path on which the ref is
         *         present, joined by the delimiter; in case of an id dump, just the encoded blob id
         */
        static String encodeId(String line, BlobStoreOptions.Type dsType) {
            // Split the input line on ",". This is the case while dumping refs along with paths;
            // the line looks like b47b58169f121822cd4a0a0a153ba5910e581ad2bc450b6af7e51e6214c2b173#123311,/a/b/c
            // When dumping ids there are no paths associated and the line is simply
            // b47b58169f121822cd4a0a0a153ba5910e581ad2bc450b6af7e51e6214c2b173#123311
            List<String> list = delimSplitter.splitToList(line);
            String id = list.get(0);

            // Split b47b58169f121822cd4a0a0a153ba5910e581ad2bc450b6af7e51e6214c2b173#123311 on "#" to get the id
            List<String> idLengthSepList = Splitter.on(HASH).trimResults().omitEmptyStrings().splitToList(id);
            String blobId = idLengthSepList.get(0);

            if (dsType == FAKE || dsType == FDS) {
                // 0102030405... => 01/02/03/0102030405...
                blobId = (blobId.substring(0, 2) + FILE_SEPARATOR.value() + blobId.substring(2, 4)
                    + FILE_SEPARATOR.value() + blobId.substring(4, 6) + FILE_SEPARATOR.value() + blobId);
            } else if (dsType == S3 || dsType == AZURE) {
                // b47b58169f121822cd4a0... => b47b-58169f121822cd4a0...
                blobId = (blobId.substring(0, 4) + DASH + blobId.substring(4));
            }

            // Check whether the input line was dumped from blob refs or blob ids:
            // a refs dump yields a list of size 2 (the blob ref and the path on which the ref is present),
            // an ids dump yields a list of size 1 (just the id).
            if (list.size() > 1) {
                // Join back the encoded blob ref and the path on which the ref is present
                return delimJoiner.join(blobId, EscapeUtils.unescapeLineBreaks(list.get(1)));
            } else {
                // Return the encoded blob id
                return blobId;
            }
        }

        public void log() throws IOException {
            for (File outFile : outFileList) {
                File tempFile = new File(outDir, outFile.getName() + "-temp");
                FileUtils.moveFile(outFile, tempFile);

                try (BurnOnCloseFileIterator<String> iterator = new BurnOnCloseFileIterator<>(
                    FileUtils.lineIterator(tempFile, UTF_8.toString()), tempFile,
                    (Function<String, String>) input -> encodeId(input, blobStoreType))) {
                    writeStrings(iterator, outFile, true, log, "Transformed to verbose ids - ");
                }
            }
        }
    }

    public static void main(String[] args) {
        long timestamp = System.currentTimeMillis();
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
        String dt = formatter.format(timestamp);
        System.out.println(dt);
    }
}