org.apache.cassandra.tools.StandaloneSplitter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cassandra-all Show documentation
Show all versions of cassandra-all Show documentation
The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cassandra.tools;
import java.io.File;
import java.util.*;
import java.util.concurrent.TimeUnit;
import org.apache.cassandra.config.Schema;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.commons.cli.*;
import org.apache.cassandra.db.ColumnFamilyStore;
import org.apache.cassandra.db.compaction.OperationType;
import org.apache.cassandra.db.Directories;
import org.apache.cassandra.db.Keyspace;
import org.apache.cassandra.db.compaction.CompactionManager;
import org.apache.cassandra.db.compaction.SSTableSplitter;
import org.apache.cassandra.db.lifecycle.LifecycleTransaction;
import org.apache.cassandra.io.sstable.*;
import org.apache.cassandra.utils.JVMStabilityInspector;
import org.apache.cassandra.utils.Pair;
import static org.apache.cassandra.tools.BulkLoader.CmdLineOptions;
public class StandaloneSplitter
{
public static final int DEFAULT_SSTABLE_SIZE = 50;
private static final String TOOL_NAME = "sstablessplit";
private static final String DEBUG_OPTION = "debug";
private static final String HELP_OPTION = "help";
private static final String NO_SNAPSHOT_OPTION = "no-snapshot";
private static final String SIZE_OPTION = "size";
public static void main(String args[])
{
Options options = Options.parseArgs(args);
Util.initDatabaseDescriptor();
try
{
// load keyspace descriptions.
Schema.instance.loadFromDisk(false);
String ksName = null;
String cfName = null;
Map> parsedFilenames = new HashMap>();
for (String filename : options.filenames)
{
File file = new File(filename);
if (!file.exists()) {
System.out.println("Skipping inexisting file " + file);
continue;
}
Pair pair = SSTable.tryComponentFromFilename(file.getParentFile(), file.getName());
if (pair == null) {
System.out.println("Skipping non sstable file " + file);
continue;
}
Descriptor desc = pair.left;
if (ksName == null)
ksName = desc.ksname;
else if (!ksName.equals(desc.ksname))
throw new IllegalArgumentException("All sstables must be part of the same keyspace");
if (cfName == null)
cfName = desc.cfname;
else if (!cfName.equals(desc.cfname))
throw new IllegalArgumentException("All sstables must be part of the same table");
Set components = new HashSet(Arrays.asList(new Component[]{
Component.DATA,
Component.PRIMARY_INDEX,
Component.FILTER,
Component.COMPRESSION_INFO,
Component.STATS
}));
Iterator iter = components.iterator();
while (iter.hasNext()) {
Component component = iter.next();
if (!(new File(desc.filenameFor(component)).exists()))
iter.remove();
}
parsedFilenames.put(desc, components);
}
if (ksName == null || cfName == null)
{
System.err.println("No valid sstables to split");
System.exit(1);
}
// Do not load sstables since they might be broken
Keyspace keyspace = Keyspace.openWithoutSSTables(ksName);
ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(cfName);
String snapshotName = "pre-split-" + System.currentTimeMillis();
List sstables = new ArrayList<>();
for (Map.Entry> fn : parsedFilenames.entrySet())
{
try
{
SSTableReader sstable = SSTableReader.openNoValidation(fn.getKey(), fn.getValue(), cfs);
if (!isSSTableLargerEnough(sstable, options.sizeInMB)) {
System.out.println(String.format("Skipping %s: it's size (%.3f MB) is less than the split size (%d MB)",
sstable.getFilename(), ((sstable.onDiskLength() * 1.0d) / 1024L) / 1024L, options.sizeInMB));
continue;
}
sstables.add(sstable);
if (options.snapshot) {
File snapshotDirectory = Directories.getSnapshotDirectory(sstable.descriptor, snapshotName);
sstable.createLinks(snapshotDirectory.getPath());
}
}
catch (Exception e)
{
JVMStabilityInspector.inspectThrowable(e);
System.err.println(String.format("Error Loading %s: %s", fn.getKey(), e.getMessage()));
if (options.debug)
e.printStackTrace(System.err);
}
}
if (sstables.isEmpty()) {
System.out.println("No sstables needed splitting.");
System.exit(0);
}
if (options.snapshot)
System.out.println(String.format("Pre-split sstables snapshotted into snapshot %s", snapshotName));
for (SSTableReader sstable : sstables)
{
try (LifecycleTransaction transaction = LifecycleTransaction.offline(OperationType.UNKNOWN, sstable))
{
new SSTableSplitter(cfs, transaction, options.sizeInMB).split();
}
catch (Exception e)
{
System.err.println(String.format("Error splitting %s: %s", sstable, e.getMessage()));
if (options.debug)
e.printStackTrace(System.err);
sstable.selfRef().release();
}
}
CompactionManager.instance.finishCompactionsAndShutdown(5, TimeUnit.MINUTES);
LifecycleTransaction.waitForDeletions();
System.exit(0); // We need that to stop non daemonized threads
}
catch (Exception e)
{
System.err.println(e.getMessage());
if (options.debug)
e.printStackTrace(System.err);
System.exit(1);
}
}
/**
* filter the sstable which size is less than the expected max sstable size.
*/
private static boolean isSSTableLargerEnough(SSTableReader sstable, int sizeInMB) {
return sstable.onDiskLength() > sizeInMB * 1024L * 1024L;
}
private static class Options
{
public final List filenames;
public boolean debug;
public boolean snapshot;
public int sizeInMB;
private Options(List filenames)
{
this.filenames = filenames;
}
public static Options parseArgs(String cmdArgs[])
{
CommandLineParser parser = new GnuParser();
CmdLineOptions options = getCmdLineOptions();
try
{
CommandLine cmd = parser.parse(options, cmdArgs, false);
if (cmd.hasOption(HELP_OPTION))
{
printUsage(options);
System.exit(0);
}
String[] args = cmd.getArgs();
if (args.length == 0)
{
System.err.println("No sstables to split");
printUsage(options);
System.exit(1);
}
Options opts = new Options(Arrays.asList(args));
opts.debug = cmd.hasOption(DEBUG_OPTION);
opts.snapshot = !cmd.hasOption(NO_SNAPSHOT_OPTION);
opts.sizeInMB = DEFAULT_SSTABLE_SIZE;
if (cmd.hasOption(SIZE_OPTION))
opts.sizeInMB = Integer.parseInt(cmd.getOptionValue(SIZE_OPTION));
return opts;
}
catch (ParseException e)
{
errorMsg(e.getMessage(), options);
return null;
}
}
private static void errorMsg(String msg, CmdLineOptions options)
{
System.err.println(msg);
printUsage(options);
System.exit(1);
}
private static CmdLineOptions getCmdLineOptions()
{
CmdLineOptions options = new CmdLineOptions();
options.addOption(null, DEBUG_OPTION, "display stack traces");
options.addOption("h", HELP_OPTION, "display this help message");
options.addOption(null, NO_SNAPSHOT_OPTION, "don't snapshot the sstables before splitting");
options.addOption("s", SIZE_OPTION, "size", "maximum size in MB for the output sstables (default: " + DEFAULT_SSTABLE_SIZE + ")");
return options;
}
public static void printUsage(CmdLineOptions options)
{
String usage = String.format("%s [options] []*", TOOL_NAME);
StringBuilder header = new StringBuilder();
header.append("--\n");
header.append("Split the provided sstables files in sstables of maximum provided file size (see option --" + SIZE_OPTION + ")." );
header.append("\n--\n");
header.append("Options are:");
new HelpFormatter().printHelp(usage, header.toString(), options, "");
}
}
}