org.apache.cassandra.io.sstable.SSTableLoader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cassandra-all Show documentation
A fork of the Apache Cassandra Project that uses Lucene indexes for providing near real time search such as ElasticSearch or Solr, including full text search capabilities, multi-dimensional queries, and relevance scoring.
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.io.sstable;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.net.InetAddress;
import java.util.*;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.Config;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.service.ActiveRepairService;
import org.apache.cassandra.streaming.*;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.OutputHandler;
import org.apache.cassandra.utils.Pair;
/**
* Cassandra SSTable bulk loader.
* Load an externally created sstable into a cluster.
*/
public class SSTableLoader implements StreamEventHandler
{
private final File directory;
private final String keyspace;
private final Client client;
private final int connectionsPerHost;
private final OutputHandler outputHandler;
private final Set failedHosts = new HashSet<>();
private final List sstables = new ArrayList();
private final Multimap streamingDetails = HashMultimap.create();
static
{
Config.setClientMode(true);
}
public SSTableLoader(File directory, Client client, OutputHandler outputHandler)
{
this(directory, client, outputHandler, 1);
}
public SSTableLoader(File directory, Client client, OutputHandler outputHandler, int connectionsPerHost)
{
this.directory = directory;
this.keyspace = directory.getParentFile().getName();
this.client = client;
this.outputHandler = outputHandler;
this.connectionsPerHost = connectionsPerHost;
}
protected Collection openSSTables(final Map>> ranges)
{
outputHandler.output("Opening sstables and calculating sections to stream");
directory.list(new FilenameFilter()
{
public boolean accept(File dir, String name)
{
if (new File(dir, name).isDirectory())
return false;
Pair p = SSTable.tryComponentFromFilename(dir, name);
Descriptor desc = p == null ? null : p.left;
if (p == null || !p.right.equals(Component.DATA) || desc.type.isTemporary)
return false;
if (!new File(desc.filenameFor(Component.PRIMARY_INDEX)).exists())
{
outputHandler.output(String.format("Skipping file %s because index is missing", name));
return false;
}
CFMetaData metadata = client.getCFMetaData(keyspace, desc.cfname);
if (metadata == null)
{
outputHandler.output(String.format("Skipping file %s: column family %s.%s doesn't exist", name, keyspace, desc.cfname));
return false;
}
Set components = new HashSet();
components.add(Component.DATA);
components.add(Component.PRIMARY_INDEX);
if (new File(desc.filenameFor(Component.SUMMARY)).exists())
components.add(Component.SUMMARY);
if (new File(desc.filenameFor(Component.COMPRESSION_INFO)).exists())
components.add(Component.COMPRESSION_INFO);
if (new File(desc.filenameFor(Component.STATS)).exists())
components.add(Component.STATS);
try
{
// To conserve memory, open SSTableReaders without bloom filters and discard
// the index summary after calculating the file sections to stream and the estimated
// number of keys for each endpoint. See CASSANDRA-5555 for details.
SSTableReader sstable = SSTableReader.openForBatch(desc, components, metadata, client.getPartitioner());
sstables.add(sstable);
// calculate the sstable sections to stream as well as the estimated number of
// keys per host
for (Map.Entry>> entry : ranges.entrySet())
{
InetAddress endpoint = entry.getKey();
Collection> tokenRanges = entry.getValue();
List> sstableSections = sstable.getPositionsForRanges(tokenRanges);
long estimatedKeys = sstable.estimatedKeysForRanges(tokenRanges);
StreamSession.SSTableStreamingSections details = new StreamSession.SSTableStreamingSections(sstable, sstableSections, estimatedKeys, ActiveRepairService.UNREPAIRED_SSTABLE);
streamingDetails.put(endpoint, details);
}
// to conserve heap space when bulk loading
sstable.releaseSummary();
}
catch (IOException e)
{
outputHandler.output(String.format("Skipping file %s, error opening it: %s", name, e.getMessage()));
}
return false;
}
});
return sstables;
}
public StreamResultFuture stream()
{
return stream(Collections.emptySet());
}
public StreamResultFuture stream(Set toIgnore, StreamEventHandler... listeners)
{
client.init(keyspace);
outputHandler.output("Established connection to initial hosts");
StreamPlan plan = new StreamPlan("Bulk Load", 0, connectionsPerHost);
Map>> endpointToRanges = client.getEndpointToRangesMap();
openSSTables(endpointToRanges);
if (sstables.isEmpty())
{
// return empty result
return plan.execute();
}
outputHandler.output(String.format("Streaming relevant part of %sto %s", names(sstables), endpointToRanges.keySet()));
for (Map.Entry>> entry : endpointToRanges.entrySet())
{
InetAddress remote = entry.getKey();
if (toIgnore.contains(remote))
continue;
List endpointDetails = new LinkedList<>();
try
{
// transferSSTables assumes references have been acquired
for (StreamSession.SSTableStreamingSections details : streamingDetails.get(remote))
{
if (!details.sstable.acquireReference())
throw new IllegalStateException();
endpointDetails.add(details);
}
plan.transferFiles(remote, endpointDetails);
}
finally
{
for (StreamSession.SSTableStreamingSections details : endpointDetails)
details.sstable.releaseReference();
}
}
plan.listeners(this, listeners);
return plan.execute();
}
public void onSuccess(StreamState finalState) {}
public void onFailure(Throwable t) {}
public void handleStreamEvent(StreamEvent event)
{
if (event.eventType == StreamEvent.Type.STREAM_COMPLETE)
{
StreamEvent.SessionCompleteEvent se = (StreamEvent.SessionCompleteEvent) event;
if (!se.success)
failedHosts.add(se.peer);
}
}
private String names(Collection sstables)
{
StringBuilder builder = new StringBuilder();
for (SSTableReader sstable : sstables)
builder.append(sstable.descriptor.filenameFor(Component.DATA)).append(" ");
return builder.toString();
}
public Set getFailedHosts()
{
return failedHosts;
}
public static abstract class Client
{
private final Map>> endpointToRanges = new HashMap>>();
private IPartitioner partitioner;
/**
* Initialize the client.
* Perform any step necessary so that after the call to the this
* method:
* * partitioner is initialized
* * getEndpointToRangesMap() returns a correct map
* This method is guaranteed to be called before any other method of a
* client.
*/
public abstract void init(String keyspace);
/**
* Stop the client.
*/
public void stop() {}
/**
* Validate that {@code keyspace} is an existing keyspace and {@code
* cfName} one of its existing column family.
*/
public abstract CFMetaData getCFMetaData(String keyspace, String cfName);
public Map>> getEndpointToRangesMap()
{
return endpointToRanges;
}
protected void setPartitioner(String partclass) throws ConfigurationException
{
setPartitioner(FBUtilities.newPartitioner(partclass));
}
protected void setPartitioner(IPartitioner partitioner)
{
this.partitioner = partitioner;
DatabaseDescriptor.setPartitioner(partitioner);
}
public IPartitioner getPartitioner()
{
return partitioner;
}
protected void addRangeForEndpoint(Range range, InetAddress endpoint)
{
Collection> ranges = endpointToRanges.get(endpoint);
if (ranges == null)
{
ranges = new HashSet>();
endpointToRanges.put(endpoint, ranges);
}
ranges.add(range);
}
}
}