/*-
* Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle NoSQL
* Database made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle NoSQL Database for a copy of the license and
* additional information.
*/
package oracle.kv.impl.rep.table;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level;
import java.util.logging.Logger;
import oracle.kv.Key;
import oracle.kv.Key.BinaryKeyIterator;
import oracle.kv.impl.api.table.DroppedTableException;
import oracle.kv.impl.api.table.IndexImpl;
import oracle.kv.impl.api.table.NameUtils;
import oracle.kv.impl.api.table.TableImpl;
import oracle.kv.impl.api.table.TableMetadata;
import oracle.kv.impl.fault.DatabaseNotReadyException;
import oracle.kv.impl.fault.RNUnavailableException;
import oracle.kv.impl.fault.WrappedClientException;
import oracle.kv.impl.measurement.TableInfo;
import oracle.kv.impl.metadata.Metadata;
import oracle.kv.impl.metadata.Metadata.MetadataType;
import oracle.kv.impl.metadata.MetadataInfo;
import oracle.kv.impl.rep.MetadataManager;
import oracle.kv.impl.rep.RepNode;
import oracle.kv.impl.rep.RepNodeService.Params;
import oracle.kv.impl.rep.admin.ResourceInfo;
import oracle.kv.impl.rep.admin.ResourceInfo.UsageRecord;
import oracle.kv.impl.rep.admin.ResourceInfo.RateRecord;
import oracle.kv.impl.rep.table.SecondaryInfoMap.DeletedTableInfo;
import oracle.kv.impl.rep.table.SecondaryInfoMap.SecondaryInfo;
import oracle.kv.impl.test.TestHook;
import oracle.kv.impl.tif.TextIndexFeederManager;
import oracle.kv.impl.topo.PartitionId;
import oracle.kv.impl.topo.RepNodeId;
import oracle.kv.impl.util.DatabaseUtils;
import oracle.kv.impl.util.StateTracker;
import oracle.kv.impl.util.TxnUtil;
import oracle.kv.table.Index;
import oracle.kv.table.Table;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.SecondaryAssociation;
import com.sleepycat.je.SecondaryDatabase;
import com.sleepycat.je.rep.ReplicatedEnvironment;
import com.sleepycat.je.rep.StateChangeEvent;
/**
* Manages the secondary database handles for the rep node. The TableManager,
* SecondaryInfoMap, and MaintenanceThread classes are tightly coupled; some
* operations are safe only because of the way these classes are known to
* invoke one another.
*/
public class TableManager extends MetadataManager
implements SecondaryAssociation {
/*
* Create the name of the secondary DB based on the specified index
* and table names. Use index name first. Format is:
* indexName.tableName[:namespace]
* The index name is first to more easily handle parent/child names in
* tableName (parent.child).
*
* The ":" is used to separate the namespace for the same reason.
*/
public static String createDbName(String namespace,
String indexName,
String tableName) {
final StringBuilder sb = new StringBuilder();
sb.append(indexName).append(".").append(tableName);
if (namespace != null) {
sb.append(":").append(namespace);
}
return sb.toString();
}
/*
* Gets the table name from a secondary database name.
*/
static String getTableName(String dbName) {
if (dbName.contains(":")) {
return dbName.split(":")[0].split("\\.", 2)[1];
}
return dbName.split("\\.", 2)[1];
}
static String getNamespace(String dbName) {
if (dbName.contains(":")) {
return dbName.split(":", 2)[1];
}
return null;
}
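/*
* Illustrative example (not in the original source): for index "idx1" on
* table "parent.child" in namespace "ns1", createDbName() yields
* "idx1.parent.child:ns1"; getTableName() then recovers "parent.child" and
* getNamespace() recovers "ns1". With a null namespace the name is just
* "idx1.parent.child" and getNamespace() returns null.
*/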
private final Params params;
private final StateTracker stateTracker;
/*
* The table metadata. TableMetadata is not thread safe. The instance
* referenced by tableMetadata should be treated as immutable. Updates
* to the table metadata must be made to a copy first and then replace the
* reference with the copy.
*/
private volatile TableMetadata tableMetadata = null;
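/*
* A minimal sketch (illustrative only) of the copy-then-replace pattern
* described above, as used by updateMetadata(MetadataInfo):
*
*   TableMetadata copy = tableMetadata.getCopy(); // copy current metadata
*   copy.update(metadataInfo);                    // mutate only the copy
*   tableMetadata = copy;                         // then swap the reference
*/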
private final int maxChanges;
/*
* Map of secondary database handles. Modification or iteration can only
* be made within the maintenance thread, or with the threadLock held and
* the maintenance thread stopped.
*/
private final Map<String, SecondaryDatabase> secondaryDbMap =
new HashMap<>();
/*
* Thread used to asynchronously open secondary database handles and
* perform other maintenance operations such as populating secondary DBs.
*/
private MaintenanceThread maintenanceThread = null;
/*
* Lock controlling the maintenanceThread and modification to
* secondaryDbMap. If both object synchronization and the threadLock are
* needed, the object synchronization should be acquired first.
*/
private final ReentrantLock threadLock = new ReentrantLock();
/**
* Set to true if there are ongoing maintenance operations. The
* isBusyMaintenance field and associated accessor method are used to
* interlock table maintenance with partition migration. The interlock
* scheme is:
* 1) a maintenance thread sets isBusyMaintenance
* 2) the maintenance thread waits for migration to complete (by calling
* either awaitIdle or awaitTargetIdle on the migration manager)
* 3) the maintenance thread performs the maintenance operation
* 4) when maintenance is completed (or is interrupted)
* isBusyMaintenance is cleared
*
* Note: This assumes only one maintenance thread is running at a time. If
* this changes, the busy indicator will need to be reworked.
*/
private volatile boolean isBusyMaintenance;
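/*
* Illustrative sketch of the interlock described above (the real logic
* lives in MaintenanceThread; this is not a verbatim excerpt):
*
*   setBusyMaintenance();
*   try {
*       // wait for partition migration to go idle (awaitIdle/awaitTargetIdle)
*       // ... perform the guarded maintenance work ...
*   } finally {
*       clearBusyMaintenance();
*   }
*/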
/*
* The TableMetadata sequence number. It is updated in updateDbHandles().
*/
public int metadataSeqNum = TableMetadata.EMPTY_SEQUENCE_NUMBER;
/*
* The map is an optimization for secondary DB lookup. This lookup happens
* on every operation and so should be as efficient as possible. This map
* parallels the Table map in TableMetadata except that it only contains
* entries for tables which contain indexes (and therefore reference
* secondary DBs). The map is reconstructed whenever the table MD is
* updated.
*
* If the map is null, the table metadata has not yet been initialized and
* operations should not be allowed.
*
* Note that secondaryLookupMap may be out-of-date relative to the
* non-synchronized tableMetadata.
*/
private volatile Map<String, TableEntry> secondaryLookupMap = null;
/*
* Map to lookup a table via its ID.
*
* If the map is null, the table metadata has not yet been initialized and
* operations should not be allowed.
*
* Note that idLookupMap may be out-of-date relative to the non-synchronized
* tableMetadata.
*/
private volatile Map<Long, TableImpl> idLookupMap = null;
/*
* Maximum of all table IDs seen so far, including child tables. A value
* of 0 indicates that the table metadata has not yet been updated.
*/
private volatile long maxTableId = 0;
/**
* Map to lookup a top-level table via its Key as a byte array.
*
* The map uses {@link IDBytesComparator} to allow lookup of a full record
* key (as a byte array). Since the map only contains top-level tables,
* only the first component of the key (the table ID) is compared.
*
* If the map is null, the table metadata has not yet been initialized and
* operations should not be allowed.
*
* A TreeMap is used, rather than a HashMap, because we cannot override
* the equals/hashcode methods for the byte array.
*/
private volatile Map<byte[], TableImpl> idBytesLookupMap = null;
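/*
* Illustrative note (not in the original source): entries are keyed by
* TableImpl.getIDBytes() of top-level tables, while getTable(byte[]) passes a
* full record key to idBytesLookupMap.get(); IDBytesComparator stops at the
* first key delimiter, so only the leading table-ID component is compared.
*/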
/**
* Map to look up an r2compat table ID via its name. It is used to
* accelerate the r2compat table match test in permission checking.
*
* If the map is null, the table metadata has not yet been initialized and
* operations should not be allowed.
*
* Note that r2NameIdLookupMap may be out-of-date relative to the
* non-synchronized tableMetadata.
*/
private volatile Map<String, Long> r2NameIdLookupMap = null;
/*
* Maps a table ID to a throughput collector object. A collector is
* present only if throughput limits are present on that table. There is
* only one collector per table hierarchy. Child table IDs are included
* in the map and will reference the collector instance for the top
* level table.
*/
private volatile Map<Long, ThroughputCollector> collectorLookupMap = null;
/* Test hook invoked before removing a secondary database */
public static TestHook BEFORE_REMOVE_HOOK;
/* Test hook invoked after removing a secondary database */
public static TestHook AFTER_REMOVE_HOOK;
public TableManager(RepNode repNode, Params params) {
super(repNode, params);
this.params = params;
maxChanges = params.getRepNodeParams().getMaxTopoChanges();
logger.log(Level.INFO, "Table manager created (max change history={0})",
maxChanges);
stateTracker = new TableManagerStateTracker(logger);
}
@Override
public void shutdown() {
stateTracker.shutdown();
shutdownMaintenance();
super.shutdown();
}
/**
* Returns the table metadata object. Returns null if there was an error.
*
* @return the table metadata object or null
*/
public TableMetadata getTableMetadata() {
final TableMetadata currentTableMetadata = tableMetadata;
if (currentTableMetadata != null) {
return currentTableMetadata;
}
synchronized (this) {
if (tableMetadata == null) {
try {
tableMetadata = fetchMetadata();
} catch (DatabaseNotReadyException dnre) {
/* DB not ready, ignore */
return null;
}
/*
* If the DB is empty, we create a new instance so
* that we don't keep re-reading.
*/
if (tableMetadata == null) {
/*
* Keep change history because it's used to push to
* other RNs (in other shards) that need metadata
* updates. See MetadataUpdateThread.
*/
tableMetadata = new TableMetadata(true);
}
}
}
return tableMetadata;
}
/**
* Returns the table metadata sequence number.
*
* @return the table metadata sequence number
*/
public int getTableMetadataSeqNum() {
return metadataSeqNum;
}
/* -- public index APIs -- */
/**
* Returns true if the specified index has been successfully added.
*
* @param namespace the table namespace
* @param indexName the index name
* @param tableName the fully qualified table name
* @return true if the specified index has been successfully added
*/
public boolean addIndexComplete(String namespace,
String indexName,
String tableName) {
final ReplicatedEnvironment repEnv = repNode.getEnv(1);
if (repEnv == null) {
return false;
}
final SecondaryInfoMap secondaryInfoMap = getSecondaryInfoMap(repEnv);
/* If there is an issue reading the info object, punt */
if (secondaryInfoMap == null) {
return false;
}
final String dbName = createDbName(namespace, indexName, tableName);
final SecondaryInfo info = secondaryInfoMap.getSecondaryInfo(dbName);
if (info == null) {
logger.log(Level.FINE, "addIndexComplete({0}), info is null, " +
"returning false", new Object[]{dbName});
return false;
}
final String msg = info.getErrorString();
if (msg != null) {
logger.log(Level.INFO,
"addIndexComplete({0}) throwing exception {1}",
new Object[]{dbName, msg});
throw new WrappedClientException(
new IllegalArgumentException(msg));
}
logger.log(Level.FINE, "addIndexComplete({0}) info is {1}",
new Object[]{dbName, info});
return !info.needsPopulating();
}
/**
* Returns true if the data associated with the specified table has been
* removed from the store.
*
* @param namespace the table namespace
* @param tableName the fully qualified table name
* @return true if the table data has been removed
*/
public boolean removeTableDataComplete(String namespace, String tableName) {
final ReplicatedEnvironment repEnv = repNode.getEnv(1);
if (repEnv == null) {
return false;
}
final SecondaryInfoMap secondaryInfoMap = getSecondaryInfoMap(repEnv);
/* If there is an issue reading the info object, punt */
if (secondaryInfoMap == null) {
return false;
}
final DeletedTableInfo info =
secondaryInfoMap.getDeletedTableInfo(
NameUtils.makeQualifiedName(namespace, tableName));
if (info != null) {
return info.isDone();
}
final TableMetadata md = getTableMetadata();
return (md == null) ? false :
(md.getTable(namespace, tableName) == null);
}
/**
* Gets the table instance for the specified ID. If no table is defined,
* or the table is being deleted, null is returned.
*
* @param tableId a table ID
* @return the table instance or null
* @throws RNUnavailableException if the table metadata is not yet
* initialized
*/
public TableImpl getTable(long tableId) {
final TableImpl table = getTableInternal(tableId);
return table == null ? null : table.isDeleting() ? null : table;
}
/**
* Gets the table instance for the specified ID. If no table is defined
* null is returned. Note that the table returned may be in a deleting
* state.
*
* @param tableId a table ID
* @return the table instance or null
* @throws RNUnavailableException if the table metadata is not yet
* initialized
*/
TableImpl getTableInternal(long tableId) {
final Map<Long, TableImpl> map = idLookupMap;
if (map == null) {
/* Throwing RNUnavailableException should cause a retry */
throw new RNUnavailableException(
"Table metadata is not yet initialized");
}
return map.get(tableId);
}
/**
* Gets the table instance for a given record key as a byte array.
*
* @param key the record key as a byte array.
* @return the table instance or null if the key is not a table key.
* @throws RNUnavailableException if the table metadata is not yet
* initialized
* @throws DroppedTableException if the key is not for an existing table,
* and the key does not appear to be a non-table (KV API) key.
*
* See IDBytesComparator.
*/
public TableImpl getTable(byte[] key) {
final Map<byte[], TableImpl> map = idBytesLookupMap;
if (map == null) {
/* Throwing RNUnavailableException should cause a retry */
throw new RNUnavailableException(
"Table metadata is not yet initialized");
}
TableImpl table = map.get(key);
final int nextOff = Key.findNextComponent(key, 0);
if (table == null) {
/* Check for a dropped top-level table. */
TableImpl.checkForDroppedTable(key, 0, nextOff, maxTableId);
return null;
}
table = table.findTargetTable(key, nextOff + 1, maxTableId);
if (table == null) {
return null;
}
/* A "deleting" table is considered to be dropped */
if (table.isDeleting()) {
throw new DroppedTableException();
}
return table;
}
/**
* Gets the table instance for the specified table name. Returns null if no
* table with that name is in the metadata.
*
* @param namespace the table namespace
* @param tableName a table name
* @return the table instance or null
*/
TableImpl getTable(String namespace,
String tableName) {
final TableMetadata md = getTableMetadata();
return (md == null) ? null : md.getTable(namespace, tableName);
}
/**
* Gets an r2-compat table instance for the specified table name. Returns
* null if no r2-compat table with that name is defined.
*
* @param tableName a table name
* @return the table instance or null
*/
public TableImpl getR2CompatTable(String tableName) {
final Map<String, Long> map = r2NameIdLookupMap;
if (map == null) {
/* Throwing RNUnavailableException should cause a retry */
throw new RNUnavailableException(
"Table metadata is not yet initialized");
}
final Long tableId = map.get(tableName);
return tableId == null ? null : getTable(tableId);
}
/**
* Gets the params used to construct this instance.
*/
Params getParams() {
return params;
}
/**
* Gets the secondaryInfoMap for read-only. Returns null if there was an
* error.
*/
private SecondaryInfoMap getSecondaryInfoMap(ReplicatedEnvironment repEnv) {
try {
return SecondaryInfoMap.fetch(repEnv);
} catch (RuntimeException re) {
DatabaseUtils.handleException(re, logger, "index populate info");
}
return null;
}
/* -- Secondary DB methods -- */
/**
* Gets the secondary database of the specified name. Returns null if the
* secondary database does not exist.
*
* @param dbName the name of a secondary database
* @return a secondary database or null
*/
SecondaryDatabase getSecondaryDb(String dbName) {
return secondaryDbMap.get(dbName);
}
/**
* Gets the secondary database for the specified index. Returns null if the
* secondary database does not exist.
*
* @param indexName the index name
* @param tableName the table name
* @return a secondary database or null
*/
public SecondaryDatabase getIndexDB(String namespace,
String indexName,
String tableName) {
return getSecondaryDb(createDbName(namespace, indexName, tableName));
}
/**
* Closes all secondary DB handles.
*/
@Override
public synchronized void closeDbHandles() {
logger.log(Level.INFO, "Closing secondary database handles");
threadLock.lock();
try {
shutdownMaintenance();
final Iterator<SecondaryDatabase> itr =
secondaryDbMap.values().iterator();
while (itr.hasNext()) {
closeSecondaryDb(itr.next());
itr.remove();
}
} finally {
threadLock.unlock();
super.closeDbHandles();
}
}
/**
* Closes the specified secondary database. Returns true if the close
* succeeded, or the database was not open.
*
* @param dbName the name of the secondary DB to close
* @return true on success
*/
boolean closeSecondary(String dbName) {
return closeSecondaryDb(secondaryDbMap.get(dbName));
}
/**
* Closes the specified secondary DB. Returns true if the close was
* successful or db is null.
*
* @param db secondary database to close
* @return true if successful or db is null
*/
boolean closeSecondaryDb(SecondaryDatabase db) {
if (db != null) {
try {
db.close();
} catch (RuntimeException re) {
logger.log(Level.INFO, "close of secondary DB failed: {0}",
re.getMessage());
return false;
}
}
return true;
}
/* -- From SecondaryAssociation -- */
@Override
public boolean isEmpty() {
/*
* This method is called on every operation. It must be as fast
* as possible.
*/
final Map<String, TableEntry> map = secondaryLookupMap;
if (map == null) {
/* Throwing RNUnavailableException should cause a retry */
throw new RNUnavailableException(
"Table metadata is not yet initialized");
}
return map.isEmpty();
}
@Override
public Database getPrimary(DatabaseEntry primaryKey) {
return repNode.getPartitionDB(primaryKey.getData());
}
@SuppressWarnings("unchecked")
@Override
public Collection<SecondaryDatabase>
getSecondaries(DatabaseEntry primaryKey) {
/*
* This is not synchronized, so the map may have been nulled since isEmpty()
* was called.
*/
final Map<String, TableEntry> map = secondaryLookupMap;
if (map == null) {
/* Throwing RNUnavailableException should cause a retry */
throw new RNUnavailableException(
"Table metadata is not yet initialized");
}
/* Start by looking up the top level table for this key */
final BinaryKeyIterator keyIter =
new BinaryKeyIterator(primaryKey.getData());
/* The first element of the key will be the top level table */
final String rootId = keyIter.next();
final TableEntry entry = map.get(rootId);
/* The entry could be null if the table doesn't have indexes */
if (entry == null) {
return Collections.EMPTY_SET;
}
/* We have a table with indexes, match the rest of the key. */
final Collection<String> matchedIndexes = entry.matchIndexes(keyIter);
/* This could be null if the key did not match any table with indexes */
if (matchedIndexes == null) {
return Collections.EMPTY_SET;
}
final List<SecondaryDatabase> secondaries =
new ArrayList<>(matchedIndexes.size());
/*
* Get the DB handle for each of the matched indexes. Opening of the DBs
* is asynchronous, so we may not be able to complete the list. In this
* case throw an exception and hopefully the operation will be retried.
*/
for (String dbName : matchedIndexes) {
final SecondaryDatabase db = secondaryDbMap.get(dbName);
if (db == null) {
/* Throwing RNUnavailableException should cause a retry */
throw new RNUnavailableException(
"Secondary db not yet opened " + dbName);
}
secondaries.add(db);
}
return secondaries;
}
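/*
* Worked example of the lookup above (illustrative; the key layout shown is
* hypothetical): for a key with components [rootTableId, pk1, pk2], rootTableId
* selects a TableEntry from secondaryLookupMap, TableEntry.matchIndexes() skips
* the table's primary key components, and the returned secondary DB names are
* resolved against secondaryDbMap. Any name whose handle is not yet open causes
* the RNUnavailableException above, prompting a retry.
*/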
/* -- Metadata update methods -- */
/**
* Updates the table metadata with the specified metadata object. Returns
* true if the update is successful.
*
* @param newMetadata a new metadata
* @return true if the update is successful
*/
public synchronized boolean updateMetadata(Metadata<?> newMetadata) {
if (!(newMetadata instanceof TableMetadata)) {
throw new IllegalStateException("Bad metadata?" + newMetadata);
}
/* If no env, or not a master then we can't do an update */
final ReplicatedEnvironment repEnv = repNode.getEnv(1);
if ((repEnv == null) || !repEnv.getState().isMaster()) {
return false;
}
final TableMetadata md = getTableMetadata();
/* Can't update if we can't read it */
if (md == null) {
return false;
}
/* If the current md is up-to-date or newer, exit */
if (md.getSequenceNumber() >= newMetadata.getSequenceNumber()) {
return true;
}
logger.log(Level.INFO, "Updating table metadata with {0}", newMetadata);
return update((TableMetadata)newMetadata, repEnv);
}
/**
* Updates the table metadata with the specified metadata info object.
* Returns the sequence number of the table metadata at the end of the
* update.
*
* @param metadataInfo a table metadata info object
* @return the post update sequence number of the table metadata
*/
public synchronized int updateMetadata(MetadataInfo metadataInfo) {
final ReplicatedEnvironment repEnv = repNode.getEnv(1);
/* If no env or error then report back that we don't have MD */
if (repEnv == null) {
return Metadata.EMPTY_SEQUENCE_NUMBER;
}
final TableMetadata md = getTableMetadata();
if (md == null) {
return Metadata.EMPTY_SEQUENCE_NUMBER;
}
/* Only a master can actually update the metadata */
if (!repEnv.getState().isMaster()) {
logger.log(Level.FINE, "Metadata update attempted at replica");
return md.getSequenceNumber();
}
/*
* If the current seq num is >= that of the highest number in the
* update, skip it; we have a more current version.
*/
if (md.getSequenceNumber() >= metadataInfo.getSourceSeqNum()) {
logger.log(Level.FINE, "Metadata not updated, current seq " +
"number {0} is >= the update {1}",
new Object[]{md.getSequenceNumber(),
metadataInfo.getSourceSeqNum()});
return md.getSequenceNumber();
}
final TableMetadata newMetadata = md.getCopy();
try {
logger.log(Level.FINE,
"Attempting to update table metadata, seq num {0}," +
" with {1}", new Object[] {md.getSequenceNumber(),
metadataInfo});
if (newMetadata.update(metadataInfo)) {
logger.log(Level.FINE,
"Successful update of table metadata with {0}," +
" resulting seq num {1}",
new Object[] {metadataInfo,
newMetadata.getSequenceNumber()});
if (update(newMetadata, repEnv)) {
return newMetadata.getSequenceNumber();
}
}
} catch (Exception e) {
logger.log(Level.WARNING,
"Error updating table metadata with " +
metadataInfo, e);
}
logger.log(Level.INFO, "Metadata update failed, current seq number {0}",
md.getSequenceNumber());
/* Update failed, return the current seq # */
return md.getSequenceNumber();
}
/**
* Persists the specified table metadata and updates the secondary DB
* handles. The metadata is persisted only if this node is the master.
*
* @return true if the update was successful, or false if in shutdown or
* unable to restart text index feeder.
*/
private boolean update(final TableMetadata newMetadata,
ReplicatedEnvironment repEnv) {
assert Thread.holdsLock(this);
assert repEnv != null;
/* Only the master can persist the metadata */
if (repEnv.getState().isMaster()) {
if (!persistMetadata(newMetadata, maxChanges)) {
return false; /* In shutdown */
}
}
/*
* The TIF needs to see the new metadata in the RepNode when it
* restarts, so we set it now; however we must save the old metadata in
* case the TIF metadata update fails in some way, so that it can be
* restored. The success or failure of a metadata update is ultimately
* determined by whether the new metadata was installed, so if there
* was a failure we want to make sure we don't return from here with
* the new metadata installed.
*/
final TableMetadata oldMetadata = tableMetadata;
/* Tentatively update the cached metadata. */
tableMetadata = newMetadata;
/* Notify the text index feeder, if there is one. */
final TextIndexFeederManager tifm = repNode.getTextIndexFeederManager();
if (tifm != null) {
try {
tifm.newTableMetadata(oldMetadata, newMetadata);
} catch (Exception e) {
tableMetadata = oldMetadata; /* update failed, undo */
throw e;
}
}
/* Finally, update the DB handles */
updateDbHandles(repEnv);
return true;
}
/**
* Updates the secondary database handles based on the current table
* metadata. Update of the handles is done asynchronously. If an update
* thread is already running it is stopped and a new thread is started.
*
* This is called by the RN when 1) the handles have to be renewed due to
* an environment change 2) when the topology changes, and 3) when the
* table metadata is updated.
*
* The table maps are also set.
*
* @param repEnv the replicated environment handle
*
* @return true if table metadata handle needed to be updated.
*/
@Override
public synchronized boolean updateDbHandles(ReplicatedEnvironment repEnv) {
final boolean refresh = super.updateDbHandles(repEnv);
if (updateTableMaps(repEnv)) {
requestMaintenanceUpdate();
}
return refresh;
}
/**
* Rebuilds the secondaryLookupMap and idLookupMap maps. Returns true if
* the update was successful. If false is returned the table maps have been
* set such that operations will be disabled.
*
* @return true if the update was successful
*/
private synchronized boolean updateTableMaps(ReplicatedEnvironment repEnv) {
assert repEnv != null;
final TableMetadata tableMd = getTableMetadata();
/*
* If env is invalid, or tableMD null, disable ops
*/
if (!repEnv.isValid() || (tableMd == null)) {
secondaryLookupMap = null;
idLookupMap = null;
idBytesLookupMap = null;
r2NameIdLookupMap = null;
collectorLookupMap = null;
return false;
}
metadataSeqNum = tableMd.getSequenceNumber();
maxTableId = tableMd.getMaxTableId();
/*
* If empty, then a quick return. Note that we return true so that
* the update thread runs because there may have been tables/indexes
* that need cleaning.
*/
if (tableMd.isEmpty()) {
secondaryLookupMap = Collections.emptyMap();
idLookupMap = Collections.emptyMap();
idBytesLookupMap = Collections.emptyMap();
r2NameIdLookupMap = Collections.emptyMap();
collectorLookupMap = Collections.emptyMap();
return true;
}
// TODO - optimize if the MD has not changed?
final Map<String, TableEntry> slm = new HashMap<>();
final Map<Long, TableImpl> ilm = new HashMap<>();
final Map<String, Long> r2nilm = new HashMap<>();
final Map<Long, ThroughputCollector> clm = new HashMap<>();
final Map<byte[], TableImpl> iblm =
new TreeMap<>(new IDBytesComparator());
/* Loop through the top level tables */
for (Table table : tableMd.getTables().values()) {
final TableImpl tableImpl = (TableImpl)table;
/*
* Add an entry for each table that has indexes somewhere in its
* hierarchy.
*/
final TableEntry entry = new TableEntry(tableImpl);
if (entry.hasSecondaries()) {
slm.put(tableImpl.getIdString(), entry);
}
/*
* tc is non-null if the table hierarchy has limits. Limits are
* set on the top level table only.
*/
ThroughputCollector tc;
if (tableImpl.hasThroughputLimits() ||
tableImpl.hasSizeLimit()) {
tc = collectorLookupMap == null ? null :
collectorLookupMap.get(tableImpl.getId());
/*
* If no collector exists, create a new one. Otherwise
* reuse the previous one, updating the table instance.
*/
if (tc == null) {
tc = new ThroughputCollector(tableImpl,
repNode.getAggrateThroughputTracker());
} else {
tc.updateTable(tableImpl);
}
} else {
tc = null;
}
/*
* The id map has an entry for each table, so descend into its
* hierarchy.
*/
addToMap(tableImpl, ilm, r2nilm, clm, tc);
/* The ID bytes map has entries only for top-level tables. */
iblm.put(tableImpl.getIDBytes(), tableImpl);
}
secondaryLookupMap = slm;
idLookupMap = ilm;
idBytesLookupMap = iblm;
r2NameIdLookupMap = r2nilm;
collectorLookupMap = clm;
return true;
}
private void addToMap(TableImpl tableImpl,
Map<Long, TableImpl> map,
Map<String, Long> r2Map,
Map<Long, ThroughputCollector> clMap,
ThroughputCollector tc) {
map.put(tableImpl.getId(), tableImpl);
if (tc != null) {
clMap.put(tableImpl.getId(), tc);
}
if (tableImpl.isR2compatible()) {
r2Map.put(tableImpl.getFullName(), tableImpl.getId());
}
for (Table child : tableImpl.getChildTables().values()) {
addToMap((TableImpl)child, map, r2Map, clMap, tc);
}
}
/**
* Starts the state tracker
* TODO - Perhaps start the tracker on-demand in noteStateChange()?
*/
public void startTracker() {
stateTracker.start();
}
/**
* Notes a state change in the replicated environment. The actual
* work to change state is made asynchronously to allow a quick return.
*/
public void noteStateChange(StateChangeEvent stateChangeEvent) {
stateTracker.noteStateChange(stateChangeEvent);
}
@Override
protected MetadataType getType() {
return MetadataType.TABLE;
}
/**
* Refreshes the table metadata due to it being updated by the master.
* Called from the database trigger.
*/
@Override
protected synchronized void update(ReplicatedEnvironment repEnv) {
/*
* This will force the tableMetadata to be re-read from the db (in
* updateDBHandles)
*/
tableMetadata = null;
updateDbHandles(repEnv);
}
/**
* Note that this method should only be called from the maintenance thread.
*/
Map<String, SecondaryDatabase> getSecondaryDbMap() {
return secondaryDbMap;
}
/**
* Gets the throughput collector for the specified table, if the table
* has throughput limits enforced. If the TableManager has not been
* initialized or there is no collector for the table, null is returned.
*
* @param tableId a table ID
* @return the throughput collector for the specified table or null
*/
public ThroughputCollector getThroughputCollector(long tableId) {
final Map<Long, ThroughputCollector> clm = collectorLookupMap;
return (clm == null) ? null : clm.get(tableId);
}
public ResourceInfo getResourceInfo(long sinceMillis,
Collection<UsageRecord> usageRecords,
RepNodeId repNodeId, int topoSeqNum) {
final Map<Long, ThroughputCollector> clm = collectorLookupMap;
if (clm == null) {
return null;
}
/* Reset the previous read/write rates. */
for (ThroughputCollector tc : clm.values()) {
tc.resetReadWriteRates();
}
/* If limit records were sent, update the TCs */
if (usageRecords != null) {
for (UsageRecord ur : usageRecords) {
final ThroughputCollector tc = clm.get(ur.getTableId());
if (tc != null) {
tc.report(ur.getSize(),
ur.getReadRate(),
ur.getWriteRate());
}
}
}
/*
* If sinceMillis is <= 0 return an empty ResourceInfo. We send this
* instead of null so that the topo sequence number is sent.
*/
if (sinceMillis <= 0) {
return new ResourceInfo(repNodeId, topoSeqNum, null);
}
/* Get rate information for current tables */
final Map<Long, TableImpl> map = idLookupMap;
if (map == null) {
return null;
}
final Set<RateRecord> records = new HashSet<>();
/* Convert now and since times to seconds */
final long nowSec = MILLISECONDS.toSeconds(System.currentTimeMillis());
final long sinceSec = MILLISECONDS.toSeconds(sinceMillis);
for (TableImpl table : map.values()) {
/* Only need to get records for the top level tables */
if (!table.isTop()) {
continue;
}
final ThroughputCollector tc = clm.get(table.getId());
if (tc != null) {
tc.collectRateRecords(records, sinceSec, nowSec);
}
}
return new ResourceInfo(repNodeId, topoSeqNum, records);
}
/**
* Returns a set of table info objects. An info object will be included
* in the set if there has been activity on that table since the last call
* to getTableInfo. That is, if the table total read or write KBytes is
* non-zero. If the TableManager has not been initialized, or there
* are no tables with activity, null is returned.
*
* Info objects are only generated for top-level tables.
*
* @return a set of table info objects
*/
public Set<TableInfo> getTableInfo() {
final Map<Long, TableImpl> ilm = idLookupMap;
final Map<Long, ThroughputCollector> clm = collectorLookupMap;
if ((ilm == null) || (clm == null)) {
return null;
}
final long currentTimeMillis = System.currentTimeMillis();
final Set<TableInfo> infoSet = new HashSet<>();
for (TableImpl table : ilm.values()) {
/* Only need to get records for the top level tables */
if (!table.isTop()) {
continue;
}
final ThroughputCollector tc = clm.get(table.getId());
if (tc != null) {
final TableInfo info = tc.getTableInfo(currentTimeMillis);
if (info != null) {
infoSet.add(info);
}
}
}
return infoSet.isEmpty() ? null : infoSet;
}
/**
* Checks if the specified table has any indexes (if indexes != null) or if
* the table is deleted and needs its data removed. If it has indexes, add
* them to the indexes map. If the table is marked for deletion add it to the
* deletedTables map.
*
* If the table has children, recursively check those.
*/
static void scanTable(TableImpl table,
Map<String, IndexImpl> indexes,
Set<TableImpl> deletedTables) {
if (table.getStatus().isDeleting()) {
// TODO - should we check for consistency? If so, exception?
if (!table.getChildTables().isEmpty()) {
throw new IllegalStateException("Table " + table +
" is deleted but has children");
}
if (!table.getIndexes(Index.IndexType.SECONDARY).isEmpty()) {
throw new IllegalStateException("Table " + table +
" is deleted but has indexes");
}
deletedTables.add(table);
return;
}
for (Table child : table.getChildTables().values()) {
scanTable((TableImpl)child, indexes, deletedTables);
}
if (indexes == null) {
return;
}
for (Index index :
table.getIndexes(Index.IndexType.SECONDARY).values()) {
indexes.put(createDbName(table.getInternalNamespace(),
index.getName(),
table.getFullName()),
(IndexImpl)index);
}
}
/**
* Thread to manage replicated environment state changes.
*/
private class TableManagerStateTracker extends StateTracker {
TableManagerStateTracker(Logger logger) {
super(TableManagerStateTracker.class.getSimpleName(),
repNode.getRepNodeId(), logger,
repNode.getExceptionHandler());
}
@Override
protected void doNotify(StateChangeEvent sce) {
logger.log(Level.INFO, "Table manager changing state to {0}",
sce.getState());
/**
* Run the maintenance thread if needed. If the master, then start
* the maintenance thread in case maintenance operations need to
* resume. Otherwise, if the thread is already running make a
* request for update so that the thread notices the state change.
*/
if (sce.getState().isMaster() || sce.getState().isReplica()) {
requestMaintenanceUpdate();
} else {
shutdownMaintenance();
}
}
}
/**
* A container class for quick lookup of secondary DBs.
*/
private static class TableEntry {
private final int keySize;
private final Set<String> secondaries = new HashSet<>();
private final Map<String, TableEntry> children = new HashMap<>();
TableEntry(TableImpl table) {
/* For child tables subtract the key count from parent */
keySize = (table.getParent() == null ?
table.getPrimaryKeySize() :
table.getPrimaryKeySize() -
((TableImpl)table.getParent()).getPrimaryKeySize());
/* For each index, save the secondary DB name */
for (Index index :
table.getIndexes(Index.IndexType.SECONDARY).values()) {
secondaries.add(createDbName(
((TableImpl)index.getTable()).getInternalNamespace(),
index.getName(),
index.getTable().getFullName()));
}
/* Add only children which have indexes */
for (Table child : table.getChildTables().values()) {
final TableEntry entry = new TableEntry((TableImpl)child);
if (entry.hasSecondaries()) {
children.put(((TableImpl)child).getIdString(), entry);
}
}
}
private boolean hasSecondaries() {
return !secondaries.isEmpty() || !children.isEmpty();
}
private Collection<String> matchIndexes(BinaryKeyIterator keyIter) {
/* Match up the primary keys with the input keys, in number only */
for (int i = 0; i < keySize; i++) {
/* If the key is short, then punt */
if (keyIter.atEndOfKey()) {
return null;
}
keyIter.skip();
}
/* If both are done we have a match */
if (keyIter.atEndOfKey()) {
return secondaries;
}
/* There is another component, check for a child table */
final String childId = keyIter.next();
final TableEntry entry = children.get(childId);
return (entry == null) ? null : entry.matchIndexes(keyIter);
}
}
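/*
* Illustrative example of the keySize bookkeeping above (sizes are
* hypothetical): if a parent table has a 2-component primary key and its child
* has a 3-component primary key, the child's TableEntry stores keySize = 1, so
* matchIndexes() skips only the key components owned by the child before
* looking for the next child table ID in the key.
*/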
/**
* Requests a maintenance update. If a maintenance thread is present
* MaintenanceThread.requestUpdate() is called. If unsuccessful (the
* thread is already in shutdown) or there is no maintenance thread,
* a new maintenance thread is started.
*/
private void requestMaintenanceUpdate() {
threadLock.lock();
try {
if ((maintenanceThread != null) &&
maintenanceThread.requestUpdate()) {
return;
}
maintenanceThread = new MaintenanceThread(maintenanceThread,
this, repNode,
logger);
maintenanceThread.start();
} finally {
threadLock.unlock();
}
}
/**
* Shuts down the maintenance thread.
*/
private void shutdownMaintenance() {
threadLock.lock();
try {
if (maintenanceThread != null) {
maintenanceThread.shutdownThread(logger);
maintenanceThread = null;
}
} finally {
threadLock.unlock();
}
}
/**
* Returns true if there are active table maintenance operations that
* require migration to be idle.
*
* @return true if there are active table maintenance operations
*/
public boolean isBusyMaintenance() {
return isBusyMaintenance;
}
/**
* Returns true if there are active table maintenance operations that
* require migration to be idle. Also returns true if there is pending
* secondary DB cleaning.
*
* @return true if there is active maintenance or pending secondary
* DB cleaning
*/
public boolean busySecondaryCleaning() {
/* If active maintenance, exit early */
if (isBusyMaintenance) {
return true;
}
final ReplicatedEnvironment repEnv = repNode.getEnv(1);
/* If not the right env. we don't know so report busy to be safe */
if ((repEnv == null) || !repEnv.getState().isMaster()) {
return true;
}
Database infoDb = null;
try {
infoDb = SecondaryInfoMap.openDb(repEnv);
final SecondaryInfoMap infoMap = SecondaryInfoMap.fetch(infoDb);
/* infoMap == null means nothing is going on */
return (infoMap == null) ? false : infoMap.secondaryNeedsCleaning();
} catch (RuntimeException re) {
logger.log(Level.WARNING, "Unexpected exception", re);
return true; /* report busy on error */
} finally {
if (infoDb != null) {
TxnUtil.close(logger, infoDb, "secondary info db");
}
}
}
/**
* Sets the busy maintenance flag to true. This method should be called at
* the start of the maintenance operation that must be guarded. After
* returning, the thread should wait until migration operations have
* ceased. After completing the maintenance operation the thread must
* call clearBusyMaintenance().
*/
void setBusyMaintenance() {
isBusyMaintenance = true;
}
/**
* Clears the busy maintenance flag.
*/
void clearBusyMaintenance() {
isBusyMaintenance = false;
}
public void notifyRemoval(PartitionId partitionId) {
if (secondaryDbMap.isEmpty()) {
return;
}
logger.log(Level.INFO, "{0} has been removed, removing obsolete " +
"records from secondaries", partitionId);
final ReplicatedEnvironment repEnv = repNode.getEnv(1);
if (repEnv == null) {
return; // TODO - humm - lost info?
}
SecondaryInfoMap.markForSecondaryCleaning(repEnv, logger);
requestMaintenanceUpdate();
}
@Override
public String toString() {
return "TableManager[" +
((tableMetadata == null) ? "-" :
tableMetadata.getSequenceNumber()) +
", " + secondaryDbMap.size() + "]";
}
/**
* Comparator for the {@link #idBytesLookupMap}.
*
* Assumes that either param may be the input key param, passed to
* Map.get. This param will be a byte[] for the entire key, and we
* should only compare the first component, which is the top-level table
* ID. Therefore we must check the bytes of each param for delimiters,
* even though keys in the map will not contain delimiters.
*
* The ordering is simply a byte-by-byte comparison of the bytes in the
* table ID. This is not a meaningful ordering, but we only intend the
* map to be used for key lookups.
*
* An alternate implementation would be to use a ThreadLocal to hold the
* length of the input param, since this is already calculated by
* {@link #getTable(byte[])}. This would avoid checking for the delimiter
* in the comparator, but would require accessing a ThreadLocal every time
* the comparator is called. I suspect the ThreadLocal access is more
* expensive, since we check for delimiters only once per comparison.
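*
* Illustrative example (byte values are hypothetical): comparing a stored
* table-ID key {0x41, 0x42} against an input record key
* {0x41, 0x42, delimiter, ...} returns 0, because the delimiter terminates
* the input's first component; comparing it against {0x41, 0x43, ...}
* returns the (non-zero) byte difference at the second position.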
*/
private static class IDBytesComparator implements Comparator<byte[]> {
@Override
public int compare(byte[] o1, byte[] o2) {
for (int i = 0;; i += 1) {
if (i >= o1.length) {
if (i >= o2.length) {
return 0;
}
return Key.isDelimiter(o2[i]) ? 0 : -1;
}
if (i >= o2.length) {
return Key.isDelimiter(o1[i]) ? 0 : 1;
}
if (o1[i] != o2[i]) {
return o1[i] - o2[i];
}
/*
* If bytes are equal then the value must not be a delimiter,
* since at least one parameter does not contain delimiters.
*/
assert !Key.isDelimiter(o1[i]);
assert !Key.isDelimiter(o2[i]);
}
}
}
}