
// com.unboundid.directory.sdk.sync.scripting.ScriptedSyncSource (Maven / Gradle / Ivy)
// Artifact: server-sdk (see the artifact page for all versions and documentation)
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at
* docs/licenses/cddl.txt
* or http://www.opensource.org/licenses/cddl1.php.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at
* docs/licenses/cddl.txt. If applicable,
* add the following below this CDDL HEADER, with the fields enclosed
* by brackets "[]" replaced with your own identifying information:
* Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
*
* Portions Copyright 2011-2024 Ping Identity Corporation
*/
package com.unboundid.directory.sdk.sync.scripting;
import java.io.Serializable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
import com.unboundid.directory.sdk.common.internal.Configurable;
import com.unboundid.directory.sdk.sync.config.SyncSourceConfig;
import com.unboundid.directory.sdk.sync.internal.SynchronizationServerExtension;
import com.unboundid.directory.sdk.sync.types.EndpointException;
import com.unboundid.directory.sdk.sync.types.SetStartpointOptions;
import com.unboundid.directory.sdk.sync.types.SyncOperation;
import com.unboundid.directory.sdk.sync.types.SyncServerContext;
import com.unboundid.directory.sdk.sync.types.ChangeRecord;
import com.unboundid.ldap.sdk.Entry;
import com.unboundid.util.Extensible;
import com.unboundid.util.args.ArgumentException;
import com.unboundid.util.args.ArgumentParser;
/**
* This class defines an API that must be implemented by extensions in order to
* synchronize data from a generic (non-LDAP and non-JDBC) endpoint. Since the
* ${SYNC_SERVER_PRODUCT_NAME} is LDAP-centric, this API allows you to take
* generic content and convert it into LDAP entries which can then be processed
* by the ${SYNC_SERVER_BASE_NAME}. The lifecycle of a sync operation is as
* follows:
*
* - Detect change at the synchronization source
* - Fetch full source entry
* - Perform any mappings and compute the equivalent destination entry
* - Fetch full destination entry
* - Diff the computed destination entry and actual (fetched) destination
* entry
* - Apply the minimal set of changes at the destination to bring it in sync
*
*
* This implies that the
* {@link #fetchEntry(SyncOperation)} method will be called once for every
* change that is returned by
* {@link #getNextBatchOfChanges(int, AtomicLong)}.
*
* This is a generic interface and there is no protocol-specific connection
* management provided. It is expected that implementers will provide their own
* libraries for talking to the source endpoint and handle the connection
* lifecycle in the {@link #initializeSyncSource(SyncServerContext,
* SyncSourceConfig, ArgumentParser)} and {@link #finalizeSyncSource()}
* methods of this extension.
*
* During realtime synchronization (i.e. when a Sync Pipe is running), there is
* a sliding window of changes being processed, and this API provides a
* distinction between some different points along that window:
*
* - Old changes: These are changes that the Sync Server has
* processed and acknowledged back to the Sync Source. The Sync Source is
* under no obligation to re-detect these changes.
* - Startpoint: This marks where the Sync Source will start
* detecting changes if it is restarted.
* - Detected but unacknowledged: These changes have been returned by
* <code>getNextBatchOfChanges()</code> but not completely processed and
* acknowledged back to the Sync Source.
* - Undetected changes: The next call to
* <code>getNextBatchOfChanges()</code> should return the first changes
* that have not been detected. This should be somewhere at or ahead of
* the startpoint.
*
*
* Several of these methods throw {@link EndpointException}, which should be
* used in the case of any connection or endpoint error. For other types of
* errors, runtime exceptions may be used (IllegalStateException,
* NullPointerException, etc.). The ${SYNC_SERVER_BASE_NAME} will automatically
* retry operations that fail, up to a configurable amount of attempts. The
* EndpointException class allows you to specify a retry policy as well.
*
*
* <h2>Configuring Groovy Scripted Sync Sources</h2>
* In order to configure a Sync Source based on this API and written in Groovy,
* use a command like:
*
* dsconfig create-sync-source \
* --source-name "{source-name}" \
* --type groovy-scripted \
* --set "script-class:{class-name}" \
* --set "script-argument:{name=value}"
*
* where "{source-name}" is the name to use for the Sync Source
* instance, "{class-name}" is the fully-qualified
* name of the Groovy class written using this API, and "{name=value}"
* represents name-value pairs for any arguments to provide to the sync
* source. If multiple arguments should be provided to the sync source,
* then the "<code>--set script-argument:{name=value}</code>" option
* should be provided multiple times.
*/
@Extensible()
@SynchronizationServerExtension(appliesToLocalContent=false,
                                appliesToSynchronizedContent=true)
public abstract class ScriptedSyncSource implements Configurable
{
  /**
   * {@inheritDoc}
   */
  public void defineConfigArguments(final ArgumentParser parser)
         throws ArgumentException
  {
    // No arguments will be allowed by default.
  }



  /**
   * This hook is called when a Sync Pipe first starts up, when the
   * <i>resync</i> process first starts up, or when the set-startpoint
   * subcommand is called from the <i>realtime-sync</i> command line tool.
   * Any initialization of this sync source should be performed here. This
   * method should generally store the {@link SyncServerContext} in a class
   * member so that it can be used elsewhere in the implementation.
   * <p>
   * The default implementation is empty.
   *
   * @param  serverContext  A handle to the server context for the server in
   *                        which this extension is running.
   * @param  config         The general configuration for this sync source.
   * @param  parser         The argument parser which has been initialized from
   *                        the configuration for this sync source.
   */
  public void initializeSyncSource(final SyncServerContext serverContext,
                                   final SyncSourceConfig config,
                                   final ArgumentParser parser)
  {
    // No initialization will be performed by default.
  }



  /**
   * This hook is called when a Sync Pipe shuts down, when the <i>resync</i>
   * process shuts down, or when the set-startpoint subcommand (from the
   * <i>realtime-sync</i> command line tool) is finished. Any clean up of this
   * sync source should be performed here.
   * <p>
   * The default implementation is empty.
   */
  public void finalizeSyncSource()
  {
    // No implementation required by default.
  }



  /**
   * Return the URL or path identifying the source endpoint
   * from which this extension is transmitting data. This is used for logging
   * purposes only, so it could just be a server name or hostname and port, etc.
   *
   * @return the path to the source endpoint
   */
  public abstract String getCurrentEndpointURL();



  /**
   * This method should effectively set the starting point for synchronization
   * to the place specified by the <code>options</code> parameter. This should
   * cause all changes previous to the specified start point to be disregarded
   * and only changes after that point to be returned by
   * {@link #getNextBatchOfChanges(int, AtomicLong)}.
   * <p>
   * There are several different startpoint types (see
   * {@link SetStartpointOptions}), and this implementation is not required to
   * support them all. If the specified startpoint type is unsupported, this
   * method should throw an {@link UnsupportedOperationException}.
   * <p>
   * <b>IMPORTANT</b>: The <code>RESUME_AT_SERIALIZABLE</code> startpoint type
   * must be supported by your implementation, because this is used when a Sync
   * Pipe first starts up. The {@link Serializable} in this case is the same
   * type that is returned by {@link #getStartpoint()}; the Sync Server persists
   * it and passes it back in on a restart.
   * <p>
   * This method can be called from two different contexts:
   * <ul>
   * <li>When the 'set-startpoint' subcommand of the realtime-sync CLI is used
   * (the Sync Pipe is required to be stopped in this context)</li>
   * <li>Immediately after a Sync Pipe starts up and a connection is first
   * established to the source server (e.g. before the first call to
   * {@link #getNextBatchOfChanges(int, AtomicLong)})</li>
   * </ul>
   *
   * @param options
   *          an object which indicates where exactly to start synchronizing
   *          (e.g. the end of the changelog, specific change number, a certain
   *          time ago, etc)
   * @throws EndpointException
   *           if there is any error while setting the start point
   */
  public abstract void setStartpoint(final SetStartpointOptions options)
         throws EndpointException;



  /**
   * Gets the current value of the startpoint for change detection. This is the
   * "bookmark" which indicates which changes have already been processed and
   * which have not. In most cases, a change number is used to detect changes
   * and is managed by the ${SYNC_SERVER_BASE_NAME}, in which case this
   * implementation needs only to return the latest acknowledged
   * change number. In other cases, the return value may correspond to a
   * different value, such as the SYS_CHANGE_VERSION in Microsoft SQL Server.
   * In any case, this method should return the value that is updated by
   * {@link #acknowledgeCompletedOps(LinkedList)}.
   * <p>
   * This method is called periodically and the return value is saved in the
   * persistent state for the Sync Pipe that uses this extension as its Sync
   * Source.
   * <p>
   * <b>IMPORTANT</b>: The internal value for the startpoint should only be
   * updated after a sync operation is acknowledged back to this script (via
   * {@link #acknowledgeCompletedOps(LinkedList)}).
   * Otherwise it will be possible for changes to be missed when the
   * ${SYNC_SERVER_BASE_NAME} is restarted or a connection error occurs.
   *
   * @return a value to store in the persistent state for the Sync Pipe. This is
   *         usually a change number, but if a changelog table is not used to
   *         detect changes, this value should represent some other token to
   *         pass into {@link #setStartpoint(SetStartpointOptions)}
   *         when the sync pipe starts up.
   */
  public abstract Serializable getStartpoint();



  /**
   * Return the next batch of change records from the source. Change records
   * are usually just hints that a change happened; they do not include
   * the full contents of the target entry. In an effort to never synchronize
   * stale data, the ${SYNC_SERVER_BASE_NAME} will go back and fetch the full
   * target entry for each change record.
   * <p>
   * On the first invocation, this should return changes starting from the
   * startpoint that was set by
   * {@link #setStartpoint(SetStartpointOptions)}. This method is also
   * responsible for updating the internal state such that subsequent
   * invocations do not return duplicate changes.
   * <p>
   * The resulting list should be limited by <code>maxChanges</code>. The
   * <code>numStillPending</code> reference should be set to the estimated
   * number of changes that haven't yet been retrieved from the source endpoint
   * when this method returns, or zero if all the current changes have been
   * retrieved.
   * <p>
   * <b>IMPORTANT</b>: While this method needs to keep track of which changes
   * have already been returned so that it does not return them again, it should
   * <b>NOT</b> modify the official startpoint. The internal value for the
   * startpoint should only be updated after a sync operation is acknowledged
   * back to this script (via
   * {@link #acknowledgeCompletedOps(LinkedList)}).
   * Otherwise it will be possible for changes to be missed when the
   * ${SYNC_SERVER_BASE_NAME} is restarted or a connection error occurs. The
   * startpoint should not change as a result of this method.
   * <p>
   * This method <b>does not need to be thread-safe</b>. It will be invoked
   * repeatedly by a single thread, based on the polling interval set in the
   * Sync Pipe configuration.
   *
   * @param maxChanges
   *          the maximum number of changes to retrieve
   * @param numStillPending
   *          this should be set to the number of unretrieved changes that
   *          are still pending after this batch has been retrieved. This will
   *          be passed in
   *          as zero, and may be left that way if the actual value cannot be
   *          determined.
   * @return a list of {@link ChangeRecord} instances, each
   *         corresponding to a single change at the source endpoint.
   *         If there are no new changes to return, this method should return
   *         an empty list.
   * @throws EndpointException
   *           if there is any error while retrieving the next batch of changes
   */
  public abstract List<ChangeRecord> getNextBatchOfChanges(
                                          final int maxChanges,
                                          final AtomicLong numStillPending)
         throws EndpointException;



  /**
   * Return a full source entry (in LDAP form) from the source, corresponding
   * to the {@link ChangeRecord} that is passed in through the
   * {@link SyncOperation}. This method should perform any queries necessary to
   * gather the latest values for all the attributes to be synchronized.
   * <p>
   * This method <b>must be thread safe</b>, as it will be called repeatedly and
   * concurrently by each of the Sync Pipe worker threads as they process
   * entries.
   * <p>
   * If the original ChangeRecord has the full entry already set on it (which
   * can be done using <code>ChangeRecord.Builder#fullEntry(Entry)</code>),
   * then this method will not get called, and the Sync Server will
   * automatically use the full entry from the ChangeRecord. In this case, the
   * implementation can always return {@code null}.
   *
   * @param operation
   *          the SyncOperation which identifies the source "entry" to
   *          fetch. The ChangeRecord can be obtained by calling
   *          <code>operation.getChangeRecord()</code>.
   *          These ChangeRecords are generated by
   *          {@link #getNextBatchOfChanges(int, AtomicLong)}
   *          or by
   *          {@link #listAllEntries(BlockingQueue)}.
   *
   * @return a full LDAP Entry, or null if no such entry exists.
   * @throws EndpointException
   *           if there is an error fetching the entry
   */
  public abstract Entry fetchEntry(final SyncOperation operation)
         throws EndpointException;



  /**
   * Provides a way for the ${SYNC_SERVER_BASE_NAME} to acknowledge back to the
   * script which sync operations it has processed. This method should update
   * the official startpoint which was set by
   * {@link #setStartpoint(SetStartpointOptions)} and is
   * returned by {@link #getStartpoint()}.
   * <p>
   * <b>IMPORTANT</b>: The internal value for the startpoint should only be
   * updated after a sync operation is acknowledged back to this extension (via
   * this method). Otherwise it will be possible for changes to be missed when
   * the ${SYNC_SERVER_BASE_NAME} is restarted or a connection error occurs.
   *
   * @param completedOps
   *          a list of {@link SyncOperation}s that have finished processing.
   *          The records are listed in the order they were first detected.
   * @throws EndpointException
   *           if there is an error acknowledging the changes back to the
   *           source endpoint
   */
  public abstract void acknowledgeCompletedOps(
                            final LinkedList<SyncOperation> completedOps)
         throws EndpointException;



  /**
   * Gets a list of all the entries in the source endpoint. This is used by the
   * 'resync' command line tool. The default implementation throws a
   * {@link UnsupportedOperationException}; subclasses should override if the
   * resync functionality is needed.
   * <p>
   * The <code>outputQueue</code> should contain {@link ChangeRecord} objects
   * with the <code>ChangeType</code> set to <code>null</code> to indicate that
   * these are resync operations.
   * <p>
   * This method should not return until all the entries at the source
   * have been added to the output queue. Separate threads will concurrently
   * drain entries from the queue and process them. The queue typically should
   * not contain full entries, but rather ChangeRecord objects which identify
   * the full source entries. These objects are then individually passed in to
   * {@link #fetchEntry(SyncOperation)}. Therefore, it is important to make sure
   * that the ChangeRecord instances contain enough identifiable information
   * (e.g. primary keys) for each entry so that the entry can be found again.
   * <p>
   * The lifecycle of resync is similar to that of real-time sync, with a few
   * differences:
   * <ol>
   * <li>Stream out a list of identifiers for the entries in the source
   *     endpoint, using a ChangeRecord as the identifier</li>
   * <li>Fetch full source entry for a ChangeRecord</li>
   * <li>Perform any mappings and compute the equivalent destination entry</li>
   * <li>Fetch full destination entry</li>
   * <li>Diff the computed destination entry and actual destination entry</li>
   * <li>Apply the minimal set of changes at the destination to bring it in
   *     sync</li>
   * </ol>
   * <p>
   * Alternatively, the full entry can be set on the ChangeRecord within this
   * method, which will cause the "fetch full entry" step to be skipped. In this
   * case the Sync Server will just use the entry specified on the ChangeRecord.
   * <p>
   * If the total set of entries is very large, it is fine to split up the work
   * into multiple network queries within this method. The queue will not grow
   * out of control because it blocks when it becomes full. The queue capacity
   * is fixed at 1000.
   *
   * @param outputQueue
   *          a queue of ChangeRecord objects which will be individually
   *          fetched via {@link #fetchEntry(SyncOperation)}
   * @throws EndpointException
   *           if there is an error retrieving the list of entries to resync
   */
  public void listAllEntries(final BlockingQueue<ChangeRecord> outputQueue)
         throws EndpointException
  {
    throw new UnsupportedOperationException(
            "The listAllEntries(BlockingQueue) " +
            "method must be implemented in the " +
            getClass().getName() + " extension.");
  }



  /**
   * Gets a list of all the entries in the source from a given file input.
   * This is used by the 'resync' command line tool. The default implementation
   * throws a {@link UnsupportedOperationException}; subclasses should override
   * if the resync functionality is needed for specific records, which
   * can be specified in the input file.
   * <p>
   * The format for the <code>inputLines</code> (e.g. the content of the file)
   * is user-defined; it may be key/value pairs, primary keys, or full SQL
   * statements, for example. The use of this method is triggered via the
   * <i>--sourceInputFile</i> argument on the resync CLI. The
   * <code>outputQueue</code> should contain {@link ChangeRecord}
   * objects with the <code>ChangeType</code> set to <code>null</code> to
   * indicate that these are resync operations.
   * <p>
   * This method should not return until all the entries specified by the input
   * file have been added to the output queue. Separate threads will
   * concurrently drain entries from the queue and process them. The queue
   * typically should not contain full entries, but rather ChangeRecord
   * objects which identify the full source entries. These objects are then
   * individually passed in to {@link #fetchEntry(SyncOperation)}. Therefore,
   * it is important to make sure that the ChangeRecord instances
   * contain enough identifiable information (e.g. primary keys) for each entry
   * so that the entry can be found again.
   * <p>
   * The lifecycle of resync is similar to that of real-time sync, with a few
   * differences:
   * <ol>
   * <li>Stream out a list of identifiers for entries in the source endpoint,
   *     using the given input file as the basis for which entries to
   *     resync</li>
   * <li>Fetch full source entry for an identifier</li>
   * <li>Perform any mappings and compute the equivalent destination entry</li>
   * <li>Fetch full destination entry</li>
   * <li>Diff the computed destination entry and actual destination entry</li>
   * <li>Apply the minimal set of changes at the destination to bring it in
   *     sync</li>
   * </ol>
   * <p>
   * Alternatively, the full entry can be set on the ChangeRecord within this
   * method, which will cause the "fetch full entry" step to be skipped. In this
   * case the Sync Server will just use the entry specified on the ChangeRecord.
   * <p>
   * If the total set of entries is very large, it is fine to split up the work
   * into multiple network queries within this method. The queue will not grow
   * out of control because it blocks when it becomes full. The queue capacity
   * is fixed at 1000.
   *
   * @param inputLines
   *          an Iterator containing the lines from the specified input file to
   *          resync (this is specified on the CLI for the resync command).
   *          These lines can be any format, for example a set of primary keys,
   *          a set of WHERE clauses, a set of full SQL queries, etc.
   * @param outputQueue
   *          a queue of ChangeRecord objects which will be individually
   *          fetched via {@link #fetchEntry(SyncOperation)}
   * @throws EndpointException
   *           if there is an error retrieving the list of entries to resync
   */
  public void listAllEntries(final Iterator<String> inputLines,
                             final BlockingQueue<ChangeRecord> outputQueue)
         throws EndpointException
  {
    throw new UnsupportedOperationException(
            "The listAllEntries(Iterator,BlockingQueue) " +
            "method must be implemented in the " +
            getClass().getName() + " extension.");
  }
}