org.jitsi.utils.dsi.DominantSpeakerIdentification Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jitsi-utils Show documentation
A set of basic utilities used in Jitsi projects
There is a newer version: 1.0-133-g6af1020
/*
 * Copyright @ 2015 Atlassian Pty Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.jitsi.utils.dsi;

import java.lang.ref.*;
import java.time.*;
import java.util.*;
import java.util.concurrent.*;

import org.jetbrains.annotations.*;
import org.jitsi.utils.concurrent.*;
import org.jitsi.utils.logging2.*;
import org.json.simple.*;

/**
 * Implements {@link ActiveSpeakerDetector} with inspiration from the paper
 * "Dominant Speaker Identification for Multipoint Videoconferencing"
 * by Ilana Volfin and Israel Cohen.
 *
 * @param <T> The type used for speaker identifiers.
 *
 * @author Lyubomir Marinov
 */
@SuppressWarnings("unused")
public class DominantSpeakerIdentification
    extends AbstractActiveSpeakerDetector
{
    /**
     * The threshold of the relevant speech activities in the immediate
     * time-interval in "global decision"/"Dominant speaker
     * selection" phase of the algorithm. A challenger's relative speech
     * activity for the immediate interval has to exceed this value in order to
     * cause a speaker switch.
     */
    private static final double C1 = 3;

    /**
     * The threshold of the relevant speech activities in the medium
     * time-interval in "global decision"/"Dominant speaker
     * selection" phase of the algorithm. A challenger's relative speech
     * activity for the medium interval has to exceed this value in order to
     * cause a speaker switch.
     */
    private static final double C2 = 2;

    /**
     * The threshold of the relevant speech activities in the long
     * time-interval in "global decision"/"Dominant speaker
     * selection" phase of the algorithm. A challenger's relative speech
     * activity for the long interval has to exceed this value in order to
     * cause a speaker switch.
     */
    private static final double C3 = 0;

    /**
     * The indicator which determines whether the
     * DominantSpeakerIdentification class and its instances are to
     * execute in debug mode. The value is assigned once, in the static
     * initializer of the class.
     */
    private static final boolean DEBUG;

    /**
     * The interval in milliseconds of the activation of the identification of
     * the dominant speaker in a multipoint conference.
     */
    private static final long DECISION_INTERVAL = 300;

    /**
     * The interval of time in milliseconds of idle execution of
     * DecisionMaker after which the latter should cease to exist. The
     * interval does not have to be very long because the background threads
     * running the DecisionMakers are pooled anyway.
     */
    private static final long DECISION_MAKER_IDLE_TIMEOUT = 15 * 1000;

    /**
     * The interval of time in milliseconds without a call to
     * {@link Speaker#levelChanged(int,long)} after which
     * DominantSpeakerIdentification assumes that there will be no report of a
     * Speaker's level within a certain time-frame. The default value of 40 is
     * chosen in order to allow non-aggressive fading of the last received or
     * measured level and to be greater than the most common RTP packet
     * durations in milliseconds i.e. 20 and 30.
     */
    private static final long LEVEL_IDLE_TIMEOUT = 40;

    /**
     * The Logger used by the DominantSpeakerIdentification
     * class and its instances to print debug information.
     */
    private static final Logger logger = new LoggerImpl(DominantSpeakerIdentification.class.getName());

    /**
     * The (total) number of long time-intervals used for speech activity score
     * evaluation at a specific time-frame.
     */
    private static final int LONG_COUNT = 1;

    /**
     * The threshold in terms of active medium-length blocks which is used
     * during the speech activity evaluation step for the long time-interval.
     */
    private static final int LONG_THRESHOLD = 4;

    /**
     * The maximum value of audio level supported by
     * DominantSpeakerIdentification.
     */
    private static final int MAX_LEVEL = 127;

    /**
     * The minimum value of audio level supported by
     * DominantSpeakerIdentification.
     */
    private static final int MIN_LEVEL = 0;

    /**
     * The number of (audio) levels received or measured for a Speaker
     * to be monitored in order to determine that the minimum level for the
     * Speaker has increased. With the values below it evaluates to 750
     * levels, i.e. 15 seconds' worth of levels at 20 milliseconds per level.
     */
    private static final int MIN_LEVEL_WINDOW_LENGTH
        = 15 /* seconds */ * 1000 /* milliseconds */
            / 20 /* milliseconds per level */;

    /**
     * The minimum value of speech activity score supported by
     * DominantSpeakerIdentification. The value must be positive
     * because (1) we are going to use it as the argument of a logarithmic
     * function and the latter is undefined for non-positive arguments and (2)
     * we will be dividing by the speech activity score.
     */
    private static final double MIN_SPEECH_ACTIVITY_SCORE = 0.0000000001D;

    /**
     * The threshold in terms of active sub-bands in a frame which is used
     * during the speech activity evaluation step for the medium length
     * time-interval.
     */
    private static final int MEDIUM_THRESHOLD = 7;

    /**
     * The (total) number of sub-bands in the frequency range evaluated for
     * immediate speech activity. The implementation of the class
     * DominantSpeakerIdentification does not really operate on the
     * representation of the signal in the frequency domain, it works with audio
     * levels derived from RFC 6465 "A Real-time Transport Protocol (RTP)
     * Header Extension for Mixer-to-Client Audio Level Indication".
     */
    private static final int N1 = 13;

    /**
     * The length/size of a sub-band in the frequency range evaluated for
     * immediate speech activity. In the context of the implementation of the
     * class DominantSpeakerIdentification, it specifies the
     * length/size of a sub-unit of the audio level range defined by RFC 6465.
     * Computed as the level range divided by {@link #N1}, rounded up (10 with
     * the current constants).
     */
    private static final int N1_SUBUNIT_LENGTH = (MAX_LEVEL - MIN_LEVEL + N1 - 1) / N1;

    /**
     * The number of frames (i.e. {@link Speaker#immediates}) evaluated for
     * medium speech activity.
     */
    private static final int N2 = 5;

    /**
     * The number of medium-length blocks constituting a long time-interval.
     */
    private static final int N3 = 10;

    /**
     * The interval of time in milliseconds (one hour) without a call to
     * {@link Speaker#levelChanged(int,long)} after which
     * DominantSpeakerIdentification assumes that a non-dominant Speaker is to
     * be automatically removed from {@link #speakers}.
     */
    private static final long SPEAKER_IDLE_TIMEOUT = 60 * 60 * 1000;

    /**
     * The executor shared by all instances which are created through a
     * constructor that does not take an explicit executor. It is a scheduled
     * pool with a core size of one thread.
     * NOTE(review): the second argument of CustomizableThreadFactory is
     * presumably the daemon flag of the created threads - confirm.
     */
    private static final ScheduledExecutorService DEFAULT_EXECUTOR
            = Executors.newScheduledThreadPool(1, new CustomizableThreadFactory("dsi", true));

    // Capture the debug state of the logger once, at class initialization.
    // DEBUG is final, so a later change of the logger's level is not reflected.
    static
    {
        DEBUG = logger.isDebugEnabled();
    }

    /**
     * Computes the binomial coefficient "n choose r", i.e. the number of ways
     * of picking r unordered outcomes from n possibilities.
     * <p>
     * The symmetry C(n, r) == C(n, n - r) is used to iterate over the shorter
     * of the two products. Every intermediate value is itself a binomial
     * coefficient, so the integer division is exact.
     *
     * @param n the number of possibilities to pick from
     * @param r the number of unordered outcomes to pick from n
     * @return the binomial coefficient indexed by n and r
     */
    private static long binomialCoefficient(int n, int r)
    {
        // Skip the larger of r and (n - r); only the remaining factors are multiplied.
        int skipped = Math.max(r, n - r);
        long result = 1;
        int divisor = 1;

        for (int factor = n; factor > skipped; factor--)
        {
            result = result * factor / divisor;
            divisor++;
        }

        return result;
    }

    /**
     * Recomputes every element of {@code bigs} as the number of elements
     * greater than {@code threshold} in the corresponding run of consecutive
     * elements of {@code littles}. Each run has
     * {@code littles.length / bigs.length} elements.
     *
     * @param littles the finer-grained values to be counted
     * @param bigs the coarser-grained counters to be updated in place
     * @param threshold the value a little has to exceed in order to be counted
     * @return {@code true} if at least one element of {@code bigs} was
     * modified; otherwise, {@code false}
     */
    private static boolean computeBigs(
            byte[] littles,
            byte[] bigs,
            int threshold)
    {
        int bigCount = bigs.length;
        int runLength = littles.length / bigCount;
        boolean modified = false;
        int next = 0;

        for (int big = 0; big < bigCount; big++)
        {
            int runEnd = next + runLength;
            byte active = 0;

            for (; next < runEnd; next++)
            {
                if (littles[next] > threshold)
                {
                    active++;
                }
            }

            if (active != bigs[big])
            {
                bigs[big] = active;
                modified = true;
            }
        }
        return modified;
    }

    /**
     * Computes a speech activity score as
     * {@code log C(nR, vL) + vL*log(p) + (nR - vL)*log(1 - p) - log(lambda) + lambda*vL}
     * with {@code p = 0.5}, and never returns less than
     * {@link #MIN_SPEECH_ACTIVITY_SCORE}.
     *
     * @param vL the first operand of the formula; presumably the number of
     * active sub-units - verify against the callers
     * @param nR the second operand of the formula; presumably the total
     * number of sub-units - verify against the callers
     * @param lambda the lambda parameter of the formula
     * @return the score, floored at {@link #MIN_SPEECH_ACTIVITY_SCORE}
     */
    private static double computeSpeechActivityScore(
            int vL,
            int nR,
            double lambda)
    {
        final double p = 0.5;

        double logCombinations = Math.log(binomialCoefficient(nR, vL));
        double logActive = vL * Math.log(p);
        double logInactive = (nR - vL) * Math.log(1 - p);
        double logLambda = Math.log(lambda);
        double lambdaTerm = lambda * vL;

        // The terms are combined strictly left to right.
        double score = logCombinations + logActive + logInactive - logLambda + lambdaTerm;

        return Math.max(score, MIN_SPEECH_ACTIVITY_SCORE);
    }

    /**
     * The background task which repeatedly makes the (global) decision about speaker switches.
     */
    private DecisionMaker decisionMaker;

    /**
     * The identifier of the dominant speaker.
     */
    private T dominantId;

    /**
     * The last/latest time at which this DominantSpeakerIdentification
     * made a (global) decision about speaker switches. The (global) decision
     * about switcher switches should be made every {@link #DECISION_INTERVAL}
     * milliseconds.
     */
    private long lastDecisionTime;

    /**
     * The time in milliseconds of the most recent (audio) level report or
     * measurement (regardless of the Speaker).
     */
    private long lastLevelChangedTime;

    /**
     * The last/latest time at which this DominantSpeakerIdentification
     * notified the Speakers who have not received or measured audio
     * levels for a certain time (i.e. {@link #LEVEL_IDLE_TIMEOUT}) that they
     * will very likely not have a level within a certain time-frame of the
     * algorithm.
     */
    private long lastLevelIdleTime;

    /**
     * The relative speech activities for the immediate, medium and long
     * time-intervals, respectively, which were last calculated for a
     * Speaker. Simply reduces the number of allocations and the
     * penalizing effects of the garbage collector.
     */
    private final double[] relativeSpeechActivities = new double[3];

    /**
     * The Speakers in the multipoint conference associated with this
     * ActiveSpeakerDetector.
     */
    private final Map> speakers = new HashMap<>();

    /**
     * The special ID to use to indicate silence (no speakers).
     *
     * When set to null silence detection is disabled.
     */
    private final boolean enableSilence;

    /**
     * The interval (in milliseconds) of no activity before switching to {@code silence} (if silence detection is
     * enabled).
     */
    private final long timeoutToSilenceInterval;

    /**
     * The Speakers in the multipoint conference with the highest
     * current energy levels.
     */
    private final ArrayList> loudest = new ArrayList<>();

    private final Clock clock;

    /**
     * The executor used to schedule {@link #decisionMaker}.
     */
    private final ScheduledExecutorService executor;

    /**
     * The number of current loudest speakers to keep track of.
     */
    private int numLoudestToTrack = 0;

    /**
     * Time in milliseconds after which speaker is removed from loudest list if
     * no new audio packets have been received from that speaker.
     */
    private int energyExpireTimeMs = 150;

    /**
     * Alpha factor for exponential smoothing of energy values, multiplied by 100.
     */
    private int energyAlphaPct = 50;

    /**
     * Initializes a new DominantSpeakerIdentification instance which reads
     * the time from the system UTC clock, runs its decision-making on the
     * shared default executor and has silence detection disabled.
     */
    public DominantSpeakerIdentification()
    {
        this(Clock.systemUTC(), DEFAULT_EXECUTOR);
    }

    /**
     * Initializes a new DominantSpeakerIdentification instance which reads
     * the time from the system UTC clock and runs its decision-making on the
     * shared default executor.
     *
     * @param silenceTimeout the interval in milliseconds of no speech after
     * which the dominant speaker is switched to {@code null} (silence). A
     * non-positive value disables silence detection.
     */
    public DominantSpeakerIdentification(long silenceTimeout)
    {
        this(Clock.systemUTC(), DEFAULT_EXECUTOR, silenceTimeout);
    }

    /**
     * Initializes a new DominantSpeakerIdentification instance with silence
     * detection disabled.
     *
     * @param clock The clock to use
     * @param executor The executor in which to run the periodic decision-making.
     */
    public DominantSpeakerIdentification(Clock clock, ScheduledExecutorService executor)
    {
        // A negative silence timeout disables silence detection.
        this(clock, executor, -1);
    }

    /**
     * Initializes a new DominantSpeakerIdentification instance.
     *
     * @param clock The clock to use
     * @param executor The executor in which to schedule periodic decision-making.
     * @param silenceTimeout the interval in milliseconds of no speech after which we switch to silence (fire an
     * event with active speaker {@code null}). A non-positive value means that silence detection is disabled and the
     * speaker is always non-null (as long as there are any speakers in the conference).
     */
    public DominantSpeakerIdentification(Clock clock, ScheduledExecutorService executor, long silenceTimeout)
    {
        this.clock = clock;
        this.executor = executor;
        this.timeoutToSilenceInterval = silenceTimeout;
        // Zero, like any negative value, leaves silence detection disabled.
        enableSilence = silenceTimeout > 0;
    }

    /**
     * Sets the energy ranking options and trims the list of the loudest
     * speakers to the new maximum size.
     *
     * @param numLoudestToTrack_ the number of loudest speakers to keep track
     * of; zero disables the tracking. A negative value empties the list
     * rather than failing.
     * @param energyExpireTimeMs_ the time in milliseconds without new levels
     * after which a speaker is dropped from the list of the loudest speakers
     * @param energyAlphaPct_ the alpha factor for the exponential smoothing
     * of the energy values, multiplied by 100
     */
    public synchronized void setLoudestConfig(int numLoudestToTrack_, int energyExpireTimeMs_, int energyAlphaPct_)
    {
        numLoudestToTrack = numLoudestToTrack_;
        energyExpireTimeMs = energyExpireTimeMs_;
        energyAlphaPct = energyAlphaPct_;
        logger.trace(() -> "numLoudestToTrack = " + numLoudestToTrack);
        logger.trace(() -> "energyExpireTimeMs = " + energyExpireTimeMs);
        logger.trace(() -> "energyAlphaPct = " + energyAlphaPct);

        // Drop the quietest entries beyond the new limit. The limit is clamped
        // at zero so that a negative value cannot be used as a list index.
        int keep = Math.max(numLoudestToTrack, 0);

        if (loudest.size() > keep)
        {
            loudest.subList(keep, loudest.size()).clear();
        }
    }

    /**
     * Notifies this DominantSpeakerIdentification instance that a specific
     * DecisionMaker has permanently stopped executing. The reference held by
     * this instance is cleared only if it points to that very DecisionMaker;
     * a notification from any other DecisionMaker is ignored.
     *
     * @param decisionMaker the DecisionMaker which has exited
     */
    synchronized void decisionMakerExited(DecisionMaker decisionMaker)
    {
        if (this.decisionMaker != decisionMaker)
        {
            // Not the DecisionMaker currently in use; nothing to update.
            return;
        }

        this.decisionMaker = null;
    }

    /**
     * Retrieves a JSON representation of this instance for the purposes of the
     * REST API of Videobridge.
     * <p>
     * By the way, the method name reflects the fact that the method handles an
     * HTTP GET request.
     * </p>
     * <p>
     * The representation is available in debug mode only. It consists of the
     * entry {@code "dominantSpeaker"} and the array {@code "speakers"} with one
     * object (holding {@code "id"} and {@code "levels"}) per speaker.
     * </p>
     *
     * @return a JSONObject which represents this instance for the purposes of
     * the REST API of Videobridge, or {@code null} if the class is not
     * executing in debug mode
     */
    @SuppressWarnings("unchecked")
    public JSONObject doGetJSON()
    {
        // Retrieving a JSON representation of a DominantSpeakerIdentification
        // has been implemented for the purposes of debugging only.
        if (!DEBUG)
        {
            return null;
        }

        synchronized (this)
        {
            JSONObject jsonObject = new JSONObject();

            // dominantSpeaker
            T dominantSpeaker = getDominantSpeaker();

            jsonObject.put("dominantSpeaker", Objects.toString(dominantSpeaker));

            // speakers
            JSONArray speakersArray = new JSONArray();

            for (Speaker<T> speaker : this.speakers.values())
            {
                JSONObject speakerJSONObject = new JSONObject();

                // id
                speakerJSONObject.put("id", speaker.id.toString());
                // levels
                speakerJSONObject.put("levels", speaker.getLevels());
                speakersArray.add(speakerJSONObject);
            }
            jsonObject.put("speakers", speakersArray);

            return jsonObject;
        }
    }

    /**
     * Gets the identifier of the dominant speaker. The field is read without
     * acquiring the monitor of this instance.
     *
     * @return the identifier of the current dominant speaker, or {@code null}
     * if there is no dominant speaker (e.g. no decision has been made yet or,
     * with silence detection enabled, during a period of silence)
     */
    public T getDominantSpeaker()
    {
        return dominantId;
    }

    /**
     * Gets the Speaker in this multipoint conference identified by a
     * specific {@code id}. If no such Speaker exists, a new Speaker
     * is initialized, registered in {@link #speakers} and returned.
     *
     * @param id the identifier of the Speaker to return.
     * @return the Speaker in this multipoint conference identified by {@code id}.
     */
    @NotNull
    private synchronized Speaker<T> getOrCreateSpeaker(T id)
    {
        Speaker<T> speaker = speakers.get(id);

        if (speaker != null)
        {
            return speaker;
        }

        speaker = new Speaker<>(id);
        speakers.put(id, speaker);

        // Since we've created a new Speaker in the multipoint conference,
        // we'll very likely need to make a decision whether there have been
        // speaker switch events soon. The call has to come after the Speaker
        // is registered because the decision maker is started only if there is
        // at least one Speaker.
        maybeStartDecisionMaker();

        return speaker;
    }

    /**
     * Updates the list of the loudest speakers with a new energy level of a
     * specific speaker.
     * <p>
     * A negative level is not applied; only the current ranking of the
     * speaker is reported. Otherwise the energy score of the speaker is
     * smoothed with the new level, expired entries and the speaker's previous
     * entry are dropped from the list, and the speaker is re-inserted at the
     * position determined by its new score if that position is within the
     * tracked range.
     *
     * @param speaker the speaker with a new energy level
     * @param level the energy level
     * @param now the current time
     * @return The current ranking statistics.
     */
    private synchronized SpeakerRanking updateLoudestList(Speaker speaker, int level, long now)
    {
        boolean isDominant = dominantId != null && dominantId.equals(speaker.id);

        if (level < 0)
        {
            /* Ignore this level, it is too old. Just gather the stats. */
            int position;

            for (position = 0; position < loudest.size(); position++)
            {
                if (loudest.get(position) == speaker)
                {
                    break;
                }
            }

            return new SpeakerRanking(isDominant, position, speaker.energyScore);
        }

        /* exponential smoothing. round to nearest. */
        speaker.energyScore = (energyAlphaPct * level + (100 - energyAlphaPct) * speaker.energyScore + 50) / 100;

        if (numLoudestToTrack == 0)
        {
            return new SpeakerRanking(isDominant, 0, speaker.energyScore);
        }

        long oldestValid = now - energyExpireTimeMs;

        logger.trace(() -> "Want to add " + speaker.id.toString()
            + " with score " + speaker.energyScore + ". Last level = " + level + ".");

        /* Drop the expired entries and the previous entry of this speaker. */
        for (int index = 0; index < loudest.size(); )
        {
            Speaker cur = loudest.get(index);
            boolean expired = cur.getLastLevelChangedTime() < oldestValid;

            if (expired || cur == speaker)
            {
                logger.trace(() -> "Removing " + cur.id.toString() + (expired ? ". old." : ". same."));
                loudest.remove(index);
            }
            else
            {
                index++;
            }
        }

        /* The rank is the index of the first entry with a strictly lower score. */
        int rank;

        for (rank = 0; rank < loudest.size(); rank++)
        {
            if (loudest.get(rank).energyScore < speaker.energyScore)
            {
                break;
            }
        }

        if (rank < numLoudestToTrack)
        {
            final int pos = rank;

            logger.trace(() -> "Adding " + speaker.id.toString() + " at position " + pos + ".");
            loudest.add(pos, speaker);

            /* The insertion may have pushed the list over the limit by one. */
            if (loudest.size() > numLoudestToTrack)
            {
                loudest.remove(numLoudestToTrack);
            }
        }

        if (logger.isTraceEnabled())
        {
            for (int index = 0; index < loudest.size(); index++)
            {
                Speaker cur = loudest.get(index);

                logger.trace("New list: " + index + ": " + cur.id.toString() + ": " + cur.energyScore + ".");
            }
        }

        return new SpeakerRanking(isDominant, rank, speaker.energyScore);
    }

    /**
     * Queries whether a particular endpoint is currently one of the loudest
     * speakers.
     *
     * @param id the identifier of the endpoint to look for
     * @return {@code true} if the list of the loudest speakers contains a
     * speaker whose identifier equals {@code id}; otherwise, {@code false}
     */
    public synchronized boolean isAmongLoudest(T id)
    {
        for (int index = 0, count = loudest.size(); index < count; index++)
        {
            if (loudest.get(index).id.equals(id))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Looks up (or creates) the Speaker identified by {@code id}, records
     * that this detector is still in use, hands the level to the Speaker and
     * then updates the list of the loudest speakers with the level returned by
     * the Speaker.
     *
     * @param id the identifier of the speaker whose level is reported
     * @param level the reported (audio) level
     * @return the ranking statistics of the speaker after the update
     */
    @Override
    public SpeakerRanking levelChanged(T id, int level)
    {
        Speaker speaker;
        long now = clock.millis();

        synchronized (this)
        {
            speaker = getOrCreateSpeaker(id);

            // Note that this ActiveSpeakerDetector is still in use. When it is
            // not in use long enough, its DecisionMaker i.e. background thread
            // will prepare itself and, consequently, this
            // DominantSpeakerIdentification for garbage collection.
            if (lastLevelChangedTime < now)
            {
                lastLevelChangedTime = now;

                // A report or measurement of an audio level indicates that this
                // DominantSpeakerIdentification is in use and, consequently,
                // that it'll very likely need to make a decision whether there
                // have been speaker switch events soon.
                maybeStartDecisionMaker();
            }
        }

        // The Speaker is notified outside the monitor of this instance. A
        // negative cooked level makes updateLoudestList only report the
        // current ranking without applying the level.
        int cookedLevel = speaker.levelChanged(level, now);
        return updateLoudestList(speaker, cookedLevel, now);
    }

    /**
     * Makes the decision whether there has been a speaker switch event. If
     * there has been such an event, notifies the registered listeners that a
     * new speaker is dominating the multipoint conference.
     */
    private void makeDecision(long now)
    {
        // If we have to fire events to any registered listeners eventually, we
        // will want to do it outside the synchronized block.
        T oldDominantSpeakerValue = null, newDominantSpeakerValue = null;

        synchronized (this)
        {

        int speakerCount = speakers.size();
        T newDominantId;

        if (speakerCount == 0)
        {
            // If there are no Speakers in a multipoint conference, then there are no speaker switch events to detect.
            // We either have no dominant speaker, or we're in a silence period (if silence detection is enabled).
            newDominantId = null;
        }
        else if (speakerCount == 1)
        {
            // If there is a single Speaker in a multipoint conference, then it is either the dominant speaker, or
            // we're in a silence period (if silence detection is enabled).
            Speaker speaker = speakers.values().iterator().next();
            newDominantId = speaker.id;

            if (enableSilence)
            {
                speaker.evaluateSpeechActivityScores(now);

                long timeSinceNonSilence = now - speaker.lastNonSilence;
                if (timeSinceNonSilence > timeoutToSilenceInterval)
                {
                    newDominantId = null;
                }
            }
        }
        else
        {
            boolean inSilence = enableSilence && dominantId == null;
            Speaker dominantSpeaker
                = (dominantId == null)
                    ? null
                    : speakers.get(dominantId);

            // If there is no dominant speaker, nominate one at random and then
            // let the other speakers compete with the nominated one.
            if (dominantSpeaker == null)
            {
                Map.Entry> s = speakers.entrySet().iterator().next();

                dominantSpeaker = s.getValue();
                newDominantId = s.getKey();
            }
            else
            {
                newDominantId = dominantSpeaker.id;
            }
            // At this point dominantSpeaker==null iff inSilence==true.

            if (dominantSpeaker != null)
            {
                dominantSpeaker.evaluateSpeechActivityScores(now);
            }

            double[] relativeSpeechActivities = this.relativeSpeechActivities;
            // If multiple speakers cause speaker switches, they compete among
            // themselves by their relative speech activities in the middle
            // time-interval.
            double newDominantC2 = C2;

            for (Map.Entry> s : speakers.entrySet())
            {
                Speaker speaker = s.getValue();

                // The dominant speaker does not compete with itself. In other
                // words, there is no use detecting a speaker switch from the
                // dominant speaker to the dominant speaker. Technically, the
                // relative speech activities are all zeroes for the dominant
                // speaker.
                if (speaker == dominantSpeaker)
                {
                    continue;
                }

                speaker.evaluateSpeechActivityScores(now);

                // Compute the relative speech activities for the immediate,
                // medium and long time-intervals.
                for (int interval = 0;
                        interval < relativeSpeechActivities.length;
                        ++interval)
                {
                    // When in a silence period we use MIN_SPEECH_ACTIVITY_SCORE as the scores to compete against.
                    double dominantSpeakerScore = dominantSpeaker == null ? MIN_SPEECH_ACTIVITY_SCORE
                            : dominantSpeaker.getSpeechActivityScore(interval);
                    relativeSpeechActivities[interval]
                        = Math.log(speaker.getSpeechActivityScore(interval) / dominantSpeakerScore);
                }

                double c1 = relativeSpeechActivities[0];
                double c2 = relativeSpeechActivities[1];
                double c3 = relativeSpeechActivities[2];

                if ((c1 > C1) && (c2 > C2) && (c3 > C3) && (c2 > newDominantC2))
                {
                    // If multiple speakers cause speaker switches, they compete
                    // among themselves by their relative speech activities in
                    // the middle time-interval.
                    newDominantC2 = c2;
                    newDominantId = s.getKey();
                }
            }

            if (enableSilence && dominantSpeaker != null && newDominantId == dominantSpeaker.id)
            {
                // We're not in a silence period, and none of the non-dominant speakers won the challenge. Check if
                // the current dominant speaker has been silent for the timeout period, and if so switch to "silence"
                // mode.
                long timeSinceNonSilence = now - dominantSpeaker.lastNonSilence;
                if (timeSinceNonSilence > timeoutToSilenceInterval)
                {
                    newDominantId = null;
                }
            }
        }
        if ((newDominantId != null || enableSilence) && !Objects.equals(newDominantId, dominantId))
        {
            oldDominantSpeakerValue = dominantId;
            dominantId = newDominantId;
            newDominantSpeakerValue = dominantId;
        }

        } // synchronized (this)

        // Now that we are outside the synchronized block, fire events, if any,
        // to any registered listeners.
        if ((newDominantSpeakerValue != null || enableSilence) &&
            !Objects.equals(newDominantSpeakerValue, oldDominantSpeakerValue))
        {
            fireActiveSpeakerChanged(newDominantSpeakerValue);
        }
    }

    /**
     * Hands a new DecisionMaker over to the executor so that the (global)
     * decision about speaker switches is made repeatedly in the background.
     * Does nothing if a DecisionMaker is already employed by this instance or
     * if there are no Speakers in this multipoint conference.
     */
    private synchronized void maybeStartDecisionMaker()
    {
        if ((this.decisionMaker != null) || speakers.isEmpty())
        {
            return;
        }

        DecisionMaker candidate = new DecisionMaker(this);
        boolean submitted = false;

        // Publish the candidate before submitting it so that it is recognized
        // as the DecisionMaker in use as soon as it starts to run.
        this.decisionMaker = candidate;
        try
        {
            executor.execute(candidate);
            submitted = true;
        }
        finally
        {
            // Roll back if the executor rejected the task.
            if (!submitted && (this.decisionMaker == candidate))
            {
                this.decisionMaker = null;
            }
        }
    }

    /**
     * Runs in the background/daemon Thread of {@link #decisionMaker}:
     * times out the idle levels every {@link #LEVEL_IDLE_TIMEOUT} milliseconds
     * and makes the decision about speaker switches every
     * {@link #DECISION_INTERVAL} milliseconds.
     *
     * @return the time in milliseconds until the next execution of the
     * DecisionMaker; the value computed here is never negative
     */
    private long runInDecisionMaker()
    {
        long now = clock.millis();
        long untilLevelIdleCheck = LEVEL_IDLE_TIMEOUT - (now - lastLevelIdleTime);
        long delay;

        if (untilLevelIdleCheck > 0)
        {
            delay = untilLevelIdleCheck;
        }
        else
        {
            delay = 0;
            // The very first pass only records the time; there is nothing to
            // time out yet.
            if (lastLevelIdleTime != 0)
            {
                timeoutIdleLevels(now);
            }
            lastLevelIdleTime = now;
        }

        long untilDecision = DECISION_INTERVAL - (now - lastDecisionTime);

        if (untilDecision <= 0)
        {
            // The identification of the dominant active speaker may take a
            // while, so the time of the last decision is the time at which the
            // decision iteration began...
            lastDecisionTime = now;
            makeDecision(now);
            // ...and the timeout to the next decision iteration is computed
            // after the decision iteration has ended.
            untilDecision = DECISION_INTERVAL - (clock.millis() - now);
        }

        return (untilDecision > 0) ? Math.min(delay, untilDecision) : delay;
    }

    /**
     * Runs in the background/daemon Thread of a specific DecisionMaker and
     * makes the decision whether there has been a speaker switch event,
     * unless that DecisionMaker is to exit.
     *
     * @param decisionMaker the DecisionMaker invoking the method
     * @return a negative integer if the decisionMaker is to exit or
     * a non-negative integer to specify the time in milliseconds until the next
     * execution of the decisionMaker
     */
    long runInDecisionMaker(DecisionMaker decisionMaker)
    {
        synchronized (this)
        {
            // A DecisionMaker no longer employed by this
            // DominantSpeakerIdentification should cease to exist as soon as
            // possible.
            boolean superseded = this.decisionMaker != decisionMaker;

            // A DecisionMaker which has been executing unnecessarily for long
            // enough (i.e. decisions have been made but no level has been
            // reported for a while) is stopped in order to have a more
            // deterministic behavior with respect to disposal.
            boolean idleTooLong
                = (lastDecisionTime > 0)
                    && (lastDecisionTime - lastLevelChangedTime >= DECISION_MAKER_IDLE_TIMEOUT);

            if (superseded || idleTooLong)
            {
                return -1;
            }
        }

        return runInDecisionMaker();
    }

    /**
     * Notifies the Speakers in this multipoint conference who have not
     * received or measured audio levels for a certain time (i.e.
     * {@link #LEVEL_IDLE_TIMEOUT}) that they will very likely not have a level
     * within a certain time-frame of the DominantSpeakerIdentification
     * algorithm. Additionally, removes the non-dominant Speakers who
     * have not received or measured audio levels for far too long (i.e.
     * {@link #SPEAKER_IDLE_TIMEOUT}).
     *
     * @param now the time at which the timing out is being detected
     */
    private synchronized void timeoutIdleLevels(long now)
    {
        Iterator<Speaker<T>> i = speakers.values().iterator();

        while (i.hasNext())
        {
            Speaker<T> speaker = i.next();
            long idle = now - speaker.getLastLevelChangedTime();
            // Identifiers are compared by equality, the same way they are
            // compared as keys of the map of speakers.
            boolean isDominant = (dominantId != null) && dominantId.equals(speaker.id);

            // Remove a non-dominant Speaker if he/she has been idle for far too
            // long. Removing through the iterator of the values view removes
            // the mapping from the map.
            if ((SPEAKER_IDLE_TIMEOUT < idle) && !isDominant)
            {
                i.remove();
            }
            else if (LEVEL_IDLE_TIMEOUT < idle)
            {
                speaker.levelTimedOut();
            }
        }
    }

    /**
     * Represents the background thread which repeatedly makes the (global)
     * decision about speaker switches. Weakly references an associated
     * DominantSpeakerIdentification instance in order to eventually
     * detect that the multipoint conference has actually expired and that the
     * background Thread should perish.
     *
     * @author Lyubomir Marinov
     */
    private static class DecisionMaker
        implements Runnable
    {
        /**
         * The DominantSpeakerIdentification instance which is
         * repeatedly run into this background thread in order to make the
         * (global) decision about speaker switches. It is a
         * WeakReference in order to eventually detect that the
         * mulipoint conference has actually expired and that this background
         * Thread should perish.
         */
        private final WeakReference> algorithm;

        /**
         * Initializes a new DecisionMaker instance which is to
         * repeatedly run a specific DominantSpeakerIdentification
         * into a background thread in order to make the (global) decision about
         * speaker switches.
         *
         * @param algorithm the DominantSpeakerIdentification to be
         * repeatedly run by the new instance in order to make the (global)
         * decision about speaker switches
         */
        public DecisionMaker(DominantSpeakerIdentification algorithm)
        {
            this.algorithm = new WeakReference<>(algorithm);
        }

        /**
         * Repeatedly runs {@link #algorithm} i.e. makes the (global) decision
         * about speaker switches until the multipoint conference expires.
         */
        @Override
        public void run()
        {
            boolean exit = false;

            DominantSpeakerIdentification algorithm = this.algorithm.get();

            if (algorithm == null)
            {
                exit = true;
            }
            else
            {
                long sleep;
                try
                {
                    sleep = algorithm.runInDecisionMaker(this);
                }
                catch (Exception e)
                {
                    // If an exception occurs we do not re-schedule.
                    sleep = -1;
                }

                // A negative sleep value is contracted to mean that this DecisionMaker should not re-schedule itself.
                if (sleep < 0)
                {
                    exit = true;
                }
                else
                {
                    algorithm.executor.schedule(this, sleep, TimeUnit.MILLISECONDS);
                }
            }

            if (exit)
            {
                // Notify the algorithm that this DecisionMaker will no longer run. Subsequently, the algorithm may
                // decide to create and schedule another one if and when it's needed.
                algorithm = this.algorithm.get();

                if (algorithm != null)
                {
                    algorithm.decisionMakerExited(this);
                }
            }
        }
    }

    /**
     * Represents a speaker in a multipoint conference identified by an ID.
     * Keeps a bounded history of the audio levels reported for the speaker,
     * tracks the speaker's minimum ("silence") level, and derives from them
     * the speech activity scores for the immediate, medium and long
     * time-intervals.
     *
     * @param <U> the type of the identifier of this speaker
     *
     * @author Lyubomir Marinov
     */
    private class Speaker<U>
    {
        /**
         * The values derived by {@link #computeImmediates()} from
         * {@link #levels}, one per level: a level which is too close to
         * {@link #minLevel} is treated as MIN_LEVEL and the result is
         * divided by N1_SUBUNIT_LENGTH.
         */
        private final byte[] immediates = new byte[LONG_COUNT * N3 * N2];

        /**
         * The last {@link #clock} time in millis at which this speaker was not silent. We consider it being silent if
         * {@link #longSpeechActivityScore} == {@link #MIN_SPEECH_ACTIVITY_SCORE} to allow short bursts of activity
         * to not interrupt a silence period.
         */
        private long lastNonSilence = -1;

        /**
         * The speech activity score of this Speaker for the immediate
         * time-interval.
         */
        private double immediateSpeechActivityScore = MIN_SPEECH_ACTIVITY_SCORE;

        /**
         * The time in milliseconds of the most recent invocation of
         * {@link #levelChanged(int,long)} i.e. the last time at which an actual
         * (audio) level was reported or measured for this Speaker. If
         * no level is reported or measured for this Speaker long
         * enough i.e. {@link #LEVEL_IDLE_TIMEOUT}, the associated
         * DominantSpeakerIdentification will presume that this
         * Speaker was muted for the duration of a certain frame.
         */
        private long lastLevelChangedTime = clock.millis();

        /**
         * The (history of) audio levels received or measured for this
         * Speaker. The most recent level is at index 0.
         */
        private final byte[] levels;

        /**
         * The values derived by {@link #computeLongs()} from {@link #mediums}
         * using LONG_THRESHOLD.
         */
        private final byte[] longs = new byte[LONG_COUNT];

        /**
         * The speech activity score of this Speaker for the long
         * time-interval.
         */
        private double longSpeechActivityScore = MIN_SPEECH_ACTIVITY_SCORE;

        /**
         * The values derived by {@link #computeMediums()} from
         * {@link #immediates} using MEDIUM_THRESHOLD.
         */
        private final byte[] mediums = new byte[LONG_COUNT * N3];

        /**
         * The speech activity score of this Speaker for the medium
         * time-interval.
         */
        private double mediumSpeechActivityScore = MIN_SPEECH_ACTIVITY_SCORE;

        /**
         * The minimum (audio) level received or measured for this
         * Speaker. Since MIN_LEVEL is specified for samples
         * generated by a muted audio source, a value equal to
         * MIN_LEVEL indicates that the minimum level for this
         * Speaker has not been determined yet.
         */
        private byte minLevel = MIN_LEVEL;

        /**
         * The (current) estimate of the minimum (audio) level received or
         * measured for this Speaker. Used to increase the value of
         * {@link #minLevel}
         */
        private byte nextMinLevel = MIN_LEVEL;

        /**
         * The number of subsequent (audio) levels received or measured for this
         * Speaker which have been monitored thus far in order to
         * estimate an up-to-date minimum (audio) level received or measured for
         * this Speaker.
         */
        private int nextMinLevelWindowLength;

        /** Exponential smoothing of filtered energy values.
         *  Synchronized by parent class.
         */
        int energyScore;

        /**
         * The identifier of this Speaker which is unique within this {@link DominantSpeakerIdentification}.
         */
        public final U id;

        /**
         * Initializes a new Speaker instance with a specific identifier.
         *
         * @param id the object identifying this speaker
         */
        public Speaker(U id)
        {
            this.id = id;

            // One history slot per immediate value.
            levels = new byte[immediates.length];
        }

        /**
         * Recomputes {@link #immediates} from {@link #levels}.
         *
         * @return true if at least one element of
         * {@link #immediates} changed; otherwise, false
         */
        private boolean computeImmediates()
        {
            // The minimum audio level received or measured for this Speaker is
            // the level of "silence" for this Speaker. Since the various
            // Speakers may differ in their levels of "silence", put all
            // Speakers on equal footing by replacing the individual levels of
            // "silence" with the uniform level of absolute silence.
            byte[] immediates = this.immediates;
            byte[] levels = this.levels;
            // Deliberately an int: narrowing the sum to a byte would wrap around
            // to a negative value for a high minLevel, and then no level would
            // ever be considered silence. levelChanged(int,long) performs the
            // same comparison in int arithmetic.
            int silenceThreshold = this.minLevel + N1_SUBUNIT_LENGTH;
            boolean changed = false;

            for (int i = 0; i < immediates.length; ++i)
            {
                byte level = levels[i];

                if (level < silenceThreshold)
                {
                    level = MIN_LEVEL;
                }

                byte immediate = (byte) (level / N1_SUBUNIT_LENGTH);

                if (immediates[i] != immediate)
                {
                    immediates[i] = immediate;
                    changed = true;
                }
            }
            return changed;
        }

        /**
         * Recomputes {@link #longs} from {@link #mediums}.
         *
         * @return the value returned by computeBigs for the long
         * time-interval
         */
        private boolean computeLongs()
        {
            return computeBigs(mediums, longs, LONG_THRESHOLD);
        }

        /**
         * Recomputes {@link #mediums} from {@link #immediates}.
         *
         * @return the value returned by computeBigs for the medium
         * time-interval
         */
        private boolean computeMediums()
        {
            return computeBigs(immediates, mediums, MEDIUM_THRESHOLD);
        }

        /**
         * Computes/evaluates the speech activity score of this Speaker
         * for the immediate time-interval.
         */
        private void evaluateImmediateSpeechActivityScore()
        {
            immediateSpeechActivityScore = computeSpeechActivityScore(immediates[0], N1, 0.78);
        }

        /**
         * Computes/evaluates the speech activity score of this Speaker
         * for the long time-interval and records now as the last
         * time of non-silence if the new score is above
         * MIN_SPEECH_ACTIVITY_SCORE.
         *
         * @param now the time in milliseconds of the evaluation
         */
        private void evaluateLongSpeechActivityScore(long now)
        {
            longSpeechActivityScore = computeSpeechActivityScore(longs[0], N3, 47);
            if (longSpeechActivityScore > MIN_SPEECH_ACTIVITY_SCORE)
            {
                lastNonSilence = now;
            }
        }

        /**
         * Computes/evaluates the speech activity score of this Speaker
         * for the medium time-interval.
         */
        private void evaluateMediumSpeechActivityScore()
        {
            mediumSpeechActivityScore = computeSpeechActivityScore(mediums[0], N2, 24);
        }

        /**
         * Evaluates the speech activity scores of this Speaker for the
         * immediate, medium, and long time-intervals. Invoked when it is time
         * to decide whether there has been a speaker switch event. The score
         * of an interval is re-evaluated only if the data of the shorter
         * interval it is derived from has changed.
         *
         * @param now the time in milliseconds of the evaluation
         */
        synchronized void evaluateSpeechActivityScores(long now)
        {
            if (computeImmediates())
            {
                evaluateImmediateSpeechActivityScore();
                if (computeMediums())
                {
                    evaluateMediumSpeechActivityScore();
                    if (computeLongs())
                    {
                        evaluateLongSpeechActivityScore(now);
                    }
                }
            }
        }

        /**
         * Gets the time in milliseconds at which an actual (audio) level was
         * reported or measured for this Speaker last.
         *
         * @return the time in milliseconds at which an actual (audio) level
         * was reported or measured for this Speaker last
         */
        public synchronized long getLastLevelChangedTime()
        {
            return lastLevelChangedTime;
        }

        /**
         * Gets the (history of) audio levels received or measured for this
         * Speaker.
         *
         * @return a String that lists the (history of) audio
         * levels received or measured for this Speaker, oldest first,
         * as a comma-separated list enclosed in square brackets
         */
        String getLevels()
        {
            // The levels of Speaker are internally maintained starting with the
            // last audio level received or measured for this Speaker and ending
            // with the first audio level received or measured for this Speaker.
            // Reverse the list and print them in the order they were received.
            byte[] src = this.levels;
            StringBuilder sb = new StringBuilder();
            sb.append('[');

            for (int s = src.length - 1; s >= 0; --s)
            {
                sb.append(src[s]);
                // Separate the elements only; an empty history yields "[]".
                if (s > 0)
                {
                    sb.append(',');
                }
            }

            sb.append(']');
            return sb.toString();
        }

        /**
         * Gets the speech activity score of this Speaker for a
         * specific time-interval.
         *
         * @param interval 0 for the immediate time-interval,
         * 1 for the medium time-interval, or 2 for the long
         * time-interval
         * @return the speech activity score of this Speaker for the
         * time-interval specified by interval
         * @throws IllegalArgumentException if interval is not one of
         * 0, 1 and 2
         */
        double getSpeechActivityScore(int interval)
        {
            switch (interval)
            {
            case 0:
                return immediateSpeechActivityScore;
            case 1:
                return mediumSpeechActivityScore;
            case 2:
                return longSpeechActivityScore;
            default:
                throw new IllegalArgumentException("interval " + interval);
            }
        }

        /**
         * Notifies this Speaker that a new audio level has been
         * received or measured at a specific time.
         *
         * @param level the audio level which has been received or measured for
         * this Speaker
         * @param time the (local System) time in milliseconds at which
         * the specified level has been received or measured
         * @return the audio level that was applied, after any filtering or
         * level adjustment has taken place. If negative, the audio level
         * was ignored.
         */
        public synchronized int levelChanged(int level, long time)
        {
            // It sounds relatively reasonable that late audio levels should
            // better be discarded.
            if (lastLevelChangedTime <= time)
            {
                lastLevelChangedTime = time;

                // Ensure that the specified level is within the supported
                // range.
                byte b;

                if (level < MIN_LEVEL)
                {
                    b = MIN_LEVEL;
                }
                else if (level > MAX_LEVEL)
                {
                    b = MAX_LEVEL;
                }
                else
                {
                    b = (byte) level;
                }

                // Push the specified level into the history of audio levels
                // received or measured for this Speaker.
                System.arraycopy(levels, 0, levels, 1, levels.length - 1);
                levels[0] = b;

                // Determine the minimum level received or measured for this
                // Speaker.
                updateMinLevel(b);

                // A level too close to the minimum level is reported as 0.
                return b >= (minLevel + N1_SUBUNIT_LENGTH) ? b : 0;
            }
            else
            {
                return -1;
            }
        }

        /**
         * Notifies this Speaker that no new audio level has been
         * received or measured for a certain time which very likely means that
         * this Speaker will not have a level within a certain
         * time-frame of a DominantSpeakerIdentification algorithm.
         * Pushes MIN_LEVEL into the history without advancing
         * {@link #lastLevelChangedTime}.
         */
        public synchronized void levelTimedOut()
        {
            // Re-using lastLevelChangedTime keeps the time unchanged while
            // still passing the "not late" check of levelChanged(int,long).
            levelChanged(MIN_LEVEL, lastLevelChangedTime);
        }

        /**
         * Updates the minimum (audio) level received or measured for this
         * Speaker in light of the receipt of a specific level.
         *
         * @param level the audio level received or measured for this
         * Speaker
         */
        private void updateMinLevel(byte level)
        {
            // MIN_LEVEL stands for a muted source and carries no information
            // about the minimum level of this Speaker.
            if (level == MIN_LEVEL)
            {
                return;
            }

            if ((minLevel == MIN_LEVEL) || (minLevel > level))
            {
                // A first or a new, lower minimum: adopt it and restart the
                // estimation of the next minimum.
                minLevel = level;
                nextMinLevel = MIN_LEVEL;
                nextMinLevelWindowLength = 0;
            }
            else if (nextMinLevel == MIN_LEVEL)
            {
                // The specified (audio) level is greater than the minimum
                // level received or measured for this Speaker. However, the
                // minimum level may be out-of-date by now. Estimate an
                // up-to-date minimum level and, eventually, make it the
                // minimum level received or measured for this Speaker.
                nextMinLevel = level;
                nextMinLevelWindowLength = 1;
            }
            else
            {
                if (nextMinLevel > level)
                {
                    nextMinLevel = level;
                }
                nextMinLevelWindowLength++;
                if (nextMinLevelWindowLength >= MIN_LEVEL_WINDOW_LENGTH)
                {
                    // The arithmetic mean will increase the minimum
                    // level faster than the geometric mean. Since the
                    // goal is to track a minimum, it sounds reasonable
                    // to go with a slow increase.
                    double newMinLevel
                        = Math.sqrt(minLevel * (double) nextMinLevel);

                    // Ensure that the new minimum level is within the
                    // supported range.
                    if (newMinLevel < MIN_LEVEL)
                    {
                        newMinLevel = MIN_LEVEL;
                    }
                    else if (newMinLevel > MAX_LEVEL)
                    {
                        newMinLevel = MAX_LEVEL;
                    }

                    minLevel = (byte) newMinLevel;

                    nextMinLevel = MIN_LEVEL;
                    nextMinLevelWindowLength = 0;
                }
            }
        }
    }
}