
be.tarsos.dsp.onsets.BeatRootSpectralFluxOnsetDetector

/*
*      _______                       _____   _____ _____  
*     |__   __|                     |  __ \ / ____|  __ \ 
*        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
*        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
*        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
*        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
*                                                         
* -------------------------------------------------------------
*
* TarsosDSP is developed by Joren Six at IPEM, University Ghent
*  
* -------------------------------------------------------------
*
*  Info: http://0110.be/tag/TarsosDSP
*  Github: https://github.com/JorenSix/TarsosDSP
*  Releases: http://0110.be/releases/TarsosDSP/
*  
*  TarsosDSP includes modified source code by various authors,
*  for credits and info, see README.
* 
*/

package be.tarsos.dsp.onsets;

import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;

import be.tarsos.dsp.AudioDispatcher;
import be.tarsos.dsp.AudioEvent;
import be.tarsos.dsp.AudioProcessor;
import be.tarsos.dsp.beatroot.Peaks;
import be.tarsos.dsp.util.fft.FFT;
import be.tarsos.dsp.util.fft.ScaledHammingWindow;

/**
 * A non real-time spectral flux onset detection method, as implemented in the
 * BeatRoot system of the Centre for Digital Music, Queen Mary, University of
 * London.
 *
 * This onset detection function does not work in real time. It analyzes an
 * audio stream and detects onsets during a post-processing step.
 *
 * @author Joren Six
 * @author Simon Dixon
 */
public class BeatRootSpectralFluxOnsetDetector implements AudioProcessor, OnsetDetector {

	/** RMS amplitude of the current frame. */
	private double frameRMS;

	/** The number of overlapping frames of audio data which have been read. */
	private int frameCount;

	/** Long term average frame energy (in frequency domain representation). */
	private double ltAverage;

	/** The real part of the data for the in-place FFT computation.
	 *  Since the input data is real, this initially contains the input data. */
	private float[] reBuffer;

	/** The imaginary part of the data for the in-place FFT computation.
	 *  Since the input data is real, this initially contains zeros. */
	private float[] imBuffer;

	/** Spectral flux onset detection function, indexed by frame. */
	private double[] spectralFlux;

	/** A mapping function for mapping FFT bins to final frequency bins.
	 *  The mapping is linear (1-1) until the resolution reaches 2 points per
	 *  semitone, then logarithmic with a semitone resolution. E.g. for a
	 *  44.1kHz sampling rate and an fftSize of 2048 (46ms), bin spacing is
	 *  21.5Hz, which is mapped linearly for bins 0-34 (0 to 732Hz), and
	 *  logarithmically for the remaining bins (MIDI notes 79 to 127, bins 35 to
	 *  83), where all energy above note 127 is mapped into the final bin. */
	private int[] freqMap;

	/** The number of entries in freqMap. Note that the length of
	 *  the array is greater, because its size is not known at creation time. */
	private int freqMapSize;

	/** The magnitude spectrum of the most recent frame,
	 *  used for calculating the spectral flux. */
	private float[] prevFrame;

	/** The magnitude spectrum of the current frame. */
	private double[] newFrame;

	/** The magnitude spectra of all frames, used for plotting the spectrogram. */
	private double[][] frames;

	/** The RMS energy of all frames. */
	private double[] energy;

	/** Spacing of audio frames in samples (see hopTime). */
	protected int hopSize;

	/** The size of an FFT frame in samples (see fftTime). */
	protected int fftSize;

	/** Total number of audio frames if known, or -1 for live or compressed input. */
	private int totalFrames;

	/** An RMS frame energy below this value results in the frame being set to zero,
	 *  so that normalization does not have undesired side-effects. */
	public static double silenceThreshold = 0.0004;

	/** For dynamic range compression, this value is added to the log magnitude
	 *  in each frequency bin and any remaining negative values are then set to zero. */
	public static double rangeThreshold = 10;

	/** Determines the method of normalization. Values can be:
	 *  <ul>
	 *  <li>0: no normalization</li>
	 *  <li>1: normalization by current frame energy</li>
	 *  <li>2: normalization by exponential average of frame energy</li>
	 *  </ul>
	 */
	public static int normaliseMode = 2;

	/** Ratio between the rate of sampling the signal energy (for the amplitude envelope) and the hop size. */
	public static int energyOversampleFactor = 2;

	private OnsetHandler handler;

	private double hopTime;

	private final FFT fft;

	public BeatRootSpectralFluxOnsetDetector(AudioDispatcher d, int fftSize, int hopSize) {
		this.hopSize = hopSize;
		this.hopTime = hopSize / d.getFormat().getSampleRate();
		this.fftSize = fftSize;

		System.err.println("Please use the ComplexOnsetDetector: BeatRootSpectralFluxOnsetDetector does not currently support streaming");
		//no overlap
		//FIXME:
		int durationInFrames = -1000;
		totalFrames = (int) (durationInFrames / hopSize) + 4;

		energy = new double[totalFrames * energyOversampleFactor];
		spectralFlux = new double[totalFrames];

		reBuffer = new float[fftSize / 2];
		imBuffer = new float[fftSize / 2];
		prevFrame = new float[fftSize / 2];

		makeFreqMap(fftSize, d.getFormat().getSampleRate());

		newFrame = new double[freqMapSize];
		frames = new double[totalFrames][freqMapSize];

		handler = new PrintOnsetHandler();
		fft = new FFT(fftSize, new ScaledHammingWindow());
	}

	@Override
	public boolean process(AudioEvent audioEvent) {
		frameRMS = audioEvent.getRMS() / 2.0;
		float[] audioBuffer = audioEvent.getFloatBuffer().clone();

		// Compute the power spectrum of the current frame.
		Arrays.fill(imBuffer, 0);
		fft.powerPhaseFFTBeatRootOnset(audioBuffer, reBuffer, imBuffer);
		Arrays.fill(newFrame, 0);

		// Spectral flux: the sum of the positive differences between the
		// current and previous magnitude spectra (half-wave rectified difference).
		double flux = 0;
		for (int i = 0; i < fftSize / 2; i++) {
			if (reBuffer[i] > prevFrame[i])
				flux += reBuffer[i] - prevFrame[i];
			newFrame[freqMap[i]] += reBuffer[i];
		}
		spectralFlux[frameCount] = flux;
		for (int i = 0; i < freqMapSize; i++)
			frames[frameCount][i] = newFrame[i];

		// Log energy of the amplitude envelope, sampled energyOversampleFactor
		// times per hop; 13.816 is -ln(1e-6), so an energy at the 1e-6 floor maps to 0.
		int sz = (fftSize - hopSize) / energyOversampleFactor;
		int index = hopSize;
		for (int j = 0; j < energyOversampleFactor; j++) {
			double newEnergy = 0;
			for (int i = 0; i < sz; i++) {
				newEnergy += audioBuffer[index] * audioBuffer[index];
				if (++index == fftSize)
					index = 0;
			}
			energy[frameCount * energyOversampleFactor + j] =
				newEnergy / sz <= 1e-6 ? 0 : Math.log(newEnergy / sz) + 13.816;
		}

		// Long term average frame energy; the decay factor ramps from 0 to 0.99
		// between frames 100 and 200.
		double decay = frameCount >= 200 ? 0.99 : (frameCount < 100 ? 0 : (frameCount - 100) / 100.0);
		if (ltAverage == 0)
			ltAverage = frameRMS;
		else
			ltAverage = ltAverage * decay + frameRMS * (1.0 - decay);

		// Zero out silent frames; normalize the rest and compress the dynamic range.
		if (frameRMS <= silenceThreshold)
			for (int i = 0; i < freqMapSize; i++)
				frames[frameCount][i] = 0;
		else {
			if (normaliseMode == 1)
				for (int i = 0; i < freqMapSize; i++)
					frames[frameCount][i] /= frameRMS;
			else if (normaliseMode == 2)
				for (int i = 0; i < freqMapSize; i++)
					frames[frameCount][i] /= ltAverage;
			for (int i = 0; i < freqMapSize; i++) {
				frames[frameCount][i] = Math.log(frames[frameCount][i]) + rangeThreshold;
				if (frames[frameCount][i] < 0)
					frames[frameCount][i] = 0;
			}
		}

		float[] tmp = prevFrame;
		prevFrame = reBuffer;
		reBuffer = tmp;
		frameCount++;
		return true;
	}
	/**
	 * Creates a map of FFT frequency bins to comparison bins.
	 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is
	 * one to one. Where the spacing is greater than 0.5 semitones, the FFT
	 * energy is mapped into semitone-wide bins. No scaling is performed; that
	 * is, the energy is summed into the comparison bins. See also process().
	 */
	protected void makeFreqMap(int fftSize, float sampleRate) {
		freqMap = new int[fftSize / 2 + 1];
		double binWidth = sampleRate / fftSize;
		int crossoverBin = (int) (2 / (Math.pow(2, 1 / 12.0) - 1));
		int crossoverMidi = (int) Math.round(Math.log(crossoverBin * binWidth / 440) /
				Math.log(2) * 12 + 69);
		// freq = 440 * Math.pow(2, (midi - 69) / 12.0) / binWidth;
		int i = 0;
		while (i <= crossoverBin)
			freqMap[i++] = i;
		while (i <= fftSize / 2) {
			double midi = Math.log(i * binWidth / 440) / Math.log(2) * 12 + 69;
			if (midi > 127)
				midi = 127;
			freqMap[i++] = crossoverBin + (int) Math.round(midi) - crossoverMidi;
		}
		freqMapSize = freqMap[i - 1] + 1;
	} // makeFreqMap()

	private void findOnsets(double p1, double p2) {
		LinkedList<Integer> peaks = Peaks.findPeaks(spectralFlux, (int) Math.round(0.06 / hopTime), p1, p2, true);
		Iterator<Integer> it = peaks.iterator();
		double minSalience = Peaks.min(spectralFlux);
		for (int i = 0; i < peaks.size(); i++) {
			int index = it.next();
			double time = index * hopTime;
			double salience = spectralFlux[index] - minSalience;
			handler.handleOnset(time, salience);
		}
	}

	public void setHandler(OnsetHandler handler) {
		this.handler = handler;
	}

	@Override
	public void processingFinished() {
		double p1 = 0.35;
		double p2 = 0.84;
		Peaks.normalise(spectralFlux);
		findOnsets(p1, p2);
	}
}
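For context, a minimal usage sketch follows; it is not part of the file above. An OnsetDetector is an AudioProcessor, so it is added to an AudioDispatcher chain and reports onsets through the OnsetHandler passed to setHandler(). Because the constructor above warns that this class does not currently support streaming and points to the ComplexOnsetDetector, the sketch uses that detector instead. The AudioDispatcherFactory.fromFloatArray factory and the single-argument ComplexOnsetDetector constructor are assumptions taken from the TarsosDSP API; the OnsetHandler lambda matches the handleOnset(time, salience) call visible in findOnsets() above.

// Example only, not part of TarsosDSP: wiring an onset detector into a dispatch chain.
import javax.sound.sampled.UnsupportedAudioFileException;

import be.tarsos.dsp.AudioDispatcher;
import be.tarsos.dsp.io.jvm.AudioDispatcherFactory;
import be.tarsos.dsp.onsets.ComplexOnsetDetector;

public class OnsetDetectionExample {
	public static void main(String[] args) throws UnsupportedAudioFileException {
		int fftSize = 512;
		// One second of silence as a stand-in for real audio input.
		float[] audio = new float[44100];
		// Buffer size and overlap determine the analysis hop (here: 256 samples).
		AudioDispatcher d = AudioDispatcherFactory.fromFloatArray(audio, 44100, fftSize, fftSize / 2);
		ComplexOnsetDetector detector = new ComplexOnsetDetector(fftSize);
		detector.setHandler((time, salience) ->
				System.out.printf("onset at %.3f s (salience %.2f)%n", time, salience));
		d.addAudioProcessor(detector);
		d.run();
	}
}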

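The concrete numbers in the freqMap documentation can be checked by reproducing the arithmetic of makeFreqMap(). The standalone snippet below is an illustration, not part of TarsosDSP: for a 44.1 kHz sample rate and an FFT size of 2048 it computes a bin width of about 21.5 Hz, a crossover bin of 33, a crossover MIDI note of 77, and a final comparison bin of 83 for MIDI note 127, giving a freqMapSize of 84.

// Standalone sanity check, not part of TarsosDSP: reproduces the arithmetic
// of makeFreqMap() for a 44.1 kHz sample rate and an FFT size of 2048.
public class FreqMapCheck {
	public static void main(String[] args) {
		double sampleRate = 44100, fftSize = 2048;
		double binWidth = sampleRate / fftSize;                      // ~21.5 Hz per FFT bin
		int crossoverBin = (int) (2 / (Math.pow(2, 1 / 12.0) - 1)); // 33: last bin mapped 1-1
		int crossoverMidi = (int) Math.round(
				Math.log(crossoverBin * binWidth / 440) / Math.log(2) * 12 + 69); // 77
		// MIDI note 127 maps to comparison bin crossoverBin + 127 - crossoverMidi.
		int lastBin = crossoverBin + 127 - crossoverMidi;            // 83, so freqMapSize is 84
		System.out.printf("binWidth=%.2f Hz, crossoverBin=%d, crossoverMidi=%d, lastBin=%d%n",
				binWidth, crossoverBin, crossoverMidi, lastBin);
	}
}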