be.tarsos.dsp.SpectralPeakProcessor Maven / Gradle / Ivy
/*
* _______ _____ _____ _____
* |__ __| | __ \ / ____| __ \
* | | __ _ _ __ ___ ___ ___| | | | (___ | |__) |
* | |/ _` | '__/ __|/ _ \/ __| | | |\___ \| ___/
* | | (_| | | \__ \ (_) \__ \ |__| |____) | |
* |_|\__,_|_| |___/\___/|___/_____/|_____/|_|
*
* -------------------------------------------------------------
*
* TarsosDSP is developed by Joren Six at IPEM, University Ghent
*
* -------------------------------------------------------------
*
* Info: http://0110.be/tag/TarsosDSP
* Github: https://github.com/JorenSix/TarsosDSP
* Releases: http://0110.be/releases/TarsosDSP/
*
* TarsosDSP includes modified source code by various authors,
* for credits and info, see README.
*
*/
package be.tarsos.dsp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import be.tarsos.dsp.util.PitchConverter;
import be.tarsos.dsp.util.fft.FFT;
import be.tarsos.dsp.util.fft.HammingWindow;
/**
*
* This class implements a spectral peak follower as described in Sethares et
* al. 2009 - Spectral Tools for Dynamic Tonality and Audio Morphing - section
* "Analysis-Resynthessis". It calculates a noise floor and picks spectral peaks
* rising above a calculated noise floor with a certain factor. The noise floor
* is determined using a simple median filter.
*
*
* Parts of the code is modified from the code accompanying
* "Spectral Tools for Dynamic Tonality and Audio Morphing".
*
*
* To get the spectral peaks from an audio frame, call getPeakList
*
AudioDispatcher dispatcher = new AudioDispatcher(stream, fftsize, overlap);
dispatcher.addAudioProcessor(spectralPeakFollower);
dispatcher.addAudioProcessor(new AudioProcessor() {
public void processingFinished() {
}
public boolean process(AudioEvent audioEvent) {
float[] noiseFloor = SpectralPeakProcessor.calculateNoiseFloor(spectralPeakFollower.getMagnitudes(), medianFilterLength, noiseFloorFactor);
List localMaxima = SpectralPeakProcessor.findLocalMaxima(spectralPeakFollower.getMagnitudes(), noiseFloor);
List<> list = SpectralPeakProcessor.findPeaks(spectralPeakFollower.getMagnitudes(), spectralPeakFollower.getFrequencyEstimates(), localMaxima, numberOfPeaks);
// do something with the list...
return true;
}
});
dispatcher.run();
*
* @author Joren Six
* @author William A. Sethares
* @author Andrew J. Milne
* @author Stefan Tiedje
* @author Anthony Prechtl
* @author James Plamondon
*
*/
public class SpectralPeakProcessor implements AudioProcessor {
/**
* The sample rate of the signal.
*/
private final int sampleRate;
/**
* Cached calculations for the frequency calculation
*/
private final double dt;
private final double cbin;
private final double inv_2pi;
private final double inv_deltat;
private final double inv_2pideltat;
/**
* The fft object used to calculate phase and magnitudes.
*/
private final FFT fft;
/**
* The pahse info of the current frame.
*/
private final float[] currentPhaseOffsets;
/**
* The magnitudes in the current frame.
*/
private final float[] magnitudes;
/**
* Detailed frequency estimates for each bin, using phase info
*/
private final float[] frequencyEstimates;
/**
* The phase information of the previous frame, or null.
*/
private float[] previousPhaseOffsets;
public SpectralPeakProcessor(int bufferSize, int overlap, int sampleRate) {
fft = new FFT(bufferSize, new HammingWindow());
magnitudes = new float[bufferSize / 2];
currentPhaseOffsets = new float[bufferSize / 2];
frequencyEstimates = new float[bufferSize / 2];
dt = (bufferSize - overlap) / (double) sampleRate;
cbin = (double) (dt * sampleRate / (double) bufferSize);
inv_2pi = (double) (1.0 / (2.0 * Math.PI));
inv_deltat = (double) (1.0 / dt);
inv_2pideltat = (double) (inv_deltat * inv_2pi);
this.sampleRate = sampleRate;
}
private void calculateFFT(float[] audio) {
// Clone to prevent overwriting audio data
float[] fftData = audio.clone();
// Extract the power and phase data
fft.powerPhaseFFT(fftData, magnitudes, currentPhaseOffsets);
}
private void normalizeMagintudes(){
float maxMagnitude = (float) -1e6;
for(int i = 0;i= 0 && j < magnitudes.length){
noiseFloorBuffer[index] = magnitudes[j];
} else{
noiseFloorBuffer[index] = median;
}
index++;
}
// calculate the noise floor value.
noisefloor[i] = (float) (median(noiseFloorBuffer) * (noiseFloorFactor)) ;
}
float rampLength = 12.0f;
for(int i = 0 ; i <= rampLength ; i++){
//ramp
float ramp = 1.0f;
ramp = (float) (-1 * (Math.log(i/rampLength))) + 1.0f;
noisefloor[i] = ramp * noisefloor[i];
}
return noisefloor;
}
/**
* Finds the local magintude maxima and stores them in the given list.
* @param magnitudes The magnitudes.
* @param noisefloor The noise floor.
* @return a list of local maxima.
*/
public static List findLocalMaxima(float[] magnitudes,float[] noisefloor){
List localMaximaIndexes = new ArrayList();
for (int i = 1; i < magnitudes.length - 1; i++) {
boolean largerThanPrevious = (magnitudes[i - 1] < magnitudes[i]);
boolean largerThanNext = (magnitudes[i] > magnitudes[i + 1]);
boolean largerThanNoiseFloor = (magnitudes[i] > noisefloor[i]);
if (largerThanPrevious && largerThanNext && largerThanNoiseFloor) {
localMaximaIndexes.add(i);
}
}
return localMaximaIndexes;
}
/**
* @param magnitudes the magnitudes.
* @return the index for the maximum magnitude.
*/
private static int findMaxMagnitudeIndex(float[] magnitudes){
int maxMagnitudeIndex = 0;
float maxMagnitude = (float) -1e6;
for (int i = 1; i < magnitudes.length - 1; i++) {
if(magnitudes[i] > maxMagnitude){
maxMagnitude = magnitudes[i];
maxMagnitudeIndex = i;
}
}
return maxMagnitudeIndex;
}
/**
*
* @param magnitudes the magnitudes..
* @param frequencyEstimates The frequency estimates for each bin.
* @param localMaximaIndexes The indexes of the local maxima.
* @param numberOfPeaks The requested number of peaks.
* @param minDistanceInCents The minimum distance in cents between the peaks
* @return A list with spectral peaks.
*/
public static List findPeaks(float[] magnitudes, float[] frequencyEstimates, List localMaximaIndexes, int numberOfPeaks, int minDistanceInCents){
int maxMagnitudeIndex = findMaxMagnitudeIndex(magnitudes);
List spectralPeakList = new ArrayList();
if(localMaximaIndexes.size()==0)
return spectralPeakList;
float referenceFrequency=0;
//the frequency of the bin with the highest magnitude
referenceFrequency = frequencyEstimates[maxMagnitudeIndex];
//remove frequency estimates below zero
for(int i = 0 ; i < localMaximaIndexes.size() ; i++){
if(frequencyEstimates[localMaximaIndexes.get(i)] < 0 ){
localMaximaIndexes.remove(i);
frequencyEstimates[localMaximaIndexes.get(i)]=1;//Hz
i--;
}
}
//filter the local maxima indexes, remove peaks that are too close to each other
//assumes that localmaximaIndexes is sorted from lowest to higest index
for(int i = 1 ; i < localMaximaIndexes.size() ; i++){
double centCurrent = PitchConverter.hertzToAbsoluteCent(frequencyEstimates[localMaximaIndexes.get(i)]);
double centPrev = PitchConverter.hertzToAbsoluteCent(frequencyEstimates[localMaximaIndexes.get(i-1)]);
double centDelta = centCurrent - centPrev;
if(centDelta < minDistanceInCents ){
if(magnitudes[localMaximaIndexes.get(i)] > magnitudes[localMaximaIndexes.get(i-1)]){
localMaximaIndexes.remove(i-1);
}else{
localMaximaIndexes.remove(i);
}
i--;
}
}
// Retrieve the maximum values for the indexes
float[] maxMagnitudes = new float[localMaximaIndexes.size()];
for(int i = 0 ; i < localMaximaIndexes.size() ; i++){
maxMagnitudes[i] = magnitudes[localMaximaIndexes.get(i)];
}
// Sort the magnitudes in ascending order
Arrays.sort(maxMagnitudes);
// Find the threshold, the first value or somewhere in the array.
float peakthresh = maxMagnitudes[0];
if (maxMagnitudes.length > numberOfPeaks) {
peakthresh = maxMagnitudes[maxMagnitudes.length - numberOfPeaks];
}
//store the peaks
for(Integer i : localMaximaIndexes){
if(magnitudes[i]>= peakthresh){
final float frequencyInHertz= frequencyEstimates[i];
//ignore frequencies lower than 30Hz
float binMagnitude = magnitudes[i];
SpectralPeak peak = new SpectralPeak(0,frequencyInHertz, binMagnitude, referenceFrequency,i);
spectralPeakList.add(peak);
}
}
return spectralPeakList;
}
public static final float median(double[] arr){
return percentile(arr, 0.5);
}
/**
* Returns the p-th percentile of values in an array. You can use this
* function to establish a threshold of acceptance. For example, you can
* decide to examine candidates who score above the 90th percentile (0.9).
* The elements of the input array are modified (sorted) by this method.
*
* @param arr An array of sample data values that define relative standing.
* The contents of the input array are sorted by this method.
* @param p The percentile value in the range 0..1, inclusive.
* @return The p-th percentile of values in an array. If p is not a multiple
* of 1/(n - 1), this method interpolates to determine the value at
* the p-th percentile.
**/
public static final float percentile( double[] arr, double p ) {
if (p < 0 || p > 1)
throw new IllegalArgumentException("Percentile out of range.");
// Sort the array in ascending order.
Arrays.sort(arr);
// Calculate the percentile.
double t = p*(arr.length - 1);
int i = (int)t;
return (float) ((i + 1 - t)*arr[i] + (t - i)*arr[i + 1]);
}
public static double median(float[] m) {
// Sort the array in ascending order.
Arrays.sort(m);
int middle = m.length/2;
if (m.length%2 == 1) {
return m[middle];
} else {
return (m[middle-1] + m[middle]) / 2.0;
}
}
public static class SpectralPeak{
private final float frequencyInHertz;
private final float magnitude;
private final float referenceFrequency;
private final int bin;
/**
* Timestamp in fractional seconds
*/
private final float timeStamp;
public SpectralPeak(float timeStamp,float frequencyInHertz, float magnitude,float referenceFrequency,int bin){
this.frequencyInHertz = frequencyInHertz;
this.magnitude = magnitude;
this.referenceFrequency = referenceFrequency;
this.timeStamp = timeStamp;
this.bin = bin;
}
public float getRelativeFrequencyInCents(){
if(referenceFrequency > 0 && frequencyInHertz > 0){
float refInCents = (float) PitchConverter.hertzToAbsoluteCent(referenceFrequency);
float valueInCents = (float) PitchConverter.hertzToAbsoluteCent(frequencyInHertz);
return valueInCents - refInCents;
}else{
return 0;
}
}
public float getTimeStamp(){
return timeStamp;
}
public float getMagnitude(){
return magnitude;
}
public float getFrequencyInHertz(){
return frequencyInHertz;
}
public float getRefFrequencyInHertz(){
return referenceFrequency;
}
public String toString(){
return String.format("%.2f %.2f %.2f", frequencyInHertz,getRelativeFrequencyInCents(),magnitude);
}
public int getBin() {
return bin;
}
}
}