All Downloads are FREE. Search and download functionalities are using the official Maven repository.

be.tarsos.dsp.pitch.McLeodPitchMethod Maven / Gradle / Ivy

There is a newer version: 2.4-1
Show newest version
/*
*      _______                       _____   _____ _____  
*     |__   __|                     |  __ \ / ____|  __ \ 
*        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
*        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
*        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
*        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
*                                                         
* -------------------------------------------------------------
*
* TarsosDSP is developed by Joren Six at IPEM, University Ghent
*  
* -------------------------------------------------------------
*
*  Info: http://0110.be/tag/TarsosDSP
*  Github: https://github.com/JorenSix/TarsosDSP
*  Releases: http://0110.be/releases/TarsosDSP/
*  
*  TarsosDSP includes modified source code by various authors,
*  for credits and info, see README.
* 
*/


/**
 */
package be.tarsos.dsp.pitch;

import java.util.ArrayList;
import java.util.List;

/**
 * 

* Implementation of The McLeod Pitch Method (MPM). It is described in the * article A Smarter Way to Find Pitch. According to the article: *

*
*

* A fast, accurate and robust method for finding the continuous pitch in * monophonic musical sounds. [It uses] a special normalized version of the * Squared Difference Function (SDF) coupled with a peak picking algorithm. *

*

* MPM runs in real time with a standard 44.1 kHz sampling rate. It operates * without using low-pass filtering so it can work on sound with high harmonic * frequencies such as a violin and it can display pitch changes of one cent * reliably. MPM works well without any post processing to correct the pitch. *

*
*

* For the moment this implementation uses the inefficient way of calculating * the pitch. It uses O(Ww) with W the window size in samples and w * the desired number of ACF coefficients. The implementation can be optimized * to O((W+w)log(W+w)) by using an FFT to calculate the ACF. But I am still afraid of the * dark magic of the FFT and clinging to the familiar, friendly, laggard time * domain. *

* * @author Phillip McLeod * @author Joren Six */ public final class McLeodPitchMethod implements PitchDetector { /** * The expected size of an audio buffer (in samples). */ public static final int DEFAULT_BUFFER_SIZE = 1024; /** * Overlap defines how much two audio buffers following each other should * overlap (in samples). 75% overlap is advised in the MPM article. */ public static final int DEFAULT_OVERLAP = 768; /** * Defines the relative size the chosen peak (pitch) has. 0.93 means: choose * the first peak that is higher than 93% of the highest peak detected. 93% * is the default value used in the Tartini user interface. */ private static final double DEFAULT_CUTOFF = 0.97; /** * For performance reasons, peaks below this cutoff are not even considered. */ private static final double SMALL_CUTOFF = 0.5; /** * Pitch annotations below this threshold are considered invalid, they are * ignored. */ private static final double LOWER_PITCH_CUTOFF = 80.0; // Hz /** * Defines the relative size the chosen peak (pitch) has. */ private final double cutoff; /** * The audio sample rate. Most audio has a sample rate of 44.1kHz. */ private final float sampleRate; /** * Contains a normalized square difference function value for each delay * (tau). */ private final float[] nsdf; /** * The x and y coordinate of the top of the curve (nsdf). */ private float turningPointX, turningPointY; /** * A list with minimum and maximum values of the nsdf curve. */ private final List maxPositions = new ArrayList(); /** * A list of estimates of the period of the signal (in samples). */ private final List periodEstimates = new ArrayList(); /** * A list of estimates of the amplitudes corresponding with the period * estimates. */ private final List ampEstimates = new ArrayList(); /** * The result of the pitch detection iteration. */ private final PitchDetectionResult result; /** * Initializes the normalized square difference value array and stores the * sample rate. * * @param audioSampleRate * The sample rate of the audio to check. */ public McLeodPitchMethod(final float audioSampleRate) { this(audioSampleRate, DEFAULT_BUFFER_SIZE, DEFAULT_CUTOFF); } /** * Create a new pitch detector. * * @param audioSampleRate * The sample rate of the audio. * @param audioBufferSize * The size of one audio buffer 1024 samples is common. */ public McLeodPitchMethod(final float audioSampleRate, final int audioBufferSize) { this(audioSampleRate, audioBufferSize, DEFAULT_CUTOFF); } /** * Create a new pitch detector. * * @param audioSampleRate * The sample rate of the audio. * @param audioBufferSize * The size of one audio buffer 1024 samples is common. * @param cutoffMPM * The cutoff (similar to the YIN threshold). In the Tartini * paper 0.93 is used. */ public McLeodPitchMethod(final float audioSampleRate, final int audioBufferSize, final double cutoffMPM) { this.sampleRate = audioSampleRate; nsdf = new float[audioBufferSize]; this.cutoff = cutoffMPM; result = new PitchDetectionResult(); } /** * Implements the normalized square difference function. See section 4 (and * the explanation before) in the MPM article. This calculation can be * optimized by using an FFT. The results should remain the same. * * @param audioBuffer * The buffer with audio information. */ private void normalizedSquareDifference(final float[] audioBuffer) { for (int tau = 0; tau < audioBuffer.length; tau++) { float acf = 0; float divisorM = 0; for (int i = 0; i < audioBuffer.length - tau; i++) { acf += audioBuffer[i] * audioBuffer[i + tau]; divisorM += audioBuffer[i] * audioBuffer[i] + audioBuffer[i + tau] * audioBuffer[i + tau]; } nsdf[tau] = 2 * acf / divisorM; } } /* * (non-Javadoc) * * @see be.tarsos.pitch.pure.PurePitchDetector#getPitch(float[]) */ public PitchDetectionResult getPitch(final float[] audioBuffer) { final float pitch; // 0. Clear previous results (Is this faster than initializing a list // again and again?) maxPositions.clear(); periodEstimates.clear(); ampEstimates.clear(); // 1. Calculate the normalized square difference for each Tau value. normalizedSquareDifference(audioBuffer); // 2. Peak picking time: time to pick some peaks. peakPicking(); double highestAmplitude = Double.NEGATIVE_INFINITY; for (final Integer tau : maxPositions) { // make sure every annotation has a probability attached highestAmplitude = Math.max(highestAmplitude, nsdf[tau]); if (nsdf[tau] > SMALL_CUTOFF) { // calculates turningPointX and Y parabolicInterpolation(tau); // store the turning points ampEstimates.add(turningPointY); periodEstimates.add(turningPointX); // remember the highest amplitude highestAmplitude = Math.max(highestAmplitude, turningPointY); } } if (periodEstimates.isEmpty()) { pitch = -1; } else { // use the overall maximum to calculate a cutoff. // The cutoff value is based on the highest value and a relative // threshold. final double actualCutoff = cutoff * highestAmplitude; // find first period above or equal to cutoff int periodIndex = 0; for (int i = 0; i < ampEstimates.size(); i++) { if (ampEstimates.get(i) >= actualCutoff) { periodIndex = i; break; } } final double period = periodEstimates.get(periodIndex); final float pitchEstimate = (float) (sampleRate / period); if (pitchEstimate > LOWER_PITCH_CUTOFF) { pitch = pitchEstimate; } else { pitch = -1; } } result.setProbability((float) highestAmplitude); result.setPitch(pitch); result.setPitched(pitch != -1); return result; } /** *

* Finds the x value corresponding with the peak of a parabola. *

*

* a,b,c are three samples that follow each other. E.g. a is at 511, b at * 512 and c at 513; f(a), f(b) and f(c) are the normalized square * difference values for those samples; x is the peak of the parabola and is * what we are looking for. Because the samples follow each other * b - a = 1 the formula for parabolic interpolation * can be simplified a lot. *

*

* The following ASCII ART shows it a bit more clear, imagine this to be a * bit more curvaceous. *

* *
	 *     nsdf(x)
	 *       ^
	 *       |
	 * f(x)  |------ ^
	 * f(b)  |     / |\
	 * f(a)  |    /  | \
	 *       |   /   |  \
	 *       |  /    |   \
	 * f(c)  | /     |    \
	 *       |_____________________> x
	 *            a  x b  c
	 * 
* * @param tau * The delay tau, b value in the drawing is the tau value. */ private void parabolicInterpolation(final int tau) { final float nsdfa = nsdf[tau - 1]; final float nsdfb = nsdf[tau]; final float nsdfc = nsdf[tau + 1]; final float bValue = tau; final float bottom = nsdfc + nsdfa - 2 * nsdfb; if (bottom == 0.0) { turningPointX = bValue; turningPointY = nsdfb; } else { final float delta = nsdfa - nsdfc; turningPointX = bValue + delta / (2 * bottom); turningPointY = nsdfb - delta * delta / (8 * bottom); } } /** *

* Implementation based on the GPL'ED code of Tartini This code can be found in the file * general/mytransforms.cpp. *

*

* Finds the highest value between each pair of positive zero crossings. * Including the highest value between the last positive zero crossing and * the end (if any). Ignoring the first maximum (which is at zero). In this * diagram the desired values are marked with a + *

* *
	 *  f(x)
	 *   ^
	 *   |
	 *  1|               +
	 *   | \      +     /\      +     /\
	 *  0| _\____/\____/__\/\__/\____/_______> x
	 *   |   \  /  \  /      \/  \  /
	 * -1|    \/    \/            \/
	 *   |
	 * 
* * @param nsdf * The array to look for maximum values in. It should contain * values between -1 and 1 * @author Phillip McLeod */ private void peakPicking() { int pos = 0; int curMaxPos = 0; // find the first negative zero crossing while (pos < (nsdf.length - 1) / 3 && nsdf[pos] > 0) { pos++; } // loop over all the values below zero while (pos < nsdf.length - 1 && nsdf[pos] <= 0.0) { pos++; } // can happen if output[0] is NAN if (pos == 0) { pos = 1; } while (pos < nsdf.length - 1) { assert nsdf[pos] >= 0; if (nsdf[pos] > nsdf[pos - 1] && nsdf[pos] >= nsdf[pos + 1]) { if (curMaxPos == 0) { // the first max (between zero crossings) curMaxPos = pos; } else if (nsdf[pos] > nsdf[curMaxPos]) { // a higher max (between the zero crossings) curMaxPos = pos; } } pos++; // a negative zero crossing if (pos < nsdf.length - 1 && nsdf[pos] <= 0) { // if there was a maximum add it to the list of maxima if (curMaxPos > 0) { maxPositions.add(curMaxPos); curMaxPos = 0; // clear the maximum position, so we start // looking for a new ones } while (pos < nsdf.length - 1 && nsdf[pos] <= 0.0f) { pos++; // loop over all the values below zero } } } if (curMaxPos > 0) { // if there was a maximum in the last part maxPositions.add(curMaxPos); // add it to the vector of maxima } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy