All Downloads are FREE. Search and download functionalities are using the official Maven repository.

be.tarsos.dsp.mfcc.MFCC Maven / Gradle / Ivy

There is a newer version: 2.4-1
Show newest version
/*
*      _______                       _____   _____ _____  
*     |__   __|                     |  __ \ / ____|  __ \ 
*        | | __ _ _ __ ___  ___  ___| |  | | (___ | |__) |
*        | |/ _` | '__/ __|/ _ \/ __| |  | |\___ \|  ___/ 
*        | | (_| | |  \__ \ (_) \__ \ |__| |____) | |     
*        |_|\__,_|_|  |___/\___/|___/_____/|_____/|_|     
*                                                         
* -------------------------------------------------------------
*
* TarsosDSP is developed by Joren Six at IPEM, University Ghent
*  
* -------------------------------------------------------------
*
*  Info: http://0110.be/tag/TarsosDSP
*  Github: https://github.com/JorenSix/TarsosDSP
*  Releases: http://0110.be/releases/TarsosDSP/
*  
*  TarsosDSP includes modified source code by various authors,
*  for credits and info, see README.
* 
*/

package be.tarsos.dsp.mfcc;

import be.tarsos.dsp.AudioEvent;
import be.tarsos.dsp.AudioProcessor;
import be.tarsos.dsp.util.fft.FFT;
import be.tarsos.dsp.util.fft.HammingWindow;


public class MFCC implements AudioProcessor {
	
    private int amountOfCepstrumCoef; //Number of MFCCs per frame
    protected int amountOfMelFilters; //Number of mel filters (SPHINX-III uses 40)
    protected float lowerFilterFreq; //lower limit of filter (or 64 Hz?)
    protected float upperFilterFreq; //upper limit of filter (or half of sampling freq.?)
    
    float[] audioFloatBuffer;
    //Er zijn evenveel mfccs als er frames zijn!?
    //Per frame zijn er dan CEPSTRA coëficienten
    private float[] mfcc;
    
    int centerFrequencies[];

    private FFT fft;
    private int samplesPerFrame; 
    private float sampleRate;
    
    public MFCC(int samplesPerFrame, int sampleRate){
    	this(samplesPerFrame, sampleRate, 30, 30, 133.3334f, ((float)sampleRate)/2f);
    }

    public MFCC(int samplesPerFrame, float sampleRate, int amountOfCepstrumCoef, int amountOfMelFilters, float lowerFilterFreq, float upperFilterFreq) {
        this.samplesPerFrame = samplesPerFrame; 
        this.sampleRate = sampleRate;
        this.amountOfCepstrumCoef = amountOfCepstrumCoef;
        this.amountOfMelFilters = amountOfMelFilters;
        this.fft = new FFT(samplesPerFrame, new HammingWindow());
        
        this.lowerFilterFreq = Math.max(lowerFilterFreq, 25);
        this.upperFilterFreq = Math.min(upperFilterFreq, sampleRate / 2);
        calculateFilterBanks();       
    }

	@Override
	public boolean process(AudioEvent audioEvent) {
		audioFloatBuffer = audioEvent.getFloatBuffer().clone();

        // Magnitude Spectrum
        float bin[] = magnitudeSpectrum(audioFloatBuffer);
        // get Mel Filterbank
        float fbank[] = melFilter(bin, centerFrequencies);
        // Non-linear transformation
        float f[] = nonLinearTransformation(fbank);
        // Cepstral coefficients
        mfcc = cepCoefficients(f);
        
		return true;
	}

	@Override
	public void processingFinished() {

	}
	
    /**
     * computes the magnitude spectrum of the input frame
* calls: none
* called by: featureExtraction * @param frame Input frame signal * @return Magnitude Spectrum array */ public float[] magnitudeSpectrum(float frame[]){ float magSpectrum[] = new float[frame.length]; // calculate FFT for current frame fft.forwardTransform(frame); // calculate magnitude spectrum for (int k = 0; k < frame.length/2; k++){ magSpectrum[frame.length/2+k] = fft.modulus(frame, frame.length/2-1-k); magSpectrum[frame.length/2-1-k] = magSpectrum[frame.length/2+k]; } return magSpectrum; } /** * calculates the FFT bin indices
calls: none
called by: * featureExtraction * */ public final void calculateFilterBanks() { centerFrequencies = new int[amountOfMelFilters + 2]; centerFrequencies[0] = Math.round(lowerFilterFreq / sampleRate * samplesPerFrame); centerFrequencies[centerFrequencies.length - 1] = (int) (samplesPerFrame / 2); double mel[] = new double[2]; mel[0] = freqToMel(lowerFilterFreq); mel[1] = freqToMel(upperFilterFreq); float factor = (float)((mel[1] - mel[0]) / (amountOfMelFilters + 1)); //Calculates te centerfrequencies. for (int i = 1; i <= amountOfMelFilters; i++) { float fc = (inverseMel(mel[0] + factor * i) / sampleRate) * samplesPerFrame; centerFrequencies[i - 1] = Math.round(fc); } } /** * the output of mel filtering is subjected to a logarithm function (natural logarithm)
* calls: none
* called by: featureExtraction * @param fbank Output of mel filtering * @return Natural log of the output of mel filtering */ public float[] nonLinearTransformation(float fbank[]){ float f[] = new float[fbank.length]; final float FLOOR = -50; for (int i = 0; i < fbank.length; i++){ f[i] = (float) Math.log(fbank[i]); // check if ln() returns a value less than the floor if (f[i] < FLOOR) f[i] = FLOOR; } return f; } /** * Calculate the output of the mel filter
calls: none called by: * featureExtraction * @param bin The bins. * @param centerFrequencies The frequency centers. * @return Output of mel filter. */ public float[] melFilter(float bin[], int centerFrequencies[]) { float temp[] = new float[amountOfMelFilters + 2]; for (int k = 1; k <= amountOfMelFilters; k++) { float num1 = 0, num2 = 0; float den = (centerFrequencies[k] - centerFrequencies[k - 1] + 1); for (int i = centerFrequencies[k - 1]; i <= centerFrequencies[k]; i++) { num1 += bin[i] * (i - centerFrequencies[k - 1] + 1); } num1 /= den; den = (centerFrequencies[k + 1] - centerFrequencies[k] + 1); for (int i = centerFrequencies[k] + 1; i <= centerFrequencies[k + 1]; i++) { num2 += bin[i] * (1 - ((i - centerFrequencies[k]) / den)); } temp[k] = num1 + num2; } float fbank[] = new float[amountOfMelFilters]; for (int i = 0; i < amountOfMelFilters; i++) { fbank[i] = temp[i + 1]; } return fbank; } /** * Cepstral coefficients are calculated from the output of the Non-linear Transformation method
* calls: none
* called by: featureExtraction * @param f Output of the Non-linear Transformation method * @return Cepstral Coefficients */ public float[] cepCoefficients(float f[]){ float cepc[] = new float[amountOfCepstrumCoef]; for (int i = 0; i < cepc.length; i++){ for (int j = 0; j < f.length; j++){ cepc[i] += f[j] * Math.cos(Math.PI * i / f.length * (j + 0.5)); } } return cepc; } // /** // * calculates center frequency
// * calls: none
// * called by: featureExtraction // * @param i Index of mel filters // * @return Center Frequency // */ // private static float centerFreq(int i,float samplingRate){ // double mel[] = new double[2]; // mel[0] = freqToMel(lowerFilterFreq); // mel[1] = freqToMel(samplingRate / 2); // // // take inverse mel of: // double temp = mel[0] + ((mel[1] - mel[0]) / (amountOfMelFilters + 1)) * i; // return inverseMel(temp); // } /** * convert frequency to mel-frequency
* calls: none
* called by: featureExtraction * @param freq Frequency * @return Mel-Frequency */ protected static float freqToMel(float freq){ return (float) (2595 * log10(1 + freq / 700)); } /** * calculates the inverse of Mel Frequency
* calls: none
* called by: featureExtraction */ private static float inverseMel(double x) { return (float) (700 * (Math.pow(10, x / 2595) - 1)); } /** * calculates logarithm with base 10
* calls: none
* called by: featureExtraction * @param value Number to take the log of * @return base 10 logarithm of the input values */ protected static float log10(float value){ return (float) (Math.log(value) / Math.log(10)); } public float[] getMFCC() { return mfcc.clone(); } public int[] getCenterFrequencies() { return centerFrequencies; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy