// be.tarsos.dsp.mfcc.MFCC Maven / Gradle / Ivy
/*
* _______ _____ _____ _____
* |__ __| | __ \ / ____| __ \
* | | __ _ _ __ ___ ___ ___| | | | (___ | |__) |
* | |/ _` | '__/ __|/ _ \/ __| | | |\___ \| ___/
* | | (_| | | \__ \ (_) \__ \ |__| |____) | |
* |_|\__,_|_| |___/\___/|___/_____/|_____/|_|
*
* -------------------------------------------------------------
*
* TarsosDSP is developed by Joren Six at IPEM, University Ghent
*
* -------------------------------------------------------------
*
* Info: http://0110.be/tag/TarsosDSP
* Github: https://github.com/JorenSix/TarsosDSP
* Releases: http://0110.be/releases/TarsosDSP/
*
* TarsosDSP includes modified source code by various authors,
* for credits and info, see README.
*
*/
package be.tarsos.dsp.mfcc;
import be.tarsos.dsp.AudioEvent;
import be.tarsos.dsp.AudioProcessor;
import be.tarsos.dsp.util.fft.FFT;
import be.tarsos.dsp.util.fft.HammingWindow;
/**
 * Audio processor that computes Mel-frequency cepstral coefficients (MFCCs)
 * for every buffer it is handed.
 *
 * <p>Each call to {@link #process(AudioEvent)} runs the following chain on a
 * copy of the event's float buffer: magnitude spectrum, triangular mel filter
 * bank, natural logarithm (floored), and a cosine transform. The result of
 * the most recent call is available through {@link #getMFCC()}.
 *
 * <p>Instances keep the latest buffer and coefficients in fields, so a single
 * instance holds the state of one stream only.
 */
public class MFCC implements AudioProcessor {

    /** Smallest lower filter bound, in Hz, that the constructor accepts. */
    private static final float MIN_LOWER_FILTER_FREQ = 25f;

    /** Smallest value the log filter bank output may take. */
    private static final float LOG_FLOOR = -50f;

    /** Number of cepstral coefficients produced per frame. */
    private final int amountOfCepstrumCoef;

    /** Number of mel filters (SPHINX-III uses 40). */
    protected int amountOfMelFilters;

    /** Lower limit of the filter bank in Hz. */
    protected float lowerFilterFreq;

    /** Upper limit of the filter bank in Hz (at most half the sample rate). */
    protected float upperFilterFreq;

    /** Copy of the buffer handled by the last {@link #process(AudioEvent)} call. */
    float[] audioFloatBuffer;

    // Original author's note (translated from Dutch):
    // "There are as many MFCCs as there are frames!?
    //  Per frame there are then CEPSTRA coefficients."
    /** Coefficients of the most recently processed frame; null before the first frame. */
    private float[] mfcc;

    /**
     * FFT bin indices delimiting the triangular filters: slot 0 is the lower
     * edge, slots 1..amountOfMelFilters are the filter centres and the last
     * slot is the upper edge.
     */
    int centerFrequencies[];

    private final FFT fft;
    private final int samplesPerFrame;
    private final float sampleRate;

    /**
     * Creates a processor with 30 cepstral coefficients, 30 mel filters and a
     * filter bank spanning 133.3334 Hz up to half the sample rate.
     *
     * @param samplesPerFrame number of samples in each processed buffer
     * @param sampleRate      sample rate of the audio in Hz
     */
    public MFCC(int samplesPerFrame, int sampleRate) {
        this(samplesPerFrame, sampleRate, 30, 30, 133.3334f, ((float) sampleRate) / 2f);
    }

    /**
     * Creates a fully configured processor and pre-computes the filter bank
     * bin indices.
     *
     * @param samplesPerFrame      number of samples in each processed buffer; also the FFT size
     * @param sampleRate           sample rate of the audio in Hz
     * @param amountOfCepstrumCoef number of cepstral coefficients to produce per frame
     * @param amountOfMelFilters   number of triangular mel filters
     * @param lowerFilterFreq      lower bound of the filter bank in Hz; raised to 25 Hz if smaller
     * @param upperFilterFreq      upper bound of the filter bank in Hz; lowered to
     *                             {@code sampleRate / 2} if larger
     */
    public MFCC(int samplesPerFrame, float sampleRate, int amountOfCepstrumCoef,
            int amountOfMelFilters, float lowerFilterFreq, float upperFilterFreq) {
        this.samplesPerFrame = samplesPerFrame;
        this.sampleRate = sampleRate;
        this.amountOfCepstrumCoef = amountOfCepstrumCoef;
        this.amountOfMelFilters = amountOfMelFilters;
        this.fft = new FFT(samplesPerFrame, new HammingWindow());

        this.lowerFilterFreq = Math.max(lowerFilterFreq, MIN_LOWER_FILTER_FREQ);
        this.upperFilterFreq = Math.min(upperFilterFreq, sampleRate / 2);
        calculateFilterBanks();
    }

    /**
     * Computes the MFCCs of the event's float buffer and stores them for
     * {@link #getMFCC()}. The buffer is cloned first, so the event's own data
     * is not modified by the transform.
     *
     * @param audioEvent event carrying the samples of the current frame
     * @return always {@code true}
     */
    @Override
    public boolean process(AudioEvent audioEvent) {
        audioFloatBuffer = audioEvent.getFloatBuffer().clone();

        float[] spectrum = magnitudeSpectrum(audioFloatBuffer);
        float[] filterBankOutput = melFilter(spectrum, centerFrequencies);
        float[] logFilterBank = nonLinearTransformation(filterBankOutput);
        mfcc = cepCoefficients(logFilterBank);

        return true;
    }

    /** Nothing to release; present to satisfy {@link AudioProcessor}. */
    @Override
    public void processingFinished() {
    }

    /**
     * Computes the magnitude spectrum of a frame.
     *
     * <p>The forward transform is applied to {@code frame} itself, so its
     * contents are overwritten. The lower half of the returned array holds the
     * moduli of bins {@code 0 .. frame.length/2 - 1}; the upper half holds the
     * same values in reverse order.
     *
     * @param frame input frame signal; overwritten by the transform
     * @return magnitude spectrum array with the same length as {@code frame}
     */
    public float[] magnitudeSpectrum(float frame[]) {
        float[] magSpectrum = new float[frame.length];

        fft.forwardTransform(frame);

        final int half = frame.length / 2;
        for (int k = 0; k < half; k++) {
            final int low = half - 1 - k;
            final float magnitude = fft.modulus(frame, low);
            magSpectrum[half + k] = magnitude;
            magSpectrum[low] = magnitude;
        }
        return magSpectrum;
    }

    /**
     * Calculates the FFT bin indices that delimit the triangular mel filters
     * and stores them in {@code centerFrequencies}.
     *
     * <p>The points are equally spaced on the mel scale between
     * {@code lowerFilterFreq} and {@code upperFilterFreq}.
     */
    public final void calculateFilterBanks() {
        centerFrequencies = new int[amountOfMelFilters + 2];

        centerFrequencies[0] = Math.round(lowerFilterFreq / sampleRate * samplesPerFrame);
        // NOTE(review): the upper edge is pinned to the Nyquist bin even when
        // upperFilterFreq is below sampleRate / 2 -- confirm this is intended.
        centerFrequencies[centerFrequencies.length - 1] = samplesPerFrame / 2;

        final double melLow = freqToMel(lowerFilterFreq);
        final double melHigh = freqToMel(upperFilterFreq);
        final float melStep = (float) ((melHigh - melLow) / (amountOfMelFilters + 1));

        for (int i = 1; i <= amountOfMelFilters; i++) {
            float fc = (inverseMel(melLow + melStep * i) / sampleRate) * samplesPerFrame;
            // Index i, not i - 1: slot 0 already holds the lower edge and the
            // last slot the upper edge, so the centres belong in slots
            // 1..amountOfMelFilters. Writing to i - 1 clobbered the lower edge
            // and left slot amountOfMelFilters at 0.
            centerFrequencies[i] = Math.round(fc);
        }
    }

    /**
     * Applies the natural logarithm to every filter bank output, flooring each
     * result at -50. A zero input therefore yields -50.
     *
     * @param fbank output of mel filtering
     * @return natural log of {@code fbank}, with values below -50 replaced by -50
     */
    public float[] nonLinearTransformation(float fbank[]) {
        float[] logged = new float[fbank.length];
        for (int i = 0; i < fbank.length; i++) {
            float value = (float) Math.log(fbank[i]);
            if (value < LOG_FLOOR) {
                value = LOG_FLOOR;
            }
            logged[i] = value;
        }
        return logged;
    }

    /**
     * Calculates the output of the triangular mel filter bank.
     *
     * <p>Filter {@code k} (1-based) rises from bin {@code centerFrequencies[k - 1]}
     * to {@code centerFrequencies[k]} and falls from there to
     * {@code centerFrequencies[k + 1]}.
     *
     * @param bin               magnitude spectrum bins
     * @param centerFrequencies bin indices of the filter edges and centres; needs
     *                          at least {@code amountOfMelFilters + 2} entries
     * @return one value per mel filter
     */
    public float[] melFilter(float bin[], int centerFrequencies[]) {
        float[] fbank = new float[amountOfMelFilters];

        for (int k = 1; k <= amountOfMelFilters; k++) {
            final int left = centerFrequencies[k - 1];
            final int center = centerFrequencies[k];
            final int right = centerFrequencies[k + 1];

            // Rising slope: weights run from 1/den up to 1 at the centre bin.
            float rising = 0;
            float den = (center - left + 1);
            for (int i = left; i <= center; i++) {
                rising += bin[i] * (i - left + 1);
            }
            rising /= den;

            // Falling slope: weights decrease from just below 1 towards 0.
            float falling = 0;
            den = (right - center + 1);
            for (int i = center + 1; i <= right; i++) {
                falling += bin[i] * (1 - ((i - center) / den));
            }

            fbank[k - 1] = rising + falling;
        }
        return fbank;
    }

    /**
     * Calculates the cepstral coefficients from the log filter bank output
     * using an unnormalised DCT-II.
     *
     * @param f output of {@link #nonLinearTransformation(float[])}
     * @return the first {@code amountOfCepstrumCoef} cepstral coefficients
     */
    public float[] cepCoefficients(float f[]) {
        float[] cepc = new float[amountOfCepstrumCoef];

        for (int i = 0; i < cepc.length; i++) {
            for (int j = 0; j < f.length; j++) {
                cepc[i] += f[j] * Math.cos(Math.PI * i / f.length * (j + 0.5));
            }
        }
        return cepc;
    }

    /**
     * Converts a frequency to the mel scale: {@code 2595 * log10(1 + freq / 700)}.
     *
     * @param freq frequency in Hz
     * @return mel-frequency
     */
    protected static float freqToMel(float freq) {
        return (float) (2595 * log10(1 + freq / 700));
    }

    /**
     * Converts a mel-scale value back to a frequency; inverse of
     * {@link #freqToMel(float)}.
     *
     * @param x mel-frequency
     * @return frequency in Hz
     */
    private static float inverseMel(double x) {
        return (float) (700 * (Math.pow(10, x / 2595) - 1));
    }

    /**
     * Calculates the base 10 logarithm.
     *
     * @param value number to take the log of
     * @return base 10 logarithm of {@code value}
     */
    protected static float log10(float value) {
        return (float) (Math.log(value) / Math.log(10));
    }

    /**
     * Returns a copy of the coefficients computed by the last
     * {@link #process(AudioEvent)} call.
     *
     * @return copy of the most recent MFCC vector
     * @throws NullPointerException if no frame has been processed yet
     */
    public float[] getMFCC() {
        return mfcc.clone();
    }

    /**
     * Returns the filter bank bin indices. The internal array itself is
     * returned, not a copy, so changes made by the caller affect later
     * filtering.
     *
     * @return bin indices of the filter edges and centres
     */
    public int[] getCenterFrequencies() {
        return centerFrequencies;
    }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy