All Downloads are FREE. Search and download functionalities are using the official Maven repository.

sound.musicg.api.DetectionApi Maven / Gradle / Ivy

/*
 * Copyright (C) 2011 Jacquet Wong
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sound.musicg.api;

import sound.musicg.math.rank.ArrayRankDouble;
import sound.musicg.math.statistics.StandardDeviation;
import sound.musicg.math.statistics.ZeroCrossingRate;
import sound.musicg.wave.Wave;
import sound.musicg.wave.WaveHeader;
import sound.musicg.wave.extension.Spectrogram;

/**
 * Base API for deciding whether one frame of mono audio contains a specific
 * sound.
 * <p>
 * A frame is accepted only when it passes four checks, evaluated in this
 * order: average spectral intensity, standard deviation of the strongest
 * normalized spectral values, zero crossing count and dominant frequency.
 * All thresholds are plain fields that default to zero; subclasses are
 * expected to assign them in {@link #init()}.
 * 
 * @author Jacquet Wong
 * 
 */
public class DetectionApi {

	/** Header describing the audio format; stays {@code null} if the header was rejected by the constructor. */
	protected WaveHeader waveHeader;
	/** FFT size used for the last analysed frame (equals the number of samples in that frame). */
	protected int fftSampleSize;
	/** Half of {@link #fftSampleSize}. */
	protected int numFrequencyUnit;
	/** Width in Hz of one frequency unit: {@code sampleRate / 2 / numFrequencyUnit}. */
	protected double unitFrequency;
	/** Inclusive range accepted by {@link #isPassedFrequency(double[])}. */
	protected double minFrequency, maxFrequency;
	/** Range accepted by {@link #isPassedIntensity(double[])} (lower bound exclusive, upper bound inclusive). */
	protected double minIntensity, maxIntensity;
	/** Inclusive range accepted by {@link #isPassedStandardDeviation(double[][])}. */
	protected double minStandardDeviation, maxStandardDeviation;
	/** Band limits in Hz; converted to spectrum indices {@link #lowerBoundary} and {@link #upperBoundary}. */
	protected int highPass, lowPass;
	/** Inclusive range accepted by {@link #isPassedZeroCrossingRate(short[])}. */
	protected int minNumZeroCross, maxNumZeroCross;
	/** Spectrum indices derived from {@link #highPass} and {@link #lowPass} for the last analysed frame. */
	protected int lowerBoundary, upperBoundary;
	/** Number of strongest spectral values used for the standard deviation check. */
	protected int numRobust;
	
	/**
	 * Constructor, support mono Wav only.
	 * <p>
	 * When the header is mono it is stored and {@link #init()} is invoked.
	 * Otherwise an error is printed to {@code System.err}, the header is not
	 * stored and {@link #isSpecificSound(byte[])} will always return
	 * {@code false}.
	 * <p>
	 * Note that {@link #init()} runs from inside this constructor, i.e. before
	 * any subclass constructor body or subclass field initializer has run.
	 * 
	 * @param waveHeader
	 *            header describing the audio bytes that will be analysed; must
	 *            not be {@code null}
	 */
	public DetectionApi(WaveHeader waveHeader) {
		if (waveHeader.getChannels() == 1) {
			this.waveHeader = waveHeader;
			init();
		} else {
			System.err.println("DetectionAPI supports mono Wav only");
		}
	}

	/**
	 * Initiate the settings for specific sound detection. The default
	 * implementation does nothing; subclasses override it to assign the
	 * threshold fields.
	 */
	protected void init(){
		// do nothing, needed to be overridden
	}
	
	/**
	 * Determine whether the audio bytes contain the specific sound or not.
	 * <p>
	 * The number of samples in {@code audioBytes} must be a power of 2, because
	 * the whole frame is transformed with a single FFT of that size. As a side
	 * effect this method updates {@link #fftSampleSize},
	 * {@link #numFrequencyUnit}, {@link #unitFrequency},
	 * {@link #lowerBoundary} and {@link #upperBoundary}.
	 * 
	 * @param audioBytes
	 *            input audio bytes of one frame
	 * @return {@code true} only if the frame passes the intensity, standard
	 *         deviation, zero crossing and frequency checks; {@code false}
	 *         otherwise, including when the input cannot be analysed
	 */
	public boolean isSpecificSound(byte[] audioBytes) {

		// the constructor leaves waveHeader unset for non-mono input and has
		// already reported that; nothing can be detected without a header
		if (waveHeader == null) {
			return false;
		}

		int bytesPerSample = waveHeader.getBitsPerSample() / 8;
		if (bytesPerSample <= 0) {
			// fewer than 8 bits per sample: the sample count below would divide by zero
			return false;
		}
		int numSamples = audioBytes.length / bytesPerSample;

		// numSamples required to be a power of 2
		if (numSamples <= 0 || Integer.bitCount(numSamples) != 1) {
			System.out.println("The sample size must be a power of 2");
			return false;
		}

		fftSampleSize = numSamples;
		numFrequencyUnit = fftSampleSize / 2;

		// frequency could be caught within the half of nSamples according to Nyquist theory
		unitFrequency = (double) waveHeader.getSampleRate() / 2 / numFrequencyUnit;

		// translate the pass band from Hz to spectrum indices
		lowerBoundary = (int) (highPass / unitFrequency);
		upperBoundary = (int) (lowPass / unitFrequency);

		Wave wave = new Wave(waveHeader, audioBytes);	// audio bytes of this frame
		short[] amplitudes = wave.getSampleAmplitudes();

		// spectrum for the clip
		Spectrogram spectrogram = wave.getSpectrogram(fftSampleSize, 0);
		double[][] spectrogramData = spectrogram.getAbsoluteSpectrogramData();

		// since fftSampleSize==numSamples, only the first spectrum is used
		double[] spectrum = spectrogramData[0];

		// validate the band BEFORE allocating/copying, otherwise an out-of-range
		// band fails with an exception instead of reaching this error report
		if (lowerBoundary < 0
				|| upperBoundary >= spectrum.length
				|| upperBoundary < lowerBoundary - 1) {
			System.err
					.println("is error: the wave needed to be higher sample rate");
			return false;
		}

		int frequencyUnitRange = upperBoundary - lowerBoundary + 1;
		double[] rangedSpectrum = new double[frequencyUnitRange];
		System.arraycopy(spectrum, lowerBoundary, rangedSpectrum, 0, rangedSpectrum.length);

		// Order matters: the intensity check must see the raw spectrum, because
		// the standard deviation check normalizes spectrogramData (and therefore
		// spectrum, which is one of its rows) in place. rangedSpectrum was copied
		// before that normalization.
		return isPassedIntensity(spectrum)
				&& isPassedStandardDeviation(spectrogramData)
				&& isPassedZeroCrossingRate(amplitudes)
				&& isPassedFrequency(rangedSpectrum);
	}

	/**
	 * Normalize the spectrogram in place onto a logarithmic scale.
	 * <p>
	 * Every value {@code v} is replaced by
	 * {@code log10(v / min) / log10(max / min)}, where {@code min} and
	 * {@code max} are the smallest and largest values of the whole
	 * spectrogram. Values below a tiny positive threshold are replaced by 0,
	 * and a minimum of exactly 0 is replaced by that threshold to avoid
	 * dividing by zero.
	 * 
	 * @param spectrogramData
	 *            spectrogram to normalize; modified in place
	 */
	protected void normalizeSpectrogramData(double[][] spectrogramData) {

		// find max and min amplitudes; the two comparisons are independent so
		// that a value which raises the maximum is still considered as minimum
		double maxAmp = Double.NEGATIVE_INFINITY;
		double minAmp = Double.MAX_VALUE;
		for (int i = 0; i < spectrogramData.length; i++) {
			for (int j = 0; j < spectrogramData[i].length; j++){
				double value = spectrogramData[i][j];
				if (value > maxAmp) {
					maxAmp = value;
				}
				if (value < minAmp) {
					minAmp = value;
				}
			}
		}
		// end find max and min amplitudes

		// normalization
		// avoiding divided by zero
		double minValidAmp = 0.00000000001F;
		if (minAmp == 0) {
			minAmp = minValidAmp;
		}

		double diff = Math.log10(maxAmp / minAmp); // perceptual difference
		for (int i = 0; i < spectrogramData.length; i++) {
			for (int j = 0; j < spectrogramData[i].length; j++) {
				if (spectrogramData[i][j] < minValidAmp || diff == 0) {
					// diff == 0 means the spectrogram is flat (max == min);
					// the ratio below would be 0/0 = NaN, so use 0 instead
					spectrogramData[i][j] = 0;
				} else {
					spectrogramData[i][j] = (Math.log10(spectrogramData[i][j] / minAmp)) / diff;
				}
			}
		}
		// end normalization
	}
	
	/**
	 * Check the standard deviation of the strongest values of the last
	 * spectrum in the spectrogram.
	 * <p>
	 * The spectrogram is normalized in place first (see
	 * {@link #normalizeSpectrogramData(double[][])}), so the caller's array is
	 * modified.
	 * 
	 * @param spectrogramData
	 *            spectrogram of the frame; modified in place
	 * @return {@code true} if the standard deviation lies within
	 *         [{@link #minStandardDeviation}, {@link #maxStandardDeviation}]
	 */
	protected boolean isPassedStandardDeviation(double[][] spectrogramData){
		
		// normalize the spectrogramData (with all frames in the spectrogram)
		normalizeSpectrogramData(spectrogramData);
			
		// analyse the last frame of the spectrogram
		double[] spectrum = spectrogramData[spectrogramData.length - 1];

		// threshold value used to select the strongest entries
		// (presumably the numRobust-th largest value -- confirm against ArrayRankDouble)
		double[] robustFrequencies = new double[numRobust];
		ArrayRankDouble arrayRankDouble = new ArrayRankDouble();
		double nthValue = arrayRankDouble.getNthOrderedValue(spectrum, numRobust, false);
				
		// collect, in index order, at most numRobust values that reach the
		// threshold; slots that are not filled keep their default of 0
		int count = 0;
		for (int i = 0; i < spectrum.length; i++) {
			if (spectrum[i] >= nthValue) {
				robustFrequencies[count++] = spectrum[i];
				if (count >= numRobust) {
					break;
				}
			}
		}
		// end find top most robust frequencies

		StandardDeviation standardDeviation = new StandardDeviation();
		standardDeviation.setValues(robustFrequencies);
		double sd = standardDeviation.evaluate();
		
		// range of standard deviation
		return sd >= minStandardDeviation && sd <= maxStandardDeviation;
	}
	
	/**
	 * Check that the dominant frequency of the given spectrum is within
	 * [{@link #minFrequency}, {@link #maxFrequency}].
	 * <p>
	 * The dominant frequency is computed as the index of the largest value
	 * multiplied by {@link #unitFrequency}.
	 * 
	 * @param spectrum
	 *            spectrum to inspect
	 * @return {@code true} if the dominant frequency is neither too low nor
	 *         too high
	 */
	protected boolean isPassedFrequency(double[] spectrum){
		// NOTE(review): isSpecificSound passes a copy that starts at
		// lowerBoundary, so this index is relative to lowerBoundary and the
		// computed frequency looks offset by lowerBoundary * unitFrequency.
		// Left unchanged because existing thresholds may be tuned to it -- confirm.
		ArrayRankDouble arrayRankDouble = new ArrayRankDouble();
		double robustFrequency = arrayRankDouble.getMaxValueIndex(spectrum) * unitFrequency;
		
		// frequency of the sound should not be too low or too high
		return robustFrequency >= minFrequency && robustFrequency <= maxFrequency;
	}
	
	/**
	 * Check that the average value of the spectrum is greater than
	 * {@link #minIntensity} and not greater than {@link #maxIntensity}.
	 * 
	 * @param spectrum
	 *            spectrum to inspect
	 * @return {@code true} if the average intensity is within range; an empty
	 *         spectrum yields {@code false} because its average is NaN
	 */
	protected boolean isPassedIntensity(double[] spectrum){
		// get the average intensity of the signal
		double intensity = 0;
		for (int i = 0; i < spectrum.length; i++) {
			intensity += spectrum[i];
		}
		intensity /= spectrum.length;
		// end get the average intensity of the signal
		
		// intensity of the sound should not be too soft
		return intensity > minIntensity && intensity <= maxIntensity;
	}
	
	/**
	 * Check that the zero crossing value of the amplitudes is within
	 * [{@link #minNumZeroCross}, {@link #maxNumZeroCross}].
	 * 
	 * @param amplitudes
	 *            sample amplitudes of the frame
	 * @return {@code true} if the zero crossing value is within range
	 */
	protected boolean isPassedZeroCrossingRate(short[] amplitudes){
		// different sound has different range of zero crossing value
		// when lengthInSecond=1, zero crossing rate is the num
		// of zero crosses
		ZeroCrossingRate zcr = new ZeroCrossingRate(amplitudes, 1);
		int numZeroCrosses = (int) zcr.evaluate();
		
		return numZeroCrosses >= minNumZeroCross && numZeroCrosses <= maxNumZeroCross;
	}
		
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy