org.openimaj.vis.audio.AudioOverviewVisualisation Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of visualisations Show documentation
A library that contains classes for visualising various different features, such as audio and video.
There is a newer version: 1.3.5
/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.vis.audio;

import gnu.trove.list.array.TFloatArrayList;

import java.awt.Dimension;
import java.util.ArrayList;

import org.openimaj.audio.AudioFormat;
import org.openimaj.audio.AudioStream;
import org.openimaj.audio.SampleChunk;
import org.openimaj.audio.processor.AudioProcessor;
import org.openimaj.audio.samples.SampleBuffer;
import org.openimaj.image.MBFImage;
import org.openimaj.image.colour.RGBColour;
import org.openimaj.image.renderer.MBFImageRenderer;
import org.openimaj.image.typography.hershey.HersheyFont;
import org.openimaj.math.geometry.point.Point2d;
import org.openimaj.math.geometry.point.Point2dImpl;
import org.openimaj.math.geometry.shape.Polygon;
import org.openimaj.vis.DataUnitsTransformer;
import org.openimaj.vis.VisualisationImpl;
import org.openimaj.vis.timeline.TimelineObject;
import org.openimaj.vis.timeline.TimelineObjectAdapter;

/**
 * 	Utilises an audio processor to plot the audio waveform to an image. This class
 * 	is both a {@link VisualisationImpl} and a {@link TimelineObject}. This means that
 * 	it can be used to plot a complete visualisation of the overview of the data
 * 	or it can be used to plot temporal parts of the data into the visualisation window.
 * 	
 * 	An internal class (AudioOverviewGenerator) can be used to generate overviews
 * 	if necessary.
 * 	
 * 	This class implements {@link TimelineObject}, which allows an audio
 * 	waveform to be put upon a timeline.
 *
 *  @author David Dupplaw ([email protected])
 *
 *	@created 9 Jun 2011
 */
public class AudioOverviewVisualisation extends VisualisationImpl
	implements TimelineObject
{
	/** */
	private static final long serialVersionUID = 1L;

	/**
	 * 	Generates an audio overview. This is a lower-resolution version of
	 * 	the audio waveform. It takes the maximum value from a set of
	 * 	values and stores this as the overview. By default the processor
	 * 	takes the maximum value from every 5000 samples.  The method
	 * 	{@link #getAudioOverview(int, int)} allows resampling of that
	 * 	overview.
	 *
	 *	@author David Dupplaw ([email protected])
	 *  @created 21 Jul 2011
	 *
	 */
	public class AudioOverviewGenerator extends AudioProcessor
	{
    	/**
    	 * 	Number of (per-channel) samples that are folded into one overview
    	 * 	bin; -1 until the constructor assigns it.
    	 */
    	private int nSamplesPerBin = -1;

    	/**
    	 * 	The running maximum of the bin currently being filled, one entry
    	 * 	per channel. Allocated by the constructor.
    	 */
    	private float[] channelMax = null;

    	/** The number of samples so far in the current bin being processed */
    	private int nSamplesInBin = 0;

    	/**
    	 * 	The overview data: one list of bin maxima per channel. Allocated
    	 * 	by the constructor.
    	 */
    	private TFloatArrayList[] audioOverview = null;

    	/** The number of channels in the audio data, as given to the constructor */
		private int nChannels = 0;

		/**
		 * 	The audio format of the samples we're processing. Taken from the
		 * 	first chunk passed to process(); null until then.
		 */
		private AudioFormat af = null;

    	/**
    	 * 	Creates a generator that keeps one list of bin maxima per channel.
    	 * 	Each per-channel running maximum starts at the same sentinel value
    	 * 	that {@link #process(SampleChunk)} resets it to after a bin has been
    	 * 	stored, so the first bin is treated exactly like every later bin
    	 * 	(previously the first bin could never report a maximum below 0).
    	 *
    	 *	@param nSamplesPerBin The number of samples per bin
    	 *	@param nChannels The number of channels
    	 */
    	public AudioOverviewGenerator( final int nSamplesPerBin, final int nChannels )
		{
			this.nSamplesPerBin = nSamplesPerBin;
			this.nChannels = nChannels;
			this.audioOverview = new TFloatArrayList[nChannels];
			this.channelMax = new float[nChannels];

			for( int i = 0; i < nChannels; i++ )
			{
				this.audioOverview[i] = new TFloatArrayList();

				// Same "no sample seen yet" marker that process() uses when
				// it starts a new bin.
				this.channelMax[i] = Integer.MIN_VALUE;
			}
		}

    	/**
    	 *	{@inheritDoc}
    	 *
    	 * 	Folds the samples of the chunk into the overview: for every channel
    	 * 	the running maximum is updated with each sample and, as soon as
    	 * 	<code>nSamplesPerBin</code> samples have been seen, the maxima are
    	 * 	appended to the overview and a new bin is started. Bins may span
    	 * 	chunk boundaries. A trailing, partially filled bin is not stored.
    	 *
    	 * 	The number of per-channel samples in the chunk is also added to
    	 * 	the enclosing visualisation's <code>numberOfProcessedSamples</code>.
    	 *
    	 * 	@param samples The chunk to read; it is not modified
    	 * 	@return The same chunk that was passed in
    	 * 	@see org.openimaj.audio.processor.AudioProcessor#process(org.openimaj.audio.SampleChunk)
    	 */
		@Override
		public SampleChunk process( final SampleChunk samples )
		{
			// Store the format of the data (taken from the first chunk only)
			if( this.af == null ) this.af = samples.getFormat();

			// Get the sample data
			final SampleBuffer b = samples.getSampleBuffer();

			// The number of samples (per channel) in this sample chunk
			final int nSamples = b.size() / this.af.getNumChannels();

			// Keep a running total of how many samples we've processed
			AudioOverviewVisualisation.this.numberOfProcessedSamples += nSamples;

			for( int x = 0; x < nSamples; x++ )
			{
				// NOTE(review): the buffer is indexed with the channel count
				// given to the constructor - presumably always equal to the
				// format's channel count; confirm against callers.
				for( int c = 0; c < this.nChannels; c++ )
				{
					// Store the maximum for the current bin
					this.channelMax[c] = Math.max( this.channelMax[c],
							b.get(x*this.nChannels+c) );
				}

				// Count the sample first and then test, so that a bin is
				// closed after exactly nSamplesPerBin samples. (Testing
				// before counting let every bin swallow one extra sample.)
				this.nSamplesInBin++;
				if( this.nSamplesInBin >= this.nSamplesPerBin )
				{
					for( int c = 0; c < this.nChannels; c++ )
					{
						// Store the current bin
						this.audioOverview[c].add( this.channelMax[c] );
						this.channelMax[c] = Integer.MIN_VALUE;
					}

					// Reset for the next bin
					this.nSamplesInBin = 0;
				}
			}

			return samples;
		}

		/**
		 * 	Gives access to the overview at the resolution it was generated.
		 *
		 * 	@return The per-channel lists of bin maxima. This is the
		 * 		generator's own array, not a copy.
		 */
		public TFloatArrayList[] getAudioOverview()
		{
			final TFloatArrayList[] perChannelOverview = this.audioOverview;
			return perChannelOverview;
		}

		/**
		 * 	Reduces the overview of one channel to at most <code>nBins</code>
		 * 	values. If the stored overview already has <code>nBins</code>
		 * 	entries or fewer, the stored list itself is returned; otherwise a
		 * 	new list is built in which every entry is the maximum of the
		 * 	stored bins that map onto it.
		 *
		 * 	@param channel The channel to get
		 *	@param nBins The number of bins in the overview
		 *	@return The overview of the channel with no more than nBins entries
		 */
		public TFloatArrayList getAudioOverview( final int channel, final int nBins )
		{
			final TFloatArrayList stored = this.audioOverview[channel];
			final int storedSize = stored.size();

			// Already coarse enough: hand back the stored list untouched
			if( storedSize <= nBins )
				return stored;

			// How many stored bins collapse into one output bin
			final double ratio = (double)storedSize / (double)nBins;

			final TFloatArrayList reduced = new TFloatArrayList();
			int outIndex = 0;
			while( outIndex < nBins )
			{
				final int first = (int)(outIndex * ratio);
				final int limit = (int)((outIndex+1) * ratio);

				float peak = Integer.MIN_VALUE;
				int srcIndex = first;
				while( srcIndex < limit )
				{
					peak = Math.max( peak, stored.get(srcIndex) );
					srcIndex++;
				}

				reduced.add( peak );
				outIndex++;
			}

			return reduced;
		}

		/**
		 * 	Returns a polygon representing the channel overview. The overview
		 * 	is first reduced to at most <code>width</code> values; each value
		 * 	becomes a vertex whose x is spread evenly over [0,width) and whose
		 * 	y is the (unscaled) overview value. When mirrored, the same values
		 * 	are appended in reverse order with negated y so that the outline
		 * 	runs back along the other side of the axis.
		 *
		 * 	As a side effect the enclosing visualisation's
		 * 	<code>millisecondsInView</code> is set to the number of processed
		 * 	samples divided by the sample rate in kHz. This needs the audio
		 * 	format, so at least one chunk must have been processed.
		 *
		 *	@param channel The channel to get the polygon for
		 *	@param mirror whether to mirror the polygon
		 *	@param width The width of the overview to return
		 *	@return A polygon
		 */
		public Polygon getChannelPolygon( final int channel, final boolean mirror, final int width )
		{
			final TFloatArrayList overview = this.getAudioOverview( channel, width );
			final int len = overview.size();
			final double scalar = width / (double)len;

			// Typed list (was a raw ArrayList), sized for all vertices up front
			final ArrayList<Point2d> l = new ArrayList<Point2d>( mirror ? 2 * len : len );
			for( int x = 0; x < len; x++ )
				l.add( new Point2dImpl( (float)(x * scalar), overview.get(x) ) );

			if( mirror )
			{
				// Walk back from the last value to the first
				for( int x = 1; x <= len; x++ )
					l.add( new Point2dImpl( (float)((len-x)*scalar),
						-overview.get(len-x) ) );
			}

			// Store how long the given overview is in milliseconds
			AudioOverviewVisualisation.this.millisecondsInView = (long)(AudioOverviewVisualisation.this.numberOfProcessedSamples /
					this.af.getSampleRateKHz());

			return new Polygon( l );
		}
	}

	/**
	 * 	The calculation of how many milliseconds are in the last generated
	 * 	view at the resampled overview. Written by
	 * 	AudioOverviewGenerator#getChannelPolygon each time a polygon is
	 * 	requested.
	 */
	public long millisecondsInView = 0;

	/**
	 * 	The number of samples (per channel) that were originally read in from
	 * 	the data. Accumulated by AudioOverviewGenerator#process.
	 */
	public long numberOfProcessedSamples = 0;

	/** The start time in milliseconds */
	private long start = 0;

	/**
	 * 	The length of the audio data. Defaults to 1000 and is replaced in the
	 * 	constructor by the length reported by the audio stream.
	 */
	private long length = 1000;

	/**
	 * 	The overview generator. Created in the constructor; the background
	 * 	thread sets it to null if processing the stream throws.
	 */
	private AudioOverviewGenerator aap = null;

	/**
	 * 	Number of samples that are folded into one bin of the generated
	 * 	overview (passed to the generator as its samples-per-bin value).
	 */
	private int nSamplesPerPixel = 500;

	/**
	 * 	Whether the generation is complete. Set by the background thread that
	 * 	is started in the constructor.
	 */
	private boolean generationComplete = false;

	/**
	 * 	Creates the visualisation for the given audio and immediately starts
	 * 	generating the overview of the stream on a new background thread.
	 *
	 * 	The background thread holds the generator's monitor while it works.
	 * 	When it is done - whether the stream was processed or processing
	 * 	threw - it marks the generation as finished and wakes every thread
	 * 	waiting on the generator, so that nobody is left waiting for a result
	 * 	that will never arrive. On failure the stack trace is printed and the
	 * 	generator field is set to <code>null</code>.
	 *
	 * 	@param as The audio data to plot
	 */
	public AudioOverviewVisualisation( final AudioStream as )
	{
		this.data  = as;
		this.length = this.data.getLength();

		// How many samples are folded into each bin of the overview
		// TODO: This is currently fixed-size but should be based on audio length
		this.nSamplesPerPixel  = 500;

		// Generate the audio overview
		final AudioOverviewGenerator generator = new AudioOverviewGenerator(
				this.nSamplesPerPixel, this.data.getFormat().getNumChannels() );
		this.aap = generator;

		new Thread( new Runnable()
		{
			@Override
			public void run()
			{
				// The local reference is used for locking so that the
				// monitor stays the same object even after the field has
				// been cleared.
				synchronized( generator )
				{
					try
					{
						generator.process( AudioOverviewVisualisation.this.data );
					}
					catch( final Exception e )
					{
						e.printStackTrace();
						AudioOverviewVisualisation.this.aap = null;
					}
					finally
					{
						// Always release waiters, also when processing
						// failed: previously a failure left them blocked
						// in wait() forever.
						AudioOverviewVisualisation.this.generationComplete = true;
						generator.notifyAll();
					}
				}
			}
		} ).start();

		this.setPreferredSize( new Dimension( -1, 100 ) );
	}

	/**
	 * 	Convenience method that builds a new visualisation for the given
	 * 	audio and asks it to plot the waveform with the given size and
	 * 	colours. Note that the generated image is RGBA so that the colours
	 * 	need to be 4 dimensions and may stipulate transparency.
	 *
	 * 	@param a The audio to draw
	 *	@param w The width of the image to return
	 *	@param h The height of the image to return
	 *	@param backgroundColour The background colour to draw on the image
	 *  @param colour The colour in which to draw the audio waveform.
	 *  @return The image produced by
	 *  	{@link #plotAudioWaveformImage(int, int, Float[], Float[])}
	 */
	public static MBFImage getAudioWaveformImage( final AudioStream a,
			final int w, final int h, final Float[] backgroundColour,
			final Float[] colour  )
	{
		final AudioOverviewVisualisation visualisation = new AudioOverviewVisualisation( a );
		final MBFImage waveform = visualisation.plotAudioWaveformImage(
				w, h, backgroundColour, colour );
		return waveform;
	}

	/**
	 * 	Draws the waveform of every channel onto this visualisation's image
	 * 	in the given colour, after filling the image with the background
	 * 	colour. The height is divided equally between the channels; the
	 * 	width determines how many overview values are used for each channel.
	 * 	Note that the colours need to be 4 dimensions and may stipulate
	 * 	transparency.
	 *
	 * 	This method blocks until the background generation of the overview
	 * 	has finished. If the waiting thread is interrupted, its interrupt
	 * 	status is restored and the image is returned as it currently is.
	 * 	If the generation failed, a warning is printed and the image is
	 * 	returned with only the background filled.
	 *
	 * 	If you require information about the plot afterwards you can check
	 * 	the fields that are stored within this instance.
	 *
	 *	@param w The width of the overview to draw
	 *	@param h The height that is shared between the channels
	 *	@param backgroundColour The background colour to draw on the image
	 *  @param colour The colour in which to draw the audio waveform.
	 *  @return This visualisation's image.
	 */
	public MBFImage plotAudioWaveformImage(
			final int w, final int h, final Float[] backgroundColour,
			final Float[] colour  )
	{
		// Work on a snapshot: the background thread clears the field when
		// processing fails, which must not pull the monitor from under us.
		final AudioOverviewGenerator generator = this.aap;

		// No generator available: nothing can be plotted
		if( generator == null )
		{
			this.visImage.drawText( "Processing...", 20, 20, HersheyFont.TIMES_BOLD, 12, RGBColour.WHITE );
			return this.visImage;
		}

		// Wait for the overview. The flag is tested while holding the
		// monitor, so a notification sent before we start waiting cannot be
		// missed (testing outside the monitor could block forever). The wait
		// is bounded so that a generator that was discarded without any
		// notification is noticed as well.
		synchronized( generator )
		{
			while( !this.generationComplete && this.aap == generator )
			{
				try
				{
					generator.wait( 200 );
				}
				catch( final InterruptedException e )
				{
					// Give the interruption back to the caller instead of
					// swallowing it.
					Thread.currentThread().interrupt();
					return this.visImage;
				}
			}
		}

		final int numChannels = this.data.getFormat().getNumChannels();

		// Work out how high each channel will be
		final double channelSize = h/(double)numChannels;

		// This is the scalar from audio amplitude to pixels
		final double ampScalar = channelSize / Integer.MAX_VALUE;

		final MBFImageRenderer renderer = this.visImage.createRenderer();
		this.visImage.fill( backgroundColour );

		// The generator was discarded while we were waiting: generation failed
		if( this.aap != generator )
		{
			System.err.println( "WARNING: Could not process audio " +
					"to generate the audio overview.");
			return this.visImage;
		}

		try
		{
			// Draw one mirrored polygon per channel onto the image
			for( int i = 0; i < numChannels; i++ )
			{
				final Polygon p = generator.getChannelPolygon( i, true, w );

				// x stays as it is; y goes from amplitude to pixels and is
				// flipped (halved because the polygon is mirrored).
				p.scaleXY( 1f, (float)-ampScalar/2f );

				// Move the polygon down into the band of its channel
				p.translate( 0f, (float)(-p.minY() + channelSize*i) );
				renderer.drawPolygonFilled( p, colour );
			}
		}
		catch( final Exception e )
		{
			System.err.println( "WARNING: Could not process audio " +
					"to generate the audio overview.");
			e.printStackTrace();
		}

		return this.visImage;
	}

	/**
	 * 	Returns the length of the audio data, as it was reported by the
	 * 	audio stream when this visualisation was constructed.
	 * 	NOTE(review): presumably in milliseconds, as it is added to the
	 * 	start time in milliseconds - confirm against AudioStream#getLength().
	 *
	 *	@return Length of the audio data.
	 */
	public long getLength()
	{
		final long audioLength = this.length;
		return audioLength;
	}

	/**
	 *	{@inheritDoc}
	 *
	 * 	@return The start time last given to
	 * 		{@link #setStartTimeMilliseconds(long)}, or 0 if none was set.
	 * 	@see org.openimaj.vis.timeline.TimelineObjectAdapter#getStartTimeMilliseconds()
	 */
	@Override
	public long getStartTimeMilliseconds()
	{
		final long startTime = this.start;
		return startTime;
	}

	/**
	 *	{@inheritDoc}
	 *
	 * 	@return The start time plus the value of {@link #getLength()}.
	 * 	@see org.openimaj.vis.timeline.TimelineObjectAdapter#getEndTimeMilliseconds()
	 */
	@Override
	public long getEndTimeMilliseconds()
	{
		final long begin = this.start;
		final long duration = this.getLength();
		return begin + duration;
	}

	/**
	 *	{@inheritDoc}
	 *
	 * 	Replots the waveform onto the visualisation image at the image's
	 * 	current size, in black on a yellow background. Nothing happens when
	 * 	there is no image to draw on yet. (The null test used to be inverted,
	 * 	so the plot was only attempted when the image was null, which could
	 * 	only end in a NullPointerException.)
	 *
	 * 	@see org.openimaj.vis.VisualisationImpl#update()
	 */
	@Override
	public void update()
	{
		// Without an image there is no size to plot to
		if( this.visImage == null )
			return;

		this.plotAudioWaveformImage(
			this.visImage.getWidth(), this.visImage.getHeight(),
			new Float[]{1f,1f,0f,1f}, new Float[]{0f,0f,0f,1f} );
	}

	/**
	 *	{@inheritDoc}
	 *
	 * 	@param l The new start time in milliseconds; it is stored as given.
	 * 	@see org.openimaj.vis.timeline.TimelineObject#setStartTimeMilliseconds(long)
	 */
	@Override
	public void setStartTimeMilliseconds( final long l )
	{
		final long newStart = l;
		this.start = newStart;
	}

	/**
	 *	{@inheritDoc}
	 *
	 * 	This implementation does nothing: the given transformer is neither
	 * 	stored nor used.
	 *
	 * 	@param dpt The transformer; ignored
	 * 	@see org.openimaj.vis.timeline.TimelineObject#setDataPixelTransformer(org.openimaj.vis.DataUnitsTransformer)
	 */
	@Override
	public void setDataPixelTransformer( final DataUnitsTransformer dpt )
	{
		// Intentionally empty
	}
}