// io.spokestack.spokestack.SpeechSampler Maven / Gradle / Ivy
//
// Go to download
package io.spokestack.spokestack;

import java.io.File;
import java.io.FileOutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;

/**
 * speech sampling logger.
 *
 * <p>
 * This is the spokestack pipeline component for logging speech samples. The
 * samples are written in the wav format to the configured output directory
 * with rotating file names. The sampler is useful for debugging pipeline
 * configuration, microphone levels, etc. The sampler only logs audio samples
 * that correspond to speech (where {@code context.isSpeech()} is true).
 * </p>
 *
 * <p>
 * This pipeline component supports the following configuration properties:
 * </p>
 * <ul>
 *   <li>
 *      <b>sample-log-path</b> (string): path to the directory to write logs
 *   </li>
 *   <li>
 *      <b>sample-log-max-files</b> (int): maximum number of rotated files
 *      to create (default: 10)
 *   </li>
 * </ul>
 *
 */
public final class SpeechSampler implements SpeechProcessor {
    /** default maximum number of rotated sample files. */
    public static final int DEFAULT_SAMPLE_MAX = 10;

    /** size in bytes of the riff + fmt + data header built by the constructor. */
    private static final int WAV_HEADER_SIZE = 44;

    private final String logPath;
    private final int sampleMax;
    private final ByteBuffer header;

    /** index of the next rotated file name, kept wrapped by sampleMax. */
    private int sampleId;

    /** the currently open sample file, or null when no sample is recording. */
    private FileOutputStream stream;

    /**
     * constructs a new sampler instance.
     *
     * Reads "sample-log-path", "sample-log-max-files" (default
     * {@link #DEFAULT_SAMPLE_MAX}) and "sample-rate" from the configuration,
     * creates the log directory, and prebuilds the wav header that is
     * written at the start of every sample file.
     *
     * @param config the pipeline configuration instance
     * @throws Exception on error
     */
    public SpeechSampler(SpeechConfig config) throws Exception {
        this.logPath = config.getString("sample-log-path");
        this.sampleMax = config.getInteger(
            "sample-log-max-files",
            DEFAULT_SAMPLE_MAX);

        // create the log path if it doesn't exist; the result is not checked
        // here, so a missing directory surfaces when the first file is opened
        new File(this.logPath).mkdirs();

        // create the wav file header: 16-bit mono pcm at the configured rate
        int sampleRate = config.getInteger("sample-rate");
        this.header = ByteBuffer
            .allocate(WAV_HEADER_SIZE)
            .order(ByteOrder.LITTLE_ENDIAN);
        // riff/wave header
        // the header is written once per file and never patched afterwards,
        // so both size fields carry Integer.MAX_VALUE as a placeholder
        this.header.put("RIFF".getBytes(StandardCharsets.US_ASCII));
        this.header.putInt(Integer.MAX_VALUE);
        this.header.put("WAVE".getBytes(StandardCharsets.US_ASCII));
        // format chunk
        this.header.put("fmt ".getBytes(StandardCharsets.US_ASCII));
        this.header.putInt(16);                     // size of format chunk
        this.header.putShort((short) 1);            // pcm
        this.header.putShort((short) 1);            // channels
        this.header.putInt(sampleRate);             // sample rate
        this.header.putInt(sampleRate * 2);         // byte rate
        this.header.putShort((short) 2);            // block align
        this.header.putShort((short) 16);           // bits per sample
        // data chunk
        this.header.put("data".getBytes(StandardCharsets.US_ASCII));
        this.header.putInt(Integer.MAX_VALUE);      // size of data chunk
    }

    /**
     * resets the component by closing any open sample file, so that the
     * next speech frame starts a new file.
     * @throws Exception on error
     */
    @Override
    public void reset() throws Exception {
        close();
    }

    /**
     * destroys the resources attached to the component.
     *
     * The stream reference is cleared before the stream is closed, so the
     * sampler never holds on to a closed stream, even if closing fails.
     * Calling this method when no file is open is a no-op.
     *
     * @throws Exception on error
     */
    public void close() throws Exception {
        FileOutputStream open = this.stream;
        if (open != null) {
            // detach first: a later process() call must create a fresh file
            // instead of writing to a stream that is already closed
            this.stream = null;
            open.close();
        }
    }

    /**
     * processes a frame of audio.
     *
     * Opens a new rotated wav file when speech starts, closes it when
     * speech ends, and appends the frame to the file while one is open.
     * Reading the frame advances its position to its limit.
     *
     * @param context the current speech context
     * @param frame   the audio frame to detect
     * @throws Exception on error
     */
    public void process(SpeechContext context, ByteBuffer frame)
            throws Exception {
        boolean speech = context.isSpeech();
        if (speech && this.stream == null) {
            // speech rising edge, create and attach the log file
            open();
        } else if (!speech && this.stream != null) {
            // speech falling edge, flush changes and close
            close();
        }
        // write the current audio frame to the wav file
        if (this.stream != null) {
            byte[] data = new byte[frame.remaining()];
            frame.get(data);
            this.stream.write(data);
        }
    }

    /**
     * creates the next rotated sample file and writes the wav header to it.
     * @throws Exception on error
     */
    private void open() throws Exception {
        int index = this.sampleId;
        // keep the counter wrapped so it can never overflow into negative
        // values, which would produce negative file names
        this.sampleId = (index + 1) % this.sampleMax;

        File file = new File(
            this.logPath,
            String.format("%05d.wav", index));
        this.stream = new FileOutputStream(file);
        this.stream.write(this.header.array());
    }
}