// com.ibm.watson.speech_to_text.v1.model.AudioMetricsDetails Maven / Gradle / Ivy

// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of speech-to-text Show documentation
// Java client library to use the IBM Speech to Text API
// There is a newer version: 13.0.0
/*
 * (C) Copyright IBM Corp. 2019.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package com.ibm.watson.speech_to_text.v1.model;

import java.util.List;

import com.google.gson.annotations.SerializedName;
import com.ibm.cloud.sdk.core.service.model.GenericModel;

/**
 * Detailed information about the signal characteristics of the input audio.
 *
 * <p>Instances carry no setters; the fields are mapped from the JSON property names given in the
 * {@link SerializedName} annotations. Any getter may return {@code null} when the corresponding
 * property is absent.
 */
public class AudioMetricsDetails extends GenericModel {

  @SerializedName("final")
  private Boolean xFinal;
  @SerializedName("end_time")
  private Float endTime;
  @SerializedName("signal_to_noise_ratio")
  private Float signalToNoiseRatio;
  @SerializedName("speech_ratio")
  private Float speechRatio;
  @SerializedName("high_frequency_loss")
  private Float highFrequencyLoss;
  // The histogram fields are parameterized (not raw) so callers get typed bins without casts.
  @SerializedName("direct_current_offset")
  private List<AudioMetricsHistogramBin> directCurrentOffset;
  @SerializedName("clipping_rate")
  private List<AudioMetricsHistogramBin> clippingRate;
  @SerializedName("speech_level")
  private List<AudioMetricsHistogramBin> speechLevel;
  @SerializedName("non_speech_level")
  private List<AudioMetricsHistogramBin> nonSpeechLevel;

  /**
   * Gets the xFinal.
   *
   * <p>If {@code true}, indicates the end of the audio stream, meaning that transcription is complete. Currently, the
   * field is always {@code true}. The service returns metrics just once per audio stream. The results provide
   * aggregated audio metrics that pertain to the complete audio stream.
   *
   * @return the xFinal
   */
  public Boolean isXFinal() {
    return xFinal;
  }

  /**
   * Gets the endTime.
   *
   * <p>The end time in seconds of the block of audio to which the metrics apply.
   *
   * @return the endTime
   */
  public Float getEndTime() {
    return endTime;
  }

  /**
   * Gets the signalToNoiseRatio.
   *
   * <p>The signal-to-noise ratio (SNR) for the audio signal. The value indicates the ratio of speech to noise in the
   * audio. A valid value lies in the range of 0 to 100 decibels (dB). The service omits the field if it cannot compute
   * the SNR for the audio.
   *
   * @return the signalToNoiseRatio
   */
  public Float getSignalToNoiseRatio() {
    return signalToNoiseRatio;
  }

  /**
   * Gets the speechRatio.
   *
   * <p>The ratio of speech to non-speech segments in the audio signal. The value lies in the range of 0.0 to 1.0.
   *
   * @return the speechRatio
   */
  public Float getSpeechRatio() {
    return speechRatio;
  }

  /**
   * Gets the highFrequencyLoss.
   *
   * <p>The probability that the audio signal is missing the upper half of its frequency content.
   * <ul>
   * <li>A value close to 1.0 typically indicates artificially up-sampled audio, which negatively impacts the accuracy
   * of the transcription results.</li>
   * <li>A value at or near 0.0 indicates that the audio signal is good and has a full spectrum.</li>
   * <li>A value around 0.5 means that detection of the frequency content is unreliable or not available.</li>
   * </ul>
   *
   * @return the highFrequencyLoss
   */
  public Float getHighFrequencyLoss() {
    return highFrequencyLoss;
  }

  /**
   * Gets the directCurrentOffset.
   *
   * <p>An array of {@code AudioMetricsHistogramBin} objects that defines a histogram of the cumulative direct current
   * (DC) component of the audio signal.
   *
   * @return the directCurrentOffset
   */
  public List<AudioMetricsHistogramBin> getDirectCurrentOffset() {
    return directCurrentOffset;
  }

  /**
   * Gets the clippingRate.
   *
   * <p>An array of {@code AudioMetricsHistogramBin} objects that defines a histogram of the clipping rate for the
   * audio segments. The clipping rate is defined as the fraction of samples in the segment that reach the maximum or
   * minimum value that is offered by the audio quantization range. The service auto-detects either a 16-bit Pulse-Code
   * Modulation (PCM) audio range (-32768 to +32767) or a unit range (-1.0 to +1.0). The clipping rate is between 0.0
   * and 1.0, with higher values indicating possible degradation of speech recognition.
   *
   * @return the clippingRate
   */
  public List<AudioMetricsHistogramBin> getClippingRate() {
    return clippingRate;
  }

  /**
   * Gets the speechLevel.
   *
   * <p>An array of {@code AudioMetricsHistogramBin} objects that defines a histogram of the signal level in segments
   * of the audio that contain speech. The signal level is computed as the Root-Mean-Square (RMS) value in a decibel
   * (dB) scale normalized to the range 0.0 (minimum level) to 1.0 (maximum level).
   *
   * @return the speechLevel
   */
  public List<AudioMetricsHistogramBin> getSpeechLevel() {
    return speechLevel;
  }

  /**
   * Gets the nonSpeechLevel.
   *
   * <p>An array of {@code AudioMetricsHistogramBin} objects that defines a histogram of the signal level in segments
   * of the audio that do not contain speech. The signal level is computed as the Root-Mean-Square (RMS) value in a
   * decibel (dB) scale normalized to the range 0.0 (minimum level) to 1.0 (maximum level).
   *
   * @return the nonSpeechLevel
   */
  public List<AudioMetricsHistogramBin> getNonSpeechLevel() {
    return nonSpeechLevel;
  }
}