io.vacco.libmp3lame.Jencoder Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of jlame Show documentation
Java versions of the Lame mp3 codec libraries
The newest version!
package io.vacco.libmp3lame;

/*
 *      encoder.h include file
 *
 *      Copyright (c) 2000 Mark Taylor
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

// encoder.h

final class Jencoder {
  /***********************************************************************
   *
   *  encoder and decoder delays
   *
   ***********************************************************************/

  /*
   * layer III enc->dec delay:  1056 (1057?)   (observed)
   * layer  II enc->dec delay:   480  (481?)   (observed)
   *
   * polyphase 256-16             (dec or enc)        = 240
   * mdct      256+32  (9*32)     (dec or enc)        = 288
   * total:    512+16
   *
   * My guess is that the delay of the polyphase filterbank is actually 240.5
   * (there are technical reasons for this, see postings in mp3encoder).
   * So total Encode+Decode delay = ENCDELAY + 528 + 1
   */

  /*
   * ENCDELAY  The encoder delay.
   *
   * Minimum allowed is MDCTDELAY (see below)
   *
   * The first 96 samples will be attenuated, so using a value less than 96
   * will result in corrupt data for the first 96-ENCDELAY samples.
   *
   * suggested: 576
   * set to 1160 to sync with FhG.
   */

  static final int ENCDELAY = 576;

  /*
   * make sure there is at least one complete frame after the
   * last frame containing real data
   *
   * Using a value of 288 would be sufficient for a
   * very sophisticated decoder that can decode granule-by-granule instead
   * of frame by frame.  But let's not assume this, and assume the decoder
   * will not decode frame N unless it also has data for frame N+1
   *
   */
  /*#define POSTDELAY   288*/
  static final int POSTDELAY = 1152;

  /*
   * delay of the MDCT used in mdct.c
   * original ISO routines had a delay of 528!
   * Takehiro's routines:
   */
  static final int MDCTDELAY = 48;
  /* 224 + MDCTDELAY; the psy-model FFT window of a granule starts at 576 - FFTOFFSET */
  static final int FFTOFFSET = (224 + MDCTDELAY);

  /** Most decoders, including the one we use, have a delay of 528 samples. */
  static final int DECDELAY = 528;

  /** number of subbands */
  static final int SBLIMIT = 32;

  /** number of partition bands */
  static final int CBANDS = 64;

  /** number of critical bands/scale factor bands where masking is computed */
  static final int SBPSY_l = 21;

  /* presumably the short-block counterpart of SBPSY_l -- TODO confirm */
  static final int SBPSY_s = 12;

  /** total number of scalefactor bands encoded */
  static final int SBMAX_l = 22;

  /* presumably the short-block counterpart of SBMAX_l -- TODO confirm */
  static final int SBMAX_s = 13;
  static final int PSFB21 = 6;
  static final int PSFB12 = 6;

  /** max scalefactor band, max(SBMAX_l, SBMAX_s*3, (SBMAX_s-3)*3+8) */
  static final int SFBMAX = (SBMAX_s * 3);

  /* FFT sizes */
  static final int BLKSIZE = 1024;
  static final int HBLKSIZE = (BLKSIZE / 2 + 1);
  static final int BLKSIZE_s = 256;
  static final int HBLKSIZE_s = (BLKSIZE_s / 2 + 1);

  /* #define switch_pe        1800 */
  /* Block type codes; SHORT_TYPE is the value written to Jgr_info.block_type when the
   * filterbank is primed in lame_encode_frame_init. */
  static final int NORM_TYPE = 0;
  static final int START_TYPE = 1;
  static final int SHORT_TYPE = 2;
  static final int STOP_TYPE = 3;

  /*
   * Mode Extension:
   * When we are in stereo mode, there are 4 possible methods to store these
   * two channels. The stereo modes -m? are using a subset of them.
   *
   *  -ms: MPG_MD_LR_LR
   *  -mj: MPG_MD_LR_LR and MPG_MD_MS_LR
   *  -mf: MPG_MD_MS_LR
   *  -mi: all
   */
  /*
  #define MPG_MD_LR_LR  0
  #define MPG_MD_LR_I   1
  #define MPG_MD_MS_LR  2
  #define MPG_MD_MS_I   3
  */
  // enum MPEGChannelMode {
  /* Values stored in gfc.ov_enc.mode_ext by lame_encode_mp3_frame. */
  private static final int MPG_MD_LR_LR = 0;
  // private static final int MPG_MD_LR_I  = 1;// FIXME never uses MPG_MD_LR_I
  static final int MPG_MD_MS_LR = 2;
  // private static final int MPG_MD_MS_I  = 3;// FIXME never uses MPG_MD_MS_I
  // };

  /**
   * Auto-adjusts the ATH scaling from the loudness of the current frame; useful for low volume
   * input (Gabriel Bouvigne, 3 feb 2001).
   *
   * <p>Reads {@code gfc.ov_psy.loudness_sq}, {@code gfc.cfg.channels_out} and
   * {@code gfc.cfg.mode_gr}; writes {@code adjust_factor} and {@code adjust_limit} of
   * {@code gfc.ATH}. When {@code use_adjust} is 0 only {@code adjust_factor} is reset to 1.
   *
   * @param gfc encoder context whose ATH state is updated in place
   */
  private static final void adjust_ATH(final Jlame_internal_flags gfc) {
    final JATH state = gfc.ATH;
    if (state.use_adjust == 0) {
      /* adjustment disabled: neutral factor */
      state.adjust_factor = 1.0f;
      return;
    }

    final JSessionConfig config = gfc.cfg;
    final boolean stereo = config.channels_out == 2;

    /* jd - 2001 mar 12, 27, jun 30 */
    /* combined loudness per granule (equal loudness curve); */
    /* a single channel is counted twice so both cases share the same scale */
    float power = gfc.ov_psy.loudness_sq[0][0];
    float secondGranule = gfc.ov_psy.loudness_sq[1][0];
    power += stereo ? gfc.ov_psy.loudness_sq[0][1] : power;
    secondGranule += stereo ? gfc.ov_psy.loudness_sq[1][1] : secondGranule;

    /* with two granules, keep the louder one */
    if (config.mode_gr == 2 && !(power >= secondGranule)) {
      power = secondGranule;
    }
    power *= 0.5f; /* approaches 1.0 for full band noise */

    /* jd - 2001 mar 31, jun 30 */
    /* user tuning of ATH adjustment region */
    power *= state.aa_sensitivity_p;

    /* jd - 2001 feb27, mar12,20, jun30, jul22 */
    /* Continuous curves approximating GB's original values. */
    /* Loudness increase: jump to adjust_limit after a delay of one frame. */
    /* Loudness decrease: move towards adjust_limit gradually. */
    /* power is a loudness squared or a power. */
    if (power > 0.03125) {
      /* threshold is ((1 - 0.000625) / 31.98) from the curve used below */
      if (state.adjust_factor >= 1.0f) {
        state.adjust_factor = 1.0f;
      } else if (state.adjust_factor < state.adjust_limit) {
        /* preceding frame had a lower ATH adjust: ascend only to the */
        /* preceding adjust_limit in case there is leading low volume */
        state.adjust_factor = state.adjust_limit;
      }
      state.adjust_limit = 1.0f;
      return;
    }

    /* adjustment curve, about 32 dB maximum adjust (0.000625) */
    final float newLimit = 31.98f * power + 0.000625f;
    if (state.adjust_factor >= newLimit) {
      /* descend gradually, but never below the new limit */
      state.adjust_factor *= newLimit * 0.075f + 0.925f;
      if (state.adjust_factor < newLimit) {
        state.adjust_factor = newLimit;
      }
    } else if (state.adjust_limit >= newLimit) {
      /* ascend straight to the new limit */
      state.adjust_factor = newLimit;
    } else if (state.adjust_factor < state.adjust_limit) {
      /* preceding frame had a lower ATH adjust: */
      /* ascend only to the preceding adjust_limit */
      state.adjust_factor = state.adjust_limit;
    }
    state.adjust_limit = newLimit;
  }

  /***********************************************************************
   *
   *  some simple statistics
   *
   *  bitrate index 0: free bitrate -> not allowed in VBR mode
   *  : bitrates, kbps depending on MPEG version
   *  bitrate index 15: forbidden
   *
   *  mode_ext:
   *  0:  LR
   *  1:  LR-i
   *  2:  MS
   *  3:  MS-i
   *
   *  Row 15 of both histograms and the last column (4 for the channel
   *  mode histogram, 5 for the block type histogram) receive every count,
   *  i.e. they accumulate totals.
   *
   ***********************************************************************/
  private static final void updateStats(final Jlame_internal_flags gfc) {
    final JSessionConfig config = gfc.cfg;
    final JEncResult enc = gfc.ov_enc;
    final int rate = enc.bitrate_index;

    /* one frame counted for this bitrate and for the total row */
    enc.bitrate_channelmode_hist[rate][4]++;
    enc.bitrate_channelmode_hist[15][4]++;

    /* mode extension is only tallied for 2 channel encoding */
    final int nch = config.channels_out;
    if (nch == 2) {
      enc.bitrate_channelmode_hist[rate][enc.mode_ext]++;
      enc.bitrate_channelmode_hist[15][enc.mode_ext]++;
    }

    final Jgr_info[][] granules = gfc.l3_side.tt;
    final int[][] blockHist = enc.bitrate_blocktype_hist;
    int gr = 0;
    while (gr < config.mode_gr) {
      final Jgr_info[] channels = granules[gr];
      int ch = 0;
      do {
        final Jgr_info info = channels[ch];
        /* mixed blocks get their own column (4) regardless of block_type */
        final int column = info.mixed_block_flag ? 4 : info.block_type;
        blockHist[rate][column]++;
        blockHist[rate][5]++;
        blockHist[15][column]++;
        blockHist[15][5]++;
        ++ch;
      } while (ch < nch);
      ++gr;
    }
  }

  /**
   * One-time priming of the MDCT/polyphase filterbank before the first frame is encoded.
   *
   * <p>Does nothing when {@code gfc.lame_encode_frame_init} is already set. Otherwise it sets the
   * flag, builds two temporary buffers consisting of one frame of zeros followed by the start of
   * the input, marks every granule/channel as a short block and runs
   * {@code Jnewmdct.mdct_sub48} over the temporary buffers.
   *
   * @param gfc encoder context
   * @param inbuf per-channel input samples; {@code inbuf[1]} is only read when
   *     {@code cfg.channels_out == 2}
   */
  private static final void lame_encode_frame_init(
      final Jlame_internal_flags gfc, final float inbuf[ /* 2 */][]) {
    if (gfc.lame_encode_frame_init) {
      return; /* already primed */
    }

    final JSessionConfig config = gfc.cfg;
    /* java: freshly allocated arrays are zeroed, which provides the leading silent frame */
    final float[] primeLeft = new float[286 + 1152 + 576];
    final float[] primeRight = new float[286 + 1152 + 576];
    final int granules = config.mode_gr;
    final int silentLead = 576 * granules;
    final int nch = config.channels_out;
    final boolean stereo = nch == 2;
    final int primedLength = 286 + 576 * (1 + granules);

    gfc.lame_encode_frame_init = true;

    /* append the beginning of the real input right after the silent frame */
    for (int dst = (silentLead > 0 ? silentLead : 0), src = 0; dst < primedLength; ++dst, ++src) {
      primeLeft[dst] = inbuf[0][src];
      if (stereo) {
        primeRight[dst] = inbuf[1][src];
      }
    }

    /* prime with short blocks */
    final Jgr_info[][] sideInfo = gfc.l3_side.tt;
    for (int gr = 0; gr < granules; gr++) {
      final Jgr_info[] channels = sideInfo[gr];
      int ch = 0;
      do {
        channels[ch].block_type = SHORT_TYPE;
        ++ch;
      } while (ch < nch);
    }

    /* polyphase filtering / mdct */
    Jnewmdct.mdct_sub48(gfc, primeLeft, primeRight);
  }

  /************************************************************************
   *
   * encodeframe()           Layer 3
   *
   * encode a single frame
   *
   ************************************************************************
   * lame_encode_frame()
   *
   *
   *                       gr 0            gr 1
   * inbuf:           |--------------|--------------|--------------|
   *
   *
   * Polyphase (18 windows, each shifted 32)
   * gr 0:
   * window1          <----512---->
   * window18                 <----512---->
   *
   * gr 1:
   * window1                         <----512---->
   * window18                                <----512---->
   *
   *
   *
   * MDCT output:  |--------------|--------------|--------------|
   *
   * FFT's                    <---------1024---------->
   *                                          <---------1024-------->
   *
   *
   *
   * inbuf = buffer of PCM data size=MP3 framesize
   * encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
   * so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]
   *
   * psy-model FFT has a 1 granule delay, so we feed it data for the
   * next granule.
   * FFT is centered over granule:  224+576+224
   * So FFT starts at:   576-224-MDCTDELAY
   *
   * MPEG2:  FFT ends at:  BLKSIZE+576-224-MDCTDELAY      (1328)
   * MPEG1:  FFT ends at:  BLKSIZE+2*576-224-MDCTDELAY    (1904)
   *
   * MPEG2:  polyphase first window:  [0..511]
   * 18th window:   [544..1055]          (1056)
   * MPEG1:            36th window:   [1120..1631]         (1632)
   * data needed:  512+framesize-32
   *
   * A close look at newmdct.c shows that the polyphase filterbank
   * only uses data from [0..510] for each window.  Perhaps because the window
   * used by the filterbank is zero for the last point, so Takehiro's
   * code doesn't bother to compute with it.
   *
   * FFT starts at 576-224-MDCTDELAY (304)  = 576-FFTOFFSET
   *
   */

  // typedef float chgrdata[2][2];

  /**
   * Smoothing weights used by {@code lame_encode_mp3_frame} when {@code cfg.vbr} is
   * {@code vbr_off} or {@code vbr_abr}: entry {@code i} multiplies the pair
   * {@code pefirbuf[i] + pefirbuf[18 - i]} for {@code i = 0..8}, while the centre sample
   * {@code pefirbuf[9]} is added unweighted. Every literal is pre-multiplied by 5.
   */
  private static final float fircoef[] = { // [9] = {
    -0.0207887f * 5, -0.0378413f * 5, -0.0432472f * 5, -0.031183f * 5,
    7.79609e-18f * 5, 0.0467745f * 5, 0.10091f * 5, 0.151365f * 5,
    0.187098f * 5
  };
  /**
   * Encodes a single Layer III frame.
   *
   * <p>Steps, in order: one-time filterbank priming on the first call, padding decision,
   * psychoacoustic analysis of every granule, ATH auto-adjustment, polyphase filtering / MDCT,
   * MS/LR decision, the quantization loop selected by {@code cfg.vbr}, bitstream formatting and
   * copy-out, and finally the frame counter and histogram updates.
   *
   * @param gfc Context; encoder state that is read and updated
   * @param inbuf_l Input; samples of channel 0
   * @param inbuf_r Input; samples of channel 1
   * @param mp3buf Output; buffer handed to {@code Jbitstream.copy_buffer}
   * @param mp3buf_offset offset into {@code mp3buf} handed to {@code Jbitstream.copy_buffer}
   * @param mp3buf_size size value handed to {@code Jbitstream.copy_buffer}
   * @return {@code -4} when the psychoacoustic model returns a non-zero code (nothing is written
   *     in that case); otherwise the value returned by {@code Jbitstream.copy_buffer}
   *     (presumably the number of bytes copied, negative on failure -- confirm in Jbitstream)
   */
  static final int lame_encode_mp3_frame(
      final Jlame_internal_flags gfc,
      final float[] inbuf_l,
      final float[] inbuf_r,
      final byte[] mp3buf,
      final int mp3buf_offset,
      final int mp3buf_size) {
    final JSessionConfig cfg = gfc.cfg;
    final JIII_psy_ratio masking_LR[][] = new JIII_psy_ratio[2][2]; /*LR masking & energy */
    masking_LR[0][0] = new JIII_psy_ratio();
    masking_LR[0][1] = new JIII_psy_ratio();
    masking_LR[1][0] = new JIII_psy_ratio();
    masking_LR[1][1] = new JIII_psy_ratio();
    final JIII_psy_ratio masking_MS[][] = new JIII_psy_ratio[2][2]; /*MS masking & energy */
    masking_MS[0][0] = new JIII_psy_ratio();
    masking_MS[0][1] = new JIII_psy_ratio();
    masking_MS[1][0] = new JIII_psy_ratio();
    masking_MS[1][1] = new JIII_psy_ratio();
    final float inbuf[][] = new float[2][];

    final float tot_ener[][] = new float[2][4];
    final float ms_ener_ratio[ /* 2 */] = {.5f, .5f};
    final float pe[][] = new float[2][2]; // java: already zeroed
    final float pe_MS[][] = new float[2][2]; // java: already zeroed

    inbuf[0] = inbuf_l;
    inbuf[1] = inbuf_r;

    if (!gfc.lame_encode_frame_init) {
      /*first run? */
      lame_encode_frame_init(gfc, inbuf);
    }
    /********************** padding *****************************/
    /* padding method as described in
     * "MPEG-Layer3 / Bitstream Syntax and Decoding"
     * by Martin Sieler, Ralph Sperschneider
     *
     * note: there is no padding for the very first frame
     *
     * Robert Hegemann 2000-06-22
     */
    gfc.ov_enc.padding = false;
    if ((gfc.sv_enc.slot_lag -= gfc.sv_enc.frac_SpF) < 0) {
      gfc.sv_enc.slot_lag += cfg.samplerate;
      gfc.ov_enc.padding = true;
    }

    final int mode_gr = cfg.mode_gr; // java
    final int channels_out = cfg.channels_out; // java
    /****************************************
     *   Stage 1: psychoacoustic model       *
     ****************************************/
    {
      /* psychoacoustic model
       * psy model has a 1 granule (576) delay that we must compensate for
       * (mt 6/99).
       */
      final int blocktype[] = new int[2];

      for (int gr = 0; gr < mode_gr; gr++) {

        /* java: one start index shared by both channels replaces the C per-channel pointers
         * &inbuf[ch][576 + gr * 576 - FFTOFFSET] */
        final int bufp = (576 - FFTOFFSET) + gr * 576;
        final float[] tot_ener_gr = tot_ener[gr]; // java
        final int ret =
            Jpsymodel.L3psycho_anal_vbr(
                gfc,
                inbuf,
                bufp,
                gr,
                masking_LR,
                masking_MS,
                pe[gr],
                pe_MS[gr],
                tot_ener_gr,
                blocktype);
        if (ret != 0) {
          return -4;
        }

        if (cfg.mode == Jlame.JOINT_STEREO) {
          /* tot_ener_gr[3] / (tot_ener_gr[2] + tot_ener_gr[3]); the sum is kept when it is not
           * positive so that no division by zero happens */
          ms_ener_ratio[gr] = tot_ener_gr[2] + tot_ener_gr[3];
          if (ms_ener_ratio[gr] > 0) {
            ms_ener_ratio[gr] = tot_ener_gr[3] / ms_ener_ratio[gr];
          }
        }

        /* block type flags */
        int ch = 0;
        do {
          final Jgr_info cod_info = gfc.l3_side.tt[gr][ch];
          cod_info.block_type = blocktype[ch];
          cod_info.mixed_block_flag = false;
        } while (++ch < channels_out);
      }
    }

    /* auto-adjust of ATH, useful for low volume */
    adjust_ATH(gfc);

    /****************************************
     *   Stage 2: MDCT                       *
     ****************************************/

    /* polyphase filtering / mdct */
    Jnewmdct.mdct_sub48(gfc, inbuf[0], inbuf[1]);

    /****************************************
     *   Stage 3: MS/LR decision             *
     ****************************************/

    /* Here will be selected MS or LR coding of the 2 stereo channels */
    gfc.ov_enc.mode_ext = MPG_MD_LR_LR;

    if (cfg.force_ms) {
      gfc.ov_enc.mode_ext = MPG_MD_MS_LR;
    } else if (cfg.mode == Jlame.JOINT_STEREO) {
      /* ms_ratio = is scaled, for historical reasons, to look like
        a ratio of side_channel / total.
        0 = signal is 100% mono
        .5 = L & R uncorrelated
      */

      /* [0] and [1] are the results for the two granules in MPEG-1,
       * in MPEG-2 it's only a faked averaging of the same value
       * _prev is the value of the last granule of the previous frame
       * _next is the value of the first granule of the next frame
       */

      float sum_pe_MS = 0;
      float sum_pe_LR = 0;
      for (int gr = 0; gr < mode_gr; gr++) {
        final float[] pe_MS_gr = pe_MS[gr]; // java
        final float[] pe_gr = pe[gr]; // java
        int ch = 0;
        do {
          sum_pe_MS += pe_MS_gr[ch];
          sum_pe_LR += pe_gr[ch];
        } while (++ch < channels_out);
      }

      /* based on PE: M/S coding would not use much more bits than L/R */
      if (sum_pe_MS <= /*1.00f * */ sum_pe_LR) {
        final Jgr_info[][] tt = gfc.l3_side.tt; // java
        final Jgr_info[] gi0 = tt[0]; // [0];
        final Jgr_info[] gi1 = tt[mode_gr - 1]; // [0];

        /* MS is only chosen when both channels use the same block type in the first and in
         * the last granule */
        if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
          gfc.ov_enc.mode_ext = MPG_MD_MS_LR;
        }
      }
    }

    /* bit and noise allocation */
    /* Each reference is assigned exactly once below, so no placeholder array is allocated. */
    final JIII_psy_ratio[][] masking; /* selected maskings */
    final float[][] pe_use; /* selected perceptual entropies; aliases pe or pe_MS */
    if (gfc.ov_enc.mode_ext == MPG_MD_MS_LR) {
      masking = masking_MS; /* use MS masking */
      pe_use = pe_MS;
    } else {
      masking = masking_LR; /* use LR masking */
      pe_use = pe;
    }

    /****************************************
     *   Stage 4: quantization loop          *
     ****************************************/

    if (cfg.vbr == Jlame.vbr_off || cfg.vbr == Jlame.vbr_abr) {
      final float[] pefirbuf = gfc.sv_enc.pefirbuf; // java

      /* shift the 19-entry history one slot down; the newest sum goes into slot 18 */
      System.arraycopy(pefirbuf, 1, pefirbuf, 0, 18);

      float f = 0.0f;
      for (int gr = 0; gr < mode_gr; gr++) {
        for (int ch = 0; ch < channels_out; ch++) {
          f += pe_use[gr][ch];
        }
      }
      pefirbuf[18] = f;

      /* symmetric weighting of the history around its centre entry */
      f = pefirbuf[9];
      int i = 0;
      do {
        f += (pefirbuf[i] + pefirbuf[18 - i]) * fircoef[i];
      } while (++i < 9);

      /* rescale the selected perceptual entropies in place */
      f = (670 * 5 * mode_gr * channels_out) / f;
      for (int gr = 0; gr < mode_gr; gr++) {
        int ch = 0;
        do {
          pe_use[gr][ch] *= f;
        } while (++ch < channels_out);
      }
    }
    switch (cfg.vbr) {
      default:
      case Jlame.vbr_off:
        JCBR_iteration_loop.iteration(gfc, pe_use, ms_ener_ratio, masking);
        break;
      case Jlame.vbr_abr:
        JABR_iteration_loop.iteration(gfc, pe_use, ms_ener_ratio, masking);
        break;
      case Jlame.vbr_rh:
        JVBR_old_iteration_loop.iteration(gfc, pe_use, ms_ener_ratio, masking);
        break;
      case Jlame.vbr_mt:
      case Jlame.vbr_mtrh:
        JVBR_new_iteration_loop.iteration(gfc, pe_use, ms_ener_ratio, masking);
        break;
    }

    /****************************************
     *   Stage 5: bitstream formatting       *
     ****************************************/

    /*  write the frame to the bitstream  */
    Jbitstream.format_bitstream(gfc);

    /* copy mp3 bit buffer into array */
    final int mp3count = Jbitstream.copy_buffer(gfc, mp3buf, mp3buf_offset, mp3buf_size, true);

    if (cfg.write_lame_tag) {
      JVbrTag.AddVbrFrame(gfc);
    }

    ++gfc.ov_enc.frame_number;

    updateStats(gfc);

    return mp3count;
  }
}