// com.google.zxing.pdf417.encoder.PDF417HighLevelEncoder (Maven / Gradle / Ivy)

/*
 * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This file has been modified from its original form in Barcode4J.
 */

package com.google.zxing.pdf417.encoder;

import com.google.zxing.WriterException;
import com.google.zxing.common.CharacterSetECI;

import java.math.BigInteger;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * PDF417 high-level encoder following the algorithm described in ISO/IEC 15438:2001(E) in
 * annex P.
 */
final class PDF417HighLevelEncoder {

  /**
   * Internal mode id for Text Compaction (used to track the current mode while encoding).
   */
  private static final int TEXT_COMPACTION = 0;

  /**
   * Internal mode id for Byte Compaction.
   */
  private static final int BYTE_COMPACTION = 1;

  /**
   * Internal mode id for Numeric Compaction.
   */
  private static final int NUMERIC_COMPACTION = 2;

  /**
   * Text compaction sub-mode Alpha.
   */
  private static final int SUBMODE_ALPHA = 0;

  /**
   * Text compaction sub-mode Lower.
   */
  private static final int SUBMODE_LOWER = 1;

  /**
   * Text compaction sub-mode Mixed.
   */
  private static final int SUBMODE_MIXED = 2;

  /**
   * Text compaction sub-mode Punctuation.
   */
  private static final int SUBMODE_PUNCTUATION = 3;

  /**
   * Codeword: mode latch to Text Compaction mode.
   */
  private static final int LATCH_TO_TEXT = 900;

  /**
   * Codeword: mode latch to Byte Compaction mode (number of bytes NOT a multiple of 6).
   */
  private static final int LATCH_TO_BYTE_PADDED = 901;

  /**
   * Codeword: mode latch to Numeric Compaction mode.
   */
  private static final int LATCH_TO_NUMERIC = 902;

  /**
   * Codeword: mode shift to Byte Compaction mode (emitted for a single byte while in
   * Text Compaction).
   */
  private static final int SHIFT_TO_BYTE = 913;

  /**
   * Codeword: mode latch to Byte Compaction mode (number of bytes a multiple of 6).
   */
  private static final int LATCH_TO_BYTE = 924;

  /**
   * Codeword: identifier for a user defined Extended Channel Interpretation (ECI).
   */
  private static final int ECI_USER_DEFINED = 925;

  /**
   * Codeword: identifier for a general purpose ECI format.
   */
  private static final int ECI_GENERAL_PURPOSE = 926;

  /**
   * Codeword: identifier for an ECI of a character set or code page.
   */
  private static final int ECI_CHARSET = 927;

  /**
   * Raw code table for the text compaction Mixed sub-mode. The array index is the
   * sub-mode value and the element is the character code it represents. Entries of 0
   * are skipped when the inverse table {@link #MIXED} is built.
   */
  private static final byte[] TEXT_MIXED_RAW = {
      48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
      35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0, 32, 0, 0, 0};

  /**
   * Raw code table for the text compaction Punctuation sub-mode. The array index is the
   * sub-mode value and the element is the character code it represents. Entries of 0
   * are skipped when the inverse table {@link #PUNCTUATION} is built.
   */
  private static final byte[] TEXT_PUNCTUATION_RAW = {
      59, 60, 62, 64, 91, 92, 93, 95, 96, 126, 33, 13, 9, 44, 58,
      10, 45, 46, 36, 47, 34, 124, 42, 40, 41, 63, 123, 125, 39, 0};

  // Inverse lookups (character code -> sub-mode value), filled by the static initializer;
  // -1 marks a character that has no value in the respective sub-mode.
  private static final byte[] MIXED = new byte[128];
  private static final byte[] PUNCTUATION = new byte[128];

  // Charset used when the caller passes no encoding; no ECI is emitted for it.
  private static final Charset DEFAULT_ENCODING = StandardCharsets.ISO_8859_1;

  // Private constructor: the class exposes only static members and is not meant to be instantiated.
  private PDF417HighLevelEncoder() {
  }

  static {
    // Build the reverse mappings (character code -> sub-mode value) once at class load.
    invertInto(TEXT_MIXED_RAW, MIXED);
    invertInto(TEXT_PUNCTUATION_RAW, PUNCTUATION);
  }

  /**
   * Fills {@code inverse} so that {@code inverse[code]} yields the index at which
   * {@code code} appears in {@code raw}, or -1 when it does not appear. Raw entries that
   * are not positive are treated as unused slots and skipped.
   *
   * @param raw     table mapping sub-mode value to character code
   * @param inverse table receiving the mapping from character code to sub-mode value
   */
  private static void invertInto(byte[] raw, byte[] inverse) {
    Arrays.fill(inverse, (byte) -1);
    for (int value = 0; value < raw.length; value++) {
      int code = raw[value];
      if (code > 0) {
        inverse[code] = (byte) value;
      }
    }
  }

  /**
   * Converts a message into PDF417 codewords following the high-level encoding algorithm of
   * ISO/IEC 15438:2001(E), annex P. When the caller forces TEXT, BYTE or NUMERIC compaction,
   * the whole message is encoded in that single mode; otherwise the method switches between
   * numeric, text and byte compaction depending on the upcoming characters.
   *
   * @param msg the message
   * @param compaction compaction mode to use
   * @param encoding character encoding used to encode in default or byte compaction
   *  or {@code null} for default / not applicable
   * @return the encoded message (the char values range from 0 to 928)
   * @throws WriterException if a character cannot be encoded with {@code encoding}, or the
   *  ECI value of the charset is out of range
   */
  static String encodeHighLevel(String msg, Compaction compaction, Charset encoding) throws WriterException {

    // Every codeword (0..928) is stored as one char of the result.
    StringBuilder codewords = new StringBuilder(msg.length());

    if (encoding == null) {
      encoding = DEFAULT_ENCODING;
    } else if (!DEFAULT_ENCODING.equals(encoding)) {
      // Announce a non-default charset through an ECI, if one is registered for it.
      CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding.name());
      if (eci != null) {
        encodingECI(eci.getValue(), codewords);
      }
    }

    final int length = msg.length();

    switch (compaction) {
      case TEXT:
        encodeText(msg, 0, length, codewords, SUBMODE_ALPHA);
        break;
      case BYTE:
        byte[] msgBytes = msg.getBytes(encoding);
        encodeBinary(msgBytes, 0, msgBytes.length, BYTE_COMPACTION, codewords);
        break;
      case NUMERIC:
        codewords.append((char) LATCH_TO_NUMERIC);
        encodeNumeric(msg, 0, length, codewords);
        break;
      default:
        // Automatic mode selection; Text Compaction is the initial mode, see 4.4.2.1
        int mode = TEXT_COMPACTION;
        int submode = SUBMODE_ALPHA;
        int pos = 0;
        while (pos < length) {
          int digitRun = determineConsecutiveDigitCount(msg, pos);
          if (digitRun >= 13) {
            // Long digit runs are cheapest in Numeric Compaction.
            codewords.append((char) LATCH_TO_NUMERIC);
            mode = NUMERIC_COMPACTION;
            submode = SUBMODE_ALPHA; //Reset after latch
            encodeNumeric(msg, pos, digitRun, codewords);
            pos += digitRun;
            continue;
          }

          int textRun = determineConsecutiveTextCount(msg, pos);
          if (textRun >= 5 || digitRun == length) {
            if (mode != TEXT_COMPACTION) {
              codewords.append((char) LATCH_TO_TEXT);
              mode = TEXT_COMPACTION;
              submode = SUBMODE_ALPHA; //start with submode alpha after latch
            }
            submode = encodeText(msg, pos, textRun, codewords, submode);
            pos += textRun;
            continue;
          }

          // Always consume at least one character so the loop makes progress.
          int binaryRun = Math.max(1, determineConsecutiveBinaryCount(msg, pos, encoding));
          byte[] bytes = msg.substring(pos, pos + binaryRun).getBytes(encoding);
          if (bytes.length == 1 && mode == TEXT_COMPACTION) {
            //Shift for a single byte (instead of latch); the mode stays Text Compaction
            encodeBinary(bytes, 0, 1, TEXT_COMPACTION, codewords);
          } else {
            //Mode latch performed by encodeBinary()
            encodeBinary(bytes, 0, bytes.length, mode, codewords);
            mode = BYTE_COMPACTION;
            submode = SUBMODE_ALPHA; //Reset after latch
          }
          pos += binaryRun;
        }
        break;
    }

    return codewords.toString();
  }

  /**
   * Encode parts of the message using Text Compaction as described in ISO/IEC 15438:2001(E),
   * chapter 4.4.2.
   *
   * <p>Each character is first translated to one or more sub-mode values (0..29), inserting
   * latch/shift values whenever the current sub-mode cannot represent it. The values are then
   * packed in pairs into codewords ({@code first * 30 + second}); an odd trailing value is
   * padded with 29.
   *
   * @param msg            the message
   * @param startpos       the start position within the message
   * @param count          the number of characters to encode
   * @param sb             receives the encoded codewords
   * @param initialSubmode should normally be SUBMODE_ALPHA
   * @return the text submode in which this method ends
   * @throws IllegalArgumentException if the range contains a character that has no value in
   *         any text sub-mode; nothing is appended to {@code sb} in that case
   */
  private static int encodeText(CharSequence msg,
                                int startpos,
                                int count,
                                StringBuilder sb,
                                int initialSubmode) {
    if (count == 0) {
      // Nothing to encode: emit no codewords and keep the sub-mode unchanged.
      return initialSubmode;
    }

    // Reject characters outside the sub-mode tables up front. Without this check they would
    // either index past the 128-entry lookup tables or be emitted as a bogus value of -1.
    // An unchecked exception is used so that the method signature stays unchanged.
    int tableSize = Math.min(MIXED.length, PUNCTUATION.length);
    for (int i = startpos; i < startpos + count; i++) {
      char c = msg.charAt(i);
      boolean encodable = c < tableSize
          && (isAlphaUpper(c) || isAlphaLower(c) || isMixed(c) || isPunctuation(c));
      if (!encodable) {
        throw new IllegalArgumentException(
            "Character not encodable in Text Compaction mode: " + c + " (Unicode: " + (int) c + ')');
      }
    }

    StringBuilder tmp = new StringBuilder(count);
    int submode = initialSubmode;
    int idx = 0;
    // A 'continue' re-examines the same character after a sub-mode latch has been emitted.
    while (idx < count) {
      char ch = msg.charAt(startpos + idx);
      switch (submode) {
        case SUBMODE_ALPHA:
          if (isAlphaUpper(ch)) {
            tmp.append((char) (ch == ' ' ? 26 : ch - 'A'));
          } else if (isAlphaLower(ch)) {
            submode = SUBMODE_LOWER;
            tmp.append((char) 27); //ll
            continue;
          } else if (isMixed(ch)) {
            submode = SUBMODE_MIXED;
            tmp.append((char) 28); //ml
            continue;
          } else {
            tmp.append((char) 29); //ps
            tmp.append((char) PUNCTUATION[ch]);
          }
          break;
        case SUBMODE_LOWER:
          if (isAlphaLower(ch)) {
            tmp.append((char) (ch == ' ' ? 26 : ch - 'a'));
          } else if (isAlphaUpper(ch)) {
            //space cannot happen here, it is also in "Lower"
            tmp.append((char) 27); //as
            tmp.append((char) (ch - 'A'));
          } else if (isMixed(ch)) {
            submode = SUBMODE_MIXED;
            tmp.append((char) 28); //ml
            continue;
          } else {
            tmp.append((char) 29); //ps
            tmp.append((char) PUNCTUATION[ch]);
          }
          break;
        case SUBMODE_MIXED:
          if (isMixed(ch)) {
            tmp.append((char) MIXED[ch]);
          } else if (isAlphaUpper(ch)) {
            submode = SUBMODE_ALPHA;
            tmp.append((char) 28); //al
            continue;
          } else if (isAlphaLower(ch)) {
            submode = SUBMODE_LOWER;
            tmp.append((char) 27); //ll
            continue;
          } else {
            // Latch to Punctuation only if the next character of this segment is punctuation
            // as well; otherwise a one-character shift is used. The look-ahead bound is
            // relative to the segment (idx/count), independent of startpos.
            if (idx + 1 < count && isPunctuation(msg.charAt(startpos + idx + 1))) {
              submode = SUBMODE_PUNCTUATION;
              tmp.append((char) 25); //pl
              continue;
            }
            tmp.append((char) 29); //ps
            tmp.append((char) PUNCTUATION[ch]);
          }
          break;
        default: //SUBMODE_PUNCTUATION
          if (isPunctuation(ch)) {
            tmp.append((char) PUNCTUATION[ch]);
          } else {
            submode = SUBMODE_ALPHA;
            tmp.append((char) 29); //al
            continue;
          }
      }
      idx++;
    }

    // Pack two sub-mode values into each codeword.
    char h = 0;
    int len = tmp.length();
    for (int i = 0; i < len; i++) {
      boolean odd = (i % 2) != 0;
      if (odd) {
        h = (char) ((h * 30) + tmp.charAt(i));
        sb.append(h);
      } else {
        h = tmp.charAt(i);
      }
    }
    if ((len % 2) != 0) {
      sb.append((char) ((h * 30) + 29)); //ps
    }
    return submode;
  }

  /**
   * Encode parts of the message using Byte Compaction as described in ISO/IEC 15438:2001(E),
   * chapter 4.4.3. The input is already a byte array; no character conversion happens here.
   *
   * <p>A mode codeword is written first: a shift when exactly one byte is encoded starting
   * from Text Compaction, otherwise one of the two byte latches depending on whether
   * {@code count} is a multiple of 6. Every full group of 6 bytes is then written as 5
   * base-900 codewords, and each remaining byte as one codeword.
   *
   * @param bytes     the message converted to a byte array
   * @param startpos  the start position within the message
   * @param count     the number of bytes to encode
   * @param startmode the mode from which this method starts
   * @param sb        receives the encoded codewords
   */
  private static void encodeBinary(byte[] bytes,
                                   int startpos,
                                   int count,
                                   int startmode,
                                   StringBuilder sb) {
    int modeCodeword;
    if (count == 1 && startmode == TEXT_COMPACTION) {
      modeCodeword = SHIFT_TO_BYTE;
    } else {
      modeCodeword = (count % 6) == 0 ? LATCH_TO_BYTE : LATCH_TO_BYTE_PADDED;
    }
    sb.append((char) modeCodeword);

    int pos = startpos;
    char[] group = new char[5];
    // Full groups: 6 bytes (48 bits) become 5 base-900 codewords, most significant first.
    for (; (startpos + count - pos) >= 6; pos += 6) {
      long value = 0;
      for (int i = 0; i < 6; i++) {
        value = (value << 8) | (bytes[pos + i] & 0xff);
      }
      for (int i = group.length - 1; i >= 0; i--) {
        group[i] = (char) (value % 900);
        value /= 900;
      }
      sb.append(group);
    }
    // Remainder (fewer than 6 bytes, if any): one codeword per byte.
    for (; pos < startpos + count; pos++) {
      sb.append((char) (bytes[pos] & 0xff));
    }
  }

  /**
   * Encodes a run of decimal digits using Numeric Compaction. The digits are processed in
   * groups of up to 44; each group is prefixed with the digit 1, read as one big decimal
   * number and written as its base-900 digits, most significant first.
   *
   * @param msg      the message
   * @param startpos the start position within the message
   * @param count    the number of digits to encode
   * @param sb       receives the encoded codewords
   */
  private static void encodeNumeric(String msg, int startpos, int count, StringBuilder sb) {
    final BigInteger base = BigInteger.valueOf(900);
    StringBuilder reversed = new StringBuilder(count / 3 + 1);
    int done = 0;
    while (done < count) {
      int groupLength = Math.min(44, count - done);
      int from = startpos + done;
      BigInteger value = new BigInteger("1" + msg.substring(from, from + groupLength));

      // Collect the base-900 digits, least significant first.
      reversed.setLength(0);
      do {
        BigInteger[] quotientAndRemainder = value.divideAndRemainder(base);
        reversed.append((char) quotientAndRemainder[1].intValue());
        value = quotientAndRemainder[0];
      } while (value.signum() != 0);

      // All values are below 900, so reversing the chars cannot disturb surrogate pairs.
      sb.append(reversed.reverse());
      done += groupLength;
    }
  }


  /**
   * Tells whether {@code ch} is one of the ASCII digits '0' to '9'.
   */
  private static boolean isDigit(char ch) {
    return !(ch < '0' || ch > '9');
  }

  /**
   * Tells whether {@code ch} is a space or one of the letters 'A' to 'Z'.
   */
  private static boolean isAlphaUpper(char ch) {
    if (ch == ' ') {
      return true;
    }
    return 'A' <= ch && ch <= 'Z';
  }

  /**
   * Tells whether {@code ch} is a space or one of the letters 'a' to 'z'.
   */
  private static boolean isAlphaLower(char ch) {
    if (ch == ' ') {
      return true;
    }
    return 'a' <= ch && ch <= 'z';
  }

  /**
   * Tells whether {@code ch} has a value in the text compaction Mixed sub-mode.
   * Characters beyond the lookup table (non-ASCII) are reported as not encodable
   * instead of causing an {@link ArrayIndexOutOfBoundsException}.
   */
  private static boolean isMixed(char ch) {
    return ch < MIXED.length && MIXED[ch] != -1;
  }

  /**
   * Tells whether {@code ch} has a value in the text compaction Punctuation sub-mode.
   * Characters beyond the lookup table (non-ASCII) are reported as not encodable
   * instead of causing an {@link ArrayIndexOutOfBoundsException}.
   */
  private static boolean isPunctuation(char ch) {
    return ch < PUNCTUATION.length && PUNCTUATION[ch] != -1;
  }

  /**
   * Tells whether {@code ch} is a tab, line feed, carriage return, or a printable ASCII
   * character in the range 32 (space) to 126 ('~').
   */
  private static boolean isText(char ch) {
    switch (ch) {
      case '\t':
      case '\n':
      case '\r':
        return true;
      default:
        return ch >= ' ' && ch <= '~';
    }
  }

  /**
   * Counts how many digits follow each other without interruption, beginning at the given
   * position. This is the number of characters that numeric compaction could take next.
   *
   * @param msg      the message
   * @param startpos the start position within the message
   * @return the requested character count
   */
  private static int determineConsecutiveDigitCount(CharSequence msg, int startpos) {
    final int len = msg.length();
    int end = startpos;
    while (end < len && isDigit(msg.charAt(end))) {
      end++;
    }
    // A start position at or beyond the end of the message yields 0.
    return Math.max(0, end - startpos);
  }

  /**
   * Counts the characters, beginning at the given position, that text compaction should
   * take next. The scan ends at the first character that is not text-encodable, or just
   * before a run of 13 or more digits (which is left for numeric compaction).
   *
   * @param msg      the message
   * @param startpos the start position within the message
   * @return the requested character count
   */
  private static int determineConsecutiveTextCount(CharSequence msg, int startpos) {
    final int len = msg.length();
    int idx = startpos;
    while (idx < len) {
      // Measure the digit run starting here, looking at no more than 13 digits.
      int runEnd = idx;
      while (runEnd < len && runEnd - idx < 13 && isDigit(msg.charAt(runEnd))) {
        runEnd++;
      }
      int digits = runEnd - idx;
      if (digits >= 13) {
        // Stop in front of the digit run.
        return idx - startpos;
      }
      if (digits > 0) {
        // Shorter digit runs are text-encodable as well; skip over them.
        idx = runEnd;
        continue;
      }

      if (!isText(msg.charAt(idx))) {
        break;
      }
      idx++;
    }
    return idx - startpos;
  }

  /**
   * Determines the number of consecutive characters that are encodable using binary compaction.
   * The scan stops at the end of the message or just before a run of 13 or more digits.
   *
   * <p>A high surrogate followed by a low surrogate is checked (and counted) as a pair,
   * because {@link CharsetEncoder#canEncode(char)} reports every individual surrogate as
   * non-encodable even when the charset can encode the supplementary character.
   *
   * @param msg      the message
   * @param startpos the start position within the message
   * @param encoding the charset used to convert the message to a byte array
   * @return the requested character count
   * @throws WriterException if a character cannot be encoded with {@code encoding}
   */
  private static int determineConsecutiveBinaryCount(String msg, int startpos, Charset encoding)
      throws WriterException {
    CharsetEncoder encoder = encoding.newEncoder();
    int len = msg.length();
    int idx = startpos;
    while (idx < len) {
      // Look ahead for a digit run that numeric compaction should take over.
      int numericCount = 0;
      while (numericCount < 13
          && idx + numericCount < len
          && isDigit(msg.charAt(idx + numericCount))) {
        numericCount++;
      }
      if (numericCount >= 13) {
        return idx - startpos;
      }

      char ch = msg.charAt(idx);
      int width = 1;
      boolean encodable;
      if (Character.isHighSurrogate(ch)
          && idx + 1 < len
          && Character.isLowSurrogate(msg.charAt(idx + 1))) {
        // Supplementary character: test both halves together.
        width = 2;
        encodable = encoder.canEncode(msg.subSequence(idx, idx + 2));
      } else {
        encodable = encoder.canEncode(ch);
      }

      if (!encodable) {
        throw new WriterException("Non-encodable character detected: " + msg.substring(idx, idx + width)
            + " (Unicode: " + msg.codePointAt(idx) + ')');
      }
      idx += width;
    }
    return idx - startpos;
  }

  /**
   * Appends the codewords that announce an Extended Channel Interpretation.
   *
   * <ul>
   *   <li>0..899: {@code ECI_CHARSET} followed by the ECI number</li>
   *   <li>900..810899: {@code ECI_GENERAL_PURPOSE} followed by {@code eci / 900 - 1} and
   *       {@code eci % 900}</li>
   *   <li>810900..811799: {@code ECI_USER_DEFINED} followed by {@code eci - 810900}</li>
   * </ul>
   *
   * @param eci the ECI number, 0..811799
   * @param sb  receives the encoded codewords
   * @throws WriterException if {@code eci} is negative or greater than 811799; nothing is
   *         appended in that case
   */
  private static void encodingECI(int eci, StringBuilder sb) throws WriterException {
    // Validate first so that negative values cannot fall into the general purpose branch.
    if (eci < 0 || eci >= 811800) {
      throw new WriterException("ECI number not in valid range from 0..811799, but was " + eci);
    }
    if (eci < 900) {
      sb.append((char) ECI_CHARSET);
      sb.append((char) eci);
    } else if (eci < 810900) {
      sb.append((char) ECI_GENERAL_PURPOSE);
      sb.append((char) (eci / 900 - 1));
      sb.append((char) (eci % 900));
    } else {
      sb.append((char) ECI_USER_DEFINED);
      // Offset within the user defined range; must be 0..899 to be a valid codeword.
      sb.append((char) (eci - 810900));
    }
  }

}