com.google.zxing.pdf417.encoder.PDF417HighLevelEncoder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of core Show documentation
Core barcode encoding/decoding library
There is a newer version: 3.5.3
/*
 * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This file has been modified from its original form in Barcode4J.
 */

package com.google.zxing.pdf417.encoder;

import com.google.zxing.WriterException;
import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.ECIInput;
import com.google.zxing.common.MinimalECIInput;

import java.math.BigInteger;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * PDF417 high-level encoder following the algorithm described in ISO/IEC 15438:2001(E) in
 * annex P.
 */
final class PDF417HighLevelEncoder {

  /**
   * code for Text compaction
   */
  private static final int TEXT_COMPACTION = 0;

  /**
   * code for Byte compaction
   */
  private static final int BYTE_COMPACTION = 1;

  /**
   * code for Numeric compaction
   */
  private static final int NUMERIC_COMPACTION = 2;

  /**
   * Text compaction submode Alpha
   */
  private static final int SUBMODE_ALPHA = 0;

  /**
   * Text compaction submode Lower
   */
  private static final int SUBMODE_LOWER = 1;

  /**
   * Text compaction submode Mixed
   */
  private static final int SUBMODE_MIXED = 2;

  /**
   * Text compaction submode Punctuation
   */
  private static final int SUBMODE_PUNCTUATION = 3;

  /**
   * mode latch to Text Compaction mode
   */
  private static final int LATCH_TO_TEXT = 900;

  /**
   * mode latch to Byte Compaction mode (number of characters NOT a multiple of 6)
   */
  private static final int LATCH_TO_BYTE_PADDED = 901;

  /**
   * mode latch to Numeric Compaction mode
   */
  private static final int LATCH_TO_NUMERIC = 902;

  /**
   * mode shift to Byte Compaction mode
   */
  private static final int SHIFT_TO_BYTE = 913;

  /**
   * mode latch to Byte Compaction mode (number of characters a multiple of 6)
   */
  private static final int LATCH_TO_BYTE = 924;

  /**
   * identifier for a user defined Extended Channel Interpretation (ECI)
   */
  private static final int ECI_USER_DEFINED = 925;

  /**
   * identifier for a general purpose ECO format
   */
  private static final int ECI_GENERAL_PURPOSE = 926;

  /**
   * identifier for an ECI of a character set of code page
   */
  private static final int ECI_CHARSET = 927;

  /**
   * Raw code table for text compaction Mixed sub-mode
   */
  private static final byte[] TEXT_MIXED_RAW = {
      48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
      35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0, 32, 0, 0, 0};

  /**
   * Raw code table for text compaction: Punctuation sub-mode
   */
  private static final byte[] TEXT_PUNCTUATION_RAW = {
      59, 60, 62, 64, 91, 92, 93, 95, 96, 126, 33, 13, 9, 44, 58,
      10, 45, 46, 36, 47, 34, 124, 42, 40, 41, 63, 123, 125, 39, 0};

  private static final byte[] MIXED = new byte[128];
  private static final byte[] PUNCTUATION = new byte[128];

  private static final Charset DEFAULT_ENCODING = StandardCharsets.ISO_8859_1;

  private PDF417HighLevelEncoder() {
  }

  static {
    //Construct inverse lookups
    Arrays.fill(MIXED, (byte) -1);
    for (int i = 0; i < TEXT_MIXED_RAW.length; i++) {
      byte b = TEXT_MIXED_RAW[i];
      if (b > 0) {
        MIXED[b] = (byte) i;
      }
    }
    Arrays.fill(PUNCTUATION, (byte) -1);
    for (int i = 0; i < TEXT_PUNCTUATION_RAW.length; i++) {
      byte b = TEXT_PUNCTUATION_RAW[i];
      if (b > 0) {
        PUNCTUATION[b] = (byte) i;
      }
    }
  }

  /**
   * Performs high-level encoding of a PDF417 message using the algorithm described in annex P
   * of ISO/IEC 15438:2001(E). If byte compaction has been selected, then only byte compaction
   * is used.
   *
   * @param msg the message
   * @param compaction compaction mode to use
   * @param encoding character encoding used to encode in default or byte compaction
   *  or {@code null} for default / not applicable
   * @param autoECI encode input minimally using multiple ECIs if needed
   *   If autoECI encoding is specified and additionally {@code encoding} is specified, then the encoder
   *   will use the specified {@link Charset} for any character that can be encoded by it, regardless
   *   if a different encoding would lead to a more compact encoding. When no {@code encoding} is specified
   *   then charsets will be chosen so that the byte representation is minimal.
   * @return the encoded message (the char values range from 0 to 928)
   */
  static String encodeHighLevel(String msg, Compaction compaction, Charset encoding, boolean autoECI)
      throws WriterException {

    if (msg.isEmpty()) {
      throw new WriterException("Empty message not allowed");
    }

    if (encoding == null && !autoECI) {
      for (int i = 0; i < msg.length(); i++) {
        if (msg.charAt(i) > 255) {
          throw new WriterException("Non-encodable character detected: " + msg.charAt(i) + " (Unicode: " +
              (int) msg.charAt(i) +
              "). Consider specifying EncodeHintType.PDF417_AUTO_ECI and/or EncodeTypeHint.CHARACTER_SET.");
        }
      }
    }
    //the codewords 0..928 are encoded as Unicode characters
    StringBuilder sb = new StringBuilder(msg.length());

    ECIInput input;
    if (autoECI) {
      input = new MinimalECIInput(msg, encoding, -1);
    } else {
      input = new NoECIInput(msg);
      if (encoding == null) {
        encoding = DEFAULT_ENCODING;
      } else if (!DEFAULT_ENCODING.equals(encoding)) {
        CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
        if (eci != null) {
          encodingECI(eci.getValue(), sb);
        }
      }
    }

    int len = input.length();
    int p = 0;
    int textSubMode = SUBMODE_ALPHA;

    // User selected encoding mode
    switch (compaction) {
      case TEXT:
        encodeText(input, p, len, sb, textSubMode);
        break;
      case BYTE:
        if (autoECI) {
          encodeMultiECIBinary(input, 0, input.length(), TEXT_COMPACTION, sb);
        } else {
          byte[] msgBytes = input.toString().getBytes(encoding);
          encodeBinary(msgBytes, p, msgBytes.length, BYTE_COMPACTION, sb);
        }
        break;
      case NUMERIC:
        sb.append((char) LATCH_TO_NUMERIC);
        encodeNumeric(input, p, len, sb);
        break;
      default:
        int encodingMode = TEXT_COMPACTION; //Default mode, see 4.4.2.1
        while (p < len) {
          while (p < len && input.isECI(p)) {
            encodingECI(input.getECIValue(p), sb);
            p++;
          }
          if (p >= len) {
            break;
          }
          int n = determineConsecutiveDigitCount(input, p);
          if (n >= 13) {
            sb.append((char) LATCH_TO_NUMERIC);
            encodingMode = NUMERIC_COMPACTION;
            textSubMode = SUBMODE_ALPHA; //Reset after latch
            encodeNumeric(input, p, n, sb);
            p += n;
          } else {
            int t = determineConsecutiveTextCount(input, p);
            if (t >= 5 || n == len) {
              if (encodingMode != TEXT_COMPACTION) {
                sb.append((char) LATCH_TO_TEXT);
                encodingMode = TEXT_COMPACTION;
                textSubMode = SUBMODE_ALPHA; //start with submode alpha after latch
              }
              textSubMode = encodeText(input, p, t, sb, textSubMode);
              p += t;
            } else {
              int b = determineConsecutiveBinaryCount(input, p, autoECI ? null : encoding);
              if (b == 0) {
                b = 1;
              }
              byte[] bytes = autoECI ? null : input.subSequence(p, p + b).toString().getBytes(encoding);
              if (((bytes == null && b == 1) || (bytes != null && bytes.length == 1))
                  && encodingMode == TEXT_COMPACTION) {
                //Switch for one byte (instead of latch)
                if (autoECI) {
                  encodeMultiECIBinary(input, p, 1, TEXT_COMPACTION, sb);
                } else {
                  encodeBinary(bytes, 0, 1, TEXT_COMPACTION, sb);
                }
              } else {
                //Mode latch performed by encodeBinary()
                if (autoECI) {
                  encodeMultiECIBinary(input, p, p + b, encodingMode, sb);
                } else {
                  encodeBinary(bytes, 0, bytes.length, encodingMode, sb);
                }
                encodingMode = BYTE_COMPACTION;
                textSubMode = SUBMODE_ALPHA; //Reset after latch
              }
              p += b;
            }
          }
        }
        break;
    }

    return sb.toString();
  }

  /**
   * Encode parts of the message using Text Compaction as described in ISO/IEC 15438:2001(E),
   * chapter 4.4.2.
   *
   * @param input          the input
   * @param startpos       the start position within the message
   * @param count          the number of characters to encode
   * @param sb             receives the encoded codewords
   * @param initialSubmode should normally be SUBMODE_ALPHA
   * @return the text submode in which this method ends
   */
  private static int encodeText(ECIInput input,
                                int startpos,
                                int count,
                                StringBuilder sb,
                                int initialSubmode) throws WriterException {
    StringBuilder tmp = new StringBuilder(count);
    int submode = initialSubmode;
    int idx = 0;
    while (true) {
      if (input.isECI(startpos + idx)) {
        encodingECI(input.getECIValue(startpos + idx), sb);
        idx++;
      } else {
        char ch = input.charAt(startpos + idx);
        switch (submode) {
          case SUBMODE_ALPHA:
            if (isAlphaUpper(ch)) {
              if (ch == ' ') {
                tmp.append((char) 26); //space
              } else {
                tmp.append((char) (ch - 65));
              }
            } else {
              if (isAlphaLower(ch)) {
                submode = SUBMODE_LOWER;
                tmp.append((char) 27); //ll
                continue;
              } else if (isMixed(ch)) {
                submode = SUBMODE_MIXED;
                tmp.append((char) 28); //ml
                continue;
              } else {
                tmp.append((char) 29); //ps
                tmp.append((char) PUNCTUATION[ch]);
                break;
              }
            }
            break;
          case SUBMODE_LOWER:
            if (isAlphaLower(ch)) {
              if (ch == ' ') {
                tmp.append((char) 26); //space
              } else {
                tmp.append((char) (ch - 97));
              }
            } else {
              if (isAlphaUpper(ch)) {
                tmp.append((char) 27); //as
                tmp.append((char) (ch - 65));
                //space cannot happen here, it is also in "Lower"
                break;
              } else if (isMixed(ch)) {
                submode = SUBMODE_MIXED;
                tmp.append((char) 28); //ml
                continue;
              } else {
                tmp.append((char) 29); //ps
                tmp.append((char) PUNCTUATION[ch]);
                break;
              }
            }
            break;
          case SUBMODE_MIXED:
            if (isMixed(ch)) {
              tmp.append((char) MIXED[ch]);
            } else {
              if (isAlphaUpper(ch)) {
                submode = SUBMODE_ALPHA;
                tmp.append((char) 28); //al
                continue;
              } else if (isAlphaLower(ch)) {
                submode = SUBMODE_LOWER;
                tmp.append((char) 27); //ll
                continue;
              } else {
                if (startpos + idx + 1 < count &&
                    !input.isECI(startpos + idx + 1) &&
                    isPunctuation(input.charAt(startpos + idx + 1))) {
                  submode = SUBMODE_PUNCTUATION;
                  tmp.append((char) 25); //pl
                  continue;
                }
                tmp.append((char) 29); //ps
                tmp.append((char) PUNCTUATION[ch]);
              }
            }
            break;
          default: //SUBMODE_PUNCTUATION
            if (isPunctuation(ch)) {
              tmp.append((char) PUNCTUATION[ch]);
            } else {
              submode = SUBMODE_ALPHA;
              tmp.append((char) 29); //al
              continue;
            }
        }
        idx++;
        if (idx >= count) {
          break;
        }
      }
    }
    char h = 0;
    int len = tmp.length();
    for (int i = 0; i < len; i++) {
      boolean odd = (i % 2) != 0;
      if (odd) {
        h = (char) ((h * 30) + tmp.charAt(i));
        sb.append(h);
      } else {
        h = tmp.charAt(i);
      }
    }
    if ((len % 2) != 0) {
      sb.append((char) ((h * 30) + 29)); //ps
    }
    return submode;
  }

  /**
   * Encode all of the message using Byte Compaction as described in ISO/IEC 15438:2001(E)
   *
   * @param input     the input
   * @param startpos  the start position within the message
   * @param count     the number of bytes to encode
   * @param startmode the mode from which this method starts
   * @param sb        receives the encoded codewords
   */
  private static void encodeMultiECIBinary(ECIInput input,
                                          int startpos,
                                          int count,
                                          int startmode,
                                          StringBuilder sb) throws WriterException {
    final int end = Math.min(startpos + count, input.length());
    int localStart = startpos;
    while (true) {
      //encode all leading ECIs and advance localStart
      while (localStart < end && input.isECI(localStart)) {
        encodingECI(input.getECIValue(localStart), sb);
        localStart++;
      }
      int localEnd = localStart;
      //advance end until before the next ECI
      while (localEnd < end && !input.isECI(localEnd)) {
        localEnd++;
      }

      final int localCount = localEnd - localStart;
      if (localCount <= 0) {
        //done
        break;
      } else {
        //encode the segment
        encodeBinary(subBytes(input, localStart, localEnd),
            0, localCount, localStart == startpos ? startmode : BYTE_COMPACTION, sb);
        localStart = localEnd;
      }
    }
  }

  static byte[] subBytes(ECIInput input, int start, int end) {
    final int count = end - start;
    byte[] result = new byte[count];
    for (int i = start; i < end; i++) {
      result[i - start] = (byte) (input.charAt(i) & 0xff);
    }
    return result;
  }

  /**
   * Encode parts of the message using Byte Compaction as described in ISO/IEC 15438:2001(E),
   * chapter 4.4.3. The Unicode characters will be converted to binary using the cp437
   * codepage.
   *
   * @param bytes     the message converted to a byte array
   * @param startpos  the start position within the message
   * @param count     the number of bytes to encode
   * @param startmode the mode from which this method starts
   * @param sb        receives the encoded codewords
   */
  private static void encodeBinary(byte[] bytes,
                                   int startpos,
                                   int count,
                                   int startmode,
                                   StringBuilder sb) {
    if (count == 1 && startmode == TEXT_COMPACTION) {
      sb.append((char) SHIFT_TO_BYTE);
    } else {
      if ((count % 6) == 0) {
        sb.append((char) LATCH_TO_BYTE);
      } else {
        sb.append((char) LATCH_TO_BYTE_PADDED);
      }
    }

    int idx = startpos;
    // Encode sixpacks
    if (count >= 6) {
      char[] chars = new char[5];
      while ((startpos + count - idx) >= 6) {
        long t = 0;
        for (int i = 0; i < 6; i++) {
          t <<= 8;
          t += bytes[idx + i] & 0xff;
        }
        for (int i = 0; i < 5; i++) {
          chars[i] = (char) (t % 900);
          t /= 900;
        }
        for (int i = chars.length - 1; i >= 0; i--) {
          sb.append(chars[i]);
        }
        idx += 6;
      }
    }
    //Encode rest (remaining n<5 bytes if any)
    for (int i = idx; i < startpos + count; i++) {
      int ch = bytes[i] & 0xff;
      sb.append((char) ch);
    }
  }

  private static void encodeNumeric(ECIInput input, int startpos, int count, StringBuilder sb) {
    int idx = 0;
    StringBuilder tmp = new StringBuilder(count / 3 + 1);
    BigInteger num900 = BigInteger.valueOf(900);
    BigInteger num0 = BigInteger.valueOf(0);
    while (idx < count) {
      tmp.setLength(0);
      int len = Math.min(44, count - idx);
      String part = "1" + input.subSequence(startpos + idx, startpos + idx + len);
      BigInteger bigint = new BigInteger(part);
      do {
        tmp.append((char) bigint.mod(num900).intValue());
        bigint = bigint.divide(num900);
      } while (!bigint.equals(num0));

      //Reverse temporary string
      for (int i = tmp.length() - 1; i >= 0; i--) {
        sb.append(tmp.charAt(i));
      }
      idx += len;
    }
  }


  private static boolean isDigit(char ch) {
    return ch >= '0' && ch <= '9';
  }

  private static boolean isAlphaUpper(char ch) {
    return ch == ' ' || (ch >= 'A' && ch <= 'Z');
  }

  private static boolean isAlphaLower(char ch) {
    return ch == ' ' || (ch >= 'a' && ch <= 'z');
  }

  private static boolean isMixed(char ch) {
    return MIXED[ch] != -1;
  }

  private static boolean isPunctuation(char ch) {
    return PUNCTUATION[ch] != -1;
  }

  private static boolean isText(char ch) {
    return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
  }

  /**
   * Determines the number of consecutive characters that are encodable using numeric compaction.
   *
   * @param input      the input
   * @param startpos the start position within the input
   * @return the requested character count
   */
  private static int determineConsecutiveDigitCount(ECIInput input, int startpos) {
    int count = 0;
    final int len = input.length();
    int idx = startpos;
    if (idx < len) {
      while (idx < len && !input.isECI(idx) && isDigit(input.charAt(idx))) {
        count++;
        idx++;
      }
    }
    return count;
  }

  /**
   * Determines the number of consecutive characters that are encodable using text compaction.
   *
   * @param input      the input
   * @param startpos the start position within the input
   * @return the requested character count
   */
  private static int determineConsecutiveTextCount(ECIInput input, int startpos) {
    final int len = input.length();
    int idx = startpos;
    while (idx < len) {
      int numericCount = 0;
      while (numericCount < 13 && idx < len && !input.isECI(idx) && isDigit(input.charAt(idx))) {
        numericCount++;
        idx++;
      }
      if (numericCount >= 13) {
        return idx - startpos - numericCount;
      }
      if (numericCount > 0) {
        //Heuristic: All text-encodable chars or digits are binary encodable
        continue;
      }

      //Check if character is encodable
      if (input.isECI(idx) || !isText(input.charAt(idx))) {
        break;
      }
      idx++;
    }
    return idx - startpos;
  }

  /**
   * Determines the number of consecutive characters that are encodable using binary compaction.
   *
   * @param input    the input
   * @param startpos the start position within the message
   * @param encoding the charset used to convert the message to a byte array
   * @return the requested character count
   */
  private static int determineConsecutiveBinaryCount(ECIInput input, int startpos, Charset encoding)
      throws WriterException {
    CharsetEncoder encoder = encoding == null ? null : encoding.newEncoder();
    int len = input.length();
    int idx = startpos;
    while (idx < len) {
      int numericCount = 0;

      int i = idx;
      while (numericCount < 13 && !input.isECI(i) && isDigit(input.charAt(i))) {
        numericCount++;
        //textCount++;
        i = idx + numericCount;
        if (i >= len) {
          break;
        }
      }
      if (numericCount >= 13) {
        return idx - startpos;
      }

      if (encoder != null && !encoder.canEncode(input.charAt(idx))) {
        assert input instanceof NoECIInput;
        char ch = input.charAt(idx);
        throw new WriterException("Non-encodable character detected: " + ch + " (Unicode: " + (int) ch + ')');
      }
      idx++;
    }
    return idx - startpos;
  }

  private static void encodingECI(int eci, StringBuilder sb) throws WriterException {
    if (eci >= 0 && eci < 900) {
      sb.append((char) ECI_CHARSET);
      sb.append((char) eci);
    } else if (eci < 810900) {
      sb.append((char) ECI_GENERAL_PURPOSE);
      sb.append((char) (eci / 900 - 1));
      sb.append((char) (eci % 900));
    } else if (eci < 811800) {
      sb.append((char) ECI_USER_DEFINED);
      sb.append((char) (810900 - eci));
    } else {
      throw new WriterException("ECI number not in valid range from 0..811799, but was " + eci);
    }
  }

  private static final class NoECIInput implements ECIInput {

    String input;

    private NoECIInput(String input) {
      this.input = input;
    }

    public int length() {
      return input.length();
    }

    public char charAt(int index) {
      return input.charAt(index);
    }

    public boolean isECI(int index) {
      return false;
    }

    public int getECIValue(int index) {
      return -1;
    }

    public boolean haveNCharacters(int index, int n) {
      return index + n <= input.length();
    }

    public CharSequence subSequence(int start, int end) {
      return input.subSequence(start, end);
    }

    public String toString() {
      return input;
    }
  }
}