All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.zxing.common.MinimalECIInput Maven / Gradle / Ivy

There is a newer version: 3.5.3_1
Show newest version
/*
 * Copyright 2021 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.zxing.common;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

/**
 * Class that converts a character string into a sequence of ECIs and bytes
 *
 * The implementation uses the Dijkstra algorithm to produce minimal encodings
 *
 * @author Alex Geller
 */
public class MinimalECIInput implements ECIInput {

  private static final int COST_PER_ECI = 3; // approximated (latch + 2 codewords)
  private final int[] bytes;
  private final int fnc1;

 /**
  * Constructs a minimal input
  *
  * @param stringToEncode the character string to encode
  * @param priorityCharset The preferred {@link Charset}. When the value of the argument is null, the algorithm
  *   chooses charsets that leads to a minimal representation. Otherwise the algorithm will use the priority
  *   charset to encode any character in the input that can be encoded by it if the charset is among the
  *   supported charsets.
  * @param fnc1 denotes the character in the input that represents the FNC1 character or -1 if this is not GS1
  *   input.
  */
  public MinimalECIInput(String stringToEncode, Charset priorityCharset, int fnc1) {
    this.fnc1 = fnc1;
    ECIEncoderSet encoderSet = new ECIEncoderSet(stringToEncode, priorityCharset, fnc1);
    if (encoderSet.length() == 1) { //optimization for the case when all can be encoded without ECI in ISO-8859-1
      bytes = new int[stringToEncode.length()];
      for (int i = 0; i < bytes.length; i++) {
        char c = stringToEncode.charAt(i);
        bytes[i] = c == fnc1 ? 1000 : (int) c;
      }
    } else {
      bytes = encodeMinimally(stringToEncode, encoderSet, fnc1);
    }
  }

  public int getFNC1Character() {
    return fnc1;
  }

 /**
  * Returns the length of this input.  The length is the number
  * of {@code byte}s, FNC1 characters or ECIs in the sequence.
  *
  * @return  the number of {@code char}s in this sequence
  */
  public int length() {
    return bytes.length;
  }

  public boolean haveNCharacters(int index, int n) {
    if (index + n - 1 >= bytes.length) {
      return false;
    }
    for (int i = 0; i < n; i++) {
      if (isECI(index + i)) {
        return false;
      }
    }
    return true;
  }

 /**
  * Returns the {@code byte} value at the specified index.  An index ranges from zero
  * to {@code length() - 1}.  The first {@code byte} value of the sequence is at
  * index zero, the next at index one, and so on, as for array
  * indexing.
  *
  * @param   index the index of the {@code byte} value to be returned
  *
  * @return  the specified {@code byte} value as character or the FNC1 character
  *
  * @throws  IndexOutOfBoundsException
  *          if the {@code index} argument is negative or not less than
  *          {@code length()}
  * @throws  IllegalArgumentException
  *          if the value at the {@code index} argument is an ECI (@see #isECI)
  */
  public char charAt(int index) {
    if (index < 0 || index >= length()) {
      throw new IndexOutOfBoundsException("" + index);
    }
    if (isECI(index)) {
      throw new IllegalArgumentException("value at " + index + " is not a character but an ECI");
    }
    return isFNC1(index) ? (char) fnc1 : (char) bytes[index];
  }

 /**
  * Returns a {@code CharSequence} that is a subsequence of this sequence.
  * The subsequence starts with the {@code char} value at the specified index and
  * ends with the {@code char} value at index {@code end - 1}.  The length
  * (in {@code char}s) of the
  * returned sequence is {@code end - start}, so if {@code start == end}
  * then an empty sequence is returned.
  *
  * @param   start   the start index, inclusive
  * @param   end     the end index, exclusive
  *
  * @return  the specified subsequence
  *
  * @throws  IndexOutOfBoundsException
  *          if {@code start} or {@code end} are negative,
  *          if {@code end} is greater than {@code length()},
  *          or if {@code start} is greater than {@code end}
  * @throws  IllegalArgumentException
  *          if a value in the range {@code start}-{@code end} is an ECI (@see #isECI)
  */
  public CharSequence subSequence(int start, int end) {
    if (start < 0 || start > end || end > length()) {
      throw new IndexOutOfBoundsException("" + start);
    }
    StringBuilder result = new StringBuilder();
    for (int i = start; i < end; i++) {
      if (isECI(i)) {
        throw new IllegalArgumentException("value at " + i + " is not a character but an ECI");
      }
      result.append(charAt(i));
    }
    return result;
  }

 /**
  * Determines if a value is an ECI
  *
  * @param   index the index of the value
  *
  * @return  true if the value at position {@code index} is an ECI
  *
  * @throws  IndexOutOfBoundsException
  *          if the {@code index} argument is negative or not less than
  *          {@code length()}
  */
  public boolean isECI(int index) {
    if (index < 0 || index >= length()) {
      throw new IndexOutOfBoundsException("" + index);
    }
    return bytes[index] > 255 && bytes[index] <= 999;
  }

 /**
  * Determines if a value is the FNC1 character
  *
  * @param   index the index of the value
  *
  * @return  true if the value at position {@code index} is the FNC1 character
  *
  * @throws  IndexOutOfBoundsException
  *          if the {@code index} argument is negative or not less than
  *          {@code length()}
  */
  public boolean isFNC1(int index) {
    if (index < 0 || index >= length()) {
      throw new IndexOutOfBoundsException("" + index);
    }
    return bytes[index] == 1000;
  }

 /**
  * Returns the {@code int} ECI value at the specified index.  An index ranges from zero
  * to {@code length() - 1}.  The first {@code byte} value of the sequence is at
  * index zero, the next at index one, and so on, as for array
  * indexing.
  *
  * @param   index the index of the {@code int} value to be returned
  *
  * @return  the specified {@code int} ECI value. 
  *          The ECI specified the encoding of all bytes with a higher index until the
  *          next ECI or until the end of the input if no other ECI follows.
  *
  * @throws  IndexOutOfBoundsException
  *          if the {@code index} argument is negative or not less than
  *          {@code length()}
  * @throws  IllegalArgumentException
  *          if the value at the {@code index} argument is not an ECI (@see #isECI)
  */
  public int getECIValue(int index) {
    if (index < 0 || index >= length()) {
      throw new IndexOutOfBoundsException("" + index);
    }
    if (!isECI(index)) {
      throw new IllegalArgumentException("value at " + index + " is not an ECI but a character");
    }
    return bytes[index] - 256;
  }

  @Override
  public String toString() {
    StringBuilder result = new StringBuilder();
    for (int i = 0; i < length(); i++) {
      if (i > 0) {
        result.append(", ");
      }
      if (isECI(i)) {
        result.append("ECI(");
        result.append(getECIValue(i));
        result.append(')');
      } else if (charAt(i) < 128) {
        result.append('\'');
        result.append(charAt(i));
        result.append('\'');
      } else {
        result.append((int) charAt(i));
      }
    }
    return result.toString();
  }
  static void addEdge(InputEdge[][] edges, int to, InputEdge edge) {
    if (edges[to][edge.encoderIndex] == null ||
        edges[to][edge.encoderIndex].cachedTotalSize > edge.cachedTotalSize) {
      edges[to][edge.encoderIndex] = edge;
    }
  }

  static void addEdges(String stringToEncode, 
                       ECIEncoderSet encoderSet, 
                       InputEdge[][] edges, 
                       int from, 
                       InputEdge previous, 
                       int fnc1) {

    char ch = stringToEncode.charAt(from);

    int start = 0;
    int end = encoderSet.length();
    if (encoderSet.getPriorityEncoderIndex() >= 0 && (ch == fnc1 || encoderSet.canEncode(ch,
        encoderSet.getPriorityEncoderIndex()))) {
      start = encoderSet.getPriorityEncoderIndex();
      end = start + 1;
    }

    for (int i = start; i < end; i++) {
      if (ch == fnc1 || encoderSet.canEncode(ch,i)) {
        addEdge(edges, from + 1, new InputEdge(ch, encoderSet, i, previous, fnc1));
      }
    }
  }

  static int[] encodeMinimally(String stringToEncode, ECIEncoderSet encoderSet, int fnc1) {
    int inputLength = stringToEncode.length();

    // Array that represents vertices. There is a vertex for every character and encoding.
    InputEdge[][] edges = new InputEdge[inputLength + 1][encoderSet.length()];
    addEdges(stringToEncode, encoderSet, edges, 0, null, fnc1);

    for (int i = 1; i <= inputLength; i++) {
      for (int j = 0; j < encoderSet.length(); j++) {
        if (edges[i][j] != null && i < inputLength) {
          addEdges(stringToEncode, encoderSet, edges, i, edges[i][j], fnc1);
        }
      }
      //optimize memory by removing edges that have been passed.
      for (int j = 0; j < encoderSet.length(); j++) {
        edges[i - 1][j] = null;
      }
    }
    int minimalJ = -1;
    int minimalSize = Integer.MAX_VALUE;
    for (int j = 0; j < encoderSet.length(); j++) {
      if (edges[inputLength][j] != null) {
        InputEdge edge = edges[inputLength][j];
        if (edge.cachedTotalSize < minimalSize) {
          minimalSize = edge.cachedTotalSize;
          minimalJ = j;
        }
      }
    }
    if (minimalJ < 0) {
      throw new RuntimeException("Internal error: failed to encode \"" + stringToEncode + "\"");
    }
    List intsAL = new ArrayList<>();
    InputEdge current = edges[inputLength][minimalJ];
    while (current != null) {
      if (current.isFNC1()) {
        intsAL.add(0, 1000);
      } else {
        byte[] bytes = encoderSet.encode(current.c,current.encoderIndex);
        for (int i = bytes.length - 1; i >= 0; i--) {
          intsAL.add(0, (bytes[i] & 0xFF));
        }
      }
      int previousEncoderIndex = current.previous == null ? 0 : current.previous.encoderIndex;
      if (previousEncoderIndex != current.encoderIndex) {
        intsAL.add(0,256 + encoderSet.getECIValue(current.encoderIndex));
      }
      current = current.previous;
    }
    int[] ints = new int[intsAL.size()];
    for (int i = 0; i < ints.length; i++) {
      ints[i] = intsAL.get(i);
    }
    return ints;
  }

  private static final class InputEdge {
    private final char c;
    private final int encoderIndex; //the encoding of this edge
    private final InputEdge previous;
    private final int cachedTotalSize;

    private InputEdge(char c, ECIEncoderSet encoderSet, int encoderIndex, InputEdge previous, int fnc1) {
      this.c = c == fnc1 ? 1000 : c;
      this.encoderIndex = encoderIndex;
      this.previous = previous;

      int size = this.c == 1000 ? 1 : encoderSet.encode(c, encoderIndex).length;
      int previousEncoderIndex = previous == null ? 0 : previous.encoderIndex;
      if (previousEncoderIndex != encoderIndex) {
        size += COST_PER_ECI;
      }
      if (previous != null) {
        size += previous.cachedTotalSize;
      }
      this.cachedTotalSize = size;
    }

    boolean isFNC1() {
      return c == 1000;
    }

  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy