com.google.zxing.datamatrix.encoder.MinimalEncoder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of core Show documentation
Show all versions of core Show documentation
Core barcode encoding/decoding library
The newest version!
/*
* Copyright 2021 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.zxing.datamatrix.encoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import com.google.zxing.common.MinimalECIInput;
/**
* Encoder that encodes minimally
*
* Algorithm:
*
* Uses Dijkstra to produce mathematically minimal encodings that are in some cases smaller than the results produced
* by the algorithm described in annex S in the specification ISO/IEC 16022:200(E). The biggest improvement of this
* algorithm over that one is the case when the algorithm enters the most inefficient mode, the B256 mode. The
* algorithm from the specification will exit this mode only if it encounters digits so that arbitrarily
* inefficient results can be produced if the postfix contains no digits.
*
* Multi ECI support and ECI switching:
*
* For multi language content the algorithm selects the most compact representation using ECI modes. Note that unlike
* the compaction algorithm used for QR-Codes, this implementation operates in two stages and therefore is not
* mathematically optimal. In the first stage, the input string is encoded minimally as a stream of ECI character set
* selectors and bytes encoded in the selected encoding. In this stage the algorithm might for example decide to
* encode occurrences of the characters "\u0150\u015C" (O-double-acute, S-circumflex) in UTF-8 by a single ECI or
* alternatively by multiple ECIs that switch between ISO-8859-2 and ISO-8859-3 (e.g. in the case that the input
* contains many characters from ISO-8859-2 (Latin 2) and few from ISO-8859-3 (Latin 3)).
* In a second stage this stream of ECIs and bytes is minimally encoded using the various Data Matrix encoding modes.
* While both stages encode mathematically minimally it is not ensured that the result is mathematically minimal since
* the size growth for inserting an ECI in the first stage can only be approximated as the first stage does not know
* in which mode the ECI will occur in the second stage (may, or may not require an extra latch to ASCII depending on
* the current mode). The reason for this shortcoming are difficulties in implementing it in a straightforward and
* readable manner.
*
* GS1 support
*
* FNC1 delimiters can be encoded in the input string by using the FNC1 character specified in the encoding function.
* When a FNC1 character is specified then a leading FNC1 will be encoded and all occurrences of delimiter characters
* will result in FNC1 codewords in the symbol.
*
* @author Alex Geller
*/
public final class MinimalEncoder {
/**
 * Encodation modes the encoder can choose between.
 * The declaration order matters: ordinal() is used as the second index of the edge table (see addEdge), and
 * encodeMinimally treats the ordinals 1 to 3 (C40, TEXT, X12) as the modes that need a final unlatch.
 */
enum Mode {
ASCII,
C40,
TEXT,
X12,
EDF,
B256
}
// Characters that isInC40Shift2Set (and, by delegation, isInTextShift2Set) reports as members of the Shift 2 set.
// The FNC1 placeholder is not listed here; it is checked separately against the fnc1 argument.
static final char[] C40_SHIFT2_CHARS = {'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_' };
// Private so that the class cannot be instantiated from outside; it is used through its static methods.
private MinimalEncoder() {
}
/**
 * Tells whether a character lies in the extended ASCII range.
 *
 * @param ch the character to test
 * @param fnc1 the character that stands for FNC1 in the input, or -1 if there is none
 * @return true if ch is in the range 128..255 and is not the FNC1 character
 */
static boolean isExtendedASCII(char ch, int fnc1) {
  if (ch == fnc1) {
    return false;
  }
  return ch > 127 && ch < 256;
}
/**
 * @param ch the character to test
 * @return true if ch is one of the characters 0..31, which this encoder treats as the C40 Shift 1 set
 */
private static boolean isInC40Shift1Set(char ch) {
  return ch < 32;
}
/**
 * @param ch the character to test
 * @param fnc1 the character that stands for FNC1 in the input, or -1 if there is none
 * @return true if ch is the FNC1 character or is listed in C40_SHIFT2_CHARS
 */
private static boolean isInC40Shift2Set(char ch, int fnc1) {
  if (ch == fnc1) {
    return true;
  }
  for (int index = 0; index < C40_SHIFT2_CHARS.length; index++) {
    if (C40_SHIFT2_CHARS[index] == ch) {
      return true;
    }
  }
  return false;
}
/**
 * @param ch the character to test
 * @return true if ch belongs to the Text Shift 1 set; the test is delegated to the C40 Shift 1 test
 */
private static boolean isInTextShift1Set(char ch) {
  boolean sameAsC40 = isInC40Shift1Set(ch);
  return sameAsC40;
}
/**
 * @param ch the character to test
 * @param fnc1 the character that stands for FNC1 in the input, or -1 if there is none
 * @return true if ch belongs to the Text Shift 2 set; the test is delegated to the C40 Shift 2 test
 */
private static boolean isInTextShift2Set(char ch, int fnc1) {
  boolean sameAsC40 = isInC40Shift2Set(ch, fnc1);
  return sameAsC40;
}
/**
 * Encodes a message with default settings: no priority charset, no FNC1 character (-1) and
 * {@link SymbolShapeHint#FORCE_NONE} as shape.
 *
 * @param msg the message
 * @return the encoded message (the char values range from 0 to 255)
 */
public static String encodeHighLevel(String msg) {
  final Charset noPriorityCharset = null;
  final int noFnc1 = -1;
  return encodeHighLevel(msg, noPriorityCharset, noFnc1, SymbolShapeHint.FORCE_NONE);
}
/**
 * Performs message encoding of a DataMatrix message.
 *
 * <p>If the message starts with {@code HighLevelEncoder.MACRO_05_HEADER} or
 * {@code HighLevelEncoder.MACRO_06_HEADER} and ends with {@code HighLevelEncoder.MACRO_TRAILER}, header and
 * trailer are stripped and the macro id (5 or 6) is handed to {@link #encode} instead.
 *
 * @param msg the message
 * @param priorityCharset The preferred {@link Charset}. When the value of the argument is null, the algorithm
 *   chooses charsets that leads to a minimal representation. Otherwise the algorithm will use the priority
 *   charset to encode any character in the input that can be encoded by it if the charset is among the
 *   supported charsets.
 * @param fnc1 denotes the character in the input that represents the FNC1 character or -1 if this is not a GS1
 *   bar code. If the value is not -1 then a FNC1 is also prepended.
 * @param shape requested shape.
 * @return the encoded message (the char values range from 0 to 255)
 */
public static String encodeHighLevel(String msg, Charset priorityCharset, int fnc1, SymbolShapeHint shape) {
  int macroId = 0;
  String header = null;
  if (msg.startsWith(HighLevelEncoder.MACRO_05_HEADER) && msg.endsWith(HighLevelEncoder.MACRO_TRAILER)) {
    macroId = 5;
    header = HighLevelEncoder.MACRO_05_HEADER;
  } else if (msg.startsWith(HighLevelEncoder.MACRO_06_HEADER) && msg.endsWith(HighLevelEncoder.MACRO_TRAILER)) {
    macroId = 6;
    header = HighLevelEncoder.MACRO_06_HEADER;
  }
  String payload = msg;
  if (header != null) {
    // Derive the trailer length from the constant rather than hard-coding it, so the two cannot drift apart.
    payload = msg.substring(header.length(), msg.length() - HighLevelEncoder.MACRO_TRAILER.length());
  }
  // The codewords are bytes; ISO-8859-1 maps each byte to the char with the same value (0..255).
  return new String(encode(payload, priorityCharset, fnc1, shape, macroId), StandardCharsets.ISO_8859_1);
}
/**
 * Encodes input minimally and returns an array of the codewords.
 *
 * @param input The string to encode
 * @param priorityCharset The preferred {@link Charset}. When the value of the argument is null, the algorithm
 *   chooses charsets that leads to a minimal representation. Otherwise the algorithm will use the priority
 *   charset to encode any character in the input that can be encoded by it if the charset is among the
 *   supported charsets.
 * @param fnc1 denotes the character in the input that represents the FNC1 character or -1 if this is not a GS1
 *   bar code. If the value is not -1 then a FNC1 is also prepended.
 * @param shape requested shape.
 * @param macroId Prepends the specified macro function in case that a value of 5 or 6 is specified.
 * @return An array of bytes representing the codewords of a minimal encoding.
 */
static byte[] encode(String input, Charset priorityCharset, int fnc1, SymbolShapeHint shape, int macroId) {
  Input wrappedInput = new Input(input, priorityCharset, fnc1, shape, macroId);
  Result minimal = encodeMinimally(wrappedInput);
  return minimal.getBytes();
}
/**
 * Records an edge at the vertex it ends on, unless an edge that is at least as small is already stored there.
 * The vertex is addressed by the end position of the edge and the ordinal of its end mode. On a tie the edge
 * that was stored first is kept.
 *
 * @param edges table of the best edge per end position and end mode
 * @param edge the candidate edge
 */
static void addEdge(Edge[][] edges, Edge edge) {
  int endPosition = edge.fromPosition + edge.characterLength;
  int modeIndex = edge.getEndMode().ordinal();
  Edge[] slots = edges[endPosition];
  Edge incumbent = slots[modeIndex];
  if (incumbent != null && incumbent.cachedTotalSize <= edge.cachedTotalSize) {
    return;
  }
  slots[modeIndex] = edge;
}
/**
 * Computes how many C40 (or Text) words the shortest encodable run starting at {@code from} takes.
 *
 * <p>Every character contributes 1 value if it is native to the mode, 2 if it needs a shift, 3 if it needs
 * Shift 2 + Upper Shift and is native after removing the high bit, and 4 otherwise. The scan stops as soon as the
 * values fill whole words (a multiple of three values). The only exception is the end of the input, where a word
 * holding just two values is accepted as well (the third value is then padded by the caller).
 *
 * @param input the input to scan
 * @param from index of the first character of the run
 * @param c40 true to count for C40 mode, false for Text mode
 * @param characterLength out parameter; element 0 receives the number of characters in the run, or 0 if no
 *   run can be formed
 * @return the number of words needed, or 0 if an ECI is met or the input ends before a word boundary is reached
 */
static int getNumberOfC40Words(Input input, int from, boolean c40,int[] characterLength) {
  int end = input.length();
  int values = 0;
  int pos = from;
  while (pos < end && !input.isECI(pos)) {
    char current = input.charAt(pos);
    boolean nativeChar = c40 ? HighLevelEncoder.isNativeC40(current) : HighLevelEncoder.isNativeText(current);
    if (nativeChar) {
      values += 1; // native
    } else if (!isExtendedASCII(current, input.getFNC1Character())) {
      values += 2; // shift
    } else {
      int asciiValue = current & 0xff;
      char withoutHighBit = (char) (asciiValue - 128);
      boolean nativeAfterUpperShift = asciiValue >= 128 && (c40
          ? HighLevelEncoder.isNativeC40(withoutHighBit)
          : HighLevelEncoder.isNativeText(withoutHighBit));
      // shift, Upper shift [, shift]
      values += nativeAfterUpperShift ? 3 : 4;
    }
    pos++;
    boolean wholeWords = values % 3 == 0;
    boolean twoValuesAtEndOfInput = (values - 2) % 3 == 0 && pos == end;
    if (wholeWords || twoValuesAtEndOfInput) {
      characterLength[0] = pos - from;
      return (values + 2) / 3; // rounds up to whole words
    }
  }
  characterLength[0] = 0;
  return 0;
}
/**
 * Adds all edges that leave the vertex reached by {@code previous} at position {@code from}.
 *
 * <p>An ECI at {@code from} yields a single ASCII edge. Otherwise an ASCII edge (two digits or one character), the
 * C40 and Text edges (if a run can be formed), an X12 edge (three native X12 characters) and a B256 edge are
 * added, followed by up to four EDIFACT edges. After an edge that ends in EDIFACT mode only EDIFACT edges are
 * added. The order of the addEdge calls is relevant because addEdge keeps the first of two equally small edges.
 *
 * @param input the input being encoded
 * @param edges table of the best edge per end position and end mode
 * @param from position of the vertex the new edges start at
 * @param previous the edge leading to that vertex, or null for the start vertex
 */
static void addEdges(Input input, Edge[][] edges, int from, Edge previous) {
  if (input.isECI(from)) {
    addEdge(edges, new Edge(input, Mode.ASCII, from, 1, previous));
    return;
  }

  char ch = input.charAt(from);
  // It is not possible to unlatch a full EDF edge to something else.
  boolean afterFullEdifactEdge = previous != null && previous.getEndMode() == Mode.EDF;
  if (!afterFullEdifactEdge) {
    // Either two digits in one ASCII codeword, or one ASCII character (possibly extended via Upper Shift).
    boolean digitPair = HighLevelEncoder.isDigit(ch) && input.haveNCharacters(from, 2) &&
        HighLevelEncoder.isDigit(input.charAt(from + 1));
    addEdge(edges, new Edge(input, Mode.ASCII, from, digitPair ? 2 : 1, previous));

    int[] c40Length = new int[1];
    if (getNumberOfC40Words(input, from, true, c40Length) > 0) {
      addEdge(edges, new Edge(input, Mode.C40, from, c40Length[0], previous));
    }
    int[] textLength = new int[1];
    if (getNumberOfC40Words(input, from, false, textLength) > 0) {
      addEdge(edges, new Edge(input, Mode.TEXT, from, textLength[0], previous));
    }

    boolean x12Triple = input.haveNCharacters(from,3) &&
        HighLevelEncoder.isNativeX12(ch) &&
        HighLevelEncoder.isNativeX12(input.charAt(from + 1)) &&
        HighLevelEncoder.isNativeX12(input.charAt(from + 2));
    if (x12Triple) {
      addEdge(edges, new Edge(input, Mode.X12, from, 3, previous));
    }

    addEdge(edges, new Edge(input, Mode.B256, from, 1, previous));
  }

  // Up to four EDF edges are created, with a length of 1, 2, 3 or 4 characters. The fourth normally doesn't have
  // a latch to ASCII unless it is 2 characters away from the end of the input.
  int edifactRun = 0;
  while (edifactRun < 3) {
    int pos = from + edifactRun;
    if (!input.haveNCharacters(pos,1) || !HighLevelEncoder.isNativeEDIFACT(input.charAt(pos))) {
      break;
    }
    edifactRun++;
    addEdge(edges, new Edge(input, Mode.EDF, from, edifactRun, previous));
  }
  if (edifactRun == 3 && input.haveNCharacters(from, 4) &&
      HighLevelEncoder.isNativeEDIFACT(input.charAt(from + 3))) {
    addEdge(edges, new Edge(input, Mode.EDF, from, 4, previous));
  }
}
/**
 * Computes a minimal encoding of the input by a shortest path search over (position, mode) vertices.
 *
 * @param input the input to encode
 * @return the result built from the smallest edge that ends after the last input position
 * @throws IllegalStateException if no edge reaches the end of the input
 */
static Result encodeMinimally(Input input) {
  @SuppressWarnings("checkstyle:lineLength")
  /* The minimal encoding is computed by Dijkstra. The acyclic graph is modeled as follows:
   * A vertex represents a combination of a position in the input and an encoding mode where position 0
   * denotes the position left of the first character, 1 the position left of the second character and so on.
   * Likewise the end vertices are located after the last character at position input.length().
   * For any position there might be up to six vertices, one for each of the encoding types ASCII, C40, TEXT, X12,
   * EDF and B256.
   *
   * As an example consider the input string "ABC123" then at position 0 there is only one vertex with the default
   * ASCII encodation. At position 3 there might be vertices for the types ASCII, C40, X12, EDF and B256.
   *
   * An edge leading to such a vertex encodes one or more of the characters left of the position that the vertex
   * represents. It encodes the characters in the encoding mode of the vertex that it ends on. In other words,
   * all edges leading to a particular vertex encode the same characters (the length of the suffix can vary) using
   * the same encoding mode.
   * As an example consider the input string "ABC123" and the vertex (4,EDF). Possible edges leading to this vertex
   * are:
   *   (0,ASCII)  --EDF(ABC1)--> (4,EDF)
   *   (1,ASCII)  --EDF(BC1)-->  (4,EDF)      likewise from (1,B256) and (1,EDF)
   *   (2,ASCII)  --EDF(C1)-->   (4,EDF)      likewise from (2,B256) and (2,EDF)
   *   (3,ASCII)  --EDF(1)-->    (4,EDF)      likewise from (3,B256), (3,EDF), (3,C40) and (3,X12)
   *
   * The edges leading to a vertex are stored in such a way that there is a fast way to enumerate the edges ending
   * on a particular vertex.
   *
   * The algorithm processes the vertices in order of their position thereby performing the following:
   *
   * For every vertex at position i the algorithm enumerates the edges ending on the vertex and removes all but the
   * shortest from that list (addEdge does this on insertion).
   * Then it processes the vertices for the position i+1. If i+1 == input.length() then the algorithm ends
   * and chooses the edge with the smallest size from any of the edges leading to vertices at this position.
   * Otherwise the algorithm computes all possible outgoing edges for the vertices at the position i+1.
   *
   * Example, encoding the string "ABCDEFG". An edge is written as
   * "(" + fromVertex + ") " + encodingMode + "(" + encodedInput + ") (" + accumulatedSize + ") --> (" + toVertex + ")".
   *
   * Situation after adding edges to the start vertex (0,ASCII):
   *   (0,ASCII) ASCII(A) (1) --> (1,ASCII)
   *   (0,ASCII) B256(A) (3) --> (1,B256)
   *   (0,ASCII) EDF(AB) (4) --> (2,EDF)
   *   (0,ASCII) C40(ABC) (3) --> (3,C40)
   *   (0,ASCII) TEXT(ABC) (5) --> (3,TEXT)
   *   (0,ASCII) X12(ABC) (3) --> (3,X12)
   *   (0,ASCII) EDF(ABC) (4) --> (3,EDF)
   *   (0,ASCII) EDF(ABCD) (4) --> (4,EDF)
   *
   * Adding the edges that leave the vertices at position 1 produces, among others, two candidates for (2,ASCII):
   *   (0,ASCII) ASCII(A) (1) --> (1,ASCII) ASCII(B) (2) --> (2,ASCII)
   *   (0,ASCII) B256(A) (3) --> (1,B256) ASCII(B) (4) --> (2,ASCII)
   * The first one is minimal for the vertex (2,ASCII) so that the second one is removed. In the same way
   * "(1,B256) B256(B) (3) --> (2,B256)" wins over "(1,ASCII) B256(B) (4) --> (2,B256)".
   *
   * This continues for the remaining positions. Among the edges ending at position 7 the smallest ones are
   *   (0,ASCII) ASCII(A) (1) --> (1,ASCII) C40(BCD) (4) --> (4,C40) C40(EFG) (6) --> (7,C40)   //Solution 1
   *   (0,ASCII) ASCII(A) (1) --> (1,ASCII) X12(BCD) (4) --> (4,X12) X12(EFG) (6) --> (7,X12)   //Solution 2
   * compared to e.g. ASCII only (7), EDF(DEFG) after ASCII(ABC) (7), TEXT(EFG) after ASCII(ABCD) (9) or
   * B256 only (8).
   *
   * Hence a minimal encoding of "ABCDEFG" is either ASCII(A),C40(BCDEFG) or ASCII(A),X12(BCDEFG).
   */
  int inputLength = input.length();
  Mode[] modes = Mode.values();

  // Table of vertices: one row per position (0..inputLength), one column per mode
  // (ASCII, C40, TEXT, X12, EDF and B256). Each cell holds the smallest edge ending on that vertex.
  Edge[][] edges = new Edge[inputLength + 1][modes.length];
  addEdges(input, edges, 0, null);

  for (int position = 1; position <= inputLength; position++) {
    if (position < inputLength) {
      for (Edge incoming : edges[position]) {
        if (incoming != null) {
          addEdges(input, edges, position, incoming);
        }
      }
    }
    // Optimize memory by dropping the edges of the position that has been passed.
    Edge[] passed = edges[position - 1];
    for (int column = 0; column < passed.length; column++) {
      passed[column] = null;
    }
  }

  Edge best = null;
  int bestSize = Integer.MAX_VALUE;
  Edge[] finalists = edges[inputLength];
  for (int column = 0; column < finalists.length; column++) {
    Edge candidate = finalists[column];
    if (candidate == null) {
      continue;
    }
    Mode endMode = modes[column];
    // C40, TEXT and X12 need an extra unlatch at the end.
    boolean needsUnlatch = endMode == Mode.C40 || endMode == Mode.TEXT || endMode == Mode.X12;
    int size = needsUnlatch ? candidate.cachedTotalSize + 1 : candidate.cachedTotalSize;
    if (size < bestSize) {
      bestSize = size;
      best = candidate;
    }
  }

  if (best == null) {
    throw new IllegalStateException("Failed to encode \"" + input + "\"");
  }
  return new Result(best);
}
private static final class Edge {
// Data codeword capacities of the symbol sizes. getMinSymbolSize() returns the first entry of a table that is
// large enough, so each table has to be in ascending order.
private static final int[] allCodewordCapacities = {3, 5, 8, 10, 12, 16, 18, 22, 30, 32, 36, 44, 49, 62, 86, 114,
144, 174, 204, 280, 368, 456, 576, 696, 816, 1050, 1304, 1558};
private static final int[] squareCodewordCapacities = {3, 5, 8, 12, 18, 22, 30, 36, 44, 62, 86, 114, 144, 174, 204,
280, 368, 456, 576, 696, 816, 1050, 1304, 1558};
// The fourth entry used to be 33, which is out of order and does not occur in allCodewordCapacities. The
// rectangular symbol between the capacities 16 and 32 holds 22 data codewords.
private static final int[] rectangularCodewordCapacities = {5, 10, 16, 22, 32, 49};
// The input this edge encodes a part of.
private final Input input;
private final Mode mode; //the mode at the start of this edge.
// Index of the first input position covered by this edge.
private final int fromPosition;
// Number of input positions covered by this edge.
private final int characterLength;
// The edge preceding this one on the path, or null if this edge leaves the start vertex.
private final Edge previous;
// Size accumulated over the path that ends with this edge; computed once in the constructor.
private final int cachedTotalSize;
/**
 * Creates an edge and computes the accumulated size of the path that ends with it.
 *
 * <p>The size is the size of the previous edge (0 if there is none) plus the cost of this edge, which depends on
 * the mode of this edge and on the mode the previous edge ended in (ASCII if there is no previous edge).
 *
 * @param input the input being encoded
 * @param mode the mode in which this edge encodes its characters
 * @param fromPosition index of the first input position covered
 * @param characterLength number of input positions covered
 * @param previous the preceding edge, or null for an edge leaving the start vertex
 */
private Edge(Input input, Mode mode, int fromPosition, int characterLength, Edge previous) {
  this.input = input;
  this.mode = mode;
  this.fromPosition = fromPosition;
  this.characterLength = characterLength;
  this.previous = previous;
  assert fromPosition + characterLength <= input.length();

  /*
   * Switching modes
   * ASCII -> C40: latch 230
   * ASCII -> TEXT: latch 239
   * ASCII -> X12: latch 238
   * ASCII -> EDF: latch 240
   * ASCII -> B256: latch 231
   * C40 -> ASCII: word(c1,c2,c3), 254
   * TEXT -> ASCII: word(c1,c2,c3), 254
   * X12 -> ASCII: word(c1,c2,c3), 254
   * EDIFACT -> ASCII: Unlatch character,0,0,0 or c1,Unlatch character,0,0 or c1,c2,Unlatch character,0 or
   * c1,c2,c3,Unlatch character
   * B256 -> ASCII: without latch after n bytes
   */
  Mode cameFrom = getPreviousMode();
  boolean fromWordMode = cameFrom == Mode.C40 || cameFrom == Mode.TEXT || cameFrom == Mode.X12;
  boolean fromAsciiOrB256 = cameFrom == Mode.ASCII || cameFrom == Mode.B256;

  int total = previous == null ? 0 : previous.cachedTotalSize;
  switch (mode) {
    case ASCII:
      total += 1;
      if (input.isECI(fromPosition) || isExtendedASCII(input.charAt(fromPosition), input.getFNC1Character())) {
        total += 1; // an ECI or an extended character takes a second codeword
      }
      if (fromWordMode) {
        total += 1; // unlatch 254 to ASCII
      }
      break;
    case B256:
      total += 1;
      if (cameFrom != Mode.B256) {
        total += 1; // byte count
      } else if (getB256Size() == 250) {
        total += 1; // extra byte count
      }
      if (cameFrom == Mode.ASCII) {
        total += 1; // latch to B256
      } else if (fromWordMode) {
        total += 2; // unlatch to ASCII, latch to B256
      }
      break;
    case X12:
      total += 2;
      if (fromAsciiOrB256) {
        total += 1; // additional byte for latch from ASCII to this mode
      } else if (cameFrom != mode && fromWordMode) {
        total += 2; // unlatch 254 to ASCII followed by latch to this mode
      }
      break;
    case C40:
    case TEXT:
      int[] ignoredLength = new int[1];
      total += 2 * getNumberOfC40Words(input, fromPosition, mode == Mode.C40, ignoredLength);
      if (fromAsciiOrB256) {
        total += 1; // additional byte for latch from ASCII to this mode
      } else if (cameFrom != mode && fromWordMode) {
        total += 2; // unlatch 254 to ASCII followed by latch to this mode
      }
      break;
    case EDF:
      total += 3;
      if (fromAsciiOrB256) {
        total += 1; // additional byte for latch from ASCII to this mode
      } else if (fromWordMode) {
        total += 2; // unlatch 254 to ASCII followed by latch to this mode
      }
      break;
  }
  cachedTotalSize = total;
}
/**
 * Counts the uninterrupted run of B256 edges that ends with this edge, walking backwards over the previous
 * edges. The walk stops once the count has exceeded 250, so the result is never greater than 251.
 *
 * @return the length of the run, 0 if this edge is not a B256 edge
 */
int getB256Size() {
  int run = 0;
  for (Edge edge = this; edge != null && edge.mode == Mode.B256 && run <= 250; edge = edge.previous) {
    run++;
  }
  return run;
}
/**
 * @return the mode in which the previous edge encodes its characters, or ASCII if there is no previous edge
 */
Mode getPreviousStartMode() {
  if (previous == null) {
    return Mode.ASCII;
  }
  return previous.mode;
}
/**
 * @return the mode the previous edge ends in (see getEndMode), or ASCII if there is no previous edge
 */
Mode getPreviousMode() {
  if (previous == null) {
    return Mode.ASCII;
  }
  return previous.getEndMode();
}
/**
 * Returns the mode the encoder is in after this edge. This is Mode.ASCII in case that:
 * - Mode is EDIFACT and characterLength is less than 4, or getLastASCII() reports that the remaining characters
 *   fit in 1 or 2 ASCII codewords and at most (2 - that number) codewords of the smallest fitting symbol stay
 *   unused.
 * - Mode is C40, TEXT or X12 and either this edge reaches the end of the input while the smallest fitting symbol
 *   is filled exactly, or the remaining characters fit in 1 ASCII codeword which fills the symbol exactly.
 * Returns mode in all other cases.
 */
Mode getEndMode() {
  if (mode == Mode.EDF) {
    if (characterLength < 4) {
      return Mode.ASCII;
    }
    int trailingAscii = getLastASCII(); // see 5.2.8.2 EDIFACT encodation Rules
    boolean fitsWithoutUnlatch = trailingAscii > 0 &&
        getCodewordsRemaining(cachedTotalSize + trailingAscii) <= 2 - trailingAscii;
    return fitsWithoutUnlatch ? Mode.ASCII : mode;
  }

  boolean wordMode = mode == Mode.C40 || mode == Mode.TEXT || mode == Mode.X12;
  if (!wordMode) {
    return mode;
  }

  // see 5.2.5.2 C40 encodation rules and 5.2.7.2 ANSI X12 encodation rules
  boolean reachesEnd = fromPosition + characterLength >= input.length();
  if (reachesEnd && getCodewordsRemaining(cachedTotalSize) == 0) {
    return Mode.ASCII;
  }
  if (getLastASCII() == 1 && getCodewordsRemaining(cachedTotalSize + 1) == 0) {
    return Mode.ASCII;
  }
  return mode;
}
/**
 * @return the mode in which this edge encodes its characters
 */
Mode getMode() {
  return this.mode;
}
/**
 * Peeks ahead at the characters that follow this edge and reports whether they form a short ASCII tail.
 *
 * <p>With n being the number of characters after this edge the result is:
 * - n == 1: 1 if the character is not extended ASCII
 * - n == 2: 1 if both characters are digits, 2 if neither of them is extended ASCII
 * - n == 3: 2 if two neighbouring characters are digits and the third one is not extended ASCII
 * - n == 4: 2 if all four characters are digits
 * Returns 0 in any other case (including n == 0 and n > 4).
 *
 * @return 1 or 2 as listed above, otherwise 0
 **/
int getLastASCII() {
  int from = fromPosition + characterLength;
  int remaining = input.length() - from;
  if (remaining <= 0 || remaining > 4) {
    return 0;
  }

  switch (remaining) {
    case 1:
      return isExtendedASCII(input.charAt(from), input.getFNC1Character()) ? 0 : 1;

    case 2:
      if (isExtendedASCII(input.charAt(from), input.getFNC1Character()) ||
          isExtendedASCII(input.charAt(from + 1), input.getFNC1Character())) {
        return 0;
      }
      boolean digitPair =
          HighLevelEncoder.isDigit(input.charAt(from)) && HighLevelEncoder.isDigit(input.charAt(from + 1));
      return digitPair ? 1 : 2;

    case 3:
      boolean digitsThenOther =
          HighLevelEncoder.isDigit(input.charAt(from)) && HighLevelEncoder.isDigit(input.charAt(from + 1))
          && !isExtendedASCII(input.charAt(from + 2), input.getFNC1Character());
      if (digitsThenOther) {
        return 2;
      }
      boolean otherThenDigits =
          HighLevelEncoder.isDigit(input.charAt(from + 1)) && HighLevelEncoder.isDigit(input.charAt(from + 2))
          && !isExtendedASCII(input.charAt(from), input.getFNC1Character());
      return otherThenDigits ? 2 : 0;

    default: // four characters remain
      boolean fourDigits =
          HighLevelEncoder.isDigit(input.charAt(from)) && HighLevelEncoder.isDigit(input.charAt(from + 1))
          && HighLevelEncoder.isDigit(input.charAt(from + 2)) && HighLevelEncoder.isDigit(input.charAt(from + 3));
      return fourDigits ? 2 : 0;
  }
}
/**
 * Returns the capacity in codewords of the smallest symbol that has enough capacity to fit the given minimal
 * number of codewords.
 *
 * <p>For the shape hints FORCE_SQUARE and FORCE_RECTANGLE the matching capacity table is searched first. If it
 * has no fitting entry, or for any other hint, the table of all capacities is searched. If nothing fits at all
 * the largest capacity is returned.
 *
 * @param minimum the number of codewords that have to fit
 * @return the capacity found as described above
 **/
int getMinSymbolSize(int minimum) {
  int[] hinted;
  switch (input.getShapeHint()) {
    case FORCE_SQUARE:
      hinted = squareCodewordCapacities;
      break;
    case FORCE_RECTANGLE:
      hinted = rectangularCodewordCapacities;
      break;
    default:
      hinted = null;
      break;
  }

  int[][] searchOrder = hinted == null
      ? new int[][] {allCodewordCapacities}
      : new int[][] {hinted, allCodewordCapacities};
  for (int[] table : searchOrder) {
    for (int capacity : table) {
      if (capacity >= minimum) {
        return capacity;
      }
    }
  }
  return allCodewordCapacities[allCodewordCapacities.length - 1];
}
/**
 * Returns the remaining capacity in codewords of the smallest symbol that has enough capacity to fit the given
 * minimal number of codewords.
 *
 * @param minimum the number of codewords that have to fit
 * @return the capacity reported by getMinSymbolSize(minimum) minus minimum
 **/
int getCodewordsRemaining(int minimum) {
  int capacity = getMinSymbolSize(minimum);
  return capacity - minimum;
}
/**
 * @param c the value; only its low 8 bits are kept
 * @return a new one element array holding c cast to byte
 */
static byte[] getBytes(int c) {
  return new byte[] {(byte) c};
}
/**
 * @param c1 the first value; only its low 8 bits are kept
 * @param c2 the second value; only its low 8 bits are kept
 * @return a new two element array holding c1 and c2 cast to byte, in this order
 */
static byte[] getBytes(int c1,int c2) {
  return new byte[] {(byte) c1, (byte) c2};
}
/**
 * Packs three values into one two byte word, computed as 1600 * c1 + 40 * c2 + c3 + 1, and stores it high byte
 * first. Only the low 8 bits of each value are used.
 *
 * @param bytes the array to write to
 * @param offset index of the high byte; the low byte goes to offset + 1
 * @param c1 the first value
 * @param c2 the second value
 * @param c3 the third value
 */
static void setC40Word(byte[] bytes, int offset, int c1, int c2, int c3) {
  int first = c1 & 0xff;
  int second = c2 & 0xff;
  int third = c3 & 0xff;
  int packed = 1 + third + 40 * second + 1600 * first;
  bytes[offset] = (byte) (packed >> 8);
  bytes[offset + 1] = (byte) packed;
}
/**
 * Maps a character to its X12 value: CR is 0, '*' is 1, '>' is 2, space is 3, the digits are 4..13 and the
 * upper case letters are 14..39. Any other character is returned unchanged.
 *
 * @param c the character to map
 * @return the X12 value as described above
 */
private static int getX12Value(char c) {
  switch (c) {
    case '\r':
      return 0;
    case '*':
      return 1;
    case '>':
      return 2;
    case ' ':
      return 3;
    default:
      break;
  }
  if (c >= '0' && c <= '9') {
    return c - 44;
  }
  if (c >= 'A' && c <= 'Z') {
    return c - 51;
  }
  return c;
}
/**
 * Encodes the characters of this edge as X12 words: every three characters are mapped with getX12Value and
 * packed into two bytes with setC40Word.
 *
 * @return two bytes per group of three characters
 */
byte[] getX12Words() {
  assert characterLength % 3 == 0;
  int wordCount = characterLength / 3;
  byte[] result = new byte[wordCount * 2];
  for (int word = 0; word < wordCount; word++) {
    int source = fromPosition + word * 3;
    setC40Word(result, word * 2,
        getX12Value(input.charAt(source)),
        getX12Value(input.charAt(source + 1)),
        getX12Value(input.charAt(source + 2)));
  }
  return result;
}
/**
 * Returns the value of the shift character that has to precede a non native character.
 *
 * <p>The FNC1 placeholder is tested first. isInC40Shift2Set counts it as a member of the Shift 2 set and
 * getC40Value encodes it as value 27, so it must be announced by Shift 2 even if the placeholder character is a
 * control character (0..31) that would otherwise be classified as Shift 1.
 *
 * @param c the character to classify
 * @param c40 true for C40 mode, false for Text mode
 * @param fnc1 the character that stands for FNC1 in the input, or -1 if there is none
 * @return 0 for the Shift 1 set, 1 for the Shift 2 set and 2 in all other cases (Shift 3)
 */
static int getShiftValue(char c, boolean c40, int fnc1) {
  if (c == fnc1) {
    return 1;
  }
  boolean inShift1Set = c40 ? isInC40Shift1Set(c) : isInTextShift1Set(c);
  if (inShift1Set) {
    return 0;
  }
  boolean inShift2Set = c40 ? isInC40Shift2Set(c, fnc1) : isInTextShift2Set(c, fnc1);
  return inShift2Set ? 1 : 2;
}
/**
 * Maps a character to its value within the C40 or Text character set selected by the caller.
 *
 * @param c40 true for C40 mode, false for Text mode
 * @param setIndex 0 when the character is native or in the Shift 1 set, otherwise the value returned by
 *   getShiftValue (1 for the Shift 2 set, 2 for the Shift 3 set)
 * @param c the character to map
 * @param fnc1 the character that stands for FNC1 in the input, or -1 if there is none
 * @return 27 for the FNC1 character, otherwise the value of c according to the tables below
 */
private static int getC40Value(boolean c40, int setIndex, char c, int fnc1) {
  if (c == fnc1) {
    // FNC1 is value 27 of the Shift 2 set, for which getShiftValue returns 1. The assertion used to compare
    // with 2, which the callers in getC40Words never pass for this character.
    assert setIndex == 1;
    return 27;
  }
  if (c40) {
    return c <= 31 ? c :
           c == 32 ? 3 :
           c <= 47 ? c - 33 :
           c <= 57 ? c - 44 :
           c <= 64 ? c - 43 :
           c <= 90 ? c - 51 :
           c <= 95 ? c - 69 :
           c <= 127 ? c - 96 : c;
  } else {
    // NOTE(review): getC40Words passes setIndex 0 for Shift 1 characters, so the characters 1..3 are mapped to
    // c - 1 here while the C40 branch above maps them to c - confirm against the specification.
    return c == 0 ? 0 :
           setIndex == 0 && c <= 3 ? c - 1 : //is this a bug in the spec?
           setIndex == 1 && c <= 31 ? c :
           c == 32 ? 3 :
           c >= 33 && c <= 47 ? c - 33 :
           c >= 48 && c <= 57 ? c - 44 :
           c >= 58 && c <= 64 ? c - 43 :
           c >= 65 && c <= 90 ? c - 64 :
           c >= 91 && c <= 95 ? c - 69 :
           c == 96 ? 0 :
           c >= 97 && c <= 122 ? c - 83 :
           c >= 123 && c <= 127 ? c - 96 : c;
  }
}
/**
 * Encodes the characters of this edge as C40 or Text words.
 *
 * <p>Each character is translated into one to four values: the value itself if it is native, a shift followed by
 * the value if it is not, and for extended ASCII the sequence Shift 2, Upper Shift followed by the encoding of
 * the character without its high bit. Three values are then packed into two bytes with setC40Word. If two
 * values are left over (only possible at the end of the input) a third value 0 is appended.
 *
 * @param c40 true for C40 mode, false for Text mode
 * @param fnc1 the character that stands for FNC1 in the input, or -1 if there is none
 * @return two bytes per group of three values
 */
byte[] getC40Words(boolean c40, int fnc1) {
  // The element type is required: with a raw List the expression "get(i) & 0xff" below does not compile.
  List<Byte> c40Values = new ArrayList<>();
  for (int i = 0; i < characterLength; i++) {
    char ci = input.charAt(fromPosition + i);
    if (c40 && HighLevelEncoder.isNativeC40(ci) || !c40 && HighLevelEncoder.isNativeText(ci)) {
      c40Values.add((byte) getC40Value(c40, 0, ci, fnc1));
    } else if (!isExtendedASCII(ci, fnc1)) {
      int shiftValue = getShiftValue(ci, c40, fnc1);
      c40Values.add((byte) shiftValue); //Shift[123]
      c40Values.add((byte) getC40Value(c40, shiftValue, ci, fnc1));
    } else {
      char asciiValue = (char) ((ci & 0xff) - 128);
      c40Values.add((byte) 1); //Shift 2
      c40Values.add((byte) 30); //Upper Shift
      // asciiValue is an ordinary character in the range 0..127. It must not be compared with the FNC1
      // placeholder, otherwise e.g. the character 0x9D would be encoded as FNC1 when fnc1 is 0x1D. Hence -1.
      int shiftValue = 0;
      boolean nativeAfterUpperShift = c40 && HighLevelEncoder.isNativeC40(asciiValue) ||
          !c40 && HighLevelEncoder.isNativeText(asciiValue);
      if (!nativeAfterUpperShift) {
        shiftValue = getShiftValue(asciiValue, c40, -1);
        c40Values.add((byte) shiftValue); // Shift[123]
      }
      c40Values.add((byte) getC40Value(c40, shiftValue, asciiValue, -1));
    }
  }

  if ((c40Values.size() % 3) != 0) {
    assert (c40Values.size() - 2) % 3 == 0 && fromPosition + characterLength == input.length();
    c40Values.add((byte) 0); // pad with 0 (Shift 1)
  }

  byte[] result = new byte[c40Values.size() / 3 * 2];
  int byteIndex = 0;
  for (int i = 0; i < c40Values.size(); i += 3) {
    setC40Word(result,byteIndex, c40Values.get(i) & 0xff, c40Values.get(i + 1) & 0xff, c40Values.get(i + 2) & 0xff);
    byteIndex += 2;
  }
  return result;
}
/**
 * Encodes this edge's characters as EDIFACT data: each group of up to four
 * characters is reduced to 6-bit values and packed into three bytes.
 *
 * Slots past the last character of the edge (or of the input) are filled with
 * {@code 0x1f} (the EDIFACT unlatch value per ISO/IEC 16022).
 *
 * @return {@code ceil(characterLength / 4) * 3} bytes
 */
byte[] getEDFBytes() {
  int numberOfThirds = (int) Math.ceil(characterLength / 4.0);
  byte[] result = new byte[numberOfThirds * 3];
  int pos = fromPosition;
  int endPos = Math.min(fromPosition + characterLength - 1 , input.length() - 1);
  // i is a byte offset that advances one 3-byte group per pass, so it must be bounded by the
  // array length rather than by the group count; otherwise groups beyond the first few stay unwritten.
  for (int i = 0; i < result.length; i += 3) {
    int[] edfValues = new int[4];
    for (int j = 0; j < 4; j++) {
      if (pos <= endPos) {
        edfValues[j] = input.charAt(pos++) & 0x3f;
      } else {
        // pos is not advanced here, so once the characters run out every remaining
        // slot sees pos == endPos + 1 and receives 0x1f.
        // NOTE(review): the 0 alternative is only reached if pos starts beyond endPos + 1 - confirm intent.
        edfValues[j] = pos == endPos + 1 ? 0x1f : 0;
      }
    }
    // Concatenate the four 6-bit values into 24 bits, most significant first.
    int val24 = edfValues[0] << 18;
    val24 |= edfValues[1] << 12;
    val24 |= edfValues[2] << 6;
    val24 |= edfValues[3];
    result[i] = (byte) ((val24 >> 16) & 0xff);
    result[i + 1] = (byte) ((val24 >> 8) & 0xff);
    result[i + 2] = (byte) (val24 & 0xff);
  }
  return result;
}
/**
 * Returns the codewords needed to switch from the previous mode to this edge's mode.
 *
 * Coming from ASCII or B256 only the latch codeword of the target mode is
 * emitted (nothing for ASCII). Coming from C40, Text or X12 into a different
 * mode, codeword 254 is emitted first, followed by the target latch (254 alone
 * for ASCII). Coming from EDIFACT nothing is emitted.
 *
 * @return zero, one or two codewords
 */
byte[] getLatchBytes() {
  final Mode before = getPreviousMode();
  final boolean unlatchFirst;
  switch (before) {
    case EDF:
      assert mode == Mode.EDF; //The rightmost EDIFACT edge always contains an unlatch character
      return new byte[0];
    case ASCII:
    case B256: //after B256 ends (via length) we are back to ASCII
      unlatchFirst = false;
      break;
    case C40:
    case TEXT:
    case X12:
      if (mode == before) {
        // Staying in the same mode needs no codewords.
        return new byte[0];
      }
      unlatchFirst = true;
      break;
    default:
      return new byte[0];
  }

  final int latch;
  switch (mode) {
    case B256:
      latch = 231;
      break;
    case C40:
      latch = 230;
      break;
    case TEXT:
      latch = 239;
      break;
    case X12:
      latch = 238;
      break;
    case EDF:
      latch = 240;
      break;
    case ASCII:
      // ASCII has no latch codeword of its own; only the 254 is needed, if any.
      return unlatchFirst ? getBytes(254) : new byte[0];
    default:
      return new byte[0];
  }
  return unlatchFirst ? getBytes(254, latch) : getBytes(latch);
}
/**
 * Returns the data codewords for this edge, excluding any latch codewords.
 *
 * Important: The function does not return the length bytes (one or two) in case of B256 encoding
 *
 * @return the encoded bytes for the characters covered by this edge
 */
byte[] getDataBytes() {
  switch (mode) {
    case ASCII: {
      // The ECI check comes first: the character at this position is only read when it is not an ECI.
      if (input.isECI(fromPosition)) {
        return getBytes(241, input.getECIValue(fromPosition) + 1);
      }
      final char first = input.charAt(fromPosition);
      if (isExtendedASCII(first, input.getFNC1Character())) {
        return getBytes(235, first - 127);
      }
      if (characterLength == 2) {
        // A two-character ASCII edge is a digit pair, encoded as one codeword.
        final int tens = first - '0';
        final int units = input.charAt(fromPosition + 1) - '0';
        return getBytes(tens * 10 + units + 130);
      }
      if (input.isFNC1(fromPosition)) {
        return getBytes(232);
      }
      return getBytes(first + 1);
    }
    case B256:
      return getBytes(input.charAt(fromPosition));
    case C40:
    case TEXT:
      return getC40Words(mode == Mode.C40, input.getFNC1Character());
    case X12:
      return getX12Words();
    case EDF:
      return getEDFBytes();
    default:
      assert false;
      return new byte[0];
  }
}
}
/**
 * Turns a solved path of edges into the final codeword sequence, including
 * latches, B256 length fields and randomization, macro/FNC1 prefixes and padding.
 */
private static final class Result {

  private final byte[] bytes;

  /**
   * Builds the codewords by walking the path backwards from {@code solution}
   * and prepending each edge's bytes.
   *
   * @param solution last edge of the chosen path
   */
  Result(Edge solution) {
    Input input = solution.input;
    // Number of bytes collected for the run of same-mode edges currently being walked.
    int size = 0;
    List<Byte> bytesAL = new ArrayList<>();
    // For each B256 run: its distance from the end of the list and its length, recorded
    // so the run can be located again after further bytes have been prepended.
    List<Integer> randomizePostfixLength = new ArrayList<>();
    List<Integer> randomizeLengths = new ArrayList<>();

    if ((solution.mode == Mode.C40 ||
         solution.mode == Mode.TEXT ||
         solution.mode == Mode.X12) &&
         solution.getEndMode() != Mode.ASCII) {
      size += prepend(MinimalEncoder.Edge.getBytes(254), bytesAL);
    }

    Edge current = solution;
    while (current != null) {
      size += prepend(current.getDataBytes(), bytesAL);

      // First edge of a run: emit the B256 length field if applicable, then the latch.
      if (current.previous == null || current.getPreviousStartMode() != current.getMode()) {
        if (current.getMode() == Mode.B256) {
          if (size <= 249) {
            bytesAL.add(0, (byte) size);
            size++;
          } else {
            bytesAL.add(0, (byte) (size % 250));
            bytesAL.add(0, (byte) (size / 250 + 249));
            size += 2;
          }
          randomizePostfixLength.add(bytesAL.size());
          randomizeLengths.add(size);
        }
        prepend(current.getLatchBytes(), bytesAL);
        size = 0;
      }

      current = current.previous;
    }

    // The run size is no longer needed from here on, so the prefixes are prepended without counting.
    if (input.getMacroId() == 5) {
      prepend(MinimalEncoder.Edge.getBytes(236), bytesAL);
    } else if (input.getMacroId() == 6) {
      prepend(MinimalEncoder.Edge.getBytes(237), bytesAL);
    }

    if (input.getFNC1Character() > 0) {
      prepend(MinimalEncoder.Edge.getBytes(232), bytesAL);
    }

    for (int i = 0; i < randomizePostfixLength.size(); i++) {
      applyRandomPattern(bytesAL, bytesAL.size() - randomizePostfixLength.get(i), randomizeLengths.get(i));
    }

    //add padding
    int capacity = solution.getMinSymbolSize(bytesAL.size());
    if (bytesAL.size() < capacity) {
      bytesAL.add((byte) 129);
    }
    while (bytesAL.size() < capacity) {
      bytesAL.add((byte) randomize253State(bytesAL.size() + 1));
    }

    bytes = new byte[bytesAL.size()];
    for (int i = 0; i < bytes.length; i++) {
      bytes[i] = bytesAL.get(i);
    }
  }

  /**
   * Inserts {@code bytes} at the front of {@code into}, preserving their order.
   *
   * @param bytes bytes to insert
   * @param into  list to insert into
   * @return the number of bytes inserted
   */
  static int prepend(byte[] bytes, List<Byte> into) {
    for (int i = bytes.length - 1; i >= 0; i--) {
      into.add(0, bytes[i]);
    }
    return bytes.length;
  }

  /**
   * Computes the randomized pad codeword for a position.
   *
   * @param codewordPosition position the pseudo-random number is derived from
   * @return a value in the range 1..254
   */
  private static int randomize253State(int codewordPosition) {
    int pseudoRandom = ((149 * codewordPosition) % 253) + 1;
    int tempVariable = 129 + pseudoRandom;
    return tempVariable <= 254 ? tempVariable : tempVariable - 254;
  }

  /**
   * Randomizes {@code length} codewords of {@code bytesAL} in place, starting at
   * {@code startPosition}: each gets a position-dependent number in 1..255 added, modulo 256.
   *
   * @param bytesAL       codeword list to modify
   * @param startPosition index of the first codeword to randomize
   * @param length        number of codewords to randomize
   */
  static void applyRandomPattern(List<Byte> bytesAL,int startPosition, int length) {
    for (int i = 0; i < length; i++) {
      int codewordPosition = startPosition + i;
      int codewordValue = bytesAL.get(codewordPosition) & 0xff;
      // The pseudo-random number is derived from the 1-based position.
      int pseudoRandomNumber = ((149 * (codewordPosition + 1)) % 255) + 1;
      int tempVariable = codewordValue + pseudoRandomNumber;
      bytesAL.set(codewordPosition, (byte) (tempVariable <= 255 ? tempVariable : tempVariable - 256));
    }
  }

  /**
   * @return the encoded codewords; the internal array is returned without copying
   */
  public byte[] getBytes() {
    return bytes;
  }
}
/**
 * Input to the encoder: the {@link MinimalECIInput} character sequence together
 * with the requested symbol shape and the macro id.
 */
private static final class Input extends MinimalECIInput {

  /** Macro id; 5 and 6 select a macro prefix codeword when the result is built. */
  private final int macroId;

  /** Symbol shape requested by the caller. */
  private final SymbolShapeHint shape;

  /**
   * @param stringToEncode  passed to {@link MinimalECIInput}
   * @param priorityCharset passed to {@link MinimalECIInput}
   * @param fnc1            passed to {@link MinimalECIInput}
   * @param shape           symbol shape hint to remember
   * @param macroId         macro id to remember
   */
  private Input(String stringToEncode, Charset priorityCharset, int fnc1, SymbolShapeHint shape, int macroId) {
    super(stringToEncode, priorityCharset, fnc1);
    this.macroId = macroId;
    this.shape = shape;
  }

  /** @return the symbol shape hint given at construction */
  private SymbolShapeHint getShapeHint() {
    return this.shape;
  }

  /** @return the macro id given at construction */
  private int getMacroId() {
    return this.macroId;
  }
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy