// Source: boofcv.alg.fiducial.aztec.AztecEncoderAutomatic
/*
* Copyright (c) 2022, Peter Abeles. All Rights Reserved.
*
* This file is part of BoofCV (http://boofcv.org).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package boofcv.alg.fiducial.aztec;
import boofcv.alg.fiducial.aztec.AztecCode.Mode;
import boofcv.misc.BoofMiscOps;
import org.ddogleg.struct.DogArray;
import org.ddogleg.struct.VerbosePrint;
import org.jetbrains.annotations.Nullable;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.util.Set;
/**
 * Automatic encoding algorithm as described in [1] which seeks to encode text in a format which minimizes the amount
 * of storage required.
 *
 * <ul>
 * <li>latch = switching into a mode until it switches out of it</li>
 * <li>shift = switching into a mode for a single character then going back into the source mode</li>
 * </ul>
 *
 * <p>[1] ISO/IEC 24778:2008(E)</p>
 *
 * @author Peter Abeles
 */
public class AztecEncoderAutomatic implements VerbosePrint {
	// Modes it will consider when automatically encoding. The index of a mode in this array is also the index of
	// its State in 'states' and its row/column in the cost tables below. Code which looks up a state using
	// Mode.ordinal() relies on this order matching the enum's declaration order.
	public static final Mode[] modes = new Mode[]{Mode.UPPER, Mode.LOWER, Mode.MIXED, Mode.PUNCT, Mode.DIGIT, Mode.BYTE};

	// State of each mode
	final DogArray<State> states = new DogArray<>(State::new, State::reset);

	// Shorthand for infinite cost
	static final int E = 0x0fffffff;

	// Latch length table. bits to latch from one mode to another mode
	// This can involve transitioning between multiple modes or adding length bits for BYTE mode
	static final int[][] latlen = new int[][]{
			{0, 5, 5, 10, 5, 10},
			{10, 0, 5, 10, 5, 10},
			{5, 5, 0, 5, 10, 10},
			{5, 10, 10, 0, 10, 15},
			{4, 9, 9, 14, 0, 14},
			{0, 0, 0, 0, 0, 0}};

	// Shift length table. bits to shift from one mode to another mode.
	// There is no row or column for BYTE, so it must only be indexed with values less than modes.length - 1
	static final int[][] shiftlen = new int[][]{
			{E, E, E, 5, E},
			{5, E, E, 5, E},
			{E, E, E, 5, E},
			{E, E, E, E, E},
			{4, E, E, 4, E}};

	@Nullable PrintStream verbose = null;

	/**
	 * Processes and encodes the string. The message is analyzed one character at a time using its ISO-8859-1
	 * byte value, then the selected sequence of modes is used to pass sub-strings of the message to the encoder.
	 *
	 * NOTE(review): getBytes() substitutes characters which cannot be represented in ISO-8859-1, while the
	 * unmodified sub-string is what gets passed to the encoder. Confirm callers only provide ISO-8859-1 text.
	 *
	 * @param message (Input) String that's to be encoded
	 * @param encoder (Input/Output) Encoder which is given the parsed segments
	 */
	public void process( String message, AztecEncoder encoder ) {
		byte[] characters = message.getBytes(StandardCharsets.ISO_8859_1);
		initialize();
		encodeCharacters(characters);
		encodeMessageGivenSequence(selectBestState(), message, encoder);
	}

	/**
	 * Resets all states, then marks UPPER as the only state which has a valid (empty) encoding.
	 */
	void initialize() {
		states.reset().resize(latlen.length);
		State s = states.get(Mode.UPPER.ordinal());
		s.curLen = 0;
		s.characterCount = 0;
		s.sequence.grow().setTo(Mode.UPPER, 0);
	}

	/**
	 * Goes through the characters one at a time and updates the cost and mode sequence of every state.
	 *
	 * @param characters Value of each character in the message. Each byte is treated as unsigned.
	 */
	void encodeCharacters( byte[] characters ) {
		for (int charIdx = 0; charIdx < characters.length; charIdx++) {
			int curr = characters[charIdx] & 0xFF;
			// Print the unsigned value. Casting the signed byte directly would sign extend values >= 0x80
			if (verbose != null)
				verbose.println("charIdx=" + charIdx + " value=" + curr + " char='" + (char)curr + "'");

			// Step 1: See if any of the encodings could be made shorter if they latched from another mode
			latchToReduceMessageSize(charIdx);

			// Step 2: Set all nxtLen to 0
			states.forEach(m -> m.nxtLen = 0);

			// Step 3: Find all modes which can encode this character
			addCharacterToStates(curr);

			// See if it should do a shift instead
			considerShiftingInstead(charIdx, curr);

			// Step 4: Handle special 2-character sequence
			int prev = charIdx > 0 ? characters[charIdx - 1] & 0xFF : 0;
			if (isTwoCharacterSequence(prev, curr)) {
				// The second character isn't a standalone character and needs to be accounted for
				State punct = states.get(Mode.PUNCT.ordinal());
				punct.characterCount++;
				punct.sequence.getTail().count++;
			}

			// Step 5: See if byte mode needs to have a longer length integer
			State byteState = states.get(Mode.BYTE.ordinal());
			if (byteState.sequence.getTail().count == 32) {
				byteState.nxtLen += 11;
			}

			// Step 6: Transfer nxtLen to curLen
			states.forEach(m -> m.curLen = m.nxtLen);
		}
	}

	/**
	 * See if it makes sense to replace the sequence for one state with the sequence of another state then latch
	 * it into the state's target mode
	 *
	 * @param charIdx Index of the character being considered
	 */
	private void latchToReduceMessageSize( int charIdx ) {
		for (int modeIdxA = 0; modeIdxA < states.size; modeIdxA++) {
			State stateA = states.get(modeIdxA);

			// Make sure that the path leading to this mode has been able to encode everything up to this point
			if (stateA.characterCount != charIdx)
				continue;

			// See if it should transfer from this state into another state
			for (int modeIdxB = 0; modeIdxB < states.size; modeIdxB++) {
				if (modeIdxA == modeIdxB) {
					continue;
				}

				// If in byte mode and the previous mode is not modeB then it will need to transition into modeB
				boolean byteTransition = modes[modeIdxA] == Mode.BYTE && modes[modeIdxB] != stateA.backTo;

				// Number of bits if it transitioned from A to B
				int lengthIfTransition = stateA.curLen + latlen[modeIdxA][modeIdxB];
				if (byteTransition) {
					// cost of the extra transition
					lengthIfTransition += latlen[stateA.backTo.ordinal()][modeIdxB];
				}

				State stateB = states.get(modeIdxB);

				// Skip if B has encoded everything so far and the transition would not be a smaller encoding.
				// If B has not encoded everything so far then its history is always replaced
				if (lengthIfTransition >= stateB.curLen && stateB.characterCount == charIdx)
					continue;

				if (verbose != null) verbose.println("latching " + modes[modeIdxA] + "->" + modes[modeIdxB]);

				// The encoding is better, so replace the history of B for the history in A
				stateB.curLen = lengthIfTransition;
				stateB.characterCount = stateA.characterCount;
				stateB.sequence.reset();
				stateB.sequence.copyAll(stateA.sequence.toList(), ( src, dst ) -> dst.setTo(src));
				stateB.backTo = modes[modeIdxA];

				// add in the extra byte mode transition
				// NOTE(review): byteTransition implies modes[modeIdxA] is BYTE, so this appends an empty BYTE
				// group while the cost above was computed using stateA.backTo. Confirm this is intended.
				if (byteTransition)
					stateB.sequence.grow().setTo(modes[modeIdxA], 0);

				// Add in this transition
				stateB.sequence.grow().setTo(modes[modeIdxB], 0);
			}
		}
	}

	/**
	 * If a mode can encode this character then add it to its state
	 *
	 * @param curr character's value
	 */
	private void addCharacterToStates( int curr ) {
		for (int modeIdx = 0; modeIdx < modes.length; modeIdx++) {
			State state = states.get(modeIdx);
			if (!isMember(modeIdx, curr)) {
				// avoid zeroing curLen later on
				state.nxtLen = state.curLen;
				continue;
			}

			// See if encoding this character into this mode is better
			int length = state.curLen + modes[modeIdx].wordSize;
			if (state.nxtLen == 0 || state.nxtLen > length) {
				state.nxtLen = length;
				state.characterCount++;
				state.sequence.getTail().count++;
				if (verbose != null) verbose.printf("add %5s length=%d\n", modes[modeIdx], length);
			}
		}
	}

	/**
	 * See if it makes more sense to shift instead of latching
	 *
	 * @param charIdx Index of the character being considered
	 * @param curr character's value
	 */
	private void considerShiftingInstead( int charIdx, int curr ) {
		// Can't shift into or out of BYTE, hence modes.length - 1 in both loops
		int numShiftModes = modes.length - 1;

		// outermost loop goes through modes that could be shifted into
		for (int modeIdxA = 0; modeIdxA < numShiftModes; modeIdxA++) {
			// The mode it would shift into must be compatible with the character
			if (!isMember(modeIdxA, curr))
				continue;

			// See if any shifts are possible and make sense
			for (int modeIdxB = 0; modeIdxB < numShiftModes; modeIdxB++) {
				if (isMember(modeIdxB, curr) || shiftlen[modeIdxB][modeIdxA] == E)
					continue;

				State stateB = states.get(modeIdxB);
				int shiftLength = stateB.curLen + shiftlen[modeIdxB][modeIdxA] + modes[modeIdxA].wordSize;

				// Skip if B already accounts for this character at a cost which is no worse
				if (stateB.characterCount == (charIdx + 1) && shiftLength >= stateB.nxtLen)
					continue;

				// This can only be less expensive if it couldn't encode the current character
				stateB.nxtLen = shiftLength;
				stateB.characterCount++;

				// Add the shift character
				stateB.sequence.grow().setTo(modes[modeIdxA], 1);

				// Transition back into the mode
				stateB.sequence.grow().setTo(modes[modeIdxB], 0);
			}
		}
	}

	/**
	 * Some two character punctuations are encoded as a single character. These are CR LF, ". ", ", ", and ": ".
	 *
	 * @param a Value of the first character
	 * @param b Value of the second character
	 * @return true if the pair is one of the two character sequences
	 */
	private boolean isTwoCharacterSequence( int a, int b ) {
		// 13 = CR, 10 = LF
		if (a == 13 && b == 10)
			return true;

		// 46 = '.', 44 = ',', 58 = ':' followed by a space (32)
		return b == 32 && (a == 46 || a == 44 || a == 58);
	}

	/**
	 * Passes the message to the encoder one group at a time, using the mode specified by each group.
	 *
	 * @param state State which contains the sequence of modes that is to be used
	 * @param message The message being encoded
	 * @param encoder (Output) Where each segment of the message is added
	 */
	private void encodeMessageGivenSequence( State state, String message, AztecEncoder encoder ) {
		int char0 = 0;
		for (int i = 0; i < state.sequence.size; i++) {
			Group g = state.sequence.get(i);
			int char1 = char0 + g.count;
			String segment = message.substring(char0, char1);
			switch (g.mode) {
				case UPPER -> encoder.addUpper(segment);
				case LOWER -> encoder.addLower(segment);
				case MIXED -> encoder.addMixed(segment);
				case PUNCT -> encoder.addPunctuation(segment);
				case DIGIT -> encoder.addDigit(segment);
				case BYTE -> {
					byte[] data = segment.getBytes(StandardCharsets.ISO_8859_1);
					encoder.addBytes(data, 0, data.length);
				}
				default -> throw new RuntimeException("Invalid");
			}
			char0 = char1;
		}
	}

	/**
	 * Selects the state which has encoded the most characters. Ties are broken by selecting the state with
	 * the smallest bit count, and if those are also equal the state with the lowest index is kept.
	 */
	State selectBestState() {
		State best = states.get(0);
		for (int i = 1; i < states.size; i++) {
			State candidate = states.get(i);
			if (candidate.characterCount > best.characterCount) {
				best = candidate;
			} else if (candidate.characterCount == best.characterCount && candidate.curLen < best.curLen) {
				best = candidate;
			}
		}
		return best;
	}

	/**
	 * Checks to see if the character is a member of the specified mode
	 *
	 * @param mode Index of the mode in {@link #modes}
	 * @param curr character's value
	 * @return true if the mode can encode the character. Always true for BYTE.
	 */
	boolean isMember( int mode, int curr ) {
		return switch (modes[mode]) {
			case UPPER -> isUpper(curr);
			case LOWER -> isLower(curr);
			case MIXED -> isMixed(curr);
			case PUNCT -> isPunctuation(curr);
			case DIGIT -> isDigit(curr);
			case BYTE -> true;
			default -> throw new RuntimeException("Invalid");
		};
	}

	/** True if the value is a space or 'A' to 'Z' */
	boolean isUpper( int c ) {
		return c == 32 || (c >= 65 && c <= 90);
	}

	/** True if the value is a space or 'a' to 'z' */
	boolean isLower( int c ) {
		return c == 32 || (c >= 97 && c <= 122);
	}

	/** True if the value is in the set of characters accepted for MIXED mode */
	boolean isMixed( int c ) {
		return (c >= 1 && c <= 13)
				|| (c >= 27 && c <= 32)
				|| c == 64 // '@'
				|| c == 92 // '\'
				|| (c >= 94 && c <= 96) // '^', '_', '`'
				|| c == 124 // '|'
				|| c == 126 // '~'
				|| c == 127; // DEL
	}

	/** True if the value is in the set of single characters accepted for PUNCT mode */
	public boolean isPunctuation( int c ) {
		return c == 13 // CR
				|| (c >= 33 && c <= 47) // '!' to '/'
				|| (c >= 58 && c <= 63) // ':' to '?'
				|| c == 91 // '['
				|| c == 93 // ']'
				|| c == 123 // '{'
				|| c == 125; // '}'
	}

	/** True if the value is a space, '0' to '9', a comma, or a period */
	boolean isDigit( int c ) {
		return c == 32
				|| (c >= 48 && c <= 57)
				|| c == 44 // ','
				|| c == 46; // '.'
	}

	@Override public void setVerbose( @Nullable PrintStream out, @Nullable Set<String> configuration ) {
		this.verbose = BoofMiscOps.addPrefix(this, out);
	}

	/**
	 * Encodes the state as described in Annex H.
	 */
	static class State {
		// Number of bits to encode it into this state
		int curLen = Integer.MAX_VALUE;
		// Number of bits to encode and the latest character
		int nxtLen = -1;
		// Number of characters it has encoded. -1 until this state has been given an encoding
		int characterCount = -1;
		// Sequence of character sets to get to this state
		DogArray<Group> sequence = new DogArray<>(Group::new, Group::reset);
		// Mode that the binary shift came from
		Mode backTo = Mode.UPPER;

		/** Puts every field back into its initial value and discards the sequence */
		public void reset() {
			curLen = Integer.MAX_VALUE;
			nxtLen = -1;
			characterCount = -1;
			sequence.reset();
			backTo = Mode.UPPER;
		}
	}

	/** Character set and the number of characters. Group = group of characters */
	static class Group {
		Mode mode = Mode.UPPER;
		int count;

		public void reset() {
			mode = Mode.UPPER;
			count = 0;
		}

		public void setTo( Mode mode, int count ) {
			this.mode = mode;
			this.count = count;
		}

		public void setTo( Group g ) {
			mode = g.mode;
			count = g.count;
		}
	}
}