All downloads are FREE. Search and download functionality uses the official Maven repository.

boofcv.alg.fiducial.aztec.AztecEncoderAutomatic Maven / Gradle / Ivy

/*
 * Copyright (c) 2022, Peter Abeles. All Rights Reserved.
 *
 * This file is part of BoofCV (http://boofcv.org).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package boofcv.alg.fiducial.aztec;

import boofcv.alg.fiducial.aztec.AztecCode.Mode;
import boofcv.misc.BoofMiscOps;
import org.ddogleg.struct.DogArray;
import org.ddogleg.struct.VerbosePrint;
import org.jetbrains.annotations.Nullable;

import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.util.Set;

/**
 * Automatic encoding algorithm as described in [1] which seeks to encode text in a format which minimizes the amount
 * of storage required.
 *
 * 
    *
  • latch = switching into a mode until it switches out of it
  • *
  • shift = switching into a mode for a single then going back into the source mode
  • *
* *

[1] ISO/IEC 24778:2008(E)

* * @author Peter Abeles */ public class AztecEncoderAutomatic implements VerbosePrint { // Modes it will consider when automatically encoding public static final Mode[] modes = new Mode[]{Mode.UPPER, Mode.LOWER, Mode.MIXED, Mode.PUNCT, Mode.DIGIT, Mode.BYTE}; // State of each mode final DogArray states = new DogArray<>(State::new, State::reset); // Shorthand for infinite cost final static int E = 0x0fffffff; // Latch length table. bits to latch from one mode to another mode // This can involve transitioning between multiple modes or adding length bits for BYTE mode final static int[][] latlen = new int[][]{ {0, 5, 5, 10, 5, 10}, {10, 0, 5, 10, 5, 10}, {5, 5, 0, 5, 10, 10}, {5, 10, 10, 0, 10, 15}, {4, 9, 9, 14, 0, 14}, {0, 0, 0, 0, 0, 0}}; // Shift length table. bits to shift from one mode to another mode. final static int[][] shiftlen = new int[][]{ {E, E, E, 5, E}, {5, E, E, 5, E}, {E, E, E, 5, E}, {E, E, E, E, E}, {4, E, E, 4, E}}; @Nullable PrintStream verbose = null; /** * Processes and encodes the string * * @param message (Input) String that's to be encoded * @param encoder (Input/Output) Encoder for parses segments */ public void process( String message, AztecEncoder encoder ) { byte[] characters = message.getBytes(StandardCharsets.ISO_8859_1); initialize(); encodeCharacters(characters); encodeMessageGivenSequence(selectBestState(), message, encoder); } void initialize() { states.reset().resize(latlen.length); State s = states.get(Mode.UPPER.ordinal()); s.curLen = 0; s.characterCount = 0; s.sequence.grow().setTo(Mode.UPPER, 0); } void encodeCharacters( byte[] characters ) { for (int charIdx = 0; charIdx < characters.length; charIdx++) { int curr = characters[charIdx] & 0xFF; if (verbose != null) verbose.println("charIdx=" + charIdx + " value=" + curr + " char='" + (char)characters[charIdx] + "'"); // Step 1: See if any of the encodings could be made shorter if they latched from another mode latchToReduceMessageSize(charIdx); // Step 2: Set all nxtLen to 0 
states.forEach(m -> m.nxtLen = 0); // Step 3: Find all modes which can encode this character addCharacterToStates(curr); // See if it should do a shift. Can't shift into or out of byte, hence -1 considerShiftingInstead(charIdx, curr); // Step 4: Handle special 2-character sequence int prev = charIdx > 0 ? characters[charIdx - 1] & 0xFF : 0; if (isTwoCharacterSequence(prev, curr)) { // The second character isn't a standalone character and needs to be accounted for State state = states.get(Mode.PUNCT.ordinal()); state.characterCount++; state.sequence.getTail().count++; } // Step 5: See if byte mode needs to have a longer length integer if (states.get(Mode.BYTE.ordinal()).sequence.getTail().count == 32) { states.get(Mode.BYTE.ordinal()).nxtLen += 11; } // Step 6: Transfer nxtLen to curLen states.forEach(m -> m.curLen = m.nxtLen); } } /** * See if it makes sense to replace the sequence for one state with the sequence of another state then latch * it into the state's target mode * * @param charIdx Index of the character being considered */ private void latchToReduceMessageSize( int charIdx ) { for (int modeIdxA = 0; modeIdxA < states.size; modeIdxA++) { State stateA = states.get(modeIdxA); // Make sure that the path leading to this mode has been able to encode everything up to this point if (stateA.characterCount != charIdx) continue; // See if it should transfer from this state into another state for (int modeIdxB = 0; modeIdxB < states.size; modeIdxB++) { if (modeIdxA == modeIdxB) { continue; } // If in byte mode and the previous mode is not modeB then it will need to transition into modeB boolean byteTransition = modes[modeIdxA] == Mode.BYTE && modes[modeIdxB] != stateA.backTo; // NUmber of bits if it transitioned from A to B int lengthIfTransition = stateA.curLen + latlen[modeIdxA][modeIdxB]; if (byteTransition) { // cost of the extra transition lengthIfTransition += latlen[stateA.backTo.ordinal()][modeIdxB]; } State stateB = states.get(modeIdxB); // See if the 
result would be a smaller encoding or if stateB is impossible to be in if (lengthIfTransition >= stateB.curLen && stateB.characterCount == charIdx) continue; if (verbose != null) verbose.println("latching " + modes[modeIdxA] + "->" + modes[modeIdxB]); // The encoding is better, so replace the history of B for the history in A stateB.curLen = lengthIfTransition; stateB.characterCount = stateA.characterCount; stateB.sequence.reset(); stateB.sequence.copyAll(stateA.sequence.toList(), ( src, dst ) -> dst.setTo(src)); stateB.backTo = modes[modeIdxA]; // add in the extra byte mode transition if (byteTransition) stateB.sequence.grow().setTo(modes[modeIdxA], 0); // Add in this transition stateB.sequence.grow().setTo(modes[modeIdxB], 0); } } } /** * If a mode can encode this character then add it to it's state * * @param curr character's value */ private void addCharacterToStates( int curr ) { for (int modeIdx = 0; modeIdx < modes.length; modeIdx++) { State state = states.get(modeIdx); if (!isMember(modeIdx, curr)) { // avoid zeroing curLen later on state.nxtLen = state.curLen; continue; } // See if encoding this character into this mode is better int length = state.curLen + modes[modeIdx].wordSize; if (state.nxtLen == 0 || state.nxtLen > length) { state.nxtLen = length; state.characterCount++; state.sequence.getTail().count++; if (verbose != null) verbose.printf("add %5s length=%d\n", modes[modeIdx], length); } } } /** * See if it makes more sense to shift instead of latching * * @param curr character's value */ private void considerShiftingInstead( int charIdx, int curr ) { // outermost loop goes through modes that could be shifted into // modes.length - 1 because BYTE never has shifts for (int modeIdxA = 0; modeIdxA < modes.length - 1; modeIdxA++) { // The mode it would shift into must be compatible with the character if (!isMember(modeIdxA, curr)) continue; // See if any shifts are possible and make sense for (int modeIdxB = 0; modeIdxB < modes.length - 1; modeIdxB++) { 
if (isMember(modeIdxB, curr) || shiftlen[modeIdxB][modeIdxA] == E) continue; State stateB = states.get(modeIdxB); int shiftLength = stateB.curLen + shiftlen[modeIdxB][modeIdxA] + modes[modeIdxA].wordSize; if (stateB.characterCount == (charIdx + 1) && shiftLength >= stateB.nxtLen) continue; // This can only be less expensive if it couldn't encode the current character stateB.nxtLen = shiftLength; stateB.characterCount++; // Add the shift character stateB.sequence.grow().setTo(modes[modeIdxA], 1); // Transition back into the mode stateB.sequence.grow().setTo(modes[modeIdxB], 0); } } } /** * Some two character punctuations are encoded as a single character */ private boolean isTwoCharacterSequence( int a, int b ) { if (a == 13 && b == 10) { return true; } else if (b == 32) { if (a == 46) { return true; } else if (a == 44) { return true; } else if (a == 58) { return true; } } return false; } private void encodeMessageGivenSequence( State state, String message, AztecEncoder encoder ) { int char0 = 0; for (int i = 0; i < state.sequence.size; i++) { Group g = state.sequence.get(i); int char1 = char0 + g.count; switch (g.mode) { case UPPER -> encoder.addUpper(message.substring(char0, char1)); case LOWER -> encoder.addLower(message.substring(char0, char1)); case MIXED -> encoder.addMixed(message.substring(char0, char1)); case PUNCT -> encoder.addPunctuation(message.substring(char0, char1)); case DIGIT -> encoder.addDigit(message.substring(char0, char1)); case BYTE -> { byte[] data = message.substring(char0, char1).getBytes(StandardCharsets.ISO_8859_1); encoder.addBytes(data, 0, data.length); } default -> throw new RuntimeException("Invalid"); } char0 = char1; } } /** * Selects the state with the smallest bit count */ State selectBestState() { State state = states.get(0); for (int i = 1; i < states.size; i++) { State candidate = states.get(i); if (candidate.characterCount > state.characterCount) { state = states.get(i); } else if (candidate.characterCount == 
state.characterCount && states.get(i).curLen < state.curLen) { state = states.get(i); } } return state; } /** * Checks to see if the character is a memeber of the specified mode */ boolean isMember( int mode, int curr ) { return switch (modes[mode]) { case UPPER -> isUpper(curr); case LOWER -> isLower(curr); case MIXED -> isMixed(curr); case PUNCT -> isPunctuation(curr); case DIGIT -> isDigit(curr); case BYTE -> true; default -> throw new RuntimeException("Invalid"); }; } boolean isUpper( int c ) { if (c == 32) { return true; } else if (c >= 65 && c <= 90) { return true; } return false; } boolean isLower( int c ) { if (c == 32) { return true; } else if (c >= 97 && c <= 122) { return true; } return false; } boolean isMixed( int c ) { if (c >= 1 && c <= 13) { return true; } else if (c >= 27 && c <= 32) { return true; } else if (c == 64) { return true; } else if (c == 92) { return true; } else if (c == 94) { return true; } else if (c == 95) { return true; } else if (c == 96) { return true; } else if (c == 124) { return true; } else if (c == 126) { return true; } else if (c == 127) { return true; } return false; } public boolean isPunctuation( int c ) { if (c == 13) { return true; } else if (c >= 33 && c <= 47) { return true; } else if (c >= 58 && c <= 63) { return true; } else if (c == 91) { return true; } else if (c == 93) { return true; } else if (c == 123) { return true; } else if (c == 125) { return true; } return false; } boolean isDigit( int c ) { if (c == 32) { return true; } else if (c >= 48 && c <= 57) { return true; } else if (c == 44) { return true; } else if (c == 46) { return true; } return false; } @Override public void setVerbose( @Nullable PrintStream out, @Nullable Set configuration ) { this.verbose = BoofMiscOps.addPrefix(this, out); } /** * Encodes the state as described in Annex H. 
*/ static class State { // Number of bits to encode it into this state int curLen = Integer.MAX_VALUE; // Number of bits to encode and the latest character int nxtLen = -1; // Number of characters it has encoded int characterCount = -1; // Sequence of character sets to get to this state DogArray sequence = new DogArray<>(Group::new, Group::reset); // Mode that the binary shift came from Mode backTo = Mode.UPPER; public void reset() { curLen = Integer.MAX_VALUE; nxtLen = -1; characterCount = -1; sequence.reset(); backTo = Mode.UPPER; } } /** Character set and the number of characters. Group = group of characters */ static class Group { Mode mode = Mode.UPPER; int count; public void reset() { mode = Mode.UPPER; count = 0; } public void setTo( Mode mode, int count ) { this.mode = mode; this.count = count; } public void setTo( Group g ) { mode = g.mode; count = g.count; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy