com.innovatrics.mrz.MrzParser Maven / Gradle / Ivy
The newest version!
/**
* Java parser for the MRZ records, as specified by the ICAO organization.
* Copyright (C) 2011 Innovatrics s.r.o.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package com.innovatrics.mrz;
import com.innovatrics.mrz.types.MrzDate;
import com.innovatrics.mrz.types.MrzFormat;
import com.innovatrics.mrz.types.MrzSex;
import java.text.Normalizer;
import java.util.HashMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Parses the MRZ records.
*
* @author Martin Vysny
*/
public class MrzParser {
private static final Logger LOG = LoggerFactory.getLogger(MrzParser.class);
private static final int[] MRZ_WEIGHTS = new int[]{7, 3, 1};
private static final Map EXPAND_CHARACTERS = new HashMap();
/**
* The filler character, '<'.
*/
public static final char FILLER = '<';
static {
EXPAND_CHARACTERS.put("\u00C4", "AE"); // Ä
EXPAND_CHARACTERS.put("\u00E4", "AE"); // ä
EXPAND_CHARACTERS.put("\u00C5", "AA"); // Å
EXPAND_CHARACTERS.put("\u00E5", "AA"); // å
EXPAND_CHARACTERS.put("\u00C6", "AE"); // Æ
EXPAND_CHARACTERS.put("\u00E6", "AE"); // æ
EXPAND_CHARACTERS.put("\u0132", "IJ"); // IJ
EXPAND_CHARACTERS.put("\u0133", "IJ"); // ij
EXPAND_CHARACTERS.put("\u00D6", "OE"); // Ö
EXPAND_CHARACTERS.put("\u00F6", "OE"); // ö
EXPAND_CHARACTERS.put("\u00D8", "OE"); // Ø
EXPAND_CHARACTERS.put("\u00F8", "OE"); // ø
EXPAND_CHARACTERS.put("\u00DC", "UE"); // Ü
EXPAND_CHARACTERS.put("\u00FC", "UE"); // ü
EXPAND_CHARACTERS.put("\u00DF", "SS"); // ß
}
/**
* The MRZ record, not null.
*/
private final String mrz;
/**
* The MRZ record separated into rows.
*/
private final String[] rows;
/**
* MRZ record format.
*/
private final MrzFormat format;
/**
* Creates new parser which parses given MRZ record.
*
* @param mrz the MRZ record, not null.
*
* @throws MrzParseException could not parse MRZ
*/
public MrzParser(final String mrz) throws MrzParseException {
this.mrz = mrz;
this.rows = mrz.split("\n");
this.format = MrzFormat.get(mrz);
}
/**
* @return the MRZ record
*/
public String getMrz() {
return mrz;
}
/**
* @return the MRZ rows
*/
public String[] getRows() {
return rows;
}
/**
* @return the MRZ format
*/
public MrzFormat getFormat() {
return format;
}
/**
* Parses the MRZ name in form of SURNAME <<FIRSTNAME<.
*
* @author jllarraz@github
* @param range the range
* @return array of [surname, first_name], never null, always with a length of 2.
* @throws MrzParseException could not parse range
*/
public String[] parseName(final MrzRange range) throws MrzParseException {
checkValidCharacters(range);
String str = rawValue(range);
while (str.endsWith("<")) {
str = str.substring(0, str.length() - 1);
}
final String[] names = str.split("<<");
String givenNames = "";
String surname = parseString(new MrzRange(range.getColumn(), range.getColumn() + names[0].length(), range.getRow()));
if (names.length == 1) {
givenNames = parseString(new MrzRange(range.getColumn(), range.getColumn() + names[0].length(), range.getRow()));
surname = "";
} else if (names.length > 1) {
surname = parseString(new MrzRange(range.getColumn(), range.getColumn() + names[0].length(), range.getRow()));
givenNames = parseString(new MrzRange(range.getColumn() + names[0].length() + 2, range.getColumn() + str.length(), range.getRow()));
}
return new String[]{surname, givenNames};
}
/**
* Returns a raw MRZ value from given range. If multiple ranges are specified, the value is concatenated.
*
* @param range the ranges, not null.
* @return raw value, never null, may be empty.
*/
public String rawValue(final MrzRange... range) {
final StringBuilder sb = new StringBuilder();
for (MrzRange r : range) {
sb.append(getRows()[r.getRow()].substring(r.getColumn(), r.getColumnTo()));
}
return sb.toString();
}
/**
* Checks that given range contains valid characters.
*
* @param range the range to check.
* @throws MrzParseException could not parse range
*/
public void checkValidCharacters(final MrzRange range) throws MrzParseException {
final String str = rawValue(range);
for (int i = 0; i < str.length(); i++) {
final char c = str.charAt(i);
if (c != FILLER && (c < '0' || c > '9') && (c < 'A' || c > 'Z')) {
throw new MrzParseException("Invalid character in MRZ record: " + c, getMrz(), new MrzRange(range.getColumn() + i, range.getColumn() + i + 1, range.getRow()), getFormat());
}
}
}
/**
* Parses a string in given range. << are replaced with ", ", < is replaced by space.
*
* @param range the range
* @return parsed string.
* @throws MrzParseException could not parse range
*/
public String parseString(final MrzRange range) throws MrzParseException {
checkValidCharacters(range);
String str = rawValue(range);
while (str.endsWith("<")) {
str = str.substring(0, str.length() - 1);
}
return str.replace("" + FILLER + FILLER, ", ").replace(FILLER, ' ');
}
/**
* Verifies the check digit.
*
* @param col the 0-based column of the check digit.
* @param row the 0-based column of the check digit.
* @param strRange the range for which the check digit is computed.
* @param fieldName (optional) field name. Used only when validity check fails.
* @return true if check digit is valid, false if not
*/
public boolean checkDigit(final int col, final int row, final MrzRange strRange, final String fieldName) {
return checkDigit(col, row, rawValue(strRange), fieldName);
}
/**
* Verifies the check digit.
*
* @param col the 0-based column of the check digit.
* @param row the 0-based column of the check digit.
* @param str the raw MRZ substring.
* @param fieldName (optional) field name. Used only when validity check fails.
* @return true if check digit is valid, false if not
*/
public boolean checkDigit(final int col, final int row, final String str, final String fieldName) {
// If the check digit validation fails, this will contain the location.
MrzRange invalidCheckdigit = null;
final char digit = (char) (computeCheckDigit(str) + '0');
char checkDigit = getRows()[row].charAt(col);
if (checkDigit == FILLER) {
checkDigit = '0';
}
if (digit != checkDigit) {
invalidCheckdigit = new MrzRange(col, col + 1, row);
LOG.info("Check digit verification failed for " + fieldName + ": expected " + digit + " but got " + checkDigit);
}
return invalidCheckdigit == null;
}
/**
* Parses MRZ date.
*
* @param range the range containing the date, in the YYMMDD format. The range must be 6 characters long.
* @return parsed date
* @throws IllegalArgumentException if the range is not 6 characters long.
*/
public MrzDate parseDate(final MrzRange range) {
if (range.length() != 6) {
throw new IllegalArgumentException("Parameter range: invalid value " + range + ": must be 6 characters long");
}
int year = parseYear(range);
int month = parseMonth(range);
int day = parseDay(range);
return new MrzDate(year, month, day, rawValue(range));
}
private int parseYear(final MrzRange range) {
int year;
MrzRange r = new MrzRange(range.getColumn(), range.getColumn() + 2, range.getRow());
try {
year = Integer.parseInt(rawValue(r));
} catch (NumberFormatException ex) {
year = -1;
LOG.debug("Failed to parse MRZ date year " + rawValue(range) + ": " + ex, getMrz(), r);
}
if (year < 0 || year > 99) {
LOG.debug("Invalid year value " + year + ": must be 0..99");
}
return year;
}
private int parseMonth(final MrzRange range) {
int month;
MrzRange r = new MrzRange(range.getColumn() + 2, range.getColumn() + 4, range.getRow());
try {
month = Integer.parseInt(rawValue(r));
} catch (NumberFormatException ex) {
month = -1;
LOG.debug("Failed to parse MRZ date month " + rawValue(range) + ": " + ex, getMrz(), r);
}
if (month < 1 || month > 12) {
LOG.debug("Invalid month value " + month + ": must be 1..12");
}
return month;
}
private int parseDay(final MrzRange range) {
int day;
MrzRange r = new MrzRange(range.getColumn() + 4, range.getColumn() + 6, range.getRow());
try {
day = Integer.parseInt(rawValue(r));
} catch (NumberFormatException ex) {
day = -1;
LOG.debug("Failed to parse MRZ date day " + rawValue(range) + ": " + ex, getMrz(), r);
}
if (day < 1 || day > 31) {
LOG.debug("Invalid day value " + day + ": must be 1..31");
}
return day;
}
/**
* Parses the "sex" value from given column/row.
*
* @param col the 0-based column
* @param row the 0-based row
* @return sex, never null.
*/
public MrzSex parseSex(final int col, final int row) {
return MrzSex.fromMrz(getRows()[row].charAt(col));
}
/**
* Checks if given character is valid in MRZ.
*
* @param c the character.
* @return true if the character is valid, false otherwise.
*/
private static boolean isValid(final char c) {
return (c == FILLER) || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z');
}
private static int getCharacterValue(final char c) {
if (c == FILLER) {
return 0;
}
if (c >= '0' && c <= '9') {
return c - '0';
}
if (c >= 'A' && c <= 'Z') {
return c - 'A' + 10;
}
throw new IllegalArgumentException("Invalid character in MRZ record: " + c);
}
/**
* Computes MRZ check digit for given string of characters.
*
* @param str the string
* @return check digit in range of 0..9, inclusive. See
* MRTD documentation
* part 15 for details.
*/
public static int computeCheckDigit(final String str) {
int result = 0;
for (int i = 0; i < str.length(); i++) {
result += getCharacterValue(str.charAt(i)) * MRZ_WEIGHTS[i % MRZ_WEIGHTS.length];
}
return result % 10;
}
/**
* Computes MRZ check digit for given string of characters.
*
* @param str the string
* @return check digit in range of 0..9, inclusive. See
* MRTD documentation
* part 15 for details.
*/
public static char computeCheckDigitChar(final String str) {
return (char) ('0' + computeCheckDigit(str));
}
/**
* Factory method, which parses the MRZ and returns appropriate record class.
*
* @param mrz MRZ to parse.
* @return record class.
* @throws MrzParseException could not parse MRZ
*/
public static MrzRecord parse(final String mrz) throws MrzParseException {
final MrzRecord result = MrzFormat.get(mrz).newRecord();
result.fromMrz(mrz);
return result;
}
/**
* Converts given string to a MRZ string: removes all accents, converts the string to upper-case and replaces all spaces and invalid characters
* with '<'.
*
* Several characters are expanded:
*
*
* Expanded characters
* Character Expand to
* Ä AE
* Å AA
* Æ AE
* IJ IJ
* IJ IJ
* Ö OE
* Ø OE
* Ü UE
* ß SS
*
* Examples:
*
* toMrz("Sedím na konári", 20)
yields "SEDIM<NA<KONARI<<<<<"
* toMrz("Pat, Mat", 8)
yields "PAT<<MAT"
* toMrz("foo/bar baz", 4)
yields "FOO<"
* toMrz("*$()&/\", 8)
yields "<<<<<<<<"
*
*
* @param input the string to convert. Passing null is the same as passing in an empty string.
* @param length required length of the string. If given string is longer, it is truncated. If given string is shorter than given length, '<'
* characters are appended at the end. If -1, the string is neither truncated nor enlarged.
* @return MRZ-valid string.
*/
public static String toMrz(final String input, final int length) {
String string = input == null ? "" : input;
for (final Map.Entry e : EXPAND_CHARACTERS.entrySet()) {
string = string.replace(e.getKey(), e.getValue());
}
string = string.replace("’", "");
string = string.replace("'", "");
string = deaccent(string).toUpperCase();
if (length >= 0 && string.length() > length) {
string = string.substring(0, length);
}
final StringBuilder sb = new StringBuilder(string);
for (int i = 0; i < sb.length(); i++) {
if (!isValid(sb.charAt(i))) {
sb.setCharAt(i, FILLER);
}
}
while (sb.length() < length) {
sb.append(FILLER);
}
return sb.toString();
}
private static boolean isBlank(final String str) {
return str == null || str.trim().length() == 0;
}
/**
* Converts a surname and given names to a MRZ string, shortening them as per Doc 9303 Part 3 Vol 1 Section 6.7 of the MRZ specification when
* necessary.
*
* @param surname the surname, not blank.
* @param givenNames given names, not blank.
* @param length required length of the string. If given string is longer, it is shortened. If given string is shorter than given length, '<'
* characters are appended at the end.
* @return name, properly converted to MRZ format of SURNAME<<GIVENNAMES<..., with the exact length of given length.
*/
public static String nameToMrz(final String surname, final String givenNames, final int length) {
if (isBlank(surname)) {
throw new IllegalArgumentException("Parameter surname: invalid value " + surname + ": blank");
}
if (isBlank(givenNames)) {
throw new IllegalArgumentException("Parameter givenNames: invalid value " + givenNames + ": blank");
}
if (length <= 0) {
throw new IllegalArgumentException("Parameter length: invalid value " + length + ": not positive");
}
final String[] surnames = extractNames(surname);
final String[] given = extractNames(givenNames);
// Truncate
truncateNames(surnames, given, length, surname, givenNames);
return toMrz(toName(surnames, given), length);
}
private static String[] extractNames(final String name) {
final String[] names = name.replace(", ", " ").trim().split("[ \n\t\f\r]+");
for (int i = 0; i < names.length; i++) {
names[i] = toMrz(names[i], -1);
}
return names;
}
private static void truncateNames(final String[] surnames, final String[] given, final int length, final String surname, final String givenNames) {
int nameSize = getNameSize(surnames, given);
String[] currentlyTruncating = given;
int currentlyTruncatingIndex = given.length - 1;
while (nameSize > length) {
final String ct = currentlyTruncating[currentlyTruncatingIndex];
final int ctsize = ct.length();
if (nameSize - ctsize + 1 <= length) {
currentlyTruncating[currentlyTruncatingIndex] = ct.substring(0, ctsize - (nameSize - length));
} else {
currentlyTruncating[currentlyTruncatingIndex] = ct.substring(0, 1);
currentlyTruncatingIndex--;
if (currentlyTruncatingIndex < 0) {
if (currentlyTruncating == surnames) {
throw new IllegalArgumentException("Cannot truncate name " + surname + " " + givenNames + ": length too small: " + length + "; truncated to " + toName(surnames, given));
}
currentlyTruncating = surnames;
currentlyTruncatingIndex = currentlyTruncating.length - 1;
}
}
nameSize = getNameSize(surnames, given);
}
}
private static String toName(final String[] surnames, final String[] given) {
final StringBuilder sb = new StringBuilder();
boolean first = true;
for (String s : surnames) {
if (first) {
first = false;
} else {
sb.append(FILLER);
}
sb.append(s);
}
sb.append(FILLER);
for (String s : given) {
sb.append(FILLER);
sb.append(s);
}
return sb.toString();
}
private static int getNameSize(final String[] surnames, final String[] given) {
int result = 0;
for (String s : surnames) {
result += s.length() + 1;
}
for (String s : given) {
result += s.length() + 1;
}
return result;
}
private static String deaccent(final String str) {
String n = Normalizer.normalize(str, Normalizer.Form.NFD);
return n.replaceAll("[^\\p{ASCII}]", "").toLowerCase();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy