All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.imsweb.algorithms.napiia.NapiiaUtils Maven / Gradle / Ivy

/*
 * Copyright (C) 2013 Information Management Services, Inc.
 */
package com.imsweb.algorithms.napiia;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.commons.lang3.StringUtils;

import com.opencsv.CSVReader;
import com.opencsv.exceptions.CsvException;

/**
 * This class is used to calculate the NAPIIA variable. More information can be found here:
 * http://www.naaccr.org/Research/DataAnalysisTools.aspx
 * 

* This Java implementation is based ONLY on the SAS implementation of the algorithm; the PDF documentation was not accurate when * this algorithm was implemented and therefore was not taken into account. */ public final class NapiiaUtils { public static final String ALG_NAME = "NAACCR Asian/Pacific Islander Identification Algorithm (NAPIIA)"; public static final String ALG_VERSION = "v17 released in April 2017"; public static final String REASON_1_3_3 = "More than one race 04-32 and one race 96 or 97 (step 1.3.3)."; public static final String REASON_1_3_5 = "One or more races 02-03 and one race 96 (step 1.3.5)."; public static final String REASON_1_3_6 = "One race 96 and one race 97 (step 1.3.6)."; public static final String REASON_1_3_7 = "Multiple combination of races involving race 96 (step 1.3.7)."; public static final String REASON_2_2_3 = "Multiple combination of races (step 2.2.3)."; private static final String _GENDER_MALE = "1"; private static final String _GENDER_FEMALE = "2"; // special birth place countries corresponding to a given race (indirect identification based on birth place) - Asian private static final String[][] _BPC_ASIAN = {{"XCH", "04"}, // Chinese {"CHN", "04"}, // Chinese {"HKG", "04"}, // Chinese {"TWN", "04"}, // Chinese {"MAC", "04"}, // Chinese {"JPN", "05"}, // Japanese {"PHL", "06"}, // Filipino {"KOR", "08"}, // Korean {"PRK", "08"}, // Korean {"VNM", "10"}, // Vietnamese {"KHM", "13"}, // Kampuchean {"THA", "14"}, // Thai {"IND", "16"}, // Asian Indian {"PAK", "17"} // Pakistan }; private static final Map _BPC_MAP_ASIAN = new HashMap<>(); static { for (String[] s : _BPC_ASIAN) _BPC_MAP_ASIAN.put(s[0], s[1]); } // special birth place countries corresponding to a given race (indirect identification based on birth place) - Asian private static final String[][] _BPC_PACIFIC_ISLANDER = {{"ASM", "27"}, // Samoan {"KIR", "20"}, // Micronesian, NOS {"FSM", "20"}, // Micronesian, NOS {"MHL", "20"}, // Micronesian, NOS {"PLW", "20"}, // Micronesian, NOS {"XMC", "20"}, // Micronesian, NOS {"NRU", "20"}, // Micronesian, NOS {"COK", "25"}, // Polynesian, NOS {"TUV", "25"}, // Polynesian, NOS {"TKL", "25"}, // Polynesian, NOS {"XPL", "25"}, // Polynesian, NOS {"PYF", "25"}, // Polynesian, NOS {"NFK", "25"}, // Polynesian, NOS {"PCN", "25"}, // Polynesian, NOS {"WSM", "25"}, // Polynesian, NOS {"TON", "25"} // Polynesian, NOS }; private static final Map _BPC_MAP_PACIFIC_ISLANDER = new HashMap<>(); static { for (String[] s : _BPC_PACIFIC_ISLANDER) _BPC_MAP_PACIFIC_ISLANDER.put(s[0], s[1]); } //Asian birth place countries excluded from indirect identification based on names private static final List _BPC_EXCLUDE_ASIAN = Arrays.asList("MDV", "NPL", "BTN", "BGD", "LKA", "MMR", "XMS", "MYS", "SGP", "BND", "IDN", "TLS", "MNG"); //Hispanic birth place countries excluded from identification based on names private static final List _BPC_EXCLUDE_HISP = Arrays.asList("PRI", "MEX", "CUB", "DOM", "ZZC", "GTM", "HND", "SLV", "NIC", "CRI", "PAN", "ZZS", "COL", "VEN", "ECU", "PER", "BOL", "CHL", "ARG", "PRY", "URY", "ESP", "AND"); private static final List _SPANISH_ORIGIN_IS_SPANISH = Arrays.asList("1", "2", "3", "4", "5", "6", "8"); //lookups private static final Map _LKUP_NAPIIA_SURNAME_CENSUS_ASIAN = new HashMap<>(); private static final Map _LKUP_NAPIIA_SURNAME_CENSUS_PI = new HashMap<>(); private static final Map _LKUP_NAPIIA_SURNAME_LAUD = new HashMap<>(); private static final Map _LKUP_NAPIIA_GIVEN_LAUD_MALE = new HashMap<>(); private static final Map _LKUP_NAPIIA_GIVEN_LAUD_FEMALE = new HashMap<>(); private static final Map _LKUP_NAPIIA_SURNAME_NAACCR = new HashMap<>(); private static final Map _LKUP_NAPIIA_GIVEN_NAACCR = new HashMap<>(); // internal lock to control concurrency to the data private static final ReentrantReadWriteLock _LOCK = new ReentrantReadWriteLock(); private NapiiaUtils() { // no instances of this class allowed! } /** * Calculates the NAPIIA value for the provided Patient DTO. *

* The provided patient dto has a list of record input dto. The record Dto has the following parameters. *
    *
  • race1
  • *
  • race2
  • *
  • race3
  • *
  • race4
  • *
  • race5
  • *
  • spanishHispanicOrigin
  • *
  • birthplaceCountry
  • *
  • sex
  • *
  • nameLast
  • *
  • nameMaiden
  • *
  • nameBirthSurname
  • *
  • nameFirst
  • *
*

* This algorithm returns a result containing the calculated value, a boolean indication whether a human review is required, in which case * a reason is also provided. * @param patient Dto * @return the computed NAPIIA Results Dto, which has a calculated napiia value, a boolean which indicates whether a human review is needed or not and a reason if human review is required */ public static NapiiaResultsDto computeNapiia(NapiiaInputPatientDto patient) { NapiiaInputRecordDto input = new NapiiaInputRecordDto(); //Since the properties used to calculate napiia are the same for all record of a patient lets use one of them and build a record input dto if (patient != null && patient.getNapiiaInputPatientDtoList() != null && !patient.getNapiiaInputPatientDtoList().isEmpty()) input = patient.getNapiiaInputPatientDtoList().get(0); return computeNapiia(input); } /** * Calculates the NAPIIA value for the provided Record DTO. *

* The provided record Dto has the following parameters. *
    *
  • race1
  • *
  • race2
  • *
  • race3
  • *
  • race4
  • *
  • race5
  • *
  • spanishHispanicOrigin
  • *
  • birthplaceCountry
  • *
  • sex
  • *
  • nameLast
  • *
  • nameMaiden
  • *
  • nameBirthSurname
  • *
  • nameFirst
  • *
*

* This algorithm returns a result containing the calculated value, a boolean indication whether a human review is required, in which case * a reason is also provided. * @param input the NapiiaInputRecordDto input DTO object * @return the computed NAPIIA Results Dto, which has a calculated napiia value, a boolean which indicates whether a human review is needed or not and a reason if human review is required */ @SuppressWarnings({"ConstantConditions", "java:S3776"}) // method too complex public static NapiiaResultsDto computeNapiia(NapiiaInputRecordDto input) { NapiiaResultsDto napiiaResults = new NapiiaResultsDto(); //NPE if (input == null) return napiiaResults; String napiia = null; String reason = null; boolean runBirthPlaceCountries = false; boolean runNames = false; // get spanish/hispanic origin and race 1 String spanishOrigin = input.getSpanishHispanicOrigin(); String race1 = input.getRace1(); // gather a few stats List races = Arrays.asList(input.getRace1(), input.getRace2(), input.getRace3(), input.getRace4(), input.getRace5()); int r01 = 0; int r0203 = 0; int r0432 = 0; int r07 = 0; int r88 = 0; int r96 = 0; int r97 = 0; int r98 = 0; int r99 = 0; String r0203val = null; String r0432val = null; String r07val = null; for (String r : races) { if (r == null || r.equals("88") || r.trim().isEmpty()) r88++; else if (r.equals("99")) r99++; else if (r.equals("98")) r98++; else if (r.equals("97")) r97++; else if (r.equals("96")) r96++; else if (r.compareTo("04") >= 0 && r.compareTo("32") <= 0) { r0432++; if (r0432val == null) r0432val = r; if (r.equals("07")) { r07++; if (r07val == null) r07val = r; } } else if (r.equals("02") || r.equals("03")) { r0203++; if (r0203val == null) r0203val = r; } else if (r.equals("01")) r01++; } // step 1 - Identify cases containing race code 96 or 97 if (r96 > 0 || r97 > 0) { // step 1.1 - Single race code of 96 if (r96 == 1 && r88 == 4 && "96".equals(race1)) { napiia = "96"; if (!_SPANISH_ORIGIN_IS_SPANISH.contains(spanishOrigin)) runBirthPlaceCountries = true; } // step 1.2 - Single race code of 97 else if (r97 == 1 && r88 == 4 && "97".equals(race1)) { napiia = "97"; if (!_SPANISH_ORIGIN_IS_SPANISH.contains(spanishOrigin)) runBirthPlaceCountries = true; } // step 1.3 - Race code of 96 or 97 in combination with one or more other race codes else { // step 1.3.1 - One race code is 04-32, one race code is 96 or 97; others are blank or 88 if (r0432 == 1 && (r96 == 1 || r97 == 1) && r88 == 3) napiia = r0432val; // step 1.3.2 - One Race code is 07; one race code is 96 or 97; else if (r07 >= 1 && (r96 == 1 || r97 == 1)) napiia = r07val; // step 1.3.3 - More than one race code is 04-32; one race code is 96 or 97; others are blank or 88. else if (r0432 > 1 && (r96 == 1 || r97 == 1) && (r0432 + r88 == 4) && r07 == 0) reason = REASON_1_3_3; // step 1.3.4 - One race code is 01; one race code is 96 or 97; others are blank or 88 else if (r01 == 1 && (r96 == 1 || r97 == 1) && r88 == 3) { napiia = r96 == 1 ? "96" : "97"; //(#215) runBirthPlaceCountries = true; } // step 1.3.5 - One or more race codes is 02-03; one race code is 96 or 97; others are blank or 88 else if (r0203 >= 1 && (r96 == 1 || r97 == 1) && (r0203 + r88 == 4)) reason = REASON_1_3_5; // step 1.3.6 - One race code is 96; one race code is 97; others are any value else if (r96 == 1 && r97 == 1) reason = REASON_1_3_6; // step 1.3.7 - Any multiple race combination involving code 96 or 97 not listed above else reason = REASON_1_3_7; } } // step 2 - Directly code cases not containing code 96 or 97 else { // step 2.1 - Direct Code Single Race Cases if (((r01 == 1 || r0203 == 1 || r0432 == 1 || r98 == 1 || r99 == 1) && r88 == 4) || r99 == 5) napiia = race1; // step 2.2 - Multiple Race Cases else { // step 2.2.1 - One race code is 01, one race code is 02-32; others are blank or 88 if (r01 == 1 && (r0203 == 1 || r0432 == 1) && r88 == 3) napiia = r0203 == 1 ? r0203val : r0432val; // step 2.2.2 - One race code is 07, and at least one other race code in race2-race5 else if (r07 == 1 && r88 < 4) napiia = r07val; // step 2.2.3 - All other multiple race combinations else if (r88 < 4 || r88 == 5) // RACE1 will take precedence. reason = REASON_2_2_3; } } // step 3 - Indirect Identification Based on Birthplace String birthplaceCountry = input.getBirthplaceCountry(); if (runBirthPlaceCountries) { // step 3.1 - Included Asian and Pacific Island Birthplaces if (r96 > 0 && _BPC_MAP_ASIAN.containsKey(birthplaceCountry)) napiia = _BPC_MAP_ASIAN.get(birthplaceCountry); else if (r97 > 0 && _BPC_MAP_PACIFIC_ISLANDER.containsKey(birthplaceCountry)) napiia = _BPC_MAP_PACIFIC_ISLANDER.get(birthplaceCountry); // step 3.2 - Excluded Asian and Pacific Island Birthplaces // step 3.3 - Excluded Hispanic Birthplaces else if (_BPC_EXCLUDE_ASIAN.contains(birthplaceCountry) || _BPC_EXCLUDE_HISP.contains(birthplaceCountry)) { if (napiia == null) //(#215) napiia = race1; } else runNames = true; } // step 4 - Indirect Identification Based on Name String sex = input.getSex(); String nameResult = null; if (runNames) { if (_GENDER_MALE.equals(sex)) nameResult = applyMaleNameIdentification(input, r96 > 0, r97 > 0); else if (_GENDER_FEMALE.equals(sex)) nameResult = applyFemaleNameIdentification(input, r96 > 0, r97 > 0); else nameResult = applyNotMaleNotFemaleNameIdentification(input, r96 > 0, r97 > 0); } //Assign the run names result to the napiia value if it is not null if (nameResult != null) napiia = nameResult; // NAPIIA wasn't set and there were an error reason, should default to race1 if (reason != null && napiia == null) napiia = race1; // If result is still null or missing at this point, default to unknown if (napiia == null) napiia = ""; napiiaResults.setNapiiaValue(napiia); if (reason != null) { napiiaResults.setNeedsHumanReview(true); napiiaResults.setReasonForReview(reason); } return napiiaResults; } /** * Executes the male indirect identification based on name. * @return new NAPIIA code, possible the not-calculated code */ private static String applyMaleNameIdentification(NapiiaInputRecordDto input, boolean asianNOS, boolean pacificIslanderNOS) { Short result = null; String lastname = input.getNameLast(); String firstname = input.getNameFirst(); if (lastname != null && !lastname.trim().isEmpty()) { // M1 - Surname in Census surname if (asianNOS) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_CENSUS_ASIAN); if (pacificIslanderNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_CENSUS_PI); // M2 - Surname in Lauderdale surname if (asianNOS && result == null) result = codeName(truncateName(lastname), _LKUP_NAPIIA_SURNAME_LAUD); // M3 - Surname in NAACCR surname if (asianNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_NAACCR); } if (firstname != null && !firstname.trim().isEmpty()) { // M4 - Given name in Lauderdale given name if (asianNOS && result == null) result = codeName(truncateName(firstname), _LKUP_NAPIIA_GIVEN_LAUD_MALE); // M5 - Given name in NAACCR given name if (asianNOS && result == null) result = codeName(firstname, _LKUP_NAPIIA_GIVEN_NAACCR); } return result == null ? null : String.format("%02d", result); } /** * Executes the female indirect identification based on name. * @return new NAPIIA code, possible the not-calculated code */ private static String applyFemaleNameIdentification(NapiiaInputRecordDto input, boolean asianNOS, boolean pacificIslanderNOS) { Short result = null; String lastname = input.getNameLast(); String firstname = input.getNameFirst(); String birthSurname = StringUtils.isEmpty(input.getNameBirthSurname()) ? input.getNameMaiden() : input.getNameBirthSurname(); if (birthSurname == null || birthSurname.trim().isEmpty()) { if (lastname != null && !lastname.trim().isEmpty()) { // F2a - Surname in Census surname if (asianNOS) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_CENSUS_ASIAN); if (pacificIslanderNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_CENSUS_PI); // F3a - Surname in Lauderdale surname if (asianNOS && result == null) result = codeName(truncateName(lastname), _LKUP_NAPIIA_SURNAME_LAUD); // F4a - Surname in NAACCR surname if (asianNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_NAACCR); } if (firstname != null && !firstname.trim().isEmpty()) { // F5a - Given name in Lauderdale given name if (asianNOS && result == null) result = codeName(truncateName(firstname), _LKUP_NAPIIA_GIVEN_LAUD_FEMALE); // F6a - Given name in NAACCR given name if (asianNOS && result == null) result = codeName(firstname, _LKUP_NAPIIA_GIVEN_NAACCR); } } else { // F2b - Maiden in Census surname if (asianNOS) result = codeName(birthSurname, _LKUP_NAPIIA_SURNAME_CENSUS_ASIAN); if (pacificIslanderNOS && result == null) result = codeName(birthSurname, _LKUP_NAPIIA_SURNAME_CENSUS_PI); // F3b - Maiden name in Lauderdale surname if (asianNOS && result == null) result = codeName(truncateName(birthSurname), _LKUP_NAPIIA_SURNAME_LAUD); // F4b - Maiden name in NAACCR surname if (asianNOS && result == null) result = codeName(birthSurname, _LKUP_NAPIIA_SURNAME_NAACCR); if (firstname != null && !firstname.trim().isEmpty()) { // F5b - Given name in Lauderdale given name if (asianNOS && result == null) result = codeName(truncateName(firstname), _LKUP_NAPIIA_GIVEN_LAUD_FEMALE); // F6b - Given name in NAACCR given name if (asianNOS && result == null) result = codeName(firstname, _LKUP_NAPIIA_GIVEN_NAACCR); } if (lastname != null && !lastname.trim().isEmpty()) { // F7b - Surname in Census surname if (asianNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_CENSUS_ASIAN); if (pacificIslanderNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_CENSUS_PI); // F8b - Surname in Lauderdale surname if (asianNOS && result == null) result = codeName(truncateName(lastname), _LKUP_NAPIIA_SURNAME_LAUD); // F9b - Surname in NAACCR surname if (asianNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_NAACCR); } } return result == null ? null : String.format("%02d", result); } /** * Executes the non-male-and-non-female indirect identification based on name. * @return new NAPIIA code, possible the not-calculated code */ private static String applyNotMaleNotFemaleNameIdentification(NapiiaInputRecordDto input, boolean asianNOS, boolean pacificIslanderNOS) { Short result = null; String lastname = input.getNameLast(); if (lastname != null && !lastname.trim().isEmpty()) { // NMF1 - Surname in Census surname if (asianNOS) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_CENSUS_ASIAN); if (pacificIslanderNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_CENSUS_PI); // NMF2 - Surname in Lauderdale surname if (asianNOS && result == null) result = codeName(truncateName(lastname), _LKUP_NAPIIA_SURNAME_LAUD); // NMF3 - Surname in NAACCR surname if (asianNOS && result == null) result = codeName(lastname, _LKUP_NAPIIA_SURNAME_NAACCR); } return result == null ? null : String.format("%02d", result); } //Returns the name truncated to 12 characters. static String truncateName(String name) { if (name == null || name.length() <= 12) return name; else return name.substring(0, 12); } static Short codeName(String name, Map lkup) { if (name == null) return null; _LOCK.readLock().lock(); try { if (lkup.isEmpty()) { _LOCK.readLock().unlock(); _LOCK.writeLock().lock(); try { readNameData("napiia-census-asian.csv", _LKUP_NAPIIA_SURNAME_CENSUS_ASIAN); readNameData("napiia-census-pi.csv", _LKUP_NAPIIA_SURNAME_CENSUS_PI); readNameData("napiia-laud-surname.csv", _LKUP_NAPIIA_SURNAME_LAUD); readNameData("napiia-laud-given-male.csv", _LKUP_NAPIIA_GIVEN_LAUD_MALE); readNameData("napiia-laud-given-female.csv", _LKUP_NAPIIA_GIVEN_LAUD_FEMALE); readNameData("napiia-naaccr-surname.csv", _LKUP_NAPIIA_SURNAME_NAACCR); readNameData("napiia-naaccr-given.csv", _LKUP_NAPIIA_GIVEN_NAACCR); } finally { _LOCK.writeLock().unlock(); _LOCK.readLock().lock(); } } return lkup.get(name.toUpperCase()); } finally { _LOCK.readLock().unlock(); } } private static void readNameData(String file, Map map) { try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("napiia/" + file)) { if (is == null) throw new IllegalStateException("Unable to read internal " + file); try (CSVReader reader = new CSVReader(new InputStreamReader(is, StandardCharsets.US_ASCII))) { for (String[] row : reader.readAll()) map.put(row[0].toUpperCase(), Short.valueOf(row[1])); } } catch (CsvException | IOException e) { throw new IllegalStateException("Unable to read internal " + file, e); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy