Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Copyright (C) 2013 Information Management Services, Inc.
*/
package com.imsweb.algorithms.nhia;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import com.opencsv.CSVReader;
/**
* This class is used to calculate the NHIA variable. More information can be found here:
* http://www.naaccr.org/Research/DataAnalysisTools.aspx
*
* This Java implementation is based ONLY on the SAS implementation of the algorithm; the PDF documentation was not accurate when
* this algorithm was implemented and therefore was not taken into account.
*/
public final class NhiaUtils {
// algorithm identification (name, version and release information)
public static final String ALG_NAME = "NAACCR Hispanic Identification Algorithm";
public static final String ALG_VERSION = "17";
public static final String ALG_INFO = "NHAPIIA v17 released in April 2017";
// keys of the record properties read by computeNhia(Map, String)
public static final String PROP_SPANISH_HISPANIC_ORIGIN = "spanishHispanicOrigin";
public static final String PROP_NAME_LAST = "nameLast";
public static final String PROP_NAME_MAIDEN = "nameMaiden";
public static final String PROP_BIRTH_PLACE_COUNTRY = "birthplaceCountry";
public static final String PROP_RACE1 = "race1";
public static final String PROP_SEX = "sex";
public static final String PROP_IHS = "ihs";
public static final String PROP_COUNTY_DX = "addressAtDxCounty";
public static final String PROP_STATE_DX = "addressAtDxState";
// NHIA value codes
public static final String NHIA_NON_HISPANIC = "0";
public static final String NHIA_MEXICAN = "1";
public static final String NHIA_PUERTO_RICAN = "2";
public static final String NHIA_CUBAN = "3";
public static final String NHIA_SOUTH_CENTRAL_AMER = "4";
public static final String NHIA_OTHER_SPANISH = "5";
public static final String NHIA_SPANISH_NOS = "6";
public static final String NHIA_SURNAME_ONLY = "7";
public static final String NHIA_DOMINICAN = "8";
// values for the 'option' argument of computeNhia: 0 = always apply the surname portion of the algorithm,
// 1 = apply it only if Spanish/Hispanic Origin is 7 or 9, 2 = apply it only if Spanish/Hispanic Origin is 7 (9 is converted to 0)
public static final String NHIA_OPTION_ALL_CASES = "0";
public static final String NHIA_OPTION_SEVEN_AND_NINE = "1";
public static final String NHIA_OPTION_SEVEN_ONLY = "2";
// Spanish/Hispanic Origin codes
private static final String _SPAN_HISP_ORIG_NON_HISPANIC = "0";
private static final String _SPAN_HISP_ORIG_MEXICAN = "1";
private static final String _SPAN_HISP_ORIG_PUERTO_RICAN = "2";
private static final String _SPAN_HISP_ORIG_CUBAN = "3";
private static final String _SPAN_HISP_ORIG_SOUTH_CENTRAL_AMER = "4";
private static final String _SPAN_HISP_ORIG_OTHER_SPANISH = "5";
private static final String _SPAN_HISP_ORIG_SPANISH_NOS = "6";
private static final String _SPAN_HISP_ORIG_SURNAME_ONLY = "7";
private static final String _SPAN_HISP_ORIG_DOMINICAN = "8";
private static final String _SPAN_HISP_ORIG_UNKNOWN = "9";
// sex codes
private static final String _GENDER_MALE = "1";
private static final String _GENDER_FEMALE = "2";
// spanish/Hispanic origins for direct identification
private static final List<String> _DIRECT_IDENTIFICATION_ORIGINS = Arrays.asList("1", "2", "3", "4", "5", "6", "8");
// spanish/Hispanic origins for indirect identification
private static final List<String> _INDIRECT_IDENTIFICATION_ORIGINS = Arrays.asList("0", "6", "7", "9");
// birthplace countries corresponding to NHIA of NON-HISPANIC (called Low Probability of Hispanic Ethnicity in documentation)
private static final List<String> _BPC_NON_HISP = Arrays.asList("VIR", "ASM", "KIR", "FSM", "COK", "TUV", "GUM", "MNP", "MHL", "TKL", "UMI", "BRA", "GUY", "SUR", "GUF", "GBR", "XEN", "ENG", "GGY",
    "JEY", "IMN", "WLS", "SCT", "NIR", "IRL", "XSC", "ISL", "NOR", "SJM", "DNK", "FRO", "SWE", "FIN", "ALA", "XGR", "DEU", "NLD", "BEL", "LUX", "CHE", "AUT", "LIE", "FRA", "MCO", "PRT", "CPV",
    "ITA", "SMR", "VAT", "ROU", "XSL", "POL", "CSK", "CZE", "SWK", "YUG", "BIH", "HRV", "MKD", "MNE", "SRB", "SVN", "BGR", "RUS", "XUM", "UKR", "MDA", "BLR", "EST", "LVA", "LTU", "GRC", "HUN",
    "ALB", "GIB", "MLT", "CYP", "ZZE", "PHL");
// birthplace countries corresponding to NHIA of MEXICAN (under the High Probability of Hispanic Ethnicity in documentation)
private static final List<String> _BPC_MEXICAN = Collections.singletonList("MEX");
// birthplace countries corresponding to NHIA of PUERTO-RICAN (under the High Probability of Hispanic Ethnicity in documentation)
private static final List<String> _BPC_PUERTO_RICAN = Collections.singletonList("PRI");
// birthplace countries corresponding to NHIA of CUBAN (under the High Probability of Hispanic Ethnicity in documentation)
private static final List<String> _BPC_CUBAN = Collections.singletonList("CUB");
// birthplace countries corresponding to NHIA of SOUTH-CENTRAL-AMERICAN (under the High Probability of Hispanic Ethnicity in documentation)
private static final List<String> _BPC_SOUTH_CENTRAL_AMER = Arrays.asList("ZZC", "GTM", "HND", "SLV", "NIC", "CRI", "PAN", "ZZS", "COL", "VEN", "ECU", "PER", "BOL", "CHL", "ARG", "PRY", "URY");
// birthplace countries corresponding to NHIA of OTHER-SPANISH (under the High Probability of Hispanic Ethnicity in documentation)
private static final List<String> _BPC_OTHER_SPANISH = Arrays.asList("ESP", "AND");
// birthplace countries corresponding to NHIA of DOMINICAN-REPUBLIC (under the High Probability of Hispanic Ethnicity in documentation)
private static final List<String> _BPC_DOMINICAN_REP = Collections.singletonList("DOM");
// race being excluded from Indirect Identification
private static final List<String> _RACE_EXCLUDED = Arrays.asList("03", "06", "07");
// special Asian and Pacific Islander
private static final List<String> _RACE_PACIFIC = Arrays.asList("96", "97");
// cached lookups; declared without an initializer here, so they are assigned somewhere else in the class
// NOTE(review): element type assumed to be String -- confirm against the code that populates these sets
private static Set<String> _LOW_HISP_ETHN_COUNTIES;
private static Set<String> _HEAVILY_HISPANIC_NAMES;
private static Set<String> _RARELY_HISPANIC_NAMES;
/**
* Calculates the NHIA value for the provided record and option.
*
* The provided record doesn't need to contain all the input variables, but the algorithm will use the following ones:
* <ul>
* <li>spanishHispanicOrigin (#190)</li>
* <li>birthplaceCountry (#254)</li>
* <li>race1 (#160)</li>
* <li>ihs (#192)</li>
* <li>addressAtDxState (#80)</li>
* <li>addressAtDxCounty (#90)</li>
* <li>sex (#220)</li>
* <li>nameLast (#2230)</li>
* <li>nameMaiden (#2390)</li>
* </ul>
* All those properties are defined as constants in this class.
*
* Note that some of those properties are part of the full NAACCR Abstract; providing only Incidence information is not enough
* for this algorithm.
*
* The options are also defined as constants in this class:
* <ul>
* <li>0: always apply the surname portion of the algorithm (corresponds to the 'All Records' option in the SAS algorithm)</li>
* <li>1: run the surname portion only if Spanish/Hispanic Origin is 7 or 9 (corresponds to the 'OPTION1' option in the SAS algorithm)</li>
* <li>2: run the surname portion only if Spanish/Hispanic Origin is 7 and convert cases with a Spanish/Hispanic Origin of 9 to 0
* (corresponds to the 'OPTION2' option in the SAS algorithm)</li>
* </ul>
* If you are not sure which option to provide, use NHIA_OPTION_SEVEN_AND_NINE since this is the default that the SAS algorithm uses.
* @param record a map of properties representing a NAACCR line
* @param option option indicating when to apply the Indirect Identification based on names for spanish/hispanic origin values of 0, 7 and 9
* @return the computed NHIA value
*/
public static NhiaResultsDto computeNhia(Map<String, String> record, String option) {
    // Copy every property the algorithm reads from the map into the input DTO.
    // Map.get returns null for a key that is absent, so missing properties are passed on as null.
    NhiaInputRecordDto input = new NhiaInputRecordDto();
    input.setSpanishHispanicOrigin(record.get(PROP_SPANISH_HISPANIC_ORIGIN));
    input.setBirthplaceCountry(record.get(PROP_BIRTH_PLACE_COUNTRY));
    input.setSex(record.get(PROP_SEX));
    input.setRace1(record.get(PROP_RACE1));
    input.setIhs(record.get(PROP_IHS));
    input.setNameLast(record.get(PROP_NAME_LAST));
    input.setNameMaiden(record.get(PROP_NAME_MAIDEN));
    input.setCountyAtDx(record.get(PROP_COUNTY_DX));
    input.setStateAtDx(record.get(PROP_STATE_DX));
    // The computation itself is done by the overload that takes the input DTO.
    return computeNhia(input, option);
}
/**
* Calculates the NHIA value for the provided patient and option.
*
* The provided patient doesn't need to contain all the input variables, but the algorithm will use the following ones:
* <ul>
* <li>spanishHispanicOrigin (#190)</li>
* <li>birthplaceCountry (#254)</li>
* <li>race1 (#160)</li>
* <li>ihs (#192)</li>
* <li>addressAtDxState (#80)</li>
* <li>addressAtDxCounty (#90)</li>
* <li>sex (#220)</li>
* <li>nameLast (#2230)</li>
* <li>nameMaiden (#2390)</li>
* </ul>
* All those properties are defined as constants in this class.
*
* Note that some of those properties are part of the full NAACCR Abstract; providing only Incidence information is not enough
* for this algorithm.
*
* The options are also defined as constants in this class:
* <ul>
* <li>0: always apply the surname portion of the algorithm (corresponds to the 'All Records' option in the SAS algorithm)</li>
* <li>1: run the surname portion only if Spanish/Hispanic Origin is 7 or 9 (corresponds to the 'OPTION1' option in the SAS algorithm)</li>
* <li>2: run the surname portion only if Spanish/Hispanic Origin is 7 and convert cases with a Spanish/Hispanic Origin of 9 to 0
* (corresponds to the 'OPTION2' option in the SAS algorithm)</li>
* </ul>
* If you are not sure which option to provide, use NHIA_OPTION_SEVEN_AND_NINE since this is the default that the SAS algorithm uses.
* @param patient a List of map of properties representing a NAACCR line
* @param option option indicating when to apply the Indirect Identification based on names for spanish/hispanic origin values of 0, 7 and 9
* @return the computed NHIA value
*/
public static NhiaResultsDto computeNhia(List