// com.imsweb.algorithms.internal.CountryData Maven / Gradle / Ivy
/*
* Copyright (C) 2018 Information Management Services, Inc.
*/
package com.imsweb.algorithms.internal;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.zip.GZIPInputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
/**
* Several algorithms need to use data related to either states, counties or census tract codes.
* To optimize the memory usage, this class was introduced so those algorithms can use a shared
* data structure.
*
* This class is the root of the data structure which goes like this:
* CountryData -> StateData -> CountyData -> CensusData
* Algorithms can register data at either one of those levels.
*
* This class provides concurrency to safely register the data and use it in a thread-safe manner.
* But it is the responsibility of the algorithms to check that the data has been properly
* initialized before it's being accessed.
*/
@SuppressWarnings("BooleanMethodIsAlwaysInverted")
public class CountryData {
// resource path of the gzipped, fixed-width SEER census tract data file (resolved through the context class loader)
private static final String _SEER_CENSUS_DATA_FILE = "tract/tract-data.txt.gz";

// fixed-width layout of one line of the tract data file: field name -> width in characters;
// the insertion order is the column order, which is why an ordered map is required
private static final Map<String, Integer> _TRACT_FIELDS = new LinkedHashMap<>();

static {
    _TRACT_FIELDS.put("stateAbbreviation", 2);
    _TRACT_FIELDS.put("countyFips", 3);
    _TRACT_FIELDS.put("censusTract", 6);
    // one 370-character slot that is sliced per year using the year-based layout
    _TRACT_FIELDS.put("yearData", 370);
    _TRACT_FIELDS.put("ruca2000", 1);
    _TRACT_FIELDS.put("ruca2010", 1);
    _TRACT_FIELDS.put("uric2000", 1);
    _TRACT_FIELDS.put("uric2010", 1);
    _TRACT_FIELDS.put("cancerReportingZone", 10);
    _TRACT_FIELDS.put("cancerReportingZoneTractCert", 1);
    _TRACT_FIELDS.put("naaccrPovertyIndicator9504", 1);
    _TRACT_FIELDS.put("naaccrPovertyIndicator0507", 1);
    _TRACT_FIELDS.put("npcrEphtSubcounty5k", 11);
    _TRACT_FIELDS.put("npcrEphtSubcounty20k", 11);
    _TRACT_FIELDS.put("npcrEphtSubcounty50k", 11);
    _TRACT_FIELDS.put("tractEstCongressDist", 2);
    _TRACT_FIELDS.put("sviOverallStateBased", 5);
}

/**
 * Returns the fixed-width layout of a line of the tract data file.
 *
 * @return a read-only view mapping each field name to its width in characters, iterated in column order
 */
public static Map<String, Integer> getTractFields() {
    return Collections.unmodifiableMap(_TRACT_FIELDS);
}
// fixed-width layout of the data of a single year inside the "yearData" slot: field name -> width
// in characters; the insertion order is the column order, which is why an ordered map is required
private static final Map<String, Integer> _TRACT_YEAR_BASED_FIELDS = new LinkedHashMap<>();

static {
    _TRACT_YEAR_BASED_FIELDS.put("yostUsBasedQuintile", 1);
    _TRACT_YEAR_BASED_FIELDS.put("yostStateBasedQuintile", 1);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyAllRaces", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyWhite", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyBlack", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyAmIndian", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyAsian", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyWhiteNotHisp", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyHisp", 5);
}

/**
 * Returns the fixed-width layout of the per-year portion of a tract data line.
 *
 * @return a read-only view mapping each year-based field name to its width in characters, iterated in column order
 */
public static Map<String, Integer> getTractYearBasedFields() {
    return Collections.unmodifiableMap(_TRACT_YEAR_BASED_FIELDS);
}
// first year (inclusive) for which year-based tract data is read from the data file
public static final int TRACT_YEAR_MIN_VAL = 2008;
// last year (inclusive) for which year-based tract data is read from the data file
public static final int TRACT_YEAR_MAX_VAL = 2017;
// singleton instance
private static final CountryData _INSTANCE = new CountryData();
/**
 * Unique access to the singleton.
 *
 * @return the single shared instance of this class
 */
public static CountryData getInstance() {
return _INSTANCE;
}
// shared internal data structure; states mapped by state abbreviation
private final Map<String, StateData> _stateData = new HashMap<>();
// the states that had their census-related data initialized
private final Set<String> _stateTractDataInitialized = new HashSet<>();
// the states that had their year-based census-related data initialized
private final Set<String> _stateTractDataYearBasedInitialized = new HashSet<>();
// the states that had their Continuum 1993/2003/2013 data initialized
private final Set<String> _continuumStateInitialized = new HashSet<>();
// whether the county at diagnosis analysis data has been registered (for all states at once)
private boolean _countyAtDxAnalysisInitialized = false;
// whether the PRCDA data has been registered (for all states at once)
private boolean _prcdaInitialized = false;
// whether the UIHO data has been registered (for all states at once)
private boolean _uihoInitialized = false;
// internal lock to control concurrency; every field above is only read or written while holding it
private final ReentrantReadWriteLock _lock = new ReentrantReadWriteLock();
/**
 * Drops all registered data and resets every initialization marker.
 * The whole reset happens while holding the write lock.
 */
public void uninitializeAllData() {
    ReentrantReadWriteLock.WriteLock exclusive = _lock.writeLock();
    exclusive.lock();
    try {
        // global markers
        _uihoInitialized = false;
        _prcdaInitialized = false;
        _countyAtDxAnalysisInitialized = false;
        // per-state markers
        _continuumStateInitialized.clear();
        _stateTractDataYearBasedInitialized.clear();
        _stateTractDataInitialized.clear();
        // the data itself
        _stateData.clear();
    }
    finally {
        exclusive.unlock();
    }
}
/**
 * Tells whether the census tract data of the given state has been registered.
 *
 * @param requestedState state abbreviation to look up
 * @return true if the state is recorded as having its tract data initialized, false otherwise
 */
public boolean isTractDataInitialized(String requestedState) {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        boolean known = _stateTractDataInitialized.contains(requestedState);
        return known;
    }
    finally {
        shared.unlock();
    }
}
/**
 * Registers the (non year-based) census tract data of the requested state, read from the
 * gzipped fixed-width tract data resource. The whole file is scanned and only the lines whose
 * state field equals the requested state are kept. Nothing is read if the state has already
 * been registered. This call holds the write lock for its whole duration.
 *
 * @param requestedState state abbreviation, compared to the first field of every line
 * @throws IllegalStateException if the resource cannot be found or cannot be read
 */
public void initializeTractData(String requestedState) {
    _lock.writeLock().lock();
    try {
        if (!_stateTractDataInitialized.contains(requestedState)) {
            try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(_SEER_CENSUS_DATA_FILE)) {
                if (is == null)
                    throw new IllegalStateException("Unable to get SEER census tract data file");

                try (BufferedReader reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(is), StandardCharsets.US_ASCII))) {
                    String line = reader.readLine();
                    while (line != null) {

                        // slice the line into its fixed-width fields, in column order
                        int index = 0;
                        Map<String, String> values = new HashMap<>();
                        for (Entry<String, Integer> entry : _TRACT_FIELDS.entrySet()) {
                            values.put(entry.getKey(), line.substring(index, index + entry.getValue()));
                            index += entry.getValue();
                        }

                        String state = values.get("stateAbbreviation");
                        String county = values.get("countyFips");
                        String tract = values.get("censusTract");

                        if (Objects.equals(state, requestedState)) {
                            // reuse the nodes other algorithms may already have created for this state/county/tract
                            StateData stateData = _stateData.computeIfAbsent(state, k -> new StateData());
                            CountyData countyData = stateData.getData().computeIfAbsent(county, k -> new CountyData());
                            CensusData censusData = countyData.getData().computeIfAbsent(tract, k -> new CensusData());

                            // NAACCR Poverty Indicator (only "old" years, the "recent" years are computed from the ACS poverty percentages for "all races")
                            censusData.setNaaccrPovertyIndicator9504(StringUtils.trimToNull(values.get("naaccrPovertyIndicator9504")));
                            censusData.setNaaccrPovertyIndicator0507(StringUtils.trimToNull(values.get("naaccrPovertyIndicator0507")));

                            // RUCA (a blank value is stored as "9")
                            censusData.setCommutingArea2000(Objects.toString(StringUtils.trimToNull(values.get("ruca2000")), "9"));
                            censusData.setCommutingArea2010(Objects.toString(StringUtils.trimToNull(values.get("ruca2010")), "9"));

                            // URIC (a blank value is stored as "9")
                            censusData.setIndicatorCode2000(Objects.toString(StringUtils.trimToNull(values.get("uric2000")), "9"));
                            censusData.setIndicatorCode2010(Objects.toString(StringUtils.trimToNull(values.get("uric2010")), "9"));

                            // NPCR EPHT SubCounty (left-padded with zeros to 11 characters; a blank value stays null)
                            censusData.setEpht2010GeoId5k(StringUtils.leftPad(StringUtils.trimToNull(values.get("npcrEphtSubcounty5k")), 11, '0'));
                            censusData.setEpht2010GeoId20k(StringUtils.leftPad(StringUtils.trimToNull(values.get("npcrEphtSubcounty20k")), 11, '0'));
                            censusData.setEpht2010GeoId50k(StringUtils.leftPad(StringUtils.trimToNull(values.get("npcrEphtSubcounty50k")), 11, '0'));

                            // Cancer Reporting Zone
                            censusData.setCancerReportingZone(StringUtils.trimToNull(values.get("cancerReportingZone")));
                            censusData.setCancerReportingZoneTractCert(StringUtils.trimToNull(values.get("cancerReportingZoneTractCert")));

                            // Tract-estimate Congressional Districts
                            censusData.setTractEstCongressDist(StringUtils.trimToNull(values.get("tractEstCongressDist")));

                            // Social Vulnerability Index (SVI)
                            censusData.setSviOverallStateBased(StringUtils.trimToNull(values.get("sviOverallStateBased")));
                        }

                        line = reader.readLine();
                    }
                }
            }
            catch (IOException e) {
                throw new IllegalStateException("Unable to initialize tract data", e);
            }
        }

        // only reached when no exception was thrown, so a failed load is not recorded as initialized
        _stateTractDataInitialized.add(requestedState);
    }
    finally {
        _lock.writeLock().unlock();
    }
}
/**
 * Returns the requested state data to be used by the census tract based algorithms.
 *
 * @param state state abbreviation
 * @return the shared data registered for that state, or null if nothing is registered under that abbreviation
 * @throws IllegalStateException if the tract data of that state has not been initialized
 */
public StateData getTractData(String state) {
    _lock.readLock().lock();
    try {
        // the read lock is already held, so the set is checked directly (same as the other getters)
        // instead of going through isTractDataInitialized() which would acquire the lock a second time
        if (!_stateTractDataInitialized.contains(state))
            throw new IllegalStateException("Census tract data cannot be access before it has been initialized!");
        return _stateData.get(state);
    }
    finally {
        _lock.readLock().unlock();
    }
}
/**
 * Tells whether the year-based census tract data of the given state has been registered.
 *
 * @param requestedState state abbreviation to look up
 * @return true if the state is recorded as having its year-based tract data initialized, false otherwise
 */
public boolean isYearBasedTractDataInitialized(String requestedState) {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        boolean known = _stateTractDataYearBasedInitialized.contains(requestedState);
        return known;
    }
    finally {
        shared.unlock();
    }
}
/**
 * Registers the year-based census tract data (Yost quintiles and ACS poverty percentages) of the
 * requested state, read from the gzipped fixed-width tract data resource. The whole file is
 * scanned and only the lines whose state field equals the requested state are kept. Nothing is
 * read if the state has already been registered. This call holds the write lock for its whole duration.
 *
 * @param requestedState state abbreviation, compared to the first field of every line
 * @throws IllegalStateException if the resource cannot be found or read, or if a poverty value is malformed
 */
public void initializeYearBasedTractData(String requestedState) {
    _lock.writeLock().lock();
    try {
        if (!_stateTractDataYearBasedInitialized.contains(requestedState)) {
            try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(_SEER_CENSUS_DATA_FILE)) {
                if (is == null)
                    throw new IllegalStateException("Unable to get year-based SEER census tract data file");

                try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(new GZIPInputStream(is), StandardCharsets.US_ASCII))) {
                    String line = reader.readLine();
                    while (line != null) {
                        // number of the line that was just read; only used in error messages
                        int lineNum = reader.getLineNumber();

                        // slice the line into its fixed-width fields, in column order
                        int index = 0;
                        Map<String, String> values = new HashMap<>();
                        for (Entry<String, Integer> entry : _TRACT_FIELDS.entrySet()) {
                            values.put(entry.getKey(), line.substring(index, index + entry.getValue()));
                            index += entry.getValue();
                        }

                        String state = values.get("stateAbbreviation");
                        String county = values.get("countyFips");
                        String tract = values.get("censusTract");
                        String rawYearData = values.get("yearData");

                        if (Objects.equals(state, requestedState)) {
                            // reuse the nodes other algorithms may already have created for this state/county/tract
                            StateData stateData = _stateData.computeIfAbsent(state, k -> new StateData());
                            CountyData countyData = stateData.getData().computeIfAbsent(county, k -> new CountyData());
                            CensusData censusData = countyData.getData().computeIfAbsent(tract, k -> new CensusData());

                            // the year slot contains the year-based fields of every year, back to back, in ascending
                            // year order; the offset is reset once per line and keeps advancing across the years
                            index = 0;
                            for (int year = TRACT_YEAR_MIN_VAL; year <= TRACT_YEAR_MAX_VAL; year++) {
                                YearData yearData = censusData.getData().computeIfAbsent(String.valueOf(year), k -> new YearData());

                                Map<String, String> yearValues = new HashMap<>();
                                for (Entry<String, Integer> entry : _TRACT_YEAR_BASED_FIELDS.entrySet()) {
                                    yearValues.put(entry.getKey(), rawYearData.substring(index, index + entry.getValue()));
                                    index += entry.getValue();
                                }

                                // YOST
                                yearData.setYostQuintileState(StringUtils.trim(yearValues.get("yostStateBasedQuintile")));
                                yearData.setYostQuintileUS(StringUtils.trim(yearValues.get("yostUsBasedQuintile")));

                                // ACS Poverty
                                yearData.setAcsPctPovAllRaces(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyAllRaces"))));
                                yearData.setAcsPctPovWhite(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyWhite"))));
                                yearData.setAcsPctPovBlack(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyBlack"))));
                                yearData.setAcsPctPovAIAN(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyAmIndian"))));
                                yearData.setAcsPctPovAsianNHOPI(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyAsian"))));
                                yearData.setAcsPctPovWhiteNonHisp(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyWhiteNotHisp"))));
                                yearData.setAcsPctPovHispanic(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyHisp"))));
                            }
                        }

                        line = reader.readLine();
                    }
                }
            }
            catch (IOException e) {
                throw new IllegalStateException("Unable to initialize year based tract data", e);
            }
        }

        // only reached when no exception was thrown, so a failed load is not recorded as initialized
        _stateTractDataYearBasedInitialized.add(requestedState);
    }
    finally {
        _lock.writeLock().unlock();
    }
}
/**
 * Turns a raw 5-digit poverty value into a decimal string: the first three digits are the
 * integer part (at most two leading zeros are dropped) and the last two are the decimals.
 * An empty value is returned as-is.
 *
 * @param lineNum line number reported in the error message
 * @param value trimmed raw value, must not be null
 * @return the formatted value, or the empty string if the value was empty
 * @throws IllegalStateException if the value is not empty and is not made of exactly five digits
 */
private String cleanPoverty(int lineNum, String value) {
    if (value.isEmpty())
        return value;

    boolean wellFormed = value.length() == 5 && NumberUtils.isDigits(value);
    if (!wellFormed)
        throw new IllegalStateException("Invalid ACS poverty value at line " + lineNum + ": " + value);

    // how many leading zeros of the integer part are dropped (never the third digit)
    int skipped = 0;
    if (value.startsWith("00"))
        skipped = 2;
    else if (value.startsWith("0"))
        skipped = 1;

    String integerPart = value.substring(skipped, 3);
    String decimalPart = value.substring(3);
    return integerPart + "." + decimalPart;
}
/**
 * Returns the requested state data to be used by the year-based census tract algorithms.
 *
 * @param state state abbreviation
 * @return the shared data registered for that state, or null if nothing is registered under that abbreviation
 * @throws IllegalStateException if the year-based tract data of that state has not been initialized
 */
public StateData getYearBasedTractData(String state) {
    _lock.readLock().lock();
    try {
        // the read lock is already held, so the set is checked directly (same as the other getters)
        // instead of going through isYearBasedTractDataInitialized() which would acquire the lock a second time
        if (!_stateTractDataYearBasedInitialized.contains(state))
            throw new IllegalStateException("Year-based census tract data cannot be access before it has been initialized!");
        return _stateData.get(state);
    }
    finally {
        _lock.readLock().unlock();
    }
}
/**
 * Provides the data of a state to the Continuum algorithm.
 *
 * @param state state abbreviation
 * @return the shared data registered for that state, or null if nothing is registered under that abbreviation
 * @throws IllegalStateException if the Continuum data of that state has not been initialized
 */
public StateData getContinuumStateData(String state) {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        if (_continuumStateInitialized.contains(state))
            return _stateData.get(state);
        throw new IllegalStateException("Continuum data cannot be access before it has been initialized!");
    }
    finally {
        shared.unlock();
    }
}
/**
 * Tells whether the Continuum data of the given state has been registered.
 *
 * @param requestedState state abbreviation to look up
 * @return true if the state is recorded as having its Continuum data initialized, false otherwise
 */
public boolean isContinuumDataInitialized(String requestedState) {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        boolean known = _continuumStateInitialized.contains(requestedState);
        return known;
    }
    finally {
        shared.unlock();
    }
}
/**
* Initializes the given Continuum data (this call will make all other access to the data structure block).
*/
public void initializeContinuumData(String requestedState, Map> data) {
_lock.writeLock().lock();
try {
if (!_continuumStateInitialized.contains(requestedState)) {
for (Map.Entry> stateEntry : data.entrySet()) {
if (!Objects.equals(stateEntry.getKey(), requestedState))
continue;
StateData stateData = _stateData.computeIfAbsent(stateEntry.getKey(), k -> new StateData());
for (Map.Entry countyEntry : stateEntry.getValue().entrySet()) {
CountyData countyData = stateData.getData().computeIfAbsent(countyEntry.getKey(), k -> new CountyData());
countyData.setUrbanContinuum1993(countyEntry.getValue().getUrbanContinuum1993());
countyData.setUrbanContinuum2003(countyEntry.getValue().getUrbanContinuum2003());
countyData.setUrbanContinuum2013(countyEntry.getValue().getUrbanContinuum2013());
}
}
}
_continuumStateInitialized.add(requestedState);
}
finally {
_lock.writeLock().unlock();
}
}
/**
 * Provides the data of a state to the county at diagnosis analysis algorithm.
 *
 * @param state state abbreviation
 * @return the shared data registered for that state, or null if nothing is registered under that abbreviation
 * @throws IllegalStateException if the county at diagnosis analysis data has not been initialized
 */
public StateData getCountyAtDxAnalysisData(String state) {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        if (_countyAtDxAnalysisInitialized)
            return _stateData.get(state);
        throw new IllegalStateException("County at diagnosis analysis data cannot be access before it has been initialized!");
    }
    finally {
        shared.unlock();
    }
}
/**
 * Tells whether the county at diagnosis analysis data has been registered.
 *
 * @return the current value of the initialization flag, read under the read lock
 */
public boolean isCountyAtDxAnalysisInitialized() {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        boolean initialized = _countyAtDxAnalysisInitialized;
        return initialized;
    }
    finally {
        shared.unlock();
    }
}
public void initializeCountyAtDxAnalysisData(Map> data) {
_lock.writeLock().lock();
try {
if (!_countyAtDxAnalysisInitialized) {
for (Map.Entry> stateEntry : data.entrySet()) {
StateData stateData = _stateData.computeIfAbsent(stateEntry.getKey(), k -> new StateData());
for (Map.Entry countyEntry : stateEntry.getValue().entrySet())
stateData.getData().computeIfAbsent(countyEntry.getKey(), k -> new CountyData());
}
}
_countyAtDxAnalysisInitialized = true;
}
finally {
_lock.writeLock().unlock();
}
}
/**
 * Provides the data of a state to the PRCDA algorithm.
 *
 * @param state state abbreviation
 * @return the shared data registered for that state, or null if nothing is registered under that abbreviation
 * @throws IllegalStateException if the PRCDA data has not been initialized
 */
public StateData getPrcdaData(String state) {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        if (_prcdaInitialized)
            return _stateData.get(state);
        throw new IllegalStateException("PRCDA data cannot be access before it has been initialized!");
    }
    finally {
        shared.unlock();
    }
}
/**
 * Tells whether the PRCDA data has been registered.
 *
 * @return the current value of the initialization flag, read under the read lock
 */
public boolean isPrcdaDataInitialized() {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        boolean initialized = _prcdaInitialized;
        return initialized;
    }
    finally {
        shared.unlock();
    }
}
/**
* Initializes the given PRCDA data (this call will make all other access to the data structure block).
*/
public void initializePrcdaData(Map> data) {
_lock.writeLock().lock();
try {
if (!_prcdaInitialized) {
for (Map.Entry> stateEntry : data.entrySet()) {
StateData stateData = _stateData.computeIfAbsent(stateEntry.getKey(), k -> new StateData());
for (Map.Entry countyEntry : stateEntry.getValue().entrySet()) {
CountyData countyData = stateData.getData().computeIfAbsent(countyEntry.getKey(), k -> new CountyData());
countyData.setPrcda(countyEntry.getValue().getPrcda());
countyData.setPrcda2017(countyEntry.getValue().getPrcda2017());
}
}
}
_prcdaInitialized = true;
}
finally {
_lock.writeLock().unlock();
}
}
/**
 * Provides the data of a state to the UIHO algorithm.
 *
 * @param state state abbreviation
 * @return the shared data registered for that state, or null if nothing is registered under that abbreviation
 * @throws IllegalStateException if the UIHO data has not been initialized
 */
public StateData getUihoData(String state) {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        if (_uihoInitialized)
            return _stateData.get(state);
        throw new IllegalStateException("UIHO data cannot be access before it has been initialized!");
    }
    finally {
        shared.unlock();
    }
}
/**
 * Tells whether the UIHO data has been registered.
 *
 * @return the current value of the initialization flag, read under the read lock
 */
public boolean isUihoDataInitialized() {
    ReentrantReadWriteLock.ReadLock shared = _lock.readLock();
    shared.lock();
    try {
        boolean initialized = _uihoInitialized;
        return initialized;
    }
    finally {
        shared.unlock();
    }
}
/**
* Initializes the given UIHO data (this call will make all other access to the data structure block).
*/
public void initializeUihoData(Map> data) {
_lock.writeLock().lock();
try {
if (!_uihoInitialized) {
for (Map.Entry> stateEntry : data.entrySet()) {
StateData stateData = _stateData.computeIfAbsent(stateEntry.getKey(), k -> new StateData());
for (Map.Entry countyEntry : stateEntry.getValue().entrySet()) {
CountyData countyData = stateData.getData().computeIfAbsent(countyEntry.getKey(), k -> new CountyData());
countyData.setUiho(countyEntry.getValue().getUiho());
countyData.setUihoCity(countyEntry.getValue().getUihoCity());
}
}
}
_uihoInitialized = true;
}
finally {
_lock.writeLock().unlock();
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy