/*
 * com.imsweb.algorithms.internal.CountryData Maven / Gradle / Ivy
 * Copyright (C) 2018 Information Management Services, Inc.
 */
package com.imsweb.algorithms.internal;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.zip.GZIPInputStream;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
/**
 * Several algorithms need to use data related to either states, counties or census tract codes.
 * To optimize the memory usage, this class was introduced so those algorithms can use a shared
 * data structure.
 * <p>
 * This class is the root of the data structure which goes like this:
 * CountryData -> StateData -> CountyData -> CensusData
 * <p>
 * Algorithms can register data at either one of those levels.
 * <p>
 * This class provides concurrency to safely register the data and use it in a thread-safe manner.
 * But it is the responsibility of the algorithms to check that the data has been properly
 * initialized before it is accessed.
 */
public class CountryData {
// classpath location of the gzipped, fixed-width SEER census tract data file
private static final String _SEER_CENSUS_DATA_FILE = "tract/tract-data.txt.gz";

// layout of one record of the tract data file: field name -> field width (in characters);
// the parsing code walks this map in iteration order, hence the LinkedHashMap
private static final Map<String, Integer> _TRACT_FIELDS = new LinkedHashMap<>();

static {
    _TRACT_FIELDS.put("stateAbbreviation", 2);
    _TRACT_FIELDS.put("countyFips", 3);
    _TRACT_FIELDS.put("censusTract", 6);
    // 370 = 10 years (TRACT_YEAR_MIN_VAL to TRACT_YEAR_MAX_VAL) x 37 characters (sum of the widths in _TRACT_YEAR_BASED_FIELDS)
    _TRACT_FIELDS.put("yearData", 370);
    _TRACT_FIELDS.put("ruca2000", 1);
    _TRACT_FIELDS.put("ruca2010", 1);
    _TRACT_FIELDS.put("uric2000", 1);
    _TRACT_FIELDS.put("uric2010", 1);
    _TRACT_FIELDS.put("cancerReportingZone", 10);
    _TRACT_FIELDS.put("cancerReportingZoneTractCert", 1);
    _TRACT_FIELDS.put("naaccrPovertyIndicator9504", 1);
    _TRACT_FIELDS.put("naaccrPovertyIndicator0507", 1);
    _TRACT_FIELDS.put("npcrEphtSubcounty5k", 11);
    _TRACT_FIELDS.put("npcrEphtSubcounty20k", 11);
    _TRACT_FIELDS.put("npcrEphtSubcounty50k", 11);
    _TRACT_FIELDS.put("tractEstCongressDist", 2);
    _TRACT_FIELDS.put("sviOverallStateBased", 5);
}
public static Map getTractFields() {
return Collections.unmodifiableMap(_TRACT_FIELDS);
// layout of the per-year portion of a record ("yearData" field): field name -> field width (in characters);
// this layout repeats once per year and is walked in iteration order, hence the LinkedHashMap
private static final Map<String, Integer> _TRACT_YEAR_BASED_FIELDS = new LinkedHashMap<>();

static {
    _TRACT_YEAR_BASED_FIELDS.put("yostUsBasedQuintile", 1);
    _TRACT_YEAR_BASED_FIELDS.put("yostStateBasedQuintile", 1);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyAllRaces", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyWhite", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyBlack", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyAmIndian", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyAsian", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyWhiteNotHisp", 5);
    _TRACT_YEAR_BASED_FIELDS.put("percentBelowPovertyHisp", 5);
}
public static Map getTractYearBasedFields() {
return Collections.unmodifiableMap(_TRACT_YEAR_BASED_FIELDS);
// first year (inclusive) for which year-based tract data is read from the data file
public static final int TRACT_YEAR_MIN_VAL = 2008;
// last year (inclusive) for which year-based tract data is read from the data file
public static final int TRACT_YEAR_MAX_VAL = 2017;
// singleton instance
private static final CountryData _INSTANCE = new CountryData();
// unique access to the singleton
public static CountryData getInstance() {
return _INSTANCE;
// shared internal data structure; states mapped by state abbreviation
private final Map<String, StateData> _stateData = new HashMap<>();

// the states that had their census-related data initialized
private final Set<String> _stateTractDataInitialized = new HashSet<>();

// the states that had their year-based census-related data initialized
private final Set<String> _stateTractDataYearBasedInitialized = new HashSet<>();

// the states that had their Continuum 1993/2003/2013 data initialized
private final Set<String> _continuumStateInitialized = new HashSet<>();

// whether the county at diagnosis analysis data has been initialized
private boolean _countyAtDxAnalysisInitialized = false;

// whether the PRCDA data has been initialized
private boolean _prcdaInitialized = false;

// whether the UIHO data has been initialized
private boolean _uihoInitialized = false;

// internal lock to control concurrency
private final ReentrantReadWriteLock _lock = new ReentrantReadWriteLock();
* Unregister all data.
public void uninitializeAllData() {
try {
_countyAtDxAnalysisInitialized = false;
_prcdaInitialized = false;
_uihoInitialized = false;
finally {
public boolean isTractDataInitialized(String requestedState) {
try {
return _stateTractDataInitialized.contains(requestedState);
finally {
public void initializeTractData(String requestedState) {
try {
if (!_stateTractDataInitialized.contains(requestedState)) {
try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(_SEER_CENSUS_DATA_FILE)) {
if (is == null)
throw new IllegalStateException("Unable to get SEER census tract data file");
try (BufferedReader reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(is), StandardCharsets.US_ASCII))) {
String line = reader.readLine();
while (line != null) {
int index = 0;
Map values = new HashMap<>();
for (Entry entry : _TRACT_FIELDS.entrySet()) {
values.put(entry.getKey(), line.substring(index, index + entry.getValue()));
index += entry.getValue();
String state = values.get("stateAbbreviation");
String county = values.get("countyFips");
String tract = values.get("censusTract");
if (Objects.equals(state, requestedState)) {
StateData stateData = _stateData.computeIfAbsent(state, k -> new StateData());
CountyData countyData = stateData.getData().computeIfAbsent(county, k -> new CountyData());
CensusData censusData = countyData.getData().computeIfAbsent(tract, k -> new CensusData());
// NAACCR Poverty Indicator (only "old" years, the "recent" years are computed from the ACS poverty percentages for "all races")
censusData.setCommutingArea2000(Objects.toString(StringUtils.trimToNull(values.get("ruca2000")), "9"));
censusData.setCommutingArea2010(Objects.toString(StringUtils.trimToNull(values.get("ruca2010")), "9"));
censusData.setIndicatorCode2000(Objects.toString(StringUtils.trimToNull(values.get("uric2000")), "9"));
censusData.setIndicatorCode2010(Objects.toString(StringUtils.trimToNull(values.get("uric2010")), "9"));
// NPCR EPHT SubCounty
censusData.setEpht2010GeoId5k(StringUtils.leftPad(StringUtils.trimToNull(values.get("npcrEphtSubcounty5k")), 11, '0'));
censusData.setEpht2010GeoId20k(StringUtils.leftPad(StringUtils.trimToNull(values.get("npcrEphtSubcounty20k")), 11, '0'));
censusData.setEpht2010GeoId50k(StringUtils.leftPad(StringUtils.trimToNull(values.get("npcrEphtSubcounty50k")), 11, '0'));
// Cancer Reporting Zone
// Tract-estimate Congressional Districts
// Social Vulnerability Index (SVI)
line = reader.readLine();
catch (IOException e) {
throw new IllegalStateException("Unable to initialize tract data", e);
finally {
public StateData getTractData(String state) {
try {
if (!isTractDataInitialized(state))
throw new IllegalStateException("Census tract data cannot be access before it has been initialized!");
return _stateData.get(state);
finally {
public boolean isYearBasedTractDataInitialized(String requestedState) {
try {
return _stateTractDataYearBasedInitialized.contains(requestedState);
finally {
public void initializeYearBasedTractData(String requestedState) {
try {
if (!_stateTractDataYearBasedInitialized.contains(requestedState)) {
try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(_SEER_CENSUS_DATA_FILE)) {
if (is == null)
throw new IllegalStateException("Unable to get year-based SEER census tract data file");
try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(new GZIPInputStream(is), StandardCharsets.US_ASCII))) {
String line = reader.readLine();
while (line != null) {
int lineNum = reader.getLineNumber();
int index = 0;
Map values = new HashMap<>();
for (Entry entry : _TRACT_FIELDS.entrySet()) {
values.put(entry.getKey(), line.substring(index, index + entry.getValue()));
index += entry.getValue();
String state = values.get("stateAbbreviation");
String county = values.get("countyFips");
String tract = values.get("censusTract");
String rawYearData = values.get("yearData");
if (Objects.equals(state, requestedState)) {
StateData stateData = _stateData.computeIfAbsent(state, k -> new StateData());
CountyData countyData = stateData.getData().computeIfAbsent(county, k -> new CountyData());
CensusData censusData = countyData.getData().computeIfAbsent(tract, k -> new CensusData());
index = 0;
for (int year = TRACT_YEAR_MIN_VAL; year <= TRACT_YEAR_MAX_VAL; year++) {
YearData yearData = censusData.getData().computeIfAbsent(String.valueOf(year), k -> new YearData());
Map yearValues = new HashMap<>();
for (Entry entry : _TRACT_YEAR_BASED_FIELDS.entrySet()) {
yearValues.put(entry.getKey(), rawYearData.substring(index, index + entry.getValue()));
index += entry.getValue();
// ACS Poverty
yearData.setAcsPctPovAllRaces(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyAllRaces"))));
yearData.setAcsPctPovWhite(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyWhite"))));
yearData.setAcsPctPovBlack(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyBlack"))));
yearData.setAcsPctPovAIAN(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyAmIndian"))));
yearData.setAcsPctPovAsianNHOPI(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyAsian"))));
yearData.setAcsPctPovWhiteNonHisp(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyWhiteNotHisp"))));
yearData.setAcsPctPovHispanic(cleanPoverty(lineNum, StringUtils.trim(yearValues.get("percentBelowPovertyHisp"))));
line = reader.readLine();
catch (IOException e) {
throw new IllegalStateException("Unable to initialize year based tract data", e);
finally {
private String cleanPoverty(int lineNum, String value) {
if (value.isEmpty())
return value;
if (value.length() != 5 || !NumberUtils.isDigits(value))
throw new IllegalStateException("Invalid ACS poverty value at line " + lineNum + ": " + value);
String left = value.substring(0, 3);
String right = value.substring(3);
if (left.startsWith("00"))
left = left.substring(2);
else if (left.startsWith("0"))
left = left.substring(1);
return left + "." + right;
public StateData getYearBasedTractData(String state) {
try {
if (!isYearBasedTractDataInitialized(state))
throw new IllegalStateException("Year-based census tract data cannot be access before it has been initialized!");
return _stateData.get(state);
finally {
* Returns requested state data to be used by the Continuum algorithm.
public StateData getContinuumStateData(String state) {
try {
if (!_continuumStateInitialized.contains(state))
throw new IllegalStateException("Continuum data cannot be access before it has been initialized!");
return _stateData.get(state);
finally {
* Returns true if the Continuum data has been initialized, false otherwise.
public boolean isContinuumDataInitialized(String requestedState) {
try {
return _continuumStateInitialized.contains(requestedState);
finally {
/** Initializes the given Continuum data (this call will make all other access to the data structure block). */
// NOTE(review): this method looks truncated in this copy of the file - the type arguments of 'data', the statement
// controlled by the state comparison below, whatever is done with 'countyData', the update of
// _continuumStateInitialized, the lock handling and the closing braces are not visible; confirm against the original.
public void initializeContinuumData(String requestedState, Map> data) {
try {
// nothing is done when the requested state is already flagged as initialized
if (!_continuumStateInitialized.contains(requestedState)) {
for (Map.Entry> stateEntry : data.entrySet()) {
// presumably the entries of the other states are meant to be skipped here - TODO confirm
if (!Objects.equals(stateEntry.getKey(), requestedState))
// the state entry is created in the shared structure if it does not exist yet
StateData stateData = _stateData.computeIfAbsent(stateEntry.getKey(), k -> new StateData());
for (Map.Entry countyEntry : stateEntry.getValue().entrySet()) {
// same for the county entry, keyed by the key of the incoming county entry
CountyData countyData = stateData.getData().computeIfAbsent(countyEntry.getKey(), k -> new CountyData());
finally {
* Returns requested state data to be used by the county at diagnosis analysis algorithm
public StateData getCountyAtDxAnalysisData(String state) {
try {
if (!_countyAtDxAnalysisInitialized)
throw new IllegalStateException("County at diagnosis analysis data cannot be access before it has been initialized!");
return _stateData.get(state);
finally {
public boolean isCountyAtDxAnalysisInitialized() {
try {
return _countyAtDxAnalysisInitialized;
finally {
// Registers the states and counties of the given county at diagnosis analysis data in the shared structure.
// NOTE(review): this method looks truncated in this copy of the file - the type arguments of 'data', the lock
// handling and the closing braces are not visible, and the county values of 'data' are never read; confirm
// against the original.
public void initializeCountyAtDxAnalysisData(Map> data) {
try {
// nothing is done when the data is already flagged as initialized
if (!_countyAtDxAnalysisInitialized) {
for (Map.Entry> stateEntry : data.entrySet()) {
// the state entry is created in the shared structure if it does not exist yet
StateData stateData = _stateData.computeIfAbsent(stateEntry.getKey(), k -> new StateData());
for (Map.Entry countyEntry : stateEntry.getValue().entrySet())
// same for the county entry, keyed by the key of the incoming county entry
stateData.getData().computeIfAbsent(countyEntry.getKey(), k -> new CountyData());
_countyAtDxAnalysisInitialized = true;
finally {
* Returns requested state data to be used by the PRCDA algorithm.
public StateData getPrcdaData(String state) {
try {
if (!_prcdaInitialized)
throw new IllegalStateException("PRCDA data cannot be access before it has been initialized!");
return _stateData.get(state);
finally {
* Returns true if the PRCDA data has been initialized, false otherwise.
public boolean isPrcdaDataInitialized() {
try {
return _prcdaInitialized;
finally {
/** Initializes the given PRCDA data (this call will make all other access to the data structure block). */
// NOTE(review): this method looks truncated in this copy of the file - the type arguments of 'data', whatever is
// done with 'countyData', the lock handling and the closing braces are not visible; confirm against the original.
public void initializePrcdaData(Map> data) {
try {
// nothing is done when the data is already flagged as initialized
if (!_prcdaInitialized) {
for (Map.Entry> stateEntry : data.entrySet()) {
// the state entry is created in the shared structure if it does not exist yet
StateData stateData = _stateData.computeIfAbsent(stateEntry.getKey(), k -> new StateData());
for (Map.Entry countyEntry : stateEntry.getValue().entrySet()) {
// same for the county entry, keyed by the key of the incoming county entry
CountyData countyData = stateData.getData().computeIfAbsent(countyEntry.getKey(), k -> new CountyData());
_prcdaInitialized = true;
finally {
* Returns requested state data to be used by the UIHO algorithm.
public StateData getUihoData(String state) {
try {
if (!_uihoInitialized)
throw new IllegalStateException("UIHO data cannot be access before it has been initialized!");
return _stateData.get(state);
finally {
* Returns true if the UIHO data has been initialized, false otherwise.
public boolean isUihoDataInitialized() {
try {
return _uihoInitialized;
finally {
/** Initializes the given UIHO data (this call will make all other access to the data structure block). */
// NOTE(review): this method looks truncated in this copy of the file - the type arguments of 'data', whatever is
// done with 'countyData', the lock handling and the closing braces are not visible; confirm against the original.
public void initializeUihoData(Map> data) {
try {
// nothing is done when the data is already flagged as initialized
if (!_uihoInitialized) {
for (Map.Entry> stateEntry : data.entrySet()) {
// the state entry is created in the shared structure if it does not exist yet
StateData stateData = _stateData.computeIfAbsent(stateEntry.getKey(), k -> new StateData());
for (Map.Entry countyEntry : stateEntry.getValue().entrySet()) {
// same for the county entry, keyed by the key of the incoming county entry
CountyData countyData = stateData.getData().computeIfAbsent(countyEntry.getKey(), k -> new CountyData());
_uihoInitialized = true;
finally {
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy