
com.imsweb.algorithms.internal.Utils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of algorithms Show documentation
Show all versions of algorithms Show documentation
Java implementation of cancer-related algorithms (NHIA, NAPIIA, Survival Time, etc.)
The newest version!
/*
* Copyright (C) 2019 Information Management Services, Inc.
*/
package com.imsweb.algorithms.internal;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import org.apache.commons.lang3.Range;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import de.siegmar.fastcsv.reader.CsvReader;
import de.siegmar.fastcsv.reader.CsvRecord;
import de.siegmar.fastcsv.reader.NamedCsvRecord;
import com.imsweb.algorithms.AlgorithmInput;
import com.imsweb.algorithms.Algorithms;
public final class Utils {
// Pre-compiled patterns, built once so the matching code does not recompile them on every call.

// One uppercase letter followed by zero to three digits (e.g. "C", "C1", "C12", "C123"); used by expandSites to validate a site.
private static final Pattern _SITE_PATTERN = Pattern.compile("[A-Z](\\d){0,3}");
// Exactly four digits. NOTE(review): presumably a single histology code - not used in the visible part of the file, confirm.
private static final Pattern _HIST_PATTERN = Pattern.compile("\\d{4}");
// Four digits, a dash, four digits. NOTE(review): presumably a histology range - confirm against the code using it.
private static final Pattern _HIST_RANGE_PATTERN = Pattern.compile("\\d{4}-\\d{4}");
// Exactly one digit. NOTE(review): presumably a single behavior code - confirm against the code using it.
private static final Pattern _BEH_PATTERN = Pattern.compile("\\d");
// One digit, a dash, one digit. NOTE(review): presumably a behavior range - confirm against the code using it.
private static final Pattern _BEH_RANGE_PATTERN = Pattern.compile("\\d-\\d");
/**
 * Private constructor: this class only exposes static members and is not meant to be instantiated.
 */
private Utils() {
// no instances of this class allowed!
}
/**
 * Reads a CSV resource from the context class loader of the current thread and passes every record to the consumer.
 * <br/><br/>
 * The resource is decoded as UTF-8 and parsed with FastCSV's <code>ofNamedCsvRecord</code> reader.
 * NOTE(review): per the FastCSV documentation that reader treats the first row as a header row - confirm.
 * @param file path of the resource to read, as understood by <code>ClassLoader.getResourceAsStream</code>
 * @param consumer callback invoked once per record, in the order of the reader's stream
 * @throws IllegalStateException if the resource cannot be found, or if an I/O error happens while reading it
 */
public static void processInternalFile(String file, Consumer consumer) {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    try (InputStream resource = loader.getResourceAsStream(file)) {
        // getResourceAsStream signals a missing resource with null rather than with an exception
        if (resource == null) {
            throw new IllegalStateException("Unable to find " + file);
        }
        try (Reader input = new InputStreamReader(resource, StandardCharsets.UTF_8);
             CsvReader records = CsvReader.builder().ofNamedCsvRecord(input)) {
            records.stream().forEach(consumer);
        }
    }
    catch (IOException e) {
        // keep the original exception as the cause so the underlying I/O problem is not lost
        throw new IllegalStateException("Unable to read " + file, e);
    }
}
/**
 * Reads a CSV resource from the context class loader of the current thread and passes every record to the consumer.
 * <br/><br/>
 * The resource is decoded as UTF-8 and parsed with FastCSV's <code>ofCsvRecord</code> reader.
 * NOTE(review): per the FastCSV documentation that reader applies no header handling, so every row is delivered - confirm.
 * @param file path of the resource to read, as understood by <code>ClassLoader.getResourceAsStream</code>
 * @param consumer callback invoked once per record, in the order of the reader's stream
 * @throws IllegalStateException if the resource cannot be found, or if an I/O error happens while reading it
 */
public static void processInternalFileNoHeaders(String file, Consumer consumer) {
    ClassLoader loader = Thread.currentThread().getContextClassLoader();
    try (InputStream resource = loader.getResourceAsStream(file)) {
        // getResourceAsStream signals a missing resource with null rather than with an exception
        if (resource == null) {
            throw new IllegalStateException("Unable to find " + file);
        }
        try (Reader input = new InputStreamReader(resource, StandardCharsets.UTF_8);
             CsvReader records = CsvReader.builder().ofCsvRecord(input)) {
            records.stream().forEach(consumer);
        }
    }
    catch (IOException e) {
        // keep the original exception as the cause so the underlying I/O problem is not lost
        throw new IllegalStateException("Unable to read " + file, e);
    }
}
/**
 * Expands the provided string of sites into a list of sites, individual elements or ranges should be comma separated.
 * <br/><br/>
 * Supports ranges (C120-C129) and optional last digits (C12 means C120-C129; C1 means C100-C199).
 * <br/><br/>
 * Elements made only of digits are assumed to be "C" sites (120 means C120; 120-129 means C120-C129).
 * <br/><br/>
 * Method is not case sensitive (c123 will be translated into C123); spaces are ignored.
 * <br/><br/>
 * Elements that are not valid sites are silently ignored; that includes ranges missing one of their bounds (C12-)
 * and ranges whose bounds do not start with the same letter (C120-D129).
 * <br/><br/>
 * Created on Dec 19, 2011 by depryf
 * @param sites string of sites
 * @return the list of expanded sites, null if no site has been provided
 */
public static List<String> expandSites(String sites) {
    if (sites == null || sites.trim().isEmpty())
        return null;

    List<String> result = new ArrayList<>();
    for (String elem : StringUtils.split(StringUtils.replace(sites, " ", "").toUpperCase(), ',')) {
        if (elem.isEmpty())
            continue;

        // a single site is handled as a range whose two bounds are the same value
        String left = elem;
        String right = elem;
        if (elem.contains("-")) {
            String[] parts = StringUtils.split(elem, '-');
            // StringUtils.split drops empty tokens, so "C12-", "-C12" or "-" yield fewer than two parts
            if (parts.length < 2)
                continue;
            left = parts[0];
            right = parts[1];
        }

        // the "C" prefix is optional; it has to be added to each bound individually, not to the full element
        if (NumberUtils.isDigits(left))
            left = "C" + left;
        if (NumberUtils.isDigits(right))
            right = "C" + right;

        if (!_SITE_PATTERN.matcher(left).matches() || !_SITE_PATTERN.matcher(right).matches())
            continue;

        // a range is only meaningful within a single letter; each prefix comes from its own bound
        String prefix = left.substring(0, 1);
        if (!prefix.equals(right.substring(0, 1)))
            continue;

        // missing trailing digits mean "all of them": pad the lower bound with 0s and the upper bound with 9s
        int start = Integer.parseInt(StringUtils.rightPad(left.substring(1), 3, "0"));
        int end = Integer.parseInt(StringUtils.rightPad(right.substring(1), 3, "9"));
        for (int i = start; i <= end; i++)
            result.add(prefix + StringUtils.leftPad(String.valueOf(i), 3, "0"));
    }

    return result;
}
/**
* Expands the provided primary sites as either individual integer codes, or as ranges.
* @param toExpand sites to expand
* @return expanded sites
*/
public static List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy