org.kuali.ole.utility.callnumber.Utils Maven / Gradle / Ivy
The newest version!
package org.kuali.ole.utility.callnumber;
/**
* Created with IntelliJ IDEA.
* User: ?
* Date: 19/2/13
* Time: 9:23 PM
* To change this template use File | Settings | File Templates.
*/
import org.marc4j.marc.DataField;
import org.marc4j.marc.Record;
import org.marc4j.marc.Subfield;
import org.marc4j.marc.VariableField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* General utility functions for solrmarc
*
* @author Wayne Graham
* @version $Id: Utils.java 1612 2012-03-21 17:39:16Z [email protected] $
*/
public final class Utils {
// --- Pre-compiled patterns for recognising a year inside a free-form date string ---
// '[' + a digit 1 or 2 + three more digits + ']', e.g. "[1999]".
private final static Pattern FOUR_DIGIT_PATTERN_BRACES = Pattern.compile("\\[[12]\\d{3,3}\\]");
// Same as above but without the closing bracket, e.g. "[1999".
private final static Pattern FOUR_DIGIT_PATTERN_ONE_BRACE = Pattern.compile("\\[[12]\\d{3,3}");
// A century prefix 15 through 20 followed by two digits, i.e. 1500-2099.
private final static Pattern FOUR_DIGIT_PATTERN_STARTING_WITH_1_2 = Pattern.compile("(20|19|18|17|16|15)[0-9][0-9]");
// A lowercase letter 'l' followed by three digits (presumably a mistyped leading '1').
private final static Pattern FOUR_DIGIT_PATTERN_OTHER_1 = Pattern.compile("l\\d{3,3}");
// The literal text "[19]" followed by two digits, e.g. "[19]85".
private final static Pattern FOUR_DIGIT_PATTERN_OTHER_2 = Pattern.compile("\\[19\\]\\d{2,2}");
// A century prefix 15 through 20, one digit, then a digit, '-' or '?', e.g. "198-" or "198?".
private final static Pattern FOUR_DIGIT_PATTERN_OTHER_3 = Pattern.compile("(20|19|18|17|16|15)[0-9][-?0-9]");
// "i.e. " followed by a 1500-2099 year; note the dots are unescaped and so match any character.
private final static Pattern FOUR_DIGIT_PATTERN_OTHER_4 = Pattern.compile("i.e. (20|19|18|17|16|15)[0-9][0-9]");
// One or more digits, a space, then "BC" in either case with an optional '.' after each letter.
private final static Pattern BC_DATE_PATTERN = Pattern.compile("[0-9]+ [Bb][.]?[Cc][.]?");
// Any four consecutive digits.
private final static Pattern FOUR_DIGIT_PATTERN = Pattern.compile("\\d{4,4}");
// --- Scratch Matcher slots, one per pattern above ---
// NOTE(review): these are static and mutable, so any code that assigns them is not safe to
// call from several threads at once; method-local matchers avoid that problem.
private static Matcher matcher;
private static Matcher matcher_braces;
private static Matcher matcher_one_brace;
private static Matcher matcher_start_with_1_2;
private static Matcher matcher_l_plus_three_digits;
private static Matcher matcher_bracket_19_plus_two_digits;
private static Matcher matcher_ie_date;
private static Matcher matcher_bc_date;
private static Matcher matcher_three_digits_plus_unk;
// Number format with at least two integer digits and exactly two decimals ("00.00").
private final static DecimalFormat timeFormat = new DecimalFormat("00.00");
// SLF4J logger shared by the static helpers of this class.
protected static Logger logger = LoggerFactory.getLogger(Utils.class);
/**
* Private no-argument constructor.
* Being private, it prevents code outside this class from creating instances.
*/
private Utils() {
}
/**
* Looks up a property without a fallback value.
* Delegates to {@link #getProperty(Properties, String, String)} with a null default, so a
* JVM system property (e.g. set via -Dprop="value") takes precedence over the same key in
* the passed-in property set. This is especially useful for defining the marc.source
* property to select which file to operate on in a shell script loop.
*
* @param props - property set in which to look.
* @param propname - name of the property to lookup.
* @return value stored for that property (or null if it doesn't exist)
*/
public static String getProperty(Properties props, String propname) {
return getProperty(props, propname, null);
}
/**
 * Resolves a property value, giving JVM system properties priority over the supplied
 * property set so that a {@code -Dprop="value"} command line argument can override a value
 * from a properties file.
 *
 * @param props    property set consulted when no system property is defined; may be null
 * @param propname name of the property to look up
 * @param defVal   value returned when neither source defines the property
 * @return the system property value if set, otherwise the value from {@code props},
 *         otherwise {@code defVal}
 */
public static String getProperty(Properties props, String propname, String defVal) {
    final String systemValue = System.getProperty(propname);
    if (systemValue != null) {
        return systemValue;
    }
    final String configuredValue = (props == null) ? null : props.getProperty(propname);
    return (configuredValue != null) ? configuredValue : defVal;
}
/**
* Load a properties file into a Properties object.
* Delegates to {@link #loadProperties(String[], String, boolean, String)} with
* showName = false and filenameProperty = null.
*
* @param propertyPaths the directories to search for the properties file
* @param propertyFileName name of the sought properties file
* @return Properties object
*/
public static Properties loadProperties(String propertyPaths[], String propertyFileName) {
return (loadProperties(propertyPaths, propertyFileName, false, null));
}
/**
* Load a properties file into a Properties object.
* Delegates to {@link #loadProperties(String[], String, boolean, String)} with
* filenameProperty = null.
*
* @param propertyPaths the directories to search for the properties file
* @param propertyFileName name of the sought properties file
* @param showName passed through unchanged to the four-argument overload
* @return Properties object
*/
public static Properties loadProperties(String propertyPaths[], String propertyFileName, boolean showName) {
return (loadProperties(propertyPaths, propertyFileName, showName, null));
}
/**
 * Loads a properties file, addressed by URL, into a Properties object.
 * Names ending in ".xml" or ".XML" are read with {@link Properties#loadFromXML}; everything
 * else is read with {@link Properties#load(InputStream)}. The stream is always closed, even
 * when loading fails.
 *
 * @param fullFilenameURLStr String representation of the URL of the properties file, whether
 *                           it is a local file or a resource
 * @return Properties object holding the loaded entries
 * @throws IllegalArgumentException if the URL cannot be opened or its content cannot be
 *                                  read; the underlying IOException is kept as the cause
 */
public static Properties loadProperties(String fullFilenameURLStr) {
    String errmsg = "Fatal error: Unable to find specified properties file: " + fullFilenameURLStr;
    InputStream in = getPropertyFileInputStream(fullFilenameURLStr);
    Properties props = new Properties();
    try {
        try {
            if (fullFilenameURLStr.endsWith(".xml") || fullFilenameURLStr.endsWith(".XML")) {
                props.loadFromXML(in);
            } else {
                props.load(in);
            }
        } finally {
            // close on every path so a failed load does not leak the stream
            in.close();
        }
    } catch (IOException e) {
        throw new IllegalArgumentException(errmsg, e);
    }
    return props;
}
/**
 * Loads a properties file, located via a list of search directories, into a Properties
 * object. Names ending in ".xml" or ".XML" are read with {@link Properties#loadFromXML};
 * everything else is read with {@link Properties#load(InputStream)}. The stream is always
 * closed, even when loading fails.
 *
 * @param propertyPaths    the directories to search for the properties file
 * @param propertyFileName name of the sought properties file
 * @param showName         whether the name of the file/resource being read should be shown
 * @param filenameProperty if non-null and the file was found on the file system, the parent
 *                         directory of that file is stored in the result under this key
 * @return Properties object holding the loaded entries
 * @throws IllegalArgumentException if the file cannot be found or read; a read failure keeps
 *                                  the underlying IOException as the cause
 */
public static Properties loadProperties(String propertyPaths[], String propertyFileName, boolean showName, String filenameProperty) {
    String errmsg = "Fatal error: Unable to find specified properties file: " + propertyFileName;
    String inputStreamSource[] = new String[]{null};
    InputStream in = getPropertyFileInputStream(propertyPaths, propertyFileName, showName, inputStreamSource);
    Properties props = new Properties();
    try {
        try {
            if (propertyFileName.endsWith(".xml") || propertyFileName.endsWith(".XML")) {
                props.loadFromXML(in);
            } else {
                props.load(in);
            }
        } finally {
            // close on every path so a failed load does not leak the stream
            in.close();
        }
    } catch (IOException e) {
        throw new IllegalArgumentException(errmsg, e);
    }
    if (filenameProperty != null && inputStreamSource[0] != null) {
        String parentDir = new File(inputStreamSource[0]).getParent();
        // Properties rejects null values, so skip a file that has no parent directory
        if (parentDir != null) {
            props.setProperty(filenameProperty, parentDir);
        }
    }
    return props;
}
/**
* Opens an input stream on a properties file located via the given search paths.
* Delegates to the three-argument overload with showName = false.
*
* @param propertyPaths the directories to search for the properties file
* @param propertyFileName name of the sought properties file
* @return the stream returned by the three-argument overload
*/
public static InputStream getPropertyFileInputStream(String[] propertyPaths, String propertyFileName) {
return (getPropertyFileInputStream(propertyPaths, propertyFileName, false));
}
/**
 * Opens an input stream on a properties file located via the given search paths.
 * Delegates to the four-argument overload without asking for the resolved file path.
 *
 * @param propertyPaths    the directories to search for the properties file
 * @param propertyFileName name of the sought properties file
 * @param showName         whether the name of the file/resource being read should be shown;
 *                         forwarded to the four-argument overload
 * @return an open stream on the properties file
 */
public static InputStream getPropertyFileInputStream(String[] propertyPaths, String propertyFileName, boolean showName) {
    // forward the caller's showName; it used to be replaced by a hard-coded false
    return getPropertyFileInputStream(propertyPaths, propertyFileName, showName, null);
}
/**
 * Opens an input stream on the properties file addressed by the given URL string.
 *
 * @param propertyFileURLStr String form of the URL of the properties file
 * @return an open stream on the URL's content; the caller is responsible for closing it
 * @throws IllegalArgumentException if the string is not a valid URL or the URL cannot be
 *                                  opened; the underlying IOException is kept as the cause
 */
public static InputStream getPropertyFileInputStream(String propertyFileURLStr) {
    String errmsg = "Fatal error: Unable to open specified properties file: " + propertyFileURLStr;
    try {
        // MalformedURLException is an IOException, so both failure modes land in one handler
        return new URL(propertyFileURLStr).openStream();
    } catch (IOException e) {
        throw new IllegalArgumentException(errmsg, e);
    }
}
/**
 * Locates a properties file via the given search paths and opens an input stream on it.
 * The file name is first resolved to a URL string by {@code getPropertyFileAbsoluteURL}
 * and that URL is then opened by the single-argument overload.
 *
 * @param propertyPaths    the directories to search for the properties file
 * @param propertyFileName name of the sought properties file
 * @param showName         whether the name of the file/resource being read should be shown
 * @param inputSource      passed through to {@code getPropertyFileAbsoluteURL}, which may
 *                         store the resolved file path in element 0; may be null
 * @return an open stream on the properties file
 */
public static InputStream getPropertyFileInputStream(String[] propertyPaths, String propertyFileName, boolean showName, String inputSource[]) {
    String fullPropertyFileURLStr = getPropertyFileAbsoluteURL(propertyPaths, propertyFileName, showName, inputSource);
    return getPropertyFileInputStream(fullPropertyFileURLStr);
}
// String verboseStr = System.getProperty("marc.test.verbose");
// boolean verbose = (verboseStr != null && verboseStr.equalsIgnoreCase("true"));
// String lookedIn = "";
// if (propertyPaths != null)
// {
// File propertyFile = new File(propertyFileName);
// int pathCnt = 0;
// do
// {
// if (propertyFile.exists() && propertyFile.isFile() && propertyFile.canRead())
// {
// try
// {
// in = new FileInputStream(propertyFile);
// if (inputSource != null && inputSource.length >= 1)
// {
// inputSource[0] = propertyFile.getAbsolutePath();
// }
// if (showName)
// logger.info("Opening file: "+ propertyFile.getAbsolutePath());
// else
// logger.debug("Opening file: "+ propertyFile.getAbsolutePath());
// }
// catch (FileNotFoundException e)
// {
// // simply eat this exception since we should only try to open the file if we previously
// // determined that the file exists and is readable.
// }
// break; // we found it!
// }
// if (verbose) lookedIn = lookedIn + propertyFile.getAbsolutePath() + "\n";
// if (propertyPaths != null && pathCnt < propertyPaths.length)
// {
// propertyFile = new File(propertyPaths[pathCnt], propertyFileName);
// }
// pathCnt++;
// } while (propertyPaths != null && pathCnt <= propertyPaths.length);
// }
// // if we didn't find it as a file, look for it as a URL
// String errmsg = "Fatal error: Unable to find specified properties file: " + propertyFileName;
// if (verbose) errmsg = errmsg + "\n Looked in: "+ lookedIn;
// if (in == null)
// {
// Utils utilObj = new Utils();
// URL url = utilObj.getClass().getClassLoader().getResource(propertyFileName);
// if (url == null)
// url = utilObj.getClass().getResource("/" + propertyFileName);
// if (url == null)
// {
// logger.error(errmsg);
// throw new IllegalArgumentException(errmsg);
// }
// if (showName)
// logger.info("Opening resource via URL: "+ url.toString());
// else
// logger.debug("Opening resource via URL: "+ url.toString());
//
///*
// if (url == null)
// url = utilObj.getClass().getClassLoader().getResource(propertyPath + "/" + propertyFileName);
// if (url == null)
// url = utilObj.getClass().getResource("/" + propertyPath + "/" + propertyFileName);
//*/
// if (url != null)
// {
// try
// {
// in = url.openStream();
// }
// catch (IOException e)
// {
// throw new IllegalArgumentException(errmsg);
// }
// }
// }
// return(in);
// }
/**
 * Resolves a properties file name to the external form of a URL that can be opened.
 * When {@code propertyPaths} is non-null the file system is searched first: the bare file
 * name is tried, then the name resolved against each search directory in order. If no
 * readable file is found (or {@code propertyPaths} is null) the name is looked up as a
 * class path resource.
 *
 * @param propertyPaths    the directories to search; null skips the file system search
 * @param propertyFileName name of the sought properties file
 * @param showName         true to log the chosen location at info level, false for debug
 * @param inputSource      if non-null and non-empty, element 0 receives the absolute path
 *                         of the file found on the file system
 * @return external form of the URL of the file or resource
 * @throws IllegalArgumentException if the file is found neither on the file system nor on
 *                                  the class path
 */
public static String getPropertyFileAbsoluteURL(String[] propertyPaths, String propertyFileName, boolean showName, String inputSource[]) {
    String verboseStr = System.getProperty("marc.test.verbose");
    boolean verbose = (verboseStr != null && verboseStr.equalsIgnoreCase("true"));
    StringBuilder lookedIn = new StringBuilder();
    String fullPathName = null;
    if (propertyPaths != null) {
        // index -1 stands for the bare file name; 0..length-1 are the search directories
        for (int pathIx = -1; pathIx < propertyPaths.length; pathIx++) {
            File propertyFile = (pathIx < 0)
                    ? new File(propertyFileName)
                    : new File(propertyPaths[pathIx], propertyFileName);
            if (propertyFile.exists() && propertyFile.isFile() && propertyFile.canRead()) {
                try {
                    fullPathName = propertyFile.toURI().toURL().toExternalForm();
                    if (inputSource != null && inputSource.length >= 1) {
                        inputSource[0] = propertyFile.getAbsolutePath();
                    }
                } catch (MalformedURLException e) {
                    // fullPathName stays null, so the class path lookup below is still attempted
                    logger.error("Unable to convert file path to URL: " + propertyFile.getAbsolutePath(), e);
                }
                if (showName) {
                    logger.info("Opening file: " + propertyFile.getAbsolutePath());
                } else {
                    logger.debug("Opening file: " + propertyFile.getAbsolutePath());
                }
                break; // we found it!
            }
            if (verbose) {
                lookedIn.append(propertyFile.getAbsolutePath()).append("\n");
            }
        }
    }
    // if we didn't find it as a file, look for it as a class path resource
    if (fullPathName == null) {
        URL url = Utils.class.getClassLoader().getResource(propertyFileName);
        if (url == null) {
            url = Utils.class.getResource("/" + propertyFileName);
        }
        if (url == null) {
            String errmsg = "Fatal error: Unable to find specified properties file: " + propertyFileName;
            if (verbose) {
                errmsg = errmsg + "\n Looked in: " + lookedIn;
            }
            logger.error(errmsg);
            throw new IllegalArgumentException(errmsg);
        }
        if (showName) {
            logger.info("Opening resource via URL: " + url.toString());
        } else {
            logger.debug("Opening resource via URL: " + url.toString());
        }
        fullPathName = url.toExternalForm();
    }
    return fullPathName;
}
/**
 * Drains an InputStream and returns everything read from it as one String.
 * Bytes are decoded by an {@link InputStreamReader} created without an explicit charset.
 * The stream is not closed by this method.
 *
 * @param stream the stream to read in
 * @return String containing the entire contents of the stream
 * @throws IOException if reading from the stream fails
 */
public static String readStreamIntoString(InputStream stream) throws IOException {
    final Reader reader = new BufferedReader(new InputStreamReader(stream));
    final StringBuilder contents = new StringBuilder();
    final char[] buffer = new char[4096];
    for (int count = reader.read(buffer); count > 0; count = reader.read(buffer)) {
        contents.append(buffer, 0, count);
    }
    return contents.toString();
}
/**
 * Extracts a four-digit year from a free-form date string.
 * The first of the following forms found in the string wins:
 * <ol>
 * <li>a bracketed year such as "[1999]" (brackets removed);</li>
 * <li>"i.e. 1999" (prefix removed);</li>
 * <li>a year with only an opening bracket such as "[1999" (bracket removed);</li>
 * <li>a B.C. date such as "450 B.C.", which yields no year;</li>
 * <li>a plain year between 1500 and 2099;</li>
 * <li>a letter 'l' followed by three digits, read as a leading '1';</li>
 * <li>"[19]" followed by two digits (brackets removed);</li>
 * <li>three digits of a 1500-2099 year followed by '-' or '?', which is replaced by '0'.</li>
 * </ol>
 * A year later than next year is rejected. All matchers are method-local, so concurrent
 * calls do not share matcher state.
 *
 * @param date String to parse; must not be null
 * @return the year as a four-digit String, or null if none was recognised
 */
public static String cleanDate(final String date) {
    final Matcher braces = FOUR_DIGIT_PATTERN_BRACES.matcher(date);
    final Matcher ieDate = FOUR_DIGIT_PATTERN_OTHER_4.matcher(date);
    final Matcher oneBrace = FOUR_DIGIT_PATTERN_ONE_BRACE.matcher(date);
    final Matcher bcDate = BC_DATE_PATTERN.matcher(date);
    final Matcher plainYear = FOUR_DIGIT_PATTERN_STARTING_WITH_1_2.matcher(date);
    final Matcher lPlusThreeDigits = FOUR_DIGIT_PATTERN_OTHER_1.matcher(date);
    final Matcher bracket19PlusTwoDigits = FOUR_DIGIT_PATTERN_OTHER_2.matcher(date);
    final Matcher threeDigitsPlusUnknown = FOUR_DIGIT_PATTERN_OTHER_3.matcher(date);

    String cleanDate = null;
    // the order of this chain is significant: earlier forms take precedence
    if (braces.find()) {
        cleanDate = Utils.removeOuterBrackets(braces.group());
    } else if (ieDate.find()) {
        cleanDate = ieDate.group().replaceAll("i.e. ", "");
    } else if (oneBrace.find()) {
        cleanDate = Utils.removeOuterBrackets(oneBrace.group());
    } else if (bcDate.find()) {
        cleanDate = null; // B.C. dates are deliberately not mapped to a year
    } else if (plainYear.find()) {
        cleanDate = plainYear.group();
    } else if (lPlusThreeDigits.find()) {
        cleanDate = lPlusThreeDigits.group().replaceAll("l", "1");
    } else if (bracket19PlusTwoDigits.find()) {
        cleanDate = bracket19PlusTwoDigits.group().replaceAll("\\[", "").replaceAll("\\]", "");
    } else if (threeDigitsPlusUnknown.find()) {
        cleanDate = threeDigitsPlusUnknown.group().replaceAll("[-?]", "0");
    }

    if (cleanDate != null) {
        final int thisYear = Calendar.getInstance().get(Calendar.YEAR);
        try {
            // anything later than next year is treated as a bad date
            if (Integer.parseInt(cleanDate) > thisYear + 1) {
                cleanDate = null;
            }
        } catch (NumberFormatException nfe) {
            cleanDate = null;
        }
    }

    if (cleanDate != null) {
        logger.debug("Date : {} mapped to : {}", date, cleanDate);
    } else {
        logger.debug("No Date match: {}", date);
    }
    return cleanDate;
}
/**
 * Repeatedly strips trailing clutter from a string until nothing more changes.
 * Each pass trims the string, removes one trailing comma, slash, semicolon or colon
 * (together with any spaces before it), removes a trailing period in the cases listed
 * below, and finally applies {@code removeOuterBrackets}.
 * <p>
 * A trailing period is kept after "Jr." or "Sr.". Otherwise it is removed when it is
 * preceded by two word characters, by two letters, by two word characters each optionally
 * followed by a combining diacritical mark, or by a punctuation character.
 *
 * @param origStr String to clean; must not be null
 * @return cleaned string
 */
public static String cleanData(String origStr) {
    String cleaned = origStr;
    while (true) {
        final String before = cleaned;
        cleaned = cleaned.trim().replaceAll(" *([,/;:])$", "");
        // "Jr." and "Sr." keep their period; nothing to do either when there is no period
        final boolean periodIsCandidate = cleaned.endsWith(".") && !cleaned.matches(".*[JS]r\\.$");
        if (periodIsCandidate) {
            final boolean strip = cleaned.matches(".*\\w\\w\\.$")
                    || cleaned.matches(".*\\p{L}\\p{L}\\.$")
                    || cleaned.matches(".*\\w\\p{InCombiningDiacriticalMarks}?\\w\\p{InCombiningDiacriticalMarks}?\\.$")
                    || cleaned.matches(".*\\p{Punct}\\.$");
            if (strip) {
                cleaned = cleaned.substring(0, cleaned.length() - 1);
            }
        }
        cleaned = removeOuterBrackets(cleaned);
        // stop when the string is used up or a full pass changed nothing
        if (cleaned.length() == 0 || cleaned.equals(before)) {
            return cleaned;
        }
    }
}
/**
 * Applies {@code cleanData(String)} to every entry of a set. Entries that become identical
 * once cleaned collapse into one, and the iteration order of the input is kept.
 *
 * @param values the set to clean; must not be null
 * @return a new set holding the cleaned entries
 */
private static Set<String> cleanData(Set<String> values) {
    Set<String> result = new LinkedHashSet<String>();
    for (String entry : values) {
        result.add(cleanData(entry));
    }
    return result;
}
/**
 * Applies {@code removeTrailingCharAndPeriod} to the trimmed string over and over until a
 * pass leaves it unchanged or the string becomes empty.
 *
 * @param origStr            String to clean
 * @param trailingCharsRegEx a regular expression of trailing chars to be removed (see java
 *                           Pattern class). It should NOT have '$' at the end.
 *                           (e.g. " *[,/;:]" removes any comma, slash, semicolon or colon at
 *                           the end of the string, optionally preceded by spaces)
 * @param charsB4periodRegEx a regular expression that must immediately precede a trailing
 *                           period IN ORDER FOR THE PERIOD TO BE REMOVED. It should NOT have
 *                           the period or '$' at the end.
 *                           (e.g. "[a-zA-Z]{3,}" means at least three letters must
 *                           immediately precede the period for it to be removed.)
 * @return cleaned string, or null if {@code origStr} is null
 */
public static String removeAllTrailingCharAndPeriod(String origStr, String trailingCharsRegEx, String charsB4periodRegEx) {
    if (origStr == null) {
        return null;
    }
    String current = origStr;
    while (true) {
        final String cleaned = removeTrailingCharAndPeriod(current.trim(), trailingCharsRegEx, charsB4periodRegEx);
        if (cleaned.length() == 0 || cleaned.equals(current)) {
            return cleaned;
        }
        current = cleaned;
    }
}
/**
 * Performs one cleaning pass: first {@code removeTrailingChar}, then
 * {@code removeTrailingPeriod} on its result.
 *
 * @param origStr            String to clean
 * @param trailingCharsRegEx a regular expression of trailing chars to be removed (see java
 *                           Pattern class). It should NOT have '$' at the end.
 *                           (e.g. " *[,/;:]" removes any comma, slash, semicolon or colon at
 *                           the end of the string, optionally preceded by spaces)
 * @param charsB4periodRegEx a regular expression that must immediately precede a trailing
 *                           period IN ORDER FOR THE PERIOD TO BE REMOVED. It should NOT have
 *                           the period or '$' at the end.
 *                           (e.g. "[a-zA-Z]{3,}" means at least three letters must
 *                           immediately precede the period for it to be removed.)
 * @return cleaned string, or null if {@code origStr} is null
 */
public static String removeTrailingCharAndPeriod(String origStr, String trailingCharsRegEx, String charsB4periodRegEx) {
    if (origStr == null) {
        return null;
    }
    final String withoutTrailingChars = removeTrailingChar(origStr, trailingCharsRegEx);
    return removeTrailingPeriod(withoutTrailingChars, charsB4periodRegEx);
}
/**
 * Trims the string and removes whatever the regular expression matches at its very end.
 *
 * @param origStr             string to be cleaned
 * @param charsToReplaceRegEx a regular expression (see {@link java.util.regex.Pattern}) of
 *                            the trailing string/chars to be removed, without a closing '$';
 *                            e.g. " *([,/;:])" meaning the last character is a comma, slash,
 *                            semicolon or colon, possibly preceded by spaces
 * @return the trimmed string with the specified trailing characters removed, or null if
 *         {@code origStr} is null
 */
public static String removeTrailingChar(String origStr, String charsToReplaceRegEx) {
    if (origStr == null) {
        return null;
    }
    // anchor the caller's expression to the end of the string
    final String anchoredRegEx = charsToReplaceRegEx + "$";
    final String trimmed = origStr.trim();
    return trimmed.replaceAll(anchoredRegEx, "");
}
/**
 * Trims the string and, if it then ends with a period that is immediately preceded by the
 * given regular expression, drops that period (and trims again).
 *
 * @param origStr             the string to be cleaned
 * @param precedingCharsRegEx a regular expression that must immediately precede a trailing
 *                            period IN ORDER FOR THE PERIOD TO BE REMOVED. It should NOT
 *                            have the period or '$' at the end.
 *                            (e.g. "[a-zA-Z]{3,}" means at least three letters must
 *                            immediately precede the period for it to be removed.)
 * @return the trimmed string, without its trailing period iff the regular expression was
 *         found immediately before that period; null if {@code origStr} is null
 */
public static String removeTrailingPeriod(String origStr, String precedingCharsRegEx) {
    if (origStr == null) {
        return null;
    }
    final String trimmed = origStr.trim();
    final boolean dropPeriod = trimmed.endsWith(".")
            && trimmed.matches(".*" + precedingCharsRegEx + "\\.$");
    return dropPeriod ? trimmed.substring(0, trimmed.length() - 1).trim() : trimmed;
}
/**
 * Strips a single square bracket from the start and/or end of the trimmed string, but only
 * when those are the only square brackets of their kind in the string:
 * "[x]" loses both brackets when it contains no other '[' or ']';
 * "[x" loses the '[' when the string has no ']' at all;
 * "x]" loses the ']' when the string has no '[' at all.
 *
 * @param origStr the string to clean
 * @return the trimmed, unbracketed string; {@code origStr} itself when it is null or empty
 */
public static String removeOuterBrackets(String origStr) {
    if (origStr == null || origStr.length() == 0) {
        return origStr;
    }
    final String text = origStr.trim();
    if (text.length() == 0) {
        return text;
    }
    final int last = text.length() - 1;
    final boolean startsOpen = text.startsWith("[");
    final boolean endsClosed = text.charAt(last) == ']';
    String stripped = text;
    if (startsOpen && endsClosed && text.indexOf('[', 1) < 0 && text.lastIndexOf(']', last - 1) < 0) {
        // the only square brackets are the matched pair at the two ends
        stripped = text.substring(1, last);
    } else if (startsOpen && text.indexOf(']') < 0) {
        // unmatched opening bracket at the start
        stripped = text.substring(1);
    } else if (endsClosed && text.indexOf('[') < 0) {
        // unmatched closing bracket at the end
        stripped = text.substring(0, last);
    }
    return stripped.trim();
}
/**
 * Formats a duration given in milliseconds as minutes and seconds.
 * Seconds keep two decimals, truncated (not rounded) to whole hundredths so the seconds
 * part can never display as 60. A formatter is created per call because DecimalFormat
 * instances are not safe to share between threads.
 *
 * @param totalTime Time in milliseconds
 * @return Time in the format m:ss.ss (minutes are not zero-padded)
 */
public static String calcTime(final long totalTime) {
    final long minutes = totalTime / 60000;
    // work in whole hundredths of a second; dividing by 1000 as a long would drop the fraction
    final long hundredths = (totalTime % 60000) / 10;
    final DecimalFormat secondsFormat = new DecimalFormat("00.00");
    return minutes + ":" + secondsFormat.format(hundredths / 100.0);
}
/**
 * Tells whether a String can be parsed as an {@code int} by {@link Integer#parseInt(String)}.
 *
 * @param number String representation of a number
 * @return true if the String parses as an int; false otherwise
 */
public static boolean isNumber(final String number) {
    try {
        Integer.parseInt(number);
        return true;
    } catch (NumberFormatException nfe) {
        // not parseable as an int
        return false;
    }
}
/**
 * Remap a field value.
 * <p>
 * If the map has a "pattern_0" key, the entries "pattern_0", "pattern_1", ... are applied in
 * index order. Each entry has the form {@code regex=>replacement}. When the regex is found
 * in the current value, the provisional result is the replacement; if the replacement
 * contains '$', the result is the value with every match replaced, and later patterns see
 * that rewritten value. Indexes with no entry are skipped, so pattern keys may share a map
 * with other keys.
 * <p>
 * Afterwards the (possibly rewritten) value is looked up in the map:
 * if it is a key, its mapped value is used;
 * otherwise, if "displayRawIfMissing" is a key, the value itself is used;
 * otherwise, if allowDefault is true and "__DEFAULT" is a key, its mapped value is used;
 * otherwise, if allowDefault is true and the empty string is a key, its mapped value is used;
 * otherwise the provisional pattern result, if any, stands.
 * <p>
 * NOTE: If the spec for a field is supposed to contain all matching values, then the
 * default lookup needs to be done here. If the spec for a field is only supposed to return
 * the first matching mappable value, then the default mapping should be done in the calling
 * method.
 *
 * @param fieldVal     the raw value to be mapped
 * @param map          the map to be used; must not be null
 * @param allowDefault whether the "__DEFAULT" and empty-string keys may supply a fallback
 * @return the new value as determined by the mapping, or null if the mapping yields
 *         nothing or an empty string
 */
public static String remap(String fieldVal, Map<String, String> map, boolean allowDefault) {
    String result = null;
    if (map.containsKey("pattern_0")) {
        // map.size() is an upper bound on the highest pattern index that can be present
        for (int i = 0; i < map.size(); i++) {
            String patternStr = map.get("pattern_" + i);
            if (patternStr == null) {
                continue; // the map also holds non-pattern keys, so some indexes are absent
            }
            String parts[] = patternStr.split("=>");
            if (containsMatch(fieldVal, parts[0])) {
                String newVal = parts[1];
                if (parts[1].contains("$")) {
                    newVal = fieldVal.replaceAll(parts[0], parts[1]);
                    fieldVal = newVal;
                }
                result = newVal;
            }
        }
    }
    if (map.containsKey(fieldVal)) {
        result = map.get(fieldVal);
    } else if (map.containsKey("displayRawIfMissing")) {
        result = fieldVal;
    } else if (allowDefault && map.containsKey("__DEFAULT")) {
        result = map.get("__DEFAULT");
    } else if (allowDefault && map.containsKey("")) {
        result = map.get("");
    }
    if (result == null || result.length() == 0) {
        return null;
    }
    return result;
}
/**
 * Remap a set of field values.
 * <p>
 * If the map has a "pattern_0" key, each value is run through the entries "pattern_0",
 * "pattern_1", ... in index order. Each entry has the form {@code regex=>replacement}. When
 * the regex is found in the current value: a replacement without '$' is added to the result
 * as is; a replacement with '$' rewrites the value (every match replaced), later patterns
 * see the rewritten value, and the last such rewrite is added to the result. Indexes with
 * no entry are skipped. In this mode no other keys of the map are consulted.
 * <p>
 * Without a "pattern_0" key each value is mapped by
 * {@code remap(String, Map, boolean)}; a mapped value containing '|' is split on '|' and
 * every piece is added to the result. Values that map to nothing are dropped.
 * <p>
 * NOTE: If the spec for a field is supposed to contain all matching values, then the
 * default lookup needs to be done here. If the spec for a field is only supposed to return
 * the first matching mappable value, then the default mapping should be done in the calling
 * method.
 *
 * @param set          the raw values to be mapped
 * @param map          the map to be used; when null the input set is returned unchanged
 * @param allowDefault passed to {@code remap(String, Map, boolean)} for non-pattern maps
 * @return a new set with the mapped values in encounter order (or {@code set} itself when
 *         {@code map} is null)
 */
public static Set<String> remap(Set<String> set, Map<String, String> map, boolean allowDefault) {
    if (map == null) {
        return set;
    }
    Set<String> result = new LinkedHashSet<String>();
    for (String rawVal : set) {
        String val = rawVal;
        if (map.containsKey("pattern_0")) {
            String tmpResult = null;
            // map.size() is an upper bound on the highest pattern index that can be present
            for (int i = 0; i < map.size(); i++) {
                String patternStr = map.get("pattern_" + i);
                if (patternStr == null) {
                    continue; // the map also holds non-pattern keys, so some indexes are absent
                }
                String parts[] = patternStr.split("=>");
                if (containsMatch(val, parts[0])) {
                    String newVal = parts[1];
                    if (parts[1].contains("$")) {
                        newVal = val.replaceAll(parts[0], parts[1]);
                        val = newVal;
                    } else {
                        result.add(newVal);
                    }
                    tmpResult = newVal;
                }
            }
            if (tmpResult != null) {
                result.add(tmpResult);
            }
        } else {
            String mappedVal = remap(val, map, allowDefault);
            if (mappedVal != null) {
                if (mappedVal.contains("|")) {
                    for (String oneVal : mappedVal.split("[|]")) {
                        result.add(oneVal);
                    }
                } else {
                    result.add(mappedVal);
                }
            }
        }
    }
    return result;
}
/**
 * Tells whether the regular expression matches anywhere inside the value.
 *
 * @param val     the text to search; must not be null
 * @param pattern the regular expression to look for
 * @return true if at least one match of {@code pattern} occurs in {@code val}
 */
private static boolean containsMatch(String val, String pattern) {
    // ask the matcher directly; replacing with a sentinel and comparing gave a false
    // negative whenever the matched text was identical to the sentinel
    return Pattern.compile(pattern).matcher(val).find();
}
/**
 * Tells whether any entry of the set contains a match for the given regular expression.
 *
 * @param set     Set of Strings to test
 * @param pattern Regex String pattern to look for
 * @return true if at least one entry contains the pattern, else false (also for an empty set)
 */
public static boolean setItemContains(Set set, String pattern) {
    for (Object item : set) {
        if (containsMatch((String) item, pattern)) {
            return true;
        }
    }
    return false;
}
/**
 * Concatenates the entries of a set, in iteration order, putting the separator between
 * consecutive entries.
 *
 * @param set       Set of values to join
 * @param separator text to put between two entries
 * @return the joined text; the empty string for an empty set
 */
public static String join(Set set, String separator) {
    final StringBuilder joined = new StringBuilder();
    boolean first = true;
    for (Object item : set) {
        if (!first) {
            joined.append(separator);
        }
        joined.append(item);
        first = false;
    }
    return joined.toString();
}
/**
 * Cleans every entry with {@code cleanData} and then drops each entry whose text is
 * contained in some other entry, so only the longest variants remain.
 *
 * @param locations the values to reduce
 * @return the cleaned set without entries that are substrings of another entry
 */
public static Set trimNearDuplicates(Set locations) {
    final Set cleaned = cleanData(locations);
    if (cleaned.size() <= 1) {
        return cleaned;
    }
    // compare against a snapshot so removals do not disturb the comparison
    final Object[] snapshot = cleaned.toArray();
    for (int i = 0; i < snapshot.length; i++) {
        final String candidate = snapshot[i].toString();
        for (int j = 0; j < snapshot.length; j++) {
            if (j != i && snapshot[j].toString().contains(candidate)) {
                cleaned.remove(snapshot[i]);
                break;
            }
        }
    }
    return cleaned;
}
/**
 * Tells whether a three letter language code denotes a right to left language, i.e. one
 * written in arabic characters (ara, per, urd) or hebrew characters (heb, yid, lad, jpr,
 * jrb).
 *
 * @param langcode the three letter language code; must not be null
 * @return true for one of the codes listed above, false otherwise
 */
public final static boolean isRightToLeftLanguage(String langcode) {
    final boolean arabicScript = langcode.equals("ara") || langcode.equals("per") || langcode.equals("urd");
    return arabicScript
            || langcode.equals("heb") || langcode.equals("yid") || langcode.equals("lad")
            || langcode.equals("jpr") || langcode.equals("jrb");
}
/**
 * Returns the index within the string of the first open parenthesis that is not
 * immediately preceded by a backslash.
 *
 * @param str the string to search; must not be null
 * @return the index of the first unescaped open paren; -1 if there is none
 */
public final static int getIxUnescapedOpenParen(String str) {
    // scan left to right so the FIRST unescaped paren wins; a greedy regex found the last one
    for (int ix = str.indexOf('('); ix >= 0; ix = str.indexOf('(', ix + 1)) {
        if (ix == 0 || str.charAt(ix - 1) != '\\') {
            return ix;
        }
    }
    return -1;
}
/**
 * Returns the index within the string of the first comma that is not immediately preceded
 * by a backslash.
 *
 * @param str the string to search; must not be null
 * @return the index of the first unescaped comma; -1 if there is none
 */
public final static int getIxUnescapedComma(String str) {
    // scan left to right so the FIRST unescaped comma wins; a greedy regex found the last one
    for (int ix = str.indexOf(','); ix >= 0; ix = str.indexOf(',', ix + 1)) {
        if (ix == 0 || str.charAt(ix - 1) != '\\') {
            return ix;
        }
    }
    return -1;
}
/**
 * Collects the entries of the set that start with the given prefix. The prefix is removed
 * via {@code removePrefix}, the remainder is trimmed, and non-empty remainders are returned
 * in encounter order.
 *
 * @param valueSet the Strings to inspect
 * @param prefix   the prefix to look for and strip
 * @return set members that had the prefix, with the prefix removed and the remaining value
 *         trimmed
 */
public final static Set getPrefixedVals(Set valueSet, String prefix) {
    final Set resultSet = new LinkedHashSet();
    for (Object candidate : valueSet) {
        final String remainder = removePrefix((String) candidate, prefix);
        if (remainder == null) {
            continue;
        }
        final String trimmed = remainder.trim();
        if (trimmed.length() != 0) {
            resultSet.add(trimmed);
        }
    }
    return resultSet;
}
/**
 * Removes the prefix from the beginning of the value string.
 *
 * @param value  the string to inspect; must not be null
 * @param prefix the prefix to strip; must not be null
 * @return the text after the prefix, or null when the value does not start with the prefix
 *         or nothing is left after removing it
 */
public final static String removePrefix(String value, String prefix) {
    if (!value.startsWith(prefix)) {
        return null;
    }
    final String remainder = value.substring(prefix.length());
    return (remainder.length() == 0) ? null : remainder;
}
/**
 * Returns the ISBNs found at the start of the candidate Strings, judged by shape only (the
 * check digit is not verified).
 * A candidate is trimmed and then accepted as
 * a 13 character ISBN when it starts with 978 or 979 followed by nine digits and a final
 * digit or 'X', or as
 * a 10 character ISBN when it starts with nine digits and a final digit or 'X' and is not a
 * 13 character digit run with the wrong prefix.
 * Any text after the ISBN is dropped.
 *
 * @param candidates the Strings to examine
 * @return Set of strings containing the accepted ISBNs, in encounter order
 */
public static Set<String> returnValidISBNs(Set<String> candidates) {
    Set<String> isbnSet = new LinkedHashSet<String>();
    Pattern p10 = Pattern.compile("^\\d{9}[\\dX].*");
    Pattern p13 = Pattern.compile("^(978|979)\\d{9}[X\\d].*");
    // p13any matches a 13 digit isbn pattern without the correct prefix
    Pattern p13any = Pattern.compile("^\\d{12}[X\\d].*");
    for (String candidate : candidates) {
        String value = candidate.trim();
        // check we have the right pattern, and remove trailing text
        if (p13.matcher(value).matches()) {
            isbnSet.add(value.substring(0, 13));
        } else if (p10.matcher(value).matches() && !p13any.matcher(value).matches()) {
            isbnSet.add(value.substring(0, 10));
        }
    }
    return isbnSet;
}
/**
 * For each occurrence of a marc field in the tags list, extract all
 * subfield data from the field, place it in a single string (individual
 * subfield data separated by spaces) and add the string to the result set.
 * <p>
 * Fields that are not {@link DataField}s have no subfields and are
 * skipped, as are fields whose concatenated subfield data is empty. The
 * result keeps the order in which the record returns the fields.
 *
 * @param record the marc record to read the fields from
 * @param tags   the tags of the fields of interest
 * @return one String per qualifying field occurrence; never {@code null}
 */
@SuppressWarnings("unchecked")
public static final Set getAllSubfields(final Record record, String[] tags) {
    Set<String> result = new LinkedHashSet<String>();
    List varFlds = record.getVariableFields(tags);
    for (Object field : varFlds) {
        // instanceof also rejects null; a blind cast would throw
        // ClassCastException on a field that is not a DataField
        if (!(field instanceof DataField)) {
            continue;
        }
        StringBuilder buffer = new StringBuilder(500);
        for (Object entry : ((DataField) field).getSubfields()) {
            Subfield sf = (Subfield) entry;
            // separator only once something has already been written
            if (buffer.length() > 0) {
                buffer.append(' ');
            }
            buffer.append(sf.getData());
        }
        if (buffer.length() > 0) {
            result.add(buffer.toString());
        }
    }
    return result;
}
/**
 * Null-safe lookup of the data held by one subfield of a data field.
 *
 * @param df   - DataField of interest; may be null
 * @param code - code of subfield of interest
 * @return the contents of the subfield, if it exists; null otherwise
 *         (null field, no subfield with that code, or subfield without data)
 */
public static final String getSubfieldData(DataField df, char code) {
    if (df == null) {
        return null;
    }
    Subfield sf = df.getSubfield(code);
    // a missing subfield and a subfield with null data both yield null
    return (sf == null) ? null : sf.getData();
}
/**
 * Returns the data of every subfield with the given code contained in the
 * data field, in the order the field returns them.
 *
 * @param df   DataField of interest; {@code null} yields an empty list
 * @param code code of the subfields of interest
 * @return a new list with one entry per matching subfield; never
 *         {@code null}
 */
@SuppressWarnings("unchecked")
public static final List getSubfieldStrings(DataField df, char code) {
    // same null tolerance as getSubfieldData, but with an empty list as the "nothing" value
    if (df == null) {
        return new ArrayList<String>();
    }
    List listSubcode = df.getSubfields(code);
    List<String> vals = new ArrayList<String>(listSubcode.size());
    for (Object entry : listSubcode) {
        vals.add(((Subfield) entry).getData());
    }
    return vals;
}
/**
 * Maps a latin letter carrying a stroke, hook, bar, curl or similar
 * modification to the plain latin letter it is based on.
 * <p>
 * Only the code points listed in the switch below are folded; every other
 * character, including plain ASCII letters, yields {@code 0x00}.
 * <p>
 * Shamelessly stolen from UnicodeCharUtil class of UnicodeNormalizeFilter
 * by Bob Haschart
 *
 * @param c the character to fold
 * @return the base latin letter, or {@code 0x00} if {@code c} is not one of
 *         the handled characters
 */
public static char foldDiacriticLatinChar(char c) {
    switch (c) {
        // ---------- LATIN CAPITAL LETTERS ----------
        case 0x0181: // B WITH HOOK
        case 0x0182: // B WITH TOPBAR
            return 'B';
        case 0x0187: // C WITH HOOK
            return 'C';
        case 0x0110: // D WITH STROKE
        case 0x018A: // D WITH HOOK
        case 0x018B: // D WITH TOPBAR
            return 'D';
        case 0x0191: // F WITH HOOK
            return 'F';
        case 0x0193: // G WITH HOOK
        case 0x01E4: // G WITH STROKE
            return 'G';
        case 0x0126: // H WITH STROKE
            return 'H';
        case 0x0197: // I WITH STROKE
            return 'I';
        case 0x0198: // K WITH HOOK
            return 'K';
        case 0x0141: // L WITH STROKE
            return 'L';
        case 0x019D: // N WITH LEFT HOOK
        case 0x0220: // N WITH LONG RIGHT LEG
            return 'N';
        case 0x00D8: // O WITH STROKE
        case 0x019F: // O WITH MIDDLE TILDE
        case 0x01FE: // O WITH STROKE AND ACUTE
            return 'O';
        case 0x01A4: // P WITH HOOK
            return 'P';
        case 0x0166: // T WITH STROKE
        case 0x01AC: // T WITH HOOK
        case 0x01AE: // T WITH RETROFLEX HOOK
            return 'T';
        case 0x01B2: // V WITH HOOK
            return 'V';
        case 0x01B3: // Y WITH HOOK
            return 'Y';
        case 0x01B5: // Z WITH STROKE
        case 0x0224: // Z WITH HOOK
            return 'Z';

        // ---------- LATIN SMALL LETTERS ----------
        case 0x0180: // b WITH STROKE
        case 0x0183: // b WITH TOPBAR
        case 0x0253: // b WITH HOOK
            return 'b';
        case 0x0188: // c WITH HOOK
        case 0x0255: // c WITH CURL
            return 'c';
        case 0x0111: // d WITH STROKE
        case 0x018C: // d WITH TOPBAR
        case 0x0221: // d WITH CURL
        case 0x0256: // d WITH TAIL
        case 0x0257: // d WITH HOOK
            return 'd';
        case 0x0192: // f WITH HOOK
            return 'f';
        case 0x01E5: // g WITH STROKE
        case 0x0260: // g WITH HOOK
            return 'g';
        case 0x0127: // h WITH STROKE
        case 0x0266: // h WITH HOOK
            return 'h';
        case 0x0268: // i WITH STROKE
            return 'i';
        case 0x029D: // j WITH CROSSED-TAIL
            return 'j';
        case 0x0199: // k WITH HOOK
            return 'k';
        case 0x0142: // l WITH STROKE
        case 0x019A: // l WITH BAR
        case 0x0234: // l WITH CURL
        case 0x026B: // l WITH MIDDLE TILDE
        case 0x026C: // l WITH BELT
        case 0x026D: // l WITH RETROFLEX HOOK
            return 'l';
        case 0x0271: // m WITH HOOK
            return 'm';
        case 0x019E: // n WITH LONG RIGHT LEG
        case 0x0235: // n WITH CURL
        case 0x0272: // n WITH LEFT HOOK
        case 0x0273: // n WITH RETROFLEX HOOK
            return 'n';
        case 0x00F8: // o WITH STROKE
        case 0x01FF: // o WITH STROKE AND ACUTE
            return 'o';
        case 0x01A5: // p WITH HOOK
            return 'p';
        case 0x02A0: // q WITH HOOK
            return 'q';
        case 0x027C: // r WITH LONG LEG
        case 0x027D: // r WITH TAIL
            return 'r';
        case 0x0282: // s WITH HOOK
            return 's';
        case 0x0167: // t WITH STROKE
        case 0x01AB: // t WITH PALATAL HOOK
        case 0x01AD: // t WITH HOOK
        case 0x0236: // t WITH CURL
        case 0x0288: // t WITH RETROFLEX HOOK
            return 't';
        case 0x028B: // v WITH HOOK
            return 'v';
        case 0x01B4: // y WITH HOOK
            return 'y';
        case 0x01B6: // z WITH STROKE
        case 0x0225: // z WITH HOOK
        case 0x0290: // z WITH RETROFLEX HOOK
        case 0x0291: // z WITH CURL
            return 'z';

        // anything else is not handled here
        default:
            return (char) 0x00;
    }
}
// @SuppressWarnings("unchecked")
// public static void setLog4jLogLevel(org.apache.log4j.Level newLevel)
// {
// Logger rootLogger = org.apache.log4j.Logger.getRootLogger();
// Enumeration enLogger = rootLogger.getLoggerRepository().getCurrentLoggers();
// Logger tmpLogger = null;
// /* If logger is root, then need to loop through all loggers under root
// * and change their logging levels too. Also, skip sql loggers so
// * they do not get affected.
// */
// while(enLogger.hasMoreElements())
// {
// tmpLogger = (Logger)(enLogger.nextElement());
// tmpLogger.setLevel(newLevel);
// }
// Enumeration enAppenders = rootLogger.getAllAppenders();
// Appender appender;
// while(enAppenders.hasMoreElements())
// {
// appender = (Appender)enAppenders.nextElement();
//
// if(appender instanceof AsyncAppender)
// {
// AsyncAppender asyncAppender = (AsyncAppender)appender;
// asyncAppender.activateOptions();
//// rfa = (RollingFileAppender)asyncAppender.getAppender("R");
//// rfa.activateOptions();
//// ca = (ConsoleAppender)asyncAppender.getAppender("STDOUT");
//// ca.activateOptions();
// }
// }
//
// }
}