
com.intuit.fuzzymatcher.component.Dictionary Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fuzzy-matcher Show documentation
Show all versions of fuzzy-matcher Show documentation
A java library to determine probability of objects being similar
package com.intuit.fuzzymatcher.component;
import com.intuit.fuzzymatcher.exception.MatchException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Used for pre-processing: the Dictionary caches pre-defined normalizations and replacements for
 * common tokens found in names and addresses.
 *
 * <p>Each dictionary is read from a classpath resource containing one {@code key:value} entry per
 * line. Every line is lower-cased, split on the first {@code ':'}, and both halves are trimmed.
 * When a key occurs more than once, the first occurrence wins. Lines that contain no {@code ':'}
 * (for example blank lines) are skipped.
 *
 * <p>The loaded maps are stored in static fields that are assigned by the
 * {@code @PostConstruct}-annotated methods of this class; the static getters therefore return
 * {@code null} until those methods have run.
 */
@Component
public class Dictionary {

    private static final Logger LOGGER = LoggerFactory.getLogger(Dictionary.class);

    private static Map<String, String> addressDictionary;
    private static Map<String, String> nameDictionary;

    @Value("classpath:address-dictionary.txt")
    private Resource addressDictonaryPath;

    @Value("classpath:name-dictionary.txt")
    private Resource nameDictonaryPath;

    /**
     * @return the address dictionary, or {@code null} if {@link #setAddressDictionary()} has not
     *         run yet
     */
    public static Map<String, String> getAddressDictionary() {
        return addressDictionary;
    }

    /**
     * @return the name dictionary, or {@code null} if {@link #setNameDictionary()} has not run yet
     */
    public static Map<String, String> getNameDictionary() {
        return nameDictionary;
    }

    /**
     * Loads the address dictionary from {@code address-dictionary.txt} into the static cache.
     *
     * @throws MatchException if the resource cannot be read
     */
    @PostConstruct
    public void setAddressDictionary() {
        try {
            addressDictionary = getDictionary(addressDictonaryPath);
        } catch (IOException e) {
            LOGGER.error("could not load address dictonary", e);
            throw new MatchException("could not load address dictonary", e);
        }
    }

    /**
     * Loads the name dictionary from {@code name-dictionary.txt} into the static cache.
     *
     * @throws MatchException if the resource cannot be read
     */
    @PostConstruct
    public void setNameDictionary() {
        try {
            nameDictionary = getDictionary(nameDictonaryPath);
        } catch (IOException e) {
            // Report the dictionary that actually failed (this previously said "address").
            LOGGER.error("could not load name dictonary", e);
            throw new MatchException("could not load name dictonary", e);
        }
    }

    /**
     * Parses a {@code key:value}-per-line resource into a map.
     *
     * @param resource the resource to read; decoded as UTF-8
     * @return a map from trimmed, lower-cased key to trimmed, lower-cased value
     * @throws IOException if the resource cannot be opened or read
     */
    private Map<String, String> getDictionary(Resource resource) throws IOException {
        // try-with-resources closes the reader (and the underlying stream) even when parsing fails.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(resource.getInputStream(), StandardCharsets.UTF_8))) {
            return reader.lines()
                    // Locale.ROOT keeps the casing independent of the JVM's default locale.
                    .map(line -> line.toLowerCase(Locale.ROOT))
                    .map(line -> line.split(":", 2))
                    // A line without ':' yields a single-element array; skip it instead of
                    // failing with ArrayIndexOutOfBoundsException.
                    .filter(parts -> parts.length == 2)
                    .collect(Collectors.toMap(
                            parts -> parts[0].trim(),
                            parts -> parts[1].trim(),
                            (first, duplicate) -> first));
        } catch (UncheckedIOException e) {
            // BufferedReader.lines() wraps read failures; unwrap so callers' IOException
            // handling (and the MatchException translation) applies to them as well.
            throw e.getCause();
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy