apoc.text.Strings Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of apoc Show documentation
Show all versions of apoc Show documentation
A collection of useful Neo4j Procedures
package apoc.text;
import apoc.util.Util;
import org.apache.commons.text.similarity.HammingDistance;
import org.apache.commons.text.similarity.JaroWinklerDistance;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.helpers.collection.Pair;
import org.neo4j.procedure.Description;
import apoc.result.StringResult;
import org.neo4j.procedure.Name;
import org.neo4j.procedure.Procedure;
import org.neo4j.procedure.UserFunction;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.text.Normalizer;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
import static apoc.util.Util.quote;
import static java.lang.Math.toIntExact;
import static java.util.Arrays.asList;
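/**
* Text-processing user functions published under the {@code apoc.text} namespace
* (searching, regex replace/split, cleaning, similarity measures, URL encoding, padding).
*
* @author mh
* @since 05.05.16
*/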
public class Strings {
// Shared Apache Commons Text calculators; each is applied by the user function of the same name in this class.
private final static HammingDistance hammingDistance = new HammingDistance();
private final static JaroWinklerDistance jaroWinklerDistance = new JaroWinklerDistance();
private final static LevenshteinDistance levenshteinDistance = new LevenshteinDistance();
/**
 * Locates the first occurrence of {@code lookup} inside {@code text}.
 *
 * @param text   string to search; {@code null} yields {@code null}
 * @param lookup string to look for; {@code null} yields {@code -1}
 * @param from   index at which the search starts (inclusive)
 * @param to     index at which the search stops (exclusive); {@code -1} or a value
 *               beyond the text length means "search to the end of the text"
 * @return index of the first match, or {@code -1} when there is none
 */
@UserFunction
@Description("apoc.text.indexOf(text, lookup, from=0, to=-1==len) - find the first occurence of the lookup string in the text, from inclusive, to exclusive, -1 if not found, null if text is null.")
public Long indexOf(final @Name("text") String text, final @Name("lookup") String lookup, final @Name(value = "from", defaultValue = "0") long from, @Name(value = "to", defaultValue = "-1") long to) {
    if (text == null) {
        return null;
    }
    if (lookup == null) {
        return -1L;
    }
    final int start = (int) from;
    final boolean searchToEnd = to == -1L || to > text.length();
    if (searchToEnd) {
        return (long) text.indexOf(lookup, start);
    }
    if (to <= from) {
        return -1L;
    }
    // The text is cut at `to` before searching, so a match has to end before `to` to count.
    final String window = text.substring(0, (int) to);
    return (long) window.indexOf(lookup, start);
}
/**
 * Collects the start index of every occurrence of {@code lookup} inside {@code text}.
 * Occurrences may overlap, because the search resumes one character after each hit.
 *
 * @param text   string to search; {@code null} yields {@code null}
 * @param lookup string to look for; {@code null} yields an empty list
 * @param from   index at which the search starts (inclusive)
 * @param to     upper bound (exclusive) for reported start indexes; {@code -1} or a value
 *               beyond the text length means "up to the end of the text"
 * @return start indexes in ascending order, empty if nothing was found
 */
@UserFunction
@Description("apoc.text.indexesOf(text, lookup, from=0, to=-1==len) - finds all occurences of the lookup string in the text, return list, from inclusive, to exclusive, empty list if not found, null if text is null.")
public List<Long> indexesOf(final @Name("text") String text, final @Name("lookup") String lookup, final @Name(value = "from", defaultValue = "0") long from, @Name(value = "to", defaultValue = "-1") long to) {
    if (text == null) {
        return null;
    }
    if (lookup == null) {
        return Collections.emptyList();
    }
    // Clamp the bound to the text length. Without this, an empty lookup combined with
    // `to` > length never terminates: String.indexOf("", n) keeps returning length for n >= length.
    final long end = (to == -1L || to > text.length()) ? text.length() : to;
    final List<Long> result = new ArrayList<>();
    int idx = text.indexOf(lookup, (int) from);
    while (idx != -1 && idx < end) {
        result.add((long) idx);
        idx = text.indexOf(lookup, idx + 1);
    }
    return result;
}
/**
 * Alias for {@link #regreplace(String, String, String)}; all arguments are passed through unchanged.
 *
 * @param text        input string
 * @param regex       regular expression to match
 * @param replacement replacement for each match
 * @return whatever {@code regreplace} returns for the same arguments
 */
@UserFunction
@Description("apoc.text.replace(text, regex, replacement) - replace each substring of the given string that matches the given regular expression with the given replacement.")
public String replace(
        final @Name("text") String text,
        final @Name("regex") String regex,
        final @Name("replacement") String replacement) {
    return this.regreplace(text, regex, replacement);
}
/**
 * Reports how many bytes {@code text} occupies when encoded with {@code charset}.
 *
 * @param text    string to measure
 * @param charset name of the charset used for encoding (defaults to UTF-8)
 * @return number of bytes in the encoded form
 * @throws UnsupportedEncodingException if the named charset is not supported
 */
@UserFunction
@Description("apoc.text.byteCount(text,[charset]) - return size of text in bytes")
public long byteCount(final @Name("text") String text, @Name(value = "charset", defaultValue = "UTF-8") String charset) throws UnsupportedEncodingException {
    final byte[] encoded = text.getBytes(charset);
    return encoded.length;
}
/**
 * Encodes {@code text} with {@code charset} and returns the bytes as unsigned values (0-255).
 *
 * @param text    string to encode; {@code null} yields {@code null}
 * @param charset name of the charset used for encoding (defaults to UTF-8)
 * @return one list entry per encoded byte, in order
 * @throws UnsupportedEncodingException if the named charset is not supported
 */
@UserFunction
@Description("apoc.text.bytes(text,[charset]) - return bytes of the text")
public List<Long> bytes(final @Name("text") String text, @Name(value = "charset", defaultValue = "UTF-8") String charset) throws UnsupportedEncodingException {
    if (text == null) {
        return null;
    }
    final byte[] encoded = text.getBytes(charset);
    final List<Long> result = new ArrayList<>(encoded.length);
    for (byte b : encoded) {
        // Mask so that bytes >= 0x80 are reported as 128..255 rather than as negative numbers.
        result.add(b & 0xFFL);
    }
    return result;
}
/**
 * Replaces every match of {@code regex} in {@code text} with {@code replacement}.
 *
 * @param text        input string
 * @param regex       regular expression to match
 * @param replacement replacement for each match (regex replacement syntax applies)
 * @return the rewritten string, or {@code null} if any argument is {@code null}
 */
@UserFunction
@Description("apoc.text.regreplace(text, regex, replacement) - replace each substring of the given string that matches the given regular expression with the given replacement.")
public String regreplace(
        final @Name("text") String text,
        final @Name("regex") String regex,
        final @Name("replacement") String replacement) {
    final boolean allPresent = text != null && regex != null && replacement != null;
    return allPresent ? text.replaceAll(regex, replacement) : null;
}
/**
 * Splits {@code text} around matches of {@code regex}.
 *
 * @param text  string to split
 * @param regex delimiter pattern
 * @param limit passed to {@link String#split(String, int)} after narrowing to {@code int}
 * @return a new mutable list of the parts, or {@code null} if any argument is {@code null}
 */
@UserFunction
@Description("apoc.text.split(text, regex, limit) - splits the given text around matches of the given regex.")
public List<String> split(final @Name("text") String text, final @Name("regex") String regex, final @Name(value = "limit", defaultValue = "0") Long limit) {
    if (text == null || regex == null || limit == null) {
        return null;
    }
    final String[] parts = text.split(regex, limit.intValue());
    // Copy into an ArrayList so the caller gets a resizable list rather than a fixed-size array view.
    return new ArrayList<>(Arrays.asList(parts));
}
@UserFunction
@Description("apoc.text.regexGroups(text, regex) - return all matching groups of the regex on the given text.")
public List> regexGroups(final @Name("text") String text, final @Name("regex") String regex) {
if (text==null || regex==null) {
return Collections.EMPTY_LIST;
} else {
final Pattern pattern = Pattern.compile(regex);
final Matcher matcher = pattern.matcher(text);
List> result = new ArrayList<>();
while (matcher.find()) {
List matchResult = new ArrayList<>();
for (int i=0;i<=matcher.groupCount(); i++) {
matchResult.add(matcher.group(i));
}
result.add(matchResult);
}
return result;
}
}
/**
 * Concatenates {@code texts}, inserting {@code delimiter} between consecutive elements.
 *
 * @param texts     strings to join
 * @param delimiter separator placed between elements
 * @return the joined string, or {@code null} if either argument is {@code null}
 */
@UserFunction
@Description("apoc.text.join(['text1','text2',...], delimiter) - join the given strings with the given delimiter.")
public String join(
        final @Name("texts") List<String> texts,
        final @Name("delimiter") String delimiter) {
    if (texts == null || delimiter == null) {
        return null;
    }
    return String.join(delimiter, texts);
}
/**
 * Normalises {@code text} by handing it to {@code removeNonWordCharacters}.
 *
 * @param text string to normalise
 * @return the normalised string, or {@code null} when {@code text} is {@code null}
 */
@UserFunction
@Description("apoc.text.clean(text) - strip the given string of everything except alpha numeric characters and convert it to lower case.")
public String clean(final @Name("text") String text) {
    if (text == null) {
        return null;
    }
    return removeNonWordCharacters(text);
}
/**
 * Compares two strings after both have been passed through {@code removeNonWordCharacters}.
 *
 * @param text1 first string
 * @param text2 second string
 * @return {@code true} if the normalised forms are equal; {@code false} otherwise,
 *         including when either argument is {@code null}
 */
@UserFunction
@Description("apoc.text.compareCleaned(text1, text2) - compare the given strings stripped of everything except alpha numeric characters converted to lower case.")
public boolean compareCleaned(final @Name("text1") String text1, final @Name("text2") String text2) {
    final boolean bothPresent = text1 != null && text2 != null;
    if (!bothPresent) {
        return false;
    }
    final String left = removeNonWordCharacters(text1);
    final String right = removeNonWordCharacters(text2);
    return left.equals(right);
}
/**
 * Alias for {@link #levenshteinDistance(String, String)}; both arguments are passed through unchanged.
 *
 * @param text1 first string
 * @param text2 second string
 * @return whatever {@code levenshteinDistance} returns for the same arguments
 */
@UserFunction
@Description("apoc.text.distance(text1, text2) - compare the given strings with the Levenshtein distance algorithm.")
public Long distance(
        final @Name("text1") String text1,
        @Name("text2") final String text2) {
    return this.levenshteinDistance(text1, text2);
}
/**
 * Applies the shared {@code LevenshteinDistance} calculator to the two strings.
 *
 * @param text1 first string
 * @param text2 second string
 * @return the calculator's result widened to {@code long}, or {@code null} if either argument is {@code null}
 */
@UserFunction
@Description("apoc.text.levenshteinDistance(text1, text2) - compare the given strings with the Levenshtein distance algorithm.")
public Long levenshteinDistance(final @Name("text1") String text1, @Name("text2") final String text2) {
    if (text1 != null && text2 != null) {
        final long edits = (long) levenshteinDistance.apply(text1, text2);
        return edits;
    }
    return null;
}
/**
 * Converts the edit distance between two strings into a similarity ratio:
 * {@code (longerLength - distance) / longerLength}.
 *
 * @param text1 first string
 * @param text2 second string
 * @return the ratio; {@code 1.0} when both strings are empty; {@code null} if either argument is {@code null}
 */
@UserFunction
@Description( "apoc.text.levenshteinSimilarity(text1, text2) - calculate the similarity (a value within 0 and 1) between two texts." )
public Double levenshteinSimilarity(final @Name("text1") String text1, @Name("text2") final String text2) {
    if (text1 == null || text2 == null) {
        return null;
    }
    final int maxLength = Math.max(text1.length(), text2.length());
    if (maxLength == 0) {
        // Two empty strings are identical; returning here also avoids dividing by zero.
        return 1.0;
    }
    final long edits = distance(text1, text2);
    final long unchanged = maxLength - edits;
    return unchanged / (double) maxLength;
}
/**
 * Applies the shared {@code HammingDistance} calculator to the two strings.
 *
 * @param text1 first string
 * @param text2 second string
 * @return the calculator's result widened to {@code long}, or {@code null} if either argument is {@code null}
 */
@UserFunction
@Description( "apoc.text.hammingDistance(text1, text2) - compare the given strings with the Hamming distance algorithm." )
public Long hammingDistance(final @Name("text1") String text1, @Name("text2") final String text2) {
    if (text1 != null && text2 != null) {
        final long mismatches = (long) hammingDistance.apply(text1, text2);
        return mismatches;
    }
    return null;
}
/**
 * Applies the shared {@code JaroWinklerDistance} calculator to the two strings.
 *
 * @param text1 first string
 * @param text2 second string
 * @return the calculator's result, or {@code null} if either argument is {@code null}
 */
@UserFunction
@Description( "apoc.text.jaroWinklerDistance(text1, text2) - compare the given strings with the Jaro-Winkler distance algorithm." )
public Double jaroWinklerDistance(final @Name("text1") String text1, @Name("text2") final String text2) {
    final boolean bothPresent = text1 != null && text2 != null;
    return bothPresent ? jaroWinklerDistance.apply(text1, text2) : null;
}
/**
 * Delegates to {@code SorensenDiceCoefficient.compute} for the two strings and the language tag.
 *
 * @param text1       first string
 * @param text2       second string
 * @param languageTag language tag forwarded to the computation (defaults to {@code "en"})
 * @return the computed coefficient, or {@code null} if any argument is {@code null}
 */
@UserFunction
// The description previously advertised "sorensenDiceSimilarityWithLanguage", which is not this function's name.
@Description("apoc.text.sorensenDiceSimilarity(text1, text2, languageTag) - compare the given strings with the Sørensen–Dice coefficient formula, with the provided IETF language tag")
public Double sorensenDiceSimilarity(final @Name("text1") String text1, final @Name("text2") String text2, final @Name(value = "languageTag", defaultValue = "en") String languageTag) {
    if (text1 == null || text2 == null || languageTag == null) {
        return null;
    }
    return SorensenDiceCoefficient.compute(text1, text2, languageTag);
}
/**
 * Decides whether {@code text2} is within an allowed edit distance of {@code text1}.
 * The allowance depends on the length of {@code text1}: 0 edits below 3 characters,
 * 1 edit below 5 characters, 2 edits otherwise.
 *
 * @param text1 reference string whose length sets the allowance
 * @param text2 string to compare against the reference
 * @return {@code true} if the distance is within the allowance; {@code null} if either argument is {@code null}
 */
@UserFunction
@Description("apoc.text.fuzzyMatch(text1, text2) - check if 2 words can be matched in a fuzzy way. Depending on the" +
        " length of the String it will allow more characters that needs to be edited to match the second String.")
public Boolean fuzzyMatch(final @Name("text1") String text1, @Name("text2") final String text2) {
    if (text1 == null || text2 == null) {
        return null;
    }
    final int termLength = text1.length();
    final int maxDistanceAllowed;
    if (termLength < 3) {
        maxDistanceAllowed = 0;
    } else if (termLength < 5) {
        maxDistanceAllowed = 1;
    } else {
        maxDistanceAllowed = 2;
    }
    final long editDistance = distance(text1, text2);
    return editDistance <= maxDistanceAllowed;
}
/**
 * URL-encodes {@code text} using UTF-8 via {@link URLEncoder}.
 *
 * @param text string to encode
 * @return the encoded string
 * @throws RuntimeException wrapping the {@link UnsupportedEncodingException} if UTF-8 is unavailable
 */
@UserFunction
@Description("apoc.text.urlencode(text) - return the urlencoded text")
public String urlencode(@Name("text") String text) {
    final String encoded;
    try {
        encoded = URLEncoder.encode(text, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("urlencoding failed", e);
    }
    return encoded;
}
/**
 * Decodes URL-encoded {@code text} using UTF-8 via {@link URLDecoder}.
 *
 * @param text string to decode
 * @return the decoded string
 * @throws RuntimeException wrapping the {@link UnsupportedEncodingException} if UTF-8 is unavailable
 */
@UserFunction
@Description("apoc.text.urldecode(text) - return the urldecoded text")
public String urldecode(@Name("text") String text) {
    final String decoded;
    try {
        decoded = URLDecoder.decode(text, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("urldecoding failed", e);
    }
    return decoded;
}
// Matches one or more consecutive characters that are neither Unicode letters (\p{L}) nor Unicode numbers (\p{N}).
private static Pattern cleanPattern = Pattern.compile("[^\\p{L}\\p{N}]+");
// Matches one or more consecutive characters in the Unicode "Mark" category (e.g. combining accents).
private static Pattern specialCharPattern = Pattern.compile("\\p{IsM}+");
// Pairs of { character, ASCII transliteration } for the German umlauts and sharp s.
// NOTE(review): presumably consumed by removeNonWordCharacters — its loop body is not visible here; confirm.
private static String[][] UMLAUT_REPLACEMENTS = {
{ new String("Ä"), "Ae" },
{ new String("Ü"), "Ue" },
{ new String("Ö"), "Oe" },
{ new String("ä"), "ae" },
{ new String("ü"), "ue" },
{ new String("ö"), "oe" },
{ new String("ß"), "ss" }
};
// NOTE(review): this span looks corrupted. The `for` header two lines down breaks off at `i=0; i`
// and resumes with `= count) return text;`, so the rest of removeNonWordCharacters and the start of
// the next definition appear to be missing (possibly lost when markup was stripped — TODO confirm
// against the original file). The statements that follow build a run of fill characters from
// delim.charAt(0) and append it before `text`, which resembles the tail of a left-padding routine.
// Restore the missing code before relying on this block; it does not compile as shown.
private static String removeNonWordCharacters(String s) {
String result = s ;
for (int i=0; i= count) return text;
StringBuilder sb = new StringBuilder((int)count);
char[] chars = new char[(int)count - len];
Arrays.fill(chars, delim.charAt(0));
sb.append(chars);
sb.append(text);
return sb.toString();
}
/**
 * Pads {@code text} on the right until it is {@code count} characters long.
 * Only the first character of {@code delim} is used as the fill character.
 *
 * @param text  string to pad
 * @param count target width; if the text is already at least this long it is returned unchanged
 * @param delim source of the fill character (defaults to a single space)
 * @return the padded string
 */
@UserFunction
@Description("apoc.text.rpad(text,count,delim) YIELD value - right pad the string to the given width")
public String rpad(@Name("text") String text, @Name("count") long count, @Name(value = "delim", defaultValue = " ") String delim) {
    final int textLength = text.length();
    if (count <= textLength) {
        return text;
    }
    final char[] padding = new char[(int) count - textLength];
    final char fill = delim.charAt(0);
    Arrays.fill(padding, fill);
    return new StringBuilder(text).append(padding).toString();
}
@UserFunction
@Description("apoc.text.format(text,[params],language) - sprintf format the string with the params given")
public String format(@Name("text") String text, @Name("params") List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy