com.day.text.TextUtils Maven / Gradle / Ivy
Show all versions of aem-sdk-api Show documentation
/*************************************************************************
*
* ADOBE CONFIDENTIAL
* __________________
*
* Copyright 2012 Adobe Systems Incorporated
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any. The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.day.text;
import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Simple text manipulation tasks - these aren't optimized, and are intended
* primarily to be used by test code (e.g. to compare the values returned
* from functions with the values expected).
*/
public class TextUtils {
/** default log */
private static final Logger log = LoggerFactory.getLogger(TextUtils.class);
/**
 * Removes ASCII control characters from a string.
 *
 * Every character below the space character (codes 0-31) and the DEL
 * character (code 127) is dropped; all other characters are kept in their
 * original order.
 *
 * @param characters the text to filter; must not be null
 * @return the text with the control characters removed
 * @throws NullPointerException if characters is null
 */
public static String removeControlCharacters(String characters) {
    // Parameterized form: the message (which embeds the whole input) is only
    // assembled when debug logging is actually enabled.
    log.debug("Starting removeControlCharacters (characters : {} , )", characters);
    StringBuilder result = new StringBuilder(characters.length());
    for (int i = 0; i < characters.length(); i++) {
        char c = characters.charAt(i);
        boolean isControl = c < ' ' || c == 127;
        if (!isControl) {
            result.append(c);
        }
    }
    return result.toString();
}
/**
 * Removes spaces, newlines, carriage returns and tabs from a string.
 *
 * Only these four characters are treated as whitespace here; any other
 * character (including other Unicode whitespace) is kept.
 *
 * @param characters the text to filter; must not be null
 * @return the text without the four whitespace characters
 */
public static String removeWhitespace(String characters) {
    StringBuilder kept = new StringBuilder(characters.length());
    for (char current : characters.toCharArray()) {
        switch (current) {
            case ' ':
            case '\n':
            case '\r':
            case '\t':
                // whitespace: drop it
                break;
            default:
                kept.append(current);
                break;
        }
    }
    return kept.toString();
}
/**
 * Decides whether two strings are approximately "similar", meaning that
 * they contain the same number of each character once case and whitespace
 * are ignored.
 *
 * Each input is lower-cased, has its tabs and line breaks turned into
 * spaces, is sorted character by character and finally has its block of
 * spaces removed via {@link #stripSortedWhitespace(String)}. The two
 * resulting strings are then compared for equality.
 *
 * Because only character counts are compared, anagrams are reported as
 * similar. For example "Watch the carthorse" and "Watch the orchestra"
 * both reduce to "aacceehhhorrstttw" and are therefore considered equal.
 *
 * This is mostly useful as a simple way of comparing data that may be
 * ordered differently while still being functionally equivalent - for
 * example DTDs.
 *
 * @param docOne the first text; must not be null
 * @param docTwo the second text; must not be null
 * @return true if both texts reduce to the same sorted character sequence
 */
public static boolean isTextSimilar(String docOne, String docTwo) {
    // Reduce the first document completely before touching the second one.
    String spacedOne = convertWhitespaceToSpaces(docOne.toLowerCase());
    String signatureOne = stripSortedWhitespace(sortString(spacedOne));

    String spacedTwo = convertWhitespaceToSpaces(docTwo.toLowerCase());
    String signatureTwo = stripSortedWhitespace(sortString(spacedTwo));

    return signatureOne.equals(signatureTwo);
}
/**
 * Performs the same comparison as {@link #isTextSimilar(String, String)},
 * but additionally prints both reduced strings to standard output so a
 * mismatch can be inspected by eye.
 *
 * @param docOne the first text; must not be null
 * @param docTwo the second text; must not be null
 * @return true if both texts reduce to the same sorted character sequence
 */
public static boolean noisyIsTextSimilar(String docOne, String docTwo) {
    // Reduce the first document completely before touching the second one.
    String spacedOne = convertWhitespaceToSpaces(docOne.toLowerCase());
    String signatureOne = stripSortedWhitespace(sortString(spacedOne));

    String spacedTwo = convertWhitespaceToSpaces(docTwo.toLowerCase());
    String signatureTwo = stripSortedWhitespace(sortString(spacedTwo));

    System.out.println("Comparing :" + signatureOne);
    System.out.println("Compare to :" + signatureTwo);

    boolean same = signatureOne.equals(signatureTwo);
    return same;
}
/**
 * Replaces every tab, newline and carriage return with a space, one for
 * one, so that all whitespace ends up as the same character and can be
 * removed more easily afterwards. Runs of whitespace are not collapsed.
 *
 * @param s the text to convert; must not be null
 * @return the text with tabs and line breaks replaced by spaces
 */
private static String convertWhitespaceToSpaces(String s) {
    final char space = ' ';
    return s.replace('\t', space)
            .replace('\n', space)
            .replace('\r', space);
}
/**
 * Removes one block of contiguous spaces from a string.
 *
 * Everything from the first space up to and including the last space is
 * cut out, so the result is only correct when all spaces sit next to each
 * other - which is the case after {@link #sortString(String)}. If there is
 * more than one block of spaces, any characters between the blocks are
 * removed as well.
 *
 * A string that contains no space at all is returned unchanged.
 *
 * @param s the (sorted) text; must not be null
 * @return the text without its block of spaces
 */
protected static String stripSortedWhitespace(String s) {
    int firstSpace = s.indexOf(' ');
    if (firstSpace < 0) {
        // No space present: substring(0, -1) would throw, and there is
        // nothing to strip anyway.
        return s;
    }
    int lastSpace = s.lastIndexOf(' ');
    return s.substring(0, firstSpace) + s.substring(lastSpace + 1);
}
/**
 * Returns a string made of the same characters as the input, arranged in
 * ascending order of their char values.
 *
 * @param in the text to sort; must not be null
 * @return a new string containing the sorted characters
 */
protected static String sortString(String in) {
    char[] sorted = in.toCharArray();
    Arrays.sort(sorted);
    return String.valueOf(sorted);
}
/**
 * Removes all whitespace from a string, where whitespace is whatever
 * {@link Character#isWhitespace(char)} reports as such. Most useful for
 * test purposes, when two strings are compared and whitespace is not
 * syntactically significant.
 *
 * Note that this is an instance method, not a static one.
 *
 * @param s the text to strip; must not be null
 * @return the text without any whitespace characters
 */
public String stripWhitespace(String s) {
    StringBuilder stripped = new StringBuilder(s.length());
    for (char current : s.toCharArray()) {
        if (Character.isWhitespace(current)) {
            continue;
        }
        stripped.append(current);
    }
    return stripped.toString();
}
/**
 * Escapes the characters that are not allowed unescaped in a text string
 * in a CQ "any" file.
 *
 * A backslash is inserted in front of every double quote, newline,
 * carriage return, tab and backslash. The character itself is kept as it
 * is - a newline, for instance, stays a real newline preceded by a
 * backslash; it is not turned into the letter 'n'.
 *
 * @param s the text to escape; must not be null
 * @return the escaped text
 * @throws NullPointerException if s is null
 */
public static String sanitizeForCommunique(String s) {
    // Parameterized form: the message (which embeds the whole input) is only
    // assembled when debug logging is actually enabled.
    log.debug("Starting sanitizeForCommunique (s : {} , )", s);
    StringBuilder out = new StringBuilder(s.length() + 16);
    for (int i = 0; i < s.length(); i++) {
        char c = s.charAt(i);
        switch (c) {
            case '"':
            case '\n':
            case '\r':
            case '\t':
            case '\\':
                out.append('\\');
                break;
            default:
                break;
        }
        out.append(c);
    }
    return out.toString();
}
/**
 * Replaces every occurrence of one string within another.
 *
 * The text is scanned from left to right and matches do not overlap: the
 * search for the next match resumes right after the previous one.
 *
 * If toFind does not occur, or toFind is the empty string, the original
 * text is returned unchanged. (An empty search string matches at every
 * position without ever advancing, so it is treated as "nothing to
 * replace" rather than looping forever.)
 *
 * @param allOfTheText the text to search in; must not be null
 * @param toFind the string to look for; must not be null
 * @param toReplace the replacement for each match
 * @return the text with all matches replaced
 * @throws NullPointerException if allOfTheText or toFind is null
 */
public static String replace(String allOfTheText, String toFind,
        String toReplace) {
    // Parameterized form so the (potentially large) message is only built
    // when debug logging is enabled. The explicit Object[] works with both
    // the varargs and the pre-varargs SLF4J signatures.
    log.debug("Starting replace (allOfTheText : {} , toFind : {} , toReplace : {} , )",
            new Object[] {allOfTheText, toFind, toReplace});

    int nextFound = allOfTheText.indexOf(toFind, 0);
    if (nextFound == -1 || toFind.length() == 0) {
        return allOfTheText;
    }

    // String will be built up here
    StringBuilder dest = new StringBuilder(allOfTheText.length());
    int lastFound = 0;
    while (nextFound != -1) {
        // Copy the untouched stretch before the match, then the replacement.
        dest.append(allOfTheText, lastFound, nextFound);
        dest.append(toReplace);
        lastFound = nextFound + toFind.length();
        nextFound = allOfTheText.indexOf(toFind, lastFound);
    }
    // Whatever follows the final match.
    dest.append(allOfTheText, lastFound, allOfTheText.length());
    return dest.toString();
}
/**
 * Removes all '/' characters from the end of a string.
 *
 * @param input the text to trim; must not be null
 * @return the text without trailing slashes; a string made only of slashes
 *         becomes the empty string
 * @throws NullPointerException if input is null
 */
public static String trimTrailingSlashes(String input) {
    // Parameterized form: the message is only assembled when debug logging
    // is actually enabled.
    log.debug("Starting trimTrailingSlashes (input : {} , )", input);
    // Find the cut position first so that only one substring is created,
    // instead of one copy per removed slash.
    int end = input.length();
    while (end > 0 && input.charAt(end - 1) == '/') {
        end--;
    }
    return input.substring(0, end);
}
/**
 * Removes all '/' characters from the start of a string.
 *
 * @param input the text to trim; must not be null
 * @return the text without leading slashes; a string made only of slashes
 *         becomes the empty string
 * @throws NullPointerException if input is null
 */
public static String trimLeadingSlashes(String input) {
    // Parameterized form: the message is only assembled when debug logging
    // is actually enabled.
    log.debug("Starting trimLeadingSlashes (input : {} , )", input);
    // Find the cut position first so that only one substring is created,
    // instead of one copy per removed slash.
    int start = 0;
    int length = input.length();
    while (start < length && input.charAt(start) == '/') {
        start++;
    }
    return input.substring(start);
}
/**
 * Tells whether one string occurs exactly once inside another. This is a
 * shorthand for {@link #occursExactlyNTimes(String, String, int)} with a
 * count of 1.
 *
 * @param big the text to search in
 * @param small the text to look for
 * @return true if small occurs exactly one time in big
 */
public static boolean occursOnceOnly(String big, String small) {
    final int exactlyOnce = 1;
    return occursExactlyNTimes(big, small, exactlyOnce);
}
/**
 * Tells whether one string occurs exactly n times inside another, as
 * counted by {@link #countOccurences(String, String)}.
 *
 * Arguments that cannot be counted - a null big, a null small or an empty
 * small - yield false rather than an exception.
 *
 * @param big the text to search in
 * @param small the text to look for
 * @param n the expected number of occurrences
 * @return true if small occurs exactly n times in big; false otherwise,
 *         including for null or empty arguments as described above
 */
public static boolean occursExactlyNTimes(String big, String small, int n) {
    // Check the uncountable cases up front instead of catching the
    // NullPointerException that countOccurences throws for them.
    if (big == null || small == null || "".equals(small)) {
        return false;
    }
    return countOccurences(big, small) == n;
}
/**
 * Counts how often one string occurs inside another.
 *
 * The text is scanned from left to right and occurrences do not overlap:
 * after a match the search resumes right behind it, so "aa" is found twice
 * in "aaaa".
 *
 * @param big the text to search in; must not be null
 * @param small the text to look for; must be neither null nor empty
 * @return the number of non-overlapping occurrences of small in big
 * @throws NullPointerException if big or small is null, or small is empty
 */
public static int countOccurences(String big, String small) {
    if (big == null || small == null || small.length() == 0) {
        throw new NullPointerException(
                "Cannot count occurrences of the empty or null String.");
    }
    int total = 0;
    // small is not empty, so every step moves the search position forward.
    // An empty big simply yields no match and therefore a count of 0.
    int at = big.indexOf(small);
    while (at != -1) {
        total++;
        at = big.indexOf(small, at + small.length());
    }
    return total;
}
/**
 * Removes the last level from a "slashy string" representation of a
 * hierarchical path.
 *
 * For example, '/a/b/c' becomes '/a/b'. Trailing slashes are ignored, so
 * '/a/b/c/' becomes '/a/b' as well. The inputs '/' and '' are returned
 * unchanged.
 *
 * If no slash is left once the trailing slashes have been trimmed (for
 * example 'abc' or '//'), there is no parent level and the empty string is
 * returned.
 *
 * @param input the path to shorten; must not be null
 * @return the path without its last level
 * @throws NullPointerException if input is null
 */
public static String trimLastSlashLevel(String input) {
    if (input.equals("/") || input.equals("")) {
        return input;
    }
    String output = trimTrailingSlashes(input);
    int lastSlash = output.lastIndexOf('/');
    if (lastSlash < 0) {
        // Without this guard substring(0, -1) would throw.
        return "";
    }
    return output.substring(0, lastSlash);
}
}