All downloads are free. Search and download functionality uses the official Maven repository.

com.day.text.TextUtils Maven / Gradle / Ivy

There is a newer version: 6.5.21
Show newest version
/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * __________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/

package com.day.text;

import java.util.Arrays;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Simple text manipulation tasks - these aren't optimized, and are intended
 * primarily to be used by test code. (eg. to compare returned values from
 * functions with the values expected).
 */
public class TextUtils {

    /** default log */
    private static final Logger log = LoggerFactory.getLogger(TextUtils.class);

    public static String removeControlCharacters(String characters) {
        log.debug("Starting removeControlCharacters (" + "" + "characters : "
            + characters + " , " + ")");

        StringBuffer result = new StringBuffer();
        for (int i = 0; i < characters.length(); i++) {
            char c = characters.charAt(i);
            if (c < ' ' || c == 127) continue;
            result.append((char) c);
        }
        return result.toString();
    }

    public static String removeWhitespace(String characters) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < characters.length(); i++) {
            char c = characters.charAt(i);
            if ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
                continue;
            result.append((char) c);
        }
        return result.toString();
    }

    /**
     * Compare two strings, and determine whether they are approximately
     * "similar" - by this we mean whether they have the same numbers of each
     * character in (barring whitespace). We do this by sorting the strings as a
     * character array and stripping whitespace.
     * 

* This will generate false positives on some text - for example anagrams. *

* For example, the text "Watch the carthorse" and "Watch the orchestra" * will both be converted to "aacceehhhorrstttw" and compared - so they will * be identical. *

* This method is mostly useful as a simple way of comparing data which can * be differently ordered but still functionally equivalent - for example * DTDs. */ public static boolean isTextSimilar(String docOne, String docTwo) { String sortedOne = stripSortedWhitespace(sortString(convertWhitespaceToSpaces(docOne.toLowerCase()))); String sortedTwo = stripSortedWhitespace(sortString(convertWhitespaceToSpaces(docTwo.toLowerCase()))); return sortedOne.equals(sortedTwo); } public static boolean noisyIsTextSimilar(String docOne, String docTwo) { String sortedOne = stripSortedWhitespace(sortString(convertWhitespaceToSpaces(docOne.toLowerCase()))); String sortedTwo = stripSortedWhitespace(sortString(convertWhitespaceToSpaces(docTwo.toLowerCase()))); System.out.println("Comparing :" + sortedOne); System.out.println("Compare to :" + sortedTwo); return sortedOne.equals(sortedTwo); } /** * Convert all whitespace (tabs and newlines) to single spaces - so they can * more easily be removed. */ private static String convertWhitespaceToSpaces(String s) { s = s.replace('\t', ' '); s = s.replace('\n', ' '); s = s.replace('\r', ' '); return s; } /** * Remove one block of contiguous whitespace - the behaviour will be * incorrect if there is more than one block of whitespace in the string. * This is intended to be used after sortString. */ protected static String stripSortedWhitespace(String s) { String first = s.substring(0, s.indexOf(' ')); String last = s.substring(s.lastIndexOf(' ') + 1); return first + last; } /** * Sort a string into alphabetical order of the characters in it. 
*/ protected static String sortString(String in) { char[] characters = in.toCharArray(); Arrays.sort(characters); return new String(characters); } /** * Remove all whitespace from a string - most useful for test purposes * (where you compare two strings, but whitespace isn't syntactically * significant) */ public String stripWhitespace(String s) { StringBuffer out = new StringBuffer(); for (int i = 0; i < s.length(); i++) { if (!Character.isWhitespace(s.charAt(i))) { out.append(s.charAt(i)); } } return out.toString(); } /** * These are characters not allowed in a text string in a CQ any file - they * require escaping. */ public static String sanitizeForCommunique(String s) { log.debug("Starting sanitizeForCommunique (" + "" + "s : " + s + " , " + ")"); // s = s.replace('\"', '\''); // s = s.replace('\n', ' '); // s = s.replace('\r', ' '); // s = s.replace('\t', ' '); StringBuffer out = new StringBuffer(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c == '"' || c == '\n' || c == '\r' || c == '\t' || c == '\\') { out.append("\\"); } out.append(c); } return out.toString(); } /** * Search and replace */ public static String replace(String allOfTheText, String toFind, String toReplace) { log.debug("Starting replace (" + "" + "allOfTheText : " + allOfTheText + " , " + "toFind : " + toFind + " , " + "toReplace : " + toReplace + " , " + ")"); // String will be built up here StringBuffer dest = new StringBuffer(); int nextFound = allOfTheText.indexOf(toFind, 0); int lastFound = 0; if (nextFound == -1) return allOfTheText; while (true) { dest.append(allOfTheText.substring(lastFound, nextFound)); dest.append(toReplace); lastFound = nextFound + toFind.length(); nextFound = allOfTheText.indexOf(toFind, lastFound); if (nextFound == -1) { dest.append(allOfTheText.substring(lastFound)); break; } } return dest.toString(); } public static String trimTrailingSlashes(String input) { log.debug("Starting trimTrailingSlashes (" + "" + "input : " + input + " , " + 
")"); while (input.endsWith("/")) { input = input.substring(0, input.length() - 1); } return input; } public static String trimLeadingSlashes(String input) { log.debug("Starting trimLeadingSlashes (" + "" + "input : " + input + " , " + ")"); while (input.startsWith("/")) { input = input.substring(1); } return input; } public static boolean occursOnceOnly(String big, String small) { return occursExactlyNTimes(big, small, 1); } public static boolean occursExactlyNTimes(String big, String small, int n) { try { int occurences = countOccurences(big, small); return (occurences == n); } catch (NullPointerException e) { return false; } } public static int countOccurences(String big, String small) { int count = 0; int off = 0; if (big == null || small == null || "".equals(small)) { throw new NullPointerException( "Cannot count occurrences of the empty or null String."); } // small != null, so definitely occurs 0 times. if ("".equals(big)) { return 0; } count: while (true) { off = big.indexOf(small, off); if (off == -1) break count; off += small.length(); // so off is bigger than it was before, as small != "". count++; } return count; } /** * Removes last level from * a "slashy string" representaion of a * hierarchical path. * * For example, '/a/b/c' becomes '/a/b' * with the exception that '/' and '' * remain unchanged */ public static String trimLastSlashLevel(String input) { if (input.equals("/") || input.equals("")) { return input; } String output = trimTrailingSlashes(input); output = output.substring(0, output.lastIndexOf("/")); return output; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy