All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ma.vi.base.string.Strings Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2016 Vikash Madhow
 */

package ma.vi.base.string;

import java.text.Normalizer;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static java.lang.Character.*;

/**
 * Utility functions working on strings.
 *
 * @author Vikash Madhow ([email protected])
 */
public class Strings {

  /**
   * Shortens the text to at most 20 characters, appending "..." when it
   * had to be truncated.
   *
   * @param text The text to shorten; may be null.
   * @return The text unchanged if it is null or has 20 characters or fewer,
   *         otherwise its first 20 characters followed by "...".
   */
  public static String shorten(String text) {
    return shorten(text, 20, "...");
  }

  /**
   * Shortens the text to at most {@code len} characters, appending the
   * suffix when (and only when) the text was actually truncated.
   *
   * @param text   The text to shorten; may be null.
   * @param len    Maximum number of characters of text to keep.
   * @param suffix Marker appended after a truncated text; null is treated
   *               as the empty string.
   * @return The text unchanged if it is null or not longer than {@code len},
   *         otherwise its first {@code len} characters followed by the suffix.
   * @throws StringIndexOutOfBoundsException if {@code len} is negative.
   */
  public static String shorten(String text, int len, String suffix) {
    // A text of exactly len characters already fits: it must not get a suffix.
    if (text == null || text.length() <= len) {
      return text;
    }
    return text.substring(0, len) + (suffix == null ? "" : suffix);
  }

  /**
   * Return true if string conforms to this regular expression:
   * <pre>
   *   HEX{8}-HEX{4}-HEX{4}-HEX{4}-HEX{12}
   * </pre>
   * where HEX is [A-Fa-f0-9]
   * <p>
   * I.e., it conforms to the hexadecimal representation of a UUID.
   *
   * @param text The text to test; null yields false.
   * @return True if the text has the textual layout of a UUID.
   */
  public static boolean isUUID(String text) {
    if (text == null || text.length() != UUID_SEP.length) {
      return false;
    }
    for (int i = 0; i < UUID_SEP.length; i++) {
      char c = text.charAt(i);
      if (UUID_SEP[i] == 1) {
        if (c != '-') {
          return false;
        }
      } else if (!isHexDigit(c)) {
        return false;
      }
    }
    return true;
  }

  /**
   * True for ASCII hexadecimal digits only (0-9, A-F, a-f).
   */
  private static boolean isHexDigit(char c) {
    return (c >= '0' && c <= '9')
        || (c >= 'A' && c <= 'F')
        || (c >= 'a' && c <= 'f');
  }

  /**
   * Position of separators (1) in a UUID
   */
  private static final byte[] UUID_SEP = new byte[]{
      0, 0, 0, 0, 0, 0, 0, 0, 1,
      0, 0, 0, 0, 1,
      0, 0, 0, 0, 1,
      0, 0, 0, 0, 1,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  };

  /**
   * Cleans the string by removing unnecessary white spaces, leading and
   * trailing non-alphanumeric characters and by converting it fully to
   * lower case except for the first character which is converted to upper case.
   *
   * @param value The string to clean; may be null.
   * @return The cleaned string, or null if value is null.
   */
  public static String clean(String value) {
    if (value == null) {
      return null;
    }
    return capFirst(trimPunctuations(collapseWhiteSpace(value)).toLowerCase());
  }

  /**
   * Remove leading and trailing whitespaces and reduce all consecutive
   * whitespaces to one.
   *
   * @param value The string to process; may be null.
   * @return The collapsed string, or null if value is null.
   */
  public static String collapseWhiteSpace(String value) {
    if (value == null) {
      return null;
    }

    // Starting with a whitespace as the "previous" character makes the loop
    // drop all leading whitespace as well as repeated inner whitespace.
    char last = ' ';
    StringBuilder s = new StringBuilder(value.length());
    for (char c : value.toCharArray()) {
      if (Character.isWhitespace(c) && Character.isWhitespace(last)) {
        continue;
      }
      s.append(c);
      last = c;
    }
    return s.toString().trim();
  }

  /**
   * Returns true if the string contains only lower-case letters
   * or non-alphabetic characters.
   *
   * @param s The string to test; null is considered lower-case.
   * @return False if any letter in the string is not lower-case.
   */
  public static boolean isLower(String s) {
    if (s == null) {
      return true;
    }
    for (char c : s.toCharArray()) {
      if (Character.isLetter(c) && !Character.isLowerCase(c)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns true if the string contains only upper-case letters
   * or non-alphabetic characters.
   *
   * @param s The string to test; null is considered upper-case, mirroring
   *          {@link #isLower(String)}.
   * @return False if any letter in the string is not upper-case.
   */
  public static boolean isUpper(String s) {
    if (s == null) {
      return true;
    }
    // Not simply !isLower(s): a mixed-case string is neither lower nor upper,
    // and a string without letters is both.
    for (char c : s.toCharArray()) {
      if (Character.isLetter(c) && !Character.isUpperCase(c)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Strip the string of anything but letters.
   *
   * @param s The string to filter; may be null.
   * @return The letters of s in their original order, or null if s is null.
   */
  public static String lettersOnly(String s) {
    if (s == null) {
      return null;
    }
    StringBuilder lettersOnly = new StringBuilder(s.length());
    for (char c : s.toCharArray()) {
      if (Character.isLetter(c)) {
        lettersOnly.append(c);
      }
    }
    return lettersOnly.toString();
  }

  /**
   * Remove all leading non-alphanumeric characters
   *
   * @param value The string to trim; may be null.
   * @return The string starting at its first letter or digit (empty if it
   *         has none), or null if value is null.
   */
  public static String ltrimPunctuations(String value) {
    if (value == null) {
      return null;
    }
    int i = 0;
    while (i < value.length() && !Character.isLetterOrDigit(value.charAt(i))) {
      i++;
    }
    return value.substring(i);
  }

  /**
   * Remove all trailing non-alphanumeric characters
   *
   * @param value The string to trim; may be null.
   * @return The string ending at its last letter or digit (empty if it
   *         has none), or null if value is null.
   */
  public static String rtrimPunctuations(String value) {
    if (value == null) {
      return null;
    }
    int i = value.length() - 1;
    while (i >= 0 && !Character.isLetterOrDigit(value.charAt(i))) {
      i--;
    }
    return value.substring(0, i + 1);
  }

  /**
   * Remove all leading and trailing non-alphanumeric characters
   *
   * @param value The string to trim; may be null.
   * @return The trimmed string, or null if value is null.
   */
  public static String trimPunctuations(String value) {
    return ltrimPunctuations(rtrimPunctuations(value));
  }

  /**
   * Replace all characters with accents in value with their equivalent
   * characters with the accent removed.
   *
   * @param value The string to process; may be null.
   * @return The string without non-spacing marks, or null if value is null.
   */
  public static String removeDiacritics(String value) {
    if (value == null) {
      return null;
    }
    // Canonical decomposition separates base characters from their combining
    // marks, which can then be dropped individually.
    String decomposed = Normalizer.normalize(value, Normalizer.Form.NFD);
    StringBuilder buf = new StringBuilder(decomposed.length());
    for (char c : decomposed.toCharArray()) {
      if (Character.getType(c) != Character.NON_SPACING_MARK) {
        buf.append(c);
      }
    }
    return buf.toString();
  }

  /**
   * Remove from the string any continuous sequence which does not match
   * the supplied regular expression.
   *
   * @param text            The text to preserve substrings from.
   * @param preservePattern Only sequences in text matching this pattern will
   *                        be preserved in the result.
   * @return The processed text, or null if either argument is null.
   */
  public static String preserve(String text, Pattern preservePattern) {
    if (text == null || preservePattern == null) {
      return null;
    }
    StringBuilder result = new StringBuilder();
    Matcher matcher = preservePattern.matcher(text);
    while (matcher.find()) {
      result.append(matcher.group());
    }
    return result.toString();
  }

  /**
   * Creates a label by expanding a camel case identifier: the parts found
   * by {@link #splitByCase(String)} are joined with a single space.
   *
   * @param text The identifier to expand.
   * @return The space-separated parts of the identifier.
   * @throws IllegalArgumentException if text is null.
   */
  public static String expandByCase(String text) {
    return String.join(" ", splitByCase(text));
  }

  /**
   * Splits a string by case boundaries. E.g., AnotherCasePhrase is split
   * to ['Another', 'Case', 'Phrase'].
   *
   * @param text The text to split.
   * @return The successive matches of {@link #CASE_SPLITTER} in text.
   * @throws IllegalArgumentException if text is null.
   */
  public static List<String> splitByCase(String text) {
    if (text == null) {
      throw new IllegalArgumentException("Can't split null");
    }
    List<String> result = new ArrayList<>();
    Matcher matcher = CASE_SPLITTER.matcher(text);
    while (matcher.find()) {
      result.add(matcher.group());
    }
    return result;
  }

  /**
   * Capitalize first character only.
   *
   * @param text The text to capitalize; may be null.
   * @return The text with its first character in upper case, or null if
   *         text is null.
   */
  public static String capFirst(String text) {
    if (text == null) {
      return null;
    }
    return switch (text.length()) {
      case 0 -> text;
      case 1 -> text.toUpperCase();
      default -> Character.toUpperCase(text.charAt(0)) + text.substring(1);
    };
  }

  /**
   * De-capitalize first character only.
   *
   * @param text The text to de-capitalize; may be null.
   * @return The text with its first character in lower case, or null if
   *         text is null.
   */
  public static String uncapFirst(String text) {
    if (text == null) {
      return null;
    }
    return switch (text.length()) {
      case 0 -> text;
      case 1 -> text.toLowerCase();
      default -> Character.toLowerCase(text.charAt(0)) + text.substring(1);
    };
  }

  /**
   * Returns a string of length characters chosen randomly from the supplied
   * character array.
   *
   * @param length Number of characters to generate; values less than or
   *               equal to 0 are replaced by 8.
   * @param chars  Characters to pick from; null or empty is replaced by
   *               {@link #ALPHA_NUMERIC}.
   * @return The random string.
   */
  public static String random(int length, char[] chars) {
    if (length <= 0) {
      length = 8;
    }
    if (chars == null || chars.length == 0) {
      chars = ALPHA_NUMERIC;
    }
    StringBuilder str = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
      str.append(chars[RANDOM.nextInt(chars.length)]);
    }
    return str.toString();
  }

  /**
   * Returns a random string of 8 characters taken from {@link #ALPHA_NUMERIC}.
   */
  public static String random() {
    return random(0, null);
  }

  /**
   * Returns a random string of the given length taken from
   * {@link #ALPHA_NUMERIC}; a length less than or equal to 0 is replaced by 8.
   */
  public static String random(int length) {
    return random(length, null);
  }

  /**
   * Change a name by adding a random substring until it is unique
   * within the set of names provided. If the name has an optional
   * suffix following a period (i.e. an extension), the random string
   * is added before the extension. If the name is not present in
   * the set of names initially, it is returned unchanged. The
   * unique name is added to the set before returning.
   *
   * @param names Set of existing names.
   * @param name  Name to make unique.
   * @return A unique name as per the set of names.
   */
  public static String makeUnique(Set<String> names, String name) {
    int pos = name.lastIndexOf('.');
    String previousExt = pos == -1 ? "" : name.substring(pos);
    String previousName = pos == -1 ? name : name.substring(0, pos);
    while (names.contains(name)) {
      name = previousName + "_" + Strings.random(4) + previousExt;
    }
    names.add(name);
    return name;
  }

  private Strings() {
  }

  /**
   * alpha-numeric characters; useful to generate random alpha-numeric strings
   */
  public static final char[] ALPHA_NUMERIC = new char[]{
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
      'u', 'v', 'w', 'x', 'y', 'z'};

  /**
   * numeric characters; useful to generate random numeric strings
   */
  public static final char[] NUMERIC = new char[]{
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};

  /**
   * alphabetic characters; useful to generate random alphabetic strings
   */
  public static final char[] ALPHABETIC = new char[]{
      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
      'u', 'v', 'w', 'x', 'y', 'z'};

  /**
   * Random number generator
   */
  private static final Random RANDOM = new Random();

  /**
   * Regular expression to split camel case identifier into separate words.
   */
  public static final Pattern CASE_SPLITTER = Pattern.compile("[a-z]+|[A-Z][a-z]+|[A-Z]+|[0-9]+");
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy