All Downloads are FREE. Search and download functionalities are using the official Maven repository.

goja.core.kits.base.CharKit Maven / Gradle / Ivy

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 2013-2014 sagyf Yang. The Four Group.
 */

package goja.core.kits.base;

import goja.core.StringPool;
import org.apache.commons.lang3.StringUtils;

import java.io.UnsupportedEncodingException;

/**
 * 

* . *

* * @author sagyf yang * @version 1.0 2014-01-31 17:51 * @since JDK 1.6 */ public class CharKit { // ---------------------------------------------------------------- simple /** * Converts (signed) byte to (unsigned) char. */ public static char toChar(byte b) { return (char) (b & 0xFF); } /** * Converts char array into byte array by stripping the high byte of each character. */ public static byte[] toSimpleByteArray(char[] carr) { byte[] barr = new byte[carr.length]; for (int i = 0; i < carr.length; i++) { barr[i] = (byte) carr[i]; } return barr; } /** * Converts char sequence into byte array. * * @see #toSimpleByteArray(char[]) */ public static byte[] toSimpleByteArray(CharSequence charSequence) { byte[] barr = new byte[charSequence.length()]; for (int i = 0; i < barr.length; i++) { barr[i] = (byte) charSequence.charAt(i); } return barr; } /** * Converts byte array to char array by simply extending bytes to chars. */ public static char[] toSimpleCharArray(byte[] barr) { char[] carr = new char[barr.length]; for (int i = 0; i < barr.length; i++) { carr[i] = (char) (barr[i] & 0xFF); } return carr; } // ---------------------------------------------------------------- ascii /** * Returns ASCII value of a char. In case of overload, 0x3F is returned. */ public static int toAscii(char c) { if (c <= 0xFF) { return c; } else { return 0x3F; } } /** * Converts char array into {@link #toAscii(char) ASCII} array. */ public static byte[] toAsciiByteArray(char[] carr) { byte[] barr = new byte[carr.length]; for (int i = 0; i < carr.length; i++) { barr[i] = (byte) ((int) (carr[i] <= 0xFF ? carr[i] : 0x3F)); } return barr; } /** * Converts char sequence into ASCII byte array. */ public static byte[] toAsciiByteArray(CharSequence charSequence) { byte[] barr = new byte[charSequence.length()]; for (int i = 0; i < barr.length; i++) { char c = charSequence.charAt(i); barr[i] = (byte) ((int) (c <= 0xFF ? c : 0x3F)); } return barr; } // ---------------------------------------------------------------- raw arrays /** * Converts char array into byte array by replacing each character with two bytes. */ public static byte[] toRawByteArray(char[] carr) { byte[] barr = new byte[carr.length << 1]; for (int i = 0, bpos = 0; i < carr.length; i++) { char c = carr[i]; barr[bpos++] = (byte) ((c & 0xFF00) >> 8); barr[bpos++] = (byte) (c & 0x00FF); } return barr; } public static char[] toRawCharArray(byte[] barr) { int carrLen = barr.length >> 1; if (carrLen << 1 < barr.length) { carrLen++; } char[] carr = new char[carrLen]; int i = 0, j = 0; while (i < barr.length) { char c = (char) (barr[i] << 8); i++; if (i != barr.length) { c += barr[i] & 0xFF; i++; } carr[j++] = c; } return carr; } // ---------------------------------------------------------------- encoding /** * Converts char array to byte array using default Jodd encoding. */ public static byte[] toByteArray(char[] carr) throws UnsupportedEncodingException { return new String(carr).getBytes(StringPool.UTF_8); } /** * Converts char array to byte array using provided encoding. */ public static byte[] toByteArray(char[] carr, String charset) throws UnsupportedEncodingException { return new String(carr).getBytes(charset); } /** * Converts byte array of default Jodd encoding to char array. */ public static char[] toCharArray(byte[] barr) throws UnsupportedEncodingException { return new String(barr, StringPool.UTF_8).toCharArray(); } /** * Converts byte array of specific encoding to char array. */ public static char[] toCharArray(byte[] barr, String charset) throws UnsupportedEncodingException { return new String(barr, charset).toCharArray(); } // ---------------------------------------------------------------- find /** * Match if one character equals to any of the given character. * * @return true if characters match any character from given array, * otherwise false */ public static boolean equalsOne(char c, char[] match) { for (char aMatch : match) { if (c == aMatch) { return true; } } return false; } /** * Finds index of the first character in given array the matches any from the * given set of characters. * * @return index of matched character or -1 */ public static int findFirstEqual(char[] source, int index, char[] match) { for (int i = index; i < source.length; i++) { if (equalsOne(source[i], match) == true) { return i; } } return -1; } /** * Finds index of the first character in given array the matches any from the * given set of characters. * * @return index of matched character or -1 */ public static int findFirstEqual(char[] source, int index, char match) { for (int i = index; i < source.length; i++) { if (source[i] == match) { return i; } } return -1; } /** * Finds index of the first character in given array the differs from the * given set of characters. * * @return index of matched character or -1 */ public static int findFirstDiff(char[] source, int index, char[] match) { for (int i = index; i < source.length; i++) { if (equalsOne(source[i], match) == false) { return i; } } return -1; } /** * Finds index of the first character in given array the differs from the * given set of characters. * * @return index of matched character or -1 */ public static int findFirstDiff(char[] source, int index, char match) { for (int i = index; i < source.length; i++) { if (source[i] != match) { return i; } } return -1; } // ---------------------------------------------------------------- is /** * Returns true if character is a white space (<= ' '). * White space definition is taken from String class (see: trim()). */ public static boolean isWhitespace(char c) { return c <= ' '; } /** * Returns true if specified character is lowercase ASCII. * If user uses only ASCIIs, it is much much faster. */ public static boolean isLowercaseAlpha(char c) { return (c >= 'a') && (c <= 'z'); } /** * Returns true if specified character is uppercase ASCII. * If user uses only ASCIIs, it is much much faster. */ public static boolean isUppercaseAlpha(char c) { return (c >= 'A') && (c <= 'Z'); } public static boolean isAlphaOrDigit(char c) { return isDigit(c) || isAlpha(c); } public static boolean isWordChar(char c) { return isDigit(c) || isAlpha(c) || (c == '_'); } public static boolean isPropertyNameChar(char c) { return isDigit(c) || isAlpha(c) || (c == '_') || (c == '.') || (c == '[') || (c == ']'); } // ---------------------------------------------------------------- RFC /** * Indicates whether the given character is in the {@code ALPHA} set. * * @see RFC 3986, appendix A */ public static boolean isAlpha(char c) { return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')); } /** * Indicates whether the given character is in the {@code DIGIT} set. * * @see RFC 3986, appendix A */ public static boolean isDigit(char c) { return c >= '0' && c <= '9'; } /** * Indicates whether the given character is in the gen-delims set. * * @see RFC 3986, appendix A */ public static boolean isGenericDelimiter(int c) { return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@'; } /** * Indicates whether the given character is in the sub-delims set. * * @see RFC 3986, appendix A */ protected static boolean isSubDelimiter(int c) { return c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '='; } /** * Indicates whether the given character is in the reserved set. * * @see RFC 3986, appendix A */ protected static boolean isReserved(char c) { return isGenericDelimiter(c) || isReserved(c); } /** * Indicates whether the given character is in the unreserved set. * * @see RFC 3986, appendix A */ protected static boolean isUnreserved(char c) { return isAlpha(c) || isDigit(c) || c == '-' || c == '.' || c == '_' || c == '~'; } /** * Indicates whether the given character is in the pchar set. * * @see RFC 3986, appendix A */ protected static boolean isPchar(char c) { return isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@'; } // ---------------------------------------------------------------- conversions /** * Uppers lowercase ASCII char. */ public static char toUpperAscii(char c) { if (isLowercaseAlpha(c)) { c -= (char) 0x20; } return c; } /** * Lowers uppercase ASCII char. */ public static char toLowerAscii(char c) { if (isUppercaseAlpha(c)) { c += (char) 0x20; } return c; } /** * @see #indexOfChars(String, String, int) */ public static int indexOfChars(String string, String chars) { return indexOfChars(string, chars, 0); } /** * Returns the very first index of any char from provided string, starting from specified index offset. * Returns index of founded char, or -1 if nothing found. */ public static int indexOfChars(String string, String chars, int startindex) { int stringLen = string.length(); int charsLen = chars.length(); if (startindex < 0) { startindex = 0; } for (int i = startindex; i < stringLen; i++) { char c = string.charAt(i); for (int j = 0; j < charsLen; j++) { if (c == chars.charAt(j)) { return i; } } } return -1; } public static int indexOfChars(String string, char[] chars) { return indexOfChars(string, chars, 0); } /** * Returns the very first index of any char from provided string, starting from specified index offset. * Returns index of founded char, or -1 if nothing found. */ public static int indexOfChars(String string, char[] chars, int startindex) { int stringLen = string.length(); for (int i = startindex; i < stringLen; i++) { char c = string.charAt(i); for (char aChar : chars) { if (c == aChar) { return i; } } } return -1; } /** * Returns if string starts with given character. */ public static boolean startsWithChar(String s, char c) { return s.length() != 0 && s.charAt(0) == c; } /** * Returns true if string contains only digits. */ public static boolean containsOnlyDigits(String string) { int size = string.length(); for (int i = 0; i < size; i++) { char c = string.charAt(i); if (!CharKit.isDigit(c)) { return false; } } return true; } /** * Returns true if string {@link #containsOnlyDigits(String) contains only digits} * or signs plus or minus. */ public static boolean containsOnlyDigitsAndSigns(String string) { int size = string.length(); for (int i = 0; i < size; i++) { char c = string.charAt(i); if ((!CharKit.isDigit(c)) && (c != '-') && (c != '+')) { return false; } } return true; } /** * Splits a string in several parts (tokens) that are separated by delimiter * characters. Delimiter may contains any number of character and it is * always surrounded by two strings. * * @param src source to examine * @param d string with delimiter characters * @return array of tokens */ public static String[] splitc(String src, String d) { if ((d.length() == 0) || (src.length() == 0)) { return new String[]{src}; } char[] delimiters = d.toCharArray(); char[] srcc = src.toCharArray(); int maxparts = srcc.length + 1; int[] start = new int[maxparts]; int[] end = new int[maxparts]; int count = 0; start[0] = 0; int s = 0, e; if (CharKit.equalsOne(srcc[0], delimiters)) { // string starts with delimiter end[0] = 0; count++; s = CharKit.findFirstDiff(srcc, 1, delimiters); if (s == -1) { // nothing after delimiters return new String[]{StringUtils.EMPTY, StringUtils.EMPTY}; } start[1] = s; // new start } while (true) { // find new end e = CharKit.findFirstEqual(srcc, s, delimiters); if (e == -1) { end[count] = srcc.length; break; } end[count] = e; // find new start count++; s = CharKit.findFirstDiff(srcc, e, delimiters); if (s == -1) { start[count] = end[count] = srcc.length; break; } start[count] = s; } count++; String[] result = new String[count]; for (int i = 0; i < count; i++) { result[i] = src.substring(start[i], end[i]); } return result; } /** * Splits a string in several parts (tokens) that are separated by single delimiter * characters. Delimiter is always surrounded by two strings. * * @param src source to examine * @param delimiter delimiter character * @return array of tokens */ public static String[] splitc(String src, char delimiter) { if (src.length() == 0) { return new String[]{StringUtils.EMPTY}; } char[] srcc = src.toCharArray(); int maxparts = srcc.length + 1; int[] start = new int[maxparts]; int[] end = new int[maxparts]; int count = 0; start[0] = 0; int s = 0, e; if (srcc[0] == delimiter) { // string starts with delimiter end[0] = 0; count++; s = CharKit.findFirstDiff(srcc, 1, delimiter); if (s == -1) { // nothing after delimiters return new String[]{StringUtils.EMPTY, StringUtils.EMPTY}; } start[1] = s; // new start } while (true) { // find new end e = CharKit.findFirstEqual(srcc, s, delimiter); if (e == -1) { end[count] = srcc.length; break; } end[count] = e; // find new start count++; s = CharKit.findFirstDiff(srcc, e, delimiter); if (s == -1) { start[count] = end[count] = srcc.length; break; } start[count] = s; } count++; String[] result = new String[count]; for (int i = 0; i < count; i++) { result[i] = src.substring(start[i], end[i]); } return result; } /** * Returns if string ends with provided character. */ public static boolean endsWithChar(String s, char c) { return s.length() != 0 && s.charAt(s.length() - 1) == c; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy