All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jvntextpro.util.StringUtils Maven / Gradle / Ivy

Go to download

HeidelTime is a multilingual cross-domain temporal tagger that extracts temporal expressions from documents and normalizes them according to the TIMEX3 annotation standard.

There is a newer version: 2.2.1
Show newest version
/*
 Copyright (C) 2010 by
 * 
 * 	Cam-Tu Nguyen 
 *  [email protected] or [email protected]
 *
 *  Xuan-Hieu Phan  
 *  [email protected] 
 *
 *  College of Technology, Vietnamese University, Hanoi
 * 	Graduate School of Information Sciences, Tohoku University
 *
 * JVnTextPro-v.2.0 is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with JVnTextPro-v.2.0; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */

package jvntextpro.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Set;
import java.util.Vector;

// TODO: Auto-generated Javadoc
/**
 * Static helper methods for character searching, token-shape tests
 * (digits, letters, capitalisation, punctuation), string joining and padding,
 * and conversions between arrays and collections of strings.
 *
 * <p>All methods are static and keep no state.
 */
public class StringUtils {

    /** Characters counted as sentence stops by the stop-related methods. */
    private static final String STOP_CHARS = ".?!";

    /** Characters counted as punctuation marks by {@link #countPuncs(String)}. */
    private static final String PUNC_CHARS = ".?!,:;";

    /** Non-digit characters that {@link #isAllNumber(String)} accepts. */
    private static final String NUMBER_SYMBOLS = ".,%$_";

    // ---------------------------------------------------------
    // Character searching
    // ---------------------------------------------------------

    /**
     * Finds the first position, at or after {@code begin}, at which any of the
     * characters in {@code chars} occurs in {@code container}.
     *
     * @param container the string to search in
     * @param chars the set of characters to search for
     * @param begin the index to start searching from
     * @return the smallest index at which one of the characters occurs,
     *         or -1 if none of them occurs
     */
    public static int findFirstOf(String container, String chars, int begin) {
        int minIdx = -1;
        for (int i = 0; i < chars.length(); ++i) {
            int idx = container.indexOf(chars.charAt(i), begin);
            // Keep the smallest real hit; -1 means this character is absent.
            if (idx != -1 && (minIdx == -1 || idx < minIdx)) {
                minIdx = idx;
            }
        }
        return minIdx;
    }

    /**
     * Finds the last position, at or before {@code begin}, at which any of the
     * characters in {@code charSeq} occurs in {@code container}. The search
     * runs backward from {@code begin}.
     *
     * @param container the string to search in
     * @param charSeq the set of characters to search for
     * @param begin the index to start searching backward from
     * @return the index found, or -1 if there is no such position or if
     *         {@code begin} is not a valid index of {@code container}
     */
    public static int findLastOf(String container, String charSeq, int begin) {
        // An out-of-range start yields -1, exactly as the original loop guard did.
        if (begin >= container.length()) {
            return -1;
        }
        for (int i = begin; i >= 0; --i) {
            if (charSeq.indexOf(container.charAt(i)) >= 0) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the first position, at or after {@code begin}, whose character is
     * not one of the characters in {@code chars}.
     *
     * @param container the string to search in
     * @param chars the set of characters to skip over
     * @param begin the index to start searching from
     * @return the index found, or -1 if there is no such position or if
     *         {@code begin} is negative
     */
    public static int findFirstNotOf(String container, String chars, int begin) {
        // A negative start yields -1, exactly as the original loop guard did.
        if (begin < 0) {
            return -1;
        }
        for (int i = begin; i < container.length(); ++i) {
            if (chars.indexOf(container.charAt(i)) < 0) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the last position, at or before {@code end}, whose character is
     * not one of the characters in {@code charSeq}. The search runs backward
     * from {@code end}.
     *
     * @param container the string to search in
     * @param charSeq the set of characters to skip over
     * @param end the index to start searching backward from
     * @return the index found, or -1 if there is no such position or if
     *         {@code end} is not a valid index of {@code container}
     */
    public static int findLastNotOf(String container, String charSeq, int end) {
        // An out-of-range start yields -1, exactly as the original loop guard did.
        if (end >= container.length()) {
            return -1;
        }
        for (int i = end; i >= 0; --i) {
            if (charSeq.indexOf(container.charAt(i)) < 0) {
                return i;
            }
        }
        return -1;
    }

    // ---------------------------------------------------------
    // Syllable features
    // ---------------------------------------------------------

    /**
     * Tests whether the string contains at least one digit.
     *
     * @param str the string to test
     * @return true if any character satisfies {@link Character#isDigit(char)}
     */
    public static boolean containNumber(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (Character.isDigit(str.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether the string contains at least one letter.
     *
     * @param str the string to test
     * @return true if any character satisfies {@link Character#isLetter(char)}
     */
    public static boolean containLetter(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (Character.isLetter(str.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether the string contains both a letter and a digit.
     *
     * @param str the string to test
     * @return true if str contains at least one letter and at least one digit
     */
    public static boolean containLetterAndDigit(String str) {
        return containLetter(str) && containNumber(str);
    }

    /**
     * Tests whether the string looks like a number: every character is a digit
     * or one of {@code . , % $ _}, and at least one character is a digit.
     *
     * @param str the string to test
     * @return true if str consists only of digits and the accepted symbols and
     *         contains at least one digit
     */
    public static boolean isAllNumber(String str) {
        boolean hasNumber = false;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (Character.isDigit(c)) {
                hasNumber = true;
            } else if (NUMBER_SYMBOLS.indexOf(c) < 0) {
                return false;
            }
        }
        return hasNumber;
    }

    /**
     * Tests whether the string starts with an upper-case letter without being
     * entirely upper-case letters.
     *
     * @param str the string to test
     * @return true if the first character is an upper-case letter and
     *         {@link #isAllCap(String)} is false for str
     */
    public static boolean isFirstCap(String str) {
        // Fully capitalised tokens are reported by isAllCap, not here.
        if (isAllCap(str)) {
            return false;
        }
        return str.length() > 0
                && Character.isLetter(str.charAt(0))
                && Character.isUpperCase(str.charAt(0));
    }

    /**
     * Tests whether the string is non-empty and consists only of upper-case
     * letters.
     *
     * @param str the string to test
     * @return true if str is non-empty and every character is an upper-case letter
     */
    public static boolean isAllCap(String str) {
        if (str.length() <= 0) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (!Character.isLetter(c) || !Character.isUpperCase(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Negation of {@link #isFirstCap(String)}.
     *
     * @param str the string to test
     * @return true if {@code isFirstCap(str)} is false
     */
    public static boolean isNotFirstCap(String str) {
        return !isFirstCap(str);
    }

    /**
     * Tests whether the token ends with a punctuation mark.
     *
     * @param str the string token to test
     * @return true if str ends with one of {@code . ? ! , : " ' ;}
     */
    public static boolean endsWithPunc(String str) {
        // A trailing "''" is already covered by the single-quote check.
        return str.endsWith(".") || str.endsWith("?") || str.endsWith("!")
                || str.endsWith(",") || str.endsWith(":") || str.endsWith("\"")
                || str.endsWith("'") || str.endsWith(";");
    }

    /**
     * Tests whether the token ends with a sentence stop.
     *
     * @param str the string to test
     * @return true if str ends with {@code .}, {@code ?} or {@code !}
     */
    public static boolean endsWithStop(String str) {
        return str.endsWith(".") || str.endsWith("?") || str.endsWith("!");
    }

    /**
     * Counts the sentence-stop characters in the string.
     *
     * @param str the string to scan
     * @return the number of {@code .}, {@code ?} and {@code !} characters in str
     */
    public static int countStops(String str) {
        int count = 0;
        for (int i = 0; i < str.length(); i++) {
            if (STOP_CHARS.indexOf(str.charAt(i)) >= 0) {
                count++;
            }
        }
        return count;
    }

    /**
     * Counts the punctuation marks in the string.
     *
     * @param str the string to scan
     * @return the number of {@code . ? ! , : ;} characters in str
     */
    public static int countPuncs(String str) {
        int count = 0;
        for (int i = 0; i < str.length(); i++) {
            if (PUNC_CHARS.indexOf(str.charAt(i)) >= 0) {
                count++;
            }
        }
        return count;
    }

    /**
     * Tests whether the string is exactly one sentence-stop character.
     *
     * @param str the string to test
     * @return true if str equals {@code "."}, {@code "?"} or {@code "!"}
     */
    public static boolean isStop(String str) {
        return str.equals(".") || str.equals("?") || str.equals("!");
    }

    /**
     * Tests whether the token contains no letters and no digits after trimming.
     * Note that a string that is empty after trimming therefore also yields true.
     *
     * @param str the string token to test; may be null
     * @return false if str is null or contains a letter or digit, true otherwise
     */
    public static boolean isPunc(String str) {
        if (str == null) {
            return false;
        }
        String trimmed = str.trim();
        for (int i = 0; i < trimmed.length(); ++i) {
            char c = trimmed.charAt(i);
            if (Character.isDigit(c) || Character.isLetter(c)) {
                return false;
            }
        }
        return true;
    }

    // ---------------------------------------------------------
    // Joining
    // ---------------------------------------------------------

    /**
     * Joins the String representations of an array of objects, with the
     * specified separator.
     *
     * @param objects the objects to join; elements must not be null
     * @param sep the separator placed between consecutive elements
     * @return the joined string, or the empty string for an empty array
     */
    public static String join(Object[] objects, char sep) {
        if (objects.length == 0) {
            return "";
        }
        StringBuilder buffer = new StringBuilder(objects[0].toString());
        for (int i = 1; i < objects.length; i++) {
            buffer.append(sep);
            buffer.append(objects[i].toString());
        }
        return buffer.toString();
    }

    /**
     * Joins the String representations of a collection of objects, in
     * iteration order, with the specified separator.
     *
     * @param col the collection to join; elements must not be null
     * @param sep the separator placed between consecutive elements
     * @return the joined string, or the empty string for an empty collection
     */
    public static String join(Collection<?> col, char sep) {
        if (col.isEmpty()) {
            return "";
        }
        StringBuilder buffer = new StringBuilder();
        boolean first = true;
        for (Object o : col) {
            if (first) {
                first = false;
            } else {
                buffer.append(sep);
            }
            buffer.append(o.toString());
        }
        return buffer.toString();
    }

    // ---------------------------------------------------------
    // String Manipulation
    // ---------------------------------------------------------

    /**
     * Upper-cases the first character of the given string and lower-cases the
     * rest.
     *
     * @param s the input string; may be null or empty
     * @return the capitalized string, or s itself if it is null or empty
     */
    public static String capitalizeWord(String s) {
        if ((s == null) || (s.length() == 0)) {
            return s;
        }
        return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase();
    }

    /**
     * Encloses the specified String in single quotes.
     *
     * @param s the input string
     * @return the quoted String
     */
    public static String quote(String s) {
        return '\'' + s + '\'';
    }

    /**
     * Encloses the specified String in double quotes.
     *
     * @param s the input string
     * @return the quoted String
     */
    public static String doubleQuote(String s) {
        return '"' + s + '"';
    }

    /**
     * Pads the specified String with spaces on the right-hand side, or
     * truncates it if it is longer than the requested length.
     *
     * @param s the string to pad
     * @param length the desired length of the result
     * @return s cut to {@code length} characters if it is longer, otherwise s
     *         followed by enough spaces to reach {@code length}
     */
    public static String pad(String s, int length) {
        // Trim if longer...
        if (s.length() > length) {
            return s.substring(0, length);
        }
        StringBuilder buffer = new StringBuilder(s);
        for (int spaces = length - s.length(); spaces > 0; spaces--) {
            buffer.append(' ');
        }
        return buffer.toString();
    }

    /**
     * Sorts the characters in the specified string.
     *
     * @param s the input String to sort
     * @return a new String containing the characters of s in ascending order
     */
    public static String sort(String s) {
        char[] chars = s.toCharArray();
        Arrays.sort(chars);
        return new String(chars);
    }

    // ---------------------------------------------------------
    // String Matching
    // ---------------------------------------------------------

    /**
     * Checks whether a String is whitespace, empty or null.
     *
     * @param s the String to analyze
     * @return true if s is null, empty, or contains only whitespace
     *         characters; false otherwise
     */
    public static boolean isBlank(String s) {
        return s == null || isWhitespace(s);
    }

    /**
     * Checks whether a String is composed entirely of whitespace characters.
     * The empty string yields true because it contains no non-whitespace
     * character.
     *
     * @param s the String to analyze
     * @return false if s is null or contains a non-whitespace character;
     *         true otherwise
     */
    public static boolean isWhitespace(String s) {
        if (s == null) {
            return false;
        }
        int sLen = s.length();
        for (int i = 0; i < sLen; i++) {
            if (!Character.isWhitespace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    // ---------------------------------------------------------
    // Search-related
    // ---------------------------------------------------------

    /**
     * Counts the number of occurrences of a character in the specified String.
     *
     * @param s the String to analyze
     * @param c the character to search for
     * @return number of occurrences found
     */
    public static int countOccurrences(String s, char c) {
        int count = 0;
        int index = s.indexOf(c);
        while (index != -1) {
            count++;
            // Resume just past the hit; without this the same position is
            // found again and the loop never ends.
            index = s.indexOf(c, index + 1);
        }
        return count;
    }

    /**
     * Indicates whether the specified array of Strings contains a given
     * String. Null elements and a null search string are compared safely.
     *
     * @param array the array to search
     * @param s the string to look for
     * @return true if some element equals s; false otherwise
     */
    public static boolean isContained(String[] array, String s) {
        for (String string : array) {
            if (nullSafeEquals(string, s)) {
                return true;
            }
        }
        return false;
    }

    // ---------------------------------------------------------
    // Array/Collection conversion
    // ---------------------------------------------------------

    /**
     * Returns the index of the first occurrence of the specified String
     * in an array of Strings. Null elements and a null search string are
     * compared safely.
     *
     * @param array array of Strings to search
     * @param s the String to search for
     * @return the index of the first occurrence of the argument in the array,
     *         or -1 if the string is not found
     */
    public static int indexOf(String[] array, String s) {
        for (int index = 0; index < array.length; index++) {
            if (nullSafeEquals(s, array[index])) {
                return index;
            }
        }
        return -1;
    }

    /**
     * Creates a new ArrayList from the specified array of Strings.
     *
     * @param array the array to copy; may be null
     * @return a newly created list holding the array's elements in order,
     *         or an empty list if array is null
     */
    public static ArrayList<String> toList(String[] array) {
        if (array == null) {
            return new ArrayList<String>(0);
        }
        ArrayList<String> list = new ArrayList<String>(array.length);
        for (String s : array) {
            list.add(s);
        }
        return list;
    }

    /**
     * Creates a new Vector from the specified array of Strings.
     *
     * @param array the array to copy; may be null
     * @return a newly created vector holding the array's elements in order,
     *         or an empty vector if array is null
     */
    public static Vector<String> toVector(String[] array) {
        if (array == null) {
            return new Vector<String>(0);
        }
        // Vector.copyInto copies FROM the vector INTO the array, so it cannot
        // be used to fill the vector; add the elements instead.
        return new Vector<String>(Arrays.asList(array));
    }

    /**
     * Creates a new ArrayList from the specified Set of Strings.
     *
     * @param set a set of Strings
     * @return a newly created list holding the set's elements in the set's
     *         iteration order
     */
    public static ArrayList<String> toList(Set<String> set) {
        ArrayList<String> list = new ArrayList<String>(set.size());
        for (String string : set) {
            list.add(string);
        }
        return list;
    }

    /** Compares two strings for equality, treating two nulls as equal. */
    private static boolean nullSafeEquals(String a, String b) {
        return (a == null) ? (b == null) : a.equals(b);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy