All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.org.retep.util.string.Sentence Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 1998-2010, Peter T Mount
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 *     * Neither the name of the retep.org.uk nor the names of its contributors
 *       may be used to endorse or promote products derived from this software
 *       without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package uk.org.retep.util.string;

import java.util.Arrays;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * Holds a sentence in three related forms: the original text, a normalized
 * text, and an array of mappings between the two. Setting the normalized
 * text also caches its individual words (split on single spaces).
 *
 * <p>Instances are mutable and, as the annotation states, not thread safe.
 *
 * @author peter
 */
@NotThreadSafe
public class Sentence {

    private String original;
    private Integer[] mappings;
    private String normalized;
    /** Words of {@link #normalized}; {@code null} while no normalized form is set. */
    private String[] splitted;

    /**
     * Creates a Sentence that has only an original text; the mappings and
     * the normalized form are left {@code null}.
     *
     * @param original the original, unformatted text
     */
    public Sentence( String original ) {
        this( original, null, null );
    }

    /**
     * Creates a fully populated Sentence.
     *
     * @param original   the original, unformatted text
     * @param mappings   normalized-to-original word mapping array; entries may be {@code null}
     * @param normalized the normalized text, or {@code null} if not yet known
     */
    public Sentence( String original, Integer[] mappings, String normalized ) {
        setOriginal( original );
        setMappings( mappings );
        setNormalized( normalized );
    }

    /**
     * Compares the original text, the mappings and the normalized text.
     * Any of the three may be {@code null} on either side; two {@code null}
     * values are considered equal.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a Sentence with equal state
     */
    @Override
    public boolean equals( Object obj ) {
        if( this == obj ) {
            return true;
        }
        if( !(obj instanceof Sentence) ) {
            return false;
        }
        Sentence compared = (Sentence) obj;
        return nullSafeEquals( original, compared.original )
               && Arrays.equals( mappings, compared.mappings )
               && nullSafeEquals( normalized, compared.normalized );
    }

    /**
     * Hash code consistent with {@link #equals(Object)}; {@code null} fields
     * contribute 0 instead of causing a NullPointerException.
     *
     * @return the hash code
     */
    @Override
    public int hashCode() {
        return nullSafeHash( original )
               ^ Arrays.hashCode( mappings )
               ^ nullSafeHash( normalized );
    }

    /** Null-tolerant equality, hand-rolled so no newer JDK API is required. */
    private static boolean nullSafeEquals( Object a, Object b ) {
        return a == null ? b == null : a.equals( b );
    }

    /** Null-tolerant hash code; {@code null} hashes to 0. */
    private static int nullSafeHash( Object o ) {
        return o == null ? 0 : o.hashCode();
    }

    /**
     * Gets the number of individual words contained by the Sentence.
     *
     * @return the word count of the normalized form, or 0 when no
     *         normalized form has been set
     */
    public int length() {
        return splitted == null ? 0 : splitted.length;
    }

    /**
     * Returns the normalized form as an array of words. The internal array
     * is returned directly, not a copy.
     *
     * @return the words, or {@code null} when no normalized form has been set
     */
    public String[] normalized() {
        return splitted;
    }

    /**
     * Returns the normalised word at a specified position.
     *
     * @param index Word index
     * @return the word at the specified index
     */
    public String getWord( int index ) {
        return splitted[index];
    }

    /**
     * Returns the normalised word at a specified position in uppercase.
     *
     * @param index Word index
     * @return the word at the specified index converted with
     *         {@link String#toUpperCase()}, or {@code null} if the word is {@code null}
     */
    public String getWordUppercase( int index ) {
        String word = getWord( index );
        return word == null ? null : word.toUpperCase();
    }

    /**
     * Gets the (index)th word of the Sentence, in its normalized form.
     * Equivalent to {@link #getWord(int)}.
     *
     * @param index Word index
     * @return the word at the specified index
     */
    public String normalized( int index ) {
        return splitted[index];
    }

    /**
     * Returns the part of the original text that corresponds to a range of
     * mapping entries, passed through
     * {@code StringUtils.removeNonAlphaNumeric}.
     *
     * <p>{@code null} mapping entries are skipped: the start is the nearest
     * non-null entry at or before {@code beginIndex}, the end is the nearest
     * non-null entry at or after {@code endIndex} (an {@code endIndex} past
     * the array is clamped to the last entry). If no non-null entry exists
     * in the respective direction, the start or end of the original text is
     * used instead.
     *
     * @param beginIndex index into the mappings of the first word
     * @param endIndex   index into the mappings of the last word
     * @return the matching fragment of the original text
     * @throws ArrayIndexOutOfBoundsException if {@code beginIndex} is negative
     *         or not a valid mappings index, or {@code endIndex} is negative
     */
    public String original( int beginIndex, int endIndex ) {
        if( beginIndex < 0 ) {
            throw new ArrayIndexOutOfBoundsException( beginIndex );
        }

        // Walk back to the closest entry that actually has a mapping.
        int begin = beginIndex;
        while( begin >= 0 && mappings[begin] == null ) {
            begin--;
        }

        // Walk forward likewise, never leaving the array.
        final int n = mappings.length;
        int end = endIndex;
        while( end < n && mappings[end] == null ) {
            end++;
        }
        if( end >= n ) {
            end = n - 1;
        }

        // begin == -1 means every entry up to beginIndex was null; previously
        // this read mappings[-1]. Fall back to the start of the text.
        final int from = begin < 0 ? 0 : mappings[begin];

        // After clamping, the last entry can still be null; previously this
        // failed on unboxing. Fall back to the end of the text.
        final Integer last = mappings[end];
        final int to = last == null ? original.length() : last + 1;

        return StringUtils.removeNonAlphaNumeric( original.substring( from, to ) );
    }

    /**
     * Returns a string representation of the Sentence. This is useful for
     * printing the state of Sentence objects during tests.
     *
     * @return A string formed of three bracket-separated sections: the
     *         original sentence string, the normalized-to-original word
     *         mapping array, and the normalized string.
     */
    @Override
    public String toString() {
        return "[" + original + "]" + Arrays.toString( mappings ) + "[" + normalized + "]";
    }

    /**
     * Returns a trimmed version of the original Sentence string.
     *
     * @return A trimmed version of the original Sentence string.
     */
    public String trimOriginal() {
        return original.trim();
    }

    /**
     * Gets the mapping array. The internal array is returned directly, not a copy.
     *
     * @return the mappings, possibly {@code null}
     */
    public Integer[] getMappings() {
        return mappings;
    }

    /**
     * Replaces the mapping array. The array is stored as given, not copied.
     *
     * @param mappings the new mappings, may be {@code null}
     */
    public void setMappings( Integer[] mappings ) {
        this.mappings = mappings;
    }

    /**
     * Gets the Sentence in its normalized form.
     *
     * @return the normalized text, possibly {@code null}
     */
    public String getNormalised() {
        return normalized;
    }

    /**
     * Sets the normalized form and refreshes the cached word array, which is
     * the trimmed text split on single spaces. Passing {@code null} clears
     * the cached words so that no stale words from an earlier value remain.
     *
     * @param normalized the normalized text, may be {@code null}
     */
    public void setNormalized( String normalized ) {
        this.normalized = normalized;
        this.splitted = normalized == null ? null : normalized.trim().split( " " );
    }

    /**
     * Gets the Sentence, in its original, unformatted form.
     *
     * @return the original text
     */
    public String getOriginal() {
        return original;
    }

    /**
     * Sets the original, unformatted text.
     *
     * @param original the original text
     */
    public void setOriginal( String original ) {
        this.original = original;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy