
// uk.org.retep.util.string.Sentence Maven / Gradle / Ivy
/*
* Copyright (c) 1998-2010, Peter T Mount
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the retep.org.uk nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package uk.org.retep.util.string;
import java.util.Arrays;
import javax.annotation.concurrent.NotThreadSafe;
/**
 * A sentence held in two forms: the original text as supplied and a
 * normalized form, together with an array of mappings used to locate
 * normalized words inside the original text.
 * <p>
 * The normalized text is also kept as an array of words, produced by
 * trimming it and splitting on single space characters.
 * <p>
 * Instances are mutable and not thread safe. The arrays passed to and
 * returned from this class are shared with the caller, not copied.
 *
 * @author peter
 */
@NotThreadSafe
public class Sentence
{

    private String original;
    private Integer[] mappings;
    private String normalized;
    private String[] splitted;

    /**
     * Creates a Sentence holding only the original text. The mappings and
     * the normalized form are left unset ({@code null}).
     *
     * @param original the original text
     */
    public Sentence( String original )
    {
        this( original, null, null );
    }

    /**
     * Creates a fully specified Sentence.
     *
     * @param original   the original text
     * @param mappings   the normalized-to-original mapping array, may be {@code null}
     * @param normalized the normalized text, may be {@code null}
     */
    public Sentence( String original, Integer[] mappings, String normalized )
    {
        setOriginal( original );
        setMappings( mappings );
        setNormalized( normalized );
    }

    /**
     * Compares this Sentence with another object. Two sentences are equal
     * when their original text, mappings and normalized text are all equal.
     * Unset ({@code null}) fields are only equal to other unset fields.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a Sentence with equal content
     */
    @Override
    public boolean equals( Object obj )
    {
        if( this == obj )
        {
            return true;
        }
        // instanceof is false for null, so no separate null check is needed
        if( !(obj instanceof Sentence) )
        {
            return false;
        }
        Sentence compared = (Sentence) obj;
        return nullSafeEquals( original, compared.original )
               && Arrays.equals( mappings, compared.mappings )
               && nullSafeEquals( normalized, compared.normalized );
    }

    /**
     * Returns a hash code built from the original text, the mappings and the
     * normalized text. Unset ({@code null}) fields contribute 0.
     *
     * @return the hash code
     */
    @Override
    public int hashCode()
    {
        return nullSafeHash( original ) ^ Arrays.hashCode( mappings ) ^ nullSafeHash( normalized );
    }

    /**
     * Gets the number of individual words contained by the Sentence.
     *
     * @return the number of normalized words, or 0 when no normalized text
     * has been set
     */
    public int length()
    {
        return splitted == null ? 0 : splitted.length;
    }

    /**
     * Returns the normalized text as an array of String words.
     *
     * @return the internal word array (not a copy), or {@code null} when no
     * normalized text has been set
     */
    public String[] normalized()
    {
        return splitted;
    }

    /**
     * Returns the normalised word at a specified position.
     *
     * @param index Word index
     * @return the word at the specified index
     * @throws ArrayIndexOutOfBoundsException if the index is outside
     * {@code 0..length()-1}, which includes every index when no normalized
     * text has been set
     */
    public String getWord( int index )
    {
        if( splitted == null )
        {
            // length() is 0 in this state, so every index is out of range
            throw new ArrayIndexOutOfBoundsException( index );
        }
        return splitted[index];
    }

    /**
     * Returns the normalised word at a specified position in uppercase.
     * <p>
     * NOTE(review): the conversion uses the JVM default locale; confirm that
     * this is intended if the result is used for matching.
     *
     * @param index Word index
     * @return the word at the specified index in uppercase, or {@code null}
     * if the stored word is {@code null}
     * @throws ArrayIndexOutOfBoundsException if the index is out of range
     */
    public String getWordUppercase( int index )
    {
        String word = getWord( index );
        return word == null ? null : word.toUpperCase();
    }

    /**
     * Gets the (index)th word of the Sentence, in its normalized form.
     * Equivalent to {@link #getWord(int)}.
     *
     * @param index Word index
     * @return the word at the specified index
     * @throws ArrayIndexOutOfBoundsException if the index is out of range
     */
    public String normalized( int index )
    {
        return getWord( index );
    }

    /**
     * Returns the part of the original text that corresponds to a range of
     * mapping entries.
     * <p>
     * If the entry at {@code beginIndex} is unset, the nearest set entry
     * before it is used; if there is none the range starts at the beginning
     * of the original text. If the entry at {@code endIndex} is unset, the
     * nearest set entry after it is used; if there is none the range runs to
     * the end of the original text. An {@code endIndex} beyond the last entry
     * is treated as the last entry. The extracted text is passed through
     * {@code StringUtils.removeNonAlphaNumeric} before being returned.
     *
     * @param beginIndex index of the first mapping entry of the range
     * @param endIndex   index of the last mapping entry of the range (inclusive)
     * @return the matching part of the original text
     * @throws ArrayIndexOutOfBoundsException if {@code beginIndex} is negative
     * or not less than the number of mappings, or {@code endIndex} is negative
     * @throws IllegalStateException if the original text or the mappings have
     * not been set
     * @throws StringIndexOutOfBoundsException if the resolved offsets do not
     * form a valid range of the original text
     */
    public String original( int beginIndex, int endIndex )
    {
        if( beginIndex < 0 )
        {
            throw new ArrayIndexOutOfBoundsException( beginIndex );
        }
        if( original == null || mappings == null )
        {
            throw new IllegalStateException(
                    "original text and mappings must be set before calling original(int,int)" );
        }

        final int n = mappings.length;
        if( beginIndex >= n )
        {
            throw new ArrayIndexOutOfBoundsException( beginIndex );
        }
        if( endIndex < 0 )
        {
            throw new ArrayIndexOutOfBoundsException( endIndex );
        }

        // Walk backwards to the nearest set mapping; if none exists start at
        // the beginning of the text instead of indexing mappings[-1].
        int first = beginIndex;
        while( first >= 0 && mappings[first] == null )
        {
            first--;
        }
        final int from = first >= 0 ? mappings[first] : 0;

        // Walk forwards to the nearest set mapping; if none exists run to the
        // end of the text instead of unboxing a null entry.
        int last = Math.min( endIndex, n - 1 );
        while( last < n && mappings[last] == null )
        {
            last++;
        }
        final int to = last < n ? mappings[last] + 1 : original.length();

        return StringUtils.removeNonAlphaNumeric( original.substring( from, to ) );
    }

    /**
     * Returns a string representation of the Sentence. This is useful for
     * printing the state of Sentence objects during tests.
     *
     * @return A string formed of three bracket-separated sections: the
     * original sentence string, the normalized-to-original word mapping
     * array, and the normalized string.
     */
    @Override
    public String toString()
    {
        return "[" + original + "]" + Arrays.toString( mappings ) + "[" + normalized + "]";
    }

    /**
     * Returns a trimmed version of the original Sentence string.
     *
     * @return A trimmed version of the original Sentence string, or
     * {@code null} if no original text has been set.
     */
    public String trimOriginal()
    {
        return original == null ? null : original.trim();
    }

    /**
     * Gets the normalized-to-original mapping array.
     *
     * @return the internal mapping array (not a copy), may be {@code null}
     */
    public Integer[] getMappings()
    {
        return mappings;
    }

    /**
     * Sets the normalized-to-original mapping array. The array is stored as
     * given, without copying.
     *
     * @param mappings the mapping array, may be {@code null}
     */
    public void setMappings( Integer[] mappings )
    {
        this.mappings = mappings;
    }

    /**
     * Gets the Sentence in its normalized form.
     *
     * @return the normalized text, may be {@code null}
     * @see #getNormalized()
     */
    public String getNormalised()
    {
        return normalized;
    }

    /**
     * Gets the Sentence in its normalized form. Same as
     * {@link #getNormalised()}, spelled consistently with
     * {@link #setNormalized(String)}.
     *
     * @return the normalized text, may be {@code null}
     */
    public String getNormalized()
    {
        return normalized;
    }

    /**
     * Sets the normalized text and rebuilds the word array from it by
     * trimming the text and splitting it on single space characters.
     * Passing {@code null} clears both the normalized text and the words.
     *
     * @param normalized the normalized text, may be {@code null}
     */
    public void setNormalized( String normalized )
    {
        this.normalized = normalized;
        // Always reassign so the words never go stale against the text
        this.splitted = normalized == null ? null : normalized.trim().split( " " );
    }

    /**
     * Gets the Sentence, in its original, unformatted form.
     *
     * @return the original text, may be {@code null}
     */
    public String getOriginal()
    {
        return original;
    }

    /**
     * Sets the original, unformatted text of the Sentence.
     *
     * @param original the original text
     */
    public void setOriginal( String original )
    {
        this.original = original;
    }

    /** Compares two strings for equality, treating two nulls as equal. */
    private static boolean nullSafeEquals( String a, String b )
    {
        return a == null ? b == null : a.equals( b );
    }

    /** Returns the hash code of a string, or 0 for null. */
    private static int nullSafeHash( String s )
    {
        return s == null ? 0 : s.hashCode();
    }
}