All downloads are free. The search and download functionality uses the official Maven repository.

org.openmdx.base.query.spi.Soundex Maven / Gradle / Ivy

There is a newer version: 2.18.10
Show newest version
/*
 * ====================================================================
 * Project:     openMDX, http://www.openmdx.org/
 * Description: Soundex
 * Owner:       OMEX AG, Switzerland, http://www.omex.ch
 * ====================================================================
 *
 * This software is published under the BSD license as listed below.
 * 
 * Copyright (c) 2013, OMEX AG, Switzerland
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 * 
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * 
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 *  
 * * Neither the name of the openMDX team nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * ------------------
 * 
 * This product includes software developed by other organizations as
 * listed in the NOTICE file.
 */
package org.openmdx.base.query.spi;

/**
 * Encodes words using a variant of the Soundex phonetic algorithm.
 * <p>
 * A code consists of the lower-cased first character of the word followed
 * by the digits of the coded letters that follow it. Letters without a
 * code (vowels, {@code h}, {@code w}, {@code y} in the default table) and
 * non-letters are skipped, and a letter is also skipped when it has the
 * same code as the most recently kept letter, even if uncoded letters
 * stand in between.
 * <p>
 * The class is a singleton whose settings (length, padding, final-s
 * handling and code table) are plain, unsynchronized fields of the shared
 * instance; changing them affects every user of {@link #getInstance()}.
 * 
 * @author Aaron Hansen
 */
public final class Soundex {

    /**
     * Not instantiable from outside, use {@link #getInstance()}.
     */
    private Soundex() {
        super();
    }

    /**
     * Provides the shared encoder.
     * 
     * @return the singleton instance
     */
    public static Soundex getInstance(
    ) {
        return instance;
    }

    /**
     * Computes the code of the given word according to the current settings.
     * 
     * @param _word the word to be encoded, surrounding white space is ignored
     * 
     * @return the code, or an empty string if the trimmed word is empty or
     * the configured length is {@code 0}
     * 
     * @throws NullPointerException if {@code _word} is {@code null}
     */    
    public String encode(
        String _word
    ) {
        String word = _word.trim();
        // Only a single trailing s is removed, and never the only character
        if (
            dropFinalSBoolean && 
            (word.length() > 1) && 
            (word.endsWith("S") || word.endsWith("s"))
        ) {
            word = word.substring(0, word.length() - 1);
        }
        // A zero length code is empty; without this guard the code count
        // below would become negative and the buffer allocation would fail
        if ((word.length() == 0) || (lengthInt == 0)) {
            return "";
        }
        word = reduce(word);
        final int wordLength = word.length();
        // Number of digits to produce (the first character is not counted)
        final int max = lengthInt < 0 ? wordLength : lengthInt - 1;
        // Size the buffer by what the word can yield, not by the setting,
        // so that a very large length does not pre-allocate a huge buffer
        final StringBuilder buf = new StringBuilder(
            Math.min(max, wordLength) + 1
        ).append(
            Character.toLowerCase(word.charAt(0))
        );
        int sofar = 0; // number of digits appended so far
        for (int i = 1; (i < wordLength) && (sofar < max); i++) {
            final int code = getCode(word.charAt(i));
            if (code > 0) {
                buf.append(code);
                sofar++;
            }
        }
        // Padding applies to limited lengths only
        if (padBoolean && (lengthInt > 0)) {
            for (; sofar < max; sofar++) {
                buf.append('0');
            }
        }
        return buf.toString();
    }

    /**
     * Returns the Soundex code for the specified character.
     * 
     * @param ch Should be between A-Z or a-z
     * 
     * @return the code from the code table, or -1 if the character is not 
     * an ASCII letter or has no phonetic code.
     */
    public int getCode(
        char ch
    ) {
        final int arrayidx = indexOf(ch);
        if ((arrayidx >= 0) && (arrayidx < soundexInts.length)) {
            return soundexInts[arrayidx];
        }
        return -1;
    }

    /**
     * If true, a final char of 's' or 'S' of the word being encoded will be 
     * dropped. By dropping the last s, lady and ladies for example,
     * will encode the same. False by default.
     * 
     * @return the current setting
     */
    public boolean getDropFinalS(
    ) {
        return dropFinalSBoolean;
    }

    /**
     * The length of code strings to build, 4 by default.
     * If negative, length is unlimited.
     * 
     * @return the current setting
     * 
     * @see #NO_MAX
     */
    public int getLength(
    ) {
        return lengthInt;
    }

    /**
     * If true, appends zeros to a soundex code if the code is less than
     * Soundex.getLength().  True by default.
     * 
     * @return the current setting
     */
    public boolean getPad(
    ) {
        return padBoolean;
    }

    /**
     * Allows you to modify the default code table. Characters other than
     * A-Z and a-z are ignored; upper and lower case share one entry.
     * 
     * @param ch The character to specify the code for.
     * @param code The code to represent ch with, must be -1, or 1 thru 9
     * (the value is stored as given, it is not validated)
     */
    public void setCode(
        char ch, 
        int code
    ) {
        final int arrayidx = indexOf(ch);
        if ((0 <= arrayidx) && (arrayidx < soundexInts.length)) {
            soundexInts[arrayidx] = code;
        }
    }

    /**
     * If true, a final char of 's' or 'S' of the word being encoded will be 
     * dropped.
     * 
     * @param bool the new setting
     */
    public void setDropFinalS(
        boolean bool
    ) {
        dropFinalSBoolean = bool;
    }

    /**
     * Sets the length of code strings to build. 4 by default.
     * 
     * @param length Length of code to produce, should be &gt;= 1, or 
     * negative (see {@link #NO_MAX}) for unlimited length. With a length
     * of 0 every word is encoded as the empty string.
     */
    public void setLength(
        int length
    ) {
        lengthInt = length;
    }

    /**
     * If true, appends zeros to a soundex code if the code is less than
     * Soundex.getLength().  True by default.
     * 
     * @param bool the new setting
     */
    public void setPad(
        boolean bool
    ) {
        padBoolean = bool;
    }

    /**
     * Maps an ASCII letter to its position in the code table.
     * 
     * @param ch the character to look up
     * 
     * @return 0 for a/A up to 25 for z/Z, or -1 for any other character
     */
    private static int indexOf(
        char ch
    ) {
        if (('a' <= ch) && (ch <= 'z')) {
            return ch - 'a';
        }
        if (('A' <= ch) && (ch <= 'Z')) {
            return ch - 'A';
        }
        return -1;
    }

    /**
     * Creates the Soundex code table, one entry per letter from a to z;
     * -1 marks letters without a phonetic code.
     */
    private static int[] createArray(
    ) {
        return new int[] {
            -1, //a 
            1, //b
            2, //c 
            3, //d
            -1, //e 
            1, //f
            2, //g 
            -1, //h
            -1, //i 
            2, //j
            2, //k
            4, //l
            5, //m
            5, //n
            -1, //o
            1, //p
            2, //q
            6, //r
            2, //s
            3, //t
            -1, //u
            1, //v
            -1, //w
            2, //x
            -1, //y
            2  //z
        };
    }

    /**
     * Removes adjacent sounds.
     * <p>
     * The first character is always kept. A following character is kept
     * only if its code is non-negative and differs from the code of the 
     * most recently kept character. Skipped characters do not reset that
     * comparison.
     * 
     * @param word a non-empty word
     * 
     * @return the first character followed by the kept characters
     */
    private String reduce(
        String word
    ) {
        final int len = word.length();
        final StringBuilder buf = new StringBuilder(len);
        final char first = word.charAt(0);
        buf.append(first);
        int lastCode = getCode(first);
        for (int i = 1; i < len; i++) {
            final char ch = word.charAt(i);
            final int currentCode = getCode(ch);
            if ((currentCode != lastCode) && (currentCode >= 0)) {
                buf.append(ch);
                lastCode = currentCode;
            }
        }
        return buf.toString();
    }

    /**
     * The singleton
     */
    private static final Soundex instance = new Soundex();  

    /**
     * Length value requesting codes of unlimited length.
     */
    public static final int NO_MAX = -1;

    /**
     * If true, the final 's' of the word being encoded is dropped.
     */
    private boolean dropFinalSBoolean = false;

    /**
     * Length of code to build.
     */
    private int lengthInt = 4;

    /**
     * If true, codes are padded to the LengthInt with zeros.
     */
    private boolean padBoolean = true;

    /**
     * Soundex code table.
     */
    private final int[] soundexInts = createArray();

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy