All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.articulate.sigma.nlp.KMeans Maven / Gradle / Ivy

Go to download

Sigma knowledge engineering system is an system for developing, viewing and debugging theories in first order logic. It works with Knowledge Interchange Format (KIF) and is optimized for the Suggested Upper Merged Ontology (SUMO) www.ontologyportal.org.

The newest version!
package com.articulate.sigma.nlp;

import java.util.*;

import com.articulate.sigma.KBmanager;
import com.articulate.sigma.semRewrite.Interpreter;

/*
Copyright 2014-2015 IPsoft

Author: Adam Pease [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program ; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA  02111-1307 USA 
 */
// code thanks to http://adityamandhare.blogspot.com/2013/02/kmeans-clustering-algorithm-java-code.html
// modified to solve fixed number of clusters - APease

// Hartigan, J. A.; Wong, M. A. (1979). "Algorithm AS 136: A K-Means Clustering Algorithm". 
// Journal of the Royal Statistical Society, Series C 28 (1): 100–108. JSTOR 2346830.

public class KMeans {

    private static int[] counts;
    private static float k[][];
    private static float tempk[][];
    private static double m[];
    private static double diff[];

    /** ***************************************************************
     * This method will determine the cluster in which an element should 
     * go at a particular step.
     */
    private static int calcDiff(float a, int p) {
        
        int temp1 = 0;
        for (int i = 0; i < p; ++i) {
            if (a > m[i])
                diff[i] = a - m[i];
            else
                diff[i] = m[i] - a;
        }
        int val = 0;
        double temp = diff[0];
        for (int i = 0; i < p; ++i) {
            if (diff[i] < temp) {
                temp = diff[i];
                val = i;
            }
        }
        return val;
    }

    /** ***************************************************************
     * This method will determine mean values
     */
    public static float mean(float[] list, int n) {
    
        int cnt = 0;
        float mean = 0;
        for (int j = 0; j < n - 1; ++j) {
            if (list[j] != -1) {
                mean += list[j];
                ++cnt;
            }
        }
        return mean / cnt;
    }
    
    /** ***************************************************************
     * This method will determine intermediate mean values
     */
    private static void calcMeans(int n, int p) {
        
       // for (int i = 0; i < p; ++i)
       //     m[i] = 0; // initializing means to 0
        int cnt = 0;
        for (int i = 0; i < p; ++i) {
            m[i] = mean(k[i],n);
        }
    }

    /** ***************************************************************
     * This checks if previous k ie. tempk and current k are same.
     * Used as terminating case.
     */
    private static boolean check1(int n, int p) {
        
        for (int i = 0; i < p; ++i) {
            for (int j = 0; j < n; ++j) {
                if (tempk[i][j] != k[i][j]) {
                    return false;
                }
            }
        }
        return true;
    }
    
    /** ***************************************************************
     */
    public static ArrayList> run(int n, float[] d, int p) {
        
        counts = new int[p];
        Arrays.fill(counts, 0);
        ArrayList> result = new ArrayList>();
        k = new float[p][n];
        tempk = new float[p][n];
        m = new double[p];
        diff = new double[p];
        /* Initializing m */
        for (int i = 0; i < p; ++i)
            m[i] = d[i];

        int temp = 0;
        boolean flag = false;
        do {
            for (int i = 0; i < p; ++i)
                for (int j = 0; j < n; ++j) {
                    k[i][j] = -1;
                }
            for (int i = 0; i < n; ++i) { // for loop will cal calcDiff(int) for every element.            
                temp = calcDiff(d[i],p);
                k[temp][counts[temp]++] = d[i];
            }
            calcMeans(n,p); // call to method which will calculate mean at this step.
            flag = check1(n,p); // check if terminating condition is satisfied.
            if (flag != true)
                // Take backup of k in tempk so that you can check for equivalence in next step
                for (int i = 0; i < p; ++i)
                    for (int j = 0; j < n; ++j)
                        tempk[i][j] = k[i][j];

            //System.out.println("\n\nAt this step");
            //System.out.println("\nValue of clusters");
            //for (int i = 0; i < p; ++i) {
            //    System.out.print("K" + (i + 1) + "{ ");
            //    for (int j = 0; k[i][j] != -1 && j < n - 1; ++j)
            //        System.out.print(k[i][j] + " ");
                //System.out.println("}");
            //}
            //System.out.println("\nValue of m ");
            //for (int i = 0; i < p; ++i)
            //    System.out.print("m" + (i + 1) + "=" + m[i] + "  ");

            Arrays.fill(counts, 0);
        } while (flag == false);

        //System.out.println("\n\n\nThe Final Clusters By Kmeans are as follows: ");
        for (int i = 0; i < p; ++i) {
            //System.out.print("K" + (i + 1) + "{ ");
            ArrayList oneCluster = new ArrayList();
            for (int j = 0; k[i][j] != -1 && j < n - 1; ++j) {
                //System.out.print(k[i][j] + " ");
                oneCluster.add(new Float(k[i][j]));
            }
            //System.out.println("}");
            result.add(oneCluster);
        }
        return result;
    }
    
    /** ***************************************************************
     */
    public static void interact() {
        
        System.out.println("INFO in KMeans.interact()");
        Scanner scr = new Scanner(System.in);
        /* Accepting number of elements */
        System.out.println("Enter the number of elements ");
        int n = scr.nextInt();
        float[] d = new float[n];
        /* Accepting elements */
        System.out.println("Enter " + n + " elements: ");
        for (int i = 0; i < n; ++i)
            d[i] = scr.nextFloat();
        /* Accepting num of clusters */
        System.out.println("Enter the number of clusters: ");
        int p = scr.nextInt();
        /* Initialising arrays */
        ArrayList> result = run(n,d,p);
        System.out.println(result);
    }

    /** ***************************************************************
     */
    public static void testKmeans() {
        
        //System.out.println("INFO in KMeans.testKmeans()");
        int n = 8;
        float[] d = new float[] {2, 3, 6, 8, 12, 15, 18, 22};
        int p = 3;
        ArrayList> result = run(n,d,p);
        System.out.println(result);
        System.out.println("Should be: [[2.0, 3.0], [6.0, 8.0], [12.0, 15.0, 18.0, 22.0]]");
    }
    
    /** ***************************************************************
     */
    public static void main(String[] args) {  

        //System.out.println("INFO in KMeans.main()");
        if (args != null && args.length > 0 && args[0].equals("-i")) {
            interact();
        }
        else if (args != null && args.length > 0 && args[0].equals("-h")) {
            System.out.println("K-means");
            System.out.println("  options:");
            System.out.println("  -h - show this help screen");
            System.out.println("  -i - runs a loop of calculations taking user input ");
        }
        else {
            testKmeans();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy