Joined: Tue Mar 27, 2007 10:55 pm Posts: 1483 Location: Earth Has thanked: 3 time Have thanks: 11 time
* Project Name:
k-means Cluster Analysis * Programmer: Shyam Sivaraman * Type: Algorithms * Technology: Java * IDE: Any * Description: The package aims at providing an implementation of the k-means Clustering Algorithm in Java. The package does not provide any UI; it is up to the user to display the output in the required format.
This class is the entry point for constructing Cluster Analysis objects. Each instance of JCA object is associated with one or more clusters, and a Vector of DataPoint objects. The JCA and DataPoint classes are the only classes available from other packages. @see DataPoint
**/
public class JCA { private Cluster[] clusters; private int miter; private Vector mDataPoints = new Vector(); private double mSWCSS;
public JCA(int k, int iter, Vector dataPoints) { clusters = new Cluster[k]; for (int i = 0; i < k; i++) { clusters[i] = new Cluster("Cluster" + i); } this.miter = iter; this.mDataPoints = dataPoints; }
private void calcSWCSS() { double temp = 0; for (int i = 0; i < clusters.length; i++) { temp = temp + clusters[i].getSumSqr(); } mSWCSS = temp; }
public void startAnalysis() { //set Starting centroid positions - Start of Step 1 setInitialCentroids(); int n = 0; //assign DataPoint to clusters loop1: while (true) { for (int l = 0; l < clusters.length; l++) { clusters[l].addDataPoint((DataPoint)mDataPoints.elementAt(n)); n++; if (n >= mDataPoints.size()) break loop1; } }
//calculate E for all the clusters calcSWCSS();
//recalculate Cluster centroids - Start of Step 2 for (int i = 0; i < clusters.length; i++) { clusters[i].getCentroid().calcCentroid(); }
//recalculate E for all the clusters calcSWCSS();
for (int i = 0; i < miter; i++) { //enter the loop for cluster 1 for (int j = 0; j < clusters.length; j++) { for (int k = 0; k < clusters[j].getNumDataPoints(); k++) {
//pick the first element of the first cluster //get the current Euclidean distance double tempEuDt = clusters[j].getDataPoint(k).getCurrentEuDt(); Cluster tempCluster = null; boolean matchFoundFlag = false;
//call testEuclidean distance for all clusters for (int l = 0; l < clusters.length; l++) {
//if testEuclidean < currentEuclidean then if (tempEuDt > clusters[j].getDataPoint(k).testEuclideanDistance(clusters[l].getCentroid())) { tempEuDt = clusters[j].getDataPoint(k).testEuclideanDistance(clusters[l].getCentroid()); tempCluster = clusters[l]; matchFoundFlag = true; } //if statement - Check whether the Last EuDt is > Present EuDt
} //for variable 'l' - Looping between different Clusters for matching a Data Point. //add DataPoint to the cluster and calcSWCSS
if (matchFoundFlag) { tempCluster.addDataPoint(clusters[j].getDataPoint(k)); clusters[j].removeDataPoint(clusters[j].getDataPoint(k)); for (int m = 0; m < clusters.length; m++) { clusters[m].getCentroid().calcCentroid(); }
//for variable 'm' - Recalculating centroids for all Clusters
calcSWCSS(); }
//if statement - A Data Point is eligible for transfer between Clusters. } //for variable 'k' - Looping through all Data Points of the current Cluster. }//for variable 'j' - Looping through all the Clusters. }//for variable 'i' - Number of iterations. }
public Vector[] getClusterOutput() { Vector v[] = new Vector[clusters.length]; for (int i = 0; i < clusters.length; i++) { v[i] = clusters[i].getDataPoints(); } return v; }
private void setInitialCentroids() { //kn = (round((max-min)/k)*n)+min where n is from 0 to (k-1). double cx = 0, cy = 0; for (int n = 1; n <= clusters.length; n++) { cx = (((getMaxXValue() - getMinXValue()) / (clusters.length + 1)) * n) + getMinXValue(); cy = (((getMaxYValue() - getMinYValue()) / (clusters.length + 1)) * n) + getMinYValue(); Centroid c1 = new Centroid(cx, cy); clusters[n - 1].setCentroid(c1); c1.setCluster(clusters[n - 1]); } }
public int getKValue() { return clusters.length; }
public int getIterations() { return miter; }
public int getTotalDataPoints() { return mDataPoints.size(); }
public double getSWCSS() { return mSWCSS; }
public Cluster getCluster(int pos) { return clusters[pos]; } }
/*-----------------Cluster.java----------------*/
package org.c4s.algorithm.cluster;
import java.util.Vector;
/** * This class represents a Cluster in a Cluster Analysis Instance. A Cluster is associated * with one and only one JCA Instance. A Cluster is related to more than one DataPoints and * one centroid. * @author Shyam Sivaraman * @version 1.1 * @see DataPoint * @see Centroid */
/**
 * Creates an empty, named cluster. The centroid starts out unset and has to be
 * supplied afterwards through setCentroid().
 *
 * @param name label for this cluster
 */
public Cluster(String name) {
    mDataPoints = new Vector();
    mCentroid = null; // filled in later by setCentroid()
    mName = name;
}
/**
 * Attaches the centroid this cluster measures its points against.
 *
 * @param c the centroid to use from now on
 */
public void setCentroid(Centroid c) {
    this.mCentroid = c;
}
/**
 * @return the centroid currently attached to this cluster (null until
 *         setCentroid() has been called)
 */
public Centroid getCentroid() {
    return this.mCentroid;
}
public void addDataPoint(DataPoint dp) { //called from CAInstance dp.setCluster(this); //initiates a inner call to calcEuclideanDistance() in DP. this.mDataPoints.addElement(dp); calcSumOfSquares(); }
/**
 * Removes a data point from this cluster and refreshes the cluster's sum.
 *
 * @param dp the point to remove
 */
public void removeDataPoint(DataPoint dp) {
    mDataPoints.removeElement(dp);
    calcSumOfSquares();
}
/** @return how many data points this cluster currently holds */
public int getNumDataPoints() {
    int count = mDataPoints.size();
    return count;
}
/**
 * @param pos zero-based position within this cluster's point list
 * @return the data point stored at that position
 */
public DataPoint getDataPoint(int pos) {
    Object element = mDataPoints.elementAt(pos);
    return (DataPoint) element;
}
public void calcSumOfSquares() { //called from Centroid int size = this.mDataPoints.size(); double temp = 0; for (int i = 0; i < size; i++) { temp = temp + ((DataPoint) this.mDataPoints.elementAt(i)).getCurrentEuDt(); } this.mSumSqr = temp; }
/** @return the sum last computed by calcSumOfSquares() */
public double getSumSqr() {
    return mSumSqr;
}
/** @return the name given to this cluster at construction */
public String getName() {
    return mName;
}
/**
 * @return the cluster's own list of data points (the live Vector, not a copy)
 */
public Vector getDataPoints() {
    return mDataPoints;
}
}
/*---------------Centroid.java-----------------*/
package org.c4s.algorithm.cluster;
/** * This class represents the Centroid for a Cluster. The initial centroid is calculated * using a equation which divides the sample space for each dimension into equal parts * depending upon the value of k. * @author Shyam Sivaraman * @version 1.0 * @see Cluster */
class Centroid { private double mCx, mCy; private Cluster mCluster;
public void calcCentroid() { //only called by CAInstance int numDP = mCluster.getNumDataPoints(); double tempX = 0, tempY = 0; int i; //caluclating the new Centroid for (i = 0; i < numDP; i++) { tempX = tempX + mCluster.getDataPoint(i).getX(); //total for x tempY = tempY + mCluster.getDataPoint(i).getY(); //total for y } this.mCx = tempX / numDP; this.mCy = tempY / numDP; //calculating the new Euclidean Distance for each Data Point tempX = 0; tempY = 0; for (i = 0; i < numDP; i++) { mCluster.getDataPoint(i).calcEuclideanDistance(); } //calculate the new Sum of Squares for the Cluster mCluster.calcSumOfSquares(); }
/**
 * Records which cluster this centroid belongs to.
 *
 * @param c the owning cluster
 */
public void setCluster(Cluster c) {
    mCluster = c;
}
/** This class represents a candidate for Cluster analysis. A candidate must have a name and two independent variables on the basis of which it is to be clustered. A Data Point must have two variables and a name. A Vector of Data Point object is fed into the constructor of the JCA class. JCA and DataPoint are the only classes which may be available from other packages. @author Shyam Sivaraman @version 1.0 @see JCA @see Cluster */
public class DataPoint { private double mX,mY; private String mObjName; private Cluster mCluster; private double mEuDt;
/**
 * Assigns this point to a cluster and immediately recomputes its distance to
 * that cluster's centroid.
 *
 * @param cluster the cluster this point now belongs to
 */
public void setCluster(Cluster cluster) {
    mCluster = cluster;
    this.calcEuclideanDistance();
}
public void calcEuclideanDistance() {
//called when DP is added to a cluster or when a Centroid is recalculated. mEuDt = Math.sqrt(Math.pow((mX - mCluster.getCentroid().getCx()), 2) + Math.pow((mY - mCluster.getCentroid().getCy()), 2)); }
/**
 * Computes the Euclidean distance from this point to the given centroid
 * without storing it or changing the point's cluster.
 *
 * @param c the centroid to measure against
 * @return the distance between this point and c
 */
public double testEuclideanDistance(Centroid c) {
    double dx = mX - c.getCx();
    double dy = mY - c.getCy();
    double squared = Math.pow(dx, 2) + Math.pow(dy, 2);
    return Math.sqrt(squared);
}
Users browsing this forum: No registered users and 1 guest
You cannot post new topics in this forum You cannot reply to topics in this forum You cannot edit your posts in this forum You cannot delete your posts in this forum You cannot post attachments in this forum