# Testing.py — k-means clustering experiment (scraped from a GitHub file view;
# the original page header and line-number gutter were removed here).
import random
import numpy as py
import copy
#This is a logical abstraction of Cluster.
class Cluster:
    """A cluster: a centroid plus a mapping of points to their distances.

    Attributes:
        centroid: the DataPoint currently acting as this cluster's centroid.
        pointsandDistance: dict mapping DataPoint -> distance to the centroid.
    """

    def __init__(self, dataPoint, pointsandDistance=None):
        self.centroid = dataPoint
        # A mutable default ({}) would be shared by every Cluster created
        # without an explicit table; use None and build a fresh dict instead.
        self.pointsandDistance = {} if pointsandDistance is None else pointsandDistance

    def getCurrentCentroid(self):
        # Debug helper: print (not return) the current centroid.
        print("Current centroid for this cluster is - ", self.centroid)

    def __repr__(self):
        return '%s %s' % (self.centroid, self.pointsandDistance)
#This is a logical abstraction of a Point
class DataPoint:
#Method to initialize the Tuple (basically the Data Point)
def __init__(self, tuple):
self.tuple = tuple
#Method to be able to print the Tuple
'''Reference: http://stackoverflow.com/questions/1984162/purpose-of-pythons-repr'''
def __repr__(self):
return '%s' % self.tuple
#def __eq__(self,other):
#return self.tuple == other.tuple
#Generate Random Data
def fireUp(lowerBound, upperBound, maxPoints, numClusters, threshold):
    """Generate random 2-D points and build the initial clusters.

    Args:
        lowerBound/upperBound: range for each random coordinate.
        maxPoints: number of DataPoints to generate. (The original
            range(1, maxPoints) produced only maxPoints - 1 points.)
        numClusters: number of clusters / initial centroids to pick.
        threshold: convergence threshold, reserved for the not-yet-written
            k-means iteration (kMeansAlgo).

    Returns:
        The list of freshly built Cluster objects.
    """
    # Build maxPoints random 2-D points.
    dataCollection = [
        DataPoint([py.random.uniform(lowerBound, upperBound) for _ in range(2)])
        for _ in range(maxPoints)
    ]
    # One independent {point: distance} table per cluster, distances at 0.
    # NOTE(review): the original deep-copied a single table, which also
    # duplicated the DataPoint keys (hashed by identity), so the cluster
    # tables no longer referenced the actual generated points. Building a
    # fresh dict per cluster keeps the real points as keys.
    ListofHashtableofDataPoints = [
        {point: 0 for point in dataCollection} for _ in range(numClusters)
    ]
    print(ListofHashtableofDataPoints)
    # Pick numClusters distinct points as the initial centroids.
    initialCentroids = random.sample(dataCollection, numClusters)
    print(initialCentroids)
    # numClusters guarantees both lists above have the same length.
    clusterList = [
        Cluster(initialCentroids[i], ListofHashtableofDataPoints[i])
        for i in range(numClusters)
    ]
    print(clusterList)
    # TODO: kMeansAlgo(clusterList, threshold) once the iteration exists.
    return clusterList
def main():
    """Entry point: set fixed demo parameters and launch the cluster setup.

    Values are hard-coded for now; the original interactive raw_input
    prompts (dimensions, bounds, point/cluster counts) were removed.
    """
    lowerBound = 1
    upperBound = 2
    maxPoints = 10
    numClusters = 3
    threshold = 0.2
    fireUp(lowerBound, upperBound, maxPoints, numClusters, threshold)


if __name__ == "__main__":
    main()