-
Notifications
You must be signed in to change notification settings - Fork 3
/
linkage.go
140 lines (113 loc) · 3.31 KB
/
linkage.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package clustering
// LinkageType is an interface that defines how two clusters are scored
// based on the pairwise distances of their items.
type LinkageType interface {
// Reset clears the internal state for this linkage type.
Reset()
// Put adds a new distance observation for the item-pair.
Put(item1, item2 ClusterItem, dist float64)
// Get returns the current value of the linkage based on all the observed
// values so far.
Get() float64
// LWParams returns the lance-williams parameters for updating clusters
// after a merge. Return (alpha_i, alpha_j, Beta, gamma), if return value
// is not 4 floats, then clustering falls back to recomputing at each pass.
LWParams() []float64
}
// CompleteLinkage implements complete-linkage clustering, which is defined as
// the maximum distance between any pair of items from the two clusters.
func CompleteLinkage() LinkageType {
return &maxLinkage{}
}
// SingleLinkage implements single-linkage clustering, which is defined as
// the minimum distance between any pair of items from the two clusters.
func SingleLinkage() LinkageType {
return &minLinkage{}
}
// AverageLinkage implements average-linkage (sometimes referred to as UPGMA)
// clustering, which is defined as the average of all distances between all
// pairs of items in the two clusters.
func AverageLinkage() LinkageType {
return &avgLinkage{}
}
// WeightedAverageLinkage implements WPGMA linkage agglomeration method
// clustering, which is defined as the average of all distances between pairs
// of items in the two clusters. It weights clusters equally regardless of
// number of items.
func WeightedAverageLinkage() LinkageType {
return &avgLinkage{isWeighted: true}
}
////////////////
type maxLinkage struct {
maxDist float64
}
func (c *maxLinkage) Reset() {
c.maxDist = -1.0
}
func (c *maxLinkage) Get() float64 {
return c.maxDist
}
func (c *maxLinkage) Put(a, b ClusterItem, dist float64) {
if dist > c.maxDist || c.maxDist < 0.0 {
c.maxDist = dist
}
}
func (c *maxLinkage) LWParams() []float64 {
return []float64{0.5, 0.5, 0.0, 0.5}
}
////////////////
type minLinkage struct {
minDist float64
}
func (c *minLinkage) Reset() {
c.minDist = -1.0
}
func (c *minLinkage) Get() float64 {
return c.minDist
}
func (c *minLinkage) Put(a, b ClusterItem, dist float64) {
if dist < c.minDist || c.minDist < 0.0 {
c.minDist = dist
}
}
func (c *minLinkage) LWParams() []float64 {
return []float64{0.5, 0.5, 0.0, -0.5}
}
////////////////
type avgLinkage struct {
avgDist float64
totalPairs float64
isWeighted bool
leftCounts map[ClusterItem]struct{}
rightCounts map[ClusterItem]struct{}
}
func (c *avgLinkage) Reset() {
c.avgDist = 0.0
c.totalPairs = 0.0
if !c.isWeighted {
c.leftCounts = make(map[ClusterItem]struct{})
c.rightCounts = make(map[ClusterItem]struct{})
}
}
func (c *avgLinkage) Get() float64 {
if c.totalPairs <= 0.0 {
return 0.0
}
return c.avgDist / c.totalPairs
}
func (c *avgLinkage) Put(a, b ClusterItem, dist float64) {
c.avgDist += dist
c.totalPairs++
if !c.isWeighted {
c.leftCounts[a] = struct{}{}
c.rightCounts[b] = struct{}{}
}
}
func (c *avgLinkage) LWParams() []float64 {
if c.isWeighted {
return []float64{0.5, 0.5, 0.0, 0.0}
}
ni := float64(len(c.leftCounts))
nj := float64(len(c.rightCounts))
return []float64{ni / (ni + nj), nj / (ni + nj), 0.0, 0.0}
}