-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmutualinformation.scala
155 lines (137 loc) · 4.97 KB
/
mutualinformation.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
package org.apache.spark.examples.MyCode
import scala.collection.mutable.ArrayBuffer
import scala.math
/**
* Created by otto on 8/4/16.
*/
class mutualinformation {
/**
 * Pairs up two attribute columns element by element.
 *
 * Entry `i` of the result is `Attri1(i)` immediately followed by `Attri2(i)`.
 * No delimiter is inserted between the two values, so distinct pairs such as
 * ("a", "bc") and ("ab", "c") both produce "abc".
 *
 * The result has exactly `Attri1.length` entries; `Attri2` is read at the same
 * indices and therefore must be at least as long as `Attri1`.
 *
 * @param Attri1 first column of values
 * @param Attri2 second column of values
 * @return a newly allocated array holding the concatenated pairs
 */
def combinestr(Attri1:Array[String],Attri2:Array[String])={
  Array.tabulate(Attri1.length)(idx => Attri1(idx).concat(Attri2(idx)))
}
/**
 * Returns `H(Attri1) + H(Attri2) - H(joint)`, where every `H` is obtained from
 * `entr.shannonEntr` and `joint` is the element-wise combination of the two
 * columns built by `combinestr`.
 *
 * NOTE(review): `combinestr` joins the two values without a delimiter, so this
 * presumably relies on the inputs never producing colliding joint labels —
 * confirm against the data.
 *
 * @param Attri1 first column of values
 * @param Attri2 second column of values, aligned index-by-index with `Attri1`
 * @return the sum of the two individual entropies minus the joint entropy
 */
def mutualinformation(Attri1:Array[String],Attri2:Array[String])={
  val jointColumn = combinestr(Attri1, Attri2)
  val estimator = new entr()
  val hJoint = estimator.shannonEntr(jointColumn)
  val hFirst = estimator.shannonEntr(Attri1)
  val hSecond = estimator.shannonEntr(Attri2)
  hFirst + hSecond - hJoint
}
/**
 * Returns `MI + H(Attri1) + H(Attri2)`, where
 * `MI = H(Attri1) + H(Attri2) - H(joint)`.
 *
 * All entropies come from `entr.shannonEntr`; the joint column is the
 * element-wise combination produced by `combinestr`.
 *
 * @param Attri1 first column of values
 * @param Attri2 second column of values, aligned index-by-index with `Attri1`
 * @return the score described above
 */
def ISF(Attri1:Array[String],Attri2:Array[String])={
  val jointColumn = combinestr(Attri1, Attri2)
  val estimator = new entr()
  val hJoint = estimator.shannonEntr(jointColumn)
  val hFirst = estimator.shannonEntr(Attri1)
  val hSecond = estimator.shannonEntr(Attri2)
  // Same evaluation order as (H1 + H2 - Hjoint) + H1 + H2.
  val mutualInfo = hFirst + hSecond - hJoint
  mutualInfo + hFirst + hSecond
}
/**
 * Extracts two attribute columns from the flat `Dataset` and scores them with `ISF`.
 *
 * Column `c` occupies `Dataset(c * objectnumber)` through
 * `Dataset(c * objectnumber + objectnumber - 1)`. The first entry of each column
 * is skipped (presumably a header cell — TODO confirm), so each extracted column
 * has `objectnumber - 1` values.
 *
 * When either index is not below `attributesnumber`, the method prints
 * "Wrong in number" and yields `Unit` instead of a score; callers therefore
 * receive a value that is either the `ISF` result or `()`.
 *
 * @param Num1             index of the first attribute column
 * @param Num2             index of the second attribute column
 * @param Dataset          flat, column-by-column storage of all attributes
 * @param objectnumber     number of entries stored per column (skipped first entry included)
 * @param attributesnumber number of attribute columns
 * @return the `ISF` score of the two columns, or `()` when an index is too large
 */
def Choose2attributes(Num1:Int,Num2:Int,Dataset:Array[String],objectnumber:Int,attributesnumber:Int)= {
  if (Num1 >= attributesnumber || Num2 >= attributesnumber) println("Wrong in number")
  else {
    val rows = objectnumber - 1
    val firstColumn = new Array[String](rows)
    val secondColumn = new Array[String](rows)
    // Fill both columns in one pass; "+ 1" skips the first entry of each column.
    (0 until rows).foreach { row =>
      firstColumn(row) = Dataset(Num1 * objectnumber + row + 1)
      secondColumn(row) = Dataset(Num2 * objectnumber + row + 1)
    }
    ISF(firstColumn, secondColumn)
  }
}
/**
 * Scores every unordered pair of attribute columns and keeps the pairs whose
 * score falls below `threshold`.
 *
 * For each `i < j` in `[0, attributesnumber)` the score is obtained from
 * `Choose2attributes(i, j, ...)`. Every retained pair contributes three
 * consecutive entries to the result: `i`, `j` (both as Doubles) and the score.
 *
 * @param Dataset          flat, column-by-column storage of all attributes
 * @param objectnumber     number of entries stored per column
 * @param attributesnumber number of attribute columns
 * @param threshold        exclusive upper bound a score must stay below to be kept;
 *                         defaults to the previously hard-coded 0.06
 * @return a flat buffer of `(i, j, score)` triples, in pair-enumeration order
 * @throws IllegalStateException if `Choose2attributes` yields something other than a Double
 */
def findMISS(Dataset:Array[String],objectnumber:Int,attributesnumber:Int,threshold: Double = 0.06)={
  val AllISF = new ArrayBuffer[Double]()
  for {
    i <- 0 until attributesnumber
    j <- i + 1 until attributesnumber
  } {
    // Choose2attributes can yield either a score or Unit, so inspect the result
    // instead of casting it blindly.
    Choose2attributes(i, j, Dataset, objectnumber, attributesnumber) match {
      case value: Double =>
        if (value < threshold) {
          AllISF += i.toDouble
          AllISF += j.toDouble
          AllISF += value
        }
      case other =>
        // Both indices are below attributesnumber here, so a non-numeric result
        // signals a broken invariant rather than bad user input.
        throw new IllegalStateException(
          s"Choose2attributes($i, $j) returned a non-numeric result: $other")
    }
  }
  AllISF
}
/**
 * Builds the list of attribute indices from a flat buffer of
 * `(index, index, score)` triples such as the one `findMISS` returns.
 *
 * The result contains, in this order:
 *  1. every index found in the first two slots of a triple, de-duplicated and
 *     kept in order of first appearance;
 *  2. every index in `[0, attributesnumber)` that none of the triples mention,
 *     in ascending order.
 *
 * @param ISFcombination   flat buffer read in groups of three; the third entry of
 *                         each group (the score) is ignored
 * @param attributesnumber number of attribute columns
 * @return the indices described above, as Doubles
 */
def MISSnumber(ISFcombination:ArrayBuffer[Double],attributesnumber:Int) ={
  // Keep positions 0 and 1 of every triple; position 2 holds the score.
  val mentioned = ISFcombination.iterator.zipWithIndex
    .collect { case (value, pos) if pos % 3 != 2 => value }
    .toVector
    .distinct
  // Set lookup instead of a linear scan per attribute.
  val seen = mentioned.toSet
  val unmentioned = (0 until attributesnumber).map(_.toDouble).filterNot(seen.contains)
  (mentioned ++ unmentioned).toArray
}
/**
 * Copies whole attribute columns out of the flat `Dataset`.
 *
 * Each entry of `Extractnumber` is truncated to an Int column index `c`; the
 * `objectnumber` consecutive entries starting at `Dataset(objectnumber * c)` are
 * appended to the result. Columns appear in the order given by `Extractnumber`.
 *
 * @param Dataset          flat, column-by-column storage of all attributes
 * @param Extractnumber    column indices to copy, stored as Doubles
 * @param objectnumber     number of entries stored per column
 * @param attributesnumber not read by this method
 * @return a buffer holding the selected columns back to back
 */
def Extractsubsets(Dataset:Array[String],Extractnumber: Array[Double],objectnumber:Int,attributesnumber:Int)={
  val Subsets = new ArrayBuffer[String]()
  Extractnumber.foreach { label =>
    val base = objectnumber * label.toInt
    (0 until objectnumber).foreach(offset => Subsets += Dataset(base + offset))
  }
  Subsets
}
/**
 * Produces a "1"/"0" label for every (attribute, object) cell of the data set.
 *
 * `Dataset` is read as `attributesnumber` consecutive columns of `objectnumber`
 * entries each; the first entry of every column is skipped (presumably a header
 * cell — TODO confirm), leaving `objectnumber - 1` values per column.
 *
 * For each column:
 *  - its entropy is obtained from `entr.shannonEntr`;
 *  - every distinct value occurring `c` times gets the factor `0` when `c == 1`,
 *    otherwise `(c - 1) * ln(c - 1) - c * ln(c)`;
 *  - a cell's score is `coef1 * reciprocal_of_a - entropy + factor`;
 *  - the cell is labelled "1" when its score is positive, when its factor is 0,
 *    or when its factor equals the smallest factor in the column; otherwise "0".
 *
 * @param Dataset          flat, column-by-column storage of all attributes
 * @param objectnumber     number of entries stored per column (skipped first entry
 *                         included); must be at least 3
 * @param attributesnumber number of attribute columns
 * @return an array of `(objectnumber - 1) * attributesnumber` labels, laid out
 *         column by column in the same order as `Dataset`
 * @throws IllegalArgumentException if `objectnumber` is below 3
 */
def CalDiffHoloentropy(Dataset:Array[String],objectnumber:Int,attributesnumber:Int)={
  // With fewer than two data rows reciprocal_of_a would be <= 0, making the
  // logarithm and the ratio below meaningless.
  require(objectnumber >= 3, s"objectnumber must be at least 3, got $objectnumber")
  val reciprocal_of_b = objectnumber - 1
  val reciprocal_of_a = reciprocal_of_b - 1
  // The ratio must be a floating-point division; Int / Int would truncate it.
  val coef1 = -scala.math.log(reciprocal_of_a) +
    (reciprocal_of_b.toDouble / reciprocal_of_a) * scala.math.log(reciprocal_of_b)
  val calculateentropy = new entr()
  val rows = objectnumber - 1
  val labelmatrix = new Array[String](rows * attributesnumber)
  for (i <- 0 until attributesnumber) {
    // "+ 1" skips the first entry of the column.
    val column = Array.tabulate(rows)(row => Dataset(i * objectnumber + row + 1))
    val entropy: Double = calculateentropy.shannonEntr(column)
    // Per-value factor, derived from how often the value occurs in this column.
    val factorByValue: Map[String, Double] = column.groupBy(identity).map {
      case (value, occurrences) =>
        val count = occurrences.length
        val factor =
          if (count == 1) 0.0
          else (count - 1) * scala.math.log(count - 1) - count * scala.math.log(count)
        value -> factor
    }
    val OFfactor = column.map(factorByValue)
    val minimumOF = OFfactor.min
    for (i2 <- 0 until rows) {
      val factor = OFfactor(i2)
      val score = coef1 * reciprocal_of_a - entropy + factor
      // Compare the cell's own factor with the column minimum (element, not array).
      val flagged = score > 0 || factor == 0 || factor == minimumOF
      labelmatrix(i * rows + i2) = if (flagged) "1" else "0"
    }
  }
  labelmatrix
}
}