Skip to content

Commit 9f9f6a9

Browse files
author
Tomasz Latkowski
committed
added ttest and correlation with class
1 parent 98e2d68 commit 9f9f6a9

File tree

4 files changed

+50
-0
lines changed

4 files changed

+50
-0
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.tsv filter=lfs diff=lfs merge=lfs -text

data/autism.tsv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:b3b7d953d54f6bd08f9860347df05bbacfcccca254400cf4711b2be30e1cde71
3+
size 95496413

selection.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,47 @@
22

33

44
def fisher(data, num_instances: list, top_k=10):
    """
    Performs Fisher feature selection method according to the following formula:
    D(f) = (m1(f) - m2(f)) / (std1(f) + std2(f))

    :param data: input accepted by ``tf.split``; it is split along the first axis,
        with the rows of class 1 first and the rows of class 2 after them
        (assumes a 2-D instances x features layout -- TODO confirm with callers).
    :param num_instances: two-element list with the number of rows of class 1 and
        class 2; passed to ``tf.split`` as the split sizes.
    :param top_k: number of highest-scoring features to return.
    :return: the result of ``tf.nn.top_k``: the ``top_k`` largest coefficients and
        their feature indices.
    """
    assert len(num_instances) == 2, "Fisher selection method can be performed for two-class problems."
    class1, class2 = tf.split(data, num_instances)
    # tf.nn.moments returns (mean, variance), so the standard deviations
    # required by the formula are obtained with an explicit square root.
    mean1, var1 = tf.nn.moments(class1, axes=0)
    mean2, var2 = tf.nn.moments(class2, axes=0)
    fisher_coeffs = (mean1 - mean2) / (tf.sqrt(var1) + tf.sqrt(var2))
    # NOTE(review): the score is signed, so features whose class-2 mean is
    # larger rank last; confirm whether an absolute value is intended.
    return tf.nn.top_k(fisher_coeffs, k=top_k)
1020

21+
22+
def feature_correlation_with_class(data, num_instances: list, top_k=10):
    """
    Makes feature correlation with class selection according to the following formula:
    D(f) = [(m1(f) - m(f))^2 + (m2(f) - m(f))^2] / (2 * sigma(f)^2)

    where m1 and m2 are the per-class feature means, and m and sigma^2 are the
    mean and variance of the feature over the whole data set.

    :param data: value convertible with ``tf.convert_to_tensor``; it is split along
        the first axis, with the rows of class 1 first and the rows of class 2
        after them (assumes a 2-D instances x features layout -- TODO confirm).
    :param num_instances: list with the number of rows of each class; passed to
        ``tf.split`` as the split sizes (two classes are unpacked).
    :param top_k: number of highest-scoring features to return.
    :return: the result of ``tf.nn.top_k``: the ``top_k`` largest coefficients and
        their feature indices.
    """
    data = tf.convert_to_tensor(data)
    class1, class2 = tf.split(data, num_instances)
    # tf.nn.moments returns (mean, variance); only the class means are needed.
    mean1, _ = tf.nn.moments(class1, axes=0)
    mean2, _ = tf.nn.moments(class2, axes=0)
    mean, variance = tf.nn.moments(data, axes=0)
    between_class = tf.square(mean1 - mean) + tf.square(mean2 - mean)
    # The denominator is parenthesised: "x / 2 * v" would multiply by v
    # instead of dividing by it. The variance already equals sigma^2, so it
    # must not be squared again.
    corr_coeffs = between_class / (2 * variance)
    return tf.nn.top_k(corr_coeffs, k=top_k)
35+
36+
37+
def t_test(data, num_instances: list, top_k=10):
    """
    Makes two-sample t-test feature selection according to the following formula:
    D(f) = (m1(f) - m2(f)) / sqrt(var1(f) / n1 + var2(f) / n2)

    where m, var and n are the per-class feature mean, feature variance and
    number of instances.

    :param data: input accepted by ``tf.split``; it is split along the first axis,
        with the rows of class 1 first and the rows of class 2 after them
        (assumes a 2-D instances x features layout -- TODO confirm with callers).
    :param num_instances: list with the number of rows of class 1 and class 2;
        used both as the split sizes and as n1, n2 in the formula.
    :param top_k: number of highest-scoring features to return.
    :return: the result of ``tf.nn.top_k``: the ``top_k`` largest coefficients and
        their feature indices.
    """
    class1, class2 = tf.split(data, num_instances)
    # tf.nn.moments returns (mean, variance): the variances go into the
    # formula as they are and must not be squared again.
    # NOTE(review): these are population variances (divided by n, not n - 1);
    # confirm whether the unbiased estimate is wanted for small classes.
    mean1, var1 = tf.nn.moments(class1, axes=0)
    mean2, var2 = tf.nn.moments(class2, axes=0)
    standard_error = tf.sqrt(var1 / num_instances[0] + var2 / num_instances[1])
    t_test_coeffs = (mean1 - mean2) / standard_error
    return tf.nn.top_k(t_test_coeffs, k=top_k)
48+

statistics.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
3+
def pearson_correlation():
    """Placeholder with no implementation yet; takes no arguments and returns ``None``."""
    return None
5+
6+
7+
def f_test():
    """Placeholder with no implementation yet; takes no arguments and returns ``None``."""
    return None

0 commit comments

Comments
 (0)