1- import pandas as pd
21import tensorflow as tf
32
4- data_file = '../data/autism.tsv'
5- df = pd .read_csv (data_file , sep = '\t ' , header = None , index_col = 0 ).T
63
7-
8- def fisher (data , num_instances : list , top_k_features = 10 ):
4+ def fisher (data , num_instances : list , top_k_features = 2 ):
95 """
106 Performs Fisher feature selection method according to the following formula:
117 D(f) = |m1(f) - m2(f)| / (std1(f) + std2(f))
@@ -18,13 +14,15 @@ def fisher(data, num_instances: list, top_k_features=10):
1814 assert len (num_instances ) == 2 , "Fisher selection method can be performed for two-class problems."
1915 data = tf .convert_to_tensor (data )
2016 _ , num_features = data .get_shape ().as_list ()
21- if top_k_features < num_features :
17+ if top_k_features > num_features :
2218 top_k_features = num_features
2319 class1 , class2 = tf .split (data , num_instances )
2420 mean1 , std1 = tf .nn .moments (class1 , axes = 0 )
2521 mean2 , std2 = tf .nn .moments (class2 , axes = 0 )
2622 fisher_coeffs = tf .abs (mean1 - mean2 ) / (std1 + std2 )
27- return tf .nn .top_k (fisher_coeffs , k = top_k_features )
23+ values , indices = tf .nn .top_k (fisher_coeffs , k = top_k_features )
24+ most_sig_f = tf .gather (data , indices , axis = 1 )
25+ return most_sig_f
2826
2927
3028def feature_correlation_with_class (data , num_instances : list , top_k_features = 10 ):
@@ -35,7 +33,7 @@ def feature_correlation_with_class(data, num_instances: list, top_k_features=10)
3533 """
3634 data = tf .convert_to_tensor (data )
3735 _ , num_features = data .get_shape ().as_list ()
38- if top_k_features < num_features :
36+ if top_k_features > num_features :
3937 top_k_features = num_features
4038 class1 , class2 = tf .split (data , num_instances )
4139 mean1 , std1 = tf .nn .moments (class1 , axes = 0 )
@@ -53,20 +51,10 @@ def t_test(data, num_instances: list, top_k_features=10):
5351 """
5452 data = tf .convert_to_tensor (data )
5553 _ , num_features = data .get_shape ().as_list ()
56- if top_k_features < num_features :
54+ if top_k_features > num_features :
5755 top_k_features = num_features
5856 class1 , class2 = tf .split (data , num_instances )
5957 mean1 , std1 = tf .nn .moments (class1 , axes = 0 )
6058 mean2 , std2 = tf .nn .moments (class2 , axes = 0 )
6159 t_test_coeffs = tf .abs (mean1 - mean2 ) / tf .sqrt (tf .square (std1 )/ num_instances [0 ] + tf .square (std2 ) / num_instances [1 ])
6260 return tf .nn .top_k (t_test_coeffs , k = top_k_features )
63-
64- with tf .Session () as session :
65- input_data = df .as_matrix ()
66- instances_per_class = [82 , 64 ]
67- fisher_coeffs = session .run (fisher (data = input_data , num_instances = instances_per_class , top_k_features = 5 ))
68- corr_coeffs = session .run (feature_correlation_with_class (data = input_data , num_instances = instances_per_class , top_k_features = 5 ))
69- t_test_coeff = session .run (t_test (data = input_data , num_instances = instances_per_class , top_k_features = 5 ))
70- print (fisher_coeffs )
71- print (corr_coeffs )
72- print (t_test_coeff )
0 commit comments