1- import tensorflow as tf
21import pandas as pd
2+ import tensorflow as tf
33
# Location of the tab-separated input table, relative to the working directory.
data_file = '../data/autism.tsv'
# Read the table with no header row and the first column as the index, then
# transpose it so that the file's rows become the columns of `df`.
# NOTE(review): the separator literal is a tab followed by a space; for a .tsv
# file a plain '\t' is probably what was intended - confirm against the data.
df = pd.read_csv(data_file, sep='\t ', header=None, index_col=0).T
66
77
def fisher(data, num_instances: list, top_k_features=10):
    """
    Rank features of a two-class data set with the Fisher criterion.

    Every feature f is scored as
        D(f) = |m1(f) - m2(f)| / (std1(f) + std2(f))
    where m and std are the per-class mean and standard deviation of f.

    :param data: 2-D array-like (instances x features); the rows of the first
        class must come first, followed by the rows of the second class.
    :param num_instances: two-element list with the number of rows belonging
        to each class, in the same order as the rows of ``data``.
    :param top_k_features: number of best-scoring features to return; capped
        at the number of features available in ``data``.
    :return: the result of ``tf.nn.top_k`` - a ``(values, indices)`` pair of
        tensors holding the highest scores and the column indices they
        belong to, sorted by descending score.
    """
    assert len(num_instances) == 2, "Fisher selection method can be performed for two-class problems."
    data = tf.convert_to_tensor(data)
    _, num_features = data.get_shape().as_list()
    # tf.nn.top_k cannot return more entries than exist, so clamp the request.
    top_k_features = min(top_k_features, num_features)
    # Split along the instance axis into the two class sub-matrices.
    class1, class2 = tf.split(data, num_instances)
    # tf.nn.moments yields (mean, variance), not (mean, std).
    mean1, var1 = tf.nn.moments(class1, axes=[0])
    mean2, var2 = tf.nn.moments(class2, axes=[0])
    # Convert variances to standard deviations as required by the formula.
    fisher_coeffs = tf.abs(mean1 - mean2) / (tf.sqrt(var1) + tf.sqrt(var2))
    return tf.nn.top_k(fisher_coeffs, k=top_k_features)
2428
2529
def feature_correlation_with_class(data, num_instances: list, top_k_features=10):
    """
    Rank features by their correlation with the class label.

    Every feature f is scored as
        D(f) = [(m1(f) - m(f))^2 + (m2(f) - m(f))^2] / (2 * sigma(f)^2)
    where m1, m2 are the per-class means, m is the mean over all instances
    and sigma^2 is the variance over all instances.

    :param data: 2-D array-like (instances x features); the rows of the first
        class must come first, followed by the rows of the second class.
    :param num_instances: list with the number of rows belonging to each of
        the two classes, in the same order as the rows of ``data``.
    :param top_k_features: number of best-scoring features to return; capped
        at the number of features available in ``data``.
    :return: the result of ``tf.nn.top_k`` - a ``(values, indices)`` pair of
        tensors holding the highest scores and the column indices they
        belong to, sorted by descending score.
    """
    data = tf.convert_to_tensor(data)
    _, num_features = data.get_shape().as_list()
    # tf.nn.top_k cannot return more entries than exist, so clamp the request.
    top_k_features = min(top_k_features, num_features)
    # Split along the instance axis into the two class sub-matrices.
    class1, class2 = tf.split(data, num_instances)
    # Only the class means are needed; the per-class variances are unused.
    mean1, _ = tf.nn.moments(class1, axes=[0])
    mean2, _ = tf.nn.moments(class2, axes=[0])
    # tf.nn.moments yields (mean, variance), so `variance` is already sigma^2.
    mean, variance = tf.nn.moments(data, axes=[0])
    between_class = tf.square(mean1 - mean) + tf.square(mean2 - mean)
    # Parenthesised so the whole 2*sigma^2 term is the divisor.
    corr_coeffs = between_class / (2 * variance)
    return tf.nn.top_k(corr_coeffs, k=top_k_features)
3946
4047
def t_test(data, num_instances: list, top_k_features=10):
    """
    Rank features by the absolute two-sample t statistic between the classes.

    Every feature f is scored as
        D(f) = |m1(f) - m2(f)| / sqrt(var1(f) / n1 + var2(f) / n2)
    where m, var and n are the per-class mean, variance and instance count.

    :param data: 2-D array-like (instances x features); the rows of the first
        class must come first, followed by the rows of the second class.
    :param num_instances: list ``[n1, n2]`` with the number of rows belonging
        to each class, in the same order as the rows of ``data``.
    :param top_k_features: number of best-scoring features to return; capped
        at the number of features available in ``data``.
    :return: the result of ``tf.nn.top_k`` - a ``(values, indices)`` pair of
        tensors holding the highest scores and the column indices they
        belong to, sorted by descending score.
    """
    data = tf.convert_to_tensor(data)
    _, num_features = data.get_shape().as_list()
    # tf.nn.top_k cannot return more entries than exist, so clamp the request.
    top_k_features = min(top_k_features, num_features)
    # Split along the instance axis into the two class sub-matrices.
    class1, class2 = tf.split(data, num_instances)
    # tf.nn.moments yields (mean, variance); the variance is used as-is below
    # and must not be squared again.
    # NOTE(review): this is the population variance (divides by n, not n - 1).
    mean1, var1 = tf.nn.moments(class1, axes=[0])
    mean2, var2 = tf.nn.moments(class2, axes=[0])
    standard_error = tf.sqrt(var1 / num_instances[0] + var2 / num_instances[1])
    t_test_coeffs = tf.abs(mean1 - mean2) / standard_error
    return tf.nn.top_k(t_test_coeffs, k=top_k_features)
5263
# Driver: score the loaded table with each selection method and print the
# five best-scoring features reported by each.
with tf.Session() as session:
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same NumPy array and is available in every pandas version.
    input_data = df.values
    # Row counts of the two classes, in the order the rows appear in the data.
    instances_per_class = [82, 64]
    fisher_coeffs = session.run(
        fisher(data=input_data, num_instances=instances_per_class, top_k_features=5))
    corr_coeffs = session.run(
        feature_correlation_with_class(data=input_data, num_instances=instances_per_class, top_k_features=5))
    t_test_coeff = session.run(
        t_test(data=input_data, num_instances=instances_per_class, top_k_features=5))
    print(fisher_coeffs)
    print(corr_coeffs)
    print(t_test_coeff)
0 commit comments