|
2 | 2 |
|
3 | 3 |
|
def fisher(data, num_instances: list, top_k=10):
    """
    Performs Fisher feature selection method according to the following formula:
    D(f) = |m1(f) - m2(f)| / (std1(f) + std2(f))

    where m1, m2 are the per-feature means and std1, std2 the per-feature
    standard deviations of the two classes.

    :param data: samples to rank features for; it is split along the first axis,
        so the first ``num_instances[0]`` rows are treated as class 1 and the
        following ``num_instances[1]`` rows as class 2.
    :param num_instances: two-element list with the number of rows of each class.
    :param top_k: how many of the highest-scoring features to return.
    :return: the result of ``tf.nn.top_k`` (values and indices) for the most
        significant features.
    """
    assert len(num_instances) == 2, "Fisher selection method can be performed for two-class problems."
    class1, class2 = tf.split(data, num_instances)
    # tf.nn.moments yields (mean, variance) -- the square root is needed to
    # obtain the standard deviations used by the formula.
    mean1, var1 = tf.nn.moments(class1, axes=0)
    mean2, var2 = tf.nn.moments(class2, axes=0)
    # Rank by the magnitude of the separation: the sign of (mean1 - mean2) only
    # tells which class has the larger mean, not how discriminative a feature is.
    # NOTE: a feature that is constant within both classes has a zero denominator.
    fisher_coeffs = tf.abs(mean1 - mean2) / (tf.sqrt(var1) + tf.sqrt(var2))
    return tf.nn.top_k(fisher_coeffs, k=top_k)
10 | 20 |
|
| 21 | + |
def feature_correlation_with_class(data, num_instances: list, top_k=10):
    """
    Makes feature correlation with class selection according to the following formula:
    D(f) = [(m1(f) - m(f))^2 + (m2(f) - m(f))^2] / (2 * sigma(f)^2)

    where m1, m2 are the per-feature class means, m is the per-feature mean of
    the whole data set and sigma(f)^2 is the per-feature variance of the whole
    data set.

    :param data: samples to rank features for; it is split along the first axis,
        so the first ``num_instances[0]`` rows are treated as class 1 and the
        following ``num_instances[1]`` rows as class 2.
    :param num_instances: two-element list with the number of rows of each class.
    :param top_k: how many of the highest-scoring features to return.
    :return: the result of ``tf.nn.top_k`` (values and indices) for the most
        significant features.
    """
    data = tf.convert_to_tensor(data)
    class1, class2 = tf.split(data, num_instances)
    # Only the class means are needed; the per-class variances are discarded.
    mean1, _ = tf.nn.moments(class1, axes=0)
    mean2, _ = tf.nn.moments(class2, axes=0)
    # tf.nn.moments yields (mean, variance), so the second value already is
    # sigma^2 and must not be squared again.
    mean, variance = tf.nn.moments(data, axes=0)
    between_class = tf.square(mean1 - mean) + tf.square(mean2 - mean)
    # The denominator is parenthesised explicitly: ``x / 2 * y`` would evaluate
    # as ``(x / 2) * y`` and multiply by the variance instead of dividing by it.
    # NOTE: a feature that is constant over the whole data set has zero variance.
    corr_coeffs = between_class / (2.0 * variance)
    return tf.nn.top_k(corr_coeffs, k=top_k)
| 35 | + |
| 36 | + |
def t_test(data, num_instances: list, top_k=10):
    """
    Makes t-test feature selection according to the following formula:
    D(f) = |m1(f) - m2(f)| / sqrt(var1(f) / n1 + var2(f) / n2)

    where m1, m2 are the per-feature class means, var1, var2 the per-feature
    class variances and n1, n2 the number of instances of each class.

    :param data: samples to rank features for; it is split along the first axis,
        so the first ``num_instances[0]`` rows are treated as class 1 and the
        following ``num_instances[1]`` rows as class 2.
    :param num_instances: two-element list with the number of rows of each class.
    :param top_k: how many of the highest-scoring features to return.
    :return: the result of ``tf.nn.top_k`` (values and indices) for the most
        significant features.
    """
    class1, class2 = tf.split(data, num_instances)
    # tf.nn.moments yields (mean, variance): the variances go into the standard
    # error directly, without squaring.
    mean1, var1 = tf.nn.moments(class1, axes=0)
    mean2, var2 = tf.nn.moments(class2, axes=0)
    standard_error = tf.sqrt(var1 / num_instances[0] + var2 / num_instances[1])
    # Rank by |t|: a strongly negative statistic separates the classes just as
    # well as a strongly positive one.
    # NOTE: a feature that is constant within both classes has a zero denominator.
    t_test_coeffs = tf.abs(mean1 - mean2) / standard_error
    return tf.nn.top_k(t_test_coeffs, k=top_k)
| 48 | + |
0 commit comments