
Commit 036e3f0

Merge pull request #151 from MatthewScholefield/feature/useful-hyperopt
Improve optimization functionality
2 parents ba19182 + 94d3087 commit 036e3f0

2 files changed (+225, -89 lines)


precise/annoyance_estimator.py

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
+# Copyright 2020 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from collections import namedtuple
+from glob import glob
+from os.path import join
+
+import numpy as np
+
+from precise.params import pr
+from precise.util import load_audio
+from precise.vectorization import vectorize_raw
+
+AnnoyanceEstimate = namedtuple(
+    'AnnoyanceEstimate',
+    'annoyance ww_annoyance nww_annoyance threshold'
+)
+
+
+class AnnoyanceEstimator:
+    """
+    Estimates the "annoyance" that a given network causes its user.
+    It models annoyance as follows:
+
+    Annoyance from false negatives (not activating when it should):
+        We assume the annoyance incurred by each subsequent failed
+        activation attempt is double that of the previous attempt,
+        i.e. two failed activations cause 1 + 2 = 3 annoyance units
+        but three failed activations cause 1 + 2 + 4 = 7 annoyance
+        units.
+
+    Annoyance from false positives (activating when it should not):
+        We assume each false positive incurs some constant annoyance.
+
+    With this, we can compute the net annoyance from false positives
+    and false negatives individually and combine them for the total
+    annoyance.
+
+    Finally, we can recompute this annoyance for each threshold value
+    to find the threshold that yields the lowest net annoyance.
+    """
+
+    def __init__(self, model, interaction_estimate, ambient_annoyance):
+        # 1000 candidate thresholds, spaced uniformly in logit space
+        # so that they cluster near 0.0 and 1.0
+        self.thresholds = 1 / (1 + np.exp(-np.linspace(-20, 20, 1000)))
+        self.interaction_estimate = interaction_estimate
+        self.ambient_annoyance = ambient_annoyance
+
+    def compute_nww_annoyances(self, model, noise_folder, batch_size):
+        """
+        Given some number, x, of ambient activations per hour, we can
+        compute the annoyance per day from false positives as 24 * x
+        times the annoyance incurred per ambient activation.
+        """
+        nww_seconds = 0.0
+        nww_buckets = np.zeros_like(self.thresholds)
+        for i in glob(join(noise_folder, '*.wav')):
+            print('Evaluating ambient activations on {}...'.format(i))
+            inputs, audio_len = self._load_inputs(i)
+            nww_seconds += audio_len / pr.sample_rate
+            ambient_predictions = model.predict(inputs, batch_size=batch_size)
+            del inputs
+            nww_buckets += (ambient_predictions.reshape((-1, 1))
+                            > self.thresholds.reshape((1, -1))).sum(axis=0)
+        nww_acts_per_hour = nww_buckets * 60 * 60 / nww_seconds
+        return self.ambient_annoyance * nww_acts_per_hour * 24
+
+    def compute_ww_annoyances(self, ww_predictions):
+        """
+        Given some proportion, p, of not recognizing the wake word,
+        the total annoyance per interaction is modelled as the series
+        p^1 * 2^0 + p^2 * 2^1 + ... + p^i * 2^(i - 1), which converges
+        to p / (1 - 2p) for p < 0.5. Given some number of interactions
+        per day, we can then find the expected annoyance per day from
+        false negatives.
+        """
+        ww_buckets = (ww_predictions.reshape((-1, 1)) >
+                      self.thresholds.reshape((1, -1))).sum(axis=0)
+        ww_fail_ratios = 1 - ww_buckets / len(ww_predictions)
+        # Computes p / (1 - 2p), treating the divergent case p >= 0.5
+        # as infinite annoyance
+        ann_per_interaction = np.divide(
+            ww_fail_ratios, 1 - 2 * ww_fail_ratios,
+            where=ww_fail_ratios < 0.5
+        )
+        ann_per_interaction[ww_fail_ratios >= 0.5] = float('inf')
+        return self.interaction_estimate * ann_per_interaction
+
+    def estimate(self, model, predictions, targets, noise_folder, batch_size):
+        """
+        Estimates the annoyance a network incurs according to the
+        model described in the class docstring
+        """
+        ww_predictions = predictions[np.where(targets > 0.5)]
+        ww_annoyances = self.compute_ww_annoyances(ww_predictions)
+        nww_annoyances = self.compute_nww_annoyances(
+            model, noise_folder, batch_size
+        )
+        annoyance_by_threshold = ww_annoyances + nww_annoyances
+        best_threshold_id = np.argmin(annoyance_by_threshold)
+        min_annoyance = annoyance_by_threshold[best_threshold_id]
+        return AnnoyanceEstimate(
+            annoyance=min_annoyance,
+            ww_annoyance=ww_annoyances[best_threshold_id],
+            nww_annoyance=nww_annoyances[best_threshold_id],
+            threshold=self.thresholds[best_threshold_id]
+        )
+
+    @staticmethod
+    def _load_inputs(audio_file, chunk_size=4096):
+        """
+        Loads network inputs from an audio file without caching.
+        Frees intermediate data eagerly in case the audio file is
+        large.
+
+        Args:
+            audio_file: Filename to load
+            chunk_size: Number of samples to skip forward between
+                successive network inputs
+        """
+        audio = load_audio(audio_file)
+        audio_len = len(audio)
+        mfccs = vectorize_raw(audio)
+        del audio  # The raw audio is no longer needed once vectorized
+        mfcc_hops = chunk_size // pr.hop_samples
+        return np.array([
+            mfccs[i - pr.n_features:i]
+            for i in range(pr.n_features, len(mfccs), mfcc_hops)
+        ]), audio_len
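
The closed form in compute_ww_annoyances is just the sum of a geometric series, and both annoyance methods count activations per threshold with NumPy broadcasting. A minimal standalone sketch with toy values (nothing below is part of the commit itself) checking both:

    import numpy as np

    # Partial sums of p^1*2^0 + p^2*2^1 + ... approach p / (1 - 2p) for p < 0.5
    for p in (0.1, 0.2, 0.4):
        i = np.arange(1, 80)
        print(p, np.sum(p ** i * 2.0 ** (i - 1)), p / (1 - 2 * p))

    # Broadcasting predictions against thresholds counts, for each threshold,
    # how many predictions exceed it (the bucketing used above)
    predictions = np.array([0.1, 0.6, 0.9])
    thresholds = np.array([0.25, 0.5, 0.75])
    print((predictions.reshape((-1, 1)) > thresholds.reshape((1, -1))).sum(axis=0))
    # -> [2 2 1]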

precise/scripts/train_optimize.py

Lines changed: 95 additions & 89 deletions
@@ -13,116 +13,122 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-Use black box optimization to tune model hyperparameters
+Use black box optimization to tune model hyperparameters. Call
+this script in a loop to iteratively tune parameters

-:-t --trials-name str -
+:trials_name str
     Filename to save hyperparameter optimization trials in
     '.bbopt.json' will automatically be appended

-:-c --cycles int 20
-    Number of cycles of optimization to run
+:noise_folder str
+    Folder with random noise to evaluate ambient activations

-:-m --model str .cache/optimized.net
-    Model to load from
+:-ie --interaction-estimate int 100
+    Estimated number of interactions per day
+
+:-aaa --ambient-activation-annoyance float 1.0
+    An ambient activation is X times as annoying as a failed
+    activation when the wake word is spoken
+
+:-bp --base-params str {}
+    JSON string containing base ListenerParams for all models

 ...
 """
-import numpy
-# Optimizer blackhat
-from glob import glob
-from os import remove
-from os.path import isfile, splitext, join
-from pprint import pprint
+import json
+from math import exp
+from uuid import uuid4
+
+from keras.models import save_model
 from prettyparse import Usage
-from shutil import rmtree
-from typing import Any

+from precise.annoyance_estimator import AnnoyanceEstimator
 from precise.model import ModelParams, create_model
+from precise.params import pr, save_params
 from precise.scripts.train import TrainScript
-from precise.train_data import TrainData
+from precise.stats import Stats


 class TrainOptimizeScript(TrainScript):
-    Usage(__doc__) | TrainScript.usage
+    usage = Usage(__doc__) | TrainScript.usage
+    del usage.arguments['model']  # Remove 'model' argument from original TrainScript

     def __init__(self, args):
-        super().__init__(args)
         from bbopt import BlackBoxOptimizer
+        pr.__dict__.update(json.loads(args.base_params))
+        args.model = args.trials_name + '-cur'
+        save_params(args.model)
+        super().__init__(args)
         self.bb = BlackBoxOptimizer(file=self.args.trials_name)
-        if not self.test:
-            data = TrainData.from_both(self.args.tags_file, self.args.tags_folder, self.args.folder)
-            _, self.test = data.load(False, True)
-
-        from keras.callbacks import ModelCheckpoint
-        for i in list(self.callbacks):
-            if isinstance(i, ModelCheckpoint):
-                self.callbacks.remove(i)
-
-    def process_args(self, args: Any):
-        model_parts = glob(splitext(args.model)[0] + '.*')
-        if len(model_parts) < 5:
-            for name in model_parts:
-                if isfile(name):
-                    remove(name)
-                else:
-                    rmtree(name)
-        args.trials_name = args.trials_name.replace('.bbopt.json', '').replace('.json', '')
-        if not args.trials_name:
-            if isfile(join('.cache', 'trials.bbopt.json')):
-                remove(join('.cache', 'trials.bbopt.json'))
-            args.trials_name = join('.cache', 'trials')
+
+    def calc_params_cost(self, model):
+        """
+        Models the real-world cost of additional model parameters.
+        Up to a certain point, having more parameters isn't worse.
+        Beyond that point, however, more parameters risk running
+        slower than realtime, making the model infeasible. This is
+        why the cost is modelled exponentially around a reasonable
+        number of acceptable parameters.
+
+        Ideally, this would be replaced with floating point
+        computations and the numbers would be configurable
+        rather than chosen relatively arbitrarily
+        """
+        return 1.0 + exp((model.count_params() - 11000) / 10000)

     def run(self):
-        print('Writing to:', self.args.trials_name + '.bbopt.json')
-        for i in range(self.args.cycles):
-            self.bb.run(backend="random")
-            print("\n= %d = (example #%d)" % (i + 1, len(self.bb.get_data()["examples"]) + 1))
-
-            params = ModelParams(
-                recurrent_units=self.bb.randint("units", 1, 70, guess=50),
-                dropout=self.bb.uniform("dropout", 0.1, 0.9, guess=0.6),
-                extra_metrics=self.args.extra_metrics,
-                skip_acc=self.args.no_validation,
-                loss_bias=1.0 - self.args.sensitivity
-            )
-            print('Testing with:', params)
-            model = create_model(self.args.model, params)
-            model.fit(
-                *self.sampled_data, batch_size=self.args.batch_size,
-                epochs=self.epoch + self.args.epochs,
-                validation_data=self.test * (not self.args.no_validation),
-                callbacks=self.callbacks, initial_epoch=self.epoch
-            )
-            resp = model.evaluate(*self.test, batch_size=self.args.batch_size)
-            if not isinstance(resp, (list, tuple)):
-                resp = [resp, None]
-            test_loss, test_acc = resp
-            predictions = model.predict(self.test[0], batch_size=self.args.batch_size)
-
-            num_false_positive = numpy.sum(predictions * (1 - self.test[1]) > 0.5)
-            num_false_negative = numpy.sum((1 - predictions) * self.test[1] > 0.5)
-            false_positives = num_false_positive / numpy.sum(self.test[1] < 0.5)
-            false_negatives = num_false_negative / numpy.sum(self.test[1] > 0.5)
-
-            from math import exp
-            param_score = 1.0 / (1.0 + exp((model.count_params() - 11000) / 2000))
-            fitness = param_score * (1.0 - 0.2 * false_negatives - 0.8 * false_positives)
-
-            self.bb.remember({
-                "test loss": test_loss,
-                "test accuracy": test_acc,
-                "false positive%": false_positives,
-                "false negative%": false_negatives,
-                "fitness": fitness
-            })
-
-            print("False positive: ", false_positives * 100, "%")
-
-            self.bb.maximize(fitness)
-            pprint(self.bb.get_current_run())
-        best_example = self.bb.get_optimal_run()
-        print("\n= BEST = (example #%d)" % self.bb.get_data()["examples"].index(best_example))
-        pprint(best_example)
+        self.bb.run(alg='tree_structured_parzen_estimator')
+
+        model = create_model(None, ModelParams(
+            recurrent_units=self.bb.randint("units", 1, 120, guess=30),
+            dropout=self.bb.uniform("dropout", 0.05, 0.9, guess=0.2),
+            extra_metrics=self.args.extra_metrics,
+            skip_acc=self.args.no_validation,
+            loss_bias=self.bb.uniform(
+                'loss_bias', 0.01, 0.99, guess=1.0 - self.args.sensitivity
+            ),
+            freeze_till=0
+        ))
+        model.fit(
+            *self.sampled_data, batch_size=self.args.batch_size,
+            epochs=self.args.epochs,
+            validation_data=self.test * (not self.args.no_validation),
+            callbacks=[]
+        )
+        test_in, test_out = self.test
+        test_pred = model.predict(test_in, batch_size=self.args.batch_size)
+        stats_dict = Stats(test_pred, test_out, []).to_dict()
+
+        ann_est = AnnoyanceEstimator(
+            model, self.args.interaction_estimate,
+            self.args.ambient_activation_annoyance
+        ).estimate(
+            model, test_pred, test_out,
+            self.args.noise_folder, self.args.batch_size
+        )
+        params_cost = self.calc_params_cost(model)
+        cost = ann_est.annoyance + params_cost
+
+        model_name = '{}-{}.net'.format(self.args.trials_name, str(uuid4()))
+        save_model(model, model_name)
+        save_params(model_name)
+
+        self.bb.remember({
+            'test_stats': stats_dict,
+            'best_threshold': ann_est.threshold,
+            'cost': cost,
+            'cost_info': {
+                'params_cost': params_cost,
+                'annoyance': ann_est.annoyance,
+                'ww_annoyance': ann_est.ww_annoyance,
+                'nww_annoyance': ann_est.nww_annoyance,
+            },
+            'model': model_name
+        })
+        print('Current Run: {}'.format(json.dumps(
+            self.bb.get_current_run(), indent=4
+        )))
+        self.bb.minimize(cost)


 main = TrainOptimizeScript.run_main
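
Since the script now runs exactly one trial per invocation (the old --cycles loop is gone), the optimization happens by calling the script repeatedly while bbopt persists every trial to '<trials_name>.bbopt.json'. A minimal standalone sketch of that pattern, with a toy cost in place of training (the file name and cost function are illustrative only, not part of Precise):

    from math import exp

    from bbopt import BlackBoxOptimizer

    bb = BlackBoxOptimizer(file='demo-trials')  # persists to demo-trials.bbopt.json
    bb.run(alg='tree_structured_parzen_estimator')

    # Sample hyperparameters the same way train_optimize.py does
    units = bb.randint('units', 1, 120, guess=30)
    dropout = bb.uniform('dropout', 0.05, 0.9, guess=0.2)

    # Toy stand-in for training and annoyance estimation; units * 100
    # crudely approximates a parameter count so the params cost has the
    # same shape as calc_params_cost (about 2.0 at 11k parameters,
    # growing exponentially beyond that)
    annoyance = (units - 60) ** 2 / 100 + dropout
    params_cost = 1.0 + exp((units * 100 - 11000) / 10000)
    cost = annoyance + params_cost

    bb.remember({'cost': cost})
    bb.minimize(cost)
    print('Best so far:', bb.get_optimal_run())

Each invocation appends one example to the trials file, so a shell loop around the real script accumulates TPE-guided trials in the same way.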
