MycroftAI
diff --git a/precise/functions.py b/precise/functions.py
Lines changed: 21 additions & 0 deletions
diff --git a/precise/model.py b/precise/model.py
Lines changed: 14 additions & 6 deletions
diff --git a/precise/network_runner.py b/precise/network_runner.py
Lines changed: 9 additions & 0 deletions
diff --git a/precise/params.py b/precise/params.py
Lines changed: 59 additions & 5 deletions
diff --git a/precise/pocketsphinx/listener.py b/precise/pocketsphinx/listener.py
Lines changed: 4 additions & 0 deletions
diff --git a/precise/pocketsphinx/scripts/listen.py b/precise/pocketsphinx/scripts/listen.py
Lines changed: 19 additions & 18 deletions
diff --git a/precise/pocketsphinx/scripts/test.py b/precise/pocketsphinx/scripts/test.py
Lines changed: 24 additions & 23 deletions
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Mathematical functions used to customize
+computation in various places
+"""
 from math import exp, log, sqrt, pi
 import numpy as np
 from typing import *
@@ -20,6 +24,11 @@
 
 def set_loss_bias(bias: float):
     """
+    Changes the loss bias
+
+    This allows customizing the acceptable trade-off between
+    false negatives and false positives
+
     Near 1.0 reduces false positives
     Near 0.0 reduces false negatives
     """
@@ -42,6 +51,7 @@ def weighted_log_loss(yt, yp) -> Any:
 
 
 def weighted_mse_loss(yt, yp) -> Any:
+    """Standard MSE loss with a weighting between false negatives and false positives"""
     from keras import backend as K
 
     total = K.sum(K.ones_like(yt))
@@ -52,16 +62,27 @@ def weighted_mse_loss(yt, yp) -> Any:
 
 
 def false_pos(yt, yp) -> Any:
+    """
+    Metric for Keras that *estimates* the false positive rate while training.
+    This will not be completely accurate because it weights batches
+    equally.
+    """
     from keras import backend as K
     return K.sum(K.cast(yp * (1 - yt) > 0.5, 'float')) / K.maximum(1.0, K.sum(1 - yt))
 
 
 def false_neg(yt, yp) -> Any:
+    """
+    Metric for Keras that *estimates* the false negative rate while training.
+    This will not be completely accurate because it weights batches
+    equally.
+    """
     from keras import backend as K
     return K.sum(K.cast((1 - yp) * (0 + yt) > 0.5, 'float')) / K.maximum(1.0, K.sum(0 + yt))
 
 
 def load_keras() -> Any:
+    """Imports Keras, injecting custom functions to prevent exceptions"""
     import keras
     keras.losses.weighted_log_loss = weighted_log_loss
     keras.metrics.false_pos = false_pos
 
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Defines the model parameters and loads or creates the Keras model
+"""
 import attr
 from os.path import isfile
 from typing import *
@@ -26,17 +29,20 @@
 class ModelParams:
     """
     Attributes:
-        recurrent_units:
-        dropout:
-        extra_metrics: Whether to include false positive and false negative metrics
+        recurrent_units: Number of GRU units. Higher values increase computation
+                         but allow more complex learning. Too high a value can cause overfitting
+        dropout: Reduces overfitting but can potentially decrease accuracy if too high
+        extra_metrics: Whether to include false positive and false negative metrics while training
         skip_acc: Whether to skip accuracy calculation while training
+        loss_bias: Near 1.0 reduces false positives. See <set_loss_bias>
+        freeze_till: Number of layers, counted from the start, to freeze after loading (allows for partial training)
     """
     recurrent_units = attr.ib(20)  # type: int
     dropout = attr.ib(0.2)  # type: float
     extra_metrics = attr.ib(False)  # type: bool
     skip_acc = attr.ib(False)  # type: bool
     loss_bias = attr.ib(0.7)  # type: float
-    freeze_till = attr.ib(0)  # type: bool
+    freeze_till = attr.ib(0)  # type: int
 
 
 def load_precise_model(model_name: str) -> Any:
@@ -70,7 +76,8 @@ def create_model(model_name: Optional[str], params: ModelParams) -> 'Sequential'
         model = Sequential()
         model.add(GRU(
             params.recurrent_units, activation='linear',
-            input_shape=(pr.n_features, pr.feature_size), dropout=params.dropout, name='net'
+            input_shape=(
+                pr.n_features, pr.feature_size), dropout=params.dropout, name='net'
         ))
         model.add(Dense(1, activation='sigmoid'))
 
@@ -79,5 +86,6 @@ def create_model(model_name: Optional[str], params: ModelParams) -> 'Sequential'
     set_loss_bias(params.loss_bias)
     for i in model.layers[:params.freeze_till]:
         i.trainable = False
-    model.compile('rmsprop', weighted_log_loss, metrics=(not params.skip_acc) * metrics)
+    model.compile('rmsprop', weighted_log_loss,
+                  metrics=(not params.skip_acc) * metrics)
     return model
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Pieces that convert audio to predictions
+"""
 import numpy as np
 from abc import abstractmethod, ABCMeta
 from importlib import import_module
@@ -26,6 +29,10 @@
 
 
 class Runner(metaclass=ABCMeta):
+    """
+    Classes that execute trained models on vectorized audio
+    and produce prediction values
+    """
     @abstractmethod
     def predict(self, inputs: np.ndarray) -> np.ndarray:
         pass
@@ -36,6 +43,7 @@ def run(self, inp: np.ndarray) -> float:
 
 
 class TensorFlowRunner(Runner):
+    """Executes a frozen TensorFlow model created from precise-convert"""
     def __init__(self, model_name: str):
         if model_name.endswith('.net'):
             print('Warning: ', model_name, 'looks like a Keras model.')
@@ -67,6 +75,7 @@ def run(self, inp: np.ndarray) -> float:
 
 
 class KerasRunner(Runner):
+    """Executes a regular Keras model created from precise-train"""
     def __init__(self, model_name: str):
         import tensorflow as tf
         # ISSUE 88 - Following 3 lines added to resolve issue 88 - JM 2020-02-04 per liny90626
 
@@ -11,6 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Parameters used in the audio pipeline
+These configure the following stages:
+ - Conversion from audio to input vectors
+ - Interpretation of the network output to a confidence value
+"""
 from math import floor
 
 import attr
@@ -21,42 +27,78 @@
 
 @attr.s(frozen=True)
 class ListenerParams:
+    """
+    General pipeline information:
+     - Audio goes through a series of transformations to convert raw audio into machine-readable data
+     - These transformations are as follows:
+       - Raw audio -> chopped audio
+         - buffer_t, sample_depth: Input audio loaded and truncated using these values
+         - window_t, hop_t: Linear audio chopped into overlapping frames using a sliding window
+       - Chopped audio -> FFT spectrogram
+         - n_fft, sample_rate: Each audio frame is converted to n_fft frequency intensities
+       - FFT spectrogram -> Mel spectrogram (compressed)
+         - n_filt: Each fft frame is compressed to n_filt summarized mel frequency bins/bands
+       - Mel spectrogram -> MFCC
+         - n_mfcc: Each mel frame is converted to MFCCs and the first n_mfcc values are taken
+       - Disabled by default: Last phase -> Delta vectors
+         - use_delta: If this value is true, the difference between consecutive vectors is concatenated to each frame
+
+    Parameters for audio pipeline:
+     - buffer_t: Input size of audio. Wakeword must fit within this time
+     - window_t: Time of the window used to calculate a single spectrogram frame
+     - hop_t: Time the window advances forward to calculate the next spectrogram frame
+     - sample_rate: Input audio sample rate
+     - sample_depth: Bytes per input audio sample
+     - n_fft: Size of FFT to generate from audio frame
+     - n_filt: Number of filters to compress FFT to
+     - n_mfcc: Number of MFCC coefficients to use
+     - use_delta: If True, generates "delta vectors" before sending to network
+     - vectorizer: The type of input fed into the network. Options listed in class Vectorizer
+     - threshold_config: Output distribution configuration automatically generated from precise-calc-threshold
+     - threshold_center: Output distribution center automatically generated from precise-calc-threshold
+    """
+    buffer_t = attr.ib()  # type: float
     window_t = attr.ib()  # type: float
     hop_t = attr.ib()  # type: float
-    buffer_t = attr.ib()  # type: float
     sample_rate = attr.ib()  # type: int
     sample_depth = attr.ib()  # type: int
-    n_mfcc = attr.ib()  # type: int
-    n_filt = attr.ib()  # type: int
     n_fft = attr.ib()  # type: int
+    n_filt = attr.ib()  # type: int
+    n_mfcc = attr.ib()  # type: int
     use_delta = attr.ib()  # type: bool
     vectorizer = attr.ib()  # type: int
     threshold_config = attr.ib()  # type: tuple
     threshold_center = attr.ib()  # type: float
 
     @property
     def buffer_samples(self):
+        """buffer_t converted to samples, truncating partial frames"""
         samples = int(self.sample_rate * self.buffer_t + 0.5)
         return self.hop_samples * (samples // self.hop_samples)
 
     @property
     def n_features(self):
+        """Number of timesteps in one input to the network"""
         return 1 + int(floor((self.buffer_samples - self.window_samples) / self.hop_samples))
 
     @property
     def window_samples(self):
+        """window_t converted to samples"""
         return int(self.sample_rate * self.window_t + 0.5)
 
     @property
     def hop_samples(self):
+        """hop_t converted to samples"""
         return int(self.sample_rate * self.hop_t + 0.5)
 
     @property
     def max_samples(self):
+        """The input size converted to audio samples"""
         return int(self.buffer_t * self.sample_rate)
 
     @property
     def feature_size(self):
+        """The size of an input vector generated with these parameters"""
         num_features = {
             Vectorizer.mfccs: self.n_mfcc,
             Vectorizer.mels: self.n_filt,
@@ -77,15 +119,27 @@ def vectorization_md5_hash(self):
 
 
 class Vectorizer:
+    """
+    Chooses which function to call to vectorize audio
+
+    Options:
+        mels: Convert to a compressed Mel spectrogram
+        mfccs: Convert to an MFCC spectrogram
+        speechpy_mfccs: Legacy option to convert to MFCCs using old library
+    """
     mels = 1
     mfccs = 2
     speechpy_mfccs = 3
 
 
 # Global listener parameters
+# These are the default values for all parameters
+# These were selected tentatively to balance CPU usage with accuracy
+# For the Hey Mycroft wake word, small changes to these parameters
+# did not make a significant difference in accuracy
 pr = ListenerParams(
-    window_t=0.1, hop_t=0.05, buffer_t=1.5, sample_rate=16000,
-    sample_depth=2, n_mfcc=13, n_filt=20, n_fft=512, use_delta=False,
+    buffer_t=1.5, window_t=0.1, hop_t=0.05, sample_rate=16000,
+    sample_depth=2, n_fft=512, n_filt=20, n_mfcc=13, use_delta=False,
     threshold_config=((6, 4),), threshold_center=0.2, vectorizer=Vectorizer.mfccs
 )
 
 
@@ -12,6 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Conversion of audio data to predictions using Pocketsphinx
+Used for comparison with Precise
+"""
 import numpy as np
 from typing import *
 from typing import BinaryIO
 
@@ -12,6 +12,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Run Pocketsphinx on microphone audio input
+
+:key_phrase str
+    Key phrase composed of words from dictionary
+
+:dict_file str
+    Filename of dictionary with word pronunciations
+
+:hmm_folder str
+    Folder containing hidden markov model
+
+:-th --threshold str 1e-90
+    Threshold for activations
+
+:-c --chunk-size int 2048
+    Samples between inferences
+"""
 from precise_runner import PreciseRunner
 from precise_runner.runner import ListenerEngine
 from prettyparse import Usage
@@ -23,24 +41,7 @@
 
 
 class PocketsphinxListenScript(BaseScript):
-    usage = Usage('''
-        Run Pocketsphinx on microphone audio input
-
-        :key_phrase str
-            Key phrase composed of words from dictionary
-
-        :dict_file str
-            Filename of dictionary with word pronunciations
-
-        :hmm_folder str
-            Folder containing hidden markov model
-
-        :-th --threshold str 1e-90
-            Threshold for activations
-
-        :-c --chunk-size int 2048
-            Samples between inferences
-    ''')
+    usage = Usage(__doc__)
 
     def run(self):
         def on_activation():
 
@@ -12,6 +12,29 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Test a dataset using Pocketsphinx
+
+:key_phrase str
+    Key phrase composed of words from dictionary
+
+:dict_file str
+    Filename of dictionary with word pronunciations
+
+:hmm_folder str
+    Folder containing hidden markov model
+
+:-th --threshold str 1e-90
+    Threshold for activations
+
+:-t --use-train
+    Evaluate training data instead of test data
+
+:-nf --no-filenames
+    Don't show the names of files that failed
+
+...
+"""
 import wave
 from prettyparse import Usage
 from subprocess import check_output, PIPE
@@ -23,29 +46,7 @@
 
 
 class PocketsphinxTestScript(BaseScript):
-    usage = Usage('''
-        Test a dataset using Pocketsphinx
-
-        :key_phrase str
-            Key phrase composed of words from dictionary
-
-        :dict_file str
-            Filename of dictionary with word pronunciations
-
-        :hmm_folder str
-            Folder containing hidden markov model
-
-        :-th --threshold str 1e-90
-            Threshold for activations
-
-        :-t --use-train
-            Evaluate training data instead of test data
-
-        :-nf --no-filenames
-            Don't show the names of files that failed
-
-        ...
-    ''') | TrainData.usage
+    usage = Usage(__doc__) | TrainData.usage
 
     def __init__(self, args):
         super().__init__(args)