wip

balancap · balancap · commit 54a9ff21f33c · 2024-01-08T18:47:25.000Z
diff --git a/experiments/mnist/mnist_classifier_from_scratch.py b/experiments/mnist/mnist_classifier_from_scratch.py
@@ -16,9 +16,8 @@
 
 The primary aim here is simplicity and minimal dependencies.
 """
-
-
 import time
+from functools import partial
 
 import datasets
 import jax
@@ -31,13 +30,22 @@
 import jax_scaled_arithmetics as jsa
 
 
+def print_mean_std(name, v):
+    # Always use np.float32, to avoid floating-point errors in descaling and stats.
+    v = jsa.asarray(v, dtype=np.float32)
+    m, s = np.mean(v), np.std(v)
+    print(name, m, s)
+
+
 def init_random_params(scale, layer_sizes, rng=npr.RandomState(0)):
     return [(scale * rng.randn(m, n), scale * rng.randn(n)) for m, n, in zip(layer_sizes[:-1], layer_sizes[1:])]
 
 
 def predict(params, inputs):
     activations = inputs
     for w, b in params[:-1]:
+        jsa.ops.debug_callback(partial(print_mean_std, "W:"), w)
+
         # Matmul + relu
         outputs = jnp.dot(activations, w) + b
         activations = jnp.maximum(outputs, 0)
@@ -66,7 +74,7 @@ def accuracy(params, batch):
     step_size = 0.001
     num_epochs = 10
     batch_size = 128
-    training_dtype = np.float16
+    training_dtype = np.float32
 
     train_images, train_labels, test_images, test_labels = datasets.mnist()
     num_train = train_images.shape[0]
@@ -93,9 +101,13 @@ def update(params, batch):
         grads = grad(loss)(params, batch)
         return [(w - step_size * dw, b - step_size * db) for (w, b), (dw, db) in zip(params, grads)]
 
+    num_batches = 2
+    num_epochs = 2
     for epoch in range(num_epochs):
+        print("EPOCH:", epoch)
         start_time = time.time()
         for _ in range(num_batches):
+            print("BATCH...")
             batch = next(batches)
             # Scaled micro-batch + training dtype cast.
             batch = jsa.as_scaled_array(batch)
@@ -108,8 +120,8 @@ def update(params, batch):
 
         # Evaluation in float32, for consistency.
         raw_params = jsa.asarray(params, dtype=np.float32)
-        train_acc = accuracy(raw_params, (train_images, train_labels))
-        test_acc = accuracy(raw_params, (test_images, test_labels))
-        print(f"Epoch {epoch} in {epoch_time:0.2f} sec")
-        print(f"Training set accuracy {train_acc:0.5f}")
-        print(f"Test set accuracy {test_acc:0.5f}")
+        # train_acc = accuracy(raw_params, (train_images, train_labels))
+        # test_acc = accuracy(raw_params, (test_images, test_labels))
+        # print(f"Epoch {epoch} in {epoch_time:0.2f} sec")
+        # print(f"Training set accuracy {train_acc:0.5f}")
+        # print(f"Test set accuracy {test_acc:0.5f}")