Minor fixes in the fake_classifiers notebook

MischaPanch · MischaPanch · commit 62918c17fb0c · 2022-11-28T12:22:12.000+01:00
diff --git a/notebooks/fake_classifiers.ipynb b/notebooks/fake_classifiers.ipynb
@@ -6,6 +6,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import numpy as np\n",
+    "\n",
     "%load_ext autoreload\n",
     "%autoreload 2"
    ]
@@ -18,13 +20,10 @@
    "source": [
     "import logging\n",
     "\n",
-    "from kyle.evaluation import (\n",
-    "    EvalStats,\n",
-    "    compute_accuracy,\n",
-    "    compute_ECE,\n",
-    ")\n",
+    "from kyle.evaluation import EvalStats\n",
     "from kyle.sampling.fake_clf import DirichletFC\n",
     "from kyle.transformations import *\n",
+    "import matplotlib.pyplot as plt\n",
     "\n",
     "logging.basicConfig(level=logging.INFO)"
    ]
@@ -71,22 +70,27 @@
    "outputs": [],
    "source": [
     "print(\n",
-    "    \"mostly underestimating all classes (starting at 1/n_classes) with PowerLawSimplexAut\"\n",
+    "    \"mostly overestimating all classes (starting at 1/n_classes) with PowerLawSimplexAut\"\n",
     ")\n",
     "transform = PowerLawSimplexAut(np.array([2, 2, 2]))\n",
     "dirichlet_fc.set_simplex_automorphism(transform)\n",
+    "\n",
+    "\n",
     "eval_stats = EvalStats(*dirichlet_fc.get_sample_arrays(n_samples))\n",
     "\n",
     "print(f\"Accuracy is {eval_stats.accuracy()}\")\n",
     "print(f\"ECE is {eval_stats.expected_calibration_error(n_bins=200)}\")\n",
-    "ece_approx = -eval_stats.expected_confidence() + eval_stats.accuracy()\n",
+    "ece_approx = eval_stats.expected_confidence() - eval_stats.accuracy()\n",
     "print(f\"{ece_approx=}\")\n",
-    "eval_stats.plot_reliability_curves([0, 1, \"top_class\"], display_weights=True)\n",
+    "eval_stats.plot_reliability_curves(\n",
+    "    [0, 1, \"top_class\"], display_weights=True, n_bins=200\n",
+    ")\n",
+    "plt.show()\n",
     "\n",
     "\n",
-    "theoretical_acc = compute_accuracy(dirichlet_fc)[0]\n",
-    "theoretical_ece = compute_ECE(dirichlet_fc)[0]\n",
-    "print(f\"{theoretical_acc=} , {theoretical_ece=}\")"
+    "# theoretical_acc = compute_accuracy(dirichlet_fc)[0]\n",
+    "# theoretical_ece = compute_ECE(dirichlet_fc)[0]\n",
+    "# print(f\"{theoretical_acc=} , {theoretical_ece=}\")"
    ]
   },
   {
@@ -96,7 +100,7 @@
    "outputs": [],
    "source": [
     "print(\n",
-    "    \"mostly overestimating all classes (starting at 1/n_classes) with PowerLawSimplexAut\"\n",
+    "    \"mostly underestimating all classes (starting at 1/n_classes) with PowerLawSimplexAut\"\n",
     ")\n",
     "print(\"Note the variance and the resulting sensitivity to binning\")\n",
     "\n",
@@ -106,9 +110,10 @@
     "\n",
     "print(f\"Accuracy is {eval_stats.accuracy()}\")\n",
     "print(f\"ECE is {eval_stats.expected_calibration_error()}\")\n",
-    "ece_approx = eval_stats.expected_confidence() - eval_stats.accuracy()\n",
+    "ece_approx = -eval_stats.expected_confidence() + eval_stats.accuracy()\n",
     "print(f\"{ece_approx=}\")\n",
     "eval_stats.plot_reliability_curves([0, 1, \"top_class\"], display_weights=True)\n",
+    "plt.show()\n",
     "\n",
     "\n",
     "# theoretical_acc = compute_accuracy(dirichlet_fc)[0]\n",
@@ -122,7 +127,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"Overestimating predictions with MaxComponent\")\n",
+    "print(\"Underestimating predictions with MaxComponent\")\n",
     "\n",
     "\n",
     "def overestimating_max(x: np.ndarray):\n",
@@ -139,6 +144,7 @@
     "print(f\"Accuracy is {eval_stats.accuracy()}\")\n",
     "print(f\"ECE is {eval_stats.expected_calibration_error()}\")\n",
     "eval_stats.plot_reliability_curves([0, 1, \"top_class\"], display_weights=True)\n",
+    "plt.show()\n",
     "\n",
     "# Integrals converge pretty slowly, this takes time\n",
     "# theoretical_acc = compute_accuracy(dirichlet_fc, opts={\"limit\": 75})[0]\n",
@@ -188,7 +194,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"mostly underestimating first two classes with RestrictedPowerSimplexAut\")\n",
+    "print(\"mostly overestimating first two classes with RestrictedPowerSimplexAut\")\n",
     "\n",
     "transform = RestrictedPowerSimplexAut(np.array([2, 4]))\n",
     "dirichlet_fc.set_simplex_automorphism(transform)\n",
@@ -199,30 +205,14 @@
     "print(\"Theoretical approximation of ECE\")\n",
     "print(eval_stats.expected_confidence() - eval_stats.accuracy())\n",
     "eval_stats.plot_reliability_curves([0, 1, 2, \"top_class\"], display_weights=True)\n",
+    "plt.show()\n",
     "\n",
     "\n",
     "# theoretical_acc = compute_accuracy(dirichlet_fc)[0]\n",
     "# theoretical_ece = compute_ECE(dirichlet_fc)[0]\n",
     "# print(f\"{theoretical_acc=} , {theoretical_ece=}\")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\n",
-    "    f\"\"\"\n",
-    "NOTE: here the ECE completely fails to converge to it's true, continuous value.\n",
-    "This is probably due to the binning-variance, see plots below with 500 bins.\n",
-    "The sharp peak in weights at the end certainly does not help convergence either.\n",
-    "\"\"\"\n",
-    ")\n",
-    "\n",
-    "eval_stats.plot_reliability_curves([\"top_class\"], n_bins=500, display_weights=True)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -265,7 +255,8 @@
     "\n",
     "print(f\"Accuracy is {eval_stats.accuracy()}\")\n",
     "print(f\"ECE is {eval_stats.expected_calibration_error(n_bins=200)}\")\n",
-    "eval_stats.plot_reliability_curves([0, \"top_class\"], display_weights=True)"
+    "eval_stats.plot_reliability_curves([0, \"top_class\"], display_weights=True)\n",
+    "plt.show()"
    ]
   },
   {
@@ -290,7 +281,8 @@
     "\n",
     "print(f\"Accuracy is {eval_stats.accuracy()}\")\n",
     "print(f\"ECE is {eval_stats.expected_calibration_error(n_bins=200)}\")\n",
-    "eval_stats.plot_reliability_curves([4, \"top_class\"], display_weights=True)"
+    "eval_stats.plot_reliability_curves([4, \"top_class\"], display_weights=True)\n",
+    "plt.show()"
    ]
   },
   {
@@ -313,7 +305,8 @@
     "\n",
     "print(f\"Accuracy is {eval_stats.accuracy()}\")\n",
     "print(f\"ECE is {eval_stats.expected_calibration_error()}\")\n",
-    "eval_stats.plot_reliability_curves([4, \"top_class\"], display_weights=True)"
+    "eval_stats.plot_reliability_curves([4, \"top_class\"], display_weights=True)\n",
+    "plt.show()"
    ]
   }
  ],