Simulate large dataset

anitagraser · anitagraser · commit 31e8206591bc · 2025-05-17T19:46:39.000+02:00
diff --git a/mesa/examples/basic/ships_hybrid_algorithm/config/config.json b/mesa/examples/basic/ships_hybrid_algorithm/config/config.json
@@ -1,8 +1,8 @@
 {
-    "simulation_steps": 1000,
+    "simulation_steps": 10000,
     "width": 100,
     "height": 100,
-    "num_ships": 100,
+    "num_ships": 1000,
     "max_speed_range": [1.0, 1.5],
     "resolution": 1,
     "obstacle_threshold": 2,
diff --git a/mesa/examples/basic/ships_hybrid_algorithm/ships.ipynb b/mesa/examples/basic/ships_hybrid_algorithm/ships.ipynb
@@ -122,7 +122,10 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "#tc = mpd.TrajectoryCollection(df, traj_id_col='AgentID', t='t', x='x', y='y', crs=31287)\n",
+    "tc"
+   ]
   },
   {
    "cell_type": "code",
@@ -171,7 +174,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tc.add_speed(overwrite=True)\n",
+    "import os\n",
+    "\n",
+    "tc.add_speed(overwrite=True, n_threads=os.cpu_count())\n",
     "tc.trajectories[0].df#.hvplot(c='speed')"
    ]
   },
@@ -216,7 +221,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sim_out_file_name = 'sim_20250517_100643.csv'\n",
+    "sim_out_file_name = 'sim_20250517_164810.csv'\n",
     "PERCENTAGE_OF_ANOMALIES = 0.03"
    ]
   },
@@ -236,8 +241,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "unique_ids = sims.AgentID.unique()\n",
-    "unique_ids"
+    "#sims = sims[sims.AgentID<29].copy()\n",
+    "sims['is_anomaly'] = False\n",
+    "sims['anomaly_type'] = 'n/a'\n"
    ]
   },
   {
@@ -246,8 +252,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sample_ids_pd = pd.Series(unique_ids).sample(frac=PERCENTAGE_OF_ANOMALIES).tolist()\n",
-    "print(\"Subset we'll insert anomalies in: \", sample_ids_pd)"
+    "tc = mpd.TrajectoryCollection(sims, t='t', traj_id_col='AgentID', x='x', y='y', crs=31287)\n",
+    "tc"
    ]
   },
   {
@@ -256,21 +262,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#sims = sims[sims.AgentID<29].copy()\n",
-    "sims['is_anomaly'] = False\n",
-    "sims['anomaly_type'] = 'n/a'\n",
-    "tc = mpd.TrajectoryCollection(sims, t='t', traj_id_col='AgentID', x='x', y='y', crs=31287)\n",
+    "# Remove stopped segments (with speed < 0.0001)\n",
+    "tc = mpd.SpeedSplitter(tc).split(speed=0.0001, duration=timedelta(seconds=5))\n",
     "tc"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "tc.add_speed(overwrite=True)\n",
-    "tc.plot(column='speed', legend=True)"
+    "### Speed\n",
+    "\n",
+    "Change the timestamps to make a subsection of the trajectory faster"
    ]
   },
   {
@@ -279,17 +282,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Remove stopped segments (with speed < 0.01)\n",
-    "tc = mpd.SpeedSplitter(tc).split(speed=0.01, duration=timedelta(seconds=5))"
+    "unique_ids = sims.AgentID.unique()\n",
+    "unique_ids"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "### Speed\n",
-    "\n",
-    "Change the timestamps to make a subsection of the trajectory faster"
+    "sample_ids_pd = pd.Series(unique_ids).sample(frac=PERCENTAGE_OF_ANOMALIES).tolist()\n",
+    "print(\"Subset we'll insert anomalies in: \", sample_ids_pd)"
    ]
   },
   {
@@ -327,8 +331,9 @@
    "outputs": [],
    "source": [
     "for i, traj in enumerate(tc.trajectories):\n",
-    "    traj = inject_speed_anomaly(traj)\n",
-    "    tc.trajectories[i] = traj\n",
+    "    if int(str(traj.id).split('_')[0]) in sample_ids_pd:  # match the ship's AgentID (split ids look like 'AgentID_n'), not the list position i\n",
+    "        traj = inject_speed_anomaly(traj)\n",
+    "        tc.trajectories[i] = traj\n",
     "    \n",
     "tc.add_speed(overwrite=True)\n",
     "tc.plot(column='speed', legend=True)"
@@ -376,7 +381,9 @@
    "source": [
     "### Location\n",
     "\n",
-    "#### Generate trajectories that pass through obstacles"
+    "#### Generate trajectories that pass through obstacles\n",
+    "\n",
+    "Note that it is not guaranteed that each resulting trajectory will pass through an obstacle! Ships simply ignore the obstacles."
    ]
   },
   {
@@ -408,7 +415,7 @@
     "model = ShipModel(\n",
     "        width=config[\"width\"],\n",
     "        height=config[\"height\"],\n",
-    "        num_ships=config[\"num_ships\"]*PERCENTAGE_OF_ANOMALIES, \n",
+    "        num_ships=int(config[\"num_ships\"]*2*PERCENTAGE_OF_ANOMALIES), # doubling the number, assuming that 50% will violate obstacles\n",
     "        max_speed_range=config[\"max_speed_range\"],\n",
     "        ports=config[\"ports\"],\n",
     "        speed_limit_zones=config.get(\"speed_limit_zones\", []),\n",
@@ -607,6 +614,60 @@
   {
    "cell_type": "markdown",
    "metadata": {},
+   "source": [
+    "## Merge files"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df1 = pd.read_csv(\"anomalies_speed_20250517_172830.csv\")\n",
+    "df2 = pd.read_csv(\"anomalies_loc_20250517_184842.csv\")\n",
+    "out = pd.concat([df1,df2])[['t','AgentID','speed','is_anomaly','anomaly_type','x','y']]\n",
+    "out.to_csv(\"synthetic_vessel_tracks_with_anomalies_20250517.csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tmp = pd.read_csv(\"synthetic_vessel_tracks_with_anomalies_20250517.csv\")\n",
+    "tmp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tmp[tmp.anomaly_type==\"location\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tmp[tmp.anomaly_type==\"location\"].AgentID.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": []
   }
  ],
diff --git a/mesa/examples/basic/ships_hybrid_algorithm/ships.png b/mesa/examples/basic/ships_hybrid_algorithm/ships.png