11"""
2+ src/scripts/browser_monitor.py
3+
24CDP-based web scraper that blocks trackers and captures network requests.
35"""
46
5- import os , json , time , shutil , sys , argparse
7+ import argparse
8+ import logging
9+ import os
10+ import json
11+ import time
12+ import shutil
13+ import sys
14+
615from src .cdp .cdp_session import CDPSession
716from src .data_models .network import ResourceType
817from src .cdp .tab_managements import cdp_new_tab , dispose_context
918
19+ logging .basicConfig (level = logging .INFO )
20+ logger = logging .getLogger (__name__ )
21+
1022# ---- Configuration ----
1123
1224BLOCK_PATTERNS = [
@@ -264,7 +276,7 @@ def save_session_summary(paths, summary, args, start_time, end_time, created_tab
264276 }
265277 }
266278
267- with open (paths ['summary_path' ], 'w' , encoding = 'utf-8' ) as f :
279+ with open (paths ['summary_path' ], mode = 'w' , encoding = 'utf-8' ) as f :
268280 json .dump (session_summary , f , indent = 2 , ensure_ascii = False )
269281
270282 return session_summary
@@ -286,30 +298,30 @@ def main():
286298 remote_debugging_address = f"http://{ args .host } :{ args .port } "
287299
288300 if not tab_id :
289- print ("No tab ID provided, creating new tab..." )
301+ logger . info ("No tab ID provided, creating new tab..." )
290302 try :
291303 tab_id , context_id = cdp_new_tab (
292304 remote_debugging_address = remote_debugging_address ,
293305 incognito = args .incognito ,
294306 url = args .url if not args .no_navigate else "about:blank"
295307 )
296308 created_tab = True
297- print (f"Created new tab: { tab_id } " )
309+ logger . info (f"Created new tab: { tab_id } " )
298310 if context_id :
299- print (f"Browser context: { context_id } " )
311+ logger . info (f"Browser context: { context_id } " )
300312 except Exception as e :
301- print (f"Error creating new tab: { e } " )
313+ logger . info (f"Error creating new tab: { e } " )
302314 sys .exit (1 )
303315
304316 # Build WebSocket URL
305317 ws_url = f"ws://{ args .host } :{ args .port } /devtools/page/{ tab_id } "
306318 navigate_to = None if args .no_navigate else args .url
307319
308- print (f"Starting CDP monitoring session..." )
309- print (f"Output directory: { args .output_dir } " )
310- print (f"Target URL: { navigate_to or 'No navigation (attach only)' } " )
311- print (f"Tab ID: { tab_id } " )
312-
320+ logger . info (f"Starting CDP monitoring session..." )
321+ logger . info (f"Output directory: { args .output_dir } " )
322+ logger . info (f"Target URL: { navigate_to or 'No navigation (attach only)' } " )
323+ logger . info (f"Tab ID: { tab_id } " )
324+
313325 # Create and run CDP session
314326 try :
315327 session = CDPSession (
@@ -323,58 +335,58 @@ def main():
323335 )
324336 session .setup_cdp (navigate_to )
325337 session .run ()
326-
338+
327339 except KeyboardInterrupt :
328- print ("\n Session stopped by user" )
340+ logger . info ("\n Session stopped by user" )
329341 except Exception as e :
330- print (f"Error: { e } " )
342+ logger . info (f"Error: { e } " )
331343 sys .exit (1 )
332344 finally :
333345 # Cleanup: dispose context if we created a tab
334346 if created_tab and context_id :
335347 try :
336- print ("Cleaning up created browser context..." )
348+ logger . info ("Cleaning up created browser context..." )
337349 dispose_context (remote_debugging_address , context_id )
338350 except Exception as e :
339- print (f"Warning: Could not dispose browser context: { e } " )
340-
351+ logger . info (f"Warning: Could not dispose browser context: { e } " )
352+
341353 end_time = time .time ()
342-
354+
343355 # Get final summary and save it
344356 try :
345357 summary = session .get_monitoring_summary ()
346358 save_session_summary (paths , summary , args , start_time , end_time , created_tab , context_id )
347-
359+
348360 # Print organized summary
349- print ("\n " + "=" * 60 )
350- print ("SESSION SUMMARY" )
351- print ("=" * 60 )
352- print (f"Duration: { end_time - start_time :.1f} seconds" )
353- print (f"Tab created: { 'Yes' if created_tab else 'No' } " )
361+ logger . info ("\n " + "=" * 60 )
362+ logger . info ("SESSION SUMMARY" )
363+ logger . info ("=" * 60 )
364+ logger . info (f"Duration: { end_time - start_time :.1f} seconds" )
365+ logger . info (f"Tab created: { 'Yes' if created_tab else 'No' } " )
354366 if created_tab and context_id :
355- print (f"Browser context: { context_id } " )
356- print (f"Network requests tracked: { summary ['network' ]['requests_tracked' ]} " )
357- print (f"Cookies tracked: { summary ['storage' ]['cookies_count' ]} " )
358- print (f"LocalStorage origins: { len (summary ['storage' ]['local_storage_origins' ])} " )
359- print (f"SessionStorage origins: { len (summary ['storage' ]['session_storage_origins' ])} " )
360- print ("OUTPUT STRUCTURE:" )
361- print (f"├── session_summary.json" )
362- print (f"├── network/" )
363- print (f"│ ├── consolidated_transactions.json" )
364- print (f"│ ├── network.har" )
365- print (f"│ └── transactions/" )
366- print (f"│ └── [timestamp_url_id]/" )
367- print (f"│ ├── request.json" )
368- print (f"│ ├── response.json" )
369- print (f"│ └── response_body.[ext]" )
370- print (f"└── storage/" )
371- print (f" └── events.jsonl" )
372- print ()
373- print (f"Session complete! Check { args .output_dir } for all outputs." )
374-
367+ logger . info (f"Browser context: { context_id } " )
368+ logger . info (f"Network requests tracked: { summary ['network' ]['requests_tracked' ]} " )
369+ logger . info (f"Cookies tracked: { summary ['storage' ]['cookies_count' ]} " )
370+ logger . info (f"LocalStorage origins: { len (summary ['storage' ]['local_storage_origins' ])} " )
371+ logger . info (f"SessionStorage origins: { len (summary ['storage' ]['session_storage_origins' ])} " )
372+ logger . info ("OUTPUT STRUCTURE:" )
373+ logger . info (f"├── session_summary.json" )
374+ logger . info (f"├── network/" )
375+ logger . info (f"│ ├── consolidated_transactions.json" )
376+ logger . info (f"│ ├── network.har" )
377+ logger . info (f"│ └── transactions/" )
378+ logger . info (f"│ └── [timestamp_url_id]/" )
379+ logger . info (f"│ ├── request.json" )
380+ logger . info (f"│ ├── response.json" )
381+ logger . info (f"│ └── response_body.[ext]" )
382+ logger . info (f"└── storage/" )
383+ logger . info (f" └── events.jsonl" )
384+ logger . info ("" )
385+ logger . info (f"Session complete! Check { args .output_dir } for all outputs." )
386+
375387 except Exception as e :
376- print ( f "Warning: Could not generate summary: { e } " )
388+ logger . info ( "Warning: Could not generate summary: %s" , e )
377389
378390
379391if __name__ == "__main__" :
380- main ()
392+ main ()
0 commit comments