Skip to content

Commit eeefff8

Browse files
authored
Merge pull request #24 from VectorlyApp/unboundlocalerr_fix
Resolve `UnboundLocalError` in `agent.py`
2 parents b56ee97 + 062dee5 commit eeefff8

File tree

10 files changed

+249
-138
lines changed

10 files changed

+249
-138
lines changed

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[MAIN]
22

33
# minimum score (out of 10) required to pass
4-
fail-under=0.00
4+
fail-under=5.00
55

66
# files or directories to be skipped
77
ignore=*.ipynb

scripts/browser_monitor.py

Lines changed: 58 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,24 @@
11
"""
2+
scripts/browser_monitor.py
3+
24
CDP-based web scraper that blocks trackers and captures network requests.
35
"""
46

5-
import os, json, time, shutil, sys, argparse
7+
import argparse
8+
import logging
9+
import os
10+
import json
11+
import time
12+
import shutil
13+
import sys
14+
615
from src.cdp.cdp_session import CDPSession
716
from src.data_models.network import ResourceType
817
from src.cdp.tab_managements import cdp_new_tab, dispose_context
918

19+
logging.basicConfig(level=logging.INFO)
20+
logger = logging.getLogger(__name__)
21+
1022
# ---- Configuration ----
1123

1224
BLOCK_PATTERNS = [
@@ -264,7 +276,7 @@ def save_session_summary(paths, summary, args, start_time, end_time, created_tab
264276
}
265277
}
266278

267-
with open(paths['summary_path'], 'w', encoding='utf-8') as f:
279+
with open(paths['summary_path'], mode='w', encoding='utf-8') as f:
268280
json.dump(session_summary, f, indent=2, ensure_ascii=False)
269281

270282
return session_summary
@@ -286,30 +298,30 @@ def main():
286298
remote_debugging_address = f"http://{args.host}:{args.port}"
287299

288300
if not tab_id:
289-
print("No tab ID provided, creating new tab...")
301+
logger.info("No tab ID provided, creating new tab...")
290302
try:
291303
tab_id, context_id = cdp_new_tab(
292304
remote_debugging_address=remote_debugging_address,
293305
incognito=args.incognito,
294306
url=args.url if not args.no_navigate else "about:blank"
295307
)
296308
created_tab = True
297-
print(f"Created new tab: {tab_id}")
309+
logger.info(f"Created new tab: {tab_id}")
298310
if context_id:
299-
print(f"Browser context: {context_id}")
311+
logger.info(f"Browser context: {context_id}")
300312
except Exception as e:
301-
print(f"Error creating new tab: {e}")
313+
logger.info(f"Error creating new tab: {e}")
302314
sys.exit(1)
303315

304316
# Build WebSocket URL
305317
ws_url = f"ws://{args.host}:{args.port}/devtools/page/{tab_id}"
306318
navigate_to = None if args.no_navigate else args.url
307319

308-
print(f"Starting CDP monitoring session...")
309-
print(f"Output directory: {args.output_dir}")
310-
print(f"Target URL: {navigate_to or 'No navigation (attach only)'}")
311-
print(f"Tab ID: {tab_id}")
312-
320+
logger.info(f"Starting CDP monitoring session...")
321+
logger.info(f"Output directory: {args.output_dir}")
322+
logger.info(f"Target URL: {navigate_to or 'No navigation (attach only)'}")
323+
logger.info(f"Tab ID: {tab_id}")
324+
313325
# Create and run CDP session
314326
try:
315327
session = CDPSession(
@@ -323,58 +335,58 @@ def main():
323335
)
324336
session.setup_cdp(navigate_to)
325337
session.run()
326-
338+
327339
except KeyboardInterrupt:
328-
print("\nSession stopped by user")
340+
logger.info("\nSession stopped by user")
329341
except Exception as e:
330-
print(f"Error: {e}")
342+
logger.info(f"Error: {e}")
331343
sys.exit(1)
332344
finally:
333345
# Cleanup: dispose context if we created a tab
334346
if created_tab and context_id:
335347
try:
336-
print("Cleaning up created browser context...")
348+
logger.info("Cleaning up created browser context...")
337349
dispose_context(remote_debugging_address, context_id)
338350
except Exception as e:
339-
print(f"Warning: Could not dispose browser context: {e}")
340-
351+
logger.info(f"Warning: Could not dispose browser context: {e}")
352+
341353
end_time = time.time()
342-
354+
343355
# Get final summary and save it
344356
try:
345357
summary = session.get_monitoring_summary()
346358
save_session_summary(paths, summary, args, start_time, end_time, created_tab, context_id)
347-
359+
348360
# Print organized summary
349-
print("\n" + "="*60)
350-
print("SESSION SUMMARY")
351-
print("="*60)
352-
print(f"Duration: {end_time - start_time:.1f} seconds")
353-
print(f"Tab created: {'Yes' if created_tab else 'No'}")
361+
logger.info("\n" + "="*60)
362+
logger.info("SESSION SUMMARY")
363+
logger.info("="*60)
364+
logger.info(f"Duration: {end_time - start_time:.1f} seconds")
365+
logger.info(f"Tab created: {'Yes' if created_tab else 'No'}")
354366
if created_tab and context_id:
355-
print(f"Browser context: {context_id}")
356-
print(f"Network requests tracked: {summary['network']['requests_tracked']}")
357-
print(f"Cookies tracked: {summary['storage']['cookies_count']}")
358-
print(f"LocalStorage origins: {len(summary['storage']['local_storage_origins'])}")
359-
print(f"SessionStorage origins: {len(summary['storage']['session_storage_origins'])}")
360-
print("OUTPUT STRUCTURE:")
361-
print(f"├── session_summary.json")
362-
print(f"├── network/")
363-
print(f"│ ├── consolidated_transactions.json")
364-
print(f"│ ├── network.har")
365-
print(f"│ └── transactions/")
366-
print(f"│ └── [timestamp_url_id]/")
367-
print(f"│ ├── request.json")
368-
print(f"│ ├── response.json")
369-
print(f"│ └── response_body.[ext]")
370-
print(f"└── storage/")
371-
print(f" └── events.jsonl")
372-
print()
373-
print(f"Session complete! Check {args.output_dir} for all outputs.")
374-
367+
logger.info(f"Browser context: {context_id}")
368+
logger.info(f"Network requests tracked: {summary['network']['requests_tracked']}")
369+
logger.info(f"Cookies tracked: {summary['storage']['cookies_count']}")
370+
logger.info(f"LocalStorage origins: {len(summary['storage']['local_storage_origins'])}")
371+
logger.info(f"SessionStorage origins: {len(summary['storage']['session_storage_origins'])}")
372+
logger.info("OUTPUT STRUCTURE:")
373+
logger.info(f"├── session_summary.json")
374+
logger.info(f"├── network/")
375+
logger.info(f"│ ├── consolidated_transactions.json")
376+
logger.info(f"│ ├── network.har")
377+
logger.info(f"│ └── transactions/")
378+
logger.info(f"│ └── [timestamp_url_id]/")
379+
logger.info(f"│ ├── request.json")
380+
logger.info(f"│ ├── response.json")
381+
logger.info(f"│ └── response_body.[ext]")
382+
logger.info(f"└── storage/")
383+
logger.info(f" └── events.jsonl")
384+
logger.info()
385+
logger.info(f"Session complete! Check {args.output_dir} for all outputs.")
386+
375387
except Exception as e:
376-
print(f"Warning: Could not generate summary: {e}")
388+
logger.info("Warning: Could not generate summary: %s", e)
377389

378390

379391
if __name__ == "__main__":
380-
main()
392+
main()

scripts/discover_routines.py

Lines changed: 21 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
import logging
77
import os
88

9-
from dotenv import load_dotenv
109
from openai import OpenAI
1110

11+
from src.config import Config
12+
from src.utils.exceptions import ApiKeyNotFoundError
1213
from src.routine_discovery.agent import RoutineDiscoveryAgent
1314
from src.routine_discovery.context_manager import ContextManager
1415

@@ -17,32 +18,29 @@
1718

1819

1920
def main() -> None:
20-
2121
# parse arguments
2222
parser = ArgumentParser(description="Discover routines from the network transactions.")
2323
parser.add_argument("--task", type=str, required=True, help="The description of the task to discover routines for.")
2424
parser.add_argument("--cdp-captures-dir", type=str, default="./cdp_captures", help="The directory containing the CDP captures.")
2525
parser.add_argument("--output-dir", type=str, default="./routine_discovery_output", help="The directory to save the output to.")
2626
parser.add_argument("--llm-model", type=str, default="gpt-5", help="The LLM model to use.")
2727
args = parser.parse_args()
28-
29-
# load environment variables
30-
load_dotenv()
31-
28+
3229
# ensure OpenAI API key is set
33-
if os.getenv("OPENAI_API_KEY") is None:
34-
raise ValueError("OPENAI_API_KEY is not set")
35-
30+
if Config.OPENAI_API_KEY is None:
31+
logger.error("OPENAI_API_KEY is not set")
32+
raise ApiKeyNotFoundError("OPENAI_API_KEY is not set")
33+
3634
logger.info(f"\n{'-' * 100}")
37-
logger.info(f"Starting routine discovery for task:\n{args.task}")
35+
logger.info("Starting routine discovery for task:\n%s", args.task)
3836
logger.info(f"{'-' * 100}\n")
39-
37+
4038
# initialize OpenAI client
41-
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
42-
39+
openai_client = OpenAI(api_key=Config.OPENAI_API_KEY)
40+
4341
# create the output directory
4442
os.makedirs(args.output_dir, exist_ok=True)
45-
43+
4644
# initialize context manager
4745
context_manager = ContextManager(
4846
client=openai_client,
@@ -51,13 +49,12 @@ def main() -> None:
5149
consolidated_transactions_path=os.path.join(args.cdp_captures_dir, "network/consolidated_transactions.json"),
5250
storage_jsonl_path=os.path.join(args.cdp_captures_dir, "storage/events.jsonl")
5351
)
54-
55-
logger.info(f"Context manager initialized.")
56-
52+
logger.info("Context manager initialized.")
53+
5754
# make the vectorstore
5855
context_manager.make_vectorstore()
59-
logger.info(f"Vectorstore created: {context_manager.vectorstore_id}")
60-
56+
logger.info("Vectorstore created: %s", context_manager.vectorstore_id)
57+
6158
# initialize routine discovery agent
6259
routine_discovery_agent = RoutineDiscoveryAgent(
6360
client=openai_client,
@@ -66,15 +63,15 @@ def main() -> None:
6663
llm_model=args.llm_model,
6764
output_dir=args.output_dir,
6865
)
69-
logger.info(f"Routine discovery agent initialized.")
70-
66+
logger.info("Routine discovery agent initialized.")
67+
7168
logger.info(f"\n{'-' * 100}")
72-
logger.info(f"Running routine discovery agent.")
69+
logger.info("Running routine discovery agent.")
7370
logger.info(f"{'-' * 100}\n")
74-
71+
7572
# run the routine discovery agent
7673
routine_discovery_agent.run()
77-
logger.info(f"Routine discovery agent run complete")
74+
logger.info("Routine discovery agent run complete.")
7875

7976

8077
if __name__ == "__main__":

scripts/execute_routine.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,15 @@
1414
--parameters-dict "{'origin': 'boston', 'destination': 'new york', 'departureDate': '2026-03-22'}"
1515
"""
1616

17-
import json
1817
import argparse
18+
import json
19+
import logging
20+
1921
from src.cdp.routine_execution import execute_routine
2022
from src.data_models.production_routine import Routine
2123

24+
logging.basicConfig(level=logging.INFO)
25+
logger = logging.getLogger(__name__)
2226

2327

2428
def main(routine_path: str, parameters_path: str | None = None, parameters_dict: dict | None = None):
@@ -49,10 +53,10 @@ def main(routine_path: str, parameters_path: str | None = None, parameters_dict:
4953
close_tab_when_done=False,
5054
incognito=True,
5155
)
52-
print(f"Result: {result}")
56+
logger.info(f"Result: {result}")
5357

5458
except Exception as e:
55-
print("Error executing routine: %s", e)
59+
logger.error("Error executing routine: %s", e)
5660

5761

5862
if __name__ == "__main__":

src/cdp/cdp_session.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,26 @@
1-
#!/usr/bin/env python3
21
"""
2+
src/cdp/cdp_session.py
3+
34
CDP Session management for web scraping with Chrome DevTools Protocol.
45
"""
56

67
import json
8+
import logging
79
import websocket
810
import threading
911
import time
12+
1013
from src.cdp.network_monitor import NetworkMonitor
1114
from src.cdp.storage_monitor import StorageMonitor
1215

16+
logging.basicConfig(level=logging.INFO)
17+
logger = logging.getLogger(__name__)
18+
1319

1420
class CDPSession:
15-
"""Manages CDP WebSocket connection and coordinates monitoring components."""
21+
"""
22+
Manages CDP WebSocket connection and coordinates monitoring components.
23+
"""
1624

1725
def __init__(
1826
self,
@@ -91,7 +99,7 @@ def setup_cdp(self, navigate_to=None):
9199

92100
# Clear cookies if requested
93101
if self.clear_cookies:
94-
print("Clearing all browser cookies...")
102+
logger.info("Clearing all browser cookies...")
95103
self.send("Network.clearBrowserCookies")
96104

97105
# Also clear cookie store
@@ -102,7 +110,7 @@ def setup_cdp(self, navigate_to=None):
102110

103111
# Clear storage if requested
104112
if self.clear_storage:
105-
print("Clearing localStorage and sessionStorage...")
113+
logger.info("Clearing localStorage and sessionStorage...")
106114
try:
107115
# Clear all storage for all origins
108116
self.send("Storage.clearDataForOrigin", {
@@ -119,7 +127,7 @@ def setup_cdp(self, navigate_to=None):
119127
"includeCommandLineAPI": True
120128
})
121129
except:
122-
print("Warning: Could not clear storage automatically")
130+
logger.info("Warning: Could not clear storage automatically")
123131

124132
# Setup monitoring components
125133
self.network_monitor.setup_network_monitoring(self)
@@ -179,14 +187,14 @@ def _handle_command_reply(self, msg):
179187

180188
def run(self):
181189
"""Main message processing loop."""
182-
print("Blocking trackers & capturing network/storage… Press Ctrl+C to stop.")
190+
logger.info("Blocking trackers & capturing network/storage… Press Ctrl+C to stop.")
183191

184192
try:
185193
while True:
186194
msg = json.loads(self.ws.recv())
187195
self.handle_message(msg)
188196
except KeyboardInterrupt:
189-
print("\nStopped.")
197+
logger.info("\nStopped.")
190198
# Final cookie sync using native CDP (no delay needed)
191199
self.storage_monitor.monitor_cookie_changes(self)
192200

src/cdp/routine_execution.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -701,9 +701,9 @@ def recv_until(predicate, deadline):
701701
result = None
702702
current_url = None
703703

704-
print(f"Executing routine with {len(routine.operations)} operations")
704+
logger.info(f"Executing routine with {len(routine.operations)} operations")
705705
for i, operation in enumerate(routine.operations):
706-
print(
706+
logger.info(
707707
f"Executing operation {i+1}/{len(routine.operations)}: {type(operation).__name__}"
708708
)
709709
if isinstance(operation, RoutineNavigateOperation):
@@ -787,4 +787,4 @@ def recv_until(predicate, deadline):
787787
try:
788788
ws.close()
789789
except Exception:
790-
pass
790+
pass

0 commit comments

Comments
 (0)