Skip to content

Commit 5378f6b

Browse files
committed
Merge branch 'main' of github.com:VectorlyApp/web-hacker into versioning
2 parents 80b0afe + c173e34 commit 5378f6b

File tree

3 files changed

+61
-237
lines changed

3 files changed

+61
-237
lines changed

quickstart.py

Lines changed: 4 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
import subprocess
1212
import shutil
1313
import json
14-
import atexit
15-
import signal
1614
from pathlib import Path
1715
from typing import Optional
1816
import requests
@@ -29,9 +27,6 @@
2927
CDP_CAPTURES_DIR = Path("./cdp_captures")
3028
DISCOVERY_OUTPUT_DIR = Path("./routine_discovery_output")
3129

32-
# Global variable to track Chrome process for cleanup
33-
_chrome_process: Optional[subprocess.Popen] = None
34-
3530

3631
def print_colored(text: str, color: str = NC) -> None:
3732
"""Print colored text."""
@@ -292,55 +287,6 @@ def launch_chrome(port: int) -> Optional[subprocess.Popen]:
292287
return None
293288

294289

295-
def cleanup_chrome(process: Optional[subprocess.Popen], port: int) -> None:
296-
"""Clean up Chrome process if it was launched by this script."""
297-
if process is None:
298-
return
299-
300-
try:
301-
# Check if Chrome is still running on the port
302-
if not check_chrome_running(port):
303-
return
304-
305-
print()
306-
print_colored("🧹 Cleaning up Chrome...", YELLOW)
307-
308-
# Try graceful termination first
309-
try:
310-
if platform.system() == "Windows":
311-
# On Windows with CREATE_NEW_PROCESS_GROUP, we need to kill the process group
312-
process.terminate()
313-
time.sleep(1)
314-
if process.poll() is None:
315-
process.kill()
316-
else:
317-
process.terminate()
318-
time.sleep(1)
319-
if process.poll() is None:
320-
process.kill()
321-
322-
# Wait a bit for Chrome to close
323-
process.wait(timeout=3)
324-
print_colored("✅ Chrome closed successfully", GREEN)
325-
except subprocess.TimeoutExpired:
326-
# Force kill if it didn't terminate
327-
try:
328-
process.kill()
329-
process.wait(timeout=2)
330-
print_colored("✅ Chrome force-closed", GREEN)
331-
except Exception:
332-
pass
333-
except Exception as e:
334-
# Process might already be dead
335-
if process.poll() is not None:
336-
print_colored("✅ Chrome already closed", GREEN)
337-
else:
338-
print_colored(f"⚠️ Error closing Chrome: {e}", YELLOW)
339-
except Exception:
340-
# Silently fail during cleanup
341-
pass
342-
343-
344290
def run_command(cmd: list[str], description: str) -> bool:
345291
"""Run a command and return True if successful."""
346292
try:
@@ -360,8 +306,6 @@ def run_command(cmd: list[str], description: str) -> bool:
360306

361307
def main():
362308
"""Main workflow."""
363-
global _chrome_process
364-
365309
# Use local variables that can be updated
366310
cdp_captures_dir = CDP_CAPTURES_DIR
367311
discovery_output_dir = DISCOVERY_OUTPUT_DIR
@@ -370,7 +314,7 @@ def main():
370314
print_colored("║ Web Hacker - Quickstart Workflow ║", BLUE)
371315
print_colored("╚════════════════════════════════════════════════════════════╝", BLUE)
372316
print()
373-
317+
374318
# Pipeline overview
375319
print_colored("Web-hacker Pipeline:", BLUE)
376320
print()
@@ -390,19 +334,6 @@ def main():
390334
print_colored(f"✅ Chrome is already running in debug mode on port {PORT}", GREEN)
391335
else:
392336
chrome_process = launch_chrome(PORT)
393-
# Store globally for cleanup
394-
_chrome_process = chrome_process
395-
# Register cleanup function if we launched Chrome
396-
if chrome_process is not None:
397-
atexit.register(cleanup_chrome, chrome_process, PORT)
398-
# Also register signal handlers for graceful shutdown
399-
def signal_handler(signum, frame):
400-
cleanup_chrome(chrome_process, PORT)
401-
sys.exit(0)
402-
signal.signal(signal.SIGINT, signal_handler)
403-
# SIGTERM may not be available on all platforms
404-
if hasattr(signal, 'SIGTERM'):
405-
signal.signal(signal.SIGTERM, signal_handler)
406337

407338
print()
408339

@@ -456,14 +387,6 @@ def signal_handler(signum, frame):
456387
run_command(monitor_cmd, "monitoring")
457388
print()
458389

459-
# Close Chrome before Step 3 if we launched it
460-
if chrome_process is not None:
461-
cleanup_chrome(chrome_process, PORT)
462-
atexit.unregister(cleanup_chrome)
463-
chrome_process = None
464-
_chrome_process = None
465-
print()
466-
467390
# Step 3: Discover
468391
print_colored("Step 3: Discovering routine from captured data...", GREEN)
469392

@@ -475,9 +398,11 @@ def signal_handler(signum, frame):
475398
return
476399

477400
skip = input(" Skip discovery step? (y/n): ").strip().lower()
401+
478402
if skip == 'y':
479403
# Use default directory when skipping - user can specify routine path in step 4 if needed
480404
discovery_output_dir = DISCOVERY_OUTPUT_DIR
405+
481406
print_colored("⏭️ Skipping discovery step.", GREEN)
482407
print_colored(f" Using default discovery output directory: {discovery_output_dir.resolve()}", GREEN)
483408
print()
@@ -489,8 +414,6 @@ def signal_handler(signum, frame):
489414
if new_output_dir:
490415
discovery_output_dir = Path(new_output_dir)
491416
print_colored(f"✅ Using discovery output directory: {discovery_output_dir}", GREEN)
492-
else:
493-
discovery_output_dir = DISCOVERY_OUTPUT_DIR
494417

495418
# Check if routine already exists
496419
routine_file = discovery_output_dir / "routine.json"
@@ -621,8 +544,4 @@ def signal_handler(signum, frame):
621544
except KeyboardInterrupt:
622545
print()
623546
print_colored("⚠️ Interrupted by user.", YELLOW)
624-
# Clean up Chrome if we launched it
625-
if _chrome_process is not None:
626-
cleanup_chrome(_chrome_process, PORT)
627-
sys.exit(0)
628-
547+
sys.exit(0)

web_hacker/cdp/cdp_session.py

Lines changed: 25 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,6 @@ def __init__(
4242
self.clear_cookies = clear_cookies
4343
self.clear_storage = clear_storage
4444

45-
# Connection state tracking
46-
self._connection_lost = False
47-
self._connection_lost_lock = threading.Lock()
48-
4945
# Response tracking for synchronous commands
5046
self.pending_responses = {}
5147
self.response_lock = threading.Lock()
@@ -71,15 +67,8 @@ def __init__(
7167

7268
def send(self, method, params=None):
7369
"""Send CDP command and return sequence ID."""
74-
if self._connection_lost:
75-
raise ConnectionError("WebSocket connection is closed")
7670
self.seq += 1
77-
try:
78-
self.ws.send(json.dumps({"id": self.seq, "method": method, "params": params or {}}))
79-
except (websocket.WebSocketConnectionClosedException, OSError, ConnectionError) as e:
80-
with self._connection_lost_lock:
81-
self._connection_lost = True
82-
raise ConnectionError(f"WebSocket connection lost: {e}")
71+
self.ws.send(json.dumps({"id": self.seq, "method": method, "params": params or {}}))
8372
return self.seq
8473

8574
def send_and_wait(self, method, params=None, timeout=10):
@@ -111,14 +100,6 @@ def send_and_wait(self, method, params=None, timeout=10):
111100

112101
def setup_cdp(self, navigate_to=None):
113102
"""Setup CDP domains and configuration."""
114-
# Enable Target domain to receive target lifecycle events (if on browser WebSocket)
115-
# Note: This may not work on tab WebSockets, but that's okay - we'll catch disconnections
116-
try:
117-
self.send("Target.setDiscoverTargets", {"discover": True})
118-
except Exception as e:
119-
# This is expected to fail on tab WebSockets - Target domain is browser-level
120-
logger.debug(f"Could not enable Target domain (expected on tab WebSockets): {e}")
121-
122103
# Enable basic domains
123104
self.send("Page.enable")
124105
self.send("Runtime.enable")
@@ -167,14 +148,6 @@ def setup_cdp(self, navigate_to=None):
167148

168149
def handle_message(self, msg):
169150
"""Handle incoming CDP message by delegating to appropriate monitors."""
170-
# Check for target lifecycle events (tab closure)
171-
method = msg.get("method")
172-
if method == "Target.targetDestroyed":
173-
logger.info("Tab was closed. Connection will be lost. Saving assets...")
174-
with self._connection_lost_lock:
175-
self._connection_lost = True
176-
return
177-
178151
# Try network monitor first
179152
if self.network_monitor.handle_network_message(msg, self):
180153
return
@@ -229,84 +202,44 @@ def _handle_command_reply(self, msg):
229202

230203
return False
231204

232-
def _generate_assets(self):
233-
"""Generate all monitoring assets. Works even if connection is lost."""
234-
try:
235-
# Final cookie sync using native CDP (only if connection is still alive)
236-
if not self._connection_lost:
237-
try:
238-
self.storage_monitor.monitor_cookie_changes(self)
239-
except Exception as e:
240-
logger.debug(f"Could not sync cookies (connection may be lost): {e}")
241-
except Exception as e:
242-
logger.debug(f"Error in cookie sync: {e}")
205+
def run(self):
206+
"""Main message processing loop."""
207+
logger.info("Blocking trackers & capturing network/storage… Press Ctrl+C to stop.")
243208

244-
# Consolidate all transactions into a single JSON file (works with cached data)
245209
try:
210+
while True:
211+
msg = json.loads(self.ws.recv())
212+
self.handle_message(msg)
213+
except KeyboardInterrupt:
214+
logger.info("\nStopped. Saving assets...")
215+
# Final cookie sync using native CDP (no delay needed)
216+
self.storage_monitor.monitor_cookie_changes(self)
217+
218+
# Consolidate all transactions into a single JSON file
246219
consolidated_path = f"{self.output_dir}/consolidated_transactions.json"
247220
self.network_monitor.consolidate_transactions(consolidated_path)
248-
except Exception as e:
249-
logger.warning(f"Could not consolidate transactions: {e}")
250-
251-
# Generate HAR file from consolidated transactions (works with cached data)
252-
try:
221+
222+
# Generate HAR file from consolidated transactions
253223
har_path = f"{self.output_dir}/network.har"
254224
self.network_monitor.generate_har_from_transactions(har_path, "Web Hacker Session")
255-
except Exception as e:
256-
logger.warning(f"Could not generate HAR file: {e}")
257-
258-
# Consolidate all interactions into a single JSON file (works with cached data)
259-
try:
225+
226+
# Consolidate all interactions into a single JSON file
260227
interaction_dir = self.paths.get('interaction_dir', f"{self.output_dir}/interaction")
261228
consolidated_interactions_path = os.path.join(interaction_dir, "consolidated_interactions.json")
262229
self.interaction_monitor.consolidate_interactions(consolidated_interactions_path)
263-
except Exception as e:
264-
logger.warning(f"Could not consolidate interactions: {e}")
265-
266-
def run(self):
267-
"""Main message processing loop."""
268-
logger.info("Blocking trackers & capturing network/storage… Press Ctrl+C to stop.")
269-
270-
try:
271-
while True:
272-
try:
273-
msg = json.loads(self.ws.recv())
274-
self.handle_message(msg)
275-
276-
# Check if connection was lost due to tab closure
277-
if self._connection_lost:
278-
logger.info("Tab closed. Saving assets...")
279-
break
280-
except (websocket.WebSocketConnectionClosedException, OSError, ConnectionError) as e:
281-
# WebSocket connection lost (tab closed, browser closed, etc.)
282-
logger.info(f"Connection lost: {e}. Saving assets...")
283-
with self._connection_lost_lock:
284-
self._connection_lost = True
285-
break
286-
except json.JSONDecodeError as e:
287-
logger.warning(f"Failed to parse message: {e}")
288-
continue
289-
except KeyboardInterrupt:
290-
logger.info("\nStopped by user. Saving assets...")
291230
finally:
292-
# Always generate assets, even if connection is lost
293-
self._generate_assets()
294-
295-
# Close WebSocket if still open
296231
try:
297-
if self.ws and not self._connection_lost:
298-
self.ws.close()
299-
except Exception:
232+
self.ws.close()
233+
except:
300234
pass
301235

302236
def get_monitoring_summary(self):
303237
"""Get summary of all monitoring activities."""
304-
# Trigger final cookie check using native CDP (only if connection is still alive)
305-
if not self._connection_lost:
306-
try:
307-
self.storage_monitor.monitor_cookie_changes(self)
308-
except Exception as e:
309-
logger.debug(f"Could not sync cookies for summary: {e}")
238+
# Trigger final cookie check using native CDP (no delay needed)
239+
try:
240+
self.storage_monitor.monitor_cookie_changes(self)
241+
except:
242+
pass
310243

311244
storage_summary = self.storage_monitor.get_storage_summary()
312245
network_summary = self.network_monitor.get_network_summary()
@@ -316,4 +249,4 @@ def get_monitoring_summary(self):
316249
"network": network_summary,
317250
"storage": storage_summary,
318251
"interaction": interaction_summary,
319-
}
252+
}

0 commit comments

Comments
 (0)