VectorlyApp
diff --git a/‎.github/workflows/tests.yml‎
Lines changed: 45 additions & 0 deletions b/‎.github/workflows/tests.yml‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.pylintrc‎
Lines changed: 28 additions & 0 deletions b/‎.pylintrc‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎.python-version‎
Lines changed: 1 addition & 0 deletions b/‎.python-version‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 45 additions & 17 deletions b/‎README.md‎
Lines changed: 45 additions & 17 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 10 additions & 3 deletions b/‎pyproject.toml‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎scripts/discover_routines.py‎
Lines changed: 20 additions & 13 deletions b/‎scripts/discover_routines.py‎
Lines changed: 20 additions & 13 deletions
diff --git a/‎src/cdp/__init__.py‎ b/‎src/cdp/__init__.py‎
@@ -0,0 +1,45 @@
+# .github/workflows/tests.yml
+
+name: Linter and Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:                     # catches PRs from feature branches → main
+    types: [opened, synchronize, reopened]
+
+permissions:
+  contents: read                    # fetch code
+  id-token: write                   # enable OIDC if we ever need cloud creds
+  # pull-requests: write            # only if you want to post PR comments
+
+jobs:
+  lintAndTest:
+    runs-on: "ubuntu-latest"
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    steps:
+      - uses: actions/checkout@v4
+      
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+
+      - name: Cache uv dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/uv
+          key: ${{ runner.os }}-uv-${{ hashFiles('pyproject.toml') }}
+
+      - name: Install dependencies
+        run: uv sync
+
+      - name: Lint
+        run: uv run pylint $(git ls-files '*.py')
+
+      #- name: Run tests
+      #  run: uv run pytest tests/ -v
@@ -211,4 +211,6 @@ __marimo__/
 
 # output directories
 cdp_captures/
+cdp_captures*/
 routine_discovery_output/
+routine_discovery_output*/
@@ -0,0 +1,28 @@
+[MAIN]
+
+# minimum score (out of 10) required to pass
+fail-under=0.00
+
+# files or directories to be skipped
+ignore=*.ipynb
+
+disable=
+    too-many-locals,
+    missing-module-docstring
+
+[FORMAT]
+
+max-line-length=125
+
+[./src/]
+
+disable=
+    too-few-public-methods,
+
+[./tests]
+
+disable=
+    missing-function-docstring,
+    missing-class-docstring,
+    missing-module-docstring,
+    too-few-public-methods,
@@ -0,0 +1 @@
+3.12.3
@@ -120,7 +120,7 @@ Placeholders inside operation fields are resolved at runtime:
 
 Interpolation occurs before an operation executes. For example, a fetch endpoint might be:
 
-```
+```json
 {
   "type": "fetch",
   "endpoint": {
@@ -137,24 +137,26 @@ Interpolation occurs before an operation executes. For example, a fetch endpoint
 
 This substitutes parameter values and injects `auth_token` from cookies. The JSON response is stored under `sessionStorage['result_key']` and can be returned by a final `return` operation using the matching `session_storage_key`.
 
-## Prerequisits
+## Prerequisites
 
-- Python 3.11+
+- Python 3.12 (3.12.3 or newer; 3.13 is not yet supported)
 - Google Chrome (stable)
-- uv (Python package manager)
+- [uv (Python package manager)](https://github.com/astral-sh/uv)
   - macOS/Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`
   - Windows (PowerShell): `iwr https://astral.sh/uv/install.ps1 -UseBasicParsing | iex`
 - OpenAI API key
 
 ## Set up Your Environment 🔧
 
+### Linux/macOS
+
 ```bash
 # 1) Clone and enter the repo
 git clone https://github.com/VectorlyApp/web-hacker.git
 cd web-hacker
 
 # 2) Create & activate virtual environment (uv)
-uv venv .venv
+uv venv --prompt web-hacker
 source .venv/bin/activate   # Windows: .venv\Scripts\activate
 
 # 3) Install in editable mode via uv (pip-compatible interface)
@@ -166,6 +168,29 @@ cp .env.example .env  # then edit values
 export OPENAI_API_KEY="sk-..."
 ```
 
+### Windows
+
+```powershell
+# 1) Clone and enter the repo
+git clone https://github.com/VectorlyApp/web-hacker.git
+cd web-hacker
+
+# 2) Install uv (if not already installed)
+iwr https://astral.sh/uv/install.ps1 -UseBasicParsing | iex
+
+# 3) Create & activate virtual environment (uv)
+uv venv --prompt web-hacker
+.venv\Scripts\activate
+
+# 4) Install in editable mode via uv (pip-compatible interface)
+uv pip install -e .
+
+# 5) Configure environment
+copy .env.example .env  # then edit values
+# or set directly
+$env:OPENAI_API_KEY="sk-..."
+```
+
 ## Launch Chrome in Debug Mode 🐞
 
 ### Instructions for MacOS
@@ -242,15 +267,10 @@ Use the CDP browser monitor to block trackers and capture network, storage, and
 **Run this command to start monitoring:**
 
 ```bash
-python scripts/browser_monitor.py \
-  --host 127.0.0.1 \
-  --port 9222 \
-  --output-dir ./cdp_captures \
-  --url about:blank \
-  --incognito
+python scripts/browser_monitor.py --host 127.0.0.1 --port 9222 --output-dir ./cdp_captures --url about:blank --incognito
 ```
 
-The script will open a new tab (starting at `about:blank`). Navigate to your target website, then manually perform the actions you want to automate (e.g., search, login, export report). Keep Chrome focused during this process. Press `Ctrl+C` when done; the script will consolidate transactions and produce a HAR automatically.
+The script will open a new tab (starting at `about:blank`). Navigate to your target website, then manually perform the actions you want to automate (e.g., search, login, export report). Keep Chrome focused during this process. Press `Ctrl+C` when you are done; the script will then consolidate transactions and produce a HAR automatically.
 
 **Output structure** (under `--output-dir`, default `./cdp_captures`):
 
@@ -265,8 +285,8 @@ cdp_captures/
 │           ├── request.json
 │           ├── response.json
 │           └── response_body.[ext]
-├── storage/
-│   └── events.jsonl
+└── storage/
+    └── events.jsonl
 ```
 
 Tip: Keep Chrome focused while monitoring and perform the target flow (search, checkout, etc.). Press Ctrl+C to stop; the script will consolidate transactions and produce an HTTP Archive (HAR) automatically.
@@ -281,14 +301,21 @@ Use the **routine-discovery pipeline** to analyze captured data and synthesize a
 
 > ⚠️ **Important:** You must specify your own `--task` parameter. The example below is just for demonstration—replace it with a description of what you want to automate.
 
-```
+**Linux/macOS (bash):**
+```bash
 python scripts/discover_routines.py \
   --task "recover the api endpoints for searching for trains and their prices" \
   --cdp-captures-dir ./cdp_captures \
   --output-dir ./routine_discovery_output \
   --llm-model gpt-5
 ```
 
+**Windows (PowerShell):**
+```powershell
+# Simple task (no quotes inside):
+python scripts/discover_routines.py --task "Recover the API endpoints for searching for trains and their prices" --cdp-captures-dir ./cdp_captures --output-dir ./routine_discovery_output --llm-model gpt-5
+```
+
 **Example tasks:**
 - `"recover the api endpoints for searching for trains and their prices"` (shown above)
 - `"discover how to search for flights and get pricing"`
@@ -322,6 +349,7 @@ routine_discovery_output/
 ```json
 "field": "{{paramName}}"
 ```
+
 And `paramName` is a string parameter, manually change it to:
 ```json
 "field": "\"{{paramName}}\""
@@ -331,7 +359,7 @@ This ensures the parameter value is properly quoted as a JSON string when substi
 
 Run the example routine: 
 
-```
+```bash
 # Using a parameters file:
 
 python scripts/execute_routine.py \
@@ -347,7 +375,7 @@ python scripts/execute_routine.py \
 
 Run a discovered routine:
 
-```
+```bash
 python scripts/execute_routine.py \
   --routine-path routine_discovery_output/routine.json \
   --parameters-path routine_discovery_output/test_parameters.json
 
@@ -1,18 +1,25 @@
+# pyproject.toml for web-hacker
+
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [project]
 name = "web-hacker"
 version = "0.1.0"
-description = "Add your description here"
+description = "Reverse engineer any web app!"
 readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.12.3,<3.13"  # pinning to 3.12.x
 dependencies = [
+    "ipykernel>=6.29.5",
     "openai>=2.6.1",
+    "pydantic>=2.11.4",
+    "pylint>=3.0.0",
+    "pytest>=8.3.5",
     "python-dotenv>=1.2.1",
-    "websocket-client>=1.6.0",
     "requests>=2.31.0",
+    "websockets>=15.0.1",
+    "websocket-client>=1.6.0",
 ]
 
 [tool.hatch.build.targets.wheel]
 
@@ -1,12 +1,20 @@
 """
 Script for discovering routines from the network transactions.
 """
+
 from argparse import ArgumentParser
+import logging
+import os
+
+from dotenv import load_dotenv
 from openai import OpenAI
+
 from src.routine_discovery.agent import RoutineDiscoveryAgent
 from src.routine_discovery.context_manager import ContextManager
-from dotenv import load_dotenv
-import os
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 
 def main() -> None:
 
@@ -25,10 +33,9 @@ def main() -> None:
     if os.getenv("OPENAI_API_KEY") is None:
         raise ValueError("OPENAI_API_KEY is not set")
 
-    
-    print(f"\n{'-' * 100}")
-    print(f"Starting routine discovery for task:\n{args.task}")
-    print(f"{'-' * 100}\n")
+    logger.info(f"\n{'-' * 100}")
+    logger.info(f"Starting routine discovery for task:\n{args.task}")
+    logger.info(f"{'-' * 100}\n")
 
     # initialize OpenAI client
     openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
@@ -45,11 +52,11 @@ def main() -> None:
         storage_jsonl_path=os.path.join(args.cdp_captures_dir, "storage/events.jsonl")
     )
 
-    print(f"Context manager initialized.")
+    logger.info(f"Context manager initialized.")
 
     # make the vectorstore
     context_manager.make_vectorstore()
-    print(f"Vectorstore created: {context_manager.vectorstore_id}")
+    logger.info(f"Vectorstore created: {context_manager.vectorstore_id}")
 
     # initialize routine discovery agent
     routine_discovery_agent = RoutineDiscoveryAgent(
@@ -59,15 +66,15 @@ def main() -> None:
         llm_model=args.llm_model,
         output_dir=args.output_dir,
     )
-    print(f"Routine discovery agent initialized.")
+    logger.info(f"Routine discovery agent initialized.")
 
-    print(f"\n{'-' * 100}")
-    print(f"Running routine discovery agent.")
-    print(f"{'-' * 100}\n")
+    logger.info(f"\n{'-' * 100}")
+    logger.info(f"Running routine discovery agent.")
+    logger.info(f"{'-' * 100}\n")
 
     # run the routine discovery agent
     routine_discovery_agent.run()
-    print(f"Routine discovery agent run complete")
+    logger.info(f"Routine discovery agent run complete")
 
 
 if __name__ == "__main__":