Skip to content

Commit 968cc06

Browse files
authored
Merge pull request #29 from VectorlyApp/fix_param_usage_regex
Fix param usage regex bug
2 parents 11cb523 + 0217c80 commit 968cc06

30 files changed

+1618
-43
lines changed

.github/workflows/tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,5 @@ jobs:
4141
- name: Lint
4242
run: uv run pylint $(git ls-files '*.py')
4343

44-
#- name: Run tests
45-
# run: uv run pytest tests/ -v
44+
- name: Run tests
45+
run: uv run pytest tests/ -v

README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,13 @@ cd web-hacker
159159
uv venv --prompt web-hacker
160160
source .venv/bin/activate # Windows: .venv\\Scripts\\activate
161161

162-
# 3) Install in editable mode via uv (pip-compatible interface)
162+
# 3) Install exactly what the lockfile specifies
163+
uv sync
164+
165+
# 4) Install in editable mode via uv (pip-compatible interface)
163166
uv pip install -e .
164167

165-
# 4) Configure environment
168+
# 5) Configure environment
166169
cp .env.example .env # then edit values
167170
# or set directly
168171
export OPENAI_API_KEY="sk-..."
@@ -304,7 +307,7 @@ Use the **routine-discovery pipeline** to analyze captured data and synthesize a
304307
**Linux/macOS (bash):**
305308
```bash
306309
python scripts/discover_routines.py \
307-
--task "recover the api endpoints for searching for trains and their prices" \
310+
--task "Recover API endpoints for searching for trains and their prices" \
308311
--cdp-captures-dir ./cdp_captures \
309312
--output-dir ./routine_discovery_output \
310313
--llm-model gpt-5

scripts/browser_monitor.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212
import shutil
1313
import sys
1414

15+
from src.config import Config
1516
from src.cdp.cdp_session import CDPSession
1617
from src.data_models.network import ResourceType
1718
from src.cdp.tab_managements import cdp_new_tab, dispose_context
1819

19-
logging.basicConfig(level=logging.INFO)
20+
logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT)
2021
logger = logging.getLogger(__name__)
2122

2223
# ---- Configuration ----
@@ -381,7 +382,7 @@ def main():
381382
logger.info(f"│ └── response_body.[ext]")
382383
logger.info(f"└── storage/")
383384
logger.info(f" └── events.jsonl")
384-
logger.info()
385+
logger.info("\n")
385386
logger.info(f"Session complete! Check {args.output_dir} for all outputs.")
386387

387388
except Exception as e:

scripts/discover_routines.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
"""
2+
scripts/discover_routines.py
3+
24
Script for discovering routines from the network transactions.
35
"""
46

@@ -13,7 +15,7 @@
1315
from src.routine_discovery.agent import RoutineDiscoveryAgent
1416
from src.routine_discovery.context_manager import ContextManager
1517

16-
logging.basicConfig(level=logging.INFO)
18+
logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT)
1719
logger = logging.getLogger(__name__)
1820

1921

scripts/execute_routine.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@
1818
import json
1919
import logging
2020

21+
from src.config import Config
2122
from src.cdp.routine_execution import execute_routine
2223
from src.data_models.production_routine import Routine
2324

24-
logging.basicConfig(level=logging.INFO)
25+
logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT)
2526
logger = logging.getLogger(__name__)
2627

2728

src/cdp/cdp_session.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@
1010
import threading
1111
import time
1212

13+
from src.config import Config
1314
from src.cdp.network_monitor import NetworkMonitor
1415
from src.cdp.storage_monitor import StorageMonitor
1516

16-
logging.basicConfig(level=logging.INFO)
17+
logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT)
1718
logger = logging.getLogger(__name__)
1819

1920

src/cdp/network_monitor.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from fnmatch import fnmatch
2929
from typing import Any
3030

31+
from src.config import Config
3132
from src.utils.cdp_utils import (
3233
build_pair_dir,
3334
get_set_cookie_values,
@@ -37,7 +38,7 @@
3738
from src.data_models.network import Stage
3839

3940

40-
logging.basicConfig(level=logging.INFO)
41+
logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT)
4142
logger = logging.getLogger(__name__)
4243

4344

src/cdp/routine_execution.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""
2+
src/cdp/routine_execution.py
3+
4+
Execute a routine using Chrome DevTools Protocol.
5+
"""
6+
17
import json
28
import logging
39
import random
@@ -9,6 +15,7 @@
915
import requests
1016
import websocket
1117

18+
from src.config import Config
1219
from src.data_models.production_routine import (
1320
Routine,
1421
Endpoint,
@@ -18,7 +25,7 @@
1825
RoutineSleepOperation,
1926
)
2027

21-
logging.basicConfig(level=logging.DEBUG)
28+
logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT)
2229
logger = logging.getLogger(__name__)
2330

2431

src/config.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,32 @@
44
Centralized environment variable configuration.
55
"""
66

7+
import logging
78
import os
89
from typing import Any
910

1011
from dotenv import load_dotenv
1112

1213
load_dotenv()
1314

15+
# configure httpx logger to suppress verbose HTTP logs
16+
logging.getLogger("httpx").setLevel(logging.WARNING)
17+
1418

1519
class Config():
1620
"""
1721
Centralized configuration for environment variables.
1822
"""
1923

24+
# logging configuration
25+
LOG_LEVEL: int = logging.getLevelNamesMapping().get(
26+
os.getenv("LOG_LEVEL", "INFO").upper(),
27+
logging.INFO
28+
)
29+
LOG_DATE_FORMAT: str = os.getenv("LOG_DATE_FORMAT", "%Y-%m-%d %H:%M:%S")
30+
LOG_FORMAT: str = os.getenv("LOG_FORMAT", "[%(asctime)s] %(levelname)s:%(name)s:%(message)s")
31+
32+
# API keys
2033
OPENAI_API_KEY: str | None = os.getenv("OPENAI_API_KEY")
2134

2235
@classmethod

src/data_models/production_routine.py

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""
2+
src/data_models/production_routine.py
3+
4+
Production routine data models.
5+
"""
6+
17
import re
28
import time
39
import uuid
@@ -84,6 +90,7 @@ class BuiltinParameter(BaseModel):
8490
description="Function to generate the builtin parameter value"
8591
)
8692

93+
8794
BUILTIN_PARAMETERS = [
8895
BuiltinParameter(
8996
name="uuid",
@@ -101,7 +108,6 @@ class BuiltinParameter(BaseModel):
101108
class Parameter(BaseModel):
102109
"""
103110
Parameter model with comprehensive validation and type information.
104-
105111
Fields:
106112
name (str): Parameter name (must be valid Python identifier)
107113
type (ParameterType): Parameter data type
@@ -117,13 +123,12 @@ class Parameter(BaseModel):
117123
enum_values (list[str] | None): Allowed values for enum type
118124
format (str | None): Format specification (e.g., 'YYYY-MM-DD')
119125
"""
120-
126+
121127
# reserved prefixes: names that cannot be used at the beginning of a parameter name
122128
RESERVED_PREFIXES: ClassVar[list[str]] = [
123129
"sessionStorage", "localStorage", "cookie", "meta", "uuid", "epoch_milliseconds"
124130
]
125-
126-
131+
127132
name: str = Field(..., description="Parameter name (must be valid Python identifier)")
128133
type: ParameterType = Field(
129134
default=ParameterType.STRING,
@@ -179,24 +184,23 @@ def validate_name(cls, v):
179184
"""Ensure parameter name is a valid Python identifier and not reserved."""
180185
if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', v):
181186
raise ValueError(f"Parameter name '{v}' is not a valid Python identifier")
182-
187+
183188
# Check for reserved prefixes
184189
for prefix in cls.RESERVED_PREFIXES:
185190
if v.startswith(prefix):
186191
raise ValueError(
187192
f"Parameter name '{v}' cannot start with '{prefix}'. "
188193
f"Reserved prefixes: {cls.RESERVED_PREFIXES}"
189194
)
190-
195+
191196
return v
192197

193-
@field_validator('type')
194-
@classmethod
195-
def validate_type_consistency(cls, v, info):
198+
@model_validator(mode='after')
199+
def validate_type_consistency(self) -> 'Parameter':
196200
"""Validate type-specific constraints are consistent."""
197-
if v == ParameterType.ENUM and not info.data.get('enum_values'):
201+
if self.type == ParameterType.ENUM and not self.enum_values:
198202
raise ValueError("enum_values must be provided for enum type")
199-
return v
203+
return self
200204

201205
@field_validator('default')
202206
@classmethod
@@ -226,7 +230,6 @@ def validate_default_type(cls, v, info):
226230
else:
227231
raise ValueError(f"Default value {v} is not a valid boolean value")
228232
raise ValueError(f"Default value {v} cannot be converted to boolean")
229-
230233
return v
231234

232235
@field_validator('examples')
@@ -267,7 +270,6 @@ def validate_examples_type(cls, v, info):
267270
return validated_examples
268271

269272

270-
271273
class HTTPMethod(StrEnum):
272274
"""
273275
Supported HTTP methods for API endpoints.
@@ -319,7 +321,6 @@ class RoutineOperationTypes(StrEnum):
319321
RETURN = "return"
320322

321323

322-
323324
class RoutineOperation(BaseModel):
324325
"""
325326
Base class for routine operations.
@@ -441,31 +442,47 @@ def validate_parameter_usage(self) -> 'Routine':
441442
and no undefined parameters are used.
442443
Raises ValueError if unused parameters are found or undefined parameters are used.
443444
"""
445+
# Check 0: Ensure name and description fields don't contain parameter placeholders
446+
# These are metadata fields and should not have interpolation patterns
447+
param_pattern = r'\{\{([^}]*)\}\}'
448+
# check in Routine.name
449+
name_matches = re.findall(param_pattern, self.name)
450+
if name_matches:
451+
raise ValueError(
452+
f"Parameter placeholders found in routine name '{self.name}': {name_matches}. "
453+
"The 'name' field is a metadata field and should not contain parameter placeholders like {{param}}."
454+
)
455+
# check in Routine.description
456+
description_matches = re.findall(param_pattern, self.description)
457+
if description_matches:
458+
raise ValueError(
459+
f"Parameter placeholders found in routine description: {description_matches}. "
460+
"The 'description' field is a metadata field and should not contain parameter placeholders like {{param}}."
461+
)
462+
444463
# list of builtin parameter names
445464
builtin_parameter_names = [builtin_parameter.name for builtin_parameter in BUILTIN_PARAMETERS]
446-
465+
447466
# Convert the entire routine to JSON string for searching
448467
routine_json = self.model_dump_json()
449468

450469
# Extract all parameter names
451470
defined_parameters = {param.name for param in self.parameters}
452471

453-
# Find all parameter usages in the JSON: *"{{*}}"*
454-
# Match quoted placeholders: "{{param}}" or \"{{param}}\" (escaped quotes in JSON strings)
455-
# \"{{param}}\" in JSON string means "{{param}}" in actual value
456-
# Pattern REQUIRES quotes (either " or \") immediately before {{ and after }}
457-
param_pattern = r'(?:"|\\")\{\{([^}"]*)\}\}(?:"|\\")'
472+
# Find all parameter usages in the JSON: {{*}}
473+
# Match placeholders anywhere: {{param}}
474+
# This matches parameters whether they're standalone quoted values or embedded in strings
475+
param_pattern = r'\{\{([^}]*)\}\}'
458476
matches = re.findall(param_pattern, routine_json)
459-
477+
460478
# track used parameters
461479
used_parameters = set()
462-
480+
463481
# iterate over all parameter usages
464482
for match in matches:
465-
466483
# clean the match (already extracted the content between braces)
467484
match = match.strip()
468-
485+
469486
# if the parameter name contains a colon, it is a storage parameter
470487
if ":" in match:
471488
kind, path = [p.strip() for p in match.split(":", 1)]
@@ -484,15 +501,15 @@ def validate_parameter_usage(self) -> 'Routine':
484501
if unused_parameters:
485502
raise ValueError(
486503
f"Unused parameters found in routine '{self.name}': {list(unused_parameters)}. "
487-
f"All defined parameters must be used somewhere in the routine operations."
504+
"All defined parameters must be used somewhere in the routine operations."
488505
)
489506

490507
# Check 2: No undefined parameters should be used
491508
undefined_parameters = used_parameters - defined_parameters
492509
if undefined_parameters:
493510
raise ValueError(
494511
f"Undefined parameters found in routine '{self.name}': {list(undefined_parameters)}. "
495-
f"All parameters used in the routine must be defined in parameters."
512+
"All parameters used in the routine must be defined in parameters."
496513
)
497514

498515
return self

0 commit comments

Comments
 (0)