Skip to content

Commit e52b47e

Browse files
committed
add smart ingest from repo
1 parent 0f34733 commit e52b47e

File tree

6 files changed

+567
-10
lines changed

6 files changed

+567
-10
lines changed

app/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from fastapi.staticfiles import StaticFiles
44
from fastapi.templating import Jinja2Templates
55
from pathlib import Path
6-
from app.routes import install, actions, smart_ingest_route
6+
from app.routes import install, actions, smart_ingest_route, recommend
77
from app.services.actions_loader import actions_loader
88
from api_analytics.fastapi import Analytics
99
from fastapi_mcp import FastApiMCP
@@ -29,6 +29,7 @@
2929
app.include_router(install.router)
3030
app.include_router(actions.router)
3131
app.include_router(smart_ingest_route.router)
32+
app.include_router(recommend.router)
3233

3334
@app.get("/favicon.ico", operation_id="get_favicon")
3435
async def favicon():

app/routes/recommend.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
Route for tool recommendations based on repository analysis.
3+
"""
4+
5+
from fastapi import APIRouter, HTTPException
6+
from pydantic import BaseModel
7+
from typing import Optional, Dict, List
8+
from app.services.smart_ingest import use_gitingest
9+
from app.services.recommend_tools import (
10+
build_tools_catalog,
11+
get_catalog_version,
12+
format_catalog_for_prompt,
13+
call_llm_for_reco,
14+
parse_and_validate
15+
)
16+
17+
# Router for the recommendation endpoint(s); every path registered on it is
# served under the "/api" prefix and tagged "recommend".
router = APIRouter(prefix="/api", tags=["recommend"])
18+
19+
20+
class RecommendRequest(BaseModel):
    """
    Request body for POST /api/recommend.

    The handler rejects the request with HTTP 400 unless at least one of
    repo_url or context is provided; when both are given, context wins and
    no ingestion takes place.
    """

    # Repository location handed to use_gitingest when no context is supplied.
    repo_url: Optional[str] = None
    # Pre-ingested repository context; used as-is when present.
    context: Optional[str] = None
    # Free-form guidance appended to the LLM user message as "User focus".
    user_prompt: Optional[str] = "Pick minimal useful tools for this repo"
24+
25+
26+
class PreselectionData(BaseModel):
    """Slugs chosen for each tool category after validation against the catalog."""

    # Selected rule / ruleset slugs.
    rules: List[str]
    # Selected agent slugs.
    agents: List[str]
    # Selected MCP names (MCPs use their name as slug).
    mcps: List[str]
30+
31+
32+
class RecommendResponse(BaseModel):
    """Response body returned by POST /api/recommend."""

    # Set to True by the handler on the successful path.
    success: bool
    # Validated tool selection per category.
    preselect: PreselectionData
    # Optional one-line reasons keyed "<category>:<slug>".
    rationales: Optional[Dict[str, str]] = None
    # len() of the context string that was sent to the LLM.
    context_size: int
    # Short hash identifying the catalog the selection was validated against.
    catalog_version: str
    # Unparsed LLM output, returned for debugging.
    raw: Optional[str] = None
39+
40+
41+
@router.post("/recommend", response_model=RecommendResponse)
async def recommend_tools(request: RecommendRequest):
    """
    Analyze a repository and recommend minimal useful tools.

    Accepts either repo_url (for ingestion) or context (pre-ingested).
    Returns a minimal selection of rules, agents, and MCPs.

    Raises:
        HTTPException(400): neither repo_url nor context was provided.
        HTTPException(500): ingestion, the LLM call or validation failed;
            the detail carries the underlying error message.
    """
    # Imported locally so this block stays self-contained; fastapi is already
    # a dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    # Validate input - need at least one
    if not request.repo_url and not request.context:
        raise HTTPException(
            status_code=400,
            detail="Either repo_url or context must be provided"
        )

    try:
        # Step 1: Get context (ingest only when the caller did not supply it)
        if request.context:
            context = request.context
        else:
            context = await use_gitingest(request.repo_url)

        context_size = len(context)

        # Step 2: Build catalog
        catalog = build_tools_catalog()
        catalog_version = get_catalog_version(catalog)

        # Step 3: Format catalog for LLM
        catalog_text = format_catalog_for_prompt(catalog)

        # Step 4: Call LLM. call_llm_for_reco performs a blocking HTTP request
        # (up to 60 s), so it must run in a worker thread; calling it directly
        # from this coroutine would stall the event loop for every other
        # request being served.
        llm_raw = await run_in_threadpool(
            call_llm_for_reco,
            context=context,
            catalog_text=catalog_text,
            user_prompt=request.user_prompt or ""
        )

        # Step 5: Parse and validate
        preselect, rationales = parse_and_validate(llm_raw, catalog)

        return RecommendResponse(
            success=True,
            preselect=PreselectionData(**preselect),
            rationales=rationales,
            context_size=context_size,
            catalog_version=catalog_version,
            raw=llm_raw  # Include for debugging
        )

    except HTTPException:
        # Preserve deliberate HTTP errors raised further down the stack.
        raise
    except Exception as e:
        # Chain the cause so server-side tracebacks keep the original error.
        raise HTTPException(status_code=500, detail=str(e)) from e

app/services/recommend_tools.py

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
"""
2+
Service for recommending tools based on repository context.
3+
"""
4+
5+
import json
6+
import hashlib
7+
from typing import Dict, List, Tuple, Optional, Any
8+
from app.services.actions_loader import actions_loader
9+
import httpx
10+
from dotenv import load_dotenv
11+
import os
12+
13+
# Load environment variables
14+
# Import-time side effect: runs once when this module is first imported, so
# that os.getenv("OPENAI_API_KEY") in call_llm_for_reco can see the key.
# NOTE(review): presumably the key lives in a local .env file - confirm.
load_dotenv()
15+
16+
17+
def build_tools_catalog() -> Dict[str, List[Dict[str, Any]]]:
    """
    Assemble the catalog of selectable tools exposed by actions_loader.

    Returns:
        Mapping with the keys "agents", "rules" and "mcps". Every entry
        carries slug, display_name and tags; rule entries additionally
        carry type. Each list is ordered by slug so the output is stable.
    """
    def _by_slug(entry: Dict[str, Any]) -> Any:
        return entry["slug"]

    # Agents and rules fall back to their name when slug/display_name is empty.
    agent_entries = [
        {
            "slug": agent.slug or agent.name,
            "display_name": agent.display_name or agent.name,
            "tags": getattr(agent, 'tags', []) or [],
        }
        for agent in actions_loader.get_agents()
    ]

    rule_entries = [
        {
            "slug": rule.slug or rule.name,
            "display_name": rule.display_name or rule.name,
            "type": rule.type,  # 'rule' or 'ruleset'
            "tags": getattr(rule, 'tags', []) or [],
        }
        for rule in actions_loader.get_rules()
    ]

    # MCPs are identified by name only and have no tags in the current structure.
    mcp_entries = [
        {"slug": mcp.name, "display_name": mcp.name, "tags": []}
        for mcp in actions_loader.get_mcps()
    ]

    return {
        "agents": sorted(agent_entries, key=_by_slug),
        "rules": sorted(rule_entries, key=_by_slug),
        "mcps": sorted(mcp_entries, key=_by_slug),
    }
62+
63+
64+
def get_catalog_version(catalog: Dict[str, List[Dict[str, Any]]]) -> str:
    """
    Derive a short, order-independent fingerprint of the catalog's slugs.

    Args:
        catalog: The tools catalog (keys "agents", "rules", "mcps")

    Returns:
        First 8 hex characters of the SHA1 of all slugs, sorted and
        comma-joined
    """
    ordered_slugs = sorted(
        entry["slug"]
        for section in ("agents", "rules", "mcps")
        for entry in catalog[section]
    )
    digest = hashlib.sha1(",".join(ordered_slugs).encode())
    return digest.hexdigest()[:8]
81+
82+
83+
def _format_catalog_entry(item: Dict[str, Any], include_type: bool = False) -> str:
    """Render one catalog item as ' slug — display name (type) — [tags]'."""
    line = f" {item['slug']} — {item['display_name']}"
    if include_type and item.get("type"):
        line += f" ({item['type']})"
    tags = item.get("tags")
    if tags:
        # str() guards against non-string tag values breaking the join.
        line += f" — [{', '.join(str(tag) for tag in tags)}]"
    return line


def format_catalog_for_prompt(catalog: Dict[str, List[Dict[str, Any]]]) -> str:
    """
    Format the catalog into a compact text for the LLM prompt.

    Each item is rendered on its own line with the slug first, followed by
    the display name, the type (rules only) and the tags, all explicitly
    separated. Without separators the slug runs straight into the display
    name and the model cannot tell which characters form the slug it is
    required to echo back.

    Args:
        catalog: The tools catalog (keys "agents", "rules", "mcps"); a
            missing section is treated as empty

    Returns:
        Formatted string with one header line per section and one line per tool
    """
    sections = (
        ("- Agents:", "agents", False),
        ("- Rules:", "rules", True),  # only rules carry a type
        ("- MCPs:", "mcps", False),
    )

    lines = []
    for header, key, include_type in sections:
        lines.append(header)
        lines.extend(
            _format_catalog_entry(item, include_type)
            for item in catalog.get(key, [])
        )

    return "\n".join(lines)
114+
115+
116+
def call_llm_for_reco(context: str, catalog_text: str, user_prompt: str = "", api_key: Optional[str] = None) -> str:
    """
    Call the LLM to get tool recommendations.

    This function blocks on a synchronous HTTP request (60 s timeout); async
    callers should run it in a worker thread.

    Args:
        context: Repository context (summary + tree + content)
        catalog_text: Formatted catalog of available tools
        user_prompt: Optional user guidance
        api_key: Optional OpenAI API key; falls back to the OPENAI_API_KEY
            environment variable

    Returns:
        Raw LLM response string (expected to be a JSON object)

    Raises:
        ValueError: no API key was passed and OPENAI_API_KEY is not set.
        Exception: the request failed, returned an error status, or the
            response carried no text content. The original error is chained
            as __cause__.
    """
    # Get API key
    api_key = api_key or os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found")

    # System prompt
    system_prompt = """You are "Tool Recommender for Codebases." Your job is to read a repository context and choose a minimal set of helpful tools (rules, agents, MCPs) from the provided catalog.

Hard requirements:
- Output strictly valid JSON. No markdown, no commentary.
- Use only the slugs present in the catalog below.
- Prefer minimal selections: 0–2 per category (maximum 3).
- If unsure, return empty arrays.

Selection guidelines:
- Pick items that improve correctness, safety, or developer workflow for this codebase.
- Avoid redundant overlap (e.g., don't pick both a ruleset and all its child rules).
- Skip "fun/novelty" items unless clearly beneficial.
- Base the decision solely on the given repository context and the catalog.

Catalog (one line per item, slug first):
""" + catalog_text + """

Return JSON with this exact shape:
- rules: array of slugs
- agents: array of slugs
- mcps: array of slugs
- rationales (optional): object whose keys are "rules:<slug>", "agents:<slug>", "mcps:<slug>" and whose values are short one-line reasons.

You will now receive the repository context (summary, tree, truncated content) and an optional user focus. Choose minimal helpful tools from the catalog and return JSON only."""

    # User message
    user_message = "Here is the codebase context (truncated). Choose minimal useful tools from the catalog above.\n\n"
    user_message += context
    if user_prompt:
        user_message += f"\n\nUser focus: {user_prompt}"

    # Prepare request
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        "temperature": 0.2,  # Low temperature for consistency
        "max_tokens": 1000,
        # JSON mode: asks the API to return a single JSON object, so the
        # "strictly valid JSON" requirement does not rest on the prompt alone.
        "response_format": {"type": "json_object"},
    }

    try:
        with httpx.Client(timeout=60.0) as client:
            response = client.post(url, json=data, headers=headers)
            response.raise_for_status()
            result = response.json()

        # Extract the content; it can be null (e.g. on a refusal), which would
        # otherwise leak a None to callers expecting a string.
        content = result["choices"][0]["message"]["content"]
        if not isinstance(content, str):
            raise ValueError("response contained no text content")
        return content

    except Exception as e:
        # Same exception type and message as before, now with the cause chained.
        raise Exception(f"LLM call failed: {str(e)}") from e
198+
199+
200+
def _extract_json_object(text: Any) -> Optional[Dict[str, Any]]:
    """Return the first JSON object decodable from text, or None if there is none."""
    if not isinstance(text, str):
        return None

    try:
        parsed = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # The model may wrap the JSON in markdown fences or commentary. Try to
    # decode a complete (possibly nested) object starting at each '{'.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except ValueError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


def _select_valid_slugs(candidates: Any, valid_slugs: set, limit: int = 3) -> List[str]:
    """Keep up to `limit` distinct string slugs from candidates that exist in valid_slugs."""
    if not isinstance(candidates, list):
        return []

    chosen: List[str] = []
    for slug in candidates:
        # isinstance check first: unhashable entries (dicts/lists) would make
        # the set membership test raise TypeError.
        if isinstance(slug, str) and slug in valid_slugs and slug not in chosen:
            chosen.append(slug)
            if len(chosen) == limit:
                break
    return chosen


def parse_and_validate(llm_raw: str, catalog: Dict[str, List[Dict[str, Any]]]) -> Tuple[Dict[str, List[str]], Optional[Dict[str, str]]]:
    """
    Parse and validate the LLM response against the catalog.

    The response is decoded as JSON; if it is wrapped in markdown or prose,
    the first decodable JSON object inside it is used. Per category, at most
    three distinct slugs that exist in the catalog are kept, in the order the
    model listed them. Invalid or duplicate entries do not count toward the
    limit.

    Args:
        llm_raw: Raw JSON string from LLM
        catalog: The tools catalog for validation

    Returns:
        Tuple of (preselect dict, rationales dict or None). When nothing
        parseable is found the preselect lists are empty and rationales is
        None. Rationales are kept only for selected items and are truncated
        to 200 characters.
    """
    preselect: Dict[str, List[str]] = {"rules": [], "agents": [], "mcps": []}

    data = _extract_json_object(llm_raw)
    if data is None:
        return preselect, None

    # Filter and limit selections (max 3 per category, deduplicated)
    for category in preselect:
        valid_slugs = {item["slug"] for item in catalog[category]}
        preselect[category] = _select_valid_slugs(data.get(category), valid_slugs)

    # Extract rationales if present
    rationales = None
    raw_rationales = data.get("rationales")
    if isinstance(raw_rationales, dict):
        rationales = {}
        for key, value in raw_rationales.items():
            # Keys look like "<category>:<slug>"; slugs may themselves contain ':'.
            category, separator, slug = key.partition(":")
            if separator and slug in preselect.get(category, ()):
                rationales[key] = str(value)[:200]  # Limit length

    return preselect, rationales

0 commit comments

Comments
 (0)