Skip to content

Commit 963be49

Browse files
committed
feat: temp workspace and cargo file
1 parent d701388 commit 963be49

File tree

10 files changed

+569
-652
lines changed

10 files changed

+569
-652
lines changed

Cargo.lock

Lines changed: 1 addition & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ version = "0.1.0"
44
edition = "2024"
55

66
[dependencies]
7-
rmcp = { version = "0.1.5", features = ["tower", "transport-io", "transport-sse-server"] }
7+
rmcp = { version = "0.1.5", features = ["tower", "transport-io", "transport-sse-server", "macros", "server"] } # Add the macros and server features (schemars is declared as its own dependency below)
88
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
99
dotenvy = "0.15"
1010
serde = { version = "1", features = ["derive"] }
@@ -14,11 +14,12 @@ walkdir = "2.5.0"
1414
scraper = "0.23.1"
1515
ndarray = { version = "0.16.1", features = ["serde"] } # Enable serde feature
1616
async-openai = "0.28.0"
17-
async-trait = "0.1.88"
17+
# async-trait = "0.1.88" # Removed, likely no longer needed
1818
futures = "0.3"
1919
bincode = { version = "2.0.1", features = ["serde"] } # Enable serde integration
2020
tiktoken-rs = "0.6.0"
2121
cargo = "0.86.0"
2222
tempfile = "3.19.1"
2323
xdg = { version = "2.5.2", features = ["serde"] }
2424
anyhow = "1.0.97"
25+
schemars = "0.8.22"

src/doc_loader.rs

Lines changed: 68 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use scraper::{Html, Selector};
2-
use std::fs;
2+
use std::{fs::{self, File, create_dir_all}, io::Write}; // Added File, create_dir_all, and Write
33
use cargo::core::resolver::features::CliFeatures;
44
// use cargo::core::SourceId; // Removed unused import
55
// use cargo::util::Filesystem; // Removed unused import
@@ -37,20 +37,47 @@ pub struct Document {
3737
/// Generates documentation for a given crate in a temporary directory,
3838
/// then loads and parses the HTML documents.
3939
/// Extracts text content from the main content area of rustdoc generated HTML.
40-
pub fn load_documents(crate_name: &str, _crate_version: &str) -> Result<Vec<Document>, DocLoaderError> { // Mark version as unused for now
41-
println!("[DEBUG] load_documents called with crate_name: '{}', crate_version: '{}'", crate_name, _crate_version);
40+
pub fn load_documents(crate_name: &str, crate_version: &str) -> Result<Vec<Document>, DocLoaderError> { // Use crate_version
41+
eprintln!("[DEBUG] load_documents called with crate_name: '{}', crate_version: '{}'", crate_name, crate_version);
4242
let mut documents = Vec::new();
4343

4444
let temp_dir = tempdir().map_err(DocLoaderError::TempDirCreationFailed)?;
4545
let temp_dir_path = temp_dir.path();
46+
let temp_manifest_path = temp_dir_path.join("Cargo.toml");
4647

47-
println!(
48-
"Generating documentation for crate '{}' in temporary directory: {}",
48+
eprintln!(
49+
"Generating documentation for crate '{}' version '{}' in temporary directory: {}",
4950
crate_name,
51+
crate_version,
5052
temp_dir_path.display()
5153
);
5254

53-
// Execute `cargo doc` using std::process::Command
55+
// Create a temporary Cargo.toml
56+
let cargo_toml_content = format!(
57+
r#"[package]
58+
name = "temp-doc-crate"
59+
version = "0.1.0"
60+
edition = "2021"
61+
62+
[lib] # Add an empty lib target to satisfy Cargo
63+
64+
[dependencies]
65+
{} = "{}"
66+
"#,
67+
crate_name, crate_version
68+
);
69+
70+
// Create the src directory and an empty lib.rs file
71+
let src_path = temp_dir_path.join("src");
72+
create_dir_all(&src_path)?;
73+
File::create(src_path.join("lib.rs"))?;
74+
eprintln!("[DEBUG] Created empty src/lib.rs at: {}", src_path.join("lib.rs").display());
75+
76+
let mut temp_manifest_file = File::create(&temp_manifest_path)?;
77+
temp_manifest_file.write_all(cargo_toml_content.as_bytes())?;
78+
eprintln!("[DEBUG] Created temporary manifest at: {}", temp_manifest_path.display());
79+
80+
5481
// --- Use Cargo API ---
5582
let mut config = GlobalContext::default()?; // Make mutable
5683
// Configure context for quiet operation
@@ -67,17 +94,17 @@ pub fn load_documents(crate_name: &str, _crate_version: &str) -> Result<Vec<Docu
6794
)?;
6895
// config.shell().set_verbosity(Verbosity::Quiet); // Keep commented
6996

70-
let current_dir = std::env::current_dir()?;
71-
let mut ws = Workspace::new(&current_dir.join("Cargo.toml"), &config)?; // Make ws mutable
72-
println!("[DEBUG] Workspace target dir before set: {}", ws.target_dir().as_path_unlocked().display());
97+
// Use the temporary manifest path for the Workspace
98+
let mut ws = Workspace::new(&temp_manifest_path, &config)?; // Make ws mutable
99+
eprintln!("[DEBUG] Workspace target dir before set: {}", ws.target_dir().as_path_unlocked().display());
73100
// Set target_dir directly on Workspace
74101
ws.set_target_dir(cargo::util::Filesystem::new(temp_dir_path.to_path_buf()));
75-
println!("[DEBUG] Workspace target dir after set: {}", ws.target_dir().as_path_unlocked().display());
102+
eprintln!("[DEBUG] Workspace target dir after set: {}", ws.target_dir().as_path_unlocked().display());
76103

77104
// Create CompileOptions, relying on ::new for BuildConfig
78105
let mut compile_opts = CompileOptions::new(&config, cargo::core::compiler::CompileMode::Doc { deps: false, json: false })?;
79106
// Specify the package explicitly
80-
let package_spec = crate_name.replace('-', "_"); // Just use name (with underscores)
107+
let package_spec = crate_name.to_string(); // Use the crate name as given (hyphens are no longer converted to underscores)
81108
compile_opts.cli_features = CliFeatures::new_all(false); // Use new_all(false)
82109
compile_opts.spec = Packages::Packages(vec![package_spec.clone()]); // Clone spec
83110

@@ -87,7 +114,7 @@ pub fn load_documents(crate_name: &str, _crate_version: &str) -> Result<Vec<Docu
87114
open_result: false, // Don't open in browser
88115
output_format: ops::OutputFormat::Html,
89116
};
90-
println!("[DEBUG] package_spec for CompileOptions: '{}'", package_spec);
117+
eprintln!("[DEBUG] package_spec for CompileOptions: '{}'", package_spec);
91118

92119
ops::doc(&ws, &doc_opts).map_err(DocLoaderError::CargoLib)?; // Use ws
93120
// --- End Cargo API ---
@@ -97,49 +124,57 @@ pub fn load_documents(crate_name: &str, _crate_version: &str) -> Result<Vec<Docu
97124
let docs_path = temp_dir_path.join("doc").join(&crate_name_underscores);
98125

99126
// Debug print the relevant options that were passed to ops::doc (the call has already completed at this point)
100-
println!("[DEBUG] CompileOptions spec: {:?}", doc_opts.compile_opts.spec);
101-
println!("[DEBUG] CompileOptions cli_features: {:?}", doc_opts.compile_opts.cli_features);
102-
println!("[DEBUG] CompileOptions build_config mode: {:?}", doc_opts.compile_opts.build_config.mode);
103-
println!("[DEBUG] DocOptions output_format: {:?}", doc_opts.output_format);
127+
eprintln!("[DEBUG] CompileOptions spec: {:?}", doc_opts.compile_opts.spec);
128+
eprintln!("[DEBUG] CompileOptions cli_features: {:?}", doc_opts.compile_opts.cli_features);
129+
eprintln!("[DEBUG] CompileOptions build_config mode: {:?}", doc_opts.compile_opts.build_config.mode);
130+
eprintln!("[DEBUG] DocOptions output_format: {:?}", doc_opts.output_format);
104131
if !docs_path.exists() || !docs_path.is_dir() {
105132
return Err(DocLoaderError::CargoLib(anyhow::anyhow!(
106133
"Generated documentation not found at expected path: {}. Check crate name and cargo doc output.",
107134
docs_path.display()
108135
)));
109136
}
110-
println!("Generated documentation path: {}", docs_path.display());
137+
eprintln!("Generated documentation path: {}", docs_path.display());
111138

112-
println!("[DEBUG] ops::doc called successfully.");
139+
eprintln!("[DEBUG] ops::doc called successfully.");
113140

114141
// Define the CSS selector for the main content area in rustdoc HTML
115142
// This might need adjustment based on the exact rustdoc version/theme
116143
let content_selector = Selector::parse("section#main-content.content")
117144
.map_err(|e| DocLoaderError::Selector(e.to_string()))?;
118-
println!("[DEBUG] Calculated final docs_path: {}", docs_path.display());
145+
eprintln!("[DEBUG] Calculated final docs_path: {}", docs_path.display());
119146

120-
println!("Starting document loading from: {}", docs_path.display());
121-
println!("[DEBUG] docs_path does not exist or is not a directory.");
147+
eprintln!("Starting document loading from: {}", docs_path.display());
148+
eprintln!("[DEBUG] docs_path does not exist or is not a directory.");
122149

123-
for entry in WalkDir::new(docs_path)
150+
for entry in WalkDir::new(&docs_path)
124151
.into_iter()
125152
.filter_map(Result::ok) // Ignore errors during iteration for now
126153
.filter(|e| !e.file_type().is_dir() && e.path().extension().is_some_and(|ext| ext == "html"))
127154
{
128155
let path = entry.path();
129-
let path_str = path.to_string_lossy().to_string();
130-
// println!("Processing file: {}", path.display()); // Uncommented
131-
132-
// println!(" Reading file content..."); // Added
133-
let html_content = fs::read_to_string(path)?;
134-
// println!(" Parsing HTML..."); // Added
156+
// Calculate path relative to the docs_path root
157+
let relative_path = path.strip_prefix(&docs_path).map_err(|e| {
158+
// Provide more context in the error message
159+
DocLoaderError::Io(std::io::Error::new(
160+
std::io::ErrorKind::Other,
161+
format!("Failed to strip prefix '{}' from path '{}': {}", docs_path.display(), path.display(), e)
162+
))
163+
})?;
164+
let path_str = relative_path.to_string_lossy().to_string(); // Use the relative path
165+
// eprintln!("Processing file: {} (relative: {})", path.display(), path_str); // Updated debug log
166+
167+
// eprintln!(" Reading file content..."); // Added
168+
let html_content = fs::read_to_string(path)?; // Still read from the absolute path
169+
// eprintln!(" Parsing HTML..."); // Added
135170

136171
// Parse the HTML document
137172
let document = Html::parse_document(&html_content);
138173

139174
// Select the main content element
140175
if let Some(main_content_element) = document.select(&content_selector).next() {
141176
// Extract all text nodes within the main content
142-
// println!(" Extracting text content..."); // Added
177+
// eprintln!(" Extracting text content..."); // Added
143178
let text_content: String = main_content_element
144179
.text()
145180
.map(|s| s.trim())
@@ -148,20 +183,20 @@ pub fn load_documents(crate_name: &str, _crate_version: &str) -> Result<Vec<Docu
148183
.join("\n"); // Join text nodes with newlines
149184

150185
if !text_content.is_empty() {
151-
// println!(" Extracted content ({} chars)", text_content.len()); // Uncommented and simplified
186+
// eprintln!(" Extracted content ({} chars)", text_content.len()); // Uncommented and simplified
152187
documents.push(Document {
153188
path: path_str,
154189
content: text_content,
155190
});
156191
} else {
157-
// println!("No text content found in main section for: {}", path.display()); // Verbose logging
192+
// eprintln!("No text content found in main section for: {}", path.display()); // Verbose logging
158193
}
159194
} else {
160-
// println!("'main-content' selector not found for: {}", path.display()); // Verbose logging
195+
// eprintln!("'main-content' selector not found for: {}", path.display()); // Verbose logging
161196
// Optionally handle files without the main content selector differently
162197
}
163198
}
164199

165-
println!("Finished document loading. Found {} documents.", documents.len());
200+
eprintln!("Finished document loading. Found {} documents.", documents.len());
166201
Ok(documents)
167202
}

src/embeddings.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@ pub static OPENAI_CLIENT: OnceLock<OpenAIClient<OpenAIConfig>> = OnceLock::new()
1616
use bincode::{Encode, Decode};
1717
use serde::{Serialize, Deserialize};
1818

19-
// Define a struct suitable for serialization with bincode
20-
#[derive(Serialize, Deserialize, Debug, Encode, Decode)] // Add Encode, Decode
21-
pub struct SerializableEmbedding {
19+
// Define a struct containing path, content, and embedding for caching
20+
#[derive(Serialize, Deserialize, Debug, Encode, Decode)]
21+
pub struct CachedDocumentEmbedding {
2222
pub path: String,
23+
pub content: String, // Add the extracted document content
2324
pub vector: Vec<f32>,
2425
}
2526

27+
2628
/// Calculates the cosine similarity between two vectors.
2729
pub fn cosine_similarity(v1: ArrayView1<f32>, v2: ArrayView1<f32>) -> f32 {
2830
let dot_product = v1.dot(&v2);
@@ -42,7 +44,7 @@ pub async fn generate_embeddings(
4244
documents: &[Document],
4345
model: &str,
4446
) -> Result<(Vec<(String, Array1<f32>)>, usize), ServerError> { // Return tuple: (embeddings, total_tokens)
45-
// println!("Generating embeddings for {} documents...", documents.len());
47+
// eprintln!("Generating embeddings for {} documents...", documents.len());
4648

4749
// Get the tokenizer for the model and wrap in Arc
4850
let bpe = Arc::new(cl100k_base().map_err(|e| ServerError::Tiktoken(e.to_string()))?);
@@ -63,7 +65,7 @@ pub async fn generate_embeddings(
6365
let token_count = bpe.encode_with_special_tokens(&doc.content).len();
6466

6567
if token_count > TOKEN_LIMIT {
66-
// println!(
68+
// eprintln!(
6769
// " Skipping document {}: Actual tokens ({}) exceed limit ({}). Path: {}",
6870
// index + 1,
6971
// token_count,
@@ -82,14 +84,14 @@ pub async fn generate_embeddings(
8284
.input(inputs)
8385
.build()?; // Propagates OpenAIError
8486

85-
// println!(
87+
// eprintln!(
8688
// " Sending request for document {} ({} tokens)... Path: {}",
8789
// index + 1,
8890
// token_count, // Use correct variable name
8991
// doc.path
9092
// );
9193
let response = client.embeddings().create(request).await?; // Propagates OpenAIError
92-
// println!(" Received response for document {}.", index + 1);
94+
// eprintln!(" Received response for document {}.", index + 1);
9395

9496
if response.data.len() != 1 {
9597
return Err(ServerError::OpenAI(
@@ -129,13 +131,13 @@ pub async fn generate_embeddings(
129131
Err(e) => {
130132
// Log error but potentially continue? Or return the first error?
131133
// For now, let's return the first error encountered.
132-
println!("Error during concurrent embedding generation: {}", e);
134+
eprintln!("Error during concurrent embedding generation: {}", e);
133135
return Err(e);
134136
}
135137
}
136138
}
137139

138-
println!(
140+
eprintln!(
139141
"Finished generating embeddings. Successfully processed {} documents ({} tokens).",
140142
embeddings_vec.len(), total_processed_tokens
141143
);

src/error.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,6 @@ pub enum ServerError {
2222
Tiktoken(String),
2323
#[error("XDG Directory Error: {0}")]
2424
Xdg(String),
25+
#[error("MCP Runtime Error: {0}")]
26+
McpRuntime(String),
2527
}

0 commit comments

Comments
 (0)