diff --git a/.gitignore b/.gitignore index 12b624e7..11f6e220 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,7 @@ python/build/ python/dist/ python/kubernetes_mcp_server.egg-info/ !python/kubernetes-mcp-server + +.gevals-step* +gevals-kubevirt-vm-operations-out.json +.gemini diff --git a/internal/tools/update-readme/main.go b/internal/tools/update-readme/main.go index cdf695fc..590cfc8d 100644 --- a/internal/tools/update-readme/main.go +++ b/internal/tools/update-readme/main.go @@ -15,6 +15,7 @@ import ( _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/config" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/core" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/helm" + _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt" ) type OpenShift struct{} diff --git a/pkg/kubernetes-mcp-server/cmd/root_test.go b/pkg/kubernetes-mcp-server/cmd/root_test.go index 22521667..3f98736a 100644 --- a/pkg/kubernetes-mcp-server/cmd/root_test.go +++ b/pkg/kubernetes-mcp-server/cmd/root_test.go @@ -137,7 +137,7 @@ func TestToolsets(t *testing.T) { rootCmd := NewMCPServer(ioStreams) rootCmd.SetArgs([]string{"--help"}) o, err := captureOutput(rootCmd.Execute) // --help doesn't use logger/klog, cobra prints directly to stdout - if !strings.Contains(o, "Comma-separated list of MCP toolsets to use (available toolsets: config, core, helm).") { + if !strings.Contains(o, "Comma-separated list of MCP toolsets to use (available toolsets: config, core, helm, kubevirt).") { t.Fatalf("Expected all available toolsets, got %s %v", o, err) } }) diff --git a/pkg/kubernetes/kubernetes.go b/pkg/kubernetes/kubernetes.go index 3b5733e1..a154b74e 100644 --- a/pkg/kubernetes/kubernetes.go +++ b/pkg/kubernetes/kubernetes.go @@ -2,6 +2,7 @@ package kubernetes import ( "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/rest" "github.com/containers/kubernetes-mcp-server/pkg/helm" "k8s.io/client-go/kubernetes/scheme" @@ -30,6 +31,14 @@ func (k *Kubernetes) AccessControlClientset() *AccessControlClientset { return k.manager.accessControlClientSet } +// RESTConfig returns the Kubernetes REST configuration +func (k *Kubernetes) RESTConfig() *rest.Config { + if k.manager == nil { + return nil + } + return k.manager.cfg +} + var Scheme = scheme.Scheme var ParameterCodec = runtime.NewParameterCodec(Scheme) diff --git a/pkg/mcp/modules.go b/pkg/mcp/modules.go index 3295d72b..5356060e 100644 --- a/pkg/mcp/modules.go +++ b/pkg/mcp/modules.go @@ -3,3 +3,4 @@ package mcp import _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/config" import _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/core" import _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/helm" +import _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt" diff --git a/pkg/toolsets/kubevirt/tests/EVAL_README.md b/pkg/toolsets/kubevirt/tests/EVAL_README.md new file mode 100644 index 00000000..5c88892d --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/EVAL_README.md @@ -0,0 +1,682 @@ +# Agent and Model Evaluation System + +This directory contains scripts to run gevals against **any** combination of agent types and OpenAI-compatible models by looking up model credentials from gnome-keyring. 
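+
+For example, once a model's credentials are stored (see Setup below), a single run pairing two agent types might look like this - the model name here is just an illustration:
+
+```bash
+./run-agent-model-evals.sh \
+  -a openai-agent/gemini-2.0-flash \
+  -a claude-code
+```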
+ +## Files + +- **`run-agent-model-evals.sh`** - Main script to run evaluations against agent+model combinations +- **`model-configs.sh`** - Configuration file that retrieves per-model API keys and base URLs from gnome-keyring +- **`EVAL_README.md`** - This file + +## Agent Types + +The system supports multiple agent types, each in its own subdirectory: + +- **`openai-agent/`** - OpenAI-compatible agent implementation +- **`gemini/`** - Google Gemini CLI agent implementation +- **`claude-code/`** - Claude Code agent implementation + +Each agent directory contains: +- `eval.yaml` - Evaluation configuration specific to the agent +- `agent.yaml` - Agent configuration and system prompts +- Optional wrapper scripts for agent-specific setup + +## Architecture + +This system is designed to work with: +1. **Multiple agent types** - Test different agent implementations +2. **Any model name** - No predefined model list required +3. **Individual model credentials** - Each model stores its own API key and base URL +4. **Explicit agent/model pairing** - Specify exactly which combinations to test +5. **Optional model specification** - Some agents have pre-configured models and don't require explicit model names +6. **Unique namespaces** - Each test run uses a unique Kubernetes namespace to avoid conflicts +7. **Parallel execution** - Run multiple evaluations concurrently with automatic namespace isolation + +Agent model requirements: +- **`openai-agent`** - Requires explicit model specification via `-a openai-agent/model-name` +- **`gemini`** - Uses pre-configured model, specify as `-a gemini` (no model needed) +- **`claude-code`** - Uses pre-configured model, specify as `-a claude-code` (no model needed) + +For agents requiring models: +1. Choose a model name (e.g., `gemini-2.0-flash`, `claude-sonnet-4@20250514`, `mistralai/Mistral-7B-Instruct-v0.3`) +2. Store the model's credentials in gnome-keyring using the normalized service name +3. Run the script with `-a "agent-type/model-name"` + +For agents with pre-configured models: +1. Run the script with `-a "agent-type"` (e.g., `-a gemini` or `-a claude-code`) + +Every model has its own individual secrets: +- **API Key** - Stored in gnome-keyring as `service: model-{normalized-name} account: api-key` +- **Base URL** - Stored in gnome-keyring as `service: model-{normalized-name} account: base-url` +- **Model ID** (optional) - Stored in gnome-keyring as `service: model-{normalized-name} account: model-id` + +This allows maximum flexibility - you can use any model from any provider, route models through different proxies, or point to entirely different endpoints. + +## Setup + +### 1. Install secret-tool + +The scripts use `secret-tool` from `libsecret` to retrieve secrets from gnome-keyring: + +```bash +# Fedora/RHEL +sudo dnf install libsecret + +# Ubuntu/Debian +sudo apt-get install libsecret-tools +``` + +### 2. Store Model Secrets in gnome-keyring + +Each model requires two secrets to be stored: `api-key` and `base-url`. The service name is derived from the model name by normalizing it (lowercase, special characters replaced with hyphens, prefixed with `model-`). + +**Important:** You only need to configure the models you actually plan to use. There's no need to configure all the examples below - these are just for reference. 
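+
+You can compute the normalized service name from the shell before storing anything. A small sketch that applies the same transformation `model-configs.sh` uses (lowercase, then any character outside `[a-z0-9.-]` becomes a hyphen):
+
+```bash
+model="mistralai/Mistral-7B-Instruct-v0.3"
+echo "model-$(echo "$model" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9.-]/-/g')"
+# -> model-mistralai-mistral-7b-instruct-v0.3
+```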
+ +#### Example: mistralai/Mistral-7B-Instruct-v0.3 +Service name: `model-mistralai-mistral-7b-instruct-v0.3` + +```bash +# API Key +secret-tool store --label='Mistral 7B API Key' \ + service model-mistralai-mistral-7b-instruct-v0.3 \ + account api-key + +# Base URL (enter the OpenAI-compatible endpoint URL) +secret-tool store --label='Mistral 7B Base URL' \ + service model-mistralai-mistral-7b-instruct-v0.3 \ + account base-url +# Example URL: https://api.fireworks.ai/inference/v1 + +# Optional: Model ID (if the API expects a different model identifier) +secret-tool store --label='Mistral 7B Model ID' \ + service model-mistralai-mistral-7b-instruct-v0.3 \ + account model-id +# Example: accounts/fireworks/models/mistralai/Mistral-7B-Instruct-v0.3 +``` + +#### Model: gemini-2.0-flash +Service name: `model-gemini-2.0-flash` + +```bash +secret-tool store --label='Gemini 2.0 Flash API Key' \ + service model-gemini-2.0-flash \ + account api-key + +secret-tool store --label='Gemini 2.0 Flash Base URL' \ + service model-gemini-2.0-flash \ + account base-url +# Example URL: https://generativelanguage.googleapis.com/v1beta/openai/ +``` + +#### Model: claude-sonnet-4@20250514 +Service name: `model-claude-sonnet-4-20250514` + +```bash +secret-tool store --label='Claude Sonnet 4 API Key' \ + service model-claude-sonnet-4-20250514 \ + account api-key + +secret-tool store --label='Claude Sonnet 4 Base URL' \ + service model-claude-sonnet-4-20250514 \ + account base-url +# Example URL: https://api.anthropic.com/v1 +``` + +### 3. Verify Model Secrets + +You can verify that your model secrets are stored correctly: + +```bash +# Check a specific model +secret-tool lookup service model-gemini-2.0-flash account api-key +secret-tool lookup service model-gemini-2.0-flash account base-url + +# List all secrets for a model +secret-tool search service model-gemini-2.0-flash + +# Or use the validation command to check all models at once +./run-agent-model-evals.sh -m "gemini-2.0-flash" --validate-secrets +``` + +The `--validate-secrets` command will show you the status of all models and tell you exactly which secrets are missing. + +## Usage + +### Run Evaluations + +The script requires you to specify at least one agent or agent/model combination using the `-a` flag. 
+ +**Format:** +- For agents requiring models (openai-agent): `-a agent-type/model-name` +- For agents with pre-configured models (gemini, claude-code): `-a agent-type` + +```bash +# Run evaluation with agent that requires a model (openai-agent) +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash + +# Run evaluation with agents that have pre-configured models +./run-agent-model-evals.sh -a gemini +./run-agent-model-evals.sh -a claude-code + +# Run evaluations for multiple combinations +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash -a openai-agent/claude-sonnet-4@20250514 + +# Test one model with openai-agent and pre-configured agents +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash -a gemini -a claude-code + +# Mix and match any combinations +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a openai-agent/claude-sonnet-4@20250514 \ + -a gemini \ + -a claude-code + +# Run with custom model name for openai-agent +./run-agent-model-evals.sh -a openai-agent/your-custom-model-name +``` + +### Validate Secrets + +To check if models used in specific combinations are properly configured without running evaluations: + +```bash +# Validate models used in one combination +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash --validate-secrets + +# Validate models used in multiple combinations (including agent-only) +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a openai-agent/claude-sonnet-4@20250514 \ + -a gemini \ + --validate-secrets + +# Validate agent-only combinations (no models to validate) +./run-agent-model-evals.sh -a gemini -a claude-code --validate-secrets +``` + +This will extract the unique models from your combinations and show you which ones have both API keys and base URLs configured. For agent-only combinations (gemini, claude-code), no model validation is performed. + +### Check API Endpoints + +To validate that the base URLs are OpenAI-compatible and accessible, add the `--check-api` flag: + +```bash +# Validate secrets AND check API endpoint connectivity +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash --validate-secrets --check-api + +# Check multiple combinations (validates unique models) +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a gemini/claude-sonnet-4@20250514 \ + --validate-secrets --check-api +``` + +This performs HTTP requests to test common OpenAI-compatible API endpoints: +1. **`GET /models`** - Lists available models (informational) +2. **`POST /chat/completions`** - Creates a test chat completion (critical for agent execution) +3. **`POST /completions`** - Tests legacy text completion endpoint (informational) +4. **`POST /embeddings`** - Tests embeddings endpoint (informational) +5. 
**`POST /moderations`** - Tests content moderation endpoint (informational) + +The validation checks: +- ✓ The endpoints are accessible +- ✓ The API key is valid +- ✓ The chat completions endpoint works (critical - used by agents) +- ⚠ Non-critical endpoints may return 404 if not supported by the provider + +**Example successful validation:** +```bash +$ ./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash --validate-secrets --check-api + +OK: Model 'gemini-2.0-flash' has API key and base URL configured + Note: No custom model-id set, will use 'gemini-2.0-flash' + Testing GET https://generativelanguage.googleapis.com/v1beta/openai/models + ✓ Endpoint accessible (HTTP 200) + Testing POST https://generativelanguage.googleapis.com/v1beta/openai/chat/completions + ✓ Endpoint accessible (HTTP 200) + Testing POST https://generativelanguage.googleapis.com/v1beta/openai/completions + ⚠ Endpoint not found (HTTP 404) - not all providers support legacy completions + Testing POST https://generativelanguage.googleapis.com/v1beta/openai/embeddings + ⚠ Returned HTTP 400 - may not be an embeddings model + Testing POST https://generativelanguage.googleapis.com/v1beta/openai/moderations + ⚠ Endpoint not found (HTTP 404) - may not support moderations + ✓ API endpoint validation complete + +All specified models are properly configured! +``` + +**Note:** The `--check-api` flag only works with `--validate-secrets` and requires network connectivity to the API endpoints. Warnings (⚠) are informational and don't cause validation to fail - only authentication errors (✗) cause failure. + +### Dry Run + +To see what commands would be executed without actually running them: + +```bash +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash --dry-run +``` + +### Verbose Output + +To see detailed configuration and environment variables: + +```bash +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash -v +``` + +### Custom Output Directory + +To specify a custom output directory for log files: + +```bash +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash -o /path/to/results +``` + +### Custom Output Prefix + +To add a prefix to the output files (useful for organizing experiments or runs): + +```bash +# Without prefix (default) +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash +# Creates: +# gevals-openai-agent-gemini-2.0-flash-20250106-143022-out.json +# gevals-openai-agent-gemini-2.0-flash-20250106-143022-out.log + +# With prefix +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash -p "experiment-1" +# Creates: +# gevals-experiment-1-openai-agent-gemini-2.0-flash-20250106-143022-out.json +# gevals-experiment-1-openai-agent-gemini-2.0-flash-20250106-143022-out.log + +# Multiple combinations with the same prefix +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a gemini/gemini-2.0-flash \ + -p "baseline-test" +# Creates (timestamps will vary): +# gevals-baseline-test-openai-agent-gemini-2.0-flash-20250106-143022-out.json +# gevals-baseline-test-openai-agent-gemini-2.0-flash-20250106-143022-out.log +# gevals-baseline-test-gemini-gemini-2.0-flash-20250106-143045-out.json +# gevals-baseline-test-gemini-gemini-2.0-flash-20250106-143045-out.log +``` + +### Parallel Execution + +To run multiple evaluations in parallel for faster completion: + +```bash +# Run all combinations in parallel (each gets a unique namespace) +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a openai-agent/claude-sonnet-4@20250514 \ + -a gemini \ + -a claude-code \ + 
--parallel + +# Limit parallel jobs to 2 at a time +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a openai-agent/claude-sonnet-4@20250514 \ + -a gemini \ + --parallel -j 2 +``` + +**How it works:** +- Each evaluation runs in its own unique Kubernetes namespace (e.g., `vm-test-20251106-162422-a3b4c5d6`) +- Namespaces are automatically created and cleaned up +- The `--parallel` flag enables concurrent execution +- The `-j N` flag limits the maximum number of parallel jobs (default: unlimited) +- Progress is logged in real-time to the run log file + +**Benefits:** +- Much faster evaluation of multiple combinations +- No namespace conflicts between parallel runs +- Automatic resource isolation + +**Note:** Make sure your Kubernetes cluster has sufficient resources to handle multiple concurrent VMs. + +### Help + +For full usage information: + +```bash +./run-agent-model-evals.sh --help +``` + +## Example Model Configurations + +Here are some example normalized service names for common models: + +| Model Name Example | Normalized Service Name | +|-------------------|-------------------------| +| `mistralai/Mistral-7B-Instruct-v0.3` | `model-mistralai-mistral-7b-instruct-v0.3` | +| `ibm-granite/granite-4.0-h-tiny` | `model-ibm-granite-granite-4.0-h-tiny` | +| `ibm-granite/granite-4.0-h-micro` | `model-ibm-granite-granite-4.0-h-micro` | +| `Qwen/Qwen3-14B` | `model-qwen-qwen3-14b` | +| `gemini-2.0-flash` | `model-gemini-2.0-flash` | +| `gemini-2.5-pro` | `model-gemini-2.5-pro` | +| `claude-sonnet-4@20250514` | `model-claude-sonnet-4-20250514` | +| `claude-3-5-haiku@20241022` | `model-claude-3-5-haiku-20241022` | + +## Using Any Model + +To use a model that's not in the examples above: + +1. Determine the normalized service name (lowercase, special chars replaced with hyphens): + - `new-provider/new-model-v1` → `model-new-provider-new-model-v1` + - `MyModel@2024` → `model-mymodel-2024` + +2. Store the secrets: + +```bash +secret-tool store --label='New Model API Key' \ + service model-new-provider-new-model-v1 \ + account api-key + +secret-tool store --label='New Model Base URL' \ + service model-new-provider-new-model-v1 \ + account base-url +``` + +3. Optionally store a custom model ID if the API expects a different identifier: + +```bash +secret-tool store --label='New Model ID' \ + service model-new-provider-new-model-v1 \ + account model-id +``` + +4. Run the evaluation: + +```bash +./run-agent-model-evals.sh -a openai-agent -m "new-provider/new-model-v1" +``` + +That's it! No need to edit any configuration files - just store the secrets and run. + +## Output + +The script generates several types of output files: + +### Log Files (in specified output directory) + +The script creates a `results/` directory (or custom directory specified with `-o`) containing: + +- Individual log files for each agent+model evaluation (`gevals-{agent-slug}-{model-slug}-{timestamp}.log`) +- A run summary log file (`gevals-run-{timestamp}.log`) + +### Gevals Output Files (in project results directory) + +After each successful evaluation, the script automatically: +1. Generates a formatted view file from the JSON output using `gevals view` +2. Renames both files to include the optional prefix, agent type, model name, and timestamp +3. 
Moves them to `pkg/toolsets/kubevirt/tests/results/` + +File naming pattern: +- Without prefix: `gevals-{agent-slug}-{model-slug}-{timestamp}-out.{json|log}` +- With prefix: `gevals-{prefix}-{agent-slug}-{model-slug}-{timestamp}-out.{json|log}` + +Where: +- `{prefix}` is the optional prefix specified with `-p` or `--prefix` +- `{agent-slug}` is the normalized agent type name +- `{model-slug}` is the normalized model name +- `{timestamp}` is the date and time in format `YYYYMMDD-HHMMSS` (e.g., `20250106-143022`) + +Files created: +- `.json` - Raw evaluation results in JSON format (generated by gevals run) +- `.log` - Formatted view output (generated by gevals view) + +Examples: + +**Without prefix:** +```bash +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash +``` +Creates (timestamp will vary): +- `pkg/toolsets/kubevirt/tests/results/gevals-openai-agent-gemini-2.0-flash-20250106-143022-out.json` +- `pkg/toolsets/kubevirt/tests/results/gevals-openai-agent-gemini-2.0-flash-20250106-143022-out.log` + +**With prefix:** +```bash +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash -p "baseline" +``` +Creates (timestamp will vary): +- `pkg/toolsets/kubevirt/tests/results/gevals-baseline-openai-agent-gemini-2.0-flash-20250106-143022-out.json` +- `pkg/toolsets/kubevirt/tests/results/gevals-baseline-openai-agent-gemini-2.0-flash-20250106-143022-out.log` + +**Multiple combinations:** +```bash +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash -a gemini/gemini-2.0-flash +``` +Creates (timestamps will vary): +- `pkg/toolsets/kubevirt/tests/results/gevals-openai-agent-gemini-2.0-flash-20250106-143022-out.json` +- `pkg/toolsets/kubevirt/tests/results/gevals-openai-agent-gemini-2.0-flash-20250106-143022-out.log` +- `pkg/toolsets/kubevirt/tests/results/gevals-gemini-gemini-2.0-flash-20250106-143045-out.json` +- `pkg/toolsets/kubevirt/tests/results/gevals-gemini-gemini-2.0-flash-20250106-143045-out.log` + +## Understanding Service Names + +Service names are automatically normalized from model names: +- Convert to lowercase +- Replace non-alphanumeric characters (except dots and hyphens) with hyphens +- Prefix with `model-` + +Examples: +- `gemini-2.0-flash` → `model-gemini-2.0-flash` +- `claude-sonnet-4@20250514` → `model-claude-sonnet-4-20250514` +- `mistralai/Mistral-7B-Instruct-v0.3` → `model-mistralai-mistral-7b-instruct-v0.3` + +You can use the `normalize_model_name` function to check the service name: + +```bash +source model-configs.sh +normalize_model_name "Your/Model@Name" +``` + +## Troubleshooting + +### Model Secrets Not Found + +If you get errors about missing secrets: + +``` +ERROR: Model 'gemini-2.0-flash' is missing both API key and base URL + Service name: model-gemini-2.0-flash +``` + +Make sure you've stored both the `api-key` and `base-url` for that model using the exact service name shown. + +### Wrong Service Name + +If you're unsure about the service name, use the validation command: + +```bash +./run-agent-model-evals.sh -m "your-model-name" --validate-secrets +``` + +This will show you the exact service names for the specified models. + +### API Endpoint Not Accessible + +If the `--check-api` validation fails, you'll see specific error messages: + +**HTTP 401 - Authentication Failed:** +``` +✗ API authentication failed (HTTP 401) - check API key +``` +→ Verify your API key is correct and hasn't expired. 
+ +**HTTP 404 - Endpoint Not Found:** +``` +✗ /chat/completions endpoint not found (HTTP 404) +``` +→ Check that your base URL is correct and includes the proper path (e.g., `/v1` for OpenAI). +→ The `/models` endpoint might work while `/chat/completions` doesn't - always use `--check-api` to validate both. + +**Connection Failed:** +``` +✗ Could not connect to API endpoint - check base URL and network +``` +→ Verify the base URL is correct and you have network connectivity. + +**Debugging workflow:** +```bash +# 1. Check secrets are stored +./run-agent-model-evals.sh -m "gemini-2.0-flash" --validate-secrets + +# 2. Test API connectivity (including chat/completions) +./run-agent-model-evals.sh -m "gemini-2.0-flash" --validate-secrets --check-api + +# 3. Manually test chat completions endpoint +BASE_URL=$(secret-tool lookup service model-gemini-2.0-flash account base-url) +API_KEY=$(secret-tool lookup service model-gemini-2.0-flash account api-key) +curl -X POST "$BASE_URL/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model":"gemini-2.0-flash","messages":[{"role":"user","content":"test"}],"max_tokens":1}' +``` + +### Unknown Agent Type + +If you get an error about an unknown agent type: + +``` +Error: Unknown agent type 'my-agent' +Available agents: openai-agent gemini claude-code +``` + +Make sure you're using one of the available agent types listed in the error message. + +### Checking Stored Secrets + +To see all secrets for a model: + +```bash +secret-tool search service model-gemini-2.0-flash +``` + +To retrieve a specific secret value: + +```bash +secret-tool lookup service model-gemini-2.0-flash account api-key +secret-tool lookup service model-gemini-2.0-flash account base-url +secret-tool lookup service model-gemini-2.0-flash account model-id +``` + +### Removing Stored Secrets + +To remove a stored secret from gnome-keyring: + +```bash +# Remove an API key +secret-tool clear service model-gemini-2.0-flash account api-key + +# Remove a base URL +secret-tool clear service model-gemini-2.0-flash account base-url + +# Remove a model ID +secret-tool clear service model-gemini-2.0-flash account model-id +``` + +### gevals Command Not Found + +Make sure the `gevals` binary is in your PATH or adjust the script to use the full path to the binary. + +### Model ID vs Model Name + +Some API providers expect a specific model identifier that differs from the friendly model name: + +- **Model Name**: What you call the model in your script (e.g., `mistralai/Mistral-7B-Instruct-v0.3`) +- **Model ID**: What the API expects (e.g., `accounts/fireworks/models/mistralai/Mistral-7B-Instruct-v0.3`) + +If the API requires a different identifier, store it as the `model-id`: + +```bash +secret-tool store --label='Model ID' \ + service model-mistralai-mistral-7b-instruct-v0.3 \ + account model-id +# Enter: accounts/fireworks/models/mistralai/Mistral-7B-Instruct-v0.3 +``` + +If no `model-id` is stored, the script will use the original model name. + +## Environment Variables + +The scripts set these environment variables for each model evaluation: + +- `MODEL_BASE_URL` - The OpenAI-compatible API base URL (from secrets) +- `MODEL_KEY` - The API key for authentication (from secrets) +- `MODEL_NAME` - The model name/identifier (from secrets if `model-id` is set, otherwise the original model name) +- `SYSTEM_PROMPT` - Optional system prompt (can be set externally) + +These variables are consumed by the agent implementations in each agent directory. 
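+
+As an illustration only (not one of the real wrapper scripts in this repository), an agent implementation could consume these variables to issue a chat completion:
+
+```bash
+#!/usr/bin/env bash
+# Hypothetical sketch: assumes MODEL_BASE_URL, MODEL_KEY, and MODEL_NAME
+# have been exported by run-agent-model-evals.sh; $1 is an optional prompt.
+curl -s -X POST "${MODEL_BASE_URL%/}/chat/completions" \
+  -H "Authorization: Bearer ${MODEL_KEY}" \
+  -H "Content-Type: application/json" \
+  -d "{\"model\":\"${MODEL_NAME}\",\"messages\":[{\"role\":\"user\",\"content\":\"${1:-hello}\"}]}"
+```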
+ +## Example: Complete Setup for One Agent+Model Combination + +Here's a complete example for setting up and running the `openai-agent` with `gemini-2.0-flash`: + +```bash +# 1. Store the API key +secret-tool store --label='Gemini 2.0 Flash API Key' \ + service model-gemini-2.0-flash \ + account api-key +# When prompted, enter your Google AI API key + +# 2. Store the base URL +secret-tool store --label='Gemini 2.0 Flash Base URL' \ + service model-gemini-2.0-flash \ + account base-url +# When prompted, enter: https://generativelanguage.googleapis.com/v1beta/openai/ + +# 3. Verify it's configured +secret-tool search service model-gemini-2.0-flash + +# 4. Test just this combination with dry-run +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash --dry-run + +# 5. Run the actual evaluation +./run-agent-model-evals.sh -a openai-agent/gemini-2.0-flash +``` + +## Example: Testing Multiple Combinations + +To systematically test across all available agent types: + +```bash +# Test gemini-2.0-flash with openai-agent, plus pre-configured agents +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a gemini \ + -a claude-code +``` + +To test multiple different models with openai-agent: + +```bash +# This will run 2 evaluations (openai-agent with 2 different models) +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a openai-agent/claude-sonnet-4@20250514 +``` + +To run all available agents (mix of agent/model and agent-only): + +```bash +# This will run 4 total evaluations +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a openai-agent/claude-sonnet-4@20250514 \ + -a gemini \ + -a claude-code +``` + +Or mix and match specific combinations as needed: + +```bash +# Test specific combinations +./run-agent-model-evals.sh \ + -a openai-agent/gemini-2.0-flash \ + -a openai-agent/mistralai/Mistral-7B-Instruct-v0.3 \ + -a gemini \ + -a claude-code +``` diff --git a/pkg/toolsets/kubevirt/tests/README.md b/pkg/toolsets/kubevirt/tests/README.md new file mode 100644 index 00000000..72f7556d --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/README.md @@ -0,0 +1,214 @@ +# KubeVirt VM Toolset Tests + +This directory contains gevals-based tests for the KubeVirt VM toolset in the Kubernetes MCP Server. + +## Overview + +These tests validate the VM creation and troubleshooting tools (`vm_create` and `vm_troubleshoot`) by having AI agents complete real tasks using the MCP server. + +## Test Structure + +``` +tests/ +├── README.md # This file +├── mcp-config.yaml # MCP server configuration +├── claude-code/ # Claude Code agent configuration +│ ├── agent.yaml +│ └── eval.yaml +└── tasks/ # Test tasks + ├── create-vm-basic/ # Basic VM creation test + ├── create-vm-with-instancetype/ # VM with specific instancetype + ├── create-vm-with-size/ # VM with size parameter + ├── create-vm-ubuntu/ # Ubuntu VM creation + ├── create-vm-with-performance/ # VM with performance family + └── troubleshoot-vm/ # VM troubleshooting test +``` + +## Prerequisites + +1. **Kubernetes cluster** with KubeVirt installed + - The cluster must have KubeVirt CRDs installed + - For testing, you can use a Kind cluster with KubeVirt + +2. **Kubernetes MCP Server** running at `http://localhost:8888/mcp` + + ```bash + # Build and run the server + cd /path/to/kubernetes-mcp-server + make build + ./kubernetes-mcp-server --port 8888 + ``` + +3. **gevals binary** built from the gevals project + + ```bash + cd /path/to/gevals + go build -o gevals ./cmd/gevals + ``` + +4. 
**Claude Code** installed and in PATH
+
+   ```bash
+   # Install Claude Code (if not already installed)
+   npm install -g @anthropic-ai/claude-code
+   ```
+
+5. **kubectl** configured to access your cluster
+
+## Running the Tests
+
+### Run All Tests
+
+```bash
+# From the gevals directory
+./gevals eval /path/to/kubernetes-mcp-server/pkg/toolsets/kubevirt/tests/claude-code/eval.yaml
+```
+
+### Run a Specific Test
+
+```bash
+# Run just the basic VM creation test
+./gevals eval /path/to/kubernetes-mcp-server/pkg/toolsets/kubevirt/tests/tasks/create-vm-basic/create-vm-basic.yaml \
+  --agent-file /path/to/kubernetes-mcp-server/pkg/toolsets/kubevirt/tests/claude-code/agent.yaml \
+  --mcp-config-file /path/to/kubernetes-mcp-server/pkg/toolsets/kubevirt/tests/mcp-config.yaml
+```
+
+## Test Descriptions
+
+### create-vm-basic
+
+**Difficulty:** Easy
+**Description:** Tests basic VM creation with the default Fedora workload.
+**Key Tool:** `vm_create`
+**Expected Behavior:** Agent should use `vm_create` to generate a plan and then create the VM using `resources_create_or_update`.
+
+### create-vm-with-instancetype
+
+**Difficulty:** Medium
+**Description:** Tests VM creation with a specific instancetype (u1.medium).
+**Key Tool:** `vm_create`
+**Expected Behavior:** Agent should pass the instancetype parameter to `vm_create` and create a VM with the correct instancetype reference.
+
+### create-vm-with-size
+
+**Difficulty:** Medium
+**Description:** Tests VM creation using a size hint ('large').
+**Key Tool:** `vm_create`
+**Expected Behavior:** Agent should use the size parameter, which should map to an appropriate instancetype.
+
+### create-vm-ubuntu
+
+**Difficulty:** Easy
+**Description:** Tests VM creation with an Ubuntu workload.
+**Key Tool:** `vm_create`
+**Expected Behavior:** Agent should create a VM using the Ubuntu container disk image.
+
+### create-vm-with-performance
+
+**Difficulty:** Medium
+**Description:** Tests VM creation with a performance family ('compute-optimized') and a size.
+**Key Tool:** `vm_create`
+**Expected Behavior:** Agent should combine performance and size to select an appropriate instancetype (e.g., c1.medium).
+
+### troubleshoot-vm
+
+**Difficulty:** Easy
+**Description:** Tests VM troubleshooting guide generation.
+**Key Tool:** `vm_troubleshoot`
+**Expected Behavior:** Agent should use `vm_troubleshoot` to generate a troubleshooting guide for the VM.
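+
+To spot-check manually what the verification scripts assert, the same fields can be read with kubectl (the VM name `test-vm` and namespace `vm-test` below are placeholders):
+
+```bash
+# Fields inspected by the shared verification helpers
+kubectl get virtualmachine test-vm -n vm-test -o jsonpath='{.spec.instancetype.name}{"\n"}'
+kubectl get virtualmachine test-vm -n vm-test -o jsonpath='{.spec.runStrategy}{"\n"}'
+kubectl get virtualmachine test-vm -n vm-test -o jsonpath='{.spec.template.spec.volumes[*].containerDisk.image}{"\n"}'
+```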
+ +## Assertions + +The tests validate: + +- **Tool Usage:** Agents must call `vm_create`, `vm_troubleshoot`, or `resources_*` tools +- **Call Limits:** Between 1 and 30 tool calls (allows for exploration and creation) +- **Task Success:** Verification scripts confirm VMs are created correctly + +## Expected Results + +**✅ Pass** means: + +- The VM tools are well-designed and discoverable +- Tool descriptions are clear to AI agents +- Schemas are properly structured +- Implementation works correctly + +**❌ Fail** indicates: + +- Tool descriptions may need improvement +- Schema complexity issues +- Missing functionality +- Implementation bugs + +## Output + +Results are saved to `gevals-kubevirt-vm-operations-out.json` with: + +- Task pass/fail status +- Assertion results +- Tool call history +- Agent interactions + +## Customization + +### Using Different AI Agents + +You can create additional agent configurations (similar to the `claude-code/` directory) for testing with different AI models: + +```yaml +# Example: openai-agent/agent.yaml +kind: Agent +metadata: + name: "openai-agent" +commands: + argTemplateMcpServer: "{{ .File }}" + runPrompt: |- + agent-wrapper.sh {{ .McpServerFileArgs }} "{{ .Prompt }}" +``` + +### Adding New Tests + +To add a new test task: + +1. Create a new directory under `tasks/` +2. Add task YAML file with prompt +3. Add setup, verify, and cleanup scripts +4. The test will be automatically discovered by the glob pattern in `eval.yaml` + +## Troubleshooting + +### Tests Fail to Connect to MCP Server + +Ensure the Kubernetes MCP Server is running: + +```bash +curl http://localhost:8888/mcp/health +``` + +### VirtualMachine Not Created + +Check if KubeVirt is installed: + +```bash +kubectl get crds | grep kubevirt +kubectl get pods -n kubevirt +``` + +### Permission Issues + +Ensure your kubeconfig has permissions to: + +- Create namespaces +- Create VirtualMachine resources +- List instancetypes and preferences + +## Contributing + +When adding new tests: + +- Keep tasks focused on a single capability +- Make verification scripts robust +- Document expected behavior +- Set appropriate difficulty levels +- Ensure cleanup scripts remove all resources diff --git a/pkg/toolsets/kubevirt/tests/claude-code/agent.yaml b/pkg/toolsets/kubevirt/tests/claude-code/agent.yaml new file mode 100644 index 00000000..20b715c0 --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/claude-code/agent.yaml @@ -0,0 +1,10 @@ +kind: Agent +metadata: + name: "claude-code" +commands: + useVirtualHome: false + argTemplateMcpServer: "--mcp-config {{ .File }}" + argTemplateAllowedTools: "mcp__{{ .ServerName }}__{{ .ToolName }}" + allowedToolsJoinSeparator: "," + runPrompt: |- + claude {{ .McpServerFileArgs }} --strict-mcp-config --allowedTools "{{ .AllowedToolArgs }}" --print "{{ .Prompt }}" diff --git a/pkg/toolsets/kubevirt/tests/claude-code/eval.yaml b/pkg/toolsets/kubevirt/tests/claude-code/eval.yaml new file mode 100644 index 00000000..01478cd6 --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/claude-code/eval.yaml @@ -0,0 +1,14 @@ +kind: Eval +metadata: + name: "kubevirt-vm-operations" +config: + agentFile: agent.yaml + mcpConfigFile: ../mcp-config.yaml + taskSets: + - glob: ../tasks/*/*.yaml + assertions: + toolsUsed: + - server: kubernetes + toolPattern: "(vm_create|vm_troubleshoot|resources_.*)" + minToolCalls: 1 + maxToolCalls: 30 diff --git a/pkg/toolsets/kubevirt/tests/gemini/agent.yaml b/pkg/toolsets/kubevirt/tests/gemini/agent.yaml new file mode 100644 index 00000000..ba6127fc --- 
/dev/null +++ b/pkg/toolsets/kubevirt/tests/gemini/agent.yaml @@ -0,0 +1,10 @@ +kind: Agent +metadata: + name: "gemini" +commands: + useVirtualHome: false + argTemplateMcpServer: "{{ .File }}" + argTemplateAllowedTools: "{{ .ToolName }}" + allowedToolsJoinSeparator: "," + runPrompt: |- + pkg/toolsets/kubevirt/tests/gemini/gemini-agent-wrapper.sh {{ .McpServerFileArgs }} "{{ .AllowedToolArgs }}" "{{ .Prompt }}" diff --git a/pkg/toolsets/kubevirt/tests/gemini/eval.yaml b/pkg/toolsets/kubevirt/tests/gemini/eval.yaml new file mode 100644 index 00000000..c56b21d7 --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/gemini/eval.yaml @@ -0,0 +1,14 @@ +kind: Eval +metadata: + name: "gemini-cli-kubernetes-basic-operations" +config: + agentFile: agent.yaml + mcpConfigFile: ../mcp-config.yaml + taskSets: + - glob: ../tasks/*/*.yaml + assertions: + toolsUsed: + - server: kubernetes + toolPattern: ".*" + minToolCalls: 1 + maxToolCalls: 20 diff --git a/pkg/toolsets/kubevirt/tests/gemini/gemini-agent-wrapper.sh b/pkg/toolsets/kubevirt/tests/gemini/gemini-agent-wrapper.sh new file mode 100755 index 00000000..087d5c03 --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/gemini/gemini-agent-wrapper.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Wrapper script to configure gemini-cli with MCP server from config file + +set -e + +CONFIG_FILE="$1" +shift +ALLOWED_TOOLS="$1" +shift +PROMPT="$*" + +# Extract URL from MCP config +URL=$(grep -o '"url"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | head -1 | sed 's/.*"url"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') + +if [ -z "$URL" ]; then + echo "Error: Could not extract URL from config file $CONFIG_FILE" >&2 + echo "Config contents:" >&2 + cat "$CONFIG_FILE" >&2 + exit 1 +fi + +# Generate unique server name for this eval run to avoid conflicts +SERVER_NAME="mcp-eval-$$" + +echo "Configuring gemini with MCP server: $URL (as $SERVER_NAME)" >&2 + +# Add MCP server for this run +gemini mcp add "$SERVER_NAME" "$URL" --scope project --transport http --trust >/dev/null 2>&1 + +# Ensure cleanup on exit (success or failure) +trap "gemini mcp remove '$SERVER_NAME' >/dev/null 2>&1 || true" EXIT + +# Run gemini with configured server and allowed tools +# --approval-mode yolo: Auto-approve all tool calls (required for automated evals) +# --output-format text: Ensure text output for parsing +if [ -n "$ALLOWED_TOOLS" ]; then + gemini --allowed-mcp-server-names "$SERVER_NAME" \ + --allowed-tools "$ALLOWED_TOOLS" \ + --approval-mode yolo \ + --output-format text \ + --prompt "$PROMPT" +else + gemini --allowed-mcp-server-names "$SERVER_NAME" \ + --approval-mode yolo \ + --output-format text \ + --prompt "$PROMPT" +fi diff --git a/pkg/toolsets/kubevirt/tests/helpers/README.md b/pkg/toolsets/kubevirt/tests/helpers/README.md new file mode 100644 index 00000000..941d8291 --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/helpers/README.md @@ -0,0 +1,189 @@ +# Test Verification Helpers + +This directory contains shared helper functions for VirtualMachine test verification. + +## Usage + +Source the helper script in your test verification section: + +```bash +#!/usr/bin/env bash +source "$(dirname "${BASH_SOURCE[0]}")/../../helpers/verify-vm.sh" + +# Use helper functions +verify_vm_exists "test-vm" "vm-test" || exit 1 +verify_container_disk "test-vm" "vm-test" "fedora" || exit 1 +verify_run_strategy "test-vm" "vm-test" || exit 1 +verify_no_deprecated_running_field "test-vm" "vm-test" || exit 1 +``` + +## Available Functions + +### verify_vm_exists +Waits for a VirtualMachine to be created. 
**Usage:** `verify_vm_exists <vm-name> <namespace> [timeout]`
+
+**Example:**
+```bash
+verify_vm_exists "my-vm" "vm-test" "30s" || exit 1
+```
+
+**Default timeout:** 30s
+
+---
+
+### verify_container_disk
+Verifies that a VM uses a specific container disk OS (checks all volumes).
+
+**Usage:** `verify_container_disk <vm-name> <namespace> <os-name>`
+
+**Example:**
+```bash
+verify_container_disk "my-vm" "vm-test" "fedora" || exit 1
+verify_container_disk "ubuntu-vm" "vm-test" "ubuntu" || exit 1
+```
+
+---
+
+### verify_run_strategy
+Verifies that runStrategy is set (checks both spec and status).
+
+**Usage:** `verify_run_strategy <vm-name> <namespace>`
+
+**Example:**
+```bash
+verify_run_strategy "my-vm" "vm-test" || exit 1
+```
+
+**Note:** This function accepts runStrategy in either `spec.runStrategy` or `status.runStrategy` to accommodate VMs created with the deprecated `running` field.
+
+---
+
+### verify_no_deprecated_running_field
+Verifies that the deprecated `running` field is NOT set in the VirtualMachine spec.
+
+**Usage:** `verify_no_deprecated_running_field <vm-name> <namespace>`
+
+**Example:**
+```bash
+verify_no_deprecated_running_field "my-vm" "vm-test" || exit 1
+```
+
+**Note:** The `running` field is deprecated in KubeVirt. VirtualMachines should use `runStrategy` instead. This function ensures compliance with current best practices.
+
+---
+
+### verify_instancetype
+Verifies that a VM has an instancetype reference, with an optional exact match.
+
+**Usage:** `verify_instancetype <vm-name> <namespace> [expected-instancetype] [expected-kind]`
+
+**Examples:**
+```bash
+# Just verify instancetype exists
+verify_instancetype "my-vm" "vm-test" || exit 1
+
+# Verify specific instancetype
+verify_instancetype "my-vm" "vm-test" "u1.medium" || exit 1
+
+# Verify instancetype and kind
+verify_instancetype "my-vm" "vm-test" "u1.medium" "VirtualMachineClusterInstancetype" || exit 1
+```
+
+**Default kind:** VirtualMachineClusterInstancetype
+
+---
+
+### verify_instancetype_contains
+Verifies that the instancetype name contains a substring (e.g., a size like "large").
+
+**Usage:** `verify_instancetype_contains <vm-name> <namespace> <substring> [description]`
+
+**Example:**
+```bash
+verify_instancetype_contains "my-vm" "vm-test" "large" "requested size 'large'"
+verify_instancetype_contains "my-vm" "vm-test" "medium"
+```
+
+**Note:** Returns success even if the substring is not found (prints a warning only).
+
+---
+
+### verify_instancetype_prefix
+Verifies that the instancetype starts with a specific prefix (e.g., a performance family like "c1").
+
+**Usage:** `verify_instancetype_prefix <vm-name> <namespace> <prefix> [description]`
+
+**Example:**
+```bash
+verify_instancetype_prefix "my-vm" "vm-test" "c1" "compute-optimized"
+verify_instancetype_prefix "my-vm" "vm-test" "u1" "general-purpose"
+```
+
+**Note:** Returns success even if the prefix doesn't match (prints a warning only).
+
+---
+
+### verify_no_direct_resources
+Verifies that the VM uses an instancetype for resources (no direct memory specification).
+
+**Usage:** `verify_no_direct_resources <vm-name> <namespace>`
+
+**Example:**
+```bash
+verify_no_direct_resources "my-vm" "vm-test"
+```
+
+**Note:** Returns success even if direct resources are found (prints a warning only).
+
+---
+
+### verify_has_resources_or_instancetype
+Verifies that the VM has either an instancetype or a direct resource specification.
+
+**Usage:** `verify_has_resources_or_instancetype <vm-name> <namespace>`
+
+**Example:**
+```bash
+verify_has_resources_or_instancetype "my-vm" "vm-test" || exit 1
+```
+
+**Note:** Fails only if neither an instancetype nor direct resources are present.
+
+## Design Principles
+
+1. **Flexible matching**: Functions use pattern matching instead of exact volume names to handle different VM creation approaches.
+
+2. **Clear output**: Each function prints clear success (✓) or failure (✗) messages.
+
+3. **Warning vs Error**: Some functions print warnings (⚠) for non-critical mismatches but still return success.
+
+4. **Return codes**: Functions return 0 for success, 1 for failure. Always check return codes with `|| exit 1` for critical validations.
+
+## Example Test Verification
+
+```bash
+#!/usr/bin/env bash
+source "$(dirname "${BASH_SOURCE[0]}")/../../helpers/verify-vm.sh"
+
+# Wait for VM to exist
+verify_vm_exists "test-vm" "vm-test" || exit 1
+
+# Verify container disk
+verify_container_disk "test-vm" "vm-test" "fedora" || exit 1
+
+# Verify runStrategy is used (not deprecated 'running' field)
+verify_run_strategy "test-vm" "vm-test" || exit 1
+verify_no_deprecated_running_field "test-vm" "vm-test" || exit 1
+
+# Verify instancetype with size
+verify_instancetype "test-vm" "vm-test" || exit 1
+verify_instancetype_contains "test-vm" "vm-test" "large"
+
+# Verify no direct resources
+verify_no_direct_resources "test-vm" "vm-test"
+
+echo "All validations passed"
+exit 0
+```
diff --git a/pkg/toolsets/kubevirt/tests/helpers/verify-vm.sh b/pkg/toolsets/kubevirt/tests/helpers/verify-vm.sh
new file mode 100644
index 00000000..0ad3929d
--- /dev/null
+++ b/pkg/toolsets/kubevirt/tests/helpers/verify-vm.sh
@@ -0,0 +1,220 @@
+#!/usr/bin/env bash
+# Shared verification helper functions for VirtualMachine tests
+
+# verify_vm_exists: Waits for a VirtualMachine to be created
+# Usage: verify_vm_exists <vm-name> <namespace> [timeout]
+verify_vm_exists() {
+    local vm_name="$1"
+    local namespace="$2"
+    local timeout="${3:-30s}"
+
+    if ! kubectl wait --for=jsonpath='{.metadata.name}'="$vm_name" virtualmachine/"$vm_name" -n "$namespace" --timeout="$timeout" 2>/dev/null; then
+        echo "VirtualMachine $vm_name not found in namespace $namespace"
+        kubectl get virtualmachines -n "$namespace"
+        return 1
+    fi
+    echo "VirtualMachine $vm_name created successfully"
+    return 0
+}
+
+# verify_container_disk: Verifies that a VM uses a specific container disk OS
+# Usage: verify_container_disk <vm-name> <namespace> <os-name>
+# Example: verify_container_disk test-vm vm-test fedora
+verify_container_disk() {
+    local vm_name="$1"
+    local namespace="$2"
+    local os_name="$3"
+
+    # Get all container disk images from all volumes
+    local container_disks
+    container_disks=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.template.spec.volumes[*].containerDisk.image}')
+
+    if [[ "$container_disks" =~ $os_name ]]; then
+        echo "✓ VirtualMachine uses $os_name container disk"
+        return 0
+    else
+        echo "✗ Expected $os_name container disk, found volumes with images: $container_disks"
+        kubectl get virtualmachine "$vm_name" -n "$namespace" -o yaml
+        return 1
+    fi
+}
+
+# verify_run_strategy: Verifies that runStrategy is set (in spec or status)
+# Usage: verify_run_strategy <vm-name> <namespace>
+verify_run_strategy() {
+    local vm_name="$1"
+    local namespace="$2"
+
+    local spec_run_strategy
+    local status_run_strategy
+    spec_run_strategy=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.runStrategy}')
+    status_run_strategy=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.status.runStrategy}')
+
+    if [[ -n "$spec_run_strategy" ]]; then
+        echo "✓ VirtualMachine uses runStrategy in spec: $spec_run_strategy"
+        return 0
+    elif [[ -n "$status_run_strategy" ]]; then
+        echo "✓ VirtualMachine has runStrategy in status: $status_run_strategy"
+        echo "  Note: VM may have been created with deprecated 'running' field, but runStrategy is set in status"
+        return 0
+    else
+        echo "✗ VirtualMachine missing runStrategy field in both spec and status"
+        return 1
+    fi
+}
+
+# verify_no_deprecated_running_field: Verifies that deprecated 'running' field is NOT set
+# Usage: verify_no_deprecated_running_field <vm-name> <namespace>
+verify_no_deprecated_running_field() {
+    local vm_name="$1"
+    local namespace="$2"
+
+    local running_field
+    running_field=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.running}')
+
+    if [[ -z "$running_field" ]]; then
+        echo "✓ VirtualMachine does not use deprecated 'running' field"
+        return 0
+    else
+        echo "✗ VirtualMachine uses deprecated 'running' field with value: $running_field"
+        echo "  Please use 'runStrategy' instead of 'running'"
+        kubectl get virtualmachine "$vm_name" -n "$namespace" -o yaml
+        return 1
+    fi
+}
+
+# verify_instancetype: Verifies that a VM has an instancetype reference
+# Usage: verify_instancetype <vm-name> <namespace> [expected-instancetype] [expected-kind]
+verify_instancetype() {
+    local vm_name="$1"
+    local namespace="$2"
+    local expected_instancetype="$3"
+    local expected_kind="${4:-VirtualMachineClusterInstancetype}"
+
+    local instancetype
+    instancetype=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.instancetype.name}')
+
+    if [[ -z "$instancetype" ]]; then
+        echo "✗ VirtualMachine has no instancetype reference"
+        return 1
+    fi
+
+    echo "✓ VirtualMachine has instancetype reference: $instancetype"
+
+    # Check expected instancetype if provided
+    if [[ -n "$expected_instancetype" ]]; then
+        if [[ "$instancetype" == "$expected_instancetype" ]]; then
+            echo "✓ Instancetype matches expected value: $expected_instancetype"
+        else
+            echo "✗ Expected instancetype '$expected_instancetype', found: $instancetype"
+            return 1
+        fi
+    fi
+
+    # Verify instancetype kind
+    local instancetype_kind
+    instancetype_kind=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.instancetype.kind}')
+    if [[ "$instancetype_kind" == "$expected_kind" ]]; then
+        echo "✓ Instancetype kind is $expected_kind"
+    else
+        echo "⚠ Instancetype kind is: $instancetype_kind (expected: $expected_kind)"
+    fi
+
+    return 0
+}
+
+# verify_instancetype_contains: Verifies that instancetype name contains a string
+# Usage: verify_instancetype_contains <vm-name> <namespace> <substring> [description]
+verify_instancetype_contains() {
+    local vm_name="$1"
+    local namespace="$2"
+    local substring="$3"
+    local description="${4:-$substring}"
+
+    local instancetype
+    instancetype=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.instancetype.name}')
+
+    if [[ -z "$instancetype" ]]; then
+        echo "✗ VirtualMachine has no instancetype reference"
+        return 1
+    fi
+
+    if [[ "$instancetype" =~ $substring ]]; then
+        echo "✓ Instancetype matches $description: $instancetype"
+        return 0
+    else
+        echo "⚠ Instancetype '$instancetype' doesn't match $description"
+        return 0  # Return success for warnings
+    fi
+}
+
+# verify_instancetype_prefix: Verifies that instancetype starts with a prefix
+# Usage: verify_instancetype_prefix <vm-name> <namespace> <prefix> [description]
+verify_instancetype_prefix() {
+    local vm_name="$1"
+    local namespace="$2"
+    local prefix="$3"
+    local description="${4:-$prefix}"
+
+    local instancetype
+    instancetype=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.instancetype.name}')
+
+    if [[ -z "$instancetype" ]]; then
+        echo "✗ VirtualMachine has no instancetype reference"
+        return 1
+    fi
+
+    if [[ "$instancetype" =~ ^${prefix}\. ]]; then
+        echo "✓ Instancetype matches $description family: $instancetype"
+        return 0
+    else
+        echo "⚠ Instancetype '$instancetype' doesn't start with '$prefix'"
+        return 0  # Return success for warnings
+    fi
+}
+
+# verify_no_direct_resources: Verifies VM uses instancetype (no direct memory spec)
+# Usage: verify_no_direct_resources <vm-name> <namespace>
+verify_no_direct_resources() {
+    local vm_name="$1"
+    local namespace="$2"
+
+    local guest_memory
+    guest_memory=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.template.spec.domain.memory.guest}')
+
+    if [[ -z "$guest_memory" ]]; then
+        echo "✓ VirtualMachine uses instancetype for resources (no direct memory spec)"
+        return 0
+    else
+        echo "⚠ VirtualMachine has direct memory specification: $guest_memory"
+        return 0  # Return success for warnings
+    fi
+}
+
+# verify_has_resources_or_instancetype: Verifies VM has either instancetype or direct resources
+# Usage: verify_has_resources_or_instancetype <vm-name> <namespace>
+verify_has_resources_or_instancetype() {
+    local vm_name="$1"
+    local namespace="$2"
+
+    local instancetype
+    instancetype=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.instancetype.name}')
+
+    if [[ -n "$instancetype" ]]; then
+        echo "✓ VirtualMachine has instancetype reference: $instancetype"
+        return 0
+    fi
+
+    # Check for direct resource specification
+    local guest_memory
+    guest_memory=$(kubectl get virtualmachine "$vm_name" -n "$namespace" -o jsonpath='{.spec.template.spec.domain.memory.guest}')
+
+    if [[ -n "$guest_memory" ]]; then
+        echo "⚠ No instancetype set, but VM has direct memory specification: $guest_memory"
+        return 0
+    else
+        echo "✗ VirtualMachine has no instancetype and no direct resource specification"
+        kubectl get virtualmachine "$vm_name" -n "$namespace" -o yaml
+        return 1
+    fi
+}
diff --git a/pkg/toolsets/kubevirt/tests/mcp-config.yaml b/pkg/toolsets/kubevirt/tests/mcp-config.yaml
new file mode 100644
index 00000000..f79b279a
--- /dev/null
+++ b/pkg/toolsets/kubevirt/tests/mcp-config.yaml
@@ -0,0 +1,5 @@
+mcpServers:
+  kubernetes:
+    type: http
+    url: http://localhost:8888/mcp
+    enableAllTools: true
diff --git a/pkg/toolsets/kubevirt/tests/model-configs.sh b/pkg/toolsets/kubevirt/tests/model-configs.sh
new file mode 100755
index 00000000..7534cb5a
--- /dev/null
+++ b/pkg/toolsets/kubevirt/tests/model-configs.sh
@@ -0,0 +1,313 @@
+#!/usr/bin/env bash
+# Configuration file for model credentials and settings
+# This file sources API keys and base URLs from gnome-keyring using secret-tool
+# Each model has its own individual base URL and API key stored separately
+#
+# This script is designed to work with ANY model name - no predefined list required.
+# Just provide the model name when running the script, and it will look up the
+# corresponding secrets from gnome-keyring.
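+#
+# Example usage from another script (model name is hypothetical; get_model_config
+# prints KEY=VALUE lines, so eval-ing its output sets the variables):
+#   source model-configs.sh
+#   eval "$(get_model_config "gemini-2.0-flash")"  # sets MODEL_BASE_URL, MODEL_KEY, MODEL_NAME
+#   echo "$MODEL_BASE_URL"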
+ +# Function to retrieve secrets from gnome-keyring +get_secret() { + local service="$1" + local account="$2" + secret-tool lookup service "$service" account "$account" 2>/dev/null +} + +# Function to normalize model name to a safe service name +# Converts model name to lowercase and replaces special chars with hyphens +normalize_model_name() { + local model_name="$1" + echo "$model_name" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9.-]/-/g' +} + +# Function to get model configuration from secrets +# Each model stores its own base-url and api-key in gnome-keyring +get_model_config() { + local model_name="$1" + local service_name=$(normalize_model_name "$model_name") + + # Get model-specific secrets + local api_key=$(get_secret "model-$service_name" "api-key") + local base_url=$(get_secret "model-$service_name" "base-url") + local model_id=$(get_secret "model-$service_name" "model-id") + + # Validate that we have required values + if [ -z "$api_key" ]; then + echo "Error: API key not found for model $model_name (service: model-$service_name)" >&2 + echo "Error: Store it with: secret-tool store --label='$model_name API Key' service model-$service_name account api-key" >&2 + return 1 + fi + + if [ -z "$base_url" ]; then + echo "Error: Base URL not found for model $model_name (service: model-$service_name)" >&2 + echo "Error: Store it with: secret-tool store --label='$model_name Base URL' service model-$service_name account base-url" >&2 + return 1 + fi + + # Use stored model-id if available, otherwise use the original model name + if [ -z "$model_id" ]; then + model_id="$model_name" + fi + + echo "MODEL_BASE_URL=$base_url" + echo "MODEL_KEY=$api_key" + echo "MODEL_NAME=$model_id" +} + +# Function to check if a base URL is OpenAI-compatible +# Tests both /models and /chat/completions endpoints with the provided API key +check_openai_compatibility() { + local base_url="$1" + local api_key="$2" + local model_name="$3" + + # Remove trailing slash from base_url if present + base_url="${base_url%/}" + + local has_error=false + + # Check /models endpoint + local models_url="${base_url}/models" + echo " Testing GET ${models_url}" >&2 + local models_code + models_code=$(curl -s -o /dev/null -w "%{http_code}" \ + -X GET "${models_url}" \ + -H "Authorization: Bearer ${api_key}" \ + -H "Content-Type: application/json" \ + --max-time 10 \ + 2>/dev/null) + + if [ "$models_code" = "200" ]; then + echo " ✓ Endpoint accessible (HTTP 200)" >&2 + elif [ "$models_code" = "401" ]; then + echo " ✗ Authentication failed (HTTP 401)" >&2 + has_error=true + elif [ "$models_code" = "404" ]; then + echo " ⚠ Endpoint not found (HTTP 404)" >&2 + elif [ -z "$models_code" ]; then + echo " ✗ Could not connect to endpoint" >&2 + has_error=true + else + echo " ⚠ Returned HTTP $models_code" >&2 + fi + + # Check /chat/completions endpoint with a minimal test request + local chat_url="${base_url}/chat/completions" + echo " Testing POST ${chat_url}" >&2 + local chat_code + local chat_response + chat_response=$(mktemp) + chat_code=$(curl -s -w "%{http_code}" -o "$chat_response" \ + -X POST "${chat_url}" \ + -H "Authorization: Bearer ${api_key}" \ + -H "Content-Type: application/json" \ + -d "{\"model\":\"$model_name\",\"messages\":[{\"role\":\"user\",\"content\":\"test\"}],\"max_tokens\":1}" \ + --max-time 10 \ + 2>/dev/null) + + if [ "$chat_code" = "200" ]; then + echo " ✓ Endpoint accessible (HTTP 200)" >&2 + elif [ "$chat_code" = "401" ]; then + echo " ✗ Authentication failed (HTTP 401)" >&2 + has_error=true + elif [ 
"$chat_code" = "404" ]; then + echo " ✗ Endpoint not found (HTTP 404)" >&2 + has_error=true + elif [ "$chat_code" = "400" ]; then + # 400 might be acceptable - could be invalid model name or request format + echo " ⚠ Returned HTTP 400 (check model name)" >&2 + # Check if response contains model-not-found type error + if grep -qi "model.*not.*found\|invalid.*model" "$chat_response" 2>/dev/null; then + echo " ⚠ Model '$model_name' may not exist at this endpoint" >&2 + fi + elif [ -z "$chat_code" ]; then + echo " ✗ Could not connect to endpoint" >&2 + has_error=true + else + echo " ⚠ Returned HTTP $chat_code" >&2 + fi + + rm -f "$chat_response" + + # Check /completions endpoint (legacy text completion) + local completions_url="${base_url}/completions" + echo " Testing POST ${completions_url}" >&2 + local completions_code + local completions_response + completions_response=$(mktemp) + completions_code=$(curl -s -w "%{http_code}" -o "$completions_response" \ + -X POST "${completions_url}" \ + -H "Authorization: Bearer ${api_key}" \ + -H "Content-Type: application/json" \ + -d "{\"model\":\"$model_name\",\"prompt\":\"test\",\"max_tokens\":1}" \ + --max-time 10 \ + 2>/dev/null) + + if [ "$completions_code" = "200" ]; then + echo " ✓ Endpoint accessible (HTTP 200)" >&2 + elif [ "$completions_code" = "401" ]; then + echo " ✗ Authentication failed (HTTP 401)" >&2 + has_error=true + elif [ "$completions_code" = "404" ]; then + echo " ⚠ Endpoint not found (HTTP 404) - not all providers support legacy completions" >&2 + elif [ "$completions_code" = "400" ]; then + echo " ⚠ Returned HTTP 400 - may not support this endpoint or model" >&2 + elif [ -z "$completions_code" ]; then + echo " ✗ Could not connect to endpoint" >&2 + has_error=true + else + echo " ⚠ Returned HTTP $completions_code" >&2 + fi + + rm -f "$completions_response" + + # Check /embeddings endpoint + local embeddings_url="${base_url}/embeddings" + echo " Testing POST ${embeddings_url}" >&2 + local embeddings_code + local embeddings_response + embeddings_response=$(mktemp) + embeddings_code=$(curl -s -w "%{http_code}" -o "$embeddings_response" \ + -X POST "${embeddings_url}" \ + -H "Authorization: Bearer ${api_key}" \ + -H "Content-Type: application/json" \ + -d "{\"model\":\"$model_name\",\"input\":\"test\"}" \ + --max-time 10 \ + 2>/dev/null) + + if [ "$embeddings_code" = "200" ]; then + echo " ✓ Endpoint accessible (HTTP 200)" >&2 + elif [ "$embeddings_code" = "401" ]; then + echo " ✗ Authentication failed (HTTP 401)" >&2 + has_error=true + elif [ "$embeddings_code" = "404" ]; then + echo " ⚠ Endpoint not found (HTTP 404) - may not support embeddings" >&2 + elif [ "$embeddings_code" = "400" ]; then + echo " ⚠ Returned HTTP 400 - may not be an embeddings model" >&2 + elif [ -z "$embeddings_code" ]; then + echo " ✗ Could not connect to endpoint" >&2 + has_error=true + else + echo " ⚠ Returned HTTP $embeddings_code" >&2 + fi + + rm -f "$embeddings_response" + + # Check /moderations endpoint + local moderations_url="${base_url}/moderations" + echo " Testing POST ${moderations_url}" >&2 + local moderations_code + moderations_code=$(curl -s -o /dev/null -w "%{http_code}" \ + -X POST "${moderations_url}" \ + -H "Authorization: Bearer ${api_key}" \ + -H "Content-Type: application/json" \ + -d "{\"input\":\"test\"}" \ + --max-time 10 \ + 2>/dev/null) + + if [ "$moderations_code" = "200" ]; then + echo " ✓ Endpoint accessible (HTTP 200)" >&2 + elif [ "$moderations_code" = "401" ]; then + echo " ✗ Authentication failed (HTTP 401)" >&2 + 
has_error=true + elif [ "$moderations_code" = "404" ]; then + echo " ⚠ Endpoint not found (HTTP 404) - may not support moderations" >&2 + elif [ -z "$moderations_code" ]; then + echo " ✗ Could not connect to endpoint" >&2 + has_error=true + else + echo " ⚠ Returned HTTP $moderations_code" >&2 + fi + + if [ "$has_error" = true ]; then + return 1 + else + echo " ✓ API endpoint validation complete" >&2 + return 0 + fi +} + +# Function to validate secrets for specific models +# Usage: validate_model_secrets [--check-api] "model1" "model2" ... +validate_model_secrets() { + local check_api=false + local models=() + + # Parse arguments + while [[ $# -gt 0 ]]; do + case "$1" in + --check-api) + check_api=true + shift + ;; + *) + models+=("$1") + shift + ;; + esac + done + + local errors=0 + + if [ ${#models[@]} -eq 0 ]; then + echo "No models specified for validation" >&2 + return 0 + fi + + echo "Validating secrets for ${#models[@]} model(s)..." >&2 + if [ "$check_api" = true ]; then + echo "API endpoint connectivity check: ENABLED" >&2 + fi + echo "" >&2 + + for model_name in "${models[@]}"; do + local service_name=$(normalize_model_name "$model_name") + local api_key=$(get_secret "model-$service_name" "api-key") + local base_url=$(get_secret "model-$service_name" "base-url") + local model_id=$(get_secret "model-$service_name" "model-id") + + if [ -z "$api_key" ] && [ -z "$base_url" ]; then + echo "ERROR: Model '$model_name' is missing both API key and base URL" >&2 + echo " Service name: model-$service_name" >&2 + ((errors++)) + elif [ -z "$api_key" ]; then + echo "ERROR: Model '$model_name' is missing API key" >&2 + echo " Service name: model-$service_name" >&2 + ((errors++)) + elif [ -z "$base_url" ]; then + echo "ERROR: Model '$model_name' is missing base URL" >&2 + echo " Service name: model-$service_name" >&2 + ((errors++)) + else + echo "OK: Model '$model_name' has API key and base URL configured" >&2 + if [ -z "$model_id" ]; then + echo " Note: No custom model-id set, will use '$model_name'" >&2 + else + echo " Custom model-id: $model_id" >&2 + fi + + # Check API endpoint if requested + if [ "$check_api" = true ]; then + if ! check_openai_compatibility "$base_url" "$api_key" "$model_name"; then + ((errors++)) + fi + fi + fi + echo "" >&2 + done + + if [ $errors -gt 0 ]; then + echo "Found $errors error(s). Please configure missing secrets." >&2 + return 1 + else + echo "All specified models are properly configured!" 
>&2 + return 0 + fi +} + +# Export the functions for use in other scripts +export -f get_model_config +export -f validate_model_secrets +export -f check_openai_compatibility +export -f normalize_model_name diff --git a/pkg/toolsets/kubevirt/tests/openai-agent/agent-wrapper.sh b/pkg/toolsets/kubevirt/tests/openai-agent/agent-wrapper.sh new file mode 100755 index 00000000..5c986e52 --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/openai-agent/agent-wrapper.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Wrapper script to extract MCP server URL from config file and call agent + +CONFIG_FILE="$1" +shift +PROMPT="$*" + +# Extract the first server URL from the JSON config file +# Using grep and sed to parse JSON (simple approach) +URL=$(grep -o '"url"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | head -1 | sed 's/.*"url"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') + +if [ -z "$URL" ]; then + echo "Error: Could not extract URL from config file $CONFIG_FILE" + echo "Config contents:" + cat "$CONFIG_FILE" + exit 1 +fi + +echo "Using MCP server URL: $URL" +agent --mcp-url "$URL" --prompt "$PROMPT" diff --git a/pkg/toolsets/kubevirt/tests/openai-agent/agent.yaml b/pkg/toolsets/kubevirt/tests/openai-agent/agent.yaml new file mode 100644 index 00000000..a0894e2d --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/openai-agent/agent.yaml @@ -0,0 +1,8 @@ +kind: Agent +metadata: + name: "openai-agent" +commands: + useVirtualHome: false + argTemplateMcpServer: "{{ .File }}" + runPrompt: |- + pkg/toolsets/kubevirt/tests/openai-agent/agent-wrapper.sh {{ .McpServerFileArgs }} "{{ .Prompt }}" diff --git a/pkg/toolsets/kubevirt/tests/openai-agent/eval.yaml b/pkg/toolsets/kubevirt/tests/openai-agent/eval.yaml new file mode 100644 index 00000000..f38aee02 --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/openai-agent/eval.yaml @@ -0,0 +1,14 @@ +kind: Eval +metadata: + name: "openai-kubevirt-vm-operations" +config: + agentFile: agent.yaml + mcpConfigFile: ../mcp-config.yaml + taskSets: + - glob: ../tasks/*/*.yaml + assertions: + toolsUsed: + - server: kubernetes + toolPattern: "(vm_create|vm_troubleshoot|resources_.*)" + minToolCalls: 1 + maxToolCalls: 30 diff --git a/pkg/toolsets/kubevirt/tests/run-agent-model-evals.sh b/pkg/toolsets/kubevirt/tests/run-agent-model-evals.sh new file mode 100755 index 00000000..d8e7963e --- /dev/null +++ b/pkg/toolsets/kubevirt/tests/run-agent-model-evals.sh @@ -0,0 +1,728 @@ +#!/usr/bin/env bash +# Script to run gevals against agent and model combinations +# Usage: ./run-agent-model-evals.sh -a AGENT/MODEL [-a AGENT2/MODEL2 ...] [options] +# +# This script works with ANY agent type and model name combination. +# Just specify the agent/model pairs you want to evaluate. + +set -euo pipefail + +# Get the directory where this script is located +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../../.." 
&& pwd)"
+
+# Source the model configuration
+source "$SCRIPT_DIR/model-configs.sh"
+
+# Default values
+OUTPUT_DIR="$SCRIPT_DIR/results"
+OUTPUT_PREFIX=""
+VERBOSE=false
+DRY_RUN=false
+VALIDATE_KEYS_ONLY=false
+CHECK_API=false
+PARALLEL=false
+MAX_PARALLEL_JOBS=0
+AGENT_MODEL_COMBINATIONS=()
+
+# Available agent types
+AVAILABLE_AGENTS=("openai-agent" "gemini" "claude-code")
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -h|--help)
+            cat <<EOF
+Usage: $0 -a AGENT[/MODEL] [-a AGENT2[/MODEL2] ...] [options]
+
+Options:
+  -a, --agent AGENT[/MODEL]   Agent (and optional model) to evaluate; repeatable
+  -o, --output-dir DIR        Directory for result files (default: ./results)
+      --output-prefix PREFIX  Prefix added to result file names
+      --validate-keys         Only validate the stored model secrets, then exit
+      --check-api             Probe each model's API endpoints during validation
+      --parallel              Run the evaluations concurrently
+      --max-parallel N        Limit concurrent evaluations (0 = no limit)
+      --dry-run               Print the commands without running gevals
+  -v, --verbose               Verbose output
+  -h, --help                  Show this help text
+EOF
+            exit 0
+            ;;
+        -a|--agent)
+            AGENT_MODEL_COMBINATIONS+=("$2")
+            shift 2
+            ;;
+        -o|--output-dir)
+            OUTPUT_DIR="$2"
+            shift 2
+            ;;
+        --output-prefix)
+            OUTPUT_PREFIX="$2"
+            shift 2
+            ;;
+        --validate-keys)
+            VALIDATE_KEYS_ONLY=true
+            shift
+            ;;
+        --check-api)
+            CHECK_API=true
+            shift
+            ;;
+        --parallel)
+            PARALLEL=true
+            shift
+            ;;
+        --max-parallel)
+            MAX_PARALLEL_JOBS="$2"
+            shift 2
+            ;;
+        --dry-run)
+            DRY_RUN=true
+            shift
+            ;;
+        -v|--verbose)
+            VERBOSE=true
+            shift
+            ;;
+        *)
+            echo "Error: Unknown option: $1" >&2
+            echo "Run '$0 --help' for usage information" >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Check if at least one combination was specified
+if [ ${#AGENT_MODEL_COMBINATIONS[@]} -eq 0 ]; then
+    echo "Error: No agent/model combinations specified. Use -a to specify at least one." >&2
+    echo "Example: $0 -a openai-agent/gemini-2.0-flash" >&2
+    echo "Run '$0 --help' for usage information" >&2
+    exit 1
+fi
+
+# Parse and validate combinations
+declare -a AGENTS
+declare -a MODELS
+declare -a UNIQUE_MODELS
+
+for combination in "${AGENT_MODEL_COMBINATIONS[@]}"; do
+    # Split on '/' to get agent and model (model is optional)
+    if [[ "$combination" =~ / ]]; then
+        # Format: agent/model
+        agent="${combination%%/*}"
+        model="${combination#*/}"
+    else
+        # Format: agent (no model specified)
+        agent="$combination"
+        model=""
+    fi
+
+    # Validate agent type exists
+    if [[ ! " ${AVAILABLE_AGENTS[*]} " =~ " ${agent} " ]]; then
+        echo "Error: Unknown agent type '$agent' in combination '$combination'" >&2
+        echo "Available agents: ${AVAILABLE_AGENTS[*]}" >&2
+        exit 1
+    fi
+
+    # Store the pair
+    AGENTS+=("$agent")
+    MODELS+=("$model")
+
+    # Build unique models list for validation (only if model is specified)
+    if [ -n "$model" ]; then
+        # Check if model is already in the list
+        found=false
+        for existing in "${UNIQUE_MODELS[@]+"${UNIQUE_MODELS[@]}"}"; do
+            if [ "$existing" = "$model" ]; then
+                found=true
+                break
+            fi
+        done
+        if [ "$found" = false ]; then
+            UNIQUE_MODELS+=("$model")
+        fi
+    fi
+done
+
+# Validate model secrets (only if any models were specified)
+# Use a nounset-safe check for array length
+set +u
+unique_model_count=${#UNIQUE_MODELS[@]}
+set -u
+if [ "$unique_model_count" -gt 0 ]; then
+    # Build validation command with optional --check-api flag
+    if [ "$CHECK_API" = true ]; then
+        validate_cmd=(validate_model_secrets --check-api "${UNIQUE_MODELS[@]}")
+    else
+        validate_cmd=(validate_model_secrets "${UNIQUE_MODELS[@]}")
+    fi
+
+    if ! "${validate_cmd[@]}"; then
+        echo ""
+        echo "Some model secrets are missing from gnome-keyring."
+        echo "Each model requires both an api-key and a base-url to be stored."
+        echo ""
+        echo "Example: To configure a model, determine its normalized service name:"
+        echo "  source model-configs.sh"
+        echo "  normalize_model_name \"your-model-name\""
+        echo ""
+        echo "Then store the secrets using the service name:"
+        echo "  secret-tool store --label='Model API Key' service model-{normalized-name} account api-key"
+        echo "  secret-tool store --label='Model Base URL' service model-{normalized-name} account base-url"
+        echo ""
+        echo "See EVAL_README.md for detailed setup instructions."
+ echo "" + if [ "$VALIDATE_KEYS_ONLY" = true ]; then + exit 1 + fi + fi +else + echo "Note: No models specified for validation (agents without models specified)" >&2 +fi + +if [ "$VALIDATE_KEYS_ONLY" = true ]; then + exit 0 +fi + +# Create output directory +mkdir -p "$OUTPUT_DIR" + +# Log file for the entire run +RUN_LOG="$OUTPUT_DIR/gevals-run-$(date +%Y%m%d-%H%M%S).log" +echo "Logging to: $RUN_LOG" + +# Record start time for filtering results later +RUN_START_TIME=$(date +%s) + +# Summary variables +TOTAL_COMBINATIONS=${#AGENT_MODEL_COMBINATIONS[@]} +SUCCESS_COUNT=0 +FAILURE_COUNT=0 +SKIPPED_COUNT=0 + +echo "========================================" | tee -a "$RUN_LOG" +echo "Starting evaluation run" | tee -a "$RUN_LOG" +echo "Date: $(date)" | tee -a "$RUN_LOG" +echo "Combinations: ${AGENT_MODEL_COMBINATIONS[*]}" | tee -a "$RUN_LOG" +echo "Total combinations: $TOTAL_COMBINATIONS" | tee -a "$RUN_LOG" +echo "Output directory: $OUTPUT_DIR" | tee -a "$RUN_LOG" +echo "========================================" | tee -a "$RUN_LOG" +echo "" | tee -a "$RUN_LOG" + +# Function to get the eval name from eval.yaml +get_eval_name() { + local agent_type="$1" + local eval_file="$SCRIPT_DIR/$agent_type/eval.yaml" + + if [ ! -f "$eval_file" ]; then + echo "ERROR: eval.yaml not found at $eval_file" >&2 + return 1 + fi + + # Extract the name from the metadata section + local eval_name=$(grep -A 1 "^metadata:" "$eval_file" | grep "name:" | sed 's/.*name: *"\?\([^"]*\)"\?.*/\1/') + + if [ -z "$eval_name" ]; then + echo "ERROR: Could not extract eval name from $eval_file" >&2 + return 1 + fi + + echo "$eval_name" +} + +# Function to run evaluation for a single agent+model combination +run_eval() { + local agent_type="$1" + local model_name="$2" + local eval_namespace="${3:-vm-test}" # Default to vm-test if not provided + local agent_slug=$(echo "$agent_type" | sed 's/[^a-zA-Z0-9._-]/_/g') + local timestamp=$(date +%Y%m%d-%H%M%S) + + # Build log prefix for this combination (timestamp will be added per message) + local log_prefix + if [ -n "$model_name" ]; then + log_prefix="[$agent_type/$model_name]" + else + log_prefix="[$agent_type]" + fi + + # Helper function to print with timestamp + log_msg() { + local timestamp=$(date '+%H:%M:%S') + echo "[$timestamp] $log_prefix $1" + } + + # Build filename based on whether model is specified + local result_file + if [ -n "$model_name" ]; then + local model_slug=$(echo "$model_name" | sed 's/[^a-zA-Z0-9._-]/_/g') + result_file="$OUTPUT_DIR/gevals-${agent_slug}-${model_slug}-${timestamp}.log" + else + result_file="$OUTPUT_DIR/gevals-${agent_slug}-${timestamp}.log" + fi + + log_msg "Result file: $result_file" | tee -a "$RUN_LOG" + + # Get model configuration (only if model is specified) + local model_base_url model_key model_name_value + if [ -n "$model_name" ]; then + local config_output + if ! config_output=$(get_model_config "$model_name"); then + log_msg "ERROR: Failed to get configuration for $model_name" | tee -a "$RUN_LOG" + echo "ERROR: Failed to get configuration for $model_name" >> "$result_file" + log_msg "Skipping..." 
| tee -a "$RUN_LOG" + ((SKIPPED_COUNT++)) + return 1 + fi + + # Parse configuration + while IFS='=' read -r key value; do + case "$key" in + MODEL_BASE_URL) model_base_url="$value" ;; + MODEL_KEY) model_key="$value" ;; + MODEL_NAME) model_name_value="$value" ;; + esac + done <<< "$config_output" + + # Validate that we have all required values + if [ -z "$model_base_url" ] || [ -z "$model_key" ] || [ -z "$model_name_value" ]; then + log_msg "ERROR: Missing required configuration for $model_name" | tee -a "$RUN_LOG" + echo "ERROR: Missing required configuration for $model_name" >> "$result_file" + log_msg "Skipping..." | tee -a "$RUN_LOG" + ((SKIPPED_COUNT++)) + return 1 + fi + + if [ -z "$model_key" ] || [ "$model_key" = "null" ]; then + log_msg "ERROR: API key not available for $model_name" | tee -a "$RUN_LOG" + echo "ERROR: API key not available for $model_name" >> "$result_file" + log_msg "Skipping..." | tee -a "$RUN_LOG" + ((SKIPPED_COUNT++)) + return 1 + fi + fi + + # Get eval name for this agent + local eval_name + if ! eval_name=$(get_eval_name "$agent_type"); then + log_msg "ERROR: Failed to get eval name for $agent_type" | tee -a "$RUN_LOG" + echo "ERROR: Failed to get eval name for $agent_type" >> "$result_file" + log_msg "Skipping..." | tee -a "$RUN_LOG" + ((SKIPPED_COUNT++)) + return 1 + fi + + # Construct the command + local cmd=( + "gevals" "run" + "$SCRIPT_DIR/$agent_type/eval.yaml" + ) + + # Export namespace environment variable + export EVAL_NAMESPACE="$eval_namespace" + + # Export environment variables for this model (only if model is specified) + if [ -n "$model_name" ]; then + export MODEL_BASE_URL="$model_base_url" + export MODEL_KEY="$model_key" + export MODEL_NAME="$model_name_value" + + if [ "$VERBOSE" = true ]; then + log_msg "Environment:" | tee -a "$RUN_LOG" + log_msg " EVAL_NAMESPACE=$EVAL_NAMESPACE" | tee -a "$RUN_LOG" + log_msg " MODEL_BASE_URL=$MODEL_BASE_URL" | tee -a "$RUN_LOG" + log_msg " MODEL_NAME=$MODEL_NAME" | tee -a "$RUN_LOG" + log_msg " MODEL_KEY=***" | tee -a "$RUN_LOG" + fi + else + # Clear MODEL_* variables if previously set + unset MODEL_BASE_URL MODEL_KEY MODEL_NAME + + if [ "$VERBOSE" = true ]; then + log_msg "Environment:" | tee -a "$RUN_LOG" + log_msg " EVAL_NAMESPACE=$EVAL_NAMESPACE" | tee -a "$RUN_LOG" + log_msg " (using agent-configured model)" | tee -a "$RUN_LOG" + fi + fi + + log_msg "Command: ${cmd[*]}" | tee -a "$RUN_LOG" + + if [ "$DRY_RUN" = true ]; then + log_msg "[DRY RUN] Would execute command" | tee -a "$RUN_LOG" + return 0 + fi + + # Run the evaluation + local start_time=$(date +%s) + local start_time_human=$(date) + log_msg "Starting evaluation at $start_time_human..." | tee -a "$RUN_LOG" + echo "Starting evaluation at $start_time_human..." 
>> "$result_file" + echo "" >> "$result_file" + + if cd "$PROJECT_ROOT" && "${cmd[@]}" >> "$result_file" 2>&1; then + local end_time=$(date +%s) + local duration=$((end_time - start_time)) + local minutes=$((duration / 60)) + local seconds=$((duration % 60)) + + echo "" >> "$result_file" + + # Check for JSON output file (uses eval name from eval.yaml) + local default_json="gevals-${eval_name}-out.json" + local default_view_log="gevals-${eval_name}-out.log" + + # Check test results in JSON file + local test_failed=false + if [ -f "$default_json" ]; then + # Check if any tasks failed or had errors + # Look for "status": "error" or "status": "fail" in the JSON + if grep -q '"status"[[:space:]]*:[[:space:]]*"\(error\|fail\)"' "$default_json"; then + test_failed=true + fi + fi + + if [ "$test_failed" = true ]; then + log_msg "FAILURE: Tests failed" | tee -a "$RUN_LOG" + echo "FAILURE: Tests failed" >> "$result_file" + else + log_msg "SUCCESS: All tests passed" | tee -a "$RUN_LOG" + echo "SUCCESS: All tests passed" >> "$result_file" + fi + log_msg "Duration: ${minutes}m ${seconds}s (${duration}s total)" | tee -a "$RUN_LOG" + echo "Duration: ${minutes}m ${seconds}s (${duration}s total)" >> "$result_file" + + # Process and move gevals output files + local results_dir="$SCRIPT_DIR/results" + mkdir -p "$results_dir" + + if [ -f "$default_json" ]; then + # Generate view output from JSON using gevals view + log_msg "Generating view output from JSON..." | tee -a "$RUN_LOG" + echo "Generating view output from JSON..." >> "$result_file" + if gevals view "$default_json" > "$default_view_log" 2>&1; then + log_msg "View output generation successful" | tee -a "$RUN_LOG" + echo "View output generation successful" >> "$result_file" + else + log_msg "Warning: Failed to generate view output from JSON" | tee -a "$RUN_LOG" + echo "Warning: Failed to generate view output from JSON" >> "$result_file" + fi + + # Move and rename JSON output file + # Build the output filename with optional prefix, agent type, model (if provided), and timestamp + local filename_base="gevals-" + if [ -n "$OUTPUT_PREFIX" ]; then + filename_base="${filename_base}${OUTPUT_PREFIX}-" + fi + filename_base="${filename_base}${agent_slug}-" + if [ -n "$model_name" ]; then + filename_base="${filename_base}${model_slug}-" + fi + filename_base="${filename_base}${timestamp}-out" + + # Move JSON file + if [ -f "$default_json" ]; then + local new_json="$results_dir/${filename_base}.json" + mv "$default_json" "$new_json" + log_msg "Moved output file to: $new_json" | tee -a "$RUN_LOG" + echo "Moved output file to: $new_json" >> "$result_file" + fi + + # Move view log file + if [ -f "$default_view_log" ]; then + local new_view_log="$results_dir/${filename_base}.log" + mv "$default_view_log" "$new_view_log" + log_msg "Moved view output to: $new_view_log" | tee -a "$RUN_LOG" + echo "Moved view output to: $new_view_log" >> "$result_file" + fi + else + log_msg "Warning: JSON output file not found at $default_json" | tee -a "$RUN_LOG" + echo "Warning: JSON output file not found at $default_json" >> "$result_file" + fi + + # Update counters based on test results + if [ "$test_failed" = true ]; then + ((FAILURE_COUNT++)) + return 1 + else + ((SUCCESS_COUNT++)) + return 0 + fi + else + local exit_code=$? 
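+        # NOTE: exit_code must be read before any other command runs, while $?
+        # still holds the status of the failed "cd && gevals" list above.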
+        local end_time=$(date +%s)
+        local duration=$((end_time - start_time))
+        local minutes=$((duration / 60))
+        local seconds=$((duration % 60))
+
+        log_msg "FAILURE: Evaluation failed (exit code: $exit_code)" | tee -a "$RUN_LOG"
+        echo "FAILURE: Evaluation failed (exit code: $exit_code)" >> "$result_file"
+        log_msg "Duration: ${minutes}m ${seconds}s (${duration}s total)" | tee -a "$RUN_LOG"
+        echo "Duration: ${minutes}m ${seconds}s (${duration}s total)" >> "$result_file"
+        ((FAILURE_COUNT++))
+        return 1
+    fi
+}
+
+# Function to generate unique namespace
+generate_unique_namespace() {
+    local agent_type="$1"
+    local model_name="$2"
+
+    # Sanitize agent and model names for use in namespace (lowercase, replace special chars with hyphens)
+    local agent_slug=$(echo "$agent_type" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g' | sed 's/--*/-/g' | sed 's/^-//' | sed 's/-$//')
+
+    local namespace_base
+    if [ -n "$model_name" ]; then
+        local model_slug=$(echo "$model_name" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9-]/-/g' | sed 's/--*/-/g' | sed 's/^-//' | sed 's/-$//')
+        namespace_base="${agent_slug}-${model_slug}"
+    else
+        namespace_base="${agent_slug}"
+    fi
+
+    # Add timestamp and random suffix for uniqueness
+    # Format: YYYYMMDD-HHMMSS-XXXXXXXX (15 + 1 + 8 = 24 chars)
+    local timestamp=$(date +%Y%m%d-%H%M%S)
+    local random_suffix=$(head -c 4 /dev/urandom | xxd -p)
+    local suffix="${timestamp}-${random_suffix}"
+
+    # Kubernetes namespace limit is 63 characters
+    # We need space for: base + "-" + suffix (24 chars)
+    # So base can be at most: 63 - 1 - 24 = 38 chars
+    local max_base_length=38
+    if [ ${#namespace_base} -gt $max_base_length ]; then
+        namespace_base="${namespace_base:0:$max_base_length}"
+        # Remove trailing hyphen if we cut in the middle
+        namespace_base="${namespace_base%-}"
+    fi
+
+    echo "${namespace_base}-${suffix}"
+}
+
+# Run evaluations for all agent+model combinations
+if [ "$PARALLEL" = true ]; then
+    echo "Running evaluations in parallel..." | tee -a "$RUN_LOG"
+
+    # Determine max parallel jobs
+    if [ "$MAX_PARALLEL_JOBS" -eq 0 ]; then
+        MAX_PARALLEL_JOBS=$TOTAL_COMBINATIONS
+    fi
+
+    # Arrays to track background jobs
+    declare -a PIDS
+    declare -a NAMESPACES
+    declare -a JOB_AGENTS
+    declare -a JOB_MODELS
+    declare -a JOB_START_TIMES
+
+    # Launch evaluations
+    for i in "${!AGENTS[@]}"; do
+        # Wait if we've hit the max parallel jobs
+        set +u
+        num_pids=${#PIDS[@]}
+        set -u
+        while [ "$num_pids" -ge "$MAX_PARALLEL_JOBS" ]; do
+            # Check if any job has completed
+            for j in "${!PIDS[@]}"; do
+                if ! kill -0 "${PIDS[$j]}" 2>/dev/null; then
+                    # Job completed; reap it ("|| true" so a failed eval does not
+                    # abort the whole run under "set -e")
+                    wait "${PIDS[$j]}" || true
+
+                    # Remove from arrays
+                    unset PIDS[$j]
+                    unset NAMESPACES[$j]
+                    unset JOB_AGENTS[$j]
+                    unset JOB_MODELS[$j]
+                    unset JOB_START_TIMES[$j]
+
+                    # Reindex arrays
+                    PIDS=("${PIDS[@]}")
+                    NAMESPACES=("${NAMESPACES[@]}")
+                    JOB_AGENTS=("${JOB_AGENTS[@]}")
+                    JOB_MODELS=("${JOB_MODELS[@]}")
+                    JOB_START_TIMES=("${JOB_START_TIMES[@]}")
+
+                    # Update count
+                    set +u
+                    num_pids=${#PIDS[@]}
+                    set -u
+                    break
+                fi
+            done
+            sleep 0.1
+        done
+
+        # Generate unique namespace for this eval
+        unique_ns=$(generate_unique_namespace "${AGENTS[$i]}" "${MODELS[$i]}")
+
+        # Build prefix for this combination
+        if [ -n "${MODELS[$i]}" ]; then
+            combo_prefix="[${AGENTS[$i]}/${MODELS[$i]}]"
+        else
+            combo_prefix="[${AGENTS[$i]}]"
+        fi
+
+        timestamp=$(date '+%H:%M:%S')
+        echo "[$timestamp] $combo_prefix Starting with namespace $unique_ns..." 
| tee -a "$RUN_LOG" + + # Run in background + job_start_time=$(date +%s) + run_eval "${AGENTS[$i]}" "${MODELS[$i]}" "$unique_ns" & + pid=$! + + # Store job info + PIDS+=($pid) + NAMESPACES+=("$unique_ns") + JOB_AGENTS+=("${AGENTS[$i]}") + JOB_MODELS+=("${MODELS[$i]}") + JOB_START_TIMES+=($job_start_time) + done + + # Wait for all remaining jobs to complete + set +u + remaining_pids=${#PIDS[@]} + set -u + if [ "$remaining_pids" -gt 0 ]; then + echo "Waiting for $remaining_pids remaining job(s) to complete..." | tee -a "$RUN_LOG" + # Wait for all background jobs + wait + else + echo "All jobs completed" | tee -a "$RUN_LOG" + fi +else + # Sequential execution + for i in "${!AGENTS[@]}"; do + # Generate unique namespace even for sequential execution + unique_ns=$(generate_unique_namespace "${AGENTS[$i]}" "${MODELS[$i]}") + echo "Using namespace: $unique_ns" | tee -a "$RUN_LOG" + + run_eval "${AGENTS[$i]}" "${MODELS[$i]}" "$unique_ns" || true + echo "" | tee -a "$RUN_LOG" + done +fi + +# Calculate final results by checking all JSON output files from this run +echo "" | tee -a "$RUN_LOG" +echo "Calculating final results from test outputs..." | tee -a "$RUN_LOG" + +# Reset counters (they may be incorrect due to parallel execution in subshells) +ACTUAL_SUCCESS_COUNT=0 +ACTUAL_FAILURE_COUNT=0 + +# Arrays to track which combinations succeeded/failed +declare -a SUCCESSFUL_COMBINATIONS +declare -a FAILED_COMBINATIONS + +# Find all JSON files generated during this run in the results directory +results_dir="$SCRIPT_DIR/results" +if [ -d "$results_dir" ]; then + # Process each JSON file created during this run + for json_file in "$results_dir"/gevals-*-out.json; do + if [ -f "$json_file" ]; then + # Check if file was created during this run (modified after RUN_START_TIME) + file_mtime=$(stat -c %Y "$json_file" 2>/dev/null || stat -f %m "$json_file" 2>/dev/null || echo 0) + if [ "$file_mtime" -ge "$RUN_START_TIME" ]; then + # Extract combination name from filename (remove gevals- prefix, -TIMESTAMP-out.json suffix) + combination=$(basename "$json_file" | sed 's/^gevals-//' | sed 's/-[0-9]\{8\}-[0-9]\{6\}-out\.json$//') + + # Check if any tasks failed or had errors + if grep -q '"status"[[:space:]]*:[[:space:]]*"\(error\|fail\)"' "$json_file"; then + ((ACTUAL_FAILURE_COUNT++)) + FAILED_COMBINATIONS+=("$combination") + else + ((ACTUAL_SUCCESS_COUNT++)) + SUCCESSFUL_COMBINATIONS+=("$combination") + fi + fi + fi + done +fi + +# Use the actual counts from JSON files +SUCCESS_COUNT=$ACTUAL_SUCCESS_COUNT +FAILURE_COUNT=$ACTUAL_FAILURE_COUNT +# SKIPPED_COUNT is still accurate from the main process +# (only incremented when we skip before running gevals) + +# Create results summary JSON file +RESULTS_JSON="$results_dir/results-$(date +%Y%m%d-%H%M%S).json" +cat > "$RESULTS_JSON" </dev/null; then + echo "VirtualMachine broken-vm exists and troubleshooting was performed" + exit 0 + else + echo "VirtualMachine broken-vm not found" + exit 1 + fi + cleanup: + inline: |- + #!/usr/bin/env bash + NS="${EVAL_NAMESPACE:-vm-test}" + kubectl delete virtualmachine broken-vm -n "$NS" --ignore-not-found + kubectl delete namespace "$NS" --ignore-not-found + prompt: + inline: A VirtualMachine named broken-vm in the ${EVAL_NAMESPACE:-vm-test} namespace is having issues. 
diff --git a/pkg/toolsets/kubevirt/toolset.go b/pkg/toolsets/kubevirt/toolset.go new file mode 100644 index 00000000..41257960 --- /dev/null +++ b/pkg/toolsets/kubevirt/toolset.go @@ -0,0 +1,38 @@ +package kubevirt + +import ( + "slices" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + internalk8s "github.com/containers/kubernetes-mcp-server/pkg/kubernetes" + "github.com/containers/kubernetes-mcp-server/pkg/toolsets" + vm_create "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt/vm/create" + vm_start "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt/vm/start" + vm_stop "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt/vm/stop" + vm_troubleshoot "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt/vm/troubleshoot" +) + +type Toolset struct{} + +var _ api.Toolset = (*Toolset)(nil) + +func (t *Toolset) GetName() string { + return "kubevirt" +} + +func (t *Toolset) GetDescription() string { + return "KubeVirt virtual machine management tools" +} + +func (t *Toolset) GetTools(o internalk8s.Openshift) []api.ServerTool { + return slices.Concat( + vm_create.Tools(), + vm_start.Tools(), + vm_stop.Tools(), + vm_troubleshoot.Tools(), + ) +} + +func init() { + toolsets.Register(&Toolset{}) +} diff --git a/pkg/toolsets/kubevirt/vm/create/plan.tmpl b/pkg/toolsets/kubevirt/vm/create/plan.tmpl new file mode 100644 index 00000000..758b0ee0 --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/create/plan.tmpl @@ -0,0 +1,99 @@ +# VirtualMachine Creation Plan + +**IMPORTANT**: Always use `runStrategy` instead of the deprecated `running` field when creating VirtualMachines. + +Use the `resources_create_or_update` tool with the following YAML: + +```yaml +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + name: {{.Name}} + namespace: {{.Namespace}} +spec: + runStrategy: Halted +{{- if .Instancetype}} + instancetype: + name: {{.Instancetype}} + kind: VirtualMachineClusterInstancetype +{{- end}} +{{- if .Preference}} + preference: + name: {{.Preference}} + kind: VirtualMachineClusterPreference +{{- end}} +{{- if .UseDataSource}} + dataVolumeTemplates: + - metadata: + name: {{.Name}}-rootdisk + spec: + sourceRef: + kind: DataSource + name: {{.DataSourceName}} + namespace: {{.DataSourceNamespace}} + storage: + resources: + requests: + storage: 30Gi +{{- end}} + template: + spec: + domain: + devices: + disks: + - name: {{.Name}}-rootdisk +{{- if not .Instancetype}} + memory: + guest: 2Gi +{{- end}} + volumes: + - name: {{.Name}}-rootdisk +{{- if .UseDataSource}} + dataVolume: + name: {{.Name}}-rootdisk +{{- else}} + containerDisk: + image: {{.ContainerDisk}} +{{- end}} +``` + +## Run Strategy Options + +The VM is created with `runStrategy: Halted` (stopped state). You can modify the `runStrategy` field to control the VM's execution: + +- **`Halted`** - VM is stopped and will not run +- **`Always`** - VM should always be running (restarts automatically) +- **`RerunOnFailure`** - Restart the VM only if it fails +- **`Manual`** - Manual start/stop control via `virtctl start/stop` +- **`Once`** - Run the VM once, then stop when it terminates + +To start the VM after creation, change `runStrategy: Halted` to `runStrategy: Always` or use the Manual strategy and start it with virtctl. 
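+
+For example, to start the VM later, re-apply the manifest above with only the run strategy changed:
+
+```yaml
+spec:
+  runStrategy: Always
+```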
+ +## Verification + +After creating the VirtualMachine, verify it was created successfully: + +Use the `resources_get` tool: +- **apiVersion**: `kubevirt.io/v1` +- **kind**: `VirtualMachine` +- **namespace**: `{{.Namespace}}` +- **name**: `{{.Name}}` + +Check the resource details for any warnings or errors in the status conditions. + +## Troubleshooting + +If the VirtualMachine fails to create or start: + +1. **Check the VM resource details and events**: + - Use `resources_get` tool with apiVersion `kubevirt.io/v1`, kind `VirtualMachine`, namespace `{{.Namespace}}`, name `{{.Name}}` + - Look for error messages in the status conditions + +2. **Verify instance type exists** (if specified): + - Use `resources_get` tool with apiVersion `instancetype.kubevirt.io/v1beta1`, kind `VirtualMachineClusterInstancetype`, name `{{.Instancetype}}` + +3. **Verify preference exists** (if specified): + - Use `resources_get` tool with apiVersion `instancetype.kubevirt.io/v1beta1`, kind `VirtualMachineClusterPreference`, name `{{.Preference}}` + +4. **Check KubeVirt installation**: + - Use `pods_list` tool with namespace `kubevirt` diff --git a/pkg/toolsets/kubevirt/vm/create/tool.go b/pkg/toolsets/kubevirt/vm/create/tool.go new file mode 100644 index 00000000..c7ea46e0 --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/create/tool.go @@ -0,0 +1,822 @@ +package create + +import ( + _ "embed" + "fmt" + "strings" + "text/template" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + "github.com/containers/kubernetes-mcp-server/pkg/output" + "github.com/google/jsonschema-go/jsonschema" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/utils/ptr" +) + +const ( + defaultInstancetypeLabel = "instancetype.kubevirt.io/default-instancetype" + defaultPreferenceLabel = "instancetype.kubevirt.io/default-preference" +) + +//go:embed vm.yaml.tmpl +var vmYamlTemplate string + +func Tools() []api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "vm_create", + Description: "Create a VirtualMachine in the cluster with the specified configuration, automatically resolving instance types, preferences, and container disk images. VM will be created in Halted state by default; use autostart parameter to start it immediately.", + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "namespace": { + Type: "string", + Description: "The namespace for the virtual machine", + }, + "name": { + Type: "string", + Description: "The name of the virtual machine", + }, + "workload": { + Type: "string", + Description: "The workload for the VM. Accepts OS names (e.g., 'fedora' (default), 'ubuntu', 'centos', 'centos-stream', 'debian', 'rhel', 'opensuse', 'opensuse-tumbleweed', 'opensuse-leap') or full container disk image URLs", + Examples: []interface{}{"fedora", "ubuntu", "centos", "debian", "rhel", "quay.io/containerdisks/fedora:latest"}, + }, + "instancetype": { + Type: "string", + Description: "Optional instance type name for the VM (e.g., 'u1.small', 'u1.medium', 'u1.large')", + }, + "preference": { + Type: "string", + Description: "Optional preference name for the VM", + }, + "size": { + Type: "string", + Description: "Optional workload size hint for the VM (e.g., 'small', 'medium', 'large', 'xlarge'). 
Used to auto-select an appropriate instance type if not explicitly specified.", + Examples: []interface{}{"small", "medium", "large"}, + }, + "performance": { + Type: "string", + Description: "Optional performance family hint for the VM instance type (e.g., 'u1' for general-purpose, 'o1' for overcommitted, 'c1' for compute-optimized, 'm1' for memory-optimized). Defaults to 'u1' (general-purpose) if not specified.", + Examples: []interface{}{"general-purpose", "overcommitted", "compute-optimized", "memory-optimized"}, + }, + "autostart": { + Type: "boolean", + Description: "Optional flag to automatically start the VM after creation (sets runStrategy to Always instead of Halted). Defaults to false.", + }, + }, + Required: []string{"namespace", "name"}, + }, + Annotations: api.ToolAnnotations{ + Title: "Virtual Machine: Create", + ReadOnlyHint: ptr.To(false), + DestructiveHint: ptr.To(true), + IdempotentHint: ptr.To(true), + OpenWorldHint: ptr.To(false), + }, + }, + Handler: create, + }, + } +} + +type vmParams struct { + Namespace string + Name string + ContainerDisk string + Instancetype string + Preference string + UseDataSource bool + DataSourceName string + DataSourceNamespace string + RunStrategy string +} + +type DataSourceInfo struct { + Name string + Namespace string + Source string + DefaultInstancetype string + DefaultPreference string +} + +type PreferenceInfo struct { + Name string +} + +type InstancetypeInfo struct { + Name string + Labels map[string]string +} + +func create(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + // Parse and validate input parameters + createParams, err := parseCreateParameters(params) + if err != nil { + return api.NewToolCallResult("", err), nil + } + + // Search for available DataSources + dataSources, _ := searchDataSources(params, createParams.Workload) + + // Match DataSource based on workload input + matchedDataSource := matchDataSource(dataSources, createParams.Workload) + + // Resolve preference from DataSource defaults or cluster resources + preference := resolvePreference(params, createParams.Preference, matchedDataSource, createParams.Workload, createParams.Namespace) + + // Resolve instancetype from DataSource defaults or size/performance hints + instancetype := resolveInstancetype(params, createParams, matchedDataSource) + + // Build template parameters from resolved resources + templateParams := buildTemplateParams(createParams, matchedDataSource, instancetype, preference) + + // Render the VM YAML + vmYaml, err := renderVMYaml(templateParams) + if err != nil { + return api.NewToolCallResult("", err), nil + } + + // Create the VM in the cluster + resources, err := params.ResourcesCreateOrUpdate(params, vmYaml) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to create VirtualMachine: %w", err)), nil + } + + // Format the output + marshalledYaml, err := output.MarshalYaml(resources) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to marshal created VirtualMachine: %w", err)), nil + } + + return api.NewToolCallResult("# VirtualMachine created successfully\n"+marshalledYaml, nil), nil +} + +// createParameters holds parsed input parameters for VM creation +type createParameters struct { + Namespace string + Name string + Workload string + Instancetype string + Preference string + Size string + Performance string + Autostart bool +} + +// parseCreateParameters parses and validates input parameters +func parseCreateParameters(params api.ToolHandlerParams) (*createParameters, error) 
{ + namespace, err := getRequiredString(params, "namespace") + if err != nil { + return nil, err + } + + name, err := getRequiredString(params, "name") + if err != nil { + return nil, err + } + + workload := getOptionalString(params, "workload") + if workload == "" { + workload = "fedora" // Default to fedora if not specified + } + + performance := normalizePerformance(getOptionalString(params, "performance")) + autostart := getOptionalBool(params, "autostart") + + return &createParameters{ + Namespace: namespace, + Name: name, + Workload: workload, + Instancetype: getOptionalString(params, "instancetype"), + Preference: getOptionalString(params, "preference"), + Size: getOptionalString(params, "size"), + Performance: performance, + Autostart: autostart, + }, nil +} + +// matchDataSource finds a DataSource that matches the workload input +func matchDataSource(dataSources []DataSourceInfo, workload string) *DataSourceInfo { + normalizedInput := strings.ToLower(strings.TrimSpace(workload)) + + // First try exact match + for i := range dataSources { + ds := &dataSources[i] + if strings.EqualFold(ds.Name, normalizedInput) || strings.EqualFold(ds.Name, workload) { + return ds + } + } + + // If no exact match, try partial matching (e.g., "rhel" matches "rhel9") + // Only match against real DataSources with namespaces, not built-in containerdisks + for i := range dataSources { + ds := &dataSources[i] + // Only do partial matching for real DataSources (those with namespaces) + if ds.Namespace != "" && strings.Contains(strings.ToLower(ds.Name), normalizedInput) { + return ds + } + } + + return nil +} + +// resolvePreference determines the preference to use from DataSource defaults or cluster resources +func resolvePreference(params api.ToolHandlerParams, explicitPreference string, matchedDataSource *DataSourceInfo, workload string, namespace string) string { + // Use explicitly specified preference if provided + if explicitPreference != "" { + return explicitPreference + } + + // Use DataSource default preference if available + if matchedDataSource != nil && matchedDataSource.DefaultPreference != "" { + return matchedDataSource.DefaultPreference + } + + // Try to match preference name against the workload input + preferences := searchPreferences(params, namespace) + normalizedInput := strings.ToLower(strings.TrimSpace(workload)) + + for i := range preferences { + pref := &preferences[i] + // Common patterns: "fedora", "rhel.9", "ubuntu", etc. 
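		// The first preference whose name contains the workload string wins,
		// so list order (cluster-wide first, then namespaced) breaks ties.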
+ if strings.Contains(strings.ToLower(pref.Name), normalizedInput) { + return pref.Name + } + } + + return "" +} + +// resolveInstancetype determines the instancetype to use from DataSource defaults or size/performance hints +func resolveInstancetype(params api.ToolHandlerParams, createParams *createParameters, matchedDataSource *DataSourceInfo) string { + // Use explicitly specified instancetype if provided + if createParams.Instancetype != "" { + return createParams.Instancetype + } + + // Use DataSource default instancetype if available (when size not specified) + if createParams.Size == "" && matchedDataSource != nil && matchedDataSource.DefaultInstancetype != "" { + return matchedDataSource.DefaultInstancetype + } + + // Match instancetype based on size and performance hints + if createParams.Size != "" { + return matchInstancetypeBySize(params, createParams.Size, createParams.Performance, createParams.Namespace) + } + + return "" +} + +// matchInstancetypeBySize finds an instancetype that matches the size and performance hints +func matchInstancetypeBySize(params api.ToolHandlerParams, size, performance, namespace string) string { + instancetypes := searchInstancetypes(params, namespace) + normalizedSize := strings.ToLower(strings.TrimSpace(size)) + normalizedPerformance := strings.ToLower(strings.TrimSpace(performance)) + + // Filter instance types by size + candidatesBySize := filterInstancetypesBySize(instancetypes, normalizedSize) + if len(candidatesBySize) == 0 { + return "" + } + + // Try to match by performance family prefix (e.g., "u1.small") + for i := range candidatesBySize { + it := &candidatesBySize[i] + if strings.HasPrefix(strings.ToLower(it.Name), normalizedPerformance+".") { + return it.Name + } + } + + // Try to match by performance family label + for i := range candidatesBySize { + it := &candidatesBySize[i] + if it.Labels != nil { + if class, ok := it.Labels["instancetype.kubevirt.io/class"]; ok { + if strings.EqualFold(class, normalizedPerformance) { + return it.Name + } + } + } + } + + // Fall back to first candidate that matches size + return candidatesBySize[0].Name +} + +// filterInstancetypesBySize filters instancetypes that contain the size hint in their name +func filterInstancetypesBySize(instancetypes []InstancetypeInfo, normalizedSize string) []InstancetypeInfo { + var candidates []InstancetypeInfo + for i := range instancetypes { + it := &instancetypes[i] + if strings.Contains(strings.ToLower(it.Name), normalizedSize) { + candidates = append(candidates, *it) + } + } + return candidates +} + +// buildTemplateParams constructs the template parameters for VM creation +func buildTemplateParams(createParams *createParameters, matchedDataSource *DataSourceInfo, instancetype, preference string) vmParams { + // Determine runStrategy based on autostart parameter + runStrategy := "Halted" + if createParams.Autostart { + runStrategy = "Always" + } + + params := vmParams{ + Namespace: createParams.Namespace, + Name: createParams.Name, + Instancetype: instancetype, + Preference: preference, + RunStrategy: runStrategy, + } + + if matchedDataSource != nil && matchedDataSource.Namespace != "" { + // Use the matched DataSource (real cluster DataSource with namespace) + params.UseDataSource = true + params.DataSourceName = matchedDataSource.Name + params.DataSourceNamespace = matchedDataSource.Namespace + } else if matchedDataSource != nil { + // Matched a built-in containerdisk (no namespace) + params.ContainerDisk = matchedDataSource.Source + } else { + // No match, 
resolve container disk image from workload name + params.ContainerDisk = resolveContainerDisk(createParams.Workload) + } + + return params +} + +// renderVMYaml renders the VM YAML from template +func renderVMYaml(templateParams vmParams) (string, error) { + tmpl, err := template.New("vm").Parse(vmYamlTemplate) + if err != nil { + return "", fmt.Errorf("failed to parse template: %w", err) + } + + var result strings.Builder + if err := tmpl.Execute(&result, templateParams); err != nil { + return "", fmt.Errorf("failed to render template: %w", err) + } + + return result.String(), nil +} + +// Helper functions + +func normalizePerformance(performance string) string { + // Normalize to lowercase and trim spaces + normalized := strings.ToLower(strings.TrimSpace(performance)) + + // Map natural language terms to instance type prefixes + performanceMap := map[string]string{ + "general-purpose": "u1", + "generalpurpose": "u1", + "general": "u1", + "overcommitted": "o1", + "compute": "c1", + "compute-optimized": "c1", + "computeoptimized": "c1", + "memory-optimized": "m1", + "memoryoptimized": "m1", + "memory": "m1", + "u1": "u1", + "o1": "o1", + "c1": "c1", + "m1": "m1", + } + + // Look up the mapping + if prefix, exists := performanceMap[normalized]; exists { + return prefix + } + + // Default to "u1" (general-purpose) if not recognized or empty + return "u1" +} + +func getRequiredString(params api.ToolHandlerParams, key string) (string, error) { + args := params.GetArguments() + val, ok := args[key] + if !ok { + return "", fmt.Errorf("%s parameter required", key) + } + str, ok := val.(string) + if !ok { + return "", fmt.Errorf("%s parameter must be a string", key) + } + return str, nil +} + +func getOptionalString(params api.ToolHandlerParams, key string) string { + args := params.GetArguments() + val, ok := args[key] + if !ok { + return "" + } + str, ok := val.(string) + if !ok { + return "" + } + return str +} + +func getOptionalBool(params api.ToolHandlerParams, key string) bool { + args := params.GetArguments() + val, ok := args[key] + if !ok { + return false + } + b, ok := val.(bool) + if !ok { + return false + } + return b +} + +// resolveContainerDisk resolves OS names to container disk images from quay.io/containerdisks +func resolveContainerDisk(input string) string { + // If input already looks like a container image, return as-is + if strings.Contains(input, "/") || strings.Contains(input, ":") { + return input + } + + // Common OS name mappings to containerdisk images + osMap := map[string]string{ + "fedora": "quay.io/containerdisks/fedora:latest", + "ubuntu": "quay.io/containerdisks/ubuntu:24.04", + "centos": "quay.io/containerdisks/centos-stream:9-latest", + "centos-stream": "quay.io/containerdisks/centos-stream:9-latest", + "debian": "quay.io/containerdisks/debian:latest", + "opensuse": "quay.io/containerdisks/opensuse-tumbleweed:1.0.0", + "opensuse-tumbleweed": "quay.io/containerdisks/opensuse-tumbleweed:1.0.0", + "opensuse-leap": "quay.io/containerdisks/opensuse-leap:15.6", + // NOTE: The following RHEL images could not be verified due to authentication requirements. 
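+		// A bare "rhel" workload (listed in the tool's parameter description) is
+		// assumed here to mean the RHEL 9 guest image; adjust if another default fits.
+		"rhel":   "registry.redhat.io/rhel9/rhel-guest-image:latest",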
+ "rhel8": "registry.redhat.io/rhel8/rhel-guest-image:latest", + "rhel9": "registry.redhat.io/rhel9/rhel-guest-image:latest", + "rhel10": "registry.redhat.io/rhel10/rhel-guest-image:latest", + } + + // Normalize input to lowercase for lookup + normalized := strings.ToLower(strings.TrimSpace(input)) + + // Look up the OS name + if containerDisk, exists := osMap[normalized]; exists { + return containerDisk + } + + // If no match found, return the input as-is (assume it's a valid container image URL) + return input +} + +// getDefaultContainerDisks returns a list of common containerdisk images +func getDefaultContainerDisks() []DataSourceInfo { + return []DataSourceInfo{ + { + Name: "fedora", + Source: "quay.io/containerdisks/fedora:latest", + }, + { + Name: "ubuntu", + Source: "quay.io/containerdisks/ubuntu:24.04", + }, + { + Name: "centos-stream", + Source: "quay.io/containerdisks/centos-stream:9-latest", + }, + { + Name: "debian", + Source: "quay.io/containerdisks/debian:latest", + }, + { + Name: "rhel8", + Source: "registry.redhat.io/rhel8/rhel-guest-image:latest", + }, + { + Name: "rhel9", + Source: "registry.redhat.io/rhel9/rhel-guest-image:latest", + }, + { + Name: "rhel10", + Source: "registry.redhat.io/rhel10/rhel-guest-image:latest", + }, + } +} + +// searchDataSources searches for DataSource resources in the cluster +func searchDataSources(params api.ToolHandlerParams, query string) ([]DataSourceInfo, error) { + // Get dynamic client for querying DataSources + dynamicClient, err := getDynamicClient(params) + if err != nil { + // Return just the built-in containerdisk images + return getDefaultContainerDisks(), nil + } + + // DataSource GVR for CDI + dataSourceGVR := schema.GroupVersionResource{ + Group: "cdi.kubevirt.io", + Version: "v1beta1", + Resource: "datasources", + } + + // Collect DataSources from well-known namespaces and all namespaces + results := collectDataSources(params, dynamicClient, dataSourceGVR) + + // Add common containerdisk images + results = append(results, getDefaultContainerDisks()...) + + // Return helpful message if no sources found + if len(results) == 0 { + return []DataSourceInfo{ + { + Name: "No sources available", + Namespace: "", + Source: "No DataSources or containerdisks found", + }, + }, nil + } + + return results, nil +} + +// getDynamicClient creates a dynamic Kubernetes client from the provided parameters +func getDynamicClient(params api.ToolHandlerParams) (dynamic.Interface, error) { + // Handle nil or invalid clients gracefully (e.g., in test environments) + if params.Kubernetes == nil { + return nil, fmt.Errorf("kubernetes client is nil") + } + + restConfig := params.RESTConfig() + if restConfig == nil { + return nil, fmt.Errorf("REST config is nil") + } + + return dynamic.NewForConfig(restConfig) +} + +// collectDataSources collects DataSources from well-known namespaces and all namespaces +func collectDataSources(params api.ToolHandlerParams, dynamicClient dynamic.Interface, gvr schema.GroupVersionResource) []DataSourceInfo { + var results []DataSourceInfo + + // Try to list DataSources from well-known namespaces first + wellKnownNamespaces := []string{ + "openshift-virtualization-os-images", + "kubevirt-os-images", + } + + for _, ns := range wellKnownNamespaces { + dsInfos, err := listDataSourcesFromNamespace(params, dynamicClient, gvr, ns) + if err == nil { + results = append(results, dsInfos...) 
+ } + } + + // List DataSources from all namespaces + list, err := dynamicClient.Resource(gvr).List(params.Context, metav1.ListOptions{}) + if err != nil { + // If we found DataSources from well-known namespaces but couldn't list all, return what we have + if len(results) > 0 { + return results + } + // DataSources might not be available, return helpful message + return []DataSourceInfo{ + { + Name: "No DataSources found", + Namespace: "", + Source: "CDI may not be installed or DataSources are not available in this cluster", + }, + } + } + + // Deduplicate and add DataSources from all namespaces + results = deduplicateAndMergeDataSources(results, list.Items) + + return results +} + +// deduplicateAndMergeDataSources merges new DataSources with existing ones, avoiding duplicates +func deduplicateAndMergeDataSources(existing []DataSourceInfo, items []unstructured.Unstructured) []DataSourceInfo { + // Create a map to track already seen DataSources + seen := make(map[string]bool) + for _, ds := range existing { + key := ds.Namespace + "/" + ds.Name + seen[key] = true + } + + // Add new DataSources that haven't been seen + for _, item := range items { + name := item.GetName() + namespace := item.GetNamespace() + key := namespace + "/" + name + + // Skip if we've already added this DataSource + if seen[key] { + continue + } + + labels := item.GetLabels() + source := extractDataSourceInfo(&item) + + // Extract default instancetype and preference from labels + defaultInstancetype := "" + defaultPreference := "" + if labels != nil { + defaultInstancetype = labels[defaultInstancetypeLabel] + defaultPreference = labels[defaultPreferenceLabel] + } + + existing = append(existing, DataSourceInfo{ + Name: name, + Namespace: namespace, + Source: source, + DefaultInstancetype: defaultInstancetype, + DefaultPreference: defaultPreference, + }) + } + + return existing +} + +// listDataSourcesFromNamespace lists DataSources from a specific namespace +func listDataSourcesFromNamespace(params api.ToolHandlerParams, dynamicClient dynamic.Interface, gvr schema.GroupVersionResource, namespace string) ([]DataSourceInfo, error) { + var results []DataSourceInfo + list, err := dynamicClient.Resource(gvr).Namespace(namespace).List(params.Context, metav1.ListOptions{}) + if err != nil { + return nil, err + } + + for _, item := range list.Items { + name := item.GetName() + ns := item.GetNamespace() + labels := item.GetLabels() + + // Extract source information from the DataSource spec + source := extractDataSourceInfo(&item) + + // Extract default instancetype and preference from labels + defaultInstancetype := "" + defaultPreference := "" + if labels != nil { + defaultInstancetype = labels[defaultInstancetypeLabel] + defaultPreference = labels[defaultPreferenceLabel] + } + + results = append(results, DataSourceInfo{ + Name: name, + Namespace: ns, + Source: source, + DefaultInstancetype: defaultInstancetype, + DefaultPreference: defaultPreference, + }) + } + + return results, nil +} + +// searchPreferences searches for both cluster-wide and namespaced VirtualMachinePreference resources +func searchPreferences(params api.ToolHandlerParams, namespace string) []PreferenceInfo { + // Handle nil or invalid clients gracefully (e.g., in test environments) + if params.Kubernetes == nil { + return []PreferenceInfo{} + } + + restConfig := params.RESTConfig() + if restConfig == nil { + return []PreferenceInfo{} + } + + dynamicClient, err := dynamic.NewForConfig(restConfig) + if err != nil { + return []PreferenceInfo{} + } + + var 
results []PreferenceInfo + + // Search for cluster-wide VirtualMachineClusterPreferences + clusterPreferenceGVR := schema.GroupVersionResource{ + Group: "instancetype.kubevirt.io", + Version: "v1beta1", + Resource: "virtualmachineclusterpreferences", + } + + clusterList, err := dynamicClient.Resource(clusterPreferenceGVR).List(params.Context, metav1.ListOptions{}) + if err == nil { + for _, item := range clusterList.Items { + results = append(results, PreferenceInfo{ + Name: item.GetName(), + }) + } + } + + // Search for namespaced VirtualMachinePreferences + namespacedPreferenceGVR := schema.GroupVersionResource{ + Group: "instancetype.kubevirt.io", + Version: "v1beta1", + Resource: "virtualmachinepreferences", + } + + namespacedList, err := dynamicClient.Resource(namespacedPreferenceGVR).Namespace(namespace).List(params.Context, metav1.ListOptions{}) + if err == nil { + for _, item := range namespacedList.Items { + results = append(results, PreferenceInfo{ + Name: item.GetName(), + }) + } + } + + return results +} + +// searchInstancetypes searches for both cluster-wide and namespaced VirtualMachineInstancetype resources +func searchInstancetypes(params api.ToolHandlerParams, namespace string) []InstancetypeInfo { + // Handle nil or invalid clients gracefully (e.g., in test environments) + if params.Kubernetes == nil { + return []InstancetypeInfo{} + } + + restConfig := params.RESTConfig() + if restConfig == nil { + return []InstancetypeInfo{} + } + + dynamicClient, err := dynamic.NewForConfig(restConfig) + if err != nil { + return []InstancetypeInfo{} + } + + var results []InstancetypeInfo + + // Search for cluster-wide VirtualMachineClusterInstancetypes + clusterInstancetypeGVR := schema.GroupVersionResource{ + Group: "instancetype.kubevirt.io", + Version: "v1beta1", + Resource: "virtualmachineclusterinstancetypes", + } + + clusterList, err := dynamicClient.Resource(clusterInstancetypeGVR).List(params.Context, metav1.ListOptions{}) + if err == nil { + for _, item := range clusterList.Items { + results = append(results, InstancetypeInfo{ + Name: item.GetName(), + Labels: item.GetLabels(), + }) + } + } + + // Search for namespaced VirtualMachineInstancetypes + namespacedInstancetypeGVR := schema.GroupVersionResource{ + Group: "instancetype.kubevirt.io", + Version: "v1beta1", + Resource: "virtualmachineinstancetypes", + } + + namespacedList, err := dynamicClient.Resource(namespacedInstancetypeGVR).Namespace(namespace).List(params.Context, metav1.ListOptions{}) + if err == nil { + for _, item := range namespacedList.Items { + results = append(results, InstancetypeInfo{ + Name: item.GetName(), + Labels: item.GetLabels(), + }) + } + } + + return results +} + +// extractDataSourceInfo extracts source information from a DataSource object +func extractDataSourceInfo(obj *unstructured.Unstructured) string { + // Try to get the source from spec.source + spec, found, err := unstructured.NestedMap(obj.Object, "spec", "source") + if err != nil || !found { + return "unknown source" + } + + // Check for PVC source + if pvcInfo, found, _ := unstructured.NestedMap(spec, "pvc"); found { + if pvcName, found, _ := unstructured.NestedString(pvcInfo, "name"); found { + if pvcNamespace, found, _ := unstructured.NestedString(pvcInfo, "namespace"); found { + return fmt.Sprintf("PVC: %s/%s", pvcNamespace, pvcName) + } + return fmt.Sprintf("PVC: %s", pvcName) + } + } + + // Check for registry source + if registryInfo, found, _ := unstructured.NestedMap(spec, "registry"); found { + if url, found, _ := 
unstructured.NestedString(registryInfo, "url"); found { + return fmt.Sprintf("Registry: %s", url) + } + } + + // Check for http source + if url, found, _ := unstructured.NestedString(spec, "http", "url"); found { + return fmt.Sprintf("HTTP: %s", url) + } + + return "DataSource (type unknown)" +} diff --git a/pkg/toolsets/kubevirt/vm/create/tool_test.go b/pkg/toolsets/kubevirt/vm/create/tool_test.go new file mode 100644 index 00000000..00742bc0 --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/create/tool_test.go @@ -0,0 +1,198 @@ +package create + +import ( + "strings" + "testing" +) + +// Test the YAML rendering directly without creating resources +func TestRenderVMYaml(t *testing.T) { + tests := []struct { + name string + params vmParams + wantErr bool + checkFunc func(t *testing.T, result string) + }{ + { + name: "renders VM with basic settings", + params: vmParams{ + Namespace: "test-ns", + Name: "test-vm", + ContainerDisk: "quay.io/containerdisks/fedora:latest", + RunStrategy: "Halted", + }, + wantErr: false, + checkFunc: func(t *testing.T, result string) { + if !strings.Contains(result, "apiVersion: kubevirt.io/v1") { + t.Errorf("Expected apiVersion in YAML") + } + if !strings.Contains(result, "kind: VirtualMachine") { + t.Errorf("Expected kind VirtualMachine in YAML") + } + if !strings.Contains(result, "name: test-vm") { + t.Errorf("Expected VM name test-vm in YAML") + } + if !strings.Contains(result, "namespace: test-ns") { + t.Errorf("Expected namespace test-ns in YAML") + } + if !strings.Contains(result, "quay.io/containerdisks/fedora:latest") { + t.Errorf("Expected fedora container disk in result") + } + if !strings.Contains(result, "guest: 2Gi") { + t.Errorf("Expected guest: 2Gi in YAML manifest") + } + }, + }, + { + name: "renders VM with instancetype", + params: vmParams{ + Namespace: "test-ns", + Name: "test-vm", + ContainerDisk: "quay.io/containerdisks/ubuntu:24.04", + Instancetype: "u1.medium", + RunStrategy: "Halted", + }, + wantErr: false, + checkFunc: func(t *testing.T, result string) { + if !strings.Contains(result, "name: u1.medium") { + t.Errorf("Expected instance type in YAML manifest") + } + if !strings.Contains(result, "kind: VirtualMachineClusterInstancetype") { + t.Errorf("Expected VirtualMachineClusterInstancetype in YAML manifest") + } + // When instancetype is set, memory should not be in the YAML + if strings.Contains(result, "guest: 2Gi") { + t.Errorf("Should not have guest memory when instancetype is specified") + } + }, + }, + { + name: "renders VM with preference", + params: vmParams{ + Namespace: "test-ns", + Name: "test-vm", + ContainerDisk: "registry.redhat.io/rhel9/rhel-guest-image:latest", + Preference: "rhel.9", + RunStrategy: "Halted", + }, + wantErr: false, + checkFunc: func(t *testing.T, result string) { + if !strings.Contains(result, "name: rhel.9") { + t.Errorf("Expected preference in YAML manifest") + } + if !strings.Contains(result, "kind: VirtualMachineClusterPreference") { + t.Errorf("Expected VirtualMachineClusterPreference in YAML manifest") + } + }, + }, + { + name: "renders VM with custom container disk", + params: vmParams{ + Namespace: "test-ns", + Name: "test-vm", + ContainerDisk: "quay.io/myrepo/myimage:v1.0", + RunStrategy: "Halted", + }, + wantErr: false, + checkFunc: func(t *testing.T, result string) { + if !strings.Contains(result, "quay.io/myrepo/myimage:v1.0") { + t.Errorf("Expected custom container disk in YAML") + } + }, + }, + { + name: "renders VM with DataSource", + params: vmParams{ + Namespace: "test-ns", + Name: "test-vm", 
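+				// ContainerDisk is deliberately empty here: DataSource-backed VMs
+				// render dataVolumeTemplates instead of a containerDisk volume.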
+ UseDataSource: true, + DataSourceName: "fedora", + DataSourceNamespace: "openshift-virtualization-os-images", + RunStrategy: "Halted", + }, + wantErr: false, + checkFunc: func(t *testing.T, result string) { + if !strings.Contains(result, "dataVolumeTemplates") { + t.Errorf("Expected dataVolumeTemplates in YAML") + } + if !strings.Contains(result, "kind: DataSource") { + t.Errorf("Expected DataSource kind in YAML") + } + if !strings.Contains(result, "name: fedora") { + t.Errorf("Expected DataSource name in YAML") + } + if !strings.Contains(result, "openshift-virtualization-os-images") { + t.Errorf("Expected DataSource namespace in YAML") + } + }, + }, + { + name: "renders VM with autostart (runStrategy Always)", + params: vmParams{ + Namespace: "test-ns", + Name: "test-vm", + ContainerDisk: "quay.io/containerdisks/fedora:latest", + RunStrategy: "Always", + }, + wantErr: false, + checkFunc: func(t *testing.T, result string) { + if !strings.Contains(result, "runStrategy: Always") { + t.Errorf("Expected runStrategy: Always in YAML") + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := renderVMYaml(tt.params) + + if tt.wantErr { + if err == nil { + t.Error("Expected error, got nil") + } + } else { + if err != nil { + t.Errorf("Expected no error, got: %v", err) + } + if result == "" { + t.Error("Expected non-empty result") + } + if tt.checkFunc != nil { + tt.checkFunc(t, result) + } + } + }) + } +} + +func TestResolveContainerDisk(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + {"fedora", "fedora", "quay.io/containerdisks/fedora:latest"}, + {"ubuntu", "ubuntu", "quay.io/containerdisks/ubuntu:24.04"}, + {"rhel8", "rhel8", "registry.redhat.io/rhel8/rhel-guest-image:latest"}, + {"rhel9", "rhel9", "registry.redhat.io/rhel9/rhel-guest-image:latest"}, + {"rhel10", "rhel10", "registry.redhat.io/rhel10/rhel-guest-image:latest"}, + {"centos", "centos", "quay.io/containerdisks/centos-stream:9-latest"}, + {"centos-stream", "centos-stream", "quay.io/containerdisks/centos-stream:9-latest"}, + {"debian", "debian", "quay.io/containerdisks/debian:latest"}, + {"case insensitive", "FEDORA", "quay.io/containerdisks/fedora:latest"}, + {"with whitespace", " ubuntu ", "quay.io/containerdisks/ubuntu:24.04"}, + {"custom image", "quay.io/myrepo/myimage:v1", "quay.io/myrepo/myimage:v1"}, + {"with tag", "myimage:latest", "myimage:latest"}, + {"unknown OS", "customos", "customos"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := resolveContainerDisk(tt.input) + if result != tt.expected { + t.Errorf("resolveContainerDisk(%s) = %s, want %s", tt.input, result, tt.expected) + } + }) + } +} diff --git a/pkg/toolsets/kubevirt/vm/create/vm.yaml.tmpl b/pkg/toolsets/kubevirt/vm/create/vm.yaml.tmpl new file mode 100644 index 00000000..9982d4a9 --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/create/vm.yaml.tmpl @@ -0,0 +1,50 @@ +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + name: {{.Name}} + namespace: {{.Namespace}} +spec: + runStrategy: {{.RunStrategy}} +{{- if .Instancetype}} + instancetype: + name: {{.Instancetype}} + kind: VirtualMachineClusterInstancetype +{{- end}} +{{- if .Preference}} + preference: + name: {{.Preference}} + kind: VirtualMachineClusterPreference +{{- end}} +{{- if .UseDataSource}} + dataVolumeTemplates: + - metadata: + name: {{.Name}}-rootdisk + spec: + sourceRef: + kind: DataSource + name: {{.DataSourceName}} + namespace: {{.DataSourceNamespace}} + storage: + 
resources: + requests: + storage: 30Gi +{{- end}} + template: + spec: + domain: + devices: + disks: + - name: {{.Name}}-rootdisk +{{- if not .Instancetype}} + memory: + guest: 2Gi +{{- end}} + volumes: + - name: {{.Name}}-rootdisk +{{- if .UseDataSource}} + dataVolume: + name: {{.Name}}-rootdisk +{{- else}} + containerDisk: + image: {{.ContainerDisk}} +{{- end}} diff --git a/pkg/toolsets/kubevirt/vm/start/tool.go b/pkg/toolsets/kubevirt/vm/start/tool.go new file mode 100644 index 00000000..b2784d2f --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/start/tool.go @@ -0,0 +1,123 @@ +package start + +import ( + "fmt" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + "github.com/containers/kubernetes-mcp-server/pkg/output" + "github.com/google/jsonschema-go/jsonschema" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/utils/ptr" +) + +func Tools() []api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "vm_start", + Description: "Start a halted or stopped VirtualMachine by changing its runStrategy to Always", + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "namespace": { + Type: "string", + Description: "The namespace of the virtual machine", + }, + "name": { + Type: "string", + Description: "The name of the virtual machine to start", + }, + }, + Required: []string{"namespace", "name"}, + }, + Annotations: api.ToolAnnotations{ + Title: "Virtual Machine: Start", + ReadOnlyHint: ptr.To(false), + DestructiveHint: ptr.To(false), + IdempotentHint: ptr.To(true), + OpenWorldHint: ptr.To(false), + }, + }, + Handler: start, + }, + } +} + +func start(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + // Parse required parameters + namespace, err := getRequiredString(params, "namespace") + if err != nil { + return api.NewToolCallResult("", err), nil + } + + name, err := getRequiredString(params, "name") + if err != nil { + return api.NewToolCallResult("", err), nil + } + + // Get dynamic client + restConfig := params.RESTConfig() + if restConfig == nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get REST config")), nil + } + + dynamicClient, err := dynamic.NewForConfig(restConfig) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to create dynamic client: %w", err)), nil + } + + // Get the current VM + gvr := schema.GroupVersionResource{ + Group: "kubevirt.io", + Version: "v1", + Resource: "virtualmachines", + } + + vm, err := dynamicClient.Resource(gvr).Namespace(namespace).Get( + params.Context, + name, + metav1.GetOptions{}, + ) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get VirtualMachine: %w", err)), nil + } + + // Update runStrategy to Always + if err := unstructured.SetNestedField(vm.Object, "Always", "spec", "runStrategy"); err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to set runStrategy: %w", err)), nil + } + + // Update the VM + updatedVM, err := dynamicClient.Resource(gvr).Namespace(namespace).Update( + params.Context, + vm, + metav1.UpdateOptions{}, + ) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to update VirtualMachine: %w", err)), nil + } + + // Format the output + marshalledYaml, err := output.MarshalYaml(updatedVM) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to marshal VirtualMachine: %w", err)), nil + } + + return 
api.NewToolCallResult("# VirtualMachine started successfully\n"+marshalledYaml, nil), nil +} + +func getRequiredString(params api.ToolHandlerParams, key string) (string, error) { + args := params.GetArguments() + val, ok := args[key] + if !ok { + return "", fmt.Errorf("%s parameter required", key) + } + str, ok := val.(string) + if !ok { + return "", fmt.Errorf("%s parameter must be a string", key) + } + return str, nil +} diff --git a/pkg/toolsets/kubevirt/vm/stop/tool.go b/pkg/toolsets/kubevirt/vm/stop/tool.go new file mode 100644 index 00000000..6ab03485 --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/stop/tool.go @@ -0,0 +1,123 @@ +package stop + +import ( + "fmt" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + "github.com/containers/kubernetes-mcp-server/pkg/output" + "github.com/google/jsonschema-go/jsonschema" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/utils/ptr" +) + +func Tools() []api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "vm_stop", + Description: "Stop a running VirtualMachine by changing its runStrategy to Halted", + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "namespace": { + Type: "string", + Description: "The namespace of the virtual machine", + }, + "name": { + Type: "string", + Description: "The name of the virtual machine to stop", + }, + }, + Required: []string{"namespace", "name"}, + }, + Annotations: api.ToolAnnotations{ + Title: "Virtual Machine: Stop", + ReadOnlyHint: ptr.To(false), + DestructiveHint: ptr.To(false), + IdempotentHint: ptr.To(true), + OpenWorldHint: ptr.To(false), + }, + }, + Handler: stop, + }, + } +} + +func stop(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + // Parse required parameters + namespace, err := getRequiredString(params, "namespace") + if err != nil { + return api.NewToolCallResult("", err), nil + } + + name, err := getRequiredString(params, "name") + if err != nil { + return api.NewToolCallResult("", err), nil + } + + // Get dynamic client + restConfig := params.RESTConfig() + if restConfig == nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get REST config")), nil + } + + dynamicClient, err := dynamic.NewForConfig(restConfig) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to create dynamic client: %w", err)), nil + } + + // Get the current VM + gvr := schema.GroupVersionResource{ + Group: "kubevirt.io", + Version: "v1", + Resource: "virtualmachines", + } + + vm, err := dynamicClient.Resource(gvr).Namespace(namespace).Get( + params.Context, + name, + metav1.GetOptions{}, + ) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get VirtualMachine: %w", err)), nil + } + + // Update runStrategy to Halted + if err := unstructured.SetNestedField(vm.Object, "Halted", "spec", "runStrategy"); err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to set runStrategy: %w", err)), nil + } + + // Update the VM + updatedVM, err := dynamicClient.Resource(gvr).Namespace(namespace).Update( + params.Context, + vm, + metav1.UpdateOptions{}, + ) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to update VirtualMachine: %w", err)), nil + } + + // Format the output + marshalledYaml, err := output.MarshalYaml(updatedVM) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to marshal VirtualMachine: 
%w", err)), nil + } + + return api.NewToolCallResult("# VirtualMachine stopped successfully\n"+marshalledYaml, nil), nil +} + +func getRequiredString(params api.ToolHandlerParams, key string) (string, error) { + args := params.GetArguments() + val, ok := args[key] + if !ok { + return "", fmt.Errorf("%s parameter required", key) + } + str, ok := val.(string) + if !ok { + return "", fmt.Errorf("%s parameter must be a string", key) + } + return str, nil +} diff --git a/pkg/toolsets/kubevirt/vm/troubleshoot/plan.tmpl b/pkg/toolsets/kubevirt/vm/troubleshoot/plan.tmpl new file mode 100644 index 00000000..abc9e22a --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/troubleshoot/plan.tmpl @@ -0,0 +1,188 @@ +# VirtualMachine Troubleshooting Guide + +## VM: {{.Name}} (namespace: {{.Namespace}}) + +Follow these steps to diagnose issues with the VirtualMachine: + +--- + +## Step 1: Check VirtualMachine Status + +Use the `resources_get` tool to inspect the VirtualMachine: +- **apiVersion**: `kubevirt.io/v1` +- **kind**: `VirtualMachine` +- **namespace**: `{{.Namespace}}` +- **name**: `{{.Name}}` + +**What to look for:** +- `status.printableStatus` - Should be "Running" for a healthy VM +- `status.ready` - Should be `true` +- `status.conditions` - Look for conditions with `status: "False"` or error messages +- `spec.runStrategy` - Check if it's "Always", "Manual", "Halted", or "RerunOnFailure" + +--- + +## Step 2: Check VirtualMachineInstance Status + +If the VM exists but isn't running, check if a VirtualMachineInstance was created: + +Use the `resources_get` tool: +- **apiVersion**: `kubevirt.io/v1` +- **kind**: `VirtualMachineInstance` +- **namespace**: `{{.Namespace}}` +- **name**: `{{.Name}}` + +**What to look for:** +- `status.phase` - Should be "Running" for a healthy VMI +- `status.conditions` - Check for "Ready" condition with `status: "True"` +- `status.guestOSInfo` - Confirms guest agent is running +- If VMI doesn't exist and VM runStrategy is "Always", this indicates a problem + +--- + +## Step 3: Check DataVolume Status (if applicable) + +If the VM uses DataVolumeTemplates, check their status: + +Use the `resources_list` tool: +- **apiVersion**: `cdi.kubevirt.io/v1beta1` +- **kind**: `DataVolume` +- **namespace**: `{{.Namespace}}` + +Look for DataVolumes with names starting with `{{.Name}}-` + +**What to look for:** +- `status.phase` - Should be "Succeeded" when ready +- `status.progress` - Shows import/clone progress (e.g., "100.0%") +- Common issues: + - Phase "Pending" - Waiting for resources + - Phase "ImportScheduled" or "ImportInProgress" - Still importing + - Phase "Failed" - Check `status.conditions` for error details + +### Check Underlying PersistentVolumeClaims + +DataVolumes create PVCs to provision storage. 
Check the PVC status: + +Use the `resources_list` tool: +- **apiVersion**: `v1` +- **kind**: `PersistentVolumeClaim` +- **namespace**: `{{.Namespace}}` + +Look for PVCs with names matching the DataVolume names (typically `{{.Name}}-*`) + +Or inspect a specific PVC with `resources_get`: +- **apiVersion**: `v1` +- **kind**: `PersistentVolumeClaim` +- **namespace**: `{{.Namespace}}` +- **name**: (name from DataVolume or VM volumes) + +**What to look for:** +- `status.phase` - Should be "Bound" when ready +- `spec.storageClassName` - Verify the storage class exists and is available +- `status.capacity.storage` - Confirms allocated storage size +- Common PVC issues: + - Phase "Pending" - Storage class not available, insufficient storage, or provisioner issues + - Missing PVC - DataVolume creation may have failed + - Incorrect size - Check if requested size matches available storage + +**Check Storage Class:** + +If PVC is stuck in "Pending", verify the storage class exists: + +Use the `resources_get` tool: +- **apiVersion**: `storage.k8s.io/v1` +- **kind**: `StorageClass` +- **name**: (from PVC `spec.storageClassName`) + +Ensure the storage class provisioner is healthy and has capacity. + +--- + +## Step 4: Check virt-launcher Pod + +The virt-launcher pod runs the actual VM. Find and inspect it: + +Use the `pods_list_in_namespace` tool: +- **namespace**: `{{.Namespace}}` +- **labelSelector**: `kubevirt.io=virt-launcher,vm.kubevirt.io/name={{.Name}}` + +**What to look for:** +- Pod should be in "Running" phase +- All containers should be ready (e.g., "2/2") +- Check pod events and conditions for errors + +If pod exists, get detailed status with `pods_get`: +- **namespace**: `{{.Namespace}}` +- **name**: `virt-launcher-{{.Name}}-xxxxx` (use actual pod name from list) + +Get pod logs with `pods_log`: +- **namespace**: `{{.Namespace}}` +- **name**: `virt-launcher-{{.Name}}-xxxxx` +- **container**: `compute` (main VM container) + +--- + +## Step 5: Check Events + +Events provide crucial diagnostic information: + +Use the `events_list` tool: +- **namespace**: `{{.Namespace}}` + +Filter output for events related to `{{.Name}}` - look for warnings or errors. 
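+
+As a rough illustration, a scheduling failure for the virt-launcher pod surfaces as a `Warning` event shaped roughly like this (field values are examples only; metadata omitted):
+
+```yaml
+apiVersion: v1
+kind: Event
+type: Warning
+reason: FailedScheduling
+involvedObject:
+  kind: Pod
+  name: virt-launcher-{{.Name}}-abcde   # pod suffix is illustrative
+  namespace: {{.Namespace}}
+message: '0/3 nodes are available: 3 Insufficient memory.'
+```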
+ +--- + +## Step 6: Check Instance Type and Preference (if used) + +If the VM uses instance types or preferences, verify they exist: + +For instance types, use `resources_get`: +- **apiVersion**: `instancetype.kubevirt.io/v1beta1` +- **kind**: `VirtualMachineClusterInstancetype` +- **name**: (check VM spec for instancetype name) + +For preferences, use `resources_get`: +- **apiVersion**: `instancetype.kubevirt.io/v1beta1` +- **kind**: `VirtualMachineClusterPreference` +- **name**: (check VM spec for preference name) + +--- + +## Common Issues and Solutions + +### VM stuck in "Stopped" or "Halted" +- Check `spec.runStrategy` - if "Halted", the VM is intentionally stopped +- Change runStrategy to "Always" to start the VM + +### VMI doesn't exist +- Check VM conditions for admission errors +- Verify instance type and preference exist +- Check resource quotas in the namespace + +### DataVolume stuck in "ImportInProgress" +- Check CDI controller pods in `cdi` namespace +- Verify source image is accessible +- Check PVC storage class exists and has available capacity + +### virt-launcher pod in CrashLoopBackOff +- Check pod logs for container `compute` +- Common causes: + - Insufficient resources (CPU/memory) + - Invalid VM configuration + - Storage issues (PVC not available) + +### VM starts but guest doesn't boot +- Check virt-launcher logs for QEMU errors +- Verify boot disk is properly configured +- Check if guest agent is installed (for cloud images) +- Ensure correct architecture (amd64 vs arm64) + +--- + +## Additional Resources + +For more detailed diagnostics: +- Check KubeVirt components: `pods_list` in `kubevirt` namespace +- Check CDI components: `pods_list` in `cdi` namespace (if using DataVolumes) +- Review resource consumption: `pods_top` for the virt-launcher pod diff --git a/pkg/toolsets/kubevirt/vm/troubleshoot/tool.go b/pkg/toolsets/kubevirt/vm/troubleshoot/tool.go new file mode 100644 index 00000000..7e0f8ead --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/troubleshoot/tool.go @@ -0,0 +1,98 @@ +package troubleshoot + +import ( + _ "embed" + "fmt" + "strings" + "text/template" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + "github.com/google/jsonschema-go/jsonschema" + "k8s.io/utils/ptr" +) + +//go:embed plan.tmpl +var planTemplate string + +func Tools() []api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "vm_troubleshoot", + Description: "Generate a comprehensive troubleshooting guide for a VirtualMachine, providing step-by-step instructions to diagnose common issues", + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "namespace": { + Type: "string", + Description: "The namespace of the virtual machine", + }, + "name": { + Type: "string", + Description: "The name of the virtual machine", + }, + }, + Required: []string{"namespace", "name"}, + }, + Annotations: api.ToolAnnotations{ + Title: "Virtual Machine: Troubleshoot", + ReadOnlyHint: ptr.To(true), + DestructiveHint: ptr.To(false), + IdempotentHint: ptr.To(true), + OpenWorldHint: ptr.To(false), + }, + }, + Handler: troubleshoot, + }, + } +} + +type troubleshootParams struct { + Namespace string + Name string +} + +func troubleshoot(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + // Parse required parameters + namespace, err := getRequiredString(params, "namespace") + if err != nil { + return api.NewToolCallResult("", err), nil + } + + name, err := getRequiredString(params, "name") + if err != nil { + return 
api.NewToolCallResult("", err), nil + } + + // Prepare template parameters + templateParams := troubleshootParams{ + Namespace: namespace, + Name: name, + } + + // Render template + tmpl, err := template.New("troubleshoot").Parse(planTemplate) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to parse template: %w", err)), nil + } + + var result strings.Builder + if err := tmpl.Execute(&result, templateParams); err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to render template: %w", err)), nil + } + + return api.NewToolCallResult(result.String(), nil), nil +} + +func getRequiredString(params api.ToolHandlerParams, key string) (string, error) { + args := params.GetArguments() + val, ok := args[key] + if !ok { + return "", fmt.Errorf("%s parameter required", key) + } + str, ok := val.(string) + if !ok { + return "", fmt.Errorf("%s parameter must be a string", key) + } + return str, nil +} diff --git a/pkg/toolsets/kubevirt/vm/troubleshoot/tool_test.go b/pkg/toolsets/kubevirt/vm/troubleshoot/tool_test.go new file mode 100644 index 00000000..8d371d42 --- /dev/null +++ b/pkg/toolsets/kubevirt/vm/troubleshoot/tool_test.go @@ -0,0 +1,110 @@ +package troubleshoot + +import ( + "context" + "strings" + "testing" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + internalk8s "github.com/containers/kubernetes-mcp-server/pkg/kubernetes" +) + +type mockToolCallRequest struct { + arguments map[string]interface{} +} + +func (m *mockToolCallRequest) GetArguments() map[string]any { + return m.arguments +} + +func TestTroubleshoot(t *testing.T) { + tests := []struct { + name string + args map[string]interface{} + wantErr bool + checkFunc func(t *testing.T, result string) + }{ + { + name: "generates troubleshooting guide", + args: map[string]interface{}{ + "namespace": "test-ns", + "name": "test-vm", + }, + wantErr: false, + checkFunc: func(t *testing.T, result string) { + if !strings.Contains(result, "VirtualMachine Troubleshooting Guide") { + t.Errorf("Expected troubleshooting guide header") + } + if !strings.Contains(result, "test-vm") { + t.Errorf("Expected VM name in guide") + } + if !strings.Contains(result, "test-ns") { + t.Errorf("Expected namespace in guide") + } + if !strings.Contains(result, "Step 1: Check VirtualMachine Status") { + t.Errorf("Expected step 1 header") + } + if !strings.Contains(result, "resources_get") { + t.Errorf("Expected resources_get tool reference") + } + if !strings.Contains(result, "VirtualMachineInstance") { + t.Errorf("Expected VMI section") + } + if !strings.Contains(result, "virt-launcher") { + t.Errorf("Expected virt-launcher pod section") + } + }, + }, + { + name: "missing namespace", + args: map[string]interface{}{ + "name": "test-vm", + }, + wantErr: true, + }, + { + name: "missing name", + args: map[string]interface{}{ + "namespace": "test-ns", + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + params := api.ToolHandlerParams{ + Context: context.Background(), + Kubernetes: &internalk8s.Kubernetes{}, + ToolCallRequest: &mockToolCallRequest{arguments: tt.args}, + } + + result, err := troubleshoot(params) + if err != nil { + t.Errorf("troubleshoot() unexpected Go error: %v", err) + return + } + + if result == nil { + t.Error("Expected non-nil result") + return + } + + if tt.wantErr { + if result.Error == nil { + t.Error("Expected error in result.Error, got nil") + } + } else { + if result.Error != nil { + t.Errorf("Expected no error in result, got: %v", result.Error) + 
} + if result.Content == "" { + t.Error("Expected non-empty result content") + } + if tt.checkFunc != nil { + tt.checkFunc(t, result.Content) + } + } + }) + } +}
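---

For reference, with the defaults exercised in the first test case above (no instancetype, preference, or DataSource), `vm.yaml.tmpl` renders a manifest along these lines. This is a sketch assuming hypothetical inputs `Name: demo-vm`, `Namespace: demo`, `ContainerDisk: quay.io/containerdisks/fedora:latest`, and `RunStrategy: Halted`:

```yaml
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
  name: demo-vm
  namespace: demo
spec:
  runStrategy: Halted        # stays halted until vm_start flips this to Always
  template:
    spec:
      domain:
        devices:
          disks:
            - name: demo-vm-rootdisk
        memory:
          guest: 2Gi         # only emitted when no instancetype is set
      volumes:
        - name: demo-vm-rootdisk
          containerDisk:
            image: quay.io/containerdisks/fedora:latest
```

This is the shape that the `guest: 2Gi` and container-disk assertions in `TestRenderVMYaml` check against.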