WIP kubevirt: Add troubleshoot VM tool to toolset

lyarwood · lyarwood · commit 98996ea55709 · 2025-10-22T17:44:54.000+01:00
Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Lee Yarwood <lyarwood@redhat.com>
diff --git a/pkg/toolsets/kubevirt/toolset.go b/pkg/toolsets/kubevirt/toolset.go
@@ -7,6 +7,7 @@ import (
 	internalk8s "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
 	"github.com/containers/kubernetes-mcp-server/pkg/toolsets"
 	vm_create "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt/vm/create"
+	vm_troubleshoot "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt/vm/troubleshoot"
 )
 
 type Toolset struct{}
@@ -24,6 +25,7 @@ func (t *Toolset) GetDescription() string {
 // GetTools returns the tools exposed by this toolset: the concatenation of
 // vm_create.Tools() and vm_troubleshoot.Tools(), in that order. The o
 // argument is not consulted here.
 func (t *Toolset) GetTools(o internalk8s.Openshift) []api.ServerTool {
 	return slices.Concat(
 		vm_create.Tools(),
+		vm_troubleshoot.Tools(),
 	)
 }
 
diff --git a/pkg/toolsets/kubevirt/vm/troubleshoot/plan.tmpl b/pkg/toolsets/kubevirt/vm/troubleshoot/plan.tmpl
@@ -0,0 +1,151 @@
+# VirtualMachine Troubleshooting Guide
+
+## VM: {{.Name}} (namespace: {{.Namespace}})
+
+Follow these steps to diagnose issues with the VirtualMachine:
+
+---
+
+## Step 1: Check VirtualMachine Status
+
+Use the `resources_get` tool to inspect the VirtualMachine:
+- **apiVersion**: `kubevirt.io/v1`
+- **kind**: `VirtualMachine`
+- **namespace**: `{{.Namespace}}`
+- **name**: `{{.Name}}`
+
+**What to look for:**
+- `status.printableStatus` - Should be "Running" for a healthy VM
+- `status.ready` - Should be `true`
+- `status.conditions` - Look for conditions with `status: "False"` or error messages
+- `spec.runStrategy` - Check if it's "Always", "Manual", "Halted", or "RerunOnFailure"
+
+---
+
+## Step 2: Check VirtualMachineInstance Status
+
+If the VM exists but isn't running, check if a VirtualMachineInstance was created:
+
+Use the `resources_get` tool:
+- **apiVersion**: `kubevirt.io/v1`
+- **kind**: `VirtualMachineInstance`
+- **namespace**: `{{.Namespace}}`
+- **name**: `{{.Name}}`
+
+**What to look for:**
+- `status.phase` - Should be "Running" for a healthy VMI
+- `status.conditions` - Check for "Ready" condition with `status: "True"`
+- `status.guestOSInfo` - Confirms guest agent is running
+- If VMI doesn't exist and VM runStrategy is "Always", this indicates a problem
+
+---
+
+## Step 3: Check DataVolume Status (if applicable)
+
+If the VM uses DataVolumeTemplates, check their status:
+
+Use the `resources_list` tool:
+- **apiVersion**: `cdi.kubevirt.io/v1beta1`
+- **kind**: `DataVolume`
+- **namespace**: `{{.Namespace}}`
+
+Look for DataVolumes with names starting with `{{.Name}}-`
+
+**What to look for:**
+- `status.phase` - Should be "Succeeded" when ready
+- `status.progress` - Shows import/clone progress (e.g., "100.0%")
+- Common issues:
+  - Phase "Pending" - Waiting for resources
+  - Phase "ImportScheduled" or "ImportInProgress" - Still importing
+  - Phase "Failed" - Check `status.conditions` for error details
+
+---
+
+## Step 4: Check virt-launcher Pod
+
+The virt-launcher pod runs the actual VM. Find and inspect it:
+
+Use the `pods_list_in_namespace` tool:
+- **namespace**: `{{.Namespace}}`
+- **labelSelector**: `kubevirt.io=virt-launcher,vm.kubevirt.io/name={{.Name}}`
+
+**What to look for:**
+- Pod should be in "Running" phase
+- All containers should be ready (e.g., "2/2")
+- Check pod events and conditions for errors
+
+If pod exists, get detailed status with `pods_get`:
+- **namespace**: `{{.Namespace}}`
+- **name**: `virt-launcher-{{.Name}}-xxxxx` (use actual pod name from list)
+
+Get pod logs with `pods_log`:
+- **namespace**: `{{.Namespace}}`
+- **name**: `virt-launcher-{{.Name}}-xxxxx`
+- **container**: `compute` (main VM container)
+
+---
+
+## Step 5: Check Events
+
+Events provide crucial diagnostic information:
+
+Use the `events_list` tool:
+- **namespace**: `{{.Namespace}}`
+
+Filter output for events related to `{{.Name}}` - look for warnings or errors.
+
+---
+
+## Step 6: Check Instance Type and Preference (if used)
+
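+If the VM uses instance types or preferences, verify they exist. The kinds below are the cluster-wide ones; if the VM's `spec.instancetype.kind` or `spec.preference.kind` is the namespaced `VirtualMachineInstancetype` or `VirtualMachinePreference`, use that kind instead and also pass **namespace**: `{{.Namespace}}`: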
+
+For instance types, use `resources_get`:
+- **apiVersion**: `instancetype.kubevirt.io/v1beta1`
+- **kind**: `VirtualMachineClusterInstancetype`
+- **name**: (check VM spec for instancetype name)
+
+For preferences, use `resources_get`:
+- **apiVersion**: `instancetype.kubevirt.io/v1beta1`
+- **kind**: `VirtualMachineClusterPreference`
+- **name**: (check VM spec for preference name)
+
+---
+
+## Common Issues and Solutions
+
+### VM stuck in "Stopped" or "Halted"
+- Check `spec.runStrategy` - if "Halted", the VM is intentionally stopped
+- Change runStrategy to "Always" to start the VM
+
+### VMI doesn't exist
+- Check VM conditions for admission errors
+- Verify instance type and preference exist
+- Check resource quotas in the namespace
+
+### DataVolume stuck in "ImportInProgress"
+- Check CDI controller pods in `cdi` namespace
+- Verify source image is accessible
+- Check PVC storage class exists and has available capacity
+
+### virt-launcher pod in CrashLoopBackOff
+- Check pod logs for container `compute`
+- Common causes:
+  - Insufficient resources (CPU/memory)
+  - Invalid VM configuration
+  - Storage issues (PVC not available)
+
+### VM starts but guest doesn't boot
+- Check virt-launcher logs for QEMU errors
+- Verify boot disk is properly configured
+- Check if guest agent is installed (for cloud images)
+- Ensure correct architecture (amd64 vs arm64)
+
+---
+
+## Additional Resources
+
+For more detailed diagnostics:
+- Check KubeVirt components: `pods_list_in_namespace` with namespace `kubevirt`
+- Check CDI components: `pods_list_in_namespace` with namespace `cdi` (if using DataVolumes)
+- Review resource consumption: `pods_top` for the virt-launcher pod
diff --git a/pkg/toolsets/kubevirt/vm/troubleshoot/tool.go b/pkg/toolsets/kubevirt/vm/troubleshoot/tool.go
@@ -0,0 +1,98 @@
+package troubleshoot
+
+import (
+	_ "embed"
+	"fmt"
+	"strings"
+	"text/template"
+
+	"github.com/containers/kubernetes-mcp-server/pkg/api"
+	"github.com/google/jsonschema-go/jsonschema"
+	"k8s.io/utils/ptr"
+)
+
+//go:embed plan.tmpl
+var planTemplate string
+
+// Tools returns the server tools contributed by this package: a single
+// "vm_troubleshoot" tool whose handler is troubleshoot.
+func Tools() []api.ServerTool {
+	// Both arguments are strings and both are listed as required.
+	inputSchema := &jsonschema.Schema{
+		Type: "object",
+		Properties: map[string]*jsonschema.Schema{
+			"namespace": {
+				Type:        "string",
+				Description: "The namespace of the virtual machine",
+			},
+			"name": {
+				Type:        "string",
+				Description: "The name of the virtual machine",
+			},
+		},
+		Required: []string{"namespace", "name"},
+	}
+
+	// Hints advertised to clients: read-only, non-destructive, idempotent,
+	// closed-world.
+	annotations := api.ToolAnnotations{
+		Title:           "Virtual Machine: Troubleshoot",
+		ReadOnlyHint:    ptr.To(true),
+		DestructiveHint: ptr.To(false),
+		IdempotentHint:  ptr.To(true),
+		OpenWorldHint:   ptr.To(false),
+	}
+
+	vmTroubleshoot := api.ServerTool{
+		Tool: api.Tool{
+			Name:        "vm_troubleshoot",
+			Description: "Generate a comprehensive troubleshooting guide for a VirtualMachine, providing step-by-step instructions to diagnose common issues",
+			InputSchema: inputSchema,
+			Annotations: annotations,
+		},
+		Handler: troubleshoot,
+	}
+
+	return []api.ServerTool{vmTroubleshoot}
+}
+
+// troubleshootParams is the data handed to plan.tmpl when the guide is
+// rendered.
+type troubleshootParams struct {
+	Namespace string // substituted for {{.Namespace}}
+	Name      string // substituted for {{.Name}}
+}
+
+// parsedPlan is the embedded plan.tmpl, parsed once at package initialisation
+// instead of on every tool call. The template text is compiled into the
+// binary, so a parse failure is a programming error and template.Must panics
+// at start-up rather than surfacing on each request.
+var parsedPlan = template.Must(template.New("troubleshoot").Parse(planTemplate))
+
+// troubleshoot handles the vm_troubleshoot tool: it reads the "namespace" and
+// "name" arguments and renders the troubleshooting guide for that
+// VirtualMachine.
+//
+// The returned Go error is always nil; argument and rendering failures are
+// reported through the error carried by the returned *api.ToolCallResult.
+func troubleshoot(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
+	namespace, err := getRequiredString(params, "namespace")
+	if err != nil {
+		return api.NewToolCallResult("", err), nil
+	}
+
+	name, err := getRequiredString(params, "name")
+	if err != nil {
+		return api.NewToolCallResult("", err), nil
+	}
+
+	var result strings.Builder
+	data := troubleshootParams{Namespace: namespace, Name: name}
+	if err := parsedPlan.Execute(&result, data); err != nil {
+		return api.NewToolCallResult("", fmt.Errorf("failed to render template: %w", err)), nil
+	}
+
+	return api.NewToolCallResult(result.String(), nil), nil
+}
+
+// getRequiredString returns the tool call argument stored under key as a
+// non-empty string.
+//
+// It returns an error when the argument is absent, when its value is not a
+// string, or when the string is empty: an empty namespace or name would
+// otherwise render a guide that points at nothing.
+func getRequiredString(params api.ToolHandlerParams, key string) (string, error) {
+	val, ok := params.GetArguments()[key]
+	if !ok {
+		return "", fmt.Errorf("%s parameter required", key)
+	}
+	str, ok := val.(string)
+	if !ok {
+		return "", fmt.Errorf("%s parameter must be a string", key)
+	}
+	if str == "" {
+		return "", fmt.Errorf("%s parameter must not be empty", key)
+	}
+	return str, nil
+}
diff --git a/pkg/toolsets/kubevirt/vm/troubleshoot/tool_test.go b/pkg/toolsets/kubevirt/vm/troubleshoot/tool_test.go
@@ -0,0 +1,110 @@
+package troubleshoot
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"github.com/containers/kubernetes-mcp-server/pkg/api"
+	internalk8s "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
+)
+
+// mockToolCallRequest is a minimal tool call request for tests; it hands back
+// a fixed argument map.
+type mockToolCallRequest struct {
+	arguments map[string]any
+}
+
+// GetArguments returns the argument map the mock was built with.
+func (m *mockToolCallRequest) GetArguments() map[string]any {
+	return m.arguments
+}
+
+// TestTroubleshoot exercises the troubleshoot handler: a successful render,
+// missing arguments, and arguments of the wrong type. In every case the
+// handler must return a nil Go error and report failures via result.Error.
+func TestTroubleshoot(t *testing.T) {
+	tests := []struct {
+		name      string
+		args      map[string]any
+		wantErr   bool
+		checkFunc func(t *testing.T, result string)
+	}{
+		{
+			name: "generates troubleshooting guide",
+			args: map[string]any{
+				"namespace": "test-ns",
+				"name":      "test-vm",
+			},
+			wantErr: false,
+			checkFunc: func(t *testing.T, result string) {
+				// Each fragment must appear somewhere in the rendered guide.
+				wanted := []string{
+					"VirtualMachine Troubleshooting Guide",
+					"test-vm",
+					"test-ns",
+					"Step 1: Check VirtualMachine Status",
+					"resources_get",
+					"VirtualMachineInstance",
+					"virt-launcher",
+				}
+				for _, fragment := range wanted {
+					if !strings.Contains(result, fragment) {
+						t.Errorf("Expected guide to contain %q", fragment)
+					}
+				}
+			},
+		},
+		{
+			name: "missing namespace",
+			args: map[string]any{
+				"name": "test-vm",
+			},
+			wantErr: true,
+		},
+		{
+			name: "missing name",
+			args: map[string]any{
+				"namespace": "test-ns",
+			},
+			wantErr: true,
+		},
+		{
+			name: "namespace is not a string",
+			args: map[string]any{
+				"namespace": 42,
+				"name":      "test-vm",
+			},
+			wantErr: true,
+		},
+		{
+			name: "name is not a string",
+			args: map[string]any{
+				"namespace": "test-ns",
+				"name":      []string{"test-vm"},
+			},
+			wantErr: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			params := api.ToolHandlerParams{
+				Context:         context.Background(),
+				Kubernetes:      &internalk8s.Kubernetes{},
+				ToolCallRequest: &mockToolCallRequest{arguments: tt.args},
+			}
+
+			result, err := troubleshoot(params)
+			if err != nil {
+				t.Fatalf("troubleshoot() unexpected Go error: %v", err)
+			}
+			if result == nil {
+				t.Fatal("Expected non-nil result")
+			}
+
+			if tt.wantErr {
+				if result.Error == nil {
+					t.Error("Expected error in result.Error, got nil")
+				}
+				return
+			}
+
+			if result.Error != nil {
+				t.Errorf("Expected no error in result, got: %v", result.Error)
+			}
+			if result.Content == "" {
+				t.Error("Expected non-empty result content")
+			}
+			if tt.checkFunc != nil {
+				tt.checkFunc(t, result.Content)
+			}
+		})
+	}
+}

Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,7 @@ import (`
`7`	`7`	`internalk8s "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"`
`8`	`8`	`"github.com/containers/kubernetes-mcp-server/pkg/toolsets"`
`9`	`9`	`vm_create "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt/vm/create"`
	`10`	`+ vm_troubleshoot "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt/vm/troubleshoot"`
`10`	`11`	`)`
`11`	`12`
`12`	`13`	`type Toolset struct{}`
`@@ -24,6 +25,7 @@ func (t *Toolset) GetDescription() string {`
`24`	`25`	`func (t *Toolset) GetTools(o internalk8s.Openshift) []api.ServerTool {`
`25`	`26`	`return slices.Concat(`
`26`	`27`	`vm_create.Tools(),`
	`28`	`+ vm_troubleshoot.Tools(),`
`27`	`29`	`)`
`28`	`30`	`}`
`29`	`31`