diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..d040911 Binary files /dev/null and b/.DS_Store differ diff --git a/.env.template b/.env.template new file mode 100644 index 0000000..6991075 --- /dev/null +++ b/.env.template @@ -0,0 +1,5 @@ +# Local port forwarded +PROMETHEUS_URL="http://localhost:9090" +MCP_SERVER_URL="http://localhost:8000/mcp" +# Default +#OLLAMA_HOST="http://localhost:11434" diff --git a/CLAUDE.md b/CLAUDE.md index 57fdb15..c8dd90c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,15 +16,28 @@ Project context for Claude Code. Updated as the project evolves. services/ mcp_k8s_server/ app/ - server.py # FastMCP server — defines @mcp.tool() functions - k8s_client.py # Thin wrapper around the kubernetes Python client + server.py # FastMCP server — defines @mcp.tool() functions + k8s_client.py # Thin wrapper around the kubernetes Python client + prometheus_client.py # Prometheus HTTP API client (PromQL queries) + tests/ + test_smoke.py # Smoke tests with FakeK8sClient agent_chatbot/ app/ agent.py # Interactive LLM chatbot with agentic tool loop + tests/ + .gitkeep # Placeholder so git tracks the empty directory deploy/ rbac-readonly.yaml # K8s RBAC for in-cluster service account docker/ mcp-server.Dockerfile + agent.Dockerfile +scripts/ + run_tests.sh # pytest runner; treats exit codes 4/5 as success +.github/ + workflows/ + ci.yaml # CI: runs tests on push/PR to main +cluster.yaml # k3d cluster definition (name: k8s-agent, 1 server, 2 agents) +docker-compose.yaml pyproject.toml # uv-managed dependencies ``` @@ -71,9 +84,9 @@ Defined in `server.py`, implemented in `k8s_client.py`: | Tool | Signature | Returns | |---|---|---| | `list_namespaces` | `() -> list[str]` | Namespace name strings | -| `list_pods` | `(namespace: str) -> list[str]` | Pod name strings in that namespace | - -**Planned**: Enrich `list_pods` to return status dicts (phase, ready, restart_count, reason) so the LLM can identify CrashLoopBackOff pods. 
Add `read_pod_log` tool. +| `list_pods` | `(namespace: str) -> list[dict]` | Pod status dicts (name, phase, ready, restart_count, reason) | +| `read_pod_log` | `(namespace: str, pod: str, container: str \| None, tail_lines: int) -> str` | Last N lines of pod logs | +| `query_prometheus` | `(query: str) -> list[dict]` | Instant PromQL query results (metric labels, value, timestamp) | --- @@ -100,15 +113,14 @@ def list_pods(namespace: str) -> list[dict]: ``` ### Tool Return Types -MCP tools must return JSON-serializable types. The kubernetes Python client returns `V1Pod` and similar objects that **cannot** be serialized — always extract fields explicitly in `server.py`: +MCP tools must return JSON-serializable types. The kubernetes Python client returns `V1Pod` and similar objects that **cannot** be serialized — always extract fields explicitly into plain dicts or strings. In this project, `k8s_client.py` handles extraction so `server.py` tools can return its output directly: ```python -# Wrong — V1Pod is not serializable +# k8s_client.py extracts fields into a plain dict — safe to return from MCP tool return k8s_client.list_pods(namespace) -# Correct — extract what you need -pods = k8s_client.list_pods(namespace) -return [p.metadata.name for p in pods] +# Never return raw kubernetes client objects from a tool +return core_api.list_namespaced_pod(namespace=namespace) # Wrong — not serializable ``` ### Tool Design Philosophy @@ -123,8 +135,8 @@ return [p.metadata.name for p in pods] ## Workflow Preferences - Do not commit by default. Make code changes and stop. The user reviews `git diff` before deciding to commit. Only commit when explicitly asked. -- Active branch: `mcp_enhancement`. If the branch does not exist, check for the current branch and switch to it, then update this file. -- The worktree `claude/gallant-turing` should be kept in sync with `mcp_enhancement` when resuming sessions (`git reset --hard `). +- Active branch: `mcp_even_more_tools`. 
If the branch does not exist, check for the current branch and switch to it, then update this file. +- The worktree `claude/gallant-turing` should be kept in sync with `mcp_even_more_tools` when resuming sessions (`git reset --hard `). - When committing, always use the `Co-Authored-By: Claude Sonnet 4.6 ` trailer. - The user uses PyCharm (`.idea/` present) and ruff for linting. - Avoid the use of emojis and em-dashes in any verbiage or documentation created @@ -134,19 +146,51 @@ return [p.metadata.name for p in pods] ## Planned Features -1. **Enrich `list_pods`** with status fields (phase, ready, restart_count, reason) -2. **`read_pod_log` tool** — already in `k8s_client.py`, needs MCP tool wrapper -3. **`get_events` tool** — Kubernetes events are often the first place to look when troubleshooting -4. **FastAPI alerting webhook** — stateless endpoint that accepts alert payloads (Prometheus/Alertmanager format), runs the agent, returns structured diagnosis. Persistent MCP client via FastAPI lifespan, asyncio.Lock for concurrent request safety. -5. **Agentic loop safety** — consider a max iterations guard on the `while True` loop in `run_turn()` +1. **`get_events` tool** — Kubernetes events are often the first place to look when troubleshooting +2. **FastAPI alerting webhook** — stateless endpoint that accepts alert payloads (Prometheus/Alertmanager format), runs the agent, returns structured diagnosis. Persistent MCP client via FastAPI lifespan, asyncio.Lock for concurrent request safety. +3. **Agentic loop safety** — consider a max iterations guard on the `while True` loop in `run_turn()` + +--- + +## Environment Variables + +Configuration is managed via a `.env` file that is not committed to version control. 
+ +- `.env.template` — committed, contains all variables with safe defaults and masked secrets +- `.env` — local only, listed in `.gitignore`, created by copying the template + +**Convention**: whenever a new env var is added, update `.env.template` with a safe default or masked placeholder (e.g. `API_KEY="your-api-key-here"`). Never put real credentials in `.env.template`. + +| Variable | Default | Used by | +|---|---|---| +| `PROMETHEUS_URL` | `http://localhost:9090` | `prometheus_client.py` | +| `MCP_SERVER_URL` | `http://localhost:8000/mcp` | `agent.py` | +| `OLLAMA_HOST` | `http://localhost:11434` | ollama client (auto-detected) | --- ## Local Development -Start the k3d cluster before running either service: +Copy the env template before first run: ```bash -k3d cluster start +cp .env.template .env +``` + +Create and start the k3d cluster (required for K8s tools): +```bash +make cluster-create # first time only +make cluster-start +``` + +Start Ollama and pull the model (required for the agent): +```bash +ollama serve # starts the Ollama server on localhost:11434 +ollama pull llama3.1:8b +``` + +Or use the Makefile target which does both: +```bash +make ollama ``` Run the MCP server: @@ -154,7 +198,12 @@ Run the MCP server: uv run python -m services.mcp_k8s_server.app.server ``` -Run the agent chatbot: +Run the agent chatbot (requires MCP server already running): ```bash uv run python -m services.agent_chatbot.app.agent ``` + +Or start both together with: +```bash +make start +``` diff --git a/Makefile b/Makefile index b26e362..a7df61e 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,14 @@ VERSION := 0.1.0 -.PHONY: help ollama server agent start lint format test compose-agent compose-up pre-commit-enable pre-commit-disable +.PHONY: help ollama server agent start lint format test compose-agent compose-up pre-commit-enable pre-commit-disable cluster-create cluster-start cluster-stop blackbox-install blackbox-uninstall help: @printf "k8s-agent-mcp Makefile 
help\n\n" @printf "Usage: make \n\n" @printf "Common targets (run 'make '):\n" + @printf " cluster-create - Create the k3d cluster from cluster.yaml (first time only).\n" + @printf " cluster-start - Start the k3d cluster.\n" + @printf " cluster-stop - Stop the k3d cluster.\n" @printf " ollama - Pulls the LLM model (ollama) and starts the Ollama server.\n" @printf " server - Starts the MCP k8s server (FastMCP) in the foreground.\n" @printf " agent - Runs the interactive agent chatbot locally (requires server).\n" @@ -16,7 +19,9 @@ help: @printf " format - Run ruff to autoformat code.\n" @printf " test - Run the project's pytest test suite for services.\n" @printf " pre-commit-enable - Install pre-commit hooks into .git/hooks.\n" - @printf " pre-commit-disable - Remove pre-commit hooks from .git/hooks.\n\n" + @printf " pre-commit-disable - Remove pre-commit hooks from .git/hooks.\n" + @printf " blackbox-install - Install blackbox exporter and Probe CR for example-web latency monitoring.\n" + @printf " blackbox-uninstall - Remove blackbox exporter and Probe CR.\n\n" @printf "Notes:\n" @printf " - 'make' with no args shows this help (default).\n" @printf " - Use 'make compose-agent' to run the agent interactively inside Docker.\n" @@ -25,6 +30,18 @@ help: ## ── Local development ──────────────────────────────────────────────────────── +# Create the k3d cluster from cluster.yaml (first time only). +cluster-create: + k3d cluster create --config cluster.yaml + +# Start the k3d cluster. +cluster-start: + k3d cluster start k8s-agent + +# Stop the k3d cluster. +cluster-stop: + k3d cluster stop k8s-agent + # Pull the LLM model if not already cached, then start the Ollama server. # Run this in a separate terminal before starting the agent. 
ollama: @@ -46,6 +63,8 @@ start: @uv run python -m services.mcp_k8s_server.app.server & \ SERVER_PID=$$!; \ trap "kill $$SERVER_PID" EXIT; \ + echo "Waiting for MCP server..."; \ + until curl -so /dev/null -H "Accept: text/event-stream" http://localhost:8000/mcp 2>/dev/null; do sleep 0.3; done; \ uv run python -m services.agent_chatbot.app.agent # Convenience: run the agent via docker-compose interactively @@ -57,6 +76,22 @@ compose-agent: compose-up: docker compose up -d mcp-server agent +## ── Blackbox exporter ──────────────────────────────────────────────────────── + +# Install prometheus-blackbox-exporter into the default namespace. +# fullnameOverride keeps the service name short: blackbox-exporter.default.svc +blackbox-install: + helm repo add prometheus-community https://prometheus-community.github.io/helm-charts + helm repo update + helm upgrade --install blackbox-exporter prometheus-community/prometheus-blackbox-exporter \ + --namespace default \ + --set fullnameOverride=blackbox-exporter + kubectl apply -f deploy/workloads/blackbox-probe.yaml + +blackbox-uninstall: + kubectl delete -f deploy/workloads/blackbox-probe.yaml --ignore-not-found + helm uninstall blackbox-exporter --namespace default + ## ── Pre-commit ─────────────────────────────────────────────────────────────── # Install pre-commit hooks so they run automatically on every commit. 
diff --git a/README.md b/README.md index e32a688..c203e17 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,29 @@ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) -[![CI](https://github.com/spackle0/k8s-agent-mcp/actions/workflows/docker-build-test.yaml/badge.svg)](https://github.com/spackle0/k8s-agent-mcp/actions/workflows/ci.yml) +[![CI](https://github.com/spackle0/k8s-agent-mcp/actions/workflows/ci.yaml/badge.svg)](https://github.com/spackle0/k8s-agent-mcp/actions/workflows/ci.yaml) [![codecov](https://codecov.io/gh/spackle0/k8s-agent-mcp/graph/badge.svg?token=YJVD7W9Q37)](https://codecov.io/gh/spackle0/k8s-agent-mcp) An experiment in Agentic AI with a Kubernetes slant +## Setup + +Copy the environment template before running anything locally: + +```bash +cp .env.template .env +``` + +Edit `.env` to match your local environment. The file is listed in `.gitignore` and will not be committed. See `.env.template` for all available variables and their defaults. + +Create the k3d cluster (first time only): + +```bash +k3d cluster create --config cluster.yaml +``` + ## Docker Compose (interactive agent) The `agent` service is interactive — the container keeps STDIN open and allocates a TTY so you can type directly into the running Python process. 
diff --git a/cluster.yaml b/cluster.yaml new file mode 100644 index 0000000..fdda4fd --- /dev/null +++ b/cluster.yaml @@ -0,0 +1,7 @@ +apiVersion: k3d.io/v1alpha5 +kind: Simple +metadata: + name: k8s-agent +servers: 1 +agents: 2 +image: rancher/k3s:v1.33.3-k3s1 \ No newline at end of file diff --git a/deploy/.DS_Store b/deploy/.DS_Store new file mode 100644 index 0000000..596e7c2 Binary files /dev/null and b/deploy/.DS_Store differ diff --git a/deploy/chaosmesh/chaosmesg-pod-failure-5m.yaml b/deploy/chaosmesh/chaosmesg-pod-failure-5m.yaml new file mode 100644 index 0000000..7892cf2 --- /dev/null +++ b/deploy/chaosmesh/chaosmesg-pod-failure-5m.yaml @@ -0,0 +1,14 @@ +kind: PodChaos +apiVersion: chaos-mesh.org/v1alpha1 +metadata: + namespace: default + name: pod-test-1 +spec: + selector: + namespaces: + - default + labelSelectors: + app: example-web + mode: all + action: pod-failure + duration: 5m diff --git a/deploy/chaosmesh/chaosmesh-network-delay.yaml b/deploy/chaosmesh/chaosmesh-network-delay.yaml new file mode 100644 index 0000000..74f4296 --- /dev/null +++ b/deploy/chaosmesh/chaosmesh-network-delay.yaml @@ -0,0 +1,15 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: NetworkChaos +metadata: + name: network-delay-example + namespace: default +spec: + action: delay + mode: all + selector: + labelSelectors: + app: example-web + delay: + latency: "200ms" + jitter: "50ms" + duration: "15m" \ No newline at end of file diff --git a/deploy/chaosmesh/chaosmesh-pod-cpu-stress.yaml b/deploy/chaosmesh/chaosmesh-pod-cpu-stress.yaml new file mode 100644 index 0000000..4495667 --- /dev/null +++ b/deploy/chaosmesh/chaosmesh-pod-cpu-stress.yaml @@ -0,0 +1,15 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: StressChaos +metadata: + name: cpu-stress-example + namespace: default +spec: + mode: one + selector: + labelSelectors: + app: example-web + stressors: + cpu: + workers: 2 + load: 80 + duration: "30s" \ No newline at end of file diff --git 
a/deploy/chaosmesh/chaosmesh-pod-kill-cron.yaml b/deploy/chaosmesh/chaosmesh-pod-kill-cron.yaml new file mode 100644 index 0000000..3394c07 --- /dev/null +++ b/deploy/chaosmesh/chaosmesh-pod-kill-cron.yaml @@ -0,0 +1,14 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: Schedule +metadata: + name: pod-kill-example + namespace: default +spec: + schedule: "*/2 * * * *" + type: PodChaos + podChaos: + action: pod-kill + mode: one + selector: + labelSelectors: + app: example-web \ No newline at end of file diff --git a/deploy/chaosmesh/chaosmesh-rbac.yaml b/deploy/chaosmesh/chaosmesh-rbac.yaml new file mode 100644 index 0000000..0732889 --- /dev/null +++ b/deploy/chaosmesh/chaosmesh-rbac.yaml @@ -0,0 +1,32 @@ +kind: ServiceAccount +apiVersion: v1 +metadata: + namespace: default + name: chaos-mesh-dashboard + +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: chaos-mesh-dashboard +rules: +- apiGroups: [""] + resources: ["pods", "namespaces", "nodes", "events"] + verbs: ["get", "watch", "list"] +- apiGroups: ["chaos-mesh.org"] + resources: ["*"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: chaos-mesh-dashboard +subjects: +- kind: ServiceAccount + name: chaos-mesh-dashboard + namespace: default +roleRef: + kind: ClusterRole + name: chaos-mesh-dashboard + apiGroup: rbac.authorization.k8s.io diff --git a/deploy/workloads/blackbox-probe.yaml b/deploy/workloads/blackbox-probe.yaml new file mode 100644 index 0000000..e621536 --- /dev/null +++ b/deploy/workloads/blackbox-probe.yaml @@ -0,0 +1,35 @@ +# blackbox-probe.yaml +# Probe CR tells Prometheus Operator to scrape blackbox exporter results for example-web. +# Requires blackbox exporter installed via Helm (see Makefile: make blackbox-install). 
+# +# Key metrics exposed after this is applied: +# probe_success - 1 if HTTP 2xx, 0 if not +# probe_duration_seconds - full round-trip time including any injected latency +# probe_http_status_code - HTTP response code +# +# To watch latency during a Chaos Mesh NetworkChaos experiment: +# kubectl -n default get probe example-web +# PromQL: probe_duration_seconds{job="example-web-blackbox"} +apiVersion: monitoring.coreos.com/v1 +kind: Probe +metadata: + name: example-web-blackbox + namespace: default + labels: + app: example-web + release: prometheus +spec: + jobName: example-web-blackbox + interval: 5s + module: http_2xx + prober: + url: blackbox-exporter.default.svc.cluster.local:9115 + scheme: http + path: /probe + targets: + staticConfig: + static: + - http://example-web.default.svc.cluster.local:8080 + labels: + app: example-web + namespace: default diff --git a/deploy/workloads/test-workload.yaml b/deploy/workloads/test-workload.yaml new file mode 100644 index 0000000..4adf9aa --- /dev/null +++ b/deploy/workloads/test-workload.yaml @@ -0,0 +1,69 @@ +# test-workload.yaml +# Sets up a workload to be monitored by Prometheus +apiVersion: apps/v1 +kind: Deployment +metadata: + name: example-web + namespace: default + labels: + app: example-web +spec: + replicas: 2 + selector: + matchLabels: + app: example-web + template: + metadata: + labels: + app: example-web + spec: + containers: + - name: web + image: quay.io/brancz/prometheus-example-app:v0.5.0 + ports: + - containerPort: 8080 + name: http + resources: + requests: + cpu: 50m + memory: 32Mi + limits: + cpu: 100m + memory: 64Mi +--- +apiVersion: v1 +kind: Service +metadata: + name: example-web + namespace: default + labels: + app: example-web +spec: + selector: + app: example-web + ports: + - port: 8080 + targetPort: http + name: http +--- +# kubectl api-resources --api-group=monitoring.coreos.com +# Installed by prometheus helm chart +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: 
example-web + namespace: default + labels: + app: example-web + release: prometheus # matches the helm release name "helm list -A" +spec: + selector: + matchLabels: + app: example-web + endpoints: + - port: http + # Careful how short you make this to avoid network saturation + # And for any rates in prometheus, be sure to have at least 2x this value + # since rate needs at least two data points + interval: 5s + path: /metrics diff --git a/pyproject.toml b/pyproject.toml index 4a34372..28e1c0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ test = [ # ruff configuration [tool.ruff] show-fixes = true -select = [ +lint.select = [ "E", # pycodestyle errors "W", # pycodestyle warnings "F", # pyflakes diff --git a/services/agent_chatbot/app/agent.py b/services/agent_chatbot/app/agent.py index 8d188e9..e861cf6 100644 --- a/services/agent_chatbot/app/agent.py +++ b/services/agent_chatbot/app/agent.py @@ -19,6 +19,7 @@ import os from fastmcp import Client +from fastmcp.exceptions import ToolError from typing import Any @@ -67,8 +68,7 @@ def format_tools_for_log(tools: dict) -> str: for name, tool in tools.items(): # Summarize each parameter as "name: type (required)" or "name: type". params = ", ".join( - f"{k}: {v.get('type', '?')}" - + (" (required)" if k in tool.inputSchema.get("required", []) else "") + f"{k}: {v.get('type', '?')}" + (" (required)" if k in tool.inputSchema.get("required", []) else "") for k, v in tool.inputSchema.get("properties", {}).items() ) lines.append(f" {name}({params})") @@ -88,8 +88,11 @@ async def call_tool(client: Client, tool_name: str, tool_arguments: dict[str, An connection. Extracts the plain text from the first content item so the LLM receives a clean string rather than a raw result object. 
""" - result = await client.call_tool(tool_name, tool_arguments) - return result.content[0].text if result.content else "" + try: + result = await client.call_tool(tool_name, tool_arguments) + return result.content[0].text if result.content else "" + except ToolError as e: + return f"Tool error: {e}" async def get_tools(client: Client) -> dict: @@ -174,8 +177,11 @@ async def main(): print("Goodbye.") break - answer = await run_turn(client, available_tools, ollama_tools, messages, user_input) - print(f"Assistant: {answer}\n") + try: + answer = await run_turn(client, available_tools, ollama_tools, messages, user_input) + print(f"Assistant: {answer}\n") + except Exception as e: + print(f"Error: {e}\n") if __name__ == "__main__": diff --git a/services/mcp_k8s_server/app/k8s_client.py b/services/mcp_k8s_server/app/k8s_client.py index feefff0..5e427b0 100644 --- a/services/mcp_k8s_server/app/k8s_client.py +++ b/services/mcp_k8s_server/app/k8s_client.py @@ -4,21 +4,13 @@ """ from functools import cache - from kubernetes import client, config class K8sClient: - """Connection manager for Kubernetes API clients. - - Responsible for creating, refreshing, and providing access to the - underlying Kubernetes API client objects (CoreV1Api and AppsV1Api). - """ + """Holds initialized Kubernetes API clients.""" def __init__(self) -> None: - self._init_clients() - - def _init_clients(self) -> None: try: config.load_incluster_config() except config.ConfigException: @@ -35,29 +27,6 @@ def apps(self) -> client.AppsV1Api: """Return the AppsV1Api instance.""" return self._apps_api - def refresh(self) -> None: - """Reload configuration and recreate API clients.""" - self._init_clients() - - def close(self) -> None: - """Attempt to close underlying ApiClient connections if supported. - - This is best-effort; the kubernetes ApiClient exposes a `close()` on - its `api_client` attribute which we call when available. 
- """ - try: - if hasattr(self._core_api, "api_client") and hasattr(self._core_api.api_client, "close"): - self._core_api.api_client.close() - except Exception: - # Best-effort close; swallow exceptions to avoid noisy shutdown - pass - - try: - if hasattr(self._apps_api, "api_client") and hasattr(self._apps_api.api_client, "close"): - self._apps_api.api_client.close() - except Exception: - pass - @cache def get_client() -> K8sClient: @@ -65,26 +34,9 @@ def get_client() -> K8sClient: return K8sClient() -def refresh_client() -> None: - """Refresh the cached client's credentials/configuration.""" - get_client().refresh() - - -def close_client() -> None: - """Close the cached client and clear the cache so a new instance will be created. - - Use this when credentials or network resources change and you want a fresh - client object on next access. - """ - try: - get_client().close() - finally: - # Clear the cached instance so get_client() will construct a new one. - get_client.cache_clear() - - -# Module-level convenience functions (preserve previous public API) def list_namespaces() -> list[str]: + """Return a list of namespaces in a kubernetes cluster. 
+ """ core_api = get_client().core() ns = core_api.list_namespace() return [n.metadata.name for n in ns.items] @@ -106,20 +58,20 @@ def list_pods(namespace: str) -> list[dict]: pods = core_api.list_namespaced_pod(namespace=namespace) result: list[dict] = [] - for p in pods.items: - name = getattr(p.metadata, "name", "") - phase = getattr(p.status, "phase", "Unknown") + for pod in pods.items: + name = getattr(pod.metadata, "name", "") + phase = getattr(pod.status, "phase", "Unknown") # Determine readiness: all container statuses must be ready ready = True restart_count = 0 reason = None - container_statuses = getattr(p.status, "container_statuses", None) or [] - for cs in container_statuses: - ready = ready and bool(getattr(cs, "ready", False)) - restart_count += int(getattr(cs, "restart_count", 0)) + container_statuses = getattr(pod.status, "container_statuses", None) or [] + for status in container_statuses: + ready = ready and bool(getattr(status, "ready", False)) + restart_count += int(getattr(status, "restart_count", 0)) # If a waiting state is present, prefer that reason - state = getattr(cs, "state", None) + state = getattr(status, "state", None) if state: waiting = getattr(state, "waiting", None) if waiting and getattr(waiting, "reason", None): @@ -127,20 +79,27 @@ def list_pods(namespace: str) -> list[dict]: # Fall back to pod-level reason if none found if not reason: - reason = getattr(p.status, "reason", None) - - result.append({ - "name": name, - "phase": phase, - "ready": ready, - "restart_count": restart_count, - "reason": reason, - }) + reason = getattr(pod.status, "reason", None) + + result.append( + { + "name": name, + "phase": phase, + "ready": ready, + "restart_count": restart_count, + "reason": reason, + } + ) return result def read_pod_log(namespace: str, pod: str, container: str | None = None, tail_lines: int = 20) -> str: + """Return the last tail_lines lines of logs for a pod's container. 
+ + If container is None, the pod's default container is used. Returns a plain + string. Returns an empty string if no logs are available. + """ core_api = get_client().core() return core_api.read_namespaced_pod_log( name=pod, diff --git a/services/mcp_k8s_server/app/prometheus_client.py b/services/mcp_k8s_server/app/prometheus_client.py new file mode 100644 index 0000000..33bdf6f --- /dev/null +++ b/services/mcp_k8s_server/app/prometheus_client.py @@ -0,0 +1,50 @@ +""" +Prometheus HTTP API client for instant PromQL queries. +""" + +import os + +import httpx + +PROMETHEUS_URL = os.getenv("PROMETHEUS_URL", "http://localhost:9090") + + +def query(promql: str) -> list[dict]: + """Execute an instant PromQL query and return the result vector. + + Sends a GET request to the Prometheus HTTP API and returns a list of + dicts. Each dict contains the metric labels and the current sample value. + Returns an empty list if no time series match the query. + + Raises httpx.HTTPStatusError on non-2xx responses. + Raises RuntimeError if Prometheus reports a query error. 
+ """ + response = httpx.get( + f"{PROMETHEUS_URL}/api/v1/query", + params={"query": promql}, + timeout=10, + ) + response.raise_for_status() + body = response.json() + + if body["status"] != "success": + raise RuntimeError(f"Prometheus query error: {body.get('error', 'unknown')}") + + result_type = body["data"]["resultType"] + results = body["data"]["result"] + + if result_type == "vector": + return [ + { + "metric": item["metric"], + "value": item["value"][1], + "timestamp": item["value"][0], + } + for item in results + ] + + if result_type == "scalar": + return [{"value": results[1], "timestamp": results[0]}] + + # matrix or string — return raw result and let the LLM interpret it + return results diff --git a/services/mcp_k8s_server/app/server.py b/services/mcp_k8s_server/app/server.py index d9d08f9..7c3ffac 100644 --- a/services/mcp_k8s_server/app/server.py +++ b/services/mcp_k8s_server/app/server.py @@ -5,11 +5,14 @@ # streamable-HTTP and call these tools by name. # # Tools exposed: -# - list_namespaces() → all namespace names in the cluster +# - list_namespaces() → all namespace names in the cluster +# - list_pods() → pod status dicts for a given namespace +# - read_pod_log() → last N lines of a pod's container logs +# - query_prometheus() → instant PromQL query against Prometheus from fastmcp import FastMCP -from services.mcp_k8s_server.app import k8s_client +from services.mcp_k8s_server.app import k8s_client, prometheus_client # Create the FastMCP server instance. The name is metadata clients can read # but does not affect routing or tool resolution. @@ -52,6 +55,27 @@ def read_pod_log(namespace: str, pod: str, container: str | None = None, tail_li return logs or "" +@mcp.tool() +def query_prometheus(query: str) -> list[dict]: + """Execute an instant PromQL query against Prometheus and return matching time series. 
+ + Takes a PromQL query string and returns a list of dicts, each with: + - metric: dict of label key/value pairs identifying the time series + (e.g. {"pod": "my-pod", "namespace": "default", "job": "kubelet"}) + - value: the current sample value as a string (e.g. "1", "0.042") + - timestamp: Unix timestamp of the sample as a float + + Returns an empty list if no time series match the query. + + Example queries: + - up + - container_cpu_usage_seconds_total{namespace="default"} + - kube_pod_container_status_restarts_total > 0 + - rate(http_requests_total[5m]) + """ + return prometheus_client.query(query) + + def main(): # Start the FastMCP server using the streamable-HTTP transport. # By default, this listens on port 8000 at /mcp, matching MCP_SERVER_URL diff --git a/services/mcp_k8s_server/tests/test_smoke.py b/services/mcp_k8s_server/tests/test_smoke.py index a51ba95..bfa38d7 100644 --- a/services/mcp_k8s_server/tests/test_smoke.py +++ b/services/mcp_k8s_server/tests/test_smoke.py @@ -20,13 +20,15 @@ def list_namespaces(): @staticmethod def list_pods(namespace: str): - return [{ - "name": "mypod", - "phase": "Running", - "ready": True, - "restart_count": 0, - "reason": None, - }] + return [ + { + "name": "mypod", + "phase": "Running", + "ready": True, + "restart_count": 0, + "reason": None, + } + ] @staticmethod def read_pod_log(namespace: str, pod: str, container=None, tail_lines=20): @@ -35,7 +37,13 @@ def read_pod_log(namespace: str, pod: str, container=None, tail_lines=20): monkeypatch.setattr(server_app.k8s_client, "get_client", lambda: FakeK8sClient()) monkeypatch.setattr(server_app.k8s_client, "list_namespaces", lambda: FakeK8sClient().list_namespaces()) monkeypatch.setattr(server_app.k8s_client, "list_pods", lambda ns: FakeK8sClient().list_pods(ns)) - monkeypatch.setattr(server_app.k8s_client, "read_pod_log", lambda ns, p, container=None, tail_lines=20: FakeK8sClient().read_pod_log(ns, p, container=container, tail_lines=tail_lines)) + monkeypatch.setattr( 
+ server_app.k8s_client, + "read_pod_log", + lambda ns, p, container=None, tail_lines=20: FakeK8sClient().read_pod_log( + ns, p, container=container, tail_lines=tail_lines + ), + ) def test_list_namespaces():