Skip to content

Commit 8519e42

Browse files
committed
preserve specific details in event translations
Include image name, container name, pod name, and rescale size in translated event messages instead of dropping them. Also remove nais.yaml tips from scheduling failure messages.
1 parent 8bbe2da commit 8519e42

File tree

1 file changed

+99
-2
lines changed

1 file changed

+99
-2
lines changed

internal/workload/instancegroup/events.go

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,22 @@ func translateReasonAndNote(reason, note, eventType string) (string, InstanceGro
173173

174174
// --- Image ---
175175
case "Pulling":
176+
if img := extractQuotedValue(note); img != "" {
177+
return fmt.Sprintf("Downloading container image %s...", img), InstanceGroupEventSeverityInfo
178+
}
176179
return "Downloading container image...", InstanceGroupEventSeverityInfo
177180

178181
case "Pulled":
182+
if img := extractQuotedValue(note); img != "" {
183+
return fmt.Sprintf("Container image %s downloaded successfully.", img), InstanceGroupEventSeverityInfo
184+
}
179185
return "Container image downloaded successfully.", InstanceGroupEventSeverityInfo
180186

181187
case "Failed":
182188
if strings.Contains(noteLower, "image") || strings.Contains(noteLower, "pull") || strings.Contains(noteLower, "errimagepull") {
189+
if img := extractQuotedValue(note); img != "" {
190+
return fmt.Sprintf("Failed to download container image %s. Check that the image exists and access is configured correctly.", img), InstanceGroupEventSeverityError
191+
}
183192
return "Failed to download container image. Check that the image exists and access is configured correctly.", InstanceGroupEventSeverityError
184193
}
185194
if strings.Contains(noteLower, "mount") || strings.Contains(noteLower, "volume") {
@@ -196,15 +205,24 @@ func translateReasonAndNote(reason, note, eventType string) (string, InstanceGro
196205
// --- Image pull backoff ---
197206
case "BackOff":
198207
if strings.Contains(noteLower, "image") || strings.Contains(noteLower, "pull") {
208+
if img := extractQuotedValue(note); img != "" {
209+
return fmt.Sprintf("Repeated failures downloading container image %s. The image may not exist or the registry may be inaccessible.", img), InstanceGroupEventSeverityError
210+
}
199211
return "Repeated failures downloading container image. The image may not exist or the registry may be inaccessible.", InstanceGroupEventSeverityError
200212
}
201213
return "Instance is crash-looping — it keeps crashing shortly after starting. Check application logs for details.", InstanceGroupEventSeverityError
202214

203215
// --- Container lifecycle ---
204216
case "Created":
217+
if name := extractContainerName(note); name != "" {
218+
return fmt.Sprintf("Container %s created.", name), InstanceGroupEventSeverityInfo
219+
}
205220
return "Container created.", InstanceGroupEventSeverityInfo
206221

207222
case "Started":
223+
if name := extractContainerName(note); name != "" {
224+
return fmt.Sprintf("Container %s started.", name), InstanceGroupEventSeverityInfo
225+
}
208226
return "Container started.", InstanceGroupEventSeverityInfo
209227

210228
case "Killing":
@@ -214,6 +232,9 @@ func translateReasonAndNote(reason, note, eventType string) (string, InstanceGro
214232
if strings.Contains(noteLower, "preempt") {
215233
return "Instance is being shut down to make room for higher-priority workloads.", InstanceGroupEventSeverityWarning
216234
}
235+
if name := extractContainerName(note); name != "" {
236+
return fmt.Sprintf("Container %s is being terminated.", name), InstanceGroupEventSeverityInfo
237+
}
217238
return "Instance is being terminated.", InstanceGroupEventSeverityInfo
218239

219240
// --- Probes ---
@@ -247,9 +268,15 @@ func translateReasonAndNote(reason, note, eventType string) (string, InstanceGro
247268

248269
// --- ReplicaSet ---
249270
case "SuccessfulCreate":
271+
if name := extractPodName(note); name != "" {
272+
return fmt.Sprintf("New instance %s created.", name), InstanceGroupEventSeverityInfo
273+
}
250274
return "New instance created by the instance group.", InstanceGroupEventSeverityInfo
251275

252276
case "SuccessfulDelete":
277+
if name := extractPodName(note); name != "" {
278+
return fmt.Sprintf("Instance %s removed.", name), InstanceGroupEventSeverityInfo
279+
}
253280
return "Instance removed from the instance group.", InstanceGroupEventSeverityInfo
254281

255282
case "FailedCreate":
@@ -286,9 +313,9 @@ func classifySchedulingFailure(note string) string {
286313

287314
switch {
288315
case strings.Contains(noteLower, "insufficient memory"):
289-
return "Unable to start instance: not enough memory available in the cluster. Consider reducing the memory request in your nais.yaml."
316+
return "Unable to start instance: not enough memory available in the cluster."
290317
case strings.Contains(noteLower, "insufficient cpu"):
291-
return "Unable to start instance: not enough CPU available in the cluster. Consider reducing the CPU request in your nais.yaml."
318+
return "Unable to start instance: not enough CPU available in the cluster."
292319
case strings.Contains(noteLower, "persistentvolumeclaim"):
293320
return "Unable to start instance: a required storage volume is not available."
294321
case strings.Contains(noteLower, "taint") || strings.Contains(noteLower, "toleration"):
@@ -394,15 +421,85 @@ func extractResourceName(note, keyword string) string {
394421
}
395422

396423
// classifyRescale provides a user-friendly message for autoscale events.
424+
// K8s note format: "New size: 5; reason: cpu resource utilization (percentage of request) above target"
397425
func classifyRescale(note string) string {
398426
noteLower := strings.ToLower(note)
427+
size := extractRescaleSize(note)
399428

400429
switch {
401430
case strings.Contains(noteLower, "above"):
431+
if size != "" {
432+
return fmt.Sprintf("Autoscaler scaled up to %s instances due to high resource usage.", size)
433+
}
402434
return "Autoscaler increased instance count due to high resource usage."
403435
case strings.Contains(noteLower, "below"):
436+
if size != "" {
437+
return fmt.Sprintf("Autoscaler scaled down to %s instances due to low resource usage.", size)
438+
}
404439
return "Autoscaler decreased instance count due to low resource usage."
405440
default:
406441
return fmt.Sprintf("Autoscaler adjusted instance count: %s", note)
407442
}
408443
}
444+
445+
// extractQuotedValue returns the text between the first pair of double quotes
// in s, or "" when s does not contain both an opening and a closing quote.
//
// Example: `Pulling image "ghcr.io/navikt/myapp:v1.2.3"` yields
// `ghcr.io/navikt/myapp:v1.2.3`.
func extractQuotedValue(s string) string {
	_, afterOpen, opened := strings.Cut(s, `"`)
	if !opened {
		return ""
	}

	value, _, closed := strings.Cut(afterOpen, `"`)
	if !closed {
		return ""
	}
	return value
}
459+
460+
// extractContainerName extracts a container name from K8s event notes.
// Handles formats like "Created container myapp" and "Stopping container myapp".
//
// The last case-insensitive occurrence of "container " is located and the
// first whitespace-delimited token after it is returned, with trailing
// punctuation (".,;:") removed. Returns "" when the prefix is absent or is not
// followed by a name.
func extractContainerName(note string) string {
	const prefix = "container "

	// Compare fixed-width windows of the original string rather than indexing
	// into a lowercased copy: strings.ToLower can change the byte length of
	// non-ASCII text, which would make offsets from the copy invalid (or out
	// of range) for note.
	idx := -1
	for i := len(note) - len(prefix); i >= 0; i-- {
		if strings.EqualFold(note[i:i+len(prefix)], prefix) {
			idx = i
			break
		}
	}
	if idx == -1 {
		return ""
	}

	// Only the first token is the name; notes may continue with free text,
	// e.g. "Container myapp failed liveness probe, will be restarted".
	fields := strings.Fields(note[idx+len(prefix):])
	if len(fields) == 0 {
		return ""
	}
	return strings.TrimRight(fields[0], ".,;:")
}
475+
476+
// extractPodName extracts a pod name from K8s event notes.
// Handles formats like "Created pod: myapp-abc123-xyz" and "Deleted pod: myapp-abc123-xyz".
//
// The first case-insensitive occurrence of "pod: " is located and the first
// whitespace-delimited token after it is returned, with trailing punctuation
// (".,;:") removed. Returns "" when the prefix is absent or is not followed by
// a name.
func extractPodName(note string) string {
	const prefix = "pod: "

	// Compare fixed-width windows of the original string rather than indexing
	// into a lowercased copy: strings.ToLower can change the byte length of
	// non-ASCII text, which would make offsets from the copy invalid (or out
	// of range) for note.
	idx := -1
	for i := 0; i+len(prefix) <= len(note); i++ {
		if strings.EqualFold(note[i:i+len(prefix)], prefix) {
			idx = i
			break
		}
	}
	if idx == -1 {
		return ""
	}

	// Keep only the name token so any trailing text in the note is not
	// reported as part of the pod name.
	fields := strings.Fields(note[idx+len(prefix):])
	if len(fields) == 0 {
		return ""
	}
	return strings.TrimRight(fields[0], ".,;:")
}
490+
491+
// extractRescaleSize pulls the new replica count out of a rescale event note
// such as "New size: 5; reason: ...".
//
// It returns the text that follows the first (case-sensitive) "New size: "
// marker, cut at the first ';', ',' or space and trimmed of surrounding
// whitespace. If the marker is missing, "" is returned.
func extractRescaleSize(note string) string {
	_, tail, found := strings.Cut(note, "New size: ")
	if !found {
		return ""
	}

	// Stop at the first delimiter, if any; otherwise the whole tail is the size.
	if stop := strings.IndexAny(tail, ";, "); stop >= 0 {
		tail = tail[:stop]
	}
	return strings.TrimSpace(tail)
}

0 commit comments

Comments
 (0)