Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions internal/controller/metrics.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,45 @@
package controller

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// Shared naming constants for the controller's Prometheus metrics.
const (
// METIC_PREFIX is prepended to the names of the metrics declared in this file.
// NOTE(review): the identifier looks like a typo of "METRIC_PREFIX"; it is left
// unchanged here because code outside this view may reference it — confirm
// before renaming.
METIC_PREFIX = "node_disruption_controller_"

// NodeDisruptionReconcileController is the value passed for the "controller"
// label of the reconcile metrics by ObserveNodeDisruptionReconcile.
NodeDisruptionReconcileController = "NodeDisruption"

// ReconcileResultSuccess and ReconcileResultError are the "result" values that
// ObserveNodeDisruptionReconcile recognizes when choosing which
// last-reconcile timestamp gauge to update.
ReconcileResultSuccess = "success"
ReconcileResultError = "error"
)

var (
// NODE DISRUPTION METRICS
NodeDisruptionReconcileTotal = promauto.With(metrics.Registry).NewCounterVec(
prometheus.CounterOpts{
Name: METIC_PREFIX + "reconcile_total",
Help: "Total number of node disruption controller reconciliations by result",
},
[]string{"controller", "result"},
)
NodeDisruptionLastSuccessfulReconcileTimestamp = promauto.With(metrics.Registry).NewGaugeVec(
prometheus.GaugeOpts{
Name: METIC_PREFIX + "last_successful_reconcile_timestamp_seconds",
Help: "Unix timestamp of the last successful node disruption controller reconciliation",
},
[]string{"controller"},
)
NodeDisruptionLastFailedReconcileTimestamp = promauto.With(metrics.Registry).NewGaugeVec(
prometheus.GaugeOpts{
Name: METIC_PREFIX + "last_failed_reconcile_timestamp_seconds",
Help: "Unix timestamp of the last failed node disruption controller reconciliation",
},
[]string{"controller"},
)
NodeDisruptionGrantedTotal = promauto.With(metrics.Registry).NewCounterVec(
prometheus.CounterOpts{
Name: METIC_PREFIX + "node_disruption_granted_total",
Expand Down Expand Up @@ -147,3 +175,14 @@ var (
[]string{"disruption_budget_namespace", "disruption_budget_name", "disruption_budget_kind", "node_disruption_name"},
)
)

func ObserveNodeDisruptionReconcile(result string) {
NodeDisruptionReconcileTotal.WithLabelValues(NodeDisruptionReconcileController, result).Inc()
now := float64(time.Now().Unix())
switch result {
case ReconcileResultSuccess:
NodeDisruptionLastSuccessfulReconcileTimestamp.WithLabelValues(NodeDisruptionReconcileController).Set(now)
case ReconcileResultError:
NodeDisruptionLastFailedReconcileTimestamp.WithLabelValues(NodeDisruptionReconcileController).Set(now)
}
}
20 changes: 15 additions & 5 deletions internal/controller/nodedisruption_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,19 @@ type NodeDisruptionReconciler struct {
// move the current state of the cluster closer to the desired state.
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.15.0/pkg/reconcile
func (r *NodeDisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
func (r *NodeDisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (clusterResult ctrl.Result, err error) {
logger := log.FromContext(ctx)

clusterResult := ctrl.Result{}
defer func() {
if err != nil {
ObserveNodeDisruptionReconcile(ReconcileResultError)
return
}
ObserveNodeDisruptionReconcile(ReconcileResultSuccess)
}()

nd := &nodedisruptionv1alpha1.NodeDisruption{}
err := r.Get(ctx, req.NamespacedName, nd)
err = r.Get(ctx, req.NamespacedName, nd)
if err != nil {
if errors.IsNotFound(err) {
PruneNodeDisruptionMetrics(req.Name)
Expand Down Expand Up @@ -119,12 +125,16 @@ func (r *NodeDisruptionReconciler) Reconcile(ctx context.Context, req ctrl.Reque

err = reconciler.Reconcile(ctx)
if err != nil {
return clusterResult, nil
return clusterResult, err
}

if !reflect.DeepEqual(nd.Status, reconciler.NodeDisruption.Status) {
logger.Info("Updating Status, done with", "state", reconciler.NodeDisruption.Status.State)
return clusterResult, reconciler.UpdateStatus(ctx)
err = reconciler.UpdateStatus(ctx)
if err != nil {
return clusterResult, err
}
return clusterResult, nil
}
logger.Info("Reconciliation successful", "state", reconciler.NodeDisruption.Status.State)
return clusterResult, nil
Expand Down
Loading