Skip to content

Commit 2cffe50

Browse files
hccheng72k8s-infra-cherrypick-robot
authored andcommitted
chore: refactor metrics pkg
1 parent cab110e commit 2cffe50

File tree

4 files changed

+240
-107
lines changed

4 files changed

+240
-107
lines changed

pkg/azurefile/controllerserver.go

Lines changed: 13 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ import (
4343
"k8s.io/utils/ptr"
4444
csiMetrics "sigs.k8s.io/azurefile-csi-driver/pkg/metrics"
4545
azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
46-
"sigs.k8s.io/cloud-provider-azure/pkg/metrics"
4746
"sigs.k8s.io/cloud-provider-azure/pkg/provider/storage"
4847
)
4948

@@ -527,14 +526,6 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
527526
}
528527
}
529528

530-
csiMC := csiMetrics.NewCSIMetricContext(requestName)
531-
isOperationSucceeded := false
532-
defer func() {
533-
csiMC.ObserveWithLabels(isOperationSucceeded,
534-
"protocol", string(shareProtocol),
535-
"storage_account_type", sku)
536-
}()
537-
538529
if sourceID != "" {
539530
_, srcAccountName, _, _, _, _, err = GetFileShareInfo(sourceID) //nolint:dogsled
540531
if err != nil {
@@ -583,9 +574,10 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
583574
SourceAccountName: srcAccountName,
584575
}
585576

586-
mc := metrics.NewMetricContext(azureFileCSIDriverName, requestName, d.cloud.ResourceGroup, subsID, d.Name)
577+
mc := csiMetrics.NewCSIMetricContext(requestName).WithBasicVolumeInfo(d.cloud.ResourceGroup, subsID, d.Name)
578+
isOperationSucceeded := false
587579
defer func() {
588-
mc.ObserveOperationWithResult(isOperationSucceeded, VolumeID, volumeID)
580+
mc.WithAdditionalVolumeInfo(VolumeID, volumeID).ObserveWithLabels(isOperationSucceeded, csiMetrics.Protocol, string(shareProtocol), csiMetrics.StorageAccountType, sku)
589581
}()
590582

591583
var accountKey, lockKey string
@@ -807,12 +799,6 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
807799

808800
// DeleteVolume delete an azure file
809801
func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest) (resp *csi.DeleteVolumeResponse, returnedErr error) {
810-
requestName := "controller_delete_volume"
811-
csiMC := csiMetrics.NewCSIMetricContext(requestName)
812-
defer func() {
813-
csiMC.Observe(returnedErr == nil)
814-
}()
815-
816802
volumeID := req.GetVolumeId()
817803
if len(volumeID) == 0 {
818804
return nil, status.Error(codes.InvalidArgument, "Volume ID missing in request")
@@ -857,9 +843,9 @@ func (d *Driver) DeleteVolume(ctx context.Context, req *csi.DeleteVolumeRequest)
857843
secret = createStorageAccountSecret(accountName, accountKey)
858844
}
859845

860-
mc := metrics.NewMetricContext(azureFileCSIDriverName, requestName, resourceGroupName, subsID, d.Name)
846+
mc := csiMetrics.NewCSIMetricContext("controller_delete_volume").WithBasicVolumeInfo(resourceGroupName, subsID, d.Name)
861847
defer func() {
862-
mc.ObserveOperationWithResult(returnedErr == nil, VolumeID, volumeID)
848+
mc.WithAdditionalVolumeInfo(VolumeID, volumeID).Observe(returnedErr == nil)
863849
}()
864850

865851
if err := d.DeleteFileShare(ctx, subsID, resourceGroupName, accountName, fileShareName, secret, useDataPlaneAPI); err != nil {
@@ -966,12 +952,6 @@ func (d *Driver) ControllerUnpublishVolume(_ context.Context, _ *csi.ControllerU
966952

967953
// CreateSnapshot create a snapshot
968954
func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequest) (resp *csi.CreateSnapshotResponse, returnedErr error) {
969-
requestName := "controller_create_snapshot"
970-
csiMC := csiMetrics.NewCSIMetricContext(requestName)
971-
defer func() {
972-
csiMC.Observe(returnedErr == nil)
973-
}()
974-
975955
sourceVolumeID := req.GetSourceVolumeId()
976956
snapshotName := req.Name
977957
if len(snapshotName) == 0 {
@@ -1009,9 +989,9 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ
1009989
useDataPlaneAPI = d.useDataPlaneAPI(ctx, sourceVolumeID, accountName)
1010990
}
1011991

1012-
mc := metrics.NewMetricContext(azureFileCSIDriverName, requestName, rgName, subsID, d.Name)
992+
mc := csiMetrics.NewCSIMetricContext("controller_create_snapshot").WithBasicVolumeInfo(rgName, subsID, d.Name)
1013993
defer func() {
1014-
mc.ObserveOperationWithResult(returnedErr == nil, SourceResourceID, sourceVolumeID, SnapshotName, snapshotName)
994+
mc.WithAdditionalVolumeInfo(SourceResourceID, sourceVolumeID, SnapshotName, snapshotName).Observe(returnedErr == nil)
1015995
}()
1016996

1017997
exists, itemSnapshot, itemSnapshotTime, itemSnapshotQuota, err := d.snapshotExists(ctx, sourceVolumeID, snapshotName, req.GetSecrets(), useDataPlaneAPI)
@@ -1121,13 +1101,6 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ
11211101

11221102
// DeleteSnapshot delete a snapshot (todo)
11231103
func (d *Driver) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error) {
1124-
requestName := "controller_delete_snapshot"
1125-
csiMC := csiMetrics.NewCSIMetricContext(requestName)
1126-
isOperationSucceeded := false
1127-
defer func() {
1128-
csiMC.Observe(isOperationSucceeded)
1129-
}()
1130-
11311104
if len(req.SnapshotId) == 0 {
11321105
return nil, status.Error(codes.InvalidArgument, "Snapshot ID must be provided")
11331106
}
@@ -1149,9 +1122,10 @@ func (d *Driver) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequ
11491122
subsID = d.cloud.SubscriptionID
11501123
}
11511124

1152-
mc := metrics.NewMetricContext(azureFileCSIDriverName, requestName, rgName, subsID, d.Name)
1125+
mc := csiMetrics.NewCSIMetricContext("controller_delete_snapshot").WithBasicVolumeInfo(rgName, subsID, d.Name)
1126+
isOperationSucceeded := false
11531127
defer func() {
1154-
mc.ObserveOperationWithResult(isOperationSucceeded, SnapshotID, req.SnapshotId)
1128+
mc.WithAdditionalVolumeInfo(SnapshotID, req.SnapshotId).Observe(isOperationSucceeded)
11551129
}()
11561130

11571131
var deleteErr error
@@ -1319,13 +1293,6 @@ func (d *Driver) execAzcopyCopy(srcPath, dstPath string, azcopyCopyOptions, auth
13191293

13201294
// ControllerExpandVolume controller expand volume
13211295
func (d *Driver) ControllerExpandVolume(ctx context.Context, req *csi.ControllerExpandVolumeRequest) (*csi.ControllerExpandVolumeResponse, error) {
1322-
requestName := "controller_expand_volume"
1323-
csiMC := csiMetrics.NewCSIMetricContext(requestName)
1324-
isOperationSucceeded := false
1325-
defer func() {
1326-
csiMC.Observe(isOperationSucceeded)
1327-
}()
1328-
13291296
volumeID := req.GetVolumeId()
13301297
if len(volumeID) == 0 {
13311298
return nil, status.Error(codes.InvalidArgument, "Volume ID missing in request")
@@ -1364,9 +1331,10 @@ func (d *Driver) ControllerExpandVolume(ctx context.Context, req *csi.Controller
13641331
}
13651332
}
13661333

1367-
mc := metrics.NewMetricContext(azureFileCSIDriverName, requestName, resourceGroupName, subsID, d.Name)
1334+
mc := csiMetrics.NewCSIMetricContext("controller_expand_volume").WithBasicVolumeInfo(resourceGroupName, subsID, d.Name)
1335+
isOperationSucceeded := false
13681336
defer func() {
1369-
mc.ObserveOperationWithResult(isOperationSucceeded, VolumeID, volumeID)
1337+
mc.WithAdditionalVolumeInfo(VolumeID, volumeID).Observe(isOperationSucceeded)
13701338
}()
13711339

13721340
secrets := req.GetSecrets()

pkg/azurefile/nodeserver.go

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ func NewMountClient(cc *grpc.ClientConn) *MountClient {
6161

6262
// NodePublishVolume mount the volume from staging to target path
6363
func (d *Driver) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (resp *csi.NodePublishVolumeResponse, returnedErr error) {
64-
csiMC := csiMetrics.NewCSIMetricContext("node_publish_volume")
64+
mc := csiMetrics.NewCSIMetricContext("node_publish_volume")
6565
defer func() {
66-
csiMC.Observe(returnedErr == nil)
66+
mc.Observe(returnedErr == nil)
6767
}()
6868

6969
volCap := req.GetVolumeCapability()
@@ -205,9 +205,9 @@ func (d *Driver) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolu
205205

206206
// NodeUnpublishVolume unmount the volume from the target path
207207
func (d *Driver) NodeUnpublishVolume(_ context.Context, req *csi.NodeUnpublishVolumeRequest) (resp *csi.NodeUnpublishVolumeResponse, returnedErr error) {
208-
csiMC := csiMetrics.NewCSIMetricContext("node_unpublish_volume")
208+
mc := csiMetrics.NewCSIMetricContext("node_unpublish_volume")
209209
defer func() {
210-
csiMC.Observe(returnedErr == nil)
210+
mc.Observe(returnedErr == nil)
211211
}()
212212

213213
if len(req.GetVolumeId()) == 0 {
@@ -243,12 +243,6 @@ func (d *Driver) NodeUnpublishVolume(_ context.Context, req *csi.NodeUnpublishVo
243243

244244
// NodeStageVolume mount the volume to a staging path
245245
func (d *Driver) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (resp *csi.NodeStageVolumeResponse, returnedErr error) {
246-
requestName := "node_stage_volume"
247-
csiMC := csiMetrics.NewCSIMetricContext(requestName)
248-
defer func() {
249-
csiMC.Observe(returnedErr == nil)
250-
}()
251-
252246
if len(req.GetVolumeId()) == 0 {
253247
return nil, status.Error(codes.InvalidArgument, "Volume ID missing in request")
254248
}
@@ -278,9 +272,9 @@ func (d *Driver) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRe
278272
klog.V(2).Infof("CSI volume is read-only, mounting with extra option ro")
279273
}
280274

281-
mc := metrics.NewMetricContext(azureFileCSIDriverName, requestName, d.cloud.ResourceGroup, "", d.Name)
275+
mc := csiMetrics.NewCSIMetricContext("node_stage_volume").WithBasicVolumeInfo(d.cloud.ResourceGroup, "", d.Name)
282276
defer func() {
283-
mc.ObserveOperationWithResult(returnedErr == nil, VolumeID, volumeID)
277+
mc.WithAdditionalVolumeInfo(VolumeID, volumeID).Observe(returnedErr == nil)
284278
}()
285279

286280
_, accountName, accountKey, fileShareName, diskName, _, tenantID, tokenFilePath, err := d.GetAccountInfo(ctx, volumeID, req.GetSecrets(), context)
@@ -613,13 +607,6 @@ func (d *Driver) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRe
613607

614608
// NodeUnstageVolume unmount the volume from the staging path
615609
func (d *Driver) NodeUnstageVolume(_ context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) {
616-
requestName := "node_unstage_volume"
617-
csiMC := csiMetrics.NewCSIMetricContext(requestName)
618-
isOperationSucceeded := false
619-
defer func() {
620-
csiMC.Observe(isOperationSucceeded)
621-
}()
622-
623610
volumeID := req.GetVolumeId()
624611
if len(volumeID) == 0 {
625612
return nil, status.Error(codes.InvalidArgument, "Volume ID missing in request")
@@ -635,9 +622,10 @@ func (d *Driver) NodeUnstageVolume(_ context.Context, req *csi.NodeUnstageVolume
635622
}
636623
defer d.volumeLocks.Release(lockKey)
637624

638-
mc := metrics.NewMetricContext(azureFileCSIDriverName, requestName, d.cloud.ResourceGroup, "", d.Name)
625+
mc := csiMetrics.NewCSIMetricContext("node_unstage_volume").WithBasicVolumeInfo(d.cloud.ResourceGroup, "", d.Name)
626+
isOperationSucceeded := false
639627
defer func() {
640-
mc.ObserveOperationWithResult(isOperationSucceeded, VolumeID, volumeID)
628+
mc.WithAdditionalVolumeInfo(VolumeID, volumeID).Observe(isOperationSucceeded)
641629
}()
642630

643631
klog.V(2).Infof("NodeUnstageVolume: unmount volume %s on %s", volumeID, stagingTargetPath)
@@ -735,11 +723,11 @@ func (d *Driver) NodeGetVolumeStats(ctx context.Context, req *csi.NodeGetVolumeS
735723
klog.V(6).Infof("NodeGetVolumeStats: begin to get VolumeStats on volume %s path %s", req.VolumeId, req.VolumePath)
736724
}
737725

738-
mc := metrics.NewMetricContext(azureFileCSIDriverName, "node_get_volume_stats", d.cloud.ResourceGroup, "", d.Name)
739-
mc.LogLevel = 6 // change log level
726+
azureMC := metrics.NewMetricContext(azureFileCSIDriverName, "node_get_volume_stats", d.cloud.ResourceGroup, "", d.Name)
727+
azureMC.LogLevel = 6 // change log level
740728
isOperationSucceeded := false
741729
defer func() {
742-
mc.ObserveOperationWithResult(isOperationSucceeded, VolumeID, req.VolumeId)
730+
azureMC.ObserveOperationWithResult(isOperationSucceeded, VolumeID, req.VolumeId)
743731
}()
744732

745733
resp, err := GetVolumeStats(req.VolumePath, d.enableWindowsHostProcess)

pkg/metrics/metrics.go

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,20 @@ limitations under the License.
1717
package metrics
1818

1919
import (
20+
"strings"
2021
"time"
2122

2223
"k8s.io/component-base/metrics"
2324
"k8s.io/component-base/metrics/legacyregistry"
25+
klog "k8s.io/klog/v2"
2426
)
2527

2628
const (
2729
subSystem = "azurefile_csi_driver"
30+
31+
// Label keys for metrics
32+
Protocol = "protocol"
33+
StorageAccountType = "storage_account_type"
2834
)
2935

3036
var (
@@ -47,7 +53,7 @@ var (
4753
Buckets: []float64{0.1, 0.2, 0.5, 1, 5, 10, 15, 20, 30, 40, 50, 60, 100, 200, 300},
4854
StabilityLevel: metrics.ALPHA,
4955
},
50-
[]string{"operation", "success", "protocol", "storage_account_type"},
56+
[]string{"operation", "success", Protocol, StorageAccountType},
5157
)
5258

5359
operationTotal = metrics.NewCounterVec(
@@ -69,18 +75,48 @@ func init() {
6975

7076
// CSIMetricContext represents the context for CSI operation metrics
7177
type CSIMetricContext struct {
72-
operation string
73-
start time.Time
74-
labels map[string]string
78+
operation string
79+
volumeContext []interface{}
80+
start time.Time
81+
labels map[string]string
82+
logLevel int32
7583
}
7684

7785
// NewCSIMetricContext creates a new CSI metric context
7886
func NewCSIMetricContext(operation string) *CSIMetricContext {
7987
return &CSIMetricContext{
80-
operation: operation,
81-
start: time.Now(),
82-
labels: make(map[string]string),
88+
operation: operation,
89+
volumeContext: []interface{}{},
90+
start: time.Now(),
91+
labels: make(map[string]string),
92+
logLevel: 3,
93+
}
94+
}
95+
96+
// WithBasicVolumeInfo adds the standard volume-related context to the metric context
97+
func (mc *CSIMetricContext) WithBasicVolumeInfo(resourceGroup, subscriptionID, source string) *CSIMetricContext {
98+
if resourceGroup != "" {
99+
mc.volumeContext = append(mc.volumeContext, "resource_group", strings.ToLower(resourceGroup))
100+
}
101+
if subscriptionID != "" {
102+
mc.volumeContext = append(mc.volumeContext, "subscription_id", subscriptionID)
83103
}
104+
if source != "" {
105+
mc.volumeContext = append(mc.volumeContext, "source", source)
106+
}
107+
return mc
108+
}
109+
110+
// WithAdditionalVolumeInfo adds additional volume-related context as key-value pairs
111+
// e.g., WithAdditionalVolumeInfo("volumeid", "vol-123")
112+
func (mc *CSIMetricContext) WithAdditionalVolumeInfo(keyValuePairs ...string) *CSIMetricContext {
113+
if len(keyValuePairs)%2 != 0 {
114+
return mc
115+
}
116+
for i := 0; i < len(keyValuePairs); i += 2 {
117+
mc.volumeContext = append(mc.volumeContext, keyValuePairs[i], keyValuePairs[i+1])
118+
}
119+
return mc
84120
}
85121

86122
// WithLabel adds a label to the metric context
@@ -92,6 +128,12 @@ func (mc *CSIMetricContext) WithLabel(key, value string) *CSIMetricContext {
92128
return mc
93129
}
94130

131+
// WithLogLevel sets the log level for the metric context
132+
func (mc *CSIMetricContext) WithLogLevel(level int32) *CSIMetricContext {
133+
mc.logLevel = level
134+
return mc
135+
}
136+
95137
// Observe records the operation result and duration
96138
func (mc *CSIMetricContext) Observe(success bool) {
97139
duration := time.Since(mc.start).Seconds()
@@ -106,8 +148,8 @@ func (mc *CSIMetricContext) Observe(success bool) {
106148

107149
// Record detailed metrics if labels are present
108150
if len(mc.labels) > 0 {
109-
protocol := mc.labels["protocol"]
110-
storageAccountType := mc.labels["storage_account_type"]
151+
protocol := mc.labels[Protocol]
152+
storageAccountType := mc.labels[StorageAccountType]
111153

112154
operationDurationWithLabels.WithLabelValues(
113155
mc.operation,
@@ -116,6 +158,14 @@ func (mc *CSIMetricContext) Observe(success bool) {
116158
storageAccountType,
117159
).Observe(duration)
118160
}
161+
162+
logger := klog.Background().WithName("logLatency").V(int(mc.logLevel))
163+
if !logger.Enabled() {
164+
return
165+
}
166+
167+
keysAndValues := []interface{}{"latency_seconds", duration, "request", subSystem + "_" + mc.operation, "success", successStr}
168+
logger.Info("Observed Request Latency", append(keysAndValues, mc.volumeContext...)...)
119169
}
120170

121171
// ObserveWithLabels records the operation with provided label pairs

0 commit comments

Comments
 (0)