From 199453286a042c1151bbc015ab83b98926c2a302 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Mon, 15 Jun 2026 09:59:43 +0200 Subject: [PATCH 01/10] add generic metrics for external API --- pkg/metrics/http.go | 8 ++++++-- pkg/metrics/metrics.go | 13 ++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 047032e6..10693936 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -3,6 +3,7 @@ package metrics import ( "fmt" "net/http" + "strconv" "strings" "time" @@ -33,8 +34,11 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp With(prometheus.Labels{operationLabel: operation}). Inc() - if response != nil && response.StatusCode >= http.StatusInternalServerError { - LoadBalancerErrorCount.Inc() + if response != nil && response.StatusCode >= 400 { + HTTPErrorCount.With(prometheus.Labels{ + "method": request.Method, + "code": strconv.Itoa(response.StatusCode), + }).Inc() } return response, err diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index dd21347f..aa6dca6c 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -19,13 +19,12 @@ var ( ConstLabels: nil, }, []string{operationLabel}) - LoadBalancerErrorCount = prometheus.NewCounter(prometheus.CounterOpts{ + HTTPErrorCount = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: cloudProviderMetricPrefix, - Subsystem: loadBalancerSubSystem, - Name: "errors_total", - Help: "the number of server errors reported when calling the load balancer API", + Name: "http_errors_total", + Help: "Number of HTTP errors returned by external APIs", ConstLabels: nil, - }) + }, []string{"method", "code"}) LoadBalancerResponseTimeHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, @@ -56,12 +55,12 @@ func (e *Exporter) Collect(metrics chan<- prometheus.Metric) { func (e *Exporter) describeCloudProvider(descs chan<- *prometheus.Desc) { 
LoadBalancerRequestCount.Describe(descs) - LoadBalancerErrorCount.Describe(descs) + HTTPErrorCount.Describe(descs) LoadBalancerResponseTimeHistogram.Describe(descs) } func (e *Exporter) collectCloudProvider(metrics chan<- prometheus.Metric) { LoadBalancerRequestCount.Collect(metrics) - LoadBalancerErrorCount.Collect(metrics) + HTTPErrorCount.Collect(metrics) LoadBalancerResponseTimeHistogram.Collect(metrics) } From 276c0f7c9ebe61e8c1b91dff64bd3c296a1eb091 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Mon, 15 Jun 2026 10:04:52 +0200 Subject: [PATCH 02/10] make fmt --- pkg/metrics/http.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 10693936..9c005146 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -36,8 +36,8 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp if response != nil && response.StatusCode >= 400 { HTTPErrorCount.With(prometheus.Labels{ - "method": request.Method, - "code": strconv.Itoa(response.StatusCode), + "method": request.Method, + "code": strconv.Itoa(response.StatusCode), }).Inc() } From 83fbb7c23221d9ace017ba67a30c11a818c5ff65 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Tue, 16 Jun 2026 16:38:00 +0200 Subject: [PATCH 03/10] add tests for the new http error metrics --- pkg/metrics/http_test.go | 65 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 1ffcc08f..9a12616e 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -2,10 +2,13 @@ package metrics import ( "net/http" + "net/http/httptest" "net/url" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" ) var _ = Describe("Metrics", func() { @@ -22,4 +25,66 @@ var _ = Describe("Metrics", func() { Entry("get load-balancers", "GET", "/v2/projects/6-a-4-8-c/regions/eu01/load-balancers", "get_load-balancers"), Entry("get load-balancers instance", "GET", "/v2/projects/6-a-4-8-c/regions/eu01/load-balancers/id", "get_load-balancers_instance"), ) + + Describe("InstrumentedRoundTripper", func() { + It("increments HTTPErrorCount for 400 responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + })) + defer server.Close() + + labels := prometheus.Labels{ + "method": http.MethodGet, + "code": "400", + } + before := testutil.ToFloat64(HTTPErrorCount.With(labels)) + + response, err := NewInstrumentedHTTPClient().Get(server.URL) + Expect(err).NotTo(HaveOccurred()) + defer response.Body.Close() + + after := testutil.ToFloat64(HTTPErrorCount.With(labels)) + Expect(after - before).To(Equal(float64(1))) + }) + + It("increments HTTPErrorCount for 500 responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + labels := prometheus.Labels{ + "method": http.MethodPost, + "code": "500", + } + before := testutil.ToFloat64(HTTPErrorCount.With(labels)) + + response, err := NewInstrumentedHTTPClient().Post(server.URL, "application/json", nil) + Expect(err).NotTo(HaveOccurred()) + defer response.Body.Close() + + after := testutil.ToFloat64(HTTPErrorCount.With(labels)) + Expect(after - before).To(Equal(float64(1))) + }) + + It("does not increment HTTPErrorCount for successful responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + 
defer server.Close() + + labels := prometheus.Labels{ + "method": http.MethodGet, + "code": "200", + } + before := testutil.ToFloat64(HTTPErrorCount.With(labels)) + + response, err := NewInstrumentedHTTPClient().Get(server.URL) + Expect(err).NotTo(HaveOccurred()) + defer response.Body.Close() + + after := testutil.ToFloat64(HTTPErrorCount.With(labels)) + Expect(after - before).To(Equal(float64(0))) + }) + }) }) From e7c4ef7ba7c7fb1d00cd8b3095c75d342e295718 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Tue, 16 Jun 2026 16:51:07 +0200 Subject: [PATCH 04/10] remove loadbalancer prefix from metrics names --- pkg/metrics/http.go | 4 ++-- pkg/metrics/metrics.go | 23 ++++++++++------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 9c005146..2782c7fc 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -27,10 +27,10 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp response, err := rt.base.RoundTrip(request) duration := time.Since(startTime) - LoadBalancerResponseTimeHistogram. + HTTPRequestDurationHistogram. With(prometheus.Labels{operationLabel: operation}). Observe(float64(duration.Seconds())) - LoadBalancerRequestCount. + HTTPRequestCount. With(prometheus.Labels{operationLabel: operation}). 
Inc() diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index aa6dca6c..963e3417 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -6,16 +6,14 @@ import ( const ( cloudProviderMetricPrefix = "cloud_provider_stackit" - loadBalancerSubSystem = "lb" operationLabel = "op" ) var ( - LoadBalancerRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + HTTPRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: cloudProviderMetricPrefix, - Subsystem: loadBalancerSubSystem, - Name: "requests_total", - Help: "the number of requests to the load balancer API", + Name: "http_requests_total", + Help: "The number of requests to external APIs", ConstLabels: nil, }, []string{operationLabel}) @@ -26,11 +24,10 @@ var ( ConstLabels: nil, }, []string{"method", "code"}) - LoadBalancerResponseTimeHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + HTTPRequestDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, - Subsystem: loadBalancerSubSystem, - Name: "request_duration_seconds", - Help: "the response times of the load balancer API", + Name: "http_request_duration_seconds", + Help: "The response times of external API requests", ConstLabels: nil, Buckets: nil, }, []string{operationLabel}) @@ -54,13 +51,13 @@ func (e *Exporter) Collect(metrics chan<- prometheus.Metric) { } func (e *Exporter) describeCloudProvider(descs chan<- *prometheus.Desc) { - LoadBalancerRequestCount.Describe(descs) + HTTPRequestCount.Describe(descs) HTTPErrorCount.Describe(descs) - LoadBalancerResponseTimeHistogram.Describe(descs) + HTTPRequestDurationHistogram.Describe(descs) } func (e *Exporter) collectCloudProvider(metrics chan<- prometheus.Metric) { - LoadBalancerRequestCount.Collect(metrics) + HTTPRequestCount.Collect(metrics) HTTPErrorCount.Collect(metrics) - LoadBalancerResponseTimeHistogram.Collect(metrics) + HTTPRequestDurationHistogram.Collect(metrics) } From 
961c15eec39681bc0f750a45197d39e5942046e6 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Tue, 16 Jun 2026 17:11:06 +0200 Subject: [PATCH 05/10] add tests for HTTPRequestDurationHistogram and HTTPRequestCount --- pkg/metrics/http_test.go | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 9a12616e..93a81802 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -9,6 +9,7 @@ import ( . "github.com/onsi/gomega" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" + dto "github.com/prometheus/client_model/go" ) var _ = Describe("Metrics", func() { @@ -27,6 +28,44 @@ var _ = Describe("Metrics", func() { ) Describe("InstrumentedRoundTripper", func() { + It("increments HTTPRequestCount for responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + labels := prometheus.Labels{ + operationLabel: "get_request-count-test", + } + before := testutil.ToFloat64(HTTPRequestCount.With(labels)) + + response, err := NewInstrumentedHTTPClient().Get(server.URL + "/request-count-test") + Expect(err).NotTo(HaveOccurred()) + defer response.Body.Close() + + after := testutil.ToFloat64(HTTPRequestCount.With(labels)) + Expect(after - before).To(Equal(float64(1))) + }) + + It("records HTTPRequestDurationHistogram observations for responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + labels := prometheus.Labels{ + operationLabel: "get_request-duration-test", + } + before := histogramSampleCount(HTTPRequestDurationHistogram.With(labels)) + + response, err := NewInstrumentedHTTPClient().Get(server.URL + "/request-duration-test") + Expect(err).NotTo(HaveOccurred()) + defer 
response.Body.Close() + + after := histogramSampleCount(HTTPRequestDurationHistogram.With(labels)) + Expect(after - before).To(Equal(uint64(1))) + }) + It("increments HTTPErrorCount for 400 responses", func() { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusBadRequest) @@ -88,3 +127,13 @@ var _ = Describe("Metrics", func() { }) }) }) + +func histogramSampleCount(observer prometheus.Observer) uint64 { + metric, ok := observer.(prometheus.Metric) + Expect(ok).To(BeTrue()) + + dtoMetric := &dto.Metric{} + Expect(metric.Write(dtoMetric)).To(Succeed()) + + return dtoMetric.GetHistogram().GetSampleCount() +} From e7a91837b69eb7e166a1a954389de673d58ec5bf Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 17 Jun 2026 09:40:47 +0200 Subject: [PATCH 06/10] add api label from client creation --- pkg/ccm/stackit.go | 12 ++++++++++-- pkg/metrics/http.go | 26 +++++++++++++++++++++----- pkg/metrics/http_test.go | 36 +++++++++++++++++++++++++++++++----- pkg/metrics/metrics.go | 10 +++++++--- 4 files changed, 69 insertions(+), 15 deletions(-) diff --git a/pkg/ccm/stackit.go b/pkg/ccm/stackit.go index 94e72246..bf25bd68 100644 --- a/pkg/ccm/stackit.go +++ b/pkg/ccm/stackit.go @@ -119,8 +119,12 @@ func BuildObservability() (*MetricsRemoteWrite, error) { // NewCloudControllerManager creates a new instance of the stackit struct from a stackitconfig struct func NewCloudControllerManager(cfg *stackitconfig.CCMConfig, obs *MetricsRemoteWrite) (*CloudControllerManager, error) { + lbHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameLoadBalancer) + if err != nil { + return nil, fmt.Errorf("create load balancer metrics HTTP client: %w", err) + } lbOpts := []sdkconfig.ConfigurationOption{ - sdkconfig.WithHTTPClient(metrics.NewInstrumentedHTTPClient()), + sdkconfig.WithHTTPClient(lbHTTPClient), } if cfg.Global.APIEndpoints.LoadBalancerAPI != "" { @@ -144,8 +148,12 @@ func 
NewCloudControllerManager(cfg *stackitconfig.CCMConfig, obs *MetricsRemoteW return nil, err } + iaasHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS) + if err != nil { + return nil, fmt.Errorf("create IaaS metrics HTTP client: %w", err) + } iaasOpts := []sdkconfig.ConfigurationOption{ - sdkconfig.WithHTTPClient(metrics.NewInstrumentedHTTPClient()), + sdkconfig.WithHTTPClient(iaasHTTPClient), } if cfg.Global.APIEndpoints.IaasAPI != "" { diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 2782c7fc..1af6f787 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -1,6 +1,7 @@ package metrics import ( + "errors" "fmt" "net/http" "strconv" @@ -10,13 +11,21 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -func NewInstrumentedHTTPClient() *http.Client { - return &http.Client{ - Transport: &InstrumentedRoundTripper{http.DefaultTransport}, +func NewInstrumentedHTTPClient(api string) (*http.Client, error) { + if api == "" { + return nil, errors.New("api name is required") } + + return &http.Client{ + Transport: &InstrumentedRoundTripper{ + api: api, + base: http.DefaultTransport, + }, + }, nil } type InstrumentedRoundTripper struct { + api string base http.RoundTripper } @@ -28,14 +37,21 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp duration := time.Since(startTime) HTTPRequestDurationHistogram. - With(prometheus.Labels{operationLabel: operation}). + With(prometheus.Labels{ + apiLabel: rt.api, + operationLabel: operation, + }). Observe(float64(duration.Seconds())) HTTPRequestCount. - With(prometheus.Labels{operationLabel: operation}). + With(prometheus.Labels{ + apiLabel: rt.api, + operationLabel: operation, + }). 
Inc() if response != nil && response.StatusCode >= 400 { HTTPErrorCount.With(prometheus.Labels{ + apiLabel: rt.api, "method": request.Method, "code": strconv.Itoa(response.StatusCode), }).Inc() diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 93a81802..0a6308bd 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -28,6 +28,12 @@ var _ = Describe("Metrics", func() { ) Describe("InstrumentedRoundTripper", func() { + It("requires an API name", func() { + client, err := NewInstrumentedHTTPClient("") + Expect(err).To(MatchError("api name is required")) + Expect(client).To(BeNil()) + }) + It("increments HTTPRequestCount for responses", func() { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) @@ -35,11 +41,15 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ + apiLabel: "test", operationLabel: "get_request-count-test", } before := testutil.ToFloat64(HTTPRequestCount.With(labels)) - response, err := NewInstrumentedHTTPClient().Get(server.URL + "/request-count-test") + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Get(server.URL + "/request-count-test") Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() @@ -54,11 +64,15 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ + apiLabel: "test", operationLabel: "get_request-duration-test", } before := histogramSampleCount(HTTPRequestDurationHistogram.With(labels)) - response, err := NewInstrumentedHTTPClient().Get(server.URL + "/request-duration-test") + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Get(server.URL + "/request-duration-test") Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() @@ -73,12 +87,16 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := 
prometheus.Labels{ + apiLabel: "test", "method": http.MethodGet, "code": "400", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) - response, err := NewInstrumentedHTTPClient().Get(server.URL) + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Get(server.URL) Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() @@ -93,12 +111,16 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ + apiLabel: "test", "method": http.MethodPost, "code": "500", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) - response, err := NewInstrumentedHTTPClient().Post(server.URL, "application/json", nil) + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Post(server.URL, "application/json", nil) Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() @@ -113,12 +135,16 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ + apiLabel: "test", "method": http.MethodGet, "code": "200", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) - response, err := NewInstrumentedHTTPClient().Get(server.URL) + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Get(server.URL) Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 963e3417..6eb20520 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -6,7 +6,11 @@ import ( const ( cloudProviderMetricPrefix = "cloud_provider_stackit" + apiLabel = "api" operationLabel = "op" + + APINameLoadBalancer = "loadbalancer" + APINameIaaS = "iaas" ) var ( @@ -15,14 +19,14 @@ var ( Name: "http_requests_total", Help: "The number of requests to external APIs", ConstLabels: nil, - }, []string{operationLabel}) + }, []string{apiLabel, operationLabel}) HTTPErrorCount = 
prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: cloudProviderMetricPrefix, Name: "http_errors_total", Help: "Number of HTTP errors returned by external APIs", ConstLabels: nil, - }, []string{"method", "code"}) + }, []string{apiLabel, "method", "code"}) HTTPRequestDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, @@ -30,7 +34,7 @@ var ( Help: "The response times of external API requests", ConstLabels: nil, Buckets: nil, - }, []string{operationLabel}) + }, []string{apiLabel, operationLabel}) ) type Exporter struct { From c6516174ba0062a73e36042724602d1bbeb791bd Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 17 Jun 2026 09:50:25 +0200 Subject: [PATCH 07/10] make mod tidy --- go.mod | 2 +- pkg/metrics/http.go | 6 +++--- pkg/metrics/http_test.go | 28 ++++++++++++++-------------- pkg/metrics/metrics.go | 4 +++- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/go.mod b/go.mod index 909ce101..2301a6be 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/onsi/ginkgo/v2 v2.30.0 github.com/onsi/gomega v1.41.0 github.com/prometheus/client_golang v1.23.2 + github.com/prometheus/client_model v0.6.2 github.com/spf13/cobra v1.10.2 github.com/spf13/pflag v1.0.10 github.com/stackitcloud/stackit-sdk-go/core v0.26.0 @@ -88,7 +89,6 @@ require ( github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.67.5 // indirect github.com/prometheus/procfs v0.19.2 // indirect github.com/stackitcloud/stackit-sdk-go/services/resourcemanager v0.24.0 // indirect diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 1af6f787..27f7028f 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -51,9 +51,9 @@ func (rt 
*InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp if response != nil && response.StatusCode >= 400 { HTTPErrorCount.With(prometheus.Labels{ - apiLabel: rt.api, - "method": request.Method, - "code": strconv.Itoa(response.StatusCode), + apiLabel: rt.api, + methodLabel: request.Method, + codeLabel: strconv.Itoa(response.StatusCode), }).Inc() } diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 0a6308bd..62f3bdb2 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -35,7 +35,7 @@ var _ = Describe("Metrics", func() { }) It("increments HTTPRequestCount for responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) defer server.Close() @@ -58,7 +58,7 @@ var _ = Describe("Metrics", func() { }) It("records HTTPRequestDurationHistogram observations for responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) defer server.Close() @@ -81,15 +81,15 @@ var _ = Describe("Metrics", func() { }) It("increments HTTPErrorCount for 400 responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusBadRequest) })) defer server.Close() labels := prometheus.Labels{ - apiLabel: "test", - "method": http.MethodGet, - "code": "400", + apiLabel: "test", + methodLabel: http.MethodGet, + codeLabel: "400", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) @@ -105,15 +105,15 @@ var _ = Describe("Metrics", func() { }) It("increments HTTPErrorCount for 500 responses", func() { - server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusInternalServerError) })) defer server.Close() labels := prometheus.Labels{ - apiLabel: "test", - "method": http.MethodPost, - "code": "500", + apiLabel: "test", + methodLabel: http.MethodPost, + codeLabel: "500", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) @@ -129,15 +129,15 @@ var _ = Describe("Metrics", func() { }) It("does not increment HTTPErrorCount for successful responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) defer server.Close() labels := prometheus.Labels{ - apiLabel: "test", - "method": http.MethodGet, - "code": "200", + apiLabel: "test", + methodLabel: http.MethodGet, + codeLabel: "200", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 6eb20520..f15ef09a 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -7,6 +7,8 @@ import ( const ( cloudProviderMetricPrefix = "cloud_provider_stackit" apiLabel = "api" + methodLabel = "method" + codeLabel = "code" operationLabel = "op" APINameLoadBalancer = "loadbalancer" @@ -26,7 +28,7 @@ var ( Name: "http_errors_total", Help: "Number of HTTP errors returned by external APIs", ConstLabels: nil, - }, []string{apiLabel, "method", "code"}) + }, []string{apiLabel, methodLabel, codeLabel}) HTTPRequestDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, From be0209169ff24978c0ec8184dc876f0826055d7c Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 17 Jun 2026 10:35:30 +0200 Subject: [PATCH 08/10] add metrics for csi --- cmd/stackit-csi-plugin/main.go | 12 
+++++++++++- pkg/stackit/client.go | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cmd/stackit-csi-plugin/main.go b/cmd/stackit-csi-plugin/main.go index d952bd60..d4de6691 100644 --- a/cmd/stackit-csi-plugin/main.go +++ b/cmd/stackit-csi-plugin/main.go @@ -6,6 +6,8 @@ import ( "github.com/spf13/cobra" "github.com/spf13/pflag" + "github.com/stackitcloud/cloud-provider-stackit/pkg/metrics" + sdkconfig "github.com/stackitcloud/stackit-sdk-go/core/config" "k8s.io/component-base/cli" "k8s.io/klog/v2" @@ -94,7 +96,15 @@ func handle() { klog.Fatal(err) } - iaasClient, err := stackit.CreateIaaSClient(&cfg) + iaasHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS) + if err != nil { + klog.Fatalf("create IaaS metrics HTTP client: %v", err) + } + iaasOpts := []sdkconfig.ConfigurationOption{ + sdkconfig.WithHTTPClient(iaasHTTPClient), + } + + iaasClient, err := stackit.CreateIaaSClient(&cfg, iaasOpts...) if err != nil { klog.Fatalf("Failed to create IaaS client: %v", err) } diff --git a/pkg/stackit/client.go b/pkg/stackit/client.go index 12960832..f676b6d4 100644 --- a/pkg/stackit/client.go +++ b/pkg/stackit/client.go @@ -171,7 +171,7 @@ func CreateSTACKITProvider(client iaas.DefaultAPI, cfg *stackitconfig.CSIConfig) return instance, nil } -func CreateIaaSClient(cfg *stackitconfig.CSIConfig) (iaas.DefaultAPI, error) { +func CreateIaaSClient(cfg *stackitconfig.CSIConfig, clientOpts ...sdkconfig.ConfigurationOption) (iaas.DefaultAPI, error) { var userAgent []string var opts []sdkconfig.ConfigurationOption userAgent = append(userAgent, fmt.Sprintf("%s/%s", "block-storage-csi-driver", version.Version)) @@ -186,6 +186,7 @@ func CreateIaaSClient(cfg *stackitconfig.CSIConfig) (iaas.DefaultAPI, error) { } opts = append(opts, sdkconfig.WithUserAgent(strings.Join(userAgent, " "))) + opts = append(opts, clientOpts...) client, err := iaas.NewAPIClient(opts...) 
if err != nil { From 810cd4d653a079b5cdefb92fbfa69c322ca70e54 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 17 Jun 2026 16:00:00 +0200 Subject: [PATCH 09/10] call metric server from csi/main.go --- cmd/stackit-csi-plugin/main.go | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cmd/stackit-csi-plugin/main.go b/cmd/stackit-csi-plugin/main.go index d4de6691..ec9c9458 100644 --- a/cmd/stackit-csi-plugin/main.go +++ b/cmd/stackit-csi-plugin/main.go @@ -1,8 +1,11 @@ package main import ( + "context" "fmt" "os" + "os/signal" + "syscall" "github.com/spf13/cobra" "github.com/spf13/pflag" @@ -23,7 +26,7 @@ var ( endpoint string cloudConfig string cluster string - httpEndpoint string + metricsAddress string provideControllerService bool provideNodeService bool ) @@ -33,7 +36,10 @@ func main() { Use: "stackit-csi-plugin", Short: "STACKIT block-storage CSI plugin", Run: func(_ *cobra.Command, _ []string) { - handle() + ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) + defer cancel() + + handle(ctx) }, PersistentPreRunE: func(cmd *cobra.Command, _ []string) error { f := cmd.Flags() @@ -66,8 +72,8 @@ func main() { cmd.Flags().StringVar(&cloudConfig, "cloud-config", "", "CSI driver cloud config. 
This option can be given multiple times") cmd.PersistentFlags().StringVar(&cluster, "cluster", "", "The identifier of the cluster that the plugin is running in.") - cmd.PersistentFlags().StringVar(&httpEndpoint, "http-endpoint", "", - "The TCP network address where the HTTP server for providing metrics for diagnostics, will listen (example: `:8080`)."+ + cmd.PersistentFlags().StringVar(&metricsAddress, "metrics-address", "", + "The TCP network address where the HTTP server for providing metrics for diagnostics, will listen (example: `:9090`)."+ "The default is empty string, which means the server is disabled.") cmd.PersistentFlags().BoolVar(&provideControllerService, "provide-controller-service", true, @@ -81,7 +87,14 @@ func main() { os.Exit(code) } -func handle() { +func handle(ctx context.Context) { + if metricsAddress != "" { + go func() { + if err := metrics.Run(ctx, metricsAddress); err != nil { + klog.Fatalf("Run metrics returned an error: %v", err) + } + }() + } // Initialize cloud d := blockstorage.NewDriver(&blockstorage.DriverOpts{ Endpoint: endpoint, From 1c459c3dd251f98ad74cc695d05afdf7d9c82712 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Thu, 18 Jun 2026 11:08:07 +0200 Subject: [PATCH 10/10] create prometheus metrics exporter --- cmd/stackit-csi-plugin/main.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/stackit-csi-plugin/main.go b/cmd/stackit-csi-plugin/main.go index ec9c9458..f1640dae 100644 --- a/cmd/stackit-csi-plugin/main.go +++ b/cmd/stackit-csi-plugin/main.go @@ -7,6 +7,7 @@ import ( "os/signal" "syscall" + "github.com/prometheus/client_golang/prometheus" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/stackitcloud/cloud-provider-stackit/pkg/metrics" @@ -73,7 +74,7 @@ func main() { cmd.PersistentFlags().StringVar(&cluster, "cluster", "", "The identifier of the cluster that the plugin is running in.") cmd.PersistentFlags().StringVar(&metricsAddress, "metrics-address", "", - "The TCP network 
address where the HTTP server for providing metrics for diagnostics, will listen (example: `:9090`)."+ + "The TCP network address where the HTTP server for providing metrics for diagnostics, will listen (example: `:8080`)."+ "The default is empty string, which means the server is disabled.") cmd.PersistentFlags().BoolVar(&provideControllerService, "provide-controller-service", true, @@ -89,6 +90,8 @@ func main() { func handle(ctx context.Context) { if metricsAddress != "" { + metricsExporter := metrics.NewExporter() + prometheus.MustRegister(metricsExporter) go func() { if err := metrics.Run(ctx, metricsAddress); err != nil { klog.Fatalf("Run metrics returned an error: %v", err)