Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions cmd/stackit-csi-plugin/main.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
package main

import (
"context"
"fmt"
"os"
"os/signal"
"syscall"

"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/stackitcloud/cloud-provider-stackit/pkg/metrics"
sdkconfig "github.com/stackitcloud/stackit-sdk-go/core/config"
"k8s.io/component-base/cli"
"k8s.io/klog/v2"

Expand All @@ -21,7 +26,7 @@ var (
endpoint string
cloudConfig string
cluster string
httpEndpoint string
metricsAddress string
provideControllerService bool
provideNodeService bool
)
Expand All @@ -31,7 +36,10 @@ func main() {
Use: "stackit-csi-plugin",
Short: "STACKIT block-storage CSI plugin",
Run: func(_ *cobra.Command, _ []string) {
handle()
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
defer cancel()

handle(ctx)
},
PersistentPreRunE: func(cmd *cobra.Command, _ []string) error {
f := cmd.Flags()
Expand Down Expand Up @@ -64,8 +72,8 @@ func main() {
cmd.Flags().StringVar(&cloudConfig, "cloud-config", "", "CSI driver cloud config. This option can be given multiple times")

cmd.PersistentFlags().StringVar(&cluster, "cluster", "", "The identifier of the cluster that the plugin is running in.")
cmd.PersistentFlags().StringVar(&httpEndpoint, "http-endpoint", "",
"The TCP network address where the HTTP server for providing metrics for diagnostics, will listen (example: `:8080`)."+
cmd.PersistentFlags().StringVar(&metricsAddress, "metrics-address", "",
"The TCP network address where the HTTP server for providing metrics for diagnostics, will listen (example: `:9090`)."+
"The default is empty string, which means the server is disabled.")

cmd.PersistentFlags().BoolVar(&provideControllerService, "provide-controller-service", true,
Expand All @@ -79,7 +87,14 @@ func main() {
os.Exit(code)
}

func handle() {
func handle(ctx context.Context) {
if metricsAddress != "" {
go func() {
if err := metrics.Run(ctx, metricsAddress); err != nil {
klog.Fatalf("Run metrics returned an error: %v", err)
}
}()
}
// Initialize cloud
d := blockstorage.NewDriver(&blockstorage.DriverOpts{
Endpoint: endpoint,
Expand All @@ -94,7 +109,15 @@ func handle() {
klog.Fatal(err)
}

iaasClient, err := stackit.CreateIaaSClient(&cfg)
iaasHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS)
if err != nil {
klog.Fatalf("create IaaS metrics HTTP client: %v", err)
}
iaasOpts := []sdkconfig.ConfigurationOption{
sdkconfig.WithHTTPClient(iaasHTTPClient),
}

iaasClient, err := stackit.CreateIaaSClient(&cfg, iaasOpts...)
if err != nil {
klog.Fatalf("Failed to create IaaS client: %v", err)
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/onsi/ginkgo/v2 v2.30.0
github.com/onsi/gomega v1.41.0
github.com/prometheus/client_golang v1.23.2
github.com/prometheus/client_model v0.6.2
github.com/spf13/cobra v1.10.2
github.com/spf13/pflag v1.0.10
github.com/stackitcloud/stackit-sdk-go/core v0.26.0
Expand Down Expand Up @@ -88,7 +89,6 @@ require (
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.67.5 // indirect
github.com/prometheus/procfs v0.19.2 // indirect
github.com/stackitcloud/stackit-sdk-go/services/resourcemanager v0.24.0 // indirect
Expand Down
12 changes: 10 additions & 2 deletions pkg/ccm/stackit.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,12 @@ func BuildObservability() (*MetricsRemoteWrite, error) {

// NewCloudControllerManager creates a new instance of the stackit struct from a stackitconfig struct
func NewCloudControllerManager(cfg *stackitconfig.CCMConfig, obs *MetricsRemoteWrite) (*CloudControllerManager, error) {
lbHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameLoadBalancer)
if err != nil {
return nil, fmt.Errorf("create load balancer metrics HTTP client: %w", err)
}
lbOpts := []sdkconfig.ConfigurationOption{
sdkconfig.WithHTTPClient(metrics.NewInstrumentedHTTPClient()),
sdkconfig.WithHTTPClient(lbHTTPClient),
}

if cfg.Global.APIEndpoints.LoadBalancerAPI != "" {
Expand All @@ -144,8 +148,12 @@ func NewCloudControllerManager(cfg *stackitconfig.CCMConfig, obs *MetricsRemoteW
return nil, err
}

iaasHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS)
if err != nil {
return nil, fmt.Errorf("create IaaS metrics HTTP client: %w", err)
}
iaasOpts := []sdkconfig.ConfigurationOption{
sdkconfig.WithHTTPClient(metrics.NewInstrumentedHTTPClient()),
sdkconfig.WithHTTPClient(iaasHTTPClient),
}

if cfg.Global.APIEndpoints.IaasAPI != "" {
Expand Down
38 changes: 29 additions & 9 deletions pkg/metrics/http.go
Original file line number Diff line number Diff line change
@@ -1,21 +1,31 @@
package metrics

import (
"errors"
"fmt"
"net/http"
"strconv"
"strings"
"time"

"github.com/prometheus/client_golang/prometheus"
)

func NewInstrumentedHTTPClient() *http.Client {
return &http.Client{
Transport: &InstrumentedRoundTripper{http.DefaultTransport},
func NewInstrumentedHTTPClient(api string) (*http.Client, error) {
if api == "" {
return nil, errors.New("api name is required")
}

return &http.Client{
Transport: &InstrumentedRoundTripper{
api: api,
base: http.DefaultTransport,
},
}, nil
}

type InstrumentedRoundTripper struct {
api string
base http.RoundTripper
}

Expand All @@ -26,15 +36,25 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp
response, err := rt.base.RoundTrip(request)
duration := time.Since(startTime)

LoadBalancerResponseTimeHistogram.
With(prometheus.Labels{operationLabel: operation}).
HTTPRequestDurationHistogram.
With(prometheus.Labels{
apiLabel: rt.api,
operationLabel: operation,
}).
Observe(float64(duration.Seconds()))
LoadBalancerRequestCount.
With(prometheus.Labels{operationLabel: operation}).
HTTPRequestCount.
With(prometheus.Labels{
apiLabel: rt.api,
operationLabel: operation,
}).
Inc()

if response != nil && response.StatusCode >= http.StatusInternalServerError {
LoadBalancerErrorCount.Inc()
if response != nil && response.StatusCode >= 400 {
HTTPErrorCount.With(prometheus.Labels{
apiLabel: rt.api,
methodLabel: request.Method,
codeLabel: strconv.Itoa(response.StatusCode),
}).Inc()
}

return response, err
Expand Down
140 changes: 140 additions & 0 deletions pkg/metrics/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@ package metrics

import (
"net/http"
"net/http/httptest"
"net/url"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
dto "github.com/prometheus/client_model/go"
)

var _ = Describe("Metrics", func() {
Expand All @@ -22,4 +26,140 @@ var _ = Describe("Metrics", func() {
Entry("get load-balancers", "GET", "/v2/projects/6-a-4-8-c/regions/eu01/load-balancers", "get_load-balancers"),
Entry("get load-balancers instance", "GET", "/v2/projects/6-a-4-8-c/regions/eu01/load-balancers/id", "get_load-balancers_instance"),
)

Describe("InstrumentedRoundTripper", func() {
It("requires an API name", func() {
client, err := NewInstrumentedHTTPClient("")
Expect(err).To(MatchError("api name is required"))
Expect(client).To(BeNil())
})

It("increments HTTPRequestCount for responses", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
}))
defer server.Close()

labels := prometheus.Labels{
apiLabel: "test",
operationLabel: "get_request-count-test",
}
before := testutil.ToFloat64(HTTPRequestCount.With(labels))

client, err := NewInstrumentedHTTPClient("test")
Expect(err).NotTo(HaveOccurred())

response, err := client.Get(server.URL + "/request-count-test")
Expect(err).NotTo(HaveOccurred())
defer response.Body.Close()

after := testutil.ToFloat64(HTTPRequestCount.With(labels))
Expect(after - before).To(Equal(float64(1)))
})

It("records HTTPRequestDurationHistogram observations for responses", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
}))
defer server.Close()

labels := prometheus.Labels{
apiLabel: "test",
operationLabel: "get_request-duration-test",
}
before := histogramSampleCount(HTTPRequestDurationHistogram.With(labels))

client, err := NewInstrumentedHTTPClient("test")
Expect(err).NotTo(HaveOccurred())

response, err := client.Get(server.URL + "/request-duration-test")
Expect(err).NotTo(HaveOccurred())
defer response.Body.Close()

after := histogramSampleCount(HTTPRequestDurationHistogram.With(labels))
Expect(after - before).To(Equal(uint64(1)))
})

It("increments HTTPErrorCount for 400 responses", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusBadRequest)
}))
defer server.Close()

labels := prometheus.Labels{
apiLabel: "test",
methodLabel: http.MethodGet,
codeLabel: "400",
}
before := testutil.ToFloat64(HTTPErrorCount.With(labels))

client, err := NewInstrumentedHTTPClient("test")
Expect(err).NotTo(HaveOccurred())

response, err := client.Get(server.URL)
Expect(err).NotTo(HaveOccurred())
defer response.Body.Close()

after := testutil.ToFloat64(HTTPErrorCount.With(labels))
Expect(after - before).To(Equal(float64(1)))
})

It("increments HTTPErrorCount for 500 responses", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
}))
defer server.Close()

labels := prometheus.Labels{
apiLabel: "test",
methodLabel: http.MethodPost,
codeLabel: "500",
}
before := testutil.ToFloat64(HTTPErrorCount.With(labels))

client, err := NewInstrumentedHTTPClient("test")
Expect(err).NotTo(HaveOccurred())

response, err := client.Post(server.URL, "application/json", nil)
Expect(err).NotTo(HaveOccurred())
defer response.Body.Close()

after := testutil.ToFloat64(HTTPErrorCount.With(labels))
Expect(after - before).To(Equal(float64(1)))
})

It("does not increment HTTPErrorCount for successful responses", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
}))
defer server.Close()

labels := prometheus.Labels{
apiLabel: "test",
methodLabel: http.MethodGet,
codeLabel: "200",
}
before := testutil.ToFloat64(HTTPErrorCount.With(labels))

client, err := NewInstrumentedHTTPClient("test")
Expect(err).NotTo(HaveOccurred())

response, err := client.Get(server.URL)
Expect(err).NotTo(HaveOccurred())
defer response.Body.Close()

after := testutil.ToFloat64(HTTPErrorCount.With(labels))
Expect(after - before).To(Equal(float64(0)))
})
})
})

func histogramSampleCount(observer prometheus.Observer) uint64 {
metric, ok := observer.(prometheus.Metric)
Expect(ok).To(BeTrue())

dtoMetric := &dto.Metric{}
Expect(metric.Write(dtoMetric)).To(Succeed())

return dtoMetric.GetHistogram().GetSampleCount()
}
Loading