Exclude socket metrics (#9770)

* exclude creation and exporting of socket metrics via flag
* make exclude metric naming more consistent
* fix connect time metric update
* add documentation
* e2e test
* improve creation of metric mapping

This commit is contained in:
parent bd771997e0
commit 4e8d0b5836

10 changed files with 362 additions and 96 deletions
@@ -70,7 +70,7 @@ func main() {
  mc := metric.NewDummyCollector()
  if conf.EnableMetrics {
      // TODO: Ingress class is not a part of dataplane anymore
-     mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets)
+     mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
      if err != nil {
          klog.Fatalf("Error creating prometheus collector: %v", err)
      }
@@ -133,7 +133,7 @@ func main() {

  mc := metric.NewDummyCollector()
  if conf.EnableMetrics {
-     mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets)
+     mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
      if err != nil {
          klog.Fatalf("Error creating prometheus collector: %v", err)
      }
@@ -25,6 +25,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
  | `--enable-ssl-chain-completion` | Autocomplete SSL certificate chains with missing intermediate CA certificates. Certificates uploaded to Kubernetes must have the "Authority Information Access" X.509 v3 extension for this to succeed. (default false) |
  | `--enable-ssl-passthrough` | Enable SSL Passthrough. (default false) |
  | `--enable-topology-aware-routing` | Enable topology aware hints feature, needs service object annotation service.kubernetes.io/topology-aware-hints sets to auto. (default false) |
+ | `--exclude-socket-metrics` | Set of socket request metrics to exclude which won't be exported nor being calculated. The possible socket request metrics to exclude are documented in the monitoring guide e.g. 'nginx_ingress_controller_request_duration_seconds,nginx_ingress_controller_response_size' |
  | `--health-check-path` | URL path of the health check endpoint. Configured inside the NGINX status server. All requests received on the port defined by the healthz-port parameter are forwarded internally to this path. (default "/healthz") |
  | `--health-check-timeout` | Time limit, in seconds, for a probe to health-check-path to succeed. (default 10) |
  | `--healthz-port` | Port to use for the healthz endpoint. (default 10254) |
@@ -102,10 +102,11 @@ type Configuration struct {

  EnableProfiling bool

- EnableMetrics       bool
- MetricsPerHost      bool
- MetricsBuckets      *collectors.HistogramBuckets
- ReportStatusClasses bool
+ EnableMetrics        bool
+ MetricsPerHost       bool
+ MetricsBuckets       *collectors.HistogramBuckets
+ ReportStatusClasses  bool
+ ExcludeSocketMetrics []string

  FakeCertificate *ingress.SSLCert

@@ -21,6 +21,7 @@ import (
  "io"
  "net"
  "os"
+ "strings"
  "syscall"

  jsoniter "github.com/json-iterator/go"
@@ -60,6 +61,8 @@ type HistogramBuckets struct {
  SizeBuckets []float64
  }

+ type metricMapping map[string]prometheus.Collector
+
  // SocketCollector stores prometheus metrics and ingress meta-data
  type SocketCollector struct {
      prometheus.Collector
@@ -78,7 +81,7 @@ type SocketCollector struct {
  listener net.Listener

- metricMapping map[string]interface{}
+ metricMapping metricMapping

  hosts sets.Set[string]

@@ -106,7 +109,7 @@ var defObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}
  // NewSocketCollector creates a new SocketCollector instance using
  // the ingress watch namespace and class used by the controller
- func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets) (*SocketCollector, error) {
+ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, excludeMetrics []string) (*SocketCollector, error) {
  socket := "/tmp/nginx/prometheus-nginx.socket"
  // unix sockets must be unlink()ed before being used
  _ = syscall.Unlink(socket)
@@ -132,13 +135,23 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
      requestTags = append(requestTags, "host")
  }

+ em := make(map[string]struct{}, len(excludeMetrics))
+ for _, m := range excludeMetrics {
+     // remove potential nginx_ingress_controller prefix from the metric name
+     // TBD: how to handle fully qualified histogram metrics e.g. _buckets and _sum. Should we just remove the suffix and remove the histogram metric or ignore it?
+     em[strings.TrimPrefix(m, "nginx_ingress_controller_")] = struct{}{}
+ }
+
+ // create metric mapping with only the metrics that are not excluded
+ mm := make(metricMapping)
+
  sc := &SocketCollector{
      listener: listener,

      metricsPerHost:      metricsPerHost,
      reportStatusClasses: reportStatusClasses,

-     connectTime: prometheus.NewHistogramVec(
+     connectTime: histogramMetric(
          prometheus.HistogramOpts{
              Name: "connect_duration_seconds",
              Help: "The time spent on establishing a connection with the upstream server",
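The exclusion list is normalized before it is used: strings.TrimPrefix is a no-op when the prefix is absent, so a user can pass either the fully qualified or the short metric name and both land on the same key. A minimal standalone sketch of that behavior (not part of the diff, metric names chosen for illustration):

    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        excludeMetrics := []string{
            "nginx_ingress_controller_connect_duration_seconds", // fully qualified
            "response_duration_seconds",                         // short form
        }
        em := make(map[string]struct{}, len(excludeMetrics))
        for _, m := range excludeMetrics {
            // TrimPrefix leaves names without the prefix untouched,
            // so both spellings end up as short keys in the set.
            em[strings.TrimPrefix(m, "nginx_ingress_controller_")] = struct{}{}
        }
        fmt.Println(len(em)) // 2
        _, ok := em["connect_duration_seconds"]
        fmt.Println(ok) // true
    }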
@@ -147,8 +160,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.TimeBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),
-     headerTime: prometheus.NewHistogramVec(
+
+     headerTime: histogramMetric(
          prometheus.HistogramOpts{
              Name: "header_duration_seconds",
              Help: "The time spent on receiving first header from the upstream server",
@@ -157,8 +173,10 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.TimeBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),
-     responseTime: prometheus.NewHistogramVec(
+     responseTime: histogramMetric(
          prometheus.HistogramOpts{
              Name: "response_duration_seconds",
              Help: "The time spent on receiving the response from the upstream server",
@@ -167,8 +185,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.TimeBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),
-     requestTime: prometheus.NewHistogramVec(
+
+     requestTime: histogramMetric(
          prometheus.HistogramOpts{
              Name: "request_duration_seconds",
              Help: "The request processing time in milliseconds",
@@ -177,9 +198,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.TimeBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),

-     responseLength: prometheus.NewHistogramVec(
+     responseLength: histogramMetric(
          prometheus.HistogramOpts{
              Name: "response_size",
              Help: "The response length (including request line, header, and request body)",
@@ -188,19 +211,24 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.LengthBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),
-     requestLength: prometheus.NewHistogramVec(
+
+     requestLength: histogramMetric(
          prometheus.HistogramOpts{
              Name: "request_size",
              Help: "The request length (including request line, header, and request body)",
              Namespace: PrometheusNamespace,
-             Buckets: buckets.LengthBuckets,
              ConstLabels: constLabels,
+             Buckets: buckets.LengthBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),

-     requests: prometheus.NewCounterVec(
+     requests: counterMetric(
          prometheus.CounterOpts{
              Name: "requests",
              Help: "The total number of client requests",
@@ -208,9 +236,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              ConstLabels: constLabels,
          },
          requestTags,
+         em,
+         mm,
      ),

-     bytesSent: prometheus.NewHistogramVec(
+     bytesSent: histogramMetric(
          prometheus.HistogramOpts{
              Name: "bytes_sent",
              Help: "DEPRECATED The number of bytes sent to a client",
@@ -219,9 +249,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              ConstLabels: constLabels,
          },
          requestTags,
+         em,
+         mm,
      ),

-     upstreamLatency: prometheus.NewSummaryVec(
+     upstreamLatency: summaryMetric(
          prometheus.SummaryOpts{
              Name: "ingress_upstream_latency_seconds",
              Help: "DEPRECATED Upstream service latency per Ingress",
@@ -230,28 +262,59 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Objectives: defObjectives,
          },
          []string{"ingress", "namespace", "service", "canary"},
+         em,
+         mm,
      ),
  }

- sc.metricMapping = map[string]interface{}{
-     prometheus.BuildFQName(PrometheusNamespace, "", "requests"): sc.requests,
-
-     prometheus.BuildFQName(PrometheusNamespace, "", "connect_duration_seconds"): sc.connectTime,
-     prometheus.BuildFQName(PrometheusNamespace, "", "header_duration_seconds"): sc.headerTime,
-     prometheus.BuildFQName(PrometheusNamespace, "", "response_duration_seconds"): sc.responseTime,
-     prometheus.BuildFQName(PrometheusNamespace, "", "request_duration_seconds"): sc.requestTime,
-
-     prometheus.BuildFQName(PrometheusNamespace, "", "request_size"): sc.requestLength,
-     prometheus.BuildFQName(PrometheusNamespace, "", "response_size"): sc.responseLength,
-
-     prometheus.BuildFQName(PrometheusNamespace, "", "bytes_sent"): sc.bytesSent,
-
-     prometheus.BuildFQName(PrometheusNamespace, "", "ingress_upstream_latency_seconds"): sc.upstreamLatency,
- }
-
+ sc.metricMapping = mm
  return sc, nil
  }

+ func containsMetric(excludeMetrics map[string]struct{}, name string) bool {
+     if _, ok := excludeMetrics[name]; ok {
+         klog.V(3).InfoS("Skipping metric", "metric", name)
+         return true
+     }
+     return false
+ }
+
+ func summaryMetric(opts prometheus.SummaryOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.SummaryVec {
+     if containsMetric(excludeMetrics, opts.Name) {
+         return nil
+     }
+     m := prometheus.NewSummaryVec(
+         opts,
+         requestTags,
+     )
+     metricMapping[prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)] = m
+     return m
+ }
+
+ func counterMetric(opts prometheus.CounterOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.CounterVec {
+     if containsMetric(excludeMetrics, opts.Name) {
+         return nil
+     }
+     m := prometheus.NewCounterVec(
+         opts,
+         requestTags,
+     )
+     metricMapping[prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)] = m
+     return m
+ }
+
+ func histogramMetric(opts prometheus.HistogramOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.HistogramVec {
+     if containsMetric(excludeMetrics, opts.Name) {
+         return nil
+     }
+     m := prometheus.NewHistogramVec(
+         opts,
+         requestTags,
+     )
+     metricMapping[prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)] = m
+     return m
+ }
+
  func (sc *SocketCollector) handleMessage(msg []byte) {
      klog.V(5).InfoS("Metric", "message", string(msg))

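The three constructors above share one pattern: an excluded metric is never created and never registered, while everything that is created is recorded in the shared mapping under its fully qualified name. A condensed, self-contained sketch of that pattern (illustrative only, not the PR's code verbatim):

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    // histogramMetric mirrors the helper above: skip excluded names, register the rest.
    func histogramMetric(opts prometheus.HistogramOpts, labels []string, exclude map[string]struct{}, mapping map[string]prometheus.Collector) *prometheus.HistogramVec {
        if _, skip := exclude[opts.Name]; skip {
            return nil // callers must nil-check before observing
        }
        m := prometheus.NewHistogramVec(opts, labels)
        mapping[prometheus.BuildFQName(opts.Namespace, "", opts.Name)] = m
        return m
    }

    func main() {
        exclude := map[string]struct{}{"connect_duration_seconds": {}}
        mapping := map[string]prometheus.Collector{}

        connect := histogramMetric(prometheus.HistogramOpts{Namespace: "nginx_ingress_controller", Name: "connect_duration_seconds"}, []string{"host"}, exclude, mapping)
        request := histogramMetric(prometheus.HistogramOpts{Namespace: "nginx_ingress_controller", Name: "request_duration_seconds"}, []string{"host"}, exclude, mapping)

        fmt.Println(connect == nil, request == nil, len(mapping)) // true false 1
    }

Returning nil (instead of a registered-but-unused vector) is what lets handleMessage below skip both the label lookup and the observation for excluded metrics.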
@@ -305,30 +368,36 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
      "canary": stats.Canary,
  }

- requestsMetric, err := sc.requests.GetMetricWith(collectorLabels)
- if err != nil {
-     klog.ErrorS(err, "Error fetching requests metric")
- } else {
-     requestsMetric.Inc()
+ if sc.requests != nil {
+     requestsMetric, err := sc.requests.GetMetricWith(collectorLabels)
+     if err != nil {
+         klog.ErrorS(err, "Error fetching requests metric")
+     } else {
+         requestsMetric.Inc()
+     }
  }

  if stats.Latency != -1 {
-     connectTimeMetric, err := sc.connectTime.GetMetricWith(requestLabels)
-     if err != nil {
-         klog.ErrorS(err, "Error fetching connect time metric")
-     } else {
-         connectTimeMetric.Observe(stats.Latency)
+     if sc.connectTime != nil {
+         connectTimeMetric, err := sc.connectTime.GetMetricWith(requestLabels)
+         if err != nil {
+             klog.ErrorS(err, "Error fetching connect time metric")
+         } else {
+             connectTimeMetric.Observe(stats.Latency)
+         }
      }

-     latencyMetric, err := sc.upstreamLatency.GetMetricWith(latencyLabels)
-     if err != nil {
-         klog.ErrorS(err, "Error fetching latency metric")
-     } else {
-         latencyMetric.Observe(stats.Latency)
+     if sc.upstreamLatency != nil {
+         latencyMetric, err := sc.upstreamLatency.GetMetricWith(latencyLabels)
+         if err != nil {
+             klog.ErrorS(err, "Error fetching latency metric")
+         } else {
+             latencyMetric.Observe(stats.Latency)
+         }
      }
  }

- if stats.HeaderTime != -1 {
+ if stats.HeaderTime != -1 && sc.headerTime != nil {
      headerTimeMetric, err := sc.headerTime.GetMetricWith(requestLabels)
      if err != nil {
          klog.ErrorS(err, "Error fetching header time metric")
@@ -337,7 +406,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
      }
  }

- if stats.RequestTime != -1 {
+ if stats.RequestTime != -1 && sc.requestTime != nil {
      requestTimeMetric, err := sc.requestTime.GetMetricWith(requestLabels)
      if err != nil {
          klog.ErrorS(err, "Error fetching request duration metric")
@@ -346,7 +415,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
      }
  }

- if stats.RequestLength != -1 {
+ if stats.RequestLength != -1 && sc.requestLength != nil {
      requestLengthMetric, err := sc.requestLength.GetMetricWith(requestLabels)
      if err != nil {
          klog.ErrorS(err, "Error fetching request length metric")
@@ -355,7 +424,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
      }
  }

- if stats.ResponseTime != -1 {
+ if stats.ResponseTime != -1 && sc.responseTime != nil {
      responseTimeMetric, err := sc.responseTime.GetMetricWith(requestLabels)
      if err != nil {
          klog.ErrorS(err, "Error fetching upstream response time metric")
@@ -365,18 +434,22 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
  }

  if stats.ResponseLength != -1 {
-     bytesSentMetric, err := sc.bytesSent.GetMetricWith(requestLabels)
-     if err != nil {
-         klog.ErrorS(err, "Error fetching bytes sent metric")
-     } else {
-         bytesSentMetric.Observe(stats.ResponseLength)
+     if sc.bytesSent != nil {
+         bytesSentMetric, err := sc.bytesSent.GetMetricWith(requestLabels)
+         if err != nil {
+             klog.ErrorS(err, "Error fetching bytes sent metric")
+         } else {
+             bytesSentMetric.Observe(stats.ResponseLength)
+         }
      }

-     responseSizeMetric, err := sc.responseLength.GetMetricWith(requestLabels)
-     if err != nil {
-         klog.ErrorS(err, "Error fetching bytes sent metric")
-     } else {
-         responseSizeMetric.Observe(stats.ResponseLength)
+     if sc.responseLength != nil {
+         responseSizeMetric, err := sc.responseLength.GetMetricWith(requestLabels)
+         if err != nil {
+             klog.ErrorS(err, "Error fetching bytes sent metric")
+         } else {
+             responseSizeMetric.Observe(stats.ResponseLength)
+         }
      }
  }
  }
@@ -471,36 +544,16 @@ func (sc *SocketCollector) RemoveMetrics(ingresses []string, registry prometheus

  // Describe implements prometheus.Collector
  func (sc SocketCollector) Describe(ch chan<- *prometheus.Desc) {
-     sc.connectTime.Describe(ch)
-     sc.headerTime.Describe(ch)
-     sc.responseTime.Describe(ch)
-     sc.requestTime.Describe(ch)
-
-     sc.requestLength.Describe(ch)
-     sc.responseLength.Describe(ch)
-
-     sc.requests.Describe(ch)
-
-     sc.upstreamLatency.Describe(ch)
-
-     sc.bytesSent.Describe(ch)
+     for _, metric := range sc.metricMapping {
+         metric.Describe(ch)
+     }
  }

  // Collect implements the prometheus.Collector interface.
  func (sc SocketCollector) Collect(ch chan<- prometheus.Metric) {
-     sc.connectTime.Collect(ch)
-     sc.headerTime.Collect(ch)
-     sc.responseTime.Collect(ch)
-     sc.requestTime.Collect(ch)
-
-     sc.requestLength.Collect(ch)
-     sc.responseLength.Collect(ch)
-
-     sc.requests.Collect(ch)
-
-     sc.upstreamLatency.Collect(ch)
-
-     sc.bytesSent.Collect(ch)
+     for _, metric := range sc.metricMapping {
+         metric.Collect(ch)
+     }
  }

  // SetHosts sets the hostnames that are being served by the ingress controller
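Iterating over the mapping is what makes exclusion safe at registration time: a skipped metric is simply absent from the map, so Describe and Collect never see a nil collector. A small standalone illustration of the same loop (mapping contents here are hypothetical, not taken from the diff):

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        // Only non-excluded metrics ever get added to the mapping.
        mapping := map[string]prometheus.Collector{
            "nginx_ingress_controller_requests": prometheus.NewCounterVec(
                prometheus.CounterOpts{Namespace: "nginx_ingress_controller", Name: "requests"},
                []string{"host"},
            ),
        }

        ch := make(chan *prometheus.Desc, 8)
        for _, m := range mapping {
            m.Describe(ch) // same loop shape as SocketCollector.Describe
        }
        close(ch)
        for desc := range ch {
            fmt.Println(desc.String())
        }
    }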
@@ -84,6 +84,7 @@ func TestCollector(t *testing.T) {
  data []string
  metrics []string
  useStatusClasses bool
+ excludeMetrics []string
  wantBefore string
  removeIngresses []string
  wantAfter string
@@ -470,13 +471,126 @@ func TestCollector(t *testing.T) {
      wantAfter: `
  `,
  },
+ {
+     name: "basic exclude metrics test",
+     data: []string{`[{
+         "host":"testshop.com",
+         "status":"200",
+         "bytesSent":150.0,
+         "method":"GET",
+         "path":"/admin",
+         "requestLength":300.0,
+         "requestTime":60.0,
+         "upstreamLatency":1.0,
+         "upstreamHeaderTime":5.0,
+         "upstreamName":"test-upstream",
+         "upstreamIP":"1.1.1.1:8080",
+         "upstreamResponseTime":200,
+         "upstreamStatus":"220",
+         "namespace":"test-app-production",
+         "ingress":"web-yml",
+         "service":"test-app",
+         "canary":""
+     }]`},
+     excludeMetrics: []string{"nginx_ingress_controller_connect_duration_seconds"},
+     metrics: []string{"nginx_ingress_controller_connect_duration_seconds", "nginx_ingress_controller_response_duration_seconds"},
+     useStatusClasses: true,
+     wantBefore: `
+         # HELP nginx_ingress_controller_response_duration_seconds The time spent on receiving the response from the upstream server
+         # TYPE nginx_ingress_controller_response_duration_seconds histogram
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.005"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.01"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.025"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.05"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.1"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.25"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="1"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="2.5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="10"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="+Inf"} 1
+         nginx_ingress_controller_response_duration_seconds_sum{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 200
+         nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
+     `,
+ },
+ {
+     name: "remove metrics with the short metric name",
+     data: []string{`[{
+         "host":"testshop.com",
+         "status":"200",
+         "bytesSent":150.0,
+         "method":"GET",
+         "path":"/admin",
+         "requestLength":300.0,
+         "requestTime":60.0,
+         "upstreamLatency":1.0,
+         "upstreamHeaderTime":5.0,
+         "upstreamName":"test-upstream",
+         "upstreamIP":"1.1.1.1:8080",
+         "upstreamResponseTime":200,
+         "upstreamStatus":"220",
+         "namespace":"test-app-production",
+         "ingress":"web-yml",
+         "service":"test-app",
+         "canary":""
+     }]`},
+     excludeMetrics: []string{"response_duration_seconds"},
+     metrics: []string{"nginx_ingress_controller_response_duration_seconds"},
+     useStatusClasses: true,
+     wantBefore: `
+ `,
+ },
+ {
+     name: "exclude metrics make sure to only remove exactly matched metrics",
+     data: []string{`[{
+         "host":"testshop.com",
+         "status":"200",
+         "bytesSent":150.0,
+         "method":"GET",
+         "path":"/admin",
+         "requestLength":300.0,
+         "requestTime":60.0,
+         "upstreamLatency":1.0,
+         "upstreamHeaderTime":5.0,
+         "upstreamName":"test-upstream",
+         "upstreamIP":"1.1.1.1:8080",
+         "upstreamResponseTime":200,
+         "upstreamStatus":"220",
+         "namespace":"test-app-production",
+         "ingress":"web-yml",
+         "service":"test-app",
+         "canary":""
+     }]`},
+     excludeMetrics: []string{"response_duration_seconds2", "test.*", "nginx_ingress_.*", "response_duration_secon"},
+     metrics: []string{"nginx_ingress_controller_response_duration_seconds"},
+     useStatusClasses: true,
+     wantBefore: `
+         # HELP nginx_ingress_controller_response_duration_seconds The time spent on receiving the response from the upstream server
+         # TYPE nginx_ingress_controller_response_duration_seconds histogram
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.005"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.01"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.025"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.05"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.1"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.25"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="1"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="2.5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="10"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="+Inf"} 1
+         nginx_ingress_controller_response_duration_seconds_sum{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 200
+         nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
+     `,
+ },
  }

  for _, c := range cases {
      t.Run(c.name, func(t *testing.T) {
          registry := prometheus.NewPedanticRegistry()

-         sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets)
+         sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, c.excludeMetrics)
          if err != nil {
              t.Errorf("%v: unexpected error creating new SocketCollector: %v", c.name, err)
          }
@@ -71,7 +71,7 @@ type collector struct {
  }

  // NewCollector creates a new metric collector the for ingress controller
- func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets) (Collector, error) {
+ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, excludedSocketMetrics []string) (Collector, error) {
  podNamespace := os.Getenv("POD_NAMESPACE")
  if podNamespace == "" {
      podNamespace = "default"
@@ -89,7 +89,7 @@ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus
      return nil, err
  }

- s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets)
+ s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, excludedSocketMetrics)
  if err != nil {
      return nil, err
  }
@@ -171,10 +171,11 @@ Requires the update-status parameter.`)
  reportStatusClasses = flags.Bool("report-status-classes", false,
      `Use status classes (2xx, 3xx, 4xx and 5xx) instead of status codes in metrics.`)

- timeBuckets         = flags.Float64Slice("time-buckets", prometheus.DefBuckets, "Set of buckets which will be used for prometheus histogram metrics such as RequestTime, ResponseTime.")
- lengthBuckets       = flags.Float64Slice("length-buckets", prometheus.LinearBuckets(10, 10, 10), "Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength.")
- sizeBuckets         = flags.Float64Slice("size-buckets", prometheus.ExponentialBuckets(10, 10, 7), "Set of buckets which will be used for prometheus histogram metrics such as BytesSent.")
- monitorMaxBatchSize = flags.Int("monitor-max-batch-size", 10000, "Max batch size of NGINX metrics.")
+ timeBuckets          = flags.Float64Slice("time-buckets", prometheus.DefBuckets, "Set of buckets which will be used for prometheus histogram metrics such as RequestTime, ResponseTime.")
+ lengthBuckets        = flags.Float64Slice("length-buckets", prometheus.LinearBuckets(10, 10, 10), "Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength.")
+ sizeBuckets          = flags.Float64Slice("size-buckets", prometheus.ExponentialBuckets(10, 10, 7), "Set of buckets which will be used for prometheus histogram metrics such as BytesSent.")
+ excludeSocketMetrics = flags.StringSlice("exclude-socket-metrics", []string{}, "Set of socket request metrics to exclude which won't be exported nor being calculated. E.g. 'nginx_ingress_controller_success,nginx_ingress_controller_header_duration_seconds'.")
+ monitorMaxBatchSize  = flags.Int("monitor-max-batch-size", 10000, "Max batch size of NGINX metrics.")

  httpPort = flags.Int("http-port", 80, `Port to use for servicing HTTP traffic.`)
  httpsPort = flags.Int("https-port", 443, `Port to use for servicing HTTPS traffic.`)
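The controller's flags are parsed with spf13/pflag (the Float64Slice helpers above come from the same package), so a comma-separated value on the command line arrives in NewCollector as a plain []string. A standalone sketch under that assumption:

    package main

    import (
        "fmt"

        "github.com/spf13/pflag"
    )

    func main() {
        fs := pflag.NewFlagSet("controller", pflag.ContinueOnError)
        // StringSlice splits its comma-separated value into a []string.
        excludeSocketMetrics := fs.StringSlice("exclude-socket-metrics", []string{},
            "Set of socket request metrics to exclude which won't be exported nor being calculated.")

        _ = fs.Parse([]string{
            "--exclude-socket-metrics=nginx_ingress_controller_request_size,nginx_ingress_controller_header_duration_seconds",
        })

        fmt.Println(*excludeSocketMetrics)
        // [nginx_ingress_controller_request_size nginx_ingress_controller_header_duration_seconds]
    }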
@@ -328,6 +329,7 @@ https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-g
      MetricsPerHost:             *metricsPerHost,
      MetricsBuckets:             histogramBuckets,
      ReportStatusClasses:        *reportStatusClasses,
+     ExcludeSocketMetrics:       *excludeSocketMetrics,
      MonitorMaxBatchSize:        *monitorMaxBatchSize,
      DisableServiceExternalName: *disableServiceExternalName,
      EnableSSLPassthrough:       *enableSSLPassthrough,
@@ -40,6 +40,7 @@ import (
  _ "k8s.io/ingress-nginx/test/e2e/leaks"
  _ "k8s.io/ingress-nginx/test/e2e/loadbalance"
  _ "k8s.io/ingress-nginx/test/e2e/lua"
+ _ "k8s.io/ingress-nginx/test/e2e/metrics"
  _ "k8s.io/ingress-nginx/test/e2e/nginx"
  _ "k8s.io/ingress-nginx/test/e2e/security"
  _ "k8s.io/ingress-nginx/test/e2e/servicebackend"
test/e2e/metrics/metrics.go — new file, 94 lines
@@ -0,0 +1,94 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package defaultbackend

import (
    "context"
    "fmt"
    "net/http"
    "strings"
    "time"

    "github.com/onsi/ginkgo/v2"
    "github.com/stretchr/testify/assert"
    appsv1 "k8s.io/api/apps/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    "k8s.io/ingress-nginx/test/e2e/framework"
)

const waitForMetrics = 2 * time.Second

var _ = framework.IngressNginxDescribe("[metrics] exported prometheus metrics", func() {
    f := framework.NewDefaultFramework("metrics")
    host := "foo.com"

    ginkgo.BeforeEach(func() {
        f.NewEchoDeployment()
        f.EnsureIngress(framework.NewSingleIngress(host, "/", host, f.Namespace, framework.EchoService, 80, nil))
        f.WaitForNginxServer(host,
            func(server string) bool {
                return strings.Contains(server, fmt.Sprintf("server_name %s ;", host)) &&
                    strings.Contains(server, "proxy_pass http://upstream_balancer;")
            })
    })

    ginkgo.It("exclude socket request metrics are absent", func() {
        err := f.UpdateIngressControllerDeployment(func(deployment *appsv1.Deployment) error {
            args := deployment.Spec.Template.Spec.Containers[0].Args
            args = append(args, "--exclude-socket-metrics=nginx_ingress_controller_request_size,nginx_ingress_controller_header_duration_seconds")
            deployment.Spec.Template.Spec.Containers[0].Args = args
            _, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
            return err
        })
        assert.Nil(ginkgo.GinkgoT(), err, "updating deployment")

        f.HTTPTestClient().
            GET("/").
            WithHeader("Host", host).
            Expect().
            Status(http.StatusOK)
        time.Sleep(waitForMetrics)

        ip := f.GetNginxPodIP()
        mf, err := f.GetMetric("nginx_ingress_controller_request_size", ip)
        assert.ErrorContains(ginkgo.GinkgoT(), err, "nginx_ingress_controller_request_size")
        assert.Nil(ginkgo.GinkgoT(), mf)
    })

    ginkgo.It("exclude socket request metrics are present", func() {
        err := f.UpdateIngressControllerDeployment(func(deployment *appsv1.Deployment) error {
            args := deployment.Spec.Template.Spec.Containers[0].Args
            args = append(args, "--exclude-socket-metrics=non_existing_metric_does_not_affect_existing_metrics")
            deployment.Spec.Template.Spec.Containers[0].Args = args
            _, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
            return err
        })
        assert.Nil(ginkgo.GinkgoT(), err, "updating deployment")

        f.HTTPTestClient().
            GET("/").
            WithHeader("Host", host).
            Expect().
            Status(http.StatusOK)
        time.Sleep(waitForMetrics)

        ip := f.GetNginxPodIP()
        mf, err := f.GetMetric("nginx_ingress_controller_request_size", ip)
        assert.Nil(ginkgo.GinkgoT(), err)
        assert.NotNil(ginkgo.GinkgoT(), mf)
    })
})