Exclude socket metrics (#9770)

* exclude creation and exporting of socket metrics via flag

* make exclude metric naming more consistent

* fix connect time metric update

* add documentation

* e2e test

* improve creation of metric mapping
This commit is contained in:
Marco Cadetg 2023-04-11 10:01:18 +02:00 committed by GitHub
parent bd771997e0
commit 4e8d0b5836
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 362 additions and 96 deletions

View file

@ -70,7 +70,7 @@ func main() {
mc := metric.NewDummyCollector()
if conf.EnableMetrics {
// TODO: Ingress class is not a part of dataplane anymore
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets)
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
if err != nil {
klog.Fatalf("Error creating prometheus collector: %v", err)
}

View file

@ -133,7 +133,7 @@ func main() {
mc := metric.NewDummyCollector()
if conf.EnableMetrics {
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets)
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
if err != nil {
klog.Fatalf("Error creating prometheus collector: %v", err)
}

View file

@ -25,6 +25,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
| `--enable-ssl-chain-completion` | Autocomplete SSL certificate chains with missing intermediate CA certificates. Certificates uploaded to Kubernetes must have the "Authority Information Access" X.509 v3 extension for this to succeed. (default false)|
| `--enable-ssl-passthrough` | Enable SSL Passthrough. (default false) |
| `--enable-topology-aware-routing` | Enable topology aware hints feature, needs service object annotation service.kubernetes.io/topology-aware-hints sets to auto. (default false) |
| `--exclude-socket-metrics` | Set of socket request metrics to exclude. Excluded metrics are neither calculated nor exported. The socket request metrics that can be excluded are documented in the monitoring guide, e.g. 'nginx_ingress_controller_request_duration_seconds,nginx_ingress_controller_response_size'|
| `--health-check-path` | URL path of the health check endpoint. Configured inside the NGINX status server. All requests received on the port defined by the healthz-port parameter are forwarded internally to this path. (default "/healthz") |
| `--health-check-timeout` | Time limit, in seconds, for a probe to health-check-path to succeed. (default 10) |
| `--healthz-port` | Port to use for the healthz endpoint. (default 10254) |

View file

@ -102,10 +102,11 @@ type Configuration struct {
EnableProfiling bool
EnableMetrics bool
MetricsPerHost bool
MetricsBuckets *collectors.HistogramBuckets
ReportStatusClasses bool
EnableMetrics bool
MetricsPerHost bool
MetricsBuckets *collectors.HistogramBuckets
ReportStatusClasses bool
ExcludeSocketMetrics []string
FakeCertificate *ingress.SSLCert

View file

@ -21,6 +21,7 @@ import (
"io"
"net"
"os"
"strings"
"syscall"
jsoniter "github.com/json-iterator/go"
@ -60,6 +61,8 @@ type HistogramBuckets struct {
SizeBuckets []float64
}
// metricMapping associates a fully qualified metric name (as produced by
// prometheus.BuildFQName) with the collector registered under that name.
type metricMapping map[string]prometheus.Collector
// SocketCollector stores prometheus metrics and ingress meta-data
type SocketCollector struct {
prometheus.Collector
@ -78,7 +81,7 @@ type SocketCollector struct {
listener net.Listener
metricMapping map[string]interface{}
metricMapping metricMapping
hosts sets.Set[string]
@ -106,7 +109,7 @@ var defObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}
// NewSocketCollector creates a new SocketCollector instance using
// the ingress watch namespace and class used by the controller
func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets) (*SocketCollector, error) {
func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, excludeMetrics []string) (*SocketCollector, error) {
socket := "/tmp/nginx/prometheus-nginx.socket"
// unix sockets must be unlink()ed before being used
_ = syscall.Unlink(socket)
@ -132,13 +135,23 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
requestTags = append(requestTags, "host")
}
em := make(map[string]struct{}, len(excludeMetrics))
for _, m := range excludeMetrics {
// remove potential nginx_ingress_controller prefix from the metric name
// TBD: how to handle fully qualified histogram series names, e.g. those ending in _bucket or _sum. Should we strip the suffix and exclude the whole histogram metric, or ignore such entries?
em[strings.TrimPrefix(m, "nginx_ingress_controller_")] = struct{}{}
}
// create metric mapping with only the metrics that are not excluded
mm := make(metricMapping)
sc := &SocketCollector{
listener: listener,
metricsPerHost: metricsPerHost,
reportStatusClasses: reportStatusClasses,
connectTime: prometheus.NewHistogramVec(
connectTime: histogramMetric(
prometheus.HistogramOpts{
Name: "connect_duration_seconds",
Help: "The time spent on establishing a connection with the upstream server",
@ -147,8 +160,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
Buckets: buckets.TimeBuckets,
},
requestTags,
em,
mm,
),
headerTime: prometheus.NewHistogramVec(
headerTime: histogramMetric(
prometheus.HistogramOpts{
Name: "header_duration_seconds",
Help: "The time spent on receiving first header from the upstream server",
@ -157,8 +173,10 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
Buckets: buckets.TimeBuckets,
},
requestTags,
em,
mm,
),
responseTime: prometheus.NewHistogramVec(
responseTime: histogramMetric(
prometheus.HistogramOpts{
Name: "response_duration_seconds",
Help: "The time spent on receiving the response from the upstream server",
@ -167,8 +185,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
Buckets: buckets.TimeBuckets,
},
requestTags,
em,
mm,
),
requestTime: prometheus.NewHistogramVec(
requestTime: histogramMetric(
prometheus.HistogramOpts{
Name: "request_duration_seconds",
Help: "The request processing time in milliseconds",
@ -177,9 +198,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
Buckets: buckets.TimeBuckets,
},
requestTags,
em,
mm,
),
responseLength: prometheus.NewHistogramVec(
responseLength: histogramMetric(
prometheus.HistogramOpts{
Name: "response_size",
Help: "The response length (including request line, header, and request body)",
@ -188,19 +211,24 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
Buckets: buckets.LengthBuckets,
},
requestTags,
em,
mm,
),
requestLength: prometheus.NewHistogramVec(
requestLength: histogramMetric(
prometheus.HistogramOpts{
Name: "request_size",
Help: "The request length (including request line, header, and request body)",
Namespace: PrometheusNamespace,
Buckets: buckets.LengthBuckets,
ConstLabels: constLabels,
Buckets: buckets.LengthBuckets,
},
requestTags,
em,
mm,
),
requests: prometheus.NewCounterVec(
requests: counterMetric(
prometheus.CounterOpts{
Name: "requests",
Help: "The total number of client requests",
@ -208,9 +236,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
ConstLabels: constLabels,
},
requestTags,
em,
mm,
),
bytesSent: prometheus.NewHistogramVec(
bytesSent: histogramMetric(
prometheus.HistogramOpts{
Name: "bytes_sent",
Help: "DEPRECATED The number of bytes sent to a client",
@ -219,9 +249,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
ConstLabels: constLabels,
},
requestTags,
em,
mm,
),
upstreamLatency: prometheus.NewSummaryVec(
upstreamLatency: summaryMetric(
prometheus.SummaryOpts{
Name: "ingress_upstream_latency_seconds",
Help: "DEPRECATED Upstream service latency per Ingress",
@ -230,28 +262,59 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
Objectives: defObjectives,
},
[]string{"ingress", "namespace", "service", "canary"},
em,
mm,
),
}
sc.metricMapping = map[string]interface{}{
prometheus.BuildFQName(PrometheusNamespace, "", "requests"): sc.requests,
prometheus.BuildFQName(PrometheusNamespace, "", "connect_duration_seconds"): sc.connectTime,
prometheus.BuildFQName(PrometheusNamespace, "", "header_duration_seconds"): sc.headerTime,
prometheus.BuildFQName(PrometheusNamespace, "", "response_duration_seconds"): sc.responseTime,
prometheus.BuildFQName(PrometheusNamespace, "", "request_duration_seconds"): sc.requestTime,
prometheus.BuildFQName(PrometheusNamespace, "", "request_size"): sc.requestLength,
prometheus.BuildFQName(PrometheusNamespace, "", "response_size"): sc.responseLength,
prometheus.BuildFQName(PrometheusNamespace, "", "bytes_sent"): sc.bytesSent,
prometheus.BuildFQName(PrometheusNamespace, "", "ingress_upstream_latency_seconds"): sc.upstreamLatency,
}
sc.metricMapping = mm
return sc, nil
}
// containsMetric reports whether name is a key of excludeMetrics. When it is,
// a "Skipping metric" message is logged at verbosity level 3.
func containsMetric(excludeMetrics map[string]struct{}, name string) bool {
	_, excluded := excludeMetrics[name]
	if excluded {
		klog.V(3).InfoS("Skipping metric", "metric", name)
	}
	return excluded
}
// summaryMetric creates a SummaryVec from opts and requestTags and stores it in
// metricMapping under the name built from PrometheusNamespace and opts.Name.
// It returns nil, leaving metricMapping untouched, when opts.Name is listed in
// excludeMetrics.
func summaryMetric(opts prometheus.SummaryOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.SummaryVec {
	if containsMetric(excludeMetrics, opts.Name) {
		return nil
	}

	vec := prometheus.NewSummaryVec(opts, requestTags)
	fqName := prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)
	metricMapping[fqName] = vec

	return vec
}
// counterMetric creates a CounterVec from opts and requestTags and stores it in
// metricMapping under the name built from PrometheusNamespace and opts.Name.
// It returns nil, leaving metricMapping untouched, when opts.Name is listed in
// excludeMetrics.
func counterMetric(opts prometheus.CounterOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.CounterVec {
	if containsMetric(excludeMetrics, opts.Name) {
		return nil
	}

	vec := prometheus.NewCounterVec(opts, requestTags)
	fqName := prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)
	metricMapping[fqName] = vec

	return vec
}
// histogramMetric creates a HistogramVec from opts and requestTags and stores
// it in metricMapping under the name built from PrometheusNamespace and
// opts.Name. It returns nil, leaving metricMapping untouched, when opts.Name is
// listed in excludeMetrics.
func histogramMetric(opts prometheus.HistogramOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.HistogramVec {
	if containsMetric(excludeMetrics, opts.Name) {
		return nil
	}

	vec := prometheus.NewHistogramVec(opts, requestTags)
	fqName := prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)
	metricMapping[fqName] = vec

	return vec
}
func (sc *SocketCollector) handleMessage(msg []byte) {
klog.V(5).InfoS("Metric", "message", string(msg))
@ -305,30 +368,36 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
"canary": stats.Canary,
}
requestsMetric, err := sc.requests.GetMetricWith(collectorLabels)
if err != nil {
klog.ErrorS(err, "Error fetching requests metric")
} else {
requestsMetric.Inc()
if sc.requests != nil {
requestsMetric, err := sc.requests.GetMetricWith(collectorLabels)
if err != nil {
klog.ErrorS(err, "Error fetching requests metric")
} else {
requestsMetric.Inc()
}
}
if stats.Latency != -1 {
connectTimeMetric, err := sc.connectTime.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching connect time metric")
} else {
connectTimeMetric.Observe(stats.Latency)
if sc.connectTime != nil {
connectTimeMetric, err := sc.connectTime.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching connect time metric")
} else {
connectTimeMetric.Observe(stats.Latency)
}
}
latencyMetric, err := sc.upstreamLatency.GetMetricWith(latencyLabels)
if err != nil {
klog.ErrorS(err, "Error fetching latency metric")
} else {
latencyMetric.Observe(stats.Latency)
if sc.upstreamLatency != nil {
latencyMetric, err := sc.upstreamLatency.GetMetricWith(latencyLabels)
if err != nil {
klog.ErrorS(err, "Error fetching latency metric")
} else {
latencyMetric.Observe(stats.Latency)
}
}
}
if stats.HeaderTime != -1 {
if stats.HeaderTime != -1 && sc.headerTime != nil {
headerTimeMetric, err := sc.headerTime.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching header time metric")
@ -337,7 +406,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
}
}
if stats.RequestTime != -1 {
if stats.RequestTime != -1 && sc.requestTime != nil {
requestTimeMetric, err := sc.requestTime.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching request duration metric")
@ -346,7 +415,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
}
}
if stats.RequestLength != -1 {
if stats.RequestLength != -1 && sc.requestLength != nil {
requestLengthMetric, err := sc.requestLength.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching request length metric")
@ -355,7 +424,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
}
}
if stats.ResponseTime != -1 {
if stats.ResponseTime != -1 && sc.responseTime != nil {
responseTimeMetric, err := sc.responseTime.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching upstream response time metric")
@ -365,18 +434,22 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
}
if stats.ResponseLength != -1 {
bytesSentMetric, err := sc.bytesSent.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching bytes sent metric")
} else {
bytesSentMetric.Observe(stats.ResponseLength)
if sc.bytesSent != nil {
bytesSentMetric, err := sc.bytesSent.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching bytes sent metric")
} else {
bytesSentMetric.Observe(stats.ResponseLength)
}
}
responseSizeMetric, err := sc.responseLength.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching bytes sent metric")
} else {
responseSizeMetric.Observe(stats.ResponseLength)
if sc.responseLength != nil {
responseSizeMetric, err := sc.responseLength.GetMetricWith(requestLabels)
if err != nil {
klog.ErrorS(err, "Error fetching bytes sent metric")
} else {
responseSizeMetric.Observe(stats.ResponseLength)
}
}
}
}
@ -471,36 +544,16 @@ func (sc *SocketCollector) RemoveMetrics(ingresses []string, registry prometheus
// Describe implements prometheus.Collector
func (sc SocketCollector) Describe(ch chan<- *prometheus.Desc) {
sc.connectTime.Describe(ch)
sc.headerTime.Describe(ch)
sc.responseTime.Describe(ch)
sc.requestTime.Describe(ch)
sc.requestLength.Describe(ch)
sc.responseLength.Describe(ch)
sc.requests.Describe(ch)
sc.upstreamLatency.Describe(ch)
sc.bytesSent.Describe(ch)
for _, metric := range sc.metricMapping {
metric.Describe(ch)
}
}
// Collect implements the prometheus.Collector interface.
func (sc SocketCollector) Collect(ch chan<- prometheus.Metric) {
sc.connectTime.Collect(ch)
sc.headerTime.Collect(ch)
sc.responseTime.Collect(ch)
sc.requestTime.Collect(ch)
sc.requestLength.Collect(ch)
sc.responseLength.Collect(ch)
sc.requests.Collect(ch)
sc.upstreamLatency.Collect(ch)
sc.bytesSent.Collect(ch)
for _, metric := range sc.metricMapping {
metric.Collect(ch)
}
}
// SetHosts sets the hostnames that are being served by the ingress controller

View file

@ -84,6 +84,7 @@ func TestCollector(t *testing.T) {
data []string
metrics []string
useStatusClasses bool
excludeMetrics []string
wantBefore string
removeIngresses []string
wantAfter string
@ -470,13 +471,126 @@ func TestCollector(t *testing.T) {
wantAfter: `
`,
},
{
name: "basic exclude metrics test",
data: []string{`[{
"host":"testshop.com",
"status":"200",
"bytesSent":150.0,
"method":"GET",
"path":"/admin",
"requestLength":300.0,
"requestTime":60.0,
"upstreamLatency":1.0,
"upstreamHeaderTime":5.0,
"upstreamName":"test-upstream",
"upstreamIP":"1.1.1.1:8080",
"upstreamResponseTime":200,
"upstreamStatus":"220",
"namespace":"test-app-production",
"ingress":"web-yml",
"service":"test-app",
"canary":""
}]`},
excludeMetrics: []string{"nginx_ingress_controller_connect_duration_seconds"},
metrics: []string{"nginx_ingress_controller_connect_duration_seconds", "nginx_ingress_controller_response_duration_seconds"},
useStatusClasses: true,
wantBefore: `
# HELP nginx_ingress_controller_response_duration_seconds The time spent on receiving the response from the upstream server
# TYPE nginx_ingress_controller_response_duration_seconds histogram
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.005"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.01"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.025"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.05"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.1"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.25"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.5"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="1"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="2.5"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="5"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="10"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="+Inf"} 1
nginx_ingress_controller_response_duration_seconds_sum{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 200
nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
`,
},
{
name: "remove metrics with the short metric name",
data: []string{`[{
"host":"testshop.com",
"status":"200",
"bytesSent":150.0,
"method":"GET",
"path":"/admin",
"requestLength":300.0,
"requestTime":60.0,
"upstreamLatency":1.0,
"upstreamHeaderTime":5.0,
"upstreamName":"test-upstream",
"upstreamIP":"1.1.1.1:8080",
"upstreamResponseTime":200,
"upstreamStatus":"220",
"namespace":"test-app-production",
"ingress":"web-yml",
"service":"test-app",
"canary":""
}]`},
excludeMetrics: []string{"response_duration_seconds"},
metrics: []string{"nginx_ingress_controller_response_duration_seconds"},
useStatusClasses: true,
wantBefore: `
`,
},
{
name: "exclude metrics make sure to only remove exactly matched metrics",
data: []string{`[{
"host":"testshop.com",
"status":"200",
"bytesSent":150.0,
"method":"GET",
"path":"/admin",
"requestLength":300.0,
"requestTime":60.0,
"upstreamLatency":1.0,
"upstreamHeaderTime":5.0,
"upstreamName":"test-upstream",
"upstreamIP":"1.1.1.1:8080",
"upstreamResponseTime":200,
"upstreamStatus":"220",
"namespace":"test-app-production",
"ingress":"web-yml",
"service":"test-app",
"canary":""
}]`},
excludeMetrics: []string{"response_duration_seconds2", "test.*", "nginx_ingress_.*", "response_duration_secon"},
metrics: []string{"nginx_ingress_controller_response_duration_seconds"},
useStatusClasses: true,
wantBefore: `
# HELP nginx_ingress_controller_response_duration_seconds The time spent on receiving the response from the upstream server
# TYPE nginx_ingress_controller_response_duration_seconds histogram
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.005"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.01"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.025"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.05"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.1"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.25"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.5"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="1"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="2.5"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="5"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="10"} 0
nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="+Inf"} 1
nginx_ingress_controller_response_duration_seconds_sum{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 200
nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
`,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
registry := prometheus.NewPedanticRegistry()
sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets)
sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, c.excludeMetrics)
if err != nil {
t.Errorf("%v: unexpected error creating new SocketCollector: %v", c.name, err)
}

View file

@ -71,7 +71,7 @@ type collector struct {
}
// NewCollector creates a new metric collector the for ingress controller
func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets) (Collector, error) {
func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, excludedSocketMetrics []string) (Collector, error) {
podNamespace := os.Getenv("POD_NAMESPACE")
if podNamespace == "" {
podNamespace = "default"
@ -89,7 +89,7 @@ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus
return nil, err
}
s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets)
s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, excludedSocketMetrics)
if err != nil {
return nil, err
}

View file

@ -171,10 +171,11 @@ Requires the update-status parameter.`)
reportStatusClasses = flags.Bool("report-status-classes", false,
`Use status classes (2xx, 3xx, 4xx and 5xx) instead of status codes in metrics.`)
timeBuckets = flags.Float64Slice("time-buckets", prometheus.DefBuckets, "Set of buckets which will be used for prometheus histogram metrics such as RequestTime, ResponseTime.")
lengthBuckets = flags.Float64Slice("length-buckets", prometheus.LinearBuckets(10, 10, 10), "Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength.")
sizeBuckets = flags.Float64Slice("size-buckets", prometheus.ExponentialBuckets(10, 10, 7), "Set of buckets which will be used for prometheus histogram metrics such as BytesSent.")
monitorMaxBatchSize = flags.Int("monitor-max-batch-size", 10000, "Max batch size of NGINX metrics.")
timeBuckets = flags.Float64Slice("time-buckets", prometheus.DefBuckets, "Set of buckets which will be used for prometheus histogram metrics such as RequestTime, ResponseTime.")
lengthBuckets = flags.Float64Slice("length-buckets", prometheus.LinearBuckets(10, 10, 10), "Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength.")
sizeBuckets = flags.Float64Slice("size-buckets", prometheus.ExponentialBuckets(10, 10, 7), "Set of buckets which will be used for prometheus histogram metrics such as BytesSent.")
excludeSocketMetrics = flags.StringSlice("exclude-socket-metrics", []string{}, "Set of socket request metrics to exclude which won't be exported nor being calculated. E.g. 'nginx_ingress_controller_success,nginx_ingress_controller_header_duration_seconds'.")
monitorMaxBatchSize = flags.Int("monitor-max-batch-size", 10000, "Max batch size of NGINX metrics.")
httpPort = flags.Int("http-port", 80, `Port to use for servicing HTTP traffic.`)
httpsPort = flags.Int("https-port", 443, `Port to use for servicing HTTPS traffic.`)
@ -328,6 +329,7 @@ https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-g
MetricsPerHost: *metricsPerHost,
MetricsBuckets: histogramBuckets,
ReportStatusClasses: *reportStatusClasses,
ExcludeSocketMetrics: *excludeSocketMetrics,
MonitorMaxBatchSize: *monitorMaxBatchSize,
DisableServiceExternalName: *disableServiceExternalName,
EnableSSLPassthrough: *enableSSLPassthrough,

View file

@ -40,6 +40,7 @@ import (
_ "k8s.io/ingress-nginx/test/e2e/leaks"
_ "k8s.io/ingress-nginx/test/e2e/loadbalance"
_ "k8s.io/ingress-nginx/test/e2e/lua"
_ "k8s.io/ingress-nginx/test/e2e/metrics"
_ "k8s.io/ingress-nginx/test/e2e/nginx"
_ "k8s.io/ingress-nginx/test/e2e/security"
_ "k8s.io/ingress-nginx/test/e2e/servicebackend"

View file

@ -0,0 +1,94 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package defaultbackend
import (
"context"
"fmt"
"net/http"
"strings"
"time"
"github.com/onsi/ginkgo/v2"
"github.com/stretchr/testify/assert"
appsv1 "k8s.io/api/apps/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/ingress-nginx/test/e2e/framework"
)
// waitForMetrics is how long each test sleeps after issuing its HTTP request
// before it queries the controller for metrics.
const waitForMetrics = 2 * time.Second
// Verifies the effect of the --exclude-socket-metrics controller flag on the
// metrics returned by f.GetMetric: metrics named in the flag must be missing,
// and naming an unknown metric must leave the existing ones in place.
var _ = framework.IngressNginxDescribe("[metrics] exported prometheus metrics", func() {
	f := framework.NewDefaultFramework("metrics")
	host := "foo.com"

	ginkgo.BeforeEach(func() {
		f.NewEchoDeployment()
		f.EnsureIngress(framework.NewSingleIngress(host, "/", host, f.Namespace, framework.EchoService, 80, nil))
		f.WaitForNginxServer(host,
			func(server string) bool {
				return strings.Contains(server, fmt.Sprintf("server_name %s ;", host)) &&
					strings.Contains(server, "proxy_pass http://upstream_balancer;")
			})
	})

	ginkgo.It("exclude socket request metrics are absent", func() {
		// Single source of truth for both the flag value and the assertions
		// below, so every excluded metric is actually checked.
		excluded := []string{
			"nginx_ingress_controller_request_size",
			"nginx_ingress_controller_header_duration_seconds",
		}

		err := f.UpdateIngressControllerDeployment(func(deployment *appsv1.Deployment) error {
			args := deployment.Spec.Template.Spec.Containers[0].Args
			args = append(args, "--exclude-socket-metrics="+strings.Join(excluded, ","))
			deployment.Spec.Template.Spec.Containers[0].Args = args
			_, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
			return err
		})
		assert.Nil(ginkgo.GinkgoT(), err, "updating deployment")

		// Generate one request so the controller has socket data to report.
		f.HTTPTestClient().
			GET("/").
			WithHeader("Host", host).
			Expect().
			Status(http.StatusOK)
		time.Sleep(waitForMetrics)

		ip := f.GetNginxPodIP()
		for _, name := range excluded {
			mf, err := f.GetMetric(name, ip)
			// The lookup is expected to fail with an error mentioning the metric name.
			assert.ErrorContains(ginkgo.GinkgoT(), err, name)
			assert.Nil(ginkgo.GinkgoT(), mf)
		}
	})

	ginkgo.It("exclude socket request metrics are present", func() {
		err := f.UpdateIngressControllerDeployment(func(deployment *appsv1.Deployment) error {
			args := deployment.Spec.Template.Spec.Containers[0].Args
			// An unknown metric name in the flag must not remove any real metric.
			args = append(args, "--exclude-socket-metrics=non_existing_metric_does_not_affect_existing_metrics")
			deployment.Spec.Template.Spec.Containers[0].Args = args
			_, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
			return err
		})
		assert.Nil(ginkgo.GinkgoT(), err, "updating deployment")

		// Generate one request so the controller has socket data to report.
		f.HTTPTestClient().
			GET("/").
			WithHeader("Host", host).
			Expect().
			Status(http.StatusOK)
		time.Sleep(waitForMetrics)

		ip := f.GetNginxPodIP()
		mf, err := f.GetMetric("nginx_ingress_controller_request_size", ip)
		assert.Nil(ginkgo.GinkgoT(), err)
		assert.NotNil(ginkgo.GinkgoT(), mf)
	})
})