Exclude socket metrics (#9770)

* exclude creation and exporting of socket metrics via flag
* make exclude metric naming more consistent
* fix connect time metric update
* add documentation
* e2e test
* improve creation of metric mapping

This commit is contained in:
parent bd771997e0
commit 4e8d0b5836

10 changed files with 362 additions and 96 deletions
@@ -70,7 +70,7 @@ func main() {
  mc := metric.NewDummyCollector()
  if conf.EnableMetrics {
      // TODO: Ingress class is not a part of dataplane anymore
-     mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets)
+     mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
      if err != nil {
          klog.Fatalf("Error creating prometheus collector: %v", err)
      }
@@ -133,7 +133,7 @@ func main() {

  mc := metric.NewDummyCollector()
  if conf.EnableMetrics {
-     mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets)
+     mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
      if err != nil {
          klog.Fatalf("Error creating prometheus collector: %v", err)
      }
@@ -25,6 +25,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
  | `--enable-ssl-chain-completion` | Autocomplete SSL certificate chains with missing intermediate CA certificates. Certificates uploaded to Kubernetes must have the "Authority Information Access" X.509 v3 extension for this to succeed. (default false) |
  | `--enable-ssl-passthrough` | Enable SSL Passthrough. (default false) |
  | `--enable-topology-aware-routing` | Enable topology aware hints feature, needs service object annotation service.kubernetes.io/topology-aware-hints sets to auto. (default false) |
+ | `--exclude-socket-metrics` | Set of socket request metrics to exclude which won't be exported nor being calculated. The possible socket request metrics to exclude are documented in the monitoring guide e.g. 'nginx_ingress_controller_request_duration_seconds,nginx_ingress_controller_response_size' |
  | `--health-check-path` | URL path of the health check endpoint. Configured inside the NGINX status server. All requests received on the port defined by the healthz-port parameter are forwarded internally to this path. (default "/healthz") |
  | `--health-check-timeout` | Time limit, in seconds, for a probe to health-check-path to succeed. (default 10) |
  | `--healthz-port` | Port to use for the healthz endpoint. (default 10254) |
@@ -102,10 +102,11 @@ type Configuration struct {

  EnableProfiling bool

- EnableMetrics       bool
- MetricsPerHost      bool
- MetricsBuckets      *collectors.HistogramBuckets
- ReportStatusClasses bool
+ EnableMetrics        bool
+ MetricsPerHost       bool
+ MetricsBuckets       *collectors.HistogramBuckets
+ ReportStatusClasses  bool
+ ExcludeSocketMetrics []string

  FakeCertificate *ingress.SSLCert

@@ -21,6 +21,7 @@ import (
  "io"
  "net"
  "os"
+ "strings"
  "syscall"

  jsoniter "github.com/json-iterator/go"
@@ -60,6 +61,8 @@ type HistogramBuckets struct {
  SizeBuckets []float64
  }

+ type metricMapping map[string]prometheus.Collector
+
  // SocketCollector stores prometheus metrics and ingress meta-data
  type SocketCollector struct {
      prometheus.Collector
@@ -78,7 +81,7 @@ type SocketCollector struct {
  listener net.Listener

- metricMapping map[string]interface{}
+ metricMapping metricMapping

  hosts sets.Set[string]

@@ -106,7 +109,7 @@ var defObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}
  // NewSocketCollector creates a new SocketCollector instance using
  // the ingress watch namespace and class used by the controller
- func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets) (*SocketCollector, error) {
+ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, excludeMetrics []string) (*SocketCollector, error) {
  socket := "/tmp/nginx/prometheus-nginx.socket"
  // unix sockets must be unlink()ed before being used
  _ = syscall.Unlink(socket)
@@ -132,13 +135,23 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
      requestTags = append(requestTags, "host")
  }

+ em := make(map[string]struct{}, len(excludeMetrics))
+ for _, m := range excludeMetrics {
+     // remove potential nginx_ingress_controller prefix from the metric name
+     // TBD: how to handle fully qualified histogram metrics e.g. _buckets and _sum. Should we just remove the suffix and remove the histogram metric or ignore it?
+     em[strings.TrimPrefix(m, "nginx_ingress_controller_")] = struct{}{}
+ }
+
+ // create metric mapping with only the metrics that are not excluded
+ mm := make(metricMapping)
+
  sc := &SocketCollector{
      listener: listener,

      metricsPerHost:      metricsPerHost,
      reportStatusClasses: reportStatusClasses,

-     connectTime: prometheus.NewHistogramVec(
+     connectTime: histogramMetric(
          prometheus.HistogramOpts{
              Name: "connect_duration_seconds",
              Help: "The time spent on establishing a connection with the upstream server",
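The exclusion list is normalized before it is used: strings.TrimPrefix is a no-op when the prefix is absent, so a user can pass either the fully qualified or the short metric name and both land on the same key. A minimal standalone sketch of that behavior (not part of the diff, metric names chosen for illustration):

    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        excludeMetrics := []string{
            "nginx_ingress_controller_connect_duration_seconds", // fully qualified
            "response_duration_seconds",                         // short form
        }
        em := make(map[string]struct{}, len(excludeMetrics))
        for _, m := range excludeMetrics {
            // TrimPrefix leaves names without the prefix untouched,
            // so both spellings end up as short keys in the set.
            em[strings.TrimPrefix(m, "nginx_ingress_controller_")] = struct{}{}
        }
        fmt.Println(len(em)) // 2
        _, ok := em["connect_duration_seconds"]
        fmt.Println(ok) // true
    }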
@@ -147,8 +160,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.TimeBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),
-     headerTime: prometheus.NewHistogramVec(
+
+     headerTime: histogramMetric(
          prometheus.HistogramOpts{
              Name: "header_duration_seconds",
              Help: "The time spent on receiving first header from the upstream server",
@@ -157,8 +173,10 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.TimeBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),
-     responseTime: prometheus.NewHistogramVec(
+     responseTime: histogramMetric(
          prometheus.HistogramOpts{
              Name: "response_duration_seconds",
              Help: "The time spent on receiving the response from the upstream server",
@@ -167,8 +185,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.TimeBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),
-     requestTime: prometheus.NewHistogramVec(
+
+     requestTime: histogramMetric(
          prometheus.HistogramOpts{
              Name: "request_duration_seconds",
              Help: "The request processing time in milliseconds",
@@ -177,9 +198,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.TimeBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),

-     responseLength: prometheus.NewHistogramVec(
+     responseLength: histogramMetric(
          prometheus.HistogramOpts{
              Name: "response_size",
              Help: "The response length (including request line, header, and request body)",
@@ -188,19 +211,24 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Buckets: buckets.LengthBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),
-     requestLength: prometheus.NewHistogramVec(
+
+     requestLength: histogramMetric(
          prometheus.HistogramOpts{
              Name: "request_size",
              Help: "The request length (including request line, header, and request body)",
              Namespace: PrometheusNamespace,
-             Buckets: buckets.LengthBuckets,
              ConstLabels: constLabels,
+             Buckets: buckets.LengthBuckets,
          },
          requestTags,
+         em,
+         mm,
      ),

-     requests: prometheus.NewCounterVec(
+     requests: counterMetric(
          prometheus.CounterOpts{
              Name: "requests",
              Help: "The total number of client requests",
@@ -208,9 +236,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              ConstLabels: constLabels,
          },
          requestTags,
+         em,
+         mm,
      ),

-     bytesSent: prometheus.NewHistogramVec(
+     bytesSent: histogramMetric(
          prometheus.HistogramOpts{
              Name: "bytes_sent",
              Help: "DEPRECATED The number of bytes sent to a client",
@@ -219,9 +249,11 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              ConstLabels: constLabels,
          },
          requestTags,
+         em,
+         mm,
      ),

-     upstreamLatency: prometheus.NewSummaryVec(
+     upstreamLatency: summaryMetric(
          prometheus.SummaryOpts{
              Name: "ingress_upstream_latency_seconds",
              Help: "DEPRECATED Upstream service latency per Ingress",
@@ -230,28 +262,59 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
              Objectives: defObjectives,
          },
          []string{"ingress", "namespace", "service", "canary"},
+         em,
+         mm,
      ),
  }

- sc.metricMapping = map[string]interface{}{
-     prometheus.BuildFQName(PrometheusNamespace, "", "requests"): sc.requests,
-
-     prometheus.BuildFQName(PrometheusNamespace, "", "connect_duration_seconds"): sc.connectTime,
-     prometheus.BuildFQName(PrometheusNamespace, "", "header_duration_seconds"): sc.headerTime,
-     prometheus.BuildFQName(PrometheusNamespace, "", "response_duration_seconds"): sc.responseTime,
-     prometheus.BuildFQName(PrometheusNamespace, "", "request_duration_seconds"): sc.requestTime,
-
-     prometheus.BuildFQName(PrometheusNamespace, "", "request_size"): sc.requestLength,
-     prometheus.BuildFQName(PrometheusNamespace, "", "response_size"): sc.responseLength,
-
-     prometheus.BuildFQName(PrometheusNamespace, "", "bytes_sent"): sc.bytesSent,
-
-     prometheus.BuildFQName(PrometheusNamespace, "", "ingress_upstream_latency_seconds"): sc.upstreamLatency,
- }
-
+ sc.metricMapping = mm
  return sc, nil
  }

+ func containsMetric(excludeMetrics map[string]struct{}, name string) bool {
+     if _, ok := excludeMetrics[name]; ok {
+         klog.V(3).InfoS("Skipping metric", "metric", name)
+         return true
+     }
+     return false
+ }
+
+ func summaryMetric(opts prometheus.SummaryOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.SummaryVec {
+     if containsMetric(excludeMetrics, opts.Name) {
+         return nil
+     }
+     m := prometheus.NewSummaryVec(
+         opts,
+         requestTags,
+     )
+     metricMapping[prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)] = m
+     return m
+ }
+
+ func counterMetric(opts prometheus.CounterOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.CounterVec {
+     if containsMetric(excludeMetrics, opts.Name) {
+         return nil
+     }
+     m := prometheus.NewCounterVec(
+         opts,
+         requestTags,
+     )
+     metricMapping[prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)] = m
+     return m
+ }
+
+ func histogramMetric(opts prometheus.HistogramOpts, requestTags []string, excludeMetrics map[string]struct{}, metricMapping metricMapping) *prometheus.HistogramVec {
+     if containsMetric(excludeMetrics, opts.Name) {
+         return nil
+     }
+     m := prometheus.NewHistogramVec(
+         opts,
+         requestTags,
+     )
+     metricMapping[prometheus.BuildFQName(PrometheusNamespace, "", opts.Name)] = m
+     return m
+ }
+
  func (sc *SocketCollector) handleMessage(msg []byte) {
      klog.V(5).InfoS("Metric", "message", string(msg))

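The three constructors above share one pattern: an excluded metric is never created and never registered, while everything that is created is recorded in the shared mapping under its fully qualified name. A condensed, self-contained sketch of that pattern (illustrative only, not the PR's code verbatim):

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    // histogramMetric mirrors the helper above: skip excluded names, register the rest.
    func histogramMetric(opts prometheus.HistogramOpts, labels []string, exclude map[string]struct{}, mapping map[string]prometheus.Collector) *prometheus.HistogramVec {
        if _, skip := exclude[opts.Name]; skip {
            return nil // callers must nil-check before observing
        }
        m := prometheus.NewHistogramVec(opts, labels)
        mapping[prometheus.BuildFQName(opts.Namespace, "", opts.Name)] = m
        return m
    }

    func main() {
        exclude := map[string]struct{}{"connect_duration_seconds": {}}
        mapping := map[string]prometheus.Collector{}

        connect := histogramMetric(prometheus.HistogramOpts{Namespace: "nginx_ingress_controller", Name: "connect_duration_seconds"}, []string{"host"}, exclude, mapping)
        request := histogramMetric(prometheus.HistogramOpts{Namespace: "nginx_ingress_controller", Name: "request_duration_seconds"}, []string{"host"}, exclude, mapping)

        fmt.Println(connect == nil, request == nil, len(mapping)) // true false 1
    }

Returning nil (instead of a registered-but-unused vector) is what lets handleMessage below skip both the label lookup and the observation for excluded metrics.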
@@ -305,30 +368,36 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
      "canary": stats.Canary,
  }

- requestsMetric, err := sc.requests.GetMetricWith(collectorLabels)
- if err != nil {
-     klog.ErrorS(err, "Error fetching requests metric")
- } else {
-     requestsMetric.Inc()
+ if sc.requests != nil {
+     requestsMetric, err := sc.requests.GetMetricWith(collectorLabels)
+     if err != nil {
+         klog.ErrorS(err, "Error fetching requests metric")
+     } else {
+         requestsMetric.Inc()
+     }
  }

  if stats.Latency != -1 {
-     connectTimeMetric, err := sc.connectTime.GetMetricWith(requestLabels)
-     if err != nil {
-         klog.ErrorS(err, "Error fetching connect time metric")
-     } else {
-         connectTimeMetric.Observe(stats.Latency)
+     if sc.connectTime != nil {
+         connectTimeMetric, err := sc.connectTime.GetMetricWith(requestLabels)
+         if err != nil {
+             klog.ErrorS(err, "Error fetching connect time metric")
+         } else {
+             connectTimeMetric.Observe(stats.Latency)
+         }
      }

-     latencyMetric, err := sc.upstreamLatency.GetMetricWith(latencyLabels)
-     if err != nil {
-         klog.ErrorS(err, "Error fetching latency metric")
-     } else {
-         latencyMetric.Observe(stats.Latency)
+     if sc.upstreamLatency != nil {
+         latencyMetric, err := sc.upstreamLatency.GetMetricWith(latencyLabels)
+         if err != nil {
+             klog.ErrorS(err, "Error fetching latency metric")
+         } else {
+             latencyMetric.Observe(stats.Latency)
+         }
      }
  }

- if stats.HeaderTime != -1 {
+ if stats.HeaderTime != -1 && sc.headerTime != nil {
      headerTimeMetric, err := sc.headerTime.GetMetricWith(requestLabels)
      if err != nil {
          klog.ErrorS(err, "Error fetching header time metric")
@@ -337,7 +406,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
      }
  }

- if stats.RequestTime != -1 {
+ if stats.RequestTime != -1 && sc.requestTime != nil {
      requestTimeMetric, err := sc.requestTime.GetMetricWith(requestLabels)
      if err != nil {
          klog.ErrorS(err, "Error fetching request duration metric")
@@ -346,7 +415,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
      }
  }

- if stats.RequestLength != -1 {
+ if stats.RequestLength != -1 && sc.requestLength != nil {
      requestLengthMetric, err := sc.requestLength.GetMetricWith(requestLabels)
      if err != nil {
          klog.ErrorS(err, "Error fetching request length metric")
@@ -355,7 +424,7 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
      }
  }

- if stats.ResponseTime != -1 {
+ if stats.ResponseTime != -1 && sc.responseTime != nil {
      responseTimeMetric, err := sc.responseTime.GetMetricWith(requestLabels)
      if err != nil {
          klog.ErrorS(err, "Error fetching upstream response time metric")
@@ -365,18 +434,22 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
  }

  if stats.ResponseLength != -1 {
-     bytesSentMetric, err := sc.bytesSent.GetMetricWith(requestLabels)
-     if err != nil {
-         klog.ErrorS(err, "Error fetching bytes sent metric")
-     } else {
-         bytesSentMetric.Observe(stats.ResponseLength)
+     if sc.bytesSent != nil {
+         bytesSentMetric, err := sc.bytesSent.GetMetricWith(requestLabels)
+         if err != nil {
+             klog.ErrorS(err, "Error fetching bytes sent metric")
+         } else {
+             bytesSentMetric.Observe(stats.ResponseLength)
+         }
      }

-     responseSizeMetric, err := sc.responseLength.GetMetricWith(requestLabels)
-     if err != nil {
-         klog.ErrorS(err, "Error fetching bytes sent metric")
-     } else {
-         responseSizeMetric.Observe(stats.ResponseLength)
+     if sc.responseLength != nil {
+         responseSizeMetric, err := sc.responseLength.GetMetricWith(requestLabels)
+         if err != nil {
+             klog.ErrorS(err, "Error fetching bytes sent metric")
+         } else {
+             responseSizeMetric.Observe(stats.ResponseLength)
+         }
      }
  }
  }
@@ -471,36 +544,16 @@ func (sc *SocketCollector) RemoveMetrics(ingresses []string, registry prometheus

  // Describe implements prometheus.Collector
  func (sc SocketCollector) Describe(ch chan<- *prometheus.Desc) {
-     sc.connectTime.Describe(ch)
-     sc.headerTime.Describe(ch)
-     sc.responseTime.Describe(ch)
-     sc.requestTime.Describe(ch)
-
-     sc.requestLength.Describe(ch)
-     sc.responseLength.Describe(ch)
-
-     sc.requests.Describe(ch)
-
-     sc.upstreamLatency.Describe(ch)
-
-     sc.bytesSent.Describe(ch)
+     for _, metric := range sc.metricMapping {
+         metric.Describe(ch)
+     }
  }

  // Collect implements the prometheus.Collector interface.
  func (sc SocketCollector) Collect(ch chan<- prometheus.Metric) {
-     sc.connectTime.Collect(ch)
-     sc.headerTime.Collect(ch)
-     sc.responseTime.Collect(ch)
-     sc.requestTime.Collect(ch)
-
-     sc.requestLength.Collect(ch)
-     sc.responseLength.Collect(ch)
-
-     sc.requests.Collect(ch)
-
-     sc.upstreamLatency.Collect(ch)
-
-     sc.bytesSent.Collect(ch)
+     for _, metric := range sc.metricMapping {
+         metric.Collect(ch)
+     }
  }

  // SetHosts sets the hostnames that are being served by the ingress controller
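Iterating over the mapping is what makes exclusion safe at registration time: a skipped metric is simply absent from the map, so Describe and Collect never see a nil collector. A small standalone illustration of the same loop (mapping contents here are hypothetical, not taken from the diff):

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        // Only non-excluded metrics ever get added to the mapping.
        mapping := map[string]prometheus.Collector{
            "nginx_ingress_controller_requests": prometheus.NewCounterVec(
                prometheus.CounterOpts{Namespace: "nginx_ingress_controller", Name: "requests"},
                []string{"host"},
            ),
        }

        ch := make(chan *prometheus.Desc, 8)
        for _, m := range mapping {
            m.Describe(ch) // same loop shape as SocketCollector.Describe
        }
        close(ch)
        for desc := range ch {
            fmt.Println(desc.String())
        }
    }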
@@ -84,6 +84,7 @@ func TestCollector(t *testing.T) {
  data []string
  metrics []string
  useStatusClasses bool
+ excludeMetrics []string
  wantBefore string
  removeIngresses []string
  wantAfter string
@@ -470,13 +471,126 @@ func TestCollector(t *testing.T) {
      wantAfter: `
  `,
  },
+ {
+     name: "basic exclude metrics test",
+     data: []string{`[{
+         "host":"testshop.com",
+         "status":"200",
+         "bytesSent":150.0,
+         "method":"GET",
+         "path":"/admin",
+         "requestLength":300.0,
+         "requestTime":60.0,
+         "upstreamLatency":1.0,
+         "upstreamHeaderTime":5.0,
+         "upstreamName":"test-upstream",
+         "upstreamIP":"1.1.1.1:8080",
+         "upstreamResponseTime":200,
+         "upstreamStatus":"220",
+         "namespace":"test-app-production",
+         "ingress":"web-yml",
+         "service":"test-app",
+         "canary":""
+     }]`},
+     excludeMetrics: []string{"nginx_ingress_controller_connect_duration_seconds"},
+     metrics: []string{"nginx_ingress_controller_connect_duration_seconds", "nginx_ingress_controller_response_duration_seconds"},
+     useStatusClasses: true,
+     wantBefore: `
+         # HELP nginx_ingress_controller_response_duration_seconds The time spent on receiving the response from the upstream server
+         # TYPE nginx_ingress_controller_response_duration_seconds histogram
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.005"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.01"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.025"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.05"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.1"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.25"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="1"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="2.5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="10"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="+Inf"} 1
+         nginx_ingress_controller_response_duration_seconds_sum{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 200
+         nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
+     `,
+ },
+ {
+     name: "remove metrics with the short metric name",
+     data: []string{`[{
+         "host":"testshop.com",
+         "status":"200",
+         "bytesSent":150.0,
+         "method":"GET",
+         "path":"/admin",
+         "requestLength":300.0,
+         "requestTime":60.0,
+         "upstreamLatency":1.0,
+         "upstreamHeaderTime":5.0,
+         "upstreamName":"test-upstream",
+         "upstreamIP":"1.1.1.1:8080",
+         "upstreamResponseTime":200,
+         "upstreamStatus":"220",
+         "namespace":"test-app-production",
+         "ingress":"web-yml",
+         "service":"test-app",
+         "canary":""
+     }]`},
+     excludeMetrics: []string{"response_duration_seconds"},
+     metrics: []string{"nginx_ingress_controller_response_duration_seconds"},
+     useStatusClasses: true,
+     wantBefore: `
+ `,
+ },
+ {
+     name: "exclude metrics make sure to only remove exactly matched metrics",
+     data: []string{`[{
+         "host":"testshop.com",
+         "status":"200",
+         "bytesSent":150.0,
+         "method":"GET",
+         "path":"/admin",
+         "requestLength":300.0,
+         "requestTime":60.0,
+         "upstreamLatency":1.0,
+         "upstreamHeaderTime":5.0,
+         "upstreamName":"test-upstream",
+         "upstreamIP":"1.1.1.1:8080",
+         "upstreamResponseTime":200,
+         "upstreamStatus":"220",
+         "namespace":"test-app-production",
+         "ingress":"web-yml",
+         "service":"test-app",
+         "canary":""
+     }]`},
+     excludeMetrics: []string{"response_duration_seconds2", "test.*", "nginx_ingress_.*", "response_duration_secon"},
+     metrics: []string{"nginx_ingress_controller_response_duration_seconds"},
+     useStatusClasses: true,
+     wantBefore: `
+         # HELP nginx_ingress_controller_response_duration_seconds The time spent on receiving the response from the upstream server
+         # TYPE nginx_ingress_controller_response_duration_seconds histogram
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.005"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.01"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.025"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.05"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.1"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.25"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="0.5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="1"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="2.5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="5"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="10"} 0
+         nginx_ingress_controller_response_duration_seconds_bucket{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx",le="+Inf"} 1
+         nginx_ingress_controller_response_duration_seconds_sum{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 200
+         nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
+     `,
+ },
  }

  for _, c := range cases {
      t.Run(c.name, func(t *testing.T) {
          registry := prometheus.NewPedanticRegistry()

-         sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets)
+         sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, c.excludeMetrics)
          if err != nil {
              t.Errorf("%v: unexpected error creating new SocketCollector: %v", c.name, err)
          }
@@ -71,7 +71,7 @@ type collector struct {
  }

  // NewCollector creates a new metric collector the for ingress controller
- func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets) (Collector, error) {
+ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, excludedSocketMetrics []string) (Collector, error) {
  podNamespace := os.Getenv("POD_NAMESPACE")
  if podNamespace == "" {
      podNamespace = "default"
@@ -89,7 +89,7 @@ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus
      return nil, err
  }

- s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets)
+ s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, excludedSocketMetrics)
  if err != nil {
      return nil, err
  }
@@ -171,10 +171,11 @@ Requires the update-status parameter.`)
  reportStatusClasses = flags.Bool("report-status-classes", false,
      `Use status classes (2xx, 3xx, 4xx and 5xx) instead of status codes in metrics.`)

- timeBuckets         = flags.Float64Slice("time-buckets", prometheus.DefBuckets, "Set of buckets which will be used for prometheus histogram metrics such as RequestTime, ResponseTime.")
- lengthBuckets       = flags.Float64Slice("length-buckets", prometheus.LinearBuckets(10, 10, 10), "Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength.")
- sizeBuckets         = flags.Float64Slice("size-buckets", prometheus.ExponentialBuckets(10, 10, 7), "Set of buckets which will be used for prometheus histogram metrics such as BytesSent.")
- monitorMaxBatchSize = flags.Int("monitor-max-batch-size", 10000, "Max batch size of NGINX metrics.")
+ timeBuckets          = flags.Float64Slice("time-buckets", prometheus.DefBuckets, "Set of buckets which will be used for prometheus histogram metrics such as RequestTime, ResponseTime.")
+ lengthBuckets        = flags.Float64Slice("length-buckets", prometheus.LinearBuckets(10, 10, 10), "Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength.")
+ sizeBuckets          = flags.Float64Slice("size-buckets", prometheus.ExponentialBuckets(10, 10, 7), "Set of buckets which will be used for prometheus histogram metrics such as BytesSent.")
+ excludeSocketMetrics = flags.StringSlice("exclude-socket-metrics", []string{}, "Set of socket request metrics to exclude which won't be exported nor being calculated. E.g. 'nginx_ingress_controller_success,nginx_ingress_controller_header_duration_seconds'.")
+ monitorMaxBatchSize  = flags.Int("monitor-max-batch-size", 10000, "Max batch size of NGINX metrics.")

  httpPort = flags.Int("http-port", 80, `Port to use for servicing HTTP traffic.`)
  httpsPort = flags.Int("https-port", 443, `Port to use for servicing HTTPS traffic.`)
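The controller's flags are parsed with spf13/pflag (the Float64Slice helpers above come from the same package), so a comma-separated value on the command line arrives in NewCollector as a plain []string. A standalone sketch under that assumption:

    package main

    import (
        "fmt"

        "github.com/spf13/pflag"
    )

    func main() {
        fs := pflag.NewFlagSet("controller", pflag.ContinueOnError)
        // StringSlice splits its comma-separated value into a []string.
        excludeSocketMetrics := fs.StringSlice("exclude-socket-metrics", []string{},
            "Set of socket request metrics to exclude which won't be exported nor being calculated.")

        _ = fs.Parse([]string{
            "--exclude-socket-metrics=nginx_ingress_controller_request_size,nginx_ingress_controller_header_duration_seconds",
        })

        fmt.Println(*excludeSocketMetrics)
        // [nginx_ingress_controller_request_size nginx_ingress_controller_header_duration_seconds]
    }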
@@ -328,6 +329,7 @@ https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-g
      MetricsPerHost:             *metricsPerHost,
      MetricsBuckets:             histogramBuckets,
      ReportStatusClasses:        *reportStatusClasses,
+     ExcludeSocketMetrics:       *excludeSocketMetrics,
      MonitorMaxBatchSize:        *monitorMaxBatchSize,
      DisableServiceExternalName: *disableServiceExternalName,
      EnableSSLPassthrough:       *enableSSLPassthrough,
@@ -40,6 +40,7 @@ import (
  _ "k8s.io/ingress-nginx/test/e2e/leaks"
  _ "k8s.io/ingress-nginx/test/e2e/loadbalance"
  _ "k8s.io/ingress-nginx/test/e2e/lua"
+ _ "k8s.io/ingress-nginx/test/e2e/metrics"
  _ "k8s.io/ingress-nginx/test/e2e/nginx"
  _ "k8s.io/ingress-nginx/test/e2e/security"
  _ "k8s.io/ingress-nginx/test/e2e/servicebackend"
test/e2e/metrics/metrics.go — new file, 94 lines
@@ -0,0 +1,94 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package defaultbackend

import (
    "context"
    "fmt"
    "net/http"
    "strings"
    "time"

    "github.com/onsi/ginkgo/v2"
    "github.com/stretchr/testify/assert"
    appsv1 "k8s.io/api/apps/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    "k8s.io/ingress-nginx/test/e2e/framework"
)

const waitForMetrics = 2 * time.Second

var _ = framework.IngressNginxDescribe("[metrics] exported prometheus metrics", func() {
    f := framework.NewDefaultFramework("metrics")
    host := "foo.com"

    ginkgo.BeforeEach(func() {
        f.NewEchoDeployment()
        f.EnsureIngress(framework.NewSingleIngress(host, "/", host, f.Namespace, framework.EchoService, 80, nil))
        f.WaitForNginxServer(host,
            func(server string) bool {
                return strings.Contains(server, fmt.Sprintf("server_name %s ;", host)) &&
                    strings.Contains(server, "proxy_pass http://upstream_balancer;")
            })
    })

    ginkgo.It("exclude socket request metrics are absent", func() {
        err := f.UpdateIngressControllerDeployment(func(deployment *appsv1.Deployment) error {
            args := deployment.Spec.Template.Spec.Containers[0].Args
            args = append(args, "--exclude-socket-metrics=nginx_ingress_controller_request_size,nginx_ingress_controller_header_duration_seconds")
            deployment.Spec.Template.Spec.Containers[0].Args = args
            _, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
            return err
        })
        assert.Nil(ginkgo.GinkgoT(), err, "updating deployment")

        f.HTTPTestClient().
            GET("/").
            WithHeader("Host", host).
            Expect().
            Status(http.StatusOK)
        time.Sleep(waitForMetrics)

        ip := f.GetNginxPodIP()
        mf, err := f.GetMetric("nginx_ingress_controller_request_size", ip)
        assert.ErrorContains(ginkgo.GinkgoT(), err, "nginx_ingress_controller_request_size")
        assert.Nil(ginkgo.GinkgoT(), mf)
    })

    ginkgo.It("exclude socket request metrics are present", func() {
        err := f.UpdateIngressControllerDeployment(func(deployment *appsv1.Deployment) error {
            args := deployment.Spec.Template.Spec.Containers[0].Args
            args = append(args, "--exclude-socket-metrics=non_existing_metric_does_not_affect_existing_metrics")
            deployment.Spec.Template.Spec.Containers[0].Args = args
            _, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
            return err
        })
        assert.Nil(ginkgo.GinkgoT(), err, "updating deployment")

        f.HTTPTestClient().
            GET("/").
            WithHeader("Host", host).
            Expect().
            Status(http.StatusOK)
        time.Sleep(waitForMetrics)

        ip := f.GetNginxPodIP()
        mf, err := f.GetMetric("nginx_ingress_controller_request_size", ip)
        assert.Nil(ginkgo.GinkgoT(), err)
        assert.NotNil(ginkgo.GinkgoT(), mf)
    })
})