Metrics: Add --metrics-per-undefined-host argument. (#11818)

Signed-off-by: Jon Carl <grounded042@joncarl.com>
This commit is contained in:
Jon Carl 2024-08-26 13:09:11 -06:00 committed by GitHub
parent 93f9f9fbb3
commit 034c3ccad4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 172 additions and 28 deletions

View file

@ -66,7 +66,7 @@ func main() {
mc := metric.NewDummyCollector() mc := metric.NewDummyCollector()
if conf.EnableMetrics { if conf.EnableMetrics {
// TODO: Ingress class is not a part of dataplane anymore // TODO: Ingress class is not a part of dataplane anymore
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics) mc, err = metric.NewCollector(conf.MetricsPerHost, conf.MetricsPerUndefinedHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
if err != nil { if err != nil {
klog.Fatalf("Error creating prometheus collector: %v", err) klog.Fatalf("Error creating prometheus collector: %v", err)
} }

View file

@ -130,7 +130,7 @@ func main() {
mc := metric.NewDummyCollector() mc := metric.NewDummyCollector()
if conf.EnableMetrics { if conf.EnableMetrics {
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics) mc, err = metric.NewCollector(conf.MetricsPerHost, conf.MetricsPerUndefinedHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
if err != nil { if err != nil {
klog.Fatalf("Error creating prometheus collector: %v", err) klog.Fatalf("Error creating prometheus collector: %v", err)
} }

View file

@ -48,6 +48,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
| `--maxmind-license-key` | Maxmind license key to download GeoLite2 Databases. https://blog.maxmind.com/2019/12/significant-changes-to-accessing-and-using-geolite2-databases/ . | | `--maxmind-license-key` | Maxmind license key to download GeoLite2 Databases. https://blog.maxmind.com/2019/12/significant-changes-to-accessing-and-using-geolite2-databases/ . |
| `--maxmind-mirror` | Maxmind mirror url (example: http://geoip.local/databases. | | `--maxmind-mirror` | Maxmind mirror url (example: http://geoip.local/databases. |
| `--metrics-per-host` | Export metrics per-host. (default true) | | `--metrics-per-host` | Export metrics per-host. (default true) |
| `--metrics-per-undefined-host` | Export metrics per-host even if the host is not defined in an ingress. Requires --metrics-per-host to be set to true. (default false) |
| `--monitor-max-batch-size` | Max batch size of NGINX metrics. (default 10000)| | `--monitor-max-batch-size` | Max batch size of NGINX metrics. (default 10000)|
| `--post-shutdown-grace-period` | Additional delay in seconds before controller container exits. (default 10) | | `--post-shutdown-grace-period` | Additional delay in seconds before controller container exits. (default 10) |
| `--profiler-port` | Port to use for expose the ingress controller Go profiler when it is enabled. (default 10245) | | `--profiler-port` | Port to use for expose the ingress controller Go profiler when it is enabled. (default 10245) |

View file

@ -166,7 +166,9 @@ According to the above example, this URL will be http://10.192.0.3:31086
#### Wildcard ingresses #### Wildcard ingresses
- By default request metrics are labeled with the hostname. When you have a wildcard domain ingress, then there will be no metrics for that ingress (to prevent the metrics from exploding in cardinality). To get metrics in this case you need to run the ingress controller with `--metrics-per-host=false` (you will lose labeling by hostname, but still have labeling by ingress). - By default request metrics are labeled with the hostname. When you have a wildcard domain ingress, then there will be no metrics for that ingress (to prevent the metrics from exploding in cardinality). To get metrics in this case you have two options:
- Run the ingress controller with `--metrics-per-host=false`. You will lose labeling by hostname, but still have labeling by ingress.
- Run the ingress controller with `--metrics-per-undefined-host=true --metrics-per-host=true`. You will get labeling by hostname even if the hostname is not explicitly defined on an ingress. Be warned that cardinality could explode due to many hostnames.
### Grafana dashboard using ingress resource ### Grafana dashboard using ingress resource
- If you want to expose the dashboard for grafana using an ingress resource, then you can : - If you want to expose the dashboard for grafana using an ingress resource, then you can :

View file

@ -107,6 +107,7 @@ type Configuration struct {
EnableMetrics bool EnableMetrics bool
MetricsPerHost bool MetricsPerHost bool
MetricsPerUndefinedHost bool
MetricsBuckets *collectors.HistogramBuckets MetricsBuckets *collectors.HistogramBuckets
MetricsBucketFactor float64 MetricsBucketFactor float64
MetricsMaxBuckets uint32 MetricsMaxBuckets uint32

View file

@ -82,6 +82,7 @@ type SocketCollector struct {
hosts sets.Set[string] hosts sets.Set[string]
metricsPerHost bool metricsPerHost bool
metricsPerUndefinedHost bool
reportStatusClasses bool reportStatusClasses bool
} }
@ -99,7 +100,7 @@ var requestTags = []string{
// NewSocketCollector creates a new SocketCollector instance using // NewSocketCollector creates a new SocketCollector instance using
// the ingress watch namespace and class used by the controller // the ingress watch namespace and class used by the controller
func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludeMetrics []string) (*SocketCollector, error) { func NewSocketCollector(pod, namespace, class string, metricsPerHost, metricsPerUndefinedHost, reportStatusClasses bool, buckets HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludeMetrics []string) (*SocketCollector, error) {
socket := "/tmp/nginx/prometheus-nginx.socket" socket := "/tmp/nginx/prometheus-nginx.socket"
// unix sockets must be unlink()ed before being used // unix sockets must be unlink()ed before being used
//nolint:errcheck // Ignore unlink error //nolint:errcheck // Ignore unlink error
@ -140,6 +141,7 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
listener: listener, listener: listener,
metricsPerHost: metricsPerHost, metricsPerHost: metricsPerHost,
metricsPerUndefinedHost: metricsPerUndefinedHost,
reportStatusClasses: reportStatusClasses, reportStatusClasses: reportStatusClasses,
connectTime: histogramMetric( connectTime: histogramMetric(
@ -306,8 +308,8 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
for i := range statsBatch { for i := range statsBatch {
stats := &statsBatch[i] stats := &statsBatch[i]
if sc.metricsPerHost && !sc.hosts.Has(stats.Host) { if sc.metricsPerHost && !sc.hosts.Has(stats.Host) && !sc.metricsPerUndefinedHost {
klog.V(3).InfoS("Skipping metric for host not being served", "host", stats.Host) klog.V(3).InfoS("Skipping metric for host not explicitly defined in an ingress", "host", stats.Host)
continue continue
} }

View file

@ -90,6 +90,7 @@ func TestCollector(t *testing.T) {
name string name string
data []string data []string
metrics []string metrics []string
metricsPerUndefinedHost bool
useStatusClasses bool useStatusClasses bool
excludeMetrics []string excludeMetrics []string
wantBefore string wantBefore string
@ -591,13 +592,69 @@ func TestCollector(t *testing.T) {
nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1 nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
`, `,
}, },
{
name: "metrics with a host should not be dropped when the host is not in the hosts slice but metricsPerUndefinedHost is true",
data: []string{`[{
"host":"wildcard.testshop.com",
"status":"200",
"bytesSent":150.0,
"method":"GET",
"path":"/admin",
"requestLength":300.0,
"requestTime":60.0,
"upstreamLatency":1.0,
"upstreamHeaderTime":5.0,
"upstreamName":"test-upstream",
"upstreamIP":"1.1.1.1:8080",
"upstreamResponseTime":200,
"upstreamStatus":"220",
"namespace":"test-app-production",
"ingress":"web-yml",
"service":"test-app",
"canary":""
}]`},
excludeMetrics: []string{"response_duration_seconds2", "test.*", "nginx_ingress_.*", "response_duration_secon"},
metrics: []string{"nginx_ingress_controller_requests"},
metricsPerUndefinedHost: true,
useStatusClasses: true,
wantBefore: `
# HELP nginx_ingress_controller_requests The total number of client requests
# TYPE nginx_ingress_controller_requests counter
nginx_ingress_controller_requests{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="wildcard.testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
`,
},
{
name: "metrics with a host should be dropped when the host is not in the hosts slice",
data: []string{`[{
"host":"wildcard.testshop.com",
"status":"200",
"bytesSent":150.0,
"method":"GET",
"path":"/admin",
"requestLength":300.0,
"requestTime":60.0,
"upstreamLatency":1.0,
"upstreamHeaderTime":5.0,
"upstreamName":"test-upstream",
"upstreamIP":"1.1.1.1:8080",
"upstreamResponseTime":200,
"upstreamStatus":"220",
"namespace":"test-app-production",
"ingress":"web-yml",
"service":"test-app",
"canary":""
}]`},
excludeMetrics: []string{"response_duration_seconds2", "test.*", "nginx_ingress_.*", "response_duration_secon"},
metrics: []string{"nginx_ingress_controller_requests"},
useStatusClasses: true,
},
} }
for _, c := range cases { for _, c := range cases {
t.Run(c.name, func(t *testing.T) { t.Run(c.name, func(t *testing.T) {
registry := prometheus.NewPedanticRegistry() registry := prometheus.NewPedanticRegistry()
sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, bucketFactor, maxBuckets, c.excludeMetrics) sc, err := NewSocketCollector("pod", "default", "ingress", true, c.metricsPerUndefinedHost, c.useStatusClasses, buckets, bucketFactor, maxBuckets, c.excludeMetrics)
if err != nil { if err != nil {
t.Errorf("%v: unexpected error creating new SocketCollector: %v", c.name, err) t.Errorf("%v: unexpected error creating new SocketCollector: %v", c.name, err)
} }

View file

@ -71,7 +71,7 @@ type collector struct {
} }
// NewCollector creates a new metric collector the for ingress controller // NewCollector creates a new metric collector the for ingress controller
func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludedSocketMetrics []string) (Collector, error) { func NewCollector(metricsPerHost, metricsPerUndefinedHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludedSocketMetrics []string) (Collector, error) {
podNamespace := os.Getenv("POD_NAMESPACE") podNamespace := os.Getenv("POD_NAMESPACE")
if podNamespace == "" { if podNamespace == "" {
podNamespace = "default" podNamespace = "default"
@ -89,7 +89,7 @@ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus
return nil, err return nil, err
} }
s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, bucketFactor, maxBuckets, excludedSocketMetrics) s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, metricsPerUndefinedHost, reportStatusClasses, buckets, bucketFactor, maxBuckets, excludedSocketMetrics)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View file

@ -17,6 +17,7 @@ limitations under the License.
package flags package flags
import ( import (
"errors"
"flag" "flag"
"fmt" "fmt"
"net" "net"
@ -177,6 +178,8 @@ Requires the update-status parameter.`)
`Enables the collection of NGINX metrics.`) `Enables the collection of NGINX metrics.`)
metricsPerHost = flags.Bool("metrics-per-host", true, metricsPerHost = flags.Bool("metrics-per-host", true,
`Export metrics per-host.`) `Export metrics per-host.`)
metricsPerUndefinedHost = flags.Bool("metrics-per-undefined-host", false,
`Export metrics per-host even if the host is not defined in an ingress. Requires --metrics-per-host to be set to true.`)
reportStatusClasses = flags.Bool("report-status-classes", false, reportStatusClasses = flags.Bool("report-status-classes", false,
`Use status classes (2xx, 3xx, 4xx and 5xx) instead of status codes in metrics.`) `Use status classes (2xx, 3xx, 4xx and 5xx) instead of status codes in metrics.`)
@ -319,6 +322,10 @@ https://blog.maxmind.com/2019/12/significant-changes-to-accessing-and-using-geol
} }
} }
if *metricsPerUndefinedHost && !*metricsPerHost {
return false, nil, errors.New("--metrics-per-undefined-host=true must be passed with --metrics-per-host=true")
}
if *electionTTL <= 0 { if *electionTTL <= 0 {
*electionTTL = 30 * time.Second *electionTTL = 30 * time.Second
} }
@ -340,6 +347,7 @@ https://blog.maxmind.com/2019/12/significant-changes-to-accessing-and-using-geol
EnableProfiling: *profiling, EnableProfiling: *profiling,
EnableMetrics: *enableMetrics, EnableMetrics: *enableMetrics,
MetricsPerHost: *metricsPerHost, MetricsPerHost: *metricsPerHost,
MetricsPerUndefinedHost: *metricsPerUndefinedHost,
MetricsBuckets: histogramBuckets, MetricsBuckets: histogramBuckets,
MetricsBucketFactor: *bucketFactor, MetricsBucketFactor: *bucketFactor,
MetricsMaxBuckets: *maxBuckets, MetricsMaxBuckets: *maxBuckets,

View file

@ -212,3 +212,29 @@ func TestLeaderElectionTTLParseValueInHours(t *testing.T) {
t.Fatalf("Expected --election-ttl and conf.ElectionTTL as 1h, but found: %v", conf.ElectionTTL) t.Fatalf("Expected --election-ttl and conf.ElectionTTL as 1h, but found: %v", conf.ElectionTTL)
} }
} }
func TestMetricsPerUndefinedHost(t *testing.T) {
ResetForTesting(func() { t.Fatal("Parsing failed") })
oldArgs := os.Args
defer func() { os.Args = oldArgs }()
os.Args = []string{"cmd", "--metrics-per-undefined-host=true"}
_, _, err := ParseFlags()
if err != nil {
t.Fatalf("Expected no error but got: %s", err)
}
}
func TestMetricsPerUndefinedHostWithMetricsPerHostFalse(t *testing.T) {
ResetForTesting(func() { t.Fatal("Parsing failed") })
oldArgs := os.Args
defer func() { os.Args = oldArgs }()
os.Args = []string{"cmd", "--metrics-per-host=false", "--metrics-per-undefined-host=true"}
_, _, err := ParseFlags()
if err == nil {
t.Fatalf("Expected an error parsing flags but none returned")
}
}

View file

@ -36,6 +36,7 @@ const waitForMetrics = 2 * time.Second
var _ = framework.IngressNginxDescribe("[metrics] exported prometheus metrics", func() { var _ = framework.IngressNginxDescribe("[metrics] exported prometheus metrics", func() {
f := framework.NewDefaultFramework("metrics") f := framework.NewDefaultFramework("metrics")
host := "foo.com" host := "foo.com"
wildcardHost := "wildcard." + host
ginkgo.BeforeEach(func() { ginkgo.BeforeEach(func() {
f.NewEchoDeployment() f.NewEchoDeployment()
@ -91,4 +92,50 @@ var _ = framework.IngressNginxDescribe("[metrics] exported prometheus metrics",
assert.Nil(ginkgo.GinkgoT(), err) assert.Nil(ginkgo.GinkgoT(), err)
assert.NotNil(ginkgo.GinkgoT(), mf) assert.NotNil(ginkgo.GinkgoT(), mf)
}) })
ginkgo.It("request metrics per undefined host are present when flag is set", func() {
err := f.UpdateIngressControllerDeployment(func(deployment *appsv1.Deployment) error {
args := deployment.Spec.Template.Spec.Containers[0].Args
args = append(args, "--metrics-per-undefined-host=true")
deployment.Spec.Template.Spec.Containers[0].Args = args
_, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
return err
})
assert.Nil(ginkgo.GinkgoT(), err, "updating deployment")
f.HTTPTestClient().
GET("/").
WithHeader("Host", wildcardHost).
Expect().
Status(http.StatusNotFound)
time.Sleep(waitForMetrics)
ip := f.GetNginxPodIP()
reqMetrics, err := f.GetMetric("nginx_ingress_controller_requests", ip)
assert.Nil(ginkgo.GinkgoT(), err)
assert.NotNil(ginkgo.GinkgoT(), reqMetrics.Metric)
assert.Len(ginkgo.GinkgoT(), reqMetrics.Metric, 1)
containedLabel := false
for _, label := range reqMetrics.Metric[0].Label {
if *label.Name == "host" && *label.Value == wildcardHost {
containedLabel = true
break
}
}
assert.Truef(ginkgo.GinkgoT(), containedLabel, "expected reqMetrics to contain label with \"name\"=\"host\" \"value\"=%q, but it did not: %s", wildcardHost, reqMetrics.String())
})
ginkgo.It("request metrics per undefined host are not present when flag is not set", func() {
f.HTTPTestClient().
GET("/").
WithHeader("Host", wildcardHost).
Expect().
Status(http.StatusNotFound)
time.Sleep(waitForMetrics)
ip := f.GetNginxPodIP()
reqMetrics, err := f.GetMetric("nginx_ingress_controller_requests", ip)
assert.EqualError(ginkgo.GinkgoT(), err, "there is no metric with name nginx_ingress_controller_requests")
assert.Nil(ginkgo.GinkgoT(), reqMetrics)
})
}) })