Metrics: Add --metrics-per-undefined-host
argument. (#11818)
Signed-off-by: Jon Carl <grounded042@joncarl.com>
This commit is contained in:
parent
93f9f9fbb3
commit
034c3ccad4
11 changed files with 172 additions and 28 deletions
|
@ -66,7 +66,7 @@ func main() {
|
|||
mc := metric.NewDummyCollector()
|
||||
if conf.EnableMetrics {
|
||||
// TODO: Ingress class is not a part of dataplane anymore
|
||||
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
|
||||
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.MetricsPerUndefinedHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
|
||||
if err != nil {
|
||||
klog.Fatalf("Error creating prometheus collector: %v", err)
|
||||
}
|
||||
|
|
|
@ -130,7 +130,7 @@ func main() {
|
|||
|
||||
mc := metric.NewDummyCollector()
|
||||
if conf.EnableMetrics {
|
||||
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
|
||||
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.MetricsPerUndefinedHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
|
||||
if err != nil {
|
||||
klog.Fatalf("Error creating prometheus collector: %v", err)
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
|
|||
| `--maxmind-license-key` | Maxmind license key to download GeoLite2 Databases. https://blog.maxmind.com/2019/12/significant-changes-to-accessing-and-using-geolite2-databases/ . |
|
||||
| `--maxmind-mirror` | Maxmind mirror url (example: http://geoip.local/databases). |
|
||||
| `--metrics-per-host` | Export metrics per-host. (default true) |
|
||||
| `--metrics-per-undefined-host` | Export metrics per-host even if the host is not defined in an ingress. Requires --metrics-per-host to be set to true. (default false) |
|
||||
| `--monitor-max-batch-size` | Max batch size of NGINX metrics. (default 10000)|
|
||||
| `--post-shutdown-grace-period` | Additional delay in seconds before controller container exits. (default 10) |
|
||||
| `--profiler-port` | Port to use for exposing the ingress controller Go profiler when it is enabled. (default 10245) |
|
||||
|
|
|
@ -166,7 +166,9 @@ According to the above example, this URL will be http://10.192.0.3:31086
|
|||
|
||||
#### Wildcard ingresses
|
||||
|
||||
- By default request metrics are labeled with the hostname. When you have a wildcard domain ingress, then there will be no metrics for that ingress (to prevent the metrics from exploding in cardinality). To get metrics in this case you need to run the ingress controller with `--metrics-per-host=false` (you will lose labeling by hostname, but still have labeling by ingress).
|
||||
- By default request metrics are labeled with the hostname. When you have a wildcard domain ingress, then there will be no metrics for that ingress (to prevent the metrics from exploding in cardinality). To get metrics in this case you have two options:
|
||||
- Run the ingress controller with `--metrics-per-host=false`. You will lose labeling by hostname, but still have labeling by ingress.
|
||||
- Run the ingress controller with `--metrics-per-undefined-host=true --metrics-per-host=true`. You will get labeling by hostname even if the hostname is not explicitly defined on an ingress. Be warned that cardinality could explode due to many hostnames.
|
||||
|
||||
### Grafana dashboard using ingress resource
|
||||
- If you want to expose the dashboard for grafana using an ingress resource, then you can :
|
||||
|
|
|
@ -105,13 +105,14 @@ type Configuration struct {
|
|||
|
||||
EnableProfiling bool
|
||||
|
||||
EnableMetrics bool
|
||||
MetricsPerHost bool
|
||||
MetricsBuckets *collectors.HistogramBuckets
|
||||
MetricsBucketFactor float64
|
||||
MetricsMaxBuckets uint32
|
||||
ReportStatusClasses bool
|
||||
ExcludeSocketMetrics []string
|
||||
EnableMetrics bool
|
||||
MetricsPerHost bool
|
||||
MetricsPerUndefinedHost bool
|
||||
MetricsBuckets *collectors.HistogramBuckets
|
||||
MetricsBucketFactor float64
|
||||
MetricsMaxBuckets uint32
|
||||
ReportStatusClasses bool
|
||||
ExcludeSocketMetrics []string
|
||||
|
||||
FakeCertificate *ingress.SSLCert
|
||||
|
||||
|
|
|
@ -81,8 +81,9 @@ type SocketCollector struct {
|
|||
|
||||
hosts sets.Set[string]
|
||||
|
||||
metricsPerHost bool
|
||||
reportStatusClasses bool
|
||||
metricsPerHost bool
|
||||
metricsPerUndefinedHost bool
|
||||
reportStatusClasses bool
|
||||
}
|
||||
|
||||
var requestTags = []string{
|
||||
|
@ -99,7 +100,7 @@ var requestTags = []string{
|
|||
|
||||
// NewSocketCollector creates a new SocketCollector instance using
|
||||
// the ingress watch namespace and class used by the controller
|
||||
func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludeMetrics []string) (*SocketCollector, error) {
|
||||
func NewSocketCollector(pod, namespace, class string, metricsPerHost, metricsPerUndefinedHost, reportStatusClasses bool, buckets HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludeMetrics []string) (*SocketCollector, error) {
|
||||
socket := "/tmp/nginx/prometheus-nginx.socket"
|
||||
// unix sockets must be unlink()ed before being used
|
||||
//nolint:errcheck // Ignore unlink error
|
||||
|
@ -139,8 +140,9 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
|
|||
sc := &SocketCollector{
|
||||
listener: listener,
|
||||
|
||||
metricsPerHost: metricsPerHost,
|
||||
reportStatusClasses: reportStatusClasses,
|
||||
metricsPerHost: metricsPerHost,
|
||||
metricsPerUndefinedHost: metricsPerUndefinedHost,
|
||||
reportStatusClasses: reportStatusClasses,
|
||||
|
||||
connectTime: histogramMetric(
|
||||
&prometheus.HistogramOpts{
|
||||
|
@ -306,8 +308,8 @@ func (sc *SocketCollector) handleMessage(msg []byte) {
|
|||
|
||||
for i := range statsBatch {
|
||||
stats := &statsBatch[i]
|
||||
if sc.metricsPerHost && !sc.hosts.Has(stats.Host) {
|
||||
klog.V(3).InfoS("Skipping metric for host not being served", "host", stats.Host)
|
||||
if sc.metricsPerHost && !sc.hosts.Has(stats.Host) && !sc.metricsPerUndefinedHost {
|
||||
klog.V(3).InfoS("Skipping metric for host not explicitly defined in an ingress", "host", stats.Host)
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
|
@ -87,14 +87,15 @@ func TestCollector(t *testing.T) {
|
|||
maxBuckets := uint32(100)
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
data []string
|
||||
metrics []string
|
||||
useStatusClasses bool
|
||||
excludeMetrics []string
|
||||
wantBefore string
|
||||
removeIngresses []string
|
||||
wantAfter string
|
||||
name string
|
||||
data []string
|
||||
metrics []string
|
||||
metricsPerUndefinedHost bool
|
||||
useStatusClasses bool
|
||||
excludeMetrics []string
|
||||
wantBefore string
|
||||
removeIngresses []string
|
||||
wantAfter string
|
||||
}{
|
||||
{
|
||||
name: "invalid metric object should not increase prometheus metrics",
|
||||
|
@ -591,13 +592,69 @@ func TestCollector(t *testing.T) {
|
|||
nginx_ingress_controller_response_duration_seconds_count{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "metrics with a host should not be dropped when the host is not in the hosts slice but metricsPerUndefinedHost is true",
|
||||
data: []string{`[{
|
||||
"host":"wildcard.testshop.com",
|
||||
"status":"200",
|
||||
"bytesSent":150.0,
|
||||
"method":"GET",
|
||||
"path":"/admin",
|
||||
"requestLength":300.0,
|
||||
"requestTime":60.0,
|
||||
"upstreamLatency":1.0,
|
||||
"upstreamHeaderTime":5.0,
|
||||
"upstreamName":"test-upstream",
|
||||
"upstreamIP":"1.1.1.1:8080",
|
||||
"upstreamResponseTime":200,
|
||||
"upstreamStatus":"220",
|
||||
"namespace":"test-app-production",
|
||||
"ingress":"web-yml",
|
||||
"service":"test-app",
|
||||
"canary":""
|
||||
}]`},
|
||||
excludeMetrics: []string{"response_duration_seconds2", "test.*", "nginx_ingress_.*", "response_duration_secon"},
|
||||
metrics: []string{"nginx_ingress_controller_requests"},
|
||||
metricsPerUndefinedHost: true,
|
||||
useStatusClasses: true,
|
||||
wantBefore: `
|
||||
# HELP nginx_ingress_controller_requests The total number of client requests
|
||||
# TYPE nginx_ingress_controller_requests counter
|
||||
nginx_ingress_controller_requests{canary="",controller_class="ingress",controller_namespace="default",controller_pod="pod",host="wildcard.testshop.com",ingress="web-yml",method="GET",namespace="test-app-production",path="/admin",service="test-app",status="2xx"} 1
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "metrics with a host should be dropped when the host is not in the hosts slice",
|
||||
data: []string{`[{
|
||||
"host":"wildcard.testshop.com",
|
||||
"status":"200",
|
||||
"bytesSent":150.0,
|
||||
"method":"GET",
|
||||
"path":"/admin",
|
||||
"requestLength":300.0,
|
||||
"requestTime":60.0,
|
||||
"upstreamLatency":1.0,
|
||||
"upstreamHeaderTime":5.0,
|
||||
"upstreamName":"test-upstream",
|
||||
"upstreamIP":"1.1.1.1:8080",
|
||||
"upstreamResponseTime":200,
|
||||
"upstreamStatus":"220",
|
||||
"namespace":"test-app-production",
|
||||
"ingress":"web-yml",
|
||||
"service":"test-app",
|
||||
"canary":""
|
||||
}]`},
|
||||
excludeMetrics: []string{"response_duration_seconds2", "test.*", "nginx_ingress_.*", "response_duration_secon"},
|
||||
metrics: []string{"nginx_ingress_controller_requests"},
|
||||
useStatusClasses: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
registry := prometheus.NewPedanticRegistry()
|
||||
|
||||
sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, bucketFactor, maxBuckets, c.excludeMetrics)
|
||||
sc, err := NewSocketCollector("pod", "default", "ingress", true, c.metricsPerUndefinedHost, c.useStatusClasses, buckets, bucketFactor, maxBuckets, c.excludeMetrics)
|
||||
if err != nil {
|
||||
t.Errorf("%v: unexpected error creating new SocketCollector: %v", c.name, err)
|
||||
}
|
||||
|
|
|
@ -71,7 +71,7 @@ type collector struct {
|
|||
}
|
||||
|
||||
// NewCollector creates a new metric collector for the ingress controller
|
||||
func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludedSocketMetrics []string) (Collector, error) {
|
||||
func NewCollector(metricsPerHost, metricsPerUndefinedHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludedSocketMetrics []string) (Collector, error) {
|
||||
podNamespace := os.Getenv("POD_NAMESPACE")
|
||||
if podNamespace == "" {
|
||||
podNamespace = "default"
|
||||
|
@ -89,7 +89,7 @@ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus
|
|||
return nil, err
|
||||
}
|
||||
|
||||
s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, bucketFactor, maxBuckets, excludedSocketMetrics)
|
||||
s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, metricsPerUndefinedHost, reportStatusClasses, buckets, bucketFactor, maxBuckets, excludedSocketMetrics)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
package flags
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net"
|
||||
|
@ -177,6 +178,8 @@ Requires the update-status parameter.`)
|
|||
`Enables the collection of NGINX metrics.`)
|
||||
metricsPerHost = flags.Bool("metrics-per-host", true,
|
||||
`Export metrics per-host.`)
|
||||
metricsPerUndefinedHost = flags.Bool("metrics-per-undefined-host", false,
|
||||
`Export metrics per-host even if the host is not defined in an ingress. Requires --metrics-per-host to be set to true.`)
|
||||
reportStatusClasses = flags.Bool("report-status-classes", false,
|
||||
`Use status classes (2xx, 3xx, 4xx and 5xx) instead of status codes in metrics.`)
|
||||
|
||||
|
@ -319,6 +322,10 @@ https://blog.maxmind.com/2019/12/significant-changes-to-accessing-and-using-geol
|
|||
}
|
||||
}
|
||||
|
||||
if *metricsPerUndefinedHost && !*metricsPerHost {
|
||||
return false, nil, errors.New("--metrics-per-undefined-host=true must be passed with --metrics-per-host=true")
|
||||
}
|
||||
|
||||
if *electionTTL <= 0 {
|
||||
*electionTTL = 30 * time.Second
|
||||
}
|
||||
|
@ -340,6 +347,7 @@ https://blog.maxmind.com/2019/12/significant-changes-to-accessing-and-using-geol
|
|||
EnableProfiling: *profiling,
|
||||
EnableMetrics: *enableMetrics,
|
||||
MetricsPerHost: *metricsPerHost,
|
||||
MetricsPerUndefinedHost: *metricsPerUndefinedHost,
|
||||
MetricsBuckets: histogramBuckets,
|
||||
MetricsBucketFactor: *bucketFactor,
|
||||
MetricsMaxBuckets: *maxBuckets,
|
||||
|
|
|
@ -212,3 +212,29 @@ func TestLeaderElectionTTLParseValueInHours(t *testing.T) {
|
|||
t.Fatalf("Expected --election-ttl and conf.ElectionTTL as 1h, but found: %v", conf.ElectionTTL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsPerUndefinedHost(t *testing.T) {
|
||||
ResetForTesting(func() { t.Fatal("Parsing failed") })
|
||||
|
||||
oldArgs := os.Args
|
||||
defer func() { os.Args = oldArgs }()
|
||||
os.Args = []string{"cmd", "--metrics-per-undefined-host=true"}
|
||||
|
||||
_, _, err := ParseFlags()
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error but got: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsPerUndefinedHostWithMetricsPerHostFalse(t *testing.T) {
|
||||
ResetForTesting(func() { t.Fatal("Parsing failed") })
|
||||
|
||||
oldArgs := os.Args
|
||||
defer func() { os.Args = oldArgs }()
|
||||
os.Args = []string{"cmd", "--metrics-per-host=false", "--metrics-per-undefined-host=true"}
|
||||
|
||||
_, _, err := ParseFlags()
|
||||
if err == nil {
|
||||
t.Fatalf("Expected an error parsing flags but none returned")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ const waitForMetrics = 2 * time.Second
|
|||
var _ = framework.IngressNginxDescribe("[metrics] exported prometheus metrics", func() {
|
||||
f := framework.NewDefaultFramework("metrics")
|
||||
host := "foo.com"
|
||||
wildcardHost := "wildcard." + host
|
||||
|
||||
ginkgo.BeforeEach(func() {
|
||||
f.NewEchoDeployment()
|
||||
|
@ -91,4 +92,50 @@ var _ = framework.IngressNginxDescribe("[metrics] exported prometheus metrics",
|
|||
assert.Nil(ginkgo.GinkgoT(), err)
|
||||
assert.NotNil(ginkgo.GinkgoT(), mf)
|
||||
})
|
||||
ginkgo.It("request metrics per undefined host are present when flag is set", func() {
	// Append the flag under test to the controller container arguments and
	// push the updated deployment.
	err := f.UpdateIngressControllerDeployment(func(deployment *appsv1.Deployment) error {
		args := deployment.Spec.Template.Spec.Containers[0].Args
		args = append(args, "--metrics-per-undefined-host=true")
		deployment.Spec.Template.Spec.Containers[0].Args = args
		_, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
		return err
	})
	assert.Nil(ginkgo.GinkgoT(), err, "updating deployment")

	// Send one request for the wildcard host; a 404 is the expected answer
	// (presumably because no ingress in this test defines that host).
	f.HTTPTestClient().
		GET("/").
		WithHeader("Host", wildcardHost).
		Expect().
		Status(http.StatusNotFound)
	// Give the controller time to export the request metric.
	time.Sleep(waitForMetrics)

	ip := f.GetNginxPodIP()
	reqMetrics, err := f.GetMetric("nginx_ingress_controller_requests", ip)
	assert.Nil(ginkgo.GinkgoT(), err)
	assert.NotNil(ginkgo.GinkgoT(), reqMetrics.Metric)
	assert.Len(ginkgo.GinkgoT(), reqMetrics.Metric, 1)

	// Look for a host label carrying the wildcard hostname. The generated
	// getters are used instead of dereferencing the Name/Value pointers so
	// that an unset field cannot cause a nil-pointer panic.
	containedLabel := false
	for _, label := range reqMetrics.Metric[0].Label {
		if label.GetName() == "host" && label.GetValue() == wildcardHost {
			containedLabel = true
			break
		}
	}

	assert.Truef(ginkgo.GinkgoT(), containedLabel, "expected reqMetrics to contain label with \"name\"=\"host\" \"value\"=%q, but it did not: %s", wildcardHost, reqMetrics.String())
})
|
||||
ginkgo.It("request metrics per undefined host are not present when flag is not set", func() {
	// Issue a single request for the wildcard host and expect a 404.
	resp := f.HTTPTestClient().GET("/").WithHeader("Host", wildcardHost).Expect()
	resp.Status(http.StatusNotFound)

	// Wait before scraping so any metric that would be exported has had
	// time to appear.
	time.Sleep(waitForMetrics)

	// With the flag unset, the request metric must not be exported at all.
	podIP := f.GetNginxPodIP()
	family, scrapeErr := f.GetMetric("nginx_ingress_controller_requests", podIP)
	assert.EqualError(ginkgo.GinkgoT(), scrapeErr, "there is no metric with name nginx_ingress_controller_requests")
	assert.Nil(ginkgo.GinkgoT(), family)
})
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue