From 14bca2420261f5bed0ffdc3992e348faf0006c10 Mon Sep 17 00:00:00 2001 From: Giancarlo Rubio Date: Fri, 18 May 2018 20:27:32 +0200 Subject: [PATCH 1/2] Instrument controller to show failed configuration reload and the last time it was succeeded --- internal/ingress/controller/controller.go | 6 ++-- internal/ingress/controller/metrics.go | 43 ++++++++++++++++++++--- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/internal/ingress/controller/controller.go b/internal/ingress/controller/controller.go index 335b2fc20..b47fb384b 100644 --- a/internal/ingress/controller/controller.go +++ b/internal/ingress/controller/controller.go @@ -169,13 +169,15 @@ func (n *NGINXController) syncIngress(interface{}) error { err := n.OnUpdate(pcfg) if err != nil { - incReloadErrorCount() + IncReloadErrorCount() + ConfigSuccess(false) glog.Errorf("unexpected failure restarting the backend: \n%v", err) return err } glog.Infof("ingress backend successfully reloaded...") - incReloadCount() + ConfigSuccess(true) + IncReloadCount() setSSLExpireTime(servers) } diff --git a/internal/ingress/controller/metrics.go b/internal/ingress/controller/metrics.go index 9a311563d..b427f1a59 100644 --- a/internal/ingress/controller/metrics.go +++ b/internal/ingress/controller/metrics.go @@ -17,6 +17,8 @@ limitations under the License. package controller import ( + "time" + "github.com/prometheus/client_golang/prometheus" "k8s.io/ingress-nginx/internal/ingress" @@ -34,22 +36,42 @@ func init() { prometheus.MustRegister(reloadOperation) prometheus.MustRegister(reloadOperationErrors) prometheus.MustRegister(sslExpireTime) + prometheus.MustRegister(configSuccess) + prometheus.MustRegister(configSuccessTime) } var ( + configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: ns, + Name: "config_last_reload_successfull", + Help: `Whether the last configuration reload attemp was successfull. 
+ Prometheus alert example: + alert: IngressControllerFailedReload + expr: ingress_controller_config_last_reload_successfull == 0 + for: 10m`, + }) + configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: ns, + Name: "config_last_reload_successfull_timestamp_seconds", + Help: "Timestamp of the last successfull configuration reload.", + }) + // TODO deprecate this metric in favor of ingress_controller_config_last_reload_successfull_timestamp_seconds reloadOperation = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: ns, Name: "success", - Help: "Cumulative number of Ingress controller reload operations", + Help: `DEPRECATED: use ingress_controller_config_last_reload_successfull_timestamp_seconds or ingress_controller_config_last_reload_successfull instead. + Cumulative number of Ingress controller reload operations`, }, []string{operation}, ) + // TODO deprecate this metric in favor of ingress_controller_config_last_reload_successfull_timestamp_seconds reloadOperationErrors = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: ns, Name: "errors", - Help: "Cumulative number of Ingress controller errors during reload operations", + Help: `DEPRECATED: use ingress_controller_config_last_reload_successfull_timestamp_seconds or ingress_controller_config_last_reload_successfull instead. 
+ Cumulative number of Ingress controller errors during reload operations`, }, []string{operation}, ) @@ -64,14 +86,27 @@ var ( ) ) -func incReloadCount() { +func IncReloadCount() { reloadOperation.WithLabelValues(reloadLabel).Inc() } -func incReloadErrorCount() { +func IncReloadErrorCount() { reloadOperationErrors.WithLabelValues(reloadLabel).Inc() } +func ConfigSuccess(success bool) { + if success { + ConfigSuccessTime() + configSuccess.Set(1) + } else { + configSuccess.Set(0) + } +} + +func ConfigSuccessTime() { + configSuccessTime.Set(float64(time.Now().Unix())) +} + func setSSLExpireTime(servers []*ingress.Server) { for _, s := range servers { if s.Hostname != defServerName { From 67de29f7ab8028b32938be0ff711d9d005c8dd2f Mon Sep 17 00:00:00 2001 From: Giancarlo Rubio Date: Fri, 18 May 2018 21:15:19 +0200 Subject: [PATCH 2/2] Fix lint --- internal/ingress/controller/metrics.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/ingress/controller/metrics.go b/internal/ingress/controller/metrics.go index b427f1a59..88d662796 100644 --- a/internal/ingress/controller/metrics.go +++ b/internal/ingress/controller/metrics.go @@ -86,14 +86,17 @@ var ( ) ) +// IncReloadCount increments the reload counter. func IncReloadCount() { reloadOperation.WithLabelValues(reloadLabel).Inc() } +// IncReloadErrorCount increments the reload error counter. func IncReloadErrorCount() { reloadOperationErrors.WithLabelValues(reloadLabel).Inc() } +// ConfigSuccess sets a boolean flag according to the outcome of the controller configuration reload. func ConfigSuccess(success bool) { if success { ConfigSuccessTime() @@ -103,6 +106,7 @@ func ConfigSuccess(success bool) { } } +// ConfigSuccessTime sets the current timestamp when the controller is successfully reloaded. func ConfigSuccessTime() { configSuccessTime.Set(float64(time.Now().Unix())) }