diff --git a/internal/ingress/controller/controller.go b/internal/ingress/controller/controller.go index 335b2fc20..b47fb384b 100644 --- a/internal/ingress/controller/controller.go +++ b/internal/ingress/controller/controller.go @@ -169,13 +169,15 @@ func (n *NGINXController) syncIngress(interface{}) error { err := n.OnUpdate(pcfg) if err != nil { - incReloadErrorCount() + IncReloadErrorCount() + ConfigSuccess(false) glog.Errorf("unexpected failure restarting the backend: \n%v", err) return err } glog.Infof("ingress backend successfully reloaded...") - incReloadCount() + ConfigSuccess(true) + IncReloadCount() setSSLExpireTime(servers) } diff --git a/internal/ingress/controller/metrics.go b/internal/ingress/controller/metrics.go index 9a311563d..88d662796 100644 --- a/internal/ingress/controller/metrics.go +++ b/internal/ingress/controller/metrics.go @@ -17,6 +17,8 @@ limitations under the License. package controller import ( + "time" + "github.com/prometheus/client_golang/prometheus" "k8s.io/ingress-nginx/internal/ingress" @@ -34,22 +36,42 @@ func init() { prometheus.MustRegister(reloadOperation) prometheus.MustRegister(reloadOperationErrors) prometheus.MustRegister(sslExpireTime) + prometheus.MustRegister(configSuccess) + prometheus.MustRegister(configSuccessTime) } var ( + configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: ns, + Name: "config_last_reload_successful", + Help: `Whether the last configuration reload attempt was successful. 
+ Prometheus alert example: + alert: IngressControllerFailedReload + expr: ingress_controller_config_last_reload_successful == 0 + for: 10m`, + }) + configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: ns, + Name: "config_last_reload_successful_timestamp_seconds", + Help: "Timestamp of the last successful configuration reload.", + }) + // TODO: deprecate this metric in favor of ingress_controller_config_last_reload_successful_timestamp_seconds reloadOperation = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: ns, Name: "success", - Help: "Cumulative number of Ingress controller reload operations", + Help: `DEPRECATED: use ingress_controller_config_last_reload_successful_timestamp_seconds or ingress_controller_config_last_reload_successful instead. + Cumulative number of Ingress controller reload operations`, }, []string{operation}, ) + // TODO: deprecate this metric in favor of ingress_controller_config_last_reload_successful_timestamp_seconds reloadOperationErrors = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: ns, Name: "errors", - Help: "Cumulative number of Ingress controller errors during reload operations", + Help: `DEPRECATED: use ingress_controller_config_last_reload_successful_timestamp_seconds or ingress_controller_config_last_reload_successful instead. 
+ Cumulative number of Ingress controller errors during reload operations`, }, []string{operation}, ) @@ -64,14 +86,31 @@ var ( ) ) -func incReloadCount() { +// IncReloadCount increments the reload operation counter. +func IncReloadCount() { reloadOperation.WithLabelValues(reloadLabel).Inc() } -func incReloadErrorCount() { +// IncReloadErrorCount increments the counter of errors during reload operations. +func IncReloadErrorCount() { reloadOperationErrors.WithLabelValues(reloadLabel).Inc() } +// ConfigSuccess records the outcome of a configuration reload: on success it refreshes the last-success timestamp and sets the gauge to 1, otherwise it sets the gauge to 0. +func ConfigSuccess(success bool) { + if success { + ConfigSuccessTime() + configSuccess.Set(1) + } else { + configSuccess.Set(0) + } +} + +// ConfigSuccessTime sets the last-successful-reload timestamp gauge to the current Unix time in seconds. +func ConfigSuccessTime() { + configSuccessTime.Set(float64(time.Now().Unix())) +} + func setSSLExpireTime(servers []*ingress.Server) { for _, s := range servers { if s.Hostname != defServerName {