From 7e984d73707600e7b10cc866092af17daad2678c Mon Sep 17 00:00:00 2001
From: Manuel Alejandro de Brito Fontes
Date: Mon, 11 Mar 2019 13:31:38 -0300
Subject: [PATCH] Only the leader updates metrics for SSL certificate expiration

---
 internal/ingress/controller/controller.go     |  6 +++-
 internal/ingress/controller/nginx.go          | 29 +++++++++++++------
 .../ingress/metric/collectors/controller.go   | 24 +++++++++++----
 internal/ingress/metric/main.go               |  1 +
 4 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/internal/ingress/controller/controller.go b/internal/ingress/controller/controller.go
index 1878a3172..7de36425b 100644
--- a/internal/ingress/controller/controller.go
+++ b/internal/ingress/controller/controller.go
@@ -190,7 +190,11 @@ func (n *NGINXController) syncIngress(interface{}) error {
 		klog.Infof("Backend successfully reloaded.")
 		n.metricCollector.ConfigSuccess(hash, true)
 		n.metricCollector.IncReloadCount()
-		n.metricCollector.SetSSLExpireTime(servers)
+
+		if n.isLeader() {
+			klog.V(2).Infof("Updating ssl expiration metrics.")
+			n.metricCollector.SetSSLExpireTime(servers)
+		}
 	}
 
 	isFirstSync := n.runningConfig.Equal(&ingress.Configuration{})
diff --git a/internal/ingress/controller/nginx.go b/internal/ingress/controller/nginx.go
index 3da833635..0a29f3d08 100644
--- a/internal/ingress/controller/nginx.go
+++ b/internal/ingress/controller/nginx.go
@@ -31,6 +31,7 @@ import (
 	"strconv"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"syscall"
 	"text/template"
 	"time"
@@ -255,6 +256,8 @@ type NGINXController struct {
 	fileSystem filesystem.Filesystem
 
 	metricCollector metric.Collector
+
+	currentLeader uint32
 }
 
 // Start starts a new NGINX master process running in the foreground.
@@ -278,19 +281,15 @@ func (n *NGINXController) Start() {
 				go n.syncStatus.Run(stopCh)
 			}
 
+			n.setLeader(true)
 			n.metricCollector.OnStartedLeading(electionID)
+			// manually update SSL expiration metrics
+			// (to not wait for a reload)
+			n.metricCollector.SetSSLExpireTime(n.runningConfig.Servers)
 		},
 		OnStoppedLeading: func() {
+			n.setLeader(false)
 			n.metricCollector.OnStoppedLeading(electionID)
-
-			// Remove prometheus metrics related to SSL certificates
-			srvs := sets.NewString()
-			for _, s := range n.runningConfig.Servers {
-				if !srvs.Has(s.Hostname) {
-					srvs.Insert(s.Hostname)
-				}
-			}
-			n.metricCollector.RemoveMetrics(nil, srvs.List())
 		},
 		PodName:      n.podInfo.Name,
 		PodNamespace: n.podInfo.Namespace,
@@ -1129,3 +1128,15 @@ func buildRedirects(servers []*ingress.Server) []*redirect {
 
 	return redirectServers
 }
+
+func (n *NGINXController) setLeader(leader bool) {
+	var i uint32
+	if leader {
+		i = 1
+	}
+	atomic.StoreUint32(&n.currentLeader, i)
+}
+
+func (n *NGINXController) isLeader() bool {
+	return atomic.LoadUint32(&n.currentLeader) != 0
+}
diff --git a/internal/ingress/metric/collectors/controller.go b/internal/ingress/metric/collectors/controller.go
index 5c281b988..3df99bafd 100644
--- a/internal/ingress/metric/collectors/controller.go
+++ b/internal/ingress/metric/collectors/controller.go
@@ -116,8 +116,10 @@ func NewController(pod, namespace, class string) *Controller {
 		),
 		leaderElection: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
-				Name: "leader_election_status",
-				Help: "Gauge reporting status of the leader election, 0 indicates follower, 1 indicates leader. 'name' is the string used to identify the lease",
+				Namespace:   PrometheusNamespace,
+				Name:        "leader_election_status",
+				Help:        "Gauge reporting status of the leader election, 0 indicates follower, 1 indicates leader. 'name' is the string used to identify the lease",
+				ConstLabels: constLabels,
 			},
 			[]string{"name"},
 		),
@@ -138,12 +140,12 @@ func (cm *Controller) IncReloadErrorCount() {
 
 // OnStartedLeading indicates the pod was elected as the leader
 func (cm *Controller) OnStartedLeading(electionID string) {
-	cm.leaderElection.WithLabelValues(electionID).Set(0)
+	cm.leaderElection.WithLabelValues(electionID).Set(1.0)
 }
 
 // OnStoppedLeading indicates the pod stopped being the leader
 func (cm *Controller) OnStoppedLeading(electionID string) {
-	cm.leaderElection.WithLabelValues(electionID).Set(1.0)
+	cm.leaderElection.WithLabelValues(electionID).Set(0)
 }
 
 // ConfigSuccess set a boolean flag according to the output of the controller configuration reload
@@ -169,6 +171,7 @@ func (cm Controller) Describe(ch chan<- *prometheus.Desc) {
 	cm.reloadOperation.Describe(ch)
 	cm.reloadOperationErrors.Describe(ch)
 	cm.sslExpireTime.Describe(ch)
+	cm.leaderElection.Describe(ch)
 }
 
 // Collect implements the prometheus.Collector interface.
@@ -179,6 +182,7 @@ func (cm Controller) Collect(ch chan<- prometheus.Metric) {
 	cm.reloadOperation.Collect(ch)
 	cm.reloadOperationErrors.Collect(ch)
 	cm.sslExpireTime.Collect(ch)
+	cm.leaderElection.Collect(ch)
 }
 
 // SetSSLExpireTime sets the expiration time of SSL Certificates
@@ -198,13 +202,21 @@ func (cm *Controller) SetSSLExpireTime(servers []*ingress.Server) {
 
 // RemoveMetrics removes metrics for hostnames not available anymore
 func (cm *Controller) RemoveMetrics(hosts []string, registry prometheus.Gatherer) {
+	cm.removeSSLExpireMetrics(true, hosts, registry)
+}
+
+// RemoveAllSSLExpireMetrics removes metrics for expiration of SSL Certificates
+func (cm *Controller) RemoveAllSSLExpireMetrics(registry prometheus.Gatherer) {
+	cm.removeSSLExpireMetrics(false, []string{}, registry)
+}
+
+func (cm *Controller) removeSSLExpireMetrics(onlyDefinedHosts bool, hosts []string, registry prometheus.Gatherer) {
 	mfs, err := registry.Gather()
 	if err != nil {
 		klog.Errorf("Error gathering metrics: %v", err)
 		return
 	}
 
-	klog.V(2).Infof("removing SSL certificate metrics for %v hosts", hosts)
 	toRemove := sets.NewString(hosts...)
 
 	for _, mf := range mfs {
@@ -227,7 +239,7 @@ func (cm *Controller) RemoveMetrics(hosts []string, registry prometheus.Gatherer
 				continue
 			}
 
-			if !toRemove.Has(host) {
+			if onlyDefinedHosts && !toRemove.Has(host) {
 				continue
 			}
 
diff --git a/internal/ingress/metric/main.go b/internal/ingress/metric/main.go
index d604b7fb0..8039c2d74 100644
--- a/internal/ingress/metric/main.go
+++ b/internal/ingress/metric/main.go
@@ -159,4 +159,5 @@ func (c *collector) OnStartedLeading(electionID string) {
 // OnStoppedLeading indicates the pod stopped being the leader
 func (c *collector) OnStoppedLeading(electionID string) {
 	c.ingressController.OnStoppedLeading(electionID)
+	c.ingressController.RemoveAllSSLExpireMetrics(c.registry)
 }