From 892786b7a2ec1bd4efcc6a04f15283848eeba778 Mon Sep 17 00:00:00 2001 From: David Shay Date: Mon, 11 Apr 2022 14:42:06 -0400 Subject: [PATCH] Fix for buggy ingress sync with retries (#8325) --- cmd/nginx/flags.go | 63 ++++++++++++----------- internal/ingress/controller/controller.go | 15 ++++-- 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/cmd/nginx/flags.go b/cmd/nginx/flags.go index 302930551..337a1f827 100644 --- a/cmd/nginx/flags.go +++ b/cmd/nginx/flags.go @@ -65,7 +65,7 @@ The parameter --controller-class has precedence over this.`) ingressClassController = flags.String("controller-class", ingressclass.DefaultControllerName, `Ingress Class Controller value this Ingress satisfies. -The class of an Ingress object is set using the field IngressClassName in Kubernetes clusters version v1.19.0 or higher. The .spec.controller value of the IngressClass +The class of an Ingress object is set using the field IngressClassName in Kubernetes clusters version v1.19.0 or higher. The .spec.controller value of the IngressClass referenced in an Ingress Object should be the same value specified here to make this object be watched.`) watchWithoutClass = flags.Bool("watch-ingress-without-class", false, @@ -203,6 +203,8 @@ Takes the form ":port". If not provided, no admission controller is starte postShutdownGracePeriod = flags.Int("post-shutdown-grace-period", 10, "Seconds to wait after the nginx process has stopped before controller exits.") deepInspector = flags.Bool("deep-inspect", true, "Enables ingress object security deep inspector") + + dynamicConfigurationRetries = flags.Int("dynamic-configuration-retries", 15, "Number of times to retry failed dynamic configuration before failing to sync an ingress.") ) flags.StringVar(&nginx.MaxmindMirror, "maxmind-mirror", "", `Maxmind mirror url (example: http://geoip.local/databases`) @@ -303,35 +305,36 @@ https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-g ngx_config.EnableSSLChainCompletion = *enableSSLChainCompletion config := &controller.Configuration{ - APIServerHost: *apiserverHost, - KubeConfigFile: *kubeConfigFile, - UpdateStatus: *updateStatus, - ElectionID: *electionID, - EnableProfiling: *profiling, - EnableMetrics: *enableMetrics, - MetricsPerHost: *metricsPerHost, - MetricsBuckets: histogramBuckets, - MonitorMaxBatchSize: *monitorMaxBatchSize, - DisableServiceExternalName: *disableServiceExternalName, - EnableSSLPassthrough: *enableSSLPassthrough, - ResyncPeriod: *resyncPeriod, - DefaultService: *defaultSvc, - Namespace: *watchNamespace, - WatchNamespaceSelector: namespaceSelector, - ConfigMapName: *configMap, - TCPConfigMapName: *tcpConfigMapName, - UDPConfigMapName: *udpConfigMapName, - DisableFullValidationTest: *disableFullValidationTest, - DefaultSSLCertificate: *defSSLCertificate, - DeepInspector: *deepInspector, - PublishService: *publishSvc, - PublishStatusAddress: *publishStatusAddress, - UpdateStatusOnShutdown: *updateStatusOnShutdown, - ShutdownGracePeriod: *shutdownGracePeriod, - PostShutdownGracePeriod: *postShutdownGracePeriod, - UseNodeInternalIP: *useNodeInternalIP, - SyncRateLimit: *syncRateLimit, - HealthCheckHost: *healthzHost, + APIServerHost: *apiserverHost, + KubeConfigFile: *kubeConfigFile, + UpdateStatus: *updateStatus, + ElectionID: *electionID, + EnableProfiling: *profiling, + EnableMetrics: *enableMetrics, + MetricsPerHost: *metricsPerHost, + MetricsBuckets: histogramBuckets, + MonitorMaxBatchSize: *monitorMaxBatchSize, + DisableServiceExternalName: *disableServiceExternalName, + EnableSSLPassthrough: *enableSSLPassthrough, + ResyncPeriod: *resyncPeriod, + DefaultService: *defaultSvc, + Namespace: *watchNamespace, + WatchNamespaceSelector: namespaceSelector, + ConfigMapName: *configMap, + TCPConfigMapName: *tcpConfigMapName, + UDPConfigMapName: *udpConfigMapName, + DisableFullValidationTest: *disableFullValidationTest, + DefaultSSLCertificate: *defSSLCertificate, + DeepInspector: *deepInspector, + PublishService: *publishSvc, + PublishStatusAddress: *publishStatusAddress, + UpdateStatusOnShutdown: *updateStatusOnShutdown, + ShutdownGracePeriod: *shutdownGracePeriod, + PostShutdownGracePeriod: *postShutdownGracePeriod, + UseNodeInternalIP: *useNodeInternalIP, + SyncRateLimit: *syncRateLimit, + HealthCheckHost: *healthzHost, + DynamicConfigurationRetries: *dynamicConfigurationRetries, ListenPorts: &ngx_config.ListenPorts{ Default: *defServerPort, Health: *healthzPort, diff --git a/internal/ingress/controller/controller.go b/internal/ingress/controller/controller.go index 778dfa03d..f43c72d92 100644 --- a/internal/ingress/controller/controller.go +++ b/internal/ingress/controller/controller.go @@ -125,6 +125,8 @@ type Configuration struct { InternalLoggerAddress string IsChroot bool DeepInspector bool + + DynamicConfigurationRetries int } // GetPublishService returns the Service used to set the load-balancer status of Ingresses. @@ -194,19 +196,24 @@ func (n *NGINXController) syncIngress(interface{}) error { } retry := wait.Backoff{ - Steps: 15, - Duration: 1 * time.Second, - Factor: 0.8, + Steps: 1 + n.cfg.DynamicConfigurationRetries, + Duration: time.Second, + Factor: 1.3, Jitter: 0.1, } + retriesRemaining := retry.Steps err := wait.ExponentialBackoff(retry, func() (bool, error) { err := n.configureDynamically(pcfg) if err == nil { klog.V(2).Infof("Dynamic reconfiguration succeeded.") return true, nil } - + retriesRemaining-- + if retriesRemaining > 0 { + klog.Warningf("Dynamic reconfiguration failed (retrying; %d retries left): %v", retriesRemaining, err) + return false, nil + } klog.Warningf("Dynamic reconfiguration failed: %v", err) return false, err })