Merge pull request #21 from bprashanth/default_backend_retry
Refactor default backend handling and add better events
Commit cd07b0b06d
9 changed files with 110 additions and 70 deletions
@@ -1,7 +1,7 @@
 all: push

 # 0.0 shouldn't clobber any released builds
-TAG = 0.8.0
+TAG = 0.8.1
 PREFIX = gcr.io/google_containers/glbc

 server:
@@ -327,7 +327,7 @@ So simply delete the replication controller:
 $ kubectl get rc glbc
 CONTROLLER   CONTAINER(S)           IMAGE(S)                                       SELECTOR                    REPLICAS   AGE
 glbc         default-http-backend   gcr.io/google_containers/defaultbackend:1.0    k8s-app=glbc,version=v0.5   1          2m
-             l7-lb-controller       gcr.io/google_containers/glbc:0.8.0
+             l7-lb-controller       gcr.io/google_containers/glbc:0.8.1

 $ kubectl delete rc glbc
 replicationcontroller "glbc" deleted
@@ -242,7 +242,7 @@ func (t *GCETranslator) toURLMap(ing *extensions.Ingress) (utils.GCEURLMap, erro
 				// to all other services under the assumption that the user will
 				// modify nodeport.
 				if _, ok := err.(errorNodePortNotFound); ok {
-					glog.Infof("%v", err)
+					t.recorder.Eventf(ing, api.EventTypeWarning, "Service", err.(errorNodePortNotFound).Error())
 					continue
 				}

@@ -269,6 +269,10 @@ func (t *GCETranslator) toURLMap(ing *extensions.Ingress) (utils.GCEURLMap, erro
 	}
 	defaultBackend, _ := t.toGCEBackend(ing.Spec.Backend, ing.Namespace)
 	hostPathBackend.PutDefaultBackend(defaultBackend)
+
+	if defaultBackend != nil && ing.Spec.Backend != nil {
+		t.recorder.Eventf(ing, api.EventTypeNormal, "GCE", fmt.Sprintf("default backend set to %v:%v", ing.Spec.Backend.ServiceName, defaultBackend.Port))
+	}
 	return hostPathBackend, nil
 }

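Both toURLMap hunks replace plain log lines with Kubernetes events, so the reason a backend was skipped or chosen shows up under the Ingress in `kubectl describe`. Below is a minimal sketch of how such a recorder is typically wired up; it uses today's k8s.io/client-go packages rather than the client libraries vendored in this repo at the time, and the component name and helper functions are illustrative assumptions, not the controller's actual wiring.

package glbcsketch

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/client-go/tools/record"
)

// newRecorder builds an EventRecorder that attributes events to the
// loadbalancer controller and ships them to the API server via the clientset.
func newRecorder(client kubernetes.Interface) record.EventRecorder {
	broadcaster := record.NewBroadcaster()
	broadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{
		Interface: client.CoreV1().Events(""),
	})
	return broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "loadbalancer-controller"})
}

// emitDefaultBackendEvent mirrors the Normal event added in this hunk: it is
// attached to the Ingress object and names the service/port chosen as the
// default backend.
func emitDefaultBackendEvent(recorder record.EventRecorder, ing runtime.Object, svc string, port int64) {
	recorder.Eventf(ing, v1.EventTypeNormal, "GCE", "default backend set to %v:%v", svc, port)
}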
@@ -461,40 +465,66 @@ func (t *GCETranslator) HealthCheck(port int64) (*compute.HttpHealthCheck, error
 	if err != nil {
 		return nil, err
 	}
+	var ingresses []extensions.Ingress
+	var healthCheck *compute.HttpHealthCheck
 	// Find the label and target port of the one service with the given nodePort
 	for _, s := range sl {
 		for _, p := range s.Spec.Ports {
-			if int32(port) == p.NodePort {
-				rp, err := t.getHTTPProbe(*s, p.TargetPort)
-				if err != nil {
-					return nil, err
-				}
-				if rp == nil {
-					glog.Infof("No pod in service %v with node port %v has declared a matching readiness probe for health checks.", s.Name, port)
-					break
-				}
-				healthPath := rp.Handler.HTTPGet.Path
-				// GCE requires a leading "/" for health check urls.
-				if string(healthPath[0]) != "/" {
-					healthPath = fmt.Sprintf("/%v", healthPath)
-				}
-				host := rp.Handler.HTTPGet.Host
-				glog.Infof("Found custom health check for Service %v nodeport %v: %v%v", s.Name, port, host, healthPath)
-				return &compute.HttpHealthCheck{
-					Port:               port,
-					RequestPath:        healthPath,
-					Host:               host,
-					Description:        "kubernetes L7 health check from readiness probe.",
-					CheckIntervalSec:   int64(rp.PeriodSeconds),
-					TimeoutSec:         int64(rp.TimeoutSeconds),
-					HealthyThreshold:   int64(rp.SuccessThreshold),
-					UnhealthyThreshold: int64(rp.FailureThreshold),
-					// TODO: include headers after updating compute godep.
-				}, nil
+
+			// only one Service can match this nodePort, try and look up
+			// the readiness probe of the pods behind it
+			if int32(port) != p.NodePort {
+				continue
 			}
+			rp, err := t.getHTTPProbe(*s, p.TargetPort)
+			if err != nil {
+				return nil, err
+			}
+			if rp == nil {
+				glog.Infof("No pod in service %v with node port %v has declared a matching readiness probe for health checks.", s.Name, port)
+				break
+			}
+
+			healthPath := rp.Handler.HTTPGet.Path
+			// GCE requires a leading "/" for health check urls.
+			if string(healthPath[0]) != "/" {
+				healthPath = fmt.Sprintf("/%v", healthPath)
+			}
+
+			host := rp.Handler.HTTPGet.Host
+			glog.Infof("Found custom health check for Service %v nodeport %v: %v%v", s.Name, port, host, healthPath)
+			// remember the ingresses that use this Service so we can send
+			// the right events
+			ingresses, err = t.ingLister.GetServiceIngress(s)
+			if err != nil {
+				glog.Warningf("Failed to list ingresses for service %v", s.Name)
+			}
+
+			healthCheck = &compute.HttpHealthCheck{
+				Port:               port,
+				RequestPath:        healthPath,
+				Host:               host,
+				Description:        "kubernetes L7 health check from readiness probe.",
+				// set a low health threshold and a high failure threshold.
+				// We're just trying to detect if the node networking is
+				// borked, service level outages will get detected sooner
+				// by kube-proxy.
+				CheckIntervalSec:   int64(rp.PeriodSeconds + utils.DefaultHealthCheckInterval),
+				TimeoutSec:         int64(rp.TimeoutSeconds),
+				HealthyThreshold:   utils.DefaultHealthyThreshold,
+				UnhealthyThreshold: utils.DefaultUnhealthyThreshold,
+				// TODO: include headers after updating compute godep.
+			}
+			break
 		}
 	}
-	return utils.DefaultHealthCheckTemplate(port), nil
+	if healthCheck == nil {
+		healthCheck = utils.DefaultHealthCheckTemplate(port)
+	}
+	for _, ing := range ingresses {
+		t.recorder.Eventf(&ing, api.EventTypeNormal, "GCE", fmt.Sprintf("health check using %v:%v%v", healthCheck.Host, healthCheck.Port, healthCheck.RequestPath))
+	}
+	return healthCheck, nil
 }

 // PodsByCreationTimestamp sorts a list of Pods by creation timestamp, using their names as a tie breaker.
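The rewritten HealthCheck keeps the readiness-probe lookup but no longer trusts the probe's own thresholds: the interval is padded and the healthy/unhealthy thresholds come from the new package defaults, with the stock template as a fallback and an event per affected Ingress. The following is a condensed sketch of that derivation, using locally defined stand-ins for compute.HttpHealthCheck and the pod's readiness probe so it stays self-contained; the nodePort, type names, and defaults below are illustrative, not the repo's code.

package main

import "fmt"

// httpHealthCheck mirrors the compute.HttpHealthCheck fields the controller fills in.
type httpHealthCheck struct {
	Port               int64
	RequestPath        string
	Host               string
	CheckIntervalSec   int64
	TimeoutSec         int64
	HealthyThreshold   int64
	UnhealthyThreshold int64
}

// readinessProbe is a pared-down stand-in for the pod's HTTPGet readiness probe.
type readinessProbe struct {
	Path           string
	Host           string
	PeriodSeconds  int64
	TimeoutSeconds int64
}

const (
	defaultHealthCheckInterval = 60
	defaultHealthyThreshold    = 1
	defaultUnhealthyThreshold  = 10
	defaultTimeoutSeconds      = 60
)

// healthCheckForPort returns a probe-derived check when rp is non-nil and the
// default template otherwise, mirroring the fallback introduced in this hunk.
func healthCheckForPort(port int64, rp *readinessProbe) *httpHealthCheck {
	if rp == nil {
		return &httpHealthCheck{
			Port:               port,
			CheckIntervalSec:   defaultHealthCheckInterval,
			TimeoutSec:         defaultTimeoutSeconds,
			HealthyThreshold:   defaultHealthyThreshold,
			UnhealthyThreshold: defaultUnhealthyThreshold,
		}
	}
	path := rp.Path
	if len(path) == 0 || path[0] != '/' {
		// GCE requires a leading "/" on health check URLs.
		path = "/" + path
	}
	return &httpHealthCheck{
		Port:        port,
		RequestPath: path,
		Host:        rp.Host,
		// Deliberately slow: this only needs to catch node-level breakage;
		// kube-proxy notices service-level outages much sooner.
		CheckIntervalSec:   rp.PeriodSeconds + defaultHealthCheckInterval,
		TimeoutSec:         rp.TimeoutSeconds,
		HealthyThreshold:   defaultHealthyThreshold,
		UnhealthyThreshold: defaultUnhealthyThreshold,
	}
}

func main() {
	fmt.Printf("%+v\n", healthCheckForPort(30301, &readinessProbe{Path: "healthz", PeriodSeconds: 10, TimeoutSeconds: 5}))
}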
@@ -101,9 +101,12 @@ func (i *Instances) DeleteInstanceGroup(name string) error {
 		return err
 	}
 	for _, zone := range zones {
-		glog.Infof("Deleting instance group %v in zone %v", name, zone)
 		if err := i.cloud.DeleteInstanceGroup(name, zone); err != nil {
-			errs = append(errs, err)
+			if !utils.IsHTTPErrorCode(err, http.StatusNotFound) {
+				errs = append(errs, err)
+			}
+		} else {
+			glog.Infof("Deleted instance group %v in zone %v", name, zone)
 		}
 	}
 	if len(errs) == 0 {
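The DeleteInstanceGroup change only records an error when it is not a 404, so deleting an instance group that is already gone counts as success. A sketch of the kind of helper utils.IsHTTPErrorCode refers to follows, assuming the GCE client surfaces failures as *googleapi.Error; the function bodies here are an illustration, not the repo's implementation.

package glbcsketch

import (
	"net/http"

	"google.golang.org/api/googleapi"
)

// isHTTPErrorCode reports whether err is a GCE API error carrying the given
// HTTP status code, e.g. http.StatusNotFound for a resource that no longer exists.
func isHTTPErrorCode(err error, code int) bool {
	apiErr, ok := err.(*googleapi.Error)
	return ok && apiErr.Code == code
}

// deleteIgnoringNotFound shows the intended call pattern: a 404 on delete is
// treated as "already deleted" rather than a sync failure.
func deleteIgnoringNotFound(del func() error) error {
	if err := del(); err != nil && !isHTTPErrorCode(err, http.StatusNotFound) {
		return err
	}
	return nil
}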
@@ -88,19 +88,8 @@ func NewLoadBalancerPool(
 }

 func (l *L7s) create(ri *L7RuntimeInfo) (*L7, error) {
-	// Lazily create a default backend so we don't tax users who don't care
-	// about Ingress by consuming 1 of their 3 GCE BackendServices. This
-	// BackendService is deleted when there are no more Ingresses, either
-	// through Sync or Shutdown.
 	if l.glbcDefaultBackend == nil {
-		err := l.defaultBackendPool.Add(l.defaultBackendNodePort)
-		if err != nil {
-			return nil, err
-		}
-		l.glbcDefaultBackend, err = l.defaultBackendPool.Get(l.defaultBackendNodePort)
-		if err != nil {
-			return nil, err
-		}
+		glog.Warningf("Creating l7 without a default backend")
 	}
 	return &L7{
 		runtimeInfo: ri,
@@ -175,25 +164,25 @@ func (l *L7s) Delete(name string) error {
 func (l *L7s) Sync(lbs []*L7RuntimeInfo) error {
 	glog.V(3).Infof("Creating loadbalancers %+v", lbs)

-	// The default backend is completely managed by the l7 pool.
-	// This includes recreating it if it's deleted, or fixing broken links.
-	if err := l.defaultBackendPool.Add(l.defaultBackendNodePort); err != nil {
-		return err
+	if len(lbs) != 0 {
+		// Lazily create a default backend so we don't tax users who don't care
+		// about Ingress by consuming 1 of their 3 GCE BackendServices. This
+		// BackendService is GC'd when there are no more Ingresses.
+		if err := l.defaultBackendPool.Add(l.defaultBackendNodePort); err != nil {
+			return err
+		}
+		defaultBackend, err := l.defaultBackendPool.Get(l.defaultBackendNodePort)
+		if err != nil {
+			return err
+		}
+		l.glbcDefaultBackend = defaultBackend
 	}
-	// create new loadbalancers, perform an edge hop for existing
+	// create new loadbalancers, validate existing
 	for _, ri := range lbs {
 		if err := l.Add(ri); err != nil {
 			return err
 		}
 	}
-	// Tear down the default backend when there are no more loadbalancers
-	// because the cluster could go down anytime and we'd leak it otherwise.
-	if len(lbs) == 0 {
-		if err := l.defaultBackendPool.Delete(l.defaultBackendNodePort); err != nil {
-			return err
-		}
-		l.glbcDefaultBackend = nil
-	}
 	return nil
 }

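With create() no longer allocating the default backend, Sync becomes the single place it is brought into existence, and only when at least one loadbalancer is being synced. The sketch below compresses that lazy-ensure step; backendPool and the backend struct are hypothetical stand-ins for the repo's backend pool, not its actual interface.

package glbcsketch

// backend is a stand-in for the GCE BackendService the pool hands back.
type backend struct{ SelfLink string }

// backendPool is a hypothetical slice of the repo's backend pool interface.
type backendPool interface {
	Add(nodePort int64) error
	Get(nodePort int64) (*backend, error)
	Delete(nodePort int64) error
}

// ensureDefaultBackend creates (or repairs) the shared default backend only
// when there is at least one loadbalancer to serve, so idle clusters don't
// consume one of their GCE BackendService quota slots.
func ensureDefaultBackend(pool backendPool, nodePort int64, lbCount int) (*backend, error) {
	if lbCount == 0 {
		return nil, nil
	}
	if err := pool.Add(nodePort); err != nil {
		return nil, err
	}
	return pool.Get(nodePort)
}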
@@ -215,6 +204,15 @@ func (l *L7s) GC(names []string) error {
 			return err
 		}
 	}
+	// Tear down the default backend when there are no more loadbalancers.
+	// This needs to happen after we've deleted all url-maps that might be
+	// using it.
+	if len(names) == 0 {
+		if err := l.defaultBackendPool.Delete(l.defaultBackendNodePort); err != nil {
+			return err
+		}
+		l.glbcDefaultBackend = nil
+	}
 	return nil
 }

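The teardown half of the default backend's lifecycle moves into GC, so the backend is released only after every url-map that might still reference it has been deleted. A small self-contained sketch of that ordering check follows; deleteBackend is a hypothetical hook standing in for defaultBackendPool.Delete.

package glbcsketch

// gcDefaultBackend releases the shared default backend once no named
// loadbalancers remain. Callers must already have removed any url-maps that
// reference it, otherwise GCE refuses the delete.
func gcDefaultBackend(remainingLBs int, nodePort int64, deleteBackend func(nodePort int64) error) error {
	if remainingLBs != 0 {
		return nil
	}
	return deleteBackend(nodePort)
}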
@@ -586,7 +584,7 @@ func (l *L7) edgeHop() error {
 		}
 	}
 	if l.runtimeInfo.TLS != nil {
-		glog.V(3).Infof("Edge hopping https for %v", l.Name)
+		glog.V(3).Infof("validating https for %v", l.Name)
 		if err := l.edgeHopHttps(); err != nil {
 			return err
 		}
@@ -696,7 +694,7 @@ func (l *L7) UpdateUrlMap(ingressRules utils.GCEURLMap) error {
 	} else {
 		l.um.DefaultService = l.glbcDefaultBackend.SelfLink
 	}
-	glog.V(3).Infof("Updating url map %+v", ingressRules)
+	glog.Infof("Updating url map:\n%+v", ingressRules)

 	// Every update replaces the entire urlmap.
 	// TODO: when we have multiple loadbalancers point to a single gce url map
@@ -61,7 +61,7 @@ const (
 	alphaNumericChar = "0"

 	// Current docker image version. Only used in debug logging.
-	imageVersion = "glbc:0.8.0"
+	imageVersion = "glbc:0.8.1"

 	// Key used to persist UIDs to configmaps.
 	uidConfigMapName = "ingress-uid"
@@ -24,18 +24,18 @@ metadata:
   name: l7-lb-controller
   labels:
     k8s-app: glbc
-    version: v0.6.2
+    version: v0.8.1
 spec:
   # There should never be more than 1 controller alive simultaneously.
   replicas: 1
   selector:
     k8s-app: glbc
-    version: v0.6.2
+    version: v0.8.1
   template:
     metadata:
       labels:
         k8s-app: glbc
-        version: v0.6.2
+        version: v0.8.1
       name: glbc
     spec:
       terminationGracePeriodSeconds: 600
@@ -61,7 +61,7 @@ spec:
           requests:
             cpu: 10m
             memory: 20Mi
-      - image: gcr.io/google_containers/glbc:0.8.0
+      - image: gcr.io/google_containers/glbc:0.8.1
         livenessProbe:
           httpGet:
             path: /healthz
@@ -103,7 +103,7 @@ func (c *CloudListingPool) ReplenishPool() {
 	for i := range items {
 		key, err := c.keyGetter(items[i])
 		if err != nil {
-			glog.V(4).Infof("CloudListingPool: %v", err)
+			glog.V(5).Infof("CloudListingPool: %v", err)
 			continue
 		}
 		c.InMemoryPool.Add(key, items[i])
@@ -79,6 +79,15 @@ const (
 	// K8sAnnotationPrefix is the prefix used in annotations used to record
 	// debug information in the Ingress annotations.
 	K8sAnnotationPrefix = "ingress.kubernetes.io"
+
+	// DefaultHealthCheckInterval defines how frequently a probe runs
+	DefaultHealthCheckInterval = 60
+	// DefaultHealthyThreshold defines the threshold of success probes that declare a backend "healthy"
+	DefaultHealthyThreshold = 1
+	// DefaultUnhealthyThreshold defines the threshold of failure probes that declare a backend "unhealthy"
+	DefaultUnhealthyThreshold = 10
+	// DefaultTimeoutSeconds defines the timeout of each probe
+	DefaultTimeoutSeconds = 60
 )

 // Namer handles centralized naming for the cluster.
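The new defaults make the GCE check intentionally sluggish: one probe a minute and ten consecutive failures before a backend is marked unhealthy, roughly a ten-minute worst-case detection window for node-level breakage. A tiny sketch of that arithmetic, restating the constants added above.

package main

import "fmt"

const (
	defaultHealthCheckInterval = 60 // seconds between probes
	defaultUnhealthyThreshold  = 10 // consecutive failures before "unhealthy"
)

func main() {
	// Worst-case time for the GCE health check alone to notice a dead node;
	// kube-proxy is expected to catch service-level outages much sooner.
	fmt.Printf("~%d seconds\n", defaultHealthCheckInterval*defaultUnhealthyThreshold) // ~600s
}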
@@ -305,12 +314,12 @@ func DefaultHealthCheckTemplate(port int64) *compute.HttpHealthCheck {
 		RequestPath: "",
 		Description: "Default kubernetes L7 Loadbalancing health check.",
 		// How often to health check.
-		CheckIntervalSec: 1,
+		CheckIntervalSec: DefaultHealthCheckInterval,
 		// How long to wait before claiming failure of a health check.
-		TimeoutSec: 1,
+		TimeoutSec: DefaultTimeoutSeconds,
 		// Number of healthchecks to pass for a vm to be deemed healthy.
-		HealthyThreshold: 1,
+		HealthyThreshold: DefaultHealthyThreshold,
 		// Number of healthchecks to fail before the vm is deemed unhealthy.
-		UnhealthyThreshold: 10,
+		UnhealthyThreshold: DefaultUnhealthyThreshold,
 	}
 }