diff --git a/controllers/gce/Makefile b/controllers/gce/Makefile
index c373da02a..bf049960d 100644
--- a/controllers/gce/Makefile
+++ b/controllers/gce/Makefile
@@ -1,7 +1,7 @@
 all: push
 
 # 0.0 shouldn't clobber any released builds
-TAG = 0.8.0
+TAG = 0.8.1
 PREFIX = gcr.io/google_containers/glbc
 
 server:
diff --git a/controllers/gce/README.md b/controllers/gce/README.md
index ade420a0e..600c77b58 100644
--- a/controllers/gce/README.md
+++ b/controllers/gce/README.md
@@ -327,7 +327,7 @@ So simply delete the replication controller:
 $ kubectl get rc glbc
 CONTROLLER   CONTAINER(S)           IMAGE(S)                                       SELECTOR                    REPLICAS   AGE
 glbc         default-http-backend   gcr.io/google_containers/defaultbackend:1.0    k8s-app=glbc,version=v0.5   1          2m
-             l7-lb-controller       gcr.io/google_containers/glbc:0.8.0
+             l7-lb-controller       gcr.io/google_containers/glbc:0.8.1
 
 $ kubectl delete rc glbc
 replicationcontroller "glbc" deleted
diff --git a/controllers/gce/controller/utils.go b/controllers/gce/controller/utils.go
index e373205b3..930d6bffa 100644
--- a/controllers/gce/controller/utils.go
+++ b/controllers/gce/controller/utils.go
@@ -242,7 +242,7 @@ func (t *GCETranslator) toURLMap(ing *extensions.Ingress) (utils.GCEURLMap, erro
 			// to all other services under the assumption that the user will
 			// modify nodeport.
 			if _, ok := err.(errorNodePortNotFound); ok {
-				glog.Infof("%v", err)
+				t.recorder.Eventf(ing, api.EventTypeWarning, "Service", err.(errorNodePortNotFound).Error())
 				continue
 			}
 
@@ -269,6 +269,10 @@ func (t *GCETranslator) toURLMap(ing *extensions.Ingress) (utils.GCEURLMap, erro
 	}
 	defaultBackend, _ := t.toGCEBackend(ing.Spec.Backend, ing.Namespace)
 	hostPathBackend.PutDefaultBackend(defaultBackend)
+
+	if defaultBackend != nil && ing.Spec.Backend != nil {
+		t.recorder.Eventf(ing, api.EventTypeNormal, "GCE", fmt.Sprintf("default backend set to %v:%v", ing.Spec.Backend.ServiceName, defaultBackend.Port))
+	}
 	return hostPathBackend, nil
 }
 
@@ -461,40 +465,66 @@ func (t *GCETranslator) HealthCheck(port int64) (*compute.HttpHealthCheck, error
 	if err != nil {
 		return nil, err
 	}
+	var ingresses []extensions.Ingress
+	var healthCheck *compute.HttpHealthCheck
 	// Find the label and target port of the one service with the given nodePort
 	for _, s := range sl {
 		for _, p := range s.Spec.Ports {
-			if int32(port) == p.NodePort {
-				rp, err := t.getHTTPProbe(*s, p.TargetPort)
-				if err != nil {
-					return nil, err
-				}
-				if rp == nil {
-					glog.Infof("No pod in service %v with node port %v has declared a matching readiness probe for health checks.", s.Name, port)
-					break
-				}
-				healthPath := rp.Handler.HTTPGet.Path
-				// GCE requires a leading "/" for health check urls.
-				if string(healthPath[0]) != "/" {
-					healthPath = fmt.Sprintf("/%v", healthPath)
-				}
-				host := rp.Handler.HTTPGet.Host
-				glog.Infof("Found custom health check for Service %v nodeport %v: %v%v", s.Name, port, host, healthPath)
-				return &compute.HttpHealthCheck{
-					Port:               port,
-					RequestPath:        healthPath,
-					Host:               host,
-					Description:        "kubernetes L7 health check from readiness probe.",
-					CheckIntervalSec:   int64(rp.PeriodSeconds),
-					TimeoutSec:         int64(rp.TimeoutSeconds),
-					HealthyThreshold:   int64(rp.SuccessThreshold),
-					UnhealthyThreshold: int64(rp.FailureThreshold),
-					// TODO: include headers after updating compute godep.
-				}, nil
+
+			// only one Service can match this nodePort, try and look up
+			// the readiness probe of the pods behind it
+			if int32(port) != p.NodePort {
+				continue
 			}
+			rp, err := t.getHTTPProbe(*s, p.TargetPort)
+			if err != nil {
+				return nil, err
+			}
+			if rp == nil {
+				glog.Infof("No pod in service %v with node port %v has declared a matching readiness probe for health checks.", s.Name, port)
+				break
+			}
+
+			healthPath := rp.Handler.HTTPGet.Path
+			// GCE requires a leading "/" for health check urls.
+			if string(healthPath[0]) != "/" {
+				healthPath = fmt.Sprintf("/%v", healthPath)
+			}
+
+			host := rp.Handler.HTTPGet.Host
+			glog.Infof("Found custom health check for Service %v nodeport %v: %v%v", s.Name, port, host, healthPath)
+			// remember the ingresses that use this Service so we can send
+			// the right events
+			ingresses, err = t.ingLister.GetServiceIngress(s)
+			if err != nil {
+				glog.Warningf("Failed to list ingresses for service %v", s.Name)
+			}
+
+			healthCheck = &compute.HttpHealthCheck{
+				Port:        port,
+				RequestPath: healthPath,
+				Host:        host,
+				Description: "kubernetes L7 health check from readiness probe.",
+				// set a low health threshold and a high failure threshold.
+				// We're just trying to detect if the node networking is
+				// borked, service level outages will get detected sooner
+				// by kube-proxy.
+				CheckIntervalSec:   int64(rp.PeriodSeconds + utils.DefaultHealthCheckInterval),
+				TimeoutSec:         int64(rp.TimeoutSeconds),
+				HealthyThreshold:   utils.DefaultHealthyThreshold,
+				UnhealthyThreshold: utils.DefaultUnhealthyThreshold,
+				// TODO: include headers after updating compute godep.
+			}
+			break
 		}
 	}
-	return utils.DefaultHealthCheckTemplate(port), nil
+	if healthCheck == nil {
+		healthCheck = utils.DefaultHealthCheckTemplate(port)
+	}
+	for _, ing := range ingresses {
+		t.recorder.Eventf(&ing, api.EventTypeNormal, "GCE", fmt.Sprintf("health check using %v:%v%v", healthCheck.Host, healthCheck.Port, healthCheck.RequestPath))
+	}
+	return healthCheck, nil
 }
 
 // PodsByCreationTimestamp sorts a list of Pods by creation timestamp, using their names as a tie breaker.
diff --git a/controllers/gce/instances/instances.go b/controllers/gce/instances/instances.go
index c8279b3c6..1fa21cba5 100644
--- a/controllers/gce/instances/instances.go
+++ b/controllers/gce/instances/instances.go
@@ -101,9 +101,12 @@ func (i *Instances) DeleteInstanceGroup(name string) error {
 		return err
 	}
 	for _, zone := range zones {
-		glog.Infof("Deleting instance group %v in zone %v", name, zone)
 		if err := i.cloud.DeleteInstanceGroup(name, zone); err != nil {
-			errs = append(errs, err)
+			if !utils.IsHTTPErrorCode(err, http.StatusNotFound) {
+				errs = append(errs, err)
+			}
+		} else {
+			glog.Infof("Deleted instance group %v in zone %v", name, zone)
 		}
 	}
 	if len(errs) == 0 {
diff --git a/controllers/gce/loadbalancers/loadbalancers.go b/controllers/gce/loadbalancers/loadbalancers.go
index 89b9f9b25..9f98b389b 100644
--- a/controllers/gce/loadbalancers/loadbalancers.go
+++ b/controllers/gce/loadbalancers/loadbalancers.go
@@ -88,19 +88,8 @@ func NewLoadBalancerPool(
 }
 
 func (l *L7s) create(ri *L7RuntimeInfo) (*L7, error) {
-	// Lazily create a default backend so we don't tax users who don't care
-	// about Ingress by consuming 1 of their 3 GCE BackendServices. This
-	// BackendService is deleted when there are no more Ingresses, either
-	// through Sync or Shutdown.
 	if l.glbcDefaultBackend == nil {
-		err := l.defaultBackendPool.Add(l.defaultBackendNodePort)
-		if err != nil {
-			return nil, err
-		}
-		l.glbcDefaultBackend, err = l.defaultBackendPool.Get(l.defaultBackendNodePort)
-		if err != nil {
-			return nil, err
-		}
+		glog.Warningf("Creating l7 without a default backend")
 	}
 	return &L7{
 		runtimeInfo: ri,
@@ -175,25 +164,25 @@ func (l *L7s) Delete(name string) error {
 func (l *L7s) Sync(lbs []*L7RuntimeInfo) error {
 	glog.V(3).Infof("Creating loadbalancers %+v", lbs)
 
-	// The default backend is completely managed by the l7 pool.
-	// This includes recreating it if it's deleted, or fixing broken links.
-	if err := l.defaultBackendPool.Add(l.defaultBackendNodePort); err != nil {
-		return err
+	if len(lbs) != 0 {
+		// Lazily create a default backend so we don't tax users who don't care
+		// about Ingress by consuming 1 of their 3 GCE BackendServices. This
+		// BackendService is GC'd when there are no more Ingresses.
+		if err := l.defaultBackendPool.Add(l.defaultBackendNodePort); err != nil {
+			return err
+		}
+		defaultBackend, err := l.defaultBackendPool.Get(l.defaultBackendNodePort)
+		if err != nil {
+			return err
+		}
+		l.glbcDefaultBackend = defaultBackend
 	}
-	// create new loadbalancers, perform an edge hop for existing
+	// create new loadbalancers, validate existing
 	for _, ri := range lbs {
 		if err := l.Add(ri); err != nil {
 			return err
 		}
 	}
-	// Tear down the default backend when there are no more loadbalancers
-	// because the cluster could go down anytime and we'd leak it otherwise.
-	if len(lbs) == 0 {
-		if err := l.defaultBackendPool.Delete(l.defaultBackendNodePort); err != nil {
-			return err
-		}
-		l.glbcDefaultBackend = nil
-	}
 	return nil
 }
 
@@ -215,6 +204,15 @@ func (l *L7s) GC(names []string) error {
 			return err
 		}
 	}
+	// Tear down the default backend when there are no more loadbalancers.
+	// This needs to happen after we've deleted all url-maps that might be
+	// using it.
+	if len(names) == 0 {
+		if err := l.defaultBackendPool.Delete(l.defaultBackendNodePort); err != nil {
+			return err
+		}
+		l.glbcDefaultBackend = nil
+	}
 	return nil
 }
 
@@ -586,7 +584,7 @@ func (l *L7) edgeHop() error {
 		}
 	}
 	if l.runtimeInfo.TLS != nil {
-		glog.V(3).Infof("Edge hopping https for %v", l.Name)
+		glog.V(3).Infof("validating https for %v", l.Name)
 		if err := l.edgeHopHttps(); err != nil {
 			return err
 		}
@@ -696,7 +694,7 @@ func (l *L7) UpdateUrlMap(ingressRules utils.GCEURLMap) error {
 	} else {
 		l.um.DefaultService = l.glbcDefaultBackend.SelfLink
 	}
-	glog.V(3).Infof("Updating url map %+v", ingressRules)
+	glog.Infof("Updating url map:\n%+v", ingressRules)
 
 	// Every update replaces the entire urlmap.
 	// TODO: when we have multiple loadbalancers point to a single gce url map
diff --git a/controllers/gce/main.go b/controllers/gce/main.go
index 385eef6e6..ff3d34191 100644
--- a/controllers/gce/main.go
+++ b/controllers/gce/main.go
@@ -61,7 +61,7 @@ const (
 	alphaNumericChar = "0"
 
 	// Current docker image version. Only used in debug logging.
-	imageVersion = "glbc:0.8.0"
+	imageVersion = "glbc:0.8.1"
 
 	// Key used to persist UIDs to configmaps.
 	uidConfigMapName = "ingress-uid"
diff --git a/controllers/gce/rc.yaml b/controllers/gce/rc.yaml
index 19f1d4f9a..ba863b12f 100644
--- a/controllers/gce/rc.yaml
+++ b/controllers/gce/rc.yaml
@@ -24,18 +24,18 @@ metadata:
   name: l7-lb-controller
   labels:
     k8s-app: glbc
-    version: v0.6.2
+    version: v0.8.1
 spec:
   # There should never be more than 1 controller alive simultaneously.
   replicas: 1
   selector:
     k8s-app: glbc
-    version: v0.6.2
+    version: v0.8.1
   template:
     metadata:
       labels:
         k8s-app: glbc
-        version: v0.6.2
+        version: v0.8.1
       name: glbc
     spec:
       terminationGracePeriodSeconds: 600
@@ -61,7 +61,7 @@ spec:
         requests:
           cpu: 10m
           memory: 20Mi
-      - image: gcr.io/google_containers/glbc:0.8.0
+      - image: gcr.io/google_containers/glbc:0.8.1
         livenessProbe:
           httpGet:
             path: /healthz
diff --git a/controllers/gce/storage/pools.go b/controllers/gce/storage/pools.go
index 5bbce508a..df4d1d885 100644
--- a/controllers/gce/storage/pools.go
+++ b/controllers/gce/storage/pools.go
@@ -103,7 +103,7 @@ func (c *CloudListingPool) ReplenishPool() {
 	for i := range items {
 		key, err := c.keyGetter(items[i])
 		if err != nil {
-			glog.V(4).Infof("CloudListingPool: %v", err)
+			glog.V(5).Infof("CloudListingPool: %v", err)
 			continue
 		}
 		c.InMemoryPool.Add(key, items[i])
diff --git a/controllers/gce/utils/utils.go b/controllers/gce/utils/utils.go
index b822f353e..7e7a21bbb 100644
--- a/controllers/gce/utils/utils.go
+++ b/controllers/gce/utils/utils.go
@@ -79,6 +79,15 @@ const (
 	// K8sAnnotationPrefix is the prefix used in annotations used to record
 	// debug information in the Ingress annotations.
 	K8sAnnotationPrefix = "ingress.kubernetes.io"
+
+	// DefaultHealthCheckInterval defines how frequently a probe runs
+	DefaultHealthCheckInterval = 60
+	// DefaultHealthyThreshold defines the threshold of success probes that declare a backend "healthy"
+	DefaultHealthyThreshold = 1
+	// DefaultUnhealthyThreshold defines the threshold of failure probes that declare a backend "unhealthy"
+	DefaultUnhealthyThreshold = 10
+	// DefaultTimeoutSeconds defines the timeout of each probe
+	DefaultTimeoutSeconds = 60
 )
 
 // Namer handles centralized naming for the cluster.
@@ -305,12 +314,12 @@ func DefaultHealthCheckTemplate(port int64) *compute.HttpHealthCheck {
 		RequestPath: "",
 		Description: "Default kubernetes L7 Loadbalancing health check.",
 		// How often to health check.
-		CheckIntervalSec: 1,
+		CheckIntervalSec: DefaultHealthCheckInterval,
 		// How long to wait before claiming failure of a health check.
-		TimeoutSec: 1,
+		TimeoutSec: DefaultTimeoutSeconds,
 		// Number of healthchecks to pass for a vm to be deemed healthy.
-		HealthyThreshold: 1,
+		HealthyThreshold: DefaultHealthyThreshold,
 		// Number of healthchecks to fail before the vm is deemed unhealthy.
-		UnhealthyThreshold: 10,
+		UnhealthyThreshold: DefaultUnhealthyThreshold,
 	}
 }
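
For reviewers, here is a minimal standalone sketch of the health-check behaviour this patch introduces. The fromReadinessProbe helper and the trimmed-down struct below are illustrative stand-ins for the controller's HealthCheck() and compute.HttpHealthCheck (they are not part of the patch): a pod readiness probe is folded into the new defaults, so the GCE check interval becomes the probe period plus DefaultHealthCheckInterval, while the healthy/unhealthy thresholds are fixed instead of being copied from the probe, because kube-proxy catches service-level outages sooner than GCE health checks would.

package main

import "fmt"

// Simplified stand-in for compute.HttpHealthCheck; only the fields this
// patch touches are shown.
type httpHealthCheck struct {
	Port               int64
	RequestPath        string
	CheckIntervalSec   int64
	TimeoutSec         int64
	HealthyThreshold   int64
	UnhealthyThreshold int64
}

// Defaults mirroring the new constants in controllers/gce/utils/utils.go.
const (
	defaultHealthCheckInterval = 60
	defaultHealthyThreshold    = 1
	defaultUnhealthyThreshold  = 10
)

// fromReadinessProbe is a hypothetical helper that mirrors the new
// HealthCheck() logic: stretch the probe period by the default interval and
// use the fixed thresholds rather than the probe's success/failure counts.
func fromReadinessProbe(port int64, path string, periodSeconds, timeoutSeconds int32) httpHealthCheck {
	return httpHealthCheck{
		Port:               port,
		RequestPath:        path,
		CheckIntervalSec:   int64(periodSeconds) + defaultHealthCheckInterval,
		TimeoutSec:         int64(timeoutSeconds),
		HealthyThreshold:   defaultHealthyThreshold,
		UnhealthyThreshold: defaultUnhealthyThreshold,
	}
}

func main() {
	// A readiness probe that runs every 10s with a 5s timeout now yields a
	// 70s GCE check interval instead of tracking the probe exactly.
	hc := fromReadinessProbe(30301, "/healthz", 10, 5)
	fmt.Printf("%+v\n", hc)
}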