From 24d9aada1171d8f2cca163c37fdb797b69002731 Mon Sep 17 00:00:00 2001
From: bprashanth
Date: Wed, 8 Feb 2017 13:54:27 -0800
Subject: [PATCH 1/4] Set balancing mode

---
 controllers/gce/backends/backends.go | 77 +++++++++++++++++++++++-----
 1 file changed, 64 insertions(+), 13 deletions(-)

diff --git a/controllers/gce/backends/backends.go b/controllers/gce/backends/backends.go
index 18686b968..3f2e9ca35 100644
--- a/controllers/gce/backends/backends.go
+++ b/controllers/gce/backends/backends.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"net/http"
 	"strconv"
+	"strings"
 	"time"
 
 	"k8s.io/kubernetes/pkg/util/sets"
@@ -32,6 +33,41 @@ import (
 	"k8s.io/ingress/controllers/gce/utils"
 )
 
+// BalancingMode represents the loadbalancing configuration of an individual
+// Backend in a BackendService. This is *effectively* a cluster wide setting
+// since you can't mix modes across Backends pointing to the same IG, and you
+// can't have a single node in more than 1 loadbalanced IG.
+type BalancingMode string
+
+const (
+	// Rate balances incoming requests based on observed RPS.
+	// As of this writing, it's the only balancing mode supported by GCE's
+	// internal LB. This setting doesn't make sense for Kubernetes clusters
+	// because requests can get proxied between instance groups in different
+	// zones by kube-proxy without GCE even knowing it. Setting equal RPS on
+	// all IGs should achieve roughly equal distribution of requests.
+	Rate BalancingMode = "RATE"
+	// Utilization balances incoming requests based on observed utilization.
+	// This mode is only useful if you want to divert traffic away from IGs
+	// running other compute intensive workloads. Utilization statistics are
+	// aggregated per instance, not per container, and requests can get proxied
+	// between instance groups in different zones by kube-proxy without GCE even
+	// knowing about it.
+	Utilization BalancingMode = "UTILIZATION"
+	// Connections balances incoming requests based on a connection counter.
+	// This setting currently doesn't make sense for Kubernetes clusters,
+	// because we use NodePort Services as HTTP LB backends, so GCE's connection
+	// counters don't accurately represent connections per container.
+	Connections BalancingMode = "CONNECTION"
+)
+
+// maxRPS is the RPS setting for all Backends with BalancingMode RATE. The exact
+// value doesn't matter, as long as it's the same for all Backends. Requests
+// received by GCLB above this RPS are NOT dropped, GCLB continues to distribute
+// them across IGs.
+// TODO: Should this be math.MaxInt64?
+const maxRPS = 1
+
 // Backends implements BackendPool.
 type Backends struct {
 	cloud BackendServices
@@ -116,20 +152,35 @@ func (b *Backends) create(igs []*compute.InstanceGroup, namedPort *compute.Named
 	if err != nil {
 		return nil, err
 	}
-	// Create a new backend
-	backend := &compute.BackendService{
-		Name:     name,
-		Protocol: "HTTP",
-		Backends: getBackendsForIGs(igs),
-		// Api expects one, means little to kubernetes.
-		HealthChecks: []string{hc.SelfLink},
-		Port:         namedPort.Port,
-		PortName:     namedPort.Name,
+	errs := []string{}
+	for _, bm := range []BalancingMode{Rate, Utilization} {
+		backends := getBackendsForIGs(igs)
+		for _, b := range backends {
+			switch bm {
+			case Rate:
+				b.MaxRate = maxRPS
+			default:
+				// TODO: Set utilization and connection limits when we accept them
+				// as valid fields.
+			}
+			b.BalancingMode = string(bm)
+		}
+		// Create a new backend
+		backend := &compute.BackendService{
+			Name:         name,
+			Protocol:     "HTTP",
+			Backends:     backends,
+			HealthChecks: []string{hc.SelfLink},
+			Port:         namedPort.Port,
+			PortName:     namedPort.Name,
+		}
+		if err := b.cloud.CreateBackendService(backend); err != nil {
+			glog.Infof("Error creating backend service with balancing mode %v", bm)
+			errs = append(errs, fmt.Sprintf("%v", err))
+		}
+		return b.Get(namedPort.Port)
 	}
-	if err := b.cloud.CreateBackendService(backend); err != nil {
-		return nil, err
-	}
-	return b.Get(namedPort.Port)
+	return nil, fmt.Errorf("%v", strings.Join(errs, "\n"))
 }
 
 // Add will get or create a Backend for the given port.
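The create loop in patch 1 works by shaping one `compute.Backend` per instance group before handing the `BackendService` to GCE. A minimal standalone sketch of that shaping step, using the real `google.golang.org/api/compute/v1` types but with a hypothetical `backendsFor` helper and a made-up instance-group URL standing in for `getBackendsForIGs` and real IGs:

```go
package main

import (
	"fmt"

	compute "google.golang.org/api/compute/v1"
)

// BalancingMode mirrors the string type the patch introduces.
type BalancingMode string

const (
	// Rate and Utilization mirror the patch's RATE/UTILIZATION constants.
	Rate        BalancingMode = "RATE"
	Utilization BalancingMode = "UTILIZATION"
	// maxRPS mirrors the patch's token RATE cap; only sameness across
	// Backends matters, not the value.
	maxRPS = 1
)

// backendsFor is a hypothetical stand-in for getBackendsForIGs plus the
// mode-setting loop: one compute.Backend per instance group link, with
// RATE backends carrying the token maxRPS cap.
func backendsFor(igLinks []string, bm BalancingMode) []*compute.Backend {
	var backends []*compute.Backend
	for _, link := range igLinks {
		b := &compute.Backend{Group: link, BalancingMode: string(bm)}
		if bm == Rate {
			b.MaxRate = maxRPS
		}
		backends = append(backends, b)
	}
	return backends
}

func main() {
	// A made-up instance group URL, purely for illustration.
	igs := []string{"https://www.googleapis.com/compute/v1/projects/p/zones/zone-a/instanceGroups/ig"}
	for _, b := range backendsFor(igs, Rate) {
		fmt.Printf("%s -> %s (maxRate=%d)\n", b.Group, b.BalancingMode, b.MaxRate)
	}
}
```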
From bc8b658a5c72b0074fd2eb0b308f414b987ea822 Mon Sep 17 00:00:00 2001
From: bprashanth
Date: Wed, 8 Feb 2017 14:38:51 -0800
Subject: [PATCH 2/4] Be more specific about the type of error to retry on

---
 controllers/gce/backends/backends.go | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/controllers/gce/backends/backends.go b/controllers/gce/backends/backends.go
index 3f2e9ca35..07f0b3fae 100644
--- a/controllers/gce/backends/backends.go
+++ b/controllers/gce/backends/backends.go
@@ -153,6 +153,12 @@ func (b *Backends) create(igs []*compute.InstanceGroup, namedPort *compute.Named
 		return nil, err
 	}
 	errs := []string{}
+	// We first try to create the backend with balancingMode=RATE. If this
+	// fails, it's most likely because there are existing backends with
+	// balancingMode=UTILIZATION. This failure mode throws a googleapi error
+	// which wraps an HTTP 400 status code. We handle it in the loop below
+	// and come around to retry with the right balancing mode. The goal is to
+	// switch everyone to using RATE.
 	for _, bm := range []BalancingMode{Rate, Utilization} {
 		backends := getBackendsForIGs(igs)
 		for _, b := range backends {
@@ -175,8 +181,16 @@ func (b *Backends) create(igs []*compute.InstanceGroup, namedPort *compute.Named
 			PortName:     namedPort.Name,
 		}
 		if err := b.cloud.CreateBackendService(backend); err != nil {
-			glog.Infof("Error creating backend service with balancing mode %v", bm)
-			errs = append(errs, fmt.Sprintf("%v", err))
+			// This is probably a failure because we tried to create the backend
+			// with balancingMode=RATE when there are already backends with
+			// balancingMode=UTILIZATION. Just ignore it and retry setting
+			// balancingMode=UTILIZATION (b/35102911).
+			if utils.IsHTTPErrorCode(err, http.StatusBadRequest) {
+				glog.Infof("Error creating backend service with balancing mode %v:%v", bm, err)
+				errs = append(errs, fmt.Sprintf("%v", err))
+				continue
+			}
+			return nil, err
 		}
 		return b.Get(namedPort.Port)
 	}
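The fallback in patch 2 hinges on recognizing the HTTP 400 that GCE wraps in a `googleapi.Error`. That check is delegated to `utils.IsHTTPErrorCode`; the sketch below is a functionally equivalent stand-in (an assumption about its behavior, not the package's actual source):

```go
package main

import (
	"fmt"
	"net/http"

	"google.golang.org/api/googleapi"
)

// isHTTPErrorCode reports whether err is a *googleapi.Error carrying the
// given HTTP status code, which is what the balancing-mode fallback keys on.
func isHTTPErrorCode(err error, code int) bool {
	apiErr, ok := err.(*googleapi.Error)
	return ok && apiErr.Code == code
}

func main() {
	// A fabricated error of the shape GCE returns when RATE and UTILIZATION
	// backends are mixed on one instance group.
	err := &googleapi.Error{Code: http.StatusBadRequest, Message: "Invalid value for field resource.backends[0].balancingMode"}
	// Prints true: the create loop records the error and retries with
	// the other balancing mode instead of bailing out.
	fmt.Println(isHTTPErrorCode(err, http.StatusBadRequest))
}
```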
From 3f618d7dca85fee9eac0de00a1dcaa998719d4f2 Mon Sep 17 00:00:00 2001
From: bprashanth
Date: Wed, 8 Feb 2017 17:23:15 -0800
Subject: [PATCH 3/4] Add unittest

---
 controllers/gce/backends/backends_test.go   | 50 +++++++++++++++++--
 controllers/gce/backends/fakes.go           |  9 +++-
 controllers/gce/controller/fakes.go         |  3 +-
 .../gce/loadbalancers/loadbalancers_test.go |  2 +-
 4 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/controllers/gce/backends/backends_test.go b/controllers/gce/backends/backends_test.go
index 951cbb1cd..08afd35f6 100644
--- a/controllers/gce/backends/backends_test.go
+++ b/controllers/gce/backends/backends_test.go
@@ -17,6 +17,7 @@ limitations under the License.
 package backends
 
 import (
+	"net/http"
 	"testing"
 
 	compute "google.golang.org/api/compute/v1"
@@ -25,10 +26,14 @@ import (
 	"k8s.io/ingress/controllers/gce/storage"
 	"k8s.io/ingress/controllers/gce/utils"
 	"k8s.io/kubernetes/pkg/util/sets"
+
+	"google.golang.org/api/googleapi"
 )
 
 const defaultZone = "zone-a"
 
+var noOpErrFunc = func(op int, be *compute.BackendService) error { return nil }
+
 func newBackendPool(f BackendServices, fakeIGs instances.InstanceGroups, syncWithCloud bool) BackendPool {
 	namer := &utils.Namer{}
 	nodePool := instances.NewNodePool(fakeIGs)
@@ -40,7 +45,7 @@ func newBackendPool(f BackendServices, fakeIGs instances.InstanceGroups, syncWit
 }
 
 func TestBackendPoolAdd(t *testing.T) {
-	f := NewFakeBackendServices()
+	f := NewFakeBackendServices(noOpErrFunc)
 	fakeIGs := instances.NewFakeInstanceGroups(sets.NewString())
 	pool := newBackendPool(f, fakeIGs, false)
 	namer := utils.Namer{}
@@ -110,7 +115,7 @@ func TestBackendPoolSync(t *testing.T) {
 	// Call sync on a backend pool with a list of ports, make sure the pool
 	// creates/deletes required ports.
 	svcNodePorts := []int64{81, 82, 83}
-	f := NewFakeBackendServices()
+	f := NewFakeBackendServices(noOpErrFunc)
 	fakeIGs := instances.NewFakeInstanceGroups(sets.NewString())
 	pool := newBackendPool(f, fakeIGs, true)
 	pool.Add(81)
@@ -174,7 +179,7 @@ func TestBackendPoolSync(t *testing.T) {
 }
 
 func TestBackendPoolShutdown(t *testing.T) {
-	f := NewFakeBackendServices()
+	f := NewFakeBackendServices(noOpErrFunc)
 	fakeIGs := instances.NewFakeInstanceGroups(sets.NewString())
 	pool := newBackendPool(f, fakeIGs, false)
 	namer := utils.Namer{}
@@ -187,7 +192,7 @@ func TestBackendPoolShutdown(t *testing.T) {
 }
 
 func TestBackendInstanceGroupClobbering(t *testing.T) {
-	f := NewFakeBackendServices()
+	f := NewFakeBackendServices(noOpErrFunc)
 	fakeIGs := instances.NewFakeInstanceGroups(sets.NewString())
 	pool := newBackendPool(f, fakeIGs, false)
 	namer := utils.Namer{}
@@ -230,3 +235,40 @@ func TestBackendInstanceGroupClobbering(t *testing.T) {
 		t.Fatalf("Expected %v Got %v", expectedGroups, gotGroups)
 	}
 }
+
+func TestBackendCreateBalancingMode(t *testing.T) {
+	f := NewFakeBackendServices(noOpErrFunc)
+
+	fakeIGs := instances.NewFakeInstanceGroups(sets.NewString())
+	pool := newBackendPool(f, fakeIGs, false)
+	namer := utils.Namer{}
+	nodePort := int64(8080)
+	modes := []BalancingMode{Rate, Utilization}
+
+	// Block the creation of Backends with the given balancingMode
+	// and verify that a backend with the other balancingMode is
+	// created.
+	for i, bm := range modes {
+		f.errFunc = func(op int, be *compute.BackendService) error {
+			for _, b := range be.Backends {
+				if b.BalancingMode == string(bm) {
+					return &googleapi.Error{Code: http.StatusBadRequest}
+				}
+			}
+			return nil
+		}
+
+		pool.Add(nodePort)
+		be, err := f.GetBackendService(namer.BeName(nodePort))
+		if err != nil {
+			t.Fatalf("%v", err)
+		}
+
+		for _, b := range be.Backends {
+			if b.BalancingMode != string(modes[(i+1)%len(modes)]) {
+				t.Fatalf("Wrong balancing mode, expected %v got %v", modes[(i+1)%len(modes)], b.BalancingMode)
+			}
+		}
+		pool.GC([]int64{})
+	}
+}
diff --git a/controllers/gce/backends/fakes.go b/controllers/gce/backends/fakes.go
index a5eb1d006..bb2b031f0 100644
--- a/controllers/gce/backends/fakes.go
+++ b/controllers/gce/backends/fakes.go
@@ -25,8 +25,9 @@ import (
 )
 
 // NewFakeBackendServices creates a new fake backend services manager.
-func NewFakeBackendServices() *FakeBackendServices {
+func NewFakeBackendServices(ef func(op int, be *compute.BackendService) error) *FakeBackendServices {
 	return &FakeBackendServices{
+		errFunc: ef,
 		backendServices: cache.NewStore(func(obj interface{}) (string, error) {
 			svc := obj.(*compute.BackendService)
 			return svc.Name, nil
@@ -38,6 +39,7 @@ func NewFakeBackendServices() *FakeBackendServices {
 type FakeBackendServices struct {
 	backendServices cache.Store
 	calls           []int
+	errFunc         func(op int, be *compute.BackendService) error
 }
 
 // GetBackendService fakes getting a backend service from the cloud.
@@ -60,6 +62,11 @@ func (f *FakeBackendServices) GetBackendService(name string) (*compute.BackendSe
 
 // CreateBackendService fakes backend service creation.
 func (f *FakeBackendServices) CreateBackendService(be *compute.BackendService) error {
+	if f.errFunc != nil {
+		if err := f.errFunc(utils.Create, be); err != nil {
+			return err
+		}
+	}
 	f.calls = append(f.calls, utils.Create)
 	be.SelfLink = be.Name
 	return f.backendServices.Update(be)
diff --git a/controllers/gce/controller/fakes.go b/controllers/gce/controller/fakes.go
index cfa3ed08f..ae97f0d9c 100644
--- a/controllers/gce/controller/fakes.go
+++ b/controllers/gce/controller/fakes.go
@@ -20,6 +20,7 @@ import (
 	"k8s.io/kubernetes/pkg/util/intstr"
 	"k8s.io/kubernetes/pkg/util/sets"
 
+	compute "google.golang.org/api/compute/v1"
 	"k8s.io/ingress/controllers/gce/backends"
 	"k8s.io/ingress/controllers/gce/firewalls"
 	"k8s.io/ingress/controllers/gce/healthchecks"
@@ -45,7 +46,7 @@ type fakeClusterManager struct {
 // NewFakeClusterManager creates a new fake ClusterManager.
 func NewFakeClusterManager(clusterName string) *fakeClusterManager {
 	fakeLbs := loadbalancers.NewFakeLoadBalancers(clusterName)
-	fakeBackends := backends.NewFakeBackendServices()
+	fakeBackends := backends.NewFakeBackendServices(func(op int, be *compute.BackendService) error { return nil })
 	fakeIGs := instances.NewFakeInstanceGroups(sets.NewString())
 	fakeHCs := healthchecks.NewFakeHealthChecks()
 	namer := utils.NewNamer(clusterName)
diff --git a/controllers/gce/loadbalancers/loadbalancers_test.go b/controllers/gce/loadbalancers/loadbalancers_test.go
index 4d6fe133b..f1373a933 100644
--- a/controllers/gce/loadbalancers/loadbalancers_test.go
+++ b/controllers/gce/loadbalancers/loadbalancers_test.go
@@ -34,7 +34,7 @@ const (
 )
 
 func newFakeLoadBalancerPool(f LoadBalancers, t *testing.T) LoadBalancerPool {
-	fakeBackends := backends.NewFakeBackendServices()
+	fakeBackends := backends.NewFakeBackendServices(func(op int, be *compute.BackendService) error { return nil })
 	fakeIGs := instances.NewFakeInstanceGroups(sets.NewString())
 	fakeHCs := healthchecks.NewFakeHealthChecks()
 	namer := &utils.Namer{}
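The `errFunc` hook added in patch 3 makes the fake programmable per test. As a usage sketch beyond the series' own `TestBackendCreateBalancingMode`, a hypothetical test in the same `backends` package (names like `TestFakeRejectsRate` and `be-8080` are illustrative, not part of the patch) could force every RATE create to fail the way a mixed-mode instance group does:

```go
package backends

import (
	"net/http"
	"testing"

	compute "google.golang.org/api/compute/v1"
	"google.golang.org/api/googleapi"

	"k8s.io/ingress/controllers/gce/utils"
)

// TestFakeRejectsRate is a hypothetical usage example of the errFunc hook.
func TestFakeRejectsRate(t *testing.T) {
	// Start with the permissive hook, then swap in one that rejects RATE
	// backends the way GCE does when UTILIZATION backends already exist.
	f := NewFakeBackendServices(noOpErrFunc)
	f.errFunc = func(op int, be *compute.BackendService) error {
		for _, b := range be.Backends {
			if b.BalancingMode == string(Rate) {
				return &googleapi.Error{Code: http.StatusBadRequest}
			}
		}
		return nil
	}

	// A create attempt carrying a RATE backend now fails with a 400,
	// which is exactly the condition the controller's fallback keys on.
	err := f.CreateBackendService(&compute.BackendService{
		Name:     "be-8080",
		Backends: []*compute.Backend{{BalancingMode: string(Rate), MaxRate: 1}},
	})
	if !utils.IsHTTPErrorCode(err, http.StatusBadRequest) {
		t.Fatalf("expected an HTTP 400 from the fake, got %v", err)
	}
}
```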
From 9b305f19540ea249b7b11ca7e44d0731ab13952e Mon Sep 17 00:00:00 2001
From: bprashanth
Date: Wed, 8 Feb 2017 15:14:19 -0800
Subject: [PATCH 4/4] Flip version to 0.9.1

---
 controllers/gce/Makefile  | 2 +-
 controllers/gce/README.md | 2 +-
 controllers/gce/main.go   | 2 +-
 controllers/gce/rc.yaml   | 8 ++++----
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/controllers/gce/Makefile b/controllers/gce/Makefile
index 649658af5..10ca7ca6e 100644
--- a/controllers/gce/Makefile
+++ b/controllers/gce/Makefile
@@ -1,7 +1,7 @@
 all: push
 
 # 0.0 shouldn't clobber any released builds
-TAG = 0.9.0
+TAG = 0.9.1
 PREFIX = gcr.io/google_containers/glbc
 
 server:
diff --git a/controllers/gce/README.md b/controllers/gce/README.md
index aa1d6bd8d..84d1706f0 100644
--- a/controllers/gce/README.md
+++ b/controllers/gce/README.md
@@ -327,7 +327,7 @@ So simply delete the replication controller:
 $ kubectl get rc glbc
 CONTROLLER   CONTAINER(S)           IMAGE(S)                                       SELECTOR                    REPLICAS   AGE
 glbc         default-http-backend   gcr.io/google_containers/defaultbackend:1.0    k8s-app=glbc,version=v0.5   1          2m
-             l7-lb-controller       gcr.io/google_containers/glbc:0.9.0
+             l7-lb-controller       gcr.io/google_containers/glbc:0.9.1
 
 $ kubectl delete rc glbc
 replicationcontroller "glbc" deleted
diff --git a/controllers/gce/main.go b/controllers/gce/main.go
index 2cc35751c..0f1f8e981 100644
--- a/controllers/gce/main.go
+++ b/controllers/gce/main.go
@@ -61,7 +61,7 @@ const (
 	alphaNumericChar = "0"
 
 	// Current docker image version. Only used in debug logging.
-	imageVersion = "glbc:0.9.0"
+	imageVersion = "glbc:0.9.1"
 
 	// Key used to persist UIDs to configmaps.
 	uidConfigMapName = "ingress-uid"
diff --git a/controllers/gce/rc.yaml b/controllers/gce/rc.yaml
index 3023d0207..753733808 100644
--- a/controllers/gce/rc.yaml
+++ b/controllers/gce/rc.yaml
@@ -24,18 +24,18 @@ metadata:
   name: l7-lb-controller
   labels:
     k8s-app: glbc
-    version: v0.9.0
+    version: v0.9.1
 spec:
   # There should never be more than 1 controller alive simultaneously.
   replicas: 1
   selector:
     k8s-app: glbc
-    version: v0.9.0
+    version: v0.9.1
   template:
     metadata:
       labels:
         k8s-app: glbc
-        version: v0.9.0
+        version: v0.9.1
       name: glbc
     spec:
       terminationGracePeriodSeconds: 600
@@ -61,7 +61,7 @@ spec:
           requests:
             cpu: 10m
            memory: 20Mi
-      - image: gcr.io/google_containers/glbc:0.9.0
+      - image: gcr.io/google_containers/glbc:0.9.1
        livenessProbe:
          httpGet:
            path: /healthz