Sleep for loadbalancer update

This commit is contained in:
Nick Sardo 2017-04-19 13:41:25 -07:00
parent 7cd4ef7c2c
commit 29277994b9
2 changed files with 19 additions and 4 deletions

View file

@ -100,9 +100,10 @@ Step 8: Delete temporary instance groups
#### Interaction with GCE Ingress Controller
After one or more instances have been removed from their instance group, the controller will start throwing validation errors and will try to sync the instances back. However, the instance will hopefully belong to `k8s-ig--migrate` already, and the controller does not have logic to take it out of that group. Therefore, the controller can only interrupt the migration process in the window between an instance's removal from one group and its insertion into another. On the second set of migrations, this interaction is fine since the destination group is the same for the updater and the controller. If the controller prevents an instance from being added to the migrate IG, the updater will attempt the migration again. Do not be alarmed by multiple attempts.
#### Required Testing
- [ ] Uptime is not affected when switching instance groups
- [x] An active GLBC does not negatively interfere with this updater
#### Maintaining Up-time
This may not be a perfect solution, but the updater will sleep for 3 minutes between sensitive changes to the load balancer. For instance, after updating the backend-services to point to the new migration instance groups, it will sleep before it starts migrating instances. Without these occasional sleeps, the update results in some 502s for a short period of time (on the order of seconds to minutes). When testing with sleeps, no 502s were detected.
#### TODO
- [x] If only one backend-service exists, just update it in place.
@ -112,3 +113,4 @@ After one or more instances have been removed from their instance group, the con
#### Warning
This tool hasn't been fully tested. Use at your own risk.
You should run on a test cluster before running on important clusters.

View file

@ -36,6 +36,8 @@ const (
balancingModeRATE = "RATE"
balancingModeUTIL = "UTILIZATION"
loadBalancerUpdateTime = 3 * time.Minute
operationPollInterval = 1 * time.Second
operationPollTimeoutDuration = time.Hour
@ -179,6 +181,8 @@ func updateMultipleBackends() {
fmt.Println("\nStep 2: Update backend services to point to original and temporary instance groups")
setBackendsTo(true, balancingModeInverse(targetBalancingMode), true, balancingModeInverse(targetBalancingMode))
sleep(loadBalancerUpdateTime)
fmt.Println("\nStep 3: Migrate instances to temporary group")
migrateInstances(instanceGroupName, instanceGroupTemp)
@ -190,9 +194,13 @@ func updateMultipleBackends() {
fmt.Println("\nStep 5: Update backend services to point to both temporary and original (with new balancing mode) instance groups")
setBackendsTo(true, targetBalancingMode, true, balancingModeInverse(targetBalancingMode))
sleep(loadBalancerUpdateTime)
fmt.Println("\nStep 6: Migrate instances back to original groups")
migrateInstances(instanceGroupTemp, instanceGroupName)
sleep(loadBalancerUpdateTime)
fmt.Println("\nStep 7: Update backend services to point only to original instance groups")
setBackendsTo(true, targetBalancingMode, false, "")
@ -210,6 +218,11 @@ func updateMultipleBackends() {
}
}
// sleep announces the upcoming pause on standard output and then blocks the
// calling goroutine for the duration t.
//
// A zero or negative t still prints the message; time.Sleep then returns
// immediately.
func sleep(t time.Duration) {
	// Println separates its operands with a space and terminates the line,
	// so the message is preceded by a blank line and followed by a newline.
	fmt.Println("\nSleeping for", t)
	time.Sleep(t)
}
func setBackendsTo(orig bool, origMode string, temp bool, tempMode string) {
bs := getBackendServices()
for _, bsi := range bs {
@ -317,7 +330,7 @@ func migrateInstance(zone, instanceLink, fromIG, toIG string) error {
op, err = s.InstanceGroups.AddInstances(projectID, zone, toIG, ra).Do()
if err != nil {
if strings.Contains(err.Error(), "memberAlreadyExists") { // GLBC already added the instance back to the IG
fmt.Printf(" already exists in %v\n", toIG)
fmt.Printf(" already exists in %v", toIG)
} else {
fmt.Printf(" failed to add to group %v, err: %v\n", toIG, err)
return false, nil