From c479d3e26176948eb2949cde657c8287527db99f Mon Sep 17 00:00:00 2001
From: Prashanth Balasubramanian
Date: Wed, 13 Jul 2016 15:54:16 -0700
Subject: [PATCH 1/2] Bump glbc version

---
 controllers/gce/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/controllers/gce/Makefile b/controllers/gce/Makefile
index ff1f77aa6..361fcda46 100644
--- a/controllers/gce/Makefile
+++ b/controllers/gce/Makefile
@@ -1,7 +1,7 @@
all: push

# 0.0 shouldn't clobber any released builds
-TAG = 0.7.0
+TAG = 0.7.1
PREFIX = gcr.io/google_containers/glbc

server:

From fc50762257330b0d828205842e29d0a20ae768ad Mon Sep 17 00:00:00 2001
From: bprashanth
Date: Mon, 8 Aug 2016 19:11:47 -0700
Subject: [PATCH 2/2] Poll and notice changes to cluster UID

---
 controllers/gce/BETA_LIMITATIONS.md           | 46 +++++++++
 controllers/gce/controller/cluster_manager.go |  7 +-
 controllers/gce/controller/fakes.go           |  2 +-
 .../gce/loadbalancers/loadbalancers.go        | 13 +++
 .../gce/loadbalancers/loadbalancers_test.go   | 94 +++++++++++++++++++
 controllers/gce/main.go                       | 41 +++++++-
 controllers/gce/utils/utils.go                | 75 +++++++++++++--
 7 files changed, 260 insertions(+), 18 deletions(-)

diff --git a/controllers/gce/BETA_LIMITATIONS.md b/controllers/gce/BETA_LIMITATIONS.md
index 87bb1fca2..f958b3345 100644
--- a/controllers/gce/BETA_LIMITATIONS.md
+++ b/controllers/gce/BETA_LIMITATIONS.md
@@ -10,6 +10,8 @@ This is a list of beta limitations:
* [Oauth scopes](https://cloud.google.com/compute/docs/authentication): By default GKE/GCE clusters are granted "compute/rw" permissions. If you setup a cluster without these permissions, GLBC is useless and you should delete the controller as described in the [section below](#disabling-glbc). If you don't delete the controller it will keep restarting.
* [Default backends](https://cloud.google.com/compute/docs/load-balancing/http/url-map#url_map_simplest_case): All L7 Loadbalancers created by GLBC have a default backend. If you don't specify one in your Ingress, GLBC will assign the 404 default backend mentioned above.
* [Teardown](README.md#deletion): The recommended way to tear down a cluster with active Ingresses is to either delete each Ingress, or hit the `/delete-all-and-quit` endpoint on GLBC, before invoking a cluster teardown script (eg: kube-down.sh). You will have to manually cleanup GCE resources through the [cloud console](https://cloud.google.com/compute/docs/console#access) or [gcloud CLI](https://cloud.google.com/compute/docs/gcloud-compute/) if you simply tear down the cluster with active Ingresses.
+* [Changing UIDs](#changing-the-cluster-uid): You can change the UID used as a suffix for all your GCE cloud resources, but this requires you to delete existing Ingresses first.
+* [Cleaning up](#cleaning-up-cloud-resources): Through the GCE console, you can delete loadbalancers that older clusters might have leaked due to premature teardown.

## Prerequisites

@@ -120,3 +122,47 @@ gcloud container clusters create mycluster --network "default" --num-nodes 1 \
    --disk-size 50 --scopes storage-full
```

+## Changing the cluster UID
+
The Ingress controller suffixes the names of the cloud resources it creates with a UID, which it stores in a configmap in the `kube-system` namespace.

```console
$ kubectl --namespace=kube-system get configmaps
NAME          DATA      AGE
ingress-uid   1         12d

$ kubectl --namespace=kube-system get configmaps -o yaml
apiVersion: v1
items:
- apiVersion: v1
  data:
    uid: UID
  kind: ConfigMap
...
```
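To read just the UID value, something like the following should also work (a sketch; it assumes the configmap is named `ingress-uid` as shown above and that your kubectl supports jsonpath output):

```console
$ kubectl --namespace=kube-system get configmap ingress-uid -o jsonpath='{.data.uid}'
UID
```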
You can pick a different UID, but this requires you to:

1. Delete existing Ingresses
2. Edit the configmap using `kubectl edit`
3. Recreate the same Ingress

After step 3 the Ingress should come up using the new UID as the suffix of all cloud resources. You can't simply change the UID if you have existing Ingresses, because
renaming a cloud resource requires a delete/create cycle that the Ingress controller does not currently automate. Note that the UID you see when editing the configmap
in step 2 might be an empty string, if you had a working Ingress before upgrading to Kubernetes 1.3.

__A note on setting the UID__: The Ingress controller uses the token `--` to split a machine-generated prefix from the UID itself. If the user-supplied UID is found to
contain `--`, the controller will take the token after the last `--`, and use an empty string if it ends with `--`. For example, if you insert `foo--bar` as the UID,
the controller will assume `bar` is the UID. You can either edit the configmap and set the UID to `bar` to match the controller, or delete existing Ingresses as described
above, and reset it to a string without `--`.

## Cleaning up cloud resources

If you deleted a GKE/GCE cluster without first deleting the associated Ingresses, the controller would not have deleted the associated cloud resources. If you find yourself in such a situation, you can delete the resources by hand (see the gcloud sketch after this list):

1. Navigate to the [cloud console](https://console.cloud.google.com/) and click on the "Networking" tab, then choose "LoadBalancing"
2. Find the loadbalancer you'd like to delete; it should have a name formatted as: k8s-um-ns-name--UUID
3. Delete it, checking the boxes to also cascade the deletion down to associated resources (eg: backend-services)
4. Switch to the "Compute Engine" tab, then choose "Instance Groups"
5. Delete the Instance Group allocated for the leaked Ingress; it should have a name formatted as: k8s-ig-UUID
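If you prefer the CLI, the following gcloud sketch is a rough equivalent (the resource names are hypothetical placeholders following the patterns above; there is no cascade flag, so dependents must be deleted before the resources that reference them):

```console
$ gcloud compute forwarding-rules delete k8s-fw-ns-name--UUID --global
$ gcloud compute target-http-proxies delete k8s-tp-ns-name--UUID
$ gcloud compute url-maps delete k8s-um-ns-name--UUID
$ gcloud compute backend-services delete k8s-be-PORT--UUID
$ gcloud compute instance-groups unmanaged delete k8s-ig-UUID --zone ZONE
```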
diff --git a/controllers/gce/controller/cluster_manager.go b/controllers/gce/controller/cluster_manager.go
index d6e6cf830..90b3d3a96 100644
--- a/controllers/gce/controller/cluster_manager.go
+++ b/controllers/gce/controller/cluster_manager.go
@@ -230,14 +230,13 @@ func getGCEClient(config io.Reader) *gce.GCECloud {
}

// NewClusterManager creates a cluster manager for shared resources.
-// - name: is the name used to tag cluster wide shared resources. This is the
-//   string passed to glbc via --gce-cluster-name.
+// - namer: is the namer used to tag cluster wide shared resources.
// - defaultBackendNodePort: is the node port of glbc's default backend. This is
//   the kubernetes Service that serves the 404 page if no urls match.
// - defaultHealthCheckPath: is the default path used for L7 health checks, eg: "/healthz".
func NewClusterManager(
	configFilePath string,
-	name string,
+	namer *utils.Namer,
	defaultBackendNodePort int64,
	defaultHealthCheckPath string) (*ClusterManager, error) {

@@ -264,7 +263,7 @@ func NewClusterManager(
	}

	// Names are fundamental to the cluster, the uid allocator makes sure names don't collide.
-	cluster := ClusterManager{ClusterNamer: &utils.Namer{name}}
+	cluster := ClusterManager{ClusterNamer: namer}

	// NodePool stores GCE vms that are in this Kubernetes cluster.
	cluster.instancePool = instances.NewNodePool(cloud)

diff --git a/controllers/gce/controller/fakes.go b/controllers/gce/controller/fakes.go
index a2d8bb632..a325e3917 100644
--- a/controllers/gce/controller/fakes.go
+++ b/controllers/gce/controller/fakes.go
@@ -48,7 +48,7 @@ func NewFakeClusterManager(clusterName string) *fakeClusterManager {
	fakeBackends := backends.NewFakeBackendServices()
	fakeIGs := instances.NewFakeInstanceGroups(sets.NewString())
	fakeHCs := healthchecks.NewFakeHealthChecks()
-	namer := &utils.Namer{clusterName}
+	namer := utils.NewNamer(clusterName)
	nodePool := instances.NewNodePool(fakeIGs)
	nodePool.Init(&instances.FakeZoneLister{[]string{"zone-a"}})

diff --git a/controllers/gce/loadbalancers/loadbalancers.go b/controllers/gce/loadbalancers/loadbalancers.go
index fab810d89..491246952 100644
--- a/controllers/gce/loadbalancers/loadbalancers.go
+++ b/controllers/gce/loadbalancers/loadbalancers.go
@@ -49,6 +49,9 @@ const (
	// A single target proxy/urlmap/forwarding rule is created per loadbalancer.
	// Tagged with the namespace/name of the Ingress.
+	// TODO: Move the namer to its own package out of utils and move the prefix
+	// with it. Currently the construction of the loadbalancer resource names
+	// is split between the namer and the loadbalancers package.
	targetProxyPrefix      = "k8s-tp"
	targetHTTPSProxyPrefix = "k8s-tps"
	sslCertPrefix          = "k8s-ssl"
@@ -869,3 +872,13 @@ func GetLBAnnotations(l7 *L7, existing map[string]string, backendPool backends.B
	existing[fmt.Sprintf("%v/backends", utils.K8sAnnotationPrefix)] = jsonBackendState
	return existing
}
+
+// GCEResourceName retrieves the name of the gce resource created for this
+// Ingress, of the given resource type, by inspecting the map of ingress
+// annotations.
+func GCEResourceName(ingAnnotations map[string]string, resourceName string) string {
+	// Even though this function is trivial, it exists to keep the annotation
+	// parsing logic in a single location.
+	resourceName, _ = ingAnnotations[fmt.Sprintf("%v/%v", utils.K8sAnnotationPrefix, resourceName)]
+	return resourceName
+}
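// Illustration (editor's sketch, not part of this patch): a caller can recover
// the stored name from Ingress annotations, assuming utils.K8sAnnotationPrefix
// is "ingress.kubernetes.io"; the annotation value below is hypothetical:
//
//	ann := map[string]string{"ingress.kubernetes.io/forwarding-rule": "k8s-fw-default-test--uid1"}
//	name := GCEResourceName(ann, "forwarding-rule") // "k8s-fw-default-test--uid1"
//	missing := GCEResourceName(ann, "url-map")      // "" when the annotation is absent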
diff --git a/controllers/gce/loadbalancers/loadbalancers_test.go b/controllers/gce/loadbalancers/loadbalancers_test.go
index ef5bcaae3..a007a3bef 100644
--- a/controllers/gce/loadbalancers/loadbalancers_test.go
+++ b/controllers/gce/loadbalancers/loadbalancers_test.go
@@ -17,6 +17,7 @@ limitations under the License.

package loadbalancers

import (
+	"fmt"
	"testing"

	compute "google.golang.org/api/compute/v1"
@@ -190,3 +191,96 @@ func TestUpdateUrlMap(t *testing.T) {
	}
	f.CheckURLMap(t, l7, expectedMap)
}
+
+func TestNameParsing(t *testing.T) {
+	clusterName := "123"
+	namer := utils.NewNamer(clusterName)
+	fullName := namer.Truncate(fmt.Sprintf("%v-%v", forwardingRulePrefix, namer.LBName("testlb")))
+	annotationsMap := map[string]string{
+		fmt.Sprintf("%v/forwarding-rule", utils.K8sAnnotationPrefix): fullName,
+	}
+	components := namer.ParseName(GCEResourceName(annotationsMap, "forwarding-rule"))
+	t.Logf("%+v", components)
+	if components.ClusterName != clusterName {
+		t.Errorf("Failed to parse cluster name from %v, expected %v got %v", fullName, clusterName, components.ClusterName)
+	}
+	resourceName := "fw"
+	if components.Resource != resourceName {
+		t.Errorf("Failed to parse resource from %v, expected %v got %v", fullName, resourceName, components.Resource)
+	}
+}
+
+func TestClusterNameChange(t *testing.T) {
+	lbInfo := &L7RuntimeInfo{
+		Name: "test",
+		TLS:  &TLSCerts{Key: "key", Cert: "cert"},
+	}
+	f := NewFakeLoadBalancers(lbInfo.Name)
+	pool := newFakeLoadBalancerPool(f, t)
+	pool.Add(lbInfo)
+	l7, err := pool.Get(lbInfo.Name)
+	if err != nil || l7 == nil {
+		t.Fatalf("Expected l7 to be created, err: %v", err)
+	}
+	um, err := f.GetUrlMap(f.umName())
+	if err != nil ||
+		um.DefaultService != pool.(*L7s).glbcDefaultBackend.SelfLink {
+		t.Fatalf("%v", err)
+	}
+	tps, err := f.GetTargetHttpsProxy(f.tpName(true))
+	if err != nil || tps.UrlMap != um.SelfLink {
+		t.Fatalf("%v", err)
+	}
+	fws, err := f.GetGlobalForwardingRule(f.fwName(true))
+	if err != nil || fws.Target != tps.SelfLink {
+		t.Fatalf("%v", err)
+	}
+	newName := "newName"
+	namer := pool.(*L7s).namer
+	namer.SetClusterName(newName)
+	f.name = fmt.Sprintf("%v--%v", lbInfo.Name, newName)
+
+	// Now the components should get renamed with the new suffix.
+	pool.Add(lbInfo)
+	l7, err = pool.Get(lbInfo.Name)
+	if err != nil || namer.ParseName(l7.Name).ClusterName != newName {
+		t.Fatalf("Expected L7 name to change.")
+	}
+	um, err = f.GetUrlMap(f.umName())
+	if err != nil || namer.ParseName(um.Name).ClusterName != newName {
+		t.Fatalf("Expected urlmap name to change.")
+	}
+	if err != nil ||
+		um.DefaultService != pool.(*L7s).glbcDefaultBackend.SelfLink {
+		t.Fatalf("%v", err)
+	}
+
+	tps, err = f.GetTargetHttpsProxy(f.tpName(true))
+	if err != nil || tps.UrlMap != um.SelfLink {
+		t.Fatalf("%v", err)
+	}
+	fws, err = f.GetGlobalForwardingRule(f.fwName(true))
+	if err != nil || fws.Target != tps.SelfLink {
+		t.Fatalf("%v", err)
+	}
+}
+
+func TestInvalidClusterNameChange(t *testing.T) {
+	namer := utils.NewNamer("test--123")
+	if got := namer.GetClusterName(); got != "123" {
+		t.Fatalf("Expected name 123, got %v", got)
+	}
+	// A name with `--` should take the last token
+	for _, testCase := range []struct{ newName, expected string }{
+		{"foo--bar", "bar"},
+		{"--", ""},
+		{"", ""},
+		{"foo--bar--com", "com"},
+	} {
+		namer.SetClusterName(testCase.newName)
+		if got := namer.GetClusterName(); got != testCase.expected {
+			t.Fatalf("Expected %q got %q", testCase.expected, got)
+		}
+	}
+}

diff --git a/controllers/gce/main.go b/controllers/gce/main.go
index fcdcbd28b..bd23d82de 100644
--- a/controllers/gce/main.go
+++ b/controllers/gce/main.go
@@ -28,7 +28,9 @@ import (
	flag "github.com/spf13/pflag"
	"k8s.io/contrib/ingress/controllers/gce/controller"
+	"k8s.io/contrib/ingress/controllers/gce/loadbalancers"
	"k8s.io/contrib/ingress/controllers/gce/storage"
+	"k8s.io/contrib/ingress/controllers/gce/utils"
	"k8s.io/kubernetes/pkg/api"
	client "k8s.io/kubernetes/pkg/client/unversioned"
	kubectl_util "k8s.io/kubernetes/pkg/kubectl/cmd/util"
@@ -199,11 +201,11 @@ func main() {
	if *inCluster || *useRealCloud {
		// Create cluster manager
-		name, err := getClusterUID(kubeClient, *clusterName)
+		namer, err := newNamer(kubeClient, *clusterName)
		if err != nil {
			glog.Fatalf("%v", err)
		}
-		clusterManager, err = controller.NewClusterManager(*configFilePath, name, defaultBackendNodePort, *healthCheckPath)
+		clusterManager, err = controller.NewClusterManager(*configFilePath, namer, defaultBackendNodePort, *healthCheckPath)
		if err != nil {
			glog.Fatalf("%v", err)
		}
@@ -217,8 +219,8 @@ func main() {
	if err != nil {
		glog.Fatalf("%v", err)
	}
-	if clusterManager.ClusterNamer.ClusterName != "" {
-		glog.V(3).Infof("Cluster name %+v", clusterManager.ClusterNamer.ClusterName)
+	if clusterManager.ClusterNamer.GetClusterName() != "" {
+		glog.V(3).Infof("Cluster name %+v", clusterManager.ClusterNamer.GetClusterName())
	}
	clusterManager.Init(&controller.GCETranslator{lbc})
	go registerHandlers(lbc)
@@ -231,6 +233,32 @@
	}
}

+func newNamer(kubeClient *client.Client, clusterName string) (*utils.Namer, error) {
+	name, err := getClusterUID(kubeClient, clusterName)
+	if err != nil {
+		return nil, err
+	}
+
+	namer := utils.NewNamer(name)
+	vault := storage.NewConfigMapVault(kubeClient, api.NamespaceSystem, uidConfigMapName)
+
+	// Start a goroutine to poll the cluster UID configmap.
+	// We don't watch because we know exactly which configmap we want and this
+	// controller already watches 5 other resources, so it isn't worth the cost
+	// of another connection and complexity.
+	go wait.Forever(func() {
+		uid, found, err := vault.Get()
+		existing := namer.GetClusterName()
+		if found && uid != existing {
+			glog.Infof("Cluster uid changed from %v -> %v", existing, uid)
+			namer.SetClusterName(uid)
+		} else if err != nil {
+			glog.Errorf("Failed to reconcile cluster uid (currently set to %v): %v", existing, err)
+		}
+	}, 5*time.Second)
+	return namer, nil
+}
+
// getClusterUID returns the cluster UID. Rules for UID generation:
// If the user specifies a --cluster-uid param it overwrites everything
// else, check UID config map for a previously recorded uid
@@ -264,8 +292,13 @@ func getClusterUID(kubeClient *client.Client, name string) (string, error) {
	if err != nil {
		return "", err
	}
+	namer := utils.Namer{}
	for _, ing := range ings.Items {
		if len(ing.Status.LoadBalancer.Ingress) != 0 {
+			c := namer.ParseName(loadbalancers.GCEResourceName(ing.Annotations, "forwarding-rule"))
+			if c.ClusterName != "" {
+				return c.ClusterName, cfgVault.Put(c.ClusterName)
+			}
			glog.Infof("Found a working Ingress, assuming uid is empty string")
			return "", cfgVault.Put("")
		}

diff --git a/controllers/gce/utils/utils.go b/controllers/gce/utils/utils.go
index 930c2a2fd..91791d0de 100644
--- a/controllers/gce/utils/utils.go
+++ b/controllers/gce/utils/utils.go
@@ -20,6 +20,7 @@ import (
	"fmt"
	"strconv"
	"strings"
+	"sync"

	"github.com/golang/glog"
	compute "google.golang.org/api/compute/v1"
@@ -82,7 +83,42 @@ const (

// Namer handles centralized naming for the cluster.
type Namer struct {
-	ClusterName string
+	clusterName string
+	nameLock    sync.Mutex
}

+// NewNamer creates a new namer.
+func NewNamer(clusterName string) *Namer {
+	namer := &Namer{}
+	namer.SetClusterName(clusterName)
+	return namer
+}
+
+// NameComponents is a struct representing the components of a GCE resource
+// name constructed by the namer. The format of such a name is:
+// k8s-resource-<metadata, eg port>--uid
+type NameComponents struct {
+	ClusterName, Resource, Metadata string
+}
+
+// SetClusterName sets the UID/name of this cluster.
+func (n *Namer) SetClusterName(name string) {
+	n.nameLock.Lock()
+	defer n.nameLock.Unlock()
+	if strings.Contains(name, clusterNameDelimiter) {
+		tokens := strings.Split(name, clusterNameDelimiter)
+		glog.Warningf("Given name %v contains %v, taking last token in: %+v", name, clusterNameDelimiter, tokens)
+		name = tokens[len(tokens)-1]
+	}
+	glog.Infof("Changing cluster name from %v to %v", n.clusterName, name)
+	n.clusterName = name
+}
+
+// GetClusterName returns the UID/name of this cluster.
+func (n *Namer) GetClusterName() string {
+	n.nameLock.Lock()
+	defer n.nameLock.Unlock()
+	return n.clusterName
+}

// Truncate truncates the given key to a GCE length limit.
@@ -96,10 +132,28 @@ func (n *Namer) Truncate(key string) string {
}

func (n *Namer) decorateName(name string) string {
-	if n.ClusterName == "" {
+	clusterName := n.GetClusterName()
+	if clusterName == "" {
		return name
	}
-	return n.Truncate(fmt.Sprintf("%v%v%v", name, clusterNameDelimiter, n.ClusterName))
+	return n.Truncate(fmt.Sprintf("%v%v%v", name, clusterNameDelimiter, clusterName))
}
+
+// ParseName parses the name of a resource generated by the namer.
+func (n *Namer) ParseName(name string) *NameComponents {
+	l := strings.Split(name, clusterNameDelimiter)
+	var uid, resource string
+	if len(l) >= 2 {
+		uid = l[len(l)-1]
+	}
+	c := strings.Split(name, "-")
+	if len(c) >= 2 {
+		resource = c[1]
+	}
+	return &NameComponents{
+		ClusterName: uid,
+		Resource:    resource,
+	}
+}

// NameBelongsToCluster checks if a given name is tagged with this cluster's UID.
@@ -109,8 +163,9 @@
		return false
	}
	parts := strings.Split(name, clusterNameDelimiter)
+	clusterName := n.GetClusterName()
	if len(parts) == 1 {
-		if n.ClusterName == "" {
+		if clusterName == "" {
			return true
		}
		return false
@@ -119,7 +174,7 @@
		glog.Warningf("Too many parts to name %v, ignoring", name)
		return false
	}
-	return parts[1] == n.ClusterName
+	return parts[1] == clusterName
}

// BeName constructs the name for a backend.
@@ -152,11 +207,12 @@ func (n *Namer) IGName() string {

// FrSuffix constructs the glbc specific suffix for the FirewallRule.
func (n *Namer) FrSuffix() string {
+	clusterName := n.GetClusterName()
	// The entire cluster only needs a single firewall rule.
-	if n.ClusterName == "" {
+	if clusterName == "" {
		return globalFirewallSuffix
	}
-	return n.Truncate(fmt.Sprintf("%v%v%v", globalFirewallSuffix, clusterNameDelimiter, n.ClusterName))
+	return n.Truncate(fmt.Sprintf("%v%v%v", globalFirewallSuffix, clusterNameDelimiter, clusterName))
}

// FrName constructs the full firewall rule name, this is the name assigned by
@@ -174,10 +230,11 @@ func (n *Namer) LBName(key string) string {
	// namespace conflicts in the Ubernetes context.
	parts := strings.Split(key, clusterNameDelimiter)
	scrubbedName := strings.Replace(key, "/", "-", -1)
-	if n.ClusterName == "" || parts[len(parts)-1] == n.ClusterName {
+	clusterName := n.GetClusterName()
+	if clusterName == "" || parts[len(parts)-1] == clusterName {
		return scrubbedName
	}
-	return n.Truncate(fmt.Sprintf("%v%v%v", scrubbedName, clusterNameDelimiter, n.ClusterName))
+	return n.Truncate(fmt.Sprintf("%v%v%v", scrubbedName, clusterNameDelimiter, clusterName))
}

// GCEURLMap is a nested map of hostname->path regex->backend
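
Editor's note: a minimal, self-contained sketch of the UID-splitting rule this patch implements and documents above (it mirrors the behavior of Namer.SetClusterName under the stated `--` convention; it is not part of the patch):

```go
package main

import (
	"fmt"
	"strings"
)

// splitUID mimics Namer.SetClusterName: if the supplied UID contains the
// cluster name delimiter "--", only the token after the last "--" is kept.
func splitUID(name string) string {
	const delim = "--"
	if strings.Contains(name, delim) {
		tokens := strings.Split(name, delim)
		return tokens[len(tokens)-1]
	}
	return name
}

func main() {
	// These cases match TestInvalidClusterNameChange above.
	for _, uid := range []string{"foo--bar", "--", "", "foo--bar--com", "plain"} {
		fmt.Printf("%q -> %q\n", uid, splitUID(uid))
	}
	// Output:
	// "foo--bar" -> "bar"
	// "--" -> ""
	// "" -> ""
	// "foo--bar--com" -> "com"
	// "plain" -> "plain"
}
```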