Remove NET_BIND_SERVICE for port binding
Signed-off-by: Jan-Otto Kröpke <mail@jkroepke.de>
This commit is contained in:
parent
54389680a7
commit
1c7ac6e8fe
22 changed files with 76 additions and 146 deletions
3
Makefile
3
Makefile
|
@ -40,6 +40,7 @@ E2E_CHECK_LEAKS ?=
|
|||
REPO_INFO ?= $(shell git config --get remote.origin.url)
|
||||
COMMIT_SHA ?= git-$(shell git rev-parse --short HEAD)
|
||||
BUILD_ID ?= "UNSET"
|
||||
SETCAP ?= "false"
|
||||
|
||||
PKG = k8s.io/ingress-nginx
|
||||
|
||||
|
@ -74,6 +75,7 @@ image: clean-image ## Build image for a particular arch.
|
|||
--build-arg TARGETARCH="$(ARCH)" \
|
||||
--build-arg COMMIT_SHA="$(COMMIT_SHA)" \
|
||||
--build-arg BUILD_ID="$(BUILD_ID)" \
|
||||
--build-arg SETCAP="$(SETCAP)" \
|
||||
-t $(REGISTRY)/controller:$(TAG) rootfs
|
||||
|
||||
.PHONY: gosec
|
||||
|
@ -248,6 +250,7 @@ release: ensure-buildx clean
|
|||
--build-arg VERSION="$(TAG)" \
|
||||
--build-arg COMMIT_SHA="$(COMMIT_SHA)" \
|
||||
--build-arg BUILD_ID="$(BUILD_ID)" \
|
||||
--build-arg SETCAP="$(SETCAP)" \
|
||||
-t $(REGISTRY)/controller:$(TAG) rootfs
|
||||
|
||||
docker buildx build \
|
||||
|
|
|
@ -422,7 +422,7 @@ As of version `1.26.0` of this chart, by simply not providing any clusterIP valu
|
|||
| controller.service.targetPorts.https | string | `"https"` | |
|
||||
| controller.service.type | string | `"LoadBalancer"` | |
|
||||
| controller.shareProcessNamespace | bool | `false` | |
|
||||
| controller.sysctls | object | `{}` | See https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ for notes on enabling and using sysctls |
|
||||
| controller.sysctls | object | `{"net.ipv4.ip_unprivileged_port_start":"80"}` | See https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ for notes on enabling and using sysctls |
|
||||
| controller.tcp.annotations | object | `{}` | Annotations to be added to the tcp config configmap |
|
||||
| controller.tcp.configMapNamespace | string | `""` | Allows customization of the tcp-services-configmap; defaults to $(POD_NAMESPACE) |
|
||||
| controller.terminationGracePeriodSeconds | int | `300` | `terminationGracePeriodSeconds` to avoid killing pods before we are ready # wait up to five minutes for the drain of connections # |
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
controller:
|
||||
image:
|
||||
repository: ingress-controller/controller
|
||||
tag: 1.0.0-dev
|
||||
digest: null
|
||||
admissionWebhooks:
|
||||
certManager:
|
||||
enabled: true
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
controller:
|
||||
image:
|
||||
repository: ingress-controller/controller
|
||||
tag: 1.0.0-dev
|
||||
digest: null
|
||||
watchIngressWithoutClass: true
|
||||
ingressClassResource:
|
||||
name: custom-nginx
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
controller:
|
||||
image:
|
||||
repository: ingress-controller/controller
|
||||
tag: 1.0.0-dev
|
||||
digest: null
|
||||
autoscaling:
|
||||
enabled: true
|
||||
behavior:
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
controller:
|
||||
image:
|
||||
repository: ingress-controller/controller
|
||||
tag: 1.0.0-dev
|
||||
digest: null
|
||||
service:
|
||||
type: ClusterIP
|
||||
admissionWebhooks:
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
controller:
|
||||
image:
|
||||
repository: ingress-controller/controller
|
||||
tag: 1.0.0-dev
|
||||
digest: null
|
||||
service:
|
||||
type: ClusterIP
|
||||
admissionWebhooks:
|
||||
|
|
|
@ -41,8 +41,6 @@ Container SecurityContext.
|
|||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
{{- if .Values.controller.image.chroot }}
|
||||
- SYS_CHROOT
|
||||
{{- end }}
|
||||
|
|
|
@ -12,7 +12,6 @@ metadata:
|
|||
{{- end }}
|
||||
spec:
|
||||
allowedCapabilities:
|
||||
- NET_BIND_SERVICE
|
||||
{{- if .Values.controller.image.chroot }}
|
||||
- SYS_CHROOT
|
||||
{{- end }}
|
||||
|
|
|
@ -112,7 +112,8 @@ controller:
|
|||
# -- Security Context policies for controller pods
|
||||
podSecurityContext: {}
|
||||
# -- See https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/ for notes on enabling and using sysctls
|
||||
sysctls: {}
|
||||
sysctls:
|
||||
net.ipv4.ip_unprivileged_port_start: "80"
|
||||
# sysctls:
|
||||
# "net.core.somaxconn": "8192"
|
||||
|
||||
|
|
|
@ -487,8 +487,6 @@ spec:
|
|||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
|
@ -499,6 +497,10 @@ spec:
|
|||
dnsPolicy: ClusterFirst
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
sysctls:
|
||||
- name: net.ipv4.ip_unprivileged_port_start
|
||||
value: "80"
|
||||
serviceAccountName: ingress-nginx
|
||||
terminationGracePeriodSeconds: 300
|
||||
volumes:
|
||||
|
|
|
@ -499,8 +499,6 @@ spec:
|
|||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
|
@ -511,6 +509,10 @@ spec:
|
|||
dnsPolicy: ClusterFirst
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
sysctls:
|
||||
- name: net.ipv4.ip_unprivileged_port_start
|
||||
value: "80"
|
||||
serviceAccountName: ingress-nginx
|
||||
terminationGracePeriodSeconds: 300
|
||||
volumes:
|
||||
|
|
|
@ -481,8 +481,6 @@ spec:
|
|||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
|
@ -493,6 +491,10 @@ spec:
|
|||
dnsPolicy: ClusterFirst
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
sysctls:
|
||||
- name: net.ipv4.ip_unprivileged_port_start
|
||||
value: "80"
|
||||
serviceAccountName: ingress-nginx
|
||||
terminationGracePeriodSeconds: 300
|
||||
volumes:
|
||||
|
|
|
@ -483,8 +483,6 @@ spec:
|
|||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
|
@ -495,6 +493,10 @@ spec:
|
|||
dnsPolicy: ClusterFirst
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
sysctls:
|
||||
- name: net.ipv4.ip_unprivileged_port_start
|
||||
value: "80"
|
||||
serviceAccountName: ingress-nginx
|
||||
terminationGracePeriodSeconds: 300
|
||||
volumes:
|
||||
|
|
|
@ -486,8 +486,6 @@ spec:
|
|||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
|
@ -498,6 +496,10 @@ spec:
|
|||
dnsPolicy: ClusterFirst
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
sysctls:
|
||||
- name: net.ipv4.ip_unprivileged_port_start
|
||||
value: "80"
|
||||
serviceAccountName: ingress-nginx
|
||||
terminationGracePeriodSeconds: 300
|
||||
volumes:
|
||||
|
|
|
@ -492,8 +492,6 @@ spec:
|
|||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
|
@ -504,6 +502,10 @@ spec:
|
|||
dnsPolicy: ClusterFirst
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
sysctls:
|
||||
- name: net.ipv4.ip_unprivileged_port_start
|
||||
value: "80"
|
||||
serviceAccountName: ingress-nginx
|
||||
terminationGracePeriodSeconds: 300
|
||||
volumes:
|
||||
|
|
|
@ -489,8 +489,6 @@ spec:
|
|||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
|
@ -502,6 +500,10 @@ spec:
|
|||
nodeSelector:
|
||||
ingress-ready: "true"
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
sysctls:
|
||||
- name: net.ipv4.ip_unprivileged_port_start
|
||||
value: "80"
|
||||
serviceAccountName: ingress-nginx
|
||||
terminationGracePeriodSeconds: 0
|
||||
tolerations:
|
||||
|
|
|
@ -486,8 +486,6 @@ spec:
|
|||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
|
@ -498,6 +496,10 @@ spec:
|
|||
dnsPolicy: ClusterFirst
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
sysctls:
|
||||
- name: net.ipv4.ip_unprivileged_port_start
|
||||
value: "80"
|
||||
serviceAccountName: ingress-nginx
|
||||
terminationGracePeriodSeconds: 300
|
||||
volumes:
|
||||
|
|
|
@ -11,8 +11,8 @@ metadata:
|
|||
name: ingress-nginx
|
||||
namespace: ingress-nginx
|
||||
spec:
|
||||
allowedCapabilities:
|
||||
- NET_BIND_SERVICE
|
||||
allowedUnsafeSysctls:
|
||||
- net.ipv4.ip_unprivileged_port_start
|
||||
privileged: false
|
||||
allowPrivilegeEscalation: true
|
||||
# Allow core volume types.
|
||||
|
|
|
@ -367,115 +367,5 @@ Warning Failed 5m5s (x4 over 6m34s) kubelet Failed to pull ima
|
|||
c. *.appspot.com -> This is a Google domain, part of the domain used for GCR.
|
||||
|
||||
## Unable to listen on port (80/443)
|
||||
One possible reason for this error is lack of permission to bind to the port. Ports 80, 443, and any other port < 1024 are Linux privileged ports which historically could only be bound by root. The ingress-nginx-controller uses the CAP_NET_BIND_SERVICE [linux capability](https://man7.org/linux/man-pages/man7/capabilities.7.html) to allow binding these ports as a normal user (www-data / 101). This involves two components:
|
||||
1. In the image, the /nginx-ingress-controller file has the cap_net_bind_service capability added (e.g. via [setcap](https://man7.org/linux/man-pages/man8/setcap.8.html))
|
||||
2. The NET_BIND_SERVICE capability is added to the container in the containerSecurityContext of the deployment.
|
||||
|
||||
If encountering this on one/some node(s) and not on others, try to purge and pull a fresh copy of the image to the affected node(s), in case there has been corruption of the underlying layers to lose the capability on the executable.
|
||||
|
||||
### Create a test pod
|
||||
The /nginx-ingress-controller process exits/crashes when encountering this error, making it difficult to troubleshoot what is happening inside the container. To get around this, start an equivalent container running "sleep 3600", and exec into it for further troubleshooting. For example:
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: ingress-nginx-sleep
|
||||
namespace: default
|
||||
labels:
|
||||
app: nginx
|
||||
spec:
|
||||
containers:
|
||||
- name: nginx
|
||||
image: ##_CONTROLLER_IMAGE_##
|
||||
resources:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
limits:
|
||||
memory: "1Gi"
|
||||
cpu: "1"
|
||||
command: ["sleep"]
|
||||
args: ["3600"]
|
||||
ports:
|
||||
- containerPort: 80
|
||||
name: http
|
||||
protocol: TCP
|
||||
- containerPort: 443
|
||||
name: https
|
||||
protocol: TCP
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 101
|
||||
restartPolicy: Never
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: ##_NODE_NAME_##
|
||||
tolerations:
|
||||
- key: "node.kubernetes.io/unschedulable"
|
||||
operator: "Exists"
|
||||
effect: NoSchedule
|
||||
```
|
||||
* update the namespace if applicable/desired
|
||||
* replace `##_NODE_NAME_##` with the problematic node (or remove nodeSelector section if problem is not confined to one node)
|
||||
* replace `##_CONTROLLER_IMAGE_##` with the same image as in use by your ingress-nginx deployment
|
||||
* confirm the securityContext section matches what is in place for ingress-nginx-controller pods in your cluster
|
||||
|
||||
Apply the YAML and open a shell into the pod.
|
||||
Try to manually run the controller process:
|
||||
```console
|
||||
$ /nginx-ingress-controller
|
||||
```
|
||||
You should get the same error as from the ingress controller pod logs.
|
||||
|
||||
Confirm the capabilities are properly surfacing into the pod:
|
||||
```console
|
||||
$ grep CapBnd /proc/1/status
|
||||
CapBnd: 0000000000000400
|
||||
```
|
||||
The above value has only net_bind_service enabled (per security context in YAML which adds that and drops all). If you get a different value, then you can decode it on another linux box (capsh not available in this container) like below, and then figure out why specified capabilities are not propagating into the pod/container.
|
||||
```console
|
||||
$ capsh --decode=0000000000000400
|
||||
0x0000000000000400=cap_net_bind_service
|
||||
```
|
||||
|
||||
## Create a test pod as root
|
||||
(Note, this may be restricted by PodSecurityPolicy, PodSecurityAdmission/Standards, OPA Gatekeeper, etc. in which case you will need to do the appropriate workaround for testing, e.g. deploy in a new namespace without the restrictions.)
|
||||
To test further you may want to install additional utilities, etc. Modify the pod yaml by:
|
||||
* changing runAsUser from 101 to 0
|
||||
* removing the "drop..ALL" section from the capabilities.
|
||||
|
||||
Some things to try after shelling into this container:
|
||||
|
||||
Try running the controller as the www-data (101) user:
|
||||
```console
|
||||
$ chmod 4755 /nginx-ingress-controller
|
||||
$ /nginx-ingress-controller
|
||||
```
|
||||
Examine the errors to see if there is still an issue listening on the port or if it passed that and moved on to other expected errors due to running out of context.
|
||||
|
||||
Install the libcap package and check capabilities on the file:
|
||||
```console
|
||||
$ apk add libcap
|
||||
(1/1) Installing libcap (2.50-r0)
|
||||
Executing busybox-1.33.1-r7.trigger
|
||||
OK: 26 MiB in 41 packages
|
||||
$ getcap /nginx-ingress-controller
|
||||
/nginx-ingress-controller cap_net_bind_service=ep
|
||||
```
|
||||
(if missing, see above about purging image on the server and re-pulling)
|
||||
|
||||
Strace the executable to see what system calls are being executed when it fails:
|
||||
```console
|
||||
$ apk add strace
|
||||
(1/1) Installing strace (5.12-r0)
|
||||
Executing busybox-1.33.1-r7.trigger
|
||||
OK: 28 MiB in 42 packages
|
||||
$ strace /nginx-ingress-controller
|
||||
execve("/nginx-ingress-controller", ["/nginx-ingress-controller"], 0x7ffeb9eb3240 /* 131 vars */) = 0
|
||||
arch_prctl(ARCH_SET_FS, 0x29ea690) = 0
|
||||
...
|
||||
```
|
||||
One possible reason for this error is lack of permission to bind to the port. Ports 80, 443, and any other port < 1024 are Linux privileged ports which historically could only be bound by root. The ingress-nginx-controller uses the [sysctl](https://sysctl-explorer.net/net/ipv4/ip_unprivileged_port_start/) `net.ipv4.ip_unprivileged_port_start` to allow binding these ports as a normal user (www-data / 101). This involves:
|
||||
1. The sysctl `net.ipv4.ip_unprivileged_port_start` is set on the pod in the [securityContext](https://kubernetes.io/docs/tasks/administer-cluster/sysctl-cluster/) of the deployment. This requires Kubernetes 1.22 or later.
|
||||
|
|
|
@ -20,6 +20,7 @@ ARG TARGETARCH
|
|||
ARG VERSION
|
||||
ARG COMMIT_SHA
|
||||
ARG BUILD_ID=UNSET
|
||||
ARG SETCAP=false
|
||||
|
||||
LABEL org.opencontainers.image.title="NGINX Ingress Controller for Kubernetes"
|
||||
LABEL org.opencontainers.image.documentation="https://kubernetes.github.io/ingress-nginx/"
|
||||
|
@ -64,16 +65,15 @@ RUN bash -xeu -c ' \
|
|||
# Put libs of newer modules under `/modules_mount/<other>/lib` and add that path below
|
||||
# Could get complicated if arch specific paths become a need
|
||||
&& echo "/lib:/usr/lib:/usr/local/lib:/modules_mount/etc/nginx/modules/otel" > /etc/ld-musl-x86_64.path
|
||||
|
||||
|
||||
RUN apk add --no-cache libcap \
|
||||
RUN if [ "${SETCAP}" == "true" ]; then apk add --no-cache libcap \
|
||||
&& setcap cap_net_bind_service=+ep /nginx-ingress-controller \
|
||||
&& setcap -v cap_net_bind_service=+ep /nginx-ingress-controller \
|
||||
&& setcap cap_net_bind_service=+ep /usr/local/nginx/sbin/nginx \
|
||||
&& setcap -v cap_net_bind_service=+ep /usr/local/nginx/sbin/nginx \
|
||||
&& setcap cap_net_bind_service=+ep /usr/bin/dumb-init \
|
||||
&& setcap -v cap_net_bind_service=+ep /usr/bin/dumb-init \
|
||||
&& apk del libcap \
|
||||
&& apk del libcap; fi \
|
||||
&& ln -sf /usr/local/nginx/sbin/nginx /usr/bin/nginx
|
||||
|
||||
USER www-data
|
||||
|
|
|
@ -29,6 +29,7 @@ ARG TARGETARCH
|
|||
ARG VERSION
|
||||
ARG COMMIT_SHA
|
||||
ARG BUILD_ID=UNSET
|
||||
ARG SETCAP=false
|
||||
|
||||
LABEL org.opencontainers.image.title="NGINX Ingress Controller for Kubernetes"
|
||||
LABEL org.opencontainers.image.documentation="https://kubernetes.github.io/ingress-nginx/"
|
||||
|
@ -58,7 +59,7 @@ RUN apk update \
|
|||
util-linux \
|
||||
&& ln -s /usr/local/nginx/sbin/nginx /sbin/nginx \
|
||||
&& adduser -S -D -H -u 101 -h /usr/local/nginx \
|
||||
-s /sbin/nologin -G www-data -g www-data www-data
|
||||
-s /sbin/nologin -G www-data -g www-data www-data
|
||||
|
||||
COPY --from=chroot /chroot /chroot
|
||||
|
||||
|
@ -87,14 +88,16 @@ RUN bash -xeu -c ' \
|
|||
&& echo "/lib:/usr/lib:/usr/local/lib:/modules_mount/etc/nginx/modules/otel" > /chroot/etc/ld-musl-x86_64.path
|
||||
|
||||
RUN apk add --no-cache libcap \
|
||||
&& setcap cap_sys_chroot,cap_net_bind_service=+ep /nginx-ingress-controller \
|
||||
&& setcap -v cap_sys_chroot,cap_net_bind_service=+ep /nginx-ingress-controller \
|
||||
&& setcap cap_sys_chroot,cap_net_bind_service=+ep /usr/bin/unshare \
|
||||
&& setcap -v cap_sys_chroot,cap_net_bind_service=+ep /usr/bin/unshare \
|
||||
&& if [ "${SETCAP}" == "true" ]; then CAP="cap_sys_chroot,cap_net_bind_service=+ep" \
|
||||
&& setcap cap_net_bind_service=+ep /chroot/usr/local/nginx/sbin/nginx \
|
||||
&& setcap -v cap_net_bind_service=+ep /chroot/usr/local/nginx/sbin/nginx \
|
||||
&& setcap cap_sys_chroot,cap_net_bind_service=+ep /usr/bin/dumb-init \
|
||||
&& setcap -v cap_sys_chroot,cap_net_bind_service=+ep /usr/bin/dumb-init \
|
||||
; else CAP="cap_sys_chroot=+ep"; fi \
|
||||
&& setcap ${CAP} /nginx-ingress-controller \
|
||||
&& setcap -v ${CAP} /nginx-ingress-controller \
|
||||
&& setcap ${CAP} /usr/bin/unshare \
|
||||
&& setcap -v ${CAP} /usr/bin/unshare \
|
||||
&& setcap ${CAP} /usr/bin/dumb-init \
|
||||
&& setcap -v ${CAP} /usr/bin/dumb-init \
|
||||
&& apk del libcap
|
||||
|
||||
RUN ln -sf /chroot/etc/nginx /etc/nginx \
|
||||
|
@ -124,4 +127,4 @@ EXPOSE 80 443
|
|||
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||
|
||||
CMD ["/nginx-ingress-controller"]
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue