
CKS OS Level Security Domains

Define privilege and access control for Pod/Container
  • userID and GroupID
  • run privileged or unprivileged
  • Linux Capabilities
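
Linux capabilities get no demo of their own below, so here is a minimal sketch of a container-level securityContext that drops everything and adds back a single capability (capability names are the standard Linux ones without the CAP_ prefix):

spec:
  containers:
  - name: pod
    image: busybox
    securityContext:
      capabilities:
        drop: ["ALL"]              # start from an empty capability set
        add: ["NET_BIND_SERVICE"]  # allow binding to ports below 1024 only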

Run a simple container and check user and group
root@scw-k8s:~# k run pod --image=busybox --command -oyaml --dry-run=client -- sh -c 'sleep 1d' > bb.yaml
root@scw-k8s:~# k create -f  bb.yaml
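
Before any securityContext is applied, the container runs as whatever user the image defines; for the stock busybox image that is root, which a quick exec confirms (the group list may differ slightly between busybox versions):

root@scw-k8s:~# k exec pod -- id
uid=0(root) gid=0(root) groups=0(root),10(wheel)
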
Now let’s try to set up a security context at the pod level

Edit the container manifest first

root@scw-k8s:~# cat bb.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: pod
  name: pod
spec:
  securityContext:
    runAsUser: 1000    # <<< notice
    runAsGroup: 3000   # <<< notice
  containers:
  - command:
    - sh
    - -c
    - sleep 1d
    image: busybox
    name: pod
    resources: {}
  dnsPolicy: ClusterFirst
  restartPolicy: Always
status: {}

Once the pod is created, you no longer have permission to create a file in the root directory / :)
root@scw-k8s:~# k exec -it pod -- sh
/ $ id
uid=1000 gid=3000
/ $ touch test
touch: test: Permission denied
/ $ touch /tmp/sss.txt
/ $ ls -l /tmp/sss.txt
-rw-r--r--    1 1000     3000             0 May 30 18:50 /tmp/sss.txt
/ $
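
You can also read the pod-level securityContext straight from the API instead of exec'ing into the pod (output trimmed to the matching block):

root@scw-k8s:~# k get pod pod -oyaml | grep -A2 securityContext
  securityContext:
    runAsGroup: 3000
    runAsUser: 1000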

Run the pod with the flag runAsNonRoot: true

Hmm, and you will see that it actually runs! Why? Because we previously specified a securityContext at the pod level with user 1000 and group 3000, so the container does not run as root!

root@scw-k8s:~# cat bb.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: pod
  name: pod
spec:
  securityContext:
    runAsUser: 1000
    runAsGroup: 3000
  containers:
  - command:
    - sh
    - -c
    - sleep 1d
    image: busybox
    name: pod
    resources: {}
    securityContext:
      runAsNonRoot: true
  dnsPolicy: ClusterFirst
  restartPolicy: Always
status: {}
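
Recreate the pod and watch the runAsNonRoot check pass, since UID 1000 is non-root (output should look roughly like this):

root@scw-k8s:~# k delete pod pod --force --grace-period 0
root@scw-k8s:~# k create -f bb.yaml
pod/pod created
root@scw-k8s:~# k get pod pod
NAME   READY   STATUS    RESTARTS   AGE
pod    1/1     Running   0          4s
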
Let’s comment out the pod-level securityContext section and see what happens
root@scw-k8s:~# cat  bb.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: pod
  name: pod
spec:
#  securityContext:
#    runAsUser: 1000
#    runAsGroup: 3000
  containers:
  - command:
    - sh
    - -c
    - sleep 1d
    image: busybox
    name: pod
    resources: {}
    securityContext:
      runAsNonRoot: true
  dnsPolicy: ClusterFirst
  restartPolicy: Always
status: {}

And check the actual error

root@scw-k8s:~# k get pods
NAME     READY   STATUS                       RESTARTS   AGE
gvisor   0/1     ContainerStatusUnknown       1          12d
pod      0/1     CreateContainerConfigError   0          5s

root@scw-k8s:~# k describe  pod pod
Name:         pod
Namespace:    default
Priority:     0
Node:         scw-k8s/10.18.164.57
...
...

  Warning  Failed     4s (x4 over 37s)  kubelet            Error: container has runAsNonRoot and image will run as root (pod: "pod_default(3f4f06af-ff78-48eb-89cb-08c3c511d9c1)", container: pod)
  Normal   Pulled     4s                kubelet            Successfully pulled image "busybox" in 1.004495509s
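
One way to fix this without restoring the pod-level section is to pin a non-zero UID at the container level (an image that declares a non-root USER would work just as well); a minimal sketch:

    securityContext:
      runAsNonRoot: true
      runAsUser: 1000   # an explicit non-zero UID satisfies the runAsNonRoot check

The kubelet rejects the container only when it cannot prove the runtime user is non-root, which is exactly the case with the plain busybox image whose default user is root.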
Privileged Container
  • by default, Docker containers run unprivileged
  • it is possible to run privileged
    • access all devices
    • run the Docker daemon inside a container (docker run --privileged)
  • what does running privileged actually mean?
    • container user 0 (root) maps directly to host user 0 (root)
root@scw-k8s:~# cat bb.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: pod
  name: pod
spec:
#  securityContext:
#    runAsUser: 1000
#    runAsGroup: 3000
  containers:
  - command:
    - sh
    - -c
    - sleep 1d
    image: busybox
    name: pod
    resources: {}
    securityContext:
      # runAsNonRoot: true
      privileged: true
  dnsPolicy: ClusterFirst
  restartPolicy: Always
status: {}

Now we are able to run the sysctl command inside the container and change kernel parameters (very dangerous!)


/ # sysctl kernel.hostname=attacker
kernel.hostname = attacker
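
You can see why this is so dangerous: in a privileged container the effective capability set of the process is fully populated (the exact mask depends on the kernel version):

/ # grep CapEff /proc/1/status
CapEff: 0000003fffffffff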
Privilege Escalation (allowPrivilegeEscalation)
  • by default it is enabled in Kubernetes
root@scw-k8s:~# cat  bb.yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    run: pod
  name: pod
spec:
#  securityContext:
#    runAsUser: 1000
#    runAsGroup: 3000
  containers:
  - command:
    - sh
    - -c
    - sleep 1d
    image: busybox
    name: pod
    resources: {}
    securityContext:
      # runAsNonRoot: true
      # privileged: true
      allowPrivilegeEscalation: false
  dnsPolicy: ClusterFirst
  restartPolicy: Always
status: {}

Let’s check it out

root@scw-k8s:~# k exec -it pod -- sh
/ # cat /proc/1/status
Name:   sleep
Umask:  0022
State:  S (sleeping)
Tgid:   1
...
...
...
NoNewPrivs:     1   # <<<    notice this setting
Seccomp:        0
voluntary_ctxt_switches:        28
nonvoluntary_ctxt_switches:     288
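
For contrast: with allowPrivilegeEscalation left at its default of true, the same check shows NoNewPrivs: 0, meaning a setuid binary (sudo, for example) could still gain privileges inside the container:

/ # cat /proc/1/status | grep NoNewPrivs
NoNewPrivs:     0   # <<< default behaviour, escalation still possible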

Pod Security Policies
  • cluster-level resource
  • created by the Kubernetes cluster administrator
  • enforced by an admission controller that has to be enabled!!!

Set up the kube-apiserver first, since PodSecurityPolicy is an admission controller

root@scw-k8s:~# cat /etc/kubernetes/manifests/kube-apiserver.yaml  | grep admiss -B10 -A3
  name: kube-apiserver
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-apiserver
    - --advertise-address=10.18.164.57
    - --allow-privileged=true
    - --authorization-mode=Node,RBAC
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --enable-admission-plugins=NodeRestriction,PodSecurityPolicy  # <<< add PodSecurityPolicy after the comma!
    - --enable-bootstrap-token-auth=true
    - --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
    - --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
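
The kubelet watches the static pod manifests and restarts the kube-apiserver on its own; give it a moment and check that it came back (the pod name follows the kube-apiserver-<node> convention):

root@scw-k8s:~# k -n kube-system get pod kube-apiserver-scw-k8s
NAME                     READY   STATUS    RESTARTS   AGE
kube-apiserver-scw-k8s   1/1     Running   0          35s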

Create your very first pod security policy

root@scw-k8s:~# cat psp.yaml
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: cks-psp
spec:
  privileged: false  # Don't allow privileged pods!
  # The rest fills in some required fields.
  seLinux:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  runAsUser:
    rule: RunAsAny
  fsGroup:
    rule: RunAsAny
  volumes:
  - '*'
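
Create it and confirm it registered; the PRIV column should show false:

root@scw-k8s:~# k create -f psp.yaml
podsecuritypolicy.policy/cks-psp created
root@scw-k8s:~# k get psp
NAME      PRIV    CAPS   SELINUX    RUNASUSER   FSGROUP    SUPGROUP   READONLYROOTFS   VOLUMES
cks-psp   false          RunAsAny   RunAsAny    RunAsAny   RunAsAny   false            *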

Now you will not be able to create pretty much anything, since the default serviceaccount has no permission to use any podsecuritypolicies objects. Grant it the use verb via a role and rolebinding:

k create role psp --verb=use --resource=podsecuritypolicies
k create rolebinding psp-rb --role psp --serviceaccount=default:default
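
You can verify the binding took effect without creating anything:

root@scw-k8s:~# k auth can-i use podsecuritypolicy/cks-psp --as=system:serviceaccount:default:default
yes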

How about creating a deployment?

# It is gonna work now :)
root@scw-k8s:~# k create  deployment jano --image=nginx:alpine
root@scw-k8s:~# k get deploy
NAME   READY   UP-TO-DATE   AVAILABLE   AGE
jano   1/1     1            1           11m

The following deployment is going to fail because it asks for privileged: true, which is exactly what the PodSecurityPolicy we created a while ago forbids.

root@scw-k8s:~# k create deployment cks-psp \
--image=nginx:alpine --replicas=3 \
-oyaml --dry-run=client \
| sed -E 's/^(\s+- image.*)$/\1 \n        securityContext:\n          privileged: true/'  | k create -f -
deployment.apps/cks-psp created

root@scw-k8s:~# k get events | grep cks-psp
5s          Warning   FailedCreate        replicaset/cks-psp-79fc878f85   Error creating: pods "cks-psp-79fc878f85-" is forbidden: PodSecurityPolicy: unable to admit pod: [spec.containers[0].securityContext.privileged: Invalid value: true: Privileged containers are not allowed]
46s         Normal    ScalingReplicaSet   deployment/cks-psp              Scaled up replica set cks-psp-79fc878f85 to 3

Let’s do one more exercise and try to comply with our PodSecurityPolicy.

root@scw-k8s:~# k create deployment cks-psp-will-work --image=nginx:alpine --replicas=3 -oyaml --dry-run=client | sed -E 's/^(\s+- image.*)$/\1 \n        securityContext:\n          privileged: false/'  | k create -f -
deployment.apps/cks-psp-will-work created

root@scw-k8s:~# k get deploy
NAME                READY   UP-TO-DATE   AVAILABLE   AGE
cks-psp             0/3     0            0           2m46s
cks-psp-will-work   3/3     3            3           43s
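
The admission controller also records which policy validated each pod in the kubernetes.io/psp annotation, which is handy once several policies exist:

root@scw-k8s:~# k get pods -l app=cks-psp-will-work -oyaml | grep 'kubernetes.io/psp' | head -1
      kubernetes.io/psp: cks-psp
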
Create a privileged pod
# from the CKS simulator
k run prime --image=nginx:alpine --privileged=true --command -o yaml --dry-run=client -- sh -c 'apk add iptables && sleep 1d' | k create -f -
pod/prime created
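
Because the pod is privileged, both the apk install and iptables itself work; once it is running you can verify (rule listing trimmed):

k exec prime -- iptables -L
Chain INPUT (policy ACCEPT)
target     prot opt source               destination
...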
Disable allowPrivilegeEscalation
controlplane $ k get deployments.apps logger -oyaml
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    deployment.kubernetes.io/revision: "2"
  creationTimestamp: "2022-06-03T09:26:23Z"
  generation: 2
  labels:
    app: logger
  name: logger
  namespace: default
  resourceVersion: "1580"
  uid: d46deaf9-c544-4062-8d84-becbfa2ad4ba
spec:
  progressDeadlineSeconds: 600
  replicas: 3
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: logger
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: logger
    spec:
      containers:
      - command:
        - sh
        - -c
        - while true; do cat /proc/1/status | grep NoNewPrivs; sleep 1; done
        image: bash:5.0.18-alpine3.14
        imagePullPolicy: IfNotPresent
        name: httpd
        resources: {}
        securityContext:
          allowPrivilegeEscalation: false
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 0
status:
  availableReplicas: 3
  ...
  replicas: 3
  updatedReplicas: 3
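
Since the container loops printing the NoNewPrivs line every second, the deployment's own logs double as the verification:

controlplane $ k logs deploy/logger | tail -n 3
NoNewPrivs:     1
NoNewPrivs:     1
NoNewPrivs:     1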