post image :date_long | 3 min Read

CKS container runtimes

# go inside of a container and call
root@scw-k8s:~# k exec -it pod -- sh
/ # uname -r
5.4.0-96-generic

# step outside of a container and call
root@scw-k8s:~# strace uname -n
execve("/bin/uname", ["uname", "-n"], 0x7fff04c24c98 /* 24 vars */) = 0
brk(NULL)                               = 0x55bfd0cf3000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=23538, ...}) = 0
mmap(NULL, 23538, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f6e56b83000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2030928, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f6e56b81000
mmap(NULL, 4131552, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f6e5656f000
mprotect(0x7f6e56756000, 2097152, PROT_NONE) = 0
mmap(0x7f6e56956000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7f6e56956000
mmap(0x7f6e5695c000, 15072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f6e5695c000
close(3)                                = 0
arch_prctl(ARCH_SET_FS, 0x7f6e56b82540) = 0
mprotect(0x7f6e56956000, 16384, PROT_READ) = 0
mprotect(0x55bfced96000, 4096, PROT_READ) = 0
mprotect(0x7f6e56b89000, 4096, PROT_READ) = 0
...
Katacontainers

Katacontainers create a very lightweight VM with a separate kernel (quite different than a traditional containers) Strong separation layer! Using QEUM as default (needs virtualization, like neasted virtualisztion)

gVisor
  • user-space kernel for containers (might be confusing)
  • adds another layer of separation
  • not hypervisor or VM based
  • simulates kernel syscalls with limited functionality (in golang)
  • runs in a user space separated from a linux kernel
  • runtime is called runsc

#!/usr/bin/env bash
# IF THIS FAILS then you can try to change the URL= further down from specific to the latest release
# https://gvisor.dev/docs/user_guide/install


# gvisor
sudo apt-get update && \
sudo apt-get install -y \
    apt-transport-https \
    ca-certificates \
    curl \
    gnupg-agent \
    software-properties-common


# install from web
(
  set -e
  ARCH=$(uname -m)
  URL=https://storage.googleapis.com/gvisor/releases/release/20210806/${ARCH}
  # URL=https://storage.googleapis.com/gvisor/releases/release/latest/${ARCH} # TRY THIS URL INSTEAD IF THE SCRIPT DOESNT WORK FOR YOU
  wget ${URL}/runsc ${URL}/runsc.sha512 \
    ${URL}/containerd-shim-runsc-v1 ${URL}/containerd-shim-runsc-v1.sha512
  sha512sum -c runsc.sha512 \
    -c containerd-shim-runsc-v1.sha512
  rm -f *.sha512
  chmod a+rx runsc containerd-shim-runsc-v1
  sudo mv runsc containerd-shim-runsc-v1 /usr/local/bin
)


# containerd enable runsc
cat > /etc/containerd/config.toml <<EOF
disabled_plugins = []
imports = []
oom_score = 0
plugin_dir = ""
required_plugins = []
root = "/var/lib/containerd"
state = "/run/containerd"
version = 2

[plugins]
  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runsc]
    runtime_type = "io.containerd.runsc.v1"

  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
      base_runtime_spec = ""
      container_annotations = []
      pod_annotations = []
      privileged_without_host_devices = false
      runtime_engine = ""
      runtime_root = ""
      runtime_type = "io.containerd.runc.v2"

      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
        BinaryName = ""
        CriuImagePath = ""
        CriuPath = ""
        CriuWorkPath = ""
        IoGid = 0
        IoUid = 0
        NoNewKeyring = false
        NoPivotRoot = false
        Root = ""
        ShimCgroup = ""
        SystemdCgroup = true
EOF

systemctl restart containerd

Then you can create a runtimeClass and pod using that particular runtimeClass

apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc

Create a pod itself

---
apiVersion: v1
kind: Pod
metadata:
  labels:
    run: gvisor
  name: gvisor
spec:
  runtimeClassName: gvisor
  containers:
    - image: nginx
      name: gvisor
      resources: {}
  dnsPolicy: ClusterFirst
  restartPolicy: Always
author image

Jan Toth

I have been in DevOps related jobs for past 6 years dealing mainly with Kubernetes in AWS and on-premise as well. I spent quite a lot …

comments powered by Disqus