Kubernetes


Installation

sudo apt-get update
sudo apt-get install -y apt-transport-https ca-certificates curl
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.30/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl
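
To confirm the install and the version pin:

kubeadm version
kubectl version --client
apt-mark showhold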

Command cheat sheet

minikube start --mount-string="$HOME/go/src/github.com/nginx:/data" --mount --driver=docker
minikube ip
kubectl cluster-info
kubectl create deployment nginx-depl --image=nginx
kubectl get pod
kubectl get replicaset
kubectl get pod <POD-ID> -o wide
kubectl get deployment nginx-depl
kubectl get deployment nginx-depl -o yaml
kubectl edit deployment nginx-depl
kubectl logs <POD-ID>
kubectl describe pod <POD-ID>
kubectl exec -it <POD-ID> -- /bin/bash
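# a sample deployment manifest for apply -f is shown after this list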
kubectl apply -f <CONFIG.YML>
kubectl get deployment nginx-deployment -o yaml
# https://kubernetes.github.io/ingress-nginx/troubleshooting/
kubectl get ingress -n mongo-admin
kubectl describe ing mongo-express-ingress -n mongo-admin
kubectl describe ingress mongo-express-ingress -n mongo-admin
kubectl rollout restart deployment <deployment_name> -n <namespace>

kubectl delete --all pods --namespace=kubernetes-dashboard
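
A minimal deployment manifest to feed kubectl apply -f (all names here are placeholders):

apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:1.25
        ports:
        - containerPort: 80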

System reset

minikube stop
minikube delete
rm -rf ~/.minikube
rm -rf ~/.kube
docker kill $(docker ps -q)
docker rm $(docker ps -a -q)
docker rmi $(docker images -q)
docker system prune

Setup cluster

sudo su
swapoff -a
kubeadm reset
kubeadm init

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
sudo systemctl start docker
sudo swapoff -a
sudo mv /etc/containerd/config.toml  /etc/containerd/config222.toml
sudo kubeadm reset --force
sudo kubeadm init --pod-network-cidr=10.244.0.0/16
sudo mkdir -p /run/flannel
sudo nano /run/flannel/subnet.env

FLANNEL_NETWORK=10.244.0.0/16
FLANNEL_SUBNET=10.244.0.1/24
FLANNEL_MTU=1450
FLANNEL_IPMASQ=true

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

kubectl apply -f https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml

kubectl taint nodes --all node-role.kubernetes.io/control-plane-
kubectl taint nodes --all node.kubernetes.io/disk-pressure-
kubectl taint nodes --all node.kubernetes.io/not-ready-
kubectl taint nodes --all node.kubernetes.io/not-ready:NoSchedule-
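
Once the taints are gone the node should start scheduling pods; quick check:

kubectl get nodes -o wide
kubectl get pods -A -o wide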

# Force-delete a stuck pod
kubectl delete pod <PODNAME> --grace-period=0 --force --namespace <NAMESPACE>

Translate a Docker Compose File to Kubernetes Resources
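
The standard tool for this is kompose; a minimal run, assuming a docker-compose.yaml in the current directory:

kompose convert -f docker-compose.yaml -o k8s/
kubectl apply -f k8s/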

Helm

https://helm.sh/docs/intro/install/
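
A typical first run against a public chart, e.g. ingress-nginx (release and namespace names are arbitrary):

helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo update
helm install my-ingress ingress-nginx/ingress-nginx -n ingress-nginx --create-namespace
helm list -A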

Clear space / Remove unused images

crictl rmi --prune
docker image prune -a

Ansible

Container Network Interface

Ingress
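
A minimal Ingress for the ingress-nginx controller, matching the mongo-express commands in the cheat sheet above (host, service name and port are assumptions):

apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: mongo-express-ingress
  namespace: mongo-admin
spec:
  ingressClassName: nginx
  rules:
  - host: mongo.example.local
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: mongo-express-service
            port:
              number: 8081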

Complete Reset

kubeadm reset
systemctl stop kubelet
systemctl stop docker
rm -rf /etc/cni/net.d/
rm -rf /var/lib/cni/
rm -rf /var/lib/kubelet/*
rm -rf /run/flannel
rm -rf /etc/cni/
ip link set cni0 down
ip link delete cni0
brctl delbr cni0
ifconfig flannel.1 down
systemctl start docker

Troubleshooting

Check status

kubectl version

kubectl config view

apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: DATA+OMITTED
    server: https://192.168.0.64:6443
  name: kubernetes
contexts:
- context:
    cluster: kubernetes
    user: kubernetes-admin
  name: kubernetes-admin@kubernetes
current-context: kubernetes-admin@kubernetes
kind: Config
preferences: {}
users:
- name: kubernetes-admin
  user:
    client-certificate-data: DATA+OMITTED
    client-key-data: DATA+OMITTED

kubectl cluster-info

Kubernetes control plane is running at https://192.168.0.64:6443
CoreDNS is running at https://192.168.0.64:6443/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy

To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'.

kubectl describe node -A

kubectl get pod -A

NAMESPACE      NAME                                       READY   STATUS    RESTARTS      AGE
kube-flannel   kube-flannel-ds-tt45j                      1/1     Running   5 (65m ago)   80m
kube-system    coredns-76f75df574-rzth2                   1/1     Running   2 (65m ago)   87m
kube-system    coredns-76f75df574-xvdl6                   1/1     Running   2 (65m ago)   87m
kube-system    etcd-x570-aorus-ultra                      1/1     Running   2 (65m ago)   87m
kube-system    kube-apiserver-x570-aorus-ultra            1/1     Running   2 (65m ago)   87m
kube-system    kube-controller-manager-x570-aorus-ultra   1/1     Running   9 (65m ago)   87m
kube-system    kube-proxy-lm95s                           1/1     Running   2 (65m ago)   87m
kube-system    kube-scheduler-x570-aorus-ultra            1/1     Running   9 (65m ago)   87m

failed to verify certificate: x509 on kubectl apply

error: error validating "./kube-flannel.yml": error validating data: failed to download openapi: Get "https://192.168.0.68:6443/openapi/v2?timeout=32s": tls: failed to verify certificate: x509: certificate signed by unknown authority (possibly because of "crypto/rsa: verification error" while trying to verify candidate authority certificate "kubernetes"); if you choose to ignore these errors, turn validation off with --validate=false

Fix: refresh the kubeconfig from admin.conf

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

failed to create new CRI runtime service

root@aleksashkin:/home/artem# kubeadm init
[init] Using Kubernetes version: v1.32.3
[preflight] Running pre-flight checks
W0330 13:18:44.831017    2059 checks.go:1077] [preflight] WARNING: Couldn't create the interface used for talking to the container runtime: failed to create new CRI runtime service: validate service connection: validate CRI v1 runtime API for endpoint "unix:///var/run/containerd/containerd.sock": rpc error: code = Unimplemented desc = unknown service runtime.v1.RuntimeService
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
error execution phase preflight: [preflight] Some fatal errors occurred:
failed to create new CRI runtime service: validate service connection: validate CRI v1 runtime API for endpoint "unix:///var/run/containerd/containerd.sock": rpc error: code = Unimplemented desc = unknown service runtime.v1.RuntimeService[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher

Check containerd status

sudo systemctl status containerd

Update the containerd default config

sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml

Make sure that this line exists

[plugins."io.containerd.grpc.v1.cri"]
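
On systemd-based distros the runc cgroup driver usually has to be systemd as well; the stock config ships with SystemdCgroup = false, which can be flipped with:

sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml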

Restart containerd

sudo systemctl restart containerd

Then run sudo kubeadm init again.

CoreDNS does not run

Check that the node is Ready:

kubectl get node -A

If it is not, check its status; the Taints field should be <none>:

kubectl describe node x570-aorus-ultra

If the taints keep coming back (typically disk pressure), relax the kubelet eviction thresholds:

nano /var/lib/kubelet/config.yaml

and add at the end

evictionHard:
    memory.available:  "100Mi"
    nodefs.available:  "2%"
    nodefs.inodesFree: "2%"
    imagefs.available: "2%"

Save, then run:

systemctl restart containerd
systemctl restart kubelet
kubectl taint nodes --all node-role.kubernetes.io/control-plane-
kubectl taint nodes --all node.kubernetes.io/disk-pressure-
kubectl taint nodes --all node.kubernetes.io/not-ready-
kubectl taint nodes --all node.kubernetes.io/not-ready:NoSchedule-
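
CoreDNS pods should then reach Running (they carry the k8s-app=kube-dns label):

kubectl get pods -n kube-system -l k8s-app=kube-dns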

Flannel does not run

In the logs:

Failed to check br_netfilter: stat /proc/sys/net/bridge/bridge-nf-call-iptables: no such file or directory

Load the br_netfilter module and make it load on boot:

modprobe br_netfilter
echo "br_netfilter" >> /etc/modules

API server / etcd does not run

artem@aleksashkin:~$ sudo crictl ps -a
CONTAINER           IMAGE               CREATED             STATE               NAME                      ATTEMPT             POD ID              POD                                   NAMESPACE
447d32c38ea64       c69fa2e9cbf5f       3 minutes ago       Running             coredns                   0                   f6abd8a38b8a2       coredns-55cb58b774-b4ft6              kube-system
f4f6965d12742       c69fa2e9cbf5f       3 minutes ago       Running             coredns                   0                   46a1d99241c7b       coredns-55cb58b774-wj8gd              kube-system
b78d44e70ffbf       30ea53e259332       4 minutes ago       Running             kube-flannel              0                   fdd77524bf944       kube-flannel-ds-rpfj7                 kube-flannel
d4ed9229ebc5c       30ea53e259332       4 minutes ago       Exited              install-cni               0                   fdd77524bf944       kube-flannel-ds-rpfj7                 kube-flannel
d1556f62afa03       55ce2385d9d8c       4 minutes ago       Exited              install-cni-plugin        0                   fdd77524bf944       kube-flannel-ds-rpfj7                 kube-flannel
f125adef62e67       01045f200a885       5 minutes ago       Running             kube-proxy                0                   18d91cb7e514c       kube-proxy-l5tsh                      kube-system
c0acf84c6cd00       4db5a05c271ea       5 minutes ago       Running             kube-apiserver            99                  13c13ef3e033e       kube-apiserver-aleksashkin            kube-system
be088c2dc4b7a       de1025c2d4968       5 minutes ago       Running             kube-controller-manager   105                 1b97157f5c8f5       kube-controller-manager-aleksashkin   kube-system
cef7cecfb1bf5       2e96e5913fc06       5 minutes ago       Running             etcd                      100                 aea090a8bc0d8       etcd-aleksashkin                      kube-system
d41a184ac8f40       11492f0faf138       5 minutes ago       Running             kube-scheduler            109                 36e11181f4027       kube-scheduler-aleksashkin            kube-system

Check the logs of the suspect containers:

sudo crictl logs c0acf84c6cd00
sudo crictl logs cef7cecfb1bf5

Verify etcd health:

root@aleksashkin:/home/artem# ETCDCTL_API=3 etcdctl --endpoints=127.0.0.1:2379 --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --cacert=/etc/kubernetes/pki/etcd/ca.crt endpoint health
127.0.0.1:2379 is healthy: successfully committed proposal: took = 8.38773ms

Check whether the port is already in use:

sudo netstat -tulnp | grep 2379

The static pod manifests live here:

artem@aleksashkin:~$ sudo ls -al /etc/kubernetes/manifests/
total 24
drwx------ 2 root root 4096 Mar 30 23:06 .
drwxr-xr-x 4 root root 4096 Mar 30 23:06 ..
-rw------- 1 root root 2404 Mar 30 23:06 etcd.yaml
-rw------- 1 root root 4033 Mar 30 23:06 kube-apiserver.yaml
-rw------- 1 root root 3544 Mar 30 23:06 kube-controller-manager.yaml
-rw------- 1 root root 1464 Mar 30 23:06 kube-scheduler.yaml

Single Node 127.0.0.1 config

sudo rm -rf /etc/kubernetes/pki/
sudo rm -f /etc/kubernetes/{admin.conf,kubelet.conf,controller-manager.conf,scheduler.conf,super-admin.conf}
sudo kubeadm init phase certs all --apiserver-cert-extra-sans "127.0.0.1" --control-plane-endpoint "127.0.0.1"
sudo kubeadm init phase kubeconfig all --control-plane-endpoint "127.0.0.1"
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -text -noout | grep -A 10  "Subject Alternative Name"

systemctl restart containerd
systemctl restart kubelet

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
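
kubectl should now reach the API server on 127.0.0.1:

kubectl cluster-info
kubectl get nodes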