Home | 简体中文 | 繁体中文 | 杂文 | Github | 知乎专栏 | Facebook | Linkedin | Youtube | 打赏(Donations) | About
知乎专栏

108.7. Pod 管理

Pod 状态说明

Pod 状态:

Pod 错误状态的详细说明

		
状态						描述
CrashLoopBackOff		容器退出,kubelet正在将它重启
InvalidImageName		无法解析镜像名称
ImageInspectError		无法校验镜像
ErrImageNeverPull		策略禁止拉取镜像
ImagePullBackOff		正在重试拉取
RegistryUnavailable		连接不到镜像中心
ErrImagePull			通用的拉取镜像出错
CreateContainerConfigError	不能创建kubelet使用的容器配置
CreateContainerError	创建容器失败
PreStartHookError		执行 PreStartContainer 内部 hook 报错(m.internalLifecycle.PreStartContainer 失败)
RunContainerError		启动容器失败
PostStartHookError		执行 postStart hook 报错
ContainersNotInitialized	容器没有初始化完毕
ContainersNotReady		容器没有准备完毕
ContainerCreating		容器创建中
PodInitializing		pod 初始化中
DockerDaemonNotReady	docker还没有完全启动
NetworkPluginNotReady	网络插件还没有完全启动		
		
			

108.7.1. 查看 POD 状态

			
kubectl get pod <pod-name> -o wide		
kubectl get pods --all-namespaces			
			
				

查看默认命名空间下的 pod

			 
[root@localhost ~]# kubectl get pod
NAME                              READY   STATUS    RESTARTS   AGE
hello-minikube-5c856cbf98-6vfvp   1/1     Running   0          6m59s
			
				

查看所有命名空间下的 Pod

						
[root@localhost ~]# kubectl get pods --all-namespaces
NAMESPACE     NAME                                   READY   STATUS    RESTARTS   AGE
default       hello-minikube-5c856cbf98-6vfvp        1/1     Running   1          4d18h
kube-system   coredns-86c58d9df4-2rfqf               1/1     Running   51         4d18h
kube-system   coredns-86c58d9df4-wkb7l               1/1     Running   49         4d18h
kube-system   etcd-minikube                          1/1     Running   12         4d18h
kube-system   kube-addon-manager-minikube            1/1     Running   11         4d18h
kube-system   kube-apiserver-minikube                1/1     Running   74         4d18h
kube-system   kube-controller-manager-minikube       1/1     Running   31         4d18h
kube-system   kube-proxy-brrdd                       1/1     Running   1          4d18h
kube-system   kube-scheduler-minikube                1/1     Running   31         4d18h
kube-system   kubernetes-dashboard-ccc79bfc9-dxcq2   1/1     Running   7          4d17h
kube-system   storage-provisioner                    1/1     Running   2          4d18h		
		
				
			
iMac:~ neo$ kubectl get pods --output=wide
NAME                        READY   STATUS             RESTARTS   AGE   IP           NODE       NOMINATED NODE   READINESS GATES
registry-65854b565b-bkhvq   0/1     ImagePullBackOff   0          18m   172.17.0.4   minikube   <none>           <none>
			
			
				

查看pod标签

			
kubectl get pods --show-labels			
			
				

查看指定标签的pod

			
kubectl get pods -l run=nginx			
			
				

指定命名空间

		
[root@localhost ~]# kubectl get pod --namespace=kube-system
NAME                                   READY   STATUS    RESTARTS   AGE
coredns-86c58d9df4-2rfqf               1/1     Running   0          40m
coredns-86c58d9df4-wkb7l               1/1     Running   0          40m
etcd-minikube                          1/1     Running   0          40m
kube-addon-manager-minikube            1/1     Running   0          41m
kube-apiserver-minikube                1/1     Running   2          40m
kube-controller-manager-minikube       1/1     Running   6          40m
kube-proxy-brrdd                       1/1     Running   0          40m
kube-scheduler-minikube                1/1     Running   5          41m
kubernetes-dashboard-ccc79bfc9-dxcq2   1/1     Running   5          16m
storage-provisioner                    1/1     Running   0          39m		
		
				
格式化输出
			
neo@Netkiller-iMac ~> kubectl get pods -l app=nacos -o jsonpath='{.items[0].metadata.name}'
nacos-0⏎   			
			
					
查看 pod 下面容器

			
root@logging ~# kubectl --kubeconfig=/home/prod/.kube/config -n netkiller get pod neo-6787cfcb9-8s8pp -o jsonpath="{.spec.containers[*].name}"
filebeat neo  
			
					

108.7.2. 运行 POD

			
iMac:kubernetes neo$ kubectl run registry --image=registry:latest			
			
				

			
kubectl run busybox --image=busybox --command -- ping www.netkiller.cn			
			
				

			
kubectl run nginx --replicas=3 --labels="app=example" --image=nginx:latest --port=80			
			
				

			
kubectl run busybox --rm=true --image=busybox --restart=Never -it			
			
				

通过 Yaml 文件运行 Pod

		
apiVersion: v1
kind: Pod
metadata:
  name: counter
spec:
  containers:
  - name: count
    image: busybox
    args: [/bin/sh, -c, 'i=0; while true; do echo "$i: $(date)"; i=$((i+1)); sleep 1; done']		
		
				

创建 pod

		
iMac:kubernetes neo$ kubectl create -f pod.yaml 
pod/counter created

iMac:kubernetes neo$ kubectl logs counter
0: Sun Oct  4 12:32:44 UTC 2020
1: Sun Oct  4 12:32:45 UTC 2020
2: Sun Oct  4 12:32:46 UTC 2020
3: Sun Oct  4 12:32:47 UTC 2020
4: Sun Oct  4 12:32:48 UTC 2020
5: Sun Oct  4 12:32:49 UTC 2020
6: Sun Oct  4 12:32:50 UTC 2020
7: Sun Oct  4 12:32:51 UTC 2020
8: Sun Oct  4 12:32:52 UTC 2020
9: Sun Oct  4 12:32:53 UTC 2020
		
				

108.7.3. 删除 pod

			
kubectl delete -n default pod registry	
kubectl delete -n default pod counter			
			
				

108.7.4. 查看 Pod 的事件

		
kubectl describe pod <pod-name> 		
		
				
		
iMac:~ neo$ kubectl describe pod springboot
Name:         springboot
Namespace:    default
Priority:     0
Node:         minikube/192.168.64.2
Start Time:   Mon, 21 Sep 2020 16:17:03 +0800
Labels:       run=springboot
Annotations:  <none>
Status:       Pending
IP:           
IPs:          <none>
Containers:
  springboot:
    Container ID:   
    Image:          127.0.0.1:5000/netkiller/config:latest
    Image ID:       
    Port:           8888/TCP
    Host Port:      0/TCP
    State:          Waiting
      Reason:       ContainerCreating
    Ready:          False
    Restart Count:  0
    Environment:    <none>
    Mounts:
      /var/run/secrets/kubernetes.io/serviceaccount from default-token-fhfn8 (ro)
Conditions:
  Type              Status
  Initialized       True 
  Ready             False 
  ContainersReady   False 
  PodScheduled      True 
Volumes:
  default-token-fhfn8:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  default-token-fhfn8
    Optional:    false
QoS Class:       BestEffort
Node-Selectors:  <none>
Tolerations:     node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                 node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type    Reason     Age   From               Message
  ----    ------     ----  ----               -------
  Normal  Scheduled  80s   default-scheduler  Successfully assigned default/springboot to minikube
  Normal  Pulling    79s   kubelet            Pulling image "127.0.0.1:5000/netkiller/config:latest"		
		
				

108.7.5. Taint(污点)和 Toleration(容忍)

其目的是分配 pod 在集群间的调度,Taint 和 toleration 相互配合,可以用来避免 pod 被分配到某个节点上。这跟节点亲和性作用相反。

给 node 节点设置 label,通过给 pod 设置 nodeSelector 将 pod 调度到匹配标签的节点上。

如果在 pod 上设置了 toleration,则表示该 pod 可以(但不保证一定会)被调度到带有匹配 taint 的节点上。

Taint(污点)设置

设置污点: kubectl taint node [node] key=value:[effect]

effect 参数

  1. NoSchedule :不能被调度。
  2. PreferNoSchedule:尽量不要调度。
  3. NoExecute:不仅不会调度新的 Pod,还会驱逐该节点上已经运行且不能容忍该污点的 Pod。

在 shenzhen 节点上设置Taint,键为key,值为value,effect是NoSchedule。

				
kubectl taint nodes shenzhen key=value:NoSchedule
				
					

这意味着只有明确声明了可以容忍这个 Taint 的 toleration 的 pod,才可能被调度到该节点,否则不会被调度到该节点。

				
apiVersion: v1
kind: Pod
metadata:
  name: pod-taints
spec:
  tolerations:
  - key: "key"
    operator: "Equal"
    value: "value"
    effect: "NoSchedule"
  containers:
    - name: pod-taints
      image: busybox:latest				
				
					
Toleration(容忍)调度

key 存在即可匹配

				
spec:
  tolerations:
  - key: "key"
    operator: "Exists"
    effect: "NoSchedule"				
				
					

key 必须存在,并且值等于 value

				
spec:
  tolerations:
  - key: "key"
    operator: "Equal"
    value: "value"
    effect: "NoSchedule"				
				
					

在pod上设置多个toleration:

				
spec:				
  tolerations:
  - key: "key1"
    operator: "Equal"
    value: "value1"
    effect: "NoSchedule"
  - key: "key2"
    operator: "Equal"
    value: "value2"
    effect: "NoExecute"				
				
					

如果给 node 加上 effect=NoExecute 的 Taint,该节点上没有设置对应 toleration 的 pod 会被立刻驱逐;在 toleration 中设置 tolerationSeconds 后,pod 在被驱逐前会获得一个宽限期(单位:秒)。注意 tolerationSeconds 只在 effect 为 NoExecute 时有效。

				
spec:		
  tolerations:
  - key: "key"
    operator: "Equal"
    value: "value"
    effect: "NoSchedule"
    tolerationSeconds: 3600
				
					
使用场景

例如有些节点上挂了 SSD,专门给 redis、mongodb、mysql 使用,有些节点上安装了显卡 GPU,就可以使用 taint 把这些节点保留给特定的 Pod。

				
kubectl taint nodes shenzhen special=true:NoSchedule
kubectl taint nodes guangdong special=true:PreferNoSchedule				
				
					

108.7.6. 镜像拉取策略

imagePullPolicy: Always 总是拉取

imagePullPolicy: IfNotPresent 本地有则使用本地镜像,不拉取;镜像指定了非 :latest 的标签时为默认值(标签为 :latest 或未指定标签时默认为 Always)

imagePullPolicy: Never 只使用本地镜像,从不拉取

108.7.7. 指定主机名

			
apiVersion: v1
kind: Pod
metadata:
  name: hostaliases-pod
spec:
  restartPolicy: Never
  hostAliases:
  - ip: "127.0.0.1"
    hostnames:
    - "foo.local"
    - "bar.local"
  - ip: "10.1.2.3"
    hostnames:
    - "foo.remote"
    - "bar.remote"
  containers:
  - name: cat-hosts
    image: busybox
    command:
    - cat
    args:
    - "/etc/hosts"			
			
					

108.7.8. 环境变量

			
apiVersion: v1
kind: Pod
metadata:
  name: envars-fieldref
spec:
  containers:
    - name: test-container
      image: k8s.gcr.io/busybox
      command: [ "sh", "-c"]
      args:
      - while true; do
          echo -en '\n';
          printenv NODE_NAME POD_NAME POD_NAMESPACE;
          printenv POD_IP POD_SERVICE_ACCOUNT;
          sleep 10;
        done;
      env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: status.podIP
        - name: POD_SERVICE_ACCOUNT
          valueFrom:
            fieldRef:
              fieldPath: spec.serviceAccountName
  restartPolicy: Never			
			
					

			
apiVersion: v1
kind: Pod
metadata:
  name: envars-resourcefieldref
spec:
  containers:
    - name: test-container
      image: k8s.gcr.io/busybox:1.24
      command: [ "sh", "-c"]
      args:
      - while true; do
          echo -en '\n';
          printenv CPU_REQUEST CPU_LIMIT;
          printenv MEM_REQUEST MEM_LIMIT;
          sleep 10;
        done;
      resources:
        requests:
          memory: "32Mi"
          cpu: "125m"
        limits:
          memory: "64Mi"
          cpu: "250m"
      env:
        - name: CPU_REQUEST
          valueFrom:
            resourceFieldRef:
              containerName: test-container
              resource: requests.cpu
        - name: CPU_LIMIT
          valueFrom:
            resourceFieldRef:
              containerName: test-container
              resource: limits.cpu
        - name: MEM_REQUEST
          valueFrom:
            resourceFieldRef:
              containerName: test-container
              resource: requests.memory
        - name: MEM_LIMIT
          valueFrom:
            resourceFieldRef:
              containerName: test-container
              resource: limits.memory
  restartPolicy: Never			
			
					

108.7.9. 健康状态检查

readinessProbe (就绪探测)

就绪探针检查容器是否能够正常对外提供服务

				
        readinessProbe: 
          exec:
            command:
            - cat
            - /tmp/healthy
          initialDelaySeconds: 10         #10s之后开始第一次探测
          periodSeconds: 5                #第一次探测之后每隔5s探测一次			
				
				
livenessProbe (存活探测)

检测容器中的应用是否健康,探测失败时 kubelet 会杀掉容器,并根据重启策略 restartPolicy 决定是否重启容器

命令方式

				 
apiVersion: v1
kind: Pod
metadata:
  name: nginx-health
spec:
  containers:
  - name: nginx-liveness
    image: nginx:latest
    command:
    - /bin/sh
    - -c
    - /usr/sbin/nginx; sleep 60; rm -rf /run/nginx.pid
    livenessProbe:
      exec:
        command: [ "/bin/sh", "-c", "test", "-e", "/run/nginx.pid" ]
  restartPolicy: Always				
				
				

TCP 方式

				 
apiVersion: v1
kind: Pod
metadata:
  name: nginx-health
spec:
  containers:
  - name: nginx-liveness
    image: nginx:latest
    command:
    - /bin/sh
    - -c
    - /usr/sbin/nginx; sleep 60; rm -rf /run/nginx.pid
    livenessProbe:
      tcpSocket:
        port: 80
  restartPolicy: Always				
				
				

108.7.10. securityContext

sysctls

				
kubelet --allowed-unsafe-sysctls \
  'kernel.msg*,net.core.somaxconn' ...				
				
						
				
apiVersion: v1
kind: Pod
metadata:
  name: sysctl-example
spec:
  securityContext:
    sysctls:
    - name: kernel.shm_rmid_forced
      value: "0"
    - name: net.core.somaxconn
      value: "1024"
    - name: kernel.msgmax
      value: "65536"				
				
						
runAsUser

allowPrivilegeEscalation 表示是否允许进程获得比其父进程更多的权限,runAsUser 表示使用 UID 1000 的用户运行

				
apiVersion: v1
kind: Pod
metadata:
  name: security-context-demo
spec:
  securityContext:
    runAsUser: 1000
  containers:
  - name: sec-ctx-demo
    image: busybox:latest
    securityContext:
      runAsUser: 1000
      allowPrivilegeEscalation: false				
				
						
				
   spec:
     securityContext:
        runAsUser: 1000
        fsGroup: 2000
        runAsNonRoot: true				
				
						
security.alpha.kubernetes.io/sysctls

security.alpha.kubernetes.io/sysctls

			
apiVersion: v1
kind: Pod
metadata:
  name: sysctl-example
  annotations:
    security.alpha.kubernetes.io/sysctls: kernel.shm_rmid_forced=1
spec:			
			
						

unsafe-sysctls

			
apiVersion: v1
kind: Pod
metadata:
  name: sysctl-example
  annotations:
    security.alpha.kubernetes.io/unsafe-sysctls: net.core.somaxconn=65535                 #使用unsafe sysctl,设置最大连接数
spec:
  securityContext:
    privileged: true                                                                      #开启privileged权限			
			
						

108.7.11. nodeName 选择节点

首先查看节点名称

			
[root@master ~]# kubectl get node
NAME      STATUS   ROLES                  AGE     VERSION
agent-1   Ready    <none>                 2d13h   v1.24.4+k3s1
master    Ready    control-plane,master   2d13h   v1.24.4+k3s1
agent-2   Ready    <none>                 13h     v1.24.4+k3s1			
			
					

使用 nodeName: master 选择节点

			
metadata:
  name: redis
  labels:
    app: redis
spec:
  replicas: 1
  serviceName: redis
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:latest
          ports:
            - containerPort: 6379
          volumeMounts:
            - name: data
              mountPath: /data
            - name: config
              mountPath: /usr/local/etc/redis.conf
              subPath: redis.conf
          livenessProbe:
            tcpSocket:
              port: 6379
            initialDelaySeconds: 60
            failureThreshold: 3
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          readinessProbe:
            tcpSocket:
              port: 6379
            initialDelaySeconds: 5
            failureThreshold: 3
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: redis
        - name: config
          configMap:
            name: redis
      nodeName: master
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes:
          - ReadWriteOnce
        storageClassName: longhorn
        resources:
          requests:
            storage: 2Gi
apiVersion: apps/v1
kind: StatefulSet			
			
					

108.7.12. nodeSelector 选择节点

首先给节点打标签,例如 disk-type=ssd

			
[root@master ~]# kubectl label nodes agent-1 disk-type=ssd
node/agent-1 labeled			
			
					

查看标签

			
[root@master ~]# kubectl get node --show-labels
NAME         STATUS   ROLES    AGE   VERSION   LABELS
master   Ready    master   42d   v1.17.4   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=master,kubernetes.io/os=linux,node-role.kubernetes.io/master=
agent-1   Ready    <none>   42d   v1.17.4   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,disk-type=ssd,kubernetes.io/arch=amd64,kubernetes.io/hostname=agent-1,kubernetes.io/os=linux
agent-2   Ready    <none>   42d   v1.17.4   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=agent-2,kubernetes.io/os=linux			
			
					

			
apiVersion: apps/v1
kind: Deployment
metadata:
  name: busybox
  labels:
    app: busybox
spec:
  replicas: 5
  selector:
    matchLabels:
      app: busybox
  template:
    metadata:
      labels:
        app: busybox
    spec:
      containers:
      - name: busybox
        image: busybox
        imagePullPolicy: IfNotPresent
        ports:
          - containerPort: 80
      # 指定标签节点
      nodeSelector:
        disk-type: ssd			
			
					

删除标签

			
[root@master ~]# kubectl label nodes agent-1 disk-type-
node/agent-1 unlabeled		
			
					

108.7.13. nodeAffinity 选择节点

			
nodeAffinity可对应的两种策略:
preferredDuringScheduling(IgnoredDuringExecution / RequiredDuringExecution) 软策略
requiredDuringScheduling(IgnoredDuringExecution / RequiredDuringExecution) 硬策略

operator 表达式
In: label的值在某个列表中
NotIn:label的值不在某个列表中
Exists:某个label存在
DoesNotExist:某个label不存在
Gt:label的值大于某个值(按整数比较)
Lt:label的值小于某个值(按整数比较)
			
					

108.7.14. Taint(污点)和 Toleration(容忍)

			
			
			
					

108.7.15. strategy

滚动升级策略:

maxSurge(升级过程中允许超出期望 Pod 数量的最大个数):1

maxUnavailable(升级过程中允许不可用 Pod 的最大个数):0

			
  strategy:
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
    type: RollingUpdate
			
					
			
  strategy:
      type: RollingUpdate
      rollingUpdate: {
        maxUnavailable: 25%
        maxSurge: 25%