Administrator
Published on 2025-01-06

Setting up a k8s cluster on Ubuntu

Server resources

Name        IP              CPU    Memory    Disk
k8smaster   192.168.1.210   -      8G        40G
k8snode1    192.168.1.211   -      32G       100G

OS and version: ubuntu-22.04.2

Note: high availability is not considered for this development environment. If needed, deploy keepalived, haproxy, and a second control-plane node (k8smaster2).
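For reference, a minimal sketch of what such an HA front end could look like, assuming a virtual IP of 192.168.1.200 and a second master at 192.168.1.212 (both hypothetical values; adjust the addresses and NIC name to your network):

# /etc/haproxy/haproxy.cfg (excerpt) - load-balance the kube-apiservers
frontend k8s-api
    bind *:16443
    mode tcp
    default_backend k8s-api-backend
backend k8s-api-backend
    mode tcp
    balance roundrobin
    server k8smaster  192.168.1.210:6443 check
    server k8smaster2 192.168.1.212:6443 check

# /etc/keepalived/keepalived.conf (excerpt) - float a virtual IP between the masters
vrrp_instance VI_1 {
    state MASTER            # BACKUP on the second node
    interface eth0          # adjust to the actual NIC name
    virtual_router_id 51
    priority 100            # use a lower priority on the second node
    virtual_ipaddress {
        192.168.1.200
    }
}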

Pre-deployment preparation

Set the hostnames

# Set the hostname
## master node:
hostnamectl set-hostname k8smaster
## worker node:
hostnamectl set-hostname k8snode1

# Reload the shell
bash

# Verify (example)
root@k8smaster:~$ uname -a
Linux k8smaster 5.15.0-83-generic #92-Ubuntu SMP Mon Aug 14 09:30:42 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

Edit the hosts file

vim /etc/hosts

# Append the following to the end of the file:
192.168.1.210 k8smaster
192.168.1.211 k8snode1

# Verify
ping k8smaster
ping k8snode1

Switch the apt sources

cd /etc/apt

# Back up
cp sources.list{,.bak}

# Edit the sources.list file
vim sources.list
# Replace the mirror address (in vim):
:%s#http://cn.archive.ubuntu.com#https://mirrors.aliyun.com#g

# Refresh
apt-get update

Disable the firewall and swap

# Disable SELinux
apt install -y selinux-utils
# Disable temporarily
setenforce 0
# Disable permanently: on systems where SELinux is actually enabled, set SELINUX=disabled in /etc/selinux/config
# (Ubuntu uses AppArmor by default, so this step is usually not needed)

# Stop the firewall
systemctl stop ufw.service

# Disable the firewall permanently
systemctl disable ufw.service


# Turn off swap
swapoff -a

# Turn off swap permanently (comment out the swap line in /etc/fstab)
vim /etc/fstab 
## or 
sed -i '/swap/s/^\(.*\)$/#\1/g' /etc/fstab
# /swap.img     none    swap    sw      0       0

# Check whether swap is off
free -m

               total        used        free      shared  buff/cache   available
Mem:           32092         348       30030           4        1713       31342
Swap:              0           0           0
# Swap showing all zeros means the swap partition is off

Set the timezone and time synchronization

# timedatectl is provided by systemd and is available by default
# View timezone info
timedatectl
timedatectl set-timezone Asia/Shanghai
# View the current time
date +"%Y-%m-%d %H:%M:%S"

# Time synchronization
apt install -y chrony
systemctl restart chrony
systemctl status chrony
chronyc sources
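If the default NTP pools are slow to reach from your network, chrony can be pointed at a closer server; a minimal sketch, assuming ntp.aliyun.com is acceptable in your environment:

# Append a nearby NTP server to /etc/chrony/chrony.conf (example server)
echo "server ntp.aliyun.com iburst" >> /etc/chrony/chrony.conf

# Apply and verify
systemctl restart chrony
chronyc sources -v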

Tune kernel parameters

cat > /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
​
sysctl -p /etc/sysctl.d/k8s.conf
# or
sysctl --system

# Verify
sysctl -a | grep ip_forward

Install ipvsadm

apt install -y ipset ipvsadm

cat > /etc/modules-load.d/ipvs.conf <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_lc
modprobe -- ip_vs_lblc
modprobe -- ip_vs_lblcr
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- ip_vs_dh
modprobe -- ip_vs_fo
modprobe -- ip_vs_nq
modprobe -- ip_vs_sed
modprobe -- ip_vs_ftp
modprobe -- ip_tables
modprobe -- ip_set
modprobe -- ipt_set
modprobe -- ipt_rpfilter
modprobe -- ipt_REJECT
modprobe -- ipip
modprobe -- xt_set
modprobe -- br_netfilter
modprobe -- nf_conntrack
EOF

# Make the file executable
chmod 755 /etc/modules-load.d/ipvs.conf

# Load the modules now
bash /etc/modules-load.d/ipvs.conf
# Verify
lsmod | grep ip_vs

# Copy to /etc/profile.d so it is also run on login
cp /etc/modules-load.d/ipvs.conf /etc/profile.d/ipvs.modules.sh
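Note that files under /etc/modules-load.d are read by systemd-modules-load, which expects one module name per line rather than a shell script, so the file above only takes effect because it is run with bash. A systemd-native alternative, as a sketch with a reduced module list:

# /etc/modules-load.d/ipvs.conf (systemd-native form: one module name per line)
cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
br_netfilter
nf_conntrack
EOF

# Load them now and verify
systemctl restart systemd-modules-load.service
lsmod | grep ip_vs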

Install Docker

# Install docker (the latest version from the distro repo is fine)
apt-get update
apt-get install -y docker.io
# Start docker and enable it at boot
systemctl start docker
systemctl enable docker
# Verify
docker version    # or: docker info

Note: not strictly necessary; Kubernetes v1.24 and later removed dockershim, so Docker is no longer used as the container runtime here.

Set the cgroup driver

vim /etc/docker/daemon.json
{
    "exec-opts": ["native.cgroupdriver=systemd"]
}
​
cat > /var/lib/kubelet/config.yaml <<EOF
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
EOF
​
systemctl daemon-reload
systemctl restart docker
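To confirm that the change took effect, a quick check of Docker's reported cgroup driver:

# Should print "Cgroup Driver: systemd"
docker info | grep -i "cgroup driver"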

Note: not strictly necessary; as above, Docker is no longer the container runtime since v1.24.

Deploy k8s

Add the apt repository and install the k8s components

# Install the base packages
apt-get install -y ca-certificates curl software-properties-common apt-transport-https
curl -s https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo apt-key add -
# Configure the Aliyun k8s apt source
vim /etc/apt/sources.list.d/kubernetes.list
# Add the following line:
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
# Update
apt-get update -y
# Install kubeadm, kubectl, kubelet
apt-get install -y kubelet kubeadm kubectl
# Hold the packages so apt upgrade ignores them; unhold before upgrading, then hold again afterwards (see the example below)
apt-mark hold kubelet kubeadm kubectl
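For reference, the unhold → upgrade → hold cycle mentioned above looks roughly like this (the 1.28.2-00 package version is only an example):

# Release the hold, upgrade to a specific version, then hold again
apt-mark unhold kubelet kubeadm kubectl
apt-get update
apt-get install -y kubelet=1.28.2-00 kubeadm=1.28.2-00 kubectl=1.28.2-00
apt-mark hold kubelet kubeadm kubectl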

Install the container runtime containerd

apt install -y containerd
mkdir /etc/containerd
containerd config default > /etc/containerd/config.toml

# Edit /etc/containerd/config.toml
vim /etc/containerd/config.toml
# sandbox_image 3.8 -> 3.9
sandbox_image = "registry.k8s.io/pause:3.9"
# SystemdCgroup = false -> true
SystemdCgroup = true

# Start now and enable at boot
systemctl enable --now containerd

# List images
crictl -r unix:///run/containerd/containerd.sock images

Initialize the k8s cluster

# Initialize the master node
kubeadm init \
--apiserver-advertise-address=192.168.1.210 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.28.2 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=all \
--cri-socket unix:///run/containerd/containerd.sock

# If initialization fails with: initial timeout of 40s passed.

# Check the error messages
systemctl status kubelet
journalctl -xeu kubelet

############ The failure is most likely a network issue causing image pulls to fail; use the approach below ################

# List the images kubeadm needs
kubeadm config images list --kubernetes-version v1.28.2
kube-apiserver:v1.28.2
kube-controller-manager:v1.28.2
kube-scheduler:v1.28.2
kube-proxy:v1.28.2
pause:3.9
etcd:3.5.9-0
coredns:v1.10.1

# Generate the default init configuration file
kubeadm config print init-defaults > k8s-init.yaml 

# Edit k8s-init.yaml 
vim k8s-init.yaml 

## Change
advertiseAddress: 192.168.1.210
nodeRegistration.name: k8smaster
networking.podSubnet: 10.244.0.0/16
networking.dnsDomain: cluster.local
## Add
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd

# Pull the images manually
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-apiserver:v1.28.2
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-controller-manager:v1.28.2
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-scheduler:v1.28.2
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-proxy:v1.28.2
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/pause:3.9
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/etcd:3.5.9-0
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/coredns:v1.10.1

# List images
crictl -r unix:///run/containerd/containerd.sock images

# Retag the images
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/kube-apiserver:v1.28.2 registry.k8s.io/kube-apiserver:v1.28.2
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/kube-controller-manager:v1.28.2 registry.k8s.io/kube-controller-manager:v1.28.2
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/kube-scheduler:v1.28.2 registry.k8s.io/kube-scheduler:v1.28.2
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/kube-proxy:v1.28.2 registry.k8s.io/kube-proxy:v1.28.2
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/pause:3.9 registry.k8s.io/pause:3.9
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/etcd:3.5.9-0 registry.k8s.io/etcd:3.5.9-0
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/coredns:v1.10.1 registry.k8s.io/coredns/coredns:v1.10.1

# Delete an image
crictl -r unix:///run/containerd/containerd.sock rmi registry.cn-shanghai.aliyuncs.com/xxx:latest

# Export an image
ctr -n k8s.io image export xxx.tar docker.io/bitnami/xxx:latest

# 1. List the images on this machine
crictl images
# 2. Remove unused images
crictl rmi --prune

# Reset kubeadm
kubeadm reset

k8s-init.yaml example

apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.1.210
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  name: k8smaster
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.k8s.io
kind: ClusterConfiguration
kubernetesVersion: 1.28.2
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16
scheduler: {}
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd

Note: dnsDomain must be set to the default cluster.local, otherwise the dapr sidecar will fail to connect.
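A quick way to double-check the cluster domain after initialization (assuming the default CoreDNS deployment) is to look for cluster.local in the CoreDNS Corefile:

# The Corefile should contain "kubernetes cluster.local ..."
kubectl -n kube-system get configmap coredns -o yaml | grep cluster.local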

# Re-initialize
kubeadm init --config k8s-init.yaml --ignore-preflight-errors=all
​
Your Kubernetes control-plane has initialized successfully!
​
To start using your cluster, you need to run the following as a regular user:
​
  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config
​
Alternatively, if you are the root user, you can run:
​
  export KUBECONFIG=/etc/kubernetes/admin.conf
​
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/
​
Then you can join any number of worker nodes by running the following on each as root:
​
kubeadm join 192.168.1.210:6443 --token abcdef.0123456789abcdef \
    --discovery-token-ca-cert-hash sha256:0f598ac4471612dd56f61e1d36b26a73862016cd95ebe178e5c3239ef9a409e0
# --cri-socket=unix:///var/run/containerd/containerd.sock
## The output above means the master was deployed successfully
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

# If the join command was not saved, it can be regenerated with the commands below.
# Likewise, if a worker node's join hangs at "Running pre-flight checks",
# re-check the earlier configuration or generate a new token.
kubeadm token generate # generate a token
# 5pv1xx.j52sjzl6bfljuzkp
# kubeadm token create {token} --print-join-command --ttl=0 # print the join command
kubeadm token create 5pv1xx.j52sjzl6bfljuzkp --print-join-command
kubeadm join 192.168.1.210:6443 --token 5pv1xx.j52sjzl6bfljuzkp --discovery-token-ca-cert-hash sha256:0f598ac4471612dd56f61e1d36b26a73862016cd95ebe178e5c3239ef9a409e0

# Deploy the worker node(s): run all of the commands above except kubeadm init
​
root@k8smaster:~# kubectl get nodes
NAME        STATUS     ROLES           AGE     VERSION
k8smaster   NotReady   control-plane   19m     v1.28.2
k8snode1    NotReady   <none>          2m26s   v1.28.1
​
# All nodes are NotReady at this point; a network plugin must be deployed
## Flannel is used here as an example (run on the master node only)
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
​
## If this fails or times out because of slow networking, download it with wget and apply locally
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
kubectl apply -f kube-flannel.yml
​
## Output
serviceaccount/flannel created
configmap/kube-flannel-cfg created
daemonset.apps/kube-flannel-ds created
​
## Check the node status again (if a node stays NotReady, reboot it)
root@k8smaster:~# kubectl get nodes
NAME        STATUS   ROLES           AGE   VERSION
k8smaster   Ready    control-plane   42m   v1.28.2
k8snode1    Ready    <none>          25m   v1.28.1
​
# Check CPU/memory allocation per node
kubectl describe node |grep -E '((Name|Roles):\s{6,})|(\s+(memory|cpu)\s+[0-9]+\w{0,2}.+%\))'
Name:               k8smaster
Roles:              control-plane
  cpu                950m (95%)  0 (0%)
  memory             290Mi (3%)  340Mi (4%)
Name:               k8snode1
Roles:              <none>
  cpu                475m (3%)    500m (4%)
  memory             3028Mi (9%)  712Mi (2%)
Name:               k8snode2
Roles:              <none>
  cpu                300m (30%)  0 (0%)
  memory             340Mi (1%)  200Mi (0%)

Verify the cluster

# Create an nginx pod
kubectl create deployment nginx --image=nginx

# Create a service
kubectl expose deployment nginx --port=80 --type=NodePort

# View the pod details; nginx has been deployed on node1
kubectl get pod -o wide
NAME                     READY   STATUS    RESTARTS   AGE    IP           NODE       NOMINATED NODE   READINESS GATES
nginx-7854ff8877-hf287   1/1     Running   0          3m4s   10.244.1.2   k8snode1   <none>           <none>
​
# View the deployment info
kubectl get pod,svc
NAME                 TYPE        CLUSTER-IP     EXTERNAL-IP   PORT(S)        AGE
service/kubernetes   ClusterIP   10.96.0.1      <none>        443/TCP        52m
service/nginx        NodePort    10.103.156.2   <none>        80:32400/TCP   4m3s
​
# Verify
curl http://10.103.156.2
# Or open in a browser:
# http://192.168.1.210:32400 
# http://192.168.1.211:32400
​
<!DOCTYPE html>
<html>
<head>
    <title>Welcome to nginx!</title>
    <style>
        html { color-scheme: light dark; }
        body { width: 35em; margin: 0 auto;
        font-family: Tahoma, Verdana, Arial, sans-serif; }
    </style>
</head>
<body>
    ...
    <p><em>Thank you for using nginx.</em></p>
</body>
</html>

nginx-ingress

Deploy

# Download deploy.yaml
wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.8.2/deploy/static/provider/cloud/deploy.yaml

# Inspect the file; three places need images to be pulled:
registry.k8s.io/ingress-nginx/controller:v1.8.2@sha256:74834d3d25b336b62cabeb8bf7f1d788706e2cf1cfd64022de4137ade8881ff2
registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20230407@sha256:543c40fd093964bc9ab509d3e791f9989963021f1e9e4c9c7b6700b02bfb227b
registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20230407@sha256:543c40fd093964bc9ab509d3e791f9989963021f1e9e4c9c7b6700b02bfb227b

# Because of network restrictions, pull the images manually
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/nginx-ingress-controller:v1.8.2
Image is up to date for sha256:47fe3698318ff814d1feb11ed2abab82066f9b9f77964ae0d0daa81a9635271f
# This image could not be found; other versions were tried without success either
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-webhook-certgen:v1.8.2


# After various failed attempts, fall back to version v1.1.1
wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.1.1/deploy/static/provider/cloud/deploy.yaml

# Remove externalTrafficPolicy: Local
# Change the controller Deployment image to: bitnami/nginx-ingress-controller:1.1.1
# Change the ingress-nginx-admission-create image to: liangjw/kube-webhook-certgen:v1.1.1
# Change the ingress-nginx-admission-patch image to: liangjw/kube-webhook-certgen:v1.1.1

crictl -r unix:///run/containerd/containerd.sock pull bitnami/nginx-ingress-controller:1.1.1
crictl -r unix:///run/containerd/containerd.sock pull liangjw/kube-webhook-certgen:v1.1.1
crictl -r unix:///run/containerd/containerd.sock pull liangjw/kube-webhook-certgen:v1.1.1
​
# Set as the default IngressClass
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
  name: nginx
  annotations: 
    ingressclass.kubernetes.io/is-default-class: "true" 
spec:
  controller: k8s.io/ingress-nginx
​
# Deploy as a DaemonSet
# Reference: https://www.cnblogs.com/dannylinux/p/15813829.html
kind: DaemonSet
​
spec:
  template: 
    spec:
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet 
      nodeSelector:
        edgenode: 'true'
        
      containers: 
        - name: controller
          args:
            - --watch-ingress-without-class=true

Example

# Example: nginx-ingress-example.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: nginx-ingress
  namespace: default
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true" # force redirect to HTTPS
    nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
spec:
  rules:
  - host: nginx.cluster.org
    http:
      paths:
      - path: /
        pathType: ImplementationSpecific
        backend:
          service:
            name: nginx
            port:
              number: 32400
              
kubectl apply -f nginx-ingress-example.yaml
curl http://nginx.cluster.org:32400/

Kubernetes-Dashboard

Deploy Kubernetes-Dashboard

wget https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml

# Deploy kubernetes-dashboard
kubectl apply -f recommended.yaml

# Delete the default service
kubectl delete service kubernetes-dashboard --namespace=kubernetes-dashboard

# Create a custom service
vim dashboard-svc.yaml
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  type: NodePort
  ports:
    - port: 443
      targetPort: 8443
      nodePort: 30443
  selector:
    k8s-app: kubernetes-dashboard
    
kubectl apply -f dashboard-svc.yaml
​
​

Create the dashboard ingress

vim dashboard-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: dashboard-ingress
  namespace: kubernetes-dashboard
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true" # force redirect to HTTPS
    nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
spec:
  rules:
  - host: k8s-dashboard.cluster.org
    http:
      paths:
      - path: /
        pathType: ImplementationSpecific
        backend:
          service:
            name: kubernetes-dashboard
            port:
              number: 30443
              
kubectl apply -f dashboard-ingress.yaml
​
# Verify (add hosts entries for browser access)
curl https://k8s-dashboard.cluster.org:30443

Generate the dashboard token

# Create a service account
vim dashboard-sa.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kubernetes-dashboard
  
kubectl apply -f dashboard-sa.yaml
  
# Bind the cluster-admin role
vim dashboard-role-binding.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kubernetes-dashboard
  
kubectl apply -f dashboard-role-binding.yaml
​
# Short-lived token
kubectl create token admin-user -n kubernetes-dashboard
​
# Long-lived token
vim dashboard-token-secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: admin-user-token
  namespace: kubernetes-dashboard
  annotations:
    kubernetes.io/service-account.name: "admin-user"
type: kubernetes.io/service-account-token
​
# Create the secret for the long-lived token
kubectl apply -f dashboard-token-secret.yaml
​
# View the token
kubectl describe secret admin-user-token -n kubernetes-dashboard
​
eyJhbGciOiJSUzI1NiIsImtpZCI6Ikk2ZUw0c0tFT011NXJ4Zjg0cUdST0hZemw3SC1EVlJRcllUSmdTbUY4d1EifQ.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJrdWJlcm5ldGVzLWRhc2hib2FyZCIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VjcmV0Lm5hbWUiOiJhZG1pbi11c2VyLXRva2VuIiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9zZXJ2aWNlLWFjY291bnQubmFtZSI6ImFkbWluLXVzZXIiLCJrdWJlcm5ldGVzLmlvL3NlcnZpY2VhY2NvdW50L3NlcnZpY2UtYWNjb3VudC51aWQiOiI1MGY0ZmI5ZS04NzU0LTQ3ZWMtOGE0NC03MWI4MjY5Y2NkYzUiLCJzdWIiOiJzeXN0ZW06c2VydmljZWFjY291bnQ6a3ViZXJuZXRlcy1kYXNoYm9hcmQ6YWRtaW4tdXNlciJ9.SPXqk101QDtXoaNSLuUYQyTL509inyTRfe8bbZDNcxWM2c8yYYF2oV-HgF9Lyt0sZDGdR85ssBalBYXxrjnYjo3XOLPK2Pr-ulC3WC_eaKQbli0dWQpXBKgizn0XfeCSkw5I_JxwO8nlwqPd0-m_CE7DVdu8YX5gogqMXdIXr9XYDmdonRBbXXVKThVx4ufvsC3EpTp_1NRS-twuEFjWtKvO8CInL2LbUOSaMaBVXH41CQLt5BElnfbjjWVwNzWiQLCTaND2VVrfamtfiRU9ycW5OgjlNZSv15AEwbnWE4p2v3XFv1LPvyLUn9oOUwYY9yGovZt-zuUG5S_5zInzyg

Deploy NFS

# Reference: https://blog.csdn.net/lpfstudy/article/details/130038661

# Alternatively, deploy as follows

# NFS server setup:
# 1. Install the NFS server:
sudo apt-get update
sudo apt-get install nfs-kernel-server

# 2. Create the shared directory and set its ownership (using /var/nfs as an example):
sudo mkdir /var/nfs
sudo chown nobody:nogroup /var/nfs

# 3. Edit /etc/exports and add the share:
/var/nfs *(rw,sync,no_subtree_check)

# 4. Start the NFS service and export the share:
sudo systemctl start nfs-kernel-server
sudo exportfs -rav


# NFS client setup:
# 1. Install the NFS client:
sudo apt-get update
sudo apt-get install nfs-common

# 2. Create the local mount point:
sudo mkdir /mnt/nfs

# 3. Mount the remote NFS filesystem:
sudo mount server_ip:/var/nfs /mnt/nfs
# (replace server_ip with the NFS server's IP address)
# To mount automatically at boot, add the mount to /etc/fstab:
server_ip:/var/nfs /mnt/nfs nfs defaults 0 0

Deploy a StorageClass

# Reference: https://zhuanlan.zhihu.com/p/655923057

# Deploy nfs-subdir-external-provisioner
helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/
helm repo update
helm install nfs-subdir-external-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner -n kube-system \
    --set image.repository=dyrnq/nfs-subdir-external-provisioner \
    --set nfs.server=192.168.1.211 \
    --set nfs.path=/home/nfs/nfs-storage
​
# Create a StorageClass
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: alibabacloud-cnfs-nas
provisioner: cluster.local/nfs-subdir-external-provisioner
parameters:
  # When set to "false", data is not retained after the PVC is deleted; "true" keeps the data
  archiveOnDelete: "false"
mountOptions:
  # Specify the NFS version; set this according to the NFS server's version
  - nfsvers=4
---
# Create a PVC
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: nfs-storage-pvc-1
  namespace: dev1
spec:
  storageClassName: alibabacloud-cnfs-nas    # must match the name of the StorageClass created above
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Mi
---
# Usage example
kind: Pod
apiVersion: v1
metadata:
  name: nfs-storage-pod-1
  namespace: dev1
spec:
  containers:
    - name: nfs-storage-pod-1
      image: busybox
      command:
        - "/bin/sh"
      args:
        - "-c"
        - "touch /mnt/teststorage && echo 111 > /mnt/teststorage && exit 0 || exit 1"  ## 创建一个名称为"SUCCESS"的文件
      volumeMounts:
        - name: nfs-pvc
          mountPath: "/mnt"
  restartPolicy: "Never"
  volumes:
    - name: nfs-pvc
      persistentVolumeClaim:
        claimName: nfs-storage-pvc-1

Note: different applications should preferably use separate StorageClasses for their storage requests; otherwise provisioning can fail and the pods will not start.
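For example, a second StorageClass dedicated to another application can reuse the same provisioner under a different name (the name below is only illustrative):

# storageclass-app2.yaml - a separate StorageClass for another application
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: nfs-app2
provisioner: cluster.local/nfs-subdir-external-provisioner
parameters:
  archiveOnDelete: "false"
mountOptions:
  - nfsvers=4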

Deploy helm3

# Install
# Reference: https://helm.sh/docs/intro/install/
curl https://baltocdn.com/helm/signing.asc | sudo apt-key add -
apt-get install -y apt-transport-https
echo "deb https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
apt-get update
apt-get install -y helm

# Verify
helm version

version.BuildInfo{Version:"v3.12.3", GitCommit:"3a31588ad33fe3b89af5a2a54ee1d25bfe6eaa5e", GitTreeState:"clean", GoVersion:"go1.20.7"}

# View helm help
helm -h

# Add helm repositories
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add aliyun https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts
helm repo update
helm search repo bitnami

# Example (redis)
helm install redis bitnami/redis -n storage --set global.storageClass=alibabacloud-cnfs-nas --set global.redis.password=*******

# Example (elasticsearch, kibana)
kubectl create secret generic elastic-juzipwd -n dapr-monitoring --from-literal=elasticsearch-password=**********
helm install elasticsearch -f values.yaml -n dapr-monitoring oci://registry-1.docker.io/bitnamicharts/elasticsearch

# Example (mysql)
helm install bitnami/mysql --generate-name

# Create a chart template
helm create my-app
# Deploy the local chart
helm install my-app .

# Upgrade a release (-i installs it if not already deployed, otherwise upgrades it)
helm upgrade -i my-app dir_path --set image.imageName=my-app --set image.tag=latest

Deploy dapr

# Reference: https://docs.dapr.io/zh-hans/

# Install
wget -q https://raw.githubusercontent.com/dapr/cli/master/install/install.sh -O - | /bin/bash

# Verify
dapr

# Initialize
dapr init -k

# Check the version
dapr --version

# List instances
dapr list

Deploy etcd

helm install etcd oci://registry-1.docker.io/bitnamicharts/etcd -n <namespace> --set persistence.enabled=false,auth.rbac.rootPassword=ihnIwodNgG
​
etcdctl --endpoints=http://etcd-juzi.minikube.org:2379 --user=root --password=ihnIwodNgG get gitlab.com/echo/config/dev/data

Deploy cert-manager

# Add the repository
helm repo add jetstack https://charts.jetstack.io
helm repo update

# Deploy cert-manager
helm upgrade --install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --set installCRDs=true

Deploy MQTT (EMQX)

# Add the repository
helm repo add emqx https://repos.emqx.io/charts
helm repo update

# Deploy emqx-operator
helm install emqx-operator emqx/emqx-operator -n storage

# kubectl wait --for=condition=Ready pods -l "control-plane=controller-manager" -n storage

# Deploy emqx-enterprise
helm install emqx emqx/emqx-enterprise  -n storage

# If version problems occur, deploy with the following manifest instead
# deployment.yaml
apiVersion: apps.emqx.io/v2beta1
kind: EMQX
metadata:
   name: emqx
   namespace: storage    # namespace
spec:
   image: emqx:5.1  # image; can be changed to a newer version

# View the service
kubectl get svc -n storage | grep emqx-emqx-enterprise
# Create emqx-ingress.yaml
vim emqx-ingress.yaml
​
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: emqx-ingress
  namespace: storage
  #annotations:
    #nginx.ingress.kubernetes.io/force-ssl-redirect: "true" # force redirect to HTTPS
    #nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
spec:
  ingressClassName: nginx
  rules:
  - host: emqx.minikube.org
    http:
      paths:
      - path: /
        pathType: ImplementationSpecific
        backend:
          service:
            name: emqx-emqx-enterprise
            port:
              number: 18083
              
kubectl apply -f emqx-ingress.yaml
​
# Open http://emqx.minikube.org in a browser
# Username: admin
# Password: public

Deploy rabbitmq

# Deploy rabbitmq
helm install rabbitmq bitnami/rabbitmq -n storage --set auth.username=admin,auth.password=123456
helm install rabbitmq bitnami/rabbitmq -n storage --set global.storageClass=alibabacloud-cnfs-nas,auth.username=admin,auth.password=123456
# In-cluster endpoint: rabbitmq.storage.svc.cluster.local
# Ports: 5672 (AMQP), 15672 (management UI)
# Get the password:
echo "Password: $(kubectl get secret --namespace storage rabbitmq -o jsonpath="{.data.rabbitmq-password}" | base64 -d)"

Deploy Kafka

# Deploy Kafka
helm install kafka bitnami/kafka --namespace storage --set kafkaVersion=3.4.0 --set replicaCount=3 --set global.storageClass=alibabacloud-cnfs-nas

# Client verification
kubectl run kafka-client --restart='Never' --image docker.io/bitnami/kafka:3.6.1-debian-11-r0 --namespace storage --command -- sleep infinity

# vim client.properties
security.protocol=SASL_PLAINTEXT
sasl.mechanism=PLAIN
sasl.jaas.config=org.apache.kafka.common.security.scram.ScramLoginModule required \
    username="user1" \
    password="$(kubectl get secret kafka-user-passwords --namespace storage -o jsonpath='{.data.client-passwords}' | base64 -d | cut -d , -f 1)";  
## mechanism: SCRAM-SHA-256 / PLAIN

# Copy the config file into the client pod
kubectl cp --namespace storage /path/to/client.properties kafka-client:/tmp/client.properties

# Enter the client pod
kubectl exec --tty -i kafka-client --namespace storage -- bash

# Producer
kafka-console-producer.sh \
            --producer.config /tmp/client.properties \
            --broker-list kafka-controller-0.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-1.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-2.kafka-controller-headless.storage.svc.cluster.local:9092 \
            --topic test

# Consumer
kafka-console-consumer.sh \
            --consumer.config /tmp/client.properties \
            --bootstrap-server kafka.storage.svc.cluster.local:9092 \
            --topic test \
            --from-beginning
            
# Create a topic with partitions
kafka-topics.sh --command-config /tmp/client.properties --create --topic givegift-create --partitions 10 --replication-factor 1 --bootstrap-server kafka-controller-0.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-1.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-2.kafka-controller-headless.storage.svc.cluster.local:9092

# Delete a topic
kafka-topics.sh --command-config /tmp/client.properties --delete --topic givegift-create --bootstrap-server kafka-controller-0.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-1.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-2.kafka-controller-headless.storage.svc.cluster.local:9092


# List all consumer groups
kafka-consumer-groups.sh --bootstrap-server <kafka-broker> --list
# Show lag details for a specific consumer group
kafka-consumer-groups.sh --bootstrap-server <kafka-broker> --group <consumer-group> --describe

Command-line tool reference: https://blog.csdn.net/cold___play/article/details/132157982

Cluster monitoring

Deploy metrics-server

# Get the native k8s metrics
kubectl get --raw /metrics

# Deploy metrics-server
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml

# Check the pod status
kubectl get pods -n kube-system | grep metrics-server


# If the image pull fails, pull the image manually and redeploy
# Reference: https://blog.51cto.com/nowsafe/6026448

# Pull the image and retag it
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/metrics-server:v0.7.0
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/metrics-server:v0.7.0 registry.k8s.io/metrics-server/metrics-server:v0.7.0

# List images
crictl -r unix:///run/containerd/containerd.sock images

# Edit the downloaded components.yaml: add --kubelet-insecure-tls and adjust --kubelet-preferred-address-types:
 template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --kubelet-preferred-address-types=InternalIP   # change this line; the default is InternalIP,ExternalIP,Hostname
        - --kubelet-insecure-tls  # add this line

kubectl apply -f components.yaml

# View node resources
kubectl top nodes

NAME        CPU(cores)   CPU%   MEMORY(bytes)   MEMORY%   
k8smaster   1132m        28%    2896Mi          36%       
k8snode1    1682m        14%    4985Mi          15% 

# View pod resources
kubectl top pods

Deploy prometheus

# Add the repository
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo update
helm search repo prometheus
# bitnami/prometheus       0.8.1            2.49.1      Prometheus is an open source monitoring and ale...

# Deploy prometheus
helm install prometheus bitnami/prometheus -n prometheus --set alertmanager.persistentVolume.storageClass="gp2",server.persistentVolume.storageClass="gp2",server.service.type=LoadBalancer

Deploy grafana

# Add the repository
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo update
helm search repo grafana
# bitnami/grafana           9.8.3           10.3.1      Grafana is an open source metric analytics and ...

# Deploy grafana
helm install grafana bitnami/grafana -n prometheus --set global.storageClass=efs-sc-grafana
​
echo "Password: $(kubectl get secret grafana-admin --namespace prometheus -o jsonpath="{.data.GF_SECURITY_ADMIN_PASSWORD}" | base64 -d)"
# w6dPpOIdGd

Note: efs-sc-grafana is the corresponding StorageClass; data is persisted on EFS.

Other references: https://blog.csdn.net/weixin_43832230/article/details/130317391

Dynamic scaling (HPA)

# Prerequisite: metrics-server is deployed and the k8s metrics API works
kubectl get apiservice | grep metrics
kubectl get --raw "/apis/metrics.k8s.io/v1beta1/nodes"

# dapr sidecar resource requests/limits
dapr.io/sidecar-cpu-request: "10m"
dapr.io/sidecar-memory-request: "32Mi"
# dapr.io/sidecar-cpu-limit: "100m"
# dapr.io/sidecar-memory-limit: "256Mi"

# App resource requests/limits
resources: 
  requests:
    cpu: 100m
    memory: 128Mi
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  
# Create an HPA (optional flag --cpu-percent, default 80); a declarative equivalent follows below
kubectl autoscale deploy nginx -n namespace --min=1 --max=3 --cpu-percent=60

# View the HPA
kubectl get hpa -n namespace

# Describe the HPA
kubectl describe hpa nginx -n namespace
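For reference, a roughly equivalent declarative manifest for the autoscale command above (autoscaling/v2, targeting the same nginx Deployment; adjust the namespace and thresholds as needed):

apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: nginx
  namespace: namespace
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: nginx
  minReplicas: 1
  maxReplicas: 3
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 60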

Cluster certificates

# Check certificate expiration
kubeadm certs check-expiration

# Back up the k8s configuration
cp -R /etc/kubernetes /etc/kubernetes.bak

# Renew the certificates
kubeadm certs renew all

# Copy the kubeconfig
cp /etc/kubernetes/admin.conf ./config


