Server resources
OS and version: ubuntu-22.04.2
Note: high availability is not considered for the development environment for now; if needed, also deploy keepalived, haproxy, and a second master node (k8smaster2).
Pre-deployment preparation
Set the hostname
# Set the hostname
## On the master node:
hostnamectl set-hostname k8smaster
## On the worker node:
hostnamectl set-hostname k8snode1
# Reload the shell
bash
# Verify (example)
root@k8smaster:~$ uname -a
Linux k8smaster 5.15.0-83-generic #92-Ubuntu SMP Mon Aug 14 09:30:42 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
Edit the hosts file
vim /etc/hosts
# Append the following at the end of the file:
192.168.1.210 k8smaster
192.168.1.211 k8snode1
# Verify
ping k8smaster
ping k8snode1
Change the apt sources
cd /etc/apt
# Back up
cp sources.list{,.bak}
# Edit sources.list
vim sources.list
# Replace the mirror address (vim substitution)
:%s#http://cn.archive.ubuntu.com#https://mirrors.aliyun.com#g
# Refresh
apt-get update
Disable the firewall and swap
# Disable SELinux (not enabled by default on Ubuntu; included as a precaution)
apt install -y selinux-utils
# Temporarily disable
setenforce 0
# Permanent disable: SELinux stays disabled on Ubuntu by default, so no further step is needed
# Stop the firewall
systemctl stop ufw.service
# Permanently disable the firewall
systemctl disable ufw.service
# Turn off swap
swapoff -a
# Permanently disable swap (comment out the swap line in /etc/fstab)
vim /etc/fstab
## or
sed -i '/swap/s/^\(.*\)$/#\1/g' /etc/fstab
# /swap.img none swap sw 0 0
# Check whether swap is off
free -m
total used free shared buff/cache available
Mem: 32092 348 30030 4 1713 31342
Swap: 0 0 0
# All Swap values are 0, so swap is disabled
Set the time zone and enable time synchronization
# timedatectl ships with systemd on Ubuntu, so no extra package needs to be installed
# View time zone info
timedatectl
timedatectl set-timezone Asia/Shanghai
# View the current time
date +"%Y-%m-%d %H:%M:%S"
# Time synchronization
apt install -y chrony
systemctl restart chrony
systemctl status chrony
chronyc sources
Adjust kernel parameters
cat > /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
sysctl -p /etc/sysctl.d/k8s.conf
# or
sysctl --system
# Verify
sysctl -a | grep ip_forward
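The two bridge-related sysctls above only take effect while the br_netfilter kernel module is loaded, so it is worth loading it (plus overlay, which containerd relies on) now and on every boot; a minimal sketch:
# Load the modules required by the sysctls above and by containerd
modprobe overlay
modprobe br_netfilter
# Load them automatically at boot
cat > /etc/modules-load.d/k8s.conf <<EOF
overlay
br_netfilter
EOF
# Re-apply the sysctl settings once the modules are loaded
sysctl --system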
Install ipvsadm
apt install -y ipset ipvsadm
cat > /etc/modules-load.d/ipvs.conf <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_lc
modprobe -- ip_vs_lblc
modprobe -- ip_vs_lblcr
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- ip_vs_dh
modprobe -- ip_vs_fo
modprobe -- ip_vs_nq
modprobe -- ip_vs_sed
modprobe -- ip_vs_ftp
modprobe -- ip_vs_sh
modprobe -- ip_tables
modprobe -- ip_set
modprobe -- ipt_set
modprobe -- ipt_rpfilter
modprobe -- ipt_REJECT
modprobe -- ipip
modprobe -- xt_set
modprobe -- br_netfilter
modprobe -- nf_conntrack
EOF
# Make the script executable
chmod 755 /etc/modules-load.d/ipvs.conf
# Load the modules now
bash /etc/modules-load.d/ipvs.conf
# Verify
lsmod | grep ip_vs
# Copy to /etc/profile.d so the modules are loaded on every login shell (persistence)
cp /etc/modules-load.d/ipvs.conf /etc/profile.d/ipvs.modules.sh
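Note that files under /etc/modules-load.d are normally read by systemd-modules-load and are expected to contain one module name per line rather than a shell script, so a reboot-safe alternative is a plain module list; a minimal sketch (the list below is trimmed to the commonly required modules and is illustrative):
cat > /etc/modules-load.d/ipvs-modules.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
br_netfilter
EOF
systemctl restart systemd-modules-load
lsmod | grep ip_vs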
Install Docker
# Install Docker (the latest packaged version is fine)
apt-get update
apt-get install -y docker.io
# Start Docker and enable it at boot
systemctl start docker
systemctl enable docker
# Verify
docker version   # or: docker info
Note: optional. Since v1.24 Kubernetes has removed the dockershim, so Docker is no longer used as the container runtime here (containerd is used instead).
Configure the cgroup driver
vim /etc/docker/daemon.json
{
  "exec-opts": ["native.cgroupdriver=systemd"]
}
cat > /var/lib/kubelet/config.yaml <<EOF
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
EOF
systemctl daemon-reload
systemctl restart docker
Note: optional. Since v1.24 Kubernetes has removed the dockershim, so this Docker-specific step is not required when containerd is the runtime.
Deploy Kubernetes
Add the repository and install the Kubernetes components
# Install prerequisites
apt-get install -y ca-certificates curl software-properties-common apt-transport-https
curl -s https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo apt-key add -
# Configure the Aliyun Kubernetes apt source
vim /etc/apt/sources.list.d/kubernetes.list
# Add the following line:
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
# Update
apt-get update -y
# Install kubeadm, kubectl, and kubelet
apt-get install -y kubelet kubeadm kubectl
# Hold the packages so apt upgrade ignores them; unhold before upgrading, then hold again afterwards.
apt-mark hold kubelet kubeadm kubectl
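When an upgrade is needed later, lift the hold first and put it back afterwards; a minimal sketch (the target version below is only an example):
apt-mark unhold kubelet kubeadm kubectl
apt-get update
apt-get install -y kubeadm=1.28.2-00 kubelet=1.28.2-00 kubectl=1.28.2-00   # example version
apt-mark hold kubelet kubeadm kubectl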
Install the container runtime (containerd)
apt install -y containerd
mkdir /etc/containerd
containerd config default > /etc/containerd/config.toml
# Edit /etc/containerd/config.toml
vim /etc/containerd/config.toml
# sandbox_image 3.8 -> 3.9
sandbox_image = "registry.k8s.io/pause:3.9"
# SystemdCgroup = false -> true
SystemdCgroup = true
# Start now and enable at boot
systemctl enable --now containerd
# List images
crictl -r unix:///run/containerd/containerd.sock images
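To avoid passing -r/--runtime-endpoint on every crictl call, the endpoint can be set once in crictl's config file; a small sketch:
cat > /etc/crictl.yaml <<EOF
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
EOF
# now plain "crictl images" works without -r
crictl images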
Initialize the Kubernetes cluster
# Initialize the master node
kubeadm init \
--apiserver-advertise-address=192.168.1.210 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.28.2 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=all \
--cri-socket unix:///run/containerd/containerd.sock
# If initialization fails with "initial timeout of 40s passed."
# check the error messages:
systemctl status kubelet
journalctl -xeu kubelet
############ The failure is most likely an image pull error caused by network issues; in that case deploy as follows ################
# List the images kubeadm needs
kubeadm config images list --kubernetes-version v1.28.2
kube-apiserver:v1.28.2
kube-controller-manager:v1.28.2
kube-scheduler:v1.28.2
kube-proxy:v1.28.2
pause:3.9
etcd:3.5.9-0
coredns:v1.10.1
# Generate the default init configuration
kubeadm config print init-defaults > k8s-init.yaml
# Edit k8s-init.yaml
vim k8s-init.yaml
## Change
advertiseAddress: 192.168.1.210
nodeRegistration.name: k8smaster
networking.podSubnet: 10.244.0.0/16
networking.dnsDomain: cluster.local
## Append
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd
# Pull the images manually
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-apiserver:v1.28.2
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-controller-manager:v1.28.2
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-scheduler:v1.28.2
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-proxy:v1.28.2
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/pause:3.9
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/etcd:3.5.9-0
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/coredns:v1.10.1
# List images
crictl -r unix:///run/containerd/containerd.sock images
# Re-tag the images so they match the registry.k8s.io names kubeadm expects
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/kube-apiserver:v1.28.2 registry.k8s.io/kube-apiserver:v1.28.2
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/kube-controller-manager:v1.28.2 registry.k8s.io/kube-controller-manager:v1.28.2
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/kube-scheduler:v1.28.2 registry.k8s.io/kube-scheduler:v1.28.2
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/kube-proxy:v1.28.2 registry.k8s.io/kube-proxy:v1.28.2
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/pause:3.9 registry.k8s.io/pause:3.9
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/etcd:3.5.9-0 registry.k8s.io/etcd:3.5.9-0
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/coredns:v1.10.1 registry.k8s.io/coredns/coredns:v1.10.1
# Delete an image
crictl -r unix:///run/containerd/containerd.sock rmi registry.cn-shanghai.aliyuncs.com/xxx:latest
# Export an image
ctr -n k8s.io image export xxx.tar docker.io/bitnami/xxx:latest
# 1. List the images on this machine
crictl images
# 2. Remove unused images
crictl rmi --prune
# Reset kubeadm (if you need to start over)
kubeadm reset
Example k8s-init.yaml
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.1.210
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  name: k8smaster
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.k8s.io
kind: ClusterConfiguration
kubernetesVersion: 1.28.2
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16
scheduler: {}
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd
Note: dnsDomain must be set to the default cluster.local; otherwise the dapr sidecar will fail to connect.
# Re-initialize
kubeadm init --config k8s-init.yaml --ignore-preflight-errors=all
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.1.210:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0f598ac4471612dd56f61e1d36b26a73862016cd95ebe178e5c3239ef9a409e0
# --cri-socket=unix:///var/run/containerd/containerd.sock
## The output above means the master node was deployed successfully
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# If the join command was not recorded, it can be regenerated.
# Likewise, if a worker node's join hangs at "Running pre-flight checks",
# re-check the earlier configuration or generate a new token.
kubeadm token generate   # generate a token
# 5pv1xx.j52sjzl6bfljuzkp
# kubeadm token create {token} --print-join-command --ttl=0   # print the join command
kubeadm token create 5pv1xx.j52sjzl6bfljuzkp --print-join-command
kubeadm join 192.168.1.210:6443 --token 5pv1xx.j52sjzl6bfljuzkp --discovery-token-ca-cert-hash sha256:0f598ac4471612dd56f61e1d36b26a73862016cd95ebe178e5c3239ef9a409e0
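If only the CA hash is missing, it can be recomputed on the master from the cluster CA certificate (the path below is the kubeadm default):
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | \
  openssl rsa -pubin -outform der 2>/dev/null | \
  openssl dgst -sha256 -hex | sed 's/^.* //'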
# Deploying a worker node: run all of the preparation steps above (everything before kubeadm init), then run kubeadm join on the node.
root@k8smaster:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8smaster NotReady control-plane 19m v1.28.2
k8snode1 NotReady <none> 2m26s v1.28.1
# At this point all nodes are NotReady; a network plugin needs to be deployed
## flannel is used as the example here (run on the master node only)
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
## If this fails or times out because of a slow network, download with wget and then apply
wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
kubectl apply -f kube-flannel.yml
## Output
serviceaccount/flannel created
configmap/kube-flannel-cfg created
daemonset.apps/kube-flannel-ds created
## Check the node status again (if a node never becomes Ready, reboot it)
root@k8smaster:~# kubectl get nodes
NAME STATUS ROLES AGE VERSION
k8smaster Ready control-plane 42m v1.28.2
k8snode1 Ready <none> 25m v1.28.1
# View CPU and memory requests per node
kubectl describe node |grep -E '((Name|Roles):\s{6,})|(\s+(memory|cpu)\s+[0-9]+\w{0,2}.+%\))'
Name: k8smaster
Roles: control-plane
cpu 950m (95%) 0 (0%)
memory 290Mi (3%) 340Mi (4%)
Name: k8snode1
Roles: <none>
cpu 475m (3%) 500m (4%)
memory 3028Mi (9%) 712Mi (2%)
Name: k8snode2
Roles: <none>
cpu 300m (30%) 0 (0%)
memory 340Mi (1%) 200Mi (0%)
Verify cluster availability
# Create an nginx deployment
kubectl create deployment nginx --image=nginx
# Create a service
kubectl expose deployment nginx --port=80 --type=NodePort
# Show pod details; nginx has been deployed successfully on k8snode1
kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-7854ff8877-hf287 1/1 Running 0 3m4s 10.244.1.2 k8snode1 <none> <none>
# View the deployed resources
kubectl get pod,svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 52m
service/nginx NodePort 10.103.156.2 <none> 80:32400/TCP 4m3s
# Verify
curl http://10.103.156.2
# or open in a browser:
# http://192.168.1.210:32400
# http://192.168.1.211:32400
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
html { color-scheme: light dark; }
body { width: 35em; margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif; }
</style>
</head>
<body>
...
<p><em>Thank you for using nginx.</em></p>
</body>
</html>
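Once the welcome page is returned, the demo deployment and service can optionally be cleaned up again:
kubectl delete service nginx
kubectl delete deployment nginx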
nginx-ingress
Deployment
# Download deploy.yaml
wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.8.2/deploy/static/provider/cloud/deploy.yaml
# Inspect the file; three images need to be pulled:
registry.k8s.io/ingress-nginx/controller:v1.8.2@sha256:74834d3d25b336b62cabeb8bf7f1d788706e2cf1cfd64022de4137ade8881ff2
registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20230407@sha256:543c40fd093964bc9ab509d3e791f9989963021f1e9e4c9c7b6700b02bfb227b
registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20230407@sha256:543c40fd093964bc9ab509d3e791f9989963021f1e9e4c9c7b6700b02bfb227b
# Pull the images manually because of network restrictions
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/nginx-ingress-controller:v1.8.2
Image is up to date for sha256:47fe3698318ff814d1feb11ed2abab82066f9b9f77964ae0d0daa81a9635271f
# That image could not be found, and other versions were not available either
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/kube-webhook-certgen:v1.8.2
# Nothing worked, so fall back to version v1.1.1
wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.1.1/deploy/static/provider/cloud/deploy.yaml
# Remove externalTrafficPolicy: Local
# Change the controller Deployment image to: bitnami/nginx-ingress-controller:1.1.1
# Change the ingress-nginx-admission-create image to: liangjw/kube-webhook-certgen:v1.1.1
# Change the ingress-nginx-admission-patch image to: liangjw/kube-webhook-certgen:v1.1.1
crictl -r unix:///run/containerd/containerd.sock pull bitnami/nginx-ingress-controller:1.1.1
crictl -r unix:///run/containerd/containerd.sock pull liangjw/kube-webhook-certgen:v1.1.1
crictl -r unix:///run/containerd/containerd.sock pull liangjw/kube-webhook-certgen:v1.1.1
# Set as the default IngressClass
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
  annotations:
    ingressclass.kubernetes.io/is-default-class: "true"
# Deploy as a DaemonSet
# Reference: https://www.cnblogs.com/dannylinux/p/15813829.html
kind: DaemonSet
spec:
  template:
    spec:
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      nodeSelector:
        edgenode: 'true'
      containers:
      - name: controller
        args:
        - --watch-ingress-without-class=true
Example
# Example: nginx-ingress-example.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: nginx-ingress
  namespace: default
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true" # force redirect to HTTPS
    nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
spec:
  rules:
  - host: nginx.cluster.org
    http:
      paths:
      - path: /
        pathType: ImplementationSpecific
        backend:
          service:
            name: nginx
            port:
              number: 32400
kubectl apply -f nginx-ingress-example.yaml
curl http://nginx.cluster.org:32400/
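For the host name above to resolve during testing, add it to /etc/hosts on the client machine first; a quick sketch (the IP and port follow this guide's example values):
echo "192.168.1.210 nginx.cluster.org" >> /etc/hosts
curl http://nginx.cluster.org:32400/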
Kubernetes-Dashboard
Deploy Kubernetes-Dashboard
wget https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml
# Deploy the dashboard
kubectl apply -f recommended.yaml
# Delete the default service
kubectl delete service kubernetes-dashboard --namespace=kubernetes-dashboard
# Create a custom service
vim dashboard-svc.yaml
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  type: NodePort
  ports:
    - port: 443
      targetPort: 8443
      nodePort: 30443
  selector:
    k8s-app: kubernetes-dashboard
kubectl apply -f dashboard-svc.yaml
Create dashboard-ingress
vim dashboard-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: dashboard-ingress
  namespace: kubernetes-dashboard
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true" # force redirect to HTTPS
    nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
spec:
  rules:
  - host: k8s-dashboard.cluster.org
    http:
      paths:
      - path: /
        pathType: ImplementationSpecific
        backend:
          service:
            name: kubernetes-dashboard
            port:
              number: 30443
kubectl apply -f dashboard-ingress.yaml
# Verify (add a hosts entry before accessing from a browser)
curl https://k8s-dashboard.cluster.org:30443
Generate a dashboard token
# Create a ServiceAccount
vim dashboard-sa.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kubernetes-dashboard
kubectl apply -f dashboard-sa.yaml
# Bind the cluster-admin role
vim dashboard-role-binding.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kubernetes-dashboard
kubectl apply -f dashboard-role-binding.yaml
# Short-lived token
kubectl create token admin-user -n kubernetes-dashboard
# Long-lived token
vim dashboard-token-secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: admin-user-token
  namespace: kubernetes-dashboard
  annotations:
    kubernetes.io/service-account.name: "admin-user"
type: kubernetes.io/service-account-token
# Create the secret for the long-lived token
kubectl apply -f dashboard-token-secret.yaml
# View the token
kubectl describe secret admin-user-token -n kubernetes-dashboard
eyJhbGciOiJSUzI1NiIsImtpZCI6Ikk2ZUw0c0tFT011NXJ4Zjg0cUdST0hZemw3SC1EVlJRcllUSmdTbUY4d1EifQ.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJrdWJlcm5ldGVzLWRhc2hib2FyZCIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VjcmV0Lm5hbWUiOiJhZG1pbi11c2VyLXRva2VuIiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9zZXJ2aWNlLWFjY291bnQubmFtZSI6ImFkbWluLXVzZXIiLCJrdWJlcm5ldGVzLmlvL3NlcnZpY2VhY2NvdW50L3NlcnZpY2UtYWNjb3VudC51aWQiOiI1MGY0ZmI5ZS04NzU0LTQ3ZWMtOGE0NC03MWI4MjY5Y2NkYzUiLCJzdWIiOiJzeXN0ZW06c2VydmljZWFjY291bnQ6a3ViZXJuZXRlcy1kYXNoYm9hcmQ6YWRtaW4tdXNlciJ9.SPXqk101QDtXoaNSLuUYQyTL509inyTRfe8bbZDNcxWM2c8yYYF2oV-HgF9Lyt0sZDGdR85ssBalBYXxrjnYjo3XOLPK2Pr-ulC3WC_eaKQbli0dWQpXBKgizn0XfeCSkw5I_JxwO8nlwqPd0-m_CE7DVdu8YX5gogqMXdIXr9XYDmdonRBbXXVKThVx4ufvsC3EpTp_1NRS-twuEFjWtKvO8CInL2LbUOSaMaBVXH41CQLt5BElnfbjjWVwNzWiQLCTaND2VVrfamtfiRU9ycW5OgjlNZSv15AEwbnWE4p2v3XFv1LPvyLUn9oOUwYY9yGovZt-zuUG5S_5zInzyg
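Instead of copying the token out of the describe output, it can be read directly from the secret; a small sketch:
kubectl get secret admin-user-token -n kubernetes-dashboard -o jsonpath='{.data.token}' | base64 -d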
Deploy NFS
# Reference: https://blog.csdn.net/lpfstudy/article/details/130038661
# Alternatively, deploy as follows
# NFS server setup:
# 1. Install the NFS server:
sudo apt-get update
sudo apt-get install nfs-kernel-server
# 2. Create the shared directory and adjust its ownership (using /var/nfs as an example):
sudo mkdir /var/nfs
sudo chown nobody:nogroup /var/nfs
# 3. Edit /etc/exports and add the share configuration:
/var/nfs *(rw,sync,no_subtree_check)
# 4. Start the NFS service and export the shares:
sudo systemctl start nfs-kernel-server
sudo exportfs -rav
# NFS client setup:
# 1. Install the NFS client:
sudo apt-get update
sudo apt-get install nfs-common
# 2. Create a local mount point:
sudo mkdir /mnt/nfs
# 3. Mount the remote NFS file system:
sudo mount server_ip:/var/nfs /mnt/nfs
# (replace server_ip with the NFS server's IP address)
# To mount automatically at boot, add the mount to /etc/fstab:
server_ip:/var/nfs /mnt/nfs nfs defaults 0 0
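Before mounting, the client can list what the server actually exports; a quick check (server_ip as above):
# showmount ships with the nfs-common package installed above
showmount -e server_ip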
Deploy a StorageClass
# Reference: https://zhuanlan.zhihu.com/p/655923057
# Deploy nfs-subdir-external-provisioner
helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/
helm repo update
helm install nfs-subdir-external-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner -n kube-system \
--set image.repository=dyrnq/nfs-subdir-external-provisioner \
--set nfs.server=192.168.1.211 \
--set nfs.path=/home/nfs/nfs-storage
# Create the StorageClass
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: alibabacloud-cnfs-nas
provisioner: cluster.local/nfs-subdir-external-provisioner
parameters:
  # With "false", data is not kept after the PVC is deleted; "true" keeps (archives) the data
  archiveOnDelete: "false"
mountOptions:
  # NFS version; set according to the NFS server version
  - nfsvers=4
---
# Create a PVC
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: nfs-storage-pvc-1
  namespace: dev1
spec:
  storageClassName: alibabacloud-cnfs-nas # must match the StorageClass name created above
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Mi
---
# Usage example
kind: Pod
apiVersion: v1
metadata:
  name: nfs-storage-pod-1
  namespace: dev1
spec:
  containers:
  - name: nfs-storage-pod-1
    image: busybox
    command:
    - "/bin/sh"
    args:
    - "-c"
    - "touch /mnt/teststorage && echo 111 > /mnt/teststorage && exit 0 || exit 1" ## write a test file into the NFS-backed volume
    volumeMounts:
    - name: nfs-pvc
      mountPath: "/mnt"
  restartPolicy: "Never"
  volumes:
  - name: nfs-pvc
    persistentVolumeClaim:
      claimName: nfs-storage-pvc-1
Note: different applications should preferably request storage through different StorageClasses; otherwise requests may fail and pods may fail to run.
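To confirm that the provisioner works, check that the PVC becomes Bound and that a PV was created for it; a minimal check using the example names and namespace above:
kubectl get pvc -n dev1
kubectl get pv
kubectl get pod nfs-storage-pod-1 -n dev1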
Deploy helm3
# Install
# Reference: https://helm.sh/docs/intro/install/
curl https://baltocdn.com/helm/signing.asc | sudo apt-key add -
apt-get install -y apt-transport-https
echo "deb https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
apt-get update
apt-get install -y helm
# Verify
helm version
version.BuildInfo{Version:"v3.12.3", GitCommit:"3a31588ad33fe3b89af5a2a54ee1d25bfe6eaa5e", GitTreeState:"clean", GoVersion:"go1.20.7"}
# Helm help
helm -h
# Add helm repositories
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add aliyun https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts
helm repo update
helm search repo bitnami
# Example (redis)
helm install redis bitnami/redis -n storage --set global.storageClass=alibabacloud-cnfs-nas --set global.redis.password=*******
# Example (elasticsearch, kibana)
kubectl create secret generic elastic-juzipwd -n dapr-monitoring --from-literal=elasticsearch-password=**********
helm install elasticsearch -f values.yaml -n dapr-monitoring oci://registry-1.docker.io/bitnamicharts/elasticsearch
# Example (mysql)
helm install bitnami/mysql --generate-name
# Create a chart template
helm create my-app
# Deploy a local chart
helm install my-app .
# Upgrade a release (-i installs it if it is not deployed yet, otherwise upgrades it)
helm upgrade -i my-app dir_path --set image.imageName=my-app --set image.tag=latest
Deploy dapr
# Reference: https://docs.dapr.io/zh-hans/
# Install
wget -q https://raw.githubusercontent.com/dapr/cli/master/install/install.sh -O - | /bin/bash
# Verify
dapr
# Initialize (Kubernetes mode)
dapr init -k
# Check the version
dapr --version
# List dapr instances
dapr list
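The dapr control-plane pods themselves can also be checked; a quick sketch:
dapr status -k
kubectl get pods -n dapr-system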
Deploy etcd
helm install etcd oci://registry-1.docker.io/bitnamicharts/etcd -n <namespace> --set persistence.enabled=false,auth.rbac.rootPassword=ihnIwodNgG
etcdctl --endpoints=http://etcd-juzi.minikube.org:2379 --user=root --password=ihnIwodNgG get gitlab.com/echo/config/dev/data
Deploy cert-manager
# Add the repository
helm repo add jetstack https://charts.jetstack.io
helm repo update
# Deploy cert-manager
helm upgrade --install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --set installCRDs=true
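As a quick smoke test, a self-signed ClusterIssuer can be created once the cert-manager pods are running; a minimal sketch (the issuer name is illustrative):
cat <<EOF | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: selfsigned-issuer
spec:
  selfSigned: {}
EOF
kubectl get clusterissuer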
Deploy MQTT (EMQX)
# Add the repository
helm repo add emqx https://repos.emqx.io/charts
helm repo update
# Deploy emqx-operator
helm install emqx-operator emqx/emqx-operator -n storage
# kubectl wait --for=condition=Ready pods -l "control-plane=controller-manager" -n storage
# Deploy emqx-enterprise
helm install emqx emqx/emqx-enterprise -n storage
# If there are version problems, deploy with the following manifest instead
# deployment.yaml
apiVersion: apps.emqx.io/v2beta1
kind: EMQX
metadata:
  name: emqx
  namespace: storage # namespace
spec:
  image: emqx:5.1 # image; can be changed to a newer version
# View the service
kubectl get svc -n storage | grep emqx-emqx-enterprise
# Create emqx-ingress.yaml
vim emqx-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: emqx-ingress
  namespace: storage
  #annotations:
  #  nginx.ingress.kubernetes.io/force-ssl-redirect: "true" # force redirect to HTTPS
  #  nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
spec:
  ingressClassName: nginx
  rules:
  - host: emqx.minikube.org
    http:
      paths:
      - path: /
        pathType: ImplementationSpecific
        backend:
          service:
            name: emqx-emqx-enterprise
            port:
              number: 18083
kubectl apply -f emqx-ingress.yaml
# Open http://emqx.minikube.org in a browser
# Username: admin
# Password: public
Deploy rabbitmq
# Deploy rabbitmq (with or without a custom StorageClass)
helm install rabbitmq bitnami/rabbitmq -n storage --set auth.username=admin,auth.password=123456
helm install rabbitmq bitnami/rabbitmq -n storage --set global.storageClass=alibabacloud-cnfs-nas,auth.username=admin,auth.password=123456
Internal endpoint: rabbitmq.storage.svc.cluster.local
Port: 5672; management UI port: 15672
Retrieve the password:
$(kubectl get secret --namespace storage rabbitmq -o jsonpath="{.data.rabbitmq-password}" | base64 -d)
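To reach the management UI without an Ingress, a temporary port-forward is enough; a quick sketch:
kubectl port-forward --namespace storage svc/rabbitmq 15672:15672
# then open http://127.0.0.1:15672 and log in as admin with the password retrieved above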
Deploy Kafka
# Deploy Kafka
helm install kafka bitnami/kafka --namespace storage --set kafkaVersion=3.4.0 --set replicaCount=3 --set global.storageClass=alibabacloud-cnfs-nas
# Verify with a client pod
kubectl run kafka-client --restart='Never' --image docker.io/bitnami/kafka:3.6.1-debian-11-r0 --namespace storage --command -- sleep infinity
# vim client.properties
security.protocol=SASL_PLAINTEXT
sasl.mechanism=SCRAM-SHA-256
sasl.jaas.config=org.apache.kafka.common.security.scram.ScramLoginModule required \
username="user1" \
password="$(kubectl get secret kafka-user-passwords --namespace storage -o jsonpath='{.data.client-passwords}' | base64 -d | cut -d , -f 1)";
## the mechanism must match the login module: SCRAM-SHA-256 with ScramLoginModule, PLAIN with PlainLoginModule
# Copy the config file into the client pod
kubectl cp --namespace storage /path/to/client.properties kafka-client:/tmp/client.properties
# Exec into the client pod
kubectl exec --tty -i kafka-client --namespace storage -- bash
# Producer
kafka-console-producer.sh \
--producer.config /tmp/client.properties \
--broker-list kafka-controller-0.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-1.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-2.kafka-controller-headless.storage.svc.cluster.local:9092 \
--topic test
# Consumer
kafka-console-consumer.sh \
--consumer.config /tmp/client.properties \
--bootstrap-server kafka.storage.svc.cluster.local:9092 \
--topic test \
--from-beginning
# Create a topic with partitions
kafka-topics.sh --command-config /tmp/client.properties --create --topic givegift-create --partitions 10 --replication-factor 1 --bootstrap-server kafka-controller-0.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-1.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-2.kafka-controller-headless.storage.svc.cluster.local:9092
# Delete a topic
kafka-topics.sh --command-config /tmp/client.properties --delete --topic givegift-create --bootstrap-server kafka-controller-0.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-1.kafka-controller-headless.storage.svc.cluster.local:9092,kafka-controller-2.kafka-controller-headless.storage.svc.cluster.local:9092
# List all consumer groups
kafka-consumer-groups.sh --bootstrap-server <kafka-broker> --list
# View lag details for a specific consumer group
kafka-consumer-groups.sh --bootstrap-server <kafka-broker> --group <consumer-group> --describe
CLI reference: https://blog.csdn.net/cold___play/article/details/132157982
Cluster monitoring
Deploy metrics-server
# Fetch the built-in Kubernetes metrics
kubectl get --raw /metrics
# Deploy metrics-server
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
# Check the pod status
kubectl get pods -n kube-system | grep metrics-server
# If the image pull fails, pull the image manually and redeploy
# Reference: https://blog.51cto.com/nowsafe/6026448
# Pull the image and re-tag it
crictl -r unix:///run/containerd/containerd.sock pull registry.aliyuncs.com/google_containers/metrics-server:v0.7.0
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/metrics-server:v0.7.0 registry.k8s.io/metrics-server/metrics-server:v0.7.0
# List images
crictl -r unix:///run/containerd/containerd.sock images
# Edit the downloaded components.yaml: add --kubelet-insecure-tls and change --kubelet-preferred-address-types:
template:
  metadata:
    labels:
      k8s-app: metrics-server
  spec:
    containers:
    - args:
      - --kubelet-preferred-address-types=InternalIP # changed; the default is InternalIP,ExternalIP,Hostname
      - --kubelet-insecure-tls # added
kubectl apply -f components.yaml
# View node resource usage
kubectl top nodes
NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
k8smaster 1132m 28% 2896Mi 36%
k8snode1 1682m 14% 4985Mi 15%
# View pod resource usage
kubectl top pods
Deploy prometheus
# Add the repository
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo update
helm search repo prometheus
# bitnami/prometheus 0.8.1 2.49.1 Prometheus is an open source monitoring and ale...
# Deploy prometheus
helm install prometheus bitnami/prometheus -n prometheus --set alertmanager.persistentVolume.storageClass="gp2",server.persistentVolume.storageClass="gp2",server.service.type=LoadBalancer
Deploy grafana
# Add the repository
helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo update
helm search repo grafana
# bitnami/grafana 9.8.3 10.3.1 Grafana is an open source metric analytics and ...
# Deploy grafana
helm install grafana bitnami/grafana -n prometheus --set global.storageClass=efs-sc-grafana
echo "Password: $(kubectl get secret grafana-admin --namespace prometheus -o jsonpath="{.data.GF_SECURITY_ADMIN_PASSWORD}" | base64 -d)"
# w6dPpOIdGd
Note: efs-sc-grafana is the corresponding StorageClass; data is persisted on EFS.
Additional reference: https://blog.csdn.net/weixin_43832230/article/details/130317391
Dynamic scaling (HPA)
# Prerequisite: metrics-server is deployed and the Kubernetes metrics APIs respond normally
kubectl get apiservice | grep metrics
kubectl get --raw "/apis/metrics.k8s.io/v1beta1/nodes"
# dapr sidecar resource requests/limits
dapr.io/sidecar-cpu-request: "10m"
dapr.io/sidecar-memory-request: "32Mi"
# dapr.io/sidecar-cpu-limit: "100m"
# dapr.io/sidecar-memory-limit: "256Mi"
# Application resource requests/limits
resources:
  requests:
    cpu: 100m
    memory: 128Mi
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
# Create an HPA (optional flag --cpu-percent, default 80)
kubectl autoscale deploy nginx -n namespace --min=1 --max=3 --cpu-percent=60
# View the HPA
kubectl get hpa -n namespace
# Describe the HPA
kubectl describe hpa nginx -n namespace
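The same HPA can also be declared as a manifest, which is easier to keep under version control; a minimal sketch using the autoscaling/v2 API (names mirror the nginx example above):
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: nginx
  namespace: default
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: nginx
  minReplicas: 1
  maxReplicas: 3
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 60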
Cluster certificates
# Check certificate expiry
kubeadm certs check-expiration
# Back up the Kubernetes configuration
cp -R /etc/kubernetes /etc/kubernetes.bak
# Renew the certificates
kubeadm certs renew all
# Copy the kubeconfig
cp /etc/kubernetes/admin.conf ./config
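After renewal the control-plane components must be restarted so they pick up the new certificates; one common approach on a kubeadm cluster is to move the static-pod manifests out and back (a sketch; the paths are the kubeadm defaults):
mkdir -p /etc/kubernetes/manifests.bak
mv /etc/kubernetes/manifests/*.yaml /etc/kubernetes/manifests.bak/
sleep 30   # give kubelet time to stop the static pods
mv /etc/kubernetes/manifests.bak/*.yaml /etc/kubernetes/manifests/
# refresh the local kubeconfig used by kubectl
cp /etc/kubernetes/admin.conf $HOME/.kube/config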