[TOC]
# Initial Environment Setup
**Add hosts file entries**
```shell
cat >> /etc/hosts <<-EOF
{IP} {HOSTNAME}
EOF
```
> Replace `{IP}` and `{HOSTNAME}` with the actual address and hostname.
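A filled-in example (hypothetical values; `192.168.31.79`/`k8s-node01` stand in for the node being added):
```shell
cat >> /etc/hosts <<-EOF
192.168.31.79 k8s-node01
EOF
```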
**Disable the firewall**
```shell
sudo systemctl disable firewalld --now
```
**Disable SELinux**
```shell
# takes effect immediately
sudo setenforce 0
# permanent, takes effect after reboot
sed -ri 's/(SELINUX=).*/\1disabled/g' /etc/selinux/config
```
**Disable swap**
```shell
# takes effect immediately
swapoff -a
# permanent, requires a reboot
sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
```
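To confirm swap is really off:
```shell
# prints nothing when no swap device is active
swapon --show
# the Swap line should read all zeros
free -h | grep -i swap
```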
**Load the IPVS kernel modules**
```shell
cat > /etc/sysconfig/modules/ipvs.modules <<-EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
modprobe -- br_netfilter
EOF
# load the IPVS modules now
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules
# verify
lsmod | grep -e ip_vs -e nf_conntrack -e br_netfilter
```
Note: `.modules` files under `/etc/sysconfig/modules/` are loaded automatically at boot.
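On systemd-based systems, an alternative that achieves the same automatic loading is a `modules-load.d` drop-in (a sketch; the file name `ipvs.conf` is arbitrary):
```shell
# systemd-modules-load reads one module name per line at boot
cat > /etc/modules-load.d/ipvs.conf <<-EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
br_netfilter
EOF
```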
**Install ipset and related packages**
```shell
yum install ipvsadm ipset conntrack-tools vim -y # make sure the ipset package is installed
```
**Tune kernel parameters**
```shell
cat > /etc/sysctl.d/kubernetes.conf << EOF
# bridged traffic (the bridge used by the docker service) must pass through iptables
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
# allow TIME-WAIT sockets to be reused for new TCP connections; default 0 (off)
net.ipv4.tcp_tw_reuse = 1
# fast recycling of TIME-WAIT sockets; default 0 (off). Enabling it is a disaster
# for clients behind NAT devices (e.g. forwarded container traffic), so keep it off
net.ipv4.tcp_tw_recycle = 0
# port range the system may use for outbound connections
net.ipv4.ip_local_port_range = 32768 65535
# maximum number of TIME_WAIT sockets kept by the kernel; default 4096
net.ipv4.tcp_max_tw_buckets = 65535
# enable kernel IP forwarding
net.ipv4.ip_forward = 1
EOF
# apply the kubernetes.conf file
sysctl -p /etc/sysctl.d/kubernetes.conf
```
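Spot-check that the new values took effect (the `net.bridge.*` keys only exist once `br_netfilter` is loaded, which was done above):
```shell
sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables
# expected:
# net.ipv4.ip_forward = 1
# net.bridge.bridge-nf-call-iptables = 1
```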
**Configure time synchronization**
```shell
# install the chrony package
yum install -y chrony
# comment out the existing sync servers
sed -ri 's/(server .* iburst)/# \1/g' /etc/chrony.conf
# add an NTP source
echo "server ntp.aliyun.com iburst" >> /etc/chrony.conf
# restart the chronyd service
systemctl restart chronyd
# verify the service
chronyc sources
```
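`chronyc sources` only lists the configured servers; to check whether the clock is actually locked to one:
```shell
# "Leap status : Normal" and a small "System time" offset indicate a healthy sync
chronyc tracking
```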
# Install Docker
**Create Docker installation directories**
```shell
mkdir -p /app/docker/{conf,data}
```
**Download the Docker binary package**
```shell
curl -SL -o /usr/local/src/docker-20.10.21.tgz https://download.docker.com/linux/static/stable/x86_64/docker-20.10.21.tgz
```
**Extract the binary package**
```shell
tar xf /usr/local/src/docker-20.10.21.tgz -C /opt
cp /opt/docker/* /usr/local/bin/
rm -rf /opt/docker
```
**Create the Docker systemd unit**
```shell
cat > /usr/lib/systemd/system/docker.service <<EOF
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network-online.target firewalld.service
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/local/bin/dockerd --config-file=/app/docker/conf/daemon.json
ExecReload=/bin/kill -s HUP \$MAINPID
LimitNOFILE=infinity
LimitNPROC=infinity
TimeoutStartSec=0
Delegate=yes
KillMode=process
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
EOF
```
**Create the daemon.json file**
```shell
cat > /app/docker/conf/daemon.json << EOF
{
  "data-root": "/app/docker/data/",
  "exec-opts": ["native.cgroupdriver=systemd"],
  "registry-mirrors": [
    "https://1nj0zren.mirror.aliyuncs.com",
    "https://docker.mirrors.ustc.edu.cn",
    "http://f1361db2.m.daocloud.io",
    "https://registry.docker-cn.com"
  ],
  "log-driver": "json-file",
  "log-level": "info"
}
EOF
```
**Start Docker**
```shell
systemctl daemon-reload
systemctl enable docker.service --now
```
**Install docker-compose**
```shell
curl -L https://get.daocloud.io/docker/compose/releases/download/1.28.6/docker-compose-`uname -s`-`uname -m` > /usr/local/bin/docker-compose
chmod +x /usr/local/bin/docker-compose
```
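Verify the binary is on the PATH and executable:
```shell
docker-compose --version
```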
# Install kubelet
**Create working and log directories**
```shell
mkdir -p /etc/kubernetes/{conf,manifests,pki/etcd}
mkdir -p /var/log/kubernetes/kubelet
mkdir -p /var/lib/kubelet
```
**Fetch the CA certificate**
```shell
scp k8s-master01:/etc/kubernetes/pki/ca.crt /etc/kubernetes/pki
```
**Get the bootstrap token value**
Note: run the following on any master node. Copy the entire TOKEN line from the output and paste it on the host being added, e.g. `TOKEN='f1e3f6.c290493cdb25351a'`.
```shell
cat <<-"EOF" | sudo tee /tmp/create-token.sh > /dev/null
while true
do
    token_id=`cat /dev/urandom | head -c 10 | md5sum | head -c 6`
    token_secret=`cat /dev/urandom | head -c 10 | md5sum | head -c 16`
    # neither $token_id nor $token_secret may be purely numeric
    if ! echo $token_id | grep -q '[^0-9]';then
        token_id=`cat /dev/urandom | head -c 10 | md5sum | head -c 6`
    elif ! echo $token_secret | grep -q '[^0-9]';then
        token_secret=`cat /dev/urandom | head -c 10 | md5sum | head -c 16`
    else
        break
    fi
done
cat <<-eof | kubectl apply -f - > /dev/null
apiVersion: v1
kind: Secret
metadata:
  name: bootstrap-token-${token_id}
  namespace: kube-system
type: bootstrap.kubernetes.io/token
stringData:
  token-id: ${token_id}
  token-secret: ${token_secret}
  expiration: `date -d '1 day' +%F`T`date +%T`+08:00
  usage-bootstrap-authentication: "true"
  usage-bootstrap-signing: "true"
  auth-extra-groups: system:bootstrappers:worker,system:bootstrappers:ingress
eof
TOKEN="${token_id}.${token_secret}"
echo "TOKEN='${TOKEN}'"
EOF
cat <<-"EOF" | sudo tee /tmp/get-token.sh > /dev/null
if [ `kubectl -n kube-system get secret --field-selector type=bootstrap.kubernetes.io/token -o name | wc -l` -ge 1 ];then
    token_id=`kubectl -n kube-system get secret --field-selector type=bootstrap.kubernetes.io/token -ojsonpath='{.items[0].data.token-id}' | base64 -d`
    token_secret=`kubectl -n kube-system get secret --field-selector type=bootstrap.kubernetes.io/token -ojsonpath='{.items[0].data.token-secret}' | base64 -d`
    TOKEN="${token_id}.${token_secret}"
    echo "TOKEN='${TOKEN}'"
else
    bash /tmp/create-token.sh
fi
EOF
bash /tmp/get-token.sh
```
**Generate the bootstrap-kubelet kubeconfig**
```shell
KUBE_APISERVER="https://192.168.31.100:6443"
TOKEN="f1e3f6.c290493cdb25351a" # 在master节点获取该值,每次获取都不一定一致的。上一步骤可获取该参数
cat <<-EOF | sudo tee /etc/kubernetes/bootstrap-kubelet.conf > /dev/null
apiVersion: v1
clusters:
- cluster:
    certificate-authority: pki/ca.crt
    server: ${KUBE_APISERVER}
  name: bootstrap
contexts:
- context:
    cluster: bootstrap
    user: kubelet-bootstrap
  name: bootstrap
current-context: bootstrap
kind: Config
preferences: {}
users:
- name: kubelet-bootstrap
  user:
    token: ${TOKEN}
EOF
```
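If `kubectl` is available on the node, an equivalent way to produce this file is the `kubectl config` subcommands (a sketch; unlike the heredoc above, it records the absolute CA path):
```shell
KUBECONFIG_FILE=/etc/kubernetes/bootstrap-kubelet.conf
kubectl config set-cluster bootstrap \
  --certificate-authority=/etc/kubernetes/pki/ca.crt \
  --server=${KUBE_APISERVER} --kubeconfig=${KUBECONFIG_FILE}
kubectl config set-credentials kubelet-bootstrap \
  --token=${TOKEN} --kubeconfig=${KUBECONFIG_FILE}
kubectl config set-context bootstrap \
  --cluster=bootstrap --user=kubelet-bootstrap --kubeconfig=${KUBECONFIG_FILE}
kubectl config use-context bootstrap --kubeconfig=${KUBECONFIG_FILE}
```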
**Copy the binary**
```shell
scp k8s-master01:/usr/local/bin/kubelet /usr/local/bin/
```
**Create kubelet startup parameters**
```shell
# detect the NIC holding the default route and the IP address bound to it
netcar=`ip r | awk '/default via/ {print $5}'`
[ -n "$netcar" ] && ipaddr=`ip r | awk -v netcar=$netcar '{if($3==netcar) print $9}'` || echo '$netcar is null'
cat > /etc/kubernetes/conf/kubelet.conf <<EOF
KUBELET_KUBECONFIG_ARGS="--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
KUBELET_CONFIG_ARGS="--config=/var/lib/kubelet/config.yaml"
KUBELET_NETWORK_ARGS="--network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin"
KUBELET_DATA_ARGS="--root-dir=/var/lib/kubelet --cert-dir=/var/lib/kubelet/pki --rotate-certificates"
KUBELET_LOG_ARGS="--alsologtostderr=true --logtostderr=false --v=4 --log-dir=/var/log/kubernetes/kubelet"
KUBELET_EXTRA_ARGS="--hostname-override=$ipaddr --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.2"
EOF
```
> Note: set `hostname-override` to the current node IP address. `cni-conf-dir` defaults to /etc/cni/net.d, `cni-bin-dir` defaults to /opt/cni/bin, and `root-dir` defaults to /var/lib/kubelet.
**Create the kubelet configuration file**
```shell
cat > /var/lib/kubelet/config.yaml <<EOF
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
address: 0.0.0.0
port: 10250
readOnlyPort: 0
cgroupDriver: systemd
authentication:
  anonymous:
    enabled: false
  webhook:
    cacheTTL: 2m0s
    enabled: true
  x509:
    clientCAFile: /etc/kubernetes/pki/ca.crt
authorization:
  mode: Webhook
  webhook:
    cacheAuthorizedTTL: 5m0s
    cacheUnauthorizedTTL: 30s
clusterDomain: cluster.local
healthzBindAddress: 127.0.0.1
healthzPort: 10248
rotateCertificates: true
staticPodPath: /etc/kubernetes/manifests
maxOpenFiles: 1000000
maxPods: 100
clusterDNS:
- 10.96.0.10
evictionHard:
  imagefs.available: 15%
  memory.available: 100Mi
  nodefs.available: 10%
  nodefs.inodesFree: 5%
EOF
```
> Note: change the `clusterDNS` IP address to one within the service IP range.
> References: https://github.com/kubernetes/kubelet
> https://kubernetes.io/zh/docs/reference/config-api/kubelet-config.v1beta1/
> https://pkg.go.dev/k8s.io/kubelet/config/v1beta1#KubeletConfiguration
**Create the kubelet systemd unit**
```shell
cat > /usr/lib/systemd/system/kubelet.service <<EOF
[Unit]
Description=Kubernetes Kubelet
After=docker.service
[Service]
EnvironmentFile=/etc/kubernetes/conf/kubelet.conf
ExecStart=/usr/local/bin/kubelet \$KUBELET_KUBECONFIG_ARGS \$KUBELET_CONFIG_ARGS \$KUBELET_NETWORK_ARGS \$KUBELET_DATA_ARGS \$KUBELET_LOG_ARGS \$KUBELET_EXTRA_ARGS
Restart=on-failure
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
EOF
```
**Start kubelet**
```shell
systemctl daemon-reload
systemctl enable kubelet.service --now
```
**Verify**
```shell
curl http://localhost:10248/healthz && echo
# on a master: the new node registers, and stays NotReady until a CNI plugin is installed
kubectl get nodes
# NAME             STATUS     ROLES    AGE     VERSION
# 192.168.31.79    NotReady   <none>   1s      v1.18.18
# 192.168.31.103   Ready      master   3d23h   v1.18.18
# the bootstrap kubeconfig is no longer needed once the node has joined
rm -rf /etc/kubernetes/bootstrap-kubelet.conf
```
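If the node never shows up in `kubectl get nodes`, the kubelet's bootstrap CSR may be waiting for manual approval (whether it is auto-approved depends on the RBAC bindings configured for the `system:bootstrappers` groups on the masters). To check from a master:
```shell
# list certificate signing requests; Pending ones need approval
kubectl get csr
# approve one manually if required (substitute the real CSR name)
kubectl certificate approve <csr-name>
```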
# Install kube-proxy
**Fetch the kubeconfig and certificate files**
```shell
scp k8s-master01:/etc/kubernetes/proxy.conf /etc/kubernetes
scp k8s-master01:/etc/kubernetes/pki/proxy.* /etc/kubernetes/pki
```
**Create the log directory**
```shell
mkdir -p /var/log/kubernetes/kube-proxy
```
**Copy the binary**
```shell
scp k8s-master01:/usr/local/bin/kube-proxy /usr/local/bin/
```
**Create the configuration file**
```shell
# detect the NIC holding the default route and the IP address bound to it
netcar=`ip r | awk '/default via/ {print $5}'`
[ -n "$netcar" ] && ipaddr=`ip r | awk -v netcar=$netcar '{if($3==netcar) print $9}'` || echo '$netcar is null'
cat > /etc/kubernetes/conf/kube-proxy-config.yml << EOF
kind: KubeProxyConfiguration
apiVersion: kubeproxy.config.k8s.io/v1alpha1
bindAddress: 0.0.0.0
clientConnection:
  kubeconfig: /etc/kubernetes/proxy.conf
hostnameOverride: $ipaddr
clusterCIDR: 10.244.0.0/16
mode: ipvs
ipvs:
  minSyncPeriod: 5s
  syncPeriod: 5s
  scheduler: "rr"
EOF
```
> Note: set `hostnameOverride` to the node IP address, and `clusterCIDR` to the pod IP range.
> References: https://github.com/kubernetes/kube-proxy
> https://godoc.org/k8s.io/kube-proxy/config/v1alpha1#KubeProxyConfiguration
> https://kubernetes.io/zh/docs/reference/config-api/kube-proxy-config.v1alpha1/
**Create the kube-proxy systemd unit**
```shell
cat > /usr/lib/systemd/system/kube-proxy.service << EOF
[Unit]
Description=Kubernetes Proxy
After=network.target
[Service]
EnvironmentFile=-/etc/kubernetes/conf/kube-proxy.conf
ExecStart=/usr/local/bin/kube-proxy \\
--config=/etc/kubernetes/conf/kube-proxy-config.yml \\
--alsologtostderr=true --logtostderr=false --v=4 \\
--log-dir=/var/log/kubernetes/kube-proxy
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
```
**Start kube-proxy**
```shell
systemctl daemon-reload
systemctl enable kube-proxy.service --now
```
**Verify**
```shell
# kube-proxy serves healthz on port 10256 by default
curl http://localhost:10256/healthz && echo
```
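To confirm kube-proxy is really running in IPVS mode (using the `ipvsadm` tool installed earlier):
```shell
# kube-proxy reports its active proxy mode on the metrics port
curl http://localhost:10249/proxyMode && echo
# IPVS virtual servers appear here once Services exist in the cluster
ipvsadm -Ln
```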