[TOC]
## sealos + NFS + KubeSphere 3.0: host inventory and software versions

| Hostname | IP | Cluster role | OS | Disk plan |
| --- | --- | --- | --- | --- |
| dts-paas-middleware-dev-master-0 | 1x.xxx.1.156 | master | CentOS Linux 7.9.2009 | |
| dts-paas-middleware-dev-master-1 | 1x.xxx.1.157 | master | CentOS Linux 7.9.2009 | |
| dts-paas-middleware-dev-master-2 | 1x.xxx.1.158 | master | CentOS Linux 7.9.2009 | |
| dts-paas-middleware-dev-node-0 | 1x.xxx.1.159 | node | CentOS Linux 7.9.2009 | |
| dts-paas-middleware-dev-node-1 | 1x.xxx.1.160 | node | CentOS Linux 7.9.2009 | |
| dts-paas-middleware-dev-node-2 | 1x.xxx.1.161 | node | CentOS Linux 7.9.2009 | |
| dts-paas-middleware-dev-node-3 | 1x.xxx.1.162 | node | CentOS Linux 7.9.2009 | |
| dts-paas-middleware-dev-node-4 | 1x.xxx.1.163 | node | CentOS Linux 7.9.2009 | |
| dts-paas-middleware-dev-node-5 | 1x.xxx.1.164 | NFS server | CentOS Linux 7.9.2009 | NFS server |
Software versions:

- sealos: v3.3.9-alpha.2
- Kubernetes: v1.18.8
- Rook: v1.4.4
- KubeSphere: 3.0
## Preparation

### Configure passwordless SSH from master to nodes

sealos uses Ansible to deploy Kubernetes. In this example the master node serves as the Ansible agent, and Ansible SSHes from the agent to every machine in the cluster. For convenience, configure passwordless login from the master to the nodes; otherwise you would have to pass the node login password or private key to sealos.

Log in to the master; in this example, dts-paas-middleware-dev-master-0.
```bash
# Run ssh-keygen to generate a key pair (press Enter at every prompt)
[root@d-paas-k8s-master-0 ~]# ssh-keygen
cd .ssh
# Upload the public key generated above to 1x.xx.66.1; the command returns a URL
[root@d-paas-k8s-master-0 .ssh]# curl --upload-file id_rsa.pub 1x.xx.66.1
http://1x.xx.66.1/z7EVd/id_rsa.pub

# Download the fire_auth.sh script and edit it
[root@d-paas-k8s-master-0 ~]# wget http://1x.xx.66.1/gKVtY/fire_auth.sh
[root@d-paas-k8s-master-0 ~]# vi fire_auth.sh
# fire_auth.sh contents (edit as noted):
wget http://1x.xx.66.1/IXJfv/id_rsa.pub   # change to the URL returned in the previous step
cat id_rsa.pub >> .ssh/authorized_keys
systemctl start firewalld.service
firewall-cmd --add-masquerade --permanent
# only if you want NodePorts exposed on control plane IP as well
firewall-cmd --permanent --add-port=0-65535/tcp
firewall-cmd --permanent --add-port=0-65535/udp
firewall-cmd --permanent --zone=trusted --change-interface=docker0
firewall-cmd --permanent --add-rich-rule='rule family="ipv4" source address="1x.xxx.1.156/32" port protocol="tcp" port="22" accept'   # change address to the master IP, 1x.xxx.1.156 in this example
systemctl restart firewalld
firewall-cmd --reload
systemctl enable firewalld
systemctl disable firewalld
systemctl stop firewalld
echo "sshd: 1x.xxx.1.156" >> /etc/hosts.allow   # change the IP to the master IP, 1x.xxx.1.156 in this example

# Copy the edited script to every host in the cluster.
curl --upload-file fire_auth.sh 1x.xx.66.1
http://1x.xx.66.1/tUl4q/fire_auth.sh
# Run this script on every machine in the cluster, including the master itself.
[root@d-paas-k8s-master-0 ~]# ./fire_auth.sh
[root@d-paas-k8s-0-node-0 ~]# ./fire_auth.sh
[root@d-paas-k8s-0-node-1 ~]# ./fire_auth.sh
[root@d-paas-k8s-0-node-2 ~]# ./fire_auth.sh
```
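If the machines can reach each other directly, a simpler alternative to the file-server round trip above is `ssh-copy-id`. This is a sketch rather than the original procedure; the node IPs are the ones from this example's inventory:

```bash
# Push the master's public key to every other cluster machine
for ip in 1x.xxx.1.157 1x.xxx.1.158 1x.xxx.1.159 1x.xxx.1.160 \
          1x.xxx.1.161 1x.xxx.1.162 1x.xxx.1.163; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub root@"$ip"   # prompts once per node for the password
done
```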
### Check time synchronization

Run on every machine:

```bash
[root@d-paas-k8s-master-0 ~]# yum install -y chrony
[root@d-paas-k8s-master-0 ~]# systemctl enable --now chronyd
[root@d-paas-k8s-master-0 ~]# timedatectl set-timezone Asia/Shanghai
[root@d-paas-k8s-master-0 ~]# timedatectl
      Local time: Wed 2020-11-11 11:08:52 CST
  Universal time: Wed 2020-11-11 03:08:52 UTC
        RTC time: Wed 2020-11-11 03:08:52
       Time zone: Asia/Shanghai (CST, +0800)
     NTP enabled: yes
NTP synchronized: yes
 RTC in local TZ: no
      DST active: n/a
```
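To see which NTP sources chrony is actually tracking, `chronyc` can be queried as well; this is an optional check, not part of the original steps:

```bash
chronyc sources    # '*' marks the source currently synced to
chronyc tracking   # offset and drift details
```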
## Deploy the Kubernetes cluster with sealos

### Prepare the packages

Run on one of the master nodes; in this example, dts-paas-middleware-dev-master-0.
```bash
# Download the sealos binary
[root@d-paas-k8s-master-0 ~]# wget -c https://sealyun.oss-cn-beijing.aliyuncs.com/v3.3.9-alpha.2/sealos && chmod +x sealos && mv sealos /usr/bin
# or
wget http://1x.xx.66.1/6GGuB/sealos && chmod +x sealos && mv sealos /usr/bin

# Download the Kubernetes offline installation package
[root@d-paas-k8s-master-0 ~]# wget -c https://sealyun.oss-cn-beijing.aliyuncs.com/cd3d5791b292325d38bbfaffd9855312-1.18.8/kube1.18.8.tar.gz
# or
wget http://1x.xx.66.1/NPUq8/kube1.18.8.tar.gz
```
### Deploy and install

Run on the master node; in this example, dts-paas-middleware-dev-master-0.
```bash
# Inspect the NICs and pick one over which the cluster hosts can reach each other; here it is team0
[root@d-paas-k8s-master-0 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
2: em1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master team0 state UP group default qlen 1000
    link/ether 80:18:44:e7:84:28 brd ff:ff:ff:ff:ff:ff
3: em2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq master team0 state UP group default qlen 1000
    link/ether 80:18:44:e7:84:28 brd ff:ff:ff:ff:ff:ff
4: em3: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
    link/ether 80:18:44:e7:84:2a brd ff:ff:ff:ff:ff:ff
5: em4: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
    link/ether 80:18:44:e7:84:2b brd ff:ff:ff:ff:ff:ff
6: team0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 80:18:44:e7:84:28 brd ff:ff:ff:ff:ff:ff
    inet 1x.xx.11.7/24 brd 1x.xx.11.255 scope global noprefixroute team0
       valid_lft forever preferred_lft forever

# If the NIC looks like this eth0 instead, --interface can be omitted in the commands below
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether fa:16:3e:a3:fe:ec brd ff:ff:ff:ff:ff:ff
    inet 1x.xxx.1.156/24 brd 1x.xxx.1.255 scope global noprefixroute dynamic eth0
       valid_lft 72505sec preferred_lft 72505sec
    inet6 fe80::f816:3eff:fea3:feec/64 scope link
       valid_lft forever preferred_lft forever

# Run sealos and wait until it finishes and prints the sealos logo
# --interface: the usable NIC found above, team0
[root@d-paas-k8s-master-0 ~]# sealos init --interface team0 \
    --master 1x.xxx.1.156 \
    --master 1x.xxx.1.157 \
    --master 1x.xxx.1.158 \
    --node 1x.xxx.1.159 \
    --node 1x.xxx.1.160 \
    --node 1x.xxx.1.161 \
    --node 1x.xxx.1.162 \
    --node 1x.xxx.1.163 \
    --pkg-url kube1.18.8.tar.gz \
    --version v1.18.8

# Without --interface:
sealos init \
    --master 1x.xxx.1.156 \
    --master 1x.xxx.1.157 \
    --master 1x.xxx.1.158 \
    --node 1x.xxx.1.159 \
    --node 1x.xxx.1.160 \
    --node 1x.xxx.1.161 \
    --node 1x.xxx.1.162 \
    --node 1x.xxx.1.163 \
    --pkg-url kube1.18.8.tar.gz \
    --version v1.18.8

# Check that all pods in the kube-system namespace are Running
[root@d-paas-k8s-master-0 ~]# kubectl get pods -n kube-system
NAME                                          READY   STATUS    RESTARTS   AGE
calico-kube-controllers-84445dd79f-5vqjl      1/1     Running   0          20h
calico-node-5qsqv                             1/1     Running   0          20h
calico-node-cnbj9                             1/1     Running   0          20h
calico-node-g6cmr                             1/1     Running   0          20h
calico-node-x9tmz                             1/1     Running   3          20h
coredns-66bff467f8-f7n76                      1/1     Running   0          20h
coredns-66bff467f8-xhlvg                      1/1     Running   2          20h
etcd-d-paas-k8s-master-0                      1/1     Running   3          20h
kube-apiserver-d-paas-k8s-master-0            1/1     Running   3          20h
kube-controller-manager-d-paas-k8s-master-0   1/1     Running   2          20h
kube-proxy-4n5vp                              1/1     Running   0          20h
kube-proxy-cw95q                              1/1     Running   0          20h
kube-proxy-hvssj                              1/1     Running   0          20h
kube-proxy-r6gm8                              1/1     Running   2          20h
kube-scheduler-d-paas-k8s-master-0            1/1     Running   2          20h
kube-sealyun-lvscare-d-paas-k8s-0-node-0      1/1     Running   0          20h
kube-sealyun-lvscare-d-paas-k8s-0-node-1      1/1     Running   0          20h
kube-sealyun-lvscare-d-paas-k8s-0-node-2      1/1     Running   0          20h
```
### Pitfalls encountered

The first installation with sealos used the following command, without specifying a NIC via `--interface`:
```bash
sealos init --master 1x.xx.11.7 --node 1x.xx.11.4 --node 1x.xx.11.5 --node 1x.xx.11.6 --pkg-url kube1.18.8.tar.gz --version v1.18.8
```
After sealos reported a successful installation, `kubectl get pods -n kube-system` showed that the calico-node-xxxx pods were not in the Running state. Checking one calico-node's log with `kubectl logs` revealed the error: no usable NIC matching `eth.*|en.*|em.*` could be found.

When `--interface` is not specified, sealos defaults to finding a usable NIC via the regular expression `eth.*|en.*|em.*`. In this example the NIC the hosts use to communicate is named team0, which happens to fall outside that pattern.
There are two ways to fix this problem.

Option 1: clean up the cluster with sealos, then redeploy with `--interface` specifying the usable NIC.
```bash
# Tear down the k8s cluster
[root@d-paas-k8s-master-0 ~]# sealos clean --all
# Redeploy; --interface is the usable NIC found earlier, team0
[root@d-paas-k8s-master-0 ~]# sealos init --interface team0 \
    --master 1x.xx.11.7 \
    --node 1x.xx.11.4 \
    --node 1x.xx.11.5 \
    --node 1x.xx.11.6 \
    --pkg-url kube1.18.8.tar.gz \
    --version v1.18.8
```
If you need to specify the pod network CIDR, add `--podcidr 1x.xxx.128.0/23`.
Option 2: edit the calico-node DaemonSet directly; the NIC information is stored in its environment variables. After editing, restart the calico-node DaemonSet.
```bash
# Edit the calico-node DaemonSet: under env, find the entry named IP_AUTODETECTION_METHOD
# and set its value to interface=<NIC>
[root@d-paas-k8s-master-0 ~]# kubectl edit ds calico-node -n kube-system
        - name: IP_AUTODETECTION_METHOD
          value: interface=team0
# Restart the calico-node DaemonSet
[root@d-paas-k8s-master-0 ~]# kubectl -n kube-system rollout restart ds calico-node
```
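The same change can be made non-interactively with `kubectl set env`; updating the pod template's environment also triggers a rolling restart on its own. This is an alternative to the interactive edit above, not part of the original steps:

```bash
# Set the Calico IP autodetection method to the team0 interface in one command
kubectl set env ds/calico-node -n kube-system IP_AUTODETECTION_METHOD=interface=team0
```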
## Create an NFS-backed StorageClass in Kubernetes

### Set up the NFS shared storage

Use the disk of one worker node as the shared storage; here 1x.xxx.1.164 was chosen.
1. Create the NFS shared directory, as sketched below.
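The exports file in step 3 shares `/nfs`, so that is the directory to create on the NFS server; a minimal sketch, assuming the data disk is mounted there (adjust the path otherwise):

```bash
# on the NFS server (1x.xxx.1.164)
mkdir -p /nfs
```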
2. Install the NFS components.

Install them on both the NFS server and the clients, i.e. on every node:

```bash
yum -y install nfs-utils
# Enable at boot
systemctl enable rpcbind.service
systemctl enable nfs-server.service
```
3. Edit the exports file.

Run on the server:

```bash
vim /etc/exports
# add the following line
/nfs 1x.xxx.1.0/24(rw,sync,no_root_squash,no_wdelay)
```
Meaning: hosts in 1x.xxx.1.0/24 may mount /nfs read-write (`rw`); writes are committed to disk before being acknowledged (`sync`); root on a client is not mapped to an anonymous user (`no_root_squash`); and writes are not delayed for batching (`no_wdelay`).
4. Start the NFS services.

Run on the server:

```bash
systemctl start rpcbind
systemctl start nfs
```
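If /etc/exports changes while the services are already running, the exports can be re-read without a restart; standard nfs-utils tooling, not part of the original steps:

```bash
exportfs -ra   # re-export everything in /etc/exports
exportfs -v    # list the active exports to verify
```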
5. Mount on the clients.

Run on the clients:

```bash
mount -t nfs 1x.xxx.1.164:/nfs /nfs
```
You can verify the export with:

```bash
showmount -e 1x.xxx.1.164
```
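Note that the mount above does not survive a reboot; to make it persistent, an /etc/fstab entry can be added. This is an addition, not part of the original steps:

```bash
echo "1x.xxx.1.164:/nfs  /nfs  nfs  defaults  0 0" >> /etc/fstab
mount -a   # verify the fstab entry mounts cleanly
```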
Combined client commands:

```bash
mkdir /nfs && yum -y install nfs-utils && systemctl enable rpcbind.service && systemctl enable nfs-server.service && systemctl start rpcbind && mount -t nfs 1x.xxx.1.164:/nfs /nfs && showmount -e 1x.xxx.1.164
```
### Create the StorageClass

0. Kubernetes 1.6+ enables RBAC by default, so the provisioner must be authorized first. Save the following as rbac.yaml:
```yaml
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
  - apiGroups: [""]
    resources: ["services", "endpoints"]
    verbs: ["get"]
  - apiGroups: ["extensions"]
    resources: ["podsecuritypolicies"]
    resourceNames: ["nfs-provisioner"]
    verbs: ["use"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-provisioner
    namespace: default
roleRef:
  kind: ClusterRole
  name: nfs-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-provisioner
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-provisioner
    namespace: default
roleRef:
  kind: Role
  name: leader-locking-nfs-provisioner
  apiGroup: rbac.authorization.k8s.io
```
```bash
kubectl apply -f rbac.yaml
```
2. Create the NFS client provisioner Deployment, nfs-provisioner.yaml:
```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-provisioner
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: nfs-provisioner
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nfs-provisioner
      release: nfs-client
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: nfs-provisioner
        release: nfs-client
    spec:
      serviceAccount: nfs-provisioner
      containers:
        - name: nfs-provisioner
          image: kubesphere/nfs-client-provisioner:v3.1.0-k8s1.11
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: cluster.local/nfs-client-nfs-client-provisioner
            - name: NFS_SERVER
              value: 1x.xxx.1.164
            - name: NFS_PATH
              value: /nfs
      volumes:
        - name: nfs-client-root
          nfs:
            server: 1x.xxx.1.164
            path: /nfs
```
```bash
kubectl apply -f nfs-provisioner.yaml
```
3. Create storageClass.yaml:
```yaml
apiVersion: v1
items:
  - allowVolumeExpansion: true
    apiVersion: storage.k8s.io/v1
    kind: StorageClass
    metadata:
      annotations:
        storageclass.kubernetes.io/is-default-class: "true"
        storageclass.kubesphere.io/support-snapshot: "false"
        storageclass.kubesphere.io/supported_access_modes: '["ReadWriteOnce","ReadOnlyMany","ReadWriteMany"]'
      labels:
        app: nfs-provisioner
        chart: nfs-client-provisioner-1.1.2
        heritage: Tiller
        release: nfs-client
      name: nfs-client
    mountOptions:
      - nfsvers=3
    parameters:
      archiveOnDelete: "false"
    provisioner: cluster.local/nfs-client-nfs-client-provisioner
    reclaimPolicy: Delete
    volumeBindingMode: Immediate
kind: List
metadata:
  resourceVersion: ""
  selfLink: ""
```
```bash
kubectl apply -f storageClass.yaml
```

```bash
[root@idp-restore-master01 sc]# kubectl get storageClass
NAME                   PROVISIONER                                       RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
nfs-client (default)   cluster.local/nfs-client-nfs-client-provisioner   Delete          Immediate           true                   9s
```
### Verify with a PVC (optional)

1. Create a PVC, my-pvc.yaml:
```yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: my-pvc
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  storageClassName: nfs-client
```

```bash
kubectl apply -f my-pvc.yaml
```
Check the PVC status, as sketched below.
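A minimal status check; the expected Bound state describes a healthy setup and is not captured output from the original:

```bash
kubectl get pvc my-pvc   # STATUS should be Bound, with an auto-created pvc-... volume
```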
### Test (optional)

Create a test pod that uses the PVC; the goal is to create a SUCCESS file in the shared directory.
```yaml
kind: Pod
apiVersion: v1
metadata:
  name: test-pod
spec:
  containers:
    - name: test-pod
      image: willdockerhub/busybox:1.24
      command:
        - "/bin/sh"
      args:
        - "-c"
        - "touch /mnt/SUCCESS && exit 0 || exit 1"
      volumeMounts:
        - name: nfs-pvc
          mountPath: "/mnt"
  restartPolicy: "Never"
  volumes:
    - name: nfs-pvc
      persistentVolumeClaim:
        claimName: my-pvc
```

```bash
kubectl apply -f test-pod.yaml
```
Then check on the NFS server, as sketched below.
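A sketch of the check on the NFS server, assuming the nfs-client provisioner's default `<namespace>-<pvcName>-<pvName>` subdirectory naming under the export:

```bash
# on the NFS server (1x.xxx.1.164)
ls /nfs/default-my-pvc-pvc-*/
# the SUCCESS file created by test-pod should be listed
```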
## Deploy KubeSphere

### Prepare the packages

```bash
[root@d-paas-k8s-master-0 ~]# wget https://github.com/kubesphere/ks-installer/releases/download/v3.0.0/kubesphere-installer.yaml
[root@d-paas-k8s-master-0 ~]# wget https://github.com/kubesphere/ks-installer/releases/download/v3.0.0/cluster-configuration.yaml
```
Backup addresses:

```bash
wget http://1x.xx.66.1/SiVrh/kubesphere-installer.yaml
wget http://1x.xx.66.1/XrrTk/cluster-configuration.yaml
```
### Edit cluster-configuration.yaml

Enable all components by setting `enabled: true`:

```yaml
  console:
    # enable/disable multiple sign-on; it allows an account to be used by different users at the same time.
    enableMultiLogin: true
    port: 30880
  # (CPU: 0.3 Core, Memory: 300 MiB) Whether to install KubeSphere alerting system. It enables Users to customize alerting policies to send messages to receivers in time with different time intervals and alerting levels to choose from.
  alerting:
    enabled: true
  # Whether to install KubeSphere audit log system. It provides a security-relevant chronological set of records, recording the sequence of activities happened in platform, initiated by different tenants.
  auditing:
    enabled: true
  # (CPU: 0.47 Core, Memory: 8.6 G) Whether to install KubeSphere DevOps System. It provides out-of-box CI/CD system based on Jenkins, and automated workflow tools including Source-to-Image & Binary-to-Image.
  devops:
    enabled: true
  # ...
  # (CPU: 57 m, Memory: 2.76 G) Whether to install KubeSphere logging system. Flexible logging functions are provided for log query, collection and management in a unified console. Additional log collectors can be added, such as Elasticsearch, Kafka and Fluentd.
  logging:
    enabled: true
  # ...
  # (CPU: 56 m, Memory: 44.35 MiB) Whether to install metrics-server. It enables HPA (Horizontal Pod Autoscaler).
  metrics_server:
    enabled: true
  monitoring:
    enabled: true
  # Email Notification support for the legacy alerting system, should be enabled/disabled together with the above alerting option.
  notification:
    enabled: true
  # (2 Core, 3.6 G) Whether to install KubeSphere Application Store. It provides an application store for Helm-based applications, and offers application lifecycle management.
  openpitrix:
    enabled: true
  # (0.3 Core, 300 MiB) Whether to install KubeSphere Service Mesh (Istio-based). It provides fine-grained traffic management, observability and tracing, and offers visualization for traffic topology.
  servicemesh:
    enabled: true
```
### Deploy KubeSphere

```bash
[root@d-paas-k8s-master-0 ~]# kubectl apply -f kubesphere-installer.yaml
[root@d-paas-k8s-master-0 ~]# kubectl apply -f cluster-configuration.yaml

# Follow the deployment log
[root@d-paas-k8s-master-0 ~]# kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l app=ks-install -o jsonpath='{.items[0].metadata.name}') -f

# Deployment is complete when the log prints the following
#####################################################
Console: http://1x.xx.11.4:30880
Account: admin
Password: P@88w0rd

NOTES:
  1. After you log into the console, please check the
     monitoring status of service components in
     "Cluster Management". If any service is not
     ready, please wait patiently until all components
     are up and running.
  2. Please change the default password after login.
#####################################################
https://kubesphere.io             2020-11-11 09:29:12
#####################################################
```
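The installer can take quite a while; watching the kubesphere-system pods come up is an easy progress check. This is optional and not part of the original steps:

```bash
watch kubectl get pods -n kubesphere-system
```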
### Pitfalls encountered

The installer log got stuck at `Failed to ansible-playbook result-info.yaml`. `kubectl get all -n kubesphere-system` showed many pods that had not come up, for example the Redis ones.

```bash
kubectl describe pod/redis-ha-server-0 -n kubesphere-system
```

showed the error:
```
Events:
  Type     Reason       Age                      From                           Message
  ----     ------       ----                     ----                           -------
  Warning  FailedMount  7m31s (x134 over 3h41m)  kubelet, idp-restore-master02  (combined from similar events): MountVolume.SetUp failed for volume "pvc-e88e0ca4-0047-47fa-ad2c-17499d5b44c3" : mount failed: exit status 32
...
mount: wrong fs type, bad option, bad superblock on 1x.xxx.1.164:/nfs/kubesphere-system-data-redis-ha-server-0-pvc-e88e0ca4-0047-47fa-ad2c-17499d5b44c3,
```
This is an NFS mount problem: the master nodes did not have the NFS client installed. Installing it fixed the mounts, as sketched below.
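The fix is simply installing the same nfs-utils package that was installed on the other nodes earlier:

```bash
# run on every master node that is missing the NFS client
yum -y install nfs-utils
```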
Then re-run ks-installer:

```bash
kubectl rollout restart deployment/ks-installer -n kubesphere-system
```
## Configure KubeSphere to connect to LDAP

## Uninstall k8s