Enterprise Virtualization Platform Proxmox FAQ

Author: Linux系統運維

1. Health Check - clearing warnings

#Show crash reports
root@pve1:/var/lib/ceph/crash/posted# ceph crash ls
ID                                                               ENTITY   NEW
2022-11-20_04:19:15.455086Z_6ff0a228-afae-42fb-9fda-6ce7d34f7f94 mgr.pve1    
2022-11-20_04:19:41.515229Z_96666130-253f-4017-ab21-93d9e1e211e8 mgr.pve1    
2022-11-20_04:23:09.157816Z_08376718-72ea-4a2f-815f-97cb126624ad mgr.pve1    
2022-11-20_06:05:56.922776Z_b6601eb5-24b3-401a-9eb8-bf5742d45e25 mon.pve1    
2022-11-25_01:56:59.517294Z_84848254-1c98-4ab0-97d4-f9f1a6897cc4 mgr.pve4    
2022-11-25_02:16:16.380455Z_e29c25b0-4bf5-419b-a6fc-1eb642b0a9dd mgr.pve2    
2022-11-26_02:51:08.407370Z_764cb25f-5f08-4fa3-a60e-64f42d82f2a1 mgr.pve2    
2022-11-26_02:51:30.773033Z_cda57001-91e0-40f9-97fe-649ad45935a6 mgr.pve2    
 
#Read the details of a crash report
ceph crash info <id>
 
#Archive a crash report to clear its historical warning
ceph crash archive <id>
or:
ceph crash archive-all
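
To review only the crashes that have not been acknowledged yet before archiving them, something like the following works (a minimal sketch; ceph crash ls-new lists the unarchived reports, and the loop over the IDs is only illustrative):

#list crashes that have not been archived yet
ceph crash ls-new

#inspect and then archive each new crash
for id in $(ceph crash ls-new | awk 'NR>1 {print $1}'); do
    ceph crash info "$id"
    ceph crash archive "$id"
done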

2. MON_CLOCK_SKEW

Ceph is strict about clock synchronization, so PVE keeps a close watch on the clock skew between nodes: a skew greater than 0.05 s raises a warning, and the clocks need to be resynchronized.

ntpdate pool.ntp.org     #one-shot sync of the system clock from the NTP pool
hwclock -w               #write the synced time back to the hardware clock
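
ntpdate is only a one-shot correction; keeping an NTP daemon (e.g. chrony or systemd-timesyncd, depending on the PVE release) running on every node prevents the skew from coming back. Whether the warning is really gone can be checked from the monitors' point of view:

#show the clock offset of each monitor as seen by the cluster
ceph time-sync-status

#overall health should no longer report MON_CLOCK_SKEW
ceph health detail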

3. Removing an OSD

#First use ceph-volume lvm list to find the mapping between OSDs and block devices (to identify the failed disk)
root@pve1:~# ceph-volume lvm list
====== osd.1 =======
 
  [block]       /dev/ceph-7c7e8cc4-26fd-4cf8-b531-b94510ab63f8/osd-block-bc80a833-1221-426b-90c1-e910aeb3b0a1
 
      block device              /dev/ceph-7c7e8cc4-26fd-4cf8-b531-b94510ab63f8/osd-block-bc80a833-1221-426b-90c1-e910aeb3b0a1
      block uuid                sfTHK9-qTzt-dg00-7XN9-G4uE-2kzn-7f97Xs
      cephx lockbox secret     
      cluster fsid              98d9e4a9-35d4-497f-96a0-60a62b9e9d64
      cluster name              ceph
      crush device class        None
      db device                 /dev/sdg2
      db uuid                   1fbdcce9-88e3-4282-81cc-6ad255730be3
      encrypted                 0
      osd fsid                  bc80a833-1221-426b-90c1-e910aeb3b0a1
      osd id                    7
      type                      block
      vdo                       0
      devices                   /dev/sdf
 
  [db]          /dev/sdg2
 
      PARTUUID                  1fbdcce9-88e3-4282-81cc-6ad255730be3
 
The disk sdg is faulty, so the OSD has to be removed and the disk added back into Ceph.
 
==================================================================
#a. Stop the corresponding OSD service
 
#systemctl stop ceph-osd@1.service
 
#b. Unmount the OSD
#umount /var/lib/ceph/osd/ceph-1
 
#c. Mark the OSD as out
 #ceph osd out osd.1
 
#d. Remove the OSD
#ceph osd crush remove osd.1   (not needed if no CRUSH map entry was configured)
#ceph auth del osd.1
#ceph osd rm 1
 
#e. Wipe the contents of the removed disk
wipefs -af /dev/sdb
#After this step, reboot the machine hosting that OSD
#After the reboot, zap the disk with the following command:
#ceph-volume lvm zap /dev/sdb
#If the zap succeeds, output like the following is printed:
--> Zapping: /dev/sdb
Running command: /usr/sbin/cryptsetup status /dev/mapper/
stdout: /dev/mapper/ is inactive.
Running command: /usr/sbin/wipefs --all /dev/sdb
Running command: /bin/dd if=/dev/zero of=/dev/sdb bs=1M count=10
stderr: 10+0 records in
10+0 records out
stderr: 10485760 bytes (10 MB) copied, 0.0195807 s, 536 MB/s
--> Zapping successful for: /dev/sdb
 
#f. Recreate the OSD
#pveceph osd create /dev/sd[X] --db_dev /dev/sd[Y]   (the OSD can also be created from the Ceph panel in the PVE web UI)
 
#g. Check the OSD tree
ceph osd tree           
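
Steps a through e can be strung together once the failed OSD's id and its underlying device are known from ceph-volume lvm list; a minimal sketch (OSD_ID and DEV are placeholders, not values taken from this cluster):

OSD_ID=1              # placeholder: id of the failed OSD
DEV=/dev/sdX          # placeholder: underlying device of that OSD

systemctl stop ceph-osd@${OSD_ID}.service
umount /var/lib/ceph/osd/ceph-${OSD_ID}
ceph osd out osd.${OSD_ID}
ceph osd crush remove osd.${OSD_ID}
ceph auth del osd.${OSD_ID}
ceph osd rm ${OSD_ID}
wipefs -af ${DEV}
#reboot the node, then zap the disk before recreating the OSD:
ceph-volume lvm zap ${DEV}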

4. Upgrading Ceph from 14.2.4 to 14.2.20 (fixes the security bug with clients reconnecting / re-establishing connections)

cat >> /etc/apt/sources.list <<EOF
deb http://download.proxmox.com/debian/pve buster pve-no-subscription
EOF
 
apt-get update
apt dist-upgrade -y
 
#After the upgrade, restart the mon, mgr, mds and osd daemons on every node (see the sketch below)
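
A minimal restart sketch, assuming the standard Ceph systemd units and that the mon/mgr instance on each node is named after the node's short hostname; restart one node at a time and wait for the cluster to return to HEALTH_OK in between:

systemctl restart ceph-mon@$(hostname -s).service
systemctl restart ceph-mgr@$(hostname -s).service
systemctl restart ceph-mds.target     #only if an MDS runs on this node
systemctl restart ceph-osd.target     #restarts every OSD on this node

#confirm all daemons now report the new version
ceph versions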
 
#At this point the Ceph cluster will show two warnings:
client is using insecure global_id reclaim
mons are allowing insecure global_id reclaim
 
 
#Restart these two services (run on every node)
systemctl try-reload-or-restart pvestatd.service pvedaemon.service
 
 
#Clear the warning (running it on any one Ceph node is enough)
ceph config set mon auth_allow_insecure_global_id_reclaim false           
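
Once the option is set and all clients reconnect securely, both warnings should disappear; verify with:

ceph health detail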

5. Exporting and importing images from Ceph

#Query
 rbd -p cephfs_data ls
 rbd -p cephfs_data ls|grep base-1001-disk-0
# Export
rbd export -p cephfs_data vm-170-state-pz_2021_04_26 vm-170-state-pz_2021_04_26.img
 
root@pve1:/# rbd export base-1001-disk-0  -p cephfs_data base-1001-disk-0.img
#Import (the last argument is the PVE storage backed by the Ceph pool)
qm importdisk ${vmid} ${image_name}.img ${storage_name}
 
 
# If the target PVE node does not have the rbd command, export on a node that has Ceph installed, then run qm importdisk on the target node
#For example:
On pve6 run:    rbd export base-1001-disk-0 -p cephfs_data base-1001-disk-0.img
On pve13 import: qm importdisk 141 base-1001-disk-0.img ceph_data
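
A minimal end-to-end sketch of that cross-node flow, assuming root SSH access between the nodes; the storage name ceph_data and the VM/image ids are the ones from the example above, and the vm-141-disk-0 volume name produced by importdisk is an assumption:

#on pve6: export the RBD image to a flat file
rbd export -p cephfs_data base-1001-disk-0 base-1001-disk-0.img

#copy the file to the target node
scp base-1001-disk-0.img root@pve13:/root/

#on pve13: import it as an unused disk of VM 141, then attach it
qm importdisk 141 /root/base-1001-disk-0.img ceph_data
qm set 141 --scsi1 ceph_data:vm-141-disk-0     #assumption: importdisk named the new volume vm-141-disk-0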
