add nvidia package node-init
This commit is contained in:
parent
9c76073a1e
commit
3276490882
71
node-init/nvidia-init-artifacts.yaml
Normal file
71
node-init/nvidia-init-artifacts.yaml
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
---
# Static inputs for the nvidia-init DaemonSet (node-init/nvidia-init.yaml),
# which loads every key below as an environment variable via envFrom.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nvidia-init-artifacts
  namespace: kube-system
data:
  # APT repository base URLs. The init script writes each one into a
  # "deb [signed-by=<keyring>] <url> /" line under /etc/apt/sources.list.d/.
  CUDA_REPO: https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/
  NCT_REPO: https://nvidia.github.io/libnvidia-container/stable/deb/amd64
  # Base64-encoded keyring for the NCT_REPO repository. The init script decodes
  # it with `base64 -d` into
  # /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg.
  NCT_KEYRING: |
    mQINBFnNWDEBEACiX68rxIWvqH3h2GykO25oK9BAqV8fDtb6lXEbw3eKx4g87BRzM3DQBA0S0Ifk
    Q72ovJ33H50+gVTXuu+Zme5muWk72m3pApccZVDLqdzYlpWPruNbMC+IlWr70yo8Jw8Zr1ihbWjF
    vMbDJTkgqPt2djNq3xxvdiKoZlgnpLRKIpSu9iBQlNoZLHxTQKFH4219L77prRogv2QV1ckBL5lD
    VOERJuHo4jHE8mm9/NZ6v3m2HGuuAEZ7T9nWlPGiAIP8Pww4ZRTJcBANcI2EFKPLdfP61HTH6w0k
    VMkoAaGlemadTDl3ZcLpUpTFLc+ko/2uQ1qVPx9QYyoMrorS3kUmlXrhsA7FvcB09aIcb+JX6SVk
    cbO5A5+baCa3owwUtFBXMHM5hqpLv4P3/GsuW6283YwLZCf53dJY4lJZePqzPGsvs/wSvhnZrFvb
    61i/Aqm0hjhVh7h6VNxUiE8geMcjxy29LtzajoyS0EPVxes4xZu0VbS78LQyCNHSpS7TFmtVUQmb
    XqDN7cpiyr9+yutr0lZOMc7NYQt0nP/3RtYkWEob6wXarVImHas1OYzlZymdO1uAnqkediS61E2v
    SD1OEq37/375FB/Q3AYXuNkQzDjYoJJz9wsv7Xp0bdPzQ/daLdIFNQXo5MmVIirsWM07JvbZaJhD
    OiJxGn0MPf11/QARAQABtEBOVklESUEgQ09SUE9SQVRJT04gKE9wZW4gU291cmNlIFByb2plY3Rz
    KSA8Y3VkYXRvb2xzQG52aWRpYS5jb20+iQI4BBMBCgAiBQJZzVgxAhsPBgsJCggHAwUVCgkICwUW
    AgMBAAIeAQIXgAAKCRDdyuBE95bssAh6EACgUCww2sr8sOztEHKhvdCsonXuTHYbel3YlWmVDPbh
    4dA31xoRXlvSJptJzPi/zlTc9fkVSFGbEZbFRR4JjnwYTMLDElMh5YRMYAoPVYhWGKIO4earu32G
    hFuPjfr6h+0xNaQeDPIbr7bPe/AEhLSdJMzIOuAifr7UaC65A6YlxfeaSqyt0HthYujoQ12cWxP9
    98C5jkc0IN2tyLs/OD7HLHht+lafqDSylykx63cw7jvsV/15rqZwVwjhkcxZyrKET32MTjXF3cxn
    7+TGpKS8B1k4a/EI7uXnncfSoma0dAT9bZM9JZbXQmSzCPDHHuVtnQ/3uh8VyenpigTFnrb20LCy
    6WzJd3O9lAZXLhvwF/By3a07WLzRtTZNaUpt37Anb0js2syr3lohbmK9i3xvuqZNzhGPbqu9IV+v
    FgSGyTHRJUSBlHKDGiCdOOHc20MLPW1yRCXbx0F4eS9TWchYyJkJNNczD5DnEl/gsvL4NCRxa+oU
    yUhhJ1HpJ6YNmTsy6nAAKIC+6248o164GiavaR3z03RfaQayGHAUrBKi+PJBY7efgsZeYT8f+hyY
    rIC04MO8poBKS/GvSUL2QtVtj59Nq+95gIptW2mZM8KRpt2huLH+QQ8SKr1vAECbpKJOwseqKmVy
    xX02iaSE8ifLE+tXFE8YgS3CZjWwy5PD0LkBDQRdgpCQAQgAx1oxX9tFlv3CIva0CJ0dsZyNF7mg
    HPgNszccUYLu0chyWYvwiVU/OlCzivytNX56wgeBgIVV1QzeBuTkrJSgzJ+dSgfrmyg5RwIDhvH+
    Dcut0++6+di1LyH9gXQcYPrN3pf4yR8nlRbm6K0Vsp0Z4+br18QelURerfAkRordag26aB+MzVLv
    loHHu3Z6/v321uTGMdFd8CVCjovec5+EdcIAam3U/MmZe2mr2M/x6F3st30cE7umq9Bb6UCqc6L8
    bQcoloxR3rwFzL1u9wUBUzQlaMNmxbe0BfezkmSQeC8JN4Fku+DtHEpS9uP5JEYNEEQ66K4mJDTM
    r0whBv1fKQARAQABiQNbBBgBCgAmAhsCFiEEyVsyG2HojBgJxPdZ3crgRPeW7LAFAl7oD1gFCQNG
    skgBKcBdIAQZAQoABgUCXYKQkAAKCRBu2RyjrBFgzZ/WB/9TuD2qzaBO7HlPDWRUTpFlvFgyDc3X
    yfTAC/ISeYbIcPcq5kmVHgpsMdbN9Vvmot5GuT7VWzhHc9sJCmHgL330glBtNtSRflKzlBYnbiSW
    xLFYZtu2BtNOk8Ylbw8qw1E6W/iFBrqAwgeZvs2VOcPU3203Mqfi1JbS+YHC/bgs6cNq0zs/WJra
    YxiuleclKYExxLt9tRd0058n58GAph+Ki7mRInO6kxuKpsQannSn1Ku/DiaQcSF2L2TMSo0N9zwv
    YEZR+hgsKVqyRKT+DkZhusHJHYGv96YHSTwo016ZhwYS9t0MLXY9/PgJysuO41Ya4Ii43D3UK1wO
    HTmyHZHTCRDdyuBE95bssDpwD/4jV9Pin3vAKa4hhn5GD4e478FNKRD58Q7qF3AhVTBNPIl1m4EF
    X7sqI6cXUDG4BjpS70ZRWF2x51ZTiq7DLTV/gGw2okfVjoWjzQY0ebrLd4IoNs80lIHmXxa+JdwB
    6WupCUzKCKLcPsX/yPAmswPNGAuIMAv+PWhUUSMVtzOZldnlogGMhbJ9UD2txFGGh9WoYc2vgX9K
    AaKryXcC6QMabv7JJU24HEJJDgbJEvtFM5PS8QMFbXIZsYgICWpQXVChBbduXo9sD2TUDWYAniNa
    aw4LKxPRG+Ix4HAqkh1oNOLojO30DO3r1/62FKE5/ykg3iSMTDR0iOES/leXCCIO9fRJT8+eucxy
    OQoY5ti7tjt1wm3HnTB+Rz3E/E2qeLs2PN82aseccm1G06pmsMCUiWtmSV6HjdO2XufYprrGLSu0
    RrT3sz5WHGUOY2iO40xHhSiXg3TcLZRpv30DQzxoUrx9Ff//rXLFznh+MksuvVD2roURBGz/en31
    FxAcBoex9nNraeOekbFen5b7Xrq9wnzM5xZvJN2QYB3vS0khz/ZgFyy5444ALa9gwb29FZCfA4m5
    9S2QoB8uPQGM+8gnusE6J8y4fvI59ugafidIkt86dZ3mFsEME5XNmBGdNEo2flRVFfpG1IWds2Ba
    3IsdbYd9nzmbBW7/n0InVRDrIg==
  # Base64-encoded keyring for the CUDA_REPO repository. The init script decodes
  # it with `base64 -d` into /usr/share/keyrings/cuda-archive-keyring.gpg.
  CUDA_KEYRING: |
    mQINBGJYmlEBEAC6nJmeqByeReM+MSy4palACCnfOg4pOxffrrkldxz4jrDOZNK4q8KG+ZbXrkdP
    0e9qTFRvZzN+A6Jw3ySfoiKXRBw5l2Zp81AYkghV641OpWNjZOyLsyKEtST9LR1ttHv1ZI71pj8N
    VG/EnpimZPOblEJ1OpibJJCXLrbn+qcJ8JNuGTSK6v2aLBmhR8VR/aSJpmkg7fFjcGklweTI8+Ib
    j72HuY9JRD/+dtUoSh7z037mWo56ee02lPFRD0pHOEAlLSXxFO/SDqRVMhcgHk0a8roCF+9h5Ni7
    ZUyxlGK/uHkqN7ED/U/ATpGKgvk4t23eTpdRC8FXAlBZQyf/xnhQXsyF/z7+RV5CL0o1zk1LKgo+
    5K325ka5uZb6JSIrEPUaCPEMXu6EEY8zSFnCrRS/Vjkfvc9ViYZWzJ387WTjAhMdS7wdPmdDWw2A
    SGUP4FrfCireSZiFX+ZAOspKpZdh0P5iR5XSx14XDt3jNK2EQQboaJADuqksItatOEYNu4JsCbc2
    4roJvJtGhpjTnq1/dyoy6K433afU0DS2ZPLthLpGqeyKMKNY7a2WjxhRmCSu5Zok/fGKcO62XF8a
    3eSj4NzCRv8LM6mG1Oekz6Zz+tdxHg19ufHO0et7AKE5q+5VjE438Xpl4UWbM/Voj6VPJ9uzywDc
    nZXpeOqeTQh2pQARAQABtCBjdWRhdG9vbHMgPGN1ZGF0b29sc0BudmlkaWEuY29tPokCOQQTAQIA
    IwUCYliaUQIbAwcLCQgHAwIBBhUIAgkKCwQWAgMBAh4BAheAAAoJEKS0aZY7+GPM1y4QALKhBqSo
    zrYbe341Qu7SyxHQgjRCGi4YhI3bHCMj5F6vEOHnwiFH6YmFkxCYtqcGjca6iw7cCYMow/hgKLAP
    wkwSJ84EYpGLWx62+20rMM4OuZwauSUcY/kE2WgnQ74zbh3+MHs56zntJFfJ9G+NYidvwDWeZn5H
    IzR4CtxaxRgpiykg0s3ps6X0U+vuVcLnutBF7r81astvlVQERFbce/6KqHK+yj843Qrhb3JEolUo
    OETK06nD25bVtnAxe0QEyA909MpRNLfR6BdjPpxqhphDcMOhJfyubAroQUxG/7S+Yw+mtEqHrL/d
    z9iEYqodYiSozfi0b+HFI59sRkTfOBDBwb3kcARExwnvLJmqijiVqWkoJ3H67oA0XJN2nelucw+A
    Hb+Jt9BWjyzKWlLFDnVHdGicyRJ0I8yqi32w8hGeXmu3tU58VWJrkXEXadBftmcipemb6oZ/r5SC
    kW6kxr2PsNWcJoebUdynyOQGbVwpMtJAnjOYp0ObKOANbcIg+tsikyCIO5TiY3ADbBDPCeZK8xdc
    ugXoW5WFwACGC0z+Cn0mtw8z3VGIPAMSCYmLusgWt2+EpikwrP2inNp5Pc+YdczRAsa4s30Jpyv/
    UHEG5P9GKnvofaxJgnU56lJIRPzFiCUGy6cVI0Fq777X/ME1K6A/bzZ4vRYNx8rUmVE5
|
69
node-init/nvidia-init.yaml
Normal file
69
node-init/nvidia-init.yaml
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
---
# https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/
#
# Runs once per node: a privileged init container enters the host's mount
# namespace and, if an NVIDIA PCI device is present, configures the CUDA and
# NVIDIA Container Toolkit APT repositories and installs the driver packages
# on the host. A pause container then keeps the pod alive so the DaemonSet
# reports the node as done.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-init
  namespace: kube-system
  labels:
    root-init.k8s.exa.fi/component-name: nvidia-init
spec:
  selector:
    matchLabels:
      root-init.k8s.exa.fi/component-name: nvidia-init
  template:
    metadata:
      labels:
        root-init.k8s.exa.fi/component-name: nvidia-init
    spec:
      tolerations:
        # these tolerations are to have the daemonset runnable on
        # control-plane (formerly "master") nodes; remove them if your
        # control-plane nodes can't run pods. Both taint keys are listed
        # because newer clusters only apply the control-plane one.
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      initContainers:
        # this is implemented as an init container so that the install is
        # re-run whenever the pod is recreated after a change. this is
        # intended to eventually be moved into a helm chart to use in argocd
        # where this will make more sense
        - name: install-packages
          # NOTE(review): alpine:3.7 is end-of-life. The image only supplies
          # the nsenter binary; confirm long-option support before bumping.
          image: alpine:3.7
          # nsenter switches to PID 1's mount namespace (the host's, because
          # hostPID is true), so `sh` and everything below run against the
          # host filesystem.
          command: ["nsenter", "--mount=/proc/1/ns/mnt", "--", "sh", "-c"]
          args:
            - |-
              # Abort on the first failing command or unset variable so a
              # missing ConfigMap key or failed decode never leaves a
              # half-written APT configuration behind silently.
              set -eu

              # PCI vendor IDs (hex) that trigger the install; 10de is NVIDIA.
              PCI_VENDORS="10de"
              VEN_FOUND=no
              for VEN in $PCI_VENDORS; do
                VEN=$(echo "$VEN" | tr 'A-Z' 'a-z')
                # Require whitespace between the first field and the vendor id
                # so the id can only match at the start of the second field,
                # never inside the first one.
                if grep -Eq '^[0-9a-f]+[[:space:]]+'"$VEN" /proc/bus/pci/devices; then
                  VEN_FOUND=yes
                  break
                fi
              done
              if [ "$VEN_FOUND" = no ]; then
                echo "no nvidia devices seen. skipping package init" >&2
                exit 0
              fi

              CUDA_KEYRING_PATH=/usr/share/keyrings/cuda-archive-keyring.gpg
              CUDA_LIST_PATH=/etc/apt/sources.list.d/cuda.list
              echo "$CUDA_KEYRING" | base64 -d > "$CUDA_KEYRING_PATH"
              echo "deb [signed-by=$CUDA_KEYRING_PATH] $CUDA_REPO /" > "$CUDA_LIST_PATH"

              NCT_KEYRING_PATH=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
              NCT_LIST_PATH=/etc/apt/sources.list.d/nvidia-container-toolkit.list
              echo "$NCT_KEYRING" | base64 -d > "$NCT_KEYRING_PATH"
              echo "deb [signed-by=$NCT_KEYRING_PATH] $NCT_REPO /" > "$NCT_LIST_PATH"

              # No TTY is attached, so keep debconf from prompting.
              export DEBIAN_FRONTEND=noninteractive
              apt-get update
              apt-get install -y nvidia-kernel-dkms nvidia-container-toolkit cuda
          securityContext:
            # required for nsenter into the host mount namespace
            privileged: true
          envFrom:
            # provides CUDA_REPO, NCT_REPO, CUDA_KEYRING and NCT_KEYRING
            - configMapRef:
                name: nvidia-init-artifacts
      containers:
        - name: finished-sleep-forever
          # k8s.gcr.io is frozen; registry.k8s.io serves the same image.
          image: registry.k8s.io/pause:3.1
          securityContext:
            privileged: false
      terminationGracePeriodSeconds: 0
      hostNetwork: true
      # needed so /proc/1 inside the init container is the host's PID 1
      hostPID: true
|
Loading…
Reference in New Issue
Block a user