# Plugin configuration
# Only one of "name" or "map" should ever be set for a given deployment.
# Use "name" to point to an external ConfigMap with a list of configurations.
# Use "map" to build an integrated ConfigMap from a set of configurations as
# part of this helm chart. An example of setting "map" might be:
# config:
#   map:
#     default: |-
#       version: v1
#       flags:
#         migStrategy: none
#     mig-single: |-
#       version: v1
#       flags:
#         migStrategy: single
#     mig-mixed: |-
#       version: v1
#       flags:
#         migStrategy: mixed
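# A sketch of the alternative: pointing "name" at an externally managed
# ConfigMap instead of building one from "map". The ConfigMap name below is a
# hypothetical example and is assumed to already exist in the namespace the
# chart is deployed to, containing named configurations analogous to the "map"
# entries above:
# config:
#   name: "nvidia-device-plugin-configs"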
config:
  # ConfigMap name if pulling from an external ConfigMap
  name: ""
  # Set of named configs to build an integrated ConfigMap from
  map:
    default: |-
      version: v1
      sharing:
        timeSlicing:
          resources:
          - name: nvidia.com/gpu
            replicas: 3
  # List of fallback strategies to attempt if no config is selected and no default is provided
  fallbackStrategies: ["named", "single"]
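# An illustrative note (an assumption based on how time-slicing is documented
# to behave, not output produced by this chart): with the "default"
# time-slicing config above, each physical GPU is advertised as 3
# nvidia.com/gpu devices, so a node with a single GPU would report
# nvidia.com/gpu: 3 as allocatable once that config is applied to it.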
compatWithCPUManager: null
migStrategy: null
failOnInitError: null
deviceListStrategy: null
deviceIDStrategy: null
nvidiaDriverRoot: null
gdsEnabled: null
mofedEnabled: null
deviceDiscoveryStrategy: null
nameOverride: ""
fullnameOverride: ""
namespaceOverride: ""
selectorLabelsOverride: {}

allowDefaultNamespace: false

imagePullSecrets: []
image:
  repository: nvcr.io/nvidia/k8s-device-plugin
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""

updateStrategy:
  type: RollingUpdate

podAnnotations: {}
podSecurityContext: {}
securityContext: {}

resources: {}
nodeSelector: {}
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
          # On discrete-GPU based systems NFD adds the following label where 10de is the NVIDIA PCI vendor ID
          - key: feature.node.kubernetes.io/pci-10de.present
            operator: In
            values:
              - "true"
        - matchExpressions:
          # On some Tegra-based systems NFD detects the CPU vendor ID as NVIDIA
          - key: feature.node.kubernetes.io/cpu-model.vendor_id
            operator: In
            values:
              - "NVIDIA"
        - matchExpressions:
          # We allow a GPU deployment to be forced by setting the following label to "true"
          - key: "nvidia.com/gpu.present"
            operator: In
            values:
              - "true"
tolerations:
  # This toleration is deprecated. Kept here for backward compatibility
  # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
  - key: CriticalAddonsOnly
    operator: Exists
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule

# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"
runtimeClassName: null

devicePlugin:
  enabled: true

gfd:
  enabled: true
  nameOverride: gpu-feature-discovery
  namespaceOverride: ""
  noTimestamp: null
  sleepInterval: null
  securityContext:
    # privileged access is required for the gpu-feature-discovery to access the
    # vgpu info on a host.
    # TODO: This should be optional and detected automatically.
    privileged: true
# Helm dependency
nfd:
  nameOverride: node-feature-discovery
  enableNodeFeatureApi: false
  master:
    serviceAccount:
      name: node-feature-discovery
      create: true
    config:
      extraLabelNs: ["nvidia.com"]
  worker:
    tolerations:
    - key: "node-role.kubernetes.io/master"
      operator: "Equal"
      value: ""
      effect: "NoSchedule"
    - key: "nvidia.com/gpu"
      operator: "Equal"
      value: "present"
      effect: "NoSchedule"
    config:
      sources:
        pci:
          deviceClassWhitelist:
          - "02"
          - "03"
          deviceLabelFields:
          - vendor
mps:
  # root specifies the location where files and folders for managing MPS will
  # be created. This includes a daemon-specific /dev/shm and pipe and log
  # directories.
  # Pipe directories will be created at {{ mps.root }}/{{ .ResourceName }}
  root: "/run/nvidia/mps"
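  # For example (an illustration derived from the template above, not chart
  # output): with root left at "/run/nvidia/mps" and a resource named
  # nvidia.com/gpu, the pipe directory would be /run/nvidia/mps/nvidia.com/gpu.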
cdi:
  # nvidiaHookPath specifies the path to the nvidia-cdi-hook or nvidia-ctk executables on the host.
  # This is required to ensure that the generated CDI specification refers to the correct CDI hooks.
  nvidiaHookPath: null
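  # A hedged example (the path is an assumption, not a chart default): on hosts
  # where the NVIDIA Container Toolkit places its binaries under /usr/bin, this
  # might be set to "/usr/bin/nvidia-cdi-hook"; adjust it to the actual install
  # location on your nodes.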