# values.yaml

# Plugin configuration
# Only one of "name" or "map" should ever be set for a given deployment.
# Use "name" to point to an external ConfigMap with a list of configurations.
# Use "map" to build an integrated ConfigMap from a set of configurations as
# part of this helm chart. An example of setting "map" might be:
# config:
#   map:
#     default: |-
#       version: v1
#       flags:
#         migStrategy: none
#     mig-single: |-
#       version: v1
#       flags:
#         migStrategy: single
#     mig-mixed: |-
#       version: v1
#       flags:
#         migStrategy: mixed
config:
  # ConfigMap name if pulling from an external ConfigMap
  name: ""
  # Set of named configs to build an integrated ConfigMap from
  map:
    default: |-
      version: v1
      sharing:
        timeSlicing:
          resources:
          - name: nvidia.com/gpu
            replicas: 3
  # List of fallback strategies to attempt if no config is selected and no default is provided
  fallbackStrategies: ["named", "single"]
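  # One possible way to use "name" instead of "map" is to create the ConfigMap out
  # of band and point the chart at it. The ConfigMap name, config file name,
  # namespace, and the "nvdp" repo/release names below are illustrative
  # placeholders, not chart defaults:
  #   kubectl create configmap nvidia-plugin-configs -n <namespace> --from-file=default=dp-config.yaml
  #   helm upgrade -i nvdp nvdp/nvidia-device-plugin -n <namespace> --set config.name=nvidia-plugin-configs
  # With the "map" entry above, time-slicing advertises each physical GPU as 3
  # nvidia.com/gpu replicas on the nodes this plugin manages.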
compatWithCPUManager: null
migStrategy: null
failOnInitError: null
deviceListStrategy: null
deviceIDStrategy: null
nvidiaDriverRoot: null
gdsEnabled: null
mofedEnabled: null
deviceDiscoveryStrategy: null

nameOverride: ""
fullnameOverride: ""
namespaceOverride: ""
selectorLabelsOverride: {}

allowDefaultNamespace: false

imagePullSecrets: []
image:
  repository: nvcr.io/nvidia/k8s-device-plugin
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""

updateStrategy:
  type: RollingUpdate

podAnnotations: {}
podSecurityContext: {}
securityContext: {}

resources: {}
nodeSelector: {}

affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            # On discrete-GPU based systems NFD adds the following label where 10de is the NVIDIA PCI vendor ID
            - key: feature.node.kubernetes.io/pci-10de.present
              operator: In
              values:
                - "true"
        - matchExpressions:
            # On some Tegra-based systems NFD detects the CPU vendor ID as NVIDIA
            - key: feature.node.kubernetes.io/cpu-model.vendor_id
              operator: In
              values:
                - "NVIDIA"
        - matchExpressions:
            # We allow a GPU deployment to be forced by setting the following label to "true"
            - key: "nvidia.com/gpu.present"
              operator: In
              values:
                - "true"
tolerations:
  # This toleration is deprecated. Kept here for backward compatibility
  # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
  - key: CriticalAddonsOnly
    operator: Exists
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
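  # The nvidia.com/gpu toleration above matches a taint such as the following, which
  # some clusters apply to GPU nodes to keep non-GPU workloads off them (the node
  # name is a placeholder):
  #   kubectl taint nodes <node-name> nvidia.com/gpu=present:NoSchedule
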
# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"

runtimeClassName: null
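# For example, on containerd-based clusters that expose the NVIDIA container runtime
# through a RuntimeClass named "nvidia" (a common but cluster-specific name, not a
# chart default), this could be set as:
#   runtimeClassName: nvidia
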
devicePlugin:
  enabled: true

gfd:
  enabled: true
  nameOverride: gpu-feature-discovery
  namespaceOverride: ""
  noTimestamp: null
  sleepInterval: null
  securityContext:
    # privileged access is required for the gpu-feature-discovery to access the
    # vgpu info on a host.
    # TODO: This should be optional and detected automatically.
    privileged: true

# Helm dependency
nfd:
  nameOverride: node-feature-discovery
  enableNodeFeatureApi: false
  master:
    serviceAccount:
      name: node-feature-discovery
      create: true
    config:
      extraLabelNs: ["nvidia.com"]
  worker:
    tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Equal"
        value: ""
        effect: "NoSchedule"
      - key: "nvidia.com/gpu"
        operator: "Equal"
        value: "present"
        effect: "NoSchedule"
    config:
      sources:
        pci:
          deviceClassWhitelist:
            # PCI base class 02 covers network controllers, class 03 display controllers (GPUs)
            - "02"
            - "03"
          deviceLabelFields:
            - vendor

mps:
  # root specifies the location where files and folders for managing MPS will
  # be created. This includes a daemon-specific /dev/shm and pipe and log
  # directories.
  # Pipe directories will be created at {{ mps.root }}/{{ .ResourceName }}
  root: "/run/nvidia/mps"
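  # For example, substituting the default root and the standard nvidia.com/gpu
  # resource name into the template above gives a pipe directory of
  # /run/nvidia/mps/nvidia.com/gpu.
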
cdi:
  # nvidiaHookPath specifies the path to the nvidia-cdi-hook or nvidia-ctk executables on the host.
  # This is required to ensure that the generated CDI specification refers to the correct CDI hooks.
  nvidiaHookPath: null
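  # For example, if the NVIDIA Container Toolkit executables live in a non-default
  # location such as /usr/local/nvidia/toolkit on the host (an assumed path; adjust
  # to the actual install location), this might be set to:
  #   nvidiaHookPath: /usr/local/nvidia/toolkit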