# values.yaml

# Plugin configuration
# Only one of "name" or "map" should ever be set for a given deployment.
# Use "name" to point to an external ConfigMap with a list of configurations.
# Use "map" to build an integrated ConfigMap from a set of configurations as
# part of this helm chart. An example of setting "map" might be:
# config:
#   map:
#     default: |-
#       version: v1
#       flags:
#         migStrategy: none
#     mig-single: |-
#       version: v1
#       flags:
#         migStrategy: single
#     mig-mixed: |-
#       version: v1
#       flags:
#         migStrategy: mixed
config:
  # ConfigMap name if pulling from an external ConfigMap
  name: ""
  # Set of named configs to build an integrated ConfigMap from
  map:
    default: |-
      version: v1
      sharing:
        timeSlicing:
          resources:
          - name: nvidia.com/gpu
            replicas: 3
  # List of fallback strategies to attempt if no config is selected and no default is provided
  fallbackStrategies: ["named", "single"]
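  # One possible way to use "name" instead of "map" is to create the ConfigMap out
  # of band and point the chart at it. The ConfigMap name, config file name,
  # namespace, and the "nvdp" repo/release names below are illustrative
  # placeholders, not chart defaults:
  #   kubectl create configmap nvidia-plugin-configs -n <namespace> --from-file=default=dp-config.yaml
  #   helm upgrade -i nvdp nvdp/nvidia-device-plugin -n <namespace> --set config.name=nvidia-plugin-configs
  # With the "map" entry above, time-slicing advertises each physical GPU as 3
  # nvidia.com/gpu replicas on the nodes this plugin manages.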
compatWithCPUManager: null
migStrategy: null
failOnInitError: null
deviceListStrategy: null
deviceIDStrategy: null
nvidiaDriverRoot: null
gdsEnabled: null
mofedEnabled: null
deviceDiscoveryStrategy: null

nameOverride: ""
fullnameOverride: ""
namespaceOverride: ""
selectorLabelsOverride: {}

allowDefaultNamespace: false

imagePullSecrets: []
image:
  repository: nvcr.io/nvidia/k8s-device-plugin
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""

updateStrategy:
  type: RollingUpdate

podAnnotations: {}
podSecurityContext: {}
securityContext: {}

resources: {}
nodeSelector: {}

affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            # On discrete-GPU based systems NFD adds the following label where 10de is the NVIDIA PCI vendor ID
            - key: feature.node.kubernetes.io/pci-10de.present
              operator: In
              values:
                - "true"
        - matchExpressions:
            # On some Tegra-based systems NFD detects the CPU vendor ID as NVIDIA
            - key: feature.node.kubernetes.io/cpu-model.vendor_id
              operator: In
              values:
                - "NVIDIA"
        - matchExpressions:
            # We allow a GPU deployment to be forced by setting the following label to "true"
            - key: "nvidia.com/gpu.present"
              operator: In
              values:
                - "true"
tolerations:
  # This toleration is deprecated. Kept here for backward compatibility
  # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
  - key: CriticalAddonsOnly
    operator: Exists
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
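  # The nvidia.com/gpu toleration above matches a taint such as the following, which
  # some clusters apply to GPU nodes to keep non-GPU workloads off them (the node
  # name is a placeholder):
  #   kubectl taint nodes <node-name> nvidia.com/gpu=present:NoSchedule
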
# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"

runtimeClassName: null
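# For example, on containerd-based clusters that expose the NVIDIA container runtime
# through a RuntimeClass named "nvidia" (a common but cluster-specific name, not a
# chart default), this could be set as:
#   runtimeClassName: nvidia
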
devicePlugin:
  enabled: true

gfd:
  enabled: true
  nameOverride: gpu-feature-discovery
  namespaceOverride: ""
  noTimestamp: null
  sleepInterval: null
  securityContext:
    # privileged access is required for the gpu-feature-discovery to access the
    # vgpu info on a host.
    # TODO: This should be optional and detected automatically.
    privileged: true

# Helm dependency
nfd:
  nameOverride: node-feature-discovery
  enableNodeFeatureApi: false
  master:
    serviceAccount:
      name: node-feature-discovery
      create: true
    config:
      extraLabelNs: ["nvidia.com"]
  worker:
    tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Equal"
        value: ""
        effect: "NoSchedule"
      - key: "nvidia.com/gpu"
        operator: "Equal"
        value: "present"
        effect: "NoSchedule"
    config:
      sources:
        pci:
          deviceClassWhitelist:
            # PCI base class 02 covers network controllers, class 03 display controllers (GPUs)
            - "02"
            - "03"
          deviceLabelFields:
            - vendor

mps:
  # root specifies the location where files and folders for managing MPS will
  # be created. This includes a daemon-specific /dev/shm and pipe and log
  # directories.
  # Pipe directories will be created at {{ mps.root }}/{{ .ResourceName }}
  root: "/run/nvidia/mps"
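  # For example, substituting the default root and the standard nvidia.com/gpu
  # resource name into the template above gives a pipe directory of
  # /run/nvidia/mps/nvidia.com/gpu.
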
cdi:
  # nvidiaHookPath specifies the path to the nvidia-cdi-hook or nvidia-ctk executables on the host.
  # This is required to ensure that the generated CDI specification refers to the correct CDI hooks.
  nvidiaHookPath: null
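  # For example, if the NVIDIA Container Toolkit executables live in a non-default
  # location such as /usr/local/nvidia/toolkit on the host (an assumed path; adjust
  # to the actual install location), this might be set to:
  #   nvidiaHookPath: /usr/local/nvidia/toolkit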