values.yaml

# Plugin configuration
# Only one of "name" or "map" should ever be set for a given deployment.
# Use "name" to point to an external ConfigMap with a list of configurations.
# Use "map" to build an integrated ConfigMap from a set of configurations as
# part of this helm chart. An example of setting "map" might be:
# config:
#   map:
#     default: |-
#       version: v1
#       flags:
#         migStrategy: none
#     mig-single: |-
#       version: v1
#       flags:
#         migStrategy: single
#     mig-mixed: |-
#       version: v1
#       flags:
#         migStrategy: mixed
config:
  # ConfigMap name if pulling from an external ConfigMap (see the commented example below)
  name: ""
  # Set of named configs to build an integrated ConfigMap from
  map: {}
  # Default config name within the ConfigMap
  default: ""
  # List of fallback strategies to attempt if no config is selected and no default is provided
  fallbackStrategies: ["named", "single"]
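# For illustration only: the alternative to "map" is pointing "name" at an
# external ConfigMap. The ConfigMap name and default key below are hypothetical
# placeholders, not values shipped with this chart:
# config:
#   name: "device-plugin-configs"   # pre-existing ConfigMap in the release namespace (assumed name)
#   default: "default"              # key within that ConfigMap to use when no config is selected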
compatWithCPUManager: null
migStrategy: null
failOnInitError: null
deviceListStrategy: null
deviceIDStrategy: null
nvidiaDriverRoot: null
gdsEnabled: null
mofedEnabled: null
deviceDiscoveryStrategy: null

nameOverride: ""
fullnameOverride: ""
namespaceOverride: ""
selectorLabelsOverride: {}

allowDefaultNamespace: false

imagePullSecrets: []
image:
  repository: nvcr.io/nvidia/k8s-device-plugin
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""

updateStrategy:
  type: RollingUpdate

podAnnotations: {}
podSecurityContext: {}
securityContext: {}

resources: {}
nodeSelector: {}
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            # On discrete-GPU based systems NFD adds the following label where 10de is the NVIDIA PCI vendor ID
            - key: feature.node.kubernetes.io/pci-10de.present
              operator: In
              values:
                - "true"
        - matchExpressions:
            # On some Tegra-based systems NFD detects the CPU vendor ID as NVIDIA
            - key: feature.node.kubernetes.io/cpu-model.vendor_id
              operator: In
              values:
                - "NVIDIA"
        - matchExpressions:
            # We allow a GPU deployment to be forced by setting the following label to "true"
            - key: "nvidia.com/gpu.present"
              operator: In
              values:
                - "true"
tolerations:
  # This toleration is deprecated. Kept here for backward compatibility
  # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
  - key: CriticalAddonsOnly
    operator: Exists
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule

# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"

runtimeClassName: null

devicePlugin:
  enabled: true

gfd:
  enabled: false
  nameOverride: gpu-feature-discovery
  namespaceOverride: ""
  noTimestamp: null
  sleepInterval: null
  securityContext:
    # privileged access is required for the gpu-feature-discovery to access the
    # vgpu info on a host.
    # TODO: This should be optional and detected automatically.
    privileged: true

# Helm dependency
nfd:
  nameOverride: node-feature-discovery
  enableNodeFeatureApi: false
  master:
    serviceAccount:
      name: node-feature-discovery
      create: true
    config:
      extraLabelNs: ["nvidia.com"]
  worker:
    tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Equal"
        value: ""
        effect: "NoSchedule"
      - key: "nvidia.com/gpu"
        operator: "Equal"
        value: "present"
        effect: "NoSchedule"
    config:
      sources:
        pci:
          deviceClassWhitelist:
            - "02"
            - "03"
          deviceLabelFields:
            - vendor

mps:
  # root specifies the location where files and folders for managing MPS will
  # be created. This includes a daemon-specific /dev/shm and pipe and log
  # directories.
  # Pipe directories will be created at {{ mps.root }}/{{ .ResourceName }}
  root: "/run/nvidia/mps"
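# For illustration only: following the {{ mps.root }}/{{ .ResourceName }}
# pattern described above, a resource named "nvidia.com/gpu" would get its pipe
# directory under /run/nvidia/mps/nvidia.com/gpu with the default root.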
cdi:
  # nvidiaHookPath specifies the path to the nvidia-cdi-hook or nvidia-ctk executables on the host.
  # This is required to ensure that the generated CDI specification refers to the correct CDI hooks.
  nvidiaHookPath: null
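# Illustrative override only: the actual location depends on how the NVIDIA
# Container Toolkit was installed on the host, e.g.
# cdi:
#   nvidiaHookPath: "/usr/bin/nvidia-ctk"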