# values.yaml — default values for the NVIDIA k8s-device-plugin Helm chart.
---
  1. # Plugin configuration
  2. # Only one of "name" or "map" should ever be set for a given deployment.
  3. # Use "name" to point to an external ConfigMap with a list of configurations.
  4. # Use "map" to build an integrated ConfigMap from a set of configurations as
  5. # part of this helm chart. An example of setting "map" might be:
  6. config:
  7. map:
  8. default: |-
  9. version: v1
  10. flags:
  11. migStrategy: none
  12. failOnInitError: true
  13. plugin:
  14. passDeviceSpecs: true
  15. sharing:
  16. timeSlicing:
  17. renameByDefault: false
  18. failRequestsGreaterThanOne: false
  19. resources:
  20. - name: nvidia.com/gpu
  21. replicas: 5
  22. default: ""
  23. fallbackStrategies: ["named" , "single"]
  24. compatWithCPUManager: null
  25. migStrategy: null
  26. failOnInitError: null
  27. deviceListStrategy: null
  28. deviceIDStrategy: null
  29. nvidiaDriverRoot: null
  30. gdsEnabled: null
  31. mofedEnabled: null
  32. deviceDiscoveryStrategy: null
  33. nameOverride: ""
  34. fullnameOverride: ""
  35. namespaceOverride: ""
  36. selectorLabelsOverride: {}
  37. allowDefaultNamespace: false
  38. imagePullSecrets: []
  39. image:
  40. repository: nvcr.io/nvidia/k8s-device-plugin
  41. pullPolicy: IfNotPresent
  42. # Overrides the image tag whose default is the chart appVersion.
  43. tag: ""
  44. updateStrategy:
  45. type: RollingUpdate
  46. podAnnotations: {}
  47. podSecurityContext: {}
  48. securityContext: {}
  49. resources: {}
  50. nodeSelector: {}
  51. affinity:
  52. nodeAffinity:
  53. requiredDuringSchedulingIgnoredDuringExecution:
  54. nodeSelectorTerms:
  55. - matchExpressions:
  56. # On discrete-GPU based systems NFD adds the following label where 10de is the NVIDIA PCI vendor ID
  57. - key: feature.node.kubernetes.io/pci-10de.present
  58. operator: In
  59. values:
  60. - "true"
  61. - matchExpressions:
  62. # On some Tegra-based systems NFD detects the CPU vendor ID as NVIDIA
  63. - key: feature.node.kubernetes.io/cpu-model.vendor_id
  64. operator: In
  65. values:
  66. - "NVIDIA"
  67. - matchExpressions:
  68. # We allow a GPU deployment to be forced by setting the following label to "true"
  69. - key: "nvidia.com/gpu.present"
  70. operator: In
  71. values:
  72. - "true"
  73. tolerations:
  74. # This toleration is deprecated. Kept here for backward compatibility
  75. # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
  76. - key: CriticalAddonsOnly
  77. operator: Exists
  78. - key: nvidia.com/gpu
  79. operator: Exists
  80. effect: NoSchedule
  81. # Mark this pod as a critical add-on; when enabled, the critical add-on
  82. # scheduler reserves resources for critical add-on pods so that they can
  83. # be rescheduled after a failure.
  84. # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
  85. priorityClassName: "system-node-critical"
  86. runtimeClassName: null
  87. devicePlugin:
  88. enabled: true
  89. gfd:
  90. enabled: false
  91. nameOverride: gpu-feature-discovery
  92. namespaceOverride: ""
  93. noTimestamp: null
  94. sleepInterval: null
  95. securityContext:
  96. # privileged access is required for the gpu-feature-discovery to access the
  97. # vgpu info on a host.
  98. # TODO: This should be optional and detected automatically.
  99. privileged: true
  100. # Helm dependency
  101. nfd:
  102. nameOverride: node-feature-discovery
  103. enableNodeFeatureApi: false
  104. master:
  105. serviceAccount:
  106. name: node-feature-discovery
  107. create: true
  108. config:
  109. extraLabelNs: ["nvidia.com"]
  110. worker:
  111. tolerations:
  112. - key: "node-role.kubernetes.io/master"
  113. operator: "Equal"
  114. value: ""
  115. effect: "NoSchedule"
  116. - key: "nvidia.com/gpu"
  117. operator: "Equal"
  118. value: "present"
  119. effect: "NoSchedule"
  120. config:
  121. sources:
  122. pci:
  123. deviceClassWhitelist:
  124. - "02"
  125. - "03"
  126. deviceLabelFields:
  127. - vendor
  128. mps:
  129. # root specifies the location where files and folders for managing MPS will
  130. # be created. This includes a daemon-specific /dev/shm and pipe and log
  131. # directories.
  132. # Pipe directories will be created at {{ mps.root }}/{{ .ResourceName }}
  133. root: "/run/nvidia/mps"
  134. cdi:
  135. # nvidiaHookPath specifies the path to the nvidia-cdi-hook or nvidia-ctk executables on the host.
  136. # This is required to ensure that the generated CDI specification refers to the correct CDI hooks.
  137. nvidiaHookPath: null