-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconfiguration.yaml
More file actions
165 lines (162 loc) · 3.87 KB
/
configuration.yaml
File metadata and controls
165 lines (162 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
---
# Knative Service that runs the dispatcher container in the nthulab namespace.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  namespace: nthulab
  name: dispatcher
spec:
  template:
    spec:
      # Identity the dispatcher pod runs as (the ServiceAccount is declared
      # further down in this file).
      serviceAccountName: dispatcher-service-account
      containers:
        - name: dispatcher-container
          # PLACEHOLDER: replace with your own dispatcher image,
          # e.g. gcr.io/you/dispatcher:latest
          image: ghcr.io/deeeelin/ssis-dispatcher:main
          imagePullPolicy: Always
          ports:
            - containerPort: 8080
          volumeMounts:
            # Read-only mount of the dispatcher-config ConfigMap volume.
            - name: dispatcher-config-volume
              mountPath: /etc/dispatcher-config
              readOnly: true
      volumes:
        # Backed by the dispatcher-config ConfigMap.
        - name: dispatcher-config-volume
          configMap:
            name: dispatcher-config
---
# ServiceAccount referenced by the dispatcher Knative Service through
# spec.template.spec.serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: nthulab
  name: dispatcher-service-account
---
# ClusterRole referenced by the dispatcher-binding ClusterRoleBinding.
# ClusterRole is a cluster-scoped resource, so it carries no metadata.namespace.
#
# WARNING(review): the single rule below allows every verb on every resource
# in every API group, i.e. unrestricted access to the whole cluster. Narrow it
# to the API groups, resources, and verbs the dispatcher actually needs --
# TODO confirm the required set against the dispatcher implementation.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: dispatcher
rules:
  - apiGroups:
      - '*'
    resources:
      - '*'
    verbs:
      - '*'
---
# Grants the "dispatcher" ClusterRole to the dispatcher-service-account
# ServiceAccount from the nthulab namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: dispatcher-binding
subjects:
  - kind: ServiceAccount
    namespace: nthulab
    name: dispatcher-service-account
roleRef:
  kind: ClusterRole
  name: dispatcher
  apiGroup: rbac.authorization.k8s.io
---
# Configuration for the dispatcher. The dispatcher Knative Service mounts this
# ConfigMap read-only at /etc/dispatcher-config.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: nthulab
  name: dispatcher-config
data:
  # NOTE(review): presumably the namespace the dispatcher operates in and the
  # container image it launches for inference -- confirm against the
  # dispatcher code.
  service-namespace: nthulab
  inference-image: ghcr.io/llshang/tgi_custom:latest

  # Newline-separated resource names of the form nvidia.com/gpu-<N>gb,
  # one per line, for N = 1..32.
  gpu-resource-config: |
    nvidia.com/gpu-1gb
    nvidia.com/gpu-2gb
    nvidia.com/gpu-3gb
    nvidia.com/gpu-4gb
    nvidia.com/gpu-5gb
    nvidia.com/gpu-6gb
    nvidia.com/gpu-7gb
    nvidia.com/gpu-8gb
    nvidia.com/gpu-9gb
    nvidia.com/gpu-10gb
    nvidia.com/gpu-11gb
    nvidia.com/gpu-12gb
    nvidia.com/gpu-13gb
    nvidia.com/gpu-14gb
    nvidia.com/gpu-15gb
    nvidia.com/gpu-16gb
    nvidia.com/gpu-17gb
    nvidia.com/gpu-18gb
    nvidia.com/gpu-19gb
    nvidia.com/gpu-20gb
    nvidia.com/gpu-21gb
    nvidia.com/gpu-22gb
    nvidia.com/gpu-23gb
    nvidia.com/gpu-24gb
    nvidia.com/gpu-25gb
    nvidia.com/gpu-26gb
    nvidia.com/gpu-27gb
    nvidia.com/gpu-28gb
    nvidia.com/gpu-29gb
    nvidia.com/gpu-30gb
    nvidia.com/gpu-31gb
    nvidia.com/gpu-32gb

  # Used in MPS mode to set the active thread percentage (compute SM resource)
  # for each resource name. Each value equals floor(N * 100 / 32) for
  # nvidia.com/gpu-<N>gb.
  mps-active-thread-percentage-config: |
    nvidia.com/gpu-1gb: 3
    nvidia.com/gpu-2gb: 6
    nvidia.com/gpu-3gb: 9
    nvidia.com/gpu-4gb: 12
    nvidia.com/gpu-5gb: 15
    nvidia.com/gpu-6gb: 18
    nvidia.com/gpu-7gb: 21
    nvidia.com/gpu-8gb: 25
    nvidia.com/gpu-9gb: 28
    nvidia.com/gpu-10gb: 31
    nvidia.com/gpu-11gb: 34
    nvidia.com/gpu-12gb: 37
    nvidia.com/gpu-13gb: 40
    nvidia.com/gpu-14gb: 43
    nvidia.com/gpu-15gb: 46
    nvidia.com/gpu-16gb: 50
    nvidia.com/gpu-17gb: 53
    nvidia.com/gpu-18gb: 56
    nvidia.com/gpu-19gb: 59
    nvidia.com/gpu-20gb: 62
    nvidia.com/gpu-21gb: 65
    nvidia.com/gpu-22gb: 68
    nvidia.com/gpu-23gb: 71
    nvidia.com/gpu-24gb: 75
    nvidia.com/gpu-25gb: 78
    nvidia.com/gpu-26gb: 81
    nvidia.com/gpu-27gb: 84
    nvidia.com/gpu-28gb: 87
    nvidia.com/gpu-29gb: 90
    nvidia.com/gpu-30gb: 93
    nvidia.com/gpu-31gb: 96
    nvidia.com/gpu-32gb: 100
---
#---
# Register a PV to reserve storage on disk
#apiVersion: v1
#kind: PersistentVolume
#metadata:
# name: knative-pv
# namespace: nthulab
#spec:
# capacity:
# storage: 100Gi
# accessModes:
# - ReadWriteMany
# persistentVolumeReclaimPolicy: Retain
# storageClassName: manual
# hostPath:
# path: /net/to/storage  # e.g. /net/storage149/autofs/css89
#
#---
# Create a claim for the PV (similar to a role binding); a pod then uses this claim as a volume and mounts it
#apiVersion: v1
#kind: PersistentVolumeClaim
#metadata:
# name: knative-pv-claim
# namespace: nthulab
#spec:
# accessModes:
# - ReadWriteMany
# resources:
# requests:
# storage: 100Gi
# storageClassName: manual