diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/clk_arb.c | 1788 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/clk_arb_linux.h | 120 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c | 641 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/clk_arb.h | 105 |
5 files changed, 1365 insertions, 1290 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 31483c5d..6d0fcad0 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile | |||
@@ -273,6 +273,7 @@ nvgpu-y += \ | |||
273 | clk/clk_domain.o \ | 273 | clk/clk_domain.o \ |
274 | clk/clk_prog.o \ | 274 | clk/clk_prog.o \ |
275 | clk/clk_vf_point.o \ | 275 | clk/clk_vf_point.o \ |
276 | common/linux/ioctl_clk_arb.o \ | ||
276 | common/linux/clk_arb.o \ | 277 | common/linux/clk_arb.o \ |
277 | clk/clk_freq_controller.o \ | 278 | clk/clk_freq_controller.o \ |
278 | perf/vfe_var.o \ | 279 | perf/vfe_var.o \ |
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c index 82c97891..7cb3752a 100644 --- a/drivers/gpu/nvgpu/common/linux/clk_arb.c +++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * This software is licensed under the terms of the GNU General Public | 4 | * This software is licensed under the terms of the GNU General Public |
5 | * License version 2, as published by the Free Software Foundation, and | 5 | * License version 2, as published by the Free Software Foundation, and |
@@ -39,224 +39,12 @@ | |||
39 | 39 | ||
40 | #include "gk20a/gk20a.h" | 40 | #include "gk20a/gk20a.h" |
41 | #include "clk/clk.h" | 41 | #include "clk/clk.h" |
42 | #include "clk_arb_linux.h" | ||
42 | #include "pstate/pstate.h" | 43 | #include "pstate/pstate.h" |
43 | #include "lpwr/lpwr.h" | 44 | #include "lpwr/lpwr.h" |
44 | #include "volt/volt.h" | 45 | #include "volt/volt.h" |
45 | 46 | ||
46 | #ifdef CONFIG_DEBUG_FS | 47 | int nvgpu_clk_notification_queue_alloc(struct gk20a *g, |
47 | #include "common/linux/os_linux.h" | ||
48 | #endif | ||
49 | |||
50 | #define MAX_F_POINTS 256 | ||
51 | #define DEFAULT_EVENT_NUMBER 32 | ||
52 | |||
53 | struct nvgpu_clk_dev; | ||
54 | struct nvgpu_clk_arb_target; | ||
55 | struct nvgpu_clk_notification_queue; | ||
56 | |||
57 | #ifdef CONFIG_DEBUG_FS | ||
58 | static int nvgpu_clk_arb_debugfs_init(struct gk20a *g); | ||
59 | #endif | ||
60 | |||
61 | static int nvgpu_clk_arb_release_event_dev(struct inode *inode, | ||
62 | struct file *filp); | ||
63 | static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | ||
64 | struct file *filp); | ||
65 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); | ||
66 | static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, | ||
67 | size_t size, loff_t *off); | ||
68 | |||
69 | static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, | ||
70 | unsigned long arg); | ||
71 | |||
72 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); | ||
73 | static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work); | ||
74 | static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb); | ||
75 | static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount); | ||
76 | static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount); | ||
77 | static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, | ||
78 | u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, | ||
79 | u32 voltuv_sram); | ||
80 | static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, | ||
81 | u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, | ||
82 | u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram); | ||
83 | static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, | ||
84 | struct nvgpu_clk_arb_target *target, | ||
85 | u32 alarm_mask); | ||
86 | static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm); | ||
87 | static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm); | ||
88 | |||
89 | static void nvgpu_clk_arb_queue_notification(struct gk20a *g, | ||
90 | struct nvgpu_clk_notification_queue *queue, | ||
91 | u32 alarm_mask); | ||
92 | static int nvgpu_clk_notification_queue_alloc(struct gk20a *g, | ||
93 | struct nvgpu_clk_notification_queue *queue, | ||
94 | size_t events_number); | ||
95 | |||
96 | static void nvgpu_clk_notification_queue_free(struct gk20a *g, | ||
97 | struct nvgpu_clk_notification_queue *queue); | ||
98 | |||
99 | #define VF_POINT_INVALID_PSTATE ~0U | ||
100 | #define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b))) | ||
101 | #define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\ | ||
102 | __fls((a)->pstates) :\ | ||
103 | VF_POINT_INVALID_PSTATE) | ||
104 | #define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\ | ||
105 | __fls((a)->pstates & (b)->pstates) :\ | ||
106 | VF_POINT_INVALID_PSTATE) | ||
107 | |||
108 | /* Local Alarms */ | ||
109 | #define EVENT(alarm) (0x1UL << NVGPU_GPU_EVENT_##alarm) | ||
110 | |||
111 | #define LOCAL_ALARM_MASK (EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE) | \ | ||
112 | EVENT(VF_UPDATE)) | ||
113 | |||
114 | #define _WRAPGTEQ(a, b) ((a-b) > 0) | ||
115 | |||
116 | struct nvgpu_clk_notification { | ||
117 | u32 notification; | ||
118 | u64 timestamp; | ||
119 | }; | ||
120 | |||
121 | struct nvgpu_clk_notification_queue { | ||
122 | u32 size; | ||
123 | nvgpu_atomic_t head; | ||
124 | nvgpu_atomic_t tail; | ||
125 | struct nvgpu_clk_notification *notifications; | ||
126 | }; | ||
127 | |||
128 | struct nvgpu_clk_vf_point { | ||
129 | u16 pstates; | ||
130 | union { | ||
131 | struct { | ||
132 | u16 gpc_mhz; | ||
133 | u16 sys_mhz; | ||
134 | u16 xbar_mhz; | ||
135 | }; | ||
136 | u16 mem_mhz; | ||
137 | }; | ||
138 | u32 uvolt; | ||
139 | u32 uvolt_sram; | ||
140 | }; | ||
141 | |||
142 | struct nvgpu_clk_vf_table { | ||
143 | u32 mclk_num_points; | ||
144 | struct nvgpu_clk_vf_point *mclk_points; | ||
145 | u32 gpc2clk_num_points; | ||
146 | struct nvgpu_clk_vf_point *gpc2clk_points; | ||
147 | }; | ||
148 | #ifdef CONFIG_DEBUG_FS | ||
149 | struct nvgpu_clk_arb_debug { | ||
150 | s64 switch_max; | ||
151 | s64 switch_min; | ||
152 | u64 switch_num; | ||
153 | s64 switch_avg; | ||
154 | s64 switch_std; | ||
155 | }; | ||
156 | #endif | ||
157 | |||
158 | struct nvgpu_clk_arb_target { | ||
159 | u16 mclk; | ||
160 | u16 gpc2clk; | ||
161 | u32 pstate; | ||
162 | }; | ||
163 | |||
164 | struct nvgpu_clk_arb { | ||
165 | struct nvgpu_spinlock sessions_lock; | ||
166 | struct nvgpu_spinlock users_lock; | ||
167 | |||
168 | struct nvgpu_mutex pstate_lock; | ||
169 | struct list_head users; | ||
170 | struct list_head sessions; | ||
171 | struct llist_head requests; | ||
172 | |||
173 | struct gk20a *g; | ||
174 | int status; | ||
175 | |||
176 | struct nvgpu_clk_arb_target actual_pool[2]; | ||
177 | struct nvgpu_clk_arb_target *actual; | ||
178 | |||
179 | u16 gpc2clk_default_mhz; | ||
180 | u16 mclk_default_mhz; | ||
181 | u32 voltuv_actual; | ||
182 | |||
183 | u16 gpc2clk_min, gpc2clk_max; | ||
184 | u16 mclk_min, mclk_max; | ||
185 | |||
186 | struct work_struct update_fn_work; | ||
187 | struct workqueue_struct *update_work_queue; | ||
188 | struct work_struct vf_table_fn_work; | ||
189 | struct workqueue_struct *vf_table_work_queue; | ||
190 | |||
191 | struct nvgpu_cond request_wq; | ||
192 | |||
193 | struct nvgpu_clk_vf_table *current_vf_table; | ||
194 | struct nvgpu_clk_vf_table vf_table_pool[2]; | ||
195 | u32 vf_table_index; | ||
196 | |||
197 | u16 *mclk_f_points; | ||
198 | nvgpu_atomic_t req_nr; | ||
199 | |||
200 | u32 mclk_f_numpoints; | ||
201 | u16 *gpc2clk_f_points; | ||
202 | u32 gpc2clk_f_numpoints; | ||
203 | |||
204 | nvgpu_atomic64_t alarm_mask; | ||
205 | struct nvgpu_clk_notification_queue notification_queue; | ||
206 | |||
207 | #ifdef CONFIG_DEBUG_FS | ||
208 | struct nvgpu_clk_arb_debug debug_pool[2]; | ||
209 | struct nvgpu_clk_arb_debug *debug; | ||
210 | bool debugfs_set; | ||
211 | #endif | ||
212 | }; | ||
213 | |||
214 | struct nvgpu_clk_dev { | ||
215 | struct nvgpu_clk_session *session; | ||
216 | union { | ||
217 | struct list_head link; | ||
218 | struct llist_node node; | ||
219 | }; | ||
220 | struct nvgpu_cond readout_wq; | ||
221 | nvgpu_atomic_t poll_mask; | ||
222 | u16 gpc2clk_target_mhz; | ||
223 | u16 mclk_target_mhz; | ||
224 | u32 alarms_reported; | ||
225 | nvgpu_atomic_t enabled_mask; | ||
226 | struct nvgpu_clk_notification_queue queue; | ||
227 | u32 arb_queue_head; | ||
228 | struct nvgpu_ref refcount; | ||
229 | }; | ||
230 | |||
231 | struct nvgpu_clk_session { | ||
232 | bool zombie; | ||
233 | struct gk20a *g; | ||
234 | struct nvgpu_ref refcount; | ||
235 | struct list_head link; | ||
236 | struct llist_head targets; | ||
237 | |||
238 | struct nvgpu_clk_arb_target target_pool[2]; | ||
239 | struct nvgpu_clk_arb_target *target; | ||
240 | }; | ||
241 | |||
242 | static const struct file_operations completion_dev_ops = { | ||
243 | .owner = THIS_MODULE, | ||
244 | .release = nvgpu_clk_arb_release_completion_dev, | ||
245 | .poll = nvgpu_clk_arb_poll_dev, | ||
246 | }; | ||
247 | |||
248 | static const struct file_operations event_dev_ops = { | ||
249 | .owner = THIS_MODULE, | ||
250 | .release = nvgpu_clk_arb_release_event_dev, | ||
251 | .poll = nvgpu_clk_arb_poll_dev, | ||
252 | .read = nvgpu_clk_arb_read_event_dev, | ||
253 | #ifdef CONFIG_COMPAT | ||
254 | .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev, | ||
255 | #endif | ||
256 | .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev, | ||
257 | }; | ||
258 | |||
259 | static int nvgpu_clk_notification_queue_alloc(struct gk20a *g, | ||
260 | struct nvgpu_clk_notification_queue *queue, | 48 | struct nvgpu_clk_notification_queue *queue, |
261 | size_t events_number) { | 49 | size_t events_number) { |
262 | queue->notifications = nvgpu_kcalloc(g, events_number, | 50 | queue->notifications = nvgpu_kcalloc(g, events_number, |
@@ -271,7 +59,7 @@ static int nvgpu_clk_notification_queue_alloc(struct gk20a *g, | |||
271 | return 0; | 59 | return 0; |
272 | } | 60 | } |
273 | 61 | ||
274 | static void nvgpu_clk_notification_queue_free(struct gk20a *g, | 62 | void nvgpu_clk_notification_queue_free(struct gk20a *g, |
275 | struct nvgpu_clk_notification_queue *queue) { | 63 | struct nvgpu_clk_notification_queue *queue) { |
276 | nvgpu_kfree(g, queue->notifications); | 64 | nvgpu_kfree(g, queue->notifications); |
277 | queue->size = 0; | 65 | queue->size = 0; |
@@ -279,185 +67,20 @@ static void nvgpu_clk_notification_queue_free(struct gk20a *g, | |||
279 | nvgpu_atomic_set(&queue->tail, 0); | 67 | nvgpu_atomic_set(&queue->tail, 0); |
280 | } | 68 | } |
281 | 69 | ||
282 | int nvgpu_clk_arb_init_arbiter(struct gk20a *g) | 70 | static void nvgpu_clk_arb_queue_notification(struct gk20a *g, |
283 | { | 71 | struct nvgpu_clk_notification_queue *queue, |
284 | struct nvgpu_clk_arb *arb; | 72 | u32 alarm_mask) { |
285 | u16 default_mhz; | ||
286 | int err; | ||
287 | int index; | ||
288 | struct nvgpu_clk_vf_table *table; | ||
289 | |||
290 | gk20a_dbg_fn(""); | ||
291 | |||
292 | if (!g->ops.clk_arb.get_arbiter_clk_domains) | ||
293 | return 0; | ||
294 | |||
295 | arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); | ||
296 | if (!arb) | ||
297 | return -ENOMEM; | ||
298 | |||
299 | err = nvgpu_mutex_init(&arb->pstate_lock); | ||
300 | if (err) | ||
301 | goto mutex_fail; | ||
302 | nvgpu_spinlock_init(&arb->sessions_lock); | ||
303 | nvgpu_spinlock_init(&arb->users_lock); | ||
304 | |||
305 | arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); | ||
306 | if (!arb->mclk_f_points) { | ||
307 | err = -ENOMEM; | ||
308 | goto init_fail; | ||
309 | } | ||
310 | |||
311 | arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); | ||
312 | if (!arb->gpc2clk_f_points) { | ||
313 | err = -ENOMEM; | ||
314 | goto init_fail; | ||
315 | } | ||
316 | |||
317 | for (index = 0; index < 2; index++) { | ||
318 | table = &arb->vf_table_pool[index]; | ||
319 | table->gpc2clk_num_points = MAX_F_POINTS; | ||
320 | table->mclk_num_points = MAX_F_POINTS; | ||
321 | |||
322 | table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS, | ||
323 | sizeof(struct nvgpu_clk_vf_point)); | ||
324 | if (!table->gpc2clk_points) { | ||
325 | err = -ENOMEM; | ||
326 | goto init_fail; | ||
327 | } | ||
328 | |||
329 | |||
330 | table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS, | ||
331 | sizeof(struct nvgpu_clk_vf_point)); | ||
332 | if (!table->mclk_points) { | ||
333 | err = -ENOMEM; | ||
334 | goto init_fail; | ||
335 | } | ||
336 | } | ||
337 | |||
338 | g->clk_arb = arb; | ||
339 | arb->g = g; | ||
340 | |||
341 | err = g->ops.clk_arb.get_arbiter_clk_default(g, | ||
342 | CTRL_CLK_DOMAIN_MCLK, &default_mhz); | ||
343 | if (err < 0) { | ||
344 | err = -EINVAL; | ||
345 | goto init_fail; | ||
346 | } | ||
347 | |||
348 | arb->mclk_default_mhz = default_mhz; | ||
349 | |||
350 | err = g->ops.clk_arb.get_arbiter_clk_default(g, | ||
351 | CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz); | ||
352 | if (err < 0) { | ||
353 | err = -EINVAL; | ||
354 | goto init_fail; | ||
355 | } | ||
356 | |||
357 | arb->gpc2clk_default_mhz = default_mhz; | ||
358 | |||
359 | arb->actual = &arb->actual_pool[0]; | ||
360 | |||
361 | nvgpu_atomic_set(&arb->req_nr, 0); | ||
362 | |||
363 | nvgpu_atomic64_set(&arb->alarm_mask, 0); | ||
364 | err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, | ||
365 | DEFAULT_EVENT_NUMBER); | ||
366 | if (err < 0) | ||
367 | goto init_fail; | ||
368 | |||
369 | INIT_LIST_HEAD_RCU(&arb->users); | ||
370 | INIT_LIST_HEAD_RCU(&arb->sessions); | ||
371 | init_llist_head(&arb->requests); | ||
372 | |||
373 | nvgpu_cond_init(&arb->request_wq); | ||
374 | arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, | ||
375 | "vf_table_update"); | ||
376 | arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, | ||
377 | "arbiter_update"); | ||
378 | |||
379 | |||
380 | INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb); | ||
381 | |||
382 | INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); | ||
383 | |||
384 | #ifdef CONFIG_DEBUG_FS | ||
385 | arb->debug = &arb->debug_pool[0]; | ||
386 | |||
387 | if (!arb->debugfs_set) { | ||
388 | if (nvgpu_clk_arb_debugfs_init(g)) | ||
389 | arb->debugfs_set = true; | ||
390 | } | ||
391 | #endif | ||
392 | err = clk_vf_point_cache(g); | ||
393 | if (err < 0) | ||
394 | goto init_fail; | ||
395 | |||
396 | err = nvgpu_clk_arb_update_vf_table(arb); | ||
397 | if (err < 0) | ||
398 | goto init_fail; | ||
399 | do { | ||
400 | /* Check that first run is completed */ | ||
401 | nvgpu_smp_mb(); | ||
402 | NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq, | ||
403 | nvgpu_atomic_read(&arb->req_nr), 0); | ||
404 | } while (!nvgpu_atomic_read(&arb->req_nr)); | ||
405 | |||
406 | |||
407 | return arb->status; | ||
408 | |||
409 | init_fail: | ||
410 | nvgpu_kfree(g, arb->gpc2clk_f_points); | ||
411 | nvgpu_kfree(g, arb->mclk_f_points); | ||
412 | |||
413 | for (index = 0; index < 2; index++) { | ||
414 | nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); | ||
415 | nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); | ||
416 | } | ||
417 | |||
418 | nvgpu_mutex_destroy(&arb->pstate_lock); | ||
419 | |||
420 | mutex_fail: | ||
421 | nvgpu_kfree(g, arb); | ||
422 | |||
423 | return err; | ||
424 | } | ||
425 | |||
426 | void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g) | ||
427 | { | ||
428 | nvgpu_clk_arb_schedule_alarm(g, | ||
429 | (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD)); | ||
430 | } | ||
431 | |||
432 | void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm) | ||
433 | { | ||
434 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
435 | |||
436 | nvgpu_clk_arb_set_global_alarm(g, alarm); | ||
437 | if (arb->update_work_queue) | ||
438 | queue_work(arb->update_work_queue, &arb->update_fn_work); | ||
439 | } | ||
440 | |||
441 | static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) | ||
442 | { | ||
443 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
444 | 73 | ||
445 | u64 current_mask; | 74 | u32 queue_index; |
446 | u32 refcnt; | 75 | u64 timestamp; |
447 | u32 alarm_mask; | ||
448 | u64 new_mask; | ||
449 | 76 | ||
450 | do { | 77 | queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size; |
451 | current_mask = nvgpu_atomic64_read(&arb->alarm_mask); | 78 | /* get current timestamp */ |
452 | /* atomic operations are strong so they do not need masks */ | 79 | timestamp = (u64) sched_clock(); |
453 | 80 | ||
454 | refcnt = ((u32) (current_mask >> 32)) + 1; | 81 | queue->notifications[queue_index].timestamp = timestamp; |
455 | alarm_mask = (u32) (current_mask & ~alarm); | 82 | queue->notifications[queue_index].notification = alarm_mask; |
456 | new_mask = ((u64) refcnt << 32) | alarm_mask; | ||
457 | 83 | ||
458 | } while (unlikely(current_mask != | ||
459 | (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask, | ||
460 | current_mask, new_mask))); | ||
461 | } | 84 | } |
462 | 85 | ||
463 | static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) | 86 | static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) |
@@ -482,231 +105,8 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) | |||
482 | current_mask, new_mask))); | 105 | current_mask, new_mask))); |
483 | 106 | ||
484 | nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); | 107 | nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); |
485 | |||
486 | } | ||
487 | |||
488 | void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) | ||
489 | { | ||
490 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
491 | int index; | ||
492 | |||
493 | if (arb) { | ||
494 | cancel_work_sync(&arb->vf_table_fn_work); | ||
495 | destroy_workqueue(arb->vf_table_work_queue); | ||
496 | arb->vf_table_work_queue = NULL; | ||
497 | |||
498 | cancel_work_sync(&arb->update_fn_work); | ||
499 | destroy_workqueue(arb->update_work_queue); | ||
500 | arb->update_work_queue = NULL; | ||
501 | |||
502 | nvgpu_kfree(g, arb->gpc2clk_f_points); | ||
503 | nvgpu_kfree(g, arb->mclk_f_points); | ||
504 | |||
505 | for (index = 0; index < 2; index++) { | ||
506 | nvgpu_kfree(g, | ||
507 | arb->vf_table_pool[index].gpc2clk_points); | ||
508 | nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); | ||
509 | } | ||
510 | nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); | ||
511 | nvgpu_kfree(g, g->clk_arb); | ||
512 | g->clk_arb = NULL; | ||
513 | } | ||
514 | } | ||
515 | |||
516 | static int nvgpu_clk_arb_install_fd(struct gk20a *g, | ||
517 | struct nvgpu_clk_session *session, | ||
518 | const struct file_operations *fops, | ||
519 | struct nvgpu_clk_dev **_dev) | ||
520 | { | ||
521 | struct file *file; | ||
522 | int fd; | ||
523 | int err; | ||
524 | int status; | ||
525 | char name[64]; | ||
526 | struct nvgpu_clk_dev *dev; | ||
527 | |||
528 | gk20a_dbg_fn(""); | ||
529 | |||
530 | dev = nvgpu_kzalloc(g, sizeof(*dev)); | ||
531 | if (!dev) | ||
532 | return -ENOMEM; | ||
533 | |||
534 | status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, | ||
535 | DEFAULT_EVENT_NUMBER); | ||
536 | if (status < 0) { | ||
537 | err = status; | ||
538 | goto fail; | ||
539 | } | ||
540 | |||
541 | fd = get_unused_fd_flags(O_RDWR); | ||
542 | if (fd < 0) { | ||
543 | err = fd; | ||
544 | goto fail; | ||
545 | } | ||
546 | |||
547 | snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); | ||
548 | file = anon_inode_getfile(name, fops, dev, O_RDWR); | ||
549 | if (IS_ERR(file)) { | ||
550 | err = PTR_ERR(file); | ||
551 | goto fail_fd; | ||
552 | } | ||
553 | |||
554 | fd_install(fd, file); | ||
555 | |||
556 | nvgpu_cond_init(&dev->readout_wq); | ||
557 | |||
558 | nvgpu_atomic_set(&dev->poll_mask, 0); | ||
559 | |||
560 | dev->session = session; | ||
561 | nvgpu_ref_init(&dev->refcount); | ||
562 | |||
563 | nvgpu_ref_get(&session->refcount); | ||
564 | |||
565 | *_dev = dev; | ||
566 | |||
567 | return fd; | ||
568 | |||
569 | fail_fd: | ||
570 | put_unused_fd(fd); | ||
571 | fail: | ||
572 | nvgpu_kfree(g, dev); | ||
573 | |||
574 | return err; | ||
575 | } | 108 | } |
576 | 109 | ||
577 | int nvgpu_clk_arb_init_session(struct gk20a *g, | ||
578 | struct nvgpu_clk_session **_session) | ||
579 | { | ||
580 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
581 | struct nvgpu_clk_session *session = *(_session); | ||
582 | |||
583 | gk20a_dbg_fn(""); | ||
584 | |||
585 | if (!g->ops.clk_arb.get_arbiter_clk_domains) | ||
586 | return 0; | ||
587 | |||
588 | session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session)); | ||
589 | if (!session) | ||
590 | return -ENOMEM; | ||
591 | session->g = g; | ||
592 | |||
593 | nvgpu_ref_init(&session->refcount); | ||
594 | |||
595 | session->zombie = false; | ||
596 | session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8; | ||
597 | /* make sure that the initialization of the pool is visible | ||
598 | * before the update | ||
599 | */ | ||
600 | nvgpu_smp_wmb(); | ||
601 | session->target = &session->target_pool[0]; | ||
602 | |||
603 | init_llist_head(&session->targets); | ||
604 | |||
605 | nvgpu_spinlock_acquire(&arb->sessions_lock); | ||
606 | list_add_tail_rcu(&session->link, &arb->sessions); | ||
607 | nvgpu_spinlock_release(&arb->sessions_lock); | ||
608 | |||
609 | *_session = session; | ||
610 | |||
611 | return 0; | ||
612 | } | ||
613 | |||
614 | static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount) | ||
615 | { | ||
616 | struct nvgpu_clk_dev *dev = container_of(refcount, | ||
617 | struct nvgpu_clk_dev, refcount); | ||
618 | struct nvgpu_clk_session *session = dev->session; | ||
619 | |||
620 | nvgpu_kfree(session->g, dev); | ||
621 | } | ||
622 | |||
623 | static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount) | ||
624 | { | ||
625 | struct nvgpu_clk_session *session = container_of(refcount, | ||
626 | struct nvgpu_clk_session, refcount); | ||
627 | struct nvgpu_clk_arb *arb = session->g->clk_arb; | ||
628 | struct gk20a *g = session->g; | ||
629 | struct nvgpu_clk_dev *dev, *tmp; | ||
630 | struct llist_node *head; | ||
631 | |||
632 | gk20a_dbg_fn(""); | ||
633 | |||
634 | if (arb) { | ||
635 | nvgpu_spinlock_acquire(&arb->sessions_lock); | ||
636 | list_del_rcu(&session->link); | ||
637 | nvgpu_spinlock_release(&arb->sessions_lock); | ||
638 | } | ||
639 | |||
640 | head = llist_del_all(&session->targets); | ||
641 | llist_for_each_entry_safe(dev, tmp, head, node) { | ||
642 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
643 | } | ||
644 | synchronize_rcu(); | ||
645 | nvgpu_kfree(g, session); | ||
646 | } | ||
647 | |||
648 | void nvgpu_clk_arb_release_session(struct gk20a *g, | ||
649 | struct nvgpu_clk_session *session) | ||
650 | { | ||
651 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
652 | |||
653 | gk20a_dbg_fn(""); | ||
654 | |||
655 | session->zombie = true; | ||
656 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | ||
657 | if (arb && arb->update_work_queue) | ||
658 | queue_work(arb->update_work_queue, &arb->update_fn_work); | ||
659 | } | ||
660 | |||
661 | int nvgpu_clk_arb_install_event_fd(struct gk20a *g, | ||
662 | struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask) | ||
663 | { | ||
664 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
665 | struct nvgpu_clk_dev *dev; | ||
666 | int fd; | ||
667 | |||
668 | gk20a_dbg_fn(""); | ||
669 | |||
670 | fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); | ||
671 | if (fd < 0) | ||
672 | return fd; | ||
673 | |||
674 | /* TODO: alarm mask needs to be set to default value to prevent | ||
675 | * failures of legacy tests. This will be removed when sanity is | ||
676 | * updated | ||
677 | */ | ||
678 | if (alarm_mask) | ||
679 | nvgpu_atomic_set(&dev->enabled_mask, alarm_mask); | ||
680 | else | ||
681 | nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); | ||
682 | |||
683 | dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); | ||
684 | |||
685 | nvgpu_spinlock_acquire(&arb->users_lock); | ||
686 | list_add_tail_rcu(&dev->link, &arb->users); | ||
687 | nvgpu_spinlock_release(&arb->users_lock); | ||
688 | |||
689 | *event_fd = fd; | ||
690 | |||
691 | return 0; | ||
692 | } | ||
693 | |||
694 | int nvgpu_clk_arb_install_request_fd(struct gk20a *g, | ||
695 | struct nvgpu_clk_session *session, int *request_fd) | ||
696 | { | ||
697 | struct nvgpu_clk_dev *dev; | ||
698 | int fd; | ||
699 | |||
700 | gk20a_dbg_fn(""); | ||
701 | |||
702 | fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); | ||
703 | if (fd < 0) | ||
704 | return fd; | ||
705 | |||
706 | *request_fd = fd; | ||
707 | |||
708 | return 0; | ||
709 | } | ||
710 | 110 | ||
711 | static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) | 111 | static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) |
712 | { | 112 | { |
@@ -1014,13 +414,6 @@ exit_vf_table: | |||
1014 | return status; | 414 | return status; |
1015 | } | 415 | } |
1016 | 416 | ||
1017 | void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g) | ||
1018 | { | ||
1019 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
1020 | |||
1021 | if (arb->vf_table_work_queue) | ||
1022 | queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work); | ||
1023 | } | ||
1024 | 417 | ||
1025 | static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work) | 418 | static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work) |
1026 | { | 419 | { |
@@ -1044,6 +437,305 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work) | |||
1044 | nvgpu_clk_arb_update_vf_table(arb); | 437 | nvgpu_clk_arb_update_vf_table(arb); |
1045 | } | 438 | } |
1046 | 439 | ||
440 | static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, | ||
441 | u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, | ||
442 | u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram) | ||
443 | { | ||
444 | u16 gpc2clk_target, mclk_target; | ||
445 | u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; | ||
446 | u32 mclk_voltuv, mclk_voltuv_sram; | ||
447 | u32 pstate = VF_POINT_INVALID_PSTATE; | ||
448 | struct nvgpu_clk_vf_table *table; | ||
449 | u32 index, index_mclk; | ||
450 | struct nvgpu_clk_vf_point *mclk_vf = NULL; | ||
451 | |||
452 | do { | ||
453 | gpc2clk_target = *gpc2clk; | ||
454 | mclk_target = *mclk; | ||
455 | gpc2clk_voltuv = 0; | ||
456 | gpc2clk_voltuv_sram = 0; | ||
457 | mclk_voltuv = 0; | ||
458 | mclk_voltuv_sram = 0; | ||
459 | |||
460 | table = NV_ACCESS_ONCE(arb->current_vf_table); | ||
461 | /* pointer to table can be updated by callback */ | ||
462 | nvgpu_smp_rmb(); | ||
463 | |||
464 | if (!table) | ||
465 | continue; | ||
466 | if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) { | ||
467 | nvgpu_err(arb->g, "found empty table"); | ||
468 | goto find_exit; | ||
469 | } | ||
470 | /* First we check MCLK to find out which PSTATE we are | ||
471 | * are requesting, and from there try to find the minimum | ||
472 | * GPC2CLK on the same PSTATE that satisfies the request. | ||
473 | * If no GPC2CLK can be found, then we need to up the PSTATE | ||
474 | */ | ||
475 | |||
476 | recalculate_vf_point: | ||
477 | for (index = 0; index < table->mclk_num_points; index++) { | ||
478 | if (table->mclk_points[index].mem_mhz >= mclk_target) { | ||
479 | mclk_vf = &table->mclk_points[index]; | ||
480 | break; | ||
481 | } | ||
482 | } | ||
483 | if (index == table->mclk_num_points) { | ||
484 | mclk_vf = &table->mclk_points[index-1]; | ||
485 | index = table->mclk_num_points - 1; | ||
486 | } | ||
487 | index_mclk = index; | ||
488 | |||
489 | /* round up the freq requests */ | ||
490 | for (index = 0; index < table->gpc2clk_num_points; index++) { | ||
491 | pstate = VF_POINT_COMMON_PSTATE( | ||
492 | &table->gpc2clk_points[index], mclk_vf); | ||
493 | |||
494 | if ((table->gpc2clk_points[index].gpc_mhz >= | ||
495 | gpc2clk_target) && | ||
496 | (pstate != VF_POINT_INVALID_PSTATE)) { | ||
497 | gpc2clk_target = | ||
498 | table->gpc2clk_points[index].gpc_mhz; | ||
499 | *sys2clk = | ||
500 | table->gpc2clk_points[index].sys_mhz; | ||
501 | *xbar2clk = | ||
502 | table->gpc2clk_points[index].xbar_mhz; | ||
503 | |||
504 | gpc2clk_voltuv = | ||
505 | table->gpc2clk_points[index].uvolt; | ||
506 | gpc2clk_voltuv_sram = | ||
507 | table->gpc2clk_points[index].uvolt_sram; | ||
508 | break; | ||
509 | } | ||
510 | } | ||
511 | |||
512 | if (index == table->gpc2clk_num_points) { | ||
513 | pstate = VF_POINT_COMMON_PSTATE( | ||
514 | &table->gpc2clk_points[index-1], mclk_vf); | ||
515 | if (pstate != VF_POINT_INVALID_PSTATE) { | ||
516 | gpc2clk_target = | ||
517 | table->gpc2clk_points[index-1].gpc_mhz; | ||
518 | *sys2clk = | ||
519 | table->gpc2clk_points[index-1].sys_mhz; | ||
520 | *xbar2clk = | ||
521 | table->gpc2clk_points[index-1].xbar_mhz; | ||
522 | |||
523 | gpc2clk_voltuv = | ||
524 | table->gpc2clk_points[index-1].uvolt; | ||
525 | gpc2clk_voltuv_sram = | ||
526 | table->gpc2clk_points[index-1]. | ||
527 | uvolt_sram; | ||
528 | } else if (index_mclk >= table->mclk_num_points - 1) { | ||
529 | /* There is no available combination of MCLK | ||
530 | * and GPC2CLK, we need to fail this | ||
531 | */ | ||
532 | gpc2clk_target = 0; | ||
533 | mclk_target = 0; | ||
534 | pstate = VF_POINT_INVALID_PSTATE; | ||
535 | goto find_exit; | ||
536 | } else { | ||
537 | /* recalculate with higher PSTATE */ | ||
538 | gpc2clk_target = *gpc2clk; | ||
539 | mclk_target = table->mclk_points[index_mclk+1]. | ||
540 | mem_mhz; | ||
541 | goto recalculate_vf_point; | ||
542 | } | ||
543 | } | ||
544 | |||
545 | mclk_target = mclk_vf->mem_mhz; | ||
546 | mclk_voltuv = mclk_vf->uvolt; | ||
547 | mclk_voltuv_sram = mclk_vf->uvolt_sram; | ||
548 | |||
549 | } while (!table || | ||
550 | (NV_ACCESS_ONCE(arb->current_vf_table) != table)); | ||
551 | |||
552 | find_exit: | ||
553 | *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; | ||
554 | *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? | ||
555 | gpc2clk_voltuv_sram : mclk_voltuv_sram; | ||
556 | /* noise unaware vmin */ | ||
557 | *nuvmin = mclk_voltuv; | ||
558 | *nuvmin_sram = mclk_voltuv_sram; | ||
559 | *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk; | ||
560 | *mclk = mclk_target; | ||
561 | return pstate; | ||
562 | } | ||
563 | |||
564 | static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, | ||
565 | u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, | ||
566 | u32 voltuv_sram) | ||
567 | { | ||
568 | struct set_fll_clk fllclk; | ||
569 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
570 | int status; | ||
571 | |||
572 | fllclk.gpc2clkmhz = gpc2clk_target; | ||
573 | fllclk.sys2clkmhz = sys2clk_target; | ||
574 | fllclk.xbar2clkmhz = xbar2clk_target; | ||
575 | |||
576 | fllclk.voltuv = voltuv; | ||
577 | |||
578 | /* if voltage ascends we do: | ||
579 | * (1) FLL change | ||
580 | * (2) Voltage change | ||
581 | * (3) MCLK change | ||
582 | * If it goes down | ||
583 | * (1) MCLK change | ||
584 | * (2) Voltage change | ||
585 | * (3) FLL change | ||
586 | */ | ||
587 | |||
588 | /* descending */ | ||
589 | if (voltuv < arb->voltuv_actual) { | ||
590 | status = g->ops.clk.mclk_change(g, mclk_target); | ||
591 | if (status < 0) | ||
592 | return status; | ||
593 | |||
594 | status = volt_set_voltage(g, voltuv, voltuv_sram); | ||
595 | if (status < 0) | ||
596 | return status; | ||
597 | |||
598 | status = clk_set_fll_clks(g, &fllclk); | ||
599 | if (status < 0) | ||
600 | return status; | ||
601 | } else { | ||
602 | status = clk_set_fll_clks(g, &fllclk); | ||
603 | if (status < 0) | ||
604 | return status; | ||
605 | |||
606 | status = volt_set_voltage(g, voltuv, voltuv_sram); | ||
607 | if (status < 0) | ||
608 | return status; | ||
609 | |||
610 | status = g->ops.clk.mclk_change(g, mclk_target); | ||
611 | if (status < 0) | ||
612 | return status; | ||
613 | } | ||
614 | |||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, | ||
619 | struct nvgpu_clk_arb_target *target, | ||
620 | u32 alarm) { | ||
621 | |||
622 | struct nvgpu_clk_session *session = dev->session; | ||
623 | struct nvgpu_clk_arb *arb = session->g->clk_arb; | ||
624 | struct nvgpu_clk_notification *notification; | ||
625 | |||
626 | u32 queue_alarm_mask = 0; | ||
627 | u32 enabled_mask = 0; | ||
628 | u32 new_alarms_reported = 0; | ||
629 | u32 poll_mask = 0; | ||
630 | u32 tail, head; | ||
631 | u32 queue_index; | ||
632 | size_t size; | ||
633 | int index; | ||
634 | |||
635 | enabled_mask = nvgpu_atomic_read(&dev->enabled_mask); | ||
636 | size = arb->notification_queue.size; | ||
637 | |||
638 | /* queue global arbiter notifications in buffer */ | ||
639 | do { | ||
640 | tail = nvgpu_atomic_read(&arb->notification_queue.tail); | ||
641 | /* copy items to the queue */ | ||
642 | queue_index = nvgpu_atomic_read(&dev->queue.tail); | ||
643 | head = dev->arb_queue_head; | ||
644 | head = (tail - head) < arb->notification_queue.size ? | ||
645 | head : tail - arb->notification_queue.size; | ||
646 | |||
647 | for (index = head; _WRAPGTEQ(tail, index); index++) { | ||
648 | u32 alarm_detected; | ||
649 | |||
650 | notification = &arb->notification_queue. | ||
651 | notifications[(index+1) % size]; | ||
652 | alarm_detected = | ||
653 | NV_ACCESS_ONCE(notification->notification); | ||
654 | |||
655 | if (!(enabled_mask & alarm_detected)) | ||
656 | continue; | ||
657 | |||
658 | queue_index++; | ||
659 | dev->queue.notifications[ | ||
660 | queue_index % dev->queue.size].timestamp = | ||
661 | NV_ACCESS_ONCE(notification->timestamp); | ||
662 | |||
663 | dev->queue.notifications[ | ||
664 | queue_index % dev->queue.size].notification = | ||
665 | alarm_detected; | ||
666 | |||
667 | queue_alarm_mask |= alarm_detected; | ||
668 | } | ||
669 | } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) != | ||
670 | (int)tail)); | ||
671 | |||
672 | nvgpu_atomic_set(&dev->queue.tail, queue_index); | ||
673 | /* update the last notification we processed from global queue */ | ||
674 | |||
675 | dev->arb_queue_head = tail; | ||
676 | |||
677 | /* Check if current session targets are met */ | ||
678 | if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) { | ||
679 | if ((target->gpc2clk < session->target->gpc2clk) | ||
680 | || (target->mclk < session->target->mclk)) { | ||
681 | |||
682 | poll_mask |= (POLLIN | POLLPRI); | ||
683 | nvgpu_clk_arb_queue_notification(arb->g, &dev->queue, | ||
684 | EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)); | ||
685 | } | ||
686 | } | ||
687 | |||
688 | /* Check if there is a new VF update */ | ||
689 | if (queue_alarm_mask & EVENT(VF_UPDATE)) | ||
690 | poll_mask |= (POLLIN | POLLRDNORM); | ||
691 | |||
692 | /* Notify sticky alarms that were not reported on previous run*/ | ||
693 | new_alarms_reported = (queue_alarm_mask | | ||
694 | (alarm & ~dev->alarms_reported & queue_alarm_mask)); | ||
695 | |||
696 | if (new_alarms_reported & ~LOCAL_ALARM_MASK) { | ||
697 | /* check that we are not re-reporting */ | ||
698 | if (new_alarms_reported & EVENT(ALARM_GPU_LOST)) | ||
699 | poll_mask |= POLLHUP; | ||
700 | |||
701 | poll_mask |= (POLLIN | POLLPRI); | ||
702 | /* On next run do not report global alarms that were already | ||
703 | * reported, but report SHUTDOWN always | ||
704 | */ | ||
705 | dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK & | ||
706 | ~EVENT(ALARM_GPU_LOST); | ||
707 | } | ||
708 | |||
709 | if (poll_mask) { | ||
710 | nvgpu_atomic_set(&dev->poll_mask, poll_mask); | ||
711 | nvgpu_cond_broadcast_interruptible(&dev->readout_wq); | ||
712 | } | ||
713 | |||
714 | return new_alarms_reported; | ||
715 | } | ||
716 | |||
717 | static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) | ||
718 | { | ||
719 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
720 | |||
721 | u64 current_mask; | ||
722 | u32 refcnt; | ||
723 | u32 alarm_mask; | ||
724 | u64 new_mask; | ||
725 | |||
726 | do { | ||
727 | current_mask = nvgpu_atomic64_read(&arb->alarm_mask); | ||
728 | /* atomic operations are strong so they do not need masks */ | ||
729 | |||
730 | refcnt = ((u32) (current_mask >> 32)) + 1; | ||
731 | alarm_mask = (u32) (current_mask & ~alarm); | ||
732 | new_mask = ((u64) refcnt << 32) | alarm_mask; | ||
733 | |||
734 | } while (unlikely(current_mask != | ||
735 | (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask, | ||
736 | current_mask, new_mask))); | ||
737 | } | ||
738 | |||
1047 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) | 739 | static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) |
1048 | { | 740 | { |
1049 | struct nvgpu_clk_arb *arb = | 741 | struct nvgpu_clk_arb *arb = |
@@ -1345,639 +1037,283 @@ exit_arb: | |||
1345 | ~EVENT(ALARM_GPU_LOST)); | 1037 | ~EVENT(ALARM_GPU_LOST)); |
1346 | } | 1038 | } |
1347 | 1039 | ||
1348 | static void nvgpu_clk_arb_queue_notification(struct gk20a *g, | 1040 | int nvgpu_clk_arb_init_arbiter(struct gk20a *g) |
1349 | struct nvgpu_clk_notification_queue *queue, | 1041 | { |
1350 | u32 alarm_mask) { | 1042 | struct nvgpu_clk_arb *arb; |
1351 | 1043 | u16 default_mhz; | |
1352 | u32 queue_index; | 1044 | int err; |
1353 | u64 timestamp; | ||
1354 | |||
1355 | queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size; | ||
1356 | /* get current timestamp */ | ||
1357 | timestamp = (u64) sched_clock(); | ||
1358 | |||
1359 | queue->notifications[queue_index].timestamp = timestamp; | ||
1360 | queue->notifications[queue_index].notification = alarm_mask; | ||
1361 | |||
1362 | } | ||
1363 | |||
1364 | static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, | ||
1365 | struct nvgpu_clk_arb_target *target, | ||
1366 | u32 alarm) { | ||
1367 | |||
1368 | struct nvgpu_clk_session *session = dev->session; | ||
1369 | struct nvgpu_clk_arb *arb = session->g->clk_arb; | ||
1370 | struct nvgpu_clk_notification *notification; | ||
1371 | |||
1372 | u32 queue_alarm_mask = 0; | ||
1373 | u32 enabled_mask = 0; | ||
1374 | u32 new_alarms_reported = 0; | ||
1375 | u32 poll_mask = 0; | ||
1376 | u32 tail, head; | ||
1377 | u32 queue_index; | ||
1378 | size_t size; | ||
1379 | int index; | 1045 | int index; |
1046 | struct nvgpu_clk_vf_table *table; | ||
1380 | 1047 | ||
1381 | enabled_mask = nvgpu_atomic_read(&dev->enabled_mask); | 1048 | gk20a_dbg_fn(""); |
1382 | size = arb->notification_queue.size; | ||
1383 | |||
1384 | /* queue global arbiter notifications in buffer */ | ||
1385 | do { | ||
1386 | tail = nvgpu_atomic_read(&arb->notification_queue.tail); | ||
1387 | /* copy items to the queue */ | ||
1388 | queue_index = nvgpu_atomic_read(&dev->queue.tail); | ||
1389 | head = dev->arb_queue_head; | ||
1390 | head = (tail - head) < arb->notification_queue.size ? | ||
1391 | head : tail - arb->notification_queue.size; | ||
1392 | |||
1393 | for (index = head; _WRAPGTEQ(tail, index); index++) { | ||
1394 | u32 alarm_detected; | ||
1395 | |||
1396 | notification = &arb->notification_queue. | ||
1397 | notifications[(index+1) % size]; | ||
1398 | alarm_detected = | ||
1399 | NV_ACCESS_ONCE(notification->notification); | ||
1400 | |||
1401 | if (!(enabled_mask & alarm_detected)) | ||
1402 | continue; | ||
1403 | |||
1404 | queue_index++; | ||
1405 | dev->queue.notifications[ | ||
1406 | queue_index % dev->queue.size].timestamp = | ||
1407 | NV_ACCESS_ONCE(notification->timestamp); | ||
1408 | |||
1409 | dev->queue.notifications[ | ||
1410 | queue_index % dev->queue.size].notification = | ||
1411 | alarm_detected; | ||
1412 | |||
1413 | queue_alarm_mask |= alarm_detected; | ||
1414 | } | ||
1415 | } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) != | ||
1416 | (int)tail)); | ||
1417 | |||
1418 | nvgpu_atomic_set(&dev->queue.tail, queue_index); | ||
1419 | /* update the last notification we processed from global queue */ | ||
1420 | |||
1421 | dev->arb_queue_head = tail; | ||
1422 | |||
1423 | /* Check if current session targets are met */ | ||
1424 | if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) { | ||
1425 | if ((target->gpc2clk < session->target->gpc2clk) | ||
1426 | || (target->mclk < session->target->mclk)) { | ||
1427 | |||
1428 | poll_mask |= (POLLIN | POLLPRI); | ||
1429 | nvgpu_clk_arb_queue_notification(arb->g, &dev->queue, | ||
1430 | EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)); | ||
1431 | } | ||
1432 | } | ||
1433 | 1049 | ||
1434 | /* Check if there is a new VF update */ | 1050 | if (!g->ops.clk_arb.get_arbiter_clk_domains) |
1435 | if (queue_alarm_mask & EVENT(VF_UPDATE)) | 1051 | return 0; |
1436 | poll_mask |= (POLLIN | POLLRDNORM); | ||
1437 | 1052 | ||
1438 | /* Notify sticky alarms that were not reported on previous run*/ | 1053 | arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); |
1439 | new_alarms_reported = (queue_alarm_mask | | 1054 | if (!arb) |
1440 | (alarm & ~dev->alarms_reported & queue_alarm_mask)); | 1055 | return -ENOMEM; |
1441 | 1056 | ||
1442 | if (new_alarms_reported & ~LOCAL_ALARM_MASK) { | 1057 | err = nvgpu_mutex_init(&arb->pstate_lock); |
1443 | /* check that we are not re-reporting */ | 1058 | if (err) |
1444 | if (new_alarms_reported & EVENT(ALARM_GPU_LOST)) | 1059 | goto mutex_fail; |
1445 | poll_mask |= POLLHUP; | 1060 | nvgpu_spinlock_init(&arb->sessions_lock); |
1061 | nvgpu_spinlock_init(&arb->users_lock); | ||
1446 | 1062 | ||
1447 | poll_mask |= (POLLIN | POLLPRI); | 1063 | arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); |
1448 | /* On next run do not report global alarms that were already | 1064 | if (!arb->mclk_f_points) { |
1449 | * reported, but report SHUTDOWN always | 1065 | err = -ENOMEM; |
1450 | */ | 1066 | goto init_fail; |
1451 | dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK & | ||
1452 | ~EVENT(ALARM_GPU_LOST); | ||
1453 | } | 1067 | } |
1454 | 1068 | ||
1455 | if (poll_mask) { | 1069 | arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); |
1456 | nvgpu_atomic_set(&dev->poll_mask, poll_mask); | 1070 | if (!arb->gpc2clk_f_points) { |
1457 | nvgpu_cond_broadcast_interruptible(&dev->readout_wq); | 1071 | err = -ENOMEM; |
1072 | goto init_fail; | ||
1458 | } | 1073 | } |
1459 | 1074 | ||
1460 | return new_alarms_reported; | 1075 | for (index = 0; index < 2; index++) { |
1461 | } | 1076 | table = &arb->vf_table_pool[index]; |
1462 | 1077 | table->gpc2clk_num_points = MAX_F_POINTS; | |
1463 | static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, | 1078 | table->mclk_num_points = MAX_F_POINTS; |
1464 | struct nvgpu_gpu_set_event_filter_args *args) | ||
1465 | { | ||
1466 | u32 mask; | ||
1467 | |||
1468 | gk20a_dbg(gpu_dbg_fn, ""); | ||
1469 | |||
1470 | if (args->flags) | ||
1471 | return -EINVAL; | ||
1472 | |||
1473 | if (args->size != 1) | ||
1474 | return -EINVAL; | ||
1475 | |||
1476 | if (copy_from_user(&mask, (void __user *) args->buffer, | ||
1477 | args->size * sizeof(u32))) | ||
1478 | return -EFAULT; | ||
1479 | |||
1480 | /* update alarm mask */ | ||
1481 | nvgpu_atomic_set(&dev->enabled_mask, mask); | ||
1482 | |||
1483 | return 0; | ||
1484 | } | ||
1485 | |||
1486 | static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, | ||
1487 | unsigned long arg) | ||
1488 | { | ||
1489 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
1490 | struct gk20a *g = dev->session->g; | ||
1491 | u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; | ||
1492 | int err = 0; | ||
1493 | |||
1494 | gk20a_dbg(gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); | ||
1495 | |||
1496 | if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) | ||
1497 | || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) | ||
1498 | return -EINVAL; | ||
1499 | 1079 | ||
1500 | BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); | 1080 | table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS, |
1081 | sizeof(struct nvgpu_clk_vf_point)); | ||
1082 | if (!table->gpc2clk_points) { | ||
1083 | err = -ENOMEM; | ||
1084 | goto init_fail; | ||
1085 | } | ||
1501 | 1086 | ||
1502 | memset(buf, 0, sizeof(buf)); | ||
1503 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
1504 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
1505 | return -EFAULT; | ||
1506 | } | ||
1507 | 1087 | ||
1508 | switch (cmd) { | 1088 | table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS, |
1509 | case NVGPU_EVENT_IOCTL_SET_FILTER: | 1089 | sizeof(struct nvgpu_clk_vf_point)); |
1510 | err = nvgpu_clk_arb_set_event_filter(dev, | 1090 | if (!table->mclk_points) { |
1511 | (struct nvgpu_gpu_set_event_filter_args *)buf); | 1091 | err = -ENOMEM; |
1512 | break; | 1092 | goto init_fail; |
1513 | default: | 1093 | } |
1514 | nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); | ||
1515 | err = -ENOTTY; | ||
1516 | } | 1094 | } |
1517 | 1095 | ||
1518 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | 1096 | g->clk_arb = arb; |
1519 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); | 1097 | arb->g = g; |
1520 | |||
1521 | return err; | ||
1522 | } | ||
1523 | |||
1524 | int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, | ||
1525 | struct nvgpu_clk_session *session, int request_fd) | ||
1526 | { | ||
1527 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
1528 | struct nvgpu_clk_dev *dev; | ||
1529 | struct fd fd; | ||
1530 | int err = 0; | ||
1531 | |||
1532 | gk20a_dbg_fn(""); | ||
1533 | |||
1534 | fd = fdget(request_fd); | ||
1535 | if (!fd.file) | ||
1536 | return -EINVAL; | ||
1537 | 1098 | ||
1538 | if (fd.file->f_op != &completion_dev_ops) { | 1099 | err = g->ops.clk_arb.get_arbiter_clk_default(g, |
1100 | CTRL_CLK_DOMAIN_MCLK, &default_mhz); | ||
1101 | if (err < 0) { | ||
1539 | err = -EINVAL; | 1102 | err = -EINVAL; |
1540 | goto fdput_fd; | 1103 | goto init_fail; |
1541 | } | 1104 | } |
1542 | 1105 | ||
1543 | dev = (struct nvgpu_clk_dev *) fd.file->private_data; | 1106 | arb->mclk_default_mhz = default_mhz; |
1544 | 1107 | ||
1545 | if (!dev || dev->session != session) { | 1108 | err = g->ops.clk_arb.get_arbiter_clk_default(g, |
1109 | CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz); | ||
1110 | if (err < 0) { | ||
1546 | err = -EINVAL; | 1111 | err = -EINVAL; |
1547 | goto fdput_fd; | 1112 | goto init_fail; |
1548 | } | 1113 | } |
1549 | nvgpu_ref_get(&dev->refcount); | ||
1550 | llist_add(&dev->node, &session->targets); | ||
1551 | if (arb->update_work_queue) | ||
1552 | queue_work(arb->update_work_queue, &arb->update_fn_work); | ||
1553 | 1114 | ||
1554 | fdput_fd: | 1115 | arb->gpc2clk_default_mhz = default_mhz; |
1555 | fdput(fd); | ||
1556 | return err; | ||
1557 | } | ||
1558 | 1116 | ||
1559 | static inline u32 __pending_event(struct nvgpu_clk_dev *dev, | 1117 | arb->actual = &arb->actual_pool[0]; |
1560 | struct nvgpu_gpu_event_info *info) { | ||
1561 | 1118 | ||
1562 | u32 tail, head; | 1119 | nvgpu_atomic_set(&arb->req_nr, 0); |
1563 | u32 events = 0; | ||
1564 | struct nvgpu_clk_notification *p_notif; | ||
1565 | 1120 | ||
1566 | tail = nvgpu_atomic_read(&dev->queue.tail); | 1121 | nvgpu_atomic64_set(&arb->alarm_mask, 0); |
1567 | head = nvgpu_atomic_read(&dev->queue.head); | 1122 | err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, |
1123 | DEFAULT_EVENT_NUMBER); | ||
1124 | if (err < 0) | ||
1125 | goto init_fail; | ||
1568 | 1126 | ||
1569 | head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; | 1127 | INIT_LIST_HEAD_RCU(&arb->users); |
1128 | INIT_LIST_HEAD_RCU(&arb->sessions); | ||
1129 | init_llist_head(&arb->requests); | ||
1570 | 1130 | ||
1571 | if (_WRAPGTEQ(tail, head) && info) { | 1131 | nvgpu_cond_init(&arb->request_wq); |
1572 | head++; | 1132 | arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, |
1573 | p_notif = &dev->queue.notifications[head % dev->queue.size]; | 1133 | "vf_table_update"); |
1574 | events |= p_notif->notification; | 1134 | arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1, |
1575 | info->event_id = ffs(events) - 1; | 1135 | "arbiter_update"); |
1576 | info->timestamp = p_notif->timestamp; | ||
1577 | nvgpu_atomic_set(&dev->queue.head, head); | ||
1578 | } | ||
1579 | 1136 | ||
1580 | return events; | ||
1581 | } | ||
1582 | 1137 | ||
1583 | static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, | 1138 | INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb); |
1584 | size_t size, loff_t *off) | ||
1585 | { | ||
1586 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
1587 | struct nvgpu_gpu_event_info info; | ||
1588 | ssize_t err; | ||
1589 | 1139 | ||
1590 | gk20a_dbg_fn("filp=%p, buf=%p, size=%zu", filp, buf, size); | 1140 | INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); |
1591 | 1141 | ||
1592 | if ((size - *off) < sizeof(info)) | 1142 | #ifdef CONFIG_DEBUG_FS |
1593 | return 0; | 1143 | arb->debug = &arb->debug_pool[0]; |
1594 | 1144 | ||
1595 | memset(&info, 0, sizeof(info)); | 1145 | if (!arb->debugfs_set) { |
1596 | /* Get the oldest event from the queue */ | 1146 | if (nvgpu_clk_arb_debugfs_init(g)) |
1597 | while (!__pending_event(dev, &info)) { | 1147 | arb->debugfs_set = true; |
1598 | if (filp->f_flags & O_NONBLOCK) | ||
1599 | return -EAGAIN; | ||
1600 | err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq, | ||
1601 | __pending_event(dev, &info), 0); | ||
1602 | if (err) | ||
1603 | return err; | ||
1604 | if (info.timestamp) | ||
1605 | break; | ||
1606 | } | 1148 | } |
1149 | #endif | ||
1150 | err = clk_vf_point_cache(g); | ||
1151 | if (err < 0) | ||
1152 | goto init_fail; | ||
1607 | 1153 | ||
1608 | if (copy_to_user(buf + *off, &info, sizeof(info))) | 1154 | err = nvgpu_clk_arb_update_vf_table(arb); |
1609 | return -EFAULT; | 1155 | if (err < 0) |
1610 | 1156 | goto init_fail; | |
1611 | return sizeof(info); | 1157 | do { |
1612 | } | 1158 | /* Check that first run is completed */ |
1159 | nvgpu_smp_mb(); | ||
1160 | NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq, | ||
1161 | nvgpu_atomic_read(&arb->req_nr), 0); | ||
1162 | } while (!nvgpu_atomic_read(&arb->req_nr)); | ||
1613 | 1163 | ||
1614 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) | ||
1615 | { | ||
1616 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
1617 | 1164 | ||
1618 | gk20a_dbg_fn(""); | 1165 | return arb->status; |
1619 | 1166 | ||
1620 | poll_wait(filp, &dev->readout_wq.wq, wait); | 1167 | init_fail: |
1621 | return nvgpu_atomic_xchg(&dev->poll_mask, 0); | 1168 | nvgpu_kfree(g, arb->gpc2clk_f_points); |
1622 | } | 1169 | nvgpu_kfree(g, arb->mclk_f_points); |
1623 | 1170 | ||
1624 | static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | 1171 | for (index = 0; index < 2; index++) { |
1625 | struct file *filp) | 1172 | nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); |
1626 | { | 1173 | nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); |
1627 | struct nvgpu_clk_dev *dev = filp->private_data; | 1174 | } |
1628 | struct nvgpu_clk_session *session = dev->session; | ||
1629 | 1175 | ||
1176 | nvgpu_mutex_destroy(&arb->pstate_lock); | ||
1630 | 1177 | ||
1631 | gk20a_dbg_fn(""); | 1178 | mutex_fail: |
1179 | nvgpu_kfree(g, arb); | ||
1632 | 1180 | ||
1633 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | 1181 | return err; |
1634 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
1635 | return 0; | ||
1636 | } | 1182 | } |
1637 | 1183 | ||
1638 | static int nvgpu_clk_arb_release_event_dev(struct inode *inode, | 1184 | void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g) |
1639 | struct file *filp) | ||
1640 | { | 1185 | { |
1641 | struct nvgpu_clk_dev *dev = filp->private_data; | 1186 | nvgpu_clk_arb_schedule_alarm(g, |
1642 | struct nvgpu_clk_session *session = dev->session; | 1187 | (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD)); |
1643 | struct nvgpu_clk_arb *arb; | ||
1644 | |||
1645 | arb = session->g->clk_arb; | ||
1646 | |||
1647 | gk20a_dbg_fn(""); | ||
1648 | |||
1649 | if (arb) { | ||
1650 | nvgpu_spinlock_acquire(&arb->users_lock); | ||
1651 | list_del_rcu(&dev->link); | ||
1652 | nvgpu_spinlock_release(&arb->users_lock); | ||
1653 | nvgpu_clk_notification_queue_free(arb->g, &dev->queue); | ||
1654 | } | ||
1655 | |||
1656 | synchronize_rcu(); | ||
1657 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | ||
1658 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
1659 | |||
1660 | return 0; | ||
1661 | } | 1188 | } |
1662 | 1189 | ||
1663 | int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, | 1190 | void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm) |
1664 | int request_fd, u32 api_domain, u16 target_mhz) | ||
1665 | { | 1191 | { |
1666 | struct nvgpu_clk_dev *dev; | 1192 | struct nvgpu_clk_arb *arb = g->clk_arb; |
1667 | struct fd fd; | ||
1668 | int err = 0; | ||
1669 | |||
1670 | gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz); | ||
1671 | |||
1672 | fd = fdget(request_fd); | ||
1673 | if (!fd.file) | ||
1674 | return -EINVAL; | ||
1675 | |||
1676 | if (fd.file->f_op != &completion_dev_ops) { | ||
1677 | err = -EINVAL; | ||
1678 | goto fdput_fd; | ||
1679 | } | ||
1680 | |||
1681 | dev = fd.file->private_data; | ||
1682 | if (!dev || dev->session != session) { | ||
1683 | err = -EINVAL; | ||
1684 | goto fdput_fd; | ||
1685 | } | ||
1686 | |||
1687 | switch (api_domain) { | ||
1688 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | ||
1689 | dev->mclk_target_mhz = target_mhz; | ||
1690 | break; | ||
1691 | |||
1692 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | ||
1693 | dev->gpc2clk_target_mhz = target_mhz * 2ULL; | ||
1694 | break; | ||
1695 | |||
1696 | default: | ||
1697 | err = -EINVAL; | ||
1698 | } | ||
1699 | 1193 | ||
1700 | fdput_fd: | 1194 | nvgpu_clk_arb_set_global_alarm(g, alarm); |
1701 | fdput(fd); | 1195 | if (arb->update_work_queue) |
1702 | return err; | 1196 | queue_work(arb->update_work_queue, &arb->update_fn_work); |
1703 | } | 1197 | } |
1704 | 1198 | ||
1705 | int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, | 1199 | void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) |
1706 | u32 api_domain, u16 *freq_mhz) | ||
1707 | { | 1200 | { |
1708 | int err = 0; | 1201 | struct nvgpu_clk_arb *arb = g->clk_arb; |
1709 | struct nvgpu_clk_arb_target *target; | 1202 | int index; |
1710 | 1203 | ||
1711 | do { | 1204 | if (arb) { |
1712 | target = NV_ACCESS_ONCE(session->target); | 1205 | cancel_work_sync(&arb->vf_table_fn_work); |
1713 | /* no reordering of this pointer */ | 1206 | destroy_workqueue(arb->vf_table_work_queue); |
1714 | nvgpu_smp_rmb(); | 1207 | arb->vf_table_work_queue = NULL; |
1715 | 1208 | ||
1716 | switch (api_domain) { | 1209 | cancel_work_sync(&arb->update_fn_work); |
1717 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 1210 | destroy_workqueue(arb->update_work_queue); |
1718 | *freq_mhz = target->mclk; | 1211 | arb->update_work_queue = NULL; |
1719 | break; | ||
1720 | 1212 | ||
1721 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | 1213 | nvgpu_kfree(g, arb->gpc2clk_f_points); |
1722 | *freq_mhz = target->gpc2clk / 2ULL; | 1214 | nvgpu_kfree(g, arb->mclk_f_points); |
1723 | break; | ||
1724 | 1215 | ||
1725 | default: | 1216 | for (index = 0; index < 2; index++) { |
1726 | *freq_mhz = 0; | 1217 | nvgpu_kfree(g, |
1727 | err = -EINVAL; | 1218 | arb->vf_table_pool[index].gpc2clk_points); |
1219 | nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); | ||
1728 | } | 1220 | } |
1729 | } while (target != NV_ACCESS_ONCE(session->target)); | 1221 | nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); |
1730 | return err; | 1222 | nvgpu_kfree(g, g->clk_arb); |
1223 | g->clk_arb = NULL; | ||
1224 | } | ||
1731 | } | 1225 | } |
1732 | 1226 | ||
1733 | int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, | 1227 | int nvgpu_clk_arb_init_session(struct gk20a *g, |
1734 | u32 api_domain, u16 *freq_mhz) | 1228 | struct nvgpu_clk_session **_session) |
1735 | { | 1229 | { |
1736 | struct nvgpu_clk_arb *arb = g->clk_arb; | 1230 | struct nvgpu_clk_arb *arb = g->clk_arb; |
1737 | int err = 0; | 1231 | struct nvgpu_clk_session *session = *(_session); |
1738 | struct nvgpu_clk_arb_target *actual; | ||
1739 | 1232 | ||
1740 | do { | 1233 | gk20a_dbg_fn(""); |
1741 | actual = NV_ACCESS_ONCE(arb->actual); | ||
1742 | /* no reordering of this pointer */ | ||
1743 | nvgpu_smp_rmb(); | ||
1744 | 1234 | ||
1745 | switch (api_domain) { | 1235 | if (!g->ops.clk_arb.get_arbiter_clk_domains) |
1746 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 1236 | return 0; |
1747 | *freq_mhz = actual->mclk; | ||
1748 | break; | ||
1749 | 1237 | ||
1750 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | 1238 | session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session)); |
1751 | *freq_mhz = actual->gpc2clk / 2ULL; | 1239 | if (!session) |
1752 | break; | 1240 | return -ENOMEM; |
1241 | session->g = g; | ||
1753 | 1242 | ||
1754 | default: | 1243 | nvgpu_ref_init(&session->refcount); |
1755 | *freq_mhz = 0; | ||
1756 | err = -EINVAL; | ||
1757 | } | ||
1758 | } while (actual != NV_ACCESS_ONCE(arb->actual)); | ||
1759 | return err; | ||
1760 | } | ||
1761 | 1244 | ||
1762 | int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, | 1245 | session->zombie = false; |
1763 | u32 api_domain, u16 *freq_mhz) | 1246 | session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8; |
1764 | { | 1247 | /* make sure that the initialization of the pool is visible |
1765 | switch (api_domain) { | 1248 | * before the update |
1766 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 1249 | */ |
1767 | *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) / | 1250 | nvgpu_smp_wmb(); |
1768 | 1000000ULL; | 1251 | session->target = &session->target_pool[0]; |
1769 | return 0; | ||
1770 | 1252 | ||
1771 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | 1253 | init_llist_head(&session->targets); |
1772 | *freq_mhz = g->ops.clk.measure_freq(g, | ||
1773 | CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL; | ||
1774 | return 0; | ||
1775 | 1254 | ||
1776 | default: | 1255 | nvgpu_spinlock_acquire(&arb->sessions_lock); |
1777 | return -EINVAL; | 1256 | list_add_tail_rcu(&session->link, &arb->sessions); |
1778 | } | 1257 | nvgpu_spinlock_release(&arb->sessions_lock); |
1779 | } | ||
1780 | 1258 | ||
1781 | int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, | 1259 | *_session = session; |
1782 | u16 *min_mhz, u16 *max_mhz) | ||
1783 | { | ||
1784 | int ret; | ||
1785 | |||
1786 | switch (api_domain) { | ||
1787 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | ||
1788 | ret = g->ops.clk_arb.get_arbiter_clk_range(g, | ||
1789 | CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz); | ||
1790 | return ret; | ||
1791 | |||
1792 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | ||
1793 | ret = g->ops.clk_arb.get_arbiter_clk_range(g, | ||
1794 | CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz); | ||
1795 | if (!ret) { | ||
1796 | *min_mhz /= 2; | ||
1797 | *max_mhz /= 2; | ||
1798 | } | ||
1799 | return ret; | ||
1800 | 1260 | ||
1801 | default: | 1261 | return 0; |
1802 | return -EINVAL; | ||
1803 | } | ||
1804 | } | 1262 | } |
1805 | 1263 | ||
1806 | u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) | 1264 | void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount) |
1807 | { | 1265 | { |
1808 | u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); | 1266 | struct nvgpu_clk_dev *dev = container_of(refcount, |
1809 | u32 api_domains = 0; | 1267 | struct nvgpu_clk_dev, refcount); |
1810 | 1268 | struct nvgpu_clk_session *session = dev->session; | |
1811 | if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) | ||
1812 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); | ||
1813 | |||
1814 | if (clk_domains & CTRL_CLK_DOMAIN_MCLK) | ||
1815 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); | ||
1816 | 1269 | ||
1817 | return api_domains; | 1270 | nvgpu_kfree(session->g, dev); |
1818 | } | 1271 | } |
1819 | 1272 | ||
1820 | bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) | 1273 | void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount) |
1821 | { | 1274 | { |
1822 | u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); | 1275 | struct nvgpu_clk_session *session = container_of(refcount, |
1823 | 1276 | struct nvgpu_clk_session, refcount); | |
1824 | switch (api_domain) { | 1277 | struct nvgpu_clk_arb *arb = session->g->clk_arb; |
1825 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | 1278 | struct gk20a *g = session->g; |
1826 | return ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0); | 1279 | struct nvgpu_clk_dev *dev, *tmp; |
1280 | struct llist_node *head; | ||
1827 | 1281 | ||
1828 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | 1282 | gk20a_dbg_fn(""); |
1829 | return ((clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0); | ||
1830 | 1283 | ||
1831 | default: | 1284 | if (arb) { |
1832 | return false; | 1285 | nvgpu_spinlock_acquire(&arb->sessions_lock); |
1286 | list_del_rcu(&session->link); | ||
1287 | nvgpu_spinlock_release(&arb->sessions_lock); | ||
1833 | } | 1288 | } |
1834 | } | ||
1835 | 1289 | ||
1836 | int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, | 1290 | head = llist_del_all(&session->targets); |
1837 | u32 api_domain, u32 *max_points, u16 *fpoints) | 1291 | llist_for_each_entry_safe(dev, tmp, head, node) { |
1838 | { | 1292 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); |
1839 | int err; | ||
1840 | u32 i; | ||
1841 | |||
1842 | switch (api_domain) { | ||
1843 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | ||
1844 | err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK, | ||
1845 | max_points, fpoints); | ||
1846 | if (err || !fpoints) | ||
1847 | return err; | ||
1848 | for (i = 0; i < *max_points; i++) | ||
1849 | fpoints[i] /= 2; | ||
1850 | return 0; | ||
1851 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | ||
1852 | return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK, | ||
1853 | max_points, fpoints); | ||
1854 | default: | ||
1855 | return -EINVAL; | ||
1856 | } | 1293 | } |
1294 | synchronize_rcu(); | ||
1295 | nvgpu_kfree(g, session); | ||
1857 | } | 1296 | } |
1858 | 1297 | ||
1859 | static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, | 1298 | void nvgpu_clk_arb_release_session(struct gk20a *g, |
1860 | u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, | 1299 | struct nvgpu_clk_session *session) |
1861 | u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram) | ||
1862 | { | 1300 | { |
1863 | u16 gpc2clk_target, mclk_target; | 1301 | struct nvgpu_clk_arb *arb = g->clk_arb; |
1864 | u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; | ||
1865 | u32 mclk_voltuv, mclk_voltuv_sram; | ||
1866 | u32 pstate = VF_POINT_INVALID_PSTATE; | ||
1867 | struct nvgpu_clk_vf_table *table; | ||
1868 | u32 index, index_mclk; | ||
1869 | struct nvgpu_clk_vf_point *mclk_vf = NULL; | ||
1870 | |||
1871 | do { | ||
1872 | gpc2clk_target = *gpc2clk; | ||
1873 | mclk_target = *mclk; | ||
1874 | gpc2clk_voltuv = 0; | ||
1875 | gpc2clk_voltuv_sram = 0; | ||
1876 | mclk_voltuv = 0; | ||
1877 | mclk_voltuv_sram = 0; | ||
1878 | |||
1879 | table = NV_ACCESS_ONCE(arb->current_vf_table); | ||
1880 | /* pointer to table can be updated by callback */ | ||
1881 | nvgpu_smp_rmb(); | ||
1882 | |||
1883 | if (!table) | ||
1884 | continue; | ||
1885 | if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) { | ||
1886 | nvgpu_err(arb->g, "found empty table"); | ||
1887 | goto find_exit; | ||
1888 | } | ||
1889 | /* First we check MCLK to find out which PSTATE we are | ||
1890 | * are requesting, and from there try to find the minimum | ||
1891 | * GPC2CLK on the same PSTATE that satisfies the request. | ||
1892 | * If no GPC2CLK can be found, then we need to up the PSTATE | ||
1893 | */ | ||
1894 | |||
1895 | recalculate_vf_point: | ||
1896 | for (index = 0; index < table->mclk_num_points; index++) { | ||
1897 | if (table->mclk_points[index].mem_mhz >= mclk_target) { | ||
1898 | mclk_vf = &table->mclk_points[index]; | ||
1899 | break; | ||
1900 | } | ||
1901 | } | ||
1902 | if (index == table->mclk_num_points) { | ||
1903 | mclk_vf = &table->mclk_points[index-1]; | ||
1904 | index = table->mclk_num_points - 1; | ||
1905 | } | ||
1906 | index_mclk = index; | ||
1907 | |||
1908 | /* round up the freq requests */ | ||
1909 | for (index = 0; index < table->gpc2clk_num_points; index++) { | ||
1910 | pstate = VF_POINT_COMMON_PSTATE( | ||
1911 | &table->gpc2clk_points[index], mclk_vf); | ||
1912 | |||
1913 | if ((table->gpc2clk_points[index].gpc_mhz >= | ||
1914 | gpc2clk_target) && | ||
1915 | (pstate != VF_POINT_INVALID_PSTATE)) { | ||
1916 | gpc2clk_target = | ||
1917 | table->gpc2clk_points[index].gpc_mhz; | ||
1918 | *sys2clk = | ||
1919 | table->gpc2clk_points[index].sys_mhz; | ||
1920 | *xbar2clk = | ||
1921 | table->gpc2clk_points[index].xbar_mhz; | ||
1922 | |||
1923 | gpc2clk_voltuv = | ||
1924 | table->gpc2clk_points[index].uvolt; | ||
1925 | gpc2clk_voltuv_sram = | ||
1926 | table->gpc2clk_points[index].uvolt_sram; | ||
1927 | break; | ||
1928 | } | ||
1929 | } | ||
1930 | |||
1931 | if (index == table->gpc2clk_num_points) { | ||
1932 | pstate = VF_POINT_COMMON_PSTATE( | ||
1933 | &table->gpc2clk_points[index-1], mclk_vf); | ||
1934 | if (pstate != VF_POINT_INVALID_PSTATE) { | ||
1935 | gpc2clk_target = | ||
1936 | table->gpc2clk_points[index-1].gpc_mhz; | ||
1937 | *sys2clk = | ||
1938 | table->gpc2clk_points[index-1].sys_mhz; | ||
1939 | *xbar2clk = | ||
1940 | table->gpc2clk_points[index-1].xbar_mhz; | ||
1941 | 1302 | ||
1942 | gpc2clk_voltuv = | 1303 | gk20a_dbg_fn(""); |
1943 | table->gpc2clk_points[index-1].uvolt; | ||
1944 | gpc2clk_voltuv_sram = | ||
1945 | table->gpc2clk_points[index-1]. | ||
1946 | uvolt_sram; | ||
1947 | } else if (index_mclk >= table->mclk_num_points - 1) { | ||
1948 | /* There is no available combination of MCLK | ||
1949 | * and GPC2CLK, we need to fail this | ||
1950 | */ | ||
1951 | gpc2clk_target = 0; | ||
1952 | mclk_target = 0; | ||
1953 | pstate = VF_POINT_INVALID_PSTATE; | ||
1954 | goto find_exit; | ||
1955 | } else { | ||
1956 | /* recalculate with higher PSTATE */ | ||
1957 | gpc2clk_target = *gpc2clk; | ||
1958 | mclk_target = table->mclk_points[index_mclk+1]. | ||
1959 | mem_mhz; | ||
1960 | goto recalculate_vf_point; | ||
1961 | } | ||
1962 | } | ||
1963 | 1304 | ||
1964 | mclk_target = mclk_vf->mem_mhz; | 1305 | session->zombie = true; |
1965 | mclk_voltuv = mclk_vf->uvolt; | 1306 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); |
1966 | mclk_voltuv_sram = mclk_vf->uvolt_sram; | 1307 | if (arb && arb->update_work_queue) |
1308 | queue_work(arb->update_work_queue, &arb->update_fn_work); | ||
1309 | } | ||
1967 | 1310 | ||
1968 | } while (!table || | 1311 | void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g) |
1969 | (NV_ACCESS_ONCE(arb->current_vf_table) != table)); | 1312 | { |
1313 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
1970 | 1314 | ||
1971 | find_exit: | 1315 | if (arb->vf_table_work_queue) |
1972 | *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; | 1316 | queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work); |
1973 | *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? | ||
1974 | gpc2clk_voltuv_sram : mclk_voltuv_sram; | ||
1975 | /* noise unaware vmin */ | ||
1976 | *nuvmin = mclk_voltuv; | ||
1977 | *nuvmin_sram = mclk_voltuv_sram; | ||
1978 | *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk; | ||
1979 | *mclk = mclk_target; | ||
1980 | return pstate; | ||
1981 | } | 1317 | } |
1982 | 1318 | ||
1983 | /* This function is inherently unsafe to call while arbiter is running | 1319 | /* This function is inherently unsafe to call while arbiter is running |
@@ -1988,60 +1324,6 @@ int nvgpu_clk_arb_get_current_pstate(struct gk20a *g) | |||
1988 | return NV_ACCESS_ONCE(g->clk_arb->actual->pstate); | 1324 | return NV_ACCESS_ONCE(g->clk_arb->actual->pstate); |
1989 | } | 1325 | } |
1990 | 1326 | ||
/*
 * Program a new VF (voltage/frequency) point into the hardware.
 *
 * Applies the FLL clock targets (GPC2/SYS2/XBAR2), the core and SRAM
 * voltages, and the MCLK target.  The three steps are ordered by the
 * direction of the voltage change relative to the currently programmed
 * voltage (arb->voltuv_actual) so that the rail is never below what the
 * active frequencies require.
 *
 * Returns 0 on success, or the negative error of the first failing step.
 * NOTE(review): on a mid-sequence failure the hardware is left partway
 * through the transition; callers presumably retry or fall back —
 * confirm against the arbiter worker.
 */
static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
	u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
	u32 voltuv_sram)
{
	struct set_fll_clk fllclk;
	struct nvgpu_clk_arb *arb = g->clk_arb;
	int status;

	fllclk.gpc2clkmhz = gpc2clk_target;
	fllclk.sys2clkmhz = sys2clk_target;
	fllclk.xbar2clkmhz = xbar2clk_target;

	fllclk.voltuv = voltuv;

	/* if voltage ascends we do:
	 * (1) FLL change
	 * (2) Voltage change
	 * (3) MCLK change
	 * If it goes down
	 * (1) MCLK change
	 * (2) Voltage change
	 * (3) FLL change
	 */

	/* descending */
	if (voltuv < arb->voltuv_actual) {
		status = g->ops.clk.mclk_change(g, mclk_target);
		if (status < 0)
			return status;

		status = volt_set_voltage(g, voltuv, voltuv_sram);
		if (status < 0)
			return status;

		status = clk_set_fll_clks(g, &fllclk);
		if (status < 0)
			return status;
	} else {
		status = clk_set_fll_clks(g, &fllclk);
		if (status < 0)
			return status;

		status = volt_set_voltage(g, voltuv, voltuv_sram);
		if (status < 0)
			return status;

		status = g->ops.clk.mclk_change(g, mclk_target);
		if (status < 0)
			return status;
	}

	return 0;
}
2044 | |||
2045 | void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) | 1327 | void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) |
2046 | { | 1328 | { |
2047 | struct nvgpu_clk_arb *arb = g->clk_arb; | 1329 | struct nvgpu_clk_arb *arb = g->clk_arb; |
@@ -2051,71 +1333,3 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) | |||
2051 | else | 1333 | else |
2052 | nvgpu_mutex_release(&arb->pstate_lock); | 1334 | nvgpu_mutex_release(&arb->pstate_lock); |
2053 | } | 1335 | } |
2054 | |||
2055 | #ifdef CONFIG_DEBUG_FS | ||
2056 | static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) | ||
2057 | { | ||
2058 | struct gk20a *g = s->private; | ||
2059 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
2060 | struct nvgpu_clk_arb_debug *debug; | ||
2061 | |||
2062 | u64 num; | ||
2063 | s64 tmp, avg, std, max, min; | ||
2064 | |||
2065 | debug = NV_ACCESS_ONCE(arb->debug); | ||
2066 | /* Make copy of structure and ensure no reordering */ | ||
2067 | nvgpu_smp_rmb(); | ||
2068 | if (!debug) | ||
2069 | return -EINVAL; | ||
2070 | |||
2071 | std = debug->switch_std; | ||
2072 | avg = debug->switch_avg; | ||
2073 | max = debug->switch_max; | ||
2074 | min = debug->switch_min; | ||
2075 | num = debug->switch_num; | ||
2076 | |||
2077 | tmp = std; | ||
2078 | do_div(tmp, num); | ||
2079 | seq_printf(s, "Number of transitions: %lld\n", | ||
2080 | num); | ||
2081 | seq_printf(s, "max / min : %lld / %lld usec\n", | ||
2082 | max, min); | ||
2083 | seq_printf(s, "avg / std : %lld / %ld usec\n", | ||
2084 | avg, int_sqrt(tmp)); | ||
2085 | |||
2086 | return 0; | ||
2087 | } | ||
2088 | |||
2089 | static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) | ||
2090 | { | ||
2091 | return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); | ||
2092 | } | ||
2093 | |||
2094 | static const struct file_operations nvgpu_clk_arb_stats_fops = { | ||
2095 | .open = nvgpu_clk_arb_stats_open, | ||
2096 | .read = seq_read, | ||
2097 | .llseek = seq_lseek, | ||
2098 | .release = single_release, | ||
2099 | }; | ||
2100 | |||
2101 | |||
2102 | static int nvgpu_clk_arb_debugfs_init(struct gk20a *g) | ||
2103 | { | ||
2104 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
2105 | struct dentry *gpu_root = l->debugfs; | ||
2106 | struct dentry *d; | ||
2107 | |||
2108 | gk20a_dbg(gpu_dbg_info, "g=%p", g); | ||
2109 | |||
2110 | d = debugfs_create_file( | ||
2111 | "arb_stats", | ||
2112 | S_IRUGO, | ||
2113 | gpu_root, | ||
2114 | g, | ||
2115 | &nvgpu_clk_arb_stats_fops); | ||
2116 | if (!d) | ||
2117 | return -ENOMEM; | ||
2118 | |||
2119 | return 0; | ||
2120 | } | ||
2121 | #endif | ||
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h new file mode 100644 index 00000000..b66876da --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef __NVGPU_CLK_ARB_LINUX_H__ | ||
18 | #define __NVGPU_CLK_ARB_LINUX_H__ | ||
19 | |||
20 | #include <nvgpu/types.h> | ||
21 | #include <nvgpu/bitops.h> | ||
22 | #include <nvgpu/lock.h> | ||
23 | #include <nvgpu/kmem.h> | ||
24 | #include <nvgpu/atomic.h> | ||
25 | #include <nvgpu/bug.h> | ||
26 | #include <nvgpu/kref.h> | ||
27 | #include <nvgpu/log.h> | ||
28 | #include <nvgpu/barrier.h> | ||
29 | #include <nvgpu/cond.h> | ||
30 | |||
31 | #include "gk20a/gk20a.h" | ||
32 | #include "clk/clk.h" | ||
33 | #include "pstate/pstate.h" | ||
34 | #include "lpwr/lpwr.h" | ||
35 | #include "volt/volt.h" | ||
36 | |||
37 | /* | ||
38 | * The defines here should finally move to clk_arb.h, once these are | ||
39 | * refactored to be free of Linux fields. | ||
40 | */ | ||
/* Per-GPU clock arbiter state. One instance hangs off gk20a::clk_arb. */
struct nvgpu_clk_arb {
	struct nvgpu_spinlock sessions_lock;	/* protects the sessions list */
	struct nvgpu_spinlock users_lock;	/* protects the users list */

	/* serializes pstate changes against external lock/unlock requests */
	struct nvgpu_mutex pstate_lock;
	struct list_head users;		/* event fds (RCU-read, lock-write) */
	struct list_head sessions;	/* open arbiter sessions */
	struct llist_head requests;	/* lockless queue of pending requests */

	struct gk20a *g;
	int status;

	/* double-buffered "actual" clock state; 'actual' points at one slot */
	struct nvgpu_clk_arb_target actual_pool[2];
	struct nvgpu_clk_arb_target *actual;

	u16 gpc2clk_default_mhz;
	u16 mclk_default_mhz;
	u32 voltuv_actual;	/* voltage currently programmed on the rail */

	u16 gpc2clk_min, gpc2clk_max;
	u16 mclk_min, mclk_max;

	/* deferred work: arbitration runs and VF table rebuilds */
	struct work_struct update_fn_work;
	struct workqueue_struct *update_work_queue;
	struct work_struct vf_table_fn_work;
	struct workqueue_struct *vf_table_work_queue;

	struct nvgpu_cond request_wq;

	/* double-buffered VF table; readers check current_vf_table for tears */
	struct nvgpu_clk_vf_table *current_vf_table;
	struct nvgpu_clk_vf_table vf_table_pool[2];
	u32 vf_table_index;

	u16 *mclk_f_points;
	nvgpu_atomic_t req_nr;	/* monotonically increasing request number */

	u32 mclk_f_numpoints;
	u16 *gpc2clk_f_points;
	u32 gpc2clk_f_numpoints;

	nvgpu_atomic64_t alarm_mask;	/* currently raised alarm bits */
	struct nvgpu_clk_notification_queue notification_queue;

#ifdef CONFIG_DEBUG_FS
	/* double-buffered switch-latency statistics for arb_stats */
	struct nvgpu_clk_arb_debug debug_pool[2];
	struct nvgpu_clk_arb_debug *debug;
	bool debugfs_set;
#endif
};

/* Per-fd state for request (completion) and event fds. */
struct nvgpu_clk_dev {
	struct nvgpu_clk_session *session;
	union {
		struct list_head link;	/* event fds: node in arb->users */
		struct llist_node node;	/* request fds: node in session->targets */
	};
	struct nvgpu_cond readout_wq;	/* wakes poll()/read() waiters */
	nvgpu_atomic_t poll_mask;	/* pending POLL* bits, consumed by poll */
	u16 gpc2clk_target_mhz;
	u16 mclk_target_mhz;
	u32 alarms_reported;
	nvgpu_atomic_t enabled_mask;	/* event filter set via ioctl */
	struct nvgpu_clk_notification_queue queue;
	u32 arb_queue_head;	/* snapshot of arb queue head at install time */
	struct nvgpu_ref refcount;
};

/* One arbiter session, created per opened ctrl fd. */
struct nvgpu_clk_session {
	bool zombie;	/* session closed but requests may still be in flight */
	struct gk20a *g;
	struct nvgpu_ref refcount;
	struct list_head link;		/* node in arb->sessions */
	struct llist_head targets;	/* committed, not yet consumed requests */

	/* double-buffered session target; 'target' points at the live slot */
	struct nvgpu_clk_arb_target target_pool[2];
	struct nvgpu_clk_arb_target *target;
};
118 | |||
119 | #endif /* __NVGPU_CLK_ARB_LINUX_H__ */ | ||
120 | |||
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c new file mode 100644 index 00000000..27afe777 --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c | |||
@@ -0,0 +1,641 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This software is licensed under the terms of the GNU General Public | ||
5 | * License version 2, as published by the Free Software Foundation, and | ||
6 | * may be copied, distributed, and modified under those terms. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/cdev.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/anon_inodes.h> | ||
20 | #include <linux/rculist.h> | ||
21 | #include <linux/llist.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/poll.h> | ||
24 | #ifdef CONFIG_DEBUG_FS | ||
25 | #include <linux/debugfs.h> | ||
26 | #endif | ||
27 | #include <uapi/linux/nvgpu.h> | ||
28 | |||
29 | #include <nvgpu/bitops.h> | ||
30 | #include <nvgpu/lock.h> | ||
31 | #include <nvgpu/kmem.h> | ||
32 | #include <nvgpu/atomic.h> | ||
33 | #include <nvgpu/bug.h> | ||
34 | #include <nvgpu/kref.h> | ||
35 | #include <nvgpu/log.h> | ||
36 | #include <nvgpu/barrier.h> | ||
37 | #include <nvgpu/cond.h> | ||
38 | #include <nvgpu/clk_arb.h> | ||
39 | |||
40 | #include "gk20a/gk20a.h" | ||
41 | #include "clk/clk.h" | ||
42 | #include "clk_arb_linux.h" | ||
43 | #include "pstate/pstate.h" | ||
44 | #include "lpwr/lpwr.h" | ||
45 | #include "volt/volt.h" | ||
46 | |||
47 | #ifdef CONFIG_DEBUG_FS | ||
48 | #include "common/linux/os_linux.h" | ||
49 | #endif | ||
50 | |||
51 | static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, | ||
52 | struct file *filp) | ||
53 | { | ||
54 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
55 | struct nvgpu_clk_session *session = dev->session; | ||
56 | |||
57 | |||
58 | gk20a_dbg_fn(""); | ||
59 | |||
60 | nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); | ||
61 | nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) | ||
66 | { | ||
67 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
68 | |||
69 | gk20a_dbg_fn(""); | ||
70 | |||
71 | poll_wait(filp, &dev->readout_wq.wq, wait); | ||
72 | return nvgpu_atomic_xchg(&dev->poll_mask, 0); | ||
73 | } | ||
74 | |||
/*
 * fd release hook for event (notification) fds.
 *
 * Unlinks the device from the arbiter's RCU-protected users list, frees
 * its notification queue, then waits for an RCU grace period before
 * dropping the final references so concurrent RCU readers iterating
 * arb->users never touch freed memory.
 */
static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
		struct file *filp)
{
	struct nvgpu_clk_dev *dev = filp->private_data;
	struct nvgpu_clk_session *session = dev->session;
	struct nvgpu_clk_arb *arb;

	arb = session->g->clk_arb;

	gk20a_dbg_fn("");

	if (arb) {
		nvgpu_spinlock_acquire(&arb->users_lock);
		list_del_rcu(&dev->link);
		nvgpu_spinlock_release(&arb->users_lock);
		nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
	}

	/* let in-flight RCU readers of arb->users finish before freeing */
	synchronize_rcu();
	nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
	nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);

	return 0;
}
99 | |||
/*
 * Fetch the oldest unread notification from the per-fd queue, if any.
 *
 * head/tail are free-running u32 counters; slots are addressed modulo
 * queue.size.  If the reader lagged more than queue.size entries behind
 * the writer, head is first advanced so that only the newest queue.size
 * entries (the ones not yet overwritten) remain reachable.
 *
 * When an entry is consumed and @info is non-NULL, the event id and
 * timestamp are copied out and head is published back to the queue.
 * Returns the bitmask of notification bits seen (0 if queue was empty).
 */
static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
		struct nvgpu_gpu_event_info *info) {

	u32 tail, head;
	u32 events = 0;
	struct nvgpu_clk_notification *p_notif;

	tail = nvgpu_atomic_read(&dev->queue.tail);
	head = nvgpu_atomic_read(&dev->queue.head);

	/* drop entries the writer already overwrote while we lagged */
	head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;

	if (_WRAPGTEQ(tail, head) && info) {
		head++;
		p_notif = &dev->queue.notifications[head % dev->queue.size];
		events |= p_notif->notification;
		info->event_id = ffs(events) - 1;
		info->timestamp = p_notif->timestamp;
		nvgpu_atomic_set(&dev->queue.head, head);
	}

	return events;
}
123 | |||
/*
 * read() hook for event fds: blocks (unless O_NONBLOCK) until a
 * notification is pending, then copies exactly one
 * struct nvgpu_gpu_event_info to user space.
 *
 * NOTE(review): *off is used as an offset into the user buffer but is
 * never advanced, and (size - *off) mixes size_t and loff_t arithmetic —
 * presumably user space always reads from offset 0 with a fresh buffer;
 * confirm against the UAPI consumers before relying on positional reads.
 */
static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
		size_t size, loff_t *off)
{
	struct nvgpu_clk_dev *dev = filp->private_data;
	struct nvgpu_gpu_event_info info;
	ssize_t err;

	gk20a_dbg_fn("filp=%p, buf=%p, size=%zu", filp, buf, size);

	/* need room for one full event record */
	if ((size - *off) < sizeof(info))
		return 0;

	memset(&info, 0, sizeof(info));
	/* Get the oldest event from the queue */
	while (!__pending_event(dev, &info)) {
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		/* interruptible wait; the cond is signalled by the arbiter */
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
				__pending_event(dev, &info), 0);
		if (err)
			return err;
		if (info.timestamp)
			break;
	}

	if (copy_to_user(buf + *off, &info, sizeof(info)))
		return -EFAULT;

	return sizeof(info);
}
154 | |||
155 | static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, | ||
156 | struct nvgpu_gpu_set_event_filter_args *args) | ||
157 | { | ||
158 | u32 mask; | ||
159 | |||
160 | gk20a_dbg(gpu_dbg_fn, ""); | ||
161 | |||
162 | if (args->flags) | ||
163 | return -EINVAL; | ||
164 | |||
165 | if (args->size != 1) | ||
166 | return -EINVAL; | ||
167 | |||
168 | if (copy_from_user(&mask, (void __user *) args->buffer, | ||
169 | args->size * sizeof(u32))) | ||
170 | return -EFAULT; | ||
171 | |||
172 | /* update alarm mask */ | ||
173 | nvgpu_atomic_set(&dev->enabled_mask, mask); | ||
174 | |||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd, | ||
179 | unsigned long arg) | ||
180 | { | ||
181 | struct nvgpu_clk_dev *dev = filp->private_data; | ||
182 | struct gk20a *g = dev->session->g; | ||
183 | u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE]; | ||
184 | int err = 0; | ||
185 | |||
186 | gk20a_dbg(gpu_dbg_fn, "nr=%d", _IOC_NR(cmd)); | ||
187 | |||
188 | if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0) | ||
189 | || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)) | ||
190 | return -EINVAL; | ||
191 | |||
192 | BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); | ||
193 | |||
194 | memset(buf, 0, sizeof(buf)); | ||
195 | if (_IOC_DIR(cmd) & _IOC_WRITE) { | ||
196 | if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd))) | ||
197 | return -EFAULT; | ||
198 | } | ||
199 | |||
200 | switch (cmd) { | ||
201 | case NVGPU_EVENT_IOCTL_SET_FILTER: | ||
202 | err = nvgpu_clk_arb_set_event_filter(dev, | ||
203 | (struct nvgpu_gpu_set_event_filter_args *)buf); | ||
204 | break; | ||
205 | default: | ||
206 | nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd); | ||
207 | err = -ENOTTY; | ||
208 | } | ||
209 | |||
210 | if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) | ||
211 | err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); | ||
212 | |||
213 | return err; | ||
214 | } | ||
215 | |||
/* fds handed out for clock-change requests: poll/close only. */
static const struct file_operations completion_dev_ops = {
	.owner = THIS_MODULE,
	.release = nvgpu_clk_arb_release_completion_dev,
	.poll = nvgpu_clk_arb_poll_dev,
};

/* fds handed out for event notification: also readable and ioctl-able. */
static const struct file_operations event_dev_ops = {
	.owner = THIS_MODULE,
	.release = nvgpu_clk_arb_release_event_dev,
	.poll = nvgpu_clk_arb_poll_dev,
	.read = nvgpu_clk_arb_read_event_dev,
#ifdef CONFIG_COMPAT
	.compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
#endif
	.unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
};
232 | |||
233 | static int nvgpu_clk_arb_install_fd(struct gk20a *g, | ||
234 | struct nvgpu_clk_session *session, | ||
235 | const struct file_operations *fops, | ||
236 | struct nvgpu_clk_dev **_dev) | ||
237 | { | ||
238 | struct file *file; | ||
239 | int fd; | ||
240 | int err; | ||
241 | int status; | ||
242 | char name[64]; | ||
243 | struct nvgpu_clk_dev *dev; | ||
244 | |||
245 | gk20a_dbg_fn(""); | ||
246 | |||
247 | dev = nvgpu_kzalloc(g, sizeof(*dev)); | ||
248 | if (!dev) | ||
249 | return -ENOMEM; | ||
250 | |||
251 | status = nvgpu_clk_notification_queue_alloc(g, &dev->queue, | ||
252 | DEFAULT_EVENT_NUMBER); | ||
253 | if (status < 0) { | ||
254 | err = status; | ||
255 | goto fail; | ||
256 | } | ||
257 | |||
258 | fd = get_unused_fd_flags(O_RDWR); | ||
259 | if (fd < 0) { | ||
260 | err = fd; | ||
261 | goto fail; | ||
262 | } | ||
263 | |||
264 | snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd); | ||
265 | file = anon_inode_getfile(name, fops, dev, O_RDWR); | ||
266 | if (IS_ERR(file)) { | ||
267 | err = PTR_ERR(file); | ||
268 | goto fail_fd; | ||
269 | } | ||
270 | |||
271 | fd_install(fd, file); | ||
272 | |||
273 | nvgpu_cond_init(&dev->readout_wq); | ||
274 | |||
275 | nvgpu_atomic_set(&dev->poll_mask, 0); | ||
276 | |||
277 | dev->session = session; | ||
278 | nvgpu_ref_init(&dev->refcount); | ||
279 | |||
280 | nvgpu_ref_get(&session->refcount); | ||
281 | |||
282 | *_dev = dev; | ||
283 | |||
284 | return fd; | ||
285 | |||
286 | fail_fd: | ||
287 | put_unused_fd(fd); | ||
288 | fail: | ||
289 | nvgpu_kfree(g, dev); | ||
290 | |||
291 | return err; | ||
292 | } | ||
293 | |||
/*
 * Create an event-notification fd for @session.
 *
 * Installs an anon-inode fd with event_dev_ops, programs the alarm
 * filter, snapshots the arbiter queue head so only future notifications
 * are delivered, and links the device into the arbiter's RCU users list.
 * Returns 0 and stores the fd in *event_fd, or a negative error.
 */
int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
	struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;
	struct nvgpu_clk_dev *dev;
	int fd;

	gk20a_dbg_fn("");

	fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
	if (fd < 0)
		return fd;

	/* TODO: alarm mask needs to be set to default value to prevent
	 * failures of legacy tests. This will be removed when sanity is
	 * updated
	 */
	if (alarm_mask)
		nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
	else
		nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));

	/* start delivery from the current arbiter queue position */
	dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);

	nvgpu_spinlock_acquire(&arb->users_lock);
	list_add_tail_rcu(&dev->link, &arb->users);
	nvgpu_spinlock_release(&arb->users_lock);

	*event_fd = fd;

	return 0;
}
326 | |||
327 | int nvgpu_clk_arb_install_request_fd(struct gk20a *g, | ||
328 | struct nvgpu_clk_session *session, int *request_fd) | ||
329 | { | ||
330 | struct nvgpu_clk_dev *dev; | ||
331 | int fd; | ||
332 | |||
333 | gk20a_dbg_fn(""); | ||
334 | |||
335 | fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); | ||
336 | if (fd < 0) | ||
337 | return fd; | ||
338 | |||
339 | *request_fd = fd; | ||
340 | |||
341 | return 0; | ||
342 | } | ||
343 | |||
/*
 * Commit the targets staged on a request fd to the arbiter.
 *
 * Validates that @request_fd really is one of our completion fds and
 * belongs to @session, takes an extra device reference (dropped by the
 * arbiter worker once the request is consumed), pushes the device onto
 * the session's lockless target list and kicks the update work.
 */
int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
	struct nvgpu_clk_session *session, int request_fd)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;
	struct nvgpu_clk_dev *dev;
	struct fd fd;
	int err = 0;

	gk20a_dbg_fn("");

	fd = fdget(request_fd);
	if (!fd.file)
		return -EINVAL;

	/* reject fds that are not arbiter completion fds */
	if (fd.file->f_op != &completion_dev_ops) {
		err = -EINVAL;
		goto fdput_fd;
	}

	dev = (struct nvgpu_clk_dev *) fd.file->private_data;

	/* the fd must belong to this session */
	if (!dev || dev->session != session) {
		err = -EINVAL;
		goto fdput_fd;
	}
	/* reference held until the arbiter worker processes the request */
	nvgpu_ref_get(&dev->refcount);
	llist_add(&dev->node, &session->targets);
	if (arb->update_work_queue)
		queue_work(arb->update_work_queue, &arb->update_fn_work);

fdput_fd:
	fdput(fd);
	return err;
}
378 | |||
/*
 * Stage a per-domain frequency target on a request fd.
 *
 * The value is only recorded on the fd; nothing reaches the arbiter
 * until nvgpu_clk_arb_commit_request_fd() is called.  The public API
 * speaks GPCCLK while the arbiter works in GPC2CLK, hence the doubling.
 *
 * NOTE(review): gpc2clk_target_mhz is u16, so target_mhz * 2 silently
 * truncates above 32767 MHz — presumably unreachable for real clocks,
 * but confirm no range check is expected here.
 */
int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
	int request_fd, u32 api_domain, u16 target_mhz)
{
	struct nvgpu_clk_dev *dev;
	struct fd fd;
	int err = 0;

	gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);

	fd = fdget(request_fd);
	if (!fd.file)
		return -EINVAL;

	/* must be one of our completion fds */
	if (fd.file->f_op != &completion_dev_ops) {
		err = -EINVAL;
		goto fdput_fd;
	}

	dev = fd.file->private_data;
	if (!dev || dev->session != session) {
		err = -EINVAL;
		goto fdput_fd;
	}

	switch (api_domain) {
	case NVGPU_GPU_CLK_DOMAIN_MCLK:
		dev->mclk_target_mhz = target_mhz;
		break;

	case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
		/* API is GPCCLK; arbiter tracks GPC2CLK */
		dev->gpc2clk_target_mhz = target_mhz * 2ULL;
		break;

	default:
		err = -EINVAL;
	}

fdput_fd:
	fdput(fd);
	return err;
}
420 | |||
/*
 * Read this session's current target frequency for @api_domain.
 *
 * session->target points at one slot of a double buffer that the
 * arbiter swaps; the read is retried (seqlock style) until the pointer
 * is stable across the whole read, so a torn snapshot is never returned.
 * GPC2CLK is halved to the GPCCLK value the public API exposes.
 */
int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
		u32 api_domain, u16 *freq_mhz)
{
	int err = 0;
	struct nvgpu_clk_arb_target *target;

	do {
		target = NV_ACCESS_ONCE(session->target);
		/* no reordering of this pointer */
		nvgpu_smp_rmb();

		switch (api_domain) {
		case NVGPU_GPU_CLK_DOMAIN_MCLK:
			*freq_mhz = target->mclk;
			break;

		case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
			*freq_mhz = target->gpc2clk / 2ULL;
			break;

		default:
			*freq_mhz = 0;
			err = -EINVAL;
		}
	} while (target != NV_ACCESS_ONCE(session->target));
	return err;
}
448 | |||
/*
 * Read the frequency the arbiter last actually programmed for
 * @api_domain.
 *
 * Same seqlock-style retry as nvgpu_clk_arb_get_session_target_mhz(),
 * but against the arbiter-wide double-buffered 'actual' state.
 */
int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
		u32 api_domain, u16 *freq_mhz)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;
	int err = 0;
	struct nvgpu_clk_arb_target *actual;

	do {
		actual = NV_ACCESS_ONCE(arb->actual);
		/* no reordering of this pointer */
		nvgpu_smp_rmb();

		switch (api_domain) {
		case NVGPU_GPU_CLK_DOMAIN_MCLK:
			*freq_mhz = actual->mclk;
			break;

		case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
			/* arbiter stores GPC2CLK; API reports GPCCLK */
			*freq_mhz = actual->gpc2clk / 2ULL;
			break;

		default:
			*freq_mhz = 0;
			err = -EINVAL;
		}
	} while (actual != NV_ACCESS_ONCE(arb->actual));
	return err;
}
477 | |||
478 | int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, | ||
479 | u32 api_domain, u16 *freq_mhz) | ||
480 | { | ||
481 | switch (api_domain) { | ||
482 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | ||
483 | *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) / | ||
484 | 1000000ULL; | ||
485 | return 0; | ||
486 | |||
487 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | ||
488 | *freq_mhz = g->ops.clk.measure_freq(g, | ||
489 | CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL; | ||
490 | return 0; | ||
491 | |||
492 | default: | ||
493 | return -EINVAL; | ||
494 | } | ||
495 | } | ||
496 | |||
497 | int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, | ||
498 | u16 *min_mhz, u16 *max_mhz) | ||
499 | { | ||
500 | int ret; | ||
501 | |||
502 | switch (api_domain) { | ||
503 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | ||
504 | ret = g->ops.clk_arb.get_arbiter_clk_range(g, | ||
505 | CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz); | ||
506 | return ret; | ||
507 | |||
508 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | ||
509 | ret = g->ops.clk_arb.get_arbiter_clk_range(g, | ||
510 | CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz); | ||
511 | if (!ret) { | ||
512 | *min_mhz /= 2; | ||
513 | *max_mhz /= 2; | ||
514 | } | ||
515 | return ret; | ||
516 | |||
517 | default: | ||
518 | return -EINVAL; | ||
519 | } | ||
520 | } | ||
521 | |||
522 | u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) | ||
523 | { | ||
524 | u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); | ||
525 | u32 api_domains = 0; | ||
526 | |||
527 | if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) | ||
528 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK); | ||
529 | |||
530 | if (clk_domains & CTRL_CLK_DOMAIN_MCLK) | ||
531 | api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK); | ||
532 | |||
533 | return api_domains; | ||
534 | } | ||
535 | |||
536 | bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) | ||
537 | { | ||
538 | u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); | ||
539 | |||
540 | switch (api_domain) { | ||
541 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | ||
542 | return ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0); | ||
543 | |||
544 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | ||
545 | return ((clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0); | ||
546 | |||
547 | default: | ||
548 | return false; | ||
549 | } | ||
550 | } | ||
551 | |||
552 | int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, | ||
553 | u32 api_domain, u32 *max_points, u16 *fpoints) | ||
554 | { | ||
555 | int err; | ||
556 | u32 i; | ||
557 | |||
558 | switch (api_domain) { | ||
559 | case NVGPU_GPU_CLK_DOMAIN_GPCCLK: | ||
560 | err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK, | ||
561 | max_points, fpoints); | ||
562 | if (err || !fpoints) | ||
563 | return err; | ||
564 | for (i = 0; i < *max_points; i++) | ||
565 | fpoints[i] /= 2; | ||
566 | return 0; | ||
567 | case NVGPU_GPU_CLK_DOMAIN_MCLK: | ||
568 | return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK, | ||
569 | max_points, fpoints); | ||
570 | default: | ||
571 | return -EINVAL; | ||
572 | } | ||
573 | } | ||
574 | |||
575 | #ifdef CONFIG_DEBUG_FS | ||
576 | static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) | ||
577 | { | ||
578 | struct gk20a *g = s->private; | ||
579 | struct nvgpu_clk_arb *arb = g->clk_arb; | ||
580 | struct nvgpu_clk_arb_debug *debug; | ||
581 | |||
582 | u64 num; | ||
583 | s64 tmp, avg, std, max, min; | ||
584 | |||
585 | debug = NV_ACCESS_ONCE(arb->debug); | ||
586 | /* Make copy of structure and ensure no reordering */ | ||
587 | nvgpu_smp_rmb(); | ||
588 | if (!debug) | ||
589 | return -EINVAL; | ||
590 | |||
591 | std = debug->switch_std; | ||
592 | avg = debug->switch_avg; | ||
593 | max = debug->switch_max; | ||
594 | min = debug->switch_min; | ||
595 | num = debug->switch_num; | ||
596 | |||
597 | tmp = std; | ||
598 | do_div(tmp, num); | ||
599 | seq_printf(s, "Number of transitions: %lld\n", | ||
600 | num); | ||
601 | seq_printf(s, "max / min : %lld / %lld usec\n", | ||
602 | max, min); | ||
603 | seq_printf(s, "avg / std : %lld / %ld usec\n", | ||
604 | avg, int_sqrt(tmp)); | ||
605 | |||
606 | return 0; | ||
607 | } | ||
608 | |||
/* debugfs open: bind the seq_file show callback to this GPU instance. */
static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
}

/* file_operations for the read-only 'arb_stats' debugfs node */
static const struct file_operations nvgpu_clk_arb_stats_fops = {
	.open = nvgpu_clk_arb_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
620 | |||
621 | |||
622 | int nvgpu_clk_arb_debugfs_init(struct gk20a *g) | ||
623 | { | ||
624 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
625 | struct dentry *gpu_root = l->debugfs; | ||
626 | struct dentry *d; | ||
627 | |||
628 | gk20a_dbg(gpu_dbg_info, "g=%p", g); | ||
629 | |||
630 | d = debugfs_create_file( | ||
631 | "arb_stats", | ||
632 | S_IRUGO, | ||
633 | gpu_root, | ||
634 | g, | ||
635 | &nvgpu_clk_arb_stats_fops); | ||
636 | if (!d) | ||
637 | return -ENOMEM; | ||
638 | |||
639 | return 0; | ||
640 | } | ||
641 | #endif | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h index c13144ee..a2f8135e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h +++ b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. | 2 | * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. |
3 | * | 3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), | 5 | * copy of this software and associated documentation files (the "Software"), |
@@ -24,10 +24,95 @@ | |||
24 | #define __NVGPU_CLK_ARB_H__ | 24 | #define __NVGPU_CLK_ARB_H__ |
25 | 25 | ||
26 | #include <nvgpu/types.h> | 26 | #include <nvgpu/types.h> |
27 | 27 | #include <nvgpu/bitops.h> | |
28 | struct gk20a; | 28 | #include <nvgpu/lock.h> |
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/atomic.h> | ||
31 | #include <nvgpu/bug.h> | ||
32 | #include <nvgpu/kref.h> | ||
33 | #include <nvgpu/log.h> | ||
34 | #include <nvgpu/barrier.h> | ||
35 | #include <nvgpu/cond.h> | ||
36 | |||
37 | #include "gk20a/gk20a.h" | ||
38 | #include "clk/clk.h" | ||
39 | #include "pstate/pstate.h" | ||
40 | #include "lpwr/lpwr.h" | ||
41 | #include "volt/volt.h" | ||
42 | |||
/* Upper bound on VF (voltage/frequency) points tracked per clock domain. */
#define MAX_F_POINTS 256
/* Default capacity of a clk notification queue (entries) -- see
 * nvgpu_clk_notification_queue_alloc(). */
#define DEFAULT_EVENT_NUMBER 32

/* Forward declarations; full definitions live below or in clk_arb_linux.h. */
struct nvgpu_clk_dev;
struct nvgpu_clk_arb_target;
struct nvgpu_clk_notification_queue;
struct nvgpu_clk_session;
30 | 50 | ||
/* Sentinel pstate value: this VF point is reachable from no P-state. */
#define VF_POINT_INVALID_PSTATE ~0U
/* Mark P-state index 'b' as supporting VF point 'a' (sets bit b). */
#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
/* Highest P-state supporting VF point 'a', or VF_POINT_INVALID_PSTATE. */
#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
	__fls((a)->pstates) :\
	VF_POINT_INVALID_PSTATE)
/* Highest P-state supported by both points, or VF_POINT_INVALID_PSTATE. */
#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
	__fls((a)->pstates & (b)->pstates) :\
	VF_POINT_INVALID_PSTATE)

/* Local Alarms */
#define EVENT(alarm)	(0x1UL << NVGPU_GPU_EVENT_##alarm)

#define LOCAL_ALARM_MASK (EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE) | \
				EVENT(VF_UPDATE))

/*
 * True when sequence number 'a' is strictly ahead of 'b' under
 * wrap-around arithmetic.  Fix: arguments are now fully parenthesized
 * (CERT PRE01-C) so expression arguments such as 'x - 1' expand with
 * the intended grouping; the old '((a-b) > 0)' mis-parsed them.
 */
#define _WRAPGTEQ(a, b) (((a) - (b)) > 0)
67 | |||
/*
 * One clock arbiter event record: which event fired (an
 * NVGPU_GPU_EVENT_* identifier -- see the EVENT() macro) and when.
 */
struct nvgpu_clk_notification {
	u32 notification;	/* event/alarm identifier */
	u64 timestamp;		/* time the event was recorded; units set by
				 * the producer -- TODO confirm (likely ns) */
};
72 | |||
/*
 * Fixed-capacity ring buffer of clock arbiter notifications.
 * head/tail are atomic, which suggests lock-free producer/consumer
 * indexing -- NOTE(review): confirm the exact protocol in clk_arb.c.
 */
struct nvgpu_clk_notification_queue {
	u32 size;		/* capacity of notifications[] */
	nvgpu_atomic_t head;	/* producer index */
	nvgpu_atomic_t tail;	/* consumer index */
	/* backing array; allocated by nvgpu_clk_notification_queue_alloc() */
	struct nvgpu_clk_notification *notifications;
};
79 | |||
/*
 * One voltage/frequency operating point.  A point is used either for
 * the GPC/SYS/XBAR domains or for the memory clock; the union lets the
 * same structure serve both (mem_mhz aliases the gpc_mhz storage).
 */
struct nvgpu_clk_vf_point {
	u16 pstates;	/* bitmask of P-states that can reach this point
			 * (manipulated via the VF_POINT_* macros) */
	union {
		struct {
			u16 gpc_mhz;
			u16 sys_mhz;
			u16 xbar_mhz;
		};
		u16 mem_mhz;
	};
	u32 uvolt;	/* required voltage, microvolts */
	u32 uvolt_sram;	/* required SRAM rail voltage, microvolts */
};
93 | |||
/* VF point tables for the two arbitrated clocks: mclk and gpc2clk. */
struct nvgpu_clk_vf_table {
	u32 mclk_num_points;	/* entries in mclk_points[] */
	struct nvgpu_clk_vf_point *mclk_points;
	u32 gpc2clk_num_points;	/* entries in gpc2clk_points[] */
	struct nvgpu_clk_vf_point *gpc2clk_points;
};
#ifdef CONFIG_DEBUG_FS
/*
 * Clock-switch latency statistics shown by the "arb_stats" debugfs
 * node (see nvgpu_clk_arb_stats_show); values printed in usec.
 */
struct nvgpu_clk_arb_debug {
	s64 switch_max;	/* slowest observed switch */
	s64 switch_min;	/* fastest observed switch */
	u64 switch_num;	/* number of switches sampled */
	s64 switch_avg;	/* running mean */
	s64 switch_std;	/* variance accumulator; int_sqrt() is applied
			 * at display time -- TODO confirm exact running
			 * formula in the updater */
};
#endif
109 | |||
/*
 * One arbitrated clock target: the mclk/gpc2clk frequencies (MHz --
 * TODO confirm against the *_mhz fields above) and the P-state chosen
 * to satisfy them.
 */
struct nvgpu_clk_arb_target {
	u16 mclk;
	u16 gpc2clk;
	u32 pstate;
};
115 | |||
31 | int nvgpu_clk_arb_init_arbiter(struct gk20a *g); | 116 | int nvgpu_clk_arb_init_arbiter(struct gk20a *g); |
32 | 117 | ||
33 | int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, | 118 | int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, |
@@ -80,5 +165,19 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock); | |||
80 | void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g); | 165 | void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g); |
81 | 166 | ||
82 | void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm); | 167 | void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm); |
168 | |||
/* Refcount release callback freeing a clk arb session -- NOTE(review):
 * presumably an nvgpu_ref put handler; confirm against clk_arb.c. */
void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount);

/* Refcount release callback freeing a clk arb fd/event object. */
void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount);

/* Allocate storage for 'events_number' notifications in 'queue'.
 * Returns 0 on success -- TODO confirm error-code convention. */
int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
				struct nvgpu_clk_notification_queue *queue,
				size_t events_number);

/* Free the notification storage allocated by
 * nvgpu_clk_notification_queue_alloc(). */
void nvgpu_clk_notification_queue_free(struct gk20a *g,
				struct nvgpu_clk_notification_queue *queue);
#ifdef CONFIG_DEBUG_FS
/* Create the "arb_stats" debugfs node; 0 on success, -ENOMEM otherwise. */
int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
#endif
83 | #endif /* __NVGPU_CLK_ARB_H__ */ | 182 | #endif /* __NVGPU_CLK_ARB_H__ */ |
84 | 183 | ||