path: root/drivers/gpu/nvgpu/common/linux/clk_arb.c
author      Sourab Gupta <sourabg@nvidia.com>  2018-04-19 01:17:46 -0400
committer   mobile promotions <svcmobile_promotions@nvidia.com>  2018-04-26 15:57:04 -0400
commit      9fa77a1c05bf01f253b5118c283086fea2eca1de (patch)
tree        96ca92c998afafb2f5b1796248f1e07848433ab0  /drivers/gpu/nvgpu/common/linux/clk_arb.c
parent      d22d9d8caa06ae5ad2518c598f27869c06d0656d (diff)
gpu: nvgpu: split clk arb code
Clk arbiter code contains two significant portions: one that interacts with userspace and is OS-specific, and another that does the heavy lifting and can be moved to common, OS-agnostic code. Split the code into two files in preparation for refactoring the clk arbiter.

Jira VQRM-3741

Change-Id: I47e2c5b18d86949d02d6963c69c2e2ad161626f7
Signed-off-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1699240
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
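For orientation, the sketch below illustrates the direction of the split, using the two notification-queue helpers whose static qualifier this patch drops (signatures are taken from the diff below; the exact header placement is an assumption, not the actual nvgpu layout):

/*
 * Illustrative sketch only -- not the actual nvgpu headers.
 * Common, OS-agnostic side: the notification-queue helpers become
 * non-static so the arbiter core outside common/linux can call them.
 */
struct gk20a;
struct nvgpu_clk_notification_queue;

int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
		struct nvgpu_clk_notification_queue *queue,
		size_t events_number);
void nvgpu_clk_notification_queue_free(struct gk20a *g,
		struct nvgpu_clk_notification_queue *queue);

/*
 * Linux-specific side (this file plus the newly included clk_arb_linux.h):
 * keeps the userspace plumbing -- file_operations, ioctl, poll and read
 * handlers -- and calls into the common interface above.
 */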
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/clk_arb.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/clk_arb.c  1788
1 file changed, 501 insertions, 1287 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c
index 82c97891..7cb3752a 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -39,224 +39,12 @@
39 39
40#include "gk20a/gk20a.h" 40#include "gk20a/gk20a.h"
41#include "clk/clk.h" 41#include "clk/clk.h"
42#include "clk_arb_linux.h"
42#include "pstate/pstate.h" 43#include "pstate/pstate.h"
43#include "lpwr/lpwr.h" 44#include "lpwr/lpwr.h"
44#include "volt/volt.h" 45#include "volt/volt.h"
45 46
46#ifdef CONFIG_DEBUG_FS 47int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
47#include "common/linux/os_linux.h"
48#endif
49
50#define MAX_F_POINTS 256
51#define DEFAULT_EVENT_NUMBER 32
52
53struct nvgpu_clk_dev;
54struct nvgpu_clk_arb_target;
55struct nvgpu_clk_notification_queue;
56
57#ifdef CONFIG_DEBUG_FS
58static int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
59#endif
60
61static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
62 struct file *filp);
63static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
64 struct file *filp);
65static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
66static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
67 size_t size, loff_t *off);
68
69static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
70 unsigned long arg);
71
72static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
73static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
74static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
75static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount);
76static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount);
77static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
78 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
79 u32 voltuv_sram);
80static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
81 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
82 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram);
83static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
84 struct nvgpu_clk_arb_target *target,
85 u32 alarm_mask);
86static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm);
87static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm);
88
89static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
90 struct nvgpu_clk_notification_queue *queue,
91 u32 alarm_mask);
92static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
93 struct nvgpu_clk_notification_queue *queue,
94 size_t events_number);
95
96static void nvgpu_clk_notification_queue_free(struct gk20a *g,
97 struct nvgpu_clk_notification_queue *queue);
98
99#define VF_POINT_INVALID_PSTATE ~0U
100#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
101#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
102 __fls((a)->pstates) :\
103 VF_POINT_INVALID_PSTATE)
104#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
105 __fls((a)->pstates & (b)->pstates) :\
106 VF_POINT_INVALID_PSTATE)
107
108/* Local Alarms */
109#define EVENT(alarm) (0x1UL << NVGPU_GPU_EVENT_##alarm)
110
111#define LOCAL_ALARM_MASK (EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE) | \
112 EVENT(VF_UPDATE))
113
114#define _WRAPGTEQ(a, b) ((a-b) > 0)
115
116struct nvgpu_clk_notification {
117 u32 notification;
118 u64 timestamp;
119};
120
121struct nvgpu_clk_notification_queue {
122 u32 size;
123 nvgpu_atomic_t head;
124 nvgpu_atomic_t tail;
125 struct nvgpu_clk_notification *notifications;
126};
127
128struct nvgpu_clk_vf_point {
129 u16 pstates;
130 union {
131 struct {
132 u16 gpc_mhz;
133 u16 sys_mhz;
134 u16 xbar_mhz;
135 };
136 u16 mem_mhz;
137 };
138 u32 uvolt;
139 u32 uvolt_sram;
140};
141
142struct nvgpu_clk_vf_table {
143 u32 mclk_num_points;
144 struct nvgpu_clk_vf_point *mclk_points;
145 u32 gpc2clk_num_points;
146 struct nvgpu_clk_vf_point *gpc2clk_points;
147};
148#ifdef CONFIG_DEBUG_FS
149struct nvgpu_clk_arb_debug {
150 s64 switch_max;
151 s64 switch_min;
152 u64 switch_num;
153 s64 switch_avg;
154 s64 switch_std;
155};
156#endif
157
158struct nvgpu_clk_arb_target {
159 u16 mclk;
160 u16 gpc2clk;
161 u32 pstate;
162};
163
164struct nvgpu_clk_arb {
165 struct nvgpu_spinlock sessions_lock;
166 struct nvgpu_spinlock users_lock;
167
168 struct nvgpu_mutex pstate_lock;
169 struct list_head users;
170 struct list_head sessions;
171 struct llist_head requests;
172
173 struct gk20a *g;
174 int status;
175
176 struct nvgpu_clk_arb_target actual_pool[2];
177 struct nvgpu_clk_arb_target *actual;
178
179 u16 gpc2clk_default_mhz;
180 u16 mclk_default_mhz;
181 u32 voltuv_actual;
182
183 u16 gpc2clk_min, gpc2clk_max;
184 u16 mclk_min, mclk_max;
185
186 struct work_struct update_fn_work;
187 struct workqueue_struct *update_work_queue;
188 struct work_struct vf_table_fn_work;
189 struct workqueue_struct *vf_table_work_queue;
190
191 struct nvgpu_cond request_wq;
192
193 struct nvgpu_clk_vf_table *current_vf_table;
194 struct nvgpu_clk_vf_table vf_table_pool[2];
195 u32 vf_table_index;
196
197 u16 *mclk_f_points;
198 nvgpu_atomic_t req_nr;
199
200 u32 mclk_f_numpoints;
201 u16 *gpc2clk_f_points;
202 u32 gpc2clk_f_numpoints;
203
204 nvgpu_atomic64_t alarm_mask;
205 struct nvgpu_clk_notification_queue notification_queue;
206
207#ifdef CONFIG_DEBUG_FS
208 struct nvgpu_clk_arb_debug debug_pool[2];
209 struct nvgpu_clk_arb_debug *debug;
210 bool debugfs_set;
211#endif
212};
213
214struct nvgpu_clk_dev {
215 struct nvgpu_clk_session *session;
216 union {
217 struct list_head link;
218 struct llist_node node;
219 };
220 struct nvgpu_cond readout_wq;
221 nvgpu_atomic_t poll_mask;
222 u16 gpc2clk_target_mhz;
223 u16 mclk_target_mhz;
224 u32 alarms_reported;
225 nvgpu_atomic_t enabled_mask;
226 struct nvgpu_clk_notification_queue queue;
227 u32 arb_queue_head;
228 struct nvgpu_ref refcount;
229};
230
231struct nvgpu_clk_session {
232 bool zombie;
233 struct gk20a *g;
234 struct nvgpu_ref refcount;
235 struct list_head link;
236 struct llist_head targets;
237
238 struct nvgpu_clk_arb_target target_pool[2];
239 struct nvgpu_clk_arb_target *target;
240};
241
242static const struct file_operations completion_dev_ops = {
243 .owner = THIS_MODULE,
244 .release = nvgpu_clk_arb_release_completion_dev,
245 .poll = nvgpu_clk_arb_poll_dev,
246};
247
248static const struct file_operations event_dev_ops = {
249 .owner = THIS_MODULE,
250 .release = nvgpu_clk_arb_release_event_dev,
251 .poll = nvgpu_clk_arb_poll_dev,
252 .read = nvgpu_clk_arb_read_event_dev,
253#ifdef CONFIG_COMPAT
254 .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
255#endif
256 .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
257};
258
259static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
260 struct nvgpu_clk_notification_queue *queue, 48 struct nvgpu_clk_notification_queue *queue,
261 size_t events_number) { 49 size_t events_number) {
262 queue->notifications = nvgpu_kcalloc(g, events_number, 50 queue->notifications = nvgpu_kcalloc(g, events_number,
@@ -271,7 +59,7 @@ static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
271 return 0; 59 return 0;
272} 60}
273 61
274static void nvgpu_clk_notification_queue_free(struct gk20a *g, 62void nvgpu_clk_notification_queue_free(struct gk20a *g,
275 struct nvgpu_clk_notification_queue *queue) { 63 struct nvgpu_clk_notification_queue *queue) {
276 nvgpu_kfree(g, queue->notifications); 64 nvgpu_kfree(g, queue->notifications);
277 queue->size = 0; 65 queue->size = 0;
@@ -279,185 +67,20 @@ static void nvgpu_clk_notification_queue_free(struct gk20a *g,
279 nvgpu_atomic_set(&queue->tail, 0); 67 nvgpu_atomic_set(&queue->tail, 0);
280} 68}
281 69
282int nvgpu_clk_arb_init_arbiter(struct gk20a *g) 70static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
283{ 71 struct nvgpu_clk_notification_queue *queue,
284 struct nvgpu_clk_arb *arb; 72 u32 alarm_mask) {
285 u16 default_mhz;
286 int err;
287 int index;
288 struct nvgpu_clk_vf_table *table;
289
290 gk20a_dbg_fn("");
291
292 if (!g->ops.clk_arb.get_arbiter_clk_domains)
293 return 0;
294
295 arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
296 if (!arb)
297 return -ENOMEM;
298
299 err = nvgpu_mutex_init(&arb->pstate_lock);
300 if (err)
301 goto mutex_fail;
302 nvgpu_spinlock_init(&arb->sessions_lock);
303 nvgpu_spinlock_init(&arb->users_lock);
304
305 arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
306 if (!arb->mclk_f_points) {
307 err = -ENOMEM;
308 goto init_fail;
309 }
310
311 arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
312 if (!arb->gpc2clk_f_points) {
313 err = -ENOMEM;
314 goto init_fail;
315 }
316
317 for (index = 0; index < 2; index++) {
318 table = &arb->vf_table_pool[index];
319 table->gpc2clk_num_points = MAX_F_POINTS;
320 table->mclk_num_points = MAX_F_POINTS;
321
322 table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
323 sizeof(struct nvgpu_clk_vf_point));
324 if (!table->gpc2clk_points) {
325 err = -ENOMEM;
326 goto init_fail;
327 }
328
329
330 table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
331 sizeof(struct nvgpu_clk_vf_point));
332 if (!table->mclk_points) {
333 err = -ENOMEM;
334 goto init_fail;
335 }
336 }
337
338 g->clk_arb = arb;
339 arb->g = g;
340
341 err = g->ops.clk_arb.get_arbiter_clk_default(g,
342 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
343 if (err < 0) {
344 err = -EINVAL;
345 goto init_fail;
346 }
347
348 arb->mclk_default_mhz = default_mhz;
349
350 err = g->ops.clk_arb.get_arbiter_clk_default(g,
351 CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
352 if (err < 0) {
353 err = -EINVAL;
354 goto init_fail;
355 }
356
357 arb->gpc2clk_default_mhz = default_mhz;
358
359 arb->actual = &arb->actual_pool[0];
360
361 nvgpu_atomic_set(&arb->req_nr, 0);
362
363 nvgpu_atomic64_set(&arb->alarm_mask, 0);
364 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
365 DEFAULT_EVENT_NUMBER);
366 if (err < 0)
367 goto init_fail;
368
369 INIT_LIST_HEAD_RCU(&arb->users);
370 INIT_LIST_HEAD_RCU(&arb->sessions);
371 init_llist_head(&arb->requests);
372
373 nvgpu_cond_init(&arb->request_wq);
374 arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
375 "vf_table_update");
376 arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
377 "arbiter_update");
378
379
380 INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
381
382 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
383
384#ifdef CONFIG_DEBUG_FS
385 arb->debug = &arb->debug_pool[0];
386
387 if (!arb->debugfs_set) {
388 if (nvgpu_clk_arb_debugfs_init(g))
389 arb->debugfs_set = true;
390 }
391#endif
392 err = clk_vf_point_cache(g);
393 if (err < 0)
394 goto init_fail;
395
396 err = nvgpu_clk_arb_update_vf_table(arb);
397 if (err < 0)
398 goto init_fail;
399 do {
400 /* Check that first run is completed */
401 nvgpu_smp_mb();
402 NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
403 nvgpu_atomic_read(&arb->req_nr), 0);
404 } while (!nvgpu_atomic_read(&arb->req_nr));
405
406
407 return arb->status;
408
409init_fail:
410 nvgpu_kfree(g, arb->gpc2clk_f_points);
411 nvgpu_kfree(g, arb->mclk_f_points);
412
413 for (index = 0; index < 2; index++) {
414 nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
415 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
416 }
417
418 nvgpu_mutex_destroy(&arb->pstate_lock);
419
420mutex_fail:
421 nvgpu_kfree(g, arb);
422
423 return err;
424}
425
426void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
427{
428 nvgpu_clk_arb_schedule_alarm(g,
429 (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
430}
431
432void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
433{
434 struct nvgpu_clk_arb *arb = g->clk_arb;
435
436 nvgpu_clk_arb_set_global_alarm(g, alarm);
437 if (arb->update_work_queue)
438 queue_work(arb->update_work_queue, &arb->update_fn_work);
439}
440
441static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
442{
443 struct nvgpu_clk_arb *arb = g->clk_arb;
444 73
445 u64 current_mask; 74 u32 queue_index;
446 u32 refcnt; 75 u64 timestamp;
447 u32 alarm_mask;
448 u64 new_mask;
449 76
450 do { 77 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
451 current_mask = nvgpu_atomic64_read(&arb->alarm_mask); 78 /* get current timestamp */
452 /* atomic operations are strong so they do not need masks */ 79 timestamp = (u64) sched_clock();
453 80
454 refcnt = ((u32) (current_mask >> 32)) + 1; 81 queue->notifications[queue_index].timestamp = timestamp;
455 alarm_mask = (u32) (current_mask & ~alarm); 82 queue->notifications[queue_index].notification = alarm_mask;
456 new_mask = ((u64) refcnt << 32) | alarm_mask;
457 83
458 } while (unlikely(current_mask !=
459 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
460 current_mask, new_mask)));
461} 84}
462 85
463static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) 86static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
@@ -482,231 +105,8 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
482 current_mask, new_mask))); 105 current_mask, new_mask)));
483 106
484 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); 107 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm);
485
486}
487
488void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
489{
490 struct nvgpu_clk_arb *arb = g->clk_arb;
491 int index;
492
493 if (arb) {
494 cancel_work_sync(&arb->vf_table_fn_work);
495 destroy_workqueue(arb->vf_table_work_queue);
496 arb->vf_table_work_queue = NULL;
497
498 cancel_work_sync(&arb->update_fn_work);
499 destroy_workqueue(arb->update_work_queue);
500 arb->update_work_queue = NULL;
501
502 nvgpu_kfree(g, arb->gpc2clk_f_points);
503 nvgpu_kfree(g, arb->mclk_f_points);
504
505 for (index = 0; index < 2; index++) {
506 nvgpu_kfree(g,
507 arb->vf_table_pool[index].gpc2clk_points);
508 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
509 }
510 nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
511 nvgpu_kfree(g, g->clk_arb);
512 g->clk_arb = NULL;
513 }
514}
515
516static int nvgpu_clk_arb_install_fd(struct gk20a *g,
517 struct nvgpu_clk_session *session,
518 const struct file_operations *fops,
519 struct nvgpu_clk_dev **_dev)
520{
521 struct file *file;
522 int fd;
523 int err;
524 int status;
525 char name[64];
526 struct nvgpu_clk_dev *dev;
527
528 gk20a_dbg_fn("");
529
530 dev = nvgpu_kzalloc(g, sizeof(*dev));
531 if (!dev)
532 return -ENOMEM;
533
534 status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
535 DEFAULT_EVENT_NUMBER);
536 if (status < 0) {
537 err = status;
538 goto fail;
539 }
540
541 fd = get_unused_fd_flags(O_RDWR);
542 if (fd < 0) {
543 err = fd;
544 goto fail;
545 }
546
547 snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
548 file = anon_inode_getfile(name, fops, dev, O_RDWR);
549 if (IS_ERR(file)) {
550 err = PTR_ERR(file);
551 goto fail_fd;
552 }
553
554 fd_install(fd, file);
555
556 nvgpu_cond_init(&dev->readout_wq);
557
558 nvgpu_atomic_set(&dev->poll_mask, 0);
559
560 dev->session = session;
561 nvgpu_ref_init(&dev->refcount);
562
563 nvgpu_ref_get(&session->refcount);
564
565 *_dev = dev;
566
567 return fd;
568
569fail_fd:
570 put_unused_fd(fd);
571fail:
572 nvgpu_kfree(g, dev);
573
574 return err;
575} 108}
576 109
577int nvgpu_clk_arb_init_session(struct gk20a *g,
578 struct nvgpu_clk_session **_session)
579{
580 struct nvgpu_clk_arb *arb = g->clk_arb;
581 struct nvgpu_clk_session *session = *(_session);
582
583 gk20a_dbg_fn("");
584
585 if (!g->ops.clk_arb.get_arbiter_clk_domains)
586 return 0;
587
588 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
589 if (!session)
590 return -ENOMEM;
591 session->g = g;
592
593 nvgpu_ref_init(&session->refcount);
594
595 session->zombie = false;
596 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
597 /* make sure that the initialization of the pool is visible
598 * before the update
599 */
600 nvgpu_smp_wmb();
601 session->target = &session->target_pool[0];
602
603 init_llist_head(&session->targets);
604
605 nvgpu_spinlock_acquire(&arb->sessions_lock);
606 list_add_tail_rcu(&session->link, &arb->sessions);
607 nvgpu_spinlock_release(&arb->sessions_lock);
608
609 *_session = session;
610
611 return 0;
612}
613
614static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
615{
616 struct nvgpu_clk_dev *dev = container_of(refcount,
617 struct nvgpu_clk_dev, refcount);
618 struct nvgpu_clk_session *session = dev->session;
619
620 nvgpu_kfree(session->g, dev);
621}
622
623static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
624{
625 struct nvgpu_clk_session *session = container_of(refcount,
626 struct nvgpu_clk_session, refcount);
627 struct nvgpu_clk_arb *arb = session->g->clk_arb;
628 struct gk20a *g = session->g;
629 struct nvgpu_clk_dev *dev, *tmp;
630 struct llist_node *head;
631
632 gk20a_dbg_fn("");
633
634 if (arb) {
635 nvgpu_spinlock_acquire(&arb->sessions_lock);
636 list_del_rcu(&session->link);
637 nvgpu_spinlock_release(&arb->sessions_lock);
638 }
639
640 head = llist_del_all(&session->targets);
641 llist_for_each_entry_safe(dev, tmp, head, node) {
642 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
643 }
644 synchronize_rcu();
645 nvgpu_kfree(g, session);
646}
647
648void nvgpu_clk_arb_release_session(struct gk20a *g,
649 struct nvgpu_clk_session *session)
650{
651 struct nvgpu_clk_arb *arb = g->clk_arb;
652
653 gk20a_dbg_fn("");
654
655 session->zombie = true;
656 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
657 if (arb && arb->update_work_queue)
658 queue_work(arb->update_work_queue, &arb->update_fn_work);
659}
660
661int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
662 struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
663{
664 struct nvgpu_clk_arb *arb = g->clk_arb;
665 struct nvgpu_clk_dev *dev;
666 int fd;
667
668 gk20a_dbg_fn("");
669
670 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
671 if (fd < 0)
672 return fd;
673
674 /* TODO: alarm mask needs to be set to default value to prevent
675 * failures of legacy tests. This will be removed when sanity is
676 * updated
677 */
678 if (alarm_mask)
679 nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
680 else
681 nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
682
683 dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
684
685 nvgpu_spinlock_acquire(&arb->users_lock);
686 list_add_tail_rcu(&dev->link, &arb->users);
687 nvgpu_spinlock_release(&arb->users_lock);
688
689 *event_fd = fd;
690
691 return 0;
692}
693
694int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
695 struct nvgpu_clk_session *session, int *request_fd)
696{
697 struct nvgpu_clk_dev *dev;
698 int fd;
699
700 gk20a_dbg_fn("");
701
702 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
703 if (fd < 0)
704 return fd;
705
706 *request_fd = fd;
707
708 return 0;
709}
710 110
711static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) 111static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
712{ 112{
@@ -1014,13 +414,6 @@ exit_vf_table:
1014 return status; 414 return status;
1015} 415}
1016 416
1017void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
1018{
1019 struct nvgpu_clk_arb *arb = g->clk_arb;
1020
1021 if (arb->vf_table_work_queue)
1022 queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
1023}
1024 417
1025static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work) 418static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
1026{ 419{
@@ -1044,6 +437,305 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
1044 nvgpu_clk_arb_update_vf_table(arb); 437 nvgpu_clk_arb_update_vf_table(arb);
1045} 438}
1046 439
440static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
441 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
442 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
443{
444 u16 gpc2clk_target, mclk_target;
445 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
446 u32 mclk_voltuv, mclk_voltuv_sram;
447 u32 pstate = VF_POINT_INVALID_PSTATE;
448 struct nvgpu_clk_vf_table *table;
449 u32 index, index_mclk;
450 struct nvgpu_clk_vf_point *mclk_vf = NULL;
451
452 do {
453 gpc2clk_target = *gpc2clk;
454 mclk_target = *mclk;
455 gpc2clk_voltuv = 0;
456 gpc2clk_voltuv_sram = 0;
457 mclk_voltuv = 0;
458 mclk_voltuv_sram = 0;
459
460 table = NV_ACCESS_ONCE(arb->current_vf_table);
461 /* pointer to table can be updated by callback */
462 nvgpu_smp_rmb();
463
464 if (!table)
465 continue;
466 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
467 nvgpu_err(arb->g, "found empty table");
468 goto find_exit;
469 }
470 /* First we check MCLK to find out which PSTATE we are
471 * are requesting, and from there try to find the minimum
472 * GPC2CLK on the same PSTATE that satisfies the request.
473 * If no GPC2CLK can be found, then we need to up the PSTATE
474 */
475
476recalculate_vf_point:
477 for (index = 0; index < table->mclk_num_points; index++) {
478 if (table->mclk_points[index].mem_mhz >= mclk_target) {
479 mclk_vf = &table->mclk_points[index];
480 break;
481 }
482 }
483 if (index == table->mclk_num_points) {
484 mclk_vf = &table->mclk_points[index-1];
485 index = table->mclk_num_points - 1;
486 }
487 index_mclk = index;
488
489 /* round up the freq requests */
490 for (index = 0; index < table->gpc2clk_num_points; index++) {
491 pstate = VF_POINT_COMMON_PSTATE(
492 &table->gpc2clk_points[index], mclk_vf);
493
494 if ((table->gpc2clk_points[index].gpc_mhz >=
495 gpc2clk_target) &&
496 (pstate != VF_POINT_INVALID_PSTATE)) {
497 gpc2clk_target =
498 table->gpc2clk_points[index].gpc_mhz;
499 *sys2clk =
500 table->gpc2clk_points[index].sys_mhz;
501 *xbar2clk =
502 table->gpc2clk_points[index].xbar_mhz;
503
504 gpc2clk_voltuv =
505 table->gpc2clk_points[index].uvolt;
506 gpc2clk_voltuv_sram =
507 table->gpc2clk_points[index].uvolt_sram;
508 break;
509 }
510 }
511
512 if (index == table->gpc2clk_num_points) {
513 pstate = VF_POINT_COMMON_PSTATE(
514 &table->gpc2clk_points[index-1], mclk_vf);
515 if (pstate != VF_POINT_INVALID_PSTATE) {
516 gpc2clk_target =
517 table->gpc2clk_points[index-1].gpc_mhz;
518 *sys2clk =
519 table->gpc2clk_points[index-1].sys_mhz;
520 *xbar2clk =
521 table->gpc2clk_points[index-1].xbar_mhz;
522
523 gpc2clk_voltuv =
524 table->gpc2clk_points[index-1].uvolt;
525 gpc2clk_voltuv_sram =
526 table->gpc2clk_points[index-1].
527 uvolt_sram;
528 } else if (index_mclk >= table->mclk_num_points - 1) {
529 /* There is no available combination of MCLK
530 * and GPC2CLK, we need to fail this
531 */
532 gpc2clk_target = 0;
533 mclk_target = 0;
534 pstate = VF_POINT_INVALID_PSTATE;
535 goto find_exit;
536 } else {
537 /* recalculate with higher PSTATE */
538 gpc2clk_target = *gpc2clk;
539 mclk_target = table->mclk_points[index_mclk+1].
540 mem_mhz;
541 goto recalculate_vf_point;
542 }
543 }
544
545 mclk_target = mclk_vf->mem_mhz;
546 mclk_voltuv = mclk_vf->uvolt;
547 mclk_voltuv_sram = mclk_vf->uvolt_sram;
548
549 } while (!table ||
550 (NV_ACCESS_ONCE(arb->current_vf_table) != table));
551
552find_exit:
553 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
554 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
555 gpc2clk_voltuv_sram : mclk_voltuv_sram;
556 /* noise unaware vmin */
557 *nuvmin = mclk_voltuv;
558 *nuvmin_sram = mclk_voltuv_sram;
559 *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
560 *mclk = mclk_target;
561 return pstate;
562}
563
564static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
565 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
566 u32 voltuv_sram)
567{
568 struct set_fll_clk fllclk;
569 struct nvgpu_clk_arb *arb = g->clk_arb;
570 int status;
571
572 fllclk.gpc2clkmhz = gpc2clk_target;
573 fllclk.sys2clkmhz = sys2clk_target;
574 fllclk.xbar2clkmhz = xbar2clk_target;
575
576 fllclk.voltuv = voltuv;
577
578 /* if voltage ascends we do:
579 * (1) FLL change
580 * (2) Voltage change
581 * (3) MCLK change
582 * If it goes down
583 * (1) MCLK change
584 * (2) Voltage change
585 * (3) FLL change
586 */
587
588 /* descending */
589 if (voltuv < arb->voltuv_actual) {
590 status = g->ops.clk.mclk_change(g, mclk_target);
591 if (status < 0)
592 return status;
593
594 status = volt_set_voltage(g, voltuv, voltuv_sram);
595 if (status < 0)
596 return status;
597
598 status = clk_set_fll_clks(g, &fllclk);
599 if (status < 0)
600 return status;
601 } else {
602 status = clk_set_fll_clks(g, &fllclk);
603 if (status < 0)
604 return status;
605
606 status = volt_set_voltage(g, voltuv, voltuv_sram);
607 if (status < 0)
608 return status;
609
610 status = g->ops.clk.mclk_change(g, mclk_target);
611 if (status < 0)
612 return status;
613 }
614
615 return 0;
616}
617
618static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
619 struct nvgpu_clk_arb_target *target,
620 u32 alarm) {
621
622 struct nvgpu_clk_session *session = dev->session;
623 struct nvgpu_clk_arb *arb = session->g->clk_arb;
624 struct nvgpu_clk_notification *notification;
625
626 u32 queue_alarm_mask = 0;
627 u32 enabled_mask = 0;
628 u32 new_alarms_reported = 0;
629 u32 poll_mask = 0;
630 u32 tail, head;
631 u32 queue_index;
632 size_t size;
633 int index;
634
635 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask);
636 size = arb->notification_queue.size;
637
638 /* queue global arbiter notifications in buffer */
639 do {
640 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
641 /* copy items to the queue */
642 queue_index = nvgpu_atomic_read(&dev->queue.tail);
643 head = dev->arb_queue_head;
644 head = (tail - head) < arb->notification_queue.size ?
645 head : tail - arb->notification_queue.size;
646
647 for (index = head; _WRAPGTEQ(tail, index); index++) {
648 u32 alarm_detected;
649
650 notification = &arb->notification_queue.
651 notifications[(index+1) % size];
652 alarm_detected =
653 NV_ACCESS_ONCE(notification->notification);
654
655 if (!(enabled_mask & alarm_detected))
656 continue;
657
658 queue_index++;
659 dev->queue.notifications[
660 queue_index % dev->queue.size].timestamp =
661 NV_ACCESS_ONCE(notification->timestamp);
662
663 dev->queue.notifications[
664 queue_index % dev->queue.size].notification =
665 alarm_detected;
666
667 queue_alarm_mask |= alarm_detected;
668 }
669 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
670 (int)tail));
671
672 nvgpu_atomic_set(&dev->queue.tail, queue_index);
673 /* update the last notification we processed from global queue */
674
675 dev->arb_queue_head = tail;
676
677 /* Check if current session targets are met */
678 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
679 if ((target->gpc2clk < session->target->gpc2clk)
680 || (target->mclk < session->target->mclk)) {
681
682 poll_mask |= (POLLIN | POLLPRI);
683 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
684 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
685 }
686 }
687
688 /* Check if there is a new VF update */
689 if (queue_alarm_mask & EVENT(VF_UPDATE))
690 poll_mask |= (POLLIN | POLLRDNORM);
691
692 /* Notify sticky alarms that were not reported on previous run*/
693 new_alarms_reported = (queue_alarm_mask |
694 (alarm & ~dev->alarms_reported & queue_alarm_mask));
695
696 if (new_alarms_reported & ~LOCAL_ALARM_MASK) {
697 /* check that we are not re-reporting */
698 if (new_alarms_reported & EVENT(ALARM_GPU_LOST))
699 poll_mask |= POLLHUP;
700
701 poll_mask |= (POLLIN | POLLPRI);
702 /* On next run do not report global alarms that were already
703 * reported, but report SHUTDOWN always
704 */
705 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
706 ~EVENT(ALARM_GPU_LOST);
707 }
708
709 if (poll_mask) {
710 nvgpu_atomic_set(&dev->poll_mask, poll_mask);
711 nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
712 }
713
714 return new_alarms_reported;
715}
716
717static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
718{
719 struct nvgpu_clk_arb *arb = g->clk_arb;
720
721 u64 current_mask;
722 u32 refcnt;
723 u32 alarm_mask;
724 u64 new_mask;
725
726 do {
727 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
728 /* atomic operations are strong so they do not need masks */
729
730 refcnt = ((u32) (current_mask >> 32)) + 1;
731 alarm_mask = (u32) (current_mask & ~alarm);
732 new_mask = ((u64) refcnt << 32) | alarm_mask;
733
734 } while (unlikely(current_mask !=
735 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
736 current_mask, new_mask)));
737}
738
1047static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) 739static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1048{ 740{
1049 struct nvgpu_clk_arb *arb = 741 struct nvgpu_clk_arb *arb =
@@ -1345,639 +1037,283 @@ exit_arb:
1345 ~EVENT(ALARM_GPU_LOST)); 1037 ~EVENT(ALARM_GPU_LOST));
1346} 1038}
1347 1039
1348static void nvgpu_clk_arb_queue_notification(struct gk20a *g, 1040int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
1349 struct nvgpu_clk_notification_queue *queue, 1041{
1350 u32 alarm_mask) { 1042 struct nvgpu_clk_arb *arb;
1351 1043 u16 default_mhz;
1352 u32 queue_index; 1044 int err;
1353 u64 timestamp;
1354
1355 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
1356 /* get current timestamp */
1357 timestamp = (u64) sched_clock();
1358
1359 queue->notifications[queue_index].timestamp = timestamp;
1360 queue->notifications[queue_index].notification = alarm_mask;
1361
1362}
1363
1364static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
1365 struct nvgpu_clk_arb_target *target,
1366 u32 alarm) {
1367
1368 struct nvgpu_clk_session *session = dev->session;
1369 struct nvgpu_clk_arb *arb = session->g->clk_arb;
1370 struct nvgpu_clk_notification *notification;
1371
1372 u32 queue_alarm_mask = 0;
1373 u32 enabled_mask = 0;
1374 u32 new_alarms_reported = 0;
1375 u32 poll_mask = 0;
1376 u32 tail, head;
1377 u32 queue_index;
1378 size_t size;
1379 int index; 1045 int index;
1046 struct nvgpu_clk_vf_table *table;
1380 1047
1381 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask); 1048 gk20a_dbg_fn("");
1382 size = arb->notification_queue.size;
1383
1384 /* queue global arbiter notifications in buffer */
1385 do {
1386 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
1387 /* copy items to the queue */
1388 queue_index = nvgpu_atomic_read(&dev->queue.tail);
1389 head = dev->arb_queue_head;
1390 head = (tail - head) < arb->notification_queue.size ?
1391 head : tail - arb->notification_queue.size;
1392
1393 for (index = head; _WRAPGTEQ(tail, index); index++) {
1394 u32 alarm_detected;
1395
1396 notification = &arb->notification_queue.
1397 notifications[(index+1) % size];
1398 alarm_detected =
1399 NV_ACCESS_ONCE(notification->notification);
1400
1401 if (!(enabled_mask & alarm_detected))
1402 continue;
1403
1404 queue_index++;
1405 dev->queue.notifications[
1406 queue_index % dev->queue.size].timestamp =
1407 NV_ACCESS_ONCE(notification->timestamp);
1408
1409 dev->queue.notifications[
1410 queue_index % dev->queue.size].notification =
1411 alarm_detected;
1412
1413 queue_alarm_mask |= alarm_detected;
1414 }
1415 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
1416 (int)tail));
1417
1418 nvgpu_atomic_set(&dev->queue.tail, queue_index);
1419 /* update the last notification we processed from global queue */
1420
1421 dev->arb_queue_head = tail;
1422
1423 /* Check if current session targets are met */
1424 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
1425 if ((target->gpc2clk < session->target->gpc2clk)
1426 || (target->mclk < session->target->mclk)) {
1427
1428 poll_mask |= (POLLIN | POLLPRI);
1429 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
1430 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
1431 }
1432 }
1433 1049
1434 /* Check if there is a new VF update */ 1050 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1435 if (queue_alarm_mask & EVENT(VF_UPDATE)) 1051 return 0;
1436 poll_mask |= (POLLIN | POLLRDNORM);
1437 1052
1438 /* Notify sticky alarms that were not reported on previous run*/ 1053 arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
1439 new_alarms_reported = (queue_alarm_mask | 1054 if (!arb)
1440 (alarm & ~dev->alarms_reported & queue_alarm_mask)); 1055 return -ENOMEM;
1441 1056
1442 if (new_alarms_reported & ~LOCAL_ALARM_MASK) { 1057 err = nvgpu_mutex_init(&arb->pstate_lock);
1443 /* check that we are not re-reporting */ 1058 if (err)
1444 if (new_alarms_reported & EVENT(ALARM_GPU_LOST)) 1059 goto mutex_fail;
1445 poll_mask |= POLLHUP; 1060 nvgpu_spinlock_init(&arb->sessions_lock);
1061 nvgpu_spinlock_init(&arb->users_lock);
1446 1062
1447 poll_mask |= (POLLIN | POLLPRI); 1063 arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1448 /* On next run do not report global alarms that were already 1064 if (!arb->mclk_f_points) {
1449 * reported, but report SHUTDOWN always 1065 err = -ENOMEM;
1450 */ 1066 goto init_fail;
1451 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
1452 ~EVENT(ALARM_GPU_LOST);
1453 } 1067 }
1454 1068
1455 if (poll_mask) { 1069 arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1456 nvgpu_atomic_set(&dev->poll_mask, poll_mask); 1070 if (!arb->gpc2clk_f_points) {
1457 nvgpu_cond_broadcast_interruptible(&dev->readout_wq); 1071 err = -ENOMEM;
1072 goto init_fail;
1458 } 1073 }
1459 1074
1460 return new_alarms_reported; 1075 for (index = 0; index < 2; index++) {
1461} 1076 table = &arb->vf_table_pool[index];
1462 1077 table->gpc2clk_num_points = MAX_F_POINTS;
1463static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, 1078 table->mclk_num_points = MAX_F_POINTS;
1464 struct nvgpu_gpu_set_event_filter_args *args)
1465{
1466 u32 mask;
1467
1468 gk20a_dbg(gpu_dbg_fn, "");
1469
1470 if (args->flags)
1471 return -EINVAL;
1472
1473 if (args->size != 1)
1474 return -EINVAL;
1475
1476 if (copy_from_user(&mask, (void __user *) args->buffer,
1477 args->size * sizeof(u32)))
1478 return -EFAULT;
1479
1480 /* update alarm mask */
1481 nvgpu_atomic_set(&dev->enabled_mask, mask);
1482
1483 return 0;
1484}
1485
1486static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
1487 unsigned long arg)
1488{
1489 struct nvgpu_clk_dev *dev = filp->private_data;
1490 struct gk20a *g = dev->session->g;
1491 u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
1492 int err = 0;
1493
1494 gk20a_dbg(gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
1495
1496 if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
1497 || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
1498 return -EINVAL;
1499 1079
1500 BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); 1080 table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1081 sizeof(struct nvgpu_clk_vf_point));
1082 if (!table->gpc2clk_points) {
1083 err = -ENOMEM;
1084 goto init_fail;
1085 }
1501 1086
1502 memset(buf, 0, sizeof(buf));
1503 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1504 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
1505 return -EFAULT;
1506 }
1507 1087
1508 switch (cmd) { 1088 table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1509 case NVGPU_EVENT_IOCTL_SET_FILTER: 1089 sizeof(struct nvgpu_clk_vf_point));
1510 err = nvgpu_clk_arb_set_event_filter(dev, 1090 if (!table->mclk_points) {
1511 (struct nvgpu_gpu_set_event_filter_args *)buf); 1091 err = -ENOMEM;
1512 break; 1092 goto init_fail;
1513 default: 1093 }
1514 nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
1515 err = -ENOTTY;
1516 } 1094 }
1517 1095
1518 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) 1096 g->clk_arb = arb;
1519 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); 1097 arb->g = g;
1520
1521 return err;
1522}
1523
1524int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
1525 struct nvgpu_clk_session *session, int request_fd)
1526{
1527 struct nvgpu_clk_arb *arb = g->clk_arb;
1528 struct nvgpu_clk_dev *dev;
1529 struct fd fd;
1530 int err = 0;
1531
1532 gk20a_dbg_fn("");
1533
1534 fd = fdget(request_fd);
1535 if (!fd.file)
1536 return -EINVAL;
1537 1098
1538 if (fd.file->f_op != &completion_dev_ops) { 1099 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1100 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
1101 if (err < 0) {
1539 err = -EINVAL; 1102 err = -EINVAL;
1540 goto fdput_fd; 1103 goto init_fail;
1541 } 1104 }
1542 1105
1543 dev = (struct nvgpu_clk_dev *) fd.file->private_data; 1106 arb->mclk_default_mhz = default_mhz;
1544 1107
1545 if (!dev || dev->session != session) { 1108 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1109 CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
1110 if (err < 0) {
1546 err = -EINVAL; 1111 err = -EINVAL;
1547 goto fdput_fd; 1112 goto init_fail;
1548 } 1113 }
1549 nvgpu_ref_get(&dev->refcount);
1550 llist_add(&dev->node, &session->targets);
1551 if (arb->update_work_queue)
1552 queue_work(arb->update_work_queue, &arb->update_fn_work);
1553 1114
1554fdput_fd: 1115 arb->gpc2clk_default_mhz = default_mhz;
1555 fdput(fd);
1556 return err;
1557}
1558 1116
1559static inline u32 __pending_event(struct nvgpu_clk_dev *dev, 1117 arb->actual = &arb->actual_pool[0];
1560 struct nvgpu_gpu_event_info *info) {
1561 1118
1562 u32 tail, head; 1119 nvgpu_atomic_set(&arb->req_nr, 0);
1563 u32 events = 0;
1564 struct nvgpu_clk_notification *p_notif;
1565 1120
1566 tail = nvgpu_atomic_read(&dev->queue.tail); 1121 nvgpu_atomic64_set(&arb->alarm_mask, 0);
1567 head = nvgpu_atomic_read(&dev->queue.head); 1122 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
1123 DEFAULT_EVENT_NUMBER);
1124 if (err < 0)
1125 goto init_fail;
1568 1126
1569 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; 1127 INIT_LIST_HEAD_RCU(&arb->users);
1128 INIT_LIST_HEAD_RCU(&arb->sessions);
1129 init_llist_head(&arb->requests);
1570 1130
1571 if (_WRAPGTEQ(tail, head) && info) { 1131 nvgpu_cond_init(&arb->request_wq);
1572 head++; 1132 arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
1573 p_notif = &dev->queue.notifications[head % dev->queue.size]; 1133 "vf_table_update");
1574 events |= p_notif->notification; 1134 arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
1575 info->event_id = ffs(events) - 1; 1135 "arbiter_update");
1576 info->timestamp = p_notif->timestamp;
1577 nvgpu_atomic_set(&dev->queue.head, head);
1578 }
1579 1136
1580 return events;
1581}
1582 1137
1583static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, 1138 INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
1584 size_t size, loff_t *off)
1585{
1586 struct nvgpu_clk_dev *dev = filp->private_data;
1587 struct nvgpu_gpu_event_info info;
1588 ssize_t err;
1589 1139
1590 gk20a_dbg_fn("filp=%p, buf=%p, size=%zu", filp, buf, size); 1140 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
1591 1141
1592 if ((size - *off) < sizeof(info)) 1142#ifdef CONFIG_DEBUG_FS
1593 return 0; 1143 arb->debug = &arb->debug_pool[0];
1594 1144
1595 memset(&info, 0, sizeof(info)); 1145 if (!arb->debugfs_set) {
1596 /* Get the oldest event from the queue */ 1146 if (nvgpu_clk_arb_debugfs_init(g))
1597 while (!__pending_event(dev, &info)) { 1147 arb->debugfs_set = true;
1598 if (filp->f_flags & O_NONBLOCK)
1599 return -EAGAIN;
1600 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
1601 __pending_event(dev, &info), 0);
1602 if (err)
1603 return err;
1604 if (info.timestamp)
1605 break;
1606 } 1148 }
1149#endif
1150 err = clk_vf_point_cache(g);
1151 if (err < 0)
1152 goto init_fail;
1607 1153
1608 if (copy_to_user(buf + *off, &info, sizeof(info))) 1154 err = nvgpu_clk_arb_update_vf_table(arb);
1609 return -EFAULT; 1155 if (err < 0)
1610 1156 goto init_fail;
1611 return sizeof(info); 1157 do {
1612} 1158 /* Check that first run is completed */
1159 nvgpu_smp_mb();
1160 NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
1161 nvgpu_atomic_read(&arb->req_nr), 0);
1162 } while (!nvgpu_atomic_read(&arb->req_nr));
1613 1163
1614static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
1615{
1616 struct nvgpu_clk_dev *dev = filp->private_data;
1617 1164
1618 gk20a_dbg_fn(""); 1165 return arb->status;
1619 1166
1620 poll_wait(filp, &dev->readout_wq.wq, wait); 1167init_fail:
1621 return nvgpu_atomic_xchg(&dev->poll_mask, 0); 1168 nvgpu_kfree(g, arb->gpc2clk_f_points);
1622} 1169 nvgpu_kfree(g, arb->mclk_f_points);
1623 1170
1624static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, 1171 for (index = 0; index < 2; index++) {
1625 struct file *filp) 1172 nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
1626{ 1173 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1627 struct nvgpu_clk_dev *dev = filp->private_data; 1174 }
1628 struct nvgpu_clk_session *session = dev->session;
1629 1175
1176 nvgpu_mutex_destroy(&arb->pstate_lock);
1630 1177
1631 gk20a_dbg_fn(""); 1178mutex_fail:
1179 nvgpu_kfree(g, arb);
1632 1180
1633 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); 1181 return err;
1634 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1635 return 0;
1636} 1182}
1637 1183
1638static int nvgpu_clk_arb_release_event_dev(struct inode *inode, 1184void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
1639 struct file *filp)
1640{ 1185{
1641 struct nvgpu_clk_dev *dev = filp->private_data; 1186 nvgpu_clk_arb_schedule_alarm(g,
1642 struct nvgpu_clk_session *session = dev->session; 1187 (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
1643 struct nvgpu_clk_arb *arb;
1644
1645 arb = session->g->clk_arb;
1646
1647 gk20a_dbg_fn("");
1648
1649 if (arb) {
1650 nvgpu_spinlock_acquire(&arb->users_lock);
1651 list_del_rcu(&dev->link);
1652 nvgpu_spinlock_release(&arb->users_lock);
1653 nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
1654 }
1655
1656 synchronize_rcu();
1657 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
1658 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1659
1660 return 0;
1661} 1188}
1662 1189
1663int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, 1190void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
1664 int request_fd, u32 api_domain, u16 target_mhz)
1665{ 1191{
1666 struct nvgpu_clk_dev *dev; 1192 struct nvgpu_clk_arb *arb = g->clk_arb;
1667 struct fd fd;
1668 int err = 0;
1669
1670 gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
1671
1672 fd = fdget(request_fd);
1673 if (!fd.file)
1674 return -EINVAL;
1675
1676 if (fd.file->f_op != &completion_dev_ops) {
1677 err = -EINVAL;
1678 goto fdput_fd;
1679 }
1680
1681 dev = fd.file->private_data;
1682 if (!dev || dev->session != session) {
1683 err = -EINVAL;
1684 goto fdput_fd;
1685 }
1686
1687 switch (api_domain) {
1688 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1689 dev->mclk_target_mhz = target_mhz;
1690 break;
1691
1692 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1693 dev->gpc2clk_target_mhz = target_mhz * 2ULL;
1694 break;
1695
1696 default:
1697 err = -EINVAL;
1698 }
1699 1193
1700fdput_fd: 1194 nvgpu_clk_arb_set_global_alarm(g, alarm);
1701 fdput(fd); 1195 if (arb->update_work_queue)
1702 return err; 1196 queue_work(arb->update_work_queue, &arb->update_fn_work);
1703} 1197}
1704 1198
1705int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, 1199void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
1706 u32 api_domain, u16 *freq_mhz)
1707{ 1200{
1708 int err = 0; 1201 struct nvgpu_clk_arb *arb = g->clk_arb;
1709 struct nvgpu_clk_arb_target *target; 1202 int index;
1710 1203
1711 do { 1204 if (arb) {
1712 target = NV_ACCESS_ONCE(session->target); 1205 cancel_work_sync(&arb->vf_table_fn_work);
1713 /* no reordering of this pointer */ 1206 destroy_workqueue(arb->vf_table_work_queue);
1714 nvgpu_smp_rmb(); 1207 arb->vf_table_work_queue = NULL;
1715 1208
1716 switch (api_domain) { 1209 cancel_work_sync(&arb->update_fn_work);
1717 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1210 destroy_workqueue(arb->update_work_queue);
1718 *freq_mhz = target->mclk; 1211 arb->update_work_queue = NULL;
1719 break;
1720 1212
1721 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1213 nvgpu_kfree(g, arb->gpc2clk_f_points);
1722 *freq_mhz = target->gpc2clk / 2ULL; 1214 nvgpu_kfree(g, arb->mclk_f_points);
1723 break;
1724 1215
1725 default: 1216 for (index = 0; index < 2; index++) {
1726 *freq_mhz = 0; 1217 nvgpu_kfree(g,
1727 err = -EINVAL; 1218 arb->vf_table_pool[index].gpc2clk_points);
1219 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1728 } 1220 }
1729 } while (target != NV_ACCESS_ONCE(session->target)); 1221 nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
1730 return err; 1222 nvgpu_kfree(g, g->clk_arb);
1223 g->clk_arb = NULL;
1224 }
1731} 1225}
1732 1226
1733int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, 1227int nvgpu_clk_arb_init_session(struct gk20a *g,
1734 u32 api_domain, u16 *freq_mhz) 1228 struct nvgpu_clk_session **_session)
1735{ 1229{
1736 struct nvgpu_clk_arb *arb = g->clk_arb; 1230 struct nvgpu_clk_arb *arb = g->clk_arb;
1737 int err = 0; 1231 struct nvgpu_clk_session *session = *(_session);
1738 struct nvgpu_clk_arb_target *actual;
1739 1232
1740 do { 1233 gk20a_dbg_fn("");
1741 actual = NV_ACCESS_ONCE(arb->actual);
1742 /* no reordering of this pointer */
1743 nvgpu_smp_rmb();
1744 1234
1745 switch (api_domain) { 1235 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1746 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1236 return 0;
1747 *freq_mhz = actual->mclk;
1748 break;
1749 1237
1750 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1238 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
1751 *freq_mhz = actual->gpc2clk / 2ULL; 1239 if (!session)
1752 break; 1240 return -ENOMEM;
1241 session->g = g;
1753 1242
1754 default: 1243 nvgpu_ref_init(&session->refcount);
1755 *freq_mhz = 0;
1756 err = -EINVAL;
1757 }
1758 } while (actual != NV_ACCESS_ONCE(arb->actual));
1759 return err;
1760}
1761 1244
1762int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, 1245 session->zombie = false;
1763 u32 api_domain, u16 *freq_mhz) 1246 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
1764{ 1247 /* make sure that the initialization of the pool is visible
1765 switch (api_domain) { 1248 * before the update
1766 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1249 */
1767 *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) / 1250 nvgpu_smp_wmb();
1768 1000000ULL; 1251 session->target = &session->target_pool[0];
1769 return 0;
1770 1252
1771 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1253 init_llist_head(&session->targets);
1772 *freq_mhz = g->ops.clk.measure_freq(g,
1773 CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL;
1774 return 0;
1775 1254
1776 default: 1255 nvgpu_spinlock_acquire(&arb->sessions_lock);
1777 return -EINVAL; 1256 list_add_tail_rcu(&session->link, &arb->sessions);
1778 } 1257 nvgpu_spinlock_release(&arb->sessions_lock);
1779}
1780 1258
1781int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 1259 *_session = session;
1782 u16 *min_mhz, u16 *max_mhz)
1783{
1784 int ret;
1785
1786 switch (api_domain) {
1787 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1788 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
1789 CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz);
1790 return ret;
1791
1792 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1793 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
1794 CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz);
1795 if (!ret) {
1796 *min_mhz /= 2;
1797 *max_mhz /= 2;
1798 }
1799 return ret;
1800 1260
1801 default: 1261 return 0;
1802 return -EINVAL;
1803 }
1804} 1262}
1805 1263
1806u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) 1264void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
1807{ 1265{
1808 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); 1266 struct nvgpu_clk_dev *dev = container_of(refcount,
1809 u32 api_domains = 0; 1267 struct nvgpu_clk_dev, refcount);
1810 1268 struct nvgpu_clk_session *session = dev->session;
1811 if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
1812 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
1813
1814 if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
1815 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
1816 1269
1817 return api_domains; 1270 nvgpu_kfree(session->g, dev);
1818} 1271}
1819 1272
1820bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) 1273void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
1821{ 1274{
1822 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); 1275 struct nvgpu_clk_session *session = container_of(refcount,
1823 1276 struct nvgpu_clk_session, refcount);
1824 switch (api_domain) { 1277 struct nvgpu_clk_arb *arb = session->g->clk_arb;
1825 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1278 struct gk20a *g = session->g;
1826 return ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0); 1279 struct nvgpu_clk_dev *dev, *tmp;
1280 struct llist_node *head;
1827 1281
1828 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1282 gk20a_dbg_fn("");
1829 return ((clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0);
1830 1283
1831 default: 1284 if (arb) {
1832 return false; 1285 nvgpu_spinlock_acquire(&arb->sessions_lock);
1286 list_del_rcu(&session->link);
1287 nvgpu_spinlock_release(&arb->sessions_lock);
1833 } 1288 }
1834}
1835 1289
1836int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, 1290 head = llist_del_all(&session->targets);
1837 u32 api_domain, u32 *max_points, u16 *fpoints) 1291 llist_for_each_entry_safe(dev, tmp, head, node) {
1838{ 1292 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1839 int err;
1840 u32 i;
1841
1842 switch (api_domain) {
1843 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1844 err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK,
1845 max_points, fpoints);
1846 if (err || !fpoints)
1847 return err;
1848 for (i = 0; i < *max_points; i++)
1849 fpoints[i] /= 2;
1850 return 0;
1851 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1852 return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK,
1853 max_points, fpoints);
1854 default:
1855 return -EINVAL;
1856 } 1293 }
1294 synchronize_rcu();
1295 nvgpu_kfree(g, session);
1857} 1296}
1858 1297
1859static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, 1298void nvgpu_clk_arb_release_session(struct gk20a *g,
1860 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, 1299 struct nvgpu_clk_session *session)
1861 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
1862{ 1300{
1863 u16 gpc2clk_target, mclk_target; 1301 struct nvgpu_clk_arb *arb = g->clk_arb;
1864 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
1865 u32 mclk_voltuv, mclk_voltuv_sram;
1866 u32 pstate = VF_POINT_INVALID_PSTATE;
1867 struct nvgpu_clk_vf_table *table;
1868 u32 index, index_mclk;
1869 struct nvgpu_clk_vf_point *mclk_vf = NULL;
1870
1871 do {
1872 gpc2clk_target = *gpc2clk;
1873 mclk_target = *mclk;
1874 gpc2clk_voltuv = 0;
1875 gpc2clk_voltuv_sram = 0;
1876 mclk_voltuv = 0;
1877 mclk_voltuv_sram = 0;
1878
1879 table = NV_ACCESS_ONCE(arb->current_vf_table);
1880 /* pointer to table can be updated by callback */
1881 nvgpu_smp_rmb();
1882
1883 if (!table)
1884 continue;
1885 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
1886 nvgpu_err(arb->g, "found empty table");
1887 goto find_exit;
1888 }
1889 /* First we check MCLK to find out which PSTATE we are
1890 * are requesting, and from there try to find the minimum
1891 * GPC2CLK on the same PSTATE that satisfies the request.
1892 * If no GPC2CLK can be found, then we need to up the PSTATE
1893 */
1894
1895recalculate_vf_point:
1896 for (index = 0; index < table->mclk_num_points; index++) {
1897 if (table->mclk_points[index].mem_mhz >= mclk_target) {
1898 mclk_vf = &table->mclk_points[index];
1899 break;
1900 }
1901 }
1902 if (index == table->mclk_num_points) {
1903 mclk_vf = &table->mclk_points[index-1];
1904 index = table->mclk_num_points - 1;
1905 }
1906 index_mclk = index;
1907
1908 /* round up the freq requests */
1909 for (index = 0; index < table->gpc2clk_num_points; index++) {
1910 pstate = VF_POINT_COMMON_PSTATE(
1911 &table->gpc2clk_points[index], mclk_vf);
1912
1913 if ((table->gpc2clk_points[index].gpc_mhz >=
1914 gpc2clk_target) &&
1915 (pstate != VF_POINT_INVALID_PSTATE)) {
1916 gpc2clk_target =
1917 table->gpc2clk_points[index].gpc_mhz;
1918 *sys2clk =
1919 table->gpc2clk_points[index].sys_mhz;
1920 *xbar2clk =
1921 table->gpc2clk_points[index].xbar_mhz;
1922
1923 gpc2clk_voltuv =
1924 table->gpc2clk_points[index].uvolt;
1925 gpc2clk_voltuv_sram =
1926 table->gpc2clk_points[index].uvolt_sram;
1927 break;
1928 }
1929 }
1930
1931 if (index == table->gpc2clk_num_points) {
1932 pstate = VF_POINT_COMMON_PSTATE(
1933 &table->gpc2clk_points[index-1], mclk_vf);
1934 if (pstate != VF_POINT_INVALID_PSTATE) {
1935 gpc2clk_target =
1936 table->gpc2clk_points[index-1].gpc_mhz;
1937 *sys2clk =
1938 table->gpc2clk_points[index-1].sys_mhz;
1939 *xbar2clk =
1940 table->gpc2clk_points[index-1].xbar_mhz;
1941 1302
1942 gpc2clk_voltuv = 1303 gk20a_dbg_fn("");
1943 table->gpc2clk_points[index-1].uvolt;
1944 gpc2clk_voltuv_sram =
1945 table->gpc2clk_points[index-1].
1946 uvolt_sram;
1947 } else if (index_mclk >= table->mclk_num_points - 1) {
1948 /* There is no available combination of MCLK
1949 * and GPC2CLK, we need to fail this
1950 */
1951 gpc2clk_target = 0;
1952 mclk_target = 0;
1953 pstate = VF_POINT_INVALID_PSTATE;
1954 goto find_exit;
1955 } else {
1956 /* recalculate with higher PSTATE */
1957 gpc2clk_target = *gpc2clk;
1958 mclk_target = table->mclk_points[index_mclk+1].
1959 mem_mhz;
1960 goto recalculate_vf_point;
1961 }
1962 }
1963 1304
1964 mclk_target = mclk_vf->mem_mhz; 1305 session->zombie = true;
1965 mclk_voltuv = mclk_vf->uvolt; 1306 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
1966 mclk_voltuv_sram = mclk_vf->uvolt_sram; 1307 if (arb && arb->update_work_queue)
1308 queue_work(arb->update_work_queue, &arb->update_fn_work);
1309}
1967 1310
1968 } while (!table || 1311void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
1969 (NV_ACCESS_ONCE(arb->current_vf_table) != table)); 1312{
1313 struct nvgpu_clk_arb *arb = g->clk_arb;
1970 1314
1971find_exit: 1315 if (arb->vf_table_work_queue)
1972 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; 1316 queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
1973 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
1974 gpc2clk_voltuv_sram : mclk_voltuv_sram;
1975 /* noise unaware vmin */
1976 *nuvmin = mclk_voltuv;
1977 *nuvmin_sram = mclk_voltuv_sram;
1978 *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
1979 *mclk = mclk_target;
1980 return pstate;
1981} 1317}
1982 1318
1983/* This function is inherently unsafe to call while arbiter is running 1319/* This function is inherently unsafe to call while arbiter is running
@@ -1988,60 +1324,6 @@ int nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
1988 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate); 1324 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate);
1989} 1325}
1990 1326
1991static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1992 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
1993 u32 voltuv_sram)
1994{
1995 struct set_fll_clk fllclk;
1996 struct nvgpu_clk_arb *arb = g->clk_arb;
1997 int status;
1998
1999 fllclk.gpc2clkmhz = gpc2clk_target;
2000 fllclk.sys2clkmhz = sys2clk_target;
2001 fllclk.xbar2clkmhz = xbar2clk_target;
2002
2003 fllclk.voltuv = voltuv;
2004
2005 /* if voltage ascends we do:
2006 * (1) FLL change
2007 * (2) Voltage change
2008 * (3) MCLK change
2009 * If it goes down
2010 * (1) MCLK change
2011 * (2) Voltage change
2012 * (3) FLL change
2013 */
2014
2015 /* descending */
2016 if (voltuv < arb->voltuv_actual) {
2017 status = g->ops.clk.mclk_change(g, mclk_target);
2018 if (status < 0)
2019 return status;
2020
2021 status = volt_set_voltage(g, voltuv, voltuv_sram);
2022 if (status < 0)
2023 return status;
2024
2025 status = clk_set_fll_clks(g, &fllclk);
2026 if (status < 0)
2027 return status;
2028 } else {
2029 status = clk_set_fll_clks(g, &fllclk);
2030 if (status < 0)
2031 return status;
2032
2033 status = volt_set_voltage(g, voltuv, voltuv_sram);
2034 if (status < 0)
2035 return status;
2036
2037 status = g->ops.clk.mclk_change(g, mclk_target);
2038 if (status < 0)
2039 return status;
2040 }
2041
2042 return 0;
2043}
2044
2045void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) 1327void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
2046{ 1328{
2047 struct nvgpu_clk_arb *arb = g->clk_arb; 1329 struct nvgpu_clk_arb *arb = g->clk_arb;
@@ -2051,71 +1333,3 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
2051 else 1333 else
2052 nvgpu_mutex_release(&arb->pstate_lock); 1334 nvgpu_mutex_release(&arb->pstate_lock);
2053} 1335}
2054
2055#ifdef CONFIG_DEBUG_FS
2056static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
2057{
2058 struct gk20a *g = s->private;
2059 struct nvgpu_clk_arb *arb = g->clk_arb;
2060 struct nvgpu_clk_arb_debug *debug;
2061
2062 u64 num;
2063 s64 tmp, avg, std, max, min;
2064
2065 debug = NV_ACCESS_ONCE(arb->debug);
2066 /* Make copy of structure and ensure no reordering */
2067 nvgpu_smp_rmb();
2068 if (!debug)
2069 return -EINVAL;
2070
2071 std = debug->switch_std;
2072 avg = debug->switch_avg;
2073 max = debug->switch_max;
2074 min = debug->switch_min;
2075 num = debug->switch_num;
2076
2077 tmp = std;
2078 do_div(tmp, num);
2079 seq_printf(s, "Number of transitions: %lld\n",
2080 num);
2081 seq_printf(s, "max / min : %lld / %lld usec\n",
2082 max, min);
2083 seq_printf(s, "avg / std : %lld / %ld usec\n",
2084 avg, int_sqrt(tmp));
2085
2086 return 0;
2087}
2088
2089static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
2090{
2091 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
2092}
2093
2094static const struct file_operations nvgpu_clk_arb_stats_fops = {
2095 .open = nvgpu_clk_arb_stats_open,
2096 .read = seq_read,
2097 .llseek = seq_lseek,
2098 .release = single_release,
2099};
2100
2101
2102static int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
2103{
2104 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
2105 struct dentry *gpu_root = l->debugfs;
2106 struct dentry *d;
2107
2108 gk20a_dbg(gpu_dbg_info, "g=%p", g);
2109
2110 d = debugfs_create_file(
2111 "arb_stats",
2112 S_IRUGO,
2113 gpu_root,
2114 g,
2115 &nvgpu_clk_arb_stats_fops);
2116 if (!d)
2117 return -ENOMEM;
2118
2119 return 0;
2120}
2121#endif