-rw-r--r--  drivers/gpu/nvgpu/Makefile                         1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/clk_arb.c        1788
-rw-r--r--  drivers/gpu/nvgpu/common/linux/clk_arb_linux.h   120
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c   641
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/clk_arb.h        105
5 files changed, 1365 insertions, 1290 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 31483c5d..6d0fcad0 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -273,6 +273,7 @@ nvgpu-y += \
 	clk/clk_domain.o \
 	clk/clk_prog.o \
 	clk/clk_vf_point.o \
+	common/linux/ioctl_clk_arb.o \
 	common/linux/clk_arb.o \
 	clk/clk_freq_controller.o \
 	perf/vfe_var.o \
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c
index 82c97891..7cb3752a 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -39,224 +39,12 @@
39 39
40#include "gk20a/gk20a.h" 40#include "gk20a/gk20a.h"
41#include "clk/clk.h" 41#include "clk/clk.h"
42#include "clk_arb_linux.h"
42#include "pstate/pstate.h" 43#include "pstate/pstate.h"
43#include "lpwr/lpwr.h" 44#include "lpwr/lpwr.h"
44#include "volt/volt.h" 45#include "volt/volt.h"
45 46
46#ifdef CONFIG_DEBUG_FS 47int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
47#include "common/linux/os_linux.h"
48#endif
49
50#define MAX_F_POINTS 256
51#define DEFAULT_EVENT_NUMBER 32
52
53struct nvgpu_clk_dev;
54struct nvgpu_clk_arb_target;
55struct nvgpu_clk_notification_queue;
56
57#ifdef CONFIG_DEBUG_FS
58static int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
59#endif
60
61static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
62 struct file *filp);
63static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
64 struct file *filp);
65static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
66static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
67 size_t size, loff_t *off);
68
69static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
70 unsigned long arg);
71
72static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
73static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
74static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
75static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount);
76static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount);
77static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
78 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
79 u32 voltuv_sram);
80static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
81 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
82 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram);
83static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
84 struct nvgpu_clk_arb_target *target,
85 u32 alarm_mask);
86static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm);
87static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm);
88
89static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
90 struct nvgpu_clk_notification_queue *queue,
91 u32 alarm_mask);
92static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
93 struct nvgpu_clk_notification_queue *queue,
94 size_t events_number);
95
96static void nvgpu_clk_notification_queue_free(struct gk20a *g,
97 struct nvgpu_clk_notification_queue *queue);
98
99#define VF_POINT_INVALID_PSTATE ~0U
100#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
101#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
102 __fls((a)->pstates) :\
103 VF_POINT_INVALID_PSTATE)
104#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
105 __fls((a)->pstates & (b)->pstates) :\
106 VF_POINT_INVALID_PSTATE)
107
108/* Local Alarms */
109#define EVENT(alarm) (0x1UL << NVGPU_GPU_EVENT_##alarm)
110
111#define LOCAL_ALARM_MASK (EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE) | \
112 EVENT(VF_UPDATE))
113
114#define _WRAPGTEQ(a, b) ((a-b) > 0)
115
116struct nvgpu_clk_notification {
117 u32 notification;
118 u64 timestamp;
119};
120
121struct nvgpu_clk_notification_queue {
122 u32 size;
123 nvgpu_atomic_t head;
124 nvgpu_atomic_t tail;
125 struct nvgpu_clk_notification *notifications;
126};
127
128struct nvgpu_clk_vf_point {
129 u16 pstates;
130 union {
131 struct {
132 u16 gpc_mhz;
133 u16 sys_mhz;
134 u16 xbar_mhz;
135 };
136 u16 mem_mhz;
137 };
138 u32 uvolt;
139 u32 uvolt_sram;
140};
141
142struct nvgpu_clk_vf_table {
143 u32 mclk_num_points;
144 struct nvgpu_clk_vf_point *mclk_points;
145 u32 gpc2clk_num_points;
146 struct nvgpu_clk_vf_point *gpc2clk_points;
147};
148#ifdef CONFIG_DEBUG_FS
149struct nvgpu_clk_arb_debug {
150 s64 switch_max;
151 s64 switch_min;
152 u64 switch_num;
153 s64 switch_avg;
154 s64 switch_std;
155};
156#endif
157
158struct nvgpu_clk_arb_target {
159 u16 mclk;
160 u16 gpc2clk;
161 u32 pstate;
162};
163
164struct nvgpu_clk_arb {
165 struct nvgpu_spinlock sessions_lock;
166 struct nvgpu_spinlock users_lock;
167
168 struct nvgpu_mutex pstate_lock;
169 struct list_head users;
170 struct list_head sessions;
171 struct llist_head requests;
172
173 struct gk20a *g;
174 int status;
175
176 struct nvgpu_clk_arb_target actual_pool[2];
177 struct nvgpu_clk_arb_target *actual;
178
179 u16 gpc2clk_default_mhz;
180 u16 mclk_default_mhz;
181 u32 voltuv_actual;
182
183 u16 gpc2clk_min, gpc2clk_max;
184 u16 mclk_min, mclk_max;
185
186 struct work_struct update_fn_work;
187 struct workqueue_struct *update_work_queue;
188 struct work_struct vf_table_fn_work;
189 struct workqueue_struct *vf_table_work_queue;
190
191 struct nvgpu_cond request_wq;
192
193 struct nvgpu_clk_vf_table *current_vf_table;
194 struct nvgpu_clk_vf_table vf_table_pool[2];
195 u32 vf_table_index;
196
197 u16 *mclk_f_points;
198 nvgpu_atomic_t req_nr;
199
200 u32 mclk_f_numpoints;
201 u16 *gpc2clk_f_points;
202 u32 gpc2clk_f_numpoints;
203
204 nvgpu_atomic64_t alarm_mask;
205 struct nvgpu_clk_notification_queue notification_queue;
206
207#ifdef CONFIG_DEBUG_FS
208 struct nvgpu_clk_arb_debug debug_pool[2];
209 struct nvgpu_clk_arb_debug *debug;
210 bool debugfs_set;
211#endif
212};
213
214struct nvgpu_clk_dev {
215 struct nvgpu_clk_session *session;
216 union {
217 struct list_head link;
218 struct llist_node node;
219 };
220 struct nvgpu_cond readout_wq;
221 nvgpu_atomic_t poll_mask;
222 u16 gpc2clk_target_mhz;
223 u16 mclk_target_mhz;
224 u32 alarms_reported;
225 nvgpu_atomic_t enabled_mask;
226 struct nvgpu_clk_notification_queue queue;
227 u32 arb_queue_head;
228 struct nvgpu_ref refcount;
229};
230
231struct nvgpu_clk_session {
232 bool zombie;
233 struct gk20a *g;
234 struct nvgpu_ref refcount;
235 struct list_head link;
236 struct llist_head targets;
237
238 struct nvgpu_clk_arb_target target_pool[2];
239 struct nvgpu_clk_arb_target *target;
240};
241
242static const struct file_operations completion_dev_ops = {
243 .owner = THIS_MODULE,
244 .release = nvgpu_clk_arb_release_completion_dev,
245 .poll = nvgpu_clk_arb_poll_dev,
246};
247
248static const struct file_operations event_dev_ops = {
249 .owner = THIS_MODULE,
250 .release = nvgpu_clk_arb_release_event_dev,
251 .poll = nvgpu_clk_arb_poll_dev,
252 .read = nvgpu_clk_arb_read_event_dev,
253#ifdef CONFIG_COMPAT
254 .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
255#endif
256 .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
257};
258
259static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
260 struct nvgpu_clk_notification_queue *queue, 48 struct nvgpu_clk_notification_queue *queue,
261 size_t events_number) { 49 size_t events_number) {
262 queue->notifications = nvgpu_kcalloc(g, events_number, 50 queue->notifications = nvgpu_kcalloc(g, events_number,
@@ -271,7 +59,7 @@ static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
 	return 0;
 }
 
-static void nvgpu_clk_notification_queue_free(struct gk20a *g,
+void nvgpu_clk_notification_queue_free(struct gk20a *g,
 		struct nvgpu_clk_notification_queue *queue) {
 	nvgpu_kfree(g, queue->notifications);
 	queue->size = 0;
@@ -279,185 +67,20 @@ static void nvgpu_clk_notification_queue_free(struct gk20a *g,
279 nvgpu_atomic_set(&queue->tail, 0); 67 nvgpu_atomic_set(&queue->tail, 0);
280} 68}
281 69
282int nvgpu_clk_arb_init_arbiter(struct gk20a *g) 70static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
283{ 71 struct nvgpu_clk_notification_queue *queue,
284 struct nvgpu_clk_arb *arb; 72 u32 alarm_mask) {
285 u16 default_mhz;
286 int err;
287 int index;
288 struct nvgpu_clk_vf_table *table;
289
290 gk20a_dbg_fn("");
291
292 if (!g->ops.clk_arb.get_arbiter_clk_domains)
293 return 0;
294
295 arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
296 if (!arb)
297 return -ENOMEM;
298
299 err = nvgpu_mutex_init(&arb->pstate_lock);
300 if (err)
301 goto mutex_fail;
302 nvgpu_spinlock_init(&arb->sessions_lock);
303 nvgpu_spinlock_init(&arb->users_lock);
304
305 arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
306 if (!arb->mclk_f_points) {
307 err = -ENOMEM;
308 goto init_fail;
309 }
310
311 arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
312 if (!arb->gpc2clk_f_points) {
313 err = -ENOMEM;
314 goto init_fail;
315 }
316
317 for (index = 0; index < 2; index++) {
318 table = &arb->vf_table_pool[index];
319 table->gpc2clk_num_points = MAX_F_POINTS;
320 table->mclk_num_points = MAX_F_POINTS;
321
322 table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
323 sizeof(struct nvgpu_clk_vf_point));
324 if (!table->gpc2clk_points) {
325 err = -ENOMEM;
326 goto init_fail;
327 }
328
329
330 table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
331 sizeof(struct nvgpu_clk_vf_point));
332 if (!table->mclk_points) {
333 err = -ENOMEM;
334 goto init_fail;
335 }
336 }
337
338 g->clk_arb = arb;
339 arb->g = g;
340
341 err = g->ops.clk_arb.get_arbiter_clk_default(g,
342 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
343 if (err < 0) {
344 err = -EINVAL;
345 goto init_fail;
346 }
347
348 arb->mclk_default_mhz = default_mhz;
349
350 err = g->ops.clk_arb.get_arbiter_clk_default(g,
351 CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
352 if (err < 0) {
353 err = -EINVAL;
354 goto init_fail;
355 }
356
357 arb->gpc2clk_default_mhz = default_mhz;
358
359 arb->actual = &arb->actual_pool[0];
360
361 nvgpu_atomic_set(&arb->req_nr, 0);
362
363 nvgpu_atomic64_set(&arb->alarm_mask, 0);
364 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
365 DEFAULT_EVENT_NUMBER);
366 if (err < 0)
367 goto init_fail;
368
369 INIT_LIST_HEAD_RCU(&arb->users);
370 INIT_LIST_HEAD_RCU(&arb->sessions);
371 init_llist_head(&arb->requests);
372
373 nvgpu_cond_init(&arb->request_wq);
374 arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
375 "vf_table_update");
376 arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
377 "arbiter_update");
378
379
380 INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
381
382 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
383
384#ifdef CONFIG_DEBUG_FS
385 arb->debug = &arb->debug_pool[0];
386
387 if (!arb->debugfs_set) {
388 if (nvgpu_clk_arb_debugfs_init(g))
389 arb->debugfs_set = true;
390 }
391#endif
392 err = clk_vf_point_cache(g);
393 if (err < 0)
394 goto init_fail;
395
396 err = nvgpu_clk_arb_update_vf_table(arb);
397 if (err < 0)
398 goto init_fail;
399 do {
400 /* Check that first run is completed */
401 nvgpu_smp_mb();
402 NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
403 nvgpu_atomic_read(&arb->req_nr), 0);
404 } while (!nvgpu_atomic_read(&arb->req_nr));
405
406
407 return arb->status;
408
409init_fail:
410 nvgpu_kfree(g, arb->gpc2clk_f_points);
411 nvgpu_kfree(g, arb->mclk_f_points);
412
413 for (index = 0; index < 2; index++) {
414 nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
415 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
416 }
417
418 nvgpu_mutex_destroy(&arb->pstate_lock);
419
420mutex_fail:
421 nvgpu_kfree(g, arb);
422
423 return err;
424}
425
426void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
427{
428 nvgpu_clk_arb_schedule_alarm(g,
429 (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
430}
431
432void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
433{
434 struct nvgpu_clk_arb *arb = g->clk_arb;
435
436 nvgpu_clk_arb_set_global_alarm(g, alarm);
437 if (arb->update_work_queue)
438 queue_work(arb->update_work_queue, &arb->update_fn_work);
439}
440
441static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
442{
443 struct nvgpu_clk_arb *arb = g->clk_arb;
444 73
445 u64 current_mask; 74 u32 queue_index;
446 u32 refcnt; 75 u64 timestamp;
447 u32 alarm_mask;
448 u64 new_mask;
449 76
450 do { 77 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
451 current_mask = nvgpu_atomic64_read(&arb->alarm_mask); 78 /* get current timestamp */
452 /* atomic operations are strong so they do not need masks */ 79 timestamp = (u64) sched_clock();
453 80
454 refcnt = ((u32) (current_mask >> 32)) + 1; 81 queue->notifications[queue_index].timestamp = timestamp;
455 alarm_mask = (u32) (current_mask & ~alarm); 82 queue->notifications[queue_index].notification = alarm_mask;
456 new_mask = ((u64) refcnt << 32) | alarm_mask;
457 83
458 } while (unlikely(current_mask !=
459 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
460 current_mask, new_mask)));
461} 84}
462 85
463static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) 86static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
@@ -482,231 +105,8 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
482 current_mask, new_mask))); 105 current_mask, new_mask)));
483 106
484 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); 107 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm);
485
486}
487
488void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
489{
490 struct nvgpu_clk_arb *arb = g->clk_arb;
491 int index;
492
493 if (arb) {
494 cancel_work_sync(&arb->vf_table_fn_work);
495 destroy_workqueue(arb->vf_table_work_queue);
496 arb->vf_table_work_queue = NULL;
497
498 cancel_work_sync(&arb->update_fn_work);
499 destroy_workqueue(arb->update_work_queue);
500 arb->update_work_queue = NULL;
501
502 nvgpu_kfree(g, arb->gpc2clk_f_points);
503 nvgpu_kfree(g, arb->mclk_f_points);
504
505 for (index = 0; index < 2; index++) {
506 nvgpu_kfree(g,
507 arb->vf_table_pool[index].gpc2clk_points);
508 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
509 }
510 nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
511 nvgpu_kfree(g, g->clk_arb);
512 g->clk_arb = NULL;
513 }
514}
515
516static int nvgpu_clk_arb_install_fd(struct gk20a *g,
517 struct nvgpu_clk_session *session,
518 const struct file_operations *fops,
519 struct nvgpu_clk_dev **_dev)
520{
521 struct file *file;
522 int fd;
523 int err;
524 int status;
525 char name[64];
526 struct nvgpu_clk_dev *dev;
527
528 gk20a_dbg_fn("");
529
530 dev = nvgpu_kzalloc(g, sizeof(*dev));
531 if (!dev)
532 return -ENOMEM;
533
534 status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
535 DEFAULT_EVENT_NUMBER);
536 if (status < 0) {
537 err = status;
538 goto fail;
539 }
540
541 fd = get_unused_fd_flags(O_RDWR);
542 if (fd < 0) {
543 err = fd;
544 goto fail;
545 }
546
547 snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
548 file = anon_inode_getfile(name, fops, dev, O_RDWR);
549 if (IS_ERR(file)) {
550 err = PTR_ERR(file);
551 goto fail_fd;
552 }
553
554 fd_install(fd, file);
555
556 nvgpu_cond_init(&dev->readout_wq);
557
558 nvgpu_atomic_set(&dev->poll_mask, 0);
559
560 dev->session = session;
561 nvgpu_ref_init(&dev->refcount);
562
563 nvgpu_ref_get(&session->refcount);
564
565 *_dev = dev;
566
567 return fd;
568
569fail_fd:
570 put_unused_fd(fd);
571fail:
572 nvgpu_kfree(g, dev);
573
574 return err;
575} 108}
576 109
577int nvgpu_clk_arb_init_session(struct gk20a *g,
578 struct nvgpu_clk_session **_session)
579{
580 struct nvgpu_clk_arb *arb = g->clk_arb;
581 struct nvgpu_clk_session *session = *(_session);
582
583 gk20a_dbg_fn("");
584
585 if (!g->ops.clk_arb.get_arbiter_clk_domains)
586 return 0;
587
588 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
589 if (!session)
590 return -ENOMEM;
591 session->g = g;
592
593 nvgpu_ref_init(&session->refcount);
594
595 session->zombie = false;
596 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
597 /* make sure that the initialization of the pool is visible
598 * before the update
599 */
600 nvgpu_smp_wmb();
601 session->target = &session->target_pool[0];
602
603 init_llist_head(&session->targets);
604
605 nvgpu_spinlock_acquire(&arb->sessions_lock);
606 list_add_tail_rcu(&session->link, &arb->sessions);
607 nvgpu_spinlock_release(&arb->sessions_lock);
608
609 *_session = session;
610
611 return 0;
612}
613
614static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
615{
616 struct nvgpu_clk_dev *dev = container_of(refcount,
617 struct nvgpu_clk_dev, refcount);
618 struct nvgpu_clk_session *session = dev->session;
619
620 nvgpu_kfree(session->g, dev);
621}
622
623static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
624{
625 struct nvgpu_clk_session *session = container_of(refcount,
626 struct nvgpu_clk_session, refcount);
627 struct nvgpu_clk_arb *arb = session->g->clk_arb;
628 struct gk20a *g = session->g;
629 struct nvgpu_clk_dev *dev, *tmp;
630 struct llist_node *head;
631
632 gk20a_dbg_fn("");
633
634 if (arb) {
635 nvgpu_spinlock_acquire(&arb->sessions_lock);
636 list_del_rcu(&session->link);
637 nvgpu_spinlock_release(&arb->sessions_lock);
638 }
639
640 head = llist_del_all(&session->targets);
641 llist_for_each_entry_safe(dev, tmp, head, node) {
642 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
643 }
644 synchronize_rcu();
645 nvgpu_kfree(g, session);
646}
647
648void nvgpu_clk_arb_release_session(struct gk20a *g,
649 struct nvgpu_clk_session *session)
650{
651 struct nvgpu_clk_arb *arb = g->clk_arb;
652
653 gk20a_dbg_fn("");
654
655 session->zombie = true;
656 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
657 if (arb && arb->update_work_queue)
658 queue_work(arb->update_work_queue, &arb->update_fn_work);
659}
660
661int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
662 struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
663{
664 struct nvgpu_clk_arb *arb = g->clk_arb;
665 struct nvgpu_clk_dev *dev;
666 int fd;
667
668 gk20a_dbg_fn("");
669
670 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
671 if (fd < 0)
672 return fd;
673
674 /* TODO: alarm mask needs to be set to default value to prevent
675 * failures of legacy tests. This will be removed when sanity is
676 * updated
677 */
678 if (alarm_mask)
679 nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
680 else
681 nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
682
683 dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
684
685 nvgpu_spinlock_acquire(&arb->users_lock);
686 list_add_tail_rcu(&dev->link, &arb->users);
687 nvgpu_spinlock_release(&arb->users_lock);
688
689 *event_fd = fd;
690
691 return 0;
692}
693
694int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
695 struct nvgpu_clk_session *session, int *request_fd)
696{
697 struct nvgpu_clk_dev *dev;
698 int fd;
699
700 gk20a_dbg_fn("");
701
702 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
703 if (fd < 0)
704 return fd;
705
706 *request_fd = fd;
707
708 return 0;
709}
710 110
711static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) 111static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
712{ 112{
@@ -1014,13 +414,6 @@ exit_vf_table:
 	return status;
 }
 
-void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
-{
-	struct nvgpu_clk_arb *arb = g->clk_arb;
-
-	if (arb->vf_table_work_queue)
-		queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
-}
 
 static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
 {
@@ -1044,6 +437,305 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
1044 nvgpu_clk_arb_update_vf_table(arb); 437 nvgpu_clk_arb_update_vf_table(arb);
1045} 438}
1046 439
440static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
441 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
442 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
443{
444 u16 gpc2clk_target, mclk_target;
445 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
446 u32 mclk_voltuv, mclk_voltuv_sram;
447 u32 pstate = VF_POINT_INVALID_PSTATE;
448 struct nvgpu_clk_vf_table *table;
449 u32 index, index_mclk;
450 struct nvgpu_clk_vf_point *mclk_vf = NULL;
451
452 do {
453 gpc2clk_target = *gpc2clk;
454 mclk_target = *mclk;
455 gpc2clk_voltuv = 0;
456 gpc2clk_voltuv_sram = 0;
457 mclk_voltuv = 0;
458 mclk_voltuv_sram = 0;
459
460 table = NV_ACCESS_ONCE(arb->current_vf_table);
461 /* pointer to table can be updated by callback */
462 nvgpu_smp_rmb();
463
464 if (!table)
465 continue;
466 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
467 nvgpu_err(arb->g, "found empty table");
468 goto find_exit;
469 }
470 /* First we check MCLK to find out which PSTATE we are
471 * are requesting, and from there try to find the minimum
472 * GPC2CLK on the same PSTATE that satisfies the request.
473 * If no GPC2CLK can be found, then we need to up the PSTATE
474 */
475
476recalculate_vf_point:
477 for (index = 0; index < table->mclk_num_points; index++) {
478 if (table->mclk_points[index].mem_mhz >= mclk_target) {
479 mclk_vf = &table->mclk_points[index];
480 break;
481 }
482 }
483 if (index == table->mclk_num_points) {
484 mclk_vf = &table->mclk_points[index-1];
485 index = table->mclk_num_points - 1;
486 }
487 index_mclk = index;
488
489 /* round up the freq requests */
490 for (index = 0; index < table->gpc2clk_num_points; index++) {
491 pstate = VF_POINT_COMMON_PSTATE(
492 &table->gpc2clk_points[index], mclk_vf);
493
494 if ((table->gpc2clk_points[index].gpc_mhz >=
495 gpc2clk_target) &&
496 (pstate != VF_POINT_INVALID_PSTATE)) {
497 gpc2clk_target =
498 table->gpc2clk_points[index].gpc_mhz;
499 *sys2clk =
500 table->gpc2clk_points[index].sys_mhz;
501 *xbar2clk =
502 table->gpc2clk_points[index].xbar_mhz;
503
504 gpc2clk_voltuv =
505 table->gpc2clk_points[index].uvolt;
506 gpc2clk_voltuv_sram =
507 table->gpc2clk_points[index].uvolt_sram;
508 break;
509 }
510 }
511
512 if (index == table->gpc2clk_num_points) {
513 pstate = VF_POINT_COMMON_PSTATE(
514 &table->gpc2clk_points[index-1], mclk_vf);
515 if (pstate != VF_POINT_INVALID_PSTATE) {
516 gpc2clk_target =
517 table->gpc2clk_points[index-1].gpc_mhz;
518 *sys2clk =
519 table->gpc2clk_points[index-1].sys_mhz;
520 *xbar2clk =
521 table->gpc2clk_points[index-1].xbar_mhz;
522
523 gpc2clk_voltuv =
524 table->gpc2clk_points[index-1].uvolt;
525 gpc2clk_voltuv_sram =
526 table->gpc2clk_points[index-1].
527 uvolt_sram;
528 } else if (index_mclk >= table->mclk_num_points - 1) {
529 /* There is no available combination of MCLK
530 * and GPC2CLK, we need to fail this
531 */
532 gpc2clk_target = 0;
533 mclk_target = 0;
534 pstate = VF_POINT_INVALID_PSTATE;
535 goto find_exit;
536 } else {
537 /* recalculate with higher PSTATE */
538 gpc2clk_target = *gpc2clk;
539 mclk_target = table->mclk_points[index_mclk+1].
540 mem_mhz;
541 goto recalculate_vf_point;
542 }
543 }
544
545 mclk_target = mclk_vf->mem_mhz;
546 mclk_voltuv = mclk_vf->uvolt;
547 mclk_voltuv_sram = mclk_vf->uvolt_sram;
548
549 } while (!table ||
550 (NV_ACCESS_ONCE(arb->current_vf_table) != table));
551
552find_exit:
553 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
554 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
555 gpc2clk_voltuv_sram : mclk_voltuv_sram;
556 /* noise unaware vmin */
557 *nuvmin = mclk_voltuv;
558 *nuvmin_sram = mclk_voltuv_sram;
559 *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
560 *mclk = mclk_target;
561 return pstate;
562}
563
564static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
565 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
566 u32 voltuv_sram)
567{
568 struct set_fll_clk fllclk;
569 struct nvgpu_clk_arb *arb = g->clk_arb;
570 int status;
571
572 fllclk.gpc2clkmhz = gpc2clk_target;
573 fllclk.sys2clkmhz = sys2clk_target;
574 fllclk.xbar2clkmhz = xbar2clk_target;
575
576 fllclk.voltuv = voltuv;
577
578 /* if voltage ascends we do:
579 * (1) FLL change
580 * (2) Voltage change
581 * (3) MCLK change
582 * If it goes down
583 * (1) MCLK change
584 * (2) Voltage change
585 * (3) FLL change
586 */
587
588 /* descending */
589 if (voltuv < arb->voltuv_actual) {
590 status = g->ops.clk.mclk_change(g, mclk_target);
591 if (status < 0)
592 return status;
593
594 status = volt_set_voltage(g, voltuv, voltuv_sram);
595 if (status < 0)
596 return status;
597
598 status = clk_set_fll_clks(g, &fllclk);
599 if (status < 0)
600 return status;
601 } else {
602 status = clk_set_fll_clks(g, &fllclk);
603 if (status < 0)
604 return status;
605
606 status = volt_set_voltage(g, voltuv, voltuv_sram);
607 if (status < 0)
608 return status;
609
610 status = g->ops.clk.mclk_change(g, mclk_target);
611 if (status < 0)
612 return status;
613 }
614
615 return 0;
616}
617
618static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
619 struct nvgpu_clk_arb_target *target,
620 u32 alarm) {
621
622 struct nvgpu_clk_session *session = dev->session;
623 struct nvgpu_clk_arb *arb = session->g->clk_arb;
624 struct nvgpu_clk_notification *notification;
625
626 u32 queue_alarm_mask = 0;
627 u32 enabled_mask = 0;
628 u32 new_alarms_reported = 0;
629 u32 poll_mask = 0;
630 u32 tail, head;
631 u32 queue_index;
632 size_t size;
633 int index;
634
635 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask);
636 size = arb->notification_queue.size;
637
638 /* queue global arbiter notifications in buffer */
639 do {
640 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
641 /* copy items to the queue */
642 queue_index = nvgpu_atomic_read(&dev->queue.tail);
643 head = dev->arb_queue_head;
644 head = (tail - head) < arb->notification_queue.size ?
645 head : tail - arb->notification_queue.size;
646
647 for (index = head; _WRAPGTEQ(tail, index); index++) {
648 u32 alarm_detected;
649
650 notification = &arb->notification_queue.
651 notifications[(index+1) % size];
652 alarm_detected =
653 NV_ACCESS_ONCE(notification->notification);
654
655 if (!(enabled_mask & alarm_detected))
656 continue;
657
658 queue_index++;
659 dev->queue.notifications[
660 queue_index % dev->queue.size].timestamp =
661 NV_ACCESS_ONCE(notification->timestamp);
662
663 dev->queue.notifications[
664 queue_index % dev->queue.size].notification =
665 alarm_detected;
666
667 queue_alarm_mask |= alarm_detected;
668 }
669 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
670 (int)tail));
671
672 nvgpu_atomic_set(&dev->queue.tail, queue_index);
673 /* update the last notification we processed from global queue */
674
675 dev->arb_queue_head = tail;
676
677 /* Check if current session targets are met */
678 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
679 if ((target->gpc2clk < session->target->gpc2clk)
680 || (target->mclk < session->target->mclk)) {
681
682 poll_mask |= (POLLIN | POLLPRI);
683 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
684 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
685 }
686 }
687
688 /* Check if there is a new VF update */
689 if (queue_alarm_mask & EVENT(VF_UPDATE))
690 poll_mask |= (POLLIN | POLLRDNORM);
691
692 /* Notify sticky alarms that were not reported on previous run*/
693 new_alarms_reported = (queue_alarm_mask |
694 (alarm & ~dev->alarms_reported & queue_alarm_mask));
695
696 if (new_alarms_reported & ~LOCAL_ALARM_MASK) {
697 /* check that we are not re-reporting */
698 if (new_alarms_reported & EVENT(ALARM_GPU_LOST))
699 poll_mask |= POLLHUP;
700
701 poll_mask |= (POLLIN | POLLPRI);
702 /* On next run do not report global alarms that were already
703 * reported, but report SHUTDOWN always
704 */
705 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
706 ~EVENT(ALARM_GPU_LOST);
707 }
708
709 if (poll_mask) {
710 nvgpu_atomic_set(&dev->poll_mask, poll_mask);
711 nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
712 }
713
714 return new_alarms_reported;
715}
716
717static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
718{
719 struct nvgpu_clk_arb *arb = g->clk_arb;
720
721 u64 current_mask;
722 u32 refcnt;
723 u32 alarm_mask;
724 u64 new_mask;
725
726 do {
727 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
728 /* atomic operations are strong so they do not need masks */
729
730 refcnt = ((u32) (current_mask >> 32)) + 1;
731 alarm_mask = (u32) (current_mask & ~alarm);
732 new_mask = ((u64) refcnt << 32) | alarm_mask;
733
734 } while (unlikely(current_mask !=
735 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
736 current_mask, new_mask)));
737}
738
1047static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) 739static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1048{ 740{
1049 struct nvgpu_clk_arb *arb = 741 struct nvgpu_clk_arb *arb =
@@ -1345,639 +1037,283 @@ exit_arb:
1345 ~EVENT(ALARM_GPU_LOST)); 1037 ~EVENT(ALARM_GPU_LOST));
1346} 1038}
1347 1039
1348static void nvgpu_clk_arb_queue_notification(struct gk20a *g, 1040int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
1349 struct nvgpu_clk_notification_queue *queue, 1041{
1350 u32 alarm_mask) { 1042 struct nvgpu_clk_arb *arb;
1351 1043 u16 default_mhz;
1352 u32 queue_index; 1044 int err;
1353 u64 timestamp;
1354
1355 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
1356 /* get current timestamp */
1357 timestamp = (u64) sched_clock();
1358
1359 queue->notifications[queue_index].timestamp = timestamp;
1360 queue->notifications[queue_index].notification = alarm_mask;
1361
1362}
1363
1364static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
1365 struct nvgpu_clk_arb_target *target,
1366 u32 alarm) {
1367
1368 struct nvgpu_clk_session *session = dev->session;
1369 struct nvgpu_clk_arb *arb = session->g->clk_arb;
1370 struct nvgpu_clk_notification *notification;
1371
1372 u32 queue_alarm_mask = 0;
1373 u32 enabled_mask = 0;
1374 u32 new_alarms_reported = 0;
1375 u32 poll_mask = 0;
1376 u32 tail, head;
1377 u32 queue_index;
1378 size_t size;
1379 int index; 1045 int index;
1046 struct nvgpu_clk_vf_table *table;
1380 1047
1381 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask); 1048 gk20a_dbg_fn("");
1382 size = arb->notification_queue.size;
1383
1384 /* queue global arbiter notifications in buffer */
1385 do {
1386 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
1387 /* copy items to the queue */
1388 queue_index = nvgpu_atomic_read(&dev->queue.tail);
1389 head = dev->arb_queue_head;
1390 head = (tail - head) < arb->notification_queue.size ?
1391 head : tail - arb->notification_queue.size;
1392
1393 for (index = head; _WRAPGTEQ(tail, index); index++) {
1394 u32 alarm_detected;
1395
1396 notification = &arb->notification_queue.
1397 notifications[(index+1) % size];
1398 alarm_detected =
1399 NV_ACCESS_ONCE(notification->notification);
1400
1401 if (!(enabled_mask & alarm_detected))
1402 continue;
1403
1404 queue_index++;
1405 dev->queue.notifications[
1406 queue_index % dev->queue.size].timestamp =
1407 NV_ACCESS_ONCE(notification->timestamp);
1408
1409 dev->queue.notifications[
1410 queue_index % dev->queue.size].notification =
1411 alarm_detected;
1412
1413 queue_alarm_mask |= alarm_detected;
1414 }
1415 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
1416 (int)tail));
1417
1418 nvgpu_atomic_set(&dev->queue.tail, queue_index);
1419 /* update the last notification we processed from global queue */
1420
1421 dev->arb_queue_head = tail;
1422
1423 /* Check if current session targets are met */
1424 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
1425 if ((target->gpc2clk < session->target->gpc2clk)
1426 || (target->mclk < session->target->mclk)) {
1427
1428 poll_mask |= (POLLIN | POLLPRI);
1429 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
1430 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
1431 }
1432 }
1433 1049
1434 /* Check if there is a new VF update */ 1050 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1435 if (queue_alarm_mask & EVENT(VF_UPDATE)) 1051 return 0;
1436 poll_mask |= (POLLIN | POLLRDNORM);
1437 1052
1438 /* Notify sticky alarms that were not reported on previous run*/ 1053 arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
1439 new_alarms_reported = (queue_alarm_mask | 1054 if (!arb)
1440 (alarm & ~dev->alarms_reported & queue_alarm_mask)); 1055 return -ENOMEM;
1441 1056
1442 if (new_alarms_reported & ~LOCAL_ALARM_MASK) { 1057 err = nvgpu_mutex_init(&arb->pstate_lock);
1443 /* check that we are not re-reporting */ 1058 if (err)
1444 if (new_alarms_reported & EVENT(ALARM_GPU_LOST)) 1059 goto mutex_fail;
1445 poll_mask |= POLLHUP; 1060 nvgpu_spinlock_init(&arb->sessions_lock);
1061 nvgpu_spinlock_init(&arb->users_lock);
1446 1062
1447 poll_mask |= (POLLIN | POLLPRI); 1063 arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1448 /* On next run do not report global alarms that were already 1064 if (!arb->mclk_f_points) {
1449 * reported, but report SHUTDOWN always 1065 err = -ENOMEM;
1450 */ 1066 goto init_fail;
1451 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
1452 ~EVENT(ALARM_GPU_LOST);
1453 } 1067 }
1454 1068
1455 if (poll_mask) { 1069 arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1456 nvgpu_atomic_set(&dev->poll_mask, poll_mask); 1070 if (!arb->gpc2clk_f_points) {
1457 nvgpu_cond_broadcast_interruptible(&dev->readout_wq); 1071 err = -ENOMEM;
1072 goto init_fail;
1458 } 1073 }
1459 1074
1460 return new_alarms_reported; 1075 for (index = 0; index < 2; index++) {
1461} 1076 table = &arb->vf_table_pool[index];
1462 1077 table->gpc2clk_num_points = MAX_F_POINTS;
1463static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, 1078 table->mclk_num_points = MAX_F_POINTS;
1464 struct nvgpu_gpu_set_event_filter_args *args)
1465{
1466 u32 mask;
1467
1468 gk20a_dbg(gpu_dbg_fn, "");
1469
1470 if (args->flags)
1471 return -EINVAL;
1472
1473 if (args->size != 1)
1474 return -EINVAL;
1475
1476 if (copy_from_user(&mask, (void __user *) args->buffer,
1477 args->size * sizeof(u32)))
1478 return -EFAULT;
1479
1480 /* update alarm mask */
1481 nvgpu_atomic_set(&dev->enabled_mask, mask);
1482
1483 return 0;
1484}
1485
1486static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
1487 unsigned long arg)
1488{
1489 struct nvgpu_clk_dev *dev = filp->private_data;
1490 struct gk20a *g = dev->session->g;
1491 u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
1492 int err = 0;
1493
1494 gk20a_dbg(gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
1495
1496 if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
1497 || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
1498 return -EINVAL;
1499 1079
1500 BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); 1080 table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1081 sizeof(struct nvgpu_clk_vf_point));
1082 if (!table->gpc2clk_points) {
1083 err = -ENOMEM;
1084 goto init_fail;
1085 }
1501 1086
1502 memset(buf, 0, sizeof(buf));
1503 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1504 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
1505 return -EFAULT;
1506 }
1507 1087
1508 switch (cmd) { 1088 table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1509 case NVGPU_EVENT_IOCTL_SET_FILTER: 1089 sizeof(struct nvgpu_clk_vf_point));
1510 err = nvgpu_clk_arb_set_event_filter(dev, 1090 if (!table->mclk_points) {
1511 (struct nvgpu_gpu_set_event_filter_args *)buf); 1091 err = -ENOMEM;
1512 break; 1092 goto init_fail;
1513 default: 1093 }
1514 nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
1515 err = -ENOTTY;
1516 } 1094 }
1517 1095
1518 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) 1096 g->clk_arb = arb;
1519 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); 1097 arb->g = g;
1520
1521 return err;
1522}
1523
1524int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
1525 struct nvgpu_clk_session *session, int request_fd)
1526{
1527 struct nvgpu_clk_arb *arb = g->clk_arb;
1528 struct nvgpu_clk_dev *dev;
1529 struct fd fd;
1530 int err = 0;
1531
1532 gk20a_dbg_fn("");
1533
1534 fd = fdget(request_fd);
1535 if (!fd.file)
1536 return -EINVAL;
1537 1098
1538 if (fd.file->f_op != &completion_dev_ops) { 1099 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1100 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
1101 if (err < 0) {
1539 err = -EINVAL; 1102 err = -EINVAL;
1540 goto fdput_fd; 1103 goto init_fail;
1541 } 1104 }
1542 1105
1543 dev = (struct nvgpu_clk_dev *) fd.file->private_data; 1106 arb->mclk_default_mhz = default_mhz;
1544 1107
1545 if (!dev || dev->session != session) { 1108 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1109 CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
1110 if (err < 0) {
1546 err = -EINVAL; 1111 err = -EINVAL;
1547 goto fdput_fd; 1112 goto init_fail;
1548 } 1113 }
1549 nvgpu_ref_get(&dev->refcount);
1550 llist_add(&dev->node, &session->targets);
1551 if (arb->update_work_queue)
1552 queue_work(arb->update_work_queue, &arb->update_fn_work);
1553 1114
1554fdput_fd: 1115 arb->gpc2clk_default_mhz = default_mhz;
1555 fdput(fd);
1556 return err;
1557}
1558 1116
1559static inline u32 __pending_event(struct nvgpu_clk_dev *dev, 1117 arb->actual = &arb->actual_pool[0];
1560 struct nvgpu_gpu_event_info *info) {
1561 1118
1562 u32 tail, head; 1119 nvgpu_atomic_set(&arb->req_nr, 0);
1563 u32 events = 0;
1564 struct nvgpu_clk_notification *p_notif;
1565 1120
1566 tail = nvgpu_atomic_read(&dev->queue.tail); 1121 nvgpu_atomic64_set(&arb->alarm_mask, 0);
1567 head = nvgpu_atomic_read(&dev->queue.head); 1122 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
1123 DEFAULT_EVENT_NUMBER);
1124 if (err < 0)
1125 goto init_fail;
1568 1126
1569 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; 1127 INIT_LIST_HEAD_RCU(&arb->users);
1128 INIT_LIST_HEAD_RCU(&arb->sessions);
1129 init_llist_head(&arb->requests);
1570 1130
1571 if (_WRAPGTEQ(tail, head) && info) { 1131 nvgpu_cond_init(&arb->request_wq);
1572 head++; 1132 arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
1573 p_notif = &dev->queue.notifications[head % dev->queue.size]; 1133 "vf_table_update");
1574 events |= p_notif->notification; 1134 arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
1575 info->event_id = ffs(events) - 1; 1135 "arbiter_update");
1576 info->timestamp = p_notif->timestamp;
1577 nvgpu_atomic_set(&dev->queue.head, head);
1578 }
1579 1136
1580 return events;
1581}
1582 1137
1583static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, 1138 INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
1584 size_t size, loff_t *off)
1585{
1586 struct nvgpu_clk_dev *dev = filp->private_data;
1587 struct nvgpu_gpu_event_info info;
1588 ssize_t err;
1589 1139
1590 gk20a_dbg_fn("filp=%p, buf=%p, size=%zu", filp, buf, size); 1140 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
1591 1141
1592 if ((size - *off) < sizeof(info)) 1142#ifdef CONFIG_DEBUG_FS
1593 return 0; 1143 arb->debug = &arb->debug_pool[0];
1594 1144
1595 memset(&info, 0, sizeof(info)); 1145 if (!arb->debugfs_set) {
1596 /* Get the oldest event from the queue */ 1146 if (nvgpu_clk_arb_debugfs_init(g))
1597 while (!__pending_event(dev, &info)) { 1147 arb->debugfs_set = true;
1598 if (filp->f_flags & O_NONBLOCK)
1599 return -EAGAIN;
1600 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
1601 __pending_event(dev, &info), 0);
1602 if (err)
1603 return err;
1604 if (info.timestamp)
1605 break;
1606 } 1148 }
1149#endif
1150 err = clk_vf_point_cache(g);
1151 if (err < 0)
1152 goto init_fail;
1607 1153
1608 if (copy_to_user(buf + *off, &info, sizeof(info))) 1154 err = nvgpu_clk_arb_update_vf_table(arb);
1609 return -EFAULT; 1155 if (err < 0)
1610 1156 goto init_fail;
1611 return sizeof(info); 1157 do {
1612} 1158 /* Check that first run is completed */
1159 nvgpu_smp_mb();
1160 NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
1161 nvgpu_atomic_read(&arb->req_nr), 0);
1162 } while (!nvgpu_atomic_read(&arb->req_nr));
1613 1163
1614static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
1615{
1616 struct nvgpu_clk_dev *dev = filp->private_data;
1617 1164
1618 gk20a_dbg_fn(""); 1165 return arb->status;
1619 1166
1620 poll_wait(filp, &dev->readout_wq.wq, wait); 1167init_fail:
1621 return nvgpu_atomic_xchg(&dev->poll_mask, 0); 1168 nvgpu_kfree(g, arb->gpc2clk_f_points);
1622} 1169 nvgpu_kfree(g, arb->mclk_f_points);
1623 1170
1624static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, 1171 for (index = 0; index < 2; index++) {
1625 struct file *filp) 1172 nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
1626{ 1173 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1627 struct nvgpu_clk_dev *dev = filp->private_data; 1174 }
1628 struct nvgpu_clk_session *session = dev->session;
1629 1175
1176 nvgpu_mutex_destroy(&arb->pstate_lock);
1630 1177
1631 gk20a_dbg_fn(""); 1178mutex_fail:
1179 nvgpu_kfree(g, arb);
1632 1180
1633 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); 1181 return err;
1634 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1635 return 0;
1636} 1182}
1637 1183
1638static int nvgpu_clk_arb_release_event_dev(struct inode *inode, 1184void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
1639 struct file *filp)
1640{ 1185{
1641 struct nvgpu_clk_dev *dev = filp->private_data; 1186 nvgpu_clk_arb_schedule_alarm(g,
1642 struct nvgpu_clk_session *session = dev->session; 1187 (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
1643 struct nvgpu_clk_arb *arb;
1644
1645 arb = session->g->clk_arb;
1646
1647 gk20a_dbg_fn("");
1648
1649 if (arb) {
1650 nvgpu_spinlock_acquire(&arb->users_lock);
1651 list_del_rcu(&dev->link);
1652 nvgpu_spinlock_release(&arb->users_lock);
1653 nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
1654 }
1655
1656 synchronize_rcu();
1657 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
1658 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1659
1660 return 0;
1661} 1188}
1662 1189
1663int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, 1190void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
1664 int request_fd, u32 api_domain, u16 target_mhz)
1665{ 1191{
1666 struct nvgpu_clk_dev *dev; 1192 struct nvgpu_clk_arb *arb = g->clk_arb;
1667 struct fd fd;
1668 int err = 0;
1669
1670 gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
1671
1672 fd = fdget(request_fd);
1673 if (!fd.file)
1674 return -EINVAL;
1675
1676 if (fd.file->f_op != &completion_dev_ops) {
1677 err = -EINVAL;
1678 goto fdput_fd;
1679 }
1680
1681 dev = fd.file->private_data;
1682 if (!dev || dev->session != session) {
1683 err = -EINVAL;
1684 goto fdput_fd;
1685 }
1686
1687 switch (api_domain) {
1688 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1689 dev->mclk_target_mhz = target_mhz;
1690 break;
1691
1692 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1693 dev->gpc2clk_target_mhz = target_mhz * 2ULL;
1694 break;
1695
1696 default:
1697 err = -EINVAL;
1698 }
1699 1193
1700fdput_fd: 1194 nvgpu_clk_arb_set_global_alarm(g, alarm);
1701 fdput(fd); 1195 if (arb->update_work_queue)
1702 return err; 1196 queue_work(arb->update_work_queue, &arb->update_fn_work);
1703} 1197}
1704 1198
1705int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, 1199void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
1706 u32 api_domain, u16 *freq_mhz)
1707{ 1200{
1708 int err = 0; 1201 struct nvgpu_clk_arb *arb = g->clk_arb;
1709 struct nvgpu_clk_arb_target *target; 1202 int index;
1710 1203
1711 do { 1204 if (arb) {
1712 target = NV_ACCESS_ONCE(session->target); 1205 cancel_work_sync(&arb->vf_table_fn_work);
1713 /* no reordering of this pointer */ 1206 destroy_workqueue(arb->vf_table_work_queue);
1714 nvgpu_smp_rmb(); 1207 arb->vf_table_work_queue = NULL;
1715 1208
1716 switch (api_domain) { 1209 cancel_work_sync(&arb->update_fn_work);
1717 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1210 destroy_workqueue(arb->update_work_queue);
1718 *freq_mhz = target->mclk; 1211 arb->update_work_queue = NULL;
1719 break;
1720 1212
1721 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1213 nvgpu_kfree(g, arb->gpc2clk_f_points);
1722 *freq_mhz = target->gpc2clk / 2ULL; 1214 nvgpu_kfree(g, arb->mclk_f_points);
1723 break;
1724 1215
1725 default: 1216 for (index = 0; index < 2; index++) {
1726 *freq_mhz = 0; 1217 nvgpu_kfree(g,
1727 err = -EINVAL; 1218 arb->vf_table_pool[index].gpc2clk_points);
1219 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1728 } 1220 }
1729 } while (target != NV_ACCESS_ONCE(session->target)); 1221 nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
1730 return err; 1222 nvgpu_kfree(g, g->clk_arb);
1223 g->clk_arb = NULL;
1224 }
1731} 1225}
1732 1226
1733int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, 1227int nvgpu_clk_arb_init_session(struct gk20a *g,
1734 u32 api_domain, u16 *freq_mhz) 1228 struct nvgpu_clk_session **_session)
1735{ 1229{
1736 struct nvgpu_clk_arb *arb = g->clk_arb; 1230 struct nvgpu_clk_arb *arb = g->clk_arb;
1737 int err = 0; 1231 struct nvgpu_clk_session *session = *(_session);
1738 struct nvgpu_clk_arb_target *actual;
1739 1232
1740 do { 1233 gk20a_dbg_fn("");
1741 actual = NV_ACCESS_ONCE(arb->actual);
1742 /* no reordering of this pointer */
1743 nvgpu_smp_rmb();
1744 1234
1745 switch (api_domain) { 1235 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1746 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1236 return 0;
1747 *freq_mhz = actual->mclk;
1748 break;
1749 1237
1750 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1238 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
1751 *freq_mhz = actual->gpc2clk / 2ULL; 1239 if (!session)
1752 break; 1240 return -ENOMEM;
1241 session->g = g;
1753 1242
1754 default: 1243 nvgpu_ref_init(&session->refcount);
1755 *freq_mhz = 0;
1756 err = -EINVAL;
1757 }
1758 } while (actual != NV_ACCESS_ONCE(arb->actual));
1759 return err;
1760}
1761 1244
1762int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, 1245 session->zombie = false;
1763 u32 api_domain, u16 *freq_mhz) 1246 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
1764{ 1247 /* make sure that the initialization of the pool is visible
1765 switch (api_domain) { 1248 * before the update
1766 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1249 */
1767 *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) / 1250 nvgpu_smp_wmb();
1768 1000000ULL; 1251 session->target = &session->target_pool[0];
1769 return 0;
1770 1252
1771 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1253 init_llist_head(&session->targets);
1772 *freq_mhz = g->ops.clk.measure_freq(g,
1773 CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL;
1774 return 0;
1775 1254
1776 default: 1255 nvgpu_spinlock_acquire(&arb->sessions_lock);
1777 return -EINVAL; 1256 list_add_tail_rcu(&session->link, &arb->sessions);
1778 } 1257 nvgpu_spinlock_release(&arb->sessions_lock);
1779}
1780 1258
1781int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 1259 *_session = session;
1782 u16 *min_mhz, u16 *max_mhz)
1783{
1784 int ret;
1785
1786 switch (api_domain) {
1787 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1788 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
1789 CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz);
1790 return ret;
1791
1792 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1793 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
1794 CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz);
1795 if (!ret) {
1796 *min_mhz /= 2;
1797 *max_mhz /= 2;
1798 }
1799 return ret;
1800 1260
1801 default: 1261 return 0;
1802 return -EINVAL;
1803 }
1804} 1262}
1805 1263
1806u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) 1264void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
1807{ 1265{
1808 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); 1266 struct nvgpu_clk_dev *dev = container_of(refcount,
1809 u32 api_domains = 0; 1267 struct nvgpu_clk_dev, refcount);
1810 1268 struct nvgpu_clk_session *session = dev->session;
1811 if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
1812 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
1813
1814 if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
1815 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
1816 1269
1817 return api_domains; 1270 nvgpu_kfree(session->g, dev);
1818} 1271}
1819 1272
1820bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) 1273void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
1821{ 1274{
1822 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); 1275 struct nvgpu_clk_session *session = container_of(refcount,
1823 1276 struct nvgpu_clk_session, refcount);
1824 switch (api_domain) { 1277 struct nvgpu_clk_arb *arb = session->g->clk_arb;
1825 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1278 struct gk20a *g = session->g;
1826 return ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0); 1279 struct nvgpu_clk_dev *dev, *tmp;
1280 struct llist_node *head;
1827 1281
1828 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1282 gk20a_dbg_fn("");
1829 return ((clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0);
1830 1283
1831 default: 1284 if (arb) {
1832 return false; 1285 nvgpu_spinlock_acquire(&arb->sessions_lock);
1286 list_del_rcu(&session->link);
1287 nvgpu_spinlock_release(&arb->sessions_lock);
1833 } 1288 }
1834}
1835 1289
1836int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, 1290 head = llist_del_all(&session->targets);
1837 u32 api_domain, u32 *max_points, u16 *fpoints) 1291 llist_for_each_entry_safe(dev, tmp, head, node) {
1838{ 1292 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1839 int err;
1840 u32 i;
1841
1842 switch (api_domain) {
1843 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1844 err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK,
1845 max_points, fpoints);
1846 if (err || !fpoints)
1847 return err;
1848 for (i = 0; i < *max_points; i++)
1849 fpoints[i] /= 2;
1850 return 0;
1851 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1852 return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK,
1853 max_points, fpoints);
1854 default:
1855 return -EINVAL;
1856 } 1293 }
1294 synchronize_rcu();
1295 nvgpu_kfree(g, session);
1857} 1296}
1858 1297
1859static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, 1298void nvgpu_clk_arb_release_session(struct gk20a *g,
1860 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, 1299 struct nvgpu_clk_session *session)
1861 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
1862{ 1300{
1863 u16 gpc2clk_target, mclk_target; 1301 struct nvgpu_clk_arb *arb = g->clk_arb;
1864 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
1865 u32 mclk_voltuv, mclk_voltuv_sram;
1866 u32 pstate = VF_POINT_INVALID_PSTATE;
1867 struct nvgpu_clk_vf_table *table;
1868 u32 index, index_mclk;
1869 struct nvgpu_clk_vf_point *mclk_vf = NULL;
1870
1871 do {
1872 gpc2clk_target = *gpc2clk;
1873 mclk_target = *mclk;
1874 gpc2clk_voltuv = 0;
1875 gpc2clk_voltuv_sram = 0;
1876 mclk_voltuv = 0;
1877 mclk_voltuv_sram = 0;
1878
1879 table = NV_ACCESS_ONCE(arb->current_vf_table);
1880 /* pointer to table can be updated by callback */
1881 nvgpu_smp_rmb();
1882
1883 if (!table)
1884 continue;
1885 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
1886 nvgpu_err(arb->g, "found empty table");
1887 goto find_exit;
1888 }
1889 /* First we check MCLK to find out which PSTATE we are
1890 * are requesting, and from there try to find the minimum
1891 * GPC2CLK on the same PSTATE that satisfies the request.
1892 * If no GPC2CLK can be found, then we need to up the PSTATE
1893 */
1894
1895recalculate_vf_point:
1896 for (index = 0; index < table->mclk_num_points; index++) {
1897 if (table->mclk_points[index].mem_mhz >= mclk_target) {
1898 mclk_vf = &table->mclk_points[index];
1899 break;
1900 }
1901 }
1902 if (index == table->mclk_num_points) {
1903 mclk_vf = &table->mclk_points[index-1];
1904 index = table->mclk_num_points - 1;
1905 }
1906 index_mclk = index;
1907
1908 /* round up the freq requests */
1909 for (index = 0; index < table->gpc2clk_num_points; index++) {
1910 pstate = VF_POINT_COMMON_PSTATE(
1911 &table->gpc2clk_points[index], mclk_vf);
1912
1913 if ((table->gpc2clk_points[index].gpc_mhz >=
1914 gpc2clk_target) &&
1915 (pstate != VF_POINT_INVALID_PSTATE)) {
1916 gpc2clk_target =
1917 table->gpc2clk_points[index].gpc_mhz;
1918 *sys2clk =
1919 table->gpc2clk_points[index].sys_mhz;
1920 *xbar2clk =
1921 table->gpc2clk_points[index].xbar_mhz;
1922
1923 gpc2clk_voltuv =
1924 table->gpc2clk_points[index].uvolt;
1925 gpc2clk_voltuv_sram =
1926 table->gpc2clk_points[index].uvolt_sram;
1927 break;
1928 }
1929 }
1930
1931 if (index == table->gpc2clk_num_points) {
1932 pstate = VF_POINT_COMMON_PSTATE(
1933 &table->gpc2clk_points[index-1], mclk_vf);
1934 if (pstate != VF_POINT_INVALID_PSTATE) {
1935 gpc2clk_target =
1936 table->gpc2clk_points[index-1].gpc_mhz;
1937 *sys2clk =
1938 table->gpc2clk_points[index-1].sys_mhz;
1939 *xbar2clk =
1940 table->gpc2clk_points[index-1].xbar_mhz;
1941 1302
1942 gpc2clk_voltuv = 1303 gk20a_dbg_fn("");
1943 table->gpc2clk_points[index-1].uvolt;
1944 gpc2clk_voltuv_sram =
1945 table->gpc2clk_points[index-1].
1946 uvolt_sram;
1947 } else if (index_mclk >= table->mclk_num_points - 1) {
1948 /* There is no available combination of MCLK
1949 * and GPC2CLK, we need to fail this
1950 */
1951 gpc2clk_target = 0;
1952 mclk_target = 0;
1953 pstate = VF_POINT_INVALID_PSTATE;
1954 goto find_exit;
1955 } else {
1956 /* recalculate with higher PSTATE */
1957 gpc2clk_target = *gpc2clk;
1958 mclk_target = table->mclk_points[index_mclk+1].
1959 mem_mhz;
1960 goto recalculate_vf_point;
1961 }
1962 }
1963 1304
1964 mclk_target = mclk_vf->mem_mhz; 1305 session->zombie = true;
1965 mclk_voltuv = mclk_vf->uvolt; 1306 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
1966 mclk_voltuv_sram = mclk_vf->uvolt_sram; 1307 if (arb && arb->update_work_queue)
1308 queue_work(arb->update_work_queue, &arb->update_fn_work);
1309}
1967 1310
1968 } while (!table || 1311void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
1969 (NV_ACCESS_ONCE(arb->current_vf_table) != table)); 1312{
1313 struct nvgpu_clk_arb *arb = g->clk_arb;
1970 1314
1971find_exit: 1315 if (arb->vf_table_work_queue)
1972 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; 1316 queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
1973 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
1974 gpc2clk_voltuv_sram : mclk_voltuv_sram;
1975 /* noise unaware vmin */
1976 *nuvmin = mclk_voltuv;
1977 *nuvmin_sram = mclk_voltuv_sram;
1978 *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
1979 *mclk = mclk_target;
1980 return pstate;
1981} 1317}
1982 1318
1983/* This function is inherently unsafe to call while arbiter is running 1319/* This function is inherently unsafe to call while arbiter is running
@@ -1988,60 +1324,6 @@ int nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
1988 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate); 1324 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate);
1989} 1325}
1990 1326
1991static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1992 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
1993 u32 voltuv_sram)
1994{
1995 struct set_fll_clk fllclk;
1996 struct nvgpu_clk_arb *arb = g->clk_arb;
1997 int status;
1998
1999 fllclk.gpc2clkmhz = gpc2clk_target;
2000 fllclk.sys2clkmhz = sys2clk_target;
2001 fllclk.xbar2clkmhz = xbar2clk_target;
2002
2003 fllclk.voltuv = voltuv;
2004
2005 /* if voltage ascends we do:
2006 * (1) FLL change
2007 * (2) Voltage change
2008 * (3) MCLK change
2009 * If it goes down
2010 * (1) MCLK change
2011 * (2) Voltage change
2012 * (3) FLL change
2013 */
2014
2015 /* descending */
2016 if (voltuv < arb->voltuv_actual) {
2017 status = g->ops.clk.mclk_change(g, mclk_target);
2018 if (status < 0)
2019 return status;
2020
2021 status = volt_set_voltage(g, voltuv, voltuv_sram);
2022 if (status < 0)
2023 return status;
2024
2025 status = clk_set_fll_clks(g, &fllclk);
2026 if (status < 0)
2027 return status;
2028 } else {
2029 status = clk_set_fll_clks(g, &fllclk);
2030 if (status < 0)
2031 return status;
2032
2033 status = volt_set_voltage(g, voltuv, voltuv_sram);
2034 if (status < 0)
2035 return status;
2036
2037 status = g->ops.clk.mclk_change(g, mclk_target);
2038 if (status < 0)
2039 return status;
2040 }
2041
2042 return 0;
2043}
2044
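For illustration, a minimal sketch of how a caller inside clk_arb.c might apply one operating point through nvgpu_clk_arb_change_vf_point(); the frequency and voltage values below are made-up placeholders, and the real arbiter worker derives them from the VF table under the pstate lock.

/* Hypothetical caller, for illustration only; assumes the surrounding
 * clk_arb.c context where nvgpu_clk_arb_change_vf_point() is visible. */
static int apply_example_vf_point(struct gk20a *g)
{
	u16 gpc2clk_mhz = 2000;		/* 2x the GPCCLK value exposed to userspace */
	u16 sys2clk_mhz = 1600;
	u16 xbar2clk_mhz = 1800;
	u16 mclk_mhz = 3500;
	u32 voltuv = 800000;		/* target voltage in microvolts */
	u32 voltuv_sram = 850000;

	/* The helper orders the MCLK, voltage and FLL updates depending on
	 * whether voltuv is above or below arb->voltuv_actual. */
	return nvgpu_clk_arb_change_vf_point(g, gpc2clk_mhz, sys2clk_mhz,
			xbar2clk_mhz, mclk_mhz, voltuv, voltuv_sram);
}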
2045void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) 1327void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
2046{ 1328{
2047 struct nvgpu_clk_arb *arb = g->clk_arb; 1329 struct nvgpu_clk_arb *arb = g->clk_arb;
@@ -2051,71 +1333,3 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
2051 else 1333 else
2052 nvgpu_mutex_release(&arb->pstate_lock); 1334 nvgpu_mutex_release(&arb->pstate_lock);
2053} 1335}
2054
2055#ifdef CONFIG_DEBUG_FS
2056static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
2057{
2058 struct gk20a *g = s->private;
2059 struct nvgpu_clk_arb *arb = g->clk_arb;
2060 struct nvgpu_clk_arb_debug *debug;
2061
2062 u64 num;
2063 s64 tmp, avg, std, max, min;
2064
2065 debug = NV_ACCESS_ONCE(arb->debug);
2066 /* Make copy of structure and ensure no reordering */
2067 nvgpu_smp_rmb();
2068 if (!debug)
2069 return -EINVAL;
2070
2071 std = debug->switch_std;
2072 avg = debug->switch_avg;
2073 max = debug->switch_max;
2074 min = debug->switch_min;
2075 num = debug->switch_num;
2076
2077 tmp = std;
2078 do_div(tmp, num);
2079 seq_printf(s, "Number of transitions: %lld\n",
2080 num);
2081 seq_printf(s, "max / min : %lld / %lld usec\n",
2082 max, min);
2083 seq_printf(s, "avg / std : %lld / %ld usec\n",
2084 avg, int_sqrt(tmp));
2085
2086 return 0;
2087}
2088
2089static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
2090{
2091 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
2092}
2093
2094static const struct file_operations nvgpu_clk_arb_stats_fops = {
2095 .open = nvgpu_clk_arb_stats_open,
2096 .read = seq_read,
2097 .llseek = seq_lseek,
2098 .release = single_release,
2099};
2100
2101
2102static int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
2103{
2104 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
2105 struct dentry *gpu_root = l->debugfs;
2106 struct dentry *d;
2107
2108 gk20a_dbg(gpu_dbg_info, "g=%p", g);
2109
2110 d = debugfs_create_file(
2111 "arb_stats",
2112 S_IRUGO,
2113 gpu_root,
2114 g,
2115 &nvgpu_clk_arb_stats_fops);
2116 if (!d)
2117 return -ENOMEM;
2118
2119 return 0;
2120}
2121#endif
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
new file mode 100644
index 00000000..b66876da
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
@@ -0,0 +1,120 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_CLK_ARB_LINUX_H__
18#define __NVGPU_CLK_ARB_LINUX_H__
19
20#include <nvgpu/types.h>
21#include <nvgpu/bitops.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/kmem.h>
24#include <nvgpu/atomic.h>
25#include <nvgpu/bug.h>
26#include <nvgpu/kref.h>
27#include <nvgpu/log.h>
28#include <nvgpu/barrier.h>
29#include <nvgpu/cond.h>
30
31#include "gk20a/gk20a.h"
32#include "clk/clk.h"
33#include "pstate/pstate.h"
34#include "lpwr/lpwr.h"
35#include "volt/volt.h"
36
37/*
38 * The defines here should finally move to clk_arb.h, once these are
39 * refactored to be free of Linux fields.
40 */
41struct nvgpu_clk_arb {
42 struct nvgpu_spinlock sessions_lock;
43 struct nvgpu_spinlock users_lock;
44
45 struct nvgpu_mutex pstate_lock;
46 struct list_head users;
47 struct list_head sessions;
48 struct llist_head requests;
49
50 struct gk20a *g;
51 int status;
52
53 struct nvgpu_clk_arb_target actual_pool[2];
54 struct nvgpu_clk_arb_target *actual;
55
56 u16 gpc2clk_default_mhz;
57 u16 mclk_default_mhz;
58 u32 voltuv_actual;
59
60 u16 gpc2clk_min, gpc2clk_max;
61 u16 mclk_min, mclk_max;
62
63 struct work_struct update_fn_work;
64 struct workqueue_struct *update_work_queue;
65 struct work_struct vf_table_fn_work;
66 struct workqueue_struct *vf_table_work_queue;
67
68 struct nvgpu_cond request_wq;
69
70 struct nvgpu_clk_vf_table *current_vf_table;
71 struct nvgpu_clk_vf_table vf_table_pool[2];
72 u32 vf_table_index;
73
74 u16 *mclk_f_points;
75 nvgpu_atomic_t req_nr;
76
77 u32 mclk_f_numpoints;
78 u16 *gpc2clk_f_points;
79 u32 gpc2clk_f_numpoints;
80
81 nvgpu_atomic64_t alarm_mask;
82 struct nvgpu_clk_notification_queue notification_queue;
83
84#ifdef CONFIG_DEBUG_FS
85 struct nvgpu_clk_arb_debug debug_pool[2];
86 struct nvgpu_clk_arb_debug *debug;
87 bool debugfs_set;
88#endif
89};
90
91struct nvgpu_clk_dev {
92 struct nvgpu_clk_session *session;
93 union {
94 struct list_head link;
95 struct llist_node node;
96 };
97 struct nvgpu_cond readout_wq;
98 nvgpu_atomic_t poll_mask;
99 u16 gpc2clk_target_mhz;
100 u16 mclk_target_mhz;
101 u32 alarms_reported;
102 nvgpu_atomic_t enabled_mask;
103 struct nvgpu_clk_notification_queue queue;
104 u32 arb_queue_head;
105 struct nvgpu_ref refcount;
106};
107
108struct nvgpu_clk_session {
109 bool zombie;
110 struct gk20a *g;
111 struct nvgpu_ref refcount;
112 struct list_head link;
113 struct llist_head targets;
114
115 struct nvgpu_clk_arb_target target_pool[2];
116 struct nvgpu_clk_arb_target *target;
117};
118
119#endif /* __NVGPU_CLK_ARB_LINUX_H__ */
120
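The arbiter state above is double-buffered: actual_pool[2], vf_table_pool[2], debug_pool[2] and the per-session target_pool[2] each hold one published entry and one spare, so readers can follow the published pointer lock-free. The reader side appears later in this change (pointer read, nvgpu_smp_rmb(), retry if the pointer moved); the writer side is not part of this diff, so the following is only a sketch of the implied publish step, assuming nvgpu_smp_wmb() from <nvgpu/barrier.h>.

/* Sketch of the writer-side publish implied by the reader retry loops;
 * not the arbiter's actual update path. */
static void publish_actual_example(struct nvgpu_clk_arb *arb,
				   u16 gpc2clk, u16 mclk, u32 pstate)
{
	/* Fill the slot that is currently unpublished. */
	struct nvgpu_clk_arb_target *next =
		(arb->actual == &arb->actual_pool[0]) ?
			&arb->actual_pool[1] : &arb->actual_pool[0];

	next->gpc2clk = gpc2clk;
	next->mclk = mclk;
	next->pstate = pstate;

	/* Make the slot contents visible before swapping the pointer;
	 * readers pair this with nvgpu_smp_rmb() and re-check the pointer. */
	nvgpu_smp_wmb();
	arb->actual = next;
}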
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
new file mode 100644
index 00000000..27afe777
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
@@ -0,0 +1,641 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/cdev.h>
18#include <linux/file.h>
19#include <linux/anon_inodes.h>
20#include <linux/rculist.h>
21#include <linux/llist.h>
22#include <linux/uaccess.h>
23#include <linux/poll.h>
24#ifdef CONFIG_DEBUG_FS
25#include <linux/debugfs.h>
26#endif
27#include <uapi/linux/nvgpu.h>
28
29#include <nvgpu/bitops.h>
30#include <nvgpu/lock.h>
31#include <nvgpu/kmem.h>
32#include <nvgpu/atomic.h>
33#include <nvgpu/bug.h>
34#include <nvgpu/kref.h>
35#include <nvgpu/log.h>
36#include <nvgpu/barrier.h>
37#include <nvgpu/cond.h>
38#include <nvgpu/clk_arb.h>
39
40#include "gk20a/gk20a.h"
41#include "clk/clk.h"
42#include "clk_arb_linux.h"
43#include "pstate/pstate.h"
44#include "lpwr/lpwr.h"
45#include "volt/volt.h"
46
47#ifdef CONFIG_DEBUG_FS
48#include "common/linux/os_linux.h"
49#endif
50
51static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
52 struct file *filp)
53{
54 struct nvgpu_clk_dev *dev = filp->private_data;
55 struct nvgpu_clk_session *session = dev->session;
56
57
58 gk20a_dbg_fn("");
59
60 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
61 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
62 return 0;
63}
64
65static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
66{
67 struct nvgpu_clk_dev *dev = filp->private_data;
68
69 gk20a_dbg_fn("");
70
71 poll_wait(filp, &dev->readout_wq.wq, wait);
72 return nvgpu_atomic_xchg(&dev->poll_mask, 0);
73}
74
75static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
76 struct file *filp)
77{
78 struct nvgpu_clk_dev *dev = filp->private_data;
79 struct nvgpu_clk_session *session = dev->session;
80 struct nvgpu_clk_arb *arb;
81
82 arb = session->g->clk_arb;
83
84 gk20a_dbg_fn("");
85
86 if (arb) {
87 nvgpu_spinlock_acquire(&arb->users_lock);
88 list_del_rcu(&dev->link);
89 nvgpu_spinlock_release(&arb->users_lock);
90 nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
91 }
92
93 synchronize_rcu();
94 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
95 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
96
97 return 0;
98}
99
100static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
101 struct nvgpu_gpu_event_info *info) {
102
103 u32 tail, head;
104 u32 events = 0;
105 struct nvgpu_clk_notification *p_notif;
106
107 tail = nvgpu_atomic_read(&dev->queue.tail);
108 head = nvgpu_atomic_read(&dev->queue.head);
109
110 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;
111
112 if (_WRAPGTEQ(tail, head) && info) {
113 head++;
114 p_notif = &dev->queue.notifications[head % dev->queue.size];
115 events |= p_notif->notification;
116 info->event_id = ffs(events) - 1;
117 info->timestamp = p_notif->timestamp;
118 nvgpu_atomic_set(&dev->queue.head, head);
119 }
120
121 return events;
122}
123
124static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
125 size_t size, loff_t *off)
126{
127 struct nvgpu_clk_dev *dev = filp->private_data;
128 struct nvgpu_gpu_event_info info;
129 ssize_t err;
130
131 gk20a_dbg_fn("filp=%p, buf=%p, size=%zu", filp, buf, size);
132
133 if ((size - *off) < sizeof(info))
134 return 0;
135
136 memset(&info, 0, sizeof(info));
137 /* Get the oldest event from the queue */
138 while (!__pending_event(dev, &info)) {
139 if (filp->f_flags & O_NONBLOCK)
140 return -EAGAIN;
141 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
142 __pending_event(dev, &info), 0);
143 if (err)
144 return err;
145 if (info.timestamp)
146 break;
147 }
148
149 if (copy_to_user(buf + *off, &info, sizeof(info)))
150 return -EFAULT;
151
152 return sizeof(info);
153}
154
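From user space, the event fd handled by this file is consumed with poll() plus a read() of one struct nvgpu_gpu_event_info per call. A rough sketch follows; how the fd itself is obtained (an ioctl on the nvgpu control node) is outside this diff, and the uapi header name is an assumption.

/* User-space sketch: wait for and read one clock-arbiter event.
 * Assumes <fd> was returned by the nvgpu control-node event ioctl. */
#include <poll.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* struct nvgpu_gpu_event_info (uapi) */

static int wait_for_clk_event(int fd, struct nvgpu_gpu_event_info *info)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	/* Wakes up when nvgpu_clk_arb_poll_dev() reports a pending event. */
	if (poll(&pfd, 1, -1) < 0)
		return -1;

	/* Each read() drains exactly one notification from the fd's queue. */
	if (read(fd, info, sizeof(*info)) != (ssize_t)sizeof(*info))
		return -1;

	return 0;	/* info->event_id and info->timestamp are now valid */
}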
155static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev,
156 struct nvgpu_gpu_set_event_filter_args *args)
157{
158 u32 mask;
159
160 gk20a_dbg(gpu_dbg_fn, "");
161
162 if (args->flags)
163 return -EINVAL;
164
165 if (args->size != 1)
166 return -EINVAL;
167
168 if (copy_from_user(&mask, (void __user *) args->buffer,
169 args->size * sizeof(u32)))
170 return -EFAULT;
171
172 /* update alarm mask */
173 nvgpu_atomic_set(&dev->enabled_mask, mask);
174
175 return 0;
176}
177
178static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
179 unsigned long arg)
180{
181 struct nvgpu_clk_dev *dev = filp->private_data;
182 struct gk20a *g = dev->session->g;
183 u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
184 int err = 0;
185
186 gk20a_dbg(gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
187
188 if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
189 || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
190 return -EINVAL;
191
192 BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE);
193
194 memset(buf, 0, sizeof(buf));
195 if (_IOC_DIR(cmd) & _IOC_WRITE) {
196 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
197 return -EFAULT;
198 }
199
200 switch (cmd) {
201 case NVGPU_EVENT_IOCTL_SET_FILTER:
202 err = nvgpu_clk_arb_set_event_filter(dev,
203 (struct nvgpu_gpu_set_event_filter_args *)buf);
204 break;
205 default:
206 nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
207 err = -ENOTTY;
208 }
209
210 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
211 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
212
213 return err;
214}
215
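A matching user-space sketch for NVGPU_EVENT_IOCTL_SET_FILTER, which replaces the per-fd enabled_mask checked above; the struct field names follow the uapi arguments referenced in nvgpu_clk_arb_set_event_filter(), and error handling is omitted.

/* User-space sketch: restrict an event fd to VF-update notifications only. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int filter_vf_update_only(int event_fd)
{
	uint32_t mask = 1u << NVGPU_GPU_EVENT_VF_UPDATE;
	struct nvgpu_gpu_set_event_filter_args args = {
		.flags = 0,				/* must be zero */
		.size = 1,				/* exactly one 32-bit word */
		.buffer = (uint64_t)(uintptr_t)&mask,
	};

	return ioctl(event_fd, NVGPU_EVENT_IOCTL_SET_FILTER, &args);
}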
216static const struct file_operations completion_dev_ops = {
217 .owner = THIS_MODULE,
218 .release = nvgpu_clk_arb_release_completion_dev,
219 .poll = nvgpu_clk_arb_poll_dev,
220};
221
222static const struct file_operations event_dev_ops = {
223 .owner = THIS_MODULE,
224 .release = nvgpu_clk_arb_release_event_dev,
225 .poll = nvgpu_clk_arb_poll_dev,
226 .read = nvgpu_clk_arb_read_event_dev,
227#ifdef CONFIG_COMPAT
228 .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
229#endif
230 .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
231};
232
233static int nvgpu_clk_arb_install_fd(struct gk20a *g,
234 struct nvgpu_clk_session *session,
235 const struct file_operations *fops,
236 struct nvgpu_clk_dev **_dev)
237{
238 struct file *file;
239 int fd;
240 int err;
241 int status;
242 char name[64];
243 struct nvgpu_clk_dev *dev;
244
245 gk20a_dbg_fn("");
246
247 dev = nvgpu_kzalloc(g, sizeof(*dev));
248 if (!dev)
249 return -ENOMEM;
250
251 status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
252 DEFAULT_EVENT_NUMBER);
253 if (status < 0) {
254 err = status;
255 goto fail;
256 }
257
258 fd = get_unused_fd_flags(O_RDWR);
259 if (fd < 0) {
260 err = fd;
261 goto fail;
262 }
263
264 snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
265 file = anon_inode_getfile(name, fops, dev, O_RDWR);
266 if (IS_ERR(file)) {
267 err = PTR_ERR(file);
268 goto fail_fd;
269 }
270
271 fd_install(fd, file);
272
273 nvgpu_cond_init(&dev->readout_wq);
274
275 nvgpu_atomic_set(&dev->poll_mask, 0);
276
277 dev->session = session;
278 nvgpu_ref_init(&dev->refcount);
279
280 nvgpu_ref_get(&session->refcount);
281
282 *_dev = dev;
283
284 return fd;
285
286fail_fd:
287 put_unused_fd(fd);
288fail:
289 nvgpu_kfree(g, dev);
290
291 return err;
292}
293
294int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
295 struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
296{
297 struct nvgpu_clk_arb *arb = g->clk_arb;
298 struct nvgpu_clk_dev *dev;
299 int fd;
300
301 gk20a_dbg_fn("");
302
303 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
304 if (fd < 0)
305 return fd;
306
307 /* TODO: alarm mask needs to be set to default value to prevent
308 * failures of legacy tests. This will be removed when sanity is
309 * updated
310 */
311 if (alarm_mask)
312 nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
313 else
314 nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
315
316 dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
317
318 nvgpu_spinlock_acquire(&arb->users_lock);
319 list_add_tail_rcu(&dev->link, &arb->users);
320 nvgpu_spinlock_release(&arb->users_lock);
321
322 *event_fd = fd;
323
324 return 0;
325}
326
327int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
328 struct nvgpu_clk_session *session, int *request_fd)
329{
330 struct nvgpu_clk_dev *dev;
331 int fd;
332
333 gk20a_dbg_fn("");
334
335 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
336 if (fd < 0)
337 return fd;
338
339 *request_fd = fd;
340
341 return 0;
342}
343
344int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
345 struct nvgpu_clk_session *session, int request_fd)
346{
347 struct nvgpu_clk_arb *arb = g->clk_arb;
348 struct nvgpu_clk_dev *dev;
349 struct fd fd;
350 int err = 0;
351
352 gk20a_dbg_fn("");
353
354 fd = fdget(request_fd);
355 if (!fd.file)
356 return -EINVAL;
357
358 if (fd.file->f_op != &completion_dev_ops) {
359 err = -EINVAL;
360 goto fdput_fd;
361 }
362
363 dev = (struct nvgpu_clk_dev *) fd.file->private_data;
364
365 if (!dev || dev->session != session) {
366 err = -EINVAL;
367 goto fdput_fd;
368 }
369 nvgpu_ref_get(&dev->refcount);
370 llist_add(&dev->node, &session->targets);
371 if (arb->update_work_queue)
372 queue_work(arb->update_work_queue, &arb->update_fn_work);
373
374fdput_fd:
375 fdput(fd);
376 return err;
377}
378
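Taken together, the entry points above implement the request life cycle driven by the ioctl layer: allocate a completion fd, stage per-domain targets on it, then commit it so the arbiter worker picks it up. A condensed, illustrative kernel-side sequence (placeholder session and target values):

/* Sketch of one GPCCLK request; completion is later signalled through
 * poll() on the same fd once the arbiter has applied it. */
static int request_gpcclk_example(struct gk20a *g,
				  struct nvgpu_clk_session *session,
				  u16 gpcclk_mhz)
{
	int fd, err;

	err = nvgpu_clk_arb_install_request_fd(g, session, &fd);
	if (err)
		return err;

	/* Stage the target; GPCCLK is doubled to GPC2CLK internally. */
	err = nvgpu_clk_arb_set_session_target_mhz(session, fd,
			NVGPU_GPU_CLK_DOMAIN_GPCCLK, gpcclk_mhz);
	if (err)
		return err;

	/* Queue the request on the session and kick the update worker. */
	return nvgpu_clk_arb_commit_request_fd(g, session, fd);
}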
379int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
380 int request_fd, u32 api_domain, u16 target_mhz)
381{
382 struct nvgpu_clk_dev *dev;
383 struct fd fd;
384 int err = 0;
385
386 gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
387
388 fd = fdget(request_fd);
389 if (!fd.file)
390 return -EINVAL;
391
392 if (fd.file->f_op != &completion_dev_ops) {
393 err = -EINVAL;
394 goto fdput_fd;
395 }
396
397 dev = fd.file->private_data;
398 if (!dev || dev->session != session) {
399 err = -EINVAL;
400 goto fdput_fd;
401 }
402
403 switch (api_domain) {
404 case NVGPU_GPU_CLK_DOMAIN_MCLK:
405 dev->mclk_target_mhz = target_mhz;
406 break;
407
408 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
409 dev->gpc2clk_target_mhz = target_mhz * 2ULL;
410 break;
411
412 default:
413 err = -EINVAL;
414 }
415
416fdput_fd:
417 fdput(fd);
418 return err;
419}
420
421int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
422 u32 api_domain, u16 *freq_mhz)
423{
424 int err = 0;
425 struct nvgpu_clk_arb_target *target;
426
427 do {
428 target = NV_ACCESS_ONCE(session->target);
429 /* no reordering of this pointer */
430 nvgpu_smp_rmb();
431
432 switch (api_domain) {
433 case NVGPU_GPU_CLK_DOMAIN_MCLK:
434 *freq_mhz = target->mclk;
435 break;
436
437 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
438 *freq_mhz = target->gpc2clk / 2ULL;
439 break;
440
441 default:
442 *freq_mhz = 0;
443 err = -EINVAL;
444 }
445 } while (target != NV_ACCESS_ONCE(session->target));
446 return err;
447}
448
449int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
450 u32 api_domain, u16 *freq_mhz)
451{
452 struct nvgpu_clk_arb *arb = g->clk_arb;
453 int err = 0;
454 struct nvgpu_clk_arb_target *actual;
455
456 do {
457 actual = NV_ACCESS_ONCE(arb->actual);
458 /* no reordering of this pointer */
459 nvgpu_smp_rmb();
460
461 switch (api_domain) {
462 case NVGPU_GPU_CLK_DOMAIN_MCLK:
463 *freq_mhz = actual->mclk;
464 break;
465
466 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
467 *freq_mhz = actual->gpc2clk / 2ULL;
468 break;
469
470 default:
471 *freq_mhz = 0;
472 err = -EINVAL;
473 }
474 } while (actual != NV_ACCESS_ONCE(arb->actual));
475 return err;
476}
477
478int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
479 u32 api_domain, u16 *freq_mhz)
480{
481 switch (api_domain) {
482 case NVGPU_GPU_CLK_DOMAIN_MCLK:
483 *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) /
484 1000000ULL;
485 return 0;
486
487 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
488 *freq_mhz = g->ops.clk.measure_freq(g,
489 CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL;
490 return 0;
491
492 default:
493 return -EINVAL;
494 }
495}
496
497int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
498 u16 *min_mhz, u16 *max_mhz)
499{
500 int ret;
501
502 switch (api_domain) {
503 case NVGPU_GPU_CLK_DOMAIN_MCLK:
504 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
505 CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz);
506 return ret;
507
508 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
509 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
510 CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz);
511 if (!ret) {
512 *min_mhz /= 2;
513 *max_mhz /= 2;
514 }
515 return ret;
516
517 default:
518 return -EINVAL;
519 }
520}
521
522u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
523{
524 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
525 u32 api_domains = 0;
526
527 if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
528 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
529
530 if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
531 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
532
533 return api_domains;
534}
535
536bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain)
537{
538 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
539
540 switch (api_domain) {
541 case NVGPU_GPU_CLK_DOMAIN_MCLK:
542 return ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0);
543
544 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
545 return ((clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0);
546
547 default:
548 return false;
549 }
550}
551
552int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
553 u32 api_domain, u32 *max_points, u16 *fpoints)
554{
555 int err;
556 u32 i;
557
558 switch (api_domain) {
559 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
560 err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK,
561 max_points, fpoints);
562 if (err || !fpoints)
563 return err;
564 for (i = 0; i < *max_points; i++)
565 fpoints[i] /= 2;
566 return 0;
567 case NVGPU_GPU_CLK_DOMAIN_MCLK:
568 return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK,
569 max_points, fpoints);
570 default:
571 return -EINVAL;
572 }
573}
574
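A recurring detail in the functions above: user space sees GPCCLK, while the arbiter and the VBIOS tables work in GPC2CLK, which runs at twice the GPCCLK rate, so values are halved on the way out and doubled on the way in. Hypothetical helpers that would centralize the conversions these functions open-code:

/* Hypothetical helpers, for illustration only. */
static inline u16 gpc2clk_to_gpcclk_mhz(u16 gpc2clk_mhz)
{
	return gpc2clk_mhz / 2;
}

static inline u16 gpcclk_to_gpc2clk_mhz(u16 gpcclk_mhz)
{
	return gpcclk_mhz * 2;
}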
575#ifdef CONFIG_DEBUG_FS
576static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
577{
578 struct gk20a *g = s->private;
579 struct nvgpu_clk_arb *arb = g->clk_arb;
580 struct nvgpu_clk_arb_debug *debug;
581
582 u64 num;
583 s64 tmp, avg, std, max, min;
584
585 debug = NV_ACCESS_ONCE(arb->debug);
586 /* Make copy of structure and ensure no reordering */
587 nvgpu_smp_rmb();
588 if (!debug)
589 return -EINVAL;
590
591 std = debug->switch_std;
592 avg = debug->switch_avg;
593 max = debug->switch_max;
594 min = debug->switch_min;
595 num = debug->switch_num;
596
597 tmp = std;
598 do_div(tmp, num);
599 seq_printf(s, "Number of transitions: %lld\n",
600 num);
601 seq_printf(s, "max / min : %lld / %lld usec\n",
602 max, min);
603 seq_printf(s, "avg / std : %lld / %ld usec\n",
604 avg, int_sqrt(tmp));
605
606 return 0;
607}
608
609static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
610{
611 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
612}
613
614static const struct file_operations nvgpu_clk_arb_stats_fops = {
615 .open = nvgpu_clk_arb_stats_open,
616 .read = seq_read,
617 .llseek = seq_lseek,
618 .release = single_release,
619};
620
621
622int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
623{
624 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
625 struct dentry *gpu_root = l->debugfs;
626 struct dentry *d;
627
628 gk20a_dbg(gpu_dbg_info, "g=%p", g);
629
630 d = debugfs_create_file(
631 "arb_stats",
632 S_IRUGO,
633 gpu_root,
634 g,
635 &nvgpu_clk_arb_stats_fops);
636 if (!d)
637 return -ENOMEM;
638
639 return 0;
640}
641#endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h
index c13144ee..a2f8135e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -24,10 +24,95 @@
24#define __NVGPU_CLK_ARB_H__ 24#define __NVGPU_CLK_ARB_H__
25 25
26#include <nvgpu/types.h> 26#include <nvgpu/types.h>
27
28struct gk20a;
27#include <nvgpu/bitops.h>
28#include <nvgpu/lock.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/atomic.h>
31#include <nvgpu/bug.h>
32#include <nvgpu/kref.h>
33#include <nvgpu/log.h>
34#include <nvgpu/barrier.h>
35#include <nvgpu/cond.h>
36
37#include "gk20a/gk20a.h"
38#include "clk/clk.h"
39#include "pstate/pstate.h"
40#include "lpwr/lpwr.h"
41#include "volt/volt.h"
42
43#define MAX_F_POINTS 256
44#define DEFAULT_EVENT_NUMBER 32
45
46struct nvgpu_clk_dev;
47struct nvgpu_clk_arb_target;
48struct nvgpu_clk_notification_queue;
29struct nvgpu_clk_session; 49struct nvgpu_clk_session;
30 50
51#define VF_POINT_INVALID_PSTATE ~0U
52#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
53#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
54 __fls((a)->pstates) :\
55 VF_POINT_INVALID_PSTATE)
56#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
57 __fls((a)->pstates & (b)->pstates) :\
58 VF_POINT_INVALID_PSTATE)
59
60/* Local Alarms */
61#define EVENT(alarm) (0x1UL << NVGPU_GPU_EVENT_##alarm)
62
63#define LOCAL_ALARM_MASK (EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE) | \
64 EVENT(VF_UPDATE))
65
66#define _WRAPGTEQ(a, b) ((a-b) > 0)
67
68struct nvgpu_clk_notification {
69 u32 notification;
70 u64 timestamp;
71};
72
73struct nvgpu_clk_notification_queue {
74 u32 size;
75 nvgpu_atomic_t head;
76 nvgpu_atomic_t tail;
77 struct nvgpu_clk_notification *notifications;
78};
79
80struct nvgpu_clk_vf_point {
81 u16 pstates;
82 union {
83 struct {
84 u16 gpc_mhz;
85 u16 sys_mhz;
86 u16 xbar_mhz;
87 };
88 u16 mem_mhz;
89 };
90 u32 uvolt;
91 u32 uvolt_sram;
92};
93
94struct nvgpu_clk_vf_table {
95 u32 mclk_num_points;
96 struct nvgpu_clk_vf_point *mclk_points;
97 u32 gpc2clk_num_points;
98 struct nvgpu_clk_vf_point *gpc2clk_points;
99};
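The pstates bitmask and the VF_POINT_* macros above select the highest P-state that a GPC2CLK point and an MCLK point both support, falling back to VF_POINT_INVALID_PSTATE when the masks do not overlap. A small illustrative example (values are made up):

/* Illustrative only, not part of the header. */
static inline u32 example_common_pstate(void)
{
	struct nvgpu_clk_vf_point gpc = { .pstates = BIT(5) | BIT(8) };
	struct nvgpu_clk_vf_point mclk = { .pstates = BIT(8) };

	/* The shared mask is BIT(8), so __fls() returns 8, the highest
	 * P-state both points allow. */
	return VF_POINT_COMMON_PSTATE(&gpc, &mclk);	/* == 8 */
}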
100#ifdef CONFIG_DEBUG_FS
101struct nvgpu_clk_arb_debug {
102 s64 switch_max;
103 s64 switch_min;
104 u64 switch_num;
105 s64 switch_avg;
106 s64 switch_std;
107};
108#endif
109
110struct nvgpu_clk_arb_target {
111 u16 mclk;
112 u16 gpc2clk;
113 u32 pstate;
114};
115
31int nvgpu_clk_arb_init_arbiter(struct gk20a *g); 116int nvgpu_clk_arb_init_arbiter(struct gk20a *g);
32 117
33int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 118int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
@@ -80,5 +165,19 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock);
80void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g); 165void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g);
81 166
82void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm); 167void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm);
168
169void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount);
170
171void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount);
172
173int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
174 struct nvgpu_clk_notification_queue *queue,
175 size_t events_number);
176
177void nvgpu_clk_notification_queue_free(struct gk20a *g,
178 struct nvgpu_clk_notification_queue *queue);
179#ifdef CONFIG_DEBUG_FS
180int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
181#endif
83#endif /* __NVGPU_CLK_ARB_H__ */ 182#endif /* __NVGPU_CLK_ARB_H__ */
84 183