author    Sourab Gupta <sourabg@nvidia.com>    2018-04-19 01:17:46 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>    2018-04-26 15:57:04 -0400
commit    9fa77a1c05bf01f253b5118c283086fea2eca1de (patch)
tree      96ca92c998afafb2f5b1796248f1e07848433ab0 /drivers/gpu/nvgpu
parent    d22d9d8caa06ae5ad2518c598f27869c06d0656d (diff)
gpu: nvgpu: split clk arb code
The clk arbiter code contains two significant portions: one that interacts with userspace and is OS-specific, and another that does the heavy lifting and can be moved to common, OS-agnostic code. Split the code into two files in preparation for refactoring the clk arbiter.

Jira VQRM-3741

Change-Id: I47e2c5b18d86949d02d6963c69c2e2ad161626f7
Signed-off-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1699240
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
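To make the boundary concrete: the notification-queue helpers drop their static qualifier in clk_arb.c so the new Linux-side file can call them. Below is a minimal sketch of the shared declarations as they might appear in include/nvgpu/clk_arb.h after the split; the exact header placement is an assumption, but the types and signatures mirror the hunks removed from clk_arb.c in the diff.

    /* Sketch of the OS-agnostic declarations shared by clk_arb.c and
     * ioctl_clk_arb.c after the split; exact header layout is assumed,
     * the definitions themselves are taken from the removed hunks below.
     */
    struct nvgpu_clk_notification {
    	u32 notification;	/* alarm bits raised for this event */
    	u64 timestamp;		/* time at which the alarm was queued */
    };

    struct nvgpu_clk_notification_queue {
    	u32 size;
    	nvgpu_atomic_t head;
    	nvgpu_atomic_t tail;
    	struct nvgpu_clk_notification *notifications;
    };

    /* Formerly static in clk_arb.c; now visible to the Linux-specific code. */
    int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
    		struct nvgpu_clk_notification_queue *queue,
    		size_t events_number);
    void nvgpu_clk_notification_queue_free(struct gk20a *g,
    		struct nvgpu_clk_notification_queue *queue);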
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/Makefile                      |    1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/clk_arb.c        | 1788
-rw-r--r--  drivers/gpu/nvgpu/common/linux/clk_arb_linux.h  |  120
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c  |  641
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/clk_arb.h       |  105
5 files changed, 1365 insertions(+), 1290 deletions(-)
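The userspace-facing pieces land in the new common/linux/ioctl_clk_arb.c. As a rough sketch, inferred from the hunks removed from clk_arb.c below rather than quoted from the new file, the character-device operation tables it presumably hosts look like this:

    /* Linux-specific fops, presumably now defined in ioctl_clk_arb.c;
     * the callbacks are the same ones removed from clk_arb.c in this diff.
     */
    static const struct file_operations completion_dev_ops = {
    	.owner = THIS_MODULE,
    	.release = nvgpu_clk_arb_release_completion_dev,
    	.poll = nvgpu_clk_arb_poll_dev,
    };

    static const struct file_operations event_dev_ops = {
    	.owner = THIS_MODULE,
    	.release = nvgpu_clk_arb_release_event_dev,
    	.poll = nvgpu_clk_arb_poll_dev,
    	.read = nvgpu_clk_arb_read_event_dev,
    #ifdef CONFIG_COMPAT
    	.compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
    #endif
    	.unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
    };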
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 31483c5d..6d0fcad0 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -273,6 +273,7 @@ nvgpu-y += \
 	clk/clk_domain.o \
 	clk/clk_prog.o \
 	clk/clk_vf_point.o \
+	common/linux/ioctl_clk_arb.o \
 	common/linux/clk_arb.o \
 	clk/clk_freq_controller.o \
 	perf/vfe_var.o \
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c
index 82c97891..7cb3752a 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -39,224 +39,12 @@
39 39
40#include "gk20a/gk20a.h" 40#include "gk20a/gk20a.h"
41#include "clk/clk.h" 41#include "clk/clk.h"
42#include "clk_arb_linux.h"
42#include "pstate/pstate.h" 43#include "pstate/pstate.h"
43#include "lpwr/lpwr.h" 44#include "lpwr/lpwr.h"
44#include "volt/volt.h" 45#include "volt/volt.h"
45 46
46#ifdef CONFIG_DEBUG_FS 47int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
47#include "common/linux/os_linux.h"
48#endif
49
50#define MAX_F_POINTS 256
51#define DEFAULT_EVENT_NUMBER 32
52
53struct nvgpu_clk_dev;
54struct nvgpu_clk_arb_target;
55struct nvgpu_clk_notification_queue;
56
57#ifdef CONFIG_DEBUG_FS
58static int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
59#endif
60
61static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
62 struct file *filp);
63static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
64 struct file *filp);
65static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
66static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
67 size_t size, loff_t *off);
68
69static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
70 unsigned long arg);
71
72static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
73static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
74static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
75static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount);
76static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount);
77static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
78 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
79 u32 voltuv_sram);
80static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
81 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
82 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram);
83static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
84 struct nvgpu_clk_arb_target *target,
85 u32 alarm_mask);
86static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm);
87static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm);
88
89static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
90 struct nvgpu_clk_notification_queue *queue,
91 u32 alarm_mask);
92static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
93 struct nvgpu_clk_notification_queue *queue,
94 size_t events_number);
95
96static void nvgpu_clk_notification_queue_free(struct gk20a *g,
97 struct nvgpu_clk_notification_queue *queue);
98
99#define VF_POINT_INVALID_PSTATE ~0U
100#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
101#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
102 __fls((a)->pstates) :\
103 VF_POINT_INVALID_PSTATE)
104#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
105 __fls((a)->pstates & (b)->pstates) :\
106 VF_POINT_INVALID_PSTATE)
107
108/* Local Alarms */
109#define EVENT(alarm) (0x1UL << NVGPU_GPU_EVENT_##alarm)
110
111#define LOCAL_ALARM_MASK (EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE) | \
112 EVENT(VF_UPDATE))
113
114#define _WRAPGTEQ(a, b) ((a-b) > 0)
115
116struct nvgpu_clk_notification {
117 u32 notification;
118 u64 timestamp;
119};
120
121struct nvgpu_clk_notification_queue {
122 u32 size;
123 nvgpu_atomic_t head;
124 nvgpu_atomic_t tail;
125 struct nvgpu_clk_notification *notifications;
126};
127
128struct nvgpu_clk_vf_point {
129 u16 pstates;
130 union {
131 struct {
132 u16 gpc_mhz;
133 u16 sys_mhz;
134 u16 xbar_mhz;
135 };
136 u16 mem_mhz;
137 };
138 u32 uvolt;
139 u32 uvolt_sram;
140};
141
142struct nvgpu_clk_vf_table {
143 u32 mclk_num_points;
144 struct nvgpu_clk_vf_point *mclk_points;
145 u32 gpc2clk_num_points;
146 struct nvgpu_clk_vf_point *gpc2clk_points;
147};
148#ifdef CONFIG_DEBUG_FS
149struct nvgpu_clk_arb_debug {
150 s64 switch_max;
151 s64 switch_min;
152 u64 switch_num;
153 s64 switch_avg;
154 s64 switch_std;
155};
156#endif
157
158struct nvgpu_clk_arb_target {
159 u16 mclk;
160 u16 gpc2clk;
161 u32 pstate;
162};
163
164struct nvgpu_clk_arb {
165 struct nvgpu_spinlock sessions_lock;
166 struct nvgpu_spinlock users_lock;
167
168 struct nvgpu_mutex pstate_lock;
169 struct list_head users;
170 struct list_head sessions;
171 struct llist_head requests;
172
173 struct gk20a *g;
174 int status;
175
176 struct nvgpu_clk_arb_target actual_pool[2];
177 struct nvgpu_clk_arb_target *actual;
178
179 u16 gpc2clk_default_mhz;
180 u16 mclk_default_mhz;
181 u32 voltuv_actual;
182
183 u16 gpc2clk_min, gpc2clk_max;
184 u16 mclk_min, mclk_max;
185
186 struct work_struct update_fn_work;
187 struct workqueue_struct *update_work_queue;
188 struct work_struct vf_table_fn_work;
189 struct workqueue_struct *vf_table_work_queue;
190
191 struct nvgpu_cond request_wq;
192
193 struct nvgpu_clk_vf_table *current_vf_table;
194 struct nvgpu_clk_vf_table vf_table_pool[2];
195 u32 vf_table_index;
196
197 u16 *mclk_f_points;
198 nvgpu_atomic_t req_nr;
199
200 u32 mclk_f_numpoints;
201 u16 *gpc2clk_f_points;
202 u32 gpc2clk_f_numpoints;
203
204 nvgpu_atomic64_t alarm_mask;
205 struct nvgpu_clk_notification_queue notification_queue;
206
207#ifdef CONFIG_DEBUG_FS
208 struct nvgpu_clk_arb_debug debug_pool[2];
209 struct nvgpu_clk_arb_debug *debug;
210 bool debugfs_set;
211#endif
212};
213
214struct nvgpu_clk_dev {
215 struct nvgpu_clk_session *session;
216 union {
217 struct list_head link;
218 struct llist_node node;
219 };
220 struct nvgpu_cond readout_wq;
221 nvgpu_atomic_t poll_mask;
222 u16 gpc2clk_target_mhz;
223 u16 mclk_target_mhz;
224 u32 alarms_reported;
225 nvgpu_atomic_t enabled_mask;
226 struct nvgpu_clk_notification_queue queue;
227 u32 arb_queue_head;
228 struct nvgpu_ref refcount;
229};
230
231struct nvgpu_clk_session {
232 bool zombie;
233 struct gk20a *g;
234 struct nvgpu_ref refcount;
235 struct list_head link;
236 struct llist_head targets;
237
238 struct nvgpu_clk_arb_target target_pool[2];
239 struct nvgpu_clk_arb_target *target;
240};
241
242static const struct file_operations completion_dev_ops = {
243 .owner = THIS_MODULE,
244 .release = nvgpu_clk_arb_release_completion_dev,
245 .poll = nvgpu_clk_arb_poll_dev,
246};
247
248static const struct file_operations event_dev_ops = {
249 .owner = THIS_MODULE,
250 .release = nvgpu_clk_arb_release_event_dev,
251 .poll = nvgpu_clk_arb_poll_dev,
252 .read = nvgpu_clk_arb_read_event_dev,
253#ifdef CONFIG_COMPAT
254 .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
255#endif
256 .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
257};
258
259static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
260 struct nvgpu_clk_notification_queue *queue, 48 struct nvgpu_clk_notification_queue *queue,
261 size_t events_number) { 49 size_t events_number) {
262 queue->notifications = nvgpu_kcalloc(g, events_number, 50 queue->notifications = nvgpu_kcalloc(g, events_number,
@@ -271,7 +59,7 @@ static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
271 return 0; 59 return 0;
272} 60}
273 61
274static void nvgpu_clk_notification_queue_free(struct gk20a *g, 62void nvgpu_clk_notification_queue_free(struct gk20a *g,
275 struct nvgpu_clk_notification_queue *queue) { 63 struct nvgpu_clk_notification_queue *queue) {
276 nvgpu_kfree(g, queue->notifications); 64 nvgpu_kfree(g, queue->notifications);
277 queue->size = 0; 65 queue->size = 0;
@@ -279,185 +67,20 @@ static void nvgpu_clk_notification_queue_free(struct gk20a *g,
279 nvgpu_atomic_set(&queue->tail, 0); 67 nvgpu_atomic_set(&queue->tail, 0);
280} 68}
281 69
282int nvgpu_clk_arb_init_arbiter(struct gk20a *g) 70static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
283{ 71 struct nvgpu_clk_notification_queue *queue,
284 struct nvgpu_clk_arb *arb; 72 u32 alarm_mask) {
285 u16 default_mhz;
286 int err;
287 int index;
288 struct nvgpu_clk_vf_table *table;
289
290 gk20a_dbg_fn("");
291
292 if (!g->ops.clk_arb.get_arbiter_clk_domains)
293 return 0;
294
295 arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
296 if (!arb)
297 return -ENOMEM;
298
299 err = nvgpu_mutex_init(&arb->pstate_lock);
300 if (err)
301 goto mutex_fail;
302 nvgpu_spinlock_init(&arb->sessions_lock);
303 nvgpu_spinlock_init(&arb->users_lock);
304
305 arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
306 if (!arb->mclk_f_points) {
307 err = -ENOMEM;
308 goto init_fail;
309 }
310
311 arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
312 if (!arb->gpc2clk_f_points) {
313 err = -ENOMEM;
314 goto init_fail;
315 }
316
317 for (index = 0; index < 2; index++) {
318 table = &arb->vf_table_pool[index];
319 table->gpc2clk_num_points = MAX_F_POINTS;
320 table->mclk_num_points = MAX_F_POINTS;
321
322 table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
323 sizeof(struct nvgpu_clk_vf_point));
324 if (!table->gpc2clk_points) {
325 err = -ENOMEM;
326 goto init_fail;
327 }
328
329
330 table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
331 sizeof(struct nvgpu_clk_vf_point));
332 if (!table->mclk_points) {
333 err = -ENOMEM;
334 goto init_fail;
335 }
336 }
337
338 g->clk_arb = arb;
339 arb->g = g;
340
341 err = g->ops.clk_arb.get_arbiter_clk_default(g,
342 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
343 if (err < 0) {
344 err = -EINVAL;
345 goto init_fail;
346 }
347
348 arb->mclk_default_mhz = default_mhz;
349
350 err = g->ops.clk_arb.get_arbiter_clk_default(g,
351 CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
352 if (err < 0) {
353 err = -EINVAL;
354 goto init_fail;
355 }
356
357 arb->gpc2clk_default_mhz = default_mhz;
358
359 arb->actual = &arb->actual_pool[0];
360
361 nvgpu_atomic_set(&arb->req_nr, 0);
362
363 nvgpu_atomic64_set(&arb->alarm_mask, 0);
364 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
365 DEFAULT_EVENT_NUMBER);
366 if (err < 0)
367 goto init_fail;
368
369 INIT_LIST_HEAD_RCU(&arb->users);
370 INIT_LIST_HEAD_RCU(&arb->sessions);
371 init_llist_head(&arb->requests);
372
373 nvgpu_cond_init(&arb->request_wq);
374 arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
375 "vf_table_update");
376 arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
377 "arbiter_update");
378
379
380 INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
381
382 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
383
384#ifdef CONFIG_DEBUG_FS
385 arb->debug = &arb->debug_pool[0];
386
387 if (!arb->debugfs_set) {
388 if (nvgpu_clk_arb_debugfs_init(g))
389 arb->debugfs_set = true;
390 }
391#endif
392 err = clk_vf_point_cache(g);
393 if (err < 0)
394 goto init_fail;
395
396 err = nvgpu_clk_arb_update_vf_table(arb);
397 if (err < 0)
398 goto init_fail;
399 do {
400 /* Check that first run is completed */
401 nvgpu_smp_mb();
402 NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
403 nvgpu_atomic_read(&arb->req_nr), 0);
404 } while (!nvgpu_atomic_read(&arb->req_nr));
405
406
407 return arb->status;
408
409init_fail:
410 nvgpu_kfree(g, arb->gpc2clk_f_points);
411 nvgpu_kfree(g, arb->mclk_f_points);
412
413 for (index = 0; index < 2; index++) {
414 nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
415 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
416 }
417
418 nvgpu_mutex_destroy(&arb->pstate_lock);
419
420mutex_fail:
421 nvgpu_kfree(g, arb);
422
423 return err;
424}
425
426void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
427{
428 nvgpu_clk_arb_schedule_alarm(g,
429 (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
430}
431
432void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
433{
434 struct nvgpu_clk_arb *arb = g->clk_arb;
435
436 nvgpu_clk_arb_set_global_alarm(g, alarm);
437 if (arb->update_work_queue)
438 queue_work(arb->update_work_queue, &arb->update_fn_work);
439}
440
441static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
442{
443 struct nvgpu_clk_arb *arb = g->clk_arb;
444 73
445 u64 current_mask; 74 u32 queue_index;
446 u32 refcnt; 75 u64 timestamp;
447 u32 alarm_mask;
448 u64 new_mask;
449 76
450 do { 77 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
451 current_mask = nvgpu_atomic64_read(&arb->alarm_mask); 78 /* get current timestamp */
452 /* atomic operations are strong so they do not need masks */ 79 timestamp = (u64) sched_clock();
453 80
454 refcnt = ((u32) (current_mask >> 32)) + 1; 81 queue->notifications[queue_index].timestamp = timestamp;
455 alarm_mask = (u32) (current_mask & ~alarm); 82 queue->notifications[queue_index].notification = alarm_mask;
456 new_mask = ((u64) refcnt << 32) | alarm_mask;
457 83
458 } while (unlikely(current_mask !=
459 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
460 current_mask, new_mask)));
461} 84}
462 85
463static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) 86static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
@@ -482,231 +105,8 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
482 current_mask, new_mask))); 105 current_mask, new_mask)));
483 106
484 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); 107 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm);
485
486}
487
488void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
489{
490 struct nvgpu_clk_arb *arb = g->clk_arb;
491 int index;
492
493 if (arb) {
494 cancel_work_sync(&arb->vf_table_fn_work);
495 destroy_workqueue(arb->vf_table_work_queue);
496 arb->vf_table_work_queue = NULL;
497
498 cancel_work_sync(&arb->update_fn_work);
499 destroy_workqueue(arb->update_work_queue);
500 arb->update_work_queue = NULL;
501
502 nvgpu_kfree(g, arb->gpc2clk_f_points);
503 nvgpu_kfree(g, arb->mclk_f_points);
504
505 for (index = 0; index < 2; index++) {
506 nvgpu_kfree(g,
507 arb->vf_table_pool[index].gpc2clk_points);
508 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
509 }
510 nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
511 nvgpu_kfree(g, g->clk_arb);
512 g->clk_arb = NULL;
513 }
514}
515
516static int nvgpu_clk_arb_install_fd(struct gk20a *g,
517 struct nvgpu_clk_session *session,
518 const struct file_operations *fops,
519 struct nvgpu_clk_dev **_dev)
520{
521 struct file *file;
522 int fd;
523 int err;
524 int status;
525 char name[64];
526 struct nvgpu_clk_dev *dev;
527
528 gk20a_dbg_fn("");
529
530 dev = nvgpu_kzalloc(g, sizeof(*dev));
531 if (!dev)
532 return -ENOMEM;
533
534 status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
535 DEFAULT_EVENT_NUMBER);
536 if (status < 0) {
537 err = status;
538 goto fail;
539 }
540
541 fd = get_unused_fd_flags(O_RDWR);
542 if (fd < 0) {
543 err = fd;
544 goto fail;
545 }
546
547 snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
548 file = anon_inode_getfile(name, fops, dev, O_RDWR);
549 if (IS_ERR(file)) {
550 err = PTR_ERR(file);
551 goto fail_fd;
552 }
553
554 fd_install(fd, file);
555
556 nvgpu_cond_init(&dev->readout_wq);
557
558 nvgpu_atomic_set(&dev->poll_mask, 0);
559
560 dev->session = session;
561 nvgpu_ref_init(&dev->refcount);
562
563 nvgpu_ref_get(&session->refcount);
564
565 *_dev = dev;
566
567 return fd;
568
569fail_fd:
570 put_unused_fd(fd);
571fail:
572 nvgpu_kfree(g, dev);
573
574 return err;
575} 108}
576 109
577int nvgpu_clk_arb_init_session(struct gk20a *g,
578 struct nvgpu_clk_session **_session)
579{
580 struct nvgpu_clk_arb *arb = g->clk_arb;
581 struct nvgpu_clk_session *session = *(_session);
582
583 gk20a_dbg_fn("");
584
585 if (!g->ops.clk_arb.get_arbiter_clk_domains)
586 return 0;
587
588 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
589 if (!session)
590 return -ENOMEM;
591 session->g = g;
592
593 nvgpu_ref_init(&session->refcount);
594
595 session->zombie = false;
596 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
597 /* make sure that the initialization of the pool is visible
598 * before the update
599 */
600 nvgpu_smp_wmb();
601 session->target = &session->target_pool[0];
602
603 init_llist_head(&session->targets);
604
605 nvgpu_spinlock_acquire(&arb->sessions_lock);
606 list_add_tail_rcu(&session->link, &arb->sessions);
607 nvgpu_spinlock_release(&arb->sessions_lock);
608
609 *_session = session;
610
611 return 0;
612}
613
614static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
615{
616 struct nvgpu_clk_dev *dev = container_of(refcount,
617 struct nvgpu_clk_dev, refcount);
618 struct nvgpu_clk_session *session = dev->session;
619
620 nvgpu_kfree(session->g, dev);
621}
622
623static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
624{
625 struct nvgpu_clk_session *session = container_of(refcount,
626 struct nvgpu_clk_session, refcount);
627 struct nvgpu_clk_arb *arb = session->g->clk_arb;
628 struct gk20a *g = session->g;
629 struct nvgpu_clk_dev *dev, *tmp;
630 struct llist_node *head;
631
632 gk20a_dbg_fn("");
633
634 if (arb) {
635 nvgpu_spinlock_acquire(&arb->sessions_lock);
636 list_del_rcu(&session->link);
637 nvgpu_spinlock_release(&arb->sessions_lock);
638 }
639
640 head = llist_del_all(&session->targets);
641 llist_for_each_entry_safe(dev, tmp, head, node) {
642 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
643 }
644 synchronize_rcu();
645 nvgpu_kfree(g, session);
646}
647
648void nvgpu_clk_arb_release_session(struct gk20a *g,
649 struct nvgpu_clk_session *session)
650{
651 struct nvgpu_clk_arb *arb = g->clk_arb;
652
653 gk20a_dbg_fn("");
654
655 session->zombie = true;
656 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
657 if (arb && arb->update_work_queue)
658 queue_work(arb->update_work_queue, &arb->update_fn_work);
659}
660
661int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
662 struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
663{
664 struct nvgpu_clk_arb *arb = g->clk_arb;
665 struct nvgpu_clk_dev *dev;
666 int fd;
667
668 gk20a_dbg_fn("");
669
670 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
671 if (fd < 0)
672 return fd;
673
674 /* TODO: alarm mask needs to be set to default value to prevent
675 * failures of legacy tests. This will be removed when sanity is
676 * updated
677 */
678 if (alarm_mask)
679 nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
680 else
681 nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
682
683 dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
684
685 nvgpu_spinlock_acquire(&arb->users_lock);
686 list_add_tail_rcu(&dev->link, &arb->users);
687 nvgpu_spinlock_release(&arb->users_lock);
688
689 *event_fd = fd;
690
691 return 0;
692}
693
694int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
695 struct nvgpu_clk_session *session, int *request_fd)
696{
697 struct nvgpu_clk_dev *dev;
698 int fd;
699
700 gk20a_dbg_fn("");
701
702 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
703 if (fd < 0)
704 return fd;
705
706 *request_fd = fd;
707
708 return 0;
709}
710 110
711static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) 111static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
712{ 112{
@@ -1014,13 +414,6 @@ exit_vf_table:
1014 return status; 414 return status;
1015} 415}
1016 416
1017void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
1018{
1019 struct nvgpu_clk_arb *arb = g->clk_arb;
1020
1021 if (arb->vf_table_work_queue)
1022 queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
1023}
1024 417
1025static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work) 418static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
1026{ 419{
@@ -1044,6 +437,305 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
1044 nvgpu_clk_arb_update_vf_table(arb); 437 nvgpu_clk_arb_update_vf_table(arb);
1045} 438}
1046 439
440static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
441 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
442 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
443{
444 u16 gpc2clk_target, mclk_target;
445 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
446 u32 mclk_voltuv, mclk_voltuv_sram;
447 u32 pstate = VF_POINT_INVALID_PSTATE;
448 struct nvgpu_clk_vf_table *table;
449 u32 index, index_mclk;
450 struct nvgpu_clk_vf_point *mclk_vf = NULL;
451
452 do {
453 gpc2clk_target = *gpc2clk;
454 mclk_target = *mclk;
455 gpc2clk_voltuv = 0;
456 gpc2clk_voltuv_sram = 0;
457 mclk_voltuv = 0;
458 mclk_voltuv_sram = 0;
459
460 table = NV_ACCESS_ONCE(arb->current_vf_table);
461 /* pointer to table can be updated by callback */
462 nvgpu_smp_rmb();
463
464 if (!table)
465 continue;
466 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
467 nvgpu_err(arb->g, "found empty table");
468 goto find_exit;
469 }
470 /* First we check MCLK to find out which PSTATE we are
471 * are requesting, and from there try to find the minimum
472 * GPC2CLK on the same PSTATE that satisfies the request.
473 * If no GPC2CLK can be found, then we need to up the PSTATE
474 */
475
476recalculate_vf_point:
477 for (index = 0; index < table->mclk_num_points; index++) {
478 if (table->mclk_points[index].mem_mhz >= mclk_target) {
479 mclk_vf = &table->mclk_points[index];
480 break;
481 }
482 }
483 if (index == table->mclk_num_points) {
484 mclk_vf = &table->mclk_points[index-1];
485 index = table->mclk_num_points - 1;
486 }
487 index_mclk = index;
488
489 /* round up the freq requests */
490 for (index = 0; index < table->gpc2clk_num_points; index++) {
491 pstate = VF_POINT_COMMON_PSTATE(
492 &table->gpc2clk_points[index], mclk_vf);
493
494 if ((table->gpc2clk_points[index].gpc_mhz >=
495 gpc2clk_target) &&
496 (pstate != VF_POINT_INVALID_PSTATE)) {
497 gpc2clk_target =
498 table->gpc2clk_points[index].gpc_mhz;
499 *sys2clk =
500 table->gpc2clk_points[index].sys_mhz;
501 *xbar2clk =
502 table->gpc2clk_points[index].xbar_mhz;
503
504 gpc2clk_voltuv =
505 table->gpc2clk_points[index].uvolt;
506 gpc2clk_voltuv_sram =
507 table->gpc2clk_points[index].uvolt_sram;
508 break;
509 }
510 }
511
512 if (index == table->gpc2clk_num_points) {
513 pstate = VF_POINT_COMMON_PSTATE(
514 &table->gpc2clk_points[index-1], mclk_vf);
515 if (pstate != VF_POINT_INVALID_PSTATE) {
516 gpc2clk_target =
517 table->gpc2clk_points[index-1].gpc_mhz;
518 *sys2clk =
519 table->gpc2clk_points[index-1].sys_mhz;
520 *xbar2clk =
521 table->gpc2clk_points[index-1].xbar_mhz;
522
523 gpc2clk_voltuv =
524 table->gpc2clk_points[index-1].uvolt;
525 gpc2clk_voltuv_sram =
526 table->gpc2clk_points[index-1].
527 uvolt_sram;
528 } else if (index_mclk >= table->mclk_num_points - 1) {
529 /* There is no available combination of MCLK
530 * and GPC2CLK, we need to fail this
531 */
532 gpc2clk_target = 0;
533 mclk_target = 0;
534 pstate = VF_POINT_INVALID_PSTATE;
535 goto find_exit;
536 } else {
537 /* recalculate with higher PSTATE */
538 gpc2clk_target = *gpc2clk;
539 mclk_target = table->mclk_points[index_mclk+1].
540 mem_mhz;
541 goto recalculate_vf_point;
542 }
543 }
544
545 mclk_target = mclk_vf->mem_mhz;
546 mclk_voltuv = mclk_vf->uvolt;
547 mclk_voltuv_sram = mclk_vf->uvolt_sram;
548
549 } while (!table ||
550 (NV_ACCESS_ONCE(arb->current_vf_table) != table));
551
552find_exit:
553 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
554 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
555 gpc2clk_voltuv_sram : mclk_voltuv_sram;
556 /* noise unaware vmin */
557 *nuvmin = mclk_voltuv;
558 *nuvmin_sram = mclk_voltuv_sram;
559 *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
560 *mclk = mclk_target;
561 return pstate;
562}
563
564static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
565 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
566 u32 voltuv_sram)
567{
568 struct set_fll_clk fllclk;
569 struct nvgpu_clk_arb *arb = g->clk_arb;
570 int status;
571
572 fllclk.gpc2clkmhz = gpc2clk_target;
573 fllclk.sys2clkmhz = sys2clk_target;
574 fllclk.xbar2clkmhz = xbar2clk_target;
575
576 fllclk.voltuv = voltuv;
577
578 /* if voltage ascends we do:
579 * (1) FLL change
580 * (2) Voltage change
581 * (3) MCLK change
582 * If it goes down
583 * (1) MCLK change
584 * (2) Voltage change
585 * (3) FLL change
586 */
587
588 /* descending */
589 if (voltuv < arb->voltuv_actual) {
590 status = g->ops.clk.mclk_change(g, mclk_target);
591 if (status < 0)
592 return status;
593
594 status = volt_set_voltage(g, voltuv, voltuv_sram);
595 if (status < 0)
596 return status;
597
598 status = clk_set_fll_clks(g, &fllclk);
599 if (status < 0)
600 return status;
601 } else {
602 status = clk_set_fll_clks(g, &fllclk);
603 if (status < 0)
604 return status;
605
606 status = volt_set_voltage(g, voltuv, voltuv_sram);
607 if (status < 0)
608 return status;
609
610 status = g->ops.clk.mclk_change(g, mclk_target);
611 if (status < 0)
612 return status;
613 }
614
615 return 0;
616}
617
618static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
619 struct nvgpu_clk_arb_target *target,
620 u32 alarm) {
621
622 struct nvgpu_clk_session *session = dev->session;
623 struct nvgpu_clk_arb *arb = session->g->clk_arb;
624 struct nvgpu_clk_notification *notification;
625
626 u32 queue_alarm_mask = 0;
627 u32 enabled_mask = 0;
628 u32 new_alarms_reported = 0;
629 u32 poll_mask = 0;
630 u32 tail, head;
631 u32 queue_index;
632 size_t size;
633 int index;
634
635 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask);
636 size = arb->notification_queue.size;
637
638 /* queue global arbiter notifications in buffer */
639 do {
640 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
641 /* copy items to the queue */
642 queue_index = nvgpu_atomic_read(&dev->queue.tail);
643 head = dev->arb_queue_head;
644 head = (tail - head) < arb->notification_queue.size ?
645 head : tail - arb->notification_queue.size;
646
647 for (index = head; _WRAPGTEQ(tail, index); index++) {
648 u32 alarm_detected;
649
650 notification = &arb->notification_queue.
651 notifications[(index+1) % size];
652 alarm_detected =
653 NV_ACCESS_ONCE(notification->notification);
654
655 if (!(enabled_mask & alarm_detected))
656 continue;
657
658 queue_index++;
659 dev->queue.notifications[
660 queue_index % dev->queue.size].timestamp =
661 NV_ACCESS_ONCE(notification->timestamp);
662
663 dev->queue.notifications[
664 queue_index % dev->queue.size].notification =
665 alarm_detected;
666
667 queue_alarm_mask |= alarm_detected;
668 }
669 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
670 (int)tail));
671
672 nvgpu_atomic_set(&dev->queue.tail, queue_index);
673 /* update the last notification we processed from global queue */
674
675 dev->arb_queue_head = tail;
676
677 /* Check if current session targets are met */
678 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
679 if ((target->gpc2clk < session->target->gpc2clk)
680 || (target->mclk < session->target->mclk)) {
681
682 poll_mask |= (POLLIN | POLLPRI);
683 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
684 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
685 }
686 }
687
688 /* Check if there is a new VF update */
689 if (queue_alarm_mask & EVENT(VF_UPDATE))
690 poll_mask |= (POLLIN | POLLRDNORM);
691
692 /* Notify sticky alarms that were not reported on previous run*/
693 new_alarms_reported = (queue_alarm_mask |
694 (alarm & ~dev->alarms_reported & queue_alarm_mask));
695
696 if (new_alarms_reported & ~LOCAL_ALARM_MASK) {
697 /* check that we are not re-reporting */
698 if (new_alarms_reported & EVENT(ALARM_GPU_LOST))
699 poll_mask |= POLLHUP;
700
701 poll_mask |= (POLLIN | POLLPRI);
702 /* On next run do not report global alarms that were already
703 * reported, but report SHUTDOWN always
704 */
705 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
706 ~EVENT(ALARM_GPU_LOST);
707 }
708
709 if (poll_mask) {
710 nvgpu_atomic_set(&dev->poll_mask, poll_mask);
711 nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
712 }
713
714 return new_alarms_reported;
715}
716
717static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
718{
719 struct nvgpu_clk_arb *arb = g->clk_arb;
720
721 u64 current_mask;
722 u32 refcnt;
723 u32 alarm_mask;
724 u64 new_mask;
725
726 do {
727 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
728 /* atomic operations are strong so they do not need masks */
729
730 refcnt = ((u32) (current_mask >> 32)) + 1;
731 alarm_mask = (u32) (current_mask & ~alarm);
732 new_mask = ((u64) refcnt << 32) | alarm_mask;
733
734 } while (unlikely(current_mask !=
735 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
736 current_mask, new_mask)));
737}
738
1047static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) 739static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1048{ 740{
1049 struct nvgpu_clk_arb *arb = 741 struct nvgpu_clk_arb *arb =
@@ -1345,639 +1037,283 @@ exit_arb:
1345 ~EVENT(ALARM_GPU_LOST)); 1037 ~EVENT(ALARM_GPU_LOST));
1346} 1038}
1347 1039
1348static void nvgpu_clk_arb_queue_notification(struct gk20a *g, 1040int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
1349 struct nvgpu_clk_notification_queue *queue, 1041{
1350 u32 alarm_mask) { 1042 struct nvgpu_clk_arb *arb;
1351 1043 u16 default_mhz;
1352 u32 queue_index; 1044 int err;
1353 u64 timestamp;
1354
1355 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
1356 /* get current timestamp */
1357 timestamp = (u64) sched_clock();
1358
1359 queue->notifications[queue_index].timestamp = timestamp;
1360 queue->notifications[queue_index].notification = alarm_mask;
1361
1362}
1363
1364static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
1365 struct nvgpu_clk_arb_target *target,
1366 u32 alarm) {
1367
1368 struct nvgpu_clk_session *session = dev->session;
1369 struct nvgpu_clk_arb *arb = session->g->clk_arb;
1370 struct nvgpu_clk_notification *notification;
1371
1372 u32 queue_alarm_mask = 0;
1373 u32 enabled_mask = 0;
1374 u32 new_alarms_reported = 0;
1375 u32 poll_mask = 0;
1376 u32 tail, head;
1377 u32 queue_index;
1378 size_t size;
1379 int index; 1045 int index;
1046 struct nvgpu_clk_vf_table *table;
1380 1047
1381 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask); 1048 gk20a_dbg_fn("");
1382 size = arb->notification_queue.size;
1383
1384 /* queue global arbiter notifications in buffer */
1385 do {
1386 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
1387 /* copy items to the queue */
1388 queue_index = nvgpu_atomic_read(&dev->queue.tail);
1389 head = dev->arb_queue_head;
1390 head = (tail - head) < arb->notification_queue.size ?
1391 head : tail - arb->notification_queue.size;
1392
1393 for (index = head; _WRAPGTEQ(tail, index); index++) {
1394 u32 alarm_detected;
1395
1396 notification = &arb->notification_queue.
1397 notifications[(index+1) % size];
1398 alarm_detected =
1399 NV_ACCESS_ONCE(notification->notification);
1400
1401 if (!(enabled_mask & alarm_detected))
1402 continue;
1403
1404 queue_index++;
1405 dev->queue.notifications[
1406 queue_index % dev->queue.size].timestamp =
1407 NV_ACCESS_ONCE(notification->timestamp);
1408
1409 dev->queue.notifications[
1410 queue_index % dev->queue.size].notification =
1411 alarm_detected;
1412
1413 queue_alarm_mask |= alarm_detected;
1414 }
1415 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
1416 (int)tail));
1417
1418 nvgpu_atomic_set(&dev->queue.tail, queue_index);
1419 /* update the last notification we processed from global queue */
1420
1421 dev->arb_queue_head = tail;
1422
1423 /* Check if current session targets are met */
1424 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
1425 if ((target->gpc2clk < session->target->gpc2clk)
1426 || (target->mclk < session->target->mclk)) {
1427
1428 poll_mask |= (POLLIN | POLLPRI);
1429 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
1430 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
1431 }
1432 }
1433 1049
1434 /* Check if there is a new VF update */ 1050 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1435 if (queue_alarm_mask & EVENT(VF_UPDATE)) 1051 return 0;
1436 poll_mask |= (POLLIN | POLLRDNORM);
1437 1052
1438 /* Notify sticky alarms that were not reported on previous run*/ 1053 arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
1439 new_alarms_reported = (queue_alarm_mask | 1054 if (!arb)
1440 (alarm & ~dev->alarms_reported & queue_alarm_mask)); 1055 return -ENOMEM;
1441 1056
1442 if (new_alarms_reported & ~LOCAL_ALARM_MASK) { 1057 err = nvgpu_mutex_init(&arb->pstate_lock);
1443 /* check that we are not re-reporting */ 1058 if (err)
1444 if (new_alarms_reported & EVENT(ALARM_GPU_LOST)) 1059 goto mutex_fail;
1445 poll_mask |= POLLHUP; 1060 nvgpu_spinlock_init(&arb->sessions_lock);
1061 nvgpu_spinlock_init(&arb->users_lock);
1446 1062
1447 poll_mask |= (POLLIN | POLLPRI); 1063 arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1448 /* On next run do not report global alarms that were already 1064 if (!arb->mclk_f_points) {
1449 * reported, but report SHUTDOWN always 1065 err = -ENOMEM;
1450 */ 1066 goto init_fail;
1451 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
1452 ~EVENT(ALARM_GPU_LOST);
1453 } 1067 }
1454 1068
1455 if (poll_mask) { 1069 arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1456 nvgpu_atomic_set(&dev->poll_mask, poll_mask); 1070 if (!arb->gpc2clk_f_points) {
1457 nvgpu_cond_broadcast_interruptible(&dev->readout_wq); 1071 err = -ENOMEM;
1072 goto init_fail;
1458 } 1073 }
1459 1074
1460 return new_alarms_reported; 1075 for (index = 0; index < 2; index++) {
1461} 1076 table = &arb->vf_table_pool[index];
1462 1077 table->gpc2clk_num_points = MAX_F_POINTS;
1463static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, 1078 table->mclk_num_points = MAX_F_POINTS;
1464 struct nvgpu_gpu_set_event_filter_args *args)
1465{
1466 u32 mask;
1467
1468 gk20a_dbg(gpu_dbg_fn, "");
1469
1470 if (args->flags)
1471 return -EINVAL;
1472
1473 if (args->size != 1)
1474 return -EINVAL;
1475
1476 if (copy_from_user(&mask, (void __user *) args->buffer,
1477 args->size * sizeof(u32)))
1478 return -EFAULT;
1479
1480 /* update alarm mask */
1481 nvgpu_atomic_set(&dev->enabled_mask, mask);
1482
1483 return 0;
1484}
1485
1486static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
1487 unsigned long arg)
1488{
1489 struct nvgpu_clk_dev *dev = filp->private_data;
1490 struct gk20a *g = dev->session->g;
1491 u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
1492 int err = 0;
1493
1494 gk20a_dbg(gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
1495
1496 if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
1497 || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
1498 return -EINVAL;
1499 1079
1500 BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE); 1080 table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1081 sizeof(struct nvgpu_clk_vf_point));
1082 if (!table->gpc2clk_points) {
1083 err = -ENOMEM;
1084 goto init_fail;
1085 }
1501 1086
1502 memset(buf, 0, sizeof(buf));
1503 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1504 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
1505 return -EFAULT;
1506 }
1507 1087
1508 switch (cmd) { 1088 table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1509 case NVGPU_EVENT_IOCTL_SET_FILTER: 1089 sizeof(struct nvgpu_clk_vf_point));
1510 err = nvgpu_clk_arb_set_event_filter(dev, 1090 if (!table->mclk_points) {
1511 (struct nvgpu_gpu_set_event_filter_args *)buf); 1091 err = -ENOMEM;
1512 break; 1092 goto init_fail;
1513 default: 1093 }
1514 nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
1515 err = -ENOTTY;
1516 } 1094 }
1517 1095
1518 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) 1096 g->clk_arb = arb;
1519 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd)); 1097 arb->g = g;
1520
1521 return err;
1522}
1523
1524int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
1525 struct nvgpu_clk_session *session, int request_fd)
1526{
1527 struct nvgpu_clk_arb *arb = g->clk_arb;
1528 struct nvgpu_clk_dev *dev;
1529 struct fd fd;
1530 int err = 0;
1531
1532 gk20a_dbg_fn("");
1533
1534 fd = fdget(request_fd);
1535 if (!fd.file)
1536 return -EINVAL;
1537 1098
1538 if (fd.file->f_op != &completion_dev_ops) { 1099 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1100 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
1101 if (err < 0) {
1539 err = -EINVAL; 1102 err = -EINVAL;
1540 goto fdput_fd; 1103 goto init_fail;
1541 } 1104 }
1542 1105
1543 dev = (struct nvgpu_clk_dev *) fd.file->private_data; 1106 arb->mclk_default_mhz = default_mhz;
1544 1107
1545 if (!dev || dev->session != session) { 1108 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1109 CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
1110 if (err < 0) {
1546 err = -EINVAL; 1111 err = -EINVAL;
1547 goto fdput_fd; 1112 goto init_fail;
1548 } 1113 }
1549 nvgpu_ref_get(&dev->refcount);
1550 llist_add(&dev->node, &session->targets);
1551 if (arb->update_work_queue)
1552 queue_work(arb->update_work_queue, &arb->update_fn_work);
1553 1114
1554fdput_fd: 1115 arb->gpc2clk_default_mhz = default_mhz;
1555 fdput(fd);
1556 return err;
1557}
1558 1116
1559static inline u32 __pending_event(struct nvgpu_clk_dev *dev, 1117 arb->actual = &arb->actual_pool[0];
1560 struct nvgpu_gpu_event_info *info) {
1561 1118
1562 u32 tail, head; 1119 nvgpu_atomic_set(&arb->req_nr, 0);
1563 u32 events = 0;
1564 struct nvgpu_clk_notification *p_notif;
1565 1120
1566 tail = nvgpu_atomic_read(&dev->queue.tail); 1121 nvgpu_atomic64_set(&arb->alarm_mask, 0);
1567 head = nvgpu_atomic_read(&dev->queue.head); 1122 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
1123 DEFAULT_EVENT_NUMBER);
1124 if (err < 0)
1125 goto init_fail;
1568 1126
1569 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; 1127 INIT_LIST_HEAD_RCU(&arb->users);
1128 INIT_LIST_HEAD_RCU(&arb->sessions);
1129 init_llist_head(&arb->requests);
1570 1130
1571 if (_WRAPGTEQ(tail, head) && info) { 1131 nvgpu_cond_init(&arb->request_wq);
1572 head++; 1132 arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
1573 p_notif = &dev->queue.notifications[head % dev->queue.size]; 1133 "vf_table_update");
1574 events |= p_notif->notification; 1134 arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
1575 info->event_id = ffs(events) - 1; 1135 "arbiter_update");
1576 info->timestamp = p_notif->timestamp;
1577 nvgpu_atomic_set(&dev->queue.head, head);
1578 }
1579 1136
1580 return events;
1581}
1582 1137
1583static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf, 1138 INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
1584 size_t size, loff_t *off)
1585{
1586 struct nvgpu_clk_dev *dev = filp->private_data;
1587 struct nvgpu_gpu_event_info info;
1588 ssize_t err;
1589 1139
1590 gk20a_dbg_fn("filp=%p, buf=%p, size=%zu", filp, buf, size); 1140 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
1591 1141
1592 if ((size - *off) < sizeof(info)) 1142#ifdef CONFIG_DEBUG_FS
1593 return 0; 1143 arb->debug = &arb->debug_pool[0];
1594 1144
1595 memset(&info, 0, sizeof(info)); 1145 if (!arb->debugfs_set) {
1596 /* Get the oldest event from the queue */ 1146 if (nvgpu_clk_arb_debugfs_init(g))
1597 while (!__pending_event(dev, &info)) { 1147 arb->debugfs_set = true;
1598 if (filp->f_flags & O_NONBLOCK)
1599 return -EAGAIN;
1600 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
1601 __pending_event(dev, &info), 0);
1602 if (err)
1603 return err;
1604 if (info.timestamp)
1605 break;
1606 } 1148 }
1149#endif
1150 err = clk_vf_point_cache(g);
1151 if (err < 0)
1152 goto init_fail;
1607 1153
1608 if (copy_to_user(buf + *off, &info, sizeof(info))) 1154 err = nvgpu_clk_arb_update_vf_table(arb);
1609 return -EFAULT; 1155 if (err < 0)
1610 1156 goto init_fail;
1611 return sizeof(info); 1157 do {
1612} 1158 /* Check that first run is completed */
1159 nvgpu_smp_mb();
1160 NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
1161 nvgpu_atomic_read(&arb->req_nr), 0);
1162 } while (!nvgpu_atomic_read(&arb->req_nr));
1613 1163
1614static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
1615{
1616 struct nvgpu_clk_dev *dev = filp->private_data;
1617 1164
1618 gk20a_dbg_fn(""); 1165 return arb->status;
1619 1166
1620 poll_wait(filp, &dev->readout_wq.wq, wait); 1167init_fail:
1621 return nvgpu_atomic_xchg(&dev->poll_mask, 0); 1168 nvgpu_kfree(g, arb->gpc2clk_f_points);
1622} 1169 nvgpu_kfree(g, arb->mclk_f_points);
1623 1170
1624static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, 1171 for (index = 0; index < 2; index++) {
1625 struct file *filp) 1172 nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
1626{ 1173 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1627 struct nvgpu_clk_dev *dev = filp->private_data; 1174 }
1628 struct nvgpu_clk_session *session = dev->session;
1629 1175
1176 nvgpu_mutex_destroy(&arb->pstate_lock);
1630 1177
1631 gk20a_dbg_fn(""); 1178mutex_fail:
1179 nvgpu_kfree(g, arb);
1632 1180
1633 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); 1181 return err;
1634 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1635 return 0;
1636} 1182}
1637 1183
1638static int nvgpu_clk_arb_release_event_dev(struct inode *inode, 1184void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
1639 struct file *filp)
1640{ 1185{
1641 struct nvgpu_clk_dev *dev = filp->private_data; 1186 nvgpu_clk_arb_schedule_alarm(g,
1642 struct nvgpu_clk_session *session = dev->session; 1187 (0x1UL << NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
1643 struct nvgpu_clk_arb *arb;
1644
1645 arb = session->g->clk_arb;
1646
1647 gk20a_dbg_fn("");
1648
1649 if (arb) {
1650 nvgpu_spinlock_acquire(&arb->users_lock);
1651 list_del_rcu(&dev->link);
1652 nvgpu_spinlock_release(&arb->users_lock);
1653 nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
1654 }
1655
1656 synchronize_rcu();
1657 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
1658 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1659
1660 return 0;
1661} 1188}
1662 1189
1663int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, 1190void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
1664 int request_fd, u32 api_domain, u16 target_mhz)
1665{ 1191{
1666 struct nvgpu_clk_dev *dev; 1192 struct nvgpu_clk_arb *arb = g->clk_arb;
1667 struct fd fd;
1668 int err = 0;
1669
1670 gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
1671
1672 fd = fdget(request_fd);
1673 if (!fd.file)
1674 return -EINVAL;
1675
1676 if (fd.file->f_op != &completion_dev_ops) {
1677 err = -EINVAL;
1678 goto fdput_fd;
1679 }
1680
1681 dev = fd.file->private_data;
1682 if (!dev || dev->session != session) {
1683 err = -EINVAL;
1684 goto fdput_fd;
1685 }
1686
1687 switch (api_domain) {
1688 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1689 dev->mclk_target_mhz = target_mhz;
1690 break;
1691
1692 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1693 dev->gpc2clk_target_mhz = target_mhz * 2ULL;
1694 break;
1695
1696 default:
1697 err = -EINVAL;
1698 }
1699 1193
1700fdput_fd: 1194 nvgpu_clk_arb_set_global_alarm(g, alarm);
1701 fdput(fd); 1195 if (arb->update_work_queue)
1702 return err; 1196 queue_work(arb->update_work_queue, &arb->update_fn_work);
1703} 1197}
1704 1198
1705int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, 1199void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
1706 u32 api_domain, u16 *freq_mhz)
1707{ 1200{
1708 int err = 0; 1201 struct nvgpu_clk_arb *arb = g->clk_arb;
1709 struct nvgpu_clk_arb_target *target; 1202 int index;
1710 1203
1711 do { 1204 if (arb) {
1712 target = NV_ACCESS_ONCE(session->target); 1205 cancel_work_sync(&arb->vf_table_fn_work);
1713 /* no reordering of this pointer */ 1206 destroy_workqueue(arb->vf_table_work_queue);
1714 nvgpu_smp_rmb(); 1207 arb->vf_table_work_queue = NULL;
1715 1208
1716 switch (api_domain) { 1209 cancel_work_sync(&arb->update_fn_work);
1717 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1210 destroy_workqueue(arb->update_work_queue);
1718 *freq_mhz = target->mclk; 1211 arb->update_work_queue = NULL;
1719 break;
1720 1212
1721 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1213 nvgpu_kfree(g, arb->gpc2clk_f_points);
1722 *freq_mhz = target->gpc2clk / 2ULL; 1214 nvgpu_kfree(g, arb->mclk_f_points);
1723 break;
1724 1215
1725 default: 1216 for (index = 0; index < 2; index++) {
1726 *freq_mhz = 0; 1217 nvgpu_kfree(g,
1727 err = -EINVAL; 1218 arb->vf_table_pool[index].gpc2clk_points);
1219 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1728 } 1220 }
1729 } while (target != NV_ACCESS_ONCE(session->target)); 1221 nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
1730 return err; 1222 nvgpu_kfree(g, g->clk_arb);
1223 g->clk_arb = NULL;
1224 }
1731} 1225}
1732 1226
1733int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, 1227int nvgpu_clk_arb_init_session(struct gk20a *g,
1734 u32 api_domain, u16 *freq_mhz) 1228 struct nvgpu_clk_session **_session)
1735{ 1229{
1736 struct nvgpu_clk_arb *arb = g->clk_arb; 1230 struct nvgpu_clk_arb *arb = g->clk_arb;
1737 int err = 0; 1231 struct nvgpu_clk_session *session = *(_session);
1738 struct nvgpu_clk_arb_target *actual;
1739 1232
1740 do { 1233 gk20a_dbg_fn("");
1741 actual = NV_ACCESS_ONCE(arb->actual);
1742 /* no reordering of this pointer */
1743 nvgpu_smp_rmb();
1744 1234
1745 switch (api_domain) { 1235 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1746 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1236 return 0;
1747 *freq_mhz = actual->mclk;
1748 break;
1749 1237
1750 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1238 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
1751 *freq_mhz = actual->gpc2clk / 2ULL; 1239 if (!session)
1752 break; 1240 return -ENOMEM;
1241 session->g = g;
1753 1242
1754 default: 1243 nvgpu_ref_init(&session->refcount);
1755 *freq_mhz = 0;
1756 err = -EINVAL;
1757 }
1758 } while (actual != NV_ACCESS_ONCE(arb->actual));
1759 return err;
1760}
1761 1244
1762int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, 1245 session->zombie = false;
1763 u32 api_domain, u16 *freq_mhz) 1246 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
1764{ 1247 /* make sure that the initialization of the pool is visible
1765 switch (api_domain) { 1248 * before the update
1766 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1249 */
1767 *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) / 1250 nvgpu_smp_wmb();
1768 1000000ULL; 1251 session->target = &session->target_pool[0];
1769 return 0;
1770 1252
1771 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1253 init_llist_head(&session->targets);
1772 *freq_mhz = g->ops.clk.measure_freq(g,
1773 CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL;
1774 return 0;
1775 1254
1776 default: 1255 nvgpu_spinlock_acquire(&arb->sessions_lock);
1777 return -EINVAL; 1256 list_add_tail_rcu(&session->link, &arb->sessions);
1778 } 1257 nvgpu_spinlock_release(&arb->sessions_lock);
1779}
1780 1258
1781int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 1259 *_session = session;
1782 u16 *min_mhz, u16 *max_mhz)
1783{
1784 int ret;
1785
1786 switch (api_domain) {
1787 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1788 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
1789 CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz);
1790 return ret;
1791
1792 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1793 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
1794 CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz);
1795 if (!ret) {
1796 *min_mhz /= 2;
1797 *max_mhz /= 2;
1798 }
1799 return ret;
1800 1260
1801 default: 1261 return 0;
1802 return -EINVAL;
1803 }
1804} 1262}
1805 1263
1806u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) 1264void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
1807{ 1265{
1808 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); 1266 struct nvgpu_clk_dev *dev = container_of(refcount,
1809 u32 api_domains = 0; 1267 struct nvgpu_clk_dev, refcount);
1810 1268 struct nvgpu_clk_session *session = dev->session;
1811 if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
1812 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
1813
1814 if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
1815 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
1816 1269
1817 return api_domains; 1270 nvgpu_kfree(session->g, dev);
1818} 1271}
1819 1272
1820bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain) 1273void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
1821{ 1274{
1822 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g); 1275 struct nvgpu_clk_session *session = container_of(refcount,
1823 1276 struct nvgpu_clk_session, refcount);
1824 switch (api_domain) { 1277 struct nvgpu_clk_arb *arb = session->g->clk_arb;
1825 case NVGPU_GPU_CLK_DOMAIN_MCLK: 1278 struct gk20a *g = session->g;
1826 return ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0); 1279 struct nvgpu_clk_dev *dev, *tmp;
1280 struct llist_node *head;
1827 1281
1828 case NVGPU_GPU_CLK_DOMAIN_GPCCLK: 1282 gk20a_dbg_fn("");
1829 return ((clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0);
1830 1283
1831 default: 1284 if (arb) {
1832 return false; 1285 nvgpu_spinlock_acquire(&arb->sessions_lock);
1286 list_del_rcu(&session->link);
1287 nvgpu_spinlock_release(&arb->sessions_lock);
1833 } 1288 }
1834}
1835 1289
1836int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, 1290 head = llist_del_all(&session->targets);
1837 u32 api_domain, u32 *max_points, u16 *fpoints) 1291 llist_for_each_entry_safe(dev, tmp, head, node) {
1838{ 1292 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1839 int err;
1840 u32 i;
1841
1842 switch (api_domain) {
1843 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
1844 err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK,
1845 max_points, fpoints);
1846 if (err || !fpoints)
1847 return err;
1848 for (i = 0; i < *max_points; i++)
1849 fpoints[i] /= 2;
1850 return 0;
1851 case NVGPU_GPU_CLK_DOMAIN_MCLK:
1852 return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK,
1853 max_points, fpoints);
1854 default:
1855 return -EINVAL;
1856 } 1293 }
1294 synchronize_rcu();
1295 nvgpu_kfree(g, session);
1857} 1296}
1858 1297
1859static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, 1298void nvgpu_clk_arb_release_session(struct gk20a *g,
1860 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, 1299 struct nvgpu_clk_session *session)
1861 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
1862{ 1300{
1863 u16 gpc2clk_target, mclk_target; 1301 struct nvgpu_clk_arb *arb = g->clk_arb;
1864 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
1865 u32 mclk_voltuv, mclk_voltuv_sram;
1866 u32 pstate = VF_POINT_INVALID_PSTATE;
1867 struct nvgpu_clk_vf_table *table;
1868 u32 index, index_mclk;
1869 struct nvgpu_clk_vf_point *mclk_vf = NULL;
1870
1871 do {
1872 gpc2clk_target = *gpc2clk;
1873 mclk_target = *mclk;
1874 gpc2clk_voltuv = 0;
1875 gpc2clk_voltuv_sram = 0;
1876 mclk_voltuv = 0;
1877 mclk_voltuv_sram = 0;
1878
1879 table = NV_ACCESS_ONCE(arb->current_vf_table);
1880 /* pointer to table can be updated by callback */
1881 nvgpu_smp_rmb();
1882
1883 if (!table)
1884 continue;
1885 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
1886 nvgpu_err(arb->g, "found empty table");
1887 goto find_exit;
1888 }
1889 /* First we check MCLK to find out which PSTATE we are
1890 * are requesting, and from there try to find the minimum
1891 * GPC2CLK on the same PSTATE that satisfies the request.
1892 * If no GPC2CLK can be found, then we need to up the PSTATE
1893 */
1894
1895recalculate_vf_point:
1896 for (index = 0; index < table->mclk_num_points; index++) {
1897 if (table->mclk_points[index].mem_mhz >= mclk_target) {
1898 mclk_vf = &table->mclk_points[index];
1899 break;
1900 }
1901 }
1902 if (index == table->mclk_num_points) {
1903 mclk_vf = &table->mclk_points[index-1];
1904 index = table->mclk_num_points - 1;
1905 }
1906 index_mclk = index;
1907
1908 /* round up the freq requests */
1909 for (index = 0; index < table->gpc2clk_num_points; index++) {
1910 pstate = VF_POINT_COMMON_PSTATE(
1911 &table->gpc2clk_points[index], mclk_vf);
1912
1913 if ((table->gpc2clk_points[index].gpc_mhz >=
1914 gpc2clk_target) &&
1915 (pstate != VF_POINT_INVALID_PSTATE)) {
1916 gpc2clk_target =
1917 table->gpc2clk_points[index].gpc_mhz;
1918 *sys2clk =
1919 table->gpc2clk_points[index].sys_mhz;
1920 *xbar2clk =
1921 table->gpc2clk_points[index].xbar_mhz;
1922
1923 gpc2clk_voltuv =
1924 table->gpc2clk_points[index].uvolt;
1925 gpc2clk_voltuv_sram =
1926 table->gpc2clk_points[index].uvolt_sram;
1927 break;
1928 }
1929 }
1930
1931 if (index == table->gpc2clk_num_points) {
1932 pstate = VF_POINT_COMMON_PSTATE(
1933 &table->gpc2clk_points[index-1], mclk_vf);
1934 if (pstate != VF_POINT_INVALID_PSTATE) {
1935 gpc2clk_target =
1936 table->gpc2clk_points[index-1].gpc_mhz;
1937 *sys2clk =
1938 table->gpc2clk_points[index-1].sys_mhz;
1939 *xbar2clk =
1940 table->gpc2clk_points[index-1].xbar_mhz;
1941 1302
1942 gpc2clk_voltuv = 1303 gk20a_dbg_fn("");
1943 table->gpc2clk_points[index-1].uvolt;
1944 gpc2clk_voltuv_sram =
1945 table->gpc2clk_points[index-1].
1946 uvolt_sram;
1947 } else if (index_mclk >= table->mclk_num_points - 1) {
1948 /* There is no available combination of MCLK
1949 * and GPC2CLK, we need to fail this
1950 */
1951 gpc2clk_target = 0;
1952 mclk_target = 0;
1953 pstate = VF_POINT_INVALID_PSTATE;
1954 goto find_exit;
1955 } else {
1956 /* recalculate with higher PSTATE */
1957 gpc2clk_target = *gpc2clk;
1958 mclk_target = table->mclk_points[index_mclk+1].
1959 mem_mhz;
1960 goto recalculate_vf_point;
1961 }
1962 }
1963 1304
1964 mclk_target = mclk_vf->mem_mhz; 1305 session->zombie = true;
1965 mclk_voltuv = mclk_vf->uvolt; 1306 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
1966 mclk_voltuv_sram = mclk_vf->uvolt_sram; 1307 if (arb && arb->update_work_queue)
1308 queue_work(arb->update_work_queue, &arb->update_fn_work);
1309}
1967 1310
1968 } while (!table || 1311void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
1969 (NV_ACCESS_ONCE(arb->current_vf_table) != table)); 1312{
1313 struct nvgpu_clk_arb *arb = g->clk_arb;
1970 1314
1971find_exit: 1315 if (arb->vf_table_work_queue)
1972 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; 1316 queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
1973 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
1974 gpc2clk_voltuv_sram : mclk_voltuv_sram;
1975 /* noise unaware vmin */
1976 *nuvmin = mclk_voltuv;
1977 *nuvmin_sram = mclk_voltuv_sram;
1978 *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
1979 *mclk = mclk_target;
1980 return pstate;
1981} 1317}
1982 1318
1983/* This function is inherently unsafe to call while arbiter is running 1319/* This function is inherently unsafe to call while arbiter is running
@@ -1988,60 +1324,6 @@ int nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
1988 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate); 1324 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate);
1989} 1325}
1990 1326
1991static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1992 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
1993 u32 voltuv_sram)
1994{
1995 struct set_fll_clk fllclk;
1996 struct nvgpu_clk_arb *arb = g->clk_arb;
1997 int status;
1998
1999 fllclk.gpc2clkmhz = gpc2clk_target;
2000 fllclk.sys2clkmhz = sys2clk_target;
2001 fllclk.xbar2clkmhz = xbar2clk_target;
2002
2003 fllclk.voltuv = voltuv;
2004
2005 /* if voltage ascends we do:
2006 * (1) FLL change
2007 * (2) Voltage change
2008 * (3) MCLK change
2009 * If it goes down
2010 * (1) MCLK change
2011 * (2) Voltage change
2012 * (3) FLL change
2013 */
2014
2015 /* descending */
2016 if (voltuv < arb->voltuv_actual) {
2017 status = g->ops.clk.mclk_change(g, mclk_target);
2018 if (status < 0)
2019 return status;
2020
2021 status = volt_set_voltage(g, voltuv, voltuv_sram);
2022 if (status < 0)
2023 return status;
2024
2025 status = clk_set_fll_clks(g, &fllclk);
2026 if (status < 0)
2027 return status;
2028 } else {
2029 status = clk_set_fll_clks(g, &fllclk);
2030 if (status < 0)
2031 return status;
2032
2033 status = volt_set_voltage(g, voltuv, voltuv_sram);
2034 if (status < 0)
2035 return status;
2036
2037 status = g->ops.clk.mclk_change(g, mclk_target);
2038 if (status < 0)
2039 return status;
2040 }
2041
2042 return 0;
2043}
2044
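The removed nvgpu_clk_arb_change_vf_point() above applies the same three steps in opposite order depending on whether the requested voltage is below or above the current one. A hedged standalone sketch of that ordering decision, with stub callbacks standing in for the real MCLK/voltage/FLL calls, is:

    #include <stdio.h>

    static void set_mclk(void) { printf("mclk\n"); }
    static void set_volt(void) { printf("volt\n"); }
    static void set_fll(void)  { printf("fll\n");  }

    /* Illustrative only: mirrors the branch in the removed function --
     * descending voltage does MCLK, then voltage, then FLL; ascending
     * (or equal) does FLL, then voltage, then MCLK.
     */
    static void change_vf_point(unsigned new_uv, unsigned actual_uv)
    {
        if (new_uv < actual_uv) {
            set_mclk();
            set_volt();
            set_fll();
        } else {
            set_fll();
            set_volt();
            set_mclk();
        }
    }

    int main(void)
    {
        change_vf_point(700000, 800000); /* descending: mclk, volt, fll */
        return 0;
    }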
2045void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock) 1327void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
2046{ 1328{
2047 struct nvgpu_clk_arb *arb = g->clk_arb; 1329 struct nvgpu_clk_arb *arb = g->clk_arb;
@@ -2051,71 +1333,3 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
2051 else 1333 else
2052 nvgpu_mutex_release(&arb->pstate_lock); 1334 nvgpu_mutex_release(&arb->pstate_lock);
2053} 1335}
2054
2055#ifdef CONFIG_DEBUG_FS
2056static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
2057{
2058 struct gk20a *g = s->private;
2059 struct nvgpu_clk_arb *arb = g->clk_arb;
2060 struct nvgpu_clk_arb_debug *debug;
2061
2062 u64 num;
2063 s64 tmp, avg, std, max, min;
2064
2065 debug = NV_ACCESS_ONCE(arb->debug);
2066 /* Make copy of structure and ensure no reordering */
2067 nvgpu_smp_rmb();
2068 if (!debug)
2069 return -EINVAL;
2070
2071 std = debug->switch_std;
2072 avg = debug->switch_avg;
2073 max = debug->switch_max;
2074 min = debug->switch_min;
2075 num = debug->switch_num;
2076
2077 tmp = std;
2078 do_div(tmp, num);
2079 seq_printf(s, "Number of transitions: %lld\n",
2080 num);
2081 seq_printf(s, "max / min : %lld / %lld usec\n",
2082 max, min);
2083 seq_printf(s, "avg / std : %lld / %ld usec\n",
2084 avg, int_sqrt(tmp));
2085
2086 return 0;
2087}
2088
2089static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
2090{
2091 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
2092}
2093
2094static const struct file_operations nvgpu_clk_arb_stats_fops = {
2095 .open = nvgpu_clk_arb_stats_open,
2096 .read = seq_read,
2097 .llseek = seq_lseek,
2098 .release = single_release,
2099};
2100
2101
2102static int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
2103{
2104 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
2105 struct dentry *gpu_root = l->debugfs;
2106 struct dentry *d;
2107
2108 gk20a_dbg(gpu_dbg_info, "g=%p", g);
2109
2110 d = debugfs_create_file(
2111 "arb_stats",
2112 S_IRUGO,
2113 gpu_root,
2114 g,
2115 &nvgpu_clk_arb_stats_fops);
2116 if (!d)
2117 return -ENOMEM;
2118
2119 return 0;
2120}
2121#endif
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
new file mode 100644
index 00000000..b66876da
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
@@ -0,0 +1,120 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_CLK_ARB_LINUX_H__
18#define __NVGPU_CLK_ARB_LINUX_H__
19
20#include <nvgpu/types.h>
21#include <nvgpu/bitops.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/kmem.h>
24#include <nvgpu/atomic.h>
25#include <nvgpu/bug.h>
26#include <nvgpu/kref.h>
27#include <nvgpu/log.h>
28#include <nvgpu/barrier.h>
29#include <nvgpu/cond.h>
30
31#include "gk20a/gk20a.h"
32#include "clk/clk.h"
33#include "pstate/pstate.h"
34#include "lpwr/lpwr.h"
35#include "volt/volt.h"
36
37/*
38 * The defines here should finally move to clk_arb.h, once these are
39 * refactored to be free of Linux fields.
40 */
41struct nvgpu_clk_arb {
42 struct nvgpu_spinlock sessions_lock;
43 struct nvgpu_spinlock users_lock;
44
45 struct nvgpu_mutex pstate_lock;
46 struct list_head users;
47 struct list_head sessions;
48 struct llist_head requests;
49
50 struct gk20a *g;
51 int status;
52
53 struct nvgpu_clk_arb_target actual_pool[2];
54 struct nvgpu_clk_arb_target *actual;
55
56 u16 gpc2clk_default_mhz;
57 u16 mclk_default_mhz;
58 u32 voltuv_actual;
59
60 u16 gpc2clk_min, gpc2clk_max;
61 u16 mclk_min, mclk_max;
62
63 struct work_struct update_fn_work;
64 struct workqueue_struct *update_work_queue;
65 struct work_struct vf_table_fn_work;
66 struct workqueue_struct *vf_table_work_queue;
67
68 struct nvgpu_cond request_wq;
69
70 struct nvgpu_clk_vf_table *current_vf_table;
71 struct nvgpu_clk_vf_table vf_table_pool[2];
72 u32 vf_table_index;
73
74 u16 *mclk_f_points;
75 nvgpu_atomic_t req_nr;
76
77 u32 mclk_f_numpoints;
78 u16 *gpc2clk_f_points;
79 u32 gpc2clk_f_numpoints;
80
81 nvgpu_atomic64_t alarm_mask;
82 struct nvgpu_clk_notification_queue notification_queue;
83
84#ifdef CONFIG_DEBUG_FS
85 struct nvgpu_clk_arb_debug debug_pool[2];
86 struct nvgpu_clk_arb_debug *debug;
87 bool debugfs_set;
88#endif
89};
90
91struct nvgpu_clk_dev {
92 struct nvgpu_clk_session *session;
93 union {
94 struct list_head link;
95 struct llist_node node;
96 };
97 struct nvgpu_cond readout_wq;
98 nvgpu_atomic_t poll_mask;
99 u16 gpc2clk_target_mhz;
100 u16 mclk_target_mhz;
101 u32 alarms_reported;
102 nvgpu_atomic_t enabled_mask;
103 struct nvgpu_clk_notification_queue queue;
104 u32 arb_queue_head;
105 struct nvgpu_ref refcount;
106};
107
108struct nvgpu_clk_session {
109 bool zombie;
110 struct gk20a *g;
111 struct nvgpu_ref refcount;
112 struct list_head link;
113 struct llist_head targets;
114
115 struct nvgpu_clk_arb_target target_pool[2];
116 struct nvgpu_clk_arb_target *target;
117};
118
119#endif /* __NVGPU_CLK_ARB_LINUX_H__ */
120
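Several fields in the structures above (actual_pool[2], vf_table_pool[2], debug_pool[2], and the per-session target_pool[2]) follow a double-buffer pattern: the writer fills the slot readers are not using and then publishes it by swapping a single pointer, so readers never observe a half-written entry. A simplified, single-threaded sketch of that idea (not the arbiter's actual update path, which additionally uses barriers) is:

    #include <stdio.h>

    struct target {
        unsigned short mclk;
        unsigned short gpc2clk;
        unsigned pstate;
    };

    static struct target pool[2];
    static struct target *actual = &pool[0];

    /* Illustrative only: build the new state in the inactive slot, then
     * publish it with one pointer assignment.
     */
    static void publish(unsigned short mclk, unsigned short gpc2clk)
    {
        struct target *next = (actual == &pool[0]) ? &pool[1] : &pool[0];

        next->mclk = mclk;
        next->gpc2clk = gpc2clk;
        actual = next;
    }

    int main(void)
    {
        publish(4000, 2600);
        printf("mclk=%u gpc2clk=%u\n", actual->mclk, actual->gpc2clk);
        return 0;
    }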
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
new file mode 100644
index 00000000..27afe777
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
@@ -0,0 +1,641 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/cdev.h>
18#include <linux/file.h>
19#include <linux/anon_inodes.h>
20#include <linux/rculist.h>
21#include <linux/llist.h>
22#include <linux/uaccess.h>
23#include <linux/poll.h>
24#ifdef CONFIG_DEBUG_FS
25#include <linux/debugfs.h>
26#endif
27#include <uapi/linux/nvgpu.h>
28
29#include <nvgpu/bitops.h>
30#include <nvgpu/lock.h>
31#include <nvgpu/kmem.h>
32#include <nvgpu/atomic.h>
33#include <nvgpu/bug.h>
34#include <nvgpu/kref.h>
35#include <nvgpu/log.h>
36#include <nvgpu/barrier.h>
37#include <nvgpu/cond.h>
38#include <nvgpu/clk_arb.h>
39
40#include "gk20a/gk20a.h"
41#include "clk/clk.h"
42#include "clk_arb_linux.h"
43#include "pstate/pstate.h"
44#include "lpwr/lpwr.h"
45#include "volt/volt.h"
46
47#ifdef CONFIG_DEBUG_FS
48#include "common/linux/os_linux.h"
49#endif
50
51static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
52 struct file *filp)
53{
54 struct nvgpu_clk_dev *dev = filp->private_data;
55 struct nvgpu_clk_session *session = dev->session;
56
57
58 gk20a_dbg_fn("");
59
60 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
61 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
62 return 0;
63}
64
65static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
66{
67 struct nvgpu_clk_dev *dev = filp->private_data;
68
69 gk20a_dbg_fn("");
70
71 poll_wait(filp, &dev->readout_wq.wq, wait);
72 return nvgpu_atomic_xchg(&dev->poll_mask, 0);
73}
74
75static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
76 struct file *filp)
77{
78 struct nvgpu_clk_dev *dev = filp->private_data;
79 struct nvgpu_clk_session *session = dev->session;
80 struct nvgpu_clk_arb *arb;
81
82 arb = session->g->clk_arb;
83
84 gk20a_dbg_fn("");
85
86 if (arb) {
87 nvgpu_spinlock_acquire(&arb->users_lock);
88 list_del_rcu(&dev->link);
89 nvgpu_spinlock_release(&arb->users_lock);
90 nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
91 }
92
93 synchronize_rcu();
94 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
95 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
96
97 return 0;
98}
99
100static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
101 struct nvgpu_gpu_event_info *info) {
102
103 u32 tail, head;
104 u32 events = 0;
105 struct nvgpu_clk_notification *p_notif;
106
107 tail = nvgpu_atomic_read(&dev->queue.tail);
108 head = nvgpu_atomic_read(&dev->queue.head);
109
110 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;
111
112 if (_WRAPGTEQ(tail, head) && info) {
113 head++;
114 p_notif = &dev->queue.notifications[head % dev->queue.size];
115 events |= p_notif->notification;
116 info->event_id = ffs(events) - 1;
117 info->timestamp = p_notif->timestamp;
118 nvgpu_atomic_set(&dev->queue.head, head);
119 }
120
121 return events;
122}
123
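__pending_event() above treats the queue head and tail as free-running u32 counters and indexes the ring with a modulo; when the reader has fallen more than queue.size entries behind, it is snapped forward to the oldest entry still present. A small standalone sketch of that index arithmetic, with a hypothetical queue size and example counter values, is:

    #include <stdio.h>

    #define QUEUE_SIZE 32u

    /* Illustrative only: because head/tail are free-running unsigned
     * counters, (tail - head) gives the distance even across 32-bit
     * wrap-around.
     */
    static unsigned catch_up(unsigned head, unsigned tail)
    {
        if ((tail - head) >= QUEUE_SIZE)   /* reader fell behind */
            head = tail - QUEUE_SIZE;      /* oldest retained entry */
        return head;
    }

    int main(void)
    {
        printf("%u\n", catch_up(500, 600));            /* 568 */
        printf("%#x\n", catch_up(0xfffffff0u, 0x10u)); /* 0xfffffff0 */
        return 0;
    }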
124static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
125 size_t size, loff_t *off)
126{
127 struct nvgpu_clk_dev *dev = filp->private_data;
128 struct nvgpu_gpu_event_info info;
129 ssize_t err;
130
131 gk20a_dbg_fn("filp=%p, buf=%p, size=%zu", filp, buf, size);
132
133 if ((size - *off) < sizeof(info))
134 return 0;
135
136 memset(&info, 0, sizeof(info));
137 /* Get the oldest event from the queue */
138 while (!__pending_event(dev, &info)) {
139 if (filp->f_flags & O_NONBLOCK)
140 return -EAGAIN;
141 err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
142 __pending_event(dev, &info), 0);
143 if (err)
144 return err;
145 if (info.timestamp)
146 break;
147 }
148
149 if (copy_to_user(buf + *off, &info, sizeof(info)))
150 return -EFAULT;
151
152 return sizeof(info);
153}
154
155static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev,
156 struct nvgpu_gpu_set_event_filter_args *args)
157{
158 u32 mask;
159
160 gk20a_dbg(gpu_dbg_fn, "");
161
162 if (args->flags)
163 return -EINVAL;
164
165 if (args->size != 1)
166 return -EINVAL;
167
168 if (copy_from_user(&mask, (void __user *) args->buffer,
169 args->size * sizeof(u32)))
170 return -EFAULT;
171
172 /* update alarm mask */
173 nvgpu_atomic_set(&dev->enabled_mask, mask);
174
175 return 0;
176}
177
178static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
179 unsigned long arg)
180{
181 struct nvgpu_clk_dev *dev = filp->private_data;
182 struct gk20a *g = dev->session->g;
183 u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
184 int err = 0;
185
186 gk20a_dbg(gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
187
188 if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
189 || (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
190 return -EINVAL;
191
192 BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE);
193
194 memset(buf, 0, sizeof(buf));
195 if (_IOC_DIR(cmd) & _IOC_WRITE) {
196 if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
197 return -EFAULT;
198 }
199
200 switch (cmd) {
201 case NVGPU_EVENT_IOCTL_SET_FILTER:
202 err = nvgpu_clk_arb_set_event_filter(dev,
203 (struct nvgpu_gpu_set_event_filter_args *)buf);
204 break;
205 default:
206 nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
207 err = -ENOTTY;
208 }
209
210 if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
211 err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
212
213 return err;
214}
215
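The ioctl dispatcher above validates the command and sizes its copy-in/copy-out buffers entirely from the command number, using the standard _IOC_* decoding macros. A small userspace-style sketch of that decoding, with a made-up command defined locally (the real nvgpu commands live in uapi/linux/nvgpu.h), is:

    #include <stdio.h>
    #include <linux/ioctl.h>

    struct demo_args { unsigned int flags; };

    /* Hypothetical command for illustration only. */
    #define DEMO_MAGIC 'D'
    #define DEMO_IOCTL _IOWR(DEMO_MAGIC, 1, struct demo_args)

    int main(void)
    {
        unsigned int cmd = DEMO_IOCTL;

        printf("type=%c nr=%u size=%u write=%d read=%d\n",
               _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd),
               (_IOC_DIR(cmd) & _IOC_WRITE) != 0,
               (_IOC_DIR(cmd) & _IOC_READ) != 0);
        return 0;
    }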
216static const struct file_operations completion_dev_ops = {
217 .owner = THIS_MODULE,
218 .release = nvgpu_clk_arb_release_completion_dev,
219 .poll = nvgpu_clk_arb_poll_dev,
220};
221
222static const struct file_operations event_dev_ops = {
223 .owner = THIS_MODULE,
224 .release = nvgpu_clk_arb_release_event_dev,
225 .poll = nvgpu_clk_arb_poll_dev,
226 .read = nvgpu_clk_arb_read_event_dev,
227#ifdef CONFIG_COMPAT
228 .compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
229#endif
230 .unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
231};
232
233static int nvgpu_clk_arb_install_fd(struct gk20a *g,
234 struct nvgpu_clk_session *session,
235 const struct file_operations *fops,
236 struct nvgpu_clk_dev **_dev)
237{
238 struct file *file;
239 int fd;
240 int err;
241 int status;
242 char name[64];
243 struct nvgpu_clk_dev *dev;
244
245 gk20a_dbg_fn("");
246
247 dev = nvgpu_kzalloc(g, sizeof(*dev));
248 if (!dev)
249 return -ENOMEM;
250
251 status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
252 DEFAULT_EVENT_NUMBER);
253 if (status < 0) {
254 err = status;
255 goto fail;
256 }
257
258 fd = get_unused_fd_flags(O_RDWR);
259 if (fd < 0) {
260 err = fd;
261 goto fail;
262 }
263
264 snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
265 file = anon_inode_getfile(name, fops, dev, O_RDWR);
266 if (IS_ERR(file)) {
267 err = PTR_ERR(file);
268 goto fail_fd;
269 }
270
271 fd_install(fd, file);
272
273 nvgpu_cond_init(&dev->readout_wq);
274
275 nvgpu_atomic_set(&dev->poll_mask, 0);
276
277 dev->session = session;
278 nvgpu_ref_init(&dev->refcount);
279
280 nvgpu_ref_get(&session->refcount);
281
282 *_dev = dev;
283
284 return fd;
285
286fail_fd:
287 put_unused_fd(fd);
288fail:
289 nvgpu_kfree(g, dev);
290
291 return err;
292}
293
294int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
295 struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
296{
297 struct nvgpu_clk_arb *arb = g->clk_arb;
298 struct nvgpu_clk_dev *dev;
299 int fd;
300
301 gk20a_dbg_fn("");
302
303 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
304 if (fd < 0)
305 return fd;
306
307 /* TODO: alarm mask needs to be set to default value to prevent
308 * failures of legacy tests. This will be removed when sanity is
309 * updated
310 */
311 if (alarm_mask)
312 nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
313 else
314 nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
315
316 dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
317
318 nvgpu_spinlock_acquire(&arb->users_lock);
319 list_add_tail_rcu(&dev->link, &arb->users);
320 nvgpu_spinlock_release(&arb->users_lock);
321
322 *event_fd = fd;
323
324 return 0;
325}
326
327int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
328 struct nvgpu_clk_session *session, int *request_fd)
329{
330 struct nvgpu_clk_dev *dev;
331 int fd;
332
333 gk20a_dbg_fn("");
334
335 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
336 if (fd < 0)
337 return fd;
338
339 *request_fd = fd;
340
341 return 0;
342}
343
344int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
345 struct nvgpu_clk_session *session, int request_fd)
346{
347 struct nvgpu_clk_arb *arb = g->clk_arb;
348 struct nvgpu_clk_dev *dev;
349 struct fd fd;
350 int err = 0;
351
352 gk20a_dbg_fn("");
353
354 fd = fdget(request_fd);
355 if (!fd.file)
356 return -EINVAL;
357
358 if (fd.file->f_op != &completion_dev_ops) {
359 err = -EINVAL;
360 goto fdput_fd;
361 }
362
363 dev = (struct nvgpu_clk_dev *) fd.file->private_data;
364
365 if (!dev || dev->session != session) {
366 err = -EINVAL;
367 goto fdput_fd;
368 }
369 nvgpu_ref_get(&dev->refcount);
370 llist_add(&dev->node, &session->targets);
371 if (arb->update_work_queue)
372 queue_work(arb->update_work_queue, &arb->update_fn_work);
373
374fdput_fd:
375 fdput(fd);
376 return err;
377}
378
379int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
380 int request_fd, u32 api_domain, u16 target_mhz)
381{
382 struct nvgpu_clk_dev *dev;
383 struct fd fd;
384 int err = 0;
385
386 gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
387
388 fd = fdget(request_fd);
389 if (!fd.file)
390 return -EINVAL;
391
392 if (fd.file->f_op != &completion_dev_ops) {
393 err = -EINVAL;
394 goto fdput_fd;
395 }
396
397 dev = fd.file->private_data;
398 if (!dev || dev->session != session) {
399 err = -EINVAL;
400 goto fdput_fd;
401 }
402
403 switch (api_domain) {
404 case NVGPU_GPU_CLK_DOMAIN_MCLK:
405 dev->mclk_target_mhz = target_mhz;
406 break;
407
408 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
409 dev->gpc2clk_target_mhz = target_mhz * 2ULL;
410 break;
411
412 default:
413 err = -EINVAL;
414 }
415
416fdput_fd:
417 fdput(fd);
418 return err;
419}
420
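nvgpu_clk_arb_set_session_target_mhz() above accepts a GPCCLK target from userspace but stores it internally in GPC2CLK units (twice the frequency); the read paths later divide by two again. A trivial sketch of that unit convention, with an illustrative target value, is:

    #include <stdio.h>

    /* Illustrative only: the API domain is GPCCLK, the arbiter's internal
     * unit is GPC2CLK = 2 * GPCCLK.
     */
    static unsigned short gpcclk_to_gpc2clk(unsigned short mhz) { return mhz * 2; }
    static unsigned short gpc2clk_to_gpcclk(unsigned short mhz) { return mhz / 2; }

    int main(void)
    {
        unsigned short target = 1300;   /* GPCCLK request, MHz */
        unsigned short internal = gpcclk_to_gpc2clk(target);

        printf("internal=%u back=%u\n", internal, gpc2clk_to_gpcclk(internal));
        return 0;
    }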
421int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
422 u32 api_domain, u16 *freq_mhz)
423{
424 int err = 0;
425 struct nvgpu_clk_arb_target *target;
426
427 do {
428 target = NV_ACCESS_ONCE(session->target);
429 /* no reordering of this pointer */
430 nvgpu_smp_rmb();
431
432 switch (api_domain) {
433 case NVGPU_GPU_CLK_DOMAIN_MCLK:
434 *freq_mhz = target->mclk;
435 break;
436
437 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
438 *freq_mhz = target->gpc2clk / 2ULL;
439 break;
440
441 default:
442 *freq_mhz = 0;
443 err = -EINVAL;
444 }
445 } while (target != NV_ACCESS_ONCE(session->target));
446 return err;
447}
448
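The getters here and below read the published pointer, copy the fields they need, and retry if the pointer changed in the meantime; combined with the double-buffered pools this yields a consistent snapshot without taking a lock. A single-threaded sketch of the reader loop (the concurrent writer is only implied; the driver uses NV_ACCESS_ONCE plus nvgpu_smp_rmb() rather than volatile) is:

    #include <stdio.h>

    struct target { unsigned short mclk; unsigned short gpc2clk; };

    static struct target pool[2] = { { 4000, 2600 }, { 0, 0 } };
    static struct target *volatile current_target = &pool[0];

    /* Illustrative only: re-read the pointer after copying; if a writer
     * published a new slot in between, loop and copy again.
     */
    static struct target read_target(void)
    {
        struct target snap;
        struct target *p;

        do {
            p = current_target;
            snap = *p;
        } while (p != current_target);

        return snap;
    }

    int main(void)
    {
        struct target t = read_target();

        printf("mclk=%u gpc2clk=%u\n", t.mclk, t.gpc2clk);
        return 0;
    }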
449int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
450 u32 api_domain, u16 *freq_mhz)
451{
452 struct nvgpu_clk_arb *arb = g->clk_arb;
453 int err = 0;
454 struct nvgpu_clk_arb_target *actual;
455
456 do {
457 actual = NV_ACCESS_ONCE(arb->actual);
458 /* no reordering of this pointer */
459 nvgpu_smp_rmb();
460
461 switch (api_domain) {
462 case NVGPU_GPU_CLK_DOMAIN_MCLK:
463 *freq_mhz = actual->mclk;
464 break;
465
466 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
467 *freq_mhz = actual->gpc2clk / 2ULL;
468 break;
469
470 default:
471 *freq_mhz = 0;
472 err = -EINVAL;
473 }
474 } while (actual != NV_ACCESS_ONCE(arb->actual));
475 return err;
476}
477
478int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
479 u32 api_domain, u16 *freq_mhz)
480{
481 switch (api_domain) {
482 case NVGPU_GPU_CLK_DOMAIN_MCLK:
483 *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) /
484 1000000ULL;
485 return 0;
486
487 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
488 *freq_mhz = g->ops.clk.measure_freq(g,
489 CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL;
490 return 0;
491
492 default:
493 return -EINVAL;
494 }
495}
496
497int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
498 u16 *min_mhz, u16 *max_mhz)
499{
500 int ret;
501
502 switch (api_domain) {
503 case NVGPU_GPU_CLK_DOMAIN_MCLK:
504 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
505 CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz);
506 return ret;
507
508 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
509 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
510 CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz);
511 if (!ret) {
512 *min_mhz /= 2;
513 *max_mhz /= 2;
514 }
515 return ret;
516
517 default:
518 return -EINVAL;
519 }
520}
521
522u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
523{
524 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
525 u32 api_domains = 0;
526
527 if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
528 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
529
530 if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
531 api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
532
533 return api_domains;
534}
535
536bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain)
537{
538 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
539
540 switch (api_domain) {
541 case NVGPU_GPU_CLK_DOMAIN_MCLK:
542 return ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0);
543
544 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
545 return ((clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0);
546
547 default:
548 return false;
549 }
550}
551
552int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
553 u32 api_domain, u32 *max_points, u16 *fpoints)
554{
555 int err;
556 u32 i;
557
558 switch (api_domain) {
559 case NVGPU_GPU_CLK_DOMAIN_GPCCLK:
560 err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK,
561 max_points, fpoints);
562 if (err || !fpoints)
563 return err;
564 for (i = 0; i < *max_points; i++)
565 fpoints[i] /= 2;
566 return 0;
567 case NVGPU_GPU_CLK_DOMAIN_MCLK:
568 return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK,
569 max_points, fpoints);
570 default:
571 return -EINVAL;
572 }
573}
574
575#ifdef CONFIG_DEBUG_FS
576static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
577{
578 struct gk20a *g = s->private;
579 struct nvgpu_clk_arb *arb = g->clk_arb;
580 struct nvgpu_clk_arb_debug *debug;
581
582 u64 num;
583 s64 tmp, avg, std, max, min;
584
585 debug = NV_ACCESS_ONCE(arb->debug);
586 /* Make copy of structure and ensure no reordering */
587 nvgpu_smp_rmb();
588 if (!debug)
589 return -EINVAL;
590
591 std = debug->switch_std;
592 avg = debug->switch_avg;
593 max = debug->switch_max;
594 min = debug->switch_min;
595 num = debug->switch_num;
596
597 tmp = std;
598 do_div(tmp, num);
599 seq_printf(s, "Number of transitions: %lld\n",
600 num);
601 seq_printf(s, "max / min : %lld / %lld usec\n",
602 max, min);
603 seq_printf(s, "avg / std : %lld / %ld usec\n",
604 avg, int_sqrt(tmp));
605
606 return 0;
607}
608
609static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
610{
611 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
612}
613
614static const struct file_operations nvgpu_clk_arb_stats_fops = {
615 .open = nvgpu_clk_arb_stats_open,
616 .read = seq_read,
617 .llseek = seq_lseek,
618 .release = single_release,
619};
620
621
622int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
623{
624 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
625 struct dentry *gpu_root = l->debugfs;
626 struct dentry *d;
627
628 gk20a_dbg(gpu_dbg_info, "g=%p", g);
629
630 d = debugfs_create_file(
631 "arb_stats",
632 S_IRUGO,
633 gpu_root,
634 g,
635 &nvgpu_clk_arb_stats_fops);
636 if (!d)
637 return -ENOMEM;
638
639 return 0;
640}
641#endif
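The arb_stats file implemented above reports the average switch time directly and derives the standard deviation by dividing the accumulated switch_std by the sample count (do_div) and taking an integer square root (int_sqrt). A plain C sketch of that final computation, assuming switch_std holds a sum of squared deviations in microseconds squared (hypothetical numbers throughout), is:

    #include <stdio.h>
    #include <math.h>

    int main(void)
    {
        long long switch_std = 5000000;   /* hypothetical sum of squared deviations, usec^2 */
        long long switch_avg = 180;       /* hypothetical average, usec */
        unsigned long long num = 1250;    /* number of transitions */

        long long variance = switch_std / (long long)num;
        long long std = (long long)sqrt((double)variance);  /* int_sqrt() in the kernel */

        printf("avg / std : %lld / %lld usec\n", switch_avg, std);
        return 0;
    }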
diff --git a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h
index c13144ee..a2f8135e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -24,10 +24,95 @@
24#define __NVGPU_CLK_ARB_H__ 24#define __NVGPU_CLK_ARB_H__
25 25
26#include <nvgpu/types.h> 26#include <nvgpu/types.h>
27 27#include <nvgpu/bitops.h>
28struct gk20a; 28#include <nvgpu/lock.h>
29#include <nvgpu/kmem.h>
30#include <nvgpu/atomic.h>
31#include <nvgpu/bug.h>
32#include <nvgpu/kref.h>
33#include <nvgpu/log.h>
34#include <nvgpu/barrier.h>
35#include <nvgpu/cond.h>
36
37#include "gk20a/gk20a.h"
38#include "clk/clk.h"
39#include "pstate/pstate.h"
40#include "lpwr/lpwr.h"
41#include "volt/volt.h"
42
43#define MAX_F_POINTS 256
44#define DEFAULT_EVENT_NUMBER 32
45
46struct nvgpu_clk_dev;
47struct nvgpu_clk_arb_target;
48struct nvgpu_clk_notification_queue;
29struct nvgpu_clk_session; 49struct nvgpu_clk_session;
30 50
51#define VF_POINT_INVALID_PSTATE ~0U
52#define VF_POINT_SET_PSTATE_SUPPORTED(a, b) ((a)->pstates |= (1UL << (b)))
53#define VF_POINT_GET_PSTATE(a) (((a)->pstates) ?\
54 __fls((a)->pstates) :\
55 VF_POINT_INVALID_PSTATE)
56#define VF_POINT_COMMON_PSTATE(a, b) (((a)->pstates & (b)->pstates) ?\
57 __fls((a)->pstates & (b)->pstates) :\
58 VF_POINT_INVALID_PSTATE)
59
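The VF_POINT_* macros above encode the P-states supported by a VF point as a bitmask: SET_PSTATE_SUPPORTED sets one bit per P-state, and COMMON_PSTATE returns the highest P-state index present in both masks (via __fls), or VF_POINT_INVALID_PSTATE when the intersection is empty. A standalone sketch of the same idea, with a local highest-set-bit helper standing in for the kernel's __fls(), is:

    #include <stdio.h>

    #define INVALID_PSTATE (~0u)

    /* Local stand-in for __fls(): index of the highest set bit (mask != 0). */
    static unsigned highest_bit(unsigned long mask)
    {
        unsigned bit = 0;

        while (mask >>= 1)
            bit++;
        return bit;
    }

    static unsigned common_pstate(unsigned long a, unsigned long b)
    {
        unsigned long common = a & b;

        return common ? highest_bit(common) : INVALID_PSTATE;
    }

    int main(void)
    {
        unsigned long gpc = 0, mclk = 0;

        gpc  |= 1UL << 0;   /* pstate 0 supported */
        gpc  |= 1UL << 5;   /* pstate 5 supported */
        mclk |= 1UL << 5;
        mclk |= 1UL << 8;

        printf("common pstate: %u\n", common_pstate(gpc, mclk)); /* 5 */
        return 0;
    }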
60/* Local Alarms */
61#define EVENT(alarm) (0x1UL << NVGPU_GPU_EVENT_##alarm)
62
63#define LOCAL_ALARM_MASK (EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE) | \
64 EVENT(VF_UPDATE))
65
66#define _WRAPGTEQ(a, b) ((a-b) > 0)
67
68struct nvgpu_clk_notification {
69 u32 notification;
70 u64 timestamp;
71};
72
73struct nvgpu_clk_notification_queue {
74 u32 size;
75 nvgpu_atomic_t head;
76 nvgpu_atomic_t tail;
77 struct nvgpu_clk_notification *notifications;
78};
79
80struct nvgpu_clk_vf_point {
81 u16 pstates;
82 union {
83 struct {
84 u16 gpc_mhz;
85 u16 sys_mhz;
86 u16 xbar_mhz;
87 };
88 u16 mem_mhz;
89 };
90 u32 uvolt;
91 u32 uvolt_sram;
92};
93
94struct nvgpu_clk_vf_table {
95 u32 mclk_num_points;
96 struct nvgpu_clk_vf_point *mclk_points;
97 u32 gpc2clk_num_points;
98 struct nvgpu_clk_vf_point *gpc2clk_points;
99};
100#ifdef CONFIG_DEBUG_FS
101struct nvgpu_clk_arb_debug {
102 s64 switch_max;
103 s64 switch_min;
104 u64 switch_num;
105 s64 switch_avg;
106 s64 switch_std;
107};
108#endif
109
110struct nvgpu_clk_arb_target {
111 u16 mclk;
112 u16 gpc2clk;
113 u32 pstate;
114};
115
31int nvgpu_clk_arb_init_arbiter(struct gk20a *g); 116int nvgpu_clk_arb_init_arbiter(struct gk20a *g);
32 117
33int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 118int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
@@ -80,5 +165,19 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock);
80void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g); 165void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g);
81 166
82void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm); 167void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm);
168
169void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount);
170
171void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount);
172
173int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
174 struct nvgpu_clk_notification_queue *queue,
175 size_t events_number);
176
177void nvgpu_clk_notification_queue_free(struct gk20a *g,
178 struct nvgpu_clk_notification_queue *queue);
179#ifdef CONFIG_DEBUG_FS
180int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
181#endif
83#endif /* __NVGPU_CLK_ARB_H__ */ 182#endif /* __NVGPU_CLK_ARB_H__ */
84 183