summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/clk/clk_arb.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/clk/clk_arb.c')
-rw-r--r--drivers/gpu/nvgpu/clk/clk_arb.c765
1 files changed, 667 insertions, 98 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 98b7cb5f..f868100b 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -18,9 +18,17 @@
18#include <linux/anon_inodes.h> 18#include <linux/anon_inodes.h>
19#include <linux/nvgpu.h> 19#include <linux/nvgpu.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/spinlock.h>
21 22
22#include "clk/clk_arb.h" 23#include "clk/clk_arb.h"
23 24
25
26#define MAX_F_POINTS 127
27
28#ifdef CONFIG_DEBUG_FS
29static int nvgpu_clk_arb_debugfs_init(struct gk20a *g);
30#endif
31
24static int nvgpu_clk_arb_release_event_dev(struct inode *inode, 32static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
25 struct file *filp); 33 struct file *filp);
26static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, 34static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
@@ -28,21 +36,57 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
28static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); 36static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
29 37
30static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); 38static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
39static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work);
40static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *);
41
42struct nvgpu_clk_vf_point {
43 u16 mhz;
44 u32 uvolt;
45 u32 uvolt_sram;
46};
31 47
32struct nvgpu_clk_arb { 48struct nvgpu_clk_arb {
33 struct mutex req_lock; 49 spinlock_t sessions_lock;
34 struct mutex users_lock; 50 spinlock_t users_lock;
51 spinlock_t req_lock;
52
35 struct list_head users; 53 struct list_head users;
54 struct list_head sessions;
36 struct list_head requests; 55 struct list_head requests;
37 56
38 u64 gpc2clk_current_hz; 57 struct gk20a *g;
39 u64 gpc2clk_target_hz; 58 spinlock_t data_lock;
40 u64 gpc2clk_default_hz; 59 spinlock_t vf_lock;
41 u64 mclk_current_hz; 60
42 u64 mclk_target_hz; 61 u16 gpc2clk_actual_mhz;
43 u64 mclk_default_hz; 62 u16 gpc2clk_default_mhz;
44 atomic_t usercount; 63
64 u16 mclk_actual_mhz;
65 u16 mclk_default_mhz;
66 u32 voltuv_actual;
67
45 struct work_struct update_fn_work; 68 struct work_struct update_fn_work;
69 struct work_struct vftable_fn_work;
70 wait_queue_head_t vftable_wq;
71
72 u16 *mclk_f_points;
73 bool vftable_set;
74
75 struct nvgpu_clk_vf_point *mclk_vf_points;
76 u32 mclk_f_numpoints;
77 u16 *gpc2clk_f_points;
78 u32 gpc2clk_f_numpoints;
79 struct nvgpu_clk_vf_point *gpc2clk_vf_points;
80
81#ifdef CONFIG_DEBUG_FS
82 struct mutex debug_lock;
83 s64 switch_max;
84 s64 switch_min;
85 u64 switch_num;
86 s64 switch_avg;
87 s64 switch_std;
88 bool debugfs_set;
89#endif
46}; 90};
47 91
48 92
@@ -51,15 +95,20 @@ struct nvgpu_clk_dev {
51 struct list_head link; 95 struct list_head link;
52 wait_queue_head_t readout_wq; 96 wait_queue_head_t readout_wq;
53 atomic_t poll_mask; 97 atomic_t poll_mask;
98 u16 gpc2clk_target_mhz;
99 u16 mclk_target_mhz;
54}; 100};
55 101
56struct nvgpu_clk_session { 102struct nvgpu_clk_session {
57 bool zombie; 103 bool zombie;
58 struct gk20a *g; 104 struct gk20a *g;
59 struct kref refcount; 105 struct kref refcount;
106 struct list_head link;
107 struct list_head targets;
60 108
61 u64 gpc2clk_target_hz; 109 spinlock_t target_lock;
62 u64 mclk_target_hz; 110 u16 gpc2clk_target_mhz;
111 u16 mclk_target_mhz;
63}; 112};
64 113
65static const struct file_operations completion_dev_ops = { 114static const struct file_operations completion_dev_ops = {
@@ -77,7 +126,7 @@ static const struct file_operations event_dev_ops = {
77int nvgpu_clk_arb_init_arbiter(struct gk20a *g) 126int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
78{ 127{
79 struct nvgpu_clk_arb *arb; 128 struct nvgpu_clk_arb *arb;
80 u64 default_hz; 129 u16 default_mhz;
81 int err; 130 int err;
82 131
83 gk20a_dbg_fn(""); 132 gk20a_dbg_fn("");
@@ -86,39 +135,104 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
86 return 0; 135 return 0;
87 136
88 arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL); 137 arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL);
89 if (!arb) 138 if (!arb) {
90 return -ENOMEM; 139 err = -ENOMEM;
140 goto init_fail;
141 }
142
143 arb->gpc2clk_f_numpoints = MAX_F_POINTS;
144 arb->mclk_f_numpoints = MAX_F_POINTS;
145
146 arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
147 if (!arb->gpc2clk_f_points) {
148 err = -ENOMEM;
149 goto init_fail;
150 }
151
152 arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
153 if (!arb->mclk_f_points) {
154 err = -ENOMEM;
155 goto init_fail;
156 }
157
158 arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS,
159 sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
160 if (!arb->gpc2clk_vf_points) {
161 err = -ENOMEM;
162 goto init_fail;
163 }
164
165 arb->mclk_vf_points = kcalloc(MAX_F_POINTS,
166 sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
167 if (!arb->mclk_vf_points) {
168 err = -ENOMEM;
169 goto init_fail;
170 }
91 171
92 g->clk_arb = arb; 172 g->clk_arb = arb;
173 arb->g = g;
93 174
94 mutex_init(&arb->req_lock); 175 spin_lock_init(&arb->sessions_lock);
95 mutex_init(&arb->users_lock); 176 spin_lock_init(&arb->users_lock);
177 spin_lock_init(&arb->req_lock);
178 spin_lock_init(&arb->data_lock);
179 spin_lock_init(&arb->vf_lock);
96 180
97 err = g->ops.clk_arb.get_arbiter_clk_default(g, 181 err = g->ops.clk_arb.get_arbiter_clk_default(g,
98 NVGPU_GPU_CLK_DOMAIN_MCLK, &default_hz); 182 NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz);
99 if (err) 183 if (err) {
100 return -EINVAL; 184 err = -EINVAL;
185 goto init_fail;
186 }
101 187
102 arb->mclk_target_hz = default_hz; 188 arb->mclk_default_mhz = default_mhz;
103 arb->mclk_current_hz = default_hz;
104 arb->mclk_default_hz = default_hz;
105 189
106 err = g->ops.clk_arb.get_arbiter_clk_default(g, 190 err = g->ops.clk_arb.get_arbiter_clk_default(g,
107 NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_hz); 191 NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_mhz);
108 if (err) 192 if (err) {
109 return -EINVAL; 193 err = -EINVAL;
110 194 goto init_fail;
111 arb->gpc2clk_target_hz = default_hz; 195 }
112 arb->gpc2clk_current_hz = default_hz;
113 arb->gpc2clk_default_hz = default_hz;
114 196
115 atomic_set(&arb->usercount, 0); 197 arb->gpc2clk_default_mhz = default_mhz;
116 198
117 INIT_LIST_HEAD(&arb->users); 199 INIT_LIST_HEAD(&arb->users);
200 INIT_LIST_HEAD(&arb->sessions);
118 INIT_LIST_HEAD(&arb->requests); 201 INIT_LIST_HEAD(&arb->requests);
202
203 init_waitqueue_head(&arb->vftable_wq);
204
205 INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb);
206
119 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); 207 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
120 208
209#ifdef CONFIG_DEBUG_FS
210 mutex_init(&arb->debug_lock);
211 if (!arb->debugfs_set) {
212 if (nvgpu_clk_arb_debugfs_init(g))
213 arb->debugfs_set = true;
214 }
215#endif
216 err = nvgpu_clk_arb_update_vftable(arb);
217 if (err < 0)
218 goto init_fail;
219
220 /* Schedule first run */
221 schedule_work(&arb->update_fn_work);
222
121 return 0; 223 return 0;
224
225init_fail:
226
227 kfree(arb->gpc2clk_f_points);
228 kfree(arb->gpc2clk_vf_points);
229
230 kfree(arb->mclk_f_points);
231 kfree(arb->mclk_vf_points);
232
233 kfree(arb);
234
235 return err;
122} 236}
123 237
124void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) 238void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
@@ -170,6 +284,7 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g,
170fail: 284fail:
171 kfree(dev); 285 kfree(dev);
172 put_unused_fd(fd); 286 put_unused_fd(fd);
287
173 return err; 288 return err;
174} 289}
175 290
@@ -190,12 +305,16 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
190 session->g = g; 305 session->g = g;
191 306
192 kref_init(&session->refcount); 307 kref_init(&session->refcount);
193 308 spin_lock_init(&session->target_lock);
194 atomic_inc(&arb->usercount);
195 309
196 session->zombie = false; 310 session->zombie = false;
197 session->mclk_target_hz = arb->mclk_default_hz; 311 session->mclk_target_mhz = arb->mclk_default_mhz;
198 session->gpc2clk_target_hz = arb->gpc2clk_default_hz; 312 session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
313 INIT_LIST_HEAD(&session->targets);
314
315 spin_lock(&arb->sessions_lock);
316 list_add_tail(&session->link, &arb->sessions);
317 spin_unlock(&arb->sessions_lock);
199 318
200 *_session = session; 319 *_session = session;
201 320
@@ -206,8 +325,15 @@ void nvgpu_clk_arb_free_session(struct kref *refcount)
206{ 325{
207 struct nvgpu_clk_session *session = container_of(refcount, 326 struct nvgpu_clk_session *session = container_of(refcount,
208 struct nvgpu_clk_session, refcount); 327 struct nvgpu_clk_session, refcount);
328 struct nvgpu_clk_arb *arb = session->g->clk_arb;
209 329
330 gk20a_dbg_fn("");
331
332 spin_lock(&arb->sessions_lock);
333 list_del(&session->link);
334 spin_unlock(&arb->sessions_lock);
210 kfree(session); 335 kfree(session);
336;
211} 337}
212 338
213void nvgpu_clk_arb_release_session(struct gk20a *g, 339void nvgpu_clk_arb_release_session(struct gk20a *g,
@@ -215,12 +341,12 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
215{ 341{
216 struct nvgpu_clk_arb *arb = g->clk_arb; 342 struct nvgpu_clk_arb *arb = g->clk_arb;
217 343
344 gk20a_dbg_fn("");
345
218 session->zombie = true; 346 session->zombie = true;
219 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 347 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
220 348
221 /* schedule arbiter if no more user */ 349 schedule_work(&arb->update_fn_work);
222 if (!atomic_dec_and_test(&arb->usercount))
223 schedule_work(&arb->update_fn_work);
224} 350}
225 351
226int nvgpu_clk_arb_install_event_fd(struct gk20a *g, 352int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
@@ -230,19 +356,155 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
230 struct nvgpu_clk_dev *dev; 356 struct nvgpu_clk_dev *dev;
231 int fd; 357 int fd;
232 358
359 gk20a_dbg_fn("");
360
233 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev); 361 fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
234 if (fd < 0) 362 if (fd < 0)
235 return fd; 363 return fd;
236 364
237 mutex_lock(&arb->users_lock); 365 spin_lock(&arb->users_lock);
238 list_add_tail(&dev->link, &arb->users); 366 list_add_tail(&dev->link, &arb->users);
239 mutex_unlock(&arb->users_lock); 367 spin_unlock(&arb->users_lock);
240 368
241 *event_fd = fd; 369 *event_fd = fd;
242 370
243 return 0; 371 return 0;
244} 372}
245 373
374int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
375 struct nvgpu_clk_session *session, int *request_fd)
376{
377 struct nvgpu_clk_dev *dev;
378 int fd;
379
380 gk20a_dbg_fn("");
381
382 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
383 if (fd < 0)
384 return fd;
385
386 *request_fd = fd;
387
388 return 0;
389}
390
391static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb)
392{
393 struct gk20a *g = arb->g;
394
395 int i;
396 int status = 0;
397 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
398 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
399
400 /* the flag must be visible in all threads */
401 mb();
402 ACCESS_ONCE(arb->vftable_set) = false;
403
404 spin_lock(&arb->vf_lock);
405
406 if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
407 &arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) {
408 gk20a_err(dev_from_gk20a(g),
409 "failed to fetch GPC2CLK frequency points");
410 goto exit_vftable;
411 }
412 if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
413 &arb->mclk_f_numpoints, arb->mclk_f_points) < 0) {
414 gk20a_err(dev_from_gk20a(g),
415 "failed to fetch MCLK frequency points");
416 goto exit_vftable;
417 }
418
419
420 memset(arb->mclk_vf_points, 0,
421 arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
422 memset(arb->gpc2clk_vf_points, 0,
423 arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
424
425 for (i = 0 ; i < arb->mclk_f_numpoints; i++) {
426 arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i];
427 mclk_voltuv = mclk_voltuv_sram = 0;
428
429 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
430 &arb->mclk_vf_points[i].mhz, &mclk_voltuv,
431 CTRL_VOLT_DOMAIN_LOGIC);
432 if (status < 0) {
433 gk20a_err(dev_from_gk20a(g),
434 "failed to get MCLK LOGIC voltage");
435 goto exit_vftable;
436 }
437 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
438 &arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram,
439 CTRL_VOLT_DOMAIN_SRAM);
440 if (status < 0) {
441 gk20a_err(dev_from_gk20a(g),
442 "failed to get MCLK SRAM voltage");
443 goto exit_vftable;
444 }
445
446 arb->mclk_vf_points[i].uvolt = mclk_voltuv;
447 arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram;
448 }
449
450 for (i = 0 ; i < arb->gpc2clk_f_numpoints; i++) {
451 arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i];
452 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
453
454 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
455 &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv,
456 CTRL_VOLT_DOMAIN_LOGIC);
457 if (status < 0) {
458 gk20a_err(dev_from_gk20a(g),
459 "failed to get GPC2CLK LOGIC voltage");
460 goto exit_vftable;
461 }
462 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
463 &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram,
464 CTRL_VOLT_DOMAIN_SRAM);
465 if (status < 0) {
466 gk20a_err(dev_from_gk20a(g),
467 "failed to get GPC2CLK SRAM voltage");
468 goto exit_vftable;
469 }
470
471 arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv;
472 arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram;
473
474 }
475
476 /* make flag visible when all data has resolved in the tables */
477 wmb();
478 ACCESS_ONCE(arb->vftable_set) = true;
479
480 wake_up(&arb->vftable_wq);
481exit_vftable:
482
483 spin_unlock(&arb->vf_lock);
484
485 return status;
486}
487
488void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g)
489{
490 struct nvgpu_clk_arb *arb = g->clk_arb;
491
492 ACCESS_ONCE(arb->vftable_set) = false;
493 /* Disable the flag in case arbiter gets scheduled first */
494 mb();
495
496 schedule_work(&arb->vftable_fn_work);
497 schedule_work(&arb->update_fn_work);
498}
499
500static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work)
501{
502 struct nvgpu_clk_arb *arb =
503 container_of(work, struct nvgpu_clk_arb, update_fn_work);
504
505 nvgpu_clk_arb_update_vftable(arb);
506}
507
246static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) 508static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
247{ 509{
248 struct nvgpu_clk_arb *arb = 510 struct nvgpu_clk_arb *arb =
@@ -250,67 +512,270 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
250 struct nvgpu_clk_session *session; 512 struct nvgpu_clk_session *session;
251 struct nvgpu_clk_dev *dev; 513 struct nvgpu_clk_dev *dev;
252 struct nvgpu_clk_dev *tmp; 514 struct nvgpu_clk_dev *tmp;
515 struct gk20a *g = arb->g;
516
517 struct change_fll_clk fllclk;
518 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
519 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
520
521 u32 voltuv, voltuv_sram;
522
523 int status;
524
525 /* Temporary variables for checking target frequency */
526 u16 gpc2clk_target, mclk_target;
253 527
254 mutex_lock(&arb->req_lock); 528 /* iteration index */
529 u32 index;
255 530
256 arb->mclk_target_hz = arb->mclk_default_hz; 531#ifdef CONFIG_DEBUG_FS
257 arb->gpc2clk_target_hz = arb->gpc2clk_default_hz; 532 u64 t0, t1;
533#endif
258 534
259 list_for_each_entry(dev, &arb->requests, link) { 535 gk20a_dbg_fn("");
260 session = dev->session; 536
537#ifdef CONFIG_DEBUG_FS
538 g->ops.read_ptimer(g, &t0);
539#endif
540
541 /* Only one arbiter should be running */
542 gpc2clk_target = 0;
543 mclk_target = 0;
544
545 spin_lock(&arb->sessions_lock);
546 list_for_each_entry(session, &arb->sessions, link) {
261 if (!session->zombie) { 547 if (!session->zombie) {
262 /* TODO: arbiter policy. For now last request wins */ 548 spin_lock(&arb->req_lock);
549 spin_lock(&session->target_lock);
550
551 mclk_target = mclk_target > session->mclk_target_mhz ?
552 mclk_target : session->mclk_target_mhz;
553
554 gpc2clk_target =
555 gpc2clk_target > session->gpc2clk_target_mhz ?
556 gpc2clk_target : session->gpc2clk_target_mhz;
557 /* Move processed requests to notification list*/
558 list_for_each_entry_safe(dev, tmp, &session->targets,
559 link) {
560 list_del_init(&dev->link);
561 list_add_tail(&dev->link, &arb->requests);
562 }
563 spin_unlock(&session->target_lock);
564 spin_unlock(&arb->req_lock);
565
566 }
567 }
568 spin_unlock(&arb->sessions_lock);
569
570 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
571 arb->gpc2clk_actual_mhz ? gpc2clk_target :
572 arb->gpc2clk_default_mhz;
263 573
264 arb->mclk_target_hz = session->mclk_target_hz; 574 mclk_target = (mclk_target > 0) ? mclk_target :
265 arb->gpc2clk_target_hz = session->gpc2clk_target_hz; 575 arb->mclk_actual_mhz ? mclk_target :
576 arb->mclk_default_mhz;
577
578 if (!gpc2clk_target && !mclk_target) {
579 mclk_target = arb->mclk_default_mhz;
580 gpc2clk_target = arb->gpc2clk_default_mhz;
581 }
582
583 if (!gpc2clk_target)
584 gpc2clk_target = arb->gpc2clk_actual_mhz;
585
586 do {
587 /* Check that the table is set */
588 mb();
589 wait_event(arb->vftable_wq, arb->vftable_set);
590 } while (!ACCESS_ONCE(arb->vftable_set));
591
592 spin_lock(&arb->vf_lock);
593 /* round up the freq requests */
594 for (index = 0; index < arb->gpc2clk_f_numpoints; index++) {
595 if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) {
596 gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
597 gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
598 gpc2clk_voltuv_sram =
599 arb->gpc2clk_vf_points[index].uvolt_sram;
600 break;
266 } 601 }
267 } 602 }
268 603
269 /* TODO: loop up higher or equal VF points */ 604 if (index == arb->gpc2clk_f_numpoints) {
605 gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
606 gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
607 gpc2clk_voltuv_sram =
608 arb->gpc2clk_vf_points[index].uvolt_sram;
609 }
610
611 if (!mclk_target)
612 mclk_target = arb->mclk_actual_mhz;
613
614 for (index = 0; index < arb->mclk_f_numpoints; index++) {
615 if (arb->mclk_vf_points[index].mhz >= mclk_target) {
616 mclk_target = arb->mclk_vf_points[index].mhz;
617 mclk_voltuv = arb->mclk_vf_points[index].uvolt;
618 mclk_voltuv_sram =
619 arb->mclk_vf_points[index].uvolt_sram;
620 break;
621 }
622 }
623 if (index == arb->mclk_f_numpoints) {
624 mclk_target = arb->mclk_vf_points[index].mhz;
625 mclk_voltuv = arb->mclk_vf_points[index].uvolt;
626 mclk_voltuv_sram =
627 arb->mclk_vf_points[index].uvolt_sram;
628 }
629 spin_unlock(&arb->vf_lock);
630
631 /* Program clocks */
632 /* A change in both mclk of gpc2clk may require a change in voltage */
633 if ((arb->gpc2clk_actual_mhz == gpc2clk_target) &&
634 (arb->mclk_actual_mhz == mclk_target)) {
635 goto exit_arb;
636 }
637
638 voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
639 voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
640 gpc2clk_voltuv_sram : mclk_voltuv_sram;
641
642 /* if voltage ascends we do:
643 * (1) FLL change
644 * (2) Voltage change
645 * (3) MCLK change
646 * If it goes down
647 * (1) MCLK change
648 * (2) Voltage change
649 * (3) FLL change
650 */
651
652 /* descending */
653 if (voltuv <= arb->voltuv_actual) {
654 status = g->clk_pmu.clk_mclk.change(g, mclk_target);
655 if (status < 0)
656 goto exit_arb;
657
658 status = volt_set_voltage(g, voltuv, voltuv_sram);
659 if (status < 0)
660 goto exit_arb;
661
662 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
663 fllclk.clkmhz = gpc2clk_target;
664 fllclk.voltuv = voltuv;
665 status = clk_program_fll_clks(g, &fllclk);
666 if (status < 0)
667 goto exit_arb;
668 } else {
669 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
670 fllclk.clkmhz = gpc2clk_target;
671 fllclk.voltuv = voltuv;
672 status = clk_program_fll_clks(g, &fllclk);
673 if (status < 0)
674 goto exit_arb;
675
676 status = volt_set_voltage(g, voltuv, voltuv_sram);
677 if (status < 0)
678 goto exit_arb;
679
680 status = g->clk_pmu.clk_mclk.change(g, mclk_target);
681 if (status < 0)
682 goto exit_arb;
683 }
684
685 spin_lock(&arb->data_lock);
686 arb->gpc2clk_actual_mhz = gpc2clk_target;
687 arb->mclk_actual_mhz = mclk_target;
688 arb->voltuv_actual = voltuv;
689 /* Make changes visible to other threads */
690 wmb();
691
692 spin_unlock(&arb->data_lock);
693
694#ifdef CONFIG_DEBUG_FS
695 g->ops.read_ptimer(g, &t1);
696 arb->switch_num++;
697
698 mutex_lock(&arb->debug_lock);
699 if (arb->switch_num == 1) {
700 arb->switch_max = arb->switch_min =
701 arb->switch_avg = (t1-t0)/1000;
702 arb->switch_std = 0;
703 } else {
704 s64 prev_avg;
705 u64 curr = (t1-t0)/1000;
706
707 arb->switch_max = curr > arb->switch_max ?
708 curr : arb->switch_max;
709 arb->switch_min = arb->switch_min ?
710 (curr < arb->switch_min ?
711 curr : arb->switch_min) : curr;
712 prev_avg = arb->switch_avg;
713 arb->switch_avg = (curr +
714 (arb->switch_avg * (arb->switch_num-1))) /
715 arb->switch_num;
716 arb->switch_std +=
717 (curr - arb->switch_avg) * (curr - prev_avg);
718 }
719 mutex_unlock(&arb->debug_lock);
270 720
271 arb->mclk_current_hz = arb->mclk_target_hz; 721#endif
272 arb->gpc2clk_current_hz = arb->gpc2clk_target_hz;
273 722
274 /* TODO: actually program the clocks */ 723exit_arb:
275 724
725 spin_lock(&arb->req_lock);
276 /* notify completion for all requests */ 726 /* notify completion for all requests */
277 list_for_each_entry_safe(dev, tmp, &arb->requests, link) { 727 list_for_each_entry_safe(dev, tmp, &arb->requests, link) {
278 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); 728 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
279 wake_up_interruptible(&dev->readout_wq); 729 wake_up_interruptible(&dev->readout_wq);
280 list_del_init(&dev->link); 730 list_del_init(&dev->link);
281 } 731 }
282 mutex_unlock(&arb->req_lock); 732 spin_unlock(&arb->req_lock);
283 733
284 /* notify event for all users */ 734 /* notify event for all users */
285 mutex_lock(&arb->users_lock); 735 spin_lock(&arb->users_lock);
286 list_for_each_entry(dev, &arb->users, link) { 736 list_for_each_entry(dev, &arb->users, link) {
287 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); 737 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
288 wake_up_interruptible(&dev->readout_wq); 738 wake_up_interruptible(&dev->readout_wq);
289 } 739 }
290 mutex_unlock(&arb->users_lock); 740 spin_unlock(&arb->users_lock);
291
292} 741}
293 742
294int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g, 743int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
295 struct nvgpu_clk_session *session, int *completion_fd) 744 struct nvgpu_clk_session *session, int request_fd)
296{ 745{
297 struct nvgpu_clk_arb *arb = g->clk_arb; 746 struct nvgpu_clk_arb *arb = g->clk_arb;
298 struct nvgpu_clk_dev *dev; 747 struct nvgpu_clk_dev *dev;
299 int fd; 748 struct fd fd;
749 int err = 0;
300 750
301 fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); 751 gk20a_dbg_fn("");
302 if (fd < 0) 752
303 return fd; 753 fd = fdget(request_fd);
754
755 if (!fd.file)
756 return -EINVAL;
757
758 dev = (struct nvgpu_clk_dev *) fd.file->private_data;
304 759
305 *completion_fd = fd; 760 if (!dev || dev->session != session) {
761 err = -EINVAL;
762 goto fdput_fd;
763 }
764 spin_lock(&session->target_lock);
765 session->mclk_target_mhz = dev->mclk_target_mhz ? dev->mclk_target_mhz :
766 session->mclk_target_mhz;
767 session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ?
768 dev->gpc2clk_target_mhz :
769 session->gpc2clk_target_mhz;
306 770
307 mutex_lock(&arb->req_lock); 771 list_add_tail(&dev->link, &session->targets);
308 list_add_tail(&dev->link, &arb->requests); 772 spin_unlock(&session->target_lock);
309 mutex_unlock(&arb->req_lock);
310 773
311 schedule_work(&arb->update_fn_work); 774 schedule_work(&arb->update_fn_work);
312 775
313 return 0; 776fdput_fd:
777 fdput(fd);
778 return err;
314} 779}
315 780
316static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) 781static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
@@ -328,11 +793,22 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
328{ 793{
329 struct nvgpu_clk_dev *dev = filp->private_data; 794 struct nvgpu_clk_dev *dev = filp->private_data;
330 struct nvgpu_clk_session *session = dev->session; 795 struct nvgpu_clk_session *session = dev->session;
796 struct nvgpu_clk_arb *arb;
797
798 arb = session->g->clk_arb;
331 799
332 gk20a_dbg_fn(""); 800 gk20a_dbg_fn("");
333 801
802 spin_lock(&arb->req_lock);
803 spin_lock(&session->target_lock);
804 if (!list_empty(&dev->link))
805 list_del_init(&dev->link);
806 spin_unlock(&session->target_lock);
807 spin_unlock(&arb->req_lock);
808
334 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 809 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
335 kfree(dev); 810 kfree(dev);
811
336 return 0; 812 return 0;
337} 813}
338 814
@@ -341,94 +817,123 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
341{ 817{
342 struct nvgpu_clk_dev *dev = filp->private_data; 818 struct nvgpu_clk_dev *dev = filp->private_data;
343 struct nvgpu_clk_session *session = dev->session; 819 struct nvgpu_clk_session *session = dev->session;
344 struct nvgpu_clk_arb *arb = session->g->clk_arb; 820 struct nvgpu_clk_arb *arb;
821
822 arb = session->g->clk_arb;
345 823
346 gk20a_dbg_fn(""); 824 gk20a_dbg_fn("");
347 825
348 mutex_lock(&arb->users_lock); 826 spin_lock(&arb->users_lock);
349 list_del_init(&dev->link); 827 list_del(&dev->link);
350 mutex_unlock(&arb->users_lock); 828 spin_unlock(&arb->users_lock);
351 829
352 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 830 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
353 kfree(dev); 831 kfree(dev);
832
354 return 0; 833 return 0;
355} 834}
356 835
357int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session, 836int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
358 u32 api_domain, u64 target_hz) 837 int request_fd, u32 api_domain, u16 target_mhz)
359{ 838{
839 struct nvgpu_clk_dev *dev;
840 struct fd fd;
841 int err = 0;
360 842
361 gk20a_dbg_fn("domain=0x%08x target_hz=%llu", api_domain, target_hz); 843 gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz);
844
845 fd = fdget(request_fd);
846
847 if (!fd.file)
848 return -EINVAL;
849
850 dev = fd.file->private_data;
851 if (!dev || dev->session != session) {
852 err = -EINVAL;
853 goto fdput_fd;
854 }
362 855
363 switch (api_domain) { 856 switch (api_domain) {
364 case NVGPU_GPU_CLK_DOMAIN_MCLK: 857 case NVGPU_GPU_CLK_DOMAIN_MCLK:
365 session->mclk_target_hz = target_hz; 858 dev->mclk_target_mhz = target_mhz;
366 return 0; 859 break;
367 860
368 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 861 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
369 session->gpc2clk_target_hz = target_hz; 862 dev->gpc2clk_target_mhz = target_mhz;
370 return 0; 863 break;
371 864
372 default: 865 default:
373 return -EINVAL; 866 err = -EINVAL;
374 } 867 }
868
869fdput_fd:
870 fdput(fd);
871 return err;
375} 872}
376 873
377int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session, 874int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
378 u32 api_domain, u64 *freq_hz) 875 u32 api_domain, u16 *freq_mhz)
379{ 876{
877 int err = 0;
878
879 spin_lock(&session->target_lock);
880
380 switch (api_domain) { 881 switch (api_domain) {
381 case NVGPU_GPU_CLK_DOMAIN_MCLK: 882 case NVGPU_GPU_CLK_DOMAIN_MCLK:
382 *freq_hz = session->mclk_target_hz; 883 *freq_mhz = session->mclk_target_mhz;
383 return 0; 884 break;
384 885
385 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 886 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
386 *freq_hz = session->gpc2clk_target_hz; 887 *freq_mhz = session->gpc2clk_target_mhz;
387 return 0; 888 break;
388 889
389 default: 890 default:
390 *freq_hz = 0; 891 *freq_mhz = 0;
391 return -EINVAL; 892 err = -EINVAL;
392 } 893 }
894
895 spin_unlock(&session->target_lock);
896 return err;
393} 897}
394 898
395int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g, 899int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
396 u32 api_domain, u64 *freq_hz) 900 u32 api_domain, u16 *freq_mhz)
397{ 901{
398 struct nvgpu_clk_arb *arb = g->clk_arb; 902 struct nvgpu_clk_arb *arb = g->clk_arb;
399 int err = 0; 903 int err = 0;
400 904
401 mutex_lock(&arb->req_lock); 905 spin_lock(&arb->data_lock);
906
402 switch (api_domain) { 907 switch (api_domain) {
403 case NVGPU_GPU_CLK_DOMAIN_MCLK: 908 case NVGPU_GPU_CLK_DOMAIN_MCLK:
404 *freq_hz = arb->mclk_current_hz; 909 *freq_mhz = arb->mclk_actual_mhz;
405 break; 910 break;
406 911
407 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 912 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
408 *freq_hz = arb->gpc2clk_current_hz; 913 *freq_mhz = arb->gpc2clk_actual_mhz;
409 break; 914 break;
410 915
411 default: 916 default:
412 *freq_hz = 0; 917 *freq_mhz = 0;
413 err = -EINVAL; 918 err = -EINVAL;
414 } 919 }
415 mutex_unlock(&arb->req_lock);
416 920
921 spin_unlock(&arb->data_lock);
417 return err; 922 return err;
418} 923}
419 924
420int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g, 925int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
421 u32 api_domain, u64 *freq_hz) 926 u32 api_domain, u16 *freq_mhz)
422{ 927{
423 /* TODO: measure clocks from counters */ 928 /* TODO: measure clocks from counters */
424 return nvgpu_clk_arb_get_arbiter_actual_hz(g, api_domain, freq_hz); 929 return nvgpu_clk_arb_get_arbiter_actual_mhz(g, api_domain, freq_mhz);
425} 930}
426 931
427int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, 932int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
428 u64 *min_hz, u64 *max_hz) 933 u16 *min_mhz, u16 *max_mhz)
429{ 934{
430 return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain, 935 return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain,
431 min_hz, max_hz); 936 min_mhz, max_mhz);
432} 937}
433 938
434u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) 939u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
@@ -441,3 +946,67 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
441{ 946{
442 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); 947 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
443} 948}
949
950#ifdef CONFIG_DEBUG_FS
951static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
952{
953 struct gk20a *g = s->private;
954 struct nvgpu_clk_arb *arb = g->clk_arb;
955 u64 num;
956 s64 tmp, avg, std, max, min;
957
958 /* Make copy of structure to reduce time with lock held */
959 mutex_lock(&arb->debug_lock);
960 std = arb->switch_std;
961 avg = arb->switch_avg;
962 max = arb->switch_max;
963 min = arb->switch_min;
964 num = arb->switch_num;
965 mutex_unlock(&arb->debug_lock);
966
967 tmp = std;
968 do_div(tmp, num);
969 seq_printf(s, "Number of transitions: %lld\n",
970 num);
971 seq_printf(s, "max / min : %lld / %lld usec\n",
972 max, min);
973 seq_printf(s, "avg / std : %lld / %ld usec\n",
974 avg, int_sqrt(tmp));
975
976 return 0;
977}
978
979static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
980{
981 return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
982}
983
984static const struct file_operations nvgpu_clk_arb_stats_fops = {
985 .open = nvgpu_clk_arb_stats_open,
986 .read = seq_read,
987 .llseek = seq_lseek,
988 .release = single_release,
989};
990
991
992static int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
993{
994 struct gk20a_platform *platform = dev_get_drvdata(g->dev);
995
996 struct dentry *gpu_root = platform->debugfs;
997 struct dentry *d;
998
999 gk20a_dbg(gpu_dbg_info, "g=%p", g);
1000
1001 d = debugfs_create_file(
1002 "arb_stats",
1003 S_IRUGO,
1004 gpu_root,
1005 g,
1006 &nvgpu_clk_arb_stats_fops);
1007 if (!d)
1008 return -ENOMEM;
1009
1010 return 0;
1011}
1012#endif