-rw-r--r--  drivers/gpu/nvgpu/clk/clk_arb.c    893
-rw-r--r--  drivers/gpu/nvgpu/pstate/pstate.c    4
2 files changed, 546 insertions, 351 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 1f7c2aea..aea32cb8 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -19,7 +19,8 @@
 #include <linux/nvgpu.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
-
+#include <linux/rculist.h>
+#include <linux/llist.h>
 #include "clk/clk_arb.h"
 
 
@@ -36,8 +37,15 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
 static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
 
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
-static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work);
-static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *);
+static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
+static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
+static void nvgpu_clk_arb_free_fd(struct kref *refcount);
+static void nvgpu_clk_arb_free_session(struct kref *refcount);
+static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk,
+	u16 mclk, u32 voltuv, u32 voltuv_sram);
+static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
+	u16 *gpc2clk_target, u16 *mclk_target, u32 *voltuv,
+	u32 *voltuv_sram);
 
 struct nvgpu_clk_vf_point {
 	u16 mhz;
@@ -45,58 +53,80 @@ struct nvgpu_clk_vf_point {
 	u32 uvolt_sram;
 };
 
+struct nvgpu_clk_vf_table {
+	u32 mclk_num_points;
+	struct nvgpu_clk_vf_point *mclk_points;
+	u32 gpc2clk_num_points;
+	struct nvgpu_clk_vf_point *gpc2clk_points;
+};
+#ifdef CONFIG_DEBUG_FS
+struct nvgpu_clk_arb_debug {
+	s64 switch_max;
+	s64 switch_min;
+	u64 switch_num;
+	s64 switch_avg;
+	s64 switch_std;
+};
+#endif
+
+struct nvgpu_clk_arb_target {
+	u16 mclk;
+	u16 gpc2clk;
+};
+
 struct nvgpu_clk_arb {
 	spinlock_t sessions_lock;
 	spinlock_t users_lock;
-	spinlock_t req_lock;
 
 	struct list_head users;
 	struct list_head sessions;
-	struct list_head requests;
+	struct llist_head requests;
 
 	struct gk20a *g;
-	spinlock_t data_lock;
-	spinlock_t vf_lock;
 
-	u16 gpc2clk_actual_mhz;
-	u16 gpc2clk_default_mhz;
+	struct nvgpu_clk_arb_target actual_pool[2];
+	struct nvgpu_clk_arb_target *actual;
 
-	u16 mclk_actual_mhz;
+	u16 gpc2clk_default_mhz;
 	u16 mclk_default_mhz;
 	u32 voltuv_actual;
 
 	struct work_struct update_fn_work;
-	struct work_struct vftable_fn_work;
-	wait_queue_head_t vftable_wq;
+	struct workqueue_struct *update_work_queue;
+	struct work_struct vf_table_fn_work;
+	struct workqueue_struct *vf_table_work_queue;
+
+	wait_queue_head_t request_wq;
+
+	struct nvgpu_clk_vf_table *current_vf_table;
+	struct nvgpu_clk_vf_table vf_table_pool[2];
+	u32 vf_table_index;
 
 	u16 *mclk_f_points;
-	bool vftable_set;
+	atomic_t req_nr;
 
-	struct nvgpu_clk_vf_point *mclk_vf_points;
 	u32 mclk_f_numpoints;
 	u16 *gpc2clk_f_points;
 	u32 gpc2clk_f_numpoints;
-	struct nvgpu_clk_vf_point *gpc2clk_vf_points;
 
 #ifdef CONFIG_DEBUG_FS
-	struct mutex debug_lock;
-	s64 switch_max;
-	s64 switch_min;
-	u64 switch_num;
-	s64 switch_avg;
-	s64 switch_std;
+	struct nvgpu_clk_arb_debug debug_pool[2];
+	struct nvgpu_clk_arb_debug *debug;
 	bool debugfs_set;
 #endif
 };
 
-
 struct nvgpu_clk_dev {
 	struct nvgpu_clk_session *session;
-	struct list_head link;
+	union {
+		struct list_head link;
+		struct llist_node node;
+	};
 	wait_queue_head_t readout_wq;
 	atomic_t poll_mask;
 	u16 gpc2clk_target_mhz;
 	u16 mclk_target_mhz;
+	struct kref refcount;
 };
 
 struct nvgpu_clk_session {
@@ -104,11 +134,10 @@ struct nvgpu_clk_session {
 	struct gk20a *g;
 	struct kref refcount;
 	struct list_head link;
-	struct list_head targets;
+	struct llist_head targets;
 
-	spinlock_t target_lock;
-	u16 gpc2clk_target_mhz;
-	u16 mclk_target_mhz;
+	struct nvgpu_clk_arb_target target_pool[2];
+	struct nvgpu_clk_arb_target *target;
 };
 
 static const struct file_operations completion_dev_ops = {
@@ -128,6 +157,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 	struct nvgpu_clk_arb *arb;
 	u16 default_mhz;
 	int err;
+	int index;
+	struct nvgpu_clk_vf_table *table;
 
 	gk20a_dbg_fn("");
 
@@ -140,33 +171,37 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 		goto init_fail;
 	}
 
-	arb->gpc2clk_f_numpoints = MAX_F_POINTS;
-	arb->mclk_f_numpoints = MAX_F_POINTS;
-
-	arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
-	if (!arb->gpc2clk_f_points) {
-		err = -ENOMEM;
-		goto init_fail;
-	}
-
 	arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
 	if (!arb->mclk_f_points) {
 		err = -ENOMEM;
 		goto init_fail;
 	}
 
-	arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS,
-		sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
-	if (!arb->gpc2clk_vf_points) {
+	arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
+	if (!arb->gpc2clk_f_points) {
 		err = -ENOMEM;
 		goto init_fail;
 	}
 
-	arb->mclk_vf_points = kcalloc(MAX_F_POINTS,
-		sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
-	if (!arb->mclk_vf_points) {
-		err = -ENOMEM;
-		goto init_fail;
+	for (index = 0; index < 2; index++) {
+		table = &arb->vf_table_pool[index];
+		table->gpc2clk_num_points = MAX_F_POINTS;
+		table->mclk_num_points = MAX_F_POINTS;
+
+		table->gpc2clk_points = kcalloc(MAX_F_POINTS,
+			sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
+		if (!table->gpc2clk_points) {
+			err = -ENOMEM;
+			goto init_fail;
+		}
+
+		table->mclk_points = kcalloc(MAX_F_POINTS,
+			sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
+		if (!table->mclk_points) {
+			err = -ENOMEM;
+			goto init_fail;
+		}
 	}
 
 	g->clk_arb = arb;
@@ -174,9 +209,6 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 
 	spin_lock_init(&arb->sessions_lock);
 	spin_lock_init(&arb->users_lock);
-	spin_lock_init(&arb->req_lock);
-	spin_lock_init(&arb->data_lock);
-	spin_lock_init(&arb->vf_lock);
 
 	err = g->ops.clk_arb.get_arbiter_clk_default(g,
 			NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz);
@@ -196,39 +228,58 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 
 	arb->gpc2clk_default_mhz = default_mhz;
 
-	INIT_LIST_HEAD(&arb->users);
-	INIT_LIST_HEAD(&arb->sessions);
-	INIT_LIST_HEAD(&arb->requests);
+	arb->actual = &arb->actual_pool[0];
+
+	atomic_set(&arb->req_nr, 0);
 
-	init_waitqueue_head(&arb->vftable_wq);
+	INIT_LIST_HEAD_RCU(&arb->users);
+	INIT_LIST_HEAD_RCU(&arb->sessions);
+	init_llist_head(&arb->requests);
 
-	INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb);
+	init_waitqueue_head(&arb->request_wq);
+	arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
+		"vf_table_update");
+	arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
+		"arbiter_update");
+
+	INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
 
 	INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
 
 #ifdef CONFIG_DEBUG_FS
-	mutex_init(&arb->debug_lock);
+	arb->debug = &arb->debug_pool[0];
+
 	if (!arb->debugfs_set) {
 		if (nvgpu_clk_arb_debugfs_init(g))
 			arb->debugfs_set = true;
 	}
 #endif
-	err = nvgpu_clk_arb_update_vftable(arb);
+	err = clk_vf_point_cache(g);
 	if (err < 0)
 		goto init_fail;
 
-	/* Schedule first run */
-	schedule_work(&arb->update_fn_work);
+	err = nvgpu_clk_arb_update_vf_table(arb);
+	if (err < 0)
+		goto init_fail;
+	do {
+		/* Check that first run is completed */
+		smp_mb();
+		wait_event_interruptible(arb->request_wq,
+			atomic_read(&arb->req_nr));
+	} while (!atomic_read(&arb->req_nr));
 
 	return 0;
 
 init_fail:
 
 	kfree(arb->gpc2clk_f_points);
-	kfree(arb->gpc2clk_vf_points);
-
 	kfree(arb->mclk_f_points);
-	kfree(arb->mclk_vf_points);
+
+	for (index = 0; index < 2; index++) {
+		kfree(arb->vf_table_pool[index].gpc2clk_points);
+		kfree(arb->vf_table_pool[index].mclk_points);
+	}
 
 	kfree(arb);
 
@@ -275,6 +326,8 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g,
 	atomic_set(&dev->poll_mask, 0);
 
 	dev->session = session;
+	kref_init(&dev->refcount);
+
 	kref_get(&session->refcount);
 
 	*_dev = dev;
@@ -305,15 +358,15 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	session->g = g;
 
 	kref_init(&session->refcount);
-	spin_lock_init(&session->target_lock);
 
 	session->zombie = false;
-	session->mclk_target_mhz = arb->mclk_default_mhz;
-	session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
-	INIT_LIST_HEAD(&session->targets);
+	session->target = &session->target_pool[0];
+	session->target->mclk = arb->mclk_default_mhz;
+	session->target->gpc2clk = arb->gpc2clk_default_mhz;
+	init_llist_head(&session->targets);
 
 	spin_lock(&arb->sessions_lock);
-	list_add_tail(&session->link, &arb->sessions);
+	list_add_tail_rcu(&session->link, &arb->sessions);
 	spin_unlock(&arb->sessions_lock);
 
 	*_session = session;
@@ -321,19 +374,34 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	return 0;
 }
 
-void nvgpu_clk_arb_free_session(struct kref *refcount)
+static void nvgpu_clk_arb_free_fd(struct kref *refcount)
+{
+	struct nvgpu_clk_dev *dev = container_of(refcount,
+			struct nvgpu_clk_dev, refcount);
+
+	kfree(dev);
+}
+
+static void nvgpu_clk_arb_free_session(struct kref *refcount)
 {
 	struct nvgpu_clk_session *session = container_of(refcount,
 			struct nvgpu_clk_session, refcount);
 	struct nvgpu_clk_arb *arb = session->g->clk_arb;
+	struct nvgpu_clk_dev *dev, *tmp;
+	struct llist_node *head;
 
 	gk20a_dbg_fn("");
 
 	spin_lock(&arb->sessions_lock);
-	list_del(&session->link);
+	list_del_rcu(&session->link);
 	spin_unlock(&arb->sessions_lock);
+
+	head = llist_del_all(&session->targets);
+	llist_for_each_entry_safe(dev, tmp, head, node) {
+		kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+	}
+	synchronize_rcu();
 	kfree(session);
-;
 }
 
 void nvgpu_clk_arb_release_session(struct gk20a *g,
@@ -346,7 +414,7 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
 	session->zombie = true;
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
 
-	schedule_work(&arb->update_fn_work);
+	queue_work(arb->update_work_queue, &arb->update_fn_work);
 }
 
 int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
@@ -363,7 +431,7 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
 		return fd;
 
 	spin_lock(&arb->users_lock);
-	list_add_tail(&dev->link, &arb->users);
+	list_add_tail_rcu(&dev->link, &arb->users);
 	spin_unlock(&arb->users_lock);
 
 	*event_fd = fd;
@@ -388,121 +456,159 @@ int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
 	return 0;
 }
 
-static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb)
+static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
 {
 	struct gk20a *g = arb->g;
+	struct nvgpu_clk_vf_table *table;
 
-	int i;
+	int i, j;
 	int status = 0;
 	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
 	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
+	u16 gpc2clk_min, gpc2clk_max, clk_cur;
+	u16 mclk_min, mclk_max;
+	u32 num_points;
+
+	table = ACCESS_ONCE(arb->current_vf_table);
+	/* make flag visible when all data has resolved in the tables */
+	smp_rmb();
 
-	/* the flag must be visible in all threads */
-	mb();
-	ACCESS_ONCE(arb->vftable_set) = false;
+	table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
+		&arb->vf_table_pool[0];
 
-	spin_lock(&arb->vf_lock);
+	/* Get allowed memory ranges */
+	if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
+			&gpc2clk_min, &gpc2clk_max) < 0)
+		goto exit_vf_table;
+	if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_MCLK,
+			&mclk_min, &mclk_max) < 0)
+		goto exit_vf_table;
 
 	if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
-		&arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) {
+		&table->gpc2clk_num_points, arb->gpc2clk_f_points) < 0) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to fetch GPC2CLK frequency points");
-		goto exit_vftable;
+		goto exit_vf_table;
 	}
 	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
-		&arb->mclk_f_numpoints, arb->mclk_f_points) < 0) {
+		&table->mclk_num_points, arb->mclk_f_points) < 0) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to fetch MCLK frequency points");
-		goto exit_vftable;
+		goto exit_vf_table;
 	}
 
-	memset(arb->mclk_vf_points, 0,
-		arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
-	memset(arb->gpc2clk_vf_points, 0,
-		arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
-
-	for (i = 0 ; i < arb->mclk_f_numpoints; i++) {
-		arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i];
-		mclk_voltuv = mclk_voltuv_sram = 0;
-
-		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
-			&arb->mclk_vf_points[i].mhz, &mclk_voltuv,
-			CTRL_VOLT_DOMAIN_LOGIC);
-		if (status < 0) {
-			gk20a_err(dev_from_gk20a(g),
-				"failed to get MCLK LOGIC voltage");
-			goto exit_vftable;
-		}
-		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
-			&arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram,
-			CTRL_VOLT_DOMAIN_SRAM);
-		if (status < 0) {
-			gk20a_err(dev_from_gk20a(g),
-				"failed to get MCLK SRAM voltage");
-			goto exit_vftable;
-		}
-
-		arb->mclk_vf_points[i].uvolt = mclk_voltuv;
-		arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram;
-	}
-
-	for (i = 0 ; i < arb->gpc2clk_f_numpoints; i++) {
-		arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i];
-		gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
-
-		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
-			&arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv,
-			CTRL_VOLT_DOMAIN_LOGIC);
-		if (status < 0) {
-			gk20a_err(dev_from_gk20a(g),
-				"failed to get GPC2CLK LOGIC voltage");
-			goto exit_vftable;
-		}
-		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
-			&arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram,
-			CTRL_VOLT_DOMAIN_SRAM);
-		if (status < 0) {
-			gk20a_err(dev_from_gk20a(g),
-				"failed to get GPC2CLK SRAM voltage");
-			goto exit_vftable;
-		}
-
-		arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv;
-		arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram;
+	memset(table->mclk_points, 0,
+		table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point));
+	memset(table->gpc2clk_points, 0,
+		table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
+
+	for (i = 0, j = 0, num_points = 0, clk_cur = 0;
+			i < table->mclk_num_points; i++) {
+		if ((arb->mclk_f_points[i] >= mclk_min) &&
+			(arb->mclk_f_points[i] <= mclk_max) &&
+			(arb->mclk_f_points[i] != clk_cur)) {
+
+			table->mclk_points[j].mhz = arb->mclk_f_points[i];
+			mclk_voltuv = mclk_voltuv_sram = 0;
+
+			status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+				&table->mclk_points[j].mhz, &mclk_voltuv,
+				CTRL_VOLT_DOMAIN_LOGIC);
+			if (status < 0) {
+				gk20a_err(dev_from_gk20a(g),
+					"failed to get MCLK LOGIC voltage");
+				goto exit_vf_table;
+			}
+			status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+				&table->mclk_points[j].mhz, &mclk_voltuv_sram,
+				CTRL_VOLT_DOMAIN_SRAM);
+			if (status < 0) {
+				gk20a_err(dev_from_gk20a(g),
+					"failed to get MCLK SRAM voltage");
+				goto exit_vf_table;
+			}
+
+			table->mclk_points[j].uvolt = mclk_voltuv;
+			table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
+			clk_cur = table->mclk_points[j].mhz;
+			j++;
+			num_points++;
+		}
+	}
+	table->mclk_num_points = num_points;
+
+	for (i = 0, j = 0, num_points = 0, clk_cur = 0;
+			i < table->gpc2clk_num_points; i++) {
+		if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) &&
+			(arb->gpc2clk_f_points[i] <= gpc2clk_max) &&
+			(arb->gpc2clk_f_points[i] != clk_cur)) {
+
+			table->gpc2clk_points[j].mhz = arb->gpc2clk_f_points[i];
+			gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
+
+			status = clk_domain_get_f_or_v(g,
+				CTRL_CLK_DOMAIN_GPC2CLK,
+				&table->gpc2clk_points[j].mhz, &gpc2clk_voltuv,
+				CTRL_VOLT_DOMAIN_LOGIC);
+			if (status < 0) {
+				gk20a_err(dev_from_gk20a(g),
+					"failed to get GPC2CLK LOGIC voltage");
+				goto exit_vf_table;
+			}
+
+			status = clk_domain_get_f_or_v(g,
+				CTRL_CLK_DOMAIN_GPC2CLK,
+				&table->gpc2clk_points[j].mhz,
+				&gpc2clk_voltuv_sram,
+				CTRL_VOLT_DOMAIN_SRAM);
+			if (status < 0) {
+				gk20a_err(dev_from_gk20a(g),
+					"failed to get GPC2CLK SRAM voltage");
+				goto exit_vf_table;
+			}
+
+			table->gpc2clk_points[j].uvolt = gpc2clk_voltuv;
+			table->gpc2clk_points[j].uvolt_sram =
+				gpc2clk_voltuv_sram;
+			clk_cur = table->gpc2clk_points[j].mhz;
+			j++;
+			num_points++;
+		}
 	}
+	table->gpc2clk_num_points = num_points;
 
-	/* make flag visible when all data has resolved in the tables */
-	wmb();
-	ACCESS_ONCE(arb->vftable_set) = true;
-
-	wake_up(&arb->vftable_wq);
-exit_vftable:
+	/* make table visible when all data has resolved in the tables */
+	smp_wmb();
+	xchg(&arb->current_vf_table, table);
 
-	spin_unlock(&arb->vf_lock);
+	queue_work(arb->update_work_queue, &arb->update_fn_work);
+exit_vf_table:
 
 	return status;
 }
 
-void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g)
+void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 
-	ACCESS_ONCE(arb->vftable_set) = false;
-	/* Disable the flag in case arbiter gets scheduled first */
-	mb();
-
-	schedule_work(&arb->vftable_fn_work);
-	schedule_work(&arb->update_fn_work);
+	queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
 }
 
-static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work)
+static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
 {
 	struct nvgpu_clk_arb *arb =
-		container_of(work, struct nvgpu_clk_arb, update_fn_work);
+		container_of(work, struct nvgpu_clk_arb, vf_table_fn_work);
+	struct gk20a *g = arb->g;
+	u32 err;
 
-	nvgpu_clk_arb_update_vftable(arb);
+	/* get latest vf curve from pmu */
+	err = clk_vf_point_cache(g);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to cache VF table");
+		return;
+	}
+	nvgpu_clk_arb_update_vf_table(arb);
 }
 
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
@@ -512,24 +618,22 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	struct nvgpu_clk_session *session;
 	struct nvgpu_clk_dev *dev;
 	struct nvgpu_clk_dev *tmp;
+	struct nvgpu_clk_arb_target *target, *actual;
 	struct gk20a *g = arb->g;
-
-	struct change_fll_clk fllclk;
-	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
-	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
+	struct llist_node *head;
 
 	u32 voltuv, voltuv_sram;
+	bool mclk_set, gpc2clk_set;
 
 	int status;
 
 	/* Temporary variables for checking target frequency */
 	u16 gpc2clk_target, mclk_target;
 
-	/* iteration index */
-	u32 index;
-
 #ifdef CONFIG_DEBUG_FS
 	u64 t0, t1;
+	struct nvgpu_clk_arb_debug *debug;
+
 #endif
 
 	gk20a_dbg_fn("");
@@ -542,37 +646,61 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	gpc2clk_target = 0;
 	mclk_target = 0;
 
-	spin_lock(&arb->sessions_lock);
-	list_for_each_entry(session, &arb->sessions, link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(session, &arb->sessions, link) {
 		if (!session->zombie) {
-			spin_lock(&arb->req_lock);
-			spin_lock(&session->target_lock);
+			mclk_set = false;
+			gpc2clk_set = false;
+			target = ACCESS_ONCE(session->target) ==
+				&session->target_pool[0] ?
+				&session->target_pool[1] :
+				&session->target_pool[0];
+			/* Do not reorder pointer */
+			smp_rmb();
+			head = llist_del_all(&session->targets);
+			if (head) {
+
+				/* Copy over state */
+				target->mclk = session->target->mclk;
+				target->gpc2clk = session->target->gpc2clk;
+				/* Query the latest committed request */
+				llist_for_each_entry_safe(dev, tmp, head,
+						node) {
+					if (!mclk_set && dev->mclk_target_mhz) {
+						target->mclk =
+							dev->mclk_target_mhz;
+						mclk_set = true;
+					}
+					if (!gpc2clk_set &&
+						dev->gpc2clk_target_mhz) {
+						target->gpc2clk =
+							dev->gpc2clk_target_mhz;
+						gpc2clk_set = true;
+					}
+					kref_get(&dev->refcount);
+					llist_add(&dev->node, &arb->requests);
+				}
+				/* Ensure target is updated before ptr swap */
+				smp_wmb();
+				xchg(&session->target, target);
+			}
 
-			mclk_target = mclk_target > session->mclk_target_mhz ?
-				mclk_target : session->mclk_target_mhz;
+			mclk_target = mclk_target > session->target->mclk ?
+				mclk_target : session->target->mclk;
 
 			gpc2clk_target =
-				gpc2clk_target > session->gpc2clk_target_mhz ?
-				gpc2clk_target : session->gpc2clk_target_mhz;
-			/* Move processed requests to notification list*/
-			list_for_each_entry_safe(dev, tmp, &session->targets,
-					link) {
-				list_del_init(&dev->link);
-				list_add_tail(&dev->link, &arb->requests);
-			}
-			spin_unlock(&session->target_lock);
-			spin_unlock(&arb->req_lock);
-
+				gpc2clk_target > session->target->gpc2clk ?
+				gpc2clk_target : session->target->gpc2clk;
 		}
 	}
-	spin_unlock(&arb->sessions_lock);
+	rcu_read_unlock();
 
 	gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
-		arb->gpc2clk_actual_mhz ? gpc2clk_target :
+		arb->actual->gpc2clk ? gpc2clk_target :
 		arb->gpc2clk_default_mhz;
 
 	mclk_target = (mclk_target > 0) ? mclk_target :
-		arb->mclk_actual_mhz ? mclk_target :
+		arb->actual->mclk ? mclk_target :
 		arb->mclk_default_mhz;
 
 	if (!gpc2clk_target && !mclk_target) {
@@ -581,163 +709,100 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	}
 
 	if (!gpc2clk_target)
-		gpc2clk_target = arb->gpc2clk_actual_mhz;
+		gpc2clk_target = arb->actual->gpc2clk;
 
-	do {
-		/* Check that the table is set */
-		mb();
-		wait_event(arb->vftable_wq, arb->vftable_set);
-	} while (!ACCESS_ONCE(arb->vftable_set));
-
-	spin_lock(&arb->vf_lock);
-	/* round up the freq requests */
-	for (index = 0; index < arb->gpc2clk_f_numpoints; index++) {
-		if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) {
-			gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
-			gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
-			gpc2clk_voltuv_sram =
-				arb->gpc2clk_vf_points[index].uvolt_sram;
-			break;
-		}
-	}
+	if (!mclk_target)
+		mclk_target = arb->actual->mclk;
 
-	if (index == arb->gpc2clk_f_numpoints) {
-		gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
-		gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
-		gpc2clk_voltuv_sram =
-			arb->gpc2clk_vf_points[index].uvolt_sram;
-	}
 
-	if (!mclk_target)
-		mclk_target = arb->mclk_actual_mhz;
+	/* Query the table for the closest vf point to program */
+	nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, &mclk_target, &voltuv,
+		&voltuv_sram);
 
-	for (index = 0; index < arb->mclk_f_numpoints; index++) {
-		if (arb->mclk_vf_points[index].mhz >= mclk_target) {
-			mclk_target = arb->mclk_vf_points[index].mhz;
-			mclk_voltuv = arb->mclk_vf_points[index].uvolt;
-			mclk_voltuv_sram =
-				arb->mclk_vf_points[index].uvolt_sram;
-			break;
-		}
-	}
-	if (index == arb->mclk_f_numpoints) {
-		mclk_target = arb->mclk_vf_points[index].mhz;
-		mclk_voltuv = arb->mclk_vf_points[index].uvolt;
-		mclk_voltuv_sram =
-			arb->mclk_vf_points[index].uvolt_sram;
+	if ((arb->actual->gpc2clk == gpc2clk_target) &&
+		(arb->actual->mclk == mclk_target) &&
+		(arb->voltuv_actual == voltuv)) {
+		goto exit_arb;
 	}
-	spin_unlock(&arb->vf_lock);
 
 	/* Program clocks */
-	/* A change in both mclk of gpc2clk may require a change in voltage */
-	if ((arb->gpc2clk_actual_mhz == gpc2clk_target) &&
-		(arb->mclk_actual_mhz == mclk_target)) {
-		goto exit_arb;
-	}
-
-	voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
-	voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
-		gpc2clk_voltuv_sram : mclk_voltuv_sram;
+	/* A change in both mclk or gpc2clk may require a change in voltage */
 
-	/* if voltage ascends we do:
-	 * (1) FLL change
-	 * (2) Voltage change
-	 * (3) MCLK change
-	 * If it goes down
-	 * (1) MCLK change
-	 * (2) Voltage change
-	 * (3) FLL change
-	 */
+	status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, mclk_target,
+		voltuv, voltuv_sram);
 
-	/* descending */
-	if (voltuv <= arb->voltuv_actual) {
-		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
-		if (status < 0)
-			goto exit_arb;
+	if (status < 0)
+		goto exit_arb;
 
-		status = volt_set_voltage(g, voltuv, voltuv_sram);
-		if (status < 0)
-			goto exit_arb;
+	actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
+		&arb->actual_pool[1] : &arb->actual_pool[0];
 
-		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-		fllclk.clkmhz = gpc2clk_target;
-		fllclk.voltuv = voltuv;
-		status = clk_program_fll_clks(g, &fllclk);
-		if (status < 0)
-			goto exit_arb;
-	} else {
-		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
-		fllclk.clkmhz = gpc2clk_target;
-		fllclk.voltuv = voltuv;
-		status = clk_program_fll_clks(g, &fllclk);
-		if (status < 0)
-			goto exit_arb;
+	/* do not reorder this pointer */
+	smp_rmb();
+	actual->gpc2clk = gpc2clk_target;
+	actual->mclk = mclk_target;
+	arb->voltuv_actual = voltuv;
 
-		status = volt_set_voltage(g, voltuv, voltuv_sram);
-		if (status < 0)
-			goto exit_arb;
+	/* Make changes visible to other threads */
+	smp_wmb();
+	xchg(&arb->actual, actual);
 
-		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
-		if (status < 0)
-			goto exit_arb;
-	}
+	atomic_inc(&arb->req_nr);
 
-	spin_lock(&arb->data_lock);
-	arb->gpc2clk_actual_mhz = gpc2clk_target;
-	arb->mclk_actual_mhz = mclk_target;
-	arb->voltuv_actual = voltuv;
-	/* Make changes visible to other threads */
-	wmb();
+	wake_up_interruptible(&arb->request_wq);
 
-	spin_unlock(&arb->data_lock);
 
 #ifdef CONFIG_DEBUG_FS
 	g->ops.read_ptimer(g, &t1);
-	arb->switch_num++;
 
-	mutex_lock(&arb->debug_lock);
-	if (arb->switch_num == 1) {
-		arb->switch_max = arb->switch_min =
-			arb->switch_avg = (t1-t0)/1000;
-		arb->switch_std = 0;
+	debug = arb->debug == &arb->debug_pool[0] ?
+		&arb->debug_pool[1] : &arb->debug_pool[0];
+
+	memcpy(debug, arb->debug, sizeof(arb->debug_pool[0]));
+	debug->switch_num++;
+
+	if (debug->switch_num == 1) {
+		debug->switch_max = debug->switch_min =
+			debug->switch_avg = (t1-t0)/1000;
+		debug->switch_std = 0;
 	} else {
 		s64 prev_avg;
 		u64 curr = (t1-t0)/1000;
 
-		arb->switch_max = curr > arb->switch_max ?
-			curr : arb->switch_max;
-		arb->switch_min = arb->switch_min ?
-			(curr < arb->switch_min ?
-				curr : arb->switch_min) : curr;
-		prev_avg = arb->switch_avg;
-		arb->switch_avg = (curr +
-			(arb->switch_avg * (arb->switch_num-1))) /
-			arb->switch_num;
-		arb->switch_std +=
-			(curr - arb->switch_avg) * (curr - prev_avg);
+		debug->switch_max = curr > debug->switch_max ?
+			curr : debug->switch_max;
+		debug->switch_min = debug->switch_min ?
+			(curr < debug->switch_min ?
+				curr : debug->switch_min) : curr;
+		prev_avg = debug->switch_avg;
+		debug->switch_avg = (curr +
+			(debug->switch_avg * (debug->switch_num-1))) /
+			debug->switch_num;
+		debug->switch_std +=
+			(curr - debug->switch_avg) * (curr - prev_avg);
 	}
-	mutex_unlock(&arb->debug_lock);
-
+	/* commit changes before exchanging debug pointer */
+	smp_wmb();
+	xchg(&arb->debug, debug);
 #endif
 
 exit_arb:
 
-	spin_lock(&arb->req_lock);
 	/* notify completion for all requests */
-	list_for_each_entry_safe(dev, tmp, &arb->requests, link) {
+	head = llist_del_all(&arb->requests);
+	llist_for_each_entry_safe(dev, tmp, head, node) {
 		atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
 		wake_up_interruptible(&dev->readout_wq);
-		list_del_init(&dev->link);
+		kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
 	}
-	spin_unlock(&arb->req_lock);
 
 	/* notify event for all users */
-	spin_lock(&arb->users_lock);
-	list_for_each_entry(dev, &arb->users, link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(dev, &arb->users, link) {
 		atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
 		wake_up_interruptible(&dev->readout_wq);
 	}
-	spin_unlock(&arb->users_lock);
+	rcu_read_unlock();
 }
 
 int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
@@ -761,17 +826,10 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
 		err = -EINVAL;
 		goto fdput_fd;
 	}
-	spin_lock(&session->target_lock);
-	session->mclk_target_mhz = dev->mclk_target_mhz ? dev->mclk_target_mhz :
-					session->mclk_target_mhz;
-	session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ?
-					dev->gpc2clk_target_mhz :
-					session->gpc2clk_target_mhz;
-
-	list_add_tail(&dev->link, &session->targets);
-	spin_unlock(&session->target_lock);
+	kref_get(&dev->refcount);
+	llist_add(&dev->node, &session->targets);
 
-	schedule_work(&arb->update_fn_work);
+	queue_work(arb->update_work_queue, &arb->update_fn_work);
 
 fdput_fd:
 	fdput(fd);
@@ -799,15 +857,8 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
 
 	gk20a_dbg_fn("");
 
-	spin_lock(&arb->req_lock);
-	spin_lock(&session->target_lock);
-	if (!list_empty(&dev->link))
-		list_del_init(&dev->link);
-	spin_unlock(&session->target_lock);
-	spin_unlock(&arb->req_lock);
-
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
-	kfree(dev);
+	kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
 
 	return 0;
 }
@@ -824,10 +875,11 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
 	gk20a_dbg_fn("");
 
 	spin_lock(&arb->users_lock);
-	list_del(&dev->link);
+	list_del_rcu(&dev->link);
 	spin_unlock(&arb->users_lock);
 
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
+	synchronize_rcu();
 	kfree(dev);
 
 	return 0;
@@ -875,24 +927,27 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
 		u32 api_domain, u16 *freq_mhz)
 {
 	int err = 0;
+	struct nvgpu_clk_arb_target *target;
 
-	spin_lock(&session->target_lock);
-
-	switch (api_domain) {
-	case NVGPU_GPU_CLK_DOMAIN_MCLK:
-		*freq_mhz = session->mclk_target_mhz;
-		break;
+	do {
+		target = ACCESS_ONCE(session->target);
+		/* no reordering of this pointer */
+		smp_rmb();
 
-	case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
-		*freq_mhz = session->gpc2clk_target_mhz;
-		break;
+		switch (api_domain) {
+		case NVGPU_GPU_CLK_DOMAIN_MCLK:
+			*freq_mhz = target->mclk;
+			break;
 
-	default:
-		*freq_mhz = 0;
-		err = -EINVAL;
-	}
+		case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
+			*freq_mhz = target->gpc2clk;
+			break;
 
-	spin_unlock(&session->target_lock);
+		default:
+			*freq_mhz = 0;
+			err = -EINVAL;
+		}
+	} while (target != ACCESS_ONCE(session->target));
 	return err;
 }
898 953
@@ -901,24 +956,27 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
901{ 956{
902 struct nvgpu_clk_arb *arb = g->clk_arb; 957 struct nvgpu_clk_arb *arb = g->clk_arb;
903 int err = 0; 958 int err = 0;
959 struct nvgpu_clk_arb_target *actual;
904 960
905 spin_lock(&arb->data_lock); 961 do {
906 962 actual = ACCESS_ONCE(arb->actual);
907 switch (api_domain) { 963 /* no reordering of this pointer */
908 case NVGPU_GPU_CLK_DOMAIN_MCLK: 964 smp_rmb();
909 *freq_mhz = arb->mclk_actual_mhz;
910 break;
911 965
912 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 966 switch (api_domain) {
913 *freq_mhz = arb->gpc2clk_actual_mhz; 967 case NVGPU_GPU_CLK_DOMAIN_MCLK:
914 break; 968 *freq_mhz = actual->mclk;
969 break;
915 970
916 default: 971 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
917 *freq_mhz = 0; 972 *freq_mhz = actual->gpc2clk;
918 err = -EINVAL; 973 break;
919 }
920 974
921 spin_unlock(&arb->data_lock); 975 default:
976 *freq_mhz = 0;
977 err = -EINVAL;
978 }
979 } while (actual != ACCESS_ONCE(arb->actual));
922 return err; 980 return err;
923} 981}
924 982
@@ -948,22 +1006,163 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
 	return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
 }
 
+static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
+	u16 *gpc2clk, u16 *mclk, u32 *voltuv,
+	u32 *voltuv_sram)
+{
+	u16 gpc2clk_target, mclk_target;
+	u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
+	u32 mclk_voltuv, mclk_voltuv_sram;
+	struct nvgpu_clk_vf_table *table;
+	int index;
+
+	gpc2clk_target = *gpc2clk;
+	mclk_target = *mclk;
+	gpc2clk_voltuv = 0;
+	gpc2clk_voltuv_sram = 0;
+	mclk_voltuv = 0;
+	mclk_voltuv_sram = 0;
+
+	do {
+		table = ACCESS_ONCE(arb->current_vf_table);
+		/* pointer to table can be updated by callback */
+		smp_rmb();
+
+		if (!table)
+			continue;
+		/* round up the freq requests */
+		for (index = 0; index < table->gpc2clk_num_points; index++) {
+			if (table->gpc2clk_points[index].mhz >=
+					gpc2clk_target) {
+				gpc2clk_target =
+					table->gpc2clk_points[index].mhz;
+				gpc2clk_voltuv =
+					table->gpc2clk_points[index].uvolt;
+				gpc2clk_voltuv_sram =
+					table->gpc2clk_points[index].uvolt_sram;
+				break;
+			}
+		}
+
+		if (index == table->gpc2clk_num_points) {
+			gpc2clk_target = table->gpc2clk_points[index].mhz;
+			gpc2clk_voltuv = table->gpc2clk_points[index].uvolt;
+			gpc2clk_voltuv_sram =
+				table->gpc2clk_points[index].uvolt_sram;
+		}
+
+		for (index = 0; index < table->mclk_num_points; index++) {
+			if (table->mclk_points[index].mhz >= mclk_target) {
+				mclk_target = table->mclk_points[index].mhz;
+				mclk_voltuv = table->mclk_points[index].uvolt;
+				mclk_voltuv_sram =
+					table->mclk_points[index].uvolt_sram;
+				break;
+			}
+		}
+		if (index == table->mclk_num_points) {
+			mclk_target = table->mclk_points[index].mhz;
+			mclk_voltuv = table->mclk_points[index].uvolt;
+			mclk_voltuv_sram =
+				table->mclk_points[index].uvolt_sram;
+		}
+	} while (!table ||
+		(ACCESS_ONCE(arb->current_vf_table) != table));
+
+	*voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
+	*voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
+		gpc2clk_voltuv_sram : mclk_voltuv_sram;
+
+	*gpc2clk = gpc2clk_target;
+	*mclk = mclk_target;
+}
+
+static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
+	u16 mclk_target, u32 voltuv, u32 voltuv_sram)
+{
+	struct change_fll_clk fllclk;
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+	int status;
+
+	/* if voltage ascends we do:
+	 * (1) FLL change
+	 * (2) Voltage change
+	 * (3) MCLK change
+	 * If it goes down
+	 * (1) MCLK change
+	 * (2) Voltage change
+	 * (3) FLL change
+	 */
+
+	/* descending */
+	if (voltuv < arb->voltuv_actual) {
+		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
+		if (status < 0)
+			return status;
+
+		status = volt_set_voltage(g, voltuv, voltuv_sram);
+		if (status < 0)
+			return status;
+
+		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		fllclk.clkmhz = gpc2clk_target;
+		fllclk.voltuv = voltuv;
+		status = clk_program_fll_clks(g, &fllclk);
+		if (status < 0)
+			return status;
+	} else if (voltuv > arb->voltuv_actual) {
+		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		fllclk.clkmhz = gpc2clk_target;
+		fllclk.voltuv = voltuv;
+		status = clk_program_fll_clks(g, &fllclk);
+		if (status < 0)
+			return status;
+
+		status = volt_set_voltage(g, voltuv, voltuv_sram);
+		if (status < 0)
+			return status;
+
+		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
+		if (status < 0)
+			return status;
+	} else {
+		status = g->clk_pmu.clk_mclk.change(g, mclk_target);
+		if (status < 0)
+			return status;
+
+		fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
+		fllclk.clkmhz = gpc2clk_target;
+		fllclk.voltuv = voltuv;
+		status = clk_program_fll_clks(g, &fllclk);
+		if (status < 0)
+			return status;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
 {
 	struct gk20a *g = s->private;
 	struct nvgpu_clk_arb *arb = g->clk_arb;
+	struct nvgpu_clk_arb_debug *debug;
+
 	u64 num;
 	s64 tmp, avg, std, max, min;
 
-	/* Make copy of structure to reduce time with lock held */
-	mutex_lock(&arb->debug_lock);
-	std = arb->switch_std;
-	avg = arb->switch_avg;
-	max = arb->switch_max;
-	min = arb->switch_min;
-	num = arb->switch_num;
-	mutex_unlock(&arb->debug_lock);
+	debug = ACCESS_ONCE(arb->debug);
+	/* Make copy of structure and ensure no reordering */
+	smp_rmb();
+	if (!debug)
+		return -EINVAL;
+
+	std = debug->switch_std;
+	avg = debug->switch_avg;
+	max = debug->switch_max;
+	min = debug->switch_min;
+	num = debug->switch_num;
 
 	tmp = std;
 	do_div(tmp, num);
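Before the pstate.c half of the diff, it is worth spelling out the concurrency pattern the clk_arb.c changes above use again and again: state that used to sit behind a spinlock or mutex (the actual frequencies, the VF table, the debug statistics, the per-session targets) now lives in a two-slot pool; the single writer fills the idle slot, issues a write barrier, and publishes it with xchg(), while readers snapshot through the pointer and retry if it moved. What follows is a minimal userspace C sketch of that pattern, assuming C11 atomics in place of the kernel's ACCESS_ONCE()/smp_rmb()/smp_wmb()/xchg(); the names (struct target, publish, read_snapshot) are illustrative, not from the driver.

/*
 * Double-buffer publication: one writer, many lock-free readers.
 * Sketch only; hypothetical names, C11 atomics instead of kernel barriers.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct target {
	uint16_t mclk;
	uint16_t gpc2clk;
};

static struct target pool[2];
static _Atomic(struct target *) current_target = &pool[0];

/* Single writer: build the new state in the slot readers are not using. */
static void publish(uint16_t mclk, uint16_t gpc2clk)
{
	struct target *cur = atomic_load_explicit(&current_target,
						  memory_order_relaxed);
	struct target *next = (cur == &pool[0]) ? &pool[1] : &pool[0];

	next->mclk = mclk;
	next->gpc2clk = gpc2clk;
	/* release pairs with the readers' acquire, like smp_wmb() + xchg() */
	atomic_store_explicit(&current_target, next, memory_order_release);
}

/* Readers: copy through the pointer, retry if the writer swapped mid-copy. */
static struct target read_snapshot(void)
{
	struct target *t, snap;

	do {
		t = atomic_load_explicit(&current_target,
					 memory_order_acquire);
		snap = *t;
	} while (t != atomic_load_explicit(&current_target,
					   memory_order_acquire));

	return snap;
}

int main(void)
{
	publish(3003, 2581);
	struct target snap = read_snapshot();
	printf("mclk=%u gpc2clk=%u\n", snap.mclk, snap.gpc2clk);
	return 0;
}

As in the driver, a reader can still observe a torn copy if the writer laps it twice between the two pointer loads; the scheme relies on updates being rare relative to reads, which holds for clock changes.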
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index f01b52ad..cf758023 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -149,10 +149,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 	if (err)
 		return err;
 
-	err = clk_vf_point_cache(g);
-	if (err)
-		return err;
-
 	err = pmgr_domain_pmu_setup(g);
 	return err;
 }
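A closing note on the second ingredient of the patch: the request and target queues moved from list_head plus spinlock to llist_head, where any number of producers push with llist_add() and the arbiter worker drains everything with a single llist_del_all(), each node carrying a kref so it survives until both the fd release path and the worker have dropped it. Below is a userspace C sketch of that handoff, again with C11 atomics and hypothetical names (push, del_all, put); it is an illustration of the technique, not the driver's code.

/*
 * Lock-free multi-producer push, single-consumer drain, with a refcount
 * standing in for the kernel's kref. Sketch under stated assumptions.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	atomic_int refcount;
	int payload;
};

static _Atomic(struct node *) list_head;	/* NULL-initialized */

/* llist_add() analog: lock-free push, safe from many threads */
static void push(struct node *n)
{
	struct node *old = atomic_load_explicit(&list_head,
						memory_order_relaxed);
	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak_explicit(&list_head, &old, n,
			memory_order_release, memory_order_relaxed));
}

/* llist_del_all() analog: detach the whole list in one exchange */
static struct node *del_all(void)
{
	return atomic_exchange_explicit(&list_head, NULL,
					memory_order_acquire);
}

/* kref_put() analog: free when the last reference is dropped */
static void put(struct node *n)
{
	if (atomic_fetch_sub_explicit(&n->refcount, 1,
				      memory_order_acq_rel) == 1)
		free(n);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));
		atomic_init(&n->refcount, 1);	/* creator's reference */
		n->payload = i;
		atomic_fetch_add_explicit(&n->refcount, 1,
					  memory_order_relaxed);
		push(n);			/* list holds a reference */
	}

	for (struct node *n = del_all(), *next; n; n = next) {
		next = n->next;
		printf("request %d completed\n", n->payload);
		put(n);		/* drop the list's reference (count 2 -> 1) */
		put(n);		/* drop the creator's reference; node freed */
	}
	return 0;
}

This is why the patch can delete the spin_lock(&arb->req_lock)/list_del_init() dance in the release path: a completion fd can be closed while its request is still queued, and the node simply outlives the fd until the worker's kref_put().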