summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/clk/clk_arb.c
diff options
context:
space:
mode:
authorDavid Nieto <dmartineznie@nvidia.com>2016-10-17 11:49:27 -0400
committerDeepak Nibade <dnibade@nvidia.com>2016-12-27 04:56:52 -0500
commit5ab254c6e84d741f56e9bcc93512f82eb7ce518c (patch)
tree9bdc712199fb3f52dd1dcccf020e8aac1d43132b /drivers/gpu/nvgpu/clk/clk_arb.c
parentc123d5056d3af043e2376fa0c5429db7deb3b31b (diff)
gpu: nvgpu: make clock arbiter lockless
Modify the clock arbiter to remove all locking from the arbiter callback, and move the callbacks out of the global work queue. Also fetch updated VF table values from the arbiter init function and on every VF table update. JIRA: DNVGPU-170 Change-Id: I7a7d34c0590522901e06356c06fd8114ebf10f37 Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: http://git-master/r/1243212 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-on: http://git-master/r/1268009 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/clk/clk_arb.c')
-rw-r--r--drivers/gpu/nvgpu/clk/clk_arb.c893
1 files changed, 546 insertions, 347 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 1f7c2aea..aea32cb8 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -19,7 +19,8 @@
19#include <linux/nvgpu.h> 19#include <linux/nvgpu.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22 22#include <linux/rculist.h>
23#include <linux/llist.h>
23#include "clk/clk_arb.h" 24#include "clk/clk_arb.h"
24 25
25 26
@@ -36,8 +37,15 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
36static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); 37static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait);
37 38
38static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); 39static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
39static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work); 40static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
40static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *); 41static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
42static void nvgpu_clk_arb_free_fd(struct kref *refcount);
43static void nvgpu_clk_arb_free_session(struct kref *refcount);
44static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk,
45 u16 mclk, u32 voltuv, u32 voltuv_sram);
46static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
47 u16 *gpc2clk_target, u16 *mclk_target, u32 *voltuv,
48 u32 *voltuv_sram);
41 49
42struct nvgpu_clk_vf_point { 50struct nvgpu_clk_vf_point {
43 u16 mhz; 51 u16 mhz;
@@ -45,58 +53,80 @@ struct nvgpu_clk_vf_point {
45 u32 uvolt_sram; 53 u32 uvolt_sram;
46}; 54};
47 55
56struct nvgpu_clk_vf_table {
57 u32 mclk_num_points;
58 struct nvgpu_clk_vf_point *mclk_points;
59 u32 gpc2clk_num_points;
60 struct nvgpu_clk_vf_point *gpc2clk_points;
61};
62#ifdef CONFIG_DEBUG_FS
63struct nvgpu_clk_arb_debug {
64 s64 switch_max;
65 s64 switch_min;
66 u64 switch_num;
67 s64 switch_avg;
68 s64 switch_std;
69};
70#endif
71
72struct nvgpu_clk_arb_target {
73 u16 mclk;
74 u16 gpc2clk;
75};
76
48struct nvgpu_clk_arb { 77struct nvgpu_clk_arb {
49 spinlock_t sessions_lock; 78 spinlock_t sessions_lock;
50 spinlock_t users_lock; 79 spinlock_t users_lock;
51 spinlock_t req_lock;
52 80
53 struct list_head users; 81 struct list_head users;
54 struct list_head sessions; 82 struct list_head sessions;
55 struct list_head requests; 83 struct llist_head requests;
56 84
57 struct gk20a *g; 85 struct gk20a *g;
58 spinlock_t data_lock;
59 spinlock_t vf_lock;
60 86
61 u16 gpc2clk_actual_mhz; 87 struct nvgpu_clk_arb_target actual_pool[2];
62 u16 gpc2clk_default_mhz; 88 struct nvgpu_clk_arb_target *actual;
63 89
64 u16 mclk_actual_mhz; 90 u16 gpc2clk_default_mhz;
65 u16 mclk_default_mhz; 91 u16 mclk_default_mhz;
66 u32 voltuv_actual; 92 u32 voltuv_actual;
67 93
68 struct work_struct update_fn_work; 94 struct work_struct update_fn_work;
69 struct work_struct vftable_fn_work; 95 struct workqueue_struct *update_work_queue;
70 wait_queue_head_t vftable_wq; 96 struct work_struct vf_table_fn_work;
97 struct workqueue_struct *vf_table_work_queue;
98
99 wait_queue_head_t request_wq;
100
101 struct nvgpu_clk_vf_table *current_vf_table;
102 struct nvgpu_clk_vf_table vf_table_pool[2];
103 u32 vf_table_index;
71 104
72 u16 *mclk_f_points; 105 u16 *mclk_f_points;
73 bool vftable_set; 106 atomic_t req_nr;
74 107
75 struct nvgpu_clk_vf_point *mclk_vf_points;
76 u32 mclk_f_numpoints; 108 u32 mclk_f_numpoints;
77 u16 *gpc2clk_f_points; 109 u16 *gpc2clk_f_points;
78 u32 gpc2clk_f_numpoints; 110 u32 gpc2clk_f_numpoints;
79 struct nvgpu_clk_vf_point *gpc2clk_vf_points;
80 111
81#ifdef CONFIG_DEBUG_FS 112#ifdef CONFIG_DEBUG_FS
82 struct mutex debug_lock; 113 struct nvgpu_clk_arb_debug debug_pool[2];
83 s64 switch_max; 114 struct nvgpu_clk_arb_debug *debug;
84 s64 switch_min;
85 u64 switch_num;
86 s64 switch_avg;
87 s64 switch_std;
88 bool debugfs_set; 115 bool debugfs_set;
89#endif 116#endif
90}; 117};
91 118
92
93struct nvgpu_clk_dev { 119struct nvgpu_clk_dev {
94 struct nvgpu_clk_session *session; 120 struct nvgpu_clk_session *session;
95 struct list_head link; 121 union {
122 struct list_head link;
123 struct llist_node node;
124 };
96 wait_queue_head_t readout_wq; 125 wait_queue_head_t readout_wq;
97 atomic_t poll_mask; 126 atomic_t poll_mask;
98 u16 gpc2clk_target_mhz; 127 u16 gpc2clk_target_mhz;
99 u16 mclk_target_mhz; 128 u16 mclk_target_mhz;
129 struct kref refcount;
100}; 130};
101 131
102struct nvgpu_clk_session { 132struct nvgpu_clk_session {
@@ -104,11 +134,10 @@ struct nvgpu_clk_session {
104 struct gk20a *g; 134 struct gk20a *g;
105 struct kref refcount; 135 struct kref refcount;
106 struct list_head link; 136 struct list_head link;
107 struct list_head targets; 137 struct llist_head targets;
108 138
109 spinlock_t target_lock; 139 struct nvgpu_clk_arb_target target_pool[2];
110 u16 gpc2clk_target_mhz; 140 struct nvgpu_clk_arb_target *target;
111 u16 mclk_target_mhz;
112}; 141};
113 142
114static const struct file_operations completion_dev_ops = { 143static const struct file_operations completion_dev_ops = {
@@ -128,6 +157,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
128 struct nvgpu_clk_arb *arb; 157 struct nvgpu_clk_arb *arb;
129 u16 default_mhz; 158 u16 default_mhz;
130 int err; 159 int err;
160 int index;
161 struct nvgpu_clk_vf_table *table;
131 162
132 gk20a_dbg_fn(""); 163 gk20a_dbg_fn("");
133 164
@@ -140,33 +171,37 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
140 goto init_fail; 171 goto init_fail;
141 } 172 }
142 173
143 arb->gpc2clk_f_numpoints = MAX_F_POINTS;
144 arb->mclk_f_numpoints = MAX_F_POINTS;
145
146 arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
147 if (!arb->gpc2clk_f_points) {
148 err = -ENOMEM;
149 goto init_fail;
150 }
151
152 arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); 174 arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
153 if (!arb->mclk_f_points) { 175 if (!arb->mclk_f_points) {
154 err = -ENOMEM; 176 err = -ENOMEM;
155 goto init_fail; 177 goto init_fail;
156 } 178 }
157 179
158 arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS, 180 arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL);
159 sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); 181 if (!arb->gpc2clk_f_points) {
160 if (!arb->gpc2clk_vf_points) {
161 err = -ENOMEM; 182 err = -ENOMEM;
162 goto init_fail; 183 goto init_fail;
163 } 184 }
164 185
165 arb->mclk_vf_points = kcalloc(MAX_F_POINTS, 186 for (index = 0; index < 2; index++) {
166 sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); 187 table = &arb->vf_table_pool[index];
167 if (!arb->mclk_vf_points) { 188 table->gpc2clk_num_points = MAX_F_POINTS;
168 err = -ENOMEM; 189 table->mclk_num_points = MAX_F_POINTS;
169 goto init_fail; 190
191 table->gpc2clk_points = kcalloc(MAX_F_POINTS,
192 sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
193 if (!table->gpc2clk_points) {
194 err = -ENOMEM;
195 goto init_fail;
196 }
197
198
199 table->mclk_points = kcalloc(MAX_F_POINTS,
200 sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL);
201 if (!table->mclk_points) {
202 err = -ENOMEM;
203 goto init_fail;
204 }
170 } 205 }
171 206
172 g->clk_arb = arb; 207 g->clk_arb = arb;
@@ -174,9 +209,6 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
174 209
175 spin_lock_init(&arb->sessions_lock); 210 spin_lock_init(&arb->sessions_lock);
176 spin_lock_init(&arb->users_lock); 211 spin_lock_init(&arb->users_lock);
177 spin_lock_init(&arb->req_lock);
178 spin_lock_init(&arb->data_lock);
179 spin_lock_init(&arb->vf_lock);
180 212
181 err = g->ops.clk_arb.get_arbiter_clk_default(g, 213 err = g->ops.clk_arb.get_arbiter_clk_default(g,
182 NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz); 214 NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz);
@@ -196,39 +228,58 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
196 228
197 arb->gpc2clk_default_mhz = default_mhz; 229 arb->gpc2clk_default_mhz = default_mhz;
198 230
199 INIT_LIST_HEAD(&arb->users); 231 arb->actual = &arb->actual_pool[0];
200 INIT_LIST_HEAD(&arb->sessions); 232
201 INIT_LIST_HEAD(&arb->requests); 233 atomic_set(&arb->req_nr, 0);
202 234
203 init_waitqueue_head(&arb->vftable_wq); 235 INIT_LIST_HEAD_RCU(&arb->users);
236 INIT_LIST_HEAD_RCU(&arb->sessions);
237 init_llist_head(&arb->requests);
204 238
205 INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb); 239 init_waitqueue_head(&arb->request_wq);
240 arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
241 "vf_table_update");
242 arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
243 "arbiter_update");
244
245
246 INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
206 247
207 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); 248 INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
208 249
209#ifdef CONFIG_DEBUG_FS 250#ifdef CONFIG_DEBUG_FS
210 mutex_init(&arb->debug_lock); 251 arb->debug = &arb->debug_pool[0];
252
211 if (!arb->debugfs_set) { 253 if (!arb->debugfs_set) {
212 if (nvgpu_clk_arb_debugfs_init(g)) 254 if (nvgpu_clk_arb_debugfs_init(g))
213 arb->debugfs_set = true; 255 arb->debugfs_set = true;
214 } 256 }
215#endif 257#endif
216 err = nvgpu_clk_arb_update_vftable(arb); 258 err = clk_vf_point_cache(g);
217 if (err < 0) 259 if (err < 0)
218 goto init_fail; 260 goto init_fail;
219 261
220 /* Schedule first run */ 262 err = nvgpu_clk_arb_update_vf_table(arb);
221 schedule_work(&arb->update_fn_work); 263 if (err < 0)
264 goto init_fail;
265 do {
266 /* Check that first run is completed */
267 smp_mb();
268 wait_event_interruptible(arb->request_wq,
269 atomic_read(&arb->req_nr));
270 } while (!atomic_read(&arb->req_nr));
222 271
223 return 0; 272 return 0;
224 273
225init_fail: 274init_fail:
226 275
227 kfree(arb->gpc2clk_f_points); 276 kfree(arb->gpc2clk_f_points);
228 kfree(arb->gpc2clk_vf_points);
229
230 kfree(arb->mclk_f_points); 277 kfree(arb->mclk_f_points);
231 kfree(arb->mclk_vf_points); 278
279 for (index = 0; index < 2; index++) {
280 kfree(arb->vf_table_pool[index].gpc2clk_points);
281 kfree(arb->vf_table_pool[index].mclk_points);
282 }
232 283
233 kfree(arb); 284 kfree(arb);
234 285
@@ -275,6 +326,8 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g,
275 atomic_set(&dev->poll_mask, 0); 326 atomic_set(&dev->poll_mask, 0);
276 327
277 dev->session = session; 328 dev->session = session;
329 kref_init(&dev->refcount);
330
278 kref_get(&session->refcount); 331 kref_get(&session->refcount);
279 332
280 *_dev = dev; 333 *_dev = dev;
@@ -305,15 +358,15 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
305 session->g = g; 358 session->g = g;
306 359
307 kref_init(&session->refcount); 360 kref_init(&session->refcount);
308 spin_lock_init(&session->target_lock);
309 361
310 session->zombie = false; 362 session->zombie = false;
311 session->mclk_target_mhz = arb->mclk_default_mhz; 363 session->target = &session->target_pool[0];
312 session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz; 364 session->target->mclk = arb->mclk_default_mhz;
313 INIT_LIST_HEAD(&session->targets); 365 session->target->gpc2clk = arb->gpc2clk_default_mhz;
366 init_llist_head(&session->targets);
314 367
315 spin_lock(&arb->sessions_lock); 368 spin_lock(&arb->sessions_lock);
316 list_add_tail(&session->link, &arb->sessions); 369 list_add_tail_rcu(&session->link, &arb->sessions);
317 spin_unlock(&arb->sessions_lock); 370 spin_unlock(&arb->sessions_lock);
318 371
319 *_session = session; 372 *_session = session;
@@ -321,19 +374,34 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
321 return 0; 374 return 0;
322} 375}
323 376
324void nvgpu_clk_arb_free_session(struct kref *refcount) 377static void nvgpu_clk_arb_free_fd(struct kref *refcount)
378{
379 struct nvgpu_clk_dev *dev = container_of(refcount,
380 struct nvgpu_clk_dev, refcount);
381
382 kfree(dev);
383}
384
385static void nvgpu_clk_arb_free_session(struct kref *refcount)
325{ 386{
326 struct nvgpu_clk_session *session = container_of(refcount, 387 struct nvgpu_clk_session *session = container_of(refcount,
327 struct nvgpu_clk_session, refcount); 388 struct nvgpu_clk_session, refcount);
328 struct nvgpu_clk_arb *arb = session->g->clk_arb; 389 struct nvgpu_clk_arb *arb = session->g->clk_arb;
390 struct nvgpu_clk_dev *dev, *tmp;
391 struct llist_node *head;
329 392
330 gk20a_dbg_fn(""); 393 gk20a_dbg_fn("");
331 394
332 spin_lock(&arb->sessions_lock); 395 spin_lock(&arb->sessions_lock);
333 list_del(&session->link); 396 list_del_rcu(&session->link);
334 spin_unlock(&arb->sessions_lock); 397 spin_unlock(&arb->sessions_lock);
398
399 head = llist_del_all(&session->targets);
400 llist_for_each_entry_safe(dev, tmp, head, node) {
401 kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
402 }
403 synchronize_rcu();
335 kfree(session); 404 kfree(session);
336;
337} 405}
338 406
339void nvgpu_clk_arb_release_session(struct gk20a *g, 407void nvgpu_clk_arb_release_session(struct gk20a *g,
@@ -346,7 +414,7 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
346 session->zombie = true; 414 session->zombie = true;
347 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 415 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
348 416
349 schedule_work(&arb->update_fn_work); 417 queue_work(arb->update_work_queue, &arb->update_fn_work);
350} 418}
351 419
352int nvgpu_clk_arb_install_event_fd(struct gk20a *g, 420int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
@@ -363,7 +431,7 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
363 return fd; 431 return fd;
364 432
365 spin_lock(&arb->users_lock); 433 spin_lock(&arb->users_lock);
366 list_add_tail(&dev->link, &arb->users); 434 list_add_tail_rcu(&dev->link, &arb->users);
367 spin_unlock(&arb->users_lock); 435 spin_unlock(&arb->users_lock);
368 436
369 *event_fd = fd; 437 *event_fd = fd;
@@ -388,121 +456,159 @@ int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
388 return 0; 456 return 0;
389} 457}
390 458
391static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb) 459static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
392{ 460{
393 struct gk20a *g = arb->g; 461 struct gk20a *g = arb->g;
462 struct nvgpu_clk_vf_table *table;
394 463
395 int i; 464 int i, j;
396 int status = 0; 465 int status = 0;
397 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0; 466 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
398 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0; 467 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
468 u16 gpc2clk_min, gpc2clk_max, clk_cur;
469 u16 mclk_min, mclk_max;
470 u32 num_points;
471
472 table = ACCESS_ONCE(arb->current_vf_table);
473 /* make flag visible when all data has resolved in the tables */
474 smp_rmb();
399 475
400 /* the flag must be visible in all threads */ 476 table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
401 mb(); 477 &arb->vf_table_pool[0];
402 ACCESS_ONCE(arb->vftable_set) = false;
403 478
404 spin_lock(&arb->vf_lock); 479 /* Get allowed memory ranges */
480 if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
481 &gpc2clk_min, &gpc2clk_max) < 0)
482 goto exit_vf_table;
483 if (nvgpu_clk_arb_get_arbiter_clk_range(g, NVGPU_GPU_CLK_DOMAIN_MCLK,
484 &mclk_min, &mclk_max) < 0)
485 goto exit_vf_table;
405 486
406 if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK, 487 if (!clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
407 &arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) { 488 &table->gpc2clk_num_points, arb->gpc2clk_f_points) < 0) {
408 gk20a_err(dev_from_gk20a(g), 489 gk20a_err(dev_from_gk20a(g),
409 "failed to fetch GPC2CLK frequency points"); 490 "failed to fetch GPC2CLK frequency points");
410 goto exit_vftable; 491 goto exit_vf_table;
411 } 492 }
412 if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK, 493 if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
413 &arb->mclk_f_numpoints, arb->mclk_f_points) < 0) { 494 &table->mclk_num_points, arb->mclk_f_points) < 0) {
414 gk20a_err(dev_from_gk20a(g), 495 gk20a_err(dev_from_gk20a(g),
415 "failed to fetch MCLK frequency points"); 496 "failed to fetch MCLK frequency points");
416 goto exit_vftable; 497 goto exit_vf_table;
417 } 498 }
418 499
500 memset(table->mclk_points, 0,
501 table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point));
502 memset(table->gpc2clk_points, 0,
503 table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
504
505 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
506 i < table->mclk_num_points; i++) {
507 if ((arb->mclk_f_points[i] >= mclk_min) &&
508 (arb->mclk_f_points[i] <= mclk_max) &&
509 (arb->mclk_f_points[i] != clk_cur)) {
510
511 table->mclk_points[j].mhz = arb->mclk_f_points[i];
512 mclk_voltuv = mclk_voltuv_sram = 0;
513
514 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
515 &table->mclk_points[j].mhz, &mclk_voltuv,
516 CTRL_VOLT_DOMAIN_LOGIC);
517 if (status < 0) {
518 gk20a_err(dev_from_gk20a(g),
519 "failed to get MCLK LOGIC voltage");
520 goto exit_vf_table;
521 }
522 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
523 &table->mclk_points[j].mhz, &mclk_voltuv_sram,
524 CTRL_VOLT_DOMAIN_SRAM);
525 if (status < 0) {
526 gk20a_err(dev_from_gk20a(g),
527 "failed to get MCLK SRAM voltage");
528 goto exit_vf_table;
529 }
419 530
420 memset(arb->mclk_vf_points, 0, 531 table->mclk_points[j].uvolt = mclk_voltuv;
421 arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point)); 532 table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
422 memset(arb->gpc2clk_vf_points, 0, 533 clk_cur = table->mclk_points[j].mhz;
423 arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point)); 534 j++;
424 535 num_points++;
425 for (i = 0 ; i < arb->mclk_f_numpoints; i++) {
426 arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i];
427 mclk_voltuv = mclk_voltuv_sram = 0;
428
429 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
430 &arb->mclk_vf_points[i].mhz, &mclk_voltuv,
431 CTRL_VOLT_DOMAIN_LOGIC);
432 if (status < 0) {
433 gk20a_err(dev_from_gk20a(g),
434 "failed to get MCLK LOGIC voltage");
435 goto exit_vftable;
436 }
437 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
438 &arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram,
439 CTRL_VOLT_DOMAIN_SRAM);
440 if (status < 0) {
441 gk20a_err(dev_from_gk20a(g),
442 "failed to get MCLK SRAM voltage");
443 goto exit_vftable;
444 } 536 }
445
446 arb->mclk_vf_points[i].uvolt = mclk_voltuv;
447 arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram;
448 } 537 }
538 table->mclk_num_points = num_points;
539
540 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
541 i < table->gpc2clk_num_points; i++) {
542 if ((arb->gpc2clk_f_points[i] >= gpc2clk_min) &&
543 (arb->gpc2clk_f_points[i] <= gpc2clk_max) &&
544 (arb->gpc2clk_f_points[i] != clk_cur)) {
545
546 table->gpc2clk_points[j].mhz = arb->gpc2clk_f_points[i];
547 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
548
549 status = clk_domain_get_f_or_v(g,
550 CTRL_CLK_DOMAIN_GPC2CLK,
551 &table->gpc2clk_points[j].mhz, &gpc2clk_voltuv,
552 CTRL_VOLT_DOMAIN_LOGIC);
553 if (status < 0) {
554 gk20a_err(dev_from_gk20a(g),
555 "failed to get GPC2CLK LOGIC voltage");
556 goto exit_vf_table;
557 }
449 558
450 for (i = 0 ; i < arb->gpc2clk_f_numpoints; i++) { 559 status = clk_domain_get_f_or_v(g,
451 arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i]; 560 CTRL_CLK_DOMAIN_GPC2CLK,
452 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0; 561 &table->gpc2clk_points[j].mhz,
453 562 &gpc2clk_voltuv_sram,
454 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, 563 CTRL_VOLT_DOMAIN_SRAM);
455 &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv, 564 if (status < 0) {
456 CTRL_VOLT_DOMAIN_LOGIC); 565 gk20a_err(dev_from_gk20a(g),
457 if (status < 0) { 566 "failed to get GPC2CLK SRAM voltage");
458 gk20a_err(dev_from_gk20a(g), 567 goto exit_vf_table;
459 "failed to get GPC2CLK LOGIC voltage"); 568 }
460 goto exit_vftable;
461 }
462 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
463 &arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram,
464 CTRL_VOLT_DOMAIN_SRAM);
465 if (status < 0) {
466 gk20a_err(dev_from_gk20a(g),
467 "failed to get GPC2CLK SRAM voltage");
468 goto exit_vftable;
469 }
470
471 arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv;
472 arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram;
473 569
570 table->gpc2clk_points[j].uvolt = gpc2clk_voltuv;
571 table->gpc2clk_points[j].uvolt_sram =
572 gpc2clk_voltuv_sram;
573 clk_cur = table->gpc2clk_points[j].mhz;
574 j++;
575 num_points++;
576 }
474 } 577 }
578 table->gpc2clk_num_points = num_points;
475 579
476 /* make flag visible when all data has resolved in the tables */ 580 /* make table visible when all data has resolved in the tables */
477 wmb(); 581 smp_wmb();
478 ACCESS_ONCE(arb->vftable_set) = true; 582 xchg(&arb->current_vf_table, table);
479
480 wake_up(&arb->vftable_wq);
481exit_vftable:
482 583
483 spin_unlock(&arb->vf_lock); 584 queue_work(arb->update_work_queue, &arb->update_fn_work);
585exit_vf_table:
484 586
485 return status; 587 return status;
486} 588}
487 589
488void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g) 590void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
489{ 591{
490 struct nvgpu_clk_arb *arb = g->clk_arb; 592 struct nvgpu_clk_arb *arb = g->clk_arb;
491 593
492 ACCESS_ONCE(arb->vftable_set) = false; 594 queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
493 /* Disable the flag in case arbiter gets scheduled first */
494 mb();
495
496 schedule_work(&arb->vftable_fn_work);
497 schedule_work(&arb->update_fn_work);
498} 595}
499 596
500static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work) 597static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
501{ 598{
502 struct nvgpu_clk_arb *arb = 599 struct nvgpu_clk_arb *arb =
503 container_of(work, struct nvgpu_clk_arb, update_fn_work); 600 container_of(work, struct nvgpu_clk_arb, vf_table_fn_work);
601 struct gk20a *g = arb->g;
602 u32 err;
504 603
505 nvgpu_clk_arb_update_vftable(arb); 604 /* get latest vf curve from pmu */
605 err = clk_vf_point_cache(g);
606 if (err) {
607 gk20a_err(dev_from_gk20a(g),
608 "failed to get GPC2CLK SRAM voltage");
609 return;
610 }
611 nvgpu_clk_arb_update_vf_table(arb);
506} 612}
507 613
508static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) 614static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
@@ -512,24 +618,22 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
512 struct nvgpu_clk_session *session; 618 struct nvgpu_clk_session *session;
513 struct nvgpu_clk_dev *dev; 619 struct nvgpu_clk_dev *dev;
514 struct nvgpu_clk_dev *tmp; 620 struct nvgpu_clk_dev *tmp;
621 struct nvgpu_clk_arb_target *target, *actual;
515 struct gk20a *g = arb->g; 622 struct gk20a *g = arb->g;
516 623 struct llist_node *head;
517 struct change_fll_clk fllclk;
518 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
519 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
520 624
521 u32 voltuv, voltuv_sram; 625 u32 voltuv, voltuv_sram;
626 bool mclk_set, gpc2clk_set;
522 627
523 int status; 628 int status;
524 629
525 /* Temporary variables for checking target frequency */ 630 /* Temporary variables for checking target frequency */
526 u16 gpc2clk_target, mclk_target; 631 u16 gpc2clk_target, mclk_target;
527 632
528 /* iteration index */
529 u32 index;
530
531#ifdef CONFIG_DEBUG_FS 633#ifdef CONFIG_DEBUG_FS
532 u64 t0, t1; 634 u64 t0, t1;
635 struct nvgpu_clk_arb_debug *debug;
636
533#endif 637#endif
534 638
535 gk20a_dbg_fn(""); 639 gk20a_dbg_fn("");
@@ -542,37 +646,61 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
542 gpc2clk_target = 0; 646 gpc2clk_target = 0;
543 mclk_target = 0; 647 mclk_target = 0;
544 648
545 spin_lock(&arb->sessions_lock); 649 rcu_read_lock();
546 list_for_each_entry(session, &arb->sessions, link) { 650 list_for_each_entry_rcu(session, &arb->sessions, link) {
547 if (!session->zombie) { 651 if (!session->zombie) {
548 spin_lock(&arb->req_lock); 652 mclk_set = false;
549 spin_lock(&session->target_lock); 653 gpc2clk_set = false;
654 target = ACCESS_ONCE(session->target) ==
655 &session->target_pool[0] ?
656 &session->target_pool[1] :
657 &session->target_pool[0];
658 /* Do not reorder pointer */
659 smp_rmb();
660 head = llist_del_all(&session->targets);
661 if (head) {
662
663 /* Copy over state */
664 target->mclk = session->target->mclk;
665 target->gpc2clk = session->target->gpc2clk;
666 /* Query the latest committed request */
667 llist_for_each_entry_safe(dev, tmp, head,
668 node) {
669 if (!mclk_set && dev->mclk_target_mhz) {
670 target->mclk =
671 dev->mclk_target_mhz;
672 mclk_set = true;
673 }
674 if (!gpc2clk_set &&
675 dev->gpc2clk_target_mhz) {
676 target->gpc2clk =
677 dev->gpc2clk_target_mhz;
678 gpc2clk_set = true;
679 }
680 kref_get(&dev->refcount);
681 llist_add(&dev->node, &arb->requests);
682 }
683 /* Ensure target is updated before ptr sawp */
684 smp_wmb();
685 xchg(&session->target, target);
686 }
550 687
551 mclk_target = mclk_target > session->mclk_target_mhz ? 688 mclk_target = mclk_target > session->target->mclk ?
552 mclk_target : session->mclk_target_mhz; 689 mclk_target : session->target->mclk;
553 690
554 gpc2clk_target = 691 gpc2clk_target =
555 gpc2clk_target > session->gpc2clk_target_mhz ? 692 gpc2clk_target > session->target->gpc2clk ?
556 gpc2clk_target : session->gpc2clk_target_mhz; 693 gpc2clk_target : session->target->gpc2clk;
557 /* Move processed requests to notification list*/
558 list_for_each_entry_safe(dev, tmp, &session->targets,
559 link) {
560 list_del_init(&dev->link);
561 list_add_tail(&dev->link, &arb->requests);
562 }
563 spin_unlock(&session->target_lock);
564 spin_unlock(&arb->req_lock);
565
566 } 694 }
567 } 695 }
568 spin_unlock(&arb->sessions_lock); 696 rcu_read_unlock();
569 697
570 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : 698 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
571 arb->gpc2clk_actual_mhz ? gpc2clk_target : 699 arb->actual->gpc2clk ? gpc2clk_target :
572 arb->gpc2clk_default_mhz; 700 arb->gpc2clk_default_mhz;
573 701
574 mclk_target = (mclk_target > 0) ? mclk_target : 702 mclk_target = (mclk_target > 0) ? mclk_target :
575 arb->mclk_actual_mhz ? mclk_target : 703 arb->actual->mclk ? mclk_target :
576 arb->mclk_default_mhz; 704 arb->mclk_default_mhz;
577 705
578 if (!gpc2clk_target && !mclk_target) { 706 if (!gpc2clk_target && !mclk_target) {
@@ -581,163 +709,100 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
581 } 709 }
582 710
583 if (!gpc2clk_target) 711 if (!gpc2clk_target)
584 gpc2clk_target = arb->gpc2clk_actual_mhz; 712 gpc2clk_target = arb->actual->mclk;
585 713
586 do { 714 if (!mclk_target)
587 /* Check that the table is set */ 715 mclk_target = arb->actual->mclk;
588 mb();
589 wait_event(arb->vftable_wq, arb->vftable_set);
590 } while (!ACCESS_ONCE(arb->vftable_set));
591
592 spin_lock(&arb->vf_lock);
593 /* round up the freq requests */
594 for (index = 0; index < arb->gpc2clk_f_numpoints; index++) {
595 if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) {
596 gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
597 gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
598 gpc2clk_voltuv_sram =
599 arb->gpc2clk_vf_points[index].uvolt_sram;
600 break;
601 }
602 }
603 716
604 if (index == arb->gpc2clk_f_numpoints) {
605 gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
606 gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
607 gpc2clk_voltuv_sram =
608 arb->gpc2clk_vf_points[index].uvolt_sram;
609 }
610 717
611 if (!mclk_target) 718 /* Query the table for the closest vf point to program */
612 mclk_target = arb->mclk_actual_mhz; 719 nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, &mclk_target, &voltuv,
720 &voltuv_sram);
613 721
614 for (index = 0; index < arb->mclk_f_numpoints; index++) { 722 if ((arb->actual->gpc2clk == gpc2clk_target) &&
615 if (arb->mclk_vf_points[index].mhz >= mclk_target) { 723 (arb->actual->mclk == mclk_target) &&
616 mclk_target = arb->mclk_vf_points[index].mhz; 724 (arb->voltuv_actual == voltuv)) {
617 mclk_voltuv = arb->mclk_vf_points[index].uvolt; 725 goto exit_arb;
618 mclk_voltuv_sram =
619 arb->mclk_vf_points[index].uvolt_sram;
620 break;
621 }
622 }
623 if (index == arb->mclk_f_numpoints) {
624 mclk_target = arb->mclk_vf_points[index].mhz;
625 mclk_voltuv = arb->mclk_vf_points[index].uvolt;
626 mclk_voltuv_sram =
627 arb->mclk_vf_points[index].uvolt_sram;
628 } 726 }
629 spin_unlock(&arb->vf_lock);
630 727
631 /* Program clocks */ 728 /* Program clocks */
632 /* A change in both mclk of gpc2clk may require a change in voltage */ 729 /* A change in both mclk of gpc2clk may require a change in voltage */
633 if ((arb->gpc2clk_actual_mhz == gpc2clk_target) &&
634 (arb->mclk_actual_mhz == mclk_target)) {
635 goto exit_arb;
636 }
637
638 voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
639 voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
640 gpc2clk_voltuv_sram : mclk_voltuv_sram;
641 730
642 /* if voltage ascends we do: 731 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, mclk_target,
643 * (1) FLL change 732 voltuv, voltuv_sram);
644 * (2) Voltage change
645 * (3) MCLK change
646 * If it goes down
647 * (1) MCLK change
648 * (2) Voltage change
649 * (3) FLL change
650 */
651 733
652 /* descending */ 734 if (status < 0)
653 if (voltuv <= arb->voltuv_actual) { 735 goto exit_arb;
654 status = g->clk_pmu.clk_mclk.change(g, mclk_target);
655 if (status < 0)
656 goto exit_arb;
657 736
658 status = volt_set_voltage(g, voltuv, voltuv_sram); 737 actual = ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
659 if (status < 0) 738 &arb->actual_pool[1] : &arb->actual_pool[0];
660 goto exit_arb;
661 739
662 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; 740 /* do not reorder this pointer */
663 fllclk.clkmhz = gpc2clk_target; 741 smp_rmb();
664 fllclk.voltuv = voltuv; 742 actual->gpc2clk = gpc2clk_target;
665 status = clk_program_fll_clks(g, &fllclk); 743 actual->mclk = mclk_target;
666 if (status < 0) 744 arb->voltuv_actual = voltuv;
667 goto exit_arb;
668 } else {
669 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
670 fllclk.clkmhz = gpc2clk_target;
671 fllclk.voltuv = voltuv;
672 status = clk_program_fll_clks(g, &fllclk);
673 if (status < 0)
674 goto exit_arb;
675 745
676 status = volt_set_voltage(g, voltuv, voltuv_sram); 746 /* Make changes visible to other threads */
677 if (status < 0) 747 smp_wmb();
678 goto exit_arb; 748 xchg(&arb->actual, actual);
679 749
680 status = g->clk_pmu.clk_mclk.change(g, mclk_target); 750 atomic_inc(&arb->req_nr);
681 if (status < 0)
682 goto exit_arb;
683 }
684 751
685 spin_lock(&arb->data_lock); 752 wake_up_interruptible(&arb->request_wq);
686 arb->gpc2clk_actual_mhz = gpc2clk_target;
687 arb->mclk_actual_mhz = mclk_target;
688 arb->voltuv_actual = voltuv;
689 /* Make changes visible to other threads */
690 wmb();
691 753
692 spin_unlock(&arb->data_lock);
693 754
694#ifdef CONFIG_DEBUG_FS 755#ifdef CONFIG_DEBUG_FS
695 g->ops.read_ptimer(g, &t1); 756 g->ops.read_ptimer(g, &t1);
696 arb->switch_num++;
697 757
698 mutex_lock(&arb->debug_lock); 758 debug = arb->debug == &arb->debug_pool[0] ?
699 if (arb->switch_num == 1) { 759 &arb->debug_pool[1] : &arb->debug_pool[0];
700 arb->switch_max = arb->switch_min = 760
701 arb->switch_avg = (t1-t0)/1000; 761 memcpy(debug, arb->debug, sizeof(arb->debug_pool[0]));
702 arb->switch_std = 0; 762 debug->switch_num++;
763
764 if (debug->switch_num == 1) {
765 debug->switch_max = debug->switch_min =
766 debug->switch_avg = (t1-t0)/1000;
767 debug->switch_std = 0;
703 } else { 768 } else {
704 s64 prev_avg; 769 s64 prev_avg;
705 u64 curr = (t1-t0)/1000; 770 u64 curr = (t1-t0)/1000;
706 771
707 arb->switch_max = curr > arb->switch_max ? 772 debug->switch_max = curr > debug->switch_max ?
708 curr : arb->switch_max; 773 curr : debug->switch_max;
709 arb->switch_min = arb->switch_min ? 774 debug->switch_min = debug->switch_min ?
710 (curr < arb->switch_min ? 775 (curr < debug->switch_min ?
711 curr : arb->switch_min) : curr; 776 curr : debug->switch_min) : curr;
712 prev_avg = arb->switch_avg; 777 prev_avg = debug->switch_avg;
713 arb->switch_avg = (curr + 778 debug->switch_avg = (curr +
714 (arb->switch_avg * (arb->switch_num-1))) / 779 (debug->switch_avg * (debug->switch_num-1))) /
715 arb->switch_num; 780 debug->switch_num;
716 arb->switch_std += 781 debug->switch_std +=
717 (curr - arb->switch_avg) * (curr - prev_avg); 782 (curr - debug->switch_avg) * (curr - prev_avg);
718 } 783 }
719 mutex_unlock(&arb->debug_lock); 784 /* commit changes before exchanging debug pointer */
720 785 smp_wmb();
786 xchg(&arb->debug, debug);
721#endif 787#endif
722 788
723exit_arb: 789exit_arb:
724 790
725 spin_lock(&arb->req_lock);
726 /* notify completion for all requests */ 791 /* notify completion for all requests */
727 list_for_each_entry_safe(dev, tmp, &arb->requests, link) { 792 head = llist_del_all(&arb->requests);
793 llist_for_each_entry_safe(dev, tmp, head, node) {
728 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); 794 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
729 wake_up_interruptible(&dev->readout_wq); 795 wake_up_interruptible(&dev->readout_wq);
730 list_del_init(&dev->link); 796 kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
731 } 797 }
732 spin_unlock(&arb->req_lock);
733 798
734 /* notify event for all users */ 799 /* notify event for all users */
735 spin_lock(&arb->users_lock); 800 rcu_read_lock();
736 list_for_each_entry(dev, &arb->users, link) { 801 list_for_each_entry_rcu(dev, &arb->users, link) {
737 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); 802 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
738 wake_up_interruptible(&dev->readout_wq); 803 wake_up_interruptible(&dev->readout_wq);
739 } 804 }
740 spin_unlock(&arb->users_lock); 805 rcu_read_unlock();
741} 806}
742 807
743int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, 808int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
@@ -761,17 +826,10 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
761 err = -EINVAL; 826 err = -EINVAL;
762 goto fdput_fd; 827 goto fdput_fd;
763 } 828 }
764 spin_lock(&session->target_lock); 829 kref_get(&dev->refcount);
765 session->mclk_target_mhz = dev->mclk_target_mhz ? dev->mclk_target_mhz : 830 llist_add(&dev->node, &session->targets);
766 session->mclk_target_mhz;
767 session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ?
768 dev->gpc2clk_target_mhz :
769 session->gpc2clk_target_mhz;
770
771 list_add_tail(&dev->link, &session->targets);
772 spin_unlock(&session->target_lock);
773 831
774 schedule_work(&arb->update_fn_work); 832 queue_work(arb->update_work_queue, &arb->update_fn_work);
775 833
776fdput_fd: 834fdput_fd:
777 fdput(fd); 835 fdput(fd);
@@ -799,15 +857,8 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
799 857
800 gk20a_dbg_fn(""); 858 gk20a_dbg_fn("");
801 859
802 spin_lock(&arb->req_lock);
803 spin_lock(&session->target_lock);
804 if (!list_empty(&dev->link))
805 list_del_init(&dev->link);
806 spin_unlock(&session->target_lock);
807 spin_unlock(&arb->req_lock);
808
809 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 860 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
810 kfree(dev); 861 kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
811 862
812 return 0; 863 return 0;
813} 864}
@@ -824,10 +875,11 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
824 gk20a_dbg_fn(""); 875 gk20a_dbg_fn("");
825 876
826 spin_lock(&arb->users_lock); 877 spin_lock(&arb->users_lock);
827 list_del(&dev->link); 878 list_del_rcu(&dev->link);
828 spin_unlock(&arb->users_lock); 879 spin_unlock(&arb->users_lock);
829 880
830 kref_put(&session->refcount, nvgpu_clk_arb_free_session); 881 kref_put(&session->refcount, nvgpu_clk_arb_free_session);
882 synchronize_rcu();
831 kfree(dev); 883 kfree(dev);
832 884
833 return 0; 885 return 0;
@@ -875,24 +927,27 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
875 u32 api_domain, u16 *freq_mhz) 927 u32 api_domain, u16 *freq_mhz)
876{ 928{
877 int err = 0; 929 int err = 0;
930 struct nvgpu_clk_arb_target *target;
878 931
879 spin_lock(&session->target_lock); 932 do {
880 933 target = ACCESS_ONCE(session->target);
881 switch (api_domain) { 934 /* no reordering of this pointer */
882 case NVGPU_GPU_CLK_DOMAIN_MCLK: 935 smp_rmb();
883 *freq_mhz = session->mclk_target_mhz;
884 break;
885 936
886 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 937 switch (api_domain) {
887 *freq_mhz = session->gpc2clk_target_mhz; 938 case NVGPU_GPU_CLK_DOMAIN_MCLK:
888 break; 939 *freq_mhz = target->mclk;
940 break;
889 941
890 default: 942 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
891 *freq_mhz = 0; 943 *freq_mhz = target->gpc2clk;
892 err = -EINVAL; 944 break;
893 }
894 945
895 spin_unlock(&session->target_lock); 946 default:
947 *freq_mhz = 0;
948 err = -EINVAL;
949 }
950 } while (target != ACCESS_ONCE(session->target));
896 return err; 951 return err;
897} 952}
898 953
@@ -901,24 +956,27 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
901{ 956{
902 struct nvgpu_clk_arb *arb = g->clk_arb; 957 struct nvgpu_clk_arb *arb = g->clk_arb;
903 int err = 0; 958 int err = 0;
959 struct nvgpu_clk_arb_target *actual;
904 960
905 spin_lock(&arb->data_lock); 961 do {
906 962 actual = ACCESS_ONCE(arb->actual);
907 switch (api_domain) { 963 /* no reordering of this pointer */
908 case NVGPU_GPU_CLK_DOMAIN_MCLK: 964 smp_rmb();
909 *freq_mhz = arb->mclk_actual_mhz;
910 break;
911 965
912 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: 966 switch (api_domain) {
913 *freq_mhz = arb->gpc2clk_actual_mhz; 967 case NVGPU_GPU_CLK_DOMAIN_MCLK:
914 break; 968 *freq_mhz = actual->mclk;
969 break;
915 970
916 default: 971 case NVGPU_GPU_CLK_DOMAIN_GPC2CLK:
917 *freq_mhz = 0; 972 *freq_mhz = actual->gpc2clk;
918 err = -EINVAL; 973 break;
919 }
920 974
921 spin_unlock(&arb->data_lock); 975 default:
976 *freq_mhz = 0;
977 err = -EINVAL;
978 }
979 } while (actual != ACCESS_ONCE(arb->actual));
922 return err; 980 return err;
923} 981}
924 982
@@ -948,22 +1006,163 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
948 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); 1006 return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints);
949} 1007}
950 1008
1009static void nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
1010 u16 *gpc2clk, u16 *mclk, u32 *voltuv,
1011 u32 *voltuv_sram)
1012{
1013 u16 gpc2clk_target, mclk_target;
1014 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
1015 u32 mclk_voltuv, mclk_voltuv_sram;
1016 struct nvgpu_clk_vf_table *table;
1017 int index;
1018
1019 gpc2clk_target = *gpc2clk;
1020 mclk_target = *mclk;
1021 gpc2clk_voltuv = 0;
1022 gpc2clk_voltuv_sram = 0;
1023 mclk_voltuv = 0;
1024 mclk_voltuv_sram = 0;
1025
1026 do {
1027 table = ACCESS_ONCE(arb->current_vf_table);
1028 /* pointer to table can be updated by callback */
1029 smp_rmb();
1030
1031 if (!table)
1032 continue;
1033 /* round up the freq requests */
1034 for (index = 0; index < table->gpc2clk_num_points; index++) {
1035 if (table->gpc2clk_points[index].mhz >=
1036 gpc2clk_target) {
1037 gpc2clk_target =
1038 table->gpc2clk_points[index].mhz;
1039 gpc2clk_voltuv =
1040 table->gpc2clk_points[index].uvolt;
1041 gpc2clk_voltuv_sram =
1042 table->gpc2clk_points[index].uvolt_sram;
1043 break;
1044 }
1045 }
1046
1047 if (index == table->gpc2clk_num_points) {
1048 gpc2clk_target = table->gpc2clk_points[index].mhz;
1049 gpc2clk_voltuv = table->gpc2clk_points[index].uvolt;
1050 gpc2clk_voltuv_sram =
1051 table->gpc2clk_points[index].uvolt_sram;
1052 }
1053
1054 for (index = 0; index < table->mclk_num_points; index++) {
1055 if (table->mclk_points[index].mhz >= mclk_target) {
1056 mclk_target = table->mclk_points[index].mhz;
1057 mclk_voltuv = table->mclk_points[index].uvolt;
1058 mclk_voltuv_sram =
1059 table->mclk_points[index].uvolt_sram;
1060 break;
1061 }
1062 }
1063 if (index == table->mclk_num_points) {
1064 mclk_target = table->mclk_points[index].mhz;
1065 mclk_voltuv = table->mclk_points[index].uvolt;
1066 mclk_voltuv_sram =
1067 table->mclk_points[index].uvolt_sram;
1068 }
1069 } while (!table ||
1070 (ACCESS_ONCE(arb->current_vf_table) != table));
1071
1072 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
1073 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
1074 gpc2clk_voltuv_sram : mclk_voltuv_sram;
1075
1076 *gpc2clk = gpc2clk_target;
1077 *mclk = mclk_target;
1078}
1079
1080static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
1081 u16 mclk_target, u32 voltuv, u32 voltuv_sram)
1082{
1083 struct change_fll_clk fllclk;
1084 struct nvgpu_clk_arb *arb = g->clk_arb;
1085 int status;
1086
1087 /* if voltage ascends we do:
1088 * (1) FLL change
1089 * (2) Voltage change
1090 * (3) MCLK change
1091 * If it goes down
1092 * (1) MCLK change
1093 * (2) Voltage change
1094 * (3) FLL change
1095 */
1096
1097 /* descending */
1098 if (voltuv < arb->voltuv_actual) {
1099 status = g->clk_pmu.clk_mclk.change(g, mclk_target);
1100 if (status < 0)
1101 return status;
1102
1103 status = volt_set_voltage(g, voltuv, voltuv_sram);
1104 if (status < 0)
1105 return status;
1106
1107 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
1108 fllclk.clkmhz = gpc2clk_target;
1109 fllclk.voltuv = voltuv;
1110 status = clk_program_fll_clks(g, &fllclk);
1111 if (status < 0)
1112 return status;
1113 } else if (voltuv > arb->voltuv_actual) {
1114 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
1115 fllclk.clkmhz = gpc2clk_target;
1116 fllclk.voltuv = voltuv;
1117 status = clk_program_fll_clks(g, &fllclk);
1118 if (status < 0)
1119 return status;
1120
1121 status = volt_set_voltage(g, voltuv, voltuv_sram);
1122 if (status < 0)
1123 return status;
1124
1125 status = g->clk_pmu.clk_mclk.change(g, mclk_target);
1126 if (status < 0)
1127 return status;
1128 } else {
1129 status = g->clk_pmu.clk_mclk.change(g, mclk_target);
1130 if (status < 0)
1131 return status;
1132
1133 fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK;
1134 fllclk.clkmhz = gpc2clk_target;
1135 fllclk.voltuv = voltuv;
1136 status = clk_program_fll_clks(g, &fllclk);
1137 if (status < 0)
1138 return status;
1139
1140 }
1141
1142 return 0;
1143}
1144
951#ifdef CONFIG_DEBUG_FS 1145#ifdef CONFIG_DEBUG_FS
952static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) 1146static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
953{ 1147{
954 struct gk20a *g = s->private; 1148 struct gk20a *g = s->private;
955 struct nvgpu_clk_arb *arb = g->clk_arb; 1149 struct nvgpu_clk_arb *arb = g->clk_arb;
1150 struct nvgpu_clk_arb_debug *debug;
1151
956 u64 num; 1152 u64 num;
957 s64 tmp, avg, std, max, min; 1153 s64 tmp, avg, std, max, min;
958 1154
959 /* Make copy of structure to reduce time with lock held */ 1155 debug = ACCESS_ONCE(arb->debug);
960 mutex_lock(&arb->debug_lock); 1156 /* Make copy of structure and ensure no reordering */
961 std = arb->switch_std; 1157 smp_rmb();
962 avg = arb->switch_avg; 1158 if (!debug)
963 max = arb->switch_max; 1159 return -EINVAL;
964 min = arb->switch_min; 1160
965 num = arb->switch_num; 1161 std = debug->switch_std;
966 mutex_unlock(&arb->debug_lock); 1162 avg = debug->switch_avg;
1163 max = debug->switch_max;
1164 min = debug->switch_min;
1165 num = debug->switch_num;
967 1166
968 tmp = std; 1167 tmp = std;
969 do_div(tmp, num); 1168 do_div(tmp, num);