path: root/drivers/gpu/nvgpu/clk/clk_arb.c
author	Sourab Gupta <sourabg@nvidia.com>	2018-05-04 02:13:33 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-05-18 02:34:32 -0400
commit	fc3ac7d2ae2c7cb856fff58af77bb8d453ff5473 (patch)
tree	412d3ebe84b7ba469b6ff4d49936c4d4b8920c18 /drivers/gpu/nvgpu/clk/clk_arb.c
parent	6a43e51ee362bbf61ca35cda75f31ed3f01862ed (diff)
gpu: nvgpu: move clk_arb.c to common code
Now that clk_arb.c is free of Linux'isms, move it to the clk/ directory.

Jira VQRM-741
Change-Id: I53298c76f834322aa586781cdfd2e6031f4826a1
Signed-off-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1709651
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/clk/clk_arb.c')
-rw-r--r--	drivers/gpu/nvgpu/clk/clk_arb.c	1527
1 file changed, 1527 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
new file mode 100644
index 00000000..fdf17408
--- /dev/null
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -0,0 +1,1527 @@
1/*
2 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/bitops.h>
24#include <nvgpu/lock.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/atomic.h>
27#include <nvgpu/bug.h>
28#include <nvgpu/kref.h>
29#include <nvgpu/log.h>
30#include <nvgpu/barrier.h>
31#include <nvgpu/cond.h>
32#include <nvgpu/list.h>
33#include <nvgpu/clk_arb.h>
34
35#include "gk20a/gk20a.h"
36#include "clk/clk.h"
37#include "pstate/pstate.h"
38#include "lpwr/lpwr.h"
39#include "volt/volt.h"
40
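/*
 * The notification queue is a fixed-size ring buffer; head and tail are
 * atomics so producers and consumers can index it without taking a lock.
 */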
41int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
42 struct nvgpu_clk_notification_queue *queue,
43 size_t events_number) {
44 queue->notifications = nvgpu_kcalloc(g, events_number,
45 sizeof(struct nvgpu_clk_notification));
46 if (!queue->notifications)
47 return -ENOMEM;
48 queue->size = events_number;
49
50 nvgpu_atomic_set(&queue->head, 0);
51 nvgpu_atomic_set(&queue->tail, 0);
52
53 return 0;
54}
55
56void nvgpu_clk_notification_queue_free(struct gk20a *g,
57 struct nvgpu_clk_notification_queue *queue) {
58 nvgpu_kfree(g, queue->notifications);
59 queue->size = 0;
60 nvgpu_atomic_set(&queue->head, 0);
61 nvgpu_atomic_set(&queue->tail, 0);
62}
63
64static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
65 struct nvgpu_clk_notification_queue *queue,
66 u32 alarm_mask) {
67
68 u32 queue_index;
69 u64 timestamp;
70
71 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
72 /* get current timestamp */
73 timestamp = (u64) sched_clock();
74
75 queue->notifications[queue_index].timestamp = timestamp;
76 queue->notifications[queue_index].notification = alarm_mask;
77
78}
79
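/*
 * arb->alarm_mask packs a 32-bit update counter in its upper half and the
 * raised alarm bits in its lower half.  The cmpxchg loop below retries until
 * the new alarm bit is merged without losing a concurrent update, after
 * which the alarm is queued on the global notification queue.
 */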
80static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
81{
82 struct nvgpu_clk_arb *arb = g->clk_arb;
83
84 u64 current_mask;
85 u32 refcnt;
86 u32 alarm_mask;
87 u64 new_mask;
88
89 do {
90 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
91 /* atomic operations are strong so they do not need masks */
92
93 refcnt = ((u32) (current_mask >> 32)) + 1;
94 alarm_mask = (u32) (current_mask & ~0) | alarm;
95 new_mask = ((u64) refcnt << 32) | alarm_mask;
96
97 } while (unlikely(current_mask !=
98 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
99 current_mask, new_mask)));
100
101 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm);
102}
103
104
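/*
 * Rebuild the VF table into whichever vf_table_pool[] entry is not currently
 * published through arb->current_vf_table, then publish it with a write
 * barrier and xchg() so readers do not see a partially built table.
 */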
105static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
106{
107 struct gk20a *g = arb->g;
108 struct nvgpu_clk_vf_table *table;
109
110 u32 i, j;
111 int status = -EINVAL;
112 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
113 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
114 u16 clk_cur;
115 u32 num_points;
116
117 struct clk_set_info *p5_info, *p0_info;
118
119
120 table = NV_ACCESS_ONCE(arb->current_vf_table);
121 /* make flag visible when all data has resolved in the tables */
122 nvgpu_smp_rmb();
123
124 table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
125 &arb->vf_table_pool[0];
126
127 /* Get allowed clock ranges */
128 if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK,
129 &arb->gpc2clk_min,
130 &arb->gpc2clk_max) < 0) {
131 nvgpu_err(g, "failed to fetch GPC2CLK range");
132 goto exit_vf_table;
133 }
134 if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK,
135 &arb->mclk_min,
136 &arb->mclk_max) < 0) {
137 nvgpu_err(g, "failed to fetch MCLK range");
138 goto exit_vf_table;
139 }
140
141 table->gpc2clk_num_points = MAX_F_POINTS;
142 table->mclk_num_points = MAX_F_POINTS;
143
144 if (clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPC2CLK,
145 &table->gpc2clk_num_points, arb->gpc2clk_f_points)) {
146 nvgpu_err(g, "failed to fetch GPC2CLK frequency points");
147 goto exit_vf_table;
148 }
149
150 if (clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_MCLK,
151 &table->mclk_num_points, arb->mclk_f_points)) {
152 nvgpu_err(g, "failed to fetch MCLK frequency points");
153 goto exit_vf_table;
154 }
155 if (!table->mclk_num_points || !table->gpc2clk_num_points) {
156 nvgpu_err(g, "empty queries to f points mclk %d gpc2clk %d",
157 table->mclk_num_points, table->gpc2clk_num_points);
158 status = -EINVAL;
159 goto exit_vf_table;
160 }
161
162 memset(table->mclk_points, 0,
163 table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point));
164 memset(table->gpc2clk_points, 0,
165 table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
166
167 p5_info = pstate_get_clk_set_info(g,
168 CTRL_PERF_PSTATE_P5, clkwhich_mclk);
169 if (!p5_info) {
170 nvgpu_err(g, "failed to get MCLK P5 info");
171 goto exit_vf_table;
172 }
173 p0_info = pstate_get_clk_set_info(g,
174 CTRL_PERF_PSTATE_P0, clkwhich_mclk);
175 if (!p0_info) {
176 nvgpu_err(g, "failed to get MCLK P0 info");
177 goto exit_vf_table;
178 }
179
180 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
181 i < table->mclk_num_points; i++) {
182
183 if ((arb->mclk_f_points[i] >= arb->mclk_min) &&
184 (arb->mclk_f_points[i] <= arb->mclk_max) &&
185 (arb->mclk_f_points[i] != clk_cur)) {
186
187 table->mclk_points[j].mem_mhz = arb->mclk_f_points[i];
188 mclk_voltuv = mclk_voltuv_sram = 0;
189
190 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
191 &table->mclk_points[j].mem_mhz, &mclk_voltuv,
192 CTRL_VOLT_DOMAIN_LOGIC);
193 if (status < 0) {
194 nvgpu_err(g,
195 "failed to get MCLK LOGIC voltage");
196 goto exit_vf_table;
197 }
198 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
199 &table->mclk_points[j].mem_mhz,
200 &mclk_voltuv_sram,
201 CTRL_VOLT_DOMAIN_SRAM);
202 if (status < 0) {
203 nvgpu_err(g, "failed to get MCLK SRAM voltage");
204 goto exit_vf_table;
205 }
206
207 table->mclk_points[j].uvolt = mclk_voltuv;
208 table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
209 clk_cur = table->mclk_points[j].mem_mhz;
210
211 if ((clk_cur >= p5_info->min_mhz) &&
212 (clk_cur <= p5_info->max_mhz))
213 VF_POINT_SET_PSTATE_SUPPORTED(
214 &table->mclk_points[j],
215 CTRL_PERF_PSTATE_P5);
216 if ((clk_cur >= p0_info->min_mhz) &&
217 (clk_cur <= p0_info->max_mhz))
218 VF_POINT_SET_PSTATE_SUPPORTED(
219 &table->mclk_points[j],
220 CTRL_PERF_PSTATE_P0);
221
222 j++;
223 num_points++;
224
225 }
226 }
227 table->mclk_num_points = num_points;
228
229 p5_info = pstate_get_clk_set_info(g,
230 CTRL_PERF_PSTATE_P5, clkwhich_gpc2clk);
231 if (!p5_info) {
232 status = -EINVAL;
233 nvgpu_err(g, "failed to get GPC2CLK P5 info");
234 goto exit_vf_table;
235 }
236
237 p0_info = pstate_get_clk_set_info(g,
238 CTRL_PERF_PSTATE_P0, clkwhich_gpc2clk);
239 if (!p0_info) {
240 status = -EINVAL;
241 nvgpu_err(g, "failed to get GPC2CLK P0 info");
242 goto exit_vf_table;
243 }
244
245 /* GPC2CLK needs to be checked in two passes. The first determines the
246 * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the
247 * second verifies that each clock's minimum is satisfied and sets
248 * the voltages
249 */
250 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
251 i < table->gpc2clk_num_points; i++) {
252 struct set_fll_clk setfllclk;
253
254 if ((arb->gpc2clk_f_points[i] >= arb->gpc2clk_min) &&
255 (arb->gpc2clk_f_points[i] <= arb->gpc2clk_max) &&
256 (arb->gpc2clk_f_points[i] != clk_cur)) {
257
258 table->gpc2clk_points[j].gpc_mhz =
259 arb->gpc2clk_f_points[i];
260 setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i];
261 status = clk_get_fll_clks(g, &setfllclk);
262 if (status < 0) {
263 nvgpu_err(g,
264 "failed to get GPC2CLK slave clocks");
265 goto exit_vf_table;
266 }
267
268 table->gpc2clk_points[j].sys_mhz =
269 setfllclk.sys2clkmhz;
270 table->gpc2clk_points[j].xbar_mhz =
271 setfllclk.xbar2clkmhz;
272
273 clk_cur = table->gpc2clk_points[j].gpc_mhz;
274
275 if ((clk_cur >= p5_info->min_mhz) &&
276 (clk_cur <= p5_info->max_mhz))
277 VF_POINT_SET_PSTATE_SUPPORTED(
278 &table->gpc2clk_points[j],
279 CTRL_PERF_PSTATE_P5);
280 if ((clk_cur >= p0_info->min_mhz) &&
281 (clk_cur <= p0_info->max_mhz))
282 VF_POINT_SET_PSTATE_SUPPORTED(
283 &table->gpc2clk_points[j],
284 CTRL_PERF_PSTATE_P0);
285
286 j++;
287 num_points++;
288 }
289 }
290 table->gpc2clk_num_points = num_points;
291
292 /* Second pass */
293 for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) {
294
295 u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz;
296
297 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
298
299 /* Check sysclk */
300 p5_info = pstate_get_clk_set_info(g,
301 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
302 clkwhich_sys2clk);
303 if (!p5_info) {
304 status = -EINVAL;
305 nvgpu_err(g, "failed to get SYS2CLK P5 info");
306 goto exit_vf_table;
307 }
308
309 /* sys2clk below clk min, need to find correct clock */
310 if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) {
311 for (j = i + 1; j < table->gpc2clk_num_points; j++) {
312
313 if (table->gpc2clk_points[j].sys_mhz >=
314 p5_info->min_mhz) {
315
316
317 table->gpc2clk_points[i].sys_mhz =
318 p5_info->min_mhz;
319
320 alt_gpc2clk = alt_gpc2clk <
321 table->gpc2clk_points[j].
322 gpc_mhz ?
323 table->gpc2clk_points[j].
324 gpc_mhz :
325 alt_gpc2clk;
326 break;
327 }
328 }
329 /* no VF exists that satisfies condition */
330 if (j == table->gpc2clk_num_points) {
331 nvgpu_err(g, "NO SYS2CLK VF point possible");
332 status = -EINVAL;
333 goto exit_vf_table;
334 }
335 }
336
337 /* Check xbarclk */
338 p5_info = pstate_get_clk_set_info(g,
339 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
340 clkwhich_xbar2clk);
341 if (!p5_info) {
342 status = -EINVAL;
343 nvgpu_err(g, "failed to get SYS2CLK P5 info");
344 goto exit_vf_table;
345 }
346
347 /* xbar2clk below clk min, need to find correct clock */
348 if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) {
349 for (j = i; j < table->gpc2clk_num_points; j++) {
350 if (table->gpc2clk_points[j].xbar_mhz >=
351 p5_info->min_mhz) {
352
353 table->gpc2clk_points[i].xbar_mhz =
354 p5_info->min_mhz;
355
356 alt_gpc2clk = alt_gpc2clk <
357 table->gpc2clk_points[j].
358 gpc_mhz ?
359 table->gpc2clk_points[j].
360 gpc_mhz :
361 alt_gpc2clk;
362 break;
363 }
364 }
365 /* no VF exists that satisfies condition */
366 if (j == table->gpc2clk_num_points) {
367 status = -EINVAL;
368 nvgpu_err(g, "NO XBAR2CLK VF point possible");
369
370 goto exit_vf_table;
371 }
372 }
373
374 /* Calculate voltages */
375 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
376 &alt_gpc2clk, &gpc2clk_voltuv,
377 CTRL_VOLT_DOMAIN_LOGIC);
378 if (status < 0) {
379 nvgpu_err(g, "failed to get GPC2CLK LOGIC voltage");
380 goto exit_vf_table;
381 }
382
383 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
384 &alt_gpc2clk,
385 &gpc2clk_voltuv_sram,
386 CTRL_VOLT_DOMAIN_SRAM);
387 if (status < 0) {
388 nvgpu_err(g, "failed to get GPC2CLK SRAM voltage");
389 goto exit_vf_table;
390 }
391
392 table->gpc2clk_points[i].uvolt = gpc2clk_voltuv;
393 table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram;
394 }
395
396 /* make table visible when all data has resolved in the tables */
397 nvgpu_smp_wmb();
398 xchg(&arb->current_vf_table, table);
399
400exit_vf_table:
401
402 if (status < 0)
403 nvgpu_clk_arb_set_global_alarm(g,
404 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
405 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
406
407 return status;
408}
409
410
411static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb)
412{
413 struct gk20a *g = arb->g;
414 u32 err;
415
416 /* get latest vf curve from pmu */
417 err = clk_vf_point_cache(g);
418 if (err) {
419 nvgpu_err(g, "failed to cache VF table");
420 nvgpu_clk_arb_set_global_alarm(g,
421 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
422 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
423
424 return;
425 }
426 nvgpu_clk_arb_update_vf_table(arb);
427}
428
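/*
 * Resolve the requested gpc2clk/mclk targets to supported VF points.  The
 * search runs in a retry loop: if the update callback swaps
 * arb->current_vf_table while the search is in progress, the selection is
 * recomputed against the new table.
 */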
429static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
430 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
431 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
432{
433 u16 gpc2clk_target, mclk_target;
434 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
435 u32 mclk_voltuv, mclk_voltuv_sram;
436 u32 pstate = VF_POINT_INVALID_PSTATE;
437 struct nvgpu_clk_vf_table *table;
438 u32 index, index_mclk;
439 struct nvgpu_clk_vf_point *mclk_vf = NULL;
440
441 do {
442 gpc2clk_target = *gpc2clk;
443 mclk_target = *mclk;
444 gpc2clk_voltuv = 0;
445 gpc2clk_voltuv_sram = 0;
446 mclk_voltuv = 0;
447 mclk_voltuv_sram = 0;
448
449 table = NV_ACCESS_ONCE(arb->current_vf_table);
450 /* pointer to table can be updated by callback */
451 nvgpu_smp_rmb();
452
453 if (!table)
454 continue;
455 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
456 nvgpu_err(arb->g, "found empty table");
457 goto find_exit;
458 }
459 /* First we check MCLK to find out which PSTATE we
460 * are requesting, and from there try to find the minimum
461 * GPC2CLK on the same PSTATE that satisfies the request.
462 * If no GPC2CLK can be found, then we need to up the PSTATE
463 */
464
465recalculate_vf_point:
466 for (index = 0; index < table->mclk_num_points; index++) {
467 if (table->mclk_points[index].mem_mhz >= mclk_target) {
468 mclk_vf = &table->mclk_points[index];
469 break;
470 }
471 }
472 if (index == table->mclk_num_points) {
473 mclk_vf = &table->mclk_points[index-1];
474 index = table->mclk_num_points - 1;
475 }
476 index_mclk = index;
477
478 /* round up the freq requests */
479 for (index = 0; index < table->gpc2clk_num_points; index++) {
480 pstate = VF_POINT_COMMON_PSTATE(
481 &table->gpc2clk_points[index], mclk_vf);
482
483 if ((table->gpc2clk_points[index].gpc_mhz >=
484 gpc2clk_target) &&
485 (pstate != VF_POINT_INVALID_PSTATE)) {
486 gpc2clk_target =
487 table->gpc2clk_points[index].gpc_mhz;
488 *sys2clk =
489 table->gpc2clk_points[index].sys_mhz;
490 *xbar2clk =
491 table->gpc2clk_points[index].xbar_mhz;
492
493 gpc2clk_voltuv =
494 table->gpc2clk_points[index].uvolt;
495 gpc2clk_voltuv_sram =
496 table->gpc2clk_points[index].uvolt_sram;
497 break;
498 }
499 }
500
501 if (index == table->gpc2clk_num_points) {
502 pstate = VF_POINT_COMMON_PSTATE(
503 &table->gpc2clk_points[index-1], mclk_vf);
504 if (pstate != VF_POINT_INVALID_PSTATE) {
505 gpc2clk_target =
506 table->gpc2clk_points[index-1].gpc_mhz;
507 *sys2clk =
508 table->gpc2clk_points[index-1].sys_mhz;
509 *xbar2clk =
510 table->gpc2clk_points[index-1].xbar_mhz;
511
512 gpc2clk_voltuv =
513 table->gpc2clk_points[index-1].uvolt;
514 gpc2clk_voltuv_sram =
515 table->gpc2clk_points[index-1].
516 uvolt_sram;
517 } else if (index_mclk >= table->mclk_num_points - 1) {
518 /* There is no available combination of MCLK
519 * and GPC2CLK, so we need to fail this request
520 */
521 gpc2clk_target = 0;
522 mclk_target = 0;
523 pstate = VF_POINT_INVALID_PSTATE;
524 goto find_exit;
525 } else {
526 /* recalculate with higher PSTATE */
527 gpc2clk_target = *gpc2clk;
528 mclk_target = table->mclk_points[index_mclk+1].
529 mem_mhz;
530 goto recalculate_vf_point;
531 }
532 }
533
534 mclk_target = mclk_vf->mem_mhz;
535 mclk_voltuv = mclk_vf->uvolt;
536 mclk_voltuv_sram = mclk_vf->uvolt_sram;
537
538 } while (!table ||
539 (NV_ACCESS_ONCE(arb->current_vf_table) != table));
540
541find_exit:
542 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
543 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
544 gpc2clk_voltuv_sram : mclk_voltuv_sram;
545 /* noise unaware vmin */
546 *nuvmin = mclk_voltuv;
547 *nuvmin_sram = mclk_voltuv_sram;
548 *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
549 *mclk = mclk_target;
550 return pstate;
551}
552
553static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
554 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
555 u32 voltuv_sram)
556{
557 struct set_fll_clk fllclk;
558 struct nvgpu_clk_arb *arb = g->clk_arb;
559 int status;
560
561 fllclk.gpc2clkmhz = gpc2clk_target;
562 fllclk.sys2clkmhz = sys2clk_target;
563 fllclk.xbar2clkmhz = xbar2clk_target;
564
565 fllclk.voltuv = voltuv;
566
567 /* if voltage ascends we do:
568 * (1) FLL change
569 * (2) Voltage change
570 * (3) MCLK change
571 * If it goes down
572 * (1) MCLK change
573 * (2) Voltage change
574 * (3) FLL change
575 */
576
577 /* descending */
578 if (voltuv < arb->voltuv_actual) {
579 status = g->ops.clk.mclk_change(g, mclk_target);
580 if (status < 0)
581 return status;
582
583 status = volt_set_voltage(g, voltuv, voltuv_sram);
584 if (status < 0)
585 return status;
586
587 status = clk_set_fll_clks(g, &fllclk);
588 if (status < 0)
589 return status;
590 } else {
591 status = clk_set_fll_clks(g, &fllclk);
592 if (status < 0)
593 return status;
594
595 status = volt_set_voltage(g, voltuv, voltuv_sram);
596 if (status < 0)
597 return status;
598
599 status = g->ops.clk.mclk_change(g, mclk_target);
600 if (status < 0)
601 return status;
602 }
603
604 return 0;
605}
606
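/*
 * Propagate new entries from the global notification queue into this
 * device's private queue, filtered by the alarms the device has enabled,
 * then compute the poll mask and wake any reader waiting on the device.
 */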
607static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
608 struct nvgpu_clk_arb_target *target,
609 u32 alarm) {
610
611 struct nvgpu_clk_session *session = dev->session;
612 struct nvgpu_clk_arb *arb = session->g->clk_arb;
613 struct nvgpu_clk_notification *notification;
614
615 u32 queue_alarm_mask = 0;
616 u32 enabled_mask = 0;
617 u32 new_alarms_reported = 0;
618 u32 poll_mask = 0;
619 u32 tail, head;
620 u32 queue_index;
621 size_t size;
622 int index;
623
624 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask);
625 size = arb->notification_queue.size;
626
627 /* queue global arbiter notifications in buffer */
628 do {
629 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
630 /* copy items to the queue */
631 queue_index = nvgpu_atomic_read(&dev->queue.tail);
632 head = dev->arb_queue_head;
633 head = (tail - head) < arb->notification_queue.size ?
634 head : tail - arb->notification_queue.size;
635
636 for (index = head; _WRAPGTEQ(tail, index); index++) {
637 u32 alarm_detected;
638
639 notification = &arb->notification_queue.
640 notifications[(index+1) % size];
641 alarm_detected =
642 NV_ACCESS_ONCE(notification->notification);
643
644 if (!(enabled_mask & alarm_detected))
645 continue;
646
647 queue_index++;
648 dev->queue.notifications[
649 queue_index % dev->queue.size].timestamp =
650 NV_ACCESS_ONCE(notification->timestamp);
651
652 dev->queue.notifications[
653 queue_index % dev->queue.size].notification =
654 alarm_detected;
655
656 queue_alarm_mask |= alarm_detected;
657 }
658 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
659 (int)tail));
660
661 nvgpu_atomic_set(&dev->queue.tail, queue_index);
662 /* update the last notification we processed from global queue */
663
664 dev->arb_queue_head = tail;
665
666 /* Check if current session targets are met */
667 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
668 if ((target->gpc2clk < session->target->gpc2clk)
669 || (target->mclk < session->target->mclk)) {
670
671 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI);
672 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
673 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
674 }
675 }
676
677 /* Check if there is a new VF update */
678 if (queue_alarm_mask & EVENT(VF_UPDATE))
679 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLRDNORM);
680
681 /* Report sticky alarms that were not reported on the previous run */
682 new_alarms_reported = (queue_alarm_mask |
683 (alarm & ~dev->alarms_reported & queue_alarm_mask));
684
685 if (new_alarms_reported & ~LOCAL_ALARM_MASK) {
686 /* check that we are not re-reporting */
687 if (new_alarms_reported & EVENT(ALARM_GPU_LOST))
688 poll_mask |= NVGPU_POLLHUP;
689
690 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI);
691 /* On next run do not report global alarms that were already
692 * reported, but report SHUTDOWN always
693 */
694 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
695 ~EVENT(ALARM_GPU_LOST);
696 }
697
698 if (poll_mask) {
699 nvgpu_atomic_set(&dev->poll_mask, poll_mask);
700 nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
701 }
702
703 return new_alarms_reported;
704}
705
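/*
 * Counterpart of nvgpu_clk_arb_set_global_alarm(): clears the given alarm
 * bits from the packed alarm mask using the same cmpxchg retry scheme,
 * without queueing a notification.
 */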
706static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
707{
708 struct nvgpu_clk_arb *arb = g->clk_arb;
709
710 u64 current_mask;
711 u32 refcnt;
712 u32 alarm_mask;
713 u64 new_mask;
714
715 do {
716 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
717 /* atomic operations are strong so they do not need masks */
718
719 refcnt = ((u32) (current_mask >> 32)) + 1;
720 alarm_mask = (u32) (current_mask & ~alarm);
721 new_mask = ((u64) refcnt << 32) | alarm_mask;
722
723 } while (unlikely(current_mask !=
724 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
725 current_mask, new_mask)));
726}
727
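/*
 * Main arbitration pass: take the highest gpc2clk/mclk target across all
 * live sessions, clamp it to the allowed ranges, resolve the nearest VF
 * point, program clocks and voltage, then publish the new actual state and
 * signal completion to the requesters and event listeners.
 */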
728static void nvgpu_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
729{
730 struct nvgpu_clk_session *session;
731 struct nvgpu_clk_dev *dev;
732 struct nvgpu_clk_dev *tmp;
733 struct nvgpu_clk_arb_target *target, *actual;
734 struct gk20a *g = arb->g;
735
736 u32 pstate = VF_POINT_INVALID_PSTATE;
737 u32 voltuv, voltuv_sram;
738 bool mclk_set, gpc2clk_set;
739 u32 nuvmin, nuvmin_sram;
740
741 u32 alarms_notified = 0;
742 u32 current_alarm;
743 int status = 0;
744
745 /* Temporary variables for checking target frequency */
746 u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target;
747 u16 gpc2clk_session_target, mclk_session_target;
748
749#ifdef CONFIG_DEBUG_FS
750 u64 t0, t1;
751 struct nvgpu_clk_arb_debug *debug;
752
753#endif
754
755 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
756
757 /* bail out if gpu is down */
758 if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST))
759 goto exit_arb;
760
761#ifdef CONFIG_DEBUG_FS
762 g->ops.bus.read_ptimer(g, &t0);
763#endif
764
765 /* Only one arbiter should be running */
766 gpc2clk_target = 0;
767 mclk_target = 0;
768
769 nvgpu_spinlock_acquire(&arb->sessions_lock);
770 nvgpu_list_for_each_entry(session, &arb->sessions,
771 nvgpu_clk_session, link) {
772 if (!session->zombie) {
773 mclk_set = false;
774 gpc2clk_set = false;
775 target = (session->target == &session->target_pool[0] ?
776 &session->target_pool[1] :
777 &session->target_pool[0]);
778 nvgpu_spinlock_acquire(&session->session_lock);
779 if (!nvgpu_list_empty(&session->targets)) {
780 /* Copy over state */
781 target->mclk = session->target->mclk;
782 target->gpc2clk = session->target->gpc2clk;
783 /* Query the latest committed request */
784 nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
785 nvgpu_clk_dev, node) {
786 if (!mclk_set && dev->mclk_target_mhz) {
787 target->mclk =
788 dev->mclk_target_mhz;
789 mclk_set = true;
790 }
791 if (!gpc2clk_set &&
792 dev->gpc2clk_target_mhz) {
793 target->gpc2clk =
794 dev->gpc2clk_target_mhz;
795 gpc2clk_set = true;
796 }
797 nvgpu_ref_get(&dev->refcount);
798 nvgpu_list_del(&dev->node);
799 nvgpu_spinlock_acquire(&arb->requests_lock);
800 nvgpu_list_add(&dev->node, &arb->requests);
801 nvgpu_spinlock_release(&arb->requests_lock);
802 }
803 xchg(&session->target, target);
804 }
805 nvgpu_spinlock_release(&session->session_lock);
806
807 mclk_target = mclk_target > session->target->mclk ?
808 mclk_target : session->target->mclk;
809
810 gpc2clk_target =
811 gpc2clk_target > session->target->gpc2clk ?
812 gpc2clk_target : session->target->gpc2clk;
813 }
814 }
815 nvgpu_spinlock_release(&arb->sessions_lock);
816
817 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
818 arb->gpc2clk_default_mhz;
819
820 if (gpc2clk_target < arb->gpc2clk_min)
821 gpc2clk_target = arb->gpc2clk_min;
822
823 if (gpc2clk_target > arb->gpc2clk_max)
824 gpc2clk_target = arb->gpc2clk_max;
825
826 mclk_target = (mclk_target > 0) ? mclk_target :
827 arb->mclk_default_mhz;
828
829 if (mclk_target < arb->mclk_min)
830 mclk_target = arb->mclk_min;
831
832 if (mclk_target > arb->mclk_max)
833 mclk_target = arb->mclk_max;
834
835 sys2clk_target = 0;
836 xbar2clk_target = 0;
837
838 gpc2clk_session_target = gpc2clk_target;
839 mclk_session_target = mclk_target;
840
841 /* Query the table for the closest vf point to program */
842 pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target,
843 &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv,
844 &voltuv_sram, &nuvmin, &nuvmin_sram);
845
846 if (pstate == VF_POINT_INVALID_PSTATE) {
847 arb->status = -EINVAL;
848 /* make status visible */
849 nvgpu_smp_mb();
850 goto exit_arb;
851 }
852
853 if ((gpc2clk_target < gpc2clk_session_target) ||
854 (mclk_target < mclk_session_target))
855 nvgpu_clk_arb_set_global_alarm(g,
856 EVENT(ALARM_TARGET_VF_NOT_POSSIBLE));
857
858 if ((arb->actual->gpc2clk == gpc2clk_target) &&
859 (arb->actual->mclk == mclk_target) &&
860 (arb->voltuv_actual == voltuv)) {
861 goto exit_arb;
862 }
863
864 /* Program clocks */
865 /* A change in either mclk or gpc2clk may require a change in voltage */
866
867 nvgpu_mutex_acquire(&arb->pstate_lock);
868 status = nvgpu_lpwr_disable_pg(g, false);
869
870 status = clk_pmu_freq_controller_load(g, false,
871 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL);
872 if (status < 0) {
873 arb->status = status;
874 nvgpu_mutex_release(&arb->pstate_lock);
875
876 /* make status visible */
877 nvgpu_smp_mb();
878 goto exit_arb;
879 }
880 status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
881 if (status < 0) {
882 arb->status = status;
883 nvgpu_mutex_release(&arb->pstate_lock);
884
885 /* make status visible */
886 nvgpu_smp_mb();
887 goto exit_arb;
888 }
889
890 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
891 sys2clk_target, xbar2clk_target, mclk_target, voltuv,
892 voltuv_sram);
893 if (status < 0) {
894 arb->status = status;
895 nvgpu_mutex_release(&arb->pstate_lock);
896
897 /* make status visible */
898 nvgpu_smp_mb();
899 goto exit_arb;
900 }
901
902 status = clk_pmu_freq_controller_load(g, true,
903 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL);
904 if (status < 0) {
905 arb->status = status;
906 nvgpu_mutex_release(&arb->pstate_lock);
907
908 /* make status visible */
909 nvgpu_smp_mb();
910 goto exit_arb;
911 }
912
913 status = nvgpu_lwpr_mclk_change(g, pstate);
914 if (status < 0) {
915 arb->status = status;
916 nvgpu_mutex_release(&arb->pstate_lock);
917
918 /* make status visible */
919 nvgpu_smp_mb();
920 goto exit_arb;
921 }
922
923 actual = NV_ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
924 &arb->actual_pool[1] : &arb->actual_pool[0];
925
926 /* do not reorder this pointer */
927 nvgpu_smp_rmb();
928 actual->gpc2clk = gpc2clk_target;
929 actual->mclk = mclk_target;
930 arb->voltuv_actual = voltuv;
931 actual->pstate = pstate;
932 arb->status = status;
933
934 /* Make changes visible to other threads */
935 nvgpu_smp_wmb();
936 xchg(&arb->actual, actual);
937
938 status = nvgpu_lpwr_enable_pg(g, false);
939 if (status < 0) {
940 arb->status = status;
941 nvgpu_mutex_release(&arb->pstate_lock);
942
943 /* make status visible */
944 nvgpu_smp_mb();
945 goto exit_arb;
946 }
947
948 /* status must be visible before atomic inc */
949 nvgpu_smp_wmb();
950 nvgpu_atomic_inc(&arb->req_nr);
951
952 /* Unlock pstate change for PG */
953 nvgpu_mutex_release(&arb->pstate_lock);
954
955 /* VF Update complete */
956 nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE));
957
958 nvgpu_cond_signal_interruptible(&arb->request_wq);
959
960#ifdef CONFIG_DEBUG_FS
961 g->ops.bus.read_ptimer(g, &t1);
962
963 debug = arb->debug == &arb->debug_pool[0] ?
964 &arb->debug_pool[1] : &arb->debug_pool[0];
965
966 memcpy(debug, arb->debug, sizeof(arb->debug_pool[0]));
967 debug->switch_num++;
968
969 if (debug->switch_num == 1) {
970 debug->switch_max = debug->switch_min =
971 debug->switch_avg = (t1-t0)/1000;
972 debug->switch_std = 0;
973 } else {
974 s64 prev_avg;
975 s64 curr = (t1-t0)/1000;
976
977 debug->switch_max = curr > debug->switch_max ?
978 curr : debug->switch_max;
979 debug->switch_min = debug->switch_min ?
980 (curr < debug->switch_min ?
981 curr : debug->switch_min) : curr;
982 prev_avg = debug->switch_avg;
983 debug->switch_avg = (curr +
984 (debug->switch_avg * (debug->switch_num-1))) /
985 debug->switch_num;
986 debug->switch_std +=
987 (curr - debug->switch_avg) * (curr - prev_avg);
988 }
989 /* commit changes before exchanging debug pointer */
990 nvgpu_smp_wmb();
991 xchg(&arb->debug, debug);
992#endif
993
994exit_arb:
995 if (status < 0) {
996 nvgpu_err(g, "Error in arbiter update");
997 nvgpu_clk_arb_set_global_alarm(g,
998 EVENT(ALARM_CLOCK_ARBITER_FAILED));
999 }
1000
1001 current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask);
1002 /* notify completion for all requests */
1003 nvgpu_spinlock_acquire(&arb->requests_lock);
1004 nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests,
1005 nvgpu_clk_dev, node) {
1006 nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM);
1007 nvgpu_cond_signal_interruptible(&dev->readout_wq);
1008 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1009 nvgpu_list_del(&dev->node);
1010 }
1011 nvgpu_spinlock_release(&arb->requests_lock);
1012
1013 nvgpu_atomic_set(&arb->notification_queue.head,
1014 nvgpu_atomic_read(&arb->notification_queue.tail));
1015 /* notify event for all users */
1016 nvgpu_spinlock_acquire(&arb->users_lock);
1017 nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) {
1018 alarms_notified |=
1019 nvgpu_clk_arb_notify(dev, arb->actual, current_alarm);
1020 }
1021 nvgpu_spinlock_release(&arb->users_lock);
1022
1023 /* clear alarms */
1024 nvgpu_clk_arb_clear_global_alarm(g, alarms_notified &
1025 ~EVENT(ALARM_GPU_LOST));
1026}
1027
1028/*
1029 * Process one scheduled work item.
1030 */
1031static void nvgpu_clk_arb_worker_process_item(
1032 struct nvgpu_clk_arb_work_item *work_item)
1033{
1034 nvgpu_log(work_item->arb->g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1035
1036 if (work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE)
1037 nvgpu_clk_arb_run_vf_table_cb(work_item->arb);
1038 else if (work_item->item_type == CLK_ARB_WORK_UPDATE_ARB)
1039 nvgpu_clk_arb_run_arbiter_cb(work_item->arb);
1040}
1041
1042/**
1043 * Tell the worker that one more work needs to be done.
1044 *
1045 * Increase the work counter to synchronize the worker with the new work. Wake
1046 * up the worker. If the worker was already running, it will handle this work
1047 * before going to sleep.
1048 */
1049static int nvgpu_clk_arb_worker_wakeup(struct gk20a *g)
1050{
1051 int put;
1052
1053 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1054
1055 put = nvgpu_atomic_inc_return(&g->clk_arb_worker.put);
1056 nvgpu_cond_signal_interruptible(&g->clk_arb_worker.wq);
1057
1058 return put;
1059}
1060
1061/**
1062 * Test if there is some work pending.
1063 *
1064 * This is the counterpart of nvgpu_clk_arb_worker_wakeup and is called from
1065 * the worker. The worker keeps an internal work counter that is incremented
1066 * once per finished work item and compared with the number of queued jobs.
1067 */
1068static bool nvgpu_clk_arb_worker_pending(struct gk20a *g, int get)
1069{
1070 bool pending = nvgpu_atomic_read(&g->clk_arb_worker.put) != get;
1071
1072 /* We don't need barriers because they are implicit in locking */
1073 return pending;
1074}
1075
1076/**
1077 * Process the queued works for the worker thread serially.
1078 *
1079 * Flush all the work items in the queue one by one. This may delay newly
1080 * queued items for a short while, as they are handled serially.
1081 */
1082static void nvgpu_clk_arb_worker_process(struct gk20a *g, int *get)
1083{
1084
1085 while (nvgpu_clk_arb_worker_pending(g, *get)) {
1086 struct nvgpu_clk_arb_work_item *work_item = NULL;
1087
1088 nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
1089 if (!nvgpu_list_empty(&g->clk_arb_worker.items)) {
1090 work_item = nvgpu_list_first_entry(&g->clk_arb_worker.items,
1091 nvgpu_clk_arb_work_item, worker_item);
1092 nvgpu_list_del(&work_item->worker_item);
1093 }
1094 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
1095
1096 if (!work_item) {
1097 /*
1098 * Woke up for some other reason, but currently the only
1099 * expected wakeup source is a work item being added to the
1100 * items list, so warn and ack the message.
1101 */
1102 nvgpu_warn(g, "Spurious worker event!");
1103 ++*get;
1104 break;
1105 }
1106
1107 nvgpu_clk_arb_worker_process_item(work_item);
1108 ++*get;
1109 }
1110}
1111
1112/*
1113 * Process all work items found in the clk arbiter work queue.
1114 */
1115static int nvgpu_clk_arb_poll_worker(void *arg)
1116{
1117 struct gk20a *g = (struct gk20a *)arg;
1118 struct gk20a_worker *worker = &g->clk_arb_worker;
1119 int get = 0;
1120
1121 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1122
1123 while (!nvgpu_thread_should_stop(&worker->poll_task)) {
1124 int ret;
1125
1126 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
1127 &worker->wq,
1128 nvgpu_clk_arb_worker_pending(g, get), 0);
1129
1130 if (ret == 0)
1131 nvgpu_clk_arb_worker_process(g, &get);
1132 }
1133 return 0;
1134}
1135
1136static int __nvgpu_clk_arb_worker_start(struct gk20a *g)
1137{
1138 char thread_name[64];
1139 int err = 0;
1140
1141 if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task))
1142 return err;
1143
1144 nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
1145
1146 /*
1147 * Mutexes have implicit barriers, so there is no risk of a thread
1148 * seeing a stale copy of the poll_task state; the check via
1149 * nvgpu_thread_is_running() is repeated under the lock below.
1150 */
1151
1152 if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task)) {
1153 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
1154 return err;
1155 }
1156
1157 snprintf(thread_name, sizeof(thread_name),
1158 "nvgpu_clk_arb_poll_%s", g->name);
1159
1160 err = nvgpu_thread_create(&g->clk_arb_worker.poll_task, g,
1161 nvgpu_clk_arb_poll_worker, thread_name);
1162
1163 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
1164 return err;
1165}
1166
1167/**
1168 * Append a work item to the worker's list.
1169 *
1170 * This adds the work item to the end of the list and wakes the worker
1171 * up immediately. If the work item already exists in the list, it is not
1172 * added again, because in that case it has already been scheduled but has
1173 * not yet been processed.
1174 */
1175void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
1176 struct nvgpu_clk_arb_work_item *work_item)
1177{
1178 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1179
1180 /*
1181 * Warn if worker thread cannot run
1182 */
1183 if (WARN_ON(__nvgpu_clk_arb_worker_start(g))) {
1184 nvgpu_warn(g, "clk arb worker cannot run!");
1185 return;
1186 }
1187
1188 nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
1189 if (!nvgpu_list_empty(&work_item->worker_item)) {
1190 /*
1191 * Already queued, so will get processed eventually.
1192 * The worker is probably awake already.
1193 */
1194 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
1195 return;
1196 }
1197 nvgpu_list_add_tail(&work_item->worker_item, &g->clk_arb_worker.items);
1198 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
1199
1200 nvgpu_clk_arb_worker_wakeup(g);
1201}
1202
1203/**
1204 * Initialize the clk arb worker's metadata and start the background thread.
1205 */
1206static int nvgpu_clk_arb_worker_init(struct gk20a *g)
1207{
1208 int err;
1209
1210 nvgpu_atomic_set(&g->clk_arb_worker.put, 0);
1211 nvgpu_cond_init(&g->clk_arb_worker.wq);
1212 nvgpu_init_list_node(&g->clk_arb_worker.items);
1213 nvgpu_spinlock_init(&g->clk_arb_worker.items_lock);
1214 err = nvgpu_mutex_init(&g->clk_arb_worker.start_lock);
1215 if (err)
1216 goto error_check;
1217
1218 err = __nvgpu_clk_arb_worker_start(g);
1219error_check:
1220 if (err) {
1221 nvgpu_err(g, "failed to start clk arb poller thread");
1222 return err;
1223 }
1224 return 0;
1225}
1226
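/*
 * One-time arbiter setup: allocate the double-buffered VF tables and the
 * notification queue, start the worker thread, then trigger an initial VF
 * table update and wait until the first arbitration request completes.
 */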
1227int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
1228{
1229 struct nvgpu_clk_arb *arb;
1230 u16 default_mhz;
1231 int err;
1232 int index;
1233 struct nvgpu_clk_vf_table *table;
1234
1235 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1236
1237 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1238 return 0;
1239
1240 arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
1241 if (!arb)
1242 return -ENOMEM;
1243
1244 err = nvgpu_mutex_init(&arb->pstate_lock);
1245 if (err)
1246 goto mutex_fail;
1247 nvgpu_spinlock_init(&arb->sessions_lock);
1248 nvgpu_spinlock_init(&arb->users_lock);
1249 nvgpu_spinlock_init(&arb->requests_lock);
1250
1251 arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1252 if (!arb->mclk_f_points) {
1253 err = -ENOMEM;
1254 goto init_fail;
1255 }
1256
1257 arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1258 if (!arb->gpc2clk_f_points) {
1259 err = -ENOMEM;
1260 goto init_fail;
1261 }
1262
1263 for (index = 0; index < 2; index++) {
1264 table = &arb->vf_table_pool[index];
1265 table->gpc2clk_num_points = MAX_F_POINTS;
1266 table->mclk_num_points = MAX_F_POINTS;
1267
1268 table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1269 sizeof(struct nvgpu_clk_vf_point));
1270 if (!table->gpc2clk_points) {
1271 err = -ENOMEM;
1272 goto init_fail;
1273 }
1274
1275
1276 table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1277 sizeof(struct nvgpu_clk_vf_point));
1278 if (!table->mclk_points) {
1279 err = -ENOMEM;
1280 goto init_fail;
1281 }
1282 }
1283
1284 g->clk_arb = arb;
1285 arb->g = g;
1286
1287 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1288 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
1289 if (err < 0) {
1290 err = -EINVAL;
1291 goto init_fail;
1292 }
1293
1294 arb->mclk_default_mhz = default_mhz;
1295
1296 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1297 CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
1298 if (err < 0) {
1299 err = -EINVAL;
1300 goto init_fail;
1301 }
1302
1303 arb->gpc2clk_default_mhz = default_mhz;
1304
1305 arb->actual = &arb->actual_pool[0];
1306
1307 nvgpu_atomic_set(&arb->req_nr, 0);
1308
1309 nvgpu_atomic64_set(&arb->alarm_mask, 0);
1310 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
1311 DEFAULT_EVENT_NUMBER);
1312 if (err < 0)
1313 goto init_fail;
1314
1315 nvgpu_init_list_node(&arb->users);
1316 nvgpu_init_list_node(&arb->sessions);
1317 nvgpu_init_list_node(&arb->requests);
1318
1319 nvgpu_cond_init(&arb->request_wq);
1320
1321 nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item);
1322 nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
1323 arb->update_vf_table_work_item.arb = arb;
1324 arb->update_arb_work_item.arb = arb;
1325 arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE;
1326 arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
1327
1328 err = nvgpu_clk_arb_worker_init(g);
1329 if (err < 0)
1330 goto init_fail;
1331
1332#ifdef CONFIG_DEBUG_FS
1333 arb->debug = &arb->debug_pool[0];
1334
1335 if (!arb->debugfs_set) {
1336 if (nvgpu_clk_arb_debugfs_init(g))
1337 arb->debugfs_set = true;
1338 }
1339#endif
1340 err = clk_vf_point_cache(g);
1341 if (err < 0)
1342 goto init_fail;
1343
1344 err = nvgpu_clk_arb_update_vf_table(arb);
1345 if (err < 0)
1346 goto init_fail;
1347 do {
1348 /* Check that first run is completed */
1349 nvgpu_smp_mb();
1350 NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
1351 nvgpu_atomic_read(&arb->req_nr), 0);
1352 } while (!nvgpu_atomic_read(&arb->req_nr));
1353
1354
1355 return arb->status;
1356
1357init_fail:
1358 nvgpu_kfree(g, arb->gpc2clk_f_points);
1359 nvgpu_kfree(g, arb->mclk_f_points);
1360
1361 for (index = 0; index < 2; index++) {
1362 nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
1363 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1364 }
1365
1366 nvgpu_mutex_destroy(&arb->pstate_lock);
1367
1368mutex_fail:
1369 nvgpu_kfree(g, arb);
1370
1371 return err;
1372}
1373
1374void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
1375{
1376 nvgpu_clk_arb_schedule_alarm(g,
1377 (0x1UL << NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
1378}
1379
1380void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
1381{
1382 struct nvgpu_clk_arb *arb = g->clk_arb;
1383
1384 nvgpu_clk_arb_set_global_alarm(g, alarm);
1385 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
1386}
1387
1388static void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
1389{
1390 nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
1391 nvgpu_thread_stop(&g->clk_arb_worker.poll_task);
1392 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
1393}
1394
1395void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
1396{
1397 struct nvgpu_clk_arb *arb = g->clk_arb;
1398 int index;
1399
1400 if (arb) {
1401 nvgpu_clk_arb_worker_deinit(g);
1402
1403 nvgpu_kfree(g, arb->gpc2clk_f_points);
1404 nvgpu_kfree(g, arb->mclk_f_points);
1405
1406 for (index = 0; index < 2; index++) {
1407 nvgpu_kfree(g,
1408 arb->vf_table_pool[index].gpc2clk_points);
1409 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1410 }
1411 nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
1412 nvgpu_kfree(g, g->clk_arb);
1413 g->clk_arb = NULL;
1414 }
1415}
1416
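/*
 * Create a per-client session: targets are double-buffered in target_pool[]
 * and the session starts out at CTRL_PERF_PSTATE_P8 until a request arrives.
 * The new session is then linked into the arbiter's sessions list.
 */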
1417int nvgpu_clk_arb_init_session(struct gk20a *g,
1418 struct nvgpu_clk_session **_session)
1419{
1420 struct nvgpu_clk_arb *arb = g->clk_arb;
1421 struct nvgpu_clk_session *session = *(_session);
1422
1423 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1424
1425 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1426 return 0;
1427
1428 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
1429 if (!session)
1430 return -ENOMEM;
1431 session->g = g;
1432
1433 nvgpu_ref_init(&session->refcount);
1434
1435 session->zombie = false;
1436 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
1437 /* make sure that the initialization of the pool is visible
1438 * before the update
1439 */
1440 nvgpu_smp_wmb();
1441 session->target = &session->target_pool[0];
1442
1443 nvgpu_init_list_node(&session->targets);
1444 nvgpu_spinlock_init(&session->session_lock);
1445
1446 nvgpu_spinlock_acquire(&arb->sessions_lock);
1447 nvgpu_list_add_tail(&session->link, &arb->sessions);
1448 nvgpu_spinlock_release(&arb->sessions_lock);
1449
1450 *_session = session;
1451
1452 return 0;
1453}
1454
1455void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
1456{
1457 struct nvgpu_clk_dev *dev = container_of(refcount,
1458 struct nvgpu_clk_dev, refcount);
1459 struct nvgpu_clk_session *session = dev->session;
1460
1461 nvgpu_kfree(session->g, dev);
1462}
1463
1464void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
1465{
1466 struct nvgpu_clk_session *session = container_of(refcount,
1467 struct nvgpu_clk_session, refcount);
1468 struct nvgpu_clk_arb *arb = session->g->clk_arb;
1469 struct gk20a *g = session->g;
1470 struct nvgpu_clk_dev *dev, *tmp;
1471
1472 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1473
1474 if (arb) {
1475 nvgpu_spinlock_acquire(&arb->sessions_lock);
1476 nvgpu_list_del(&session->link);
1477 nvgpu_spinlock_release(&arb->sessions_lock);
1478 }
1479
1480 nvgpu_spinlock_acquire(&session->session_lock);
1481 nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
1482 nvgpu_clk_dev, node) {
1483 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1484 nvgpu_list_del(&dev->node);
1485 }
1486 nvgpu_spinlock_release(&session->session_lock);
1487
1488 nvgpu_kfree(g, session);
1489}
1490
1491void nvgpu_clk_arb_release_session(struct gk20a *g,
1492 struct nvgpu_clk_session *session)
1493{
1494 struct nvgpu_clk_arb *arb = g->clk_arb;
1495
1496 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1497
1498 session->zombie = true;
1499 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
1500 if (arb)
1501 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
1502}
1503
1504void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
1505{
1506 struct nvgpu_clk_arb *arb = g->clk_arb;
1507
1508 nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item);
1509}
1510
1511/* This function is inherently unsafe to call while the arbiter is running;
1512 * the arbiter must be blocked before calling this function.
1513 */
1514int nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
1515{
1516 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate);
1517}
1518
1519void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
1520{
1521 struct nvgpu_clk_arb *arb = g->clk_arb;
1522
1523 if (lock)
1524 nvgpu_mutex_acquire(&arb->pstate_lock);
1525 else
1526 nvgpu_mutex_release(&arb->pstate_lock);
1527}