Diffstat (limited to 'include/clk/clk_arb.c')
-rw-r--r--	include/clk/clk_arb.c	1087
1 file changed, 1087 insertions, 0 deletions
diff --git a/include/clk/clk_arb.c b/include/clk/clk_arb.c
new file mode 100644
index 0000000..6cf005c
--- /dev/null
+++ b/include/clk/clk_arb.c
@@ -0,0 +1,1087 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/bitops.h>
24#include <nvgpu/lock.h>
25#include <nvgpu/kmem.h>
26#include <nvgpu/atomic.h>
27#include <nvgpu/bug.h>
28#include <nvgpu/kref.h>
29#include <nvgpu/log.h>
30#include <nvgpu/barrier.h>
31#include <nvgpu/cond.h>
32#include <nvgpu/list.h>
33#include <nvgpu/clk_arb.h>
34#include <nvgpu/timers.h>
35#include <nvgpu/gk20a.h>
36
37#include "clk/clk.h"
38#include "pstate/pstate.h"
39#include "lpwr/lpwr.h"
40#include "volt/volt.h"
41
42int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
43 struct nvgpu_clk_notification_queue *queue,
44 size_t events_number) {
45 queue->notifications = nvgpu_kcalloc(g, events_number,
46 sizeof(struct nvgpu_clk_notification));
47 if (!queue->notifications)
48 return -ENOMEM;
49 queue->size = events_number;
50
51 nvgpu_atomic_set(&queue->head, 0);
52 nvgpu_atomic_set(&queue->tail, 0);
53
54 return 0;
55}
56
57void nvgpu_clk_notification_queue_free(struct gk20a *g,
58 struct nvgpu_clk_notification_queue *queue) {
59 if (queue->size > 0) {
60 nvgpu_kfree(g, queue->notifications);
61 queue->size = 0;
62 nvgpu_atomic_set(&queue->head, 0);
63 nvgpu_atomic_set(&queue->tail, 0);
64 }
65}
66
67static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
68 struct nvgpu_clk_notification_queue *queue,
69 u32 alarm_mask) {
70
71 u32 queue_index;
72 u64 timestamp;
73
74 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
75 /* get current timestamp */
76 timestamp = (u64) nvgpu_hr_timestamp();
77
78 queue->notifications[queue_index].timestamp = timestamp;
79 queue->notifications[queue_index].notification = alarm_mask;
80
81}
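/*
 * Illustrative sketch (not part of the driver): how the ring buffer above is
 * indexed. The tail counter only ever grows; it is reduced modulo the queue
 * size when a slot is chosen, so with a queue of size 4 (alarm values A..E
 * are arbitrary placeholders):
 *
 *	nvgpu_clk_notification_queue_alloc(g, &queue, 4);
 *	nvgpu_clk_arb_queue_notification(g, &queue, A);   tail = 1 -> slot 1
 *	nvgpu_clk_arb_queue_notification(g, &queue, B);   tail = 2 -> slot 2
 *	nvgpu_clk_arb_queue_notification(g, &queue, C);   tail = 3 -> slot 3
 *	nvgpu_clk_arb_queue_notification(g, &queue, D);   tail = 4 -> slot 0
 *	nvgpu_clk_arb_queue_notification(g, &queue, E);   tail = 5 -> slot 1 (A lost)
 *
 * Readers (see nvgpu_clk_arb_notify() below) keep their own head and catch up
 * against the shared tail, so a slow reader simply loses the oldest events.
 */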
82
83void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
84{
85 struct nvgpu_clk_arb *arb = g->clk_arb;
86
87 u64 current_mask;
88 u32 refcnt;
89 u32 alarm_mask;
90 u64 new_mask;
91
92 do {
93 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
94 /* atomic operations are strong so they do not need masks */
95
96 refcnt = ((u32) (current_mask >> 32)) + 1;
97 alarm_mask = (u32) (current_mask & ~0) | alarm;
98 new_mask = ((u64) refcnt << 32) | alarm_mask;
99
100 } while (unlikely(current_mask !=
101 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
102 current_mask, new_mask)));
103
104 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm);
105}
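/*
 * Illustrative sketch (not part of the driver): layout of arb->alarm_mask as
 * updated above and in nvgpu_clk_arb_clear_global_alarm() below. A single
 * 64-bit atomic packs an update counter and the alarm bits so one cmpxchg
 * covers both:
 *
 *	 63            32 31             0
 *	+----------------+----------------+
 *	|     refcnt     |   alarm bits   |
 *	+----------------+----------------+
 *
 *	refcnt     = ((u32)(current_mask >> 32)) + 1;   bumped on every update
 *	set:   alarm_bits = (u32)current_mask | alarm;
 *	clear: alarm_bits = (u32)(current_mask & ~alarm);
 *
 * If another CPU modified the word between the read and the cmpxchg, the
 * compare fails and the loop retries with the freshly read value.
 */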
106
107
108int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
109{
110 struct gk20a *g = arb->g;
111 struct nvgpu_clk_vf_table *table;
112
113 u32 i, j;
114 int status = -EINVAL;
115 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
116 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
117 u16 clk_cur;
118 u32 num_points;
119
120 struct clk_set_info *p5_info, *p0_info;
121
122 table = NV_ACCESS_ONCE(arb->current_vf_table);
123	/* pairs with the write barrier below that publishes the table */
124 nvgpu_smp_rmb();
125
126 table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
127 &arb->vf_table_pool[0];
128
129	/* Get allowed clock ranges */
130 if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK,
131 &arb->gpc2clk_min,
132 &arb->gpc2clk_max) < 0) {
133 nvgpu_err(g, "failed to fetch GPC2CLK range");
134 goto exit_vf_table;
135 }
136 if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK,
137 &arb->mclk_min,
138 &arb->mclk_max) < 0) {
139 nvgpu_err(g, "failed to fetch MCLK range");
140 goto exit_vf_table;
141 }
142
143 table->gpc2clk_num_points = MAX_F_POINTS;
144 table->mclk_num_points = MAX_F_POINTS;
145
146 if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPC2CLK,
147 &table->gpc2clk_num_points, arb->gpc2clk_f_points)) {
148 nvgpu_err(g, "failed to fetch GPC2CLK frequency points");
149 goto exit_vf_table;
150 }
151
152 if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_MCLK,
153 &table->mclk_num_points, arb->mclk_f_points)) {
154 nvgpu_err(g, "failed to fetch MCLK frequency points");
155 goto exit_vf_table;
156 }
157 if (!table->mclk_num_points || !table->gpc2clk_num_points) {
158			nvgpu_err(g, "empty f points query: mclk %d gpc2clk %d",
159 table->mclk_num_points, table->gpc2clk_num_points);
160 status = -EINVAL;
161 goto exit_vf_table;
162 }
163
164 memset(table->mclk_points, 0,
165 table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point));
166 memset(table->gpc2clk_points, 0,
167 table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
168
169 p5_info = pstate_get_clk_set_info(g,
170 CTRL_PERF_PSTATE_P5, clkwhich_mclk);
171 if (!p5_info) {
172 nvgpu_err(g, "failed to get MCLK P5 info");
173 goto exit_vf_table;
174 }
175 p0_info = pstate_get_clk_set_info(g,
176 CTRL_PERF_PSTATE_P0, clkwhich_mclk);
177 if (!p0_info) {
178 nvgpu_err(g, "failed to get MCLK P0 info");
179 goto exit_vf_table;
180 }
181
182 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
183 i < table->mclk_num_points; i++) {
184
185 if ((arb->mclk_f_points[i] >= arb->mclk_min) &&
186 (arb->mclk_f_points[i] <= arb->mclk_max) &&
187 (arb->mclk_f_points[i] != clk_cur)) {
188
189 table->mclk_points[j].mem_mhz = arb->mclk_f_points[i];
190 mclk_voltuv = mclk_voltuv_sram = 0;
191
192 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
193 &table->mclk_points[j].mem_mhz, &mclk_voltuv,
194 CTRL_VOLT_DOMAIN_LOGIC);
195 if (status < 0) {
196 nvgpu_err(g,
197 "failed to get MCLK LOGIC voltage");
198 goto exit_vf_table;
199 }
200 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
201 &table->mclk_points[j].mem_mhz,
202 &mclk_voltuv_sram,
203 CTRL_VOLT_DOMAIN_SRAM);
204 if (status < 0) {
205 nvgpu_err(g, "failed to get MCLK SRAM voltage");
206 goto exit_vf_table;
207 }
208
209 table->mclk_points[j].uvolt = mclk_voltuv;
210 table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
211 clk_cur = table->mclk_points[j].mem_mhz;
212
213 if ((clk_cur >= p5_info->min_mhz) &&
214 (clk_cur <= p5_info->max_mhz))
215 VF_POINT_SET_PSTATE_SUPPORTED(
216 &table->mclk_points[j],
217 CTRL_PERF_PSTATE_P5);
218 if ((clk_cur >= p0_info->min_mhz) &&
219 (clk_cur <= p0_info->max_mhz))
220 VF_POINT_SET_PSTATE_SUPPORTED(
221 &table->mclk_points[j],
222 CTRL_PERF_PSTATE_P0);
223
224 j++;
225 num_points++;
226
227 }
228 }
229 table->mclk_num_points = num_points;
230
231 p5_info = pstate_get_clk_set_info(g,
232 CTRL_PERF_PSTATE_P5, clkwhich_gpc2clk);
233 if (!p5_info) {
234 status = -EINVAL;
235 nvgpu_err(g, "failed to get GPC2CLK P5 info");
236 goto exit_vf_table;
237 }
238
239 p0_info = pstate_get_clk_set_info(g,
240 CTRL_PERF_PSTATE_P0, clkwhich_gpc2clk);
241 if (!p0_info) {
242 status = -EINVAL;
243 nvgpu_err(g, "failed to get GPC2CLK P0 info");
244 goto exit_vf_table;
245 }
246
247 /* GPC2CLK needs to be checked in two passes. The first determines the
248 * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the
249	 * second verifies that the clock minimums are satisfied and sets
250	 * the voltages.
251 */
252 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
253 i < table->gpc2clk_num_points; i++) {
254 struct set_fll_clk setfllclk;
255
256 if ((arb->gpc2clk_f_points[i] >= arb->gpc2clk_min) &&
257 (arb->gpc2clk_f_points[i] <= arb->gpc2clk_max) &&
258 (arb->gpc2clk_f_points[i] != clk_cur)) {
259
260 table->gpc2clk_points[j].gpc_mhz =
261 arb->gpc2clk_f_points[i];
262 setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i];
263 status = clk_get_fll_clks(g, &setfllclk);
264 if (status < 0) {
265 nvgpu_err(g,
266 "failed to get GPC2CLK slave clocks");
267 goto exit_vf_table;
268 }
269
270 table->gpc2clk_points[j].sys_mhz =
271 setfllclk.sys2clkmhz;
272 table->gpc2clk_points[j].xbar_mhz =
273 setfllclk.xbar2clkmhz;
274
275 clk_cur = table->gpc2clk_points[j].gpc_mhz;
276
277 if ((clk_cur >= p5_info->min_mhz) &&
278 (clk_cur <= p5_info->max_mhz))
279 VF_POINT_SET_PSTATE_SUPPORTED(
280 &table->gpc2clk_points[j],
281 CTRL_PERF_PSTATE_P5);
282 if ((clk_cur >= p0_info->min_mhz) &&
283 (clk_cur <= p0_info->max_mhz))
284 VF_POINT_SET_PSTATE_SUPPORTED(
285 &table->gpc2clk_points[j],
286 CTRL_PERF_PSTATE_P0);
287
288 j++;
289 num_points++;
290 }
291 }
292 table->gpc2clk_num_points = num_points;
293
294 /* Second pass */
295 for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) {
296
297 u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz;
298
299 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
300
301 /* Check sysclk */
302 p5_info = pstate_get_clk_set_info(g,
303 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
304 clkwhich_sys2clk);
305 if (!p5_info) {
306 status = -EINVAL;
307 nvgpu_err(g, "failed to get SYS2CLK P5 info");
308 goto exit_vf_table;
309 }
310
311 /* sys2clk below clk min, need to find correct clock */
312 if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) {
313 for (j = i + 1; j < table->gpc2clk_num_points; j++) {
314
315 if (table->gpc2clk_points[j].sys_mhz >=
316 p5_info->min_mhz) {
317
318
319 table->gpc2clk_points[i].sys_mhz =
320 p5_info->min_mhz;
321
322 alt_gpc2clk = alt_gpc2clk <
323 table->gpc2clk_points[j].
324 gpc_mhz ?
325 table->gpc2clk_points[j].
326 gpc_mhz :
327 alt_gpc2clk;
328 break;
329 }
330 }
331 /* no VF exists that satisfies condition */
332 if (j == table->gpc2clk_num_points) {
333 nvgpu_err(g, "NO SYS2CLK VF point possible");
334 status = -EINVAL;
335 goto exit_vf_table;
336 }
337 }
338
339 /* Check xbarclk */
340 p5_info = pstate_get_clk_set_info(g,
341 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
342 clkwhich_xbar2clk);
343 if (!p5_info) {
344 status = -EINVAL;
345			nvgpu_err(g, "failed to get XBAR2CLK P5 info");
346 goto exit_vf_table;
347 }
348
349 /* xbar2clk below clk min, need to find correct clock */
350 if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) {
351 for (j = i; j < table->gpc2clk_num_points; j++) {
352 if (table->gpc2clk_points[j].xbar_mhz >=
353 p5_info->min_mhz) {
354
355 table->gpc2clk_points[i].xbar_mhz =
356 p5_info->min_mhz;
357
358 alt_gpc2clk = alt_gpc2clk <
359 table->gpc2clk_points[j].
360 gpc_mhz ?
361 table->gpc2clk_points[j].
362 gpc_mhz :
363 alt_gpc2clk;
364 break;
365 }
366 }
367 /* no VF exists that satisfies condition */
368 if (j == table->gpc2clk_num_points) {
369 status = -EINVAL;
370 nvgpu_err(g, "NO XBAR2CLK VF point possible");
371
372 goto exit_vf_table;
373 }
374 }
375
376 /* Calculate voltages */
377 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
378 &alt_gpc2clk, &gpc2clk_voltuv,
379 CTRL_VOLT_DOMAIN_LOGIC);
380 if (status < 0) {
381 nvgpu_err(g, "failed to get GPC2CLK LOGIC voltage");
382 goto exit_vf_table;
383 }
384
385 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
386 &alt_gpc2clk,
387 &gpc2clk_voltuv_sram,
388 CTRL_VOLT_DOMAIN_SRAM);
389 if (status < 0) {
390 nvgpu_err(g, "failed to get GPC2CLK SRAM voltage");
391 goto exit_vf_table;
392 }
393
394 table->gpc2clk_points[i].uvolt = gpc2clk_voltuv;
395 table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram;
396 }
397
398 /* make table visible when all data has resolved in the tables */
399 nvgpu_smp_wmb();
400 arb->current_vf_table = table;
401
402exit_vf_table:
403
404 if (status < 0)
405 nvgpu_clk_arb_set_global_alarm(g,
406 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
407 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
408
409 return status;
410}
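/*
 * Illustrative sketch (not part of the driver): the VF table is double
 * buffered in arb->vf_table_pool[]. The function above always fills the pool
 * entry that is not currently published and only then flips the pointer:
 *
 *	writer (nvgpu_clk_arb_update_vf_table)
 *		old = NV_ACCESS_ONCE(arb->current_vf_table);
 *		new = (old == &arb->vf_table_pool[0]) ?
 *			&arb->vf_table_pool[1] : &arb->vf_table_pool[0];
 *		fill new->gpc2clk_points[] / new->mclk_points[];
 *		nvgpu_smp_wmb();
 *		arb->current_vf_table = new;
 *
 *	reader (e.g. the arbiter callback elsewhere in the driver)
 *		table = NV_ACCESS_ONCE(arb->current_vf_table);
 *		nvgpu_smp_rmb();
 *		use table->...;
 *
 * The nvgpu_smp_wmb()/nvgpu_smp_rmb() pair guarantees that a reader which
 * sees the new pointer also sees the fully written point arrays.
 */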
411
412
413static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb)
414{
415 struct gk20a *g = arb->g;
416 u32 err;
417
418 /* get latest vf curve from pmu */
419 err = clk_vf_point_cache(g);
420 if (err) {
421 nvgpu_err(g, "failed to cache VF table");
422 nvgpu_clk_arb_set_global_alarm(g,
423 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
424 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
425
426 return;
427 }
428 nvgpu_clk_arb_update_vf_table(arb);
429}
430
431u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
432 struct nvgpu_clk_arb_target *target,
433 u32 alarm) {
434
435 struct nvgpu_clk_session *session = dev->session;
436 struct nvgpu_clk_arb *arb = session->g->clk_arb;
437 struct nvgpu_clk_notification *notification;
438
439 u32 queue_alarm_mask = 0;
440 u32 enabled_mask = 0;
441 u32 new_alarms_reported = 0;
442 u32 poll_mask = 0;
443 u32 tail, head;
444 u32 queue_index;
445 size_t size;
446 int index;
447
448 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask);
449 size = arb->notification_queue.size;
450
451	/* copy pending global arbiter notifications into the device queue */
452 do {
453 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
454 /* copy items to the queue */
455 queue_index = nvgpu_atomic_read(&dev->queue.tail);
456 head = dev->arb_queue_head;
457 head = (tail - head) < arb->notification_queue.size ?
458 head : tail - arb->notification_queue.size;
459
460 for (index = head; _WRAPGTEQ(tail, index); index++) {
461 u32 alarm_detected;
462
463 notification = &arb->notification_queue.
464 notifications[(index+1) % size];
465 alarm_detected =
466 NV_ACCESS_ONCE(notification->notification);
467
468 if (!(enabled_mask & alarm_detected))
469 continue;
470
471 queue_index++;
472 dev->queue.notifications[
473 queue_index % dev->queue.size].timestamp =
474 NV_ACCESS_ONCE(notification->timestamp);
475
476 dev->queue.notifications[
477 queue_index % dev->queue.size].notification =
478 alarm_detected;
479
480 queue_alarm_mask |= alarm_detected;
481 }
482 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
483 (int)tail));
484
485 nvgpu_atomic_set(&dev->queue.tail, queue_index);
486 /* update the last notification we processed from global queue */
487
488 dev->arb_queue_head = tail;
489
490 /* Check if current session targets are met */
491 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
492 if ((target->gpc2clk < session->target->gpc2clk)
493 || (target->mclk < session->target->mclk)) {
494
495 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI);
496 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
497 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
498 }
499 }
500
501 /* Check if there is a new VF update */
502 if (queue_alarm_mask & EVENT(VF_UPDATE))
503 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLRDNORM);
504
505	/* Notify sticky alarms that were not reported on the previous run */
506 new_alarms_reported = (queue_alarm_mask |
507 (alarm & ~dev->alarms_reported & queue_alarm_mask));
508
509 if (new_alarms_reported & ~LOCAL_ALARM_MASK) {
510 /* check that we are not re-reporting */
511 if (new_alarms_reported & EVENT(ALARM_GPU_LOST))
512 poll_mask |= NVGPU_POLLHUP;
513
514 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI);
515		/* On the next run do not report global alarms that were already
516		 * reported, but always report ALARM_GPU_LOST
517 */
518 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
519 ~EVENT(ALARM_GPU_LOST);
520 }
521
522 if (poll_mask) {
523 nvgpu_atomic_set(&dev->poll_mask, poll_mask);
524 nvgpu_clk_arb_event_post_event(dev);
525 }
526
527 return new_alarms_reported;
528}
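/*
 * Illustrative sketch (not part of the driver): the clamp on `head` in
 * nvgpu_clk_arb_notify() above protects slow readers. The global queue only
 * holds the most recent `size` notifications, so if a device's private head
 * has fallen further behind the shared tail than that, e.g.
 *
 *	head = 3, tail = 20, size = 8
 *	head = (tail - head) < size ? head : tail - size;	=>  head = 12
 *
 * the device skips straight to the oldest entry that is still valid and the
 * overwritten events are silently dropped for that reader. Each surviving
 * entry is still filtered against dev->enabled_mask before being copied into
 * the per-device queue.
 */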
529
530void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
531{
532 struct nvgpu_clk_arb *arb = g->clk_arb;
533
534 u64 current_mask;
535 u32 refcnt;
536 u32 alarm_mask;
537 u64 new_mask;
538
539 do {
540 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
541 /* atomic operations are strong so they do not need masks */
542
543 refcnt = ((u32) (current_mask >> 32)) + 1;
544 alarm_mask = (u32) (current_mask & ~alarm);
545 new_mask = ((u64) refcnt << 32) | alarm_mask;
546
547 } while (unlikely(current_mask !=
548 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
549 current_mask, new_mask)));
550}
551
552/*
553 * Process one scheduled work item.
554 */
555static void nvgpu_clk_arb_worker_process_item(
556 struct nvgpu_clk_arb_work_item *work_item)
557{
558 struct gk20a *g = work_item->arb->g;
559
560 clk_arb_dbg(g, " ");
561
562 if (work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE)
563 nvgpu_clk_arb_run_vf_table_cb(work_item->arb);
564 else if (work_item->item_type == CLK_ARB_WORK_UPDATE_ARB)
565 g->ops.clk_arb.clk_arb_run_arbiter_cb(work_item->arb);
566}
567
568/**
569 * Tell the worker that one more work item needs to be done.
570 *
571 * Increase the work counter to synchronize the worker with the new work. Wake
572 * up the worker. If the worker was already running, it will handle this work
573 * before going to sleep.
574 */
575static int nvgpu_clk_arb_worker_wakeup(struct gk20a *g)
576{
577 int put;
578
579 clk_arb_dbg(g, " ");
580
581 put = nvgpu_atomic_inc_return(&g->clk_arb_worker.put);
582 nvgpu_cond_signal_interruptible(&g->clk_arb_worker.wq);
583
584 return put;
585}
586
587/**
588 * Test if there is some work pending.
589 *
590 * This is the counterpart of nvgpu_clk_arb_worker_wakeup and is called from
591 * the worker. The worker has an internal work counter which is incremented
592 * once per finished work item. This is compared with the number of queued jobs.
593 */
594static bool nvgpu_clk_arb_worker_pending(struct gk20a *g, int get)
595{
596 bool pending = nvgpu_atomic_read(&g->clk_arb_worker.put) != get;
597
598 /* We don't need barriers because they are implicit in locking */
599 return pending;
600}
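/*
 * Illustrative sketch (not part of the driver): the worker synchronizes on
 * two monotonically increasing counters. Producers bump the shared atomic
 * `put` in nvgpu_clk_arb_worker_wakeup(); the worker thread keeps a private
 * `get` and advances it as items (or spurious wakeups) are handled:
 *
 *	producer				worker
 *	--------				------
 *	nvgpu_atomic_inc_return(&put);		while (put != get) {
 *	nvgpu_cond_signal_interruptible(&wq);		process one item;
 *							get++;
 *						}
 *
 * "Work pending" is therefore simply put != get; the list itself is protected
 * separately by items_lock.
 */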
601
602/**
603 * Process the queued work items for the worker thread serially.
604 *
605 * Flush all the work items in the queue one by one. This may block timeout
606 * handling for a short while, as these are serialized.
607 */
608static void nvgpu_clk_arb_worker_process(struct gk20a *g, int *get)
609{
610
611 while (nvgpu_clk_arb_worker_pending(g, *get)) {
612 struct nvgpu_clk_arb_work_item *work_item = NULL;
613
614 nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
615 if (!nvgpu_list_empty(&g->clk_arb_worker.items)) {
616 work_item = nvgpu_list_first_entry(&g->clk_arb_worker.items,
617 nvgpu_clk_arb_work_item, worker_item);
618 nvgpu_list_del(&work_item->worker_item);
619 }
620 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
621
622 if (!work_item) {
623 /*
624			 * Woke up for some other reason, but currently the only
625			 * expected wakeup source is a work item being added to
626			 * the list, so warn and ack the message.
627 */
628 nvgpu_warn(g, "Spurious worker event!");
629 ++*get;
630 break;
631 }
632
633 nvgpu_clk_arb_worker_process_item(work_item);
634 ++*get;
635 }
636}
637
638/*
639 * Process all work items found in the clk arbiter work queue.
640 */
641static int nvgpu_clk_arb_poll_worker(void *arg)
642{
643 struct gk20a *g = (struct gk20a *)arg;
644 struct gk20a_worker *worker = &g->clk_arb_worker;
645 int get = 0;
646
647 clk_arb_dbg(g, " ");
648
649 while (!nvgpu_thread_should_stop(&worker->poll_task)) {
650 int ret;
651
652 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
653 &worker->wq,
654 nvgpu_clk_arb_worker_pending(g, get), 0);
655
656 if (nvgpu_thread_should_stop(&worker->poll_task)) {
657 break;
658 }
659
660 if (ret == 0)
661 nvgpu_clk_arb_worker_process(g, &get);
662 }
663 return 0;
664}
665
666static int __nvgpu_clk_arb_worker_start(struct gk20a *g)
667{
668 char thread_name[64];
669 int err = 0;
670
671 if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task))
672 return err;
673
674 nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
675
676 /*
677 * Mutexes have implicit barriers, so there is no risk of a thread
678	 * having a stale copy of the poll_task variable: the call to
679	 * nvgpu_thread_is_running below re-reads it.
680 */
681
682 if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task)) {
683 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
684 return err;
685 }
686
687 snprintf(thread_name, sizeof(thread_name),
688 "nvgpu_clk_arb_poll_%s", g->name);
689
690 err = nvgpu_thread_create(&g->clk_arb_worker.poll_task, g,
691 nvgpu_clk_arb_poll_worker, thread_name);
692
693 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
694 return err;
695}
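/*
 * Illustrative sketch (not part of the driver): __nvgpu_clk_arb_worker_start()
 * above is a double-checked start. The unlocked check lets the common case
 * (thread already running) return without taking start_lock; the second check
 * under the mutex closes the race where two callers pass the first check at
 * the same time:
 *
 *	if (running)			fast path, no lock
 *		return 0;
 *	lock(start_lock);
 *	if (running) {			re-check under the lock
 *		unlock(start_lock);
 *		return 0;
 *	}
 *	create poll thread;
 *	unlock(start_lock);
 *
 * Only one of the racing callers ever reaches nvgpu_thread_create().
 */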
696
697/**
698 * Append a work item to the worker's list.
699 *
700 * This adds the work item to the end of the list and wakes the worker
701 * up immediately. If the work item is already in the list, it is not added
702 * again, because in that case it has been scheduled already but has not yet
703 * been processed.
704 */
705void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
706 struct nvgpu_clk_arb_work_item *work_item)
707{
708 clk_arb_dbg(g, " ");
709
710 /*
711 * Warn if worker thread cannot run
712 */
713 if (WARN_ON(__nvgpu_clk_arb_worker_start(g))) {
714 nvgpu_warn(g, "clk arb worker cannot run!");
715 return;
716 }
717
718 nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
719 if (!nvgpu_list_empty(&work_item->worker_item)) {
720 /*
721 * Already queued, so will get processed eventually.
722 * The worker is probably awake already.
723 */
724 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
725 return;
726 }
727 nvgpu_list_add_tail(&work_item->worker_item, &g->clk_arb_worker.items);
728 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
729
730 nvgpu_clk_arb_worker_wakeup(g);
731}
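/*
 * Illustrative sketch (not part of the driver): how the rest of this file
 * uses the enqueue helper. Both work items referenced below are preallocated
 * members of struct nvgpu_clk_arb, so scheduling is allocation-free:
 *
 *	nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item);
 *		schedules a VF table refresh
 *	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 *		schedules an arbiter update pass
 *
 * If an item is already on the list the call is a no-op, so callers never
 * need to track whether a request is outstanding.
 */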
732
733/**
734 * Initialize the clk arb worker's metadata and start the background thread.
735 */
736int nvgpu_clk_arb_worker_init(struct gk20a *g)
737{
738 int err;
739
740 nvgpu_atomic_set(&g->clk_arb_worker.put, 0);
741 nvgpu_cond_init(&g->clk_arb_worker.wq);
742 nvgpu_init_list_node(&g->clk_arb_worker.items);
743 nvgpu_spinlock_init(&g->clk_arb_worker.items_lock);
744 err = nvgpu_mutex_init(&g->clk_arb_worker.start_lock);
745 if (err)
746 goto error_check;
747
748 err = __nvgpu_clk_arb_worker_start(g);
749error_check:
750 if (err) {
751 nvgpu_err(g, "failed to start clk arb poller thread");
752 return err;
753 }
754 return 0;
755}
756
757int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
758{
759 int err = 0;
760
761 if (!g->ops.clk.support_clk_freq_controller ||
762 !g->ops.clk_arb.get_arbiter_clk_domains) {
763 return 0;
764 }
765
766 nvgpu_mutex_acquire(&g->clk_arb_enable_lock);
767
768 err = g->ops.clk_arb.arbiter_clk_init(g);
769
770 nvgpu_mutex_release(&g->clk_arb_enable_lock);
771
772 return err;
773}
774
775bool nvgpu_clk_arb_has_active_req(struct gk20a *g)
776{
777 return (nvgpu_atomic_read(&g->clk_arb_global_nr) > 0);
778}
779
780void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
781{
782 nvgpu_clk_arb_schedule_alarm(g,
783 (0x1UL << NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
784}
785
786void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
787{
788 struct nvgpu_clk_arb *arb = g->clk_arb;
789
790 nvgpu_clk_arb_set_global_alarm(g, alarm);
791 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
792}
793
794static void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
795{
796 nvgpu_atomic_inc(&g->clk_arb_worker.put);
797
798 nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
799 nvgpu_thread_stop(&g->clk_arb_worker.poll_task);
800 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
801}
802
803void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
804{
805 struct nvgpu_clk_arb *arb = g->clk_arb;
806
807 nvgpu_mutex_acquire(&g->clk_arb_enable_lock);
808
809 if (arb) {
810 nvgpu_clk_arb_worker_deinit(g);
811 g->ops.clk_arb.clk_arb_cleanup(g->clk_arb);
812 }
813
814 nvgpu_mutex_release(&g->clk_arb_enable_lock);
815}
816
817int nvgpu_clk_arb_init_session(struct gk20a *g,
818 struct nvgpu_clk_session **_session)
819{
820 struct nvgpu_clk_arb *arb = g->clk_arb;
821 struct nvgpu_clk_session *session = *(_session);
822
823 clk_arb_dbg(g, " ");
824
825 if (!g->ops.clk.support_clk_freq_controller ||
826 !g->ops.clk_arb.get_arbiter_clk_domains) {
827 return 0;
828 }
829
830 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
831 if (!session)
832 return -ENOMEM;
833 session->g = g;
834
835 nvgpu_ref_init(&session->refcount);
836
837 session->zombie = false;
838 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
839 /* make sure that the initialization of the pool is visible
840 * before the update
841 */
842 nvgpu_smp_wmb();
843 session->target = &session->target_pool[0];
844
845 nvgpu_init_list_node(&session->targets);
846 nvgpu_spinlock_init(&session->session_lock);
847
848 nvgpu_spinlock_acquire(&arb->sessions_lock);
849 nvgpu_list_add_tail(&session->link, &arb->sessions);
850 nvgpu_spinlock_release(&arb->sessions_lock);
851
852 *_session = session;
853
854 return 0;
855}
856
857void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
858{
859 struct nvgpu_clk_dev *dev = container_of(refcount,
860 struct nvgpu_clk_dev, refcount);
861 struct nvgpu_clk_session *session = dev->session;
862 struct gk20a *g = session->g;
863
864 nvgpu_clk_notification_queue_free(g, &dev->queue);
865
866 nvgpu_atomic_dec(&g->clk_arb_global_nr);
867 nvgpu_kfree(g, dev);
868}
869
870void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
871{
872 struct nvgpu_clk_session *session = container_of(refcount,
873 struct nvgpu_clk_session, refcount);
874 struct nvgpu_clk_arb *arb = session->g->clk_arb;
875 struct gk20a *g = session->g;
876 struct nvgpu_clk_dev *dev, *tmp;
877
878 clk_arb_dbg(g, " ");
879
880 if (arb) {
881 nvgpu_spinlock_acquire(&arb->sessions_lock);
882 nvgpu_list_del(&session->link);
883 nvgpu_spinlock_release(&arb->sessions_lock);
884 }
885
886 nvgpu_spinlock_acquire(&session->session_lock);
887 nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
888 nvgpu_clk_dev, node) {
889		nvgpu_list_del(&dev->node);
890		nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
891 }
892 nvgpu_spinlock_release(&session->session_lock);
893
894 nvgpu_kfree(g, session);
895}
896
897void nvgpu_clk_arb_release_session(struct gk20a *g,
898 struct nvgpu_clk_session *session)
899{
900 struct nvgpu_clk_arb *arb = g->clk_arb;
901
902 clk_arb_dbg(g, " ");
903
904 session->zombie = true;
905 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
906 if (arb)
907 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
908}
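/*
 * Illustrative sketch (not part of the driver) of the session lifetime,
 * assuming per-fd users take extra references with nvgpu_ref_get():
 *
 *	nvgpu_clk_arb_init_session(g, &session);	refcount = 1,
 *							linked on arb->sessions
 *	... users nvgpu_ref_get()/nvgpu_ref_put() around their accesses ...
 *	nvgpu_clk_arb_release_session(g, session);	marks session->zombie,
 *							drops the initial ref and
 *							schedules an arbiter run
 *	last nvgpu_ref_put()  ->  nvgpu_clk_arb_free_session()
 *				  unlinks from arb->sessions and drops any
 *				  remaining target devs via nvgpu_clk_arb_free_fd
 */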
909
910void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
911{
912 struct nvgpu_clk_arb *arb = g->clk_arb;
913
914 nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item);
915}
916
917/* This function is inherently unsafe to call while the arbiter is running;
918 * the arbiter must be blocked before calling this function.
919 */
920int nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
921{
922 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate);
923}
924
925void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
926{
927 struct nvgpu_clk_arb *arb = g->clk_arb;
928
929 if (lock)
930 nvgpu_mutex_acquire(&arb->pstate_lock);
931 else
932 nvgpu_mutex_release(&arb->pstate_lock);
933}
934
935bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain)
936{
937 u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
938
939 switch (api_domain) {
940 case NVGPU_CLK_DOMAIN_MCLK:
941 return (clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0;
942
943 case NVGPU_CLK_DOMAIN_GPCCLK:
944 return (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK) != 0;
945
946 default:
947 return false;
948 }
949}
950
951int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
952 u16 *min_mhz, u16 *max_mhz)
953{
954 int ret;
955
956 switch (api_domain) {
957 case NVGPU_CLK_DOMAIN_MCLK:
958 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
959 CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz);
960 return ret;
961
962 case NVGPU_CLK_DOMAIN_GPCCLK:
963 ret = g->ops.clk_arb.get_arbiter_clk_range(g,
964 CTRL_CLK_DOMAIN_GPC2CLK, min_mhz, max_mhz);
965 if (!ret) {
966 *min_mhz /= 2;
967 *max_mhz /= 2;
968 }
969 return ret;
970
971 default:
972 return -EINVAL;
973 }
974}
975
976int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
977 u32 api_domain, u32 *max_points, u16 *fpoints)
978{
979 int err;
980 u32 i;
981
982 switch (api_domain) {
983 case NVGPU_CLK_DOMAIN_GPCCLK:
984 err = g->ops.clk_arb.get_arbiter_f_points(g,
985 CTRL_CLK_DOMAIN_GPC2CLK, max_points, fpoints);
986 if (err || !fpoints)
987 return err;
988 for (i = 0; i < *max_points; i++)
989 fpoints[i] /= 2;
990 return 0;
991 case NVGPU_CLK_DOMAIN_MCLK:
992 return g->ops.clk_arb.get_arbiter_f_points(g,
993 CTRL_CLK_DOMAIN_MCLK, max_points, fpoints);
994 default:
995 return -EINVAL;
996 }
997}
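/*
 * Illustrative sketch (not part of the driver): the arbiter works internally
 * in GPC2CLK while the NVGPU_CLK_DOMAIN_GPCCLK API is expressed in GPCCLK,
 * which runs at half the GPC2CLK rate. Every boundary crossing in the helpers
 * above and below therefore halves or doubles the value, e.g. for a
 * 1000..2000 MHz GPC2CLK range:
 *
 *	get_arbiter_clk_range(CTRL_CLK_DOMAIN_GPC2CLK)	-> min = 1000, max = 2000
 *	reported through NVGPU_CLK_DOMAIN_GPCCLK	-> min =  500, max = 1000
 *
 * Frequency points are likewise divided by 2, and the actual/effective
 * frequency queries below divide by 2 (or multiply by 2 in
 * nvgpu_clk_measure_freq()) when converting between the two domains.
 */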
998
999int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
1000 u32 api_domain, u16 *freq_mhz)
1001{
1002 int err = 0;
1003 struct nvgpu_clk_arb_target *target = session->target;
1004
1005 if (!nvgpu_clk_arb_is_valid_domain(session->g, api_domain)) {
1006 return -EINVAL;
1007 }
1008
1009 switch (api_domain) {
1010 case NVGPU_CLK_DOMAIN_MCLK:
1011 *freq_mhz = target->mclk;
1012 break;
1013
1014 case NVGPU_CLK_DOMAIN_GPCCLK:
1015 *freq_mhz = target->gpc2clk / 2ULL;
1016 break;
1017
1018 default:
1019 *freq_mhz = 0;
1020 err = -EINVAL;
1021 }
1022 return err;
1023}
1024
1025int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
1026 u32 api_domain, u16 *freq_mhz)
1027{
1028 struct nvgpu_clk_arb *arb = g->clk_arb;
1029 int err = 0;
1030 struct nvgpu_clk_arb_target *actual = arb->actual;
1031
1032 if (!nvgpu_clk_arb_is_valid_domain(g, api_domain)) {
1033 return -EINVAL;
1034 }
1035
1036 switch (api_domain) {
1037 case NVGPU_CLK_DOMAIN_MCLK:
1038 *freq_mhz = actual->mclk;
1039 break;
1040
1041 case NVGPU_CLK_DOMAIN_GPCCLK:
1042 *freq_mhz = actual->gpc2clk / 2ULL;
1043 break;
1044
1045 default:
1046 *freq_mhz = 0;
1047 err = -EINVAL;
1048 }
1049 return err;
1050}
1051
1052unsigned long nvgpu_clk_measure_freq(struct gk20a *g, u32 api_domain)
1053{
1054 unsigned long freq = 0UL;
1055
1056 switch (api_domain) {
1057 case CTRL_CLK_DOMAIN_GPC2CLK:
1058 freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) * 2UL;
1059 break;
1060 default:
1061 break;
1062 }
1063 return freq;
1064}
1065
1066int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
1067 u32 api_domain, u16 *freq_mhz)
1068{
1069 if (!nvgpu_clk_arb_is_valid_domain(g, api_domain)) {
1070 return -EINVAL;
1071 }
1072
1073 switch (api_domain) {
1074 case NVGPU_CLK_DOMAIN_MCLK:
1075 *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) /
1076 1000000ULL;
1077 return 0;
1078
1079 case NVGPU_CLK_DOMAIN_GPCCLK:
1080 *freq_mhz = g->ops.clk.measure_freq(g,
1081 CTRL_CLK_DOMAIN_GPC2CLK) / 2000000ULL;
1082 return 0;
1083
1084 default:
1085 return -EINVAL;
1086 }
1087}