path: root/drivers/gpu/nvgpu/common/linux
author	Sourab Gupta <sourabg@nvidia.com>	2018-05-04 02:13:33 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-05-18 02:34:32 -0400
commit	fc3ac7d2ae2c7cb856fff58af77bb8d453ff5473 (patch)
tree	412d3ebe84b7ba469b6ff4d49936c4d4b8920c18 /drivers/gpu/nvgpu/common/linux
parent	6a43e51ee362bbf61ca35cda75f31ed3f01862ed (diff)
gpu: nvgpu: move clk_arb.c to common code
Now that clk_arb.c is free of Linux'isms, move it to the clk/
directory.

Jira VQRM-741

Change-Id: I53298c76f834322aa586781cdfd2e6031f4826a1
Signed-off-by: Sourab Gupta <sourabg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1709651
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
-rw-r--r--	drivers/gpu/nvgpu/common/linux/clk_arb.c	1522
-rw-r--r--	drivers/gpu/nvgpu/common/linux/clk_arb_linux.h	163
-rw-r--r--	drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c	1
3 files changed, 0 insertions, 1686 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c
deleted file mode 100644
index 2a6278e8..00000000
--- a/drivers/gpu/nvgpu/common/linux/clk_arb.c
+++ /dev/null
@@ -1,1522 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/bitops.h>
18#include <nvgpu/lock.h>
19#include <nvgpu/kmem.h>
20#include <nvgpu/atomic.h>
21#include <nvgpu/bug.h>
22#include <nvgpu/kref.h>
23#include <nvgpu/log.h>
24#include <nvgpu/barrier.h>
25#include <nvgpu/cond.h>
26#include <nvgpu/list.h>
27#include <nvgpu/clk_arb.h>
28
29#include "gk20a/gk20a.h"
30#include "clk/clk.h"
31#include "clk_arb_linux.h"
32#include "pstate/pstate.h"
33#include "lpwr/lpwr.h"
34#include "volt/volt.h"
35
36int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
37 struct nvgpu_clk_notification_queue *queue,
38 size_t events_number) {
39 queue->notifications = nvgpu_kcalloc(g, events_number,
40 sizeof(struct nvgpu_clk_notification));
41 if (!queue->notifications)
42 return -ENOMEM;
43 queue->size = events_number;
44
45 nvgpu_atomic_set(&queue->head, 0);
46 nvgpu_atomic_set(&queue->tail, 0);
47
48 return 0;
49}
50
51void nvgpu_clk_notification_queue_free(struct gk20a *g,
52 struct nvgpu_clk_notification_queue *queue) {
53 nvgpu_kfree(g, queue->notifications);
54 queue->size = 0;
55 nvgpu_atomic_set(&queue->head, 0);
56 nvgpu_atomic_set(&queue->tail, 0);
57}
58
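/*
 * Append one notification to the fixed-size ring buffer. Only the tail
 * index is advanced (atomically); there is no overflow check, so once
 * the ring wraps, the oldest unconsumed entries are overwritten.
 */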
59static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
60 struct nvgpu_clk_notification_queue *queue,
61 u32 alarm_mask) {
62
63 u32 queue_index;
64 u64 timestamp;
65
66 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
67 /* get current timestamp */
68 timestamp = (u64) sched_clock();
69
70 queue->notifications[queue_index].timestamp = timestamp;
71 queue->notifications[queue_index].notification = alarm_mask;
72
73}
74
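/*
 * Note on the encoding: arb->alarm_mask packs two fields into a single
 * 64-bit atomic. The upper 32 bits hold an update counter that is
 * bumped on every change, and the lower 32 bits hold the alarm bits.
 * The cmpxchg loop retries until the whole 64-bit word is swapped
 * without a concurrent writer racing in between.
 */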
75static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
76{
77 struct nvgpu_clk_arb *arb = g->clk_arb;
78
79 u64 current_mask;
80 u32 refcnt;
81 u32 alarm_mask;
82 u64 new_mask;
83
84 do {
85 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
86 /* atomic operations are strong so they do not need masks */
87
88 refcnt = ((u32) (current_mask >> 32)) + 1;
89 alarm_mask = (u32) (current_mask & ~0) | alarm;
90 new_mask = ((u64) refcnt << 32) | alarm_mask;
91
92 } while (unlikely(current_mask !=
93 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
94 current_mask, new_mask)));
95
96 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm);
97}
98
99
100static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
101{
102 struct gk20a *g = arb->g;
103 struct nvgpu_clk_vf_table *table;
104
105 u32 i, j;
106 int status = -EINVAL;
107 u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
108 u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
109 u16 clk_cur;
110 u32 num_points;
111
112 struct clk_set_info *p5_info, *p0_info;
113
114
115 table = NV_ACCESS_ONCE(arb->current_vf_table);
116 /* make flag visible when all data has resolved in the tables */
117 nvgpu_smp_rmb();
118
119 table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
120 &arb->vf_table_pool[0];
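/*
 * Double-buffering: the table not currently pointed to by
 * current_vf_table is rebuilt in place below, then published
 * atomically via xchg() once complete, so concurrent readers never
 * observe a half-written table.
 */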
121
122 /* Get allowed memory ranges */
123 if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK,
124 &arb->gpc2clk_min,
125 &arb->gpc2clk_max) < 0) {
126 nvgpu_err(g, "failed to fetch GPC2CLK range");
127 goto exit_vf_table;
128 }
129 if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK,
130 &arb->mclk_min,
131 &arb->mclk_max) < 0) {
132 nvgpu_err(g, "failed to fetch MCLK range");
133 goto exit_vf_table;
134 }
135
136 table->gpc2clk_num_points = MAX_F_POINTS;
137 table->mclk_num_points = MAX_F_POINTS;
138
139 if (clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPC2CLK,
140 &table->gpc2clk_num_points, arb->gpc2clk_f_points)) {
141 nvgpu_err(g, "failed to fetch GPC2CLK frequency points");
142 goto exit_vf_table;
143 }
144
145 if (clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_MCLK,
146 &table->mclk_num_points, arb->mclk_f_points)) {
147 nvgpu_err(g, "failed to fetch MCLK frequency points");
148 goto exit_vf_table;
149 }
150 if (!table->mclk_num_points || !table->gpc2clk_num_points) {
151 nvgpu_err(g, "empty queries to f points mclk %d gpc2clk %d",
152 table->mclk_num_points, table->gpc2clk_num_points);
153 status = -EINVAL;
154 goto exit_vf_table;
155 }
156
157 memset(table->mclk_points, 0,
158 table->mclk_num_points*sizeof(struct nvgpu_clk_vf_point));
159 memset(table->gpc2clk_points, 0,
160 table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
161
162 p5_info = pstate_get_clk_set_info(g,
163 CTRL_PERF_PSTATE_P5, clkwhich_mclk);
164 if (!p5_info) {
165 nvgpu_err(g, "failed to get MCLK P5 info");
166 goto exit_vf_table;
167 }
168 p0_info = pstate_get_clk_set_info(g,
169 CTRL_PERF_PSTATE_P0, clkwhich_mclk);
170 if (!p0_info) {
171 nvgpu_err(g, "failed to get MCLK P0 info");
172 goto exit_vf_table;
173 }
174
175 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
176 i < table->mclk_num_points; i++) {
177
178 if ((arb->mclk_f_points[i] >= arb->mclk_min) &&
179 (arb->mclk_f_points[i] <= arb->mclk_max) &&
180 (arb->mclk_f_points[i] != clk_cur)) {
181
182 table->mclk_points[j].mem_mhz = arb->mclk_f_points[i];
183 mclk_voltuv = mclk_voltuv_sram = 0;
184
185 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
186 &table->mclk_points[j].mem_mhz, &mclk_voltuv,
187 CTRL_VOLT_DOMAIN_LOGIC);
188 if (status < 0) {
189 nvgpu_err(g,
190 "failed to get MCLK LOGIC voltage");
191 goto exit_vf_table;
192 }
193 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
194 &table->mclk_points[j].mem_mhz,
195 &mclk_voltuv_sram,
196 CTRL_VOLT_DOMAIN_SRAM);
197 if (status < 0) {
198 nvgpu_err(g, "failed to get MCLK SRAM voltage");
199 goto exit_vf_table;
200 }
201
202 table->mclk_points[j].uvolt = mclk_voltuv;
203 table->mclk_points[j].uvolt_sram = mclk_voltuv_sram;
204 clk_cur = table->mclk_points[j].mem_mhz;
205
206 if ((clk_cur >= p5_info->min_mhz) &&
207 (clk_cur <= p5_info->max_mhz))
208 VF_POINT_SET_PSTATE_SUPPORTED(
209 &table->mclk_points[j],
210 CTRL_PERF_PSTATE_P5);
211 if ((clk_cur >= p0_info->min_mhz) &&
212 (clk_cur <= p0_info->max_mhz))
213 VF_POINT_SET_PSTATE_SUPPORTED(
214 &table->mclk_points[j],
215 CTRL_PERF_PSTATE_P0);
216
217 j++;
218 num_points++;
219
220 }
221 }
222 table->mclk_num_points = num_points;
223
224 p5_info = pstate_get_clk_set_info(g,
225 CTRL_PERF_PSTATE_P5, clkwhich_gpc2clk);
226 if (!p5_info) {
227 status = -EINVAL;
228 nvgpu_err(g, "failed to get GPC2CLK P5 info");
229 goto exit_vf_table;
230 }
231
232 p0_info = pstate_get_clk_set_info(g,
233 CTRL_PERF_PSTATE_P0, clkwhich_gpc2clk);
234 if (!p0_info) {
235 status = -EINVAL;
236 nvgpu_err(g, "failed to get GPC2CLK P0 info");
237 goto exit_vf_table;
238 }
239
240 /* GPC2CLK needs to be checked in two passes. The first determines the
241 * relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the
242 * second verifies that the clocks minimum is satisfied and sets
243 * the voltages
244 */
245 for (i = 0, j = 0, num_points = 0, clk_cur = 0;
246 i < table->gpc2clk_num_points; i++) {
247 struct set_fll_clk setfllclk;
248
249 if ((arb->gpc2clk_f_points[i] >= arb->gpc2clk_min) &&
250 (arb->gpc2clk_f_points[i] <= arb->gpc2clk_max) &&
251 (arb->gpc2clk_f_points[i] != clk_cur)) {
252
253 table->gpc2clk_points[j].gpc_mhz =
254 arb->gpc2clk_f_points[i];
255 setfllclk.gpc2clkmhz = arb->gpc2clk_f_points[i];
256 status = clk_get_fll_clks(g, &setfllclk);
257 if (status < 0) {
258 nvgpu_err(g,
259 "failed to get GPC2CLK slave clocks");
260 goto exit_vf_table;
261 }
262
263 table->gpc2clk_points[j].sys_mhz =
264 setfllclk.sys2clkmhz;
265 table->gpc2clk_points[j].xbar_mhz =
266 setfllclk.xbar2clkmhz;
267
268 clk_cur = table->gpc2clk_points[j].gpc_mhz;
269
270 if ((clk_cur >= p5_info->min_mhz) &&
271 (clk_cur <= p5_info->max_mhz))
272 VF_POINT_SET_PSTATE_SUPPORTED(
273 &table->gpc2clk_points[j],
274 CTRL_PERF_PSTATE_P5);
275 if ((clk_cur >= p0_info->min_mhz) &&
276 (clk_cur <= p0_info->max_mhz))
277 VF_POINT_SET_PSTATE_SUPPORTED(
278 &table->gpc2clk_points[j],
279 CTRL_PERF_PSTATE_P0);
280
281 j++;
282 num_points++;
283 }
284 }
285 table->gpc2clk_num_points = num_points;
286
287 /* Second pass */
288 for (i = 0, j = 0; i < table->gpc2clk_num_points; i++) {
289
290 u16 alt_gpc2clk = table->gpc2clk_points[i].gpc_mhz;
291
292 gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
293
294 /* Check sysclk */
295 p5_info = pstate_get_clk_set_info(g,
296 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
297 clkwhich_sys2clk);
298 if (!p5_info) {
299 status = -EINVAL;
300 nvgpu_err(g, "failed to get SYS2CLK P5 info");
301 goto exit_vf_table;
302 }
303
304 /* sys2clk below clk min, need to find correct clock */
305 if (table->gpc2clk_points[i].sys_mhz < p5_info->min_mhz) {
306 for (j = i + 1; j < table->gpc2clk_num_points; j++) {
307
308 if (table->gpc2clk_points[j].sys_mhz >=
309 p5_info->min_mhz) {
310
311
312 table->gpc2clk_points[i].sys_mhz =
313 p5_info->min_mhz;
314
315 alt_gpc2clk = alt_gpc2clk <
316 table->gpc2clk_points[j].
317 gpc_mhz ?
318 table->gpc2clk_points[j].
319 gpc_mhz :
320 alt_gpc2clk;
321 break;
322 }
323 }
324 /* no VF exists that satisfies condition */
325 if (j == table->gpc2clk_num_points) {
326 nvgpu_err(g, "NO SYS2CLK VF point possible");
327 status = -EINVAL;
328 goto exit_vf_table;
329 }
330 }
331
332 /* Check xbarclk */
333 p5_info = pstate_get_clk_set_info(g,
334 VF_POINT_GET_PSTATE(&table->gpc2clk_points[i]),
335 clkwhich_xbar2clk);
336 if (!p5_info) {
337 status = -EINVAL;
338 nvgpu_err(g, "failed to get XBAR2CLK P5 info");
339 goto exit_vf_table;
340 }
341
342 /* xbar2clk below clk min, need to find correct clock */
343 if (table->gpc2clk_points[i].xbar_mhz < p5_info->min_mhz) {
344 for (j = i; j < table->gpc2clk_num_points; j++) {
345 if (table->gpc2clk_points[j].xbar_mhz >=
346 p5_info->min_mhz) {
347
348 table->gpc2clk_points[i].xbar_mhz =
349 p5_info->min_mhz;
350
351 alt_gpc2clk = alt_gpc2clk <
352 table->gpc2clk_points[j].
353 gpc_mhz ?
354 table->gpc2clk_points[j].
355 gpc_mhz :
356 alt_gpc2clk;
357 break;
358 }
359 }
360 /* no VF exists that satisfies condition */
361 if (j == table->gpc2clk_num_points) {
362 status = -EINVAL;
363 nvgpu_err(g, "NO XBAR2CLK VF point possible");
364
365 goto exit_vf_table;
366 }
367 }
368
369 /* Calculate voltages */
370 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
371 &alt_gpc2clk, &gpc2clk_voltuv,
372 CTRL_VOLT_DOMAIN_LOGIC);
373 if (status < 0) {
374 nvgpu_err(g, "failed to get GPC2CLK LOGIC voltage");
375 goto exit_vf_table;
376 }
377
378 status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
379 &alt_gpc2clk,
380 &gpc2clk_voltuv_sram,
381 CTRL_VOLT_DOMAIN_SRAM);
382 if (status < 0) {
383 nvgpu_err(g, "failed to get GPC2CLK SRAM voltage");
384 goto exit_vf_table;
385 }
386
387 table->gpc2clk_points[i].uvolt = gpc2clk_voltuv;
388 table->gpc2clk_points[i].uvolt_sram = gpc2clk_voltuv_sram;
389 }
390
391 /* make table visible when all data has resolved in the tables */
392 nvgpu_smp_wmb();
393 xchg(&arb->current_vf_table, table);
394
395exit_vf_table:
396
397 if (status < 0)
398 nvgpu_clk_arb_set_global_alarm(g,
399 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
400 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
401
402 return status;
403}
404
405
406static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb)
407{
408 struct gk20a *g = arb->g;
409 u32 err;
410
411 /* get latest vf curve from pmu */
412 err = clk_vf_point_cache(g);
413 if (err) {
414 nvgpu_err(g, "failed to cache VF table");
415 nvgpu_clk_arb_set_global_alarm(g,
416 EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
417 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
418
419 return;
420 }
421 nvgpu_clk_arb_update_vf_table(arb);
422}
423
424static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb,
425 u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk,
426 u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram)
427{
428 u16 gpc2clk_target, mclk_target;
429 u32 gpc2clk_voltuv, gpc2clk_voltuv_sram;
430 u32 mclk_voltuv, mclk_voltuv_sram;
431 u32 pstate = VF_POINT_INVALID_PSTATE;
432 struct nvgpu_clk_vf_table *table;
433 u32 index, index_mclk;
434 struct nvgpu_clk_vf_point *mclk_vf = NULL;
435
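/*
 * Seqlock-style read loop: snapshot the current VF table pointer,
 * select a point from it, and redo the whole selection if the
 * arbiter published a new table in the meantime.
 */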
436 do {
437 gpc2clk_target = *gpc2clk;
438 mclk_target = *mclk;
439 gpc2clk_voltuv = 0;
440 gpc2clk_voltuv_sram = 0;
441 mclk_voltuv = 0;
442 mclk_voltuv_sram = 0;
443
444 table = NV_ACCESS_ONCE(arb->current_vf_table);
445 /* pointer to table can be updated by callback */
446 nvgpu_smp_rmb();
447
448 if (!table)
449 continue;
450 if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) {
451 nvgpu_err(arb->g, "found empty table");
452 goto find_exit;
453 }
454 /* First we check MCLK to find out which PSTATE we
455 * are requesting, and from there try to find the minimum
456 * GPC2CLK on the same PSTATE that satisfies the request.
457 * If no GPC2CLK can be found, then we need to up the PSTATE
458 */
459
460recalculate_vf_point:
461 for (index = 0; index < table->mclk_num_points; index++) {
462 if (table->mclk_points[index].mem_mhz >= mclk_target) {
463 mclk_vf = &table->mclk_points[index];
464 break;
465 }
466 }
467 if (index == table->mclk_num_points) {
468 mclk_vf = &table->mclk_points[index-1];
469 index = table->mclk_num_points - 1;
470 }
471 index_mclk = index;
472
473 /* round up the freq requests */
474 for (index = 0; index < table->gpc2clk_num_points; index++) {
475 pstate = VF_POINT_COMMON_PSTATE(
476 &table->gpc2clk_points[index], mclk_vf);
477
478 if ((table->gpc2clk_points[index].gpc_mhz >=
479 gpc2clk_target) &&
480 (pstate != VF_POINT_INVALID_PSTATE)) {
481 gpc2clk_target =
482 table->gpc2clk_points[index].gpc_mhz;
483 *sys2clk =
484 table->gpc2clk_points[index].sys_mhz;
485 *xbar2clk =
486 table->gpc2clk_points[index].xbar_mhz;
487
488 gpc2clk_voltuv =
489 table->gpc2clk_points[index].uvolt;
490 gpc2clk_voltuv_sram =
491 table->gpc2clk_points[index].uvolt_sram;
492 break;
493 }
494 }
495
496 if (index == table->gpc2clk_num_points) {
497 pstate = VF_POINT_COMMON_PSTATE(
498 &table->gpc2clk_points[index-1], mclk_vf);
499 if (pstate != VF_POINT_INVALID_PSTATE) {
500 gpc2clk_target =
501 table->gpc2clk_points[index-1].gpc_mhz;
502 *sys2clk =
503 table->gpc2clk_points[index-1].sys_mhz;
504 *xbar2clk =
505 table->gpc2clk_points[index-1].xbar_mhz;
506
507 gpc2clk_voltuv =
508 table->gpc2clk_points[index-1].uvolt;
509 gpc2clk_voltuv_sram =
510 table->gpc2clk_points[index-1].
511 uvolt_sram;
512 } else if (index_mclk >= table->mclk_num_points - 1) {
513 /* There is no available combination of MCLK
514 * and GPC2CLK, we need to fail this
515 */
516 gpc2clk_target = 0;
517 mclk_target = 0;
518 pstate = VF_POINT_INVALID_PSTATE;
519 goto find_exit;
520 } else {
521 /* recalculate with higher PSTATE */
522 gpc2clk_target = *gpc2clk;
523 mclk_target = table->mclk_points[index_mclk+1].
524 mem_mhz;
525 goto recalculate_vf_point;
526 }
527 }
528
529 mclk_target = mclk_vf->mem_mhz;
530 mclk_voltuv = mclk_vf->uvolt;
531 mclk_voltuv_sram = mclk_vf->uvolt_sram;
532
533 } while (!table ||
534 (NV_ACCESS_ONCE(arb->current_vf_table) != table));
535
536find_exit:
537 *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
538 *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
539 gpc2clk_voltuv_sram : mclk_voltuv_sram;
540 /* noise unaware vmin */
541 *nuvmin = mclk_voltuv;
542 *nuvmin_sram = mclk_voltuv_sram;
543 *gpc2clk = gpc2clk_target < *gpc2clk ? gpc2clk_target : *gpc2clk;
544 *mclk = mclk_target;
545 return pstate;
546}
547
548static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
549 u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
550 u32 voltuv_sram)
551{
552 struct set_fll_clk fllclk;
553 struct nvgpu_clk_arb *arb = g->clk_arb;
554 int status;
555
556 fllclk.gpc2clkmhz = gpc2clk_target;
557 fllclk.sys2clkmhz = sys2clk_target;
558 fllclk.xbar2clkmhz = xbar2clk_target;
559
560 fllclk.voltuv = voltuv;
561
562 /* if voltage ascends we do:
563 * (1) FLL change
564 * (2) Voltage change
565 * (3) MCLK change
566 * If it goes down
567 * (1) MCLK change
568 * (2) Voltage change
569 * (3) FLL change
570 */
571
572 /* descending */
573 if (voltuv < arb->voltuv_actual) {
574 status = g->ops.clk.mclk_change(g, mclk_target);
575 if (status < 0)
576 return status;
577
578 status = volt_set_voltage(g, voltuv, voltuv_sram);
579 if (status < 0)
580 return status;
581
582 status = clk_set_fll_clks(g, &fllclk);
583 if (status < 0)
584 return status;
585 } else {
586 status = clk_set_fll_clks(g, &fllclk);
587 if (status < 0)
588 return status;
589
590 status = volt_set_voltage(g, voltuv, voltuv_sram);
591 if (status < 0)
592 return status;
593
594 status = g->ops.clk.mclk_change(g, mclk_target);
595 if (status < 0)
596 return status;
597 }
598
599 return 0;
600}
601
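/*
 * Fan out pending arbiter notifications to a single file descriptor:
 * copy the alarms this fd has enabled into its private queue, then
 * derive a poll mask (POLLRDNORM for VF updates, POLLPRI for alarms,
 * POLLHUP once the GPU is lost) and wake any waiting poll()ers.
 */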
602static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
603 struct nvgpu_clk_arb_target *target,
604 u32 alarm) {
605
606 struct nvgpu_clk_session *session = dev->session;
607 struct nvgpu_clk_arb *arb = session->g->clk_arb;
608 struct nvgpu_clk_notification *notification;
609
610 u32 queue_alarm_mask = 0;
611 u32 enabled_mask = 0;
612 u32 new_alarms_reported = 0;
613 u32 poll_mask = 0;
614 u32 tail, head;
615 u32 queue_index;
616 size_t size;
617 int index;
618
619 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask);
620 size = arb->notification_queue.size;
621
622 /* queue global arbiter notifications in buffer */
623 do {
624 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
625 /* copy items to the queue */
626 queue_index = nvgpu_atomic_read(&dev->queue.tail);
627 head = dev->arb_queue_head;
628 head = (tail - head) < arb->notification_queue.size ?
629 head : tail - arb->notification_queue.size;
630
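/*
 * If this fd fell more than one full ring behind, replay only the
 * newest queue-size entries; older slots have already been
 * overwritten by the producer.
 */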
631 for (index = head; _WRAPGTEQ(tail, index); index++) {
632 u32 alarm_detected;
633
634 notification = &arb->notification_queue.
635 notifications[(index+1) % size];
636 alarm_detected =
637 NV_ACCESS_ONCE(notification->notification);
638
639 if (!(enabled_mask & alarm_detected))
640 continue;
641
642 queue_index++;
643 dev->queue.notifications[
644 queue_index % dev->queue.size].timestamp =
645 NV_ACCESS_ONCE(notification->timestamp);
646
647 dev->queue.notifications[
648 queue_index % dev->queue.size].notification =
649 alarm_detected;
650
651 queue_alarm_mask |= alarm_detected;
652 }
653 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
654 (int)tail));
655
656 nvgpu_atomic_set(&dev->queue.tail, queue_index);
657 /* update the last notification we processed from global queue */
658
659 dev->arb_queue_head = tail;
660
661 /* Check if current session targets are met */
662 if (enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) {
663 if ((target->gpc2clk < session->target->gpc2clk)
664 || (target->mclk < session->target->mclk)) {
665
666 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI);
667 nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
668 EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
669 }
670 }
671
672 /* Check if there is a new VF update */
673 if (queue_alarm_mask & EVENT(VF_UPDATE))
674 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLRDNORM);
675
676 /* Notify sticky alarms that were not reported on the previous run */
677 new_alarms_reported = (queue_alarm_mask |
678 (alarm & ~dev->alarms_reported & queue_alarm_mask));
679
680 if (new_alarms_reported & ~LOCAL_ALARM_MASK) {
681 /* check that we are not re-reporting */
682 if (new_alarms_reported & EVENT(ALARM_GPU_LOST))
683 poll_mask |= NVGPU_POLLHUP;
684
685 poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI);
686 /* On next run do not report global alarms that were already
687 * reported, but report SHUTDOWN always
688 */
689 dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
690 ~EVENT(ALARM_GPU_LOST);
691 }
692
693 if (poll_mask) {
694 nvgpu_atomic_set(&dev->poll_mask, poll_mask);
695 nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
696 }
697
698 return new_alarms_reported;
699}
700
701static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
702{
703 struct nvgpu_clk_arb *arb = g->clk_arb;
704
705 u64 current_mask;
706 u32 refcnt;
707 u32 alarm_mask;
708 u64 new_mask;
709
710 do {
711 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
712 /* atomic operations are strong so they do not need masks */
713
714 refcnt = ((u32) (current_mask >> 32)) + 1;
715 alarm_mask = (u32) (current_mask & ~alarm);
716 new_mask = ((u64) refcnt << 32) | alarm_mask;
717
718 } while (unlikely(current_mask !=
719 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
720 current_mask, new_mask)));
721}
722
723static void nvgpu_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
724{
725 struct nvgpu_clk_session *session;
726 struct nvgpu_clk_dev *dev;
727 struct nvgpu_clk_dev *tmp;
728 struct nvgpu_clk_arb_target *target, *actual;
729 struct gk20a *g = arb->g;
730
731 u32 pstate = VF_POINT_INVALID_PSTATE;
732 u32 voltuv, voltuv_sram;
733 bool mclk_set, gpc2clk_set;
734 u32 nuvmin, nuvmin_sram;
735
736 u32 alarms_notified = 0;
737 u32 current_alarm;
738 int status = 0;
739
740 /* Temporary variables for checking target frequency */
741 u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target;
742 u16 gpc2clk_session_target, mclk_session_target;
743
744#ifdef CONFIG_DEBUG_FS
745 u64 t0, t1;
746 struct nvgpu_clk_arb_debug *debug;
747
748#endif
749
750 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
751
752 /* bail out if gpu is down */
753 if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST))
754 goto exit_arb;
755
756#ifdef CONFIG_DEBUG_FS
757 g->ops.bus.read_ptimer(g, &t0);
758#endif
759
760 /* Only one arbiter should be running */
761 gpc2clk_target = 0;
762 mclk_target = 0;
763
764 nvgpu_spinlock_acquire(&arb->sessions_lock);
765 nvgpu_list_for_each_entry(session, &arb->sessions,
766 nvgpu_clk_session, link) {
767 if (!session->zombie) {
768 mclk_set = false;
769 gpc2clk_set = false;
770 target = (session->target == &session->target_pool[0] ?
771 &session->target_pool[1] :
772 &session->target_pool[0]);
773 nvgpu_spinlock_acquire(&session->session_lock);
774 if (!nvgpu_list_empty(&session->targets)) {
775 /* Copy over state */
776 target->mclk = session->target->mclk;
777 target->gpc2clk = session->target->gpc2clk;
778 /* Query the latest committed request */
779 nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
780 nvgpu_clk_dev, node) {
781 if (!mclk_set && dev->mclk_target_mhz) {
782 target->mclk =
783 dev->mclk_target_mhz;
784 mclk_set = true;
785 }
786 if (!gpc2clk_set &&
787 dev->gpc2clk_target_mhz) {
788 target->gpc2clk =
789 dev->gpc2clk_target_mhz;
790 gpc2clk_set = true;
791 }
792 nvgpu_ref_get(&dev->refcount);
793 nvgpu_list_del(&dev->node);
794 nvgpu_spinlock_acquire(&arb->requests_lock);
795 nvgpu_list_add(&dev->node, &arb->requests);
796 nvgpu_spinlock_release(&arb->requests_lock);
797 }
798 xchg(&session->target, target);
799 }
800 nvgpu_spinlock_release(&session->session_lock);
801
802 mclk_target = mclk_target > session->target->mclk ?
803 mclk_target : session->target->mclk;
804
805 gpc2clk_target =
806 gpc2clk_target > session->target->gpc2clk ?
807 gpc2clk_target : session->target->gpc2clk;
808 }
809 }
810 nvgpu_spinlock_release(&arb->sessions_lock);
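/*
 * Arbitration policy: take the highest frequency requested across all
 * live sessions, fall back to the per-domain default when no session
 * set a target, and clamp to the supported [min, max] range before
 * the VF-point lookup below.
 */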
811
812 gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
813 arb->gpc2clk_default_mhz;
814
815 if (gpc2clk_target < arb->gpc2clk_min)
816 gpc2clk_target = arb->gpc2clk_min;
817
818 if (gpc2clk_target > arb->gpc2clk_max)
819 gpc2clk_target = arb->gpc2clk_max;
820
821 mclk_target = (mclk_target > 0) ? mclk_target :
822 arb->mclk_default_mhz;
823
824 if (mclk_target < arb->mclk_min)
825 mclk_target = arb->mclk_min;
826
827 if (mclk_target > arb->mclk_max)
828 mclk_target = arb->mclk_max;
829
830 sys2clk_target = 0;
831 xbar2clk_target = 0;
832
833 gpc2clk_session_target = gpc2clk_target;
834 mclk_session_target = mclk_target;
835
836 /* Query the table for the closest vf point to program */
837 pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target,
838 &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv,
839 &voltuv_sram, &nuvmin, &nuvmin_sram);
840
841 if (pstate == VF_POINT_INVALID_PSTATE) {
842 arb->status = -EINVAL;
843 /* make status visible */
844 nvgpu_smp_mb();
845 goto exit_arb;
846 }
847
848 if ((gpc2clk_target < gpc2clk_session_target) ||
849 (mclk_target < mclk_session_target))
850 nvgpu_clk_arb_set_global_alarm(g,
851 EVENT(ALARM_TARGET_VF_NOT_POSSIBLE));
852
853 if ((arb->actual->gpc2clk == gpc2clk_target) &&
854 (arb->actual->mclk == mclk_target) &&
855 (arb->voltuv_actual == voltuv)) {
856 goto exit_arb;
857 }
858
859 /* Program clocks */
860 /* A change in either mclk or gpc2clk may require a change in voltage */
861
862 nvgpu_mutex_acquire(&arb->pstate_lock);
863 status = nvgpu_lpwr_disable_pg(g, false);
864
865 status = clk_pmu_freq_controller_load(g, false,
866 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL);
867 if (status < 0) {
868 arb->status = status;
869 nvgpu_mutex_release(&arb->pstate_lock);
870
871 /* make status visible */
872 nvgpu_smp_mb();
873 goto exit_arb;
874 }
875 status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram);
876 if (status < 0) {
877 arb->status = status;
878 nvgpu_mutex_release(&arb->pstate_lock);
879
880 /* make status visible */
881 nvgpu_smp_mb();
882 goto exit_arb;
883 }
884
885 status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target,
886 sys2clk_target, xbar2clk_target, mclk_target, voltuv,
887 voltuv_sram);
888 if (status < 0) {
889 arb->status = status;
890 nvgpu_mutex_release(&arb->pstate_lock);
891
892 /* make status visible */
893 nvgpu_smp_mb();
894 goto exit_arb;
895 }
896
897 status = clk_pmu_freq_controller_load(g, true,
898 CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL);
899 if (status < 0) {
900 arb->status = status;
901 nvgpu_mutex_release(&arb->pstate_lock);
902
903 /* make status visible */
904 nvgpu_smp_mb();
905 goto exit_arb;
906 }
907
908 status = nvgpu_lwpr_mclk_change(g, pstate);
909 if (status < 0) {
910 arb->status = status;
911 nvgpu_mutex_release(&arb->pstate_lock);
912
913 /* make status visible */
914 nvgpu_smp_mb();
915 goto exit_arb;
916 }
917
918 actual = NV_ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ?
919 &arb->actual_pool[1] : &arb->actual_pool[0];
920
921 /* do not reorder this pointer */
922 nvgpu_smp_rmb();
923 actual->gpc2clk = gpc2clk_target;
924 actual->mclk = mclk_target;
925 arb->voltuv_actual = voltuv;
926 actual->pstate = pstate;
927 arb->status = status;
928
929 /* Make changes visible to other threads */
930 nvgpu_smp_wmb();
931 xchg(&arb->actual, actual);
932
933 status = nvgpu_lpwr_enable_pg(g, false);
934 if (status < 0) {
935 arb->status = status;
936 nvgpu_mutex_release(&arb->pstate_lock);
937
938 /* make status visible */
939 nvgpu_smp_mb();
940 goto exit_arb;
941 }
942
943 /* status must be visible before atomic inc */
944 nvgpu_smp_wmb();
945 nvgpu_atomic_inc(&arb->req_nr);
946
947 /* Unlock pstate change for PG */
948 nvgpu_mutex_release(&arb->pstate_lock);
949
950 /* VF Update complete */
951 nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE));
952
953 nvgpu_cond_signal_interruptible(&arb->request_wq);
954
955#ifdef CONFIG_DEBUG_FS
956 g->ops.bus.read_ptimer(g, &t1);
957
958 debug = arb->debug == &arb->debug_pool[0] ?
959 &arb->debug_pool[1] : &arb->debug_pool[0];
960
961 memcpy(debug, arb->debug, sizeof(arb->debug_pool[0]));
962 debug->switch_num++;
963
964 if (debug->switch_num == 1) {
965 debug->switch_max = debug->switch_min =
966 debug->switch_avg = (t1-t0)/1000;
967 debug->switch_std = 0;
968 } else {
969 s64 prev_avg;
970 s64 curr = (t1-t0)/1000;
971
972 debug->switch_max = curr > debug->switch_max ?
973 curr : debug->switch_max;
974 debug->switch_min = debug->switch_min ?
975 (curr < debug->switch_min ?
976 curr : debug->switch_min) : curr;
977 prev_avg = debug->switch_avg;
978 debug->switch_avg = (curr +
979 (debug->switch_avg * (debug->switch_num-1))) /
980 debug->switch_num;
981 debug->switch_std +=
982 (curr - debug->switch_avg) * (curr - prev_avg);
983 }
984 /* commit changes before exchanging debug pointer */
985 nvgpu_smp_wmb();
986 xchg(&arb->debug, debug);
987#endif
988
989exit_arb:
990 if (status < 0) {
991 nvgpu_err(g, "Error in arbiter update");
992 nvgpu_clk_arb_set_global_alarm(g,
993 EVENT(ALARM_CLOCK_ARBITER_FAILED));
994 }
995
996 current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask);
997 /* notify completion for all requests */
998 nvgpu_spinlock_acquire(&arb->requests_lock);
999 nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests,
1000 nvgpu_clk_dev, node) {
1001 nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM);
1002 nvgpu_cond_signal_interruptible(&dev->readout_wq);
1003 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1004 nvgpu_list_del(&dev->node);
1005 }
1006 nvgpu_spinlock_release(&arb->requests_lock);
1007
1008 nvgpu_atomic_set(&arb->notification_queue.head,
1009 nvgpu_atomic_read(&arb->notification_queue.tail));
1010 /* notify event for all users */
1011 nvgpu_spinlock_acquire(&arb->users_lock);
1012 nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) {
1013 alarms_notified |=
1014 nvgpu_clk_arb_notify(dev, arb->actual, current_alarm);
1015 }
1016 nvgpu_spinlock_release(&arb->users_lock);
1017
1018 /* clear alarms */
1019 nvgpu_clk_arb_clear_global_alarm(g, alarms_notified &
1020 ~EVENT(ALARM_GPU_LOST));
1021}
1022
1023/*
1024 * Process one scheduled work item.
1025 */
1026static void nvgpu_clk_arb_worker_process_item(
1027 struct nvgpu_clk_arb_work_item *work_item)
1028{
1029 nvgpu_log(work_item->arb->g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1030
1031 if (work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE)
1032 nvgpu_clk_arb_run_vf_table_cb(work_item->arb);
1033 else if (work_item->item_type == CLK_ARB_WORK_UPDATE_ARB)
1034 nvgpu_clk_arb_run_arbiter_cb(work_item->arb);
1035}
1036
1037/**
1038 * Tell the worker that one more work item needs to be handled.
1039 *
1040 * Increase the work counter to synchronize the worker with the new work. Wake
1041 * up the worker. If the worker was already running, it will handle this work
1042 * before going to sleep.
1043 */
1044static int nvgpu_clk_arb_worker_wakeup(struct gk20a *g)
1045{
1046 int put;
1047
1048 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1049
1050 put = nvgpu_atomic_inc_return(&g->clk_arb_worker.put);
1051 nvgpu_cond_signal_interruptible(&g->clk_arb_worker.wq);
1052
1053 return put;
1054}
1055
1056/**
1057 * Test if there is some work pending.
1058 *
1059 * This is a pair for nvgpu_clk_arb_worker_wakeup to be called from the
1060 * worker. The worker has an internal work counter which is incremented once
1061 * per finished work item. This is compared with the number of queued jobs.
1062 */
1063static bool nvgpu_clk_arb_worker_pending(struct gk20a *g, int get)
1064{
1065 bool pending = nvgpu_atomic_read(&g->clk_arb_worker.put) != get;
1066
1067 /* We don't need barriers because they are implicit in locking */
1068 return pending;
1069}
1070
1071/**
1072 * Process the queued works for the worker thread serially.
1073 *
1074 * Flush all the work items in the queue one by one. Items are
1075 * handled serially, so a long-running item delays those behind it.
1076 */
1077static void nvgpu_clk_arb_worker_process(struct gk20a *g, int *get)
1078{
1079
1080 while (nvgpu_clk_arb_worker_pending(g, *get)) {
1081 struct nvgpu_clk_arb_work_item *work_item = NULL;
1082
1083 nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
1084 if (!nvgpu_list_empty(&g->clk_arb_worker.items)) {
1085 work_item = nvgpu_list_first_entry(&g->clk_arb_worker.items,
1086 nvgpu_clk_arb_work_item, worker_item);
1087 nvgpu_list_del(&work_item->worker_item);
1088 }
1089 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
1090
1091 if (!work_item) {
1092 /*
1093 * Woke up for some other reason, but the only expected
1094 * wakeup source is a new work item being queued, so warn
1095 * and acknowledge the spurious event.
1096 */
1097 nvgpu_warn(g, "Spurious worker event!");
1098 ++*get;
1099 break;
1100 }
1101
1102 nvgpu_clk_arb_worker_process_item(work_item);
1103 ++*get;
1104 }
1105}
1106
1107/*
1108 * Process all work items found in the clk arbiter work queue.
1109 */
1110static int nvgpu_clk_arb_poll_worker(void *arg)
1111{
1112 struct gk20a *g = (struct gk20a *)arg;
1113 struct gk20a_worker *worker = &g->clk_arb_worker;
1114 int get = 0;
1115
1116 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1117
1118 while (!nvgpu_thread_should_stop(&worker->poll_task)) {
1119 int ret;
1120
1121 ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
1122 &worker->wq,
1123 nvgpu_clk_arb_worker_pending(g, get), 0);
1124
1125 if (ret == 0)
1126 nvgpu_clk_arb_worker_process(g, &get);
1127 }
1128 return 0;
1129}
1130
1131static int __nvgpu_clk_arb_worker_start(struct gk20a *g)
1132{
1133 char thread_name[64];
1134 int err = 0;
1135
1136 if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task))
1137 return err;
1138
1139 nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
1140
1141 /*
1142 * Mutexes have implicit barriers, so there is no risk of this
1143 * thread using a stale copy of poll_task: the call to
1144 * nvgpu_thread_is_running() below re-reads it under the lock.
1145 */
1146
1147 if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task)) {
1148 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
1149 return err;
1150 }
1151
1152 snprintf(thread_name, sizeof(thread_name),
1153 "nvgpu_clk_arb_poll_%s", g->name);
1154
1155 err = nvgpu_thread_create(&g->clk_arb_worker.poll_task, g,
1156 nvgpu_clk_arb_poll_worker, thread_name);
1157
1158 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
1159 return err;
1160}
1161
1162/**
1163 * Append a work item to the worker's list.
1164 *
1165 * This adds work item to the end of the list and wakes the worker
1166 * up immediately. If the work item already existed in the list, it's not added,
1167 * because in that case it has been scheduled already but has not yet been
1168 * processed.
1169 */
1170void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
1171 struct nvgpu_clk_arb_work_item *work_item)
1172{
1173 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1174
1175 /*
1176 * Warn if worker thread cannot run
1177 */
1178 if (WARN_ON(__nvgpu_clk_arb_worker_start(g))) {
1179 nvgpu_warn(g, "clk arb worker cannot run!");
1180 return;
1181 }
1182
1183 nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
1184 if (!nvgpu_list_empty(&work_item->worker_item)) {
1185 /*
1186 * Already queued, so will get processed eventually.
1187 * The worker is probably awake already.
1188 */
1189 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
1190 return;
1191 }
1192 nvgpu_list_add_tail(&work_item->worker_item, &g->clk_arb_worker.items);
1193 nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
1194
1195 nvgpu_clk_arb_worker_wakeup(g);
1196}
1197
1198/**
1199 * Initialize the clk arb worker's metadata and start the background thread.
1200 */
1201int nvgpu_clk_arb_worker_init(struct gk20a *g)
1202{
1203 int err;
1204
1205 nvgpu_atomic_set(&g->clk_arb_worker.put, 0);
1206 nvgpu_cond_init(&g->clk_arb_worker.wq);
1207 nvgpu_init_list_node(&g->clk_arb_worker.items);
1208 nvgpu_spinlock_init(&g->clk_arb_worker.items_lock);
1209 err = nvgpu_mutex_init(&g->clk_arb_worker.start_lock);
1210 if (err)
1211 goto error_check;
1212
1213 err = __nvgpu_clk_arb_worker_start(g);
1214error_check:
1215 if (err) {
1216 nvgpu_err(g, "failed to start clk arb poller thread");
1217 return err;
1218 }
1219 return 0;
1220}
1221
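/*
 * Bring-up order matters below: allocate the double-buffered tables,
 * read the per-domain defaults, start the worker thread, then trigger
 * one VF-table update and block until the first arbitration request
 * completes, so callers always see a fully initialized arbiter.
 */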
1222int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
1223{
1224 struct nvgpu_clk_arb *arb;
1225 u16 default_mhz;
1226 int err;
1227 int index;
1228 struct nvgpu_clk_vf_table *table;
1229
1230 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1231
1232 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1233 return 0;
1234
1235 arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
1236 if (!arb)
1237 return -ENOMEM;
1238
1239 err = nvgpu_mutex_init(&arb->pstate_lock);
1240 if (err)
1241 goto mutex_fail;
1242 nvgpu_spinlock_init(&arb->sessions_lock);
1243 nvgpu_spinlock_init(&arb->users_lock);
1244 nvgpu_spinlock_init(&arb->requests_lock);
1245
1246 arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1247 if (!arb->mclk_f_points) {
1248 err = -ENOMEM;
1249 goto init_fail;
1250 }
1251
1252 arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
1253 if (!arb->gpc2clk_f_points) {
1254 err = -ENOMEM;
1255 goto init_fail;
1256 }
1257
1258 for (index = 0; index < 2; index++) {
1259 table = &arb->vf_table_pool[index];
1260 table->gpc2clk_num_points = MAX_F_POINTS;
1261 table->mclk_num_points = MAX_F_POINTS;
1262
1263 table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1264 sizeof(struct nvgpu_clk_vf_point));
1265 if (!table->gpc2clk_points) {
1266 err = -ENOMEM;
1267 goto init_fail;
1268 }
1269
1270
1271 table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
1272 sizeof(struct nvgpu_clk_vf_point));
1273 if (!table->mclk_points) {
1274 err = -ENOMEM;
1275 goto init_fail;
1276 }
1277 }
1278
1279 g->clk_arb = arb;
1280 arb->g = g;
1281
1282 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1283 CTRL_CLK_DOMAIN_MCLK, &default_mhz);
1284 if (err < 0) {
1285 err = -EINVAL;
1286 goto init_fail;
1287 }
1288
1289 arb->mclk_default_mhz = default_mhz;
1290
1291 err = g->ops.clk_arb.get_arbiter_clk_default(g,
1292 CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
1293 if (err < 0) {
1294 err = -EINVAL;
1295 goto init_fail;
1296 }
1297
1298 arb->gpc2clk_default_mhz = default_mhz;
1299
1300 arb->actual = &arb->actual_pool[0];
1301
1302 nvgpu_atomic_set(&arb->req_nr, 0);
1303
1304 nvgpu_atomic64_set(&arb->alarm_mask, 0);
1305 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
1306 DEFAULT_EVENT_NUMBER);
1307 if (err < 0)
1308 goto init_fail;
1309
1310 nvgpu_init_list_node(&arb->users);
1311 nvgpu_init_list_node(&arb->sessions);
1312 nvgpu_init_list_node(&arb->requests);
1313
1314 nvgpu_cond_init(&arb->request_wq);
1315
1316 nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item);
1317 nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
1318 arb->update_vf_table_work_item.arb = arb;
1319 arb->update_arb_work_item.arb = arb;
1320 arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE;
1321 arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
1322
1323 err = nvgpu_clk_arb_worker_init(g);
1324 if (err < 0)
1325 goto init_fail;
1326
1327#ifdef CONFIG_DEBUG_FS
1328 arb->debug = &arb->debug_pool[0];
1329
1330 if (!arb->debugfs_set) {
1331 if (nvgpu_clk_arb_debugfs_init(g))
1332 arb->debugfs_set = true;
1333 }
1334#endif
1335 err = clk_vf_point_cache(g);
1336 if (err < 0)
1337 goto init_fail;
1338
1339 err = nvgpu_clk_arb_update_vf_table(arb);
1340 if (err < 0)
1341 goto init_fail;
1342 do {
1343 /* Check that first run is completed */
1344 nvgpu_smp_mb();
1345 NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
1346 nvgpu_atomic_read(&arb->req_nr), 0);
1347 } while (!nvgpu_atomic_read(&arb->req_nr));
1348
1349
1350 return arb->status;
1351
1352init_fail:
1353 nvgpu_kfree(g, arb->gpc2clk_f_points);
1354 nvgpu_kfree(g, arb->mclk_f_points);
1355
1356 for (index = 0; index < 2; index++) {
1357 nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
1358 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1359 }
1360
1361 nvgpu_mutex_destroy(&arb->pstate_lock);
1362
1363mutex_fail:
1364 nvgpu_kfree(g, arb);
1365
1366 return err;
1367}
1368
1369void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
1370{
1371 nvgpu_clk_arb_schedule_alarm(g,
1372 (0x1UL << NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
1373}
1374
1375void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
1376{
1377 struct nvgpu_clk_arb *arb = g->clk_arb;
1378
1379 nvgpu_clk_arb_set_global_alarm(g, alarm);
1380 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
1381}
1382
1383void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
1384{
1385 nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
1386 nvgpu_thread_stop(&g->clk_arb_worker.poll_task);
1387 nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
1388}
1389
1390void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
1391{
1392 struct nvgpu_clk_arb *arb = g->clk_arb;
1393 int index;
1394
1395 if (arb) {
1396 nvgpu_clk_arb_worker_deinit(g);
1397
1398 nvgpu_kfree(g, arb->gpc2clk_f_points);
1399 nvgpu_kfree(g, arb->mclk_f_points);
1400
1401 for (index = 0; index < 2; index++) {
1402 nvgpu_kfree(g,
1403 arb->vf_table_pool[index].gpc2clk_points);
1404 nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
1405 }
1406 nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
1407 nvgpu_kfree(g, g->clk_arb);
1408 g->clk_arb = NULL;
1409 }
1410}
1411
1412int nvgpu_clk_arb_init_session(struct gk20a *g,
1413 struct nvgpu_clk_session **_session)
1414{
1415 struct nvgpu_clk_arb *arb = g->clk_arb;
1416 struct nvgpu_clk_session *session = *(_session);
1417
1418 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1419
1420 if (!g->ops.clk_arb.get_arbiter_clk_domains)
1421 return 0;
1422
1423 session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
1424 if (!session)
1425 return -ENOMEM;
1426 session->g = g;
1427
1428 nvgpu_ref_init(&session->refcount);
1429
1430 session->zombie = false;
1431 session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
1432 /* make sure that the initialization of the pool is visible
1433 * before the update
1434 */
1435 nvgpu_smp_wmb();
1436 session->target = &session->target_pool[0];
1437
1438 nvgpu_init_list_node(&session->targets);
1439 nvgpu_spinlock_init(&session->session_lock);
1440
1441 nvgpu_spinlock_acquire(&arb->sessions_lock);
1442 nvgpu_list_add_tail(&session->link, &arb->sessions);
1443 nvgpu_spinlock_release(&arb->sessions_lock);
1444
1445 *_session = session;
1446
1447 return 0;
1448}
1449
1450void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
1451{
1452 struct nvgpu_clk_dev *dev = container_of(refcount,
1453 struct nvgpu_clk_dev, refcount);
1454 struct nvgpu_clk_session *session = dev->session;
1455
1456 nvgpu_kfree(session->g, dev);
1457}
1458
1459void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
1460{
1461 struct nvgpu_clk_session *session = container_of(refcount,
1462 struct nvgpu_clk_session, refcount);
1463 struct nvgpu_clk_arb *arb = session->g->clk_arb;
1464 struct gk20a *g = session->g;
1465 struct nvgpu_clk_dev *dev, *tmp;
1466
1467 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1468
1469 if (arb) {
1470 nvgpu_spinlock_acquire(&arb->sessions_lock);
1471 nvgpu_list_del(&session->link);
1472 nvgpu_spinlock_release(&arb->sessions_lock);
1473 }
1474
1475 nvgpu_spinlock_acquire(&session->session_lock);
1476 nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
1477 nvgpu_clk_dev, node) {
1478 nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1479 nvgpu_list_del(&dev->node);
1480 }
1481 nvgpu_spinlock_release(&session->session_lock);
1482
1483 nvgpu_kfree(g, session);
1484}
1485
1486void nvgpu_clk_arb_release_session(struct gk20a *g,
1487 struct nvgpu_clk_session *session)
1488{
1489 struct nvgpu_clk_arb *arb = g->clk_arb;
1490
1491 nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
1492
1493 session->zombie = true;
1494 nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
1495 if (arb)
1496 nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
1497}
1498
1499void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
1500{
1501 struct nvgpu_clk_arb *arb = g->clk_arb;
1502
1503 nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item);
1504}
1505
1506/* This function is inherently unsafe to call while arbiter is running
1507 * arbiter must be blocked before calling this function
1508 */
1509int nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
1510{
1511 return NV_ACCESS_ONCE(g->clk_arb->actual->pstate);
1512}
1513
1514void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
1515{
1516 struct nvgpu_clk_arb *arb = g->clk_arb;
1517
1518 if (lock)
1519 nvgpu_mutex_acquire(&arb->pstate_lock);
1520 else
1521 nvgpu_mutex_release(&arb->pstate_lock);
1522}
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
deleted file mode 100644
index 464590d5..00000000
--- a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
+++ /dev/null
@@ -1,163 +0,0 @@
1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This software is licensed under the terms of the GNU General Public
5 * License version 2, as published by the Free Software Foundation, and
6 * may be copied, distributed, and modified under those terms.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __NVGPU_CLK_ARB_LINUX_H__
18#define __NVGPU_CLK_ARB_LINUX_H__
19
20#include <nvgpu/types.h>
21#include <nvgpu/bitops.h>
22#include <nvgpu/lock.h>
23#include <nvgpu/kmem.h>
24#include <nvgpu/atomic.h>
25#include <nvgpu/bug.h>
26#include <nvgpu/kref.h>
27#include <nvgpu/log.h>
28#include <nvgpu/barrier.h>
29#include <nvgpu/cond.h>
30#include <nvgpu/list.h>
31
32#include "gk20a/gk20a.h"
33#include "clk/clk.h"
34#include "pstate/pstate.h"
35#include "lpwr/lpwr.h"
36#include "volt/volt.h"
37
38/*
39 * The defines here should finally move to clk_arb.h, once these are
40 * refactored to be free of Linux fields.
41 */
42
43enum clk_arb_work_item_type {
44 CLK_ARB_WORK_UPDATE_VF_TABLE,
45 CLK_ARB_WORK_UPDATE_ARB
46};
47
48struct nvgpu_clk_arb_work_item {
49 enum clk_arb_work_item_type item_type;
50 struct nvgpu_clk_arb *arb;
51 struct nvgpu_list_node worker_item;
52};
53
54struct nvgpu_clk_arb {
55 struct nvgpu_spinlock sessions_lock;
56 struct nvgpu_spinlock users_lock;
57 struct nvgpu_spinlock requests_lock;
58
59 struct nvgpu_mutex pstate_lock;
60 struct nvgpu_list_node users;
61 struct nvgpu_list_node sessions;
62 struct nvgpu_list_node requests;
63
64 struct gk20a *g;
65 int status;
66
67 struct nvgpu_clk_arb_target actual_pool[2];
68 struct nvgpu_clk_arb_target *actual;
69
70 u16 gpc2clk_default_mhz;
71 u16 mclk_default_mhz;
72 u32 voltuv_actual;
73
74 u16 gpc2clk_min, gpc2clk_max;
75 u16 mclk_min, mclk_max;
76
77 struct nvgpu_clk_arb_work_item update_vf_table_work_item;
78 struct nvgpu_clk_arb_work_item update_arb_work_item;
79
80 struct nvgpu_cond request_wq;
81
82 struct nvgpu_clk_vf_table *current_vf_table;
83 struct nvgpu_clk_vf_table vf_table_pool[2];
84 u32 vf_table_index;
85
86 u16 *mclk_f_points;
87 nvgpu_atomic_t req_nr;
88
89 u32 mclk_f_numpoints;
90 u16 *gpc2clk_f_points;
91 u32 gpc2clk_f_numpoints;
92
93 nvgpu_atomic64_t alarm_mask;
94 struct nvgpu_clk_notification_queue notification_queue;
95
96#ifdef CONFIG_DEBUG_FS
97 struct nvgpu_clk_arb_debug debug_pool[2];
98 struct nvgpu_clk_arb_debug *debug;
99 bool debugfs_set;
100#endif
101};
102
103struct nvgpu_clk_dev {
104 struct nvgpu_clk_session *session;
105 union {
106 struct nvgpu_list_node link;
107 struct nvgpu_list_node node;
108 };
109 struct nvgpu_cond readout_wq;
110 nvgpu_atomic_t poll_mask;
111 u16 gpc2clk_target_mhz;
112 u16 mclk_target_mhz;
113 u32 alarms_reported;
114 nvgpu_atomic_t enabled_mask;
115 struct nvgpu_clk_notification_queue queue;
116 u32 arb_queue_head;
117 struct nvgpu_ref refcount;
118};
119
120struct nvgpu_clk_session {
121 bool zombie;
122 struct gk20a *g;
123 struct nvgpu_ref refcount;
124 struct nvgpu_list_node link;
125 struct nvgpu_list_node targets;
126
127 struct nvgpu_spinlock session_lock;
128 struct nvgpu_clk_arb_target target_pool[2];
129 struct nvgpu_clk_arb_target *target;
130};
131
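/*
 * The inline helpers below recover the containing object from an
 * embedded nvgpu_list_node; each is an open-coded container_of() for
 * one of the intrusive list links used by the arbiter.
 */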
132static inline struct nvgpu_clk_session *
133nvgpu_clk_session_from_link(struct nvgpu_list_node *node)
134{
135 return (struct nvgpu_clk_session *)
136 ((uintptr_t)node - offsetof(struct nvgpu_clk_session, link));
137};
138
139static inline struct nvgpu_clk_dev *
140nvgpu_clk_dev_from_node(struct nvgpu_list_node *node)
141{
142 return (struct nvgpu_clk_dev *)
143 ((uintptr_t)node - offsetof(struct nvgpu_clk_dev, node));
144};
145
146static inline struct nvgpu_clk_dev *
147nvgpu_clk_dev_from_link(struct nvgpu_list_node *node)
148{
149 return (struct nvgpu_clk_dev *)
150 ((uintptr_t)node - offsetof(struct nvgpu_clk_dev, link));
151};
152
153static inline struct nvgpu_clk_arb_work_item *
154nvgpu_clk_arb_work_item_from_worker_item(struct nvgpu_list_node *node)
155{
156 return (struct nvgpu_clk_arb_work_item *)
157 ((uintptr_t)node - offsetof(struct nvgpu_clk_arb_work_item, worker_item));
158};
159
160void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
161 struct nvgpu_clk_arb_work_item *work_item);
162#endif /* __NVGPU_CLK_ARB_LINUX_H__ */
163
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
index 3ab8cf9e..e4ac68a3 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
@@ -38,7 +38,6 @@
 
 #include "gk20a/gk20a.h"
 #include "clk/clk.h"
-#include "clk_arb_linux.h"
 #include "pstate/pstate.h"
 #include "lpwr/lpwr.h"
 #include "volt/volt.h"