aboutsummaryrefslogtreecommitdiffstats
path: root/include/os/linux/scale.c
diff options
context:
space:
mode:
Diffstat (limited to 'include/os/linux/scale.c')
-rw-r--r--include/os/linux/scale.c435
1 files changed, 435 insertions, 0 deletions
diff --git a/include/os/linux/scale.c b/include/os/linux/scale.c
new file mode 100644
index 0000000..388e168
--- /dev/null
+++ b/include/os/linux/scale.c
@@ -0,0 +1,435 @@
1/*
2 * gk20a clock scaling profile
3 *
4 * Copyright (c) 2013-2020, NVIDIA Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <linux/devfreq.h>
20#include <linux/export.h>
21#include <soc/tegra/chip-id.h>
22#include <linux/pm_qos.h>
23
24#include <governor.h>
25
26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h>
28#include <nvgpu/gk20a.h>
29#include <nvgpu/clk_arb.h>
30
31#include "platform_gk20a.h"
32#include "scale.h"
33#include "os_linux.h"
34
35/*
36 * gk20a_scale_qos_notify()
37 *
38 * This function is called when the minimum QoS requirement for the device
39 * has changed. The function calls postscaling callback if it is defined.
40 */
41
42#if defined(CONFIG_GK20A_PM_QOS) && defined(CONFIG_COMMON_CLK)
43int gk20a_scale_qos_notify(struct notifier_block *nb,
44 unsigned long n, void *p)
45{
46 struct gk20a_scale_profile *profile =
47 container_of(nb, struct gk20a_scale_profile,
48 qos_notify_block);
49 struct gk20a *g = get_gk20a(profile->dev);
50 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
51 struct devfreq *devfreq = l->devfreq;
52
53 if (!devfreq)
54 return NOTIFY_OK;
55
56 mutex_lock(&devfreq->lock);
57 /* check for pm_qos min and max frequency requirement */
58 profile->qos_min_freq =
59 (unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
60 profile->qos_max_freq =
61 (unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
62
63 if (profile->qos_min_freq > profile->qos_max_freq) {
64 nvgpu_err(g,
65 "QoS: setting invalid limit, min_freq=%lu max_freq=%lu",
66 profile->qos_min_freq, profile->qos_max_freq);
67 profile->qos_min_freq = profile->qos_max_freq;
68 }
69
70 update_devfreq(devfreq);
71 mutex_unlock(&devfreq->lock);
72
73 return NOTIFY_OK;
74}
75#elif defined(CONFIG_GK20A_PM_QOS)
76int gk20a_scale_qos_notify(struct notifier_block *nb,
77 unsigned long n, void *p)
78{
79 struct gk20a_scale_profile *profile =
80 container_of(nb, struct gk20a_scale_profile,
81 qos_notify_block);
82 struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
83 struct gk20a *g = get_gk20a(profile->dev);
84 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
85 unsigned long freq;
86
87 if (!platform->postscale)
88 return NOTIFY_OK;
89
90 /* get the frequency requirement. if devfreq is enabled, check if it
91 * has higher demand than qos */
92 freq = platform->clk_round_rate(profile->dev,
93 (u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS));
94 if (l->devfreq)
95 freq = max(l->devfreq->previous_freq, freq);
96
97 /* Update gpu load because we may scale the emc target
98 * if the gpu load changed. */
99 nvgpu_pmu_load_update(g);
100 platform->postscale(profile->dev, freq);
101
102 return NOTIFY_OK;
103}
104#else
/*
 * Stub used when CONFIG_GK20A_PM_QOS is not set: there are no QoS bounds to
 * track, so every argument is ignored and 0 is returned.
 */
int gk20a_scale_qos_notify(struct notifier_block *nb,
			   unsigned long n, void *p)
{
	(void)nb;
	(void)n;
	(void)p;

	return 0;
}
110#endif
111
112/*
113 * gk20a_scale_make_freq_table(profile)
114 *
115 * This function initialises the frequency table for the given device profile
116 */
117
118static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
119{
120 struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
121 int num_freqs, err;
122 unsigned long *freqs;
123
124 if (platform->get_clk_freqs) {
125 /* get gpu frequency table */
126 err = platform->get_clk_freqs(profile->dev, &freqs,
127 &num_freqs);
128
129 if (err)
130 return -ENOSYS;
131 } else
132 return -ENOSYS;
133
134 profile->devfreq_profile.freq_table = (unsigned long *)freqs;
135 profile->devfreq_profile.max_state = num_freqs;
136
137 return 0;
138}
139
140/*
141 * gk20a_scale_target(dev, *freq, flags)
142 *
143 * This function scales the clock
144 */
145
146static int gk20a_scale_target(struct device *dev, unsigned long *freq,
147 u32 flags)
148{
149 struct gk20a_platform *platform = dev_get_drvdata(dev);
150 struct gk20a *g = platform->g;
151 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
152 struct gk20a_scale_profile *profile = g->scale_profile;
153 struct devfreq *devfreq = l->devfreq;
154 unsigned long local_freq = *freq;
155 unsigned long rounded_rate;
156 unsigned long min_freq = 0, max_freq = 0;
157
158 if (nvgpu_clk_arb_has_active_req(g))
159 return 0;
160 /*
161 * Calculate floor and cap frequency values
162 *
163 * Policy :
164 * We have two APIs to clip the frequency
165 * 1. devfreq
166 * 2. pm_qos
167 *
168 * To calculate floor (min) freq, we select MAX of floor frequencies
169 * requested from both APIs
170 * To get cap (max) freq, we select MIN of max frequencies
171 *
172 * In case we have conflict (min_freq > max_freq) after above
173 * steps, we ensure that max_freq wins over min_freq
174 */
175 min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq);
176 max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq);
177
178 if (min_freq > max_freq)
179 min_freq = max_freq;
180
181 /* Clip requested frequency */
182 if (local_freq < min_freq)
183 local_freq = min_freq;
184
185 if (local_freq > max_freq)
186 local_freq = max_freq;
187
188 /* set the final frequency */
189 rounded_rate = platform->clk_round_rate(dev, local_freq);
190
191 /* Check for duplicate request */
192 if (rounded_rate == g->last_freq)
193 return 0;
194
195 if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate)
196 *freq = rounded_rate;
197 else {
198 g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
199 *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
200 }
201
202 g->last_freq = *freq;
203
204 /* postscale will only scale emc (dram clock) if evaluating
205 * gk20a_tegra_get_emc_rate() produces a new or different emc
206 * target because the load or_and gpufreq has changed */
207 if (platform->postscale)
208 platform->postscale(dev, rounded_rate);
209
210 return 0;
211}
212
213/*
214 * update_load_estimate_busy_cycles(dev)
215 *
216 * Update load estimate using pmu idle counters. Result is normalised
217 * based on the time it was asked last time.
218 */
219
220static void update_load_estimate_busy_cycles(struct device *dev)
221{
222 struct gk20a *g = get_gk20a(dev);
223 struct gk20a_scale_profile *profile = g->scale_profile;
224 unsigned long dt;
225 u32 busy_cycles_norm;
226 ktime_t t;
227
228 t = ktime_get();
229 dt = ktime_us_delta(t, profile->last_event_time);
230
231 profile->dev_stat.total_time = dt;
232 profile->last_event_time = t;
233 nvgpu_pmu_busy_cycles_norm(g, &busy_cycles_norm);
234 profile->dev_stat.busy_time =
235 (busy_cycles_norm * dt) / PMU_BUSY_CYCLES_NORM_MAX;
236}
237
238/*
239 * gk20a_scale_suspend(dev)
240 *
241 * This function informs devfreq of suspend
242 */
243
244void gk20a_scale_suspend(struct device *dev)
245{
246 struct gk20a *g = get_gk20a(dev);
247 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
248 struct devfreq *devfreq = l->devfreq;
249
250 if (!devfreq)
251 return;
252
253 devfreq_suspend_device(devfreq);
254}
255
256/*
257 * gk20a_scale_resume(dev)
258 *
259 * This functions informs devfreq of resume
260 */
261
262void gk20a_scale_resume(struct device *dev)
263{
264 struct gk20a *g = get_gk20a(dev);
265 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
266 struct devfreq *devfreq = l->devfreq;
267
268 if (!devfreq)
269 return;
270
271 g->last_freq = 0;
272 devfreq_resume_device(devfreq);
273}
274
275/*
276 * gk20a_scale_get_dev_status(dev, *stat)
277 *
278 * This function queries the current device status.
279 */
280
281static int gk20a_scale_get_dev_status(struct device *dev,
282 struct devfreq_dev_status *stat)
283{
284 struct gk20a *g = get_gk20a(dev);
285 struct gk20a_scale_profile *profile = g->scale_profile;
286 struct gk20a_platform *platform = dev_get_drvdata(dev);
287
288 /* inform edp about new constraint */
289 if (platform->prescale)
290 platform->prescale(dev);
291
292 /* Make sure there are correct values for the current frequency */
293 profile->dev_stat.current_frequency =
294 g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
295
296 /* Update load estimate */
297 update_load_estimate_busy_cycles(dev);
298
299 /* Copy the contents of the current device status */
300 *stat = profile->dev_stat;
301
302 /* Finally, clear out the local values */
303 profile->dev_stat.total_time = 0;
304 profile->dev_stat.busy_time = 0;
305
306 return 0;
307}
308
309/*
310 * get_cur_freq(struct device *dev, unsigned long *freq)
311 *
312 * This function gets the current GPU clock rate.
313 */
314
315static int get_cur_freq(struct device *dev, unsigned long *freq)
316{
317 struct gk20a *g = get_gk20a(dev);
318 *freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
319 return 0;
320}
321
322
323/*
324 * gk20a_scale_init(dev)
325 */
326
327void gk20a_scale_init(struct device *dev)
328{
329 struct gk20a_platform *platform = dev_get_drvdata(dev);
330 struct gk20a *g = platform->g;
331 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
332 struct gk20a_scale_profile *profile;
333 int err;
334
335 if (g->scale_profile)
336 return;
337
338 if (!platform->devfreq_governor && !platform->qos_notify)
339 return;
340
341 profile = nvgpu_kzalloc(g, sizeof(*profile));
342 if (!profile)
343 return;
344
345 profile->dev = dev;
346 profile->dev_stat.busy = false;
347
348 /* Create frequency table */
349 err = gk20a_scale_make_freq_table(profile);
350 if (err || !profile->devfreq_profile.max_state)
351 goto err_get_freqs;
352
353 profile->qos_min_freq = 0;
354 profile->qos_max_freq = UINT_MAX;
355
356 /* Store device profile so we can access it if devfreq governor
357 * init needs that */
358 g->scale_profile = profile;
359
360 if (platform->devfreq_governor) {
361 struct devfreq *devfreq;
362
363 profile->devfreq_profile.initial_freq =
364 profile->devfreq_profile.freq_table[0];
365 profile->devfreq_profile.target = gk20a_scale_target;
366 profile->devfreq_profile.get_dev_status =
367 gk20a_scale_get_dev_status;
368 profile->devfreq_profile.get_cur_freq = get_cur_freq;
369 profile->devfreq_profile.polling_ms = 25;
370
371 devfreq = devm_devfreq_add_device(dev,
372 &profile->devfreq_profile,
373 platform->devfreq_governor, NULL);
374
375 if (IS_ERR_OR_NULL(devfreq))
376 devfreq = NULL;
377
378 l->devfreq = devfreq;
379 }
380
381#ifdef CONFIG_GK20A_PM_QOS
382 /* Should we register QoS callback for this device? */
383 if (platform->qos_notify) {
384 profile->qos_notify_block.notifier_call =
385 platform->qos_notify;
386
387 pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
388 &profile->qos_notify_block);
389 pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
390 &profile->qos_notify_block);
391 }
392#endif
393
394 return;
395
396err_get_freqs:
397 nvgpu_kfree(g, profile);
398}
399
400void gk20a_scale_exit(struct device *dev)
401{
402 struct gk20a_platform *platform = dev_get_drvdata(dev);
403 struct gk20a *g = platform->g;
404
405#ifdef CONFIG_GK20A_PM_QOS
406 if (platform->qos_notify) {
407 pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
408 &g->scale_profile->qos_notify_block);
409 pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
410 &g->scale_profile->qos_notify_block);
411 }
412#endif
413
414 nvgpu_kfree(g, g->scale_profile);
415 g->scale_profile = NULL;
416}
417
418/*
419 * gk20a_scale_hw_init(dev)
420 *
421 * Initialize hardware portion of the device
422 */
423
424void gk20a_scale_hw_init(struct device *dev)
425{
426 struct gk20a_platform *platform = dev_get_drvdata(dev);
427 struct gk20a_scale_profile *profile = platform->g->scale_profile;
428
429 /* make sure that scaling has bee initialised */
430 if (!profile)
431 return;
432
433 profile->dev_stat.total_time = 0;
434 profile->last_event_time = ktime_get();
435}