diff options
author | Ben Skeggs <bskeggs@redhat.com> | 2011-10-28 10:22:49 -0400 |
---|---|---|
committer | Ben Skeggs <bskeggs@redhat.com> | 2011-12-21 04:01:46 -0500 |
commit | 045da4e55581d9b4de135bbdbdd1b7fa98dc18a9 (patch) | |
tree | 5f2907413a0a4e37ff8cf71fbcb7c51c8874f018 /drivers/gpu/drm/nouveau | |
parent | 52c4d767437b40b0cbc02d6a4480abb45ace64bb (diff) |
drm/nvc0/pm: initial engine reclocking
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/nouveau')
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_pm.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_state.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nvc0_pm.c | 237 |
3 files changed, 243 insertions, 0 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.h b/drivers/gpu/drm/nouveau/nouveau_pm.h index 7e0cc2eeb307..2f8e14fbcff8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_pm.h +++ b/drivers/gpu/drm/nouveau/nouveau_pm.h | |||
@@ -72,6 +72,8 @@ int nva3_pm_clocks_set(struct drm_device *, void *); | |||
72 | 72 | ||
73 | /* nvc0_pm.c */ | 73 | /* nvc0_pm.c */ |
74 | int nvc0_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *); | 74 | int nvc0_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *); |
75 | void *nvc0_pm_clocks_pre(struct drm_device *, struct nouveau_pm_level *); | ||
76 | int nvc0_pm_clocks_set(struct drm_device *, void *); | ||
75 | 77 | ||
76 | /* nouveau_temp.c */ | 78 | /* nouveau_temp.c */ |
77 | void nouveau_temp_init(struct drm_device *dev); | 79 | void nouveau_temp_init(struct drm_device *dev); |
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index 57ccda47a70b..f5e98910d17f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c | |||
@@ -417,6 +417,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) | |||
417 | engine->vram.flags_valid = nvc0_vram_flags_valid; | 417 | engine->vram.flags_valid = nvc0_vram_flags_valid; |
418 | engine->pm.temp_get = nv84_temp_get; | 418 | engine->pm.temp_get = nv84_temp_get; |
419 | engine->pm.clocks_get = nvc0_pm_clocks_get; | 419 | engine->pm.clocks_get = nvc0_pm_clocks_get; |
420 | engine->pm.clocks_pre = nvc0_pm_clocks_pre; | ||
421 | engine->pm.clocks_set = nvc0_pm_clocks_set; | ||
420 | engine->pm.voltage_get = nouveau_voltage_gpio_get; | 422 | engine->pm.voltage_get = nouveau_voltage_gpio_get; |
421 | engine->pm.voltage_set = nouveau_voltage_gpio_set; | 423 | engine->pm.voltage_set = nouveau_voltage_gpio_set; |
422 | engine->pm.pwm_get = nv50_pm_pwm_get; | 424 | engine->pm.pwm_get = nv50_pm_pwm_get; |
@@ -468,6 +470,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) | |||
468 | engine->vram.flags_valid = nvc0_vram_flags_valid; | 470 | engine->vram.flags_valid = nvc0_vram_flags_valid; |
469 | engine->pm.temp_get = nv84_temp_get; | 471 | engine->pm.temp_get = nv84_temp_get; |
470 | engine->pm.clocks_get = nvc0_pm_clocks_get; | 472 | engine->pm.clocks_get = nvc0_pm_clocks_get; |
473 | engine->pm.clocks_pre = nvc0_pm_clocks_pre; | ||
474 | engine->pm.clocks_set = nvc0_pm_clocks_set; | ||
471 | engine->pm.voltage_get = nouveau_voltage_gpio_get; | 475 | engine->pm.voltage_get = nouveau_voltage_gpio_get; |
472 | engine->pm.voltage_set = nouveau_voltage_gpio_set; | 476 | engine->pm.voltage_set = nouveau_voltage_gpio_set; |
473 | break; | 477 | break; |
diff --git a/drivers/gpu/drm/nouveau/nvc0_pm.c b/drivers/gpu/drm/nouveau/nvc0_pm.c index 929aded35cb5..e9992f62c1c0 100644 --- a/drivers/gpu/drm/nouveau/nvc0_pm.c +++ b/drivers/gpu/drm/nouveau/nvc0_pm.c | |||
@@ -153,3 +153,240 @@ nvc0_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl) | |||
153 | perflvl->vdec = read_clk(dev, 0x0e); | 153 | perflvl->vdec = read_clk(dev, 0x0e); |
154 | return 0; | 154 | return 0; |
155 | } | 155 | } |
156 | |||
157 | struct nvc0_pm_clock { | ||
158 | u32 freq; | ||
159 | u32 ssel; | ||
160 | u32 mdiv; | ||
161 | u32 dsrc; | ||
162 | u32 ddiv; | ||
163 | u32 coef; | ||
164 | }; | ||
165 | |||
166 | struct nvc0_pm_state { | ||
167 | struct nvc0_pm_clock eng[16]; | ||
168 | }; | ||
169 | |||
170 | static u32 | ||
171 | calc_div(struct drm_device *dev, int clk, u32 ref, u32 freq, u32 *ddiv) | ||
172 | { | ||
173 | u32 div = min((ref * 2) / freq, (u32)65); | ||
174 | if (div < 2) | ||
175 | div = 2; | ||
176 | |||
177 | *ddiv = div - 2; | ||
178 | return (ref * 2) / div; | ||
179 | } | ||
180 | |||
181 | static u32 | ||
182 | calc_src(struct drm_device *dev, int clk, u32 freq, u32 *dsrc, u32 *ddiv) | ||
183 | { | ||
184 | u32 sclk; | ||
185 | |||
186 | /* use one of the fixed frequencies if possible */ | ||
187 | *ddiv = 0x00000000; | ||
188 | switch (freq) { | ||
189 | case 27000: | ||
190 | case 108000: | ||
191 | *dsrc = 0x00000000; | ||
192 | if (freq == 108000) | ||
193 | *dsrc |= 0x00030000; | ||
194 | return freq; | ||
195 | case 100000: | ||
196 | *dsrc = 0x00000002; | ||
197 | return freq; | ||
198 | default: | ||
199 | *dsrc = 0x00000003; | ||
200 | break; | ||
201 | } | ||
202 | |||
203 | /* otherwise, calculate the closest divider */ | ||
204 | sclk = read_vco(dev, clk); | ||
205 | if (clk < 7) | ||
206 | sclk = calc_div(dev, clk, sclk, freq, ddiv); | ||
207 | return sclk; | ||
208 | } | ||
209 | |||
210 | static u32 | ||
211 | calc_pll(struct drm_device *dev, int clk, u32 freq, u32 *coef) | ||
212 | { | ||
213 | struct pll_lims limits; | ||
214 | int N, M, P, ret; | ||
215 | |||
216 | ret = get_pll_limits(dev, 0x137000 + (clk * 0x20), &limits); | ||
217 | if (ret) | ||
218 | return 0; | ||
219 | |||
220 | limits.refclk = read_div(dev, clk, 0x137120, 0x137140); | ||
221 | if (!limits.refclk) | ||
222 | return 0; | ||
223 | |||
224 | ret = nva3_calc_pll(dev, &limits, freq, &N, NULL, &M, &P); | ||
225 | if (ret <= 0) | ||
226 | return 0; | ||
227 | |||
228 | *coef = (P << 16) | (N << 8) | M; | ||
229 | return ret; | ||
230 | } | ||
231 | |||
232 | /* A (likely rather simplified and incomplete) view of the clock tree | ||
233 | * | ||
234 | * Key: | ||
235 | * | ||
236 | * S: source select | ||
237 | * D: divider | ||
238 | * P: pll | ||
239 | * F: switch | ||
240 | * | ||
241 | * Engine clocks: | ||
242 | * | ||
243 | * 137250(D) ---- 137100(F0) ---- 137160(S)/1371d0(D) ------------------- ref | ||
244 | * (F1) ---- 1370X0(P) ---- 137120(S)/137140(D) ---- ref | ||
245 | * | ||
246 | * Not all registers exist for all clocks. For example: clocks >= 8 don't | ||
247 | * have their own PLL (all tied to clock 7's PLL when in PLL mode), nor do | ||
248 | * they have the divider at 1371d0, though the source selection at 137160 | ||
249 | * still exists. You must use the divider at 137250 for these instead. | ||
250 | * | ||
251 | * Memory clock: | ||
252 | * | ||
253 | * TBD, read_mem() above is likely very wrong... | ||
254 | * | ||
255 | */ | ||
256 | |||
257 | static int | ||
258 | calc_clk(struct drm_device *dev, int clk, struct nvc0_pm_clock *info, u32 freq) | ||
259 | { | ||
260 | u32 src0, div0, div1D, div1P = 0; | ||
261 | u32 clk0, clk1 = 0; | ||
262 | |||
263 | /* invalid clock domain */ | ||
264 | if (!freq) | ||
265 | return 0; | ||
266 | |||
267 | /* first possible path, using only dividers */ | ||
268 | clk0 = calc_src(dev, clk, freq, &src0, &div0); | ||
269 | clk0 = calc_div(dev, clk, clk0, freq, &div1D); | ||
270 | |||
271 | /* see if we can get any closer using PLLs */ | ||
272 | if (clk0 != freq) { | ||
273 | if (clk < 7) | ||
274 | clk1 = calc_pll(dev, clk, freq, &info->coef); | ||
275 | else | ||
276 | clk1 = read_pll(dev, 0x1370e0); | ||
277 | clk1 = calc_div(dev, clk, clk1, freq, &div1P); | ||
278 | } | ||
279 | |||
280 | /* select the method which gets closest to target freq */ | ||
281 | if (abs((int)freq - clk0) <= abs((int)freq - clk1)) { | ||
282 | info->dsrc = src0; | ||
283 | if (div0) { | ||
284 | info->ddiv |= 0x80000000; | ||
285 | info->ddiv |= div0 << 8; | ||
286 | info->ddiv |= div0; | ||
287 | } | ||
288 | if (div1D) { | ||
289 | info->mdiv |= 0x80000000; | ||
290 | info->mdiv |= div1D; | ||
291 | } | ||
292 | info->ssel = 0; | ||
293 | info->freq = clk0; | ||
294 | } else { | ||
295 | if (div1P) { | ||
296 | info->mdiv |= 0x80000000; | ||
297 | info->mdiv |= div1P << 8; | ||
298 | } | ||
299 | info->ssel = (1 << clk); | ||
300 | info->freq = clk1; | ||
301 | } | ||
302 | |||
303 | return 0; | ||
304 | } | ||
305 | |||
306 | void * | ||
307 | nvc0_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) | ||
308 | { | ||
309 | struct drm_nouveau_private *dev_priv = dev->dev_private; | ||
310 | struct nvc0_pm_state *info; | ||
311 | int ret; | ||
312 | |||
313 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
314 | if (!info) | ||
315 | return ERR_PTR(-ENOMEM); | ||
316 | |||
317 | /* NFI why this is still in the performance table, the ROPCs appear | ||
318 | * to get their clock from clock 2 ("hub07", actually hub05 on this | ||
319 | * chip, but, anyway...) as well. nvatiming confirms hub05 and ROP | ||
320 | * are always the same freq with the binary driver even when the | ||
321 | * performance table says they should differ. | ||
322 | */ | ||
323 | if (dev_priv->chipset == 0xd9) | ||
324 | perflvl->rop = 0; | ||
325 | |||
326 | if ((ret = calc_clk(dev, 0x00, &info->eng[0x00], perflvl->shader)) || | ||
327 | (ret = calc_clk(dev, 0x01, &info->eng[0x01], perflvl->rop)) || | ||
328 | (ret = calc_clk(dev, 0x02, &info->eng[0x02], perflvl->hub07)) || | ||
329 | (ret = calc_clk(dev, 0x07, &info->eng[0x07], perflvl->hub06)) || | ||
330 | (ret = calc_clk(dev, 0x08, &info->eng[0x08], perflvl->hub01)) || | ||
331 | (ret = calc_clk(dev, 0x09, &info->eng[0x09], perflvl->copy)) || | ||
332 | (ret = calc_clk(dev, 0x0c, &info->eng[0x0c], perflvl->daemon)) || | ||
333 | (ret = calc_clk(dev, 0x0e, &info->eng[0x0e], perflvl->vdec))) { | ||
334 | kfree(info); | ||
335 | return ERR_PTR(ret); | ||
336 | } | ||
337 | |||
338 | return info; | ||
339 | } | ||
340 | |||
341 | static void | ||
342 | prog_clk(struct drm_device *dev, int clk, struct nvc0_pm_clock *info) | ||
343 | { | ||
344 | /* program dividers at 137160/1371d0 first */ | ||
345 | if (clk < 7 && !info->ssel) { | ||
346 | nv_mask(dev, 0x1371d0 + (clk * 0x04), 0x80003f3f, info->ddiv); | ||
347 | nv_wr32(dev, 0x137160 + (clk * 0x04), info->dsrc); | ||
348 | } | ||
349 | |||
350 | /* switch clock to non-pll mode */ | ||
351 | nv_mask(dev, 0x137100, (1 << clk), 0x00000000); | ||
352 | nv_wait(dev, 0x137100, (1 << clk), 0x00000000); | ||
353 | |||
354 | /* reprogram pll */ | ||
355 | if (clk < 7) { | ||
356 | /* make sure it's disabled first... */ | ||
357 | u32 base = 0x137000 + (clk * 0x20); | ||
358 | u32 ctrl = nv_rd32(dev, base + 0x00); | ||
359 | if (ctrl & 0x00000001) { | ||
360 | nv_mask(dev, base + 0x00, 0x00000004, 0x00000000); | ||
361 | nv_mask(dev, base + 0x00, 0x00000001, 0x00000000); | ||
362 | } | ||
363 | /* program it to new values, if necessary */ | ||
364 | if (info->ssel) { | ||
365 | nv_wr32(dev, base + 0x04, info->coef); | ||
366 | nv_mask(dev, base + 0x00, 0x00000001, 0x00000001); | ||
367 | nv_wait(dev, base + 0x00, 0x00020000, 0x00020000); | ||
368 | nv_mask(dev, base + 0x00, 0x00020004, 0x00000004); | ||
369 | } | ||
370 | } | ||
371 | |||
372 | /* select pll/non-pll mode, and program final clock divider */ | ||
373 | nv_mask(dev, 0x137100, (1 << clk), info->ssel); | ||
374 | nv_wait(dev, 0x137100, (1 << clk), info->ssel); | ||
375 | nv_mask(dev, 0x137250 + (clk * 0x04), 0x00003f3f, info->mdiv); | ||
376 | } | ||
377 | |||
378 | int | ||
379 | nvc0_pm_clocks_set(struct drm_device *dev, void *data) | ||
380 | { | ||
381 | struct nvc0_pm_state *info = data; | ||
382 | int i; | ||
383 | |||
384 | for (i = 0; i < 16; i++) { | ||
385 | if (!info->eng[i].freq) | ||
386 | continue; | ||
387 | prog_clk(dev, i, &info->eng[i]); | ||
388 | } | ||
389 | |||
390 | kfree(info); | ||
391 | return 0; | ||
392 | } | ||