path: root/drivers/gpu/drm/radeon/r100.c
author    Jerome Glisse <jglisse@redhat.com>    2009-06-05 08:42:42 -0400
committer Dave Airlie <airlied@redhat.com>      2009-06-14 22:01:53 -0400
commit    771fe6b912fca54f03e8a72eb63058b582775362 (patch)
tree      58aa5469ba8058c2b564d50807395ad6cd7bd7e4 /drivers/gpu/drm/radeon/r100.c
parent    ba4e7d973dd09b66912ac4c0856add8b0703a997 (diff)
drm/radeon: introduce kernel modesetting for radeon hardware
Add kernel modesetting support to the radeon driver, using the TTM memory manager to manage memory and DRM/GEM to provide the userspace API. In order to avoid backward-compatibility issues and to allow a clean design and code base, radeon kernel modesetting uses a different code path than the old radeon/drm driver. When kernel modesetting is enabled, the IOCTLs of the old radeon/drm driver are considered invalid: an error message is printed in the log and they return failure. KMS-enabled userspace will use the new API to talk to the radeon/drm driver.

The new API provides functions to create/destroy/share/mmap buffer objects, which are then managed by the kernel memory manager (here TTM). In order to submit commands to the GPU, userspace provides a buffer holding the command stream; along with this buffer, userspace has to provide a list of the buffer objects used by the command stream. The kernel radeon driver will then place the buffers in GPU-accessible memory and update the command stream to reflect the position of the different buffers. The kernel will also perform security checks on the command stream provided by the user; we want to catch and forbid any illegal use of the GPU, such as DMA into random system memory or into memory not owned by the process supplying the command stream. This part of the code is still incomplete, which is why we propose this patch as a staging driver addition; future security checks might forbid current experimental userspace from running.

This code supports the following hardware: R1XX, R2XX, R3XX, R4XX and R5XX (radeons up to the X1950). Work is underway to provide support for R6XX, R7XX and newer hardware (radeons from the HD2XXX to the HD4XXX).

Authors:
    Jerome Glisse <jglisse@redhat.com>
    Dave Airlie <airlied@redhat.com>
    Alex Deucher <alexdeucher@gmail.com>

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
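As a side note, below is a minimal standalone sketch (not part of the driver) of the command-stream patching described above, assuming only the pitch/offset packing visible in r100_packet0_check() and r100_copy_blit() in this file: bits 21:0 of a DST/SRC_PITCH_OFFSET dword hold the surface offset in 1024-byte units, bits 31:22 hold the pitch, and the checker rebases the offset onto the buffer object's validated GPU placement. The helper name patch_pitch_offset is hypothetical.

#include <stdint.h>

/* Hypothetical sketch of the relocation step applied to
 * RADEON_DST_PITCH_OFFSET / RADEON_SRC_PITCH_OFFSET values. */
static uint32_t patch_pitch_offset(uint32_t user_dword, uint64_t bo_gpu_offset)
{
	uint32_t offset = user_dword & 0x003fffff;      /* offset field, in 1 KB units */

	offset += (uint32_t)(bo_gpu_offset >> 10);      /* rebase onto the validated BO placement */
	return (user_dword & 0xffc00000) | offset;      /* keep the pitch bits untouched */
}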
Diffstat (limited to 'drivers/gpu/drm/radeon/r100.c')
-rw-r--r--  drivers/gpu/drm/radeon/r100.c  1524
1 file changed, 1524 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
new file mode 100644
index 000000000000..5225f5be7ea7
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -0,0 +1,1524 @@
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
28#include <linux/seq_file.h>
29#include "drmP.h"
30#include "drm.h"
31#include "radeon_drm.h"
32#include "radeon_microcode.h"
33#include "radeon_reg.h"
34#include "radeon.h"
35
36/* This file gathers functions specific to:
37 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
38 *
39 * Some of these functions might be used by newer ASICs.
40 */
41void r100_hdp_reset(struct radeon_device *rdev);
42void r100_gpu_init(struct radeon_device *rdev);
43int r100_gui_wait_for_idle(struct radeon_device *rdev);
44int r100_mc_wait_for_idle(struct radeon_device *rdev);
45void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
46void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
47int r100_debugfs_mc_info_init(struct radeon_device *rdev);
48
49
50/*
51 * PCI GART
52 */
53void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
54{
55 /* TODO: can we do something here? */
56 /* It seems the hw only caches one entry, so we should discard this
57 * entry; otherwise, if the first GPU GART read hits this entry it
58 * could end up at the wrong address. */
59}
60
61int r100_pci_gart_enable(struct radeon_device *rdev)
62{
63 uint32_t tmp;
64 int r;
65
66 /* Initialize common gart structure */
67 r = radeon_gart_init(rdev);
68 if (r) {
69 return r;
70 }
71 if (rdev->gart.table.ram.ptr == NULL) {
72 rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
73 r = radeon_gart_table_ram_alloc(rdev);
74 if (r) {
75 return r;
76 }
77 }
78 /* discard memory request outside of configured range */
79 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
80 WREG32(RADEON_AIC_CNTL, tmp);
81 /* set address range for PCI address translate */
82 WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
83 tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
84 WREG32(RADEON_AIC_HI_ADDR, tmp);
85 /* Enable bus mastering */
86 tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
87 WREG32(RADEON_BUS_CNTL, tmp);
88 /* set PCI GART page-table base address */
89 WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
90 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
91 WREG32(RADEON_AIC_CNTL, tmp);
92 r100_pci_gart_tlb_flush(rdev);
93 rdev->gart.ready = true;
94 return 0;
95}
96
97void r100_pci_gart_disable(struct radeon_device *rdev)
98{
99 uint32_t tmp;
100
101 /* discard memory request outside of configured range */
102 tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
103 WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
104 WREG32(RADEON_AIC_LO_ADDR, 0);
105 WREG32(RADEON_AIC_HI_ADDR, 0);
106}
107
108int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
109{
110 if (i < 0 || i >= rdev->gart.num_gpu_pages) {
111 return -EINVAL;
112 }
113 rdev->gart.table.ram.ptr[i] = cpu_to_le32((uint32_t)addr);
114 return 0;
115}
116
117int r100_gart_enable(struct radeon_device *rdev)
118{
119 if (rdev->flags & RADEON_IS_AGP) {
120 r100_pci_gart_disable(rdev);
121 return 0;
122 }
123 return r100_pci_gart_enable(rdev);
124}
125
126
127/*
128 * MC
129 */
130void r100_mc_disable_clients(struct radeon_device *rdev)
131{
132 uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;
133
134 /* FIXME: is this function correct for rs100,rs200,rs300 ? */
135 if (r100_gui_wait_for_idle(rdev)) {
136 printk(KERN_WARNING "Failed to wait GUI idle while "
137 "programming pipes. Bad things might happen.\n");
138 }
139
140 /* stop display and memory access */
141 ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
142 WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
143 crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
144 WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
145 crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
146
147 r100_gpu_wait_for_vsync(rdev);
148
149 WREG32(RADEON_CRTC_GEN_CNTL,
150 (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
151 RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);
152
153 if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
154 crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
155
156 r100_gpu_wait_for_vsync2(rdev);
157 WREG32(RADEON_CRTC2_GEN_CNTL,
158 (crtc2_gen_cntl &
159 ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
160 RADEON_CRTC2_DISP_REQ_EN_B);
161 }
162
163 udelay(500);
164}
165
166void r100_mc_setup(struct radeon_device *rdev)
167{
168 uint32_t tmp;
169 int r;
170
171 r = r100_debugfs_mc_info_init(rdev);
172 if (r) {
173 DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
174 }
175 /* Write VRAM size in case we are limiting it */
176 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
177 tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
178 tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
179 tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
180 WREG32(RADEON_MC_FB_LOCATION, tmp);
181
182 /* Enable bus mastering */
183 tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
184 WREG32(RADEON_BUS_CNTL, tmp);
185
186 if (rdev->flags & RADEON_IS_AGP) {
187 tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
188 tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
189 tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
190 WREG32(RADEON_MC_AGP_LOCATION, tmp);
191 WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
192 } else {
193 WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
194 WREG32(RADEON_AGP_BASE, 0);
195 }
196
197 tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
198 tmp |= (7 << 28);
199 WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
200 (void)RREG32(RADEON_HOST_PATH_CNTL);
201 WREG32(RADEON_HOST_PATH_CNTL, tmp);
202 (void)RREG32(RADEON_HOST_PATH_CNTL);
203}
204
205int r100_mc_init(struct radeon_device *rdev)
206{
207 int r;
208
209 if (r100_debugfs_rbbm_init(rdev)) {
210 DRM_ERROR("Failed to register debugfs file for RBBM !\n");
211 }
212
213 r100_gpu_init(rdev);
214 /* Disable the gart, which also disables out-of-gart access */
215 r100_pci_gart_disable(rdev);
216
217 /* Setup GPU memory space */
218 rdev->mc.vram_location = 0xFFFFFFFFUL;
219 rdev->mc.gtt_location = 0xFFFFFFFFUL;
220 if (rdev->flags & RADEON_IS_AGP) {
221 r = radeon_agp_init(rdev);
222 if (r) {
223 printk(KERN_WARNING "[drm] Disabling AGP\n");
224 rdev->flags &= ~RADEON_IS_AGP;
225 rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
226 } else {
227 rdev->mc.gtt_location = rdev->mc.agp_base;
228 }
229 }
230 r = radeon_mc_setup(rdev);
231 if (r) {
232 return r;
233 }
234
235 r100_mc_disable_clients(rdev);
236 if (r100_mc_wait_for_idle(rdev)) {
237 printk(KERN_WARNING "Failed to wait MC idle while "
238 "programming pipes. Bad things might happen.\n");
239 }
240
241 r100_mc_setup(rdev);
242 return 0;
243}
244
245void r100_mc_fini(struct radeon_device *rdev)
246{
247 r100_pci_gart_disable(rdev);
248 radeon_gart_table_ram_free(rdev);
249 radeon_gart_fini(rdev);
250}
251
252
253/*
254 * Fence emission
255 */
256void r100_fence_ring_emit(struct radeon_device *rdev,
257 struct radeon_fence *fence)
258{
259 /* Whoever calls radeon_fence_emit should call ring_lock and ask
260 * for enough space (today the callers are ib scheduling and buffer moves) */
261 /* Wait until IDLE & CLEAN */
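	/* Register 0x1720 is assumed to be RADEON_WAIT_UNTIL here; bits 16 and 17
	 * are assumed to be the 2D and 3D idle/clean wait bits, so the fence value
	 * below is only written once the engines are idle. */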
262 radeon_ring_write(rdev, PACKET0(0x1720, 0));
263 radeon_ring_write(rdev, (1 << 16) | (1 << 17));
264 /* Emit fence sequence & fire IRQ */
265 radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
266 radeon_ring_write(rdev, fence->seq);
267 radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
268 radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
269}
270
271
272/*
273 * Writeback
274 */
275int r100_wb_init(struct radeon_device *rdev)
276{
277 int r;
278
279 if (rdev->wb.wb_obj == NULL) {
280 r = radeon_object_create(rdev, NULL, 4096,
281 true,
282 RADEON_GEM_DOMAIN_GTT,
283 false, &rdev->wb.wb_obj);
284 if (r) {
285 DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
286 return r;
287 }
288 r = radeon_object_pin(rdev->wb.wb_obj,
289 RADEON_GEM_DOMAIN_GTT,
290 &rdev->wb.gpu_addr);
291 if (r) {
292 DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
293 return r;
294 }
295 r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
296 if (r) {
297 DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
298 return r;
299 }
300 }
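	/* Point the CP's scratch/rptr writeback at the pinned buffer.  The register
	 * names are an assumption based on the offsets: 0x774 ~ SCRATCH_ADDR,
	 * 0x70C ~ CP_RB_RPTR_ADDR and 0x770 ~ SCRATCH_UMSK (writeback enable mask). */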
301 WREG32(0x774, rdev->wb.gpu_addr);
302 WREG32(0x70C, rdev->wb.gpu_addr + 1024);
303 WREG32(0x770, 0xff);
304 return 0;
305}
306
307void r100_wb_fini(struct radeon_device *rdev)
308{
309 if (rdev->wb.wb_obj) {
310 radeon_object_kunmap(rdev->wb.wb_obj);
311 radeon_object_unpin(rdev->wb.wb_obj);
312 radeon_object_unref(&rdev->wb.wb_obj);
313 rdev->wb.wb = NULL;
314 rdev->wb.wb_obj = NULL;
315 }
316}
317
318int r100_copy_blit(struct radeon_device *rdev,
319 uint64_t src_offset,
320 uint64_t dst_offset,
321 unsigned num_pages,
322 struct radeon_fence *fence)
323{
324 uint32_t cur_pages;
325 uint32_t stride_bytes = PAGE_SIZE;
326 uint32_t pitch;
327 uint32_t stride_pixels;
328 unsigned ndw;
329 int num_loops;
330 int r = 0;
331
332 /* radeon limited to 16k stride */
333 stride_bytes &= 0x3fff;
334 /* radeon pitch is /64 */
335 pitch = stride_bytes / 64;
336 stride_pixels = stride_bytes / 4;
337 num_loops = DIV_ROUND_UP(num_pages, 8191);
338
339 /* Ask for enough room for blit + flush + fence */
340 ndw = 64 + (10 * num_loops);
341 r = radeon_ring_lock(rdev, ndw);
342 if (r) {
343 DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
344 return -EINVAL;
345 }
346 while (num_pages > 0) {
347 cur_pages = num_pages;
348 if (cur_pages > 8191) {
349 cur_pages = 8191;
350 }
351 num_pages -= cur_pages;
352
353 /* pages are in Y direction - height
354 page width in X direction - width */
355 radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
356 radeon_ring_write(rdev,
357 RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
358 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
359 RADEON_GMC_SRC_CLIPPING |
360 RADEON_GMC_DST_CLIPPING |
361 RADEON_GMC_BRUSH_NONE |
362 (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
363 RADEON_GMC_SRC_DATATYPE_COLOR |
364 RADEON_ROP3_S |
365 RADEON_DP_SRC_SOURCE_MEMORY |
366 RADEON_GMC_CLR_CMP_CNTL_DIS |
367 RADEON_GMC_WR_MSK_DIS);
368 radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
369 radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
370 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
371 radeon_ring_write(rdev, 0);
372 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
373 radeon_ring_write(rdev, num_pages);
374 radeon_ring_write(rdev, num_pages);
375 radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
376 }
377 radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
378 radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
379 radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
380 radeon_ring_write(rdev,
381 RADEON_WAIT_2D_IDLECLEAN |
382 RADEON_WAIT_HOST_IDLECLEAN |
383 RADEON_WAIT_DMA_GUI_IDLE);
384 if (fence) {
385 r = radeon_fence_emit(rdev, fence);
386 }
387 radeon_ring_unlock_commit(rdev);
388 return r;
389}
390
391
392/*
393 * CP
394 */
395void r100_ring_start(struct radeon_device *rdev)
396{
397 int r;
398
399 r = radeon_ring_lock(rdev, 2);
400 if (r) {
401 return;
402 }
403 radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
404 radeon_ring_write(rdev,
405 RADEON_ISYNC_ANY2D_IDLE3D |
406 RADEON_ISYNC_ANY3D_IDLE2D |
407 RADEON_ISYNC_WAIT_IDLEGUI |
408 RADEON_ISYNC_CPSCRATCH_IDLEGUI);
409 radeon_ring_unlock_commit(rdev);
410}
411
412static void r100_cp_load_microcode(struct radeon_device *rdev)
413{
414 int i;
415
416 if (r100_gui_wait_for_idle(rdev)) {
417 printk(KERN_WARNING "Failed to wait GUI idle while "
418 "programming pipes. Bad things might happen.\n");
419 }
420
421 WREG32(RADEON_CP_ME_RAM_ADDR, 0);
422 if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
423 (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
424 (rdev->family == CHIP_RS200)) {
425 DRM_INFO("Loading R100 Microcode\n");
426 for (i = 0; i < 256; i++) {
427 WREG32(RADEON_CP_ME_RAM_DATAH, R100_cp_microcode[i][1]);
428 WREG32(RADEON_CP_ME_RAM_DATAL, R100_cp_microcode[i][0]);
429 }
430 } else if ((rdev->family == CHIP_R200) ||
431 (rdev->family == CHIP_RV250) ||
432 (rdev->family == CHIP_RV280) ||
433 (rdev->family == CHIP_RS300)) {
434 DRM_INFO("Loading R200 Microcode\n");
435 for (i = 0; i < 256; i++) {
436 WREG32(RADEON_CP_ME_RAM_DATAH, R200_cp_microcode[i][1]);
437 WREG32(RADEON_CP_ME_RAM_DATAL, R200_cp_microcode[i][0]);
438 }
439 } else if ((rdev->family == CHIP_R300) ||
440 (rdev->family == CHIP_R350) ||
441 (rdev->family == CHIP_RV350) ||
442 (rdev->family == CHIP_RV380) ||
443 (rdev->family == CHIP_RS400) ||
444 (rdev->family == CHIP_RS480)) {
445 DRM_INFO("Loading R300 Microcode\n");
446 for (i = 0; i < 256; i++) {
447 WREG32(RADEON_CP_ME_RAM_DATAH, R300_cp_microcode[i][1]);
448 WREG32(RADEON_CP_ME_RAM_DATAL, R300_cp_microcode[i][0]);
449 }
450 } else if ((rdev->family == CHIP_R420) ||
451 (rdev->family == CHIP_R423) ||
452 (rdev->family == CHIP_RV410)) {
453 DRM_INFO("Loading R400 Microcode\n");
454 for (i = 0; i < 256; i++) {
455 WREG32(RADEON_CP_ME_RAM_DATAH, R420_cp_microcode[i][1]);
456 WREG32(RADEON_CP_ME_RAM_DATAL, R420_cp_microcode[i][0]);
457 }
458 } else if ((rdev->family == CHIP_RS690) ||
459 (rdev->family == CHIP_RS740)) {
460 DRM_INFO("Loading RS690/RS740 Microcode\n");
461 for (i = 0; i < 256; i++) {
462 WREG32(RADEON_CP_ME_RAM_DATAH, RS690_cp_microcode[i][1]);
463 WREG32(RADEON_CP_ME_RAM_DATAL, RS690_cp_microcode[i][0]);
464 }
465 } else if (rdev->family == CHIP_RS600) {
466 DRM_INFO("Loading RS600 Microcode\n");
467 for (i = 0; i < 256; i++) {
468 WREG32(RADEON_CP_ME_RAM_DATAH, RS600_cp_microcode[i][1]);
469 WREG32(RADEON_CP_ME_RAM_DATAL, RS600_cp_microcode[i][0]);
470 }
471 } else if ((rdev->family == CHIP_RV515) ||
472 (rdev->family == CHIP_R520) ||
473 (rdev->family == CHIP_RV530) ||
474 (rdev->family == CHIP_R580) ||
475 (rdev->family == CHIP_RV560) ||
476 (rdev->family == CHIP_RV570)) {
477 DRM_INFO("Loading R500 Microcode\n");
478 for (i = 0; i < 256; i++) {
479 WREG32(RADEON_CP_ME_RAM_DATAH, R520_cp_microcode[i][1]);
480 WREG32(RADEON_CP_ME_RAM_DATAL, R520_cp_microcode[i][0]);
481 }
482 }
483}
484
485int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
486{
487 unsigned rb_bufsz;
488 unsigned rb_blksz;
489 unsigned max_fetch;
490 unsigned pre_write_timer;
491 unsigned pre_write_limit;
492 unsigned indirect2_start;
493 unsigned indirect1_start;
494 uint32_t tmp;
495 int r;
496
497 if (r100_debugfs_cp_init(rdev)) {
498 DRM_ERROR("Failed to register debugfs file for CP !\n");
499 }
500 /* Reset CP */
501 tmp = RREG32(RADEON_CP_CSQ_STAT);
502 if ((tmp & (1 << 31))) {
503 DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
504 WREG32(RADEON_CP_CSQ_MODE, 0);
505 WREG32(RADEON_CP_CSQ_CNTL, 0);
506 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
507 tmp = RREG32(RADEON_RBBM_SOFT_RESET);
508 mdelay(2);
509 WREG32(RADEON_RBBM_SOFT_RESET, 0);
510 tmp = RREG32(RADEON_RBBM_SOFT_RESET);
511 mdelay(2);
512 tmp = RREG32(RADEON_CP_CSQ_STAT);
513 if ((tmp & (1 << 31))) {
514 DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
515 }
516 } else {
517 DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
518 }
519 /* Align ring size */
520 rb_bufsz = drm_order(ring_size / 8);
521 ring_size = (1 << (rb_bufsz + 1)) * 4;
522 r100_cp_load_microcode(rdev);
523 r = radeon_ring_init(rdev, ring_size);
524 if (r) {
525 return r;
526 }
527 /* Each time the cp reads 1024 bytes (16 dword/quadword), update
528 * the rptr copy in system ram */
529 rb_blksz = 9;
530 /* the cp will read 128 bytes at a time (4 dwords) */
531 max_fetch = 1;
532 rdev->cp.align_mask = 16 - 1;
533 /* Writes to CP_RB_WPTR will be delayed for pre_write_timer clocks */
534 pre_write_timer = 64;
535 /* Force a CP_RB_WPTR write if it is written more than once before the
536 * delay expires
537 */
538 pre_write_limit = 0;
539 /* Set up the cp cache like this (cache size is 96 dwords):
540 * RING      0 to 15
541 * INDIRECT1 16 to 79
542 * INDIRECT2 80 to 95
543 * So the ring cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)),
544 * the indirect1 cache size is 64 dwords (> (2 * max_fetch = 2 * 4 dwords)),
545 * the indirect2 cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)).
546 * The idea is that most of the gpu commands will go through the indirect1
547 * buffer, so it gets the bigger cache.
548 */
549 indirect2_start = 80;
550 indirect1_start = 16;
551 /* cp setup */
552 WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
553 WREG32(RADEON_CP_RB_CNTL,
554 REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
555 REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
556 REG_SET(RADEON_MAX_FETCH, max_fetch) |
557 RADEON_RB_NO_UPDATE);
558 /* Set ring address */
559 DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
560 WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
561 /* Force read & write ptr to 0 */
562 tmp = RREG32(RADEON_CP_RB_CNTL);
563 WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
564 WREG32(RADEON_CP_RB_RPTR_WR, 0);
565 WREG32(RADEON_CP_RB_WPTR, 0);
566 WREG32(RADEON_CP_RB_CNTL, tmp);
567 udelay(10);
568 rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
569 rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
570 /* Set cp mode to bus mastering & enable the cp */
571 WREG32(RADEON_CP_CSQ_MODE,
572 REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
573 REG_SET(RADEON_INDIRECT1_START, indirect1_start));
574 WREG32(0x718, 0);
575 WREG32(0x744, 0x00004D4D);
576 WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
577 radeon_ring_start(rdev);
578 r = radeon_ring_test(rdev);
579 if (r) {
580 DRM_ERROR("radeon: cp isn't working (%d).\n", r);
581 return r;
582 }
583 rdev->cp.ready = true;
584 return 0;
585}
586
587void r100_cp_fini(struct radeon_device *rdev)
588{
589 /* Disable ring */
590 rdev->cp.ready = false;
591 WREG32(RADEON_CP_CSQ_CNTL, 0);
592 radeon_ring_fini(rdev);
593 DRM_INFO("radeon: cp finalized\n");
594}
595
596void r100_cp_disable(struct radeon_device *rdev)
597{
598 /* Disable ring */
599 rdev->cp.ready = false;
600 WREG32(RADEON_CP_CSQ_MODE, 0);
601 WREG32(RADEON_CP_CSQ_CNTL, 0);
602 if (r100_gui_wait_for_idle(rdev)) {
603 printk(KERN_WARNING "Failed to wait GUI idle while "
604 "programming pipes. Bad things might happen.\n");
605 }
606}
607
608int r100_cp_reset(struct radeon_device *rdev)
609{
610 uint32_t tmp;
611 bool reinit_cp;
612 int i;
613
614 reinit_cp = rdev->cp.ready;
615 rdev->cp.ready = false;
616 WREG32(RADEON_CP_CSQ_MODE, 0);
617 WREG32(RADEON_CP_CSQ_CNTL, 0);
618 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
619 (void)RREG32(RADEON_RBBM_SOFT_RESET);
620 udelay(200);
621 WREG32(RADEON_RBBM_SOFT_RESET, 0);
622 /* Wait to prevent race in RBBM_STATUS */
623 mdelay(1);
624 for (i = 0; i < rdev->usec_timeout; i++) {
625 tmp = RREG32(RADEON_RBBM_STATUS);
626 if (!(tmp & (1 << 16))) {
627 DRM_INFO("CP reset succeed (RBBM_STATUS=0x%08X)\n",
628 tmp);
629 if (reinit_cp) {
630 return r100_cp_init(rdev, rdev->cp.ring_size);
631 }
632 return 0;
633 }
634 DRM_UDELAY(1);
635 }
636 tmp = RREG32(RADEON_RBBM_STATUS);
637 DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
638 return -1;
639}
640
641
642/*
643 * CS functions
644 */
645int r100_cs_parse_packet0(struct radeon_cs_parser *p,
646 struct radeon_cs_packet *pkt,
647 unsigned *auth, unsigned n,
648 radeon_packet0_check_t check)
649{
650 unsigned reg;
651 unsigned i, j, m;
652 unsigned idx;
653 int r;
654
655 idx = pkt->idx + 1;
656 reg = pkt->reg;
657 if (pkt->one_reg_wr) {
658 if ((reg >> 7) > n) {
659 return -EINVAL;
660 }
661 } else {
662 if (((reg + (pkt->count << 2)) >> 7) > n) {
663 return -EINVAL;
664 }
665 }
666 for (i = 0; i <= pkt->count; i++, idx++) {
667 j = (reg >> 7);
668 m = 1 << ((reg >> 2) & 31);
669 if (auth[j] & m) {
670 r = check(p, pkt, idx, reg);
671 if (r) {
672 return r;
673 }
674 }
675 if (pkt->one_reg_wr) {
676 if (!(auth[j] & m)) {
677 break;
678 }
679 } else {
680 reg += 4;
681 }
682 }
683 return 0;
684}
685
686int r100_cs_parse_packet3(struct radeon_cs_parser *p,
687 struct radeon_cs_packet *pkt,
688 unsigned *auth, unsigned n,
689 radeon_packet3_check_t check)
690{
691 unsigned i, m;
692
693 if ((pkt->opcode >> 5) > n) {
694 return -EINVAL;
695 }
696 i = pkt->opcode >> 5;
697 m = 1 << (pkt->opcode & 31);
698 if (auth[i] & m) {
699 return check(p, pkt);
700 }
701 return 0;
702}
703
704void r100_cs_dump_packet(struct radeon_cs_parser *p,
705 struct radeon_cs_packet *pkt)
706{
707 struct radeon_cs_chunk *ib_chunk;
708 volatile uint32_t *ib;
709 unsigned i;
710 unsigned idx;
711
712 ib = p->ib->ptr;
713 ib_chunk = &p->chunks[p->chunk_ib_idx];
714 idx = pkt->idx;
715 for (i = 0; i <= (pkt->count + 1); i++, idx++) {
716 DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
717 }
718}
719
720/**
721 * r100_cs_packet_parse() - parse a cp packet and point the ib index to the next packet
722 * @p: parser structure holding parsing context.
723 * @pkt: where to store packet information
724 *
725 * Assumes that chunk_ib_index is properly set. Returns -EINVAL
726 * if the packet is bigger than the remaining ib size or if the packet type is unknown.
727 **/
728int r100_cs_packet_parse(struct radeon_cs_parser *p,
729 struct radeon_cs_packet *pkt,
730 unsigned idx)
731{
732 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
733 uint32_t header = ib_chunk->kdata[idx];
734
735 if (idx >= ib_chunk->length_dw) {
736 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
737 idx, ib_chunk->length_dw);
738 return -EINVAL;
739 }
740 pkt->idx = idx;
741 pkt->type = CP_PACKET_GET_TYPE(header);
742 pkt->count = CP_PACKET_GET_COUNT(header);
743 switch (pkt->type) {
744 case PACKET_TYPE0:
745 pkt->reg = CP_PACKET0_GET_REG(header);
746 pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
747 break;
748 case PACKET_TYPE3:
749 pkt->opcode = CP_PACKET3_GET_OPCODE(header);
750 break;
751 case PACKET_TYPE2:
752 pkt->count = -1;
753 break;
754 default:
755 DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
756 return -EINVAL;
757 }
758 if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
759 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
760 pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
761 return -EINVAL;
762 }
763 return 0;
764}
765
766/**
767 * r100_cs_packet_next_reloc() - parse the next packet, which should be a reloc packet3
768 * @p: parser structure holding parsing context.
769 * @data: pointer to relocation data
770 * @offset_start: starting offset
771 * @offset_mask: offset mask (to align the start offset on)
772 * @cs_reloc: reloc information
773 *
774 * Checks that the next packet is a relocation packet3, does bo validation and computes
775 * the GPU offset using the provided start.
776 **/
777int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
778 struct radeon_cs_reloc **cs_reloc)
779{
780 struct radeon_cs_chunk *ib_chunk;
781 struct radeon_cs_chunk *relocs_chunk;
782 struct radeon_cs_packet p3reloc;
783 unsigned idx;
784 int r;
785
786 if (p->chunk_relocs_idx == -1) {
787 DRM_ERROR("No relocation chunk !\n");
788 return -EINVAL;
789 }
790 *cs_reloc = NULL;
791 ib_chunk = &p->chunks[p->chunk_ib_idx];
792 relocs_chunk = &p->chunks[p->chunk_relocs_idx];
793 r = r100_cs_packet_parse(p, &p3reloc, p->idx);
794 if (r) {
795 return r;
796 }
797 p->idx += p3reloc.count + 2;
798 if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
799 DRM_ERROR("No packet3 for relocation for packet at %d.\n",
800 p3reloc.idx);
801 r100_cs_dump_packet(p, &p3reloc);
802 return -EINVAL;
803 }
804 idx = ib_chunk->kdata[p3reloc.idx + 1];
805 if (idx >= relocs_chunk->length_dw) {
806 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
807 idx, relocs_chunk->length_dw);
808 r100_cs_dump_packet(p, &p3reloc);
809 return -EINVAL;
810 }
811 /* FIXME: we assume reloc size is 4 dwords */
812 *cs_reloc = p->relocs_ptr[(idx / 4)];
813 return 0;
814}
815
816static int r100_packet0_check(struct radeon_cs_parser *p,
817 struct radeon_cs_packet *pkt)
818{
819 struct radeon_cs_chunk *ib_chunk;
820 struct radeon_cs_reloc *reloc;
821 volatile uint32_t *ib;
822 uint32_t tmp;
823 unsigned reg;
824 unsigned i;
825 unsigned idx;
826 bool onereg;
827 int r;
828
829 ib = p->ib->ptr;
830 ib_chunk = &p->chunks[p->chunk_ib_idx];
831 idx = pkt->idx + 1;
832 reg = pkt->reg;
833 onereg = false;
834 if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) {
835 onereg = true;
836 }
837 for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
838 switch (reg) {
839 /* FIXME: only allow PACKET3 blit? easier to check for out of
840 * range access */
841 case RADEON_DST_PITCH_OFFSET:
842 case RADEON_SRC_PITCH_OFFSET:
843 r = r100_cs_packet_next_reloc(p, &reloc);
844 if (r) {
845 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
846 idx, reg);
847 r100_cs_dump_packet(p, pkt);
848 return r;
849 }
850 tmp = ib_chunk->kdata[idx] & 0x003fffff;
851 tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
852 ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
853 break;
854 case RADEON_RB3D_DEPTHOFFSET:
855 case RADEON_RB3D_COLOROFFSET:
856 case R300_RB3D_COLOROFFSET0:
857 case R300_ZB_DEPTHOFFSET:
858 case R200_PP_TXOFFSET_0:
859 case R200_PP_TXOFFSET_1:
860 case R200_PP_TXOFFSET_2:
861 case R200_PP_TXOFFSET_3:
862 case R200_PP_TXOFFSET_4:
863 case R200_PP_TXOFFSET_5:
864 case RADEON_PP_TXOFFSET_0:
865 case RADEON_PP_TXOFFSET_1:
866 case RADEON_PP_TXOFFSET_2:
867 case R300_TX_OFFSET_0:
868 case R300_TX_OFFSET_0+4:
869 case R300_TX_OFFSET_0+8:
870 case R300_TX_OFFSET_0+12:
871 case R300_TX_OFFSET_0+16:
872 case R300_TX_OFFSET_0+20:
873 case R300_TX_OFFSET_0+24:
874 case R300_TX_OFFSET_0+28:
875 case R300_TX_OFFSET_0+32:
876 case R300_TX_OFFSET_0+36:
877 case R300_TX_OFFSET_0+40:
878 case R300_TX_OFFSET_0+44:
879 case R300_TX_OFFSET_0+48:
880 case R300_TX_OFFSET_0+52:
881 case R300_TX_OFFSET_0+56:
882 case R300_TX_OFFSET_0+60:
883 r = r100_cs_packet_next_reloc(p, &reloc);
884 if (r) {
885 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
886 idx, reg);
887 r100_cs_dump_packet(p, pkt);
888 return r;
889 }
890 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
891 break;
892 default:
893 /* FIXME: we don't want to allow any other packets */
894 break;
895 }
896 if (onereg) {
897 /* FIXME: forbid onereg write to register on relocate */
898 break;
899 }
900 }
901 return 0;
902}
903
904static int r100_packet3_check(struct radeon_cs_parser *p,
905 struct radeon_cs_packet *pkt)
906{
907 struct radeon_cs_chunk *ib_chunk;
908 struct radeon_cs_reloc *reloc;
909 unsigned idx;
910 unsigned i, c;
911 volatile uint32_t *ib;
912 int r;
913
914 ib = p->ib->ptr;
915 ib_chunk = &p->chunks[p->chunk_ib_idx];
916 idx = pkt->idx + 1;
917 switch (pkt->opcode) {
918 case PACKET3_3D_LOAD_VBPNTR:
919 c = ib_chunk->kdata[idx++];
920 for (i = 0; i < (c - 1); i += 2, idx += 3) {
921 r = r100_cs_packet_next_reloc(p, &reloc);
922 if (r) {
923 DRM_ERROR("No reloc for packet3 %d\n",
924 pkt->opcode);
925 r100_cs_dump_packet(p, pkt);
926 return r;
927 }
928 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
929 r = r100_cs_packet_next_reloc(p, &reloc);
930 if (r) {
931 DRM_ERROR("No reloc for packet3 %d\n",
932 pkt->opcode);
933 r100_cs_dump_packet(p, pkt);
934 return r;
935 }
936 ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
937 }
938 if (c & 1) {
939 r = r100_cs_packet_next_reloc(p, &reloc);
940 if (r) {
941 DRM_ERROR("No reloc for packet3 %d\n",
942 pkt->opcode);
943 r100_cs_dump_packet(p, pkt);
944 return r;
945 }
946 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
947 }
948 break;
949 case PACKET3_INDX_BUFFER:
950 r = r100_cs_packet_next_reloc(p, &reloc);
951 if (r) {
952 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
953 r100_cs_dump_packet(p, pkt);
954 return r;
955 }
956 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
957 break;
958 case 0x23:
959 /* FIXME: cleanup */
960 /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
961 r = r100_cs_packet_next_reloc(p, &reloc);
962 if (r) {
963 DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
964 r100_cs_dump_packet(p, pkt);
965 return r;
966 }
967 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
968 break;
969 case PACKET3_3D_DRAW_IMMD:
970 /* triggers drawing using in-packet vertex data */
971 case PACKET3_3D_DRAW_IMMD_2:
972 /* triggers drawing using in-packet vertex data */
973 case PACKET3_3D_DRAW_VBUF_2:
974 /* triggers drawing of vertex buffers setup elsewhere */
975 case PACKET3_3D_DRAW_INDX_2:
976 /* triggers drawing using indices to vertex buffer */
977 case PACKET3_3D_DRAW_VBUF:
978 /* triggers drawing of vertex buffers setup elsewhere */
979 case PACKET3_3D_DRAW_INDX:
980 /* triggers drawing using indices to vertex buffer */
981 case PACKET3_NOP:
982 break;
983 default:
984 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
985 return -EINVAL;
986 }
987 return 0;
988}
989
990int r100_cs_parse(struct radeon_cs_parser *p)
991{
992 struct radeon_cs_packet pkt;
993 int r;
994
995 do {
996 r = r100_cs_packet_parse(p, &pkt, p->idx);
997 if (r) {
998 return r;
999 }
1000 p->idx += pkt.count + 2;
1001 switch (pkt.type) {
1002 case PACKET_TYPE0:
1003 r = r100_packet0_check(p, &pkt);
1004 break;
1005 case PACKET_TYPE2:
1006 break;
1007 case PACKET_TYPE3:
1008 r = r100_packet3_check(p, &pkt);
1009 break;
1010 default:
1011 DRM_ERROR("Unknown packet type %d !\n",
1012 pkt.type);
1013 return -EINVAL;
1014 }
1015 if (r) {
1016 return r;
1017 }
1018 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1019 return 0;
1020}
1021
1022
1023/*
1024 * Global GPU functions
1025 */
1026void r100_errata(struct radeon_device *rdev)
1027{
1028 rdev->pll_errata = 0;
1029
1030 if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
1031 rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
1032 }
1033
1034 if (rdev->family == CHIP_RV100 ||
1035 rdev->family == CHIP_RS100 ||
1036 rdev->family == CHIP_RS200) {
1037 rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
1038 }
1039}
1040
1041/* Wait for vertical sync on primary CRTC */
1042void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
1043{
1044 uint32_t crtc_gen_cntl, tmp;
1045 int i;
1046
1047 crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
1048 if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
1049 !(crtc_gen_cntl & RADEON_CRTC_EN)) {
1050 return;
1051 }
1052 /* Clear the CRTC_VBLANK_SAVE bit */
1053 WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
1054 for (i = 0; i < rdev->usec_timeout; i++) {
1055 tmp = RREG32(RADEON_CRTC_STATUS);
1056 if (tmp & RADEON_CRTC_VBLANK_SAVE) {
1057 return;
1058 }
1059 DRM_UDELAY(1);
1060 }
1061}
1062
1063/* Wait for vertical sync on secondary CRTC */
1064void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
1065{
1066 uint32_t crtc2_gen_cntl, tmp;
1067 int i;
1068
1069 crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
1070 if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
1071 !(crtc2_gen_cntl & RADEON_CRTC2_EN))
1072 return;
1073
1074 /* Clear the CRTC_VBLANK_SAVE bit */
1075 WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
1076 for (i = 0; i < rdev->usec_timeout; i++) {
1077 tmp = RREG32(RADEON_CRTC2_STATUS);
1078 if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
1079 return;
1080 }
1081 DRM_UDELAY(1);
1082 }
1083}
1084
1085int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
1086{
1087 unsigned i;
1088 uint32_t tmp;
1089
1090 for (i = 0; i < rdev->usec_timeout; i++) {
1091 tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
1092 if (tmp >= n) {
1093 return 0;
1094 }
1095 DRM_UDELAY(1);
1096 }
1097 return -1;
1098}
1099
1100int r100_gui_wait_for_idle(struct radeon_device *rdev)
1101{
1102 unsigned i;
1103 uint32_t tmp;
1104
1105 if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
1106 printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
1107 " Bad things might happen.\n");
1108 }
1109 for (i = 0; i < rdev->usec_timeout; i++) {
1110 tmp = RREG32(RADEON_RBBM_STATUS);
1111 if (!(tmp & (1 << 31))) {
1112 return 0;
1113 }
1114 DRM_UDELAY(1);
1115 }
1116 return -1;
1117}
1118
1119int r100_mc_wait_for_idle(struct radeon_device *rdev)
1120{
1121 unsigned i;
1122 uint32_t tmp;
1123
1124 for (i = 0; i < rdev->usec_timeout; i++) {
1125 /* read MC_STATUS */
1126 tmp = RREG32(0x0150);
1127 if (tmp & (1 << 2)) {
1128 return 0;
1129 }
1130 DRM_UDELAY(1);
1131 }
1132 return -1;
1133}
1134
1135void r100_gpu_init(struct radeon_device *rdev)
1136{
1137 /* TODO: anything to do here? pipes? */
1138 r100_hdp_reset(rdev);
1139}
1140
1141void r100_hdp_reset(struct radeon_device *rdev)
1142{
1143 uint32_t tmp;
1144
1145 tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
1146 tmp |= (7 << 28);
1147 WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
1148 (void)RREG32(RADEON_HOST_PATH_CNTL);
1149 udelay(200);
1150 WREG32(RADEON_RBBM_SOFT_RESET, 0);
1151 WREG32(RADEON_HOST_PATH_CNTL, tmp);
1152 (void)RREG32(RADEON_HOST_PATH_CNTL);
1153}
1154
1155int r100_rb2d_reset(struct radeon_device *rdev)
1156{
1157 uint32_t tmp;
1158 int i;
1159
1160 WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
1161 (void)RREG32(RADEON_RBBM_SOFT_RESET);
1162 udelay(200);
1163 WREG32(RADEON_RBBM_SOFT_RESET, 0);
1164 /* Wait to prevent race in RBBM_STATUS */
1165 mdelay(1);
1166 for (i = 0; i < rdev->usec_timeout; i++) {
1167 tmp = RREG32(RADEON_RBBM_STATUS);
1168 if (!(tmp & (1 << 26))) {
1169 DRM_INFO("RB2D reset succeed (RBBM_STATUS=0x%08X)\n",
1170 tmp);
1171 return 0;
1172 }
1173 DRM_UDELAY(1);
1174 }
1175 tmp = RREG32(RADEON_RBBM_STATUS);
1176 DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
1177 return -1;
1178}
1179
1180int r100_gpu_reset(struct radeon_device *rdev)
1181{
1182 uint32_t status;
1183
1184 /* reset order likely matters */
1185 status = RREG32(RADEON_RBBM_STATUS);
1186 /* reset HDP */
1187 r100_hdp_reset(rdev);
1188 /* reset rb2d */
1189 if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
1190 r100_rb2d_reset(rdev);
1191 }
1192 /* TODO: reset 3D engine */
1193 /* reset CP */
1194 status = RREG32(RADEON_RBBM_STATUS);
1195 if (status & (1 << 16)) {
1196 r100_cp_reset(rdev);
1197 }
1198 /* Check if GPU is idle */
1199 status = RREG32(RADEON_RBBM_STATUS);
1200 if (status & (1 << 31)) {
1201 DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
1202 return -1;
1203 }
1204 DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
1205 return 0;
1206}
1207
1208
1209/*
1210 * VRAM info
1211 */
1212static void r100_vram_get_type(struct radeon_device *rdev)
1213{
1214 uint32_t tmp;
1215
1216 rdev->mc.vram_is_ddr = false;
1217 if (rdev->flags & RADEON_IS_IGP)
1218 rdev->mc.vram_is_ddr = true;
1219 else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
1220 rdev->mc.vram_is_ddr = true;
1221 if ((rdev->family == CHIP_RV100) ||
1222 (rdev->family == CHIP_RS100) ||
1223 (rdev->family == CHIP_RS200)) {
1224 tmp = RREG32(RADEON_MEM_CNTL);
1225 if (tmp & RV100_HALF_MODE) {
1226 rdev->mc.vram_width = 32;
1227 } else {
1228 rdev->mc.vram_width = 64;
1229 }
1230 if (rdev->flags & RADEON_SINGLE_CRTC) {
1231 rdev->mc.vram_width /= 4;
1232 rdev->mc.vram_is_ddr = true;
1233 }
1234 } else if (rdev->family <= CHIP_RV280) {
1235 tmp = RREG32(RADEON_MEM_CNTL);
1236 if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
1237 rdev->mc.vram_width = 128;
1238 } else {
1239 rdev->mc.vram_width = 64;
1240 }
1241 } else {
1242 /* newer IGPs */
1243 rdev->mc.vram_width = 128;
1244 }
1245}
1246
1247void r100_vram_info(struct radeon_device *rdev)
1248{
1249 r100_vram_get_type(rdev);
1250
1251 if (rdev->flags & RADEON_IS_IGP) {
1252 uint32_t tom;
1253 /* read NB_TOM to get the amount of ram stolen for the GPU */
1254 tom = RREG32(RADEON_NB_TOM);
1255 rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
1256 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
1257 } else {
1258 rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
1259 /* Some production boards of m6 will report 0
1260 * if it's 8 MB
1261 */
1262 if (rdev->mc.vram_size == 0) {
1263 rdev->mc.vram_size = 8192 * 1024;
1264 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
1265 }
1266 }
1267
1268 rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
1269 rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
1270 if (rdev->mc.aper_size > rdev->mc.vram_size) {
1271 /* Why do some boards not have CONFIG_MEMSIZE properly
1272 * set up? */
1273 rdev->mc.vram_size = rdev->mc.aper_size;
1274 WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
1275 }
1276}
1277
1278
1279/*
1280 * Indirect registers accessor
1281 */
1282void r100_pll_errata_after_index(struct radeon_device *rdev)
1283{
1284 if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
1285 return;
1286 }
1287 (void)RREG32(RADEON_CLOCK_CNTL_DATA);
1288 (void)RREG32(RADEON_CRTC_GEN_CNTL);
1289}
1290
1291static void r100_pll_errata_after_data(struct radeon_device *rdev)
1292{
1293 /* This workaround is necessary on RV100, RS100 and RS200 chips,
1294 * or the chip could hang on a subsequent access
1295 */
1296 if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
1297 udelay(5000);
1298 }
1299
1300 /* This function is required to work around a hardware bug in some (all?)
1301 * revisions of the R300. This workaround should be called after every
1302 * CLOCK_CNTL_INDEX register access. If not, register reads afterward
1303 * may not be correct.
1304 */
1305 if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
1306 uint32_t save, tmp;
1307
1308 save = RREG32(RADEON_CLOCK_CNTL_INDEX);
1309 tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
1310 WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
1311 tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
1312 WREG32(RADEON_CLOCK_CNTL_INDEX, save);
1313 }
1314}
1315
1316uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
1317{
1318 uint32_t data;
1319
1320 WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
1321 r100_pll_errata_after_index(rdev);
1322 data = RREG32(RADEON_CLOCK_CNTL_DATA);
1323 r100_pll_errata_after_data(rdev);
1324 return data;
1325}
1326
1327void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
1328{
1329 WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
1330 r100_pll_errata_after_index(rdev);
1331 WREG32(RADEON_CLOCK_CNTL_DATA, v);
1332 r100_pll_errata_after_data(rdev);
1333}
1334
1335uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
1336{
1337 if (reg < 0x10000)
1338 return readl(((void __iomem *)rdev->rmmio) + reg);
1339 else {
1340 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
1341 return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
1342 }
1343}
1344
1345void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
1346{
1347 if (reg < 0x10000)
1348 writel(v, ((void __iomem *)rdev->rmmio) + reg);
1349 else {
1350 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
1351 writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
1352 }
1353}
1354
1355/*
1356 * Debugfs info
1357 */
1358#if defined(CONFIG_DEBUG_FS)
1359static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
1360{
1361 struct drm_info_node *node = (struct drm_info_node *) m->private;
1362 struct drm_device *dev = node->minor->dev;
1363 struct radeon_device *rdev = dev->dev_private;
1364 uint32_t reg, value;
1365 unsigned i;
1366
1367 seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
1368 seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
1369 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
1370 for (i = 0; i < 64; i++) {
1371 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
1372 reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
1373 WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
1374 value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
1375 seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
1376 }
1377 return 0;
1378}
1379
1380static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
1381{
1382 struct drm_info_node *node = (struct drm_info_node *) m->private;
1383 struct drm_device *dev = node->minor->dev;
1384 struct radeon_device *rdev = dev->dev_private;
1385 uint32_t rdp, wdp;
1386 unsigned count, i, j;
1387
1388 radeon_ring_free_size(rdev);
1389 rdp = RREG32(RADEON_CP_RB_RPTR);
1390 wdp = RREG32(RADEON_CP_RB_WPTR);
1391 count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
1392 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
1393 seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
1394 seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
1395 seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
1396 seq_printf(m, "%u dwords in ring\n", count);
1397 for (j = 0; j <= count; j++) {
1398 i = (rdp + j) & rdev->cp.ptr_mask;
1399 seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
1400 }
1401 return 0;
1402}
1403
1404
1405static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
1406{
1407 struct drm_info_node *node = (struct drm_info_node *) m->private;
1408 struct drm_device *dev = node->minor->dev;
1409 struct radeon_device *rdev = dev->dev_private;
1410 uint32_t csq_stat, csq2_stat, tmp;
1411 unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
1412 unsigned i;
1413
1414 seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
1415 seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
1416 csq_stat = RREG32(RADEON_CP_CSQ_STAT);
1417 csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
1418 r_rptr = (csq_stat >> 0) & 0x3ff;
1419 r_wptr = (csq_stat >> 10) & 0x3ff;
1420 ib1_rptr = (csq_stat >> 20) & 0x3ff;
1421 ib1_wptr = (csq2_stat >> 0) & 0x3ff;
1422 ib2_rptr = (csq2_stat >> 10) & 0x3ff;
1423 ib2_wptr = (csq2_stat >> 20) & 0x3ff;
1424 seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
1425 seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
1426 seq_printf(m, "Ring rptr %u\n", r_rptr);
1427 seq_printf(m, "Ring wptr %u\n", r_wptr);
1428 seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
1429 seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
1430 seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
1431 seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
1432 /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
1433 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
1434 seq_printf(m, "Ring fifo:\n");
1435 for (i = 0; i < 256; i++) {
1436 WREG32(RADEON_CP_CSQ_ADDR, i << 2);
1437 tmp = RREG32(RADEON_CP_CSQ_DATA);
1438 seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
1439 }
1440 seq_printf(m, "Indirect1 fifo:\n");
1441 for (i = 256; i <= 512; i++) {
1442 WREG32(RADEON_CP_CSQ_ADDR, i << 2);
1443 tmp = RREG32(RADEON_CP_CSQ_DATA);
1444 seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
1445 }
1446 seq_printf(m, "Indirect2 fifo:\n");
1447 for (i = 640; i < ib1_wptr; i++) {
1448 WREG32(RADEON_CP_CSQ_ADDR, i << 2);
1449 tmp = RREG32(RADEON_CP_CSQ_DATA);
1450 seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
1451 }
1452 return 0;
1453}
1454
1455static int r100_debugfs_mc_info(struct seq_file *m, void *data)
1456{
1457 struct drm_info_node *node = (struct drm_info_node *) m->private;
1458 struct drm_device *dev = node->minor->dev;
1459 struct radeon_device *rdev = dev->dev_private;
1460 uint32_t tmp;
1461
1462 tmp = RREG32(RADEON_CONFIG_MEMSIZE);
1463 seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
1464 tmp = RREG32(RADEON_MC_FB_LOCATION);
1465 seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
1466 tmp = RREG32(RADEON_BUS_CNTL);
1467 seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
1468 tmp = RREG32(RADEON_MC_AGP_LOCATION);
1469 seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
1470 tmp = RREG32(RADEON_AGP_BASE);
1471 seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
1472 tmp = RREG32(RADEON_HOST_PATH_CNTL);
1473 seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
1474 tmp = RREG32(0x01D0);
1475 seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
1476 tmp = RREG32(RADEON_AIC_LO_ADDR);
1477 seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
1478 tmp = RREG32(RADEON_AIC_HI_ADDR);
1479 seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
1480 tmp = RREG32(0x01E4);
1481 seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
1482 return 0;
1483}
1484
1485static struct drm_info_list r100_debugfs_rbbm_list[] = {
1486 {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
1487};
1488
1489static struct drm_info_list r100_debugfs_cp_list[] = {
1490 {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
1491 {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
1492};
1493
1494static struct drm_info_list r100_debugfs_mc_info_list[] = {
1495 {"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
1496};
1497#endif
1498
1499int r100_debugfs_rbbm_init(struct radeon_device *rdev)
1500{
1501#if defined(CONFIG_DEBUG_FS)
1502 return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
1503#else
1504 return 0;
1505#endif
1506}
1507
1508int r100_debugfs_cp_init(struct radeon_device *rdev)
1509{
1510#if defined(CONFIG_DEBUG_FS)
1511 return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
1512#else
1513 return 0;
1514#endif
1515}
1516
1517int r100_debugfs_mc_info_init(struct radeon_device *rdev)
1518{
1519#if defined(CONFIG_DEBUG_FS)
1520 return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
1521#else
1522 return 0;
1523#endif
1524}