diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2010-11-12 08:46:18 -0500 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2010-11-25 10:01:39 -0500 |
commit | c6642782b988e907bb50767eab50042f4947e163 (patch) | |
tree | 3ffc5866272920701c9fb1d1410ee79be4873cb1 | |
parent | caea7476d48e5f401f2d18b1738827748fb56c12 (diff) |
drm/i915: Add a mechanism for pipelining fence register updates
Not employed just yet...
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 133 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_reg.h | 8 |
2 files changed, 98 insertions, 43 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2cfdee8811c4..1e9cf2bf9ba4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c | |||
@@ -2322,7 +2322,8 @@ i915_gpu_idle(struct drm_device *dev) | |||
2322 | return 0; | 2322 | return 0; |
2323 | } | 2323 | } |
2324 | 2324 | ||
2325 | static void sandybridge_write_fence_reg(struct drm_i915_gem_object *obj) | 2325 | static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj, |
2326 | struct intel_ring_buffer *pipelined) | ||
2326 | { | 2327 | { |
2327 | struct drm_device *dev = obj->base.dev; | 2328 | struct drm_device *dev = obj->base.dev; |
2328 | drm_i915_private_t *dev_priv = dev->dev_private; | 2329 | drm_i915_private_t *dev_priv = dev->dev_private; |
@@ -2331,7 +2332,7 @@ static void sandybridge_write_fence_reg(struct drm_i915_gem_object *obj) | |||
2331 | uint64_t val; | 2332 | uint64_t val; |
2332 | 2333 | ||
2333 | val = (uint64_t)((obj->gtt_offset + size - 4096) & | 2334 | val = (uint64_t)((obj->gtt_offset + size - 4096) & |
2334 | 0xfffff000) << 32; | 2335 | 0xfffff000) << 32; |
2335 | val |= obj->gtt_offset & 0xfffff000; | 2336 | val |= obj->gtt_offset & 0xfffff000; |
2336 | val |= (uint64_t)((obj->stride / 128) - 1) << | 2337 | val |= (uint64_t)((obj->stride / 128) - 1) << |
2337 | SANDYBRIDGE_FENCE_PITCH_SHIFT; | 2338 | SANDYBRIDGE_FENCE_PITCH_SHIFT; |
@@ -2340,10 +2341,26 @@ static void sandybridge_write_fence_reg(struct drm_i915_gem_object *obj) | |||
2340 | val |= 1 << I965_FENCE_TILING_Y_SHIFT; | 2341 | val |= 1 << I965_FENCE_TILING_Y_SHIFT; |
2341 | val |= I965_FENCE_REG_VALID; | 2342 | val |= I965_FENCE_REG_VALID; |
2342 | 2343 | ||
2343 | I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val); | 2344 | if (pipelined) { |
2345 | int ret = intel_ring_begin(pipelined, 6); | ||
2346 | if (ret) | ||
2347 | return ret; | ||
2348 | |||
2349 | intel_ring_emit(pipelined, MI_NOOP); | ||
2350 | intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); | ||
2351 | intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8); | ||
2352 | intel_ring_emit(pipelined, (u32)val); | ||
2353 | intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4); | ||
2354 | intel_ring_emit(pipelined, (u32)(val >> 32)); | ||
2355 | intel_ring_advance(pipelined); | ||
2356 | } else | ||
2357 | I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val); | ||
2358 | |||
2359 | return 0; | ||
2344 | } | 2360 | } |
2345 | 2361 | ||
2346 | static void i965_write_fence_reg(struct drm_i915_gem_object *obj) | 2362 | static int i965_write_fence_reg(struct drm_i915_gem_object *obj, |
2363 | struct intel_ring_buffer *pipelined) | ||
2347 | { | 2364 | { |
2348 | struct drm_device *dev = obj->base.dev; | 2365 | struct drm_device *dev = obj->base.dev; |
2349 | drm_i915_private_t *dev_priv = dev->dev_private; | 2366 | drm_i915_private_t *dev_priv = dev->dev_private; |
@@ -2359,27 +2376,41 @@ static void i965_write_fence_reg(struct drm_i915_gem_object *obj) | |||
2359 | val |= 1 << I965_FENCE_TILING_Y_SHIFT; | 2376 | val |= 1 << I965_FENCE_TILING_Y_SHIFT; |
2360 | val |= I965_FENCE_REG_VALID; | 2377 | val |= I965_FENCE_REG_VALID; |
2361 | 2378 | ||
2362 | I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); | 2379 | if (pipelined) { |
2380 | int ret = intel_ring_begin(pipelined, 6); | ||
2381 | if (ret) | ||
2382 | return ret; | ||
2383 | |||
2384 | intel_ring_emit(pipelined, MI_NOOP); | ||
2385 | intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); | ||
2386 | intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8); | ||
2387 | intel_ring_emit(pipelined, (u32)val); | ||
2388 | intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4); | ||
2389 | intel_ring_emit(pipelined, (u32)(val >> 32)); | ||
2390 | intel_ring_advance(pipelined); | ||
2391 | } else | ||
2392 | I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val); | ||
2393 | |||
2394 | return 0; | ||
2363 | } | 2395 | } |
2364 | 2396 | ||
2365 | static void i915_write_fence_reg(struct drm_i915_gem_object *obj) | 2397 | static int i915_write_fence_reg(struct drm_i915_gem_object *obj, |
2398 | struct intel_ring_buffer *pipelined) | ||
2366 | { | 2399 | { |
2367 | struct drm_device *dev = obj->base.dev; | 2400 | struct drm_device *dev = obj->base.dev; |
2368 | drm_i915_private_t *dev_priv = dev->dev_private; | 2401 | drm_i915_private_t *dev_priv = dev->dev_private; |
2369 | u32 size = obj->gtt_space->size; | 2402 | u32 size = obj->gtt_space->size; |
2370 | uint32_t fence_reg, val, pitch_val; | 2403 | u32 fence_reg, val, pitch_val; |
2371 | int tile_width; | 2404 | int tile_width; |
2372 | 2405 | ||
2373 | if ((obj->gtt_offset & ~I915_FENCE_START_MASK) || | 2406 | if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || |
2374 | (obj->gtt_offset & (size - 1))) { | 2407 | (size & -size) != size || |
2375 | WARN(1, "%s: object 0x%08x [fenceable? %d] not 1M or size (0x%08x) aligned [gtt_space offset=%lx, size=%lx]\n", | 2408 | (obj->gtt_offset & (size - 1)), |
2376 | __func__, obj->gtt_offset, obj->map_and_fenceable, size, | 2409 | "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", |
2377 | obj->gtt_space->start, obj->gtt_space->size); | 2410 | obj->gtt_offset, obj->map_and_fenceable, size)) |
2378 | return; | 2411 | return -EINVAL; |
2379 | } | ||
2380 | 2412 | ||
2381 | if (obj->tiling_mode == I915_TILING_Y && | 2413 | if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) |
2382 | HAS_128_BYTE_Y_TILING(dev)) | ||
2383 | tile_width = 128; | 2414 | tile_width = 128; |
2384 | else | 2415 | else |
2385 | tile_width = 512; | 2416 | tile_width = 512; |
@@ -2388,12 +2419,6 @@ static void i915_write_fence_reg(struct drm_i915_gem_object *obj) | |||
2388 | pitch_val = obj->stride / tile_width; | 2419 | pitch_val = obj->stride / tile_width; |
2389 | pitch_val = ffs(pitch_val) - 1; | 2420 | pitch_val = ffs(pitch_val) - 1; |
2390 | 2421 | ||
2391 | if (obj->tiling_mode == I915_TILING_Y && | ||
2392 | HAS_128_BYTE_Y_TILING(dev)) | ||
2393 | WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); | ||
2394 | else | ||
2395 | WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL); | ||
2396 | |||
2397 | val = obj->gtt_offset; | 2422 | val = obj->gtt_offset; |
2398 | if (obj->tiling_mode == I915_TILING_Y) | 2423 | if (obj->tiling_mode == I915_TILING_Y) |
2399 | val |= 1 << I830_FENCE_TILING_Y_SHIFT; | 2424 | val |= 1 << I830_FENCE_TILING_Y_SHIFT; |
@@ -2406,10 +2431,25 @@ static void i915_write_fence_reg(struct drm_i915_gem_object *obj) | |||
2406 | fence_reg = FENCE_REG_830_0 + fence_reg * 4; | 2431 | fence_reg = FENCE_REG_830_0 + fence_reg * 4; |
2407 | else | 2432 | else |
2408 | fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; | 2433 | fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; |
2409 | I915_WRITE(fence_reg, val); | 2434 | |
2435 | if (pipelined) { | ||
2436 | int ret = intel_ring_begin(pipelined, 4); | ||
2437 | if (ret) | ||
2438 | return ret; | ||
2439 | |||
2440 | intel_ring_emit(pipelined, MI_NOOP); | ||
2441 | intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); | ||
2442 | intel_ring_emit(pipelined, fence_reg); | ||
2443 | intel_ring_emit(pipelined, val); | ||
2444 | intel_ring_advance(pipelined); | ||
2445 | } else | ||
2446 | I915_WRITE(fence_reg, val); | ||
2447 | |||
2448 | return 0; | ||
2410 | } | 2449 | } |
2411 | 2450 | ||
2412 | static void i830_write_fence_reg(struct drm_i915_gem_object *obj) | 2451 | static int i830_write_fence_reg(struct drm_i915_gem_object *obj, |
2452 | struct intel_ring_buffer *pipelined) | ||
2413 | { | 2453 | { |
2414 | struct drm_device *dev = obj->base.dev; | 2454 | struct drm_device *dev = obj->base.dev; |
2415 | drm_i915_private_t *dev_priv = dev->dev_private; | 2455 | drm_i915_private_t *dev_priv = dev->dev_private; |
@@ -2417,29 +2457,38 @@ static void i830_write_fence_reg(struct drm_i915_gem_object *obj) | |||
2417 | int regnum = obj->fence_reg; | 2457 | int regnum = obj->fence_reg; |
2418 | uint32_t val; | 2458 | uint32_t val; |
2419 | uint32_t pitch_val; | 2459 | uint32_t pitch_val; |
2420 | uint32_t fence_size_bits; | ||
2421 | 2460 | ||
2422 | if ((obj->gtt_offset & ~I830_FENCE_START_MASK) || | 2461 | if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || |
2423 | (obj->gtt_offset & (obj->base.size - 1))) { | 2462 | (size & -size) != size || |
2424 | WARN(1, "%s: object 0x%08x not 512K or size aligned\n", | 2463 | (obj->gtt_offset & (size - 1)), |
2425 | __func__, obj->gtt_offset); | 2464 | "object 0x%08x not 512K or pot-size 0x%08x aligned\n", |
2426 | return; | 2465 | obj->gtt_offset, size)) |
2427 | } | 2466 | return -EINVAL; |
2428 | 2467 | ||
2429 | pitch_val = obj->stride / 128; | 2468 | pitch_val = obj->stride / 128; |
2430 | pitch_val = ffs(pitch_val) - 1; | 2469 | pitch_val = ffs(pitch_val) - 1; |
2431 | WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); | ||
2432 | 2470 | ||
2433 | val = obj->gtt_offset; | 2471 | val = obj->gtt_offset; |
2434 | if (obj->tiling_mode == I915_TILING_Y) | 2472 | if (obj->tiling_mode == I915_TILING_Y) |
2435 | val |= 1 << I830_FENCE_TILING_Y_SHIFT; | 2473 | val |= 1 << I830_FENCE_TILING_Y_SHIFT; |
2436 | fence_size_bits = I830_FENCE_SIZE_BITS(size); | 2474 | val |= I830_FENCE_SIZE_BITS(size); |
2437 | WARN_ON(fence_size_bits & ~0x00000f00); | ||
2438 | val |= fence_size_bits; | ||
2439 | val |= pitch_val << I830_FENCE_PITCH_SHIFT; | 2475 | val |= pitch_val << I830_FENCE_PITCH_SHIFT; |
2440 | val |= I830_FENCE_REG_VALID; | 2476 | val |= I830_FENCE_REG_VALID; |
2441 | 2477 | ||
2442 | I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); | 2478 | if (pipelined) { |
2479 | int ret = intel_ring_begin(pipelined, 4); | ||
2480 | if (ret) | ||
2481 | return ret; | ||
2482 | |||
2483 | intel_ring_emit(pipelined, MI_NOOP); | ||
2484 | intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); | ||
2485 | intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4); | ||
2486 | intel_ring_emit(pipelined, val); | ||
2487 | intel_ring_advance(pipelined); | ||
2488 | } else | ||
2489 | I915_WRITE(FENCE_REG_830_0 + regnum * 4, val); | ||
2490 | |||
2491 | return 0; | ||
2443 | } | 2492 | } |
2444 | 2493 | ||
2445 | static int i915_find_fence_reg(struct drm_device *dev, | 2494 | static int i915_find_fence_reg(struct drm_device *dev, |
@@ -2512,6 +2561,7 @@ i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj, | |||
2512 | struct drm_device *dev = obj->base.dev; | 2561 | struct drm_device *dev = obj->base.dev; |
2513 | struct drm_i915_private *dev_priv = dev->dev_private; | 2562 | struct drm_i915_private *dev_priv = dev->dev_private; |
2514 | struct drm_i915_fence_reg *reg = NULL; | 2563 | struct drm_i915_fence_reg *reg = NULL; |
2564 | struct intel_ring_buffer *pipelined = NULL; | ||
2515 | int ret; | 2565 | int ret; |
2516 | 2566 | ||
2517 | /* Just update our place in the LRU if our fence is getting used. */ | 2567 | /* Just update our place in the LRU if our fence is getting used. */ |
@@ -2553,25 +2603,24 @@ i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj, | |||
2553 | 2603 | ||
2554 | switch (INTEL_INFO(dev)->gen) { | 2604 | switch (INTEL_INFO(dev)->gen) { |
2555 | case 6: | 2605 | case 6: |
2556 | sandybridge_write_fence_reg(obj); | 2606 | ret = sandybridge_write_fence_reg(obj, pipelined); |
2557 | break; | 2607 | break; |
2558 | case 5: | 2608 | case 5: |
2559 | case 4: | 2609 | case 4: |
2560 | i965_write_fence_reg(obj); | 2610 | ret = i965_write_fence_reg(obj, pipelined); |
2561 | break; | 2611 | break; |
2562 | case 3: | 2612 | case 3: |
2563 | i915_write_fence_reg(obj); | 2613 | ret = i915_write_fence_reg(obj, pipelined); |
2564 | break; | 2614 | break; |
2565 | case 2: | 2615 | case 2: |
2566 | i830_write_fence_reg(obj); | 2616 | ret = i830_write_fence_reg(obj, pipelined); |
2567 | break; | 2617 | break; |
2568 | } | 2618 | } |
2569 | 2619 | ||
2570 | trace_i915_gem_object_get_fence(obj, | 2620 | trace_i915_gem_object_get_fence(obj, |
2571 | obj->fence_reg, | 2621 | obj->fence_reg, |
2572 | obj->tiling_mode); | 2622 | obj->tiling_mode); |
2573 | 2623 | return ret; | |
2574 | return 0; | ||
2575 | } | 2624 | } |
2576 | 2625 | ||
2577 | /** | 2626 | /** |
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c668b2fb7e3d..ce97471d9c40 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h | |||
@@ -164,7 +164,13 @@ | |||
164 | #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ | 164 | #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ |
165 | #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) | 165 | #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) |
166 | #define MI_STORE_DWORD_INDEX_SHIFT 2 | 166 | #define MI_STORE_DWORD_INDEX_SHIFT 2 |
167 | #define MI_LOAD_REGISTER_IMM MI_INSTR(0x22, 1) | 167 | /* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: |
168 | * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw | ||
169 | * simply ignores the register load under certain conditions. | ||
168 | * - One can actually load arbitrarily many arbitrary registers: Simply issue x | ||
169 | * address/value pairs. Don't overdo it, though: x <= 2^4 must hold! | ||
172 | */ | ||
173 | #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1) | ||
168 | #define MI_FLUSH_DW MI_INSTR(0x26, 2) /* for GEN6 */ | 174 | #define MI_FLUSH_DW MI_INSTR(0x26, 2) /* for GEN6 */ |
169 | #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) | 175 | #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) |
170 | #define MI_BATCH_NON_SECURE (1) | 176 | #define MI_BATCH_NON_SECURE (1) |