aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2010-11-12 08:46:18 -0500
committerChris Wilson <chris@chris-wilson.co.uk>2010-11-25 10:01:39 -0500
commitc6642782b988e907bb50767eab50042f4947e163 (patch)
tree3ffc5866272920701c9fb1d1410ee79be4873cb1
parentcaea7476d48e5f401f2d18b1738827748fb56c12 (diff)
drm/i915: Add a mechanism for pipelining fence register updates
Not employed just yet...

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c133
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h8
2 files changed, 98 insertions, 43 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2cfdee8811c4..1e9cf2bf9ba4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2322,7 +2322,8 @@ i915_gpu_idle(struct drm_device *dev)
2322 return 0; 2322 return 0;
2323} 2323}
2324 2324
2325static void sandybridge_write_fence_reg(struct drm_i915_gem_object *obj) 2325static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
2326 struct intel_ring_buffer *pipelined)
2326{ 2327{
2327 struct drm_device *dev = obj->base.dev; 2328 struct drm_device *dev = obj->base.dev;
2328 drm_i915_private_t *dev_priv = dev->dev_private; 2329 drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2331,7 +2332,7 @@ static void sandybridge_write_fence_reg(struct drm_i915_gem_object *obj)
2331 uint64_t val; 2332 uint64_t val;
2332 2333
2333 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2334 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2334 0xfffff000) << 32; 2335 0xfffff000) << 32;
2335 val |= obj->gtt_offset & 0xfffff000; 2336 val |= obj->gtt_offset & 0xfffff000;
2336 val |= (uint64_t)((obj->stride / 128) - 1) << 2337 val |= (uint64_t)((obj->stride / 128) - 1) <<
2337 SANDYBRIDGE_FENCE_PITCH_SHIFT; 2338 SANDYBRIDGE_FENCE_PITCH_SHIFT;
@@ -2340,10 +2341,26 @@ static void sandybridge_write_fence_reg(struct drm_i915_gem_object *obj)
2340 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2341 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2341 val |= I965_FENCE_REG_VALID; 2342 val |= I965_FENCE_REG_VALID;
2342 2343
2343 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val); 2344 if (pipelined) {
2345 int ret = intel_ring_begin(pipelined, 6);
2346 if (ret)
2347 return ret;
2348
2349 intel_ring_emit(pipelined, MI_NOOP);
2350 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2351 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2352 intel_ring_emit(pipelined, (u32)val);
2353 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2354 intel_ring_emit(pipelined, (u32)(val >> 32));
2355 intel_ring_advance(pipelined);
2356 } else
2357 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
2358
2359 return 0;
2344} 2360}
2345 2361
2346static void i965_write_fence_reg(struct drm_i915_gem_object *obj) 2362static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
2363 struct intel_ring_buffer *pipelined)
2347{ 2364{
2348 struct drm_device *dev = obj->base.dev; 2365 struct drm_device *dev = obj->base.dev;
2349 drm_i915_private_t *dev_priv = dev->dev_private; 2366 drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2359,27 +2376,41 @@ static void i965_write_fence_reg(struct drm_i915_gem_object *obj)
2359 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2376 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2360 val |= I965_FENCE_REG_VALID; 2377 val |= I965_FENCE_REG_VALID;
2361 2378
2362 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); 2379 if (pipelined) {
2380 int ret = intel_ring_begin(pipelined, 6);
2381 if (ret)
2382 return ret;
2383
2384 intel_ring_emit(pipelined, MI_NOOP);
2385 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2386 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2387 intel_ring_emit(pipelined, (u32)val);
2388 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2389 intel_ring_emit(pipelined, (u32)(val >> 32));
2390 intel_ring_advance(pipelined);
2391 } else
2392 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2393
2394 return 0;
2363} 2395}
2364 2396
2365static void i915_write_fence_reg(struct drm_i915_gem_object *obj) 2397static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
2398 struct intel_ring_buffer *pipelined)
2366{ 2399{
2367 struct drm_device *dev = obj->base.dev; 2400 struct drm_device *dev = obj->base.dev;
2368 drm_i915_private_t *dev_priv = dev->dev_private; 2401 drm_i915_private_t *dev_priv = dev->dev_private;
2369 u32 size = obj->gtt_space->size; 2402 u32 size = obj->gtt_space->size;
2370 uint32_t fence_reg, val, pitch_val; 2403 u32 fence_reg, val, pitch_val;
2371 int tile_width; 2404 int tile_width;
2372 2405
2373 if ((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2406 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2374 (obj->gtt_offset & (size - 1))) { 2407 (size & -size) != size ||
2375 WARN(1, "%s: object 0x%08x [fenceable? %d] not 1M or size (0x%08x) aligned [gtt_space offset=%lx, size=%lx]\n", 2408 (obj->gtt_offset & (size - 1)),
2376 __func__, obj->gtt_offset, obj->map_and_fenceable, size, 2409 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2377 obj->gtt_space->start, obj->gtt_space->size); 2410 obj->gtt_offset, obj->map_and_fenceable, size))
2378 return; 2411 return -EINVAL;
2379 }
2380 2412
2381 if (obj->tiling_mode == I915_TILING_Y && 2413 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2382 HAS_128_BYTE_Y_TILING(dev))
2383 tile_width = 128; 2414 tile_width = 128;
2384 else 2415 else
2385 tile_width = 512; 2416 tile_width = 512;
@@ -2388,12 +2419,6 @@ static void i915_write_fence_reg(struct drm_i915_gem_object *obj)
2388 pitch_val = obj->stride / tile_width; 2419 pitch_val = obj->stride / tile_width;
2389 pitch_val = ffs(pitch_val) - 1; 2420 pitch_val = ffs(pitch_val) - 1;
2390 2421
2391 if (obj->tiling_mode == I915_TILING_Y &&
2392 HAS_128_BYTE_Y_TILING(dev))
2393 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2394 else
2395 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2396
2397 val = obj->gtt_offset; 2422 val = obj->gtt_offset;
2398 if (obj->tiling_mode == I915_TILING_Y) 2423 if (obj->tiling_mode == I915_TILING_Y)
2399 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2424 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
@@ -2406,10 +2431,25 @@ static void i915_write_fence_reg(struct drm_i915_gem_object *obj)
2406 fence_reg = FENCE_REG_830_0 + fence_reg * 4; 2431 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2407 else 2432 else
2408 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; 2433 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2409 I915_WRITE(fence_reg, val); 2434
2435 if (pipelined) {
2436 int ret = intel_ring_begin(pipelined, 4);
2437 if (ret)
2438 return ret;
2439
2440 intel_ring_emit(pipelined, MI_NOOP);
2441 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2442 intel_ring_emit(pipelined, fence_reg);
2443 intel_ring_emit(pipelined, val);
2444 intel_ring_advance(pipelined);
2445 } else
2446 I915_WRITE(fence_reg, val);
2447
2448 return 0;
2410} 2449}
2411 2450
2412static void i830_write_fence_reg(struct drm_i915_gem_object *obj) 2451static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
2452 struct intel_ring_buffer *pipelined)
2413{ 2453{
2414 struct drm_device *dev = obj->base.dev; 2454 struct drm_device *dev = obj->base.dev;
2415 drm_i915_private_t *dev_priv = dev->dev_private; 2455 drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2417,29 +2457,38 @@ static void i830_write_fence_reg(struct drm_i915_gem_object *obj)
2417 int regnum = obj->fence_reg; 2457 int regnum = obj->fence_reg;
2418 uint32_t val; 2458 uint32_t val;
2419 uint32_t pitch_val; 2459 uint32_t pitch_val;
2420 uint32_t fence_size_bits;
2421 2460
2422 if ((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2461 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2423 (obj->gtt_offset & (obj->base.size - 1))) { 2462 (size & -size) != size ||
2424 WARN(1, "%s: object 0x%08x not 512K or size aligned\n", 2463 (obj->gtt_offset & (size - 1)),
2425 __func__, obj->gtt_offset); 2464 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2426 return; 2465 obj->gtt_offset, size))
2427 } 2466 return -EINVAL;
2428 2467
2429 pitch_val = obj->stride / 128; 2468 pitch_val = obj->stride / 128;
2430 pitch_val = ffs(pitch_val) - 1; 2469 pitch_val = ffs(pitch_val) - 1;
2431 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2432 2470
2433 val = obj->gtt_offset; 2471 val = obj->gtt_offset;
2434 if (obj->tiling_mode == I915_TILING_Y) 2472 if (obj->tiling_mode == I915_TILING_Y)
2435 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2473 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2436 fence_size_bits = I830_FENCE_SIZE_BITS(size); 2474 val |= I830_FENCE_SIZE_BITS(size);
2437 WARN_ON(fence_size_bits & ~0x00000f00);
2438 val |= fence_size_bits;
2439 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2475 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2440 val |= I830_FENCE_REG_VALID; 2476 val |= I830_FENCE_REG_VALID;
2441 2477
2442 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); 2478 if (pipelined) {
2479 int ret = intel_ring_begin(pipelined, 4);
2480 if (ret)
2481 return ret;
2482
2483 intel_ring_emit(pipelined, MI_NOOP);
2484 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2485 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
2486 intel_ring_emit(pipelined, val);
2487 intel_ring_advance(pipelined);
2488 } else
2489 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
2490
2491 return 0;
2443} 2492}
2444 2493
2445static int i915_find_fence_reg(struct drm_device *dev, 2494static int i915_find_fence_reg(struct drm_device *dev,
@@ -2512,6 +2561,7 @@ i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
2512 struct drm_device *dev = obj->base.dev; 2561 struct drm_device *dev = obj->base.dev;
2513 struct drm_i915_private *dev_priv = dev->dev_private; 2562 struct drm_i915_private *dev_priv = dev->dev_private;
2514 struct drm_i915_fence_reg *reg = NULL; 2563 struct drm_i915_fence_reg *reg = NULL;
2564 struct intel_ring_buffer *pipelined = NULL;
2515 int ret; 2565 int ret;
2516 2566
2517 /* Just update our place in the LRU if our fence is getting used. */ 2567 /* Just update our place in the LRU if our fence is getting used. */
@@ -2553,25 +2603,24 @@ i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
2553 2603
2554 switch (INTEL_INFO(dev)->gen) { 2604 switch (INTEL_INFO(dev)->gen) {
2555 case 6: 2605 case 6:
2556 sandybridge_write_fence_reg(obj); 2606 ret = sandybridge_write_fence_reg(obj, pipelined);
2557 break; 2607 break;
2558 case 5: 2608 case 5:
2559 case 4: 2609 case 4:
2560 i965_write_fence_reg(obj); 2610 ret = i965_write_fence_reg(obj, pipelined);
2561 break; 2611 break;
2562 case 3: 2612 case 3:
2563 i915_write_fence_reg(obj); 2613 ret = i915_write_fence_reg(obj, pipelined);
2564 break; 2614 break;
2565 case 2: 2615 case 2:
2566 i830_write_fence_reg(obj); 2616 ret = i830_write_fence_reg(obj, pipelined);
2567 break; 2617 break;
2568 } 2618 }
2569 2619
2570 trace_i915_gem_object_get_fence(obj, 2620 trace_i915_gem_object_get_fence(obj,
2571 obj->fence_reg, 2621 obj->fence_reg,
2572 obj->tiling_mode); 2622 obj->tiling_mode);
2573 2623 return ret;
2574 return 0;
2575} 2624}
2576 2625
2577/** 2626/**
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c668b2fb7e3d..ce97471d9c40 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -164,7 +164,13 @@
164#define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ 164#define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */
165#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) 165#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
166#define MI_STORE_DWORD_INDEX_SHIFT 2 166#define MI_STORE_DWORD_INDEX_SHIFT 2
167#define MI_LOAD_REGISTER_IMM MI_INSTR(0x22, 1) 167/* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
168 * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
169 * simply ignores the register load under certain conditions.
170 * - One can actually load arbitrarily many arbitrary registers: Simply issue x
171 * address/value pairs. Don't overdo it, though: x <= 2^4 must hold!
172 */
173#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
168#define MI_FLUSH_DW MI_INSTR(0x26, 2) /* for GEN6 */ 174#define MI_FLUSH_DW MI_INSTR(0x26, 2) /* for GEN6 */
169#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) 175#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
170#define MI_BATCH_NON_SECURE (1) 176#define MI_BATCH_NON_SECURE (1)