aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Lauri Kasanen <cand@gmx.com> 2014-04-20 13:29:33 -0400
committer Alex Deucher <alexander.deucher@amd.com> 2014-08-05 08:53:27 -0400
commit 59bc1d89d6a4d67c94a9b70fa81bda1d5b04f0cb (patch)
tree 89f2ca40816aea347dc9fa43c5ff3efd2b78682f
parent 3e22920fbd0005927bc41f71daeb056a0f4def82 (diff)
drm/radeon: Inline r100_mm_rreg, -wreg, v3
This was originally un-inlined by Andi Kleen in 2011 citing size concerns. Indeed, a first attempt at inlining it grew radeon.ko by 7%.

However, 2% of CPU is spent in this function. Simply inlining it gave 1% more fps in Urban Terror.

v2: We know the minimum MMIO size. Adding it to the if allows the compiler to optimize the branch out, improving both performance and size. The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but common sense says perf is now more than 1% better.

v3: Also change _wreg, make the threshold a define. Inlining _wreg increased the size a bit compared to v2, so now radeon.ko is only 1% smaller.

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/radeon/r100.c 33
-rw-r--r-- drivers/gpu/drm/radeon/radeon.h 40
2 files changed, 36 insertions, 37 deletions
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index ebdce08cfefc..557fcdc32710 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4065,39 +4065,6 @@ int r100_init(struct radeon_device *rdev)
4065 return 0; 4065 return 0;
4066} 4066}
4067 4067
/*
 * Read a 32-bit value from register offset 'reg' of the device's rmmio
 * mapping.
 *
 * When reg is below rdev->rmmio_size and always_indirect is false, the
 * register is read directly at rmmio + reg. Otherwise reg is written to
 * RADEON_MM_INDEX and the value is read back from RADEON_MM_DATA, with
 * mmio_idx_lock held across the index/data pair.
 */
4068uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
4069 bool always_indirect)
4070{
4071 if (reg < rdev->rmmio_size && !always_indirect)
4072 return readl(((void __iomem *)rdev->rmmio) + reg);
4073 else {
4074 unsigned long flags;
4075 uint32_t ret;
4076
/* The lock keeps the index write and the data read together as one sequence. */
4077 spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4078 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4079 ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4080 spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4081
4082 return ret;
4083 }
4084}
4085
/*
 * Write the 32-bit value 'v' to register offset 'reg' of the device's rmmio
 * mapping.
 *
 * When reg is below rdev->rmmio_size and always_indirect is false, the
 * value is written directly at rmmio + reg. Otherwise reg is written to
 * RADEON_MM_INDEX and v to RADEON_MM_DATA, with mmio_idx_lock held across
 * the index/data pair.
 */
4086void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
4087 bool always_indirect)
4088{
4089 if (reg < rdev->rmmio_size && !always_indirect)
4090 writel(v, ((void __iomem *)rdev->rmmio) + reg);
4091 else {
4092 unsigned long flags;
4093
/* The lock keeps the index write and the data write together as one sequence. */
4094 spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4095 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4096 writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4097 spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4098 }
4099}
4100
4101u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) 4068u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4102{ 4069{
4103 if (reg < rdev->rio_mem_size) 4070 if (reg < rdev->rio_mem_size)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7561b1332152..6aadfe43bad0 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2357,10 +2357,42 @@ int radeon_device_init(struct radeon_device *rdev,
2357void radeon_device_fini(struct radeon_device *rdev); 2357void radeon_device_fini(struct radeon_device *rdev);
2358int radeon_gpu_wait_for_idle(struct radeon_device *rdev); 2358int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
2359 2359
/*
 * RADEON_MIN_MMIO_SIZE (0x10000) is the threshold used by the inline
 * accessors: offsets below it take the direct path without depending on
 * the runtime value of rdev->rmmio_size.
 *
 * r100_mm_rreg() reads a 32-bit value from register offset 'reg'. The
 * register is read directly at rmmio + reg when always_indirect is false
 * and reg is below either rdev->rmmio_size or RADEON_MIN_MMIO_SIZE.
 * Otherwise reg is written to RADEON_MM_INDEX and the value is read back
 * from RADEON_MM_DATA, with mmio_idx_lock held across the index/data pair.
 */
2360uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, 2360#define RADEON_MIN_MMIO_SIZE 0x10000
2361 bool always_indirect); 2361
2362void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, 2362static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
2363 bool always_indirect); 2363 bool always_indirect)
2364{
2365 /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
2366 if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
2367 return readl(((void __iomem *)rdev->rmmio) + reg);
2368 else {
2369 unsigned long flags;
2370 uint32_t ret;
2371
/* The lock keeps the index write and the data read together as one sequence. */
2372 spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
2373 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
2374 ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
2375 spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
2376
2377 return ret;
2378 }
2379}
2380
/*
 * Write the 32-bit value 'v' to register offset 'reg' of the device's rmmio
 * mapping.
 *
 * The value is written directly at rmmio + reg when always_indirect is
 * false and reg is below either rdev->rmmio_size or RADEON_MIN_MMIO_SIZE.
 * Otherwise reg is written to RADEON_MM_INDEX and v to RADEON_MM_DATA, with
 * mmio_idx_lock held across the index/data pair.
 */
2381static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
2382 bool always_indirect)
2383{
2384 if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
2385 writel(v, ((void __iomem *)rdev->rmmio) + reg);
2386 else {
2387 unsigned long flags;
2388
/* The lock keeps the index write and the data write together as one sequence. */
2389 spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
2390 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
2391 writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
2392 spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
2393 }
2394}
2395
2364u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); 2396u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
2365void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); 2397void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
2366 2398