diff options
author | Lauri Kasanen <cand@gmx.com> | 2014-04-20 13:29:33 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2014-08-05 08:53:27 -0400 |
commit | 59bc1d89d6a4d67c94a9b70fa81bda1d5b04f0cb (patch) | |
tree | 89f2ca40816aea347dc9fa43c5ff3efd2b78682f | |
parent | 3e22920fbd0005927bc41f71daeb056a0f4def82 (diff) |
drm/radeon: Inline r100_mm_rreg, -wreg, v3
This was originally un-inlined by Andi Kleen in 2011, citing size concerns.
Indeed, a first attempt at inlining it grew radeon.ko by 7%.
However, 2% of CPU time is spent in this function. Simply inlining it gave 1% more fps
in Urban Terror.
v2: We know the minimum MMIO size. Adding it to the if condition allows the compiler to
optimize the branch out, improving both performance and size.
The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but common sense
says perf is now more than 1% better.
v3: Also change _wreg, make the threshold a define.
Inlining _wreg increased the size a bit compared to v2, so now radeon.ko
is only 1% smaller.
Signed-off-by: Lauri Kasanen <cand@gmx.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/radeon/r100.c | 33 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 40 |
2 files changed, 36 insertions, 37 deletions
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ebdce08cfefc..557fcdc32710 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c | |||
@@ -4065,39 +4065,6 @@ int r100_init(struct radeon_device *rdev) | |||
4065 | return 0; | 4065 | return 0; |
4066 | } | 4066 | } |
4067 | 4067 | ||
4068 | uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, | ||
4069 | bool always_indirect) | ||
4070 | { | ||
4071 | if (reg < rdev->rmmio_size && !always_indirect) | ||
4072 | return readl(((void __iomem *)rdev->rmmio) + reg); | ||
4073 | else { | ||
4074 | unsigned long flags; | ||
4075 | uint32_t ret; | ||
4076 | |||
4077 | spin_lock_irqsave(&rdev->mmio_idx_lock, flags); | ||
4078 | writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); | ||
4079 | ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); | ||
4080 | spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); | ||
4081 | |||
4082 | return ret; | ||
4083 | } | ||
4084 | } | ||
4085 | |||
4086 | void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, | ||
4087 | bool always_indirect) | ||
4088 | { | ||
4089 | if (reg < rdev->rmmio_size && !always_indirect) | ||
4090 | writel(v, ((void __iomem *)rdev->rmmio) + reg); | ||
4091 | else { | ||
4092 | unsigned long flags; | ||
4093 | |||
4094 | spin_lock_irqsave(&rdev->mmio_idx_lock, flags); | ||
4095 | writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); | ||
4096 | writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); | ||
4097 | spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); | ||
4098 | } | ||
4099 | } | ||
4100 | |||
4101 | u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) | 4068 | u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) |
4102 | { | 4069 | { |
4103 | if (reg < rdev->rio_mem_size) | 4070 | if (reg < rdev->rio_mem_size) |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7561b1332152..6aadfe43bad0 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -2357,10 +2357,42 @@ int radeon_device_init(struct radeon_device *rdev, | |||
2357 | void radeon_device_fini(struct radeon_device *rdev); | 2357 | void radeon_device_fini(struct radeon_device *rdev); |
2358 | int radeon_gpu_wait_for_idle(struct radeon_device *rdev); | 2358 | int radeon_gpu_wait_for_idle(struct radeon_device *rdev); |
2359 | 2359 | ||
2360 | uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, | 2360 | #define RADEON_MIN_MMIO_SIZE 0x10000 |
2361 | bool always_indirect); | 2361 | |
2362 | void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, | 2362 | static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, |
2363 | bool always_indirect); | 2363 | bool always_indirect) |
2364 | { | ||
2365 | /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */ | ||
2366 | if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) | ||
2367 | return readl(((void __iomem *)rdev->rmmio) + reg); | ||
2368 | else { | ||
2369 | unsigned long flags; | ||
2370 | uint32_t ret; | ||
2371 | |||
2372 | spin_lock_irqsave(&rdev->mmio_idx_lock, flags); | ||
2373 | writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); | ||
2374 | ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); | ||
2375 | spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); | ||
2376 | |||
2377 | return ret; | ||
2378 | } | ||
2379 | } | ||
2380 | |||
2381 | static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, | ||
2382 | bool always_indirect) | ||
2383 | { | ||
2384 | if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) | ||
2385 | writel(v, ((void __iomem *)rdev->rmmio) + reg); | ||
2386 | else { | ||
2387 | unsigned long flags; | ||
2388 | |||
2389 | spin_lock_irqsave(&rdev->mmio_idx_lock, flags); | ||
2390 | writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); | ||
2391 | writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); | ||
2392 | spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); | ||
2393 | } | ||
2394 | } | ||
2395 | |||
2364 | u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); | 2396 | u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); |
2365 | void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); | 2397 | void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); |
2366 | 2398 | ||