diff options
| author | Ilija Hadzic <ihadzic@research.bell-labs.com> | 2012-02-02 10:26:24 -0500 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2012-02-02 10:54:48 -0500 |
| commit | 52b53a0bf8026a322cfa6cfec6a10dd31fef8752 (patch) | |
| tree | d2247328b87baf65a3245be2ea7e430f14b2be95 /drivers/gpu | |
| parent | 304a48400d9718f74ec35ae46f30868a5f4c4516 (diff) | |
drm/radeon/kms/blit: fix blit copy for very large buffers
Evergreen and NI blit copy was broken if the buffer maps to a rectangle
with one dimension equal to 16384 (the maximum dimension allowed by these
chips).
In the mainline kernel, the problem is exposed only when buffers are
very large (1G), but it's still a problem. The problem could be exposed
for smaller buffers if anyone modifies the algorithm for rectangle
construction in r600_blit_create_rect() (the reason why someone would
modify that algorithm is to tune the performance of buffer moves).
The root cause was in the i2f() function, which only operated on the range
from 0 to 16383. Fix this by extending the range of the i2f() function to
0 through 32767.
While at it, improve the function so that the range can be easily
extended in the future (if it becomes necessary), clean up lines
over 80 characters, and replace in-line comments with one strategic
comment that explains the crux of the function.
Credits to michel@daenzer.net for pointing out the root cause of
the bug.
v2: Fix I2F_MAX_INPUT constant definition goof and warn only once
if input argument is out of range. Edit the comment a little
bit to avoid some linguistic confusion and make it look better
in general.
Signed-off-by: Ilija Hadzic <ihadzic@research.bell-labs.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Michel Dänzer <michel@daenzer.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu')
| -rw-r--r-- | drivers/gpu/drm/radeon/r600_blit_kms.c | 35 |
1 files changed, 25 insertions, 10 deletions
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index d996f4381130..accc032c103f 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c | |||
| @@ -468,27 +468,42 @@ set_default_state(struct radeon_device *rdev) | |||
| 468 | radeon_ring_write(ring, sq_stack_resource_mgmt_2); | 468 | radeon_ring_write(ring, sq_stack_resource_mgmt_2); |
| 469 | } | 469 | } |
| 470 | 470 | ||
| 471 | #define I2F_MAX_BITS 15 | ||
| 472 | #define I2F_MAX_INPUT ((1 << I2F_MAX_BITS) - 1) | ||
| 473 | #define I2F_SHIFT (24 - I2F_MAX_BITS) | ||
| 474 | |||
| 475 | /* | ||
| 476 | * Converts unsigned integer into 32-bit IEEE floating point representation. | ||
| 477 | * Conversion is not universal and only works for the range from 0 | ||
| 478 | * to 2^I2F_MAX_BITS-1. Currently we only use it with inputs between | ||
| 479 | * 0 and 16384 (inclusive), so I2F_MAX_BITS=15 is enough. If necessary, | ||
| 480 | * I2F_MAX_BITS can be increased, but that will add to the loop iterations | ||
| 481 | * and slow us down. Conversion is done by shifting the input and counting | ||
| 482 | * down until the first 1 reaches bit position 23. The resulting counter | ||
| 483 | * and the shifted input are, respectively, the exponent and the fraction. | ||
| 484 | * The sign is always zero. | ||
| 485 | */ | ||
| 471 | static uint32_t i2f(uint32_t input) | 486 | static uint32_t i2f(uint32_t input) |
| 472 | { | 487 | { |
| 473 | u32 result, i, exponent, fraction; | 488 | u32 result, i, exponent, fraction; |
| 474 | 489 | ||
| 475 | if ((input & 0x3fff) == 0) | 490 | WARN_ON_ONCE(input > I2F_MAX_INPUT); |
| 476 | result = 0; /* 0 is a special case */ | 491 | |
| 492 | if ((input & I2F_MAX_INPUT) == 0) | ||
| 493 | result = 0; | ||
| 477 | else { | 494 | else { |
| 478 | exponent = 140; /* exponent biased by 127; */ | 495 | exponent = 126 + I2F_MAX_BITS; |
| 479 | fraction = (input & 0x3fff) << 10; /* cheat and only | 496 | fraction = (input & I2F_MAX_INPUT) << I2F_SHIFT; |
| 480 | handle numbers below 2^^15 */ | 497 | |
| 481 | for (i = 0; i < 14; i++) { | 498 | for (i = 0; i < I2F_MAX_BITS; i++) { |
| 482 | if (fraction & 0x800000) | 499 | if (fraction & 0x800000) |
| 483 | break; | 500 | break; |
| 484 | else { | 501 | else { |
| 485 | fraction = fraction << 1; /* keep | 502 | fraction = fraction << 1; |
| 486 | shifting left until top bit = 1 */ | ||
| 487 | exponent = exponent - 1; | 503 | exponent = exponent - 1; |
| 488 | } | 504 | } |
| 489 | } | 505 | } |
| 490 | result = exponent << 23 | (fraction & 0x7fffff); /* mask | 506 | result = exponent << 23 | (fraction & 0x7fffff); |
| 491 | off top bit; assumed 1 */ | ||
| 492 | } | 507 | } |
| 493 | return result; | 508 | return result; |
| 494 | } | 509 | } |
