drm/radeon/kms/blit: fix blit copy for very large buffers

Evergreen and NI blit copy was broken if the buffer maps to a rectangle with one dimension equal to 16384 (the maximum dimension allowed by these chips). In the mainline kernel, the problem is exposed only when buffers are very large (1G), but it's still a problem. The problem could be exposed for smaller buffers if anyone modifies the algorithm for rectangle construction in r600_blit_create_rect() (the reason why someone would modify that algorithm is to tune the performance of buffer moves).

The root cause was in the i2f() function, which only operated on the range between 0 and 16383. Fix this by extending the range of the i2f() function to 0 to 32767.

While at it, improve the function so that the range can be easily extended in the future (if it becomes necessary), clean up lines over 80 characters, and replace in-line comments with one strategic comment that explains the crux of the function.

Credits to michel@daenzer.net for pointing out the root cause of the bug.

v2: Fix I2F_MAX_INPUT constant definition goof and warn only once if the input argument is out of range. Edit the comment a little bit to avoid some linguistic confusion and make it look better in general.

Signed-off-by: Ilija Hadzic <ihadzic@research.bell-labs.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Michel Dänzer <michel@daenzer.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
author: Ilija Hadzic <ihadzic@research.bell-labs.com> 2012-02-02 10:26:24 -0500
committer: Dave Airlie <airlied@redhat.com> 2012-02-02 10:54:48 -0500
commit: 52b53a0bf8026a322cfa6cfec6a10dd31fef8752 (patch)
tree: d2247328b87baf65a3245be2ea7e430f14b2be95 /drivers/gpu/drm
parent: 304a48400d9718f74ec35ae46f30868a5f4c4516 (diff)
1 files changed, 25 insertions, 10 deletions
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index d996f438113..accc032c103 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -468,27 +468,42 @@ set_default_state(struct radeon_device *rdev)
        radeon_ring_write(ring, sq_stack_resource_mgmt_2);
 }
+#define I2F_MAX_BITS 15
+#define I2F_MAX_INPUT  ((1 << I2F_MAX_BITS) - 1)
+#define I2F_SHIFT (24 - I2F_MAX_BITS)
+/*
+ * Converts unsigned integer into 32-bit IEEE floating point representation.
+ * Conversion is not universal and only works for the range from 0
+ * to 2^I2F_MAX_BITS-1. Currently we only use it with inputs between
+ * 0 and 16384 (inclusive), so I2F_MAX_BITS=15 is enough. If necessary,
+ * I2F_MAX_BITS can be increased, but that will add to the loop iterations
+ * and slow us down. Conversion is done by shifting the input and counting
+ * down until the first 1 reaches bit position 23. The resulting counter
+ * and the shifted input are, respectively, the exponent and the fraction.
+ * The sign is always zero.
+ */
 static uint32_t i2f(uint32_t input)
 {
        u32 result, i, exponent, fraction;
-        if ((input & 0x3fff) == 0)
+        WARN_ON_ONCE(input > I2F_MAX_INPUT);
-                result = 0; /* 0 is a special case */
+        if ((input & I2F_MAX_INPUT) == 0)
+                result = 0;
        else {
-                exponent = 140; /* exponent biased by 127; */
+                exponent = 126 + I2F_MAX_BITS;
-                fraction = (input & 0x3fff) << 10; /* cheat and only
+                fraction = (input & I2F_MAX_INPUT) << I2F_SHIFT;
-                                                      handle numbers below 2^^15 */
-                for (i = 0; i < 14; i++) {
+                for (i = 0; i < I2F_MAX_BITS; i++) {
                        if (fraction & 0x800000)
                                break;
                        else {
-                                fraction = fraction << 1; /* keep
+                                fraction = fraction << 1;
-                                                             shifting left until top bit = 1 */
                                exponent = exponent - 1;
                        }
                }
-                result = exponent << 23 | (fraction & 0x7fffff); /* mask
+                result = exponent << 23 | (fraction & 0x7fffff);
-                                                                    off top bit; assumed 1 */
        }
        return result;
 }
author	Ilija Hadzic <ihadzic@research.bell-labs.com>	2012-02-02 10:26:24 -0500
committer	Dave Airlie <airlied@redhat.com>	2012-02-02 10:54:48 -0500
commit	52b53a0bf8026a322cfa6cfec6a10dd31fef8752 (patch)
tree	d2247328b87baf65a3245be2ea7e430f14b2be95 /drivers/gpu/drm
parent	304a48400d9718f74ec35ae46f30868a5f4c4516 (diff)

diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index d996f438113..accc032c103 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -468,27 +468,42 @@ set_default_state(struct radeon_device *rdev)
468	radeon_ring_write(ring, sq_stack_resource_mgmt_2);	468	radeon_ring_write(ring, sq_stack_resource_mgmt_2);
469	}	469	}
470		470
		471	#define I2F_MAX_BITS 15
		472	#define I2F_MAX_INPUT ((1 << I2F_MAX_BITS) - 1)
		473	#define I2F_SHIFT (24 - I2F_MAX_BITS)
		474
		475	/*
		476	* Converts unsigned integer into 32-bit IEEE floating point representation.
		477	* Conversion is not universal and only works for the range from 0
		478	* to 2^I2F_MAX_BITS-1. Currently we only use it with inputs between
		479	* 0 and 16384 (inclusive), so I2F_MAX_BITS=15 is enough. If necessary,
		480	* I2F_MAX_BITS can be increased, but that will add to the loop iterations
		481	* and slow us down. Conversion is done by shifting the input and counting
		482	* down until the first 1 reaches bit position 23. The resulting counter
		483	* and the shifted input are, respectively, the exponent and the fraction.
		484	* The sign is always zero.
		485	*/
471	static uint32_t i2f(uint32_t input)	486	static uint32_t i2f(uint32_t input)
472	{	487	{
473	u32 result, i, exponent, fraction;	488	u32 result, i, exponent, fraction;
474		489
475	if ((input & 0x3fff) == 0)	490	WARN_ON_ONCE(input > I2F_MAX_INPUT);
476	result = 0; /* 0 is a special case */	491
		492	if ((input & I2F_MAX_INPUT) == 0)
		493	result = 0;
477	else {	494	else {
478	exponent = 140; /* exponent biased by 127; */	495	exponent = 126 + I2F_MAX_BITS;
479	fraction = (input & 0x3fff) << 10; /* cheat and only	496	fraction = (input & I2F_MAX_INPUT) << I2F_SHIFT;
480	handle numbers below 2^^15 */	497
481	for (i = 0; i < 14; i++) {	498	for (i = 0; i < I2F_MAX_BITS; i++) {
482	if (fraction & 0x800000)	499	if (fraction & 0x800000)
483	break;	500	break;
484	else {	501	else {
485	fraction = fraction << 1; /* keep	502	fraction = fraction << 1;
486	shifting left until top bit = 1 */
487	exponent = exponent - 1;	503	exponent = exponent - 1;
488	}	504	}
489	}	505	}
490	result = exponent << 23 \| (fraction & 0x7fffff); /* mask	506	result = exponent << 23 \| (fraction & 0x7fffff);
491	off top bit; assumed 1 */
492	}	507	}
493	return result;	508	return result;
494	}	509	}