aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2013-01-18 11:31:14 -0500
committer Dave Airlie <airlied@redhat.com> 2013-02-07 20:52:00 -0500
commit e90a4ea534b110a43df87a05587c53cd78569467 (patch)
tree 0959edcc69f8e333ddc61ad6cd64d99dfeaa07e3
parent bcb39af4486be07e896fc374a2336bad3104ae0a (diff)
drm/udl: Inline memcmp() for RLE compression of xfer
As we use a variable length, the compiler does not realise that it is a fixed value of either 2 or 4 bytes. Instead of performing the inline comparison itself, the compiler inserts a function call to the generic memcmp routine which is optimised for long comparisons of variable length. That turns out to be quite expensive...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r-- drivers/gpu/drm/udl/udl_transfer.c 46
1 files changed, 28 insertions, 18 deletions
diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index 142fee5f983f..f343db73e095 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -75,15 +75,19 @@ static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes)
75} 75}
76#endif 76#endif
77 77
78static inline u16 pixel32_to_be16p(const uint8_t *pixel) 78static inline u16 pixel32_to_be16(const uint32_t pixel)
79{ 79{
80 uint32_t pix = *(uint32_t *)pixel; 80 return (((pixel >> 3) & 0x001f) |
81 u16 retval; 81 ((pixel >> 5) & 0x07e0) |
82 ((pixel >> 8) & 0xf800));
83}
82 84
83 retval = (((pix >> 3) & 0x001f) | 85static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp)
84 ((pix >> 5) & 0x07e0) | 86{
85 ((pix >> 8) & 0xf800)); 87 if (bpp == 2)
86 return retval; 88 return *(const uint16_t *)pixel == repeat;
89 else
90 return *(const uint32_t *)pixel == repeat;
87} 91}
88 92
89/* 93/*
@@ -152,29 +156,33 @@ static void udl_compress_hline16(
152 prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); 156 prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
153 157
154 while (pixel < cmd_pixel_end) { 158 while (pixel < cmd_pixel_end) {
155 const u8 * const repeating_pixel = pixel; 159 const u8 *const start = pixel;
156 160 u32 repeating_pixel;
157 if (bpp == 2) 161
158 *(uint16_t *)cmd = cpu_to_be16p((uint16_t *)pixel); 162 if (bpp == 2) {
159 else if (bpp == 4) 163 repeating_pixel = *(uint16_t *)pixel;
160 *(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16p(pixel)); 164 *(uint16_t *)cmd = cpu_to_be16(repeating_pixel);
165 } else {
166 repeating_pixel = *(uint32_t *)pixel;
167 *(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel));
168 }
161 169
162 cmd += 2; 170 cmd += 2;
163 pixel += bpp; 171 pixel += bpp;
164 172
165 if (unlikely((pixel < cmd_pixel_end) && 173 if (unlikely((pixel < cmd_pixel_end) &&
166 (!memcmp(pixel, repeating_pixel, bpp)))) { 174 (pixel_repeats(pixel, repeating_pixel, bpp)))) {
167 /* go back and fill in raw pixel count */ 175 /* go back and fill in raw pixel count */
168 *raw_pixels_count_byte = (((repeating_pixel - 176 *raw_pixels_count_byte = (((start -
169 raw_pixel_start) / bpp) + 1) & 0xFF; 177 raw_pixel_start) / bpp) + 1) & 0xFF;
170 178
171 while ((pixel < cmd_pixel_end) 179 while ((pixel < cmd_pixel_end) &&
172 && (!memcmp(pixel, repeating_pixel, bpp))) { 180 (pixel_repeats(pixel, repeating_pixel, bpp))) {
173 pixel += bpp; 181 pixel += bpp;
174 } 182 }
175 183
176 /* immediately after raw data is repeat byte */ 184 /* immediately after raw data is repeat byte */
177 *cmd++ = (((pixel - repeating_pixel) / bpp) - 1) & 0xFF; 185 *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF;
178 186
179 /* Then start another raw pixel span */ 187 /* Then start another raw pixel span */
180 raw_pixel_start = pixel; 188 raw_pixel_start = pixel;
@@ -223,6 +231,8 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,
223 u8 *cmd = *urb_buf_ptr; 231 u8 *cmd = *urb_buf_ptr;
224 u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; 232 u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length;
225 233
234 BUG_ON(!(bpp == 2 || bpp == 4));
235
226 line_start = (u8 *) (front + byte_offset); 236 line_start = (u8 *) (front + byte_offset);
227 next_pixel = line_start; 237 next_pixel = line_start;
228 line_end = next_pixel + byte_width; 238 line_end = next_pixel + byte_width;