diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2013-01-18 11:31:14 -0500 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2013-02-07 20:52:00 -0500 |
commit | e90a4ea534b110a43df87a05587c53cd78569467 (patch) | |
tree | 0959edcc69f8e333ddc61ad6cd64d99dfeaa07e3 | |
parent | bcb39af4486be07e896fc374a2336bad3104ae0a (diff) |
drm/udl: Inline memcmp() for RLE compression of xfer
As we use a variable length, the compiler does not realise that it is a
fixed value of either 2 or 4 bytes. Instead of performing the inline
comparison itself, the compiler inserts a function call to the generic
memcmp routine, which is optimised for long comparisons of variable
length. That turns out to be quite expensive...
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r-- | drivers/gpu/drm/udl/udl_transfer.c | 46 |
1 files changed, 28 insertions, 18 deletions
diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index 142fee5f983f..f343db73e095 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -75,15 +75,19 @@ static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes) | |||
75 | } | 75 | } |
76 | #endif | 76 | #endif |
77 | 77 | ||
78 | static inline u16 pixel32_to_be16p(const uint8_t *pixel) | 78 | static inline u16 pixel32_to_be16(const uint32_t pixel) |
79 | { | 79 | { |
80 | uint32_t pix = *(uint32_t *)pixel; | 80 | return (((pixel >> 3) & 0x001f) | |
81 | u16 retval; | 81 | ((pixel >> 5) & 0x07e0) | |
82 | ((pixel >> 8) & 0xf800)); | ||
83 | } | ||
82 | 84 | ||
83 | retval = (((pix >> 3) & 0x001f) | | 85 | static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp) |
84 | ((pix >> 5) & 0x07e0) | | 86 | { |
85 | ((pix >> 8) & 0xf800)); | 87 | if (bpp == 2) |
86 | return retval; | 88 | return *(const uint16_t *)pixel == repeat; |
89 | else | ||
90 | return *(const uint32_t *)pixel == repeat; | ||
87 | } | 91 | } |
88 | 92 | ||
89 | /* | 93 | /* |
@@ -152,29 +156,33 @@ static void udl_compress_hline16( | |||
152 | prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); | 156 | prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); |
153 | 157 | ||
154 | while (pixel < cmd_pixel_end) { | 158 | while (pixel < cmd_pixel_end) { |
155 | const u8 * const repeating_pixel = pixel; | 159 | const u8 *const start = pixel; |
156 | 160 | u32 repeating_pixel; | |
157 | if (bpp == 2) | 161 | |
158 | *(uint16_t *)cmd = cpu_to_be16p((uint16_t *)pixel); | 162 | if (bpp == 2) { |
159 | else if (bpp == 4) | 163 | repeating_pixel = *(uint16_t *)pixel; |
160 | *(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16p(pixel)); | 164 | *(uint16_t *)cmd = cpu_to_be16(repeating_pixel); |
165 | } else { | ||
166 | repeating_pixel = *(uint32_t *)pixel; | ||
167 | *(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel)); | ||
168 | } | ||
161 | 169 | ||
162 | cmd += 2; | 170 | cmd += 2; |
163 | pixel += bpp; | 171 | pixel += bpp; |
164 | 172 | ||
165 | if (unlikely((pixel < cmd_pixel_end) && | 173 | if (unlikely((pixel < cmd_pixel_end) && |
166 | (!memcmp(pixel, repeating_pixel, bpp)))) { | 174 | (pixel_repeats(pixel, repeating_pixel, bpp)))) { |
167 | /* go back and fill in raw pixel count */ | 175 | /* go back and fill in raw pixel count */ |
168 | *raw_pixels_count_byte = (((repeating_pixel - | 176 | *raw_pixels_count_byte = (((start - |
169 | raw_pixel_start) / bpp) + 1) & 0xFF; | 177 | raw_pixel_start) / bpp) + 1) & 0xFF; |
170 | 178 | ||
171 | while ((pixel < cmd_pixel_end) | 179 | while ((pixel < cmd_pixel_end) && |
172 | && (!memcmp(pixel, repeating_pixel, bpp))) { | 180 | (pixel_repeats(pixel, repeating_pixel, bpp))) { |
173 | pixel += bpp; | 181 | pixel += bpp; |
174 | } | 182 | } |
175 | 183 | ||
176 | /* immediately after raw data is repeat byte */ | 184 | /* immediately after raw data is repeat byte */ |
177 | *cmd++ = (((pixel - repeating_pixel) / bpp) - 1) & 0xFF; | 185 | *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; |
178 | 186 | ||
179 | /* Then start another raw pixel span */ | 187 | /* Then start another raw pixel span */ |
180 | raw_pixel_start = pixel; | 188 | raw_pixel_start = pixel; |
@@ -223,6 +231,8 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, | |||
223 | u8 *cmd = *urb_buf_ptr; | 231 | u8 *cmd = *urb_buf_ptr; |
224 | u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; | 232 | u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; |
225 | 233 | ||
234 | BUG_ON(!(bpp == 2 || bpp == 4)); | ||
235 | |||
226 | line_start = (u8 *) (front + byte_offset); | 236 | line_start = (u8 *) (front + byte_offset); |
227 | next_pixel = line_start; | 237 | next_pixel = line_start; |
228 | line_end = next_pixel + byte_width; | 238 | line_end = next_pixel + byte_width; |