diff options
author | Mikulas Patocka <mpatocka@redhat.com> | 2018-06-03 10:41:00 -0400 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2018-07-30 18:11:12 -0400 |
commit | 91ba11fb7d7ca0a3bbe8a512e65e666e2ec1e889 (patch) | |
tree | 65aca2b7e173c4cdc0658b2feb826b07277f8f50 | |
parent | 09a00abe3a9941c2715ca83eb88172cd2f54d8fd (diff) |
udl-kms: avoid division
Division is slow, so it shouldn't be done by the pixel-generating code.
The driver supports only 2 or 4 bytes per pixel, so we can replace
division with a shift.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r-- | drivers/gpu/drm/udl/udl_drv.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/udl/udl_fb.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/udl/udl_transfer.c | 39 |
3 files changed, 30 insertions, 26 deletions
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 55c0cc309198..7588a9eb0ee0 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h | |||
@@ -112,7 +112,7 @@ udl_fb_user_fb_create(struct drm_device *dev, | |||
112 | struct drm_file *file, | 112 | struct drm_file *file, |
113 | const struct drm_mode_fb_cmd2 *mode_cmd); | 113 | const struct drm_mode_fb_cmd2 *mode_cmd); |
114 | 114 | ||
115 | int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, | 115 | int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr, |
116 | const char *front, char **urb_buf_ptr, | 116 | const char *front, char **urb_buf_ptr, |
117 | u32 byte_offset, u32 device_byte_offset, u32 byte_width, | 117 | u32 byte_offset, u32 device_byte_offset, u32 byte_width, |
118 | int *ident_ptr, int *sent_ptr); | 118 | int *ident_ptr, int *sent_ptr); |
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index c3f5867dac91..8746eeeec44d 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c | |||
@@ -90,7 +90,10 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, | |||
90 | int bytes_identical = 0; | 90 | int bytes_identical = 0; |
91 | struct urb *urb; | 91 | struct urb *urb; |
92 | int aligned_x; | 92 | int aligned_x; |
93 | int bpp = fb->base.format->cpp[0]; | 93 | int log_bpp; |
94 | |||
95 | BUG_ON(!is_power_of_2(fb->base.format->cpp[0])); | ||
96 | log_bpp = __ffs(fb->base.format->cpp[0]); | ||
94 | 97 | ||
95 | if (!fb->active_16) | 98 | if (!fb->active_16) |
96 | return 0; | 99 | return 0; |
@@ -125,12 +128,12 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, | |||
125 | 128 | ||
126 | for (i = y; i < y + height ; i++) { | 129 | for (i = y; i < y + height ; i++) { |
127 | const int line_offset = fb->base.pitches[0] * i; | 130 | const int line_offset = fb->base.pitches[0] * i; |
128 | const int byte_offset = line_offset + (x * bpp); | 131 | const int byte_offset = line_offset + (x << log_bpp); |
129 | const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp); | 132 | const int dev_byte_offset = (fb->base.width * i + x) << log_bpp; |
130 | if (udl_render_hline(dev, bpp, &urb, | 133 | if (udl_render_hline(dev, log_bpp, &urb, |
131 | (char *) fb->obj->vmapping, | 134 | (char *) fb->obj->vmapping, |
132 | &cmd, byte_offset, dev_byte_offset, | 135 | &cmd, byte_offset, dev_byte_offset, |
133 | width * bpp, | 136 | width << log_bpp, |
134 | &bytes_identical, &bytes_sent)) | 137 | &bytes_identical, &bytes_sent)) |
135 | goto error; | 138 | goto error; |
136 | } | 139 | } |
@@ -149,7 +152,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, | |||
149 | error: | 152 | error: |
150 | atomic_add(bytes_sent, &udl->bytes_sent); | 153 | atomic_add(bytes_sent, &udl->bytes_sent); |
151 | atomic_add(bytes_identical, &udl->bytes_identical); | 154 | atomic_add(bytes_identical, &udl->bytes_identical); |
152 | atomic_add(width*height*bpp, &udl->bytes_rendered); | 155 | atomic_add((width * height) << log_bpp, &udl->bytes_rendered); |
153 | end_cycles = get_cycles(); | 156 | end_cycles = get_cycles(); |
154 | atomic_add(((unsigned int) ((end_cycles - start_cycles) | 157 | atomic_add(((unsigned int) ((end_cycles - start_cycles) |
155 | >> 10)), /* Kcycles */ | 158 | >> 10)), /* Kcycles */ |
diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index b992644c17e6..f3331d33547a 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c | |||
@@ -83,12 +83,12 @@ static inline u16 pixel32_to_be16(const uint32_t pixel) | |||
83 | ((pixel >> 8) & 0xf800)); | 83 | ((pixel >> 8) & 0xf800)); |
84 | } | 84 | } |
85 | 85 | ||
86 | static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp) | 86 | static inline u16 get_pixel_val16(const uint8_t *pixel, int log_bpp) |
87 | { | 87 | { |
88 | u16 pixel_val16 = 0; | 88 | u16 pixel_val16; |
89 | if (bpp == 2) | 89 | if (log_bpp == 1) |
90 | pixel_val16 = *(const uint16_t *)pixel; | 90 | pixel_val16 = *(const uint16_t *)pixel; |
91 | else if (bpp == 4) | 91 | else |
92 | pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel); | 92 | pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel); |
93 | return pixel_val16; | 93 | return pixel_val16; |
94 | } | 94 | } |
@@ -125,8 +125,9 @@ static void udl_compress_hline16( | |||
125 | const u8 *const pixel_end, | 125 | const u8 *const pixel_end, |
126 | uint32_t *device_address_ptr, | 126 | uint32_t *device_address_ptr, |
127 | uint8_t **command_buffer_ptr, | 127 | uint8_t **command_buffer_ptr, |
128 | const uint8_t *const cmd_buffer_end, int bpp) | 128 | const uint8_t *const cmd_buffer_end, int log_bpp) |
129 | { | 129 | { |
130 | const int bpp = 1 << log_bpp; | ||
130 | const u8 *pixel = *pixel_start_ptr; | 131 | const u8 *pixel = *pixel_start_ptr; |
131 | uint32_t dev_addr = *device_address_ptr; | 132 | uint32_t dev_addr = *device_address_ptr; |
132 | uint8_t *cmd = *command_buffer_ptr; | 133 | uint8_t *cmd = *command_buffer_ptr; |
@@ -153,12 +154,12 @@ static void udl_compress_hline16( | |||
153 | raw_pixels_count_byte = cmd++; /* we'll know this later */ | 154 | raw_pixels_count_byte = cmd++; /* we'll know this later */ |
154 | raw_pixel_start = pixel; | 155 | raw_pixel_start = pixel; |
155 | 156 | ||
156 | cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL, | 157 | cmd_pixel_end = pixel + (min3(MAX_CMD_PIXELS + 1UL, |
157 | (unsigned long)(pixel_end - pixel) / bpp, | 158 | (unsigned long)(pixel_end - pixel) >> log_bpp, |
158 | (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp; | 159 | (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) << log_bpp); |
159 | 160 | ||
160 | prefetch_range((void *) pixel, cmd_pixel_end - pixel); | 161 | prefetch_range((void *) pixel, cmd_pixel_end - pixel); |
161 | pixel_val16 = get_pixel_val16(pixel, bpp); | 162 | pixel_val16 = get_pixel_val16(pixel, log_bpp); |
162 | 163 | ||
163 | while (pixel < cmd_pixel_end) { | 164 | while (pixel < cmd_pixel_end) { |
164 | const u8 *const start = pixel; | 165 | const u8 *const start = pixel; |
@@ -170,7 +171,7 @@ static void udl_compress_hline16( | |||
170 | pixel += bpp; | 171 | pixel += bpp; |
171 | 172 | ||
172 | while (pixel < cmd_pixel_end) { | 173 | while (pixel < cmd_pixel_end) { |
173 | pixel_val16 = get_pixel_val16(pixel, bpp); | 174 | pixel_val16 = get_pixel_val16(pixel, log_bpp); |
174 | if (pixel_val16 != repeating_pixel_val16) | 175 | if (pixel_val16 != repeating_pixel_val16) |
175 | break; | 176 | break; |
176 | pixel += bpp; | 177 | pixel += bpp; |
@@ -179,10 +180,10 @@ static void udl_compress_hline16( | |||
179 | if (unlikely(pixel > start + bpp)) { | 180 | if (unlikely(pixel > start + bpp)) { |
180 | /* go back and fill in raw pixel count */ | 181 | /* go back and fill in raw pixel count */ |
181 | *raw_pixels_count_byte = (((start - | 182 | *raw_pixels_count_byte = (((start - |
182 | raw_pixel_start) / bpp) + 1) & 0xFF; | 183 | raw_pixel_start) >> log_bpp) + 1) & 0xFF; |
183 | 184 | ||
184 | /* immediately after raw data is repeat byte */ | 185 | /* immediately after raw data is repeat byte */ |
185 | *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; | 186 | *cmd++ = (((pixel - start) >> log_bpp) - 1) & 0xFF; |
186 | 187 | ||
187 | /* Then start another raw pixel span */ | 188 | /* Then start another raw pixel span */ |
188 | raw_pixel_start = pixel; | 189 | raw_pixel_start = pixel; |
@@ -192,14 +193,14 @@ static void udl_compress_hline16( | |||
192 | 193 | ||
193 | if (pixel > raw_pixel_start) { | 194 | if (pixel > raw_pixel_start) { |
194 | /* finalize last RAW span */ | 195 | /* finalize last RAW span */ |
195 | *raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF; | 196 | *raw_pixels_count_byte = ((pixel - raw_pixel_start) >> log_bpp) & 0xFF; |
196 | } else { | 197 | } else { |
197 | /* undo unused byte */ | 198 | /* undo unused byte */ |
198 | cmd--; | 199 | cmd--; |
199 | } | 200 | } |
200 | 201 | ||
201 | *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF; | 202 | *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) >> log_bpp) & 0xFF; |
202 | dev_addr += ((pixel - cmd_pixel_start) / bpp) * 2; | 203 | dev_addr += ((pixel - cmd_pixel_start) >> log_bpp) * 2; |
203 | } | 204 | } |
204 | 205 | ||
205 | if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) { | 206 | if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) { |
@@ -222,19 +223,19 @@ static void udl_compress_hline16( | |||
222 | * (that we can only write to, slowly, and can never read), and (optionally) | 223 | * (that we can only write to, slowly, and can never read), and (optionally) |
223 | * our shadow copy that tracks what's been sent to that hardware buffer. | 224 | * our shadow copy that tracks what's been sent to that hardware buffer. |
224 | */ | 225 | */ |
225 | int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, | 226 | int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr, |
226 | const char *front, char **urb_buf_ptr, | 227 | const char *front, char **urb_buf_ptr, |
227 | u32 byte_offset, u32 device_byte_offset, | 228 | u32 byte_offset, u32 device_byte_offset, |
228 | u32 byte_width, | 229 | u32 byte_width, |
229 | int *ident_ptr, int *sent_ptr) | 230 | int *ident_ptr, int *sent_ptr) |
230 | { | 231 | { |
231 | const u8 *line_start, *line_end, *next_pixel; | 232 | const u8 *line_start, *line_end, *next_pixel; |
232 | u32 base16 = 0 + (device_byte_offset / bpp) * 2; | 233 | u32 base16 = 0 + (device_byte_offset >> log_bpp) * 2; |
233 | struct urb *urb = *urb_ptr; | 234 | struct urb *urb = *urb_ptr; |
234 | u8 *cmd = *urb_buf_ptr; | 235 | u8 *cmd = *urb_buf_ptr; |
235 | u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; | 236 | u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; |
236 | 237 | ||
237 | BUG_ON(!(bpp == 2 || bpp == 4)); | 238 | BUG_ON(!(log_bpp == 1 || log_bpp == 2)); |
238 | 239 | ||
239 | line_start = (u8 *) (front + byte_offset); | 240 | line_start = (u8 *) (front + byte_offset); |
240 | next_pixel = line_start; | 241 | next_pixel = line_start; |
@@ -244,7 +245,7 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, | |||
244 | 245 | ||
245 | udl_compress_hline16(&next_pixel, | 246 | udl_compress_hline16(&next_pixel, |
246 | line_end, &base16, | 247 | line_end, &base16, |
247 | (u8 **) &cmd, (u8 *) cmd_end, bpp); | 248 | (u8 **) &cmd, (u8 *) cmd_end, log_bpp); |
248 | 249 | ||
249 | if (cmd >= cmd_end) { | 250 | if (cmd >= cmd_end) { |
250 | int len = cmd - (u8 *) urb->transfer_buffer; | 251 | int len = cmd - (u8 *) urb->transfer_buffer; |