about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Mikulas Patocka <mpatocka@redhat.com> 2018-06-03 10:41:00 -0400
committer Dave Airlie <airlied@redhat.com> 2018-07-30 18:11:12 -0400
commit 91ba11fb7d7ca0a3bbe8a512e65e666e2ec1e889 (patch)
tree 65aca2b7e173c4cdc0658b2feb826b07277f8f50
parent 09a00abe3a9941c2715ca83eb88172cd2f54d8fd (diff)
udl-kms: avoid division
Division is slow, so it shouldn't be done by the pixel generating code. The driver supports only 2 or 4 bytes per pixel, so we can replace division with a shift.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r-- drivers/gpu/drm/udl/udl_drv.h 2
-rw-r--r-- drivers/gpu/drm/udl/udl_fb.c 15
-rw-r--r-- drivers/gpu/drm/udl/udl_transfer.c 39
3 files changed, 30 insertions, 26 deletions
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index 55c0cc309198..7588a9eb0ee0 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -112,7 +112,7 @@ udl_fb_user_fb_create(struct drm_device *dev,
112 struct drm_file *file, 112 struct drm_file *file,
113 const struct drm_mode_fb_cmd2 *mode_cmd); 113 const struct drm_mode_fb_cmd2 *mode_cmd);
114 114
115int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, 115int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr,
116 const char *front, char **urb_buf_ptr, 116 const char *front, char **urb_buf_ptr,
117 u32 byte_offset, u32 device_byte_offset, u32 byte_width, 117 u32 byte_offset, u32 device_byte_offset, u32 byte_width,
118 int *ident_ptr, int *sent_ptr); 118 int *ident_ptr, int *sent_ptr);
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index c3f5867dac91..8746eeeec44d 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -90,7 +90,10 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
90 int bytes_identical = 0; 90 int bytes_identical = 0;
91 struct urb *urb; 91 struct urb *urb;
92 int aligned_x; 92 int aligned_x;
93 int bpp = fb->base.format->cpp[0]; 93 int log_bpp;
94
95 BUG_ON(!is_power_of_2(fb->base.format->cpp[0]));
96 log_bpp = __ffs(fb->base.format->cpp[0]);
94 97
95 if (!fb->active_16) 98 if (!fb->active_16)
96 return 0; 99 return 0;
@@ -125,12 +128,12 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
125 128
126 for (i = y; i < y + height ; i++) { 129 for (i = y; i < y + height ; i++) {
127 const int line_offset = fb->base.pitches[0] * i; 130 const int line_offset = fb->base.pitches[0] * i;
128 const int byte_offset = line_offset + (x * bpp); 131 const int byte_offset = line_offset + (x << log_bpp);
129 const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp); 132 const int dev_byte_offset = (fb->base.width * i + x) << log_bpp;
130 if (udl_render_hline(dev, bpp, &urb, 133 if (udl_render_hline(dev, log_bpp, &urb,
131 (char *) fb->obj->vmapping, 134 (char *) fb->obj->vmapping,
132 &cmd, byte_offset, dev_byte_offset, 135 &cmd, byte_offset, dev_byte_offset,
133 width * bpp, 136 width << log_bpp,
134 &bytes_identical, &bytes_sent)) 137 &bytes_identical, &bytes_sent))
135 goto error; 138 goto error;
136 } 139 }
@@ -149,7 +152,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
149error: 152error:
150 atomic_add(bytes_sent, &udl->bytes_sent); 153 atomic_add(bytes_sent, &udl->bytes_sent);
151 atomic_add(bytes_identical, &udl->bytes_identical); 154 atomic_add(bytes_identical, &udl->bytes_identical);
152 atomic_add(width*height*bpp, &udl->bytes_rendered); 155 atomic_add((width * height) << log_bpp, &udl->bytes_rendered);
153 end_cycles = get_cycles(); 156 end_cycles = get_cycles();
154 atomic_add(((unsigned int) ((end_cycles - start_cycles) 157 atomic_add(((unsigned int) ((end_cycles - start_cycles)
155 >> 10)), /* Kcycles */ 158 >> 10)), /* Kcycles */
diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index b992644c17e6..f3331d33547a 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -83,12 +83,12 @@ static inline u16 pixel32_to_be16(const uint32_t pixel)
83 ((pixel >> 8) & 0xf800)); 83 ((pixel >> 8) & 0xf800));
84} 84}
85 85
86static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp) 86static inline u16 get_pixel_val16(const uint8_t *pixel, int log_bpp)
87{ 87{
88 u16 pixel_val16 = 0; 88 u16 pixel_val16;
89 if (bpp == 2) 89 if (log_bpp == 1)
90 pixel_val16 = *(const uint16_t *)pixel; 90 pixel_val16 = *(const uint16_t *)pixel;
91 else if (bpp == 4) 91 else
92 pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel); 92 pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel);
93 return pixel_val16; 93 return pixel_val16;
94} 94}
@@ -125,8 +125,9 @@ static void udl_compress_hline16(
125 const u8 *const pixel_end, 125 const u8 *const pixel_end,
126 uint32_t *device_address_ptr, 126 uint32_t *device_address_ptr,
127 uint8_t **command_buffer_ptr, 127 uint8_t **command_buffer_ptr,
128 const uint8_t *const cmd_buffer_end, int bpp) 128 const uint8_t *const cmd_buffer_end, int log_bpp)
129{ 129{
130 const int bpp = 1 << log_bpp;
130 const u8 *pixel = *pixel_start_ptr; 131 const u8 *pixel = *pixel_start_ptr;
131 uint32_t dev_addr = *device_address_ptr; 132 uint32_t dev_addr = *device_address_ptr;
132 uint8_t *cmd = *command_buffer_ptr; 133 uint8_t *cmd = *command_buffer_ptr;
@@ -153,12 +154,12 @@ static void udl_compress_hline16(
153 raw_pixels_count_byte = cmd++; /* we'll know this later */ 154 raw_pixels_count_byte = cmd++; /* we'll know this later */
154 raw_pixel_start = pixel; 155 raw_pixel_start = pixel;
155 156
156 cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL, 157 cmd_pixel_end = pixel + (min3(MAX_CMD_PIXELS + 1UL,
157 (unsigned long)(pixel_end - pixel) / bpp, 158 (unsigned long)(pixel_end - pixel) >> log_bpp,
158 (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp; 159 (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) << log_bpp);
159 160
160 prefetch_range((void *) pixel, cmd_pixel_end - pixel); 161 prefetch_range((void *) pixel, cmd_pixel_end - pixel);
161 pixel_val16 = get_pixel_val16(pixel, bpp); 162 pixel_val16 = get_pixel_val16(pixel, log_bpp);
162 163
163 while (pixel < cmd_pixel_end) { 164 while (pixel < cmd_pixel_end) {
164 const u8 *const start = pixel; 165 const u8 *const start = pixel;
@@ -170,7 +171,7 @@ static void udl_compress_hline16(
170 pixel += bpp; 171 pixel += bpp;
171 172
172 while (pixel < cmd_pixel_end) { 173 while (pixel < cmd_pixel_end) {
173 pixel_val16 = get_pixel_val16(pixel, bpp); 174 pixel_val16 = get_pixel_val16(pixel, log_bpp);
174 if (pixel_val16 != repeating_pixel_val16) 175 if (pixel_val16 != repeating_pixel_val16)
175 break; 176 break;
176 pixel += bpp; 177 pixel += bpp;
@@ -179,10 +180,10 @@ static void udl_compress_hline16(
179 if (unlikely(pixel > start + bpp)) { 180 if (unlikely(pixel > start + bpp)) {
180 /* go back and fill in raw pixel count */ 181 /* go back and fill in raw pixel count */
181 *raw_pixels_count_byte = (((start - 182 *raw_pixels_count_byte = (((start -
182 raw_pixel_start) / bpp) + 1) & 0xFF; 183 raw_pixel_start) >> log_bpp) + 1) & 0xFF;
183 184
184 /* immediately after raw data is repeat byte */ 185 /* immediately after raw data is repeat byte */
185 *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; 186 *cmd++ = (((pixel - start) >> log_bpp) - 1) & 0xFF;
186 187
187 /* Then start another raw pixel span */ 188 /* Then start another raw pixel span */
188 raw_pixel_start = pixel; 189 raw_pixel_start = pixel;
@@ -192,14 +193,14 @@ static void udl_compress_hline16(
192 193
193 if (pixel > raw_pixel_start) { 194 if (pixel > raw_pixel_start) {
194 /* finalize last RAW span */ 195 /* finalize last RAW span */
195 *raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF; 196 *raw_pixels_count_byte = ((pixel - raw_pixel_start) >> log_bpp) & 0xFF;
196 } else { 197 } else {
197 /* undo unused byte */ 198 /* undo unused byte */
198 cmd--; 199 cmd--;
199 } 200 }
200 201
201 *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF; 202 *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) >> log_bpp) & 0xFF;
202 dev_addr += ((pixel - cmd_pixel_start) / bpp) * 2; 203 dev_addr += ((pixel - cmd_pixel_start) >> log_bpp) * 2;
203 } 204 }
204 205
205 if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) { 206 if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) {
@@ -222,19 +223,19 @@ static void udl_compress_hline16(
222 * (that we can only write to, slowly, and can never read), and (optionally) 223 * (that we can only write to, slowly, and can never read), and (optionally)
223 * our shadow copy that tracks what's been sent to that hardware buffer. 224 * our shadow copy that tracks what's been sent to that hardware buffer.
224 */ 225 */
225int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, 226int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr,
226 const char *front, char **urb_buf_ptr, 227 const char *front, char **urb_buf_ptr,
227 u32 byte_offset, u32 device_byte_offset, 228 u32 byte_offset, u32 device_byte_offset,
228 u32 byte_width, 229 u32 byte_width,
229 int *ident_ptr, int *sent_ptr) 230 int *ident_ptr, int *sent_ptr)
230{ 231{
231 const u8 *line_start, *line_end, *next_pixel; 232 const u8 *line_start, *line_end, *next_pixel;
232 u32 base16 = 0 + (device_byte_offset / bpp) * 2; 233 u32 base16 = 0 + (device_byte_offset >> log_bpp) * 2;
233 struct urb *urb = *urb_ptr; 234 struct urb *urb = *urb_ptr;
234 u8 *cmd = *urb_buf_ptr; 235 u8 *cmd = *urb_buf_ptr;
235 u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; 236 u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length;
236 237
237 BUG_ON(!(bpp == 2 || bpp == 4)); 238 BUG_ON(!(log_bpp == 1 || log_bpp == 2));
238 239
239 line_start = (u8 *) (front + byte_offset); 240 line_start = (u8 *) (front + byte_offset);
240 next_pixel = line_start; 241 next_pixel = line_start;
@@ -244,7 +245,7 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,
244 245
245 udl_compress_hline16(&next_pixel, 246 udl_compress_hline16(&next_pixel,
246 line_end, &base16, 247 line_end, &base16,
247 (u8 **) &cmd, (u8 *) cmd_end, bpp); 248 (u8 **) &cmd, (u8 *) cmd_end, log_bpp);
248 249
249 if (cmd >= cmd_end) { 250 if (cmd >= cmd_end) {
250 int len = cmd - (u8 *) urb->transfer_buffer; 251 int len = cmd - (u8 *) urb->transfer_buffer;