diff options
author | George Spelvin <linux@horizon.com> | 2012-10-04 20:12:29 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-05 14:04:48 -0400 |
commit | 2359172a75986359ce9cf041a9aca6a32cdf8779 (patch) | |
tree | bc5a80e6153d9c3cf749b7ec4f79874d8b321fe7 /lib | |
parent | e49317d415f5a44bad8377a208d61902d752303e (diff) |
lib: vsprintf: optimize division by 10000
The same multiply-by-inverse technique can be used to convert division by
10000 to a 32x32->64-bit multiply.
Signed-off-by: George Spelvin <linux@horizon.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/vsprintf.c | 60 |
1 files changed, 33 insertions, 27 deletions
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 67e74cbefa90..8cb7635b2ce3 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c | |||
@@ -245,17 +245,32 @@ char *put_dec(char *buf, unsigned long long n) | |||
245 | 245 | ||
246 | /* See comment in put_dec_full9 for choice of constants */ | 246 | /* See comment in put_dec_full9 for choice of constants */ |
247 | static noinline_for_stack | 247 | static noinline_for_stack |
248 | char *put_dec_full4(char *buf, unsigned q) | 248 | void put_dec_full4(char *buf, unsigned q) |
249 | { | 249 | { |
250 | unsigned r; | 250 | unsigned r; |
251 | r = (q * 0xccd) >> 15; | 251 | r = (q * 0xccd) >> 15; |
252 | *buf++ = (q - 10 * r) + '0'; | 252 | buf[0] = (q - 10 * r) + '0'; |
253 | q = (r * 0xcd) >> 11; | 253 | q = (r * 0xcd) >> 11; |
254 | *buf++ = (r - 10 * q) + '0'; | 254 | buf[1] = (r - 10 * q) + '0'; |
255 | r = (q * 0xcd) >> 11; | 255 | r = (q * 0xcd) >> 11; |
256 | *buf++ = (q - 10 * r) + '0'; | 256 | buf[2] = (q - 10 * r) + '0'; |
257 | *buf++ = r + '0'; | 257 | buf[3] = r + '0'; |
258 | return buf; | 258 | } |
259 | |||
260 | /* | ||
261 | * Call put_dec_full4 on x % 10000, return x / 10000. | ||
262 | * The approximation x/10000 == (x * 0x346DC5D7) >> 43 | ||
263 | * holds for all x < 1,128,869,999. The largest value this | ||
264 | * helper will ever be asked to convert is 1,125,520,955. | ||
265 | * (d1 in the put_dec code, assuming n is all-ones). | ||
266 | */ | ||
267 | static | ||
268 | unsigned put_dec_helper4(char *buf, unsigned x) | ||
269 | { | ||
270 | uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; | ||
271 | |||
272 | put_dec_full4(buf, x - q * 10000); | ||
273 | return q; | ||
259 | } | 274 | } |
260 | 275 | ||
261 | /* Based on code by Douglas W. Jones found at | 276 | /* Based on code by Douglas W. Jones found at |
@@ -277,28 +292,19 @@ char *put_dec(char *buf, unsigned long long n) | |||
277 | d3 = (h >> 16); /* implicit "& 0xffff" */ | 292 | d3 = (h >> 16); /* implicit "& 0xffff" */ |
278 | 293 | ||
279 | q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); | 294 | q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); |
295 | q = put_dec_helper4(buf, q); | ||
296 | |||
297 | q += 7671 * d3 + 9496 * d2 + 6 * d1; | ||
298 | q = put_dec_helper4(buf+4, q); | ||
299 | |||
300 | q += 4749 * d3 + 42 * d2; | ||
301 | q = put_dec_helper4(buf+8, q); | ||
280 | 302 | ||
281 | buf = put_dec_full4(buf, q % 10000); | 303 | q += 281 * d3; |
282 | q = q / 10000; | 304 | buf += 12; |
283 | 305 | if (q) | |
284 | d1 = q + 7671 * d3 + 9496 * d2 + 6 * d1; | 306 | buf = put_dec_trunc8(buf, q); |
285 | buf = put_dec_full4(buf, d1 % 10000); | 307 | else while (buf[-1] == '0') |
286 | q = d1 / 10000; | ||
287 | |||
288 | d2 = q + 4749 * d3 + 42 * d2; | ||
289 | buf = put_dec_full4(buf, d2 % 10000); | ||
290 | q = d2 / 10000; | ||
291 | |||
292 | d3 = q + 281 * d3; | ||
293 | if (!d3) | ||
294 | goto done; | ||
295 | buf = put_dec_full4(buf, d3 % 10000); | ||
296 | q = d3 / 10000; | ||
297 | if (!q) | ||
298 | goto done; | ||
299 | buf = put_dec_full4(buf, q); | ||
300 | done: | ||
301 | while (buf[-1] == '0') | ||
302 | --buf; | 308 | --buf; |
303 | 309 | ||
304 | return buf; | 310 | return buf; |