aboutsummaryrefslogtreecommitdiffstats
path: root/lib/int_sqrt.c
diff options
context:
space:
mode:
authorDavidlohr Bueso <davidlohr.bueso@hp.com>2013-04-29 19:18:09 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-04-29 21:28:19 -0400
commit30493cc9dddb68066dcc4878015660fdaa8e0965 (patch)
tree89d750aea6ea06bbb95e5c76fa6c2279fcf6b116 /lib/int_sqrt.c
parent4d22f8c306233433bdf9298b2e7806e9c71674bc (diff)
lib/int_sqrt.c: optimize square root algorithm
Optimize the current version of the shift-and-subtract (hardware) algorithm, described by John von Newmann[1] and Guy L Steele. Iterating 1,000,000 times, perf shows for the current version: Performance counter stats for './sqrt-curr' (10 runs): 27.170996 task-clock # 0.979 CPUs utilized ( +- 3.19% ) 3 context-switches # 0.103 K/sec ( +- 4.76% ) 0 cpu-migrations # 0.004 K/sec ( +-100.00% ) 104 page-faults # 0.004 M/sec ( +- 0.16% ) 64,921,199 cycles # 2.389 GHz ( +- 0.03% ) 28,967,789 stalled-cycles-frontend # 44.62% frontend cycles idle ( +- 0.18% ) <not supported> stalled-cycles-backend 104,502,623 instructions # 1.61 insns per cycle # 0.28 stalled cycles per insn ( +- 0.00% ) 34,088,368 branches # 1254.587 M/sec ( +- 0.00% ) 4,901 branch-misses # 0.01% of all branches ( +- 1.32% ) 0.027763015 seconds time elapsed ( +- 3.22% ) And for the new version: Performance counter stats for './sqrt-new' (10 runs): 0.496869 task-clock # 0.519 CPUs utilized ( +- 2.38% ) 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.403 K/sec ( +-100.00% ) 104 page-faults # 0.209 M/sec ( +- 0.15% ) 590,760 cycles # 1.189 GHz ( +- 2.35% ) 395,053 stalled-cycles-frontend # 66.87% frontend cycles idle ( +- 3.67% ) <not supported> stalled-cycles-backend 398,963 instructions # 0.68 insns per cycle # 0.99 stalled cycles per insn ( +- 0.39% ) 70,228 branches # 141.341 M/sec ( +- 0.36% ) 3,364 branch-misses # 4.79% of all branches ( +- 5.45% ) 0.000957440 seconds time elapsed ( +- 2.42% ) Furthermore, this saves space in instruction text: text data bss dec hex filename 111 0 0 111 6f lib/int_sqrt-baseline.o 89 0 0 89 59 lib/int_sqrt.o [1] http://en.wikipedia.org/wiki/First_Draft_of_a_Report_on_the_EDVAC Signed-off-by: Davidlohr Bueso <davidlohr.bueso@hp.com> Reviewed-by: Jonathan Gonzalez <jgonzlez@linets.cl> Tested-by: Jonathan Gonzalez <jgonzlez@linets.cl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib/int_sqrt.c')
-rw-r--r--lib/int_sqrt.c32
1 files changed, 19 insertions, 13 deletions
diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c
index fc2eeb7cb2ea..1ef4cc344977 100644
--- a/lib/int_sqrt.c
+++ b/lib/int_sqrt.c
@@ -1,3 +1,9 @@
1/*
2 * Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@hp.com>
3 *
4 * Based on the shift-and-subtract algorithm for computing integer
5 * square root from Guy L. Steele.
6 */
1 7
2#include <linux/kernel.h> 8#include <linux/kernel.h>
3#include <linux/export.h> 9#include <linux/export.h>
@@ -10,23 +16,23 @@
10 */ 16 */
11unsigned long int_sqrt(unsigned long x) 17unsigned long int_sqrt(unsigned long x)
12{ 18{
13 unsigned long op, res, one; 19 unsigned long b, m, y = 0;
14 20
15 op = x; 21 if (x <= 1)
16 res = 0; 22 return x;
17 23
18 one = 1UL << (BITS_PER_LONG - 2); 24 m = 1UL << (BITS_PER_LONG - 2);
19 while (one > op) 25 while (m != 0) {
20 one >>= 2; 26 b = y + m;
27 y >>= 1;
21 28
22 while (one != 0) { 29 if (x >= b) {
23 if (op >= res + one) { 30 x -= b;
24 op = op - (res + one); 31 y += m;
25 res = res + 2 * one;
26 } 32 }
27 res /= 2; 33 m >>= 2;
28 one /= 4;
29 } 34 }
30 return res; 35
36 return y;
31} 37}
32EXPORT_SYMBOL(int_sqrt); 38EXPORT_SYMBOL(int_sqrt);