diff options
author | Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> | 2008-06-12 04:47:56 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-06-12 04:47:56 -0400 |
commit | f595ec964daf7f99668039d7303ddedd09a75142 (patch) | |
tree | 4ee6679105f0437995ff200f10885598921ae1cd | |
parent | 5e70b7f3c24468bb1635b295945edb48ecd9656a (diff) |
common implementation of iterative div/mod
We have a few instances of the open-coded iterative div/mod loop, used
when we don't expect the dividend to be much bigger than the divisor.
Unfortunately, modern versions of gcc tend to strength-"reduce" this
into a full mod operation, which isn't necessarily any faster, and
even if it were, the operation doesn't exist in the kernel if gcc implements it in libgcc.
The workaround is to put a dummy asm statement in the loop to prevent
gcc from performing the transformation.
This patch creates a single implementation of this loop, and uses it
to replace the open-coded versions I know about.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Robert Hancock <hancockr@shaw.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/xen/time.c | 13 | ||||
-rw-r--r-- | include/linux/math64.h | 2 | ||||
-rw-r--r-- | include/linux/time.h | 11 | ||||
-rw-r--r-- | lib/div64.c | 23 |
4 files changed, 30 insertions, 19 deletions
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa241..52b2e3856980 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/clocksource.h> | 12 | #include <linux/clocksource.h> |
13 | #include <linux/clockchips.h> | 13 | #include <linux/clockchips.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/math64.h> | ||
15 | 16 | ||
16 | #include <asm/xen/hypervisor.h> | 17 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 18 | #include <asm/xen/hypercall.h> |
@@ -150,11 +151,7 @@ static void do_stolen_accounting(void) | |||
150 | if (stolen < 0) | 151 | if (stolen < 0) |
151 | stolen = 0; | 152 | stolen = 0; |
152 | 153 | ||
153 | ticks = 0; | 154 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); |
154 | while (stolen >= NS_PER_TICK) { | ||
155 | ticks++; | ||
156 | stolen -= NS_PER_TICK; | ||
157 | } | ||
158 | __get_cpu_var(residual_stolen) = stolen; | 155 | __get_cpu_var(residual_stolen) = stolen; |
159 | account_steal_time(NULL, ticks); | 156 | account_steal_time(NULL, ticks); |
160 | 157 | ||
@@ -166,11 +163,7 @@ static void do_stolen_accounting(void) | |||
166 | if (blocked < 0) | 163 | if (blocked < 0) |
167 | blocked = 0; | 164 | blocked = 0; |
168 | 165 | ||
169 | ticks = 0; | 166 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); |
170 | while (blocked >= NS_PER_TICK) { | ||
171 | ticks++; | ||
172 | blocked -= NS_PER_TICK; | ||
173 | } | ||
174 | __get_cpu_var(residual_blocked) = blocked; | 167 | __get_cpu_var(residual_blocked) = blocked; |
175 | account_steal_time(idle_task(smp_processor_id()), ticks); | 168 | account_steal_time(idle_task(smp_processor_id()), ticks); |
176 | } | 169 | } |
diff --git a/include/linux/math64.h b/include/linux/math64.h index c1a5f81501ff..177785e1e4a3 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h | |||
@@ -81,4 +81,6 @@ static inline s64 div_s64(s64 dividend, s32 divisor) | |||
81 | } | 81 | } |
82 | #endif | 82 | #endif |
83 | 83 | ||
84 | u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); | ||
85 | |||
84 | #endif /* _LINUX_MATH64_H */ | 86 | #endif /* _LINUX_MATH64_H */ |
diff --git a/include/linux/time.h b/include/linux/time.h index d32ef0ad4c0a..05f9517a8ed1 100644 --- a/include/linux/time.h +++ b/include/linux/time.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #ifdef __KERNEL__ | 6 | #ifdef __KERNEL__ |
7 | # include <linux/cache.h> | 7 | # include <linux/cache.h> |
8 | # include <linux/seqlock.h> | 8 | # include <linux/seqlock.h> |
9 | # include <linux/math64.h> | ||
9 | #endif | 10 | #endif |
10 | 11 | ||
11 | #ifndef _STRUCT_TIMESPEC | 12 | #ifndef _STRUCT_TIMESPEC |
@@ -172,15 +173,7 @@ extern struct timeval ns_to_timeval(const s64 nsec); | |||
172 | */ | 173 | */ |
173 | static inline void timespec_add_ns(struct timespec *a, u64 ns) | 174 | static inline void timespec_add_ns(struct timespec *a, u64 ns) |
174 | { | 175 | { |
175 | ns += a->tv_nsec; | 176 | a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); |
176 | while(unlikely(ns >= NSEC_PER_SEC)) { | ||
177 | /* The following asm() prevents the compiler from | ||
178 | * optimising this loop into a modulo operation. */ | ||
179 | asm("" : "+r"(ns)); | ||
180 | |||
181 | ns -= NSEC_PER_SEC; | ||
182 | a->tv_sec++; | ||
183 | } | ||
184 | a->tv_nsec = ns; | 177 | a->tv_nsec = ns; |
185 | } | 178 | } |
186 | #endif /* __KERNEL__ */ | 179 | #endif /* __KERNEL__ */ |
diff --git a/lib/div64.c b/lib/div64.c index bb5bd0c0f030..76c01542d3e1 100644 --- a/lib/div64.c +++ b/lib/div64.c | |||
@@ -98,3 +98,26 @@ EXPORT_SYMBOL(div64_u64); | |||
98 | #endif | 98 | #endif |
99 | 99 | ||
100 | #endif /* BITS_PER_LONG == 32 */ | 100 | #endif /* BITS_PER_LONG == 32 */ |
101 | |||
102 | /* | ||
103 | * Iterative div/mod for use when dividend is not expected to be much | ||
104 | * bigger than divisor. | ||
105 | */ | ||
106 | u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) | ||
107 | { | ||
108 | u32 ret = 0; | ||
109 | |||
110 | while (dividend >= divisor) { | ||
111 | /* The following asm() prevents the compiler from | ||
112 | optimising this loop into a modulo operation. */ | ||
113 | asm("" : "+rm"(dividend)); | ||
114 | |||
115 | dividend -= divisor; | ||
116 | ret++; | ||
117 | } | ||
118 | |||
119 | *remainder = dividend; | ||
120 | |||
121 | return ret; | ||
122 | } | ||
123 | EXPORT_SYMBOL(iter_div_u64_rem); | ||