diff options
author | Chris Metcalf <cmetcalf@tilera.com> | 2011-02-28 13:21:52 -0500 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2011-03-01 16:20:04 -0500 |
commit | 13371731487896a6ef158b1cd74297f40a3da4bb (patch) | |
tree | af09fca3fd8811340b373faaddcdb528f8a07669 | |
parent | 04f7a3f12e10032ee3d44df1a509dbf5b2001fce (diff) |
arch/tile: fix __ndelay etc to work better
The current implementations of __ndelay and __udelay call a hypervisor
service to delay, but the hypervisor service isn't actually implemented
very well, and the consensus is that Linux should handle figuring this
out natively and not use a hypervisor service.
By converting nanoseconds to cycles, and then spinning until the
cycle counter reaches the desired cycle, we get several benefits:
first, we are sensitive to the actual clock speed; second, we use
less power by issuing a slow SPR read once every six cycles while
we delay; and third, we properly handle the case of an interrupt by
exiting at the target time rather than after some number of cycles.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
-rw-r--r-- | arch/tile/include/asm/timex.h | 3 | ||||
-rw-r--r-- | arch/tile/include/hv/hypervisor.h | 5 | ||||
-rw-r--r-- | arch/tile/kernel/entry.S | 6 | ||||
-rw-r--r-- | arch/tile/kernel/time.c | 10 | ||||
-rw-r--r-- | arch/tile/lib/delay.c | 21 |
5 files changed, 34 insertions, 11 deletions
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h index 3baf5fc4c0a1..29921f0b86da 100644 --- a/arch/tile/include/asm/timex.h +++ b/arch/tile/include/asm/timex.h | |||
@@ -38,6 +38,9 @@ static inline cycles_t get_cycles(void) | |||
38 | 38 | ||
39 | cycles_t get_clock_rate(void); | 39 | cycles_t get_clock_rate(void); |
40 | 40 | ||
41 | /* Convert nanoseconds to core clock cycles. */ | ||
42 | cycles_t ns2cycles(unsigned long nsecs); | ||
43 | |||
41 | /* Called at cpu initialization to set some low-level constants. */ | 44 | /* Called at cpu initialization to set some low-level constants. */ |
42 | void setup_clock(void); | 45 | void setup_clock(void); |
43 | 46 | ||
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h index f672544cd4f9..103986b0c10a 100644 --- a/arch/tile/include/hv/hypervisor.h +++ b/arch/tile/include/hv/hypervisor.h | |||
@@ -964,6 +964,11 @@ HV_ASIDRange hv_inquire_asid(int idx); | |||
964 | 964 | ||
965 | /** Waits for at least the specified number of nanoseconds then returns. | 965 | /** Waits for at least the specified number of nanoseconds then returns. |
966 | * | 966 | * |
967 | * NOTE: this deprecated function currently assumes a 750 MHz clock, | ||
968 | * and is thus not generally suitable for use. New code should call | ||
969 | * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for, | ||
970 | * and delay by looping while checking the cycle counter SPR. | ||
971 | * | ||
967 | * @param nanosecs The number of nanoseconds to sleep. | 972 | * @param nanosecs The number of nanoseconds to sleep. |
968 | */ | 973 | */ |
969 | void hv_nanosleep(int nanosecs); | 974 | void hv_nanosleep(int nanosecs); |
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index fd8dc42abdcb..c3aa0676ed06 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S | |||
@@ -38,12 +38,6 @@ STD_ENTRY(kernel_execve) | |||
38 | jrp lr | 38 | jrp lr |
39 | STD_ENDPROC(kernel_execve) | 39 | STD_ENDPROC(kernel_execve) |
40 | 40 | ||
41 | /* Delay a fixed number of cycles. */ | ||
42 | STD_ENTRY(__delay) | ||
43 | { addi r0, r0, -1; bnzt r0, . } | ||
44 | jrp lr | ||
45 | STD_ENDPROC(__delay) | ||
46 | |||
47 | /* | 41 | /* |
48 | * We don't run this function directly, but instead copy it to a page | 42 | * We don't run this function directly, but instead copy it to a page |
49 | * we map into every user process. See vdso_setup(). | 43 | * we map into every user process. See vdso_setup(). |
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index f2e156e44692..49a605be94c5 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c | |||
@@ -224,3 +224,13 @@ int setup_profiling_timer(unsigned int multiplier) | |||
224 | { | 224 | { |
225 | return -EINVAL; | 225 | return -EINVAL; |
226 | } | 226 | } |
227 | |||
228 | /* | ||
229 | * Use the tile timer to convert nsecs to core clock cycles, relying | ||
230 | * on it having the same frequency as SPR_CYCLE. | ||
231 | */ | ||
232 | cycles_t ns2cycles(unsigned long nsecs) | ||
233 | { | ||
234 | struct clock_event_device *dev = &__get_cpu_var(tile_timer); | ||
235 | return ((u64)nsecs * dev->mult) >> dev->shift; | ||
236 | } | ||
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c index 5801b03c13ef..cdacdd11d360 100644 --- a/arch/tile/lib/delay.c +++ b/arch/tile/lib/delay.c | |||
@@ -15,20 +15,31 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
17 | #include <linux/thread_info.h> | 17 | #include <linux/thread_info.h> |
18 | #include <asm/fixmap.h> | 18 | #include <asm/timex.h> |
19 | #include <hv/hypervisor.h> | ||
20 | 19 | ||
21 | void __udelay(unsigned long usecs) | 20 | void __udelay(unsigned long usecs) |
22 | { | 21 | { |
23 | hv_nanosleep(usecs * 1000); | 22 | if (usecs > ULONG_MAX / 1000) { |
23 | WARN_ON_ONCE(usecs > ULONG_MAX / 1000); | ||
24 | usecs = ULONG_MAX / 1000; | ||
25 | } | ||
26 | __ndelay(usecs * 1000); | ||
24 | } | 27 | } |
25 | EXPORT_SYMBOL(__udelay); | 28 | EXPORT_SYMBOL(__udelay); |
26 | 29 | ||
27 | void __ndelay(unsigned long nsecs) | 30 | void __ndelay(unsigned long nsecs) |
28 | { | 31 | { |
29 | hv_nanosleep(nsecs); | 32 | cycles_t target = get_cycles(); |
33 | target += ns2cycles(nsecs); | ||
34 | while (get_cycles() < target) | ||
35 | cpu_relax(); | ||
30 | } | 36 | } |
31 | EXPORT_SYMBOL(__ndelay); | 37 | EXPORT_SYMBOL(__ndelay); |
32 | 38 | ||
33 | /* FIXME: should be declared in a header somewhere. */ | 39 | void __delay(unsigned long cycles) |
40 | { | ||
41 | cycles_t target = get_cycles() + cycles; | ||
42 | while (get_cycles() < target) | ||
43 | cpu_relax(); | ||
44 | } | ||
34 | EXPORT_SYMBOL(__delay); | 45 | EXPORT_SYMBOL(__delay); |