about | summary | refs | log | tree | commit | diff | stats
path: root/arch
diff options
context:
space:
mode:
author: Will Deacon <will.deacon@arm.com> 2012-07-06 10:47:17 -0400
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2012-07-09 12:42:23 -0400
commit: d0a533b18235d36206b9b422efadb7cee444dfdb (patch)
tree: 43dcdcf9eebd9c88430df031305ca4eb5c2346f4 /arch
parent: 923df96b9f31b7d08d8438ff9677326d9537accf (diff)
ARM: 7452/1: delay: allow timer-based delay implementation to be selected
This patch allows a timer-based delay implementation to be selected by
switching the delay routines over to use get_cycles, which is
implemented in terms of read_current_timer. This further allows us to
skip the loop calibration and have a consistent delay function in the
face of core frequency scaling.

To avoid the pain of dealing with memory-mapped counters, this
implementation uses the co-processor interface to the architected timers
when they are available. The previous loop-based implementation is kept
around for CPUs without the architected timers and we retain both the
maximum delay (2ms) and the corresponding conversion factors for
determining the number of loops required for a given interval.

Since the indirection of the timer routines will only work when called
from C, the sa1100 sleep routines are modified to branch to the
loop-based delay functions directly.

Tested-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch')
-rw-r--r-- arch/arm/include/asm/delay.h 32
-rw-r--r-- arch/arm/kernel/arch_timer.c 3
-rw-r--r-- arch/arm/kernel/armksyms.c 3
-rw-r--r-- arch/arm/lib/Makefile 2
-rw-r--r-- arch/arm/lib/delay-loop.S (renamed from arch/arm/lib/delay.S) 20
-rw-r--r-- arch/arm/lib/delay.c 71
-rw-r--r-- arch/arm/mach-sa1100/sleep.S 8
7 files changed, 114 insertions, 25 deletions
diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index b2deda181549..dc6145120de3 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -6,9 +6,22 @@
6#ifndef __ASM_ARM_DELAY_H 6#ifndef __ASM_ARM_DELAY_H
7#define __ASM_ARM_DELAY_H 7#define __ASM_ARM_DELAY_H
8 8
9#include <asm/memory.h>
9#include <asm/param.h> /* HZ */ 10#include <asm/param.h> /* HZ */
10 11
11extern void __delay(int loops); 12#define MAX_UDELAY_MS 2
13#define UDELAY_MULT ((UL(2199023) * HZ) >> 11)
14#define UDELAY_SHIFT 30
15
16#ifndef __ASSEMBLY__
17
18extern struct arm_delay_ops {
19 void (*delay)(unsigned long);
20 void (*const_udelay)(unsigned long);
21 void (*udelay)(unsigned long);
22} arm_delay_ops;
23
24#define __delay(n) arm_delay_ops.delay(n)
12 25
13/* 26/*
14 * This function intentionally does not exist; if you see references to 27 * This function intentionally does not exist; if you see references to
@@ -23,22 +36,27 @@ extern void __bad_udelay(void);
23 * division by multiplication: you don't have to worry about 36 * division by multiplication: you don't have to worry about
24 * loss of precision. 37 * loss of precision.
25 * 38 *
26 * Use only for very small delays ( < 1 msec). Should probably use a 39 * Use only for very small delays ( < 2 msec). Should probably use a
27 * lookup table, really, as the multiplications take much too long with 40 * lookup table, really, as the multiplications take much too long with
28 * short delays. This is a "reasonable" implementation, though (and the 41 * short delays. This is a "reasonable" implementation, though (and the
29 * first constant multiplications gets optimized away if the delay is 42 * first constant multiplications gets optimized away if the delay is
30 * a constant) 43 * a constant)
31 */ 44 */
32extern void __udelay(unsigned long usecs); 45#define __udelay(n) arm_delay_ops.udelay(n)
33extern void __const_udelay(unsigned long); 46#define __const_udelay(n) arm_delay_ops.const_udelay(n)
34
35#define MAX_UDELAY_MS 2
36 47
37#define udelay(n) \ 48#define udelay(n) \
38 (__builtin_constant_p(n) ? \ 49 (__builtin_constant_p(n) ? \
39 ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() : \ 50 ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() : \
40 __const_udelay((n) * ((2199023U*HZ)>>11))) : \ 51 __const_udelay((n) * UDELAY_MULT)) : \
41 __udelay(n)) 52 __udelay(n))
42 53
54/* Loop-based definitions for assembly code. */
55extern void __loop_delay(unsigned long loops);
56extern void __loop_udelay(unsigned long usecs);
57extern void __loop_const_udelay(unsigned long);
58
59#endif /* __ASSEMBLY__ */
60
43#endif /* defined(_ARM_DELAY_H) */ 61#endif /* defined(_ARM_DELAY_H) */
44 62
diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c
index dbbeec4f06e2..675cee09c014 100644
--- a/arch/arm/kernel/arch_timer.c
+++ b/arch/arm/kernel/arch_timer.c
@@ -32,6 +32,8 @@ static int arch_timer_ppi2;
32 32
33static struct clock_event_device __percpu **arch_timer_evt; 33static struct clock_event_device __percpu **arch_timer_evt;
34 34
35extern void init_current_timer_delay(unsigned long freq);
36
35/* 37/*
36 * Architected system timer support. 38 * Architected system timer support.
37 */ 39 */
@@ -304,6 +306,7 @@ static int __init arch_timer_register(void)
304 if (err) 306 if (err)
305 goto out_free_irq; 307 goto out_free_irq;
306 308
309 init_current_timer_delay(arch_timer_rate);
307 return 0; 310 return 0;
308 311
309out_free_irq: 312out_free_irq:
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index b57c75e0b01f..71962284d288 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -49,8 +49,7 @@ extern void __aeabi_ulcmp(void);
49extern void fpundefinstr(void); 49extern void fpundefinstr(void);
50 50
51 /* platform dependent support */ 51 /* platform dependent support */
52EXPORT_SYMBOL(__udelay); 52EXPORT_SYMBOL(arm_delay_ops);
53EXPORT_SYMBOL(__const_udelay);
54 53
55 /* networking */ 54 /* networking */
56EXPORT_SYMBOL(csum_partial); 55EXPORT_SYMBOL(csum_partial);
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 992769ae2599..b621114644fd 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -6,7 +6,7 @@
6 6
7lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ 7lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
8 csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ 8 csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
9 delay.o findbit.o memchr.o memcpy.o \ 9 delay.o delay-loop.o findbit.o memchr.o memcpy.o \
10 memmove.o memset.o memzero.o setbit.o \ 10 memmove.o memset.o memzero.o setbit.o \
11 strncpy_from_user.o strnlen_user.o \ 11 strncpy_from_user.o strnlen_user.o \
12 strchr.o strrchr.o \ 12 strchr.o strrchr.o \
diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay-loop.S
index 3c9a05c8d20b..36b668d8e121 100644
--- a/arch/arm/lib/delay.S
+++ b/arch/arm/lib/delay-loop.S
@@ -9,11 +9,11 @@
9 */ 9 */
10#include <linux/linkage.h> 10#include <linux/linkage.h>
11#include <asm/assembler.h> 11#include <asm/assembler.h>
12#include <asm/param.h> 12#include <asm/delay.h>
13 .text 13 .text
14 14
15.LC0: .word loops_per_jiffy 15.LC0: .word loops_per_jiffy
16.LC1: .word (2199023*HZ)>>11 16.LC1: .word UDELAY_MULT
17 17
18/* 18/*
19 * r0 <= 2000 19 * r0 <= 2000
@@ -21,10 +21,10 @@
21 * HZ <= 1000 21 * HZ <= 1000
22 */ 22 */
23 23
24ENTRY(__udelay) 24ENTRY(__loop_udelay)
25 ldr r2, .LC1 25 ldr r2, .LC1
26 mul r0, r2, r0 26 mul r0, r2, r0
27ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06 27ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
28 mov r1, #-1 28 mov r1, #-1
29 ldr r2, .LC0 29 ldr r2, .LC0
30 ldr r2, [r2] @ max = 0x01ffffff 30 ldr r2, [r2] @ max = 0x01ffffff
@@ -39,12 +39,10 @@ ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06
39 39
40/* 40/*
41 * loops = r0 * HZ * loops_per_jiffy / 1000000 41 * loops = r0 * HZ * loops_per_jiffy / 1000000
42 *
43 * Oh, if only we had a cycle counter...
44 */ 42 */
45 43
46@ Delay routine 44@ Delay routine
47ENTRY(__delay) 45ENTRY(__loop_delay)
48 subs r0, r0, #1 46 subs r0, r0, #1
49#if 0 47#if 0
50 movls pc, lr 48 movls pc, lr
@@ -62,8 +60,8 @@ ENTRY(__delay)
62 movls pc, lr 60 movls pc, lr
63 subs r0, r0, #1 61 subs r0, r0, #1
64#endif 62#endif
65 bhi __delay 63 bhi __loop_delay
66 mov pc, lr 64 mov pc, lr
67ENDPROC(__udelay) 65ENDPROC(__loop_udelay)
68ENDPROC(__const_udelay) 66ENDPROC(__loop_const_udelay)
69ENDPROC(__delay) 67ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
new file mode 100644
index 000000000000..d6dacc69254e
--- /dev/null
+++ b/arch/arm/lib/delay.c
@@ -0,0 +1,71 @@
1/*
2 * Delay loops based on the OpenRISC implementation.
3 *
4 * Copyright (C) 2012 ARM Limited
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *
19 * Author: Will Deacon <will.deacon@arm.com>
20 */
21
22#include <linux/delay.h>
23#include <linux/init.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/timex.h>
27
28/*
29 * Default to the loop-based delay implementation.
30 */
31struct arm_delay_ops arm_delay_ops = {
32 .delay = __loop_delay,
33 .const_udelay = __loop_const_udelay,
34 .udelay = __loop_udelay,
35};
36
37#ifdef ARCH_HAS_READ_CURRENT_TIMER
38static void __timer_delay(unsigned long cycles)
39{
40 cycles_t start = get_cycles();
41
42 while ((get_cycles() - start) < cycles)
43 cpu_relax();
44}
45
46static void __timer_const_udelay(unsigned long xloops)
47{
48 unsigned long long loops = xloops;
49 loops *= loops_per_jiffy;
50 __timer_delay(loops >> UDELAY_SHIFT);
51}
52
53static void __timer_udelay(unsigned long usecs)
54{
55 __timer_const_udelay(usecs * UDELAY_MULT);
56}
57
58void __init init_current_timer_delay(unsigned long freq)
59{
60 pr_info("Switching to timer-based delay loop\n");
61 lpj_fine = freq / HZ;
62 arm_delay_ops.delay = __timer_delay;
63 arm_delay_ops.const_udelay = __timer_const_udelay;
64 arm_delay_ops.udelay = __timer_udelay;
65}
66
67unsigned long __cpuinit calibrate_delay_is_known(void)
68{
69 return lpj_fine;
70}
71#endif
diff --git a/arch/arm/mach-sa1100/sleep.S b/arch/arm/mach-sa1100/sleep.S
index 30cc6721665b..85863741ef8b 100644
--- a/arch/arm/mach-sa1100/sleep.S
+++ b/arch/arm/mach-sa1100/sleep.S
@@ -38,9 +38,9 @@ ENTRY(sa1100_finish_suspend)
38 orr r4, r4, #MDREFR_K1DB2 38 orr r4, r4, #MDREFR_K1DB2
39 ldr r5, =PPCR 39 ldr r5, =PPCR
40 40
41 @ Pre-load __udelay into the I-cache 41 @ Pre-load __loop_udelay into the I-cache
42 mov r0, #1 42 mov r0, #1
43 bl __udelay 43 bl __loop_udelay
44 mov r0, r0 44 mov r0, r0
45 45
46 @ The following must all exist in a single cache line to 46 @ The following must all exist in a single cache line to
@@ -53,11 +53,11 @@ ENTRY(sa1100_finish_suspend)
53 @ delay 90us and set CPU PLL to lowest speed 53 @ delay 90us and set CPU PLL to lowest speed
54 @ fixes resume problem on high speed SA1110 54 @ fixes resume problem on high speed SA1110
55 mov r0, #90 55 mov r0, #90
56 bl __udelay 56 bl __loop_udelay
57 mov r1, #0 57 mov r1, #0
58 str r1, [r5] 58 str r1, [r5]
59 mov r0, #90 59 mov r0, #90
60 bl __udelay 60 bl __loop_udelay
61 61
62 /* 62 /*
63 * SA1110 SDRAM controller workaround. register values: 63 * SA1110 SDRAM controller workaround. register values: