Diffstat (limited to 'arch/x86/lib')
-rw-r--r--  arch/x86/lib/copy_user_64.S          | 25
-rw-r--r--  arch/x86/lib/copy_user_nocache_64.S  | 25
-rw-r--r--  arch/x86/lib/delay_32.c              | 31
-rw-r--r--  arch/x86/lib/delay_64.c              | 30
4 files changed, 75 insertions(+), 36 deletions(-)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 70bebd310408..ee1c3f635157 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -217,19 +217,19 @@ ENTRY(copy_user_generic_unrolled)
 	/* table sorted by exception address */
 	.section __ex_table,"a"
 	.align 8
-	.quad .Ls1,.Ls1e
-	.quad .Ls2,.Ls2e
-	.quad .Ls3,.Ls3e
-	.quad .Ls4,.Ls4e
-	.quad .Ld1,.Ls1e
+	.quad .Ls1,.Ls1e	/* Ls1-Ls4 have copied zero bytes */
+	.quad .Ls2,.Ls1e
+	.quad .Ls3,.Ls1e
+	.quad .Ls4,.Ls1e
+	.quad .Ld1,.Ls1e	/* Ld1-Ld4 have copied 0-24 bytes */
 	.quad .Ld2,.Ls2e
 	.quad .Ld3,.Ls3e
 	.quad .Ld4,.Ls4e
-	.quad .Ls5,.Ls5e
-	.quad .Ls6,.Ls6e
-	.quad .Ls7,.Ls7e
-	.quad .Ls8,.Ls8e
-	.quad .Ld5,.Ls5e
+	.quad .Ls5,.Ls5e	/* Ls5-Ls8 have copied 32 bytes */
+	.quad .Ls6,.Ls5e
+	.quad .Ls7,.Ls5e
+	.quad .Ls8,.Ls5e
+	.quad .Ld5,.Ls5e	/* Ld5-Ld8 have copied 32-56 bytes */
 	.quad .Ld6,.Ls6e
 	.quad .Ld7,.Ls7e
 	.quad .Ld8,.Ls8e
@@ -244,11 +244,8 @@ ENTRY(copy_user_generic_unrolled)
 	.quad .Le5,.Le_zero
 	.previous
 
-	/* compute 64-offset for main loop. 8 bytes accuracy with error on the
-	   pessimistic side. this is gross. it would be better to fix the
-	   interface. */
 	/* eax: zero, ebx: 64 */
-.Ls1e:	addl $8,%eax
+.Ls1e:	addl $8,%eax	/* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
 .Ls2e:	addl $8,%eax
 .Ls3e:	addl $8,%eax
 .Ls4e:	addl $8,%eax
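
The retargeted source-fault entries above are easier to follow in C. Below is a rough model (illustrative only, not kernel code; block_bytes_uncopied, fault_kind and k are made-up names) of what the .Ls1e-.Ls8e fixup chain computes for the 64-byte block being processed when the fault hits: a source load that faults before any store means nothing from this block reached the destination, while a faulting destination store only loses the stores that had not yet executed. The full fixup additionally accounts for the blocks the loop never started.

/*
 * Rough C model of the per-block fixup arithmetic; k is the 1-based
 * index of the faulting access within the unrolled 64-byte iteration.
 */
enum fault_kind { FAULT_SRC_LOAD, FAULT_DST_STORE };

static unsigned int block_bytes_uncopied(enum fault_kind kind, unsigned int k)
{
	if (kind == FAULT_SRC_LOAD)
		/* .Ls1-.Ls4 fault before any store: all 64 bytes left.
		 * .Ls5-.Ls8 fault after stores 1-4:  32 bytes left. */
		return k <= 4 ? 64 : 32;

	/* Store k faults after stores 1..k-1 landed: 8*(k-1) bytes copied. */
	return 64 - 8 * (k - 1);
}
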
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 5196762b3b0e..9d3d1ab83763 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -145,19 +145,19 @@ ENTRY(__copy_user_nocache)
 	/* table sorted by exception address */
 	.section __ex_table,"a"
 	.align 8
-	.quad .Ls1,.Ls1e
-	.quad .Ls2,.Ls2e
-	.quad .Ls3,.Ls3e
-	.quad .Ls4,.Ls4e
-	.quad .Ld1,.Ls1e
+	.quad .Ls1,.Ls1e	/* .Ls[1-4] - 0 bytes copied */
+	.quad .Ls2,.Ls1e
+	.quad .Ls3,.Ls1e
+	.quad .Ls4,.Ls1e
+	.quad .Ld1,.Ls1e	/* .Ld[1-4] - 0..24 bytes copied */
 	.quad .Ld2,.Ls2e
 	.quad .Ld3,.Ls3e
 	.quad .Ld4,.Ls4e
-	.quad .Ls5,.Ls5e
-	.quad .Ls6,.Ls6e
-	.quad .Ls7,.Ls7e
-	.quad .Ls8,.Ls8e
-	.quad .Ld5,.Ls5e
+	.quad .Ls5,.Ls5e	/* .Ls[5-8] - 32 bytes copied */
+	.quad .Ls6,.Ls5e
+	.quad .Ls7,.Ls5e
+	.quad .Ls8,.Ls5e
+	.quad .Ld5,.Ls5e	/* .Ld[5-8] - 32..56 bytes copied */
 	.quad .Ld6,.Ls6e
 	.quad .Ld7,.Ls7e
 	.quad .Ld8,.Ls8e
@@ -172,11 +172,8 @@ ENTRY(__copy_user_nocache)
 	.quad .Le5,.Le_zero
 	.previous
 
-	/* compute 64-offset for main loop. 8 bytes accuracy with error on the
-	   pessimistic side. this is gross. it would be better to fix the
-	   interface. */
 	/* eax: zero, ebx: 64 */
-.Ls1e:	addl $8,%eax
+.Ls1e:	addl $8,%eax	/* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
 .Ls2e:	addl $8,%eax
 .Ls3e:	addl $8,%eax
 .Ls4e:	addl $8,%eax
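
The same retargeting is applied to the non-temporal variant, and it matters because both routines report progress through their return value: the number of bytes they failed to copy. A minimal caller-side sketch (illustrative, not part of this patch; fetch_partial is a made-up helper) of why that count must not be over-optimistic:

#include <linux/types.h>
#include <linux/uaccess.h>

/* Illustrative helper: callers derive how much data in 'dst' is valid
 * from the "bytes left uncopied" count, so the fixups above must never
 * claim more progress than the stores actually made. */
static size_t fetch_partial(void *dst, const void __user *src, size_t len)
{
	size_t left = copy_from_user(dst, src, len);

	return len - left;	/* bytes that really landed in dst */
}
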
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index 4535e6d147ad..d710f2d167bb 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -44,13 +44,36 @@ static void delay_loop(unsigned long loops)
 static void delay_tsc(unsigned long loops)
 {
 	unsigned long bclock, now;
+	int cpu;
 
-	preempt_disable();		/* TSC's are per-cpu */
+	preempt_disable();
+	cpu = smp_processor_id();
 	rdtscl(bclock);
-	do {
-		rep_nop();
+	for (;;) {
 		rdtscl(now);
-	} while ((now-bclock) < loops);
+		if ((now - bclock) >= loops)
+			break;
+
+		/* Allow RT tasks to run */
+		preempt_enable();
+		rep_nop();
+		preempt_disable();
+
+		/*
+		 * It is possible that we moved to another CPU, and
+		 * since TSC's are per-cpu we need to calculate
+		 * that. The delay must guarantee that we wait "at
+		 * least" the amount of time. Being moved to another
+		 * CPU could make the wait longer but we just need to
+		 * make sure we waited long enough. Rebalance the
+		 * counter for this CPU.
+		 */
+		if (unlikely(cpu != smp_processor_id())) {
+			loops -= (now - bclock);
+			cpu = smp_processor_id();
+			rdtscl(bclock);
+		}
+	}
 	preempt_enable();
 }
 
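
For readers who want to poke at the control flow outside the kernel, here is a minimal userspace sketch of the reworked loop, assuming __rdtsc() and sched_getcpu() as stand-ins for rdtscl() and smp_processor_id(); the preempt_enable()/rep_nop()/preempt_disable() window has no userspace equivalent and is only marked with a comment.

#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <x86intrin.h>

static void delay_tsc_model(uint64_t loops)
{
	uint64_t bclock, now;
	int cpu = sched_getcpu();

	bclock = __rdtsc();
	for (;;) {
		now = __rdtsc();
		if ((now - bclock) >= loops)
			break;

		/* kernel: preempt_enable(); rep_nop(); preempt_disable(); */

		if (cpu != sched_getcpu()) {
			/* Migrated: credit the cycles already counted, then
			 * restart the measurement against the new CPU's TSC. */
			loops -= (now - bclock);
			cpu = sched_getcpu();
			bclock = __rdtsc();
		}
	}
}

The design point is that elapsed cycles are only ever measured against a base taken on the CPU the task is currently running on, with time already waited re-credited whenever a migration is detected.
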
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
index bbc610518516..4c441be92641 100644
--- a/arch/x86/lib/delay_64.c
+++ b/arch/x86/lib/delay_64.c
@@ -31,14 +31,36 @@ int __devinit read_current_timer(unsigned long *timer_value)
 void __delay(unsigned long loops)
 {
 	unsigned bclock, now;
+	int cpu;
 
-	preempt_disable();		/* TSC's are pre-cpu */
+	preempt_disable();
+	cpu = smp_processor_id();
 	rdtscl(bclock);
-	do {
-		rep_nop();
+	for (;;) {
 		rdtscl(now);
+		if ((now - bclock) >= loops)
+			break;
+
+		/* Allow RT tasks to run */
+		preempt_enable();
+		rep_nop();
+		preempt_disable();
+
+		/*
+		 * It is possible that we moved to another CPU, and
+		 * since TSC's are per-cpu we need to calculate
+		 * that. The delay must guarantee that we wait "at
+		 * least" the amount of time. Being moved to another
+		 * CPU could make the wait longer but we just need to
+		 * make sure we waited long enough. Rebalance the
+		 * counter for this CPU.
+		 */
+		if (unlikely(cpu != smp_processor_id())) {
+			loops -= (now - bclock);
+			cpu = smp_processor_id();
+			rdtscl(bclock);
+		}
 	}
-	while ((now-bclock) < loops);
 	preempt_enable();
 }
 EXPORT_SYMBOL(__delay);
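
To put numbers on the rebalancing comment (illustrative figures only): with loops = 1000, suppose the task turns out to be on a different CPU once now - bclock has reached 400. loops is cut to 600 and bclock is re-read on the new CPU, so the delay neither restarts from scratch (roughly 400 + 600 = 1000 counted cycles in total) nor keeps measuring later iterations against a base taken on the old CPU's TSC; that is what the comment's "at least the amount of time" guarantee relies on, and, as it also notes, a migration can only stretch the wait.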