 arch/x86/Kconfig                 |   3
 include/asm-x86/paravirt.h       |  21
 include/asm-x86/spinlock.h       | 153
 include/asm-x86/spinlock_types.h |   2
 4 files changed, 83 insertions(+), 96 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index db434f8171d3..1992b8fe6a2f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -20,8 +20,7 @@ config X86
 	def_bool y
 
 config GENERIC_LOCKBREAK
-	def_bool y
-	depends on SMP && PREEMPT
+	def_bool n
 
 config GENERIC_TIME
 	def_bool y
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 4f23f434a1f3..24406703007f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1077,27 +1077,6 @@ static inline unsigned long __raw_local_irq_save(void)
 	return f;
 }
 
-#define CLI_STRING						\
-	_paravirt_alt("pushl %%ecx; pushl %%edx;"		\
-		      "call *%[paravirt_cli_opptr];"		\
-		      "popl %%edx; popl %%ecx",			\
-		      "%c[paravirt_cli_type]", "%c[paravirt_clobber]")
-
-#define STI_STRING						\
-	_paravirt_alt("pushl %%ecx; pushl %%edx;"		\
-		      "call *%[paravirt_sti_opptr];"		\
-		      "popl %%edx; popl %%ecx",			\
-		      "%c[paravirt_sti_type]", "%c[paravirt_clobber]")
-
-#define CLI_STI_CLOBBERS	, "%eax"
-#define CLI_STI_INPUT_ARGS					\
-	,							\
-	[paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)),	\
-	[paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable),	\
-	[paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)),	\
-	[paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable),	\
-	paravirt_clobber(CLBR_EAX)
-
 /* Make sure as little as possible of this mess escapes. */
 #undef PARAVIRT_CALL
 #undef __PVOP_CALL
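The only user of the CLI_STRING/STI_STRING templates (and their clobber and input-argument helpers) was the old __raw_spin_lock_flags() slow path, which briefly re-enabled interrupts while spinning on a contended lock. The ticket-lock conversion below deletes that slow path, so the flags variant collapses to a plain alias and the paravirt templates become dead code:

	#define __raw_spin_lock_flags(lock, flags)	__raw_spin_lock(lock)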
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index afd4b80ff0ad..97d52b506af8 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -5,6 +5,7 @@
 #include <asm/rwlock.h>
 #include <asm/page.h>
 #include <asm/processor.h>
+#include <linux/compiler.h>
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
@@ -12,7 +13,8 @@
  * Simple spin lock operations. There are two variants, one clears IRQ's
  * on the local processor, one does not.
  *
- * We make no fairness assumptions. They have a cost.
+ * These are fair FIFO ticket locks, which are currently limited to 256
+ * CPUs.
  *
  * (the type definitions are in asm/spinlock_types.h)
 */
@@ -42,103 +44,102 @@ typedef int _slock_t;
 # define LOCK_PTR_REG "D"
 #endif
 
+#if (NR_CPUS > 256)
+#error spinlock supports a maximum of 256 CPUs
+#endif
+
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
-	return *(volatile _slock_t *)(&(lock)->slock) <= 0;
+	int tmp = *(volatile signed int *)(&(lock)->slock);
+
+	return (((tmp >> 8) & 0xff) != (tmp & 0xff));
 }
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
 {
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; " LOCK_INS_DEC " %0\n\t"
-		"jns 3f\n"
-		"2:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0,%0\n\t"
-		"jle 2b\n\t"
-		"jmp 1b\n"
-		"3:\n\t"
-		: "+m" (lock->slock) : : "memory");
+	int tmp = *(volatile signed int *)(&(lock)->slock);
+
+	return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1;
 }
 
-/*
- * It is easier for the lock validator if interrupts are not re-enabled
- * in the middle of a lock-acquire. This is a performance feature anyway
- * so we turn it off:
- *
- * NOTE: there's an irqs-on section here, which normally would have to be
- * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
- */
-#ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
-					 unsigned long flags)
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; " LOCK_INS_DEC " %[slock]\n\t"
-		"jns 5f\n"
-		"testl $0x200, %[flags]\n\t"
-		"jz 4f\n\t"
-		STI_STRING "\n"
-		"3:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0, %[slock]\n\t"
-		"jle 3b\n\t"
-		CLI_STRING "\n\t"
+	short inc = 0x0100;
+
+	/*
+	 * Ticket locks are conceptually two bytes, one indicating the current
+	 * head of the queue, and the other indicating the current tail. The
+	 * lock is acquired by atomically noting the tail and incrementing it
+	 * by one (thus adding ourselves to the queue and noting our position),
+	 * then waiting until the head becomes equal to the initial value
+	 * of the tail.
+	 *
+	 * This uses a 16-bit xadd to increment the tail and also load the
+	 * position of the head, which takes care of memory ordering issues
+	 * and should be optimal for the uncontended case. Note the tail must
+	 * be in the high byte, otherwise the 16-bit wide increment of the low
+	 * byte would carry up and contaminate the high byte.
+	 */
+
+	__asm__ __volatile__ (
+		LOCK_PREFIX "xaddw %w0, %1\n"
+		"1:\t"
+		"cmpb %h0, %b0\n\t"
+		"je 2f\n\t"
+		"rep ; nop\n\t"
+		"movb %1, %b0\n\t"
+		/* don't need lfence here, because loads are in-order */
 		"jmp 1b\n"
-		"4:\t"
-		"rep;nop\n\t"
-		LOCK_INS_CMP " $0, %[slock]\n\t"
-		"jg 1b\n\t"
-		"jmp 4b\n"
-		"5:\n\t"
-		: [slock] "+m" (lock->slock)
-		: [flags] "r" ((u32)flags)
-		  CLI_STI_INPUT_ARGS
-		: "memory" CLI_STI_CLOBBERS);
+		"2:"
+		:"+Q" (inc), "+m" (lock->slock)
+		:
+		:"memory", "cc");
 }
-#endif
+
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	_slock_t oldval;
+	int tmp;
+	short new;
 
 	asm volatile(
-		LOCK_INS_XCH " %0,%1"
-		:"=q" (oldval), "+m" (lock->slock)
-		:"0" (0) : "memory");
-
-	return oldval > 0;
+		"movw %2,%w0\n\t"
+		"cmpb %h0,%b0\n\t"
+		"jne 1f\n\t"
+		"movw %w0,%w1\n\t"
+		"incb %h1\n\t"
+		"lock ; cmpxchgw %w1,%2\n\t"
+		"1:"
+		"sete %b1\n\t"
+		"movzbl %b1,%0\n\t"
+		:"=&a" (tmp), "=Q" (new), "+m" (lock->slock)
+		:
+		: "memory", "cc");
+
+	return tmp;
 }
 
+#if defined(CONFIG_X86_32) && \
+	(defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
 /*
- * __raw_spin_unlock based on writing $1 to the low byte.
- * This method works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there)
+ * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
  * (PPro errata 66, 92)
  */
-#if defined(X86_64) || \
-	(!defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE))
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
-	asm volatile(LOCK_INS_MOV " $1,%0" : "=m" (lock->slock) :: "memory");
-}
-
+# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
 #else
+# define UNLOCK_LOCK_PREFIX
+#endif
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	unsigned char oldval = 1;
-
-	asm volatile("xchgb %b0, %1"
-		     : "=q" (oldval), "+m" (lock->slock)
-		     : "0" (oldval) : "memory");
+	__asm__ __volatile__(
+		UNLOCK_LOCK_PREFIX "incb %0"
+		:"+m" (lock->slock)
+		:
+		:"memory", "cc");
 }
 
-#endif
-
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
 	while (__raw_spin_is_locked(lock))
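To make the protocol in the ticket-lock comment above easier to follow outside inline assembly, here is a minimal user-space sketch using C11 atomics. It is a model, not the kernel code: every name in it is invented for the example, the xaddw above performs the enqueue-and-read in one locked instruction (mirrored here by a single 16-bit fetch-add), and incb touches only the head byte (emulated here with a compare-exchange loop).

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	/*
	 * Illustrative model: one 16-bit word holds the head (low byte,
	 * "now serving") and the tail (high byte, "next ticket"), as in
	 * the patch.  head == tail means unlocked.
	 */
	typedef struct {
		_Atomic uint16_t slock;
	} ticket_lock_t;

	static void ticket_lock(ticket_lock_t *lock)
	{
		/* LOCK xaddw: take a ticket (tail += 1) and read the old
		 * word in one atomic step.  Adding 0x0100 cannot carry
		 * into the head byte; any carry leaves the 16-bit word. */
		uint16_t old = atomic_fetch_add_explicit(&lock->slock, 0x0100,
							 memory_order_acquire);
		uint8_t me = (uint8_t)(old >> 8);

		/* spin until the head byte reaches our ticket
		 * ("cmpb %h0, %b0" / "movb %1, %b0" above) */
		while ((uint8_t)atomic_load_explicit(&lock->slock,
						     memory_order_acquire) != me)
			;	/* the kernel issues "rep ; nop" (PAUSE) here */
	}

	static bool ticket_trylock(ticket_lock_t *lock)
	{
		uint16_t old = atomic_load_explicit(&lock->slock,
						    memory_order_relaxed);

		if ((uint8_t)(old >> 8) != (uint8_t)old)
			return false;	/* head != tail: held or queued */

		/* "cmpxchgw": enqueue only if the word is unchanged */
		return atomic_compare_exchange_strong_explicit(&lock->slock,
				&old, (uint16_t)(old + 0x0100),
				memory_order_acquire, memory_order_relaxed);
	}

	static void ticket_unlock(ticket_lock_t *lock)
	{
		/* the kernel's "incb" bumps only the head byte; emulate the
		 * byte-wide wrap-around with a CAS loop */
		uint16_t old = atomic_load_explicit(&lock->slock,
						    memory_order_relaxed);

		while (!atomic_compare_exchange_weak_explicit(&lock->slock,
				&old,
				(uint16_t)((old & 0xff00) | ((old + 1) & 0xff)),
				memory_order_release, memory_order_relaxed))
			;	/* old is refreshed on each failure */
	}

Because tickets are handed out in xadd order and the head only ever advances by one, waiters are served strictly FIFO. That fairness is what allows the patch to switch off GENERIC_LOCKBREAK in Kconfig and to delete the interrupt-re-enabling __raw_spin_lock_flags() slow path.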
@@ -159,11 +160,19 @@ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
  * with the high bit (sign) being the "contended" bit.
  */
 
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
 static inline int __raw_read_can_lock(raw_rwlock_t *lock)
 {
 	return (int)(lock)->lock > 0;
 }
 
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
 static inline int __raw_write_can_lock(raw_rwlock_t *lock)
 {
 	return (lock)->lock == RW_LOCK_BIAS;
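The two kernel-doc blocks added here only document existing behaviour; both predicates fall out of the bias encoding described in the comment above (each reader subtracts 1 from the lock word, a writer subtracts the whole bias). A small illustration, assuming the x86 value RW_LOCK_BIAS == 0x01000000 from asm-x86/rwlock.h; the helper names are invented for the example:

	#define RW_LOCK_BIAS	0x01000000	/* assumed x86 value */

	/* mirrors __raw_read_can_lock(): positive means no writer present */
	static int read_can_lock(unsigned int lock)
	{
		return (int)lock > 0;
	}

	/* mirrors __raw_write_can_lock(): full bias means no readers either */
	static int write_can_lock(unsigned int lock)
	{
		return lock == RW_LOCK_BIAS;
	}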
diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h
index 4da9345c1500..9029cf78cf5d 100644
--- a/include/asm-x86/spinlock_types.h
+++ b/include/asm-x86/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	unsigned int slock;
 } raw_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
+#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	unsigned int lock;
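The initializer change follows from the ticket encoding: the lock is now free when the head and tail bytes agree, so the all-zero word is the natural unlocked state, whereas the old code treated a positive slock as unlocked (hence the previous { 1 }). A stand-alone self-check against the computation used by the new __raw_spin_is_locked():

	#include <assert.h>

	int main(void)
	{
		int tmp = 0;	/* __RAW_SPIN_LOCK_UNLOCKED { 0 }: head == tail == 0 */

		/* same test as the new __raw_spin_is_locked() */
		assert((((tmp >> 8) & 0xff) != (tmp & 0xff)) == 0);
		return 0;
	}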