diff options
Diffstat (limited to 'kernel/smp.c')
| -rw-r--r-- | kernel/smp.c | 432 |
1 files changed, 236 insertions, 196 deletions
diff --git a/kernel/smp.c b/kernel/smp.c index bbedbb7efe32..858baac568ee 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
| @@ -2,40 +2,82 @@ | |||
| 2 | * Generic helpers for smp ipi calls | 2 | * Generic helpers for smp ipi calls |
| 3 | * | 3 | * |
| 4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 | 4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 |
| 5 | * | ||
| 6 | */ | 5 | */ |
| 7 | #include <linux/init.h> | ||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/percpu.h> | ||
| 10 | #include <linux/rcupdate.h> | 6 | #include <linux/rcupdate.h> |
| 11 | #include <linux/rculist.h> | 7 | #include <linux/rculist.h> |
| 8 | #include <linux/kernel.h> | ||
| 9 | #include <linux/module.h> | ||
| 10 | #include <linux/percpu.h> | ||
| 11 | #include <linux/init.h> | ||
| 12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
| 13 | #include <linux/cpu.h> | ||
| 13 | 14 | ||
| 14 | static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); | 15 | static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); |
| 15 | static LIST_HEAD(call_function_queue); | 16 | |
| 16 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock); | 17 | static struct { |
| 18 | struct list_head queue; | ||
| 19 | spinlock_t lock; | ||
| 20 | } call_function __cacheline_aligned_in_smp = | ||
| 21 | { | ||
| 22 | .queue = LIST_HEAD_INIT(call_function.queue), | ||
| 23 | .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), | ||
| 24 | }; | ||
| 17 | 25 | ||
| 18 | enum { | 26 | enum { |
| 19 | CSD_FLAG_WAIT = 0x01, | 27 | CSD_FLAG_LOCK = 0x01, |
| 20 | CSD_FLAG_ALLOC = 0x02, | ||
| 21 | CSD_FLAG_LOCK = 0x04, | ||
| 22 | }; | 28 | }; |
| 23 | 29 | ||
| 24 | struct call_function_data { | 30 | struct call_function_data { |
| 25 | struct call_single_data csd; | 31 | struct call_single_data csd; |
| 26 | spinlock_t lock; | 32 | spinlock_t lock; |
| 27 | unsigned int refs; | 33 | unsigned int refs; |
| 28 | struct rcu_head rcu_head; | 34 | cpumask_var_t cpumask; |
| 29 | unsigned long cpumask_bits[]; | ||
| 30 | }; | 35 | }; |
| 31 | 36 | ||
| 32 | struct call_single_queue { | 37 | struct call_single_queue { |
| 33 | struct list_head list; | 38 | struct list_head list; |
| 34 | spinlock_t lock; | 39 | spinlock_t lock; |
| 40 | }; | ||
| 41 | |||
| 42 | static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { | ||
| 43 | .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock), | ||
| 44 | }; | ||
| 45 | |||
| 46 | static int | ||
| 47 | hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
| 48 | { | ||
| 49 | long cpu = (long)hcpu; | ||
| 50 | struct call_function_data *cfd = &per_cpu(cfd_data, cpu); | ||
| 51 | |||
| 52 | switch (action) { | ||
| 53 | case CPU_UP_PREPARE: | ||
| 54 | case CPU_UP_PREPARE_FROZEN: | ||
| 55 | if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, | ||
| 56 | cpu_to_node(cpu))) | ||
| 57 | return NOTIFY_BAD; | ||
| 58 | break; | ||
| 59 | |||
| 60 | #ifdef CONFIG_CPU_HOTPLUG | ||
| 61 | case CPU_UP_CANCELED: | ||
| 62 | case CPU_UP_CANCELED_FROZEN: | ||
| 63 | |||
| 64 | case CPU_DEAD: | ||
| 65 | case CPU_DEAD_FROZEN: | ||
| 66 | free_cpumask_var(cfd->cpumask); | ||
| 67 | break; | ||
| 68 | #endif | ||
| 69 | }; | ||
| 70 | |||
| 71 | return NOTIFY_OK; | ||
| 72 | } | ||
| 73 | |||
| 74 | static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { | ||
| 75 | .notifier_call = hotplug_cfd, | ||
| 35 | }; | 76 | }; |
| 36 | 77 | ||
| 37 | static int __cpuinit init_call_single_data(void) | 78 | static int __cpuinit init_call_single_data(void) |
| 38 | { | 79 | { |
| 80 | void *cpu = (void *)(long)smp_processor_id(); | ||
| 39 | int i; | 81 | int i; |
| 40 | 82 | ||
| 41 | for_each_possible_cpu(i) { | 83 | for_each_possible_cpu(i) { |
| @@ -44,29 +86,63 @@ static int __cpuinit init_call_single_data(void) | |||
| 44 | spin_lock_init(&q->lock); | 86 | spin_lock_init(&q->lock); |
| 45 | INIT_LIST_HEAD(&q->list); | 87 | INIT_LIST_HEAD(&q->list); |
| 46 | } | 88 | } |
| 89 | |||
| 90 | hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); | ||
| 91 | register_cpu_notifier(&hotplug_cfd_notifier); | ||
| 92 | |||
| 47 | return 0; | 93 | return 0; |
| 48 | } | 94 | } |
| 49 | early_initcall(init_call_single_data); | 95 | early_initcall(init_call_single_data); |
| 50 | 96 | ||
| 51 | static void csd_flag_wait(struct call_single_data *data) | 97 | /* |
| 98 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources | ||
| 99 | * | ||
| 100 | * For non-synchronous ipi calls the csd can still be in use by the | ||
| 101 | * previous function call. For multi-cpu calls it's even more interesting | ||
| 102 | * as we'll have to ensure no other cpu is observing our csd. | ||
| 103 | */ | ||
| 104 | static void csd_lock_wait(struct call_single_data *data) | ||
| 52 | { | 105 | { |
| 53 | /* Wait for response */ | 106 | while (data->flags & CSD_FLAG_LOCK) |
| 54 | do { | ||
| 55 | if (!(data->flags & CSD_FLAG_WAIT)) | ||
| 56 | break; | ||
| 57 | cpu_relax(); | 107 | cpu_relax(); |
| 58 | } while (1); | 108 | } |
| 109 | |||
| 110 | static void csd_lock(struct call_single_data *data) | ||
| 111 | { | ||
| 112 | csd_lock_wait(data); | ||
| 113 | data->flags = CSD_FLAG_LOCK; | ||
| 114 | |||
| 115 | /* | ||
| 116 | * prevent CPU from reordering the above assignment | ||
| 117 | * to ->flags with any subsequent assignments to other | ||
| 118 | * fields of the specified call_single_data structure: | ||
| 119 | */ | ||
| 120 | smp_mb(); | ||
| 121 | } | ||
| 122 | |||
| 123 | static void csd_unlock(struct call_single_data *data) | ||
| 124 | { | ||
| 125 | WARN_ON(!(data->flags & CSD_FLAG_LOCK)); | ||
| 126 | |||
| 127 | /* | ||
| 128 | * ensure we're all done before releasing data: | ||
| 129 | */ | ||
| 130 | smp_mb(); | ||
| 131 | |||
| 132 | data->flags &= ~CSD_FLAG_LOCK; | ||
| 59 | } | 133 | } |
| 60 | 134 | ||
| 61 | /* | 135 | /* |
| 62 | * Insert a previously allocated call_single_data element for execution | 136 | * Insert a previously allocated call_single_data element |
| 63 | * on the given CPU. data must already have ->func, ->info, and ->flags set. | 137 | * for execution on the given CPU. data must already have |
| 138 | * ->func, ->info, and ->flags set. | ||
| 64 | */ | 139 | */ |
| 65 | static void generic_exec_single(int cpu, struct call_single_data *data) | 140 | static |
| 141 | void generic_exec_single(int cpu, struct call_single_data *data, int wait) | ||
| 66 | { | 142 | { |
| 67 | struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); | 143 | struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); |
| 68 | int wait = data->flags & CSD_FLAG_WAIT, ipi; | ||
| 69 | unsigned long flags; | 144 | unsigned long flags; |
| 145 | int ipi; | ||
| 70 | 146 | ||
| 71 | spin_lock_irqsave(&dst->lock, flags); | 147 | spin_lock_irqsave(&dst->lock, flags); |
| 72 | ipi = list_empty(&dst->list); | 148 | ipi = list_empty(&dst->list); |
| @@ -74,24 +150,21 @@ static void generic_exec_single(int cpu, struct call_single_data *data) | |||
| 74 | spin_unlock_irqrestore(&dst->lock, flags); | 150 | spin_unlock_irqrestore(&dst->lock, flags); |
| 75 | 151 | ||
| 76 | /* | 152 | /* |
| 77 | * Make the list addition visible before sending the ipi. | 153 | * The list addition should be visible before sending the IPI: |
| 154 | * the handler locks the list to pull the entry off it, and the | ||
| 155 | * normal cache coherency rules implied by spinlocks apply. | ||
| 156 | * | ||
| 157 | * If IPIs can go out of order to the cache coherency protocol | ||
| 158 | * in an architecture, sufficient synchronisation should be added | ||
| 159 | * to arch code to make it appear to obey cache coherency WRT | ||
| 160 | * locking and barrier primitives. Generic code isn't really | ||
| 161 | * equipped to do the right thing... | ||
| 78 | */ | 162 | */ |
| 79 | smp_mb(); | ||
| 80 | |||
| 81 | if (ipi) | 163 | if (ipi) |
| 82 | arch_send_call_function_single_ipi(cpu); | 164 | arch_send_call_function_single_ipi(cpu); |
| 83 | 165 | ||
| 84 | if (wait) | 166 | if (wait) |
| 85 | csd_flag_wait(data); | 167 | csd_lock_wait(data); |
| 86 | } | ||
| 87 | |||
| 88 | static void rcu_free_call_data(struct rcu_head *head) | ||
| 89 | { | ||
| 90 | struct call_function_data *data; | ||
| 91 | |||
| 92 | data = container_of(head, struct call_function_data, rcu_head); | ||
| 93 | |||
| 94 | kfree(data); | ||
| 95 | } | 168 | } |
| 96 | 169 | ||
| 97 | /* | 170 | /* |
| @@ -104,99 +177,83 @@ void generic_smp_call_function_interrupt(void) | |||
| 104 | int cpu = get_cpu(); | 177 | int cpu = get_cpu(); |
| 105 | 178 | ||
| 106 | /* | 179 | /* |
| 107 | * It's ok to use list_for_each_rcu() here even though we may delete | 180 | * Ensure entry is visible on call_function_queue after we have |
| 108 | * 'pos', since list_del_rcu() doesn't clear ->next | 181 | * entered the IPI. See comment in smp_call_function_many. |
| 182 | * If we don't have this, then we may miss an entry on the list | ||
| 183 | * and never get another IPI to process it. | ||
| 184 | */ | ||
| 185 | smp_mb(); | ||
| 186 | |||
| 187 | /* | ||
| 188 | * It's ok to use list_for_each_rcu() here even though we may | ||
| 189 | * delete 'pos', since list_del_rcu() doesn't clear ->next | ||
| 109 | */ | 190 | */ |
| 110 | rcu_read_lock(); | 191 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { |
| 111 | list_for_each_entry_rcu(data, &call_function_queue, csd.list) { | ||
| 112 | int refs; | 192 | int refs; |
| 113 | 193 | ||
| 114 | if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits))) | 194 | spin_lock(&data->lock); |
| 195 | if (!cpumask_test_cpu(cpu, data->cpumask)) { | ||
| 196 | spin_unlock(&data->lock); | ||
| 115 | continue; | 197 | continue; |
| 198 | } | ||
| 199 | cpumask_clear_cpu(cpu, data->cpumask); | ||
| 200 | spin_unlock(&data->lock); | ||
| 116 | 201 | ||
| 117 | data->csd.func(data->csd.info); | 202 | data->csd.func(data->csd.info); |
| 118 | 203 | ||
| 119 | spin_lock(&data->lock); | 204 | spin_lock(&data->lock); |
| 120 | cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits)); | ||
| 121 | WARN_ON(data->refs == 0); | 205 | WARN_ON(data->refs == 0); |
| 122 | data->refs--; | 206 | refs = --data->refs; |
| 123 | refs = data->refs; | 207 | if (!refs) { |
| 208 | spin_lock(&call_function.lock); | ||
| 209 | list_del_rcu(&data->csd.list); | ||
| 210 | spin_unlock(&call_function.lock); | ||
| 211 | } | ||
| 124 | spin_unlock(&data->lock); | 212 | spin_unlock(&data->lock); |
| 125 | 213 | ||
| 126 | if (refs) | 214 | if (refs) |
| 127 | continue; | 215 | continue; |
| 128 | 216 | ||
| 129 | spin_lock(&call_function_lock); | 217 | csd_unlock(&data->csd); |
| 130 | list_del_rcu(&data->csd.list); | ||
| 131 | spin_unlock(&call_function_lock); | ||
| 132 | |||
| 133 | if (data->csd.flags & CSD_FLAG_WAIT) { | ||
| 134 | /* | ||
| 135 | * serialize stores to data with the flag clear | ||
| 136 | * and wakeup | ||
| 137 | */ | ||
| 138 | smp_wmb(); | ||
| 139 | data->csd.flags &= ~CSD_FLAG_WAIT; | ||
| 140 | } | ||
| 141 | if (data->csd.flags & CSD_FLAG_ALLOC) | ||
| 142 | call_rcu(&data->rcu_head, rcu_free_call_data); | ||
| 143 | } | 218 | } |
| 144 | rcu_read_unlock(); | ||
| 145 | 219 | ||
| 146 | put_cpu(); | 220 | put_cpu(); |
| 147 | } | 221 | } |
| 148 | 222 | ||
| 149 | /* | 223 | /* |
| 150 | * Invoked by arch to handle an IPI for call function single. Must be called | 224 | * Invoked by arch to handle an IPI for call function single. Must be |
| 151 | * from the arch with interrupts disabled. | 225 | * called from the arch with interrupts disabled. |
| 152 | */ | 226 | */ |
| 153 | void generic_smp_call_function_single_interrupt(void) | 227 | void generic_smp_call_function_single_interrupt(void) |
| 154 | { | 228 | { |
| 155 | struct call_single_queue *q = &__get_cpu_var(call_single_queue); | 229 | struct call_single_queue *q = &__get_cpu_var(call_single_queue); |
| 230 | unsigned int data_flags; | ||
| 156 | LIST_HEAD(list); | 231 | LIST_HEAD(list); |
| 157 | 232 | ||
| 158 | /* | 233 | spin_lock(&q->lock); |
| 159 | * Need to see other stores to list head for checking whether | 234 | list_replace_init(&q->list, &list); |
| 160 | * list is empty without holding q->lock | 235 | spin_unlock(&q->lock); |
| 161 | */ | 236 | |
| 162 | smp_read_barrier_depends(); | 237 | while (!list_empty(&list)) { |
| 163 | while (!list_empty(&q->list)) { | 238 | struct call_single_data *data; |
| 164 | unsigned int data_flags; | 239 | |
| 165 | 240 | data = list_entry(list.next, struct call_single_data, list); | |
| 166 | spin_lock(&q->lock); | 241 | list_del(&data->list); |
| 167 | list_replace_init(&q->list, &list); | 242 | |
| 168 | spin_unlock(&q->lock); | 243 | /* |
| 169 | 244 | * 'data' can be invalid after this call if flags == 0 | |
| 170 | while (!list_empty(&list)) { | 245 | * (when called through generic_exec_single()), |
| 171 | struct call_single_data *data; | 246 | * so save them away before making the call: |
| 172 | 247 | */ | |
| 173 | data = list_entry(list.next, struct call_single_data, | 248 | data_flags = data->flags; |
| 174 | list); | 249 | |
| 175 | list_del(&data->list); | 250 | data->func(data->info); |
| 176 | 251 | ||
| 177 | /* | ||
| 178 | * 'data' can be invalid after this call if | ||
| 179 | * flags == 0 (when called through | ||
| 180 | * generic_exec_single(), so save them away before | ||
| 181 | * making the call. | ||
| 182 | */ | ||
| 183 | data_flags = data->flags; | ||
| 184 | |||
| 185 | data->func(data->info); | ||
| 186 | |||
| 187 | if (data_flags & CSD_FLAG_WAIT) { | ||
| 188 | smp_wmb(); | ||
| 189 | data->flags &= ~CSD_FLAG_WAIT; | ||
| 190 | } else if (data_flags & CSD_FLAG_LOCK) { | ||
| 191 | smp_wmb(); | ||
| 192 | data->flags &= ~CSD_FLAG_LOCK; | ||
| 193 | } else if (data_flags & CSD_FLAG_ALLOC) | ||
| 194 | kfree(data); | ||
| 195 | } | ||
| 196 | /* | 252 | /* |
| 197 | * See comment on outer loop | 253 | * Unlocked CSDs are valid through generic_exec_single(): |
| 198 | */ | 254 | */ |
| 199 | smp_read_barrier_depends(); | 255 | if (data_flags & CSD_FLAG_LOCK) |
| 256 | csd_unlock(data); | ||
| 200 | } | 257 | } |
| 201 | } | 258 | } |
| 202 | 259 | ||
| @@ -215,65 +272,45 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data); | |||
| 215 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | 272 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
| 216 | int wait) | 273 | int wait) |
| 217 | { | 274 | { |
| 218 | struct call_single_data d; | 275 | struct call_single_data d = { |
| 276 | .flags = 0, | ||
| 277 | }; | ||
| 219 | unsigned long flags; | 278 | unsigned long flags; |
| 220 | /* prevent preemption and reschedule on another processor, | 279 | int this_cpu; |
| 221 | as well as CPU removal */ | ||
| 222 | int me = get_cpu(); | ||
| 223 | int err = 0; | 280 | int err = 0; |
| 224 | 281 | ||
| 282 | /* | ||
| 283 | * prevent preemption and reschedule on another processor, | ||
| 284 | * as well as CPU removal | ||
| 285 | */ | ||
| 286 | this_cpu = get_cpu(); | ||
| 287 | |||
| 225 | /* Can deadlock when called with interrupts disabled */ | 288 | /* Can deadlock when called with interrupts disabled */ |
| 226 | WARN_ON(irqs_disabled()); | 289 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); |
| 227 | 290 | ||
| 228 | if (cpu == me) { | 291 | if (cpu == this_cpu) { |
| 229 | local_irq_save(flags); | 292 | local_irq_save(flags); |
| 230 | func(info); | 293 | func(info); |
| 231 | local_irq_restore(flags); | 294 | local_irq_restore(flags); |
| 232 | } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { | 295 | } else { |
| 233 | struct call_single_data *data; | 296 | if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { |
| 297 | struct call_single_data *data = &d; | ||
| 298 | |||
| 299 | if (!wait) | ||
| 300 | data = &__get_cpu_var(csd_data); | ||
| 234 | 301 | ||
| 235 | if (!wait) { | 302 | csd_lock(data); |
| 236 | /* | 303 | |
| 237 | * We are calling a function on a single CPU | 304 | data->func = func; |
| 238 | * and we are not going to wait for it to finish. | 305 | data->info = info; |
| 239 | * We first try to allocate the data, but if we | 306 | generic_exec_single(cpu, data, wait); |
| 240 | * fail, we fall back to use a per cpu data to pass | ||
| 241 | * the information to that CPU. Since all callers | ||
| 242 | * of this code will use the same data, we must | ||
| 243 | * synchronize the callers to prevent a new caller | ||
| 244 | * from corrupting the data before the callee | ||
| 245 | * can access it. | ||
| 246 | * | ||
| 247 | * The CSD_FLAG_LOCK is used to let us know when | ||
| 248 | * the IPI handler is done with the data. | ||
| 249 | * The first caller will set it, and the callee | ||
| 250 | * will clear it. The next caller must wait for | ||
| 251 | * it to clear before we set it again. This | ||
| 252 | * will make sure the callee is done with the | ||
| 253 | * data before a new caller will use it. | ||
| 254 | */ | ||
| 255 | data = kmalloc(sizeof(*data), GFP_ATOMIC); | ||
| 256 | if (data) | ||
| 257 | data->flags = CSD_FLAG_ALLOC; | ||
| 258 | else { | ||
| 259 | data = &per_cpu(csd_data, me); | ||
| 260 | while (data->flags & CSD_FLAG_LOCK) | ||
| 261 | cpu_relax(); | ||
| 262 | data->flags = CSD_FLAG_LOCK; | ||
| 263 | } | ||
| 264 | } else { | 307 | } else { |
| 265 | data = &d; | 308 | err = -ENXIO; /* CPU not online */ |
| 266 | data->flags = CSD_FLAG_WAIT; | ||
| 267 | } | 309 | } |
| 268 | |||
| 269 | data->func = func; | ||
| 270 | data->info = info; | ||
| 271 | generic_exec_single(cpu, data); | ||
| 272 | } else { | ||
| 273 | err = -ENXIO; /* CPU not online */ | ||
| 274 | } | 310 | } |
| 275 | 311 | ||
| 276 | put_cpu(); | 312 | put_cpu(); |
| 313 | |||
| 277 | return err; | 314 | return err; |
| 278 | } | 315 | } |
| 279 | EXPORT_SYMBOL(smp_call_function_single); | 316 | EXPORT_SYMBOL(smp_call_function_single); |
| @@ -283,23 +320,26 @@ EXPORT_SYMBOL(smp_call_function_single); | |||
| 283 | * @cpu: The CPU to run on. | 320 | * @cpu: The CPU to run on. |
| 284 | * @data: Pre-allocated and setup data structure | 321 | * @data: Pre-allocated and setup data structure |
| 285 | * | 322 | * |
| 286 | * Like smp_call_function_single(), but allow caller to pass in a pre-allocated | 323 | * Like smp_call_function_single(), but allow caller to pass in a |
| 287 | * data structure. Useful for embedding @data inside other structures, for | 324 | * pre-allocated data structure. Useful for embedding @data inside |
| 288 | * instance. | 325 | * other structures, for instance. |
| 289 | * | ||
| 290 | */ | 326 | */ |
| 291 | void __smp_call_function_single(int cpu, struct call_single_data *data) | 327 | void __smp_call_function_single(int cpu, struct call_single_data *data, |
| 328 | int wait) | ||
| 292 | { | 329 | { |
| 330 | csd_lock(data); | ||
| 331 | |||
| 293 | /* Can deadlock when called with interrupts disabled */ | 332 | /* Can deadlock when called with interrupts disabled */ |
| 294 | WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled()); | 333 | WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); |
| 295 | 334 | ||
| 296 | generic_exec_single(cpu, data); | 335 | generic_exec_single(cpu, data, wait); |
| 297 | } | 336 | } |
| 298 | 337 | ||
| 299 | /* FIXME: Shim for archs using old arch_send_call_function_ipi API. */ | 338 | /* Deprecated: shim for archs using old arch_send_call_function_ipi API. */ |
| 339 | |||
| 300 | #ifndef arch_send_call_function_ipi_mask | 340 | #ifndef arch_send_call_function_ipi_mask |
| 301 | #define arch_send_call_function_ipi_mask(maskp) \ | 341 | # define arch_send_call_function_ipi_mask(maskp) \ |
| 302 | arch_send_call_function_ipi(*(maskp)) | 342 | arch_send_call_function_ipi(*(maskp)) |
| 303 | #endif | 343 | #endif |
| 304 | 344 | ||
| 305 | /** | 345 | /** |
| @@ -307,7 +347,8 @@ void __smp_call_function_single(int cpu, struct call_single_data *data) | |||
| 307 | * @mask: The set of cpus to run on (only runs on online subset). | 347 | * @mask: The set of cpus to run on (only runs on online subset). |
| 308 | * @func: The function to run. This must be fast and non-blocking. | 348 | * @func: The function to run. This must be fast and non-blocking. |
| 309 | * @info: An arbitrary pointer to pass to the function. | 349 | * @info: An arbitrary pointer to pass to the function. |
| 310 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | 350 | * @wait: If true, wait (atomically) until function has completed |
| 351 | * on other CPUs. | ||
| 311 | * | 352 | * |
| 312 | * If @wait is true, then returns once @func has returned. Note that @wait | 353 | * If @wait is true, then returns once @func has returned. Note that @wait |
| 313 | * will be implicitly turned on in case of allocation failures, since | 354 | * will be implicitly turned on in case of allocation failures, since |
| @@ -318,27 +359,27 @@ void __smp_call_function_single(int cpu, struct call_single_data *data) | |||
| 318 | * must be disabled when calling this function. | 359 | * must be disabled when calling this function. |
| 319 | */ | 360 | */ |
| 320 | void smp_call_function_many(const struct cpumask *mask, | 361 | void smp_call_function_many(const struct cpumask *mask, |
| 321 | void (*func)(void *), void *info, | 362 | void (*func)(void *), void *info, bool wait) |
| 322 | bool wait) | ||
| 323 | { | 363 | { |
| 324 | struct call_function_data *data; | 364 | struct call_function_data *data; |
| 325 | unsigned long flags; | 365 | unsigned long flags; |
| 326 | int cpu, next_cpu; | 366 | int cpu, next_cpu, this_cpu = smp_processor_id(); |
| 327 | 367 | ||
| 328 | /* Can deadlock when called with interrupts disabled */ | 368 | /* Can deadlock when called with interrupts disabled */ |
| 329 | WARN_ON(irqs_disabled()); | 369 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); |
| 330 | 370 | ||
| 331 | /* So, what's a CPU they want? Ignoring this one. */ | 371 | /* So, what's a CPU they want? Ignoring this one. */ |
| 332 | cpu = cpumask_first_and(mask, cpu_online_mask); | 372 | cpu = cpumask_first_and(mask, cpu_online_mask); |
| 333 | if (cpu == smp_processor_id()) | 373 | if (cpu == this_cpu) |
| 334 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 374 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
| 375 | |||
| 335 | /* No online cpus? We're done. */ | 376 | /* No online cpus? We're done. */ |
| 336 | if (cpu >= nr_cpu_ids) | 377 | if (cpu >= nr_cpu_ids) |
| 337 | return; | 378 | return; |
| 338 | 379 | ||
| 339 | /* Do we have another CPU which isn't us? */ | 380 | /* Do we have another CPU which isn't us? */ |
| 340 | next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 381 | next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
| 341 | if (next_cpu == smp_processor_id()) | 382 | if (next_cpu == this_cpu) |
| 342 | next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); | 383 | next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); |
| 343 | 384 | ||
| 344 | /* Fastpath: do that cpu by itself. */ | 385 | /* Fastpath: do that cpu by itself. */ |
| @@ -347,43 +388,40 @@ void smp_call_function_many(const struct cpumask *mask, | |||
| 347 | return; | 388 | return; |
| 348 | } | 389 | } |
| 349 | 390 | ||
| 350 | data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC); | 391 | data = &__get_cpu_var(cfd_data); |
| 351 | if (unlikely(!data)) { | 392 | csd_lock(&data->csd); |
| 352 | /* Slow path. */ | ||
| 353 | for_each_online_cpu(cpu) { | ||
| 354 | if (cpu == smp_processor_id()) | ||
| 355 | continue; | ||
| 356 | if (cpumask_test_cpu(cpu, mask)) | ||
| 357 | smp_call_function_single(cpu, func, info, wait); | ||
| 358 | } | ||
| 359 | return; | ||
| 360 | } | ||
| 361 | 393 | ||
| 362 | spin_lock_init(&data->lock); | 394 | spin_lock_irqsave(&data->lock, flags); |
| 363 | data->csd.flags = CSD_FLAG_ALLOC; | ||
| 364 | if (wait) | ||
| 365 | data->csd.flags |= CSD_FLAG_WAIT; | ||
| 366 | data->csd.func = func; | 395 | data->csd.func = func; |
| 367 | data->csd.info = info; | 396 | data->csd.info = info; |
| 368 | cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask); | 397 | cpumask_and(data->cpumask, mask, cpu_online_mask); |
| 369 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits)); | 398 | cpumask_clear_cpu(this_cpu, data->cpumask); |
| 370 | data->refs = cpumask_weight(to_cpumask(data->cpumask_bits)); | 399 | data->refs = cpumask_weight(data->cpumask); |
| 371 | 400 | ||
| 372 | spin_lock_irqsave(&call_function_lock, flags); | 401 | spin_lock(&call_function.lock); |
| 373 | list_add_tail_rcu(&data->csd.list, &call_function_queue); | 402 | /* |
| 374 | spin_unlock_irqrestore(&call_function_lock, flags); | 403 | * Place entry at the _HEAD_ of the list, so that any cpu still |
| 404 | * observing the entry in generic_smp_call_function_interrupt() | ||
| 405 | * will not miss any other list entries: | ||
| 406 | */ | ||
| 407 | list_add_rcu(&data->csd.list, &call_function.queue); | ||
| 408 | spin_unlock(&call_function.lock); | ||
| 409 | |||
| 410 | spin_unlock_irqrestore(&data->lock, flags); | ||
| 375 | 411 | ||
| 376 | /* | 412 | /* |
| 377 | * Make the list addition visible before sending the ipi. | 413 | * Make the list addition visible before sending the ipi. |
| 414 | * (IPIs must obey or appear to obey normal Linux cache | ||
| 415 | * coherency rules -- see comment in generic_exec_single). | ||
| 378 | */ | 416 | */ |
| 379 | smp_mb(); | 417 | smp_mb(); |
| 380 | 418 | ||
| 381 | /* Send a message to all CPUs in the map */ | 419 | /* Send a message to all CPUs in the map */ |
| 382 | arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits)); | 420 | arch_send_call_function_ipi_mask(data->cpumask); |
| 383 | 421 | ||
| 384 | /* optionally wait for the CPUs to complete */ | 422 | /* Optionally wait for the CPUs to complete */ |
| 385 | if (wait) | 423 | if (wait) |
| 386 | csd_flag_wait(&data->csd); | 424 | csd_lock_wait(&data->csd); |
| 387 | } | 425 | } |
| 388 | EXPORT_SYMBOL(smp_call_function_many); | 426 | EXPORT_SYMBOL(smp_call_function_many); |
| 389 | 427 | ||
| @@ -391,7 +429,8 @@ EXPORT_SYMBOL(smp_call_function_many); | |||
| 391 | * smp_call_function(): Run a function on all other CPUs. | 429 | * smp_call_function(): Run a function on all other CPUs. |
| 392 | * @func: The function to run. This must be fast and non-blocking. | 430 | * @func: The function to run. This must be fast and non-blocking. |
| 393 | * @info: An arbitrary pointer to pass to the function. | 431 | * @info: An arbitrary pointer to pass to the function. |
| 394 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | 432 | * @wait: If true, wait (atomically) until function has completed |
| 433 | * on other CPUs. | ||
| 395 | * | 434 | * |
| 396 | * Returns 0. | 435 | * Returns 0. |
| 397 | * | 436 | * |
| @@ -407,26 +446,27 @@ int smp_call_function(void (*func)(void *), void *info, int wait) | |||
| 407 | preempt_disable(); | 446 | preempt_disable(); |
| 408 | smp_call_function_many(cpu_online_mask, func, info, wait); | 447 | smp_call_function_many(cpu_online_mask, func, info, wait); |
| 409 | preempt_enable(); | 448 | preempt_enable(); |
| 449 | |||
| 410 | return 0; | 450 | return 0; |
| 411 | } | 451 | } |
| 412 | EXPORT_SYMBOL(smp_call_function); | 452 | EXPORT_SYMBOL(smp_call_function); |
| 413 | 453 | ||
| 414 | void ipi_call_lock(void) | 454 | void ipi_call_lock(void) |
| 415 | { | 455 | { |
| 416 | spin_lock(&call_function_lock); | 456 | spin_lock(&call_function.lock); |
| 417 | } | 457 | } |
| 418 | 458 | ||
| 419 | void ipi_call_unlock(void) | 459 | void ipi_call_unlock(void) |
| 420 | { | 460 | { |
| 421 | spin_unlock(&call_function_lock); | 461 | spin_unlock(&call_function.lock); |
| 422 | } | 462 | } |
| 423 | 463 | ||
| 424 | void ipi_call_lock_irq(void) | 464 | void ipi_call_lock_irq(void) |
| 425 | { | 465 | { |
| 426 | spin_lock_irq(&call_function_lock); | 466 | spin_lock_irq(&call_function.lock); |
| 427 | } | 467 | } |
| 428 | 468 | ||
| 429 | void ipi_call_unlock_irq(void) | 469 | void ipi_call_unlock_irq(void) |
| 430 | { | 470 | { |
| 431 | spin_unlock_irq(&call_function_lock); | 471 | spin_unlock_irq(&call_function.lock); |
| 432 | } | 472 | } |
