Merge branch 'cpus4096-for-linus-3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'cpus4096-for-linus-3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (77 commits)
  x86: setup_per_cpu_areas() cleanup
  cpumask: fix compile error when CONFIG_NR_CPUS is not defined
  cpumask: use alloc_cpumask_var_node where appropriate
  cpumask: convert shared_cpu_map in acpi_processor* structs to cpumask_var_t
  x86: use cpumask_var_t in acpi/boot.c
  x86: cleanup some remaining usages of NR_CPUS where s/b nr_cpu_ids
  sched: put back some stack hog changes that were undone in kernel/sched.c
  x86: enable cpus display of kernel_max and offlined cpus
  ia64: cpumask fix for is_affinity_mask_valid()
  cpumask: convert RCU implementations, fix
  xtensa: define __fls
  mn10300: define __fls
  m32r: define __fls
  h8300: define __fls
  frv: define __fls
  cris: define __fls
  cpumask: CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
  cpumask: zero extra bits in alloc_cpumask_var_node
  cpumask: replace for_each_cpu_mask_nr with for_each_cpu in kernel/time/
  cpumask: convert mm/
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-03 15:04:39 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-03 15:04:39 -0500
commit: 7d3b56ba37a95f1f370f50258ed3954c304c524b (patch)
tree: 86102527b92f02450aa245f084ffb491c18d2e0a /kernel/smp.c
parent: 269b012321f2f1f8e4648c43a93bf432b42c6668 (diff)
parent: ab14398abd195af91a744c320a52a1bce814dd1e (diff)
1 files changed, 55 insertions, 90 deletions
diff --git a/kernel/smp.c b/kernel/smp.c
index 75c8dde58c55..5cfa0e5e3e88 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -24,8 +24,8 @@ struct call_function_data {
        struct call_single_data csd;
        spinlock_t lock;
        unsigned int refs;
-        cpumask_t cpumask;
        struct rcu_head rcu_head;
+        unsigned long cpumask_bits[];
 };
 struct call_single_queue {
@@ -110,13 +110,13 @@ void generic_smp_call_function_interrupt(void)
        list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
                int refs;
-                if (!cpu_isset(cpu, data->cpumask))
+                if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
                        continue;
                data->csd.func(data->csd.info);
                spin_lock(&data->lock);
-                cpu_clear(cpu, data->cpumask);
+                cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
                WARN_ON(data->refs == 0);
                data->refs--;
                refs = data->refs;
@@ -223,7 +223,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
                local_irq_save(flags);
                func(info);
                local_irq_restore(flags);
-        } else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) {
+        } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
                struct call_single_data *data = NULL;
                if (!wait) {
@@ -266,51 +266,19 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
        generic_exec_single(cpu, data);
 }
-/* Dummy function */
+/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */
-static void quiesce_dummy(void *unused)
+#ifndef arch_send_call_function_ipi_mask
-{
+#define arch_send_call_function_ipi_mask(maskp) \
-}
+        arch_send_call_function_ipi(*(maskp))
+#endif
-/*
- * Ensure stack based data used in call function mask is safe to free.
- *
- * This is needed by smp_call_function_mask when using on-stack data, because
- * a single call function queue is shared by all CPUs, and any CPU may pick up
- * the data item on the queue at any time before it is deleted. So we need to
- * ensure that all CPUs have transitioned through a quiescent state after
- * this call.
- *
- * This is a very slow function, implemented by sending synchronous IPIs to
- * all possible CPUs. For this reason, we have to alloc data rather than use
- * stack based data even in the case of synchronous calls. The stack based
- * data is then just used for deadlock/oom fallback which will be very rare.
- *
- * If a faster scheme can be made, we could go back to preferring stack based
- * data -- the data allocation/free is non-zero cost.
- */
-static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
-{
-        struct call_single_data data;
-        int cpu;
-        data.func = quiesce_dummy;
-        data.info = NULL;
-        for_each_cpu_mask(cpu, mask) {
-                data.flags = CSD_FLAG_WAIT;
-                generic_exec_single(cpu, &data);
-        }
-}
 /**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * smp_call_function_many(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on.
+ * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed on other CPUs.
 *
- * Returns 0 on success, else a negative status code.
- *
 * If @wait is true, then returns once @func has returned. Note that @wait
 * will be implicitly turned on in case of allocation failures, since
 * we fall back to on-stack allocation.
@@ -319,53 +287,57 @@ static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
 * hardware interrupt handler or from a bottom half handler. Preemption
 * must be disabled when calling this function.
 */
-int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
+void smp_call_function_many(const struct cpumask *mask,
-                           int wait)
+                            void (*func)(void *), void *info,
+                            bool wait)
 {
-        struct call_function_data d;
+        struct call_function_data *data;
-        struct call_function_data *data = NULL;
-        cpumask_t allbutself;
        unsigned long flags;
-        int cpu, num_cpus;
+        int cpu, next_cpu;
-        int slowpath = 0;
        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());
-        cpu = smp_processor_id();
+        /* So, what's a CPU they want?  Ignoring this one. */
-        allbutself = cpu_online_map;
+        cpu = cpumask_first_and(mask, cpu_online_mask);
-        cpu_clear(cpu, allbutself);
+        if (cpu == smp_processor_id())
-        cpus_and(mask, mask, allbutself);
+                cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
-        num_cpus = cpus_weight(mask);
+        /* No online cpus?  We're done. */
+        if (cpu >= nr_cpu_ids)
-        /*
+                return;
-         * If zero CPUs, return. If just a single CPU, turn this request
-         * into a targetted single call instead since it's faster.
+        /* Do we have another CPU which isn't us? */
-         */
+        next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
-        if (!num_cpus)
+        if (next_cpu == smp_processor_id())
-                return 0;
+                next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
-        else if (num_cpus == 1) {
-                cpu = first_cpu(mask);
+        /* Fastpath: do that cpu by itself. */
-                return smp_call_function_single(cpu, func, info, wait);
+        if (next_cpu >= nr_cpu_ids) {
+                smp_call_function_single(cpu, func, info, wait);
+                return;
        }
-        data = kmalloc(sizeof(*data), GFP_ATOMIC);
+        data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
-        if (data) {
+        if (unlikely(!data)) {
-                data->csd.flags = CSD_FLAG_ALLOC;
+                /* Slow path. */
-                if (wait)
+                for_each_online_cpu(cpu) {
-                        data->csd.flags |= CSD_FLAG_WAIT;
+                        if (cpu == smp_processor_id())
-        } else {
+                                continue;
-                data = &d;
+                        if (cpumask_test_cpu(cpu, mask))
-                data->csd.flags = CSD_FLAG_WAIT;
+                                smp_call_function_single(cpu, func, info, wait);
-                wait = 1;
+                }
-                slowpath = 1;
+                return;
        }
        spin_lock_init(&data->lock);
+        data->csd.flags = CSD_FLAG_ALLOC;
+        if (wait)
+                data->csd.flags |= CSD_FLAG_WAIT;
        data->csd.func = func;
        data->csd.info = info;
-        data->refs = num_cpus;
+        cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
-        data->cpumask = mask;
+        cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
+        data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
        spin_lock_irqsave(&call_function_lock, flags);
        list_add_tail_rcu(&data->csd.list, &call_function_queue);
@@ -377,18 +349,13 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
        smp_mb();
        /* Send a message to all CPUs in the map */
-        arch_send_call_function_ipi(mask);
+        arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
        /* optionally wait for the CPUs to complete */
-        if (wait) {
+        if (wait)
                csd_flag_wait(&data->csd);
-                if (unlikely(slowpath))
-                        smp_call_function_mask_quiesce_stack(mask);
-        }
-        return 0;
 }
-EXPORT_SYMBOL(smp_call_function_mask);
+EXPORT_SYMBOL(smp_call_function_many);
 /**
 * smp_call_function(): Run a function on all other CPUs.
@@ -396,7 +363,7 @@ EXPORT_SYMBOL(smp_call_function_mask);
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed on other CPUs.
 *
- * Returns 0 on success, else a negative status code.
+ * Returns 0.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func. In case of allocation
@@ -407,12 +374,10 @@ EXPORT_SYMBOL(smp_call_function_mask);
 */
 int smp_call_function(void (*func)(void *), void *info, int wait)
 {
-        int ret;
        preempt_disable();
-        ret = smp_call_function_mask(cpu_online_map, func, info, wait);
+        smp_call_function_many(cpu_online_mask, func, info, wait);
        preempt_enable();
-        return ret;
+        return 0;
 }
 EXPORT_SYMBOL(smp_call_function);
author	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-03 15:04:39 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-03 15:04:39 -0500
commit	7d3b56ba37a95f1f370f50258ed3954c304c524b (patch)
tree	86102527b92f02450aa245f084ffb491c18d2e0a /kernel/smp.c
parent	269b012321f2f1f8e4648c43a93bf432b42c6668 (diff)
parent	ab14398abd195af91a744c320a52a1bce814dd1e (diff)

diff --git a/kernel/smp.c b/kernel/smp.c
index 75c8dde58c55..5cfa0e5e3e88 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -24,8 +24,8 @@ struct call_function_data {
24	struct call_single_data csd;	24	struct call_single_data csd;
25	spinlock_t lock;	25	spinlock_t lock;
26	unsigned int refs;	26	unsigned int refs;
27	cpumask_t cpumask;
28	struct rcu_head rcu_head;	27	struct rcu_head rcu_head;
		28	unsigned long cpumask_bits[];
29	};	29	};
30		30
31	struct call_single_queue {	31	struct call_single_queue {
@@ -110,13 +110,13 @@ void generic_smp_call_function_interrupt(void)
110	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {	110	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
111	int refs;	111	int refs;
112		112
113	if (!cpu_isset(cpu, data->cpumask))	113	if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
114	continue;	114	continue;
115		115
116	data->csd.func(data->csd.info);	116	data->csd.func(data->csd.info);
117		117
118	spin_lock(&data->lock);	118	spin_lock(&data->lock);
119	cpu_clear(cpu, data->cpumask);	119	cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
120	WARN_ON(data->refs == 0);	120	WARN_ON(data->refs == 0);
121	data->refs--;	121	data->refs--;
122	refs = data->refs;	122	refs = data->refs;
@@ -223,7 +223,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
223	local_irq_save(flags);	223	local_irq_save(flags);
224	func(info);	224	func(info);
225	local_irq_restore(flags);	225	local_irq_restore(flags);
226	} else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) {	226	} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
227	struct call_single_data *data = NULL;	227	struct call_single_data *data = NULL;
228		228
229	if (!wait) {	229	if (!wait) {
@@ -266,51 +266,19 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
266	generic_exec_single(cpu, data);	266	generic_exec_single(cpu, data);
267	}	267	}
268		268
269	/* Dummy function */	269	/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */
270	static void quiesce_dummy(void *unused)	270	#ifndef arch_send_call_function_ipi_mask
271	{	271	#define arch_send_call_function_ipi_mask(maskp) \
272	}	272	arch_send_call_function_ipi(*(maskp))
273		273	#endif
274	/*
275	* Ensure stack based data used in call function mask is safe to free.
276	*
277	* This is needed by smp_call_function_mask when using on-stack data, because
278	* a single call function queue is shared by all CPUs, and any CPU may pick up
279	* the data item on the queue at any time before it is deleted. So we need to
280	* ensure that all CPUs have transitioned through a quiescent state after
281	* this call.
282	*
283	* This is a very slow function, implemented by sending synchronous IPIs to
284	* all possible CPUs. For this reason, we have to alloc data rather than use
285	* stack based data even in the case of synchronous calls. The stack based
286	* data is then just used for deadlock/oom fallback which will be very rare.
287	*
288	* If a faster scheme can be made, we could go back to preferring stack based
289	* data -- the data allocation/free is non-zero cost.
290	*/
291	static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
292	{
293	struct call_single_data data;
294	int cpu;
295
296	data.func = quiesce_dummy;
297	data.info = NULL;
298
299	for_each_cpu_mask(cpu, mask) {
300	data.flags = CSD_FLAG_WAIT;
301	generic_exec_single(cpu, &data);
302	}
303	}
304		274
305	/**	275	/**
306	* smp_call_function_mask(): Run a function on a set of other CPUs.	276	* smp_call_function_many(): Run a function on a set of other CPUs.
307	* @mask: The set of cpus to run on.	277	* @mask: The set of cpus to run on (only runs on online subset).
308	* @func: The function to run. This must be fast and non-blocking.	278	* @func: The function to run. This must be fast and non-blocking.
309	* @info: An arbitrary pointer to pass to the function.	279	* @info: An arbitrary pointer to pass to the function.
310	* @wait: If true, wait (atomically) until function has completed on other CPUs.	280	* @wait: If true, wait (atomically) until function has completed on other CPUs.
311	*	281	*
312	* Returns 0 on success, else a negative status code.
313	*
314	* If @wait is true, then returns once @func has returned. Note that @wait	282	* If @wait is true, then returns once @func has returned. Note that @wait
315	* will be implicitly turned on in case of allocation failures, since	283	* will be implicitly turned on in case of allocation failures, since
316	* we fall back to on-stack allocation.	284	* we fall back to on-stack allocation.
@@ -319,53 +287,57 @@ static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
319	* hardware interrupt handler or from a bottom half handler. Preemption	287	* hardware interrupt handler or from a bottom half handler. Preemption
320	* must be disabled when calling this function.	288	* must be disabled when calling this function.
321	*/	289	*/
322	int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,	290	void smp_call_function_many(const struct cpumask *mask,
323	int wait)	291	void (*func)(void *), void *info,
		292	bool wait)
324	{	293	{
325	struct call_function_data d;	294	struct call_function_data *data;
326	struct call_function_data *data = NULL;
327	cpumask_t allbutself;
328	unsigned long flags;	295	unsigned long flags;
329	int cpu, num_cpus;	296	int cpu, next_cpu;
330	int slowpath = 0;
331		297
332	/* Can deadlock when called with interrupts disabled */	298	/* Can deadlock when called with interrupts disabled */
333	WARN_ON(irqs_disabled());	299	WARN_ON(irqs_disabled());
334		300
335	cpu = smp_processor_id();	301	/* So, what's a CPU they want? Ignoring this one. */
336	allbutself = cpu_online_map;	302	cpu = cpumask_first_and(mask, cpu_online_mask);
337	cpu_clear(cpu, allbutself);	303	if (cpu == smp_processor_id())
338	cpus_and(mask, mask, allbutself);	304	cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
339	num_cpus = cpus_weight(mask);	305	/* No online cpus? We're done. */
340		306	if (cpu >= nr_cpu_ids)
341	/*	307	return;
342	* If zero CPUs, return. If just a single CPU, turn this request	308
343	* into a targetted single call instead since it's faster.	309	/* Do we have another CPU which isn't us? */
344	*/	310	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
345	if (!num_cpus)	311	if (next_cpu == smp_processor_id())
346	return 0;	312	next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
347	else if (num_cpus == 1) {	313
348	cpu = first_cpu(mask);	314	/* Fastpath: do that cpu by itself. */
349	return smp_call_function_single(cpu, func, info, wait);	315	if (next_cpu >= nr_cpu_ids) {
		316	smp_call_function_single(cpu, func, info, wait);
		317	return;
350	}	318	}
351		319
352	data = kmalloc(sizeof(*data), GFP_ATOMIC);	320	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
353	if (data) {	321	if (unlikely(!data)) {
354	data->csd.flags = CSD_FLAG_ALLOC;	322	/* Slow path. */
355	if (wait)	323	for_each_online_cpu(cpu) {
356	data->csd.flags |= CSD_FLAG_WAIT;	324	if (cpu == smp_processor_id())
357	} else {	325	continue;
358	data = &d;	326	if (cpumask_test_cpu(cpu, mask))
359	data->csd.flags = CSD_FLAG_WAIT;	327	smp_call_function_single(cpu, func, info, wait);
360	wait = 1;	328	}
361	slowpath = 1;	329	return;
362	}	330	}
363		331
364	spin_lock_init(&data->lock);	332	spin_lock_init(&data->lock);
		333	data->csd.flags = CSD_FLAG_ALLOC;
		334	if (wait)
		335	data->csd.flags |= CSD_FLAG_WAIT;
365	data->csd.func = func;	336	data->csd.func = func;
366	data->csd.info = info;	337	data->csd.info = info;
367	data->refs = num_cpus;	338	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
368	data->cpumask = mask;	339	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
		340	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
369		341
370	spin_lock_irqsave(&call_function_lock, flags);	342	spin_lock_irqsave(&call_function_lock, flags);
371	list_add_tail_rcu(&data->csd.list, &call_function_queue);	343	list_add_tail_rcu(&data->csd.list, &call_function_queue);
@@ -377,18 +349,13 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
377	smp_mb();	349	smp_mb();
378		350
379	/* Send a message to all CPUs in the map */	351	/* Send a message to all CPUs in the map */
380	arch_send_call_function_ipi(mask);	352	arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
381		353
382	/* optionally wait for the CPUs to complete */	354	/* optionally wait for the CPUs to complete */
383	if (wait) {	355	if (wait)
384	csd_flag_wait(&data->csd);	356	csd_flag_wait(&data->csd);
385	if (unlikely(slowpath))
386	smp_call_function_mask_quiesce_stack(mask);
387	}
388
389	return 0;
390	}	357	}
391	EXPORT_SYMBOL(smp_call_function_mask);	358	EXPORT_SYMBOL(smp_call_function_many);
392		359
393	/**	360	/**
394	* smp_call_function(): Run a function on all other CPUs.	361	* smp_call_function(): Run a function on all other CPUs.
@@ -396,7 +363,7 @@ EXPORT_SYMBOL(smp_call_function_mask);
396	* @info: An arbitrary pointer to pass to the function.	363	* @info: An arbitrary pointer to pass to the function.
397	* @wait: If true, wait (atomically) until function has completed on other CPUs.	364	* @wait: If true, wait (atomically) until function has completed on other CPUs.
398	*	365	*
399	* Returns 0 on success, else a negative status code.	366	* Returns 0.
400	*	367	*
401	* If @wait is true, then returns once @func has returned; otherwise	368	* If @wait is true, then returns once @func has returned; otherwise
402	* it returns just before the target cpu calls @func. In case of allocation	369	* it returns just before the target cpu calls @func. In case of allocation
@@ -407,12 +374,10 @@ EXPORT_SYMBOL(smp_call_function_mask);
407	*/	374	*/
408	int smp_call_function(void (*func)(void *), void *info, int wait)	375	int smp_call_function(void (*func)(void *), void *info, int wait)
409	{	376	{
410	int ret;
411
412	preempt_disable();	377	preempt_disable();
413	ret = smp_call_function_mask(cpu_online_map, func, info, wait);	378	smp_call_function_many(cpu_online_mask, func, info, wait);
414	preempt_enable();	379	preempt_enable();
415	return ret;	380	return 0;
416	}	381	}
417	EXPORT_SYMBOL(smp_call_function);	382	EXPORT_SYMBOL(smp_call_function);
418		383