author     Rusty Russell <rusty@rustcorp.com.au>   2008-12-29 17:35:16 -0500
committer  Rusty Russell <rusty@rustcorp.com.au>   2008-12-29 17:35:16 -0500
commit     54b11e6d57a10aa9d0009efd93873e17bffd5d30
tree       ac09296e7b0726aa6143913526f8983fae1cb497 /kernel
parent     3fa41520696fec2815e2d88fbcccdda77ba4d693
cpumask: smp_call_function_many()
Impact: Implementation change to remove cpumask_t from stack.
Actually change smp_call_function_mask() to smp_call_function_many().
We avoid cpumasks on the stack in this version.
(S390 has its own version, but that's going away apparently).
We have to do some dancing to figure out whether zero or one other CPUs
are in both the supplied mask and the online mask, without allocating a
temporary cpumask. It's still fairly cheap; the relevant lines are
sketched below.
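In outline, the dance in the hunk below looks like this (a sketch of the
same lines, not additional code):

	/* First online CPU in the mask that isn't us. */
	cpu = cpumask_first_and(mask, cpu_online_mask);
	if (cpu == smp_processor_id())
		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	if (cpu >= nr_cpu_ids)
		return;		/* zero other online CPUs in the mask */

	/* Is there a second one? */
	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	if (next_cpu == smp_processor_id())
		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
	if (next_cpu >= nr_cpu_ids) {
		/* Exactly one other CPU: use the single-CPU path. */
		smp_call_function_single(cpu, func, info, wait);
		return;
	}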
We allocate the cpumask at the end of the call_function_data structure:
if allocation fails we fall back to smp_call_function_single() rather
than using the baroque quiescing code (which needs a cpumask on the
stack).
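Condensed from the hunks below, the layout and the fallback path (sketch
only; the full version is in the diff):

	struct call_function_data {
		struct call_single_data csd;
		spinlock_t lock;
		unsigned int refs;
		struct rcu_head rcu_head;
		unsigned long cpumask_bits[];	/* cpumask stored inline, after the struct */
	};

	/* One allocation covers the struct plus a runtime-sized cpumask... */
	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
	if (unlikely(!data)) {
		/* ...and on failure we simply IPI the CPUs one at a time. */
		for_each_online_cpu(cpu)
			if (cpu != smp_processor_id() && cpumask_test_cpu(cpu, mask))
				smp_call_function_single(cpu, func, info, wait);
		return;
	}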
(Thanks to Hiroshi Shimamoto for spotting several bugs in previous versions!)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: npiggin@suse.de
Cc: axboe@kernel.dk
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/smp.c  139
1 file changed, 49 insertions(+), 90 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index 75c8dde58c55..9f0eafed1399 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -24,8 +24,8 @@ struct call_function_data {
 	struct call_single_data csd;
 	spinlock_t lock;
 	unsigned int refs;
-	cpumask_t cpumask;
 	struct rcu_head rcu_head;
+	unsigned long cpumask_bits[];
 };
 
 struct call_single_queue {
@@ -110,13 +110,13 @@ void generic_smp_call_function_interrupt(void)
 	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
 		int refs;
 
-		if (!cpu_isset(cpu, data->cpumask))
+		if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
 			continue;
 
 		data->csd.func(data->csd.info);
 
 		spin_lock(&data->lock);
-		cpu_clear(cpu, data->cpumask);
+		cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
 		WARN_ON(data->refs == 0);
 		data->refs--;
 		refs = data->refs;
@@ -266,51 +266,13 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
 	generic_exec_single(cpu, data);
 }
 
-/* Dummy function */
-static void quiesce_dummy(void *unused)
-{
-}
-
-/*
- * Ensure stack based data used in call function mask is safe to free.
- *
- * This is needed by smp_call_function_mask when using on-stack data, because
- * a single call function queue is shared by all CPUs, and any CPU may pick up
- * the data item on the queue at any time before it is deleted. So we need to
- * ensure that all CPUs have transitioned through a quiescent state after
- * this call.
- *
- * This is a very slow function, implemented by sending synchronous IPIs to
- * all possible CPUs. For this reason, we have to alloc data rather than use
- * stack based data even in the case of synchronous calls. The stack based
- * data is then just used for deadlock/oom fallback which will be very rare.
- *
- * If a faster scheme can be made, we could go back to preferring stack based
- * data -- the data allocation/free is non-zero cost.
- */
-static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
-{
-	struct call_single_data data;
-	int cpu;
-
-	data.func = quiesce_dummy;
-	data.info = NULL;
-
-	for_each_cpu_mask(cpu, mask) {
-		data.flags = CSD_FLAG_WAIT;
-		generic_exec_single(cpu, &data);
-	}
-}
-
 /**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on.
+ * smp_call_function_many(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on (only runs on online subset).
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
- * Returns 0 on success, else a negative status code.
- *
  * If @wait is true, then returns once @func has returned. Note that @wait
  * will be implicitly turned on in case of allocation failures, since
  * we fall back to on-stack allocation.
@@ -319,53 +281,57 @@ static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
  * hardware interrupt handler or from a bottom half handler. Preemption
  * must be disabled when calling this function.
  */
-int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
-			   int wait)
+void smp_call_function_many(const struct cpumask *mask,
+			    void (*func)(void *), void *info,
+			    bool wait)
 {
-	struct call_function_data d;
-	struct call_function_data *data = NULL;
-	cpumask_t allbutself;
+	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, num_cpus;
-	int slowpath = 0;
+	int cpu, next_cpu;
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
-	cpu = smp_processor_id();
-	allbutself = cpu_online_map;
-	cpu_clear(cpu, allbutself);
-	cpus_and(mask, mask, allbutself);
-	num_cpus = cpus_weight(mask);
-
-	/*
-	 * If zero CPUs, return. If just a single CPU, turn this request
-	 * into a targetted single call instead since it's faster.
-	 */
-	if (!num_cpus)
-		return 0;
-	else if (num_cpus == 1) {
-		cpu = first_cpu(mask);
-		return smp_call_function_single(cpu, func, info, wait);
+	/* So, what's a CPU they want? Ignoring this one. */
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	if (cpu == smp_processor_id())
+		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+	/* No online cpus? We're done. */
+	if (cpu >= nr_cpu_ids)
+		return;
+
+	/* Do we have another CPU which isn't us? */
+	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+	if (next_cpu == smp_processor_id())
+		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
+
+	/* Fastpath: do that cpu by itself. */
+	if (next_cpu >= nr_cpu_ids) {
+		smp_call_function_single(cpu, func, info, wait);
+		return;
 	}
 
-	data = kmalloc(sizeof(*data), GFP_ATOMIC);
-	if (data) {
-		data->csd.flags = CSD_FLAG_ALLOC;
-		if (wait)
-			data->csd.flags |= CSD_FLAG_WAIT;
-	} else {
-		data = &d;
-		data->csd.flags = CSD_FLAG_WAIT;
-		wait = 1;
-		slowpath = 1;
+	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
+	if (unlikely(!data)) {
+		/* Slow path. */
+		for_each_online_cpu(cpu) {
+			if (cpu == smp_processor_id())
+				continue;
+			if (cpumask_test_cpu(cpu, mask))
+				smp_call_function_single(cpu, func, info, wait);
+		}
+		return;
 	}
 
 	spin_lock_init(&data->lock);
+	data->csd.flags = CSD_FLAG_ALLOC;
+	if (wait)
+		data->csd.flags |= CSD_FLAG_WAIT;
 	data->csd.func = func;
 	data->csd.info = info;
-	data->refs = num_cpus;
-	data->cpumask = mask;
+	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
+	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
 
 	spin_lock_irqsave(&call_function_lock, flags);
 	list_add_tail_rcu(&data->csd.list, &call_function_queue);
@@ -377,18 +343,13 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
 	smp_mb();
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi(mask);
+	arch_send_call_function_ipi(*to_cpumask(data->cpumask_bits));
 
 	/* optionally wait for the CPUs to complete */
-	if (wait) {
+	if (wait)
 		csd_flag_wait(&data->csd);
-		if (unlikely(slowpath))
-			smp_call_function_mask_quiesce_stack(mask);
-	}
-
-	return 0;
 }
-EXPORT_SYMBOL(smp_call_function_mask);
+EXPORT_SYMBOL(smp_call_function_many);
 
 /**
  * smp_call_function(): Run a function on all other CPUs.
@@ -396,7 +357,7 @@ EXPORT_SYMBOL(smp_call_function_mask);
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
- * Returns 0 on success, else a negative status code.
+ * Returns 0.
  *
  * If @wait is true, then returns once @func has returned; otherwise
  * it returns just before the target cpu calls @func. In case of allocation
@@ -407,12 +368,10 @@ EXPORT_SYMBOL(smp_call_function_mask);
  */
 int smp_call_function(void (*func)(void *), void *info, int wait)
 {
-	int ret;
-
 	preempt_disable();
-	ret = smp_call_function_mask(cpu_online_map, func, info, wait);
+	smp_call_function_many(cpu_online_mask, func, info, wait);
 	preempt_enable();
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(smp_call_function);
 
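For reference, a hypothetical caller of the new interface might look like
this (drain_local_queue and some_cpumask are made-up names for
illustration; per the kerneldoc above, the callback runs from IPI context
and must be fast and non-blocking, and preemption must be disabled around
the call):

	/* Hypothetical callback: runs on each selected CPU. */
	static void drain_local_queue(void *info)
	{
		/* keep this short: we are in interrupt context */
	}

	...
	preempt_disable();
	/* Run on every online CPU in some_cpumask except ourselves; wait for completion. */
	smp_call_function_many(some_cpumask, drain_local_queue, NULL, true);
	preempt_enable();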