aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2008-07-31 12:34:22 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-07-31 12:34:22 -0400
commit: 6679ce6e5f519096612b480d255d9ca97be0c2be (patch)
tree: cce6814d3e7c62adf59e565fb6ae95fd409f86ab /kernel
parent: 2c3d103ba90827cfb478bf10464d9b5b9cea369c (diff)
parent: 6e86841d05f371b5b9b86ce76c02aaee83352298 (diff)
Merge branch 'linus' into sched/urgent
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 1
-rw-r--r-- kernel/cpu.c 41
-rw-r--r-- kernel/dma-coherent.c 154
-rw-r--r-- kernel/fork.c 3
-rw-r--r-- kernel/module.c 33
-rw-r--r-- kernel/rcuclassic.c 4
-rw-r--r-- kernel/stop_machine.c 288
-rw-r--r-- kernel/time/tick-common.c 8
-rw-r--r-- kernel/trace/ftrace.c 6
-rw-r--r-- kernel/trace/trace_sysprof.c 4
10 files changed, 345 insertions, 197 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 54f69837d35a..4e1d7df7c3e2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
84obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 84obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
85obj-$(CONFIG_MARKERS) += marker.o 85obj-$(CONFIG_MARKERS) += marker.o
86obj-$(CONFIG_LATENCYTOP) += latencytop.o 86obj-$(CONFIG_LATENCYTOP) += latencytop.o
87obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
87obj-$(CONFIG_FTRACE) += trace/ 88obj-$(CONFIG_FTRACE) += trace/
88obj-$(CONFIG_TRACING) += trace/ 89obj-$(CONFIG_TRACING) += trace/
89obj-$(CONFIG_SMP) += sched_cpupri.o 90obj-$(CONFIG_SMP) += sched_cpupri.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 10ba5f1004a5..e202a68d1cc1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param)
216static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) 216static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
217{ 217{
218 int err, nr_calls = 0; 218 int err, nr_calls = 0;
219 struct task_struct *p;
220 cpumask_t old_allowed, tmp; 219 cpumask_t old_allowed, tmp;
221 void *hcpu = (void *)(long)cpu; 220 void *hcpu = (void *)(long)cpu;
222 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; 221 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
@@ -249,21 +248,18 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
249 cpus_setall(tmp); 248 cpus_setall(tmp);
250 cpu_clear(cpu, tmp); 249 cpu_clear(cpu, tmp);
251 set_cpus_allowed_ptr(current, &tmp); 250 set_cpus_allowed_ptr(current, &tmp);
251 tmp = cpumask_of_cpu(cpu);
252 252
253 p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); 253 err = __stop_machine(take_cpu_down, &tcd_param, &tmp);
254 254 if (err) {
255 if (IS_ERR(p) || cpu_online(cpu)) {
256 /* CPU didn't die: tell everyone. Can't complain. */ 255 /* CPU didn't die: tell everyone. Can't complain. */
257 if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, 256 if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
258 hcpu) == NOTIFY_BAD) 257 hcpu) == NOTIFY_BAD)
259 BUG(); 258 BUG();
260 259
261 if (IS_ERR(p)) { 260 goto out_allowed;
262 err = PTR_ERR(p);
263 goto out_allowed;
264 }
265 goto out_thread;
266 } 261 }
262 BUG_ON(cpu_online(cpu));
267 263
268 /* Wait for it to sleep (leaving idle task). */ 264 /* Wait for it to sleep (leaving idle task). */
269 while (!idle_cpu(cpu)) 265 while (!idle_cpu(cpu))
@@ -279,8 +275,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
279 275
280 check_for_tasks(cpu); 276 check_for_tasks(cpu);
281 277
282out_thread:
283 err = kthread_stop(p);
284out_allowed: 278out_allowed:
285 set_cpus_allowed_ptr(current, &old_allowed); 279 set_cpus_allowed_ptr(current, &old_allowed);
286out_release: 280out_release:
@@ -461,3 +455,28 @@ out:
461#endif /* CONFIG_PM_SLEEP_SMP */ 455#endif /* CONFIG_PM_SLEEP_SMP */
462 456
463#endif /* CONFIG_SMP */ 457#endif /* CONFIG_SMP */
458
459/*
460 * cpu_bit_bitmap[] is a special, "compressed" data structure that
461 * represents the NR_CPUS-bit wide binary values of 1<<nr.
462 *
463 * It is used by cpumask_of_cpu() to get a constant address to a CPU
464 * mask value that has a single bit set only.
465 */
466
467/* cpu_bit_bitmap[0] is left all-zero - so we can back into it */
468#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x) /* row x+1: only bit x of word 0 set */
469#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
470#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
471#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
472
473const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
474
475 MASK_DECLARE_8(0), MASK_DECLARE_8(8),
476 MASK_DECLARE_8(16), MASK_DECLARE_8(24),
477#if BITS_PER_LONG > 32 /* rows for bits 32..63 only exist with a wider long */
478 MASK_DECLARE_8(32), MASK_DECLARE_8(40),
479 MASK_DECLARE_8(48), MASK_DECLARE_8(56),
480#endif
481};
482EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
new file mode 100644
index 000000000000..7517115a8cce
--- /dev/null
+++ b/kernel/dma-coherent.c
@@ -0,0 +1,154 @@
1/*
2 * Coherent per-device memory handling.
3 * Borrowed from i386
4 */
5#include <linux/kernel.h>
6#include <linux/dma-mapping.h>
7
8struct dma_coherent_mem {
9 void *virt_base; /* CPU mapping of the pool, as returned by ioremap() */
10 u32 device_base; /* pool base address as seen by the device; NOTE(review): u32 may truncate a wider dma_addr_t - confirm */
11 int size; /* pool size in pages (not bytes) */
12 int flags; /* DMA_MEMORY_* flags passed when the pool was declared */
13 unsigned long *bitmap; /* allocation bitmap, one bit per page of the pool */
14};
15
16int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
17 dma_addr_t device_addr, size_t size, int flags)
18{
19 void __iomem *mem_base = NULL;
20 int pages = size >> PAGE_SHIFT;
21 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
22
23 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
24 goto out;
25 if (!size)
26 goto out;
27 if (dev->dma_mem)
28 goto out;
29
30 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
31
32 mem_base = ioremap(bus_addr, size);
33 if (!mem_base)
34 goto out;
35
36 dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
37 if (!dev->dma_mem)
38 goto out;
39 dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
40 if (!dev->dma_mem->bitmap)
41 goto free1_out;
42
43 dev->dma_mem->virt_base = mem_base;
44 dev->dma_mem->device_base = device_addr;
45 dev->dma_mem->size = pages;
46 dev->dma_mem->flags = flags;
47
48 if (flags & DMA_MEMORY_MAP)
49 return DMA_MEMORY_MAP;
50
51 return DMA_MEMORY_IO;
52
53 free1_out:
54 kfree(dev->dma_mem);
55 out:
56 if (mem_base)
57 iounmap(mem_base);
58 return 0;
59}
60EXPORT_SYMBOL(dma_declare_coherent_memory);
61
62void dma_release_declared_memory(struct device *dev)
63{
64 struct dma_coherent_mem *mem = dev->dma_mem;
65
66 if (!mem)
67 return;
68 dev->dma_mem = NULL;
69 iounmap(mem->virt_base);
70 kfree(mem->bitmap);
71 kfree(mem);
72}
73EXPORT_SYMBOL(dma_release_declared_memory);
74
75void *dma_mark_declared_memory_occupied(struct device *dev,
76 dma_addr_t device_addr, size_t size)
77{
78 struct dma_coherent_mem *mem = dev->dma_mem;
79 int pos, err;
80 int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
81
82 pages >>= PAGE_SHIFT;
83
84 if (!mem)
85 return ERR_PTR(-EINVAL);
86
87 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
88 err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
89 if (err != 0)
90 return ERR_PTR(err);
91 return mem->virt_base + (pos << PAGE_SHIFT);
92}
93EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
94
95/**
96 * Try to allocate memory from the per-device coherent area.
97 *
98 * @dev: device from which we allocate memory
99 * @size: size of requested memory area
100 * @dma_handle: This will be filled with the correct dma handle
101 * @ret: This pointer will be filled with the virtual address
102 * to allocated area.
103 *
104 * This function should be only called from per-arch %dma_alloc_coherent()
105 * to support allocation from per-device coherent memory pools.
106 *
107 * Returns 0 if dma_alloc_coherent should continue with allocating from
108 * generic memory areas, or !0 if dma_alloc_coherent should return %ret.
109 */
110int dma_alloc_from_coherent(struct device *dev, ssize_t size,
111 dma_addr_t *dma_handle, void **ret)
112{
113 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
114 int order = get_order(size);
115
116 if (mem) {
117 int page = bitmap_find_free_region(mem->bitmap, mem->size,
118 order);
119 if (page >= 0) {
120 *dma_handle = mem->device_base + (page << PAGE_SHIFT);
121 *ret = mem->virt_base + (page << PAGE_SHIFT);
122 memset(*ret, 0, size);
123 } else if (mem->flags & DMA_MEMORY_EXCLUSIVE)
124 *ret = NULL;
125 }
126 return (mem != NULL);
127}
128
129/**
130 * Try to free the memory allocated from per-device coherent memory pool.
131 * @dev: device from which the memory was allocated
132 * @order: the order of pages allocated
133 * @vaddr: virtual address of allocated pages
134 *
135 * This checks whether the memory was allocated from the per-device
136 * coherent memory pool and if so, releases that memory.
137 *
138 * Returns 1 if we correctly released the memory, or 0 if
139 * %dma_release_coherent() should proceed with releasing memory from
140 * generic pools.
141 */
142int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
143{
144 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
145
146 if (mem && vaddr >= mem->virt_base && vaddr <
147 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
148 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
149
150 bitmap_release_region(mem->bitmap, page, order);
151 return 1;
152 }
153 return 0;
154}
diff --git a/kernel/fork.c b/kernel/fork.c
index 8214ba7c8bb1..7ce2ebe84796 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -27,6 +27,7 @@
27#include <linux/key.h> 27#include <linux/key.h>
28#include <linux/binfmts.h> 28#include <linux/binfmts.h>
29#include <linux/mman.h> 29#include <linux/mman.h>
30#include <linux/mmu_notifier.h>
30#include <linux/fs.h> 31#include <linux/fs.h>
31#include <linux/nsproxy.h> 32#include <linux/nsproxy.h>
32#include <linux/capability.h> 33#include <linux/capability.h>
@@ -414,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
414 415
415 if (likely(!mm_alloc_pgd(mm))) { 416 if (likely(!mm_alloc_pgd(mm))) {
416 mm->def_flags = 0; 417 mm->def_flags = 0;
418 mmu_notifier_mm_init(mm);
417 return mm; 419 return mm;
418 } 420 }
419 421
@@ -446,6 +448,7 @@ void __mmdrop(struct mm_struct *mm)
446 BUG_ON(mm == &init_mm); 448 BUG_ON(mm == &init_mm);
447 mm_free_pgd(mm); 449 mm_free_pgd(mm);
448 destroy_context(mm); 450 destroy_context(mm);
451 mmu_notifier_mm_destroy(mm);
449 free_mm(mm); 452 free_mm(mm);
450} 453}
451EXPORT_SYMBOL_GPL(__mmdrop); 454EXPORT_SYMBOL_GPL(__mmdrop);
diff --git a/kernel/module.c b/kernel/module.c
index d8b5605132a0..61d212120df4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -325,18 +325,6 @@ static unsigned long find_symbol(const char *name,
325 return -ENOENT; 325 return -ENOENT;
326} 326}
327 327
328/* lookup symbol in given range of kernel_symbols */
329static const struct kernel_symbol *lookup_symbol(const char *name,
330 const struct kernel_symbol *start,
331 const struct kernel_symbol *stop)
332{
333 const struct kernel_symbol *ks = start;
334 for (; ks < stop; ks++)
335 if (strcmp(ks->name, name) == 0)
336 return ks;
337 return NULL;
338}
339
340/* Search for module by name: must hold module_mutex. */ 328/* Search for module by name: must hold module_mutex. */
341static struct module *find_module(const char *name) 329static struct module *find_module(const char *name)
342{ 330{
@@ -690,7 +678,7 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
690 if (flags & O_NONBLOCK) { 678 if (flags & O_NONBLOCK) {
691 struct stopref sref = { mod, flags, forced }; 679 struct stopref sref = { mod, flags, forced };
692 680
693 return stop_machine_run(__try_stop_module, &sref, NR_CPUS); 681 return stop_machine(__try_stop_module, &sref, NULL);
694 } else { 682 } else {
695 /* We don't need to stop the machine for this. */ 683 /* We don't need to stop the machine for this. */
696 mod->state = MODULE_STATE_GOING; 684 mod->state = MODULE_STATE_GOING;
@@ -1428,7 +1416,7 @@ static int __unlink_module(void *_mod)
1428static void free_module(struct module *mod) 1416static void free_module(struct module *mod)
1429{ 1417{
1430 /* Delete from various lists */ 1418 /* Delete from various lists */
1431 stop_machine_run(__unlink_module, mod, NR_CPUS); 1419 stop_machine(__unlink_module, mod, NULL);
1432 remove_notes_attrs(mod); 1420 remove_notes_attrs(mod);
1433 remove_sect_attrs(mod); 1421 remove_sect_attrs(mod);
1434 mod_kobject_remove(mod); 1422 mod_kobject_remove(mod);
@@ -1703,6 +1691,19 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
1703} 1691}
1704 1692
1705#ifdef CONFIG_KALLSYMS 1693#ifdef CONFIG_KALLSYMS
1694
1695/* lookup symbol in given range of kernel_symbols */
1696static const struct kernel_symbol *lookup_symbol(const char *name,
1697 const struct kernel_symbol *start,
1698 const struct kernel_symbol *stop)
1699{
1700 const struct kernel_symbol *ks = start;
1701 for (; ks < stop; ks++)
1702 if (strcmp(ks->name, name) == 0)
1703 return ks;
1704 return NULL;
1705}
1706
1706static int is_exported(const char *name, const struct module *mod) 1707static int is_exported(const char *name, const struct module *mod)
1707{ 1708{
1708 if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) 1709 if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab))
@@ -2196,7 +2197,7 @@ static struct module *load_module(void __user *umod,
2196 /* Now sew it into the lists so we can get lockdep and oops 2197 /* Now sew it into the lists so we can get lockdep and oops
2197 * info during argument parsing. No one should access us, since 2198 * info during argument parsing. No one should access us, since
2198 * strong_try_module_get() will fail. */ 2199 * strong_try_module_get() will fail. */
2199 stop_machine_run(__link_module, mod, NR_CPUS); 2200 stop_machine(__link_module, mod, NULL);
2200 2201
2201 /* Size of section 0 is 0, so this works well if no params */ 2202 /* Size of section 0 is 0, so this works well if no params */
2202 err = parse_args(mod->name, mod->args, 2203 err = parse_args(mod->name, mod->args,
@@ -2230,7 +2231,7 @@ static struct module *load_module(void __user *umod,
2230 return mod; 2231 return mod;
2231 2232
2232 unlink: 2233 unlink:
2233 stop_machine_run(__unlink_module, mod, NR_CPUS); 2234 stop_machine(__unlink_module, mod, NULL);
2234 module_arch_cleanup(mod); 2235 module_arch_cleanup(mod);
2235 cleanup: 2236 cleanup:
2236 kobject_del(&mod->mkobj.kobj); 2237 kobject_del(&mod->mkobj.kobj);
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 6f8696c502f4..aad93cdc9f68 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -91,8 +91,8 @@ static void force_quiescent_state(struct rcu_data *rdp,
91 * rdp->cpu is the current cpu. 91 * rdp->cpu is the current cpu.
92 * 92 *
93 * cpu_online_map is updated by the _cpu_down() 93 * cpu_online_map is updated by the _cpu_down()
94 * using stop_machine_run(). Since we're in irqs disabled 94 * using __stop_machine(). Since we're in irqs disabled
95 * section, stop_machine_run() is not executing, hence 95 * section, __stop_machine() is not executing, hence
96 * the cpu_online_map is stable. 96 * the cpu_online_map is stable.
97 * 97 *
98 * However, a cpu might have been offlined _just_ before 98 * However, a cpu might have been offlined _just_ before
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 738b411ff2d3..e446c7c7d6a9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,4 +1,4 @@
1/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. 1/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
2 * GPL v2 and any later version. 2 * GPL v2 and any later version.
3 */ 3 */
4#include <linux/cpu.h> 4#include <linux/cpu.h>
@@ -13,204 +13,178 @@
13#include <asm/atomic.h> 13#include <asm/atomic.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15 15
16/* Since we effect priority and affinity (both of which are visible 16/* This controls the threads on each CPU. */
17 * to, and settable by outside processes) we do indirection via a
18 * kthread. */
19
20/* Thread to stop each CPU in user context. */
21enum stopmachine_state { 17enum stopmachine_state {
22 STOPMACHINE_WAIT, 18 /* Dummy starting state for thread. */
19 STOPMACHINE_NONE,
20 /* Awaiting everyone to be scheduled. */
23 STOPMACHINE_PREPARE, 21 STOPMACHINE_PREPARE,
22 /* Disable interrupts. */
24 STOPMACHINE_DISABLE_IRQ, 23 STOPMACHINE_DISABLE_IRQ,
24 /* Run the function */
25 STOPMACHINE_RUN,
26 /* Exit */
25 STOPMACHINE_EXIT, 27 STOPMACHINE_EXIT,
26}; 28};
29static enum stopmachine_state state;
27 30
28static enum stopmachine_state stopmachine_state; 31struct stop_machine_data {
29static unsigned int stopmachine_num_threads; 32 int (*fn)(void *);
30static atomic_t stopmachine_thread_ack; 33 void *data;
31 34 int fnret;
32static int stopmachine(void *cpu) 35};
33{
34 int irqs_disabled = 0;
35 int prepared = 0;
36 cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
37
38 set_cpus_allowed_ptr(current, cpumask);
39
40 /* Ack: we are alive */
41 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
42 atomic_inc(&stopmachine_thread_ack);
43
44 /* Simple state machine */
45 while (stopmachine_state != STOPMACHINE_EXIT) {
46 if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
47 && !irqs_disabled) {
48 local_irq_disable();
49 hard_irq_disable();
50 irqs_disabled = 1;
51 /* Ack: irqs disabled. */
52 smp_mb(); /* Must read state first. */
53 atomic_inc(&stopmachine_thread_ack);
54 } else if (stopmachine_state == STOPMACHINE_PREPARE
55 && !prepared) {
56 /* Everyone is in place, hold CPU. */
57 preempt_disable();
58 prepared = 1;
59 smp_mb(); /* Must read state first. */
60 atomic_inc(&stopmachine_thread_ack);
61 }
62 /* Yield in first stage: migration threads need to
63 * help our sisters onto their CPUs. */
64 if (!prepared && !irqs_disabled)
65 yield();
66 cpu_relax();
67 }
68
69 /* Ack: we are exiting. */
70 smp_mb(); /* Must read state first. */
71 atomic_inc(&stopmachine_thread_ack);
72
73 if (irqs_disabled)
74 local_irq_enable();
75 if (prepared)
76 preempt_enable();
77 36
78 return 0; 37/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
79} 38static unsigned int num_threads;
39static atomic_t thread_ack;
40static struct completion finished;
41static DEFINE_MUTEX(lock);
80 42
81/* Change the thread state */ 43static void set_state(enum stopmachine_state newstate)
82static void stopmachine_set_state(enum stopmachine_state state)
83{ 44{
84 atomic_set(&stopmachine_thread_ack, 0); 45 /* Reset ack counter. */
46 atomic_set(&thread_ack, num_threads);
85 smp_wmb(); 47 smp_wmb();
86 stopmachine_state = state; 48 state = newstate;
87 while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
88 cpu_relax();
89} 49}
90 50
91static int stop_machine(void) 51/* Last one to ack a state moves to the next state. */
52static void ack_state(void)
92{ 53{
93 int i, ret = 0; 54 if (atomic_dec_and_test(&thread_ack)) {
94 55 /* If we're the last one to ack the EXIT, we're finished. */
95 atomic_set(&stopmachine_thread_ack, 0); 56 if (state == STOPMACHINE_EXIT)
96 stopmachine_num_threads = 0; 57 complete(&finished);
97 stopmachine_state = STOPMACHINE_WAIT; 58 else
98 59 set_state(state + 1);
99 for_each_online_cpu(i) {
100 if (i == raw_smp_processor_id())
101 continue;
102 ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
103 if (ret < 0)
104 break;
105 stopmachine_num_threads++;
106 }
107
108 /* Wait for them all to come to life. */
109 while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
110 yield();
111 cpu_relax();
112 } 60 }
61}
113 62
114 /* If some failed, kill them all. */ 63/* This is the actual thread which stops the CPU. It exits by itself rather
115 if (ret < 0) { 64 * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
116 stopmachine_set_state(STOPMACHINE_EXIT); 65static int stop_cpu(struct stop_machine_data *smdata)
117 return ret; 66{
118 } 67 enum stopmachine_state curstate = STOPMACHINE_NONE;
68 int uninitialized_var(ret);
119 69
120 /* Now they are all started, make them hold the CPUs, ready. */ 70 /* Simple state machine */
121 preempt_disable(); 71 do {
122 stopmachine_set_state(STOPMACHINE_PREPARE); 72 /* Chill out and ensure we re-read stopmachine_state. */
73 cpu_relax();
74 if (state != curstate) {
75 curstate = state;
76 switch (curstate) {
77 case STOPMACHINE_DISABLE_IRQ:
78 local_irq_disable();
79 hard_irq_disable();
80 break;
81 case STOPMACHINE_RUN:
82 /* |= allows error detection if functions on
83 * multiple CPUs. */
84 smdata->fnret |= smdata->fn(smdata->data);
85 break;
86 default:
87 break;
88 }
89 ack_state();
90 }
91 } while (curstate != STOPMACHINE_EXIT);
123 92
124 /* Make them disable irqs. */ 93 local_irq_enable();
125 local_irq_disable(); 94 do_exit(0);
126 hard_irq_disable(); 95}
127 stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
128 96
97/* Callback for CPUs which aren't supposed to do anything. */
98static int chill(void *unused)
99{
129 return 0; 100 return 0;
130} 101}
131 102
132static void restart_machine(void) 103int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
133{ 104{
134 stopmachine_set_state(STOPMACHINE_EXIT); 105 int i, err;
135 local_irq_enable(); 106 struct stop_machine_data active, idle;
136 preempt_enable_no_resched(); 107 struct task_struct **threads;
137} 108
109 active.fn = fn;
110 active.data = data;
111 active.fnret = 0;
112 idle.fn = chill;
113 idle.data = NULL;
114
115 /* This could be too big for stack on large machines. */
116 threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
117 if (!threads)
118 return -ENOMEM;
119
120 /* Set up initial state. */
121 mutex_lock(&lock);
122 init_completion(&finished);
123 num_threads = num_online_cpus();
124 set_state(STOPMACHINE_PREPARE);
138 125
139struct stop_machine_data { 126 for_each_online_cpu(i) {
140 int (*fn)(void *); 127 struct stop_machine_data *smdata = &idle;
141 void *data; 128 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
142 struct completion done;
143};
144 129
145static int do_stop(void *_smdata) 130 if (!cpus) {
146{ 131 if (i == first_cpu(cpu_online_map))
147 struct stop_machine_data *smdata = _smdata; 132 smdata = &active;
148 int ret; 133 } else {
134 if (cpu_isset(i, *cpus))
135 smdata = &active;
136 }
149 137
150 ret = stop_machine(); 138 threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
151 if (ret == 0) { 139 i);
152 ret = smdata->fn(smdata->data); 140 if (IS_ERR(threads[i])) {
153 restart_machine(); 141 err = PTR_ERR(threads[i]);
154 } 142 threads[i] = NULL;
143 goto kill_threads;
144 }
155 145
156 /* We're done: you can kthread_stop us now */ 146 /* Place it onto correct cpu. */
157 complete(&smdata->done); 147 kthread_bind(threads[i], i);
158 148
159 /* Wait for kthread_stop */ 149 /* Make it highest prio. */
160 set_current_state(TASK_INTERRUPTIBLE); 150 if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
161 while (!kthread_should_stop()) { 151 BUG();
162 schedule();
163 set_current_state(TASK_INTERRUPTIBLE);
164 } 152 }
165 __set_current_state(TASK_RUNNING);
166 return ret;
167}
168 153
169struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, 154 /* We've created all the threads. Wake them all: hold this CPU so one
170 unsigned int cpu) 155 * doesn't hit this CPU until we're ready. */
171{ 156 get_cpu();
172 static DEFINE_MUTEX(stopmachine_mutex); 157 for_each_online_cpu(i)
173 struct stop_machine_data smdata; 158 wake_up_process(threads[i]);
174 struct task_struct *p;
175 159
176 smdata.fn = fn; 160 /* This will release the thread on our CPU. */
177 smdata.data = data; 161 put_cpu();
178 init_completion(&smdata.done); 162 wait_for_completion(&finished);
163 mutex_unlock(&lock);
179 164
180 mutex_lock(&stopmachine_mutex); 165 kfree(threads);
181 166
182 /* If they don't care which CPU fn runs on, bind to any online one. */ 167 return active.fnret;
183 if (cpu == NR_CPUS)
184 cpu = raw_smp_processor_id();
185 168
186 p = kthread_create(do_stop, &smdata, "kstopmachine"); 169kill_threads:
187 if (!IS_ERR(p)) { 170 for_each_online_cpu(i)
188 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 171 if (threads[i])
172 kthread_stop(threads[i]);
173 mutex_unlock(&lock);
189 174
190 /* One high-prio thread per cpu. We'll do this one. */ 175 kfree(threads);
191 sched_setscheduler_nocheck(p, SCHED_FIFO, &param); 176 return err;
192 kthread_bind(p, cpu);
193 wake_up_process(p);
194 wait_for_completion(&smdata.done);
195 }
196 mutex_unlock(&stopmachine_mutex);
197 return p;
198} 177}
199 178
200int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) 179int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
201{ 180{
202 struct task_struct *p;
203 int ret; 181 int ret;
204 182
205 /* No CPUs can come up or down during this. */ 183 /* No CPUs can come up or down during this. */
206 get_online_cpus(); 184 get_online_cpus();
207 p = __stop_machine_run(fn, data, cpu); 185 ret = __stop_machine(fn, data, cpus);
208 if (!IS_ERR(p))
209 ret = kthread_stop(p);
210 else
211 ret = PTR_ERR(p);
212 put_online_cpus(); 186 put_online_cpus();
213 187
214 return ret; 188 return ret;
215} 189}
216EXPORT_SYMBOL_GPL(stop_machine_run); 190EXPORT_SYMBOL_GPL(stop_machine);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index bf43284d6855..80c4336f4188 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -196,12 +196,10 @@ static int tick_check_new_device(struct clock_event_device *newdev)
196 struct tick_device *td; 196 struct tick_device *td;
197 int cpu, ret = NOTIFY_OK; 197 int cpu, ret = NOTIFY_OK;
198 unsigned long flags; 198 unsigned long flags;
199 cpumask_of_cpu_ptr_declare(cpumask);
200 199
201 spin_lock_irqsave(&tick_device_lock, flags); 200 spin_lock_irqsave(&tick_device_lock, flags);
202 201
203 cpu = smp_processor_id(); 202 cpu = smp_processor_id();
204 cpumask_of_cpu_ptr_next(cpumask, cpu);
205 if (!cpu_isset(cpu, newdev->cpumask)) 203 if (!cpu_isset(cpu, newdev->cpumask))
206 goto out_bc; 204 goto out_bc;
207 205
@@ -209,7 +207,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
209 curdev = td->evtdev; 207 curdev = td->evtdev;
210 208
211 /* cpu local device ? */ 209 /* cpu local device ? */
212 if (!cpus_equal(newdev->cpumask, *cpumask)) { 210 if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) {
213 211
214 /* 212 /*
215 * If the cpu affinity of the device interrupt can not 213 * If the cpu affinity of the device interrupt can not
@@ -222,7 +220,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
222 * If we have a cpu local device already, do not replace it 220 * If we have a cpu local device already, do not replace it
223 * by a non cpu local device 221 * by a non cpu local device
224 */ 222 */
225 if (curdev && cpus_equal(curdev->cpumask, *cpumask)) 223 if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu)))
226 goto out_bc; 224 goto out_bc;
227 } 225 }
228 226
@@ -254,7 +252,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
254 curdev = NULL; 252 curdev = NULL;
255 } 253 }
256 clockevents_exchange_device(curdev, newdev); 254 clockevents_exchange_device(curdev, newdev);
257 tick_setup_device(td, newdev, cpu, cpumask); 255 tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu));
258 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) 256 if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
259 tick_oneshot_notify(); 257 tick_oneshot_notify();
260 258
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4231a3dc224a..f6e3af31b403 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -587,7 +587,7 @@ static int __ftrace_modify_code(void *data)
587 587
588static void ftrace_run_update_code(int command) 588static void ftrace_run_update_code(int command)
589{ 589{
590 stop_machine_run(__ftrace_modify_code, &command, NR_CPUS); 590 stop_machine(__ftrace_modify_code, &command, NULL);
591} 591}
592 592
593void ftrace_disable_daemon(void) 593void ftrace_disable_daemon(void)
@@ -787,7 +787,7 @@ static int ftrace_update_code(void)
787 !ftrace_enabled || !ftraced_trigger) 787 !ftrace_enabled || !ftraced_trigger)
788 return 0; 788 return 0;
789 789
790 stop_machine_run(__ftrace_update_code, NULL, NR_CPUS); 790 stop_machine(__ftrace_update_code, NULL, NULL);
791 791
792 return 1; 792 return 1;
793} 793}
@@ -1564,7 +1564,7 @@ static int __init ftrace_dynamic_init(void)
1564 1564
1565 addr = (unsigned long)ftrace_record_ip; 1565 addr = (unsigned long)ftrace_record_ip;
1566 1566
1567 stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS); 1567 stop_machine(ftrace_dyn_arch_init, &addr, NULL);
1568 1568
1569 /* ftrace_dyn_arch_init places the return code in addr */ 1569 /* ftrace_dyn_arch_init places the return code in addr */
1570 if (addr) { 1570 if (addr) {
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index ce2d723c10e1..bb948e52ce20 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -213,9 +213,7 @@ static void start_stack_timers(void)
213 int cpu; 213 int cpu;
214 214
215 for_each_online_cpu(cpu) { 215 for_each_online_cpu(cpu) {
216 cpumask_of_cpu_ptr(new_mask, cpu); 216 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
217
218 set_cpus_allowed_ptr(current, new_mask);
219 start_stack_timer(cpu); 217 start_stack_timer(cpu);
220 } 218 }
221 set_cpus_allowed_ptr(current, &saved_mask); 219 set_cpus_allowed_ptr(current, &saved_mask);