path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpu.c                  |  11
-rw-r--r--  kernel/cpuset.c               |   4
-rw-r--r--  kernel/hrtimer.c              |   1
-rw-r--r--  kernel/irq/autoprobe.c        |  15
-rw-r--r--  kernel/irq/chip.c             |   2
-rw-r--r--  kernel/irq/handle.c           |  48
-rw-r--r--  kernel/irq/manage.c           |  22
-rw-r--r--  kernel/irq/migration.c        |  14
-rw-r--r--  kernel/irq/numa_migrate.c     |   7
-rw-r--r--  kernel/irq/proc.c             |  29
-rw-r--r--  kernel/irq/spurious.c         |   5
-rw-r--r--  kernel/profile.c              |   4
-rw-r--r--  kernel/rcuclassic.c           |   2
-rw-r--r--  kernel/sched.c                | 970
-rw-r--r--  kernel/sched_cpupri.c         |  39
-rw-r--r--  kernel/sched_cpupri.h         |   5
-rw-r--r--  kernel/sched_fair.c           |  32
-rw-r--r--  kernel/sched_rt.c             |  73
-rw-r--r--  kernel/sched_stats.h          |   3
-rw-r--r--  kernel/softirq.c              |  20
-rw-r--r--  kernel/taskstats.c            |   2
-rw-r--r--  kernel/time/clockevents.c     |   2
-rw-r--r--  kernel/time/tick-broadcast.c  |   2
-rw-r--r--  kernel/time/tick-common.c     |  12
-rw-r--r--  kernel/time/tick-sched.c      |  10
-rw-r--r--  kernel/trace/trace.c          |   4
26 files changed, 776 insertions, 562 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8ea32e8d68b0..bae131a1211b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -24,19 +24,20 @@
24cpumask_t cpu_present_map __read_mostly; 24cpumask_t cpu_present_map __read_mostly;
25EXPORT_SYMBOL(cpu_present_map); 25EXPORT_SYMBOL(cpu_present_map);
26 26
27#ifndef CONFIG_SMP
28
29/* 27/*
30 * Represents all cpu's that are currently online. 28 * Represents all cpu's that are currently online.
31 */ 29 */
32cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; 30cpumask_t cpu_online_map __read_mostly;
33EXPORT_SYMBOL(cpu_online_map); 31EXPORT_SYMBOL(cpu_online_map);
34 32
33#ifdef CONFIG_INIT_ALL_POSSIBLE
35cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; 34cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
35#else
36cpumask_t cpu_possible_map __read_mostly;
37#endif
36EXPORT_SYMBOL(cpu_possible_map); 38EXPORT_SYMBOL(cpu_possible_map);
37 39
38#else /* CONFIG_SMP */ 40#ifdef CONFIG_SMP
39
40/* Serializes the updates to cpu_online_map, cpu_present_map */ 41/* Serializes the updates to cpu_online_map, cpu_present_map */
41static DEFINE_MUTEX(cpu_add_remove_lock); 42static DEFINE_MUTEX(cpu_add_remove_lock);
42 43
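
The hunk above only changes how the global maps are initialized (cpu_possible_map starts as CPU_MASK_ALL only under CONFIG_INIT_ALL_POSSIBLE). As a reminder of how these maps are consumed, a minimal sketch of a hypothetical module that walks them through the usual accessors; it is illustrative only and not part of the patch:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cpumask.h>

static int __init cpumap_demo_init(void)
{
        int cpu;

        printk(KERN_INFO "possible CPUs: %d, online CPUs: %d\n",
               num_possible_cpus(), num_online_cpus());

        /* Walk the online map declared in kernel/cpu.c above. */
        for_each_online_cpu(cpu)
                printk(KERN_INFO "cpu%d is online\n", cpu);

        return 0;
}
module_init(cpumap_demo_init);
MODULE_LICENSE("GPL");
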
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 96c0ba13b8cd..39c1a4c1c5a9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -896,7 +896,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
896 if (!*buf) { 896 if (!*buf) {
897 cpus_clear(trialcs.cpus_allowed); 897 cpus_clear(trialcs.cpus_allowed);
898 } else { 898 } else {
899 retval = cpulist_parse(buf, trialcs.cpus_allowed); 899 retval = cpulist_parse(buf, &trialcs.cpus_allowed);
900 if (retval < 0) 900 if (retval < 0)
901 return retval; 901 return retval;
902 902
@@ -1482,7 +1482,7 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1482 mask = cs->cpus_allowed; 1482 mask = cs->cpus_allowed;
1483 mutex_unlock(&callback_mutex); 1483 mutex_unlock(&callback_mutex);
1484 1484
1485 return cpulist_scnprintf(page, PAGE_SIZE, mask); 1485 return cpulist_scnprintf(page, PAGE_SIZE, &mask);
1486} 1486}
1487 1487
1488static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) 1488static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
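
Both cpuset hunks reflect the new calling convention: cpulist_parse() and cpulist_scnprintf() now take a struct cpumask pointer instead of a cpumask_t by value. A minimal round-trip sketch using the pointer-based calls; the helper name and buffer handling are illustrative, not taken from the patch:

#include <linux/cpumask.h>
#include <linux/gfp.h>

static int demo_cpulist_roundtrip(const char *buf, char *page, int len)
{
        cpumask_var_t mask;
        int ret;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;

        ret = cpulist_parse(buf, mask);         /* e.g. "0-3,8" */
        if (!ret)
                ret = cpulist_scnprintf(page, len, mask);

        free_cpumask_var(mask);
        return ret;
}
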
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index bda9cb924276..eb2bfefa6dcc 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -32,7 +32,6 @@
32 */ 32 */
33 33
34#include <linux/cpu.h> 34#include <linux/cpu.h>
35#include <linux/irq.h>
36#include <linux/module.h> 35#include <linux/module.h>
37#include <linux/percpu.h> 36#include <linux/percpu.h>
38#include <linux/hrtimer.h> 37#include <linux/hrtimer.h>
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 650ce4102a63..cc0f7321b8ce 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,9 +40,6 @@ unsigned long probe_irq_on(void)
40 * flush such a longstanding irq before considering it as spurious. 40 * flush such a longstanding irq before considering it as spurious.
41 */ 41 */
42 for_each_irq_desc_reverse(i, desc) { 42 for_each_irq_desc_reverse(i, desc) {
43 if (!desc)
44 continue;
45
46 spin_lock_irq(&desc->lock); 43 spin_lock_irq(&desc->lock);
47 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 44 if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
48 /* 45 /*
@@ -71,9 +68,6 @@ unsigned long probe_irq_on(void)
71 * happened in the previous stage, it may have masked itself) 68 * happened in the previous stage, it may have masked itself)
72 */ 69 */
73 for_each_irq_desc_reverse(i, desc) { 70 for_each_irq_desc_reverse(i, desc) {
74 if (!desc)
75 continue;
76
77 spin_lock_irq(&desc->lock); 71 spin_lock_irq(&desc->lock);
78 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 72 if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
79 desc->status |= IRQ_AUTODETECT | IRQ_WAITING; 73 desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -92,9 +86,6 @@ unsigned long probe_irq_on(void)
92 * Now filter out any obviously spurious interrupts 86 * Now filter out any obviously spurious interrupts
93 */ 87 */
94 for_each_irq_desc(i, desc) { 88 for_each_irq_desc(i, desc) {
95 if (!desc)
96 continue;
97
98 spin_lock_irq(&desc->lock); 89 spin_lock_irq(&desc->lock);
99 status = desc->status; 90 status = desc->status;
100 91
@@ -133,9 +124,6 @@ unsigned int probe_irq_mask(unsigned long val)
133 int i; 124 int i;
134 125
135 for_each_irq_desc(i, desc) { 126 for_each_irq_desc(i, desc) {
136 if (!desc)
137 continue;
138
139 spin_lock_irq(&desc->lock); 127 spin_lock_irq(&desc->lock);
140 status = desc->status; 128 status = desc->status;
141 129
@@ -178,9 +166,6 @@ int probe_irq_off(unsigned long val)
178 unsigned int status; 166 unsigned int status;
179 167
180 for_each_irq_desc(i, desc) { 168 for_each_irq_desc(i, desc) {
181 if (!desc)
182 continue;
183
184 spin_lock_irq(&desc->lock); 169 spin_lock_irq(&desc->lock);
185 status = desc->status; 170 status = desc->status;
186 171
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6eb3c7952b64..f63c706d25e1 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -46,7 +46,7 @@ void dynamic_irq_init(unsigned int irq)
46 desc->irq_count = 0; 46 desc->irq_count = 0;
47 desc->irqs_unhandled = 0; 47 desc->irqs_unhandled = 0;
48#ifdef CONFIG_SMP 48#ifdef CONFIG_SMP
49 cpus_setall(desc->affinity); 49 cpumask_setall(&desc->affinity);
50#endif 50#endif
51 spin_unlock_irqrestore(&desc->lock, flags); 51 spin_unlock_irqrestore(&desc->lock, flags);
52} 52}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6492400cb50d..c20db0be9173 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -56,10 +56,6 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
56int nr_irqs = NR_IRQS; 56int nr_irqs = NR_IRQS;
57EXPORT_SYMBOL_GPL(nr_irqs); 57EXPORT_SYMBOL_GPL(nr_irqs);
58 58
59void __init __attribute__((weak)) arch_early_irq_init(void)
60{
61}
62
63#ifdef CONFIG_SPARSE_IRQ 59#ifdef CONFIG_SPARSE_IRQ
64static struct irq_desc irq_desc_init = { 60static struct irq_desc irq_desc_init = {
65 .irq = -1, 61 .irq = -1,
@@ -90,13 +86,11 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
90 desc->kstat_irqs = (unsigned int *)ptr; 86 desc->kstat_irqs = (unsigned int *)ptr;
91} 87}
92 88
93void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
94{
95}
96
97static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) 89static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
98{ 90{
99 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); 91 memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
92
93 spin_lock_init(&desc->lock);
100 desc->irq = irq; 94 desc->irq = irq;
101#ifdef CONFIG_SMP 95#ifdef CONFIG_SMP
102 desc->cpu = cpu; 96 desc->cpu = cpu;
@@ -134,7 +128,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
134/* FIXME: use bootmem alloc ...*/ 128/* FIXME: use bootmem alloc ...*/
135static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS]; 129static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
136 130
137void __init early_irq_init(void) 131int __init early_irq_init(void)
138{ 132{
139 struct irq_desc *desc; 133 struct irq_desc *desc;
140 int legacy_count; 134 int legacy_count;
@@ -146,6 +140,7 @@ void __init early_irq_init(void)
146 for (i = 0; i < legacy_count; i++) { 140 for (i = 0; i < legacy_count; i++) {
147 desc[i].irq = i; 141 desc[i].irq = i;
148 desc[i].kstat_irqs = kstat_irqs_legacy[i]; 142 desc[i].kstat_irqs = kstat_irqs_legacy[i];
143 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
149 144
150 irq_desc_ptrs[i] = desc + i; 145 irq_desc_ptrs[i] = desc + i;
151 } 146 }
@@ -153,7 +148,7 @@ void __init early_irq_init(void)
153 for (i = legacy_count; i < NR_IRQS; i++) 148 for (i = legacy_count; i < NR_IRQS; i++)
154 irq_desc_ptrs[i] = NULL; 149 irq_desc_ptrs[i] = NULL;
155 150
156 arch_early_irq_init(); 151 return arch_early_irq_init();
157} 152}
158 153
159struct irq_desc *irq_to_desc(unsigned int irq) 154struct irq_desc *irq_to_desc(unsigned int irq)
@@ -203,7 +198,7 @@ out_unlock:
203 return desc; 198 return desc;
204} 199}
205 200
206#else 201#else /* !CONFIG_SPARSE_IRQ */
207 202
208struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { 203struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
209 [0 ... NR_IRQS-1] = { 204 [0 ... NR_IRQS-1] = {
@@ -218,7 +213,31 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
218 } 213 }
219}; 214};
220 215
221#endif 216int __init early_irq_init(void)
217{
218 struct irq_desc *desc;
219 int count;
220 int i;
221
222 desc = irq_desc;
223 count = ARRAY_SIZE(irq_desc);
224
225 for (i = 0; i < count; i++)
226 desc[i].irq = i;
227
228 return arch_early_irq_init();
229}
230
231struct irq_desc *irq_to_desc(unsigned int irq)
232{
233 return (irq < NR_IRQS) ? irq_desc + irq : NULL;
234}
235
236struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
237{
238 return irq_to_desc(irq);
239}
240#endif /* !CONFIG_SPARSE_IRQ */
222 241
223/* 242/*
224 * What should we do if we get a hw irq event on an illegal vector? 243 * What should we do if we get a hw irq event on an illegal vector?
@@ -428,9 +447,6 @@ void early_init_irq_lock_class(void)
428 int i; 447 int i;
429 448
430 for_each_irq_desc(i, desc) { 449 for_each_irq_desc(i, desc) {
431 if (!desc)
432 continue;
433
434 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 450 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
435 } 451 }
436} 452}
@@ -439,7 +455,7 @@ void early_init_irq_lock_class(void)
439unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) 455unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
440{ 456{
441 struct irq_desc *desc = irq_to_desc(irq); 457 struct irq_desc *desc = irq_to_desc(irq);
442 return desc->kstat_irqs[cpu]; 458 return desc ? desc->kstat_irqs[cpu] : 0;
443} 459}
444#endif 460#endif
445EXPORT_SYMBOL(kstat_irqs_cpu); 461EXPORT_SYMBOL(kstat_irqs_cpu);
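
Two details in this file are worth noting: early_irq_init() now returns int so arch_early_irq_init() can report failure, and with CONFIG_SPARSE_IRQ irq_to_desc() may legitimately return NULL, which is why kstat_irqs_cpu() grows a NULL check. A standalone sketch of that defensive lookup idiom; the helper name is made up:

#include <linux/irq.h>

/* Illustrative helper: per-CPU interrupt count that tolerates a missing descriptor. */
static unsigned int demo_irq_count_on_cpu(unsigned int irq, int cpu)
{
        struct irq_desc *desc = irq_to_desc(irq);

        /* Under CONFIG_SPARSE_IRQ the descriptor may not have been allocated yet. */
        return desc ? desc->kstat_irqs[cpu] : 0;
}
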
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 540f6c49f3fa..61c4a9b62165 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -79,7 +79,7 @@ int irq_can_set_affinity(unsigned int irq)
79 * @cpumask: cpumask 79 * @cpumask: cpumask
80 * 80 *
81 */ 81 */
82int irq_set_affinity(unsigned int irq, cpumask_t cpumask) 82int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
83{ 83{
84 struct irq_desc *desc = irq_to_desc(irq); 84 struct irq_desc *desc = irq_to_desc(irq);
85 unsigned long flags; 85 unsigned long flags;
@@ -91,14 +91,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
91 91
92#ifdef CONFIG_GENERIC_PENDING_IRQ 92#ifdef CONFIG_GENERIC_PENDING_IRQ
93 if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { 93 if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
94 desc->affinity = cpumask; 94 cpumask_copy(&desc->affinity, cpumask);
95 desc->chip->set_affinity(irq, cpumask); 95 desc->chip->set_affinity(irq, cpumask);
96 } else { 96 } else {
97 desc->status |= IRQ_MOVE_PENDING; 97 desc->status |= IRQ_MOVE_PENDING;
98 desc->pending_mask = cpumask; 98 cpumask_copy(&desc->pending_mask, cpumask);
99 } 99 }
100#else 100#else
101 desc->affinity = cpumask; 101 cpumask_copy(&desc->affinity, cpumask);
102 desc->chip->set_affinity(irq, cpumask); 102 desc->chip->set_affinity(irq, cpumask);
103#endif 103#endif
104 desc->status |= IRQ_AFFINITY_SET; 104 desc->status |= IRQ_AFFINITY_SET;
@@ -112,26 +112,24 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
112 */ 112 */
113int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) 113int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
114{ 114{
115 cpumask_t mask;
116
117 if (!irq_can_set_affinity(irq)) 115 if (!irq_can_set_affinity(irq))
118 return 0; 116 return 0;
119 117
120 cpus_and(mask, cpu_online_map, irq_default_affinity);
121
122 /* 118 /*
123 * Preserve an userspace affinity setup, but make sure that 119 * Preserve an userspace affinity setup, but make sure that
124 * one of the targets is online. 120 * one of the targets is online.
125 */ 121 */
126 if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { 122 if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
127 if (cpus_intersects(desc->affinity, cpu_online_map)) 123 if (cpumask_any_and(&desc->affinity, cpu_online_mask)
128 mask = desc->affinity; 124 < nr_cpu_ids)
125 goto set_affinity;
129 else 126 else
130 desc->status &= ~IRQ_AFFINITY_SET; 127 desc->status &= ~IRQ_AFFINITY_SET;
131 } 128 }
132 129
133 desc->affinity = mask; 130 cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
134 desc->chip->set_affinity(irq, mask); 131set_affinity:
132 desc->chip->set_affinity(irq, &desc->affinity);
135 133
136 return 0; 134 return 0;
137} 135}
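
irq_set_affinity() now takes a const struct cpumask *, and do_irq_select_affinity() drops its on-stack cpumask_t by working directly on desc->affinity. A minimal sketch of an in-kernel caller under the new signature; the pinning helper and its device context are hypothetical:

#include <linux/interrupt.h>
#include <linux/cpumask.h>

/* Pin a (hypothetical) device interrupt to a single CPU. */
static int demo_pin_irq(unsigned int irq, int cpu)
{
        if (!irq_can_set_affinity(irq))
                return -EINVAL;

        /* cpumask_of(cpu) yields a const struct cpumask * covering one CPU. */
        return irq_set_affinity(irq, cpumask_of(cpu));
}
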
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 9db681d95814..bd72329e630c 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,7 +4,6 @@
4void move_masked_irq(int irq) 4void move_masked_irq(int irq)
5{ 5{
6 struct irq_desc *desc = irq_to_desc(irq); 6 struct irq_desc *desc = irq_to_desc(irq);
7 cpumask_t tmp;
8 7
9 if (likely(!(desc->status & IRQ_MOVE_PENDING))) 8 if (likely(!(desc->status & IRQ_MOVE_PENDING)))
10 return; 9 return;
@@ -19,7 +18,7 @@ void move_masked_irq(int irq)
19 18
20 desc->status &= ~IRQ_MOVE_PENDING; 19 desc->status &= ~IRQ_MOVE_PENDING;
21 20
22 if (unlikely(cpus_empty(desc->pending_mask))) 21 if (unlikely(cpumask_empty(&desc->pending_mask)))
23 return; 22 return;
24 23
25 if (!desc->chip->set_affinity) 24 if (!desc->chip->set_affinity)
@@ -27,8 +26,6 @@ void move_masked_irq(int irq)
27 26
28 assert_spin_locked(&desc->lock); 27 assert_spin_locked(&desc->lock);
29 28
30 cpus_and(tmp, desc->pending_mask, cpu_online_map);
31
32 /* 29 /*
33 * If there was a valid mask to work with, please 30 * If there was a valid mask to work with, please
34 * do the disable, re-program, enable sequence. 31 * do the disable, re-program, enable sequence.
@@ -41,10 +38,13 @@ void move_masked_irq(int irq)
41 * For correct operation this depends on the caller 38 * For correct operation this depends on the caller
42 * masking the irqs. 39 * masking the irqs.
43 */ 40 */
44 if (likely(!cpus_empty(tmp))) { 41 if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask)
45 desc->chip->set_affinity(irq,tmp); 42 < nr_cpu_ids)) {
43 cpumask_and(&desc->affinity,
44 &desc->pending_mask, cpu_online_mask);
45 desc->chip->set_affinity(irq, &desc->affinity);
46 } 46 }
47 cpus_clear(desc->pending_mask); 47 cpumask_clear(&desc->pending_mask);
48} 48}
49 49
50void move_native_irq(int irq) 50void move_native_irq(int irq)
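
move_masked_irq() no longer builds a temporary mask: it tests for an online target with cpumask_any_and(...) < nr_cpu_ids and then ANDs the pending mask straight into desc->affinity. The test idiom in isolation, as a standalone sketch:

#include <linux/types.h>
#include <linux/cpumask.h>

static bool demo_has_online_target(const struct cpumask *pending)
{
        /*
         * cpumask_any_and() returns the first CPU present in both masks,
         * or a value >= nr_cpu_ids when the intersection is empty; this is
         * the check the patch uses.  cpumask_intersects() is the boolean
         * equivalent.
         */
        return cpumask_any_and(pending, cpu_online_mask) < nr_cpu_ids;
}
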
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 089c3746358a..ecf765c6a77a 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -42,6 +42,7 @@ static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
42 struct irq_desc *desc, int cpu) 42 struct irq_desc *desc, int cpu)
43{ 43{
44 memcpy(desc, old_desc, sizeof(struct irq_desc)); 44 memcpy(desc, old_desc, sizeof(struct irq_desc));
45 spin_lock_init(&desc->lock);
45 desc->cpu = cpu; 46 desc->cpu = cpu;
46 lockdep_set_class(&desc->lock, &irq_desc_lock_class); 47 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
47 init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); 48 init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
@@ -74,10 +75,8 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
74 75
75 node = cpu_to_node(cpu); 76 node = cpu_to_node(cpu);
76 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); 77 desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
77 printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n",
78 irq, cpu, node);
79 if (!desc) { 78 if (!desc) {
80 printk(KERN_ERR "can not get new irq_desc for moving\n"); 79 printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq);
81 /* still use old one */ 80 /* still use old one */
82 desc = old_desc; 81 desc = old_desc;
83 goto out_unlock; 82 goto out_unlock;
@@ -106,8 +105,6 @@ struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
106 return desc; 105 return desc;
107 106
108 old_cpu = desc->cpu; 107 old_cpu = desc->cpu;
109 printk(KERN_DEBUG
110 "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu);
111 if (old_cpu != cpu) { 108 if (old_cpu != cpu) {
112 node = cpu_to_node(cpu); 109 node = cpu_to_node(cpu);
113 old_node = cpu_to_node(old_cpu); 110 old_node = cpu_to_node(old_cpu);
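
init_copy_one_irq_desc() gains a spin_lock_init() immediately after the memcpy(). The general rule it illustrates: copying a structure that embeds a spinlock also copies the lock's state (and lockdep bookkeeping), so the copy's lock must be re-initialized before use. A minimal standalone sketch of the pattern; the structure is made up:

#include <linux/spinlock.h>
#include <linux/string.h>

struct demo_obj {
        spinlock_t lock;
        int value;
};

static void demo_clone(struct demo_obj *dst, const struct demo_obj *src)
{
        memcpy(dst, src, sizeof(*dst));
        /* Never reuse the copied lock word; give the clone a fresh lock. */
        spin_lock_init(&dst->lock);
}
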
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index f6b3440f05bc..d2c0e5ee53c5 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -40,33 +40,42 @@ static ssize_t irq_affinity_proc_write(struct file *file,
40 const char __user *buffer, size_t count, loff_t *pos) 40 const char __user *buffer, size_t count, loff_t *pos)
41{ 41{
42 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; 42 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
43 cpumask_t new_value; 43 cpumask_var_t new_value;
44 int err; 44 int err;
45 45
46 if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || 46 if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
47 irq_balancing_disabled(irq)) 47 irq_balancing_disabled(irq))
48 return -EIO; 48 return -EIO;
49 49
50 if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
51 return -ENOMEM;
52
50 err = cpumask_parse_user(buffer, count, new_value); 53 err = cpumask_parse_user(buffer, count, new_value);
51 if (err) 54 if (err)
52 return err; 55 goto free_cpumask;
53 56
54 if (!is_affinity_mask_valid(new_value)) 57 if (!is_affinity_mask_valid(*new_value)) {
55 return -EINVAL; 58 err = -EINVAL;
59 goto free_cpumask;
60 }
56 61
57 /* 62 /*
58 * Do not allow disabling IRQs completely - it's a too easy 63 * Do not allow disabling IRQs completely - it's a too easy
59 * way to make the system unusable accidentally :-) At least 64 * way to make the system unusable accidentally :-) At least
60 * one online CPU still has to be targeted. 65 * one online CPU still has to be targeted.
61 */ 66 */
62 if (!cpus_intersects(new_value, cpu_online_map)) 67 if (!cpumask_intersects(new_value, cpu_online_mask)) {
63 /* Special case for empty set - allow the architecture 68 /* Special case for empty set - allow the architecture
64 code to set default SMP affinity. */ 69 code to set default SMP affinity. */
65 return irq_select_affinity_usr(irq) ? -EINVAL : count; 70 err = irq_select_affinity_usr(irq) ? -EINVAL : count;
66 71 } else {
67 irq_set_affinity(irq, new_value); 72 irq_set_affinity(irq, new_value);
73 err = count;
74 }
68 75
69 return count; 76free_cpumask:
77 free_cpumask_var(new_value);
78 return err;
70} 79}
71 80
72static int irq_affinity_proc_open(struct inode *inode, struct file *file) 81static int irq_affinity_proc_open(struct inode *inode, struct file *file)
@@ -95,7 +104,7 @@ static ssize_t default_affinity_write(struct file *file,
95 cpumask_t new_value; 104 cpumask_t new_value;
96 int err; 105 int err;
97 106
98 err = cpumask_parse_user(buffer, count, new_value); 107 err = cpumask_parse_user(buffer, count, &new_value);
99 if (err) 108 if (err)
100 return err; 109 return err;
101 110
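
irq_affinity_proc_write() switches from an on-stack cpumask_t to cpumask_var_t, which must be explicitly allocated and freed; note the single exit label so every path releases the mask. The same shape as a standalone sketch (the patch's fallback to a default affinity for an empty online intersection is omitted here):

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/uaccess.h>

static ssize_t demo_parse_user_mask(const char __user *buffer, size_t count)
{
        cpumask_var_t new_value;
        int err;

        if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
                return -ENOMEM;

        err = cpumask_parse_user(buffer, count, new_value);
        if (err)
                goto free_cpumask;

        if (!cpumask_intersects(new_value, cpu_online_mask)) {
                err = -EINVAL;          /* refuse a mask with no online CPU */
                goto free_cpumask;
        }

        err = count;                    /* success: report bytes consumed */

free_cpumask:
        free_cpumask_var(new_value);
        return err;
}
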
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3738107531fd..dd364c11e56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,9 +91,6 @@ static int misrouted_irq(int irq)
91 int i, ok = 0; 91 int i, ok = 0;
92 92
93 for_each_irq_desc(i, desc) { 93 for_each_irq_desc(i, desc) {
94 if (!desc)
95 continue;
96
97 if (!i) 94 if (!i)
98 continue; 95 continue;
99 96
@@ -115,8 +112,6 @@ static void poll_spurious_irqs(unsigned long dummy)
115 for_each_irq_desc(i, desc) { 112 for_each_irq_desc(i, desc) {
116 unsigned int status; 113 unsigned int status;
117 114
118 if (!desc)
119 continue;
120 if (!i) 115 if (!i)
121 continue; 116 continue;
122 117
diff --git a/kernel/profile.c b/kernel/profile.c
index 60adefb59b5e..4cb7d68fed82 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,7 +442,7 @@ void profile_tick(int type)
442static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, 442static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
443 int count, int *eof, void *data) 443 int count, int *eof, void *data)
444{ 444{
445 int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); 445 int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
446 if (count - len < 2) 446 if (count - len < 2)
447 return -EINVAL; 447 return -EINVAL;
448 len += sprintf(page + len, "\n"); 448 len += sprintf(page + len, "\n");
@@ -456,7 +456,7 @@ static int prof_cpu_mask_write_proc(struct file *file,
456 unsigned long full_count = count, err; 456 unsigned long full_count = count, err;
457 cpumask_t new_value; 457 cpumask_t new_value;
458 458
459 err = cpumask_parse_user(buffer, count, new_value); 459 err = cpumask_parse_user(buffer, count, &new_value);
460 if (err) 460 if (err)
461 return err; 461 return err;
462 462
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index e503a002f330..c03ca3e61919 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
393 * unnecessarily. 393 * unnecessarily.
394 */ 394 */
395 smp_mb(); 395 smp_mb();
396 cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); 396 cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask);
397 397
398 rcp->signaled = 0; 398 rcp->signaled = 0;
399 } 399 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 635eaffe1e4c..930bf2e6d714 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -498,18 +498,26 @@ struct rt_rq {
498 */ 498 */
499struct root_domain { 499struct root_domain {
500 atomic_t refcount; 500 atomic_t refcount;
501 cpumask_t span; 501 cpumask_var_t span;
502 cpumask_t online; 502 cpumask_var_t online;
503 503
504 /* 504 /*
505 * The "RT overload" flag: it gets set if a CPU has more than 505 * The "RT overload" flag: it gets set if a CPU has more than
506 * one runnable RT task. 506 * one runnable RT task.
507 */ 507 */
508 cpumask_t rto_mask; 508 cpumask_var_t rto_mask;
509 atomic_t rto_count; 509 atomic_t rto_count;
510#ifdef CONFIG_SMP 510#ifdef CONFIG_SMP
511 struct cpupri cpupri; 511 struct cpupri cpupri;
512#endif 512#endif
513#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
514 /*
515 * Preferred wake up cpu nominated by sched_mc balance that will be
516 * used when most cpus are idle in the system indicating overall very
517 * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
518 */
519 unsigned int sched_mc_preferred_wakeup_cpu;
520#endif
513}; 521};
514 522
515/* 523/*
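
The root_domain masks (span, online, rto_mask) become cpumask_var_t, so with CONFIG_CPUMASK_OFFSTACK they are no longer embedded arrays but pointers that the owning structure must allocate and release. A hedged sketch of that lifetime pattern for mask members; the structure and functions are illustrative, not the scheduler's own init path:

#include <linux/cpumask.h>
#include <linux/gfp.h>

struct demo_domain {
        cpumask_var_t span;
        cpumask_var_t online;
};

static int demo_domain_init(struct demo_domain *d)
{
        if (!alloc_cpumask_var(&d->span, GFP_KERNEL))
                return -ENOMEM;
        if (!alloc_cpumask_var(&d->online, GFP_KERNEL)) {
                free_cpumask_var(d->span);
                return -ENOMEM;
        }
        /* alloc_cpumask_var() does not zero the mask; clear before use. */
        cpumask_clear(d->span);
        cpumask_clear(d->online);
        return 0;
}

static void demo_domain_free(struct demo_domain *d)
{
        free_cpumask_var(d->online);
        free_cpumask_var(d->span);
}
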
@@ -1514,7 +1522,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1514 struct sched_domain *sd = data; 1522 struct sched_domain *sd = data;
1515 int i; 1523 int i;
1516 1524
1517 for_each_cpu_mask(i, sd->span) { 1525 for_each_cpu(i, sched_domain_span(sd)) {
1518 /* 1526 /*
1519 * If there are currently no tasks on the cpu pretend there 1527 * If there are currently no tasks on the cpu pretend there
1520 * is one of average load so that when a new task gets to 1528 * is one of average load so that when a new task gets to
@@ -1535,7 +1543,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1535 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) 1543 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
1536 shares = tg->shares; 1544 shares = tg->shares;
1537 1545
1538 for_each_cpu_mask(i, sd->span) 1546 for_each_cpu(i, sched_domain_span(sd))
1539 update_group_shares_cpu(tg, i, shares, rq_weight); 1547 update_group_shares_cpu(tg, i, shares, rq_weight);
1540 1548
1541 return 0; 1549 return 0;
@@ -2101,15 +2109,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2101 int i; 2109 int i;
2102 2110
2103 /* Skip over this group if it has no CPUs allowed */ 2111 /* Skip over this group if it has no CPUs allowed */
2104 if (!cpus_intersects(group->cpumask, p->cpus_allowed)) 2112 if (!cpumask_intersects(sched_group_cpus(group),
2113 &p->cpus_allowed))
2105 continue; 2114 continue;
2106 2115
2107 local_group = cpu_isset(this_cpu, group->cpumask); 2116 local_group = cpumask_test_cpu(this_cpu,
2117 sched_group_cpus(group));
2108 2118
2109 /* Tally up the load of all CPUs in the group */ 2119 /* Tally up the load of all CPUs in the group */
2110 avg_load = 0; 2120 avg_load = 0;
2111 2121
2112 for_each_cpu_mask_nr(i, group->cpumask) { 2122 for_each_cpu(i, sched_group_cpus(group)) {
2113 /* Bias balancing toward cpus of our domain */ 2123 /* Bias balancing toward cpus of our domain */
2114 if (local_group) 2124 if (local_group)
2115 load = source_load(i, load_idx); 2125 load = source_load(i, load_idx);
@@ -2141,17 +2151,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2141 * find_idlest_cpu - find the idlest cpu among the cpus in group. 2151 * find_idlest_cpu - find the idlest cpu among the cpus in group.
2142 */ 2152 */
2143static int 2153static int
2144find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, 2154find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
2145 cpumask_t *tmp)
2146{ 2155{
2147 unsigned long load, min_load = ULONG_MAX; 2156 unsigned long load, min_load = ULONG_MAX;
2148 int idlest = -1; 2157 int idlest = -1;
2149 int i; 2158 int i;
2150 2159
2151 /* Traverse only the allowed CPUs */ 2160 /* Traverse only the allowed CPUs */
2152 cpus_and(*tmp, group->cpumask, p->cpus_allowed); 2161 for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
2153
2154 for_each_cpu_mask_nr(i, *tmp) {
2155 load = weighted_cpuload(i); 2162 load = weighted_cpuload(i);
2156 2163
2157 if (load < min_load || (load == min_load && i == this_cpu)) { 2164 if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2193,7 +2200,6 @@ static int sched_balance_self(int cpu, int flag)
2193 update_shares(sd); 2200 update_shares(sd);
2194 2201
2195 while (sd) { 2202 while (sd) {
2196 cpumask_t span, tmpmask;
2197 struct sched_group *group; 2203 struct sched_group *group;
2198 int new_cpu, weight; 2204 int new_cpu, weight;
2199 2205
@@ -2202,14 +2208,13 @@ static int sched_balance_self(int cpu, int flag)
2202 continue; 2208 continue;
2203 } 2209 }
2204 2210
2205 span = sd->span;
2206 group = find_idlest_group(sd, t, cpu); 2211 group = find_idlest_group(sd, t, cpu);
2207 if (!group) { 2212 if (!group) {
2208 sd = sd->child; 2213 sd = sd->child;
2209 continue; 2214 continue;
2210 } 2215 }
2211 2216
2212 new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); 2217 new_cpu = find_idlest_cpu(group, t, cpu);
2213 if (new_cpu == -1 || new_cpu == cpu) { 2218 if (new_cpu == -1 || new_cpu == cpu) {
2214 /* Now try balancing at a lower domain level of cpu */ 2219 /* Now try balancing at a lower domain level of cpu */
2215 sd = sd->child; 2220 sd = sd->child;
@@ -2218,10 +2223,10 @@ static int sched_balance_self(int cpu, int flag)
2218 2223
2219 /* Now try balancing at a lower domain level of new_cpu */ 2224 /* Now try balancing at a lower domain level of new_cpu */
2220 cpu = new_cpu; 2225 cpu = new_cpu;
2226 weight = cpumask_weight(sched_domain_span(sd));
2221 sd = NULL; 2227 sd = NULL;
2222 weight = cpus_weight(span);
2223 for_each_domain(cpu, tmp) { 2228 for_each_domain(cpu, tmp) {
2224 if (weight <= cpus_weight(tmp->span)) 2229 if (weight <= cpumask_weight(sched_domain_span(tmp)))
2225 break; 2230 break;
2226 if (tmp->flags & flag) 2231 if (tmp->flags & flag)
2227 sd = tmp; 2232 sd = tmp;
@@ -2266,7 +2271,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2266 cpu = task_cpu(p); 2271 cpu = task_cpu(p);
2267 2272
2268 for_each_domain(this_cpu, sd) { 2273 for_each_domain(this_cpu, sd) {
2269 if (cpu_isset(cpu, sd->span)) { 2274 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2270 update_shares(sd); 2275 update_shares(sd);
2271 break; 2276 break;
2272 } 2277 }
@@ -2315,7 +2320,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2315 else { 2320 else {
2316 struct sched_domain *sd; 2321 struct sched_domain *sd;
2317 for_each_domain(this_cpu, sd) { 2322 for_each_domain(this_cpu, sd) {
2318 if (cpu_isset(cpu, sd->span)) { 2323 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2319 schedstat_inc(sd, ttwu_wake_remote); 2324 schedstat_inc(sd, ttwu_wake_remote);
2320 break; 2325 break;
2321 } 2326 }
@@ -2846,7 +2851,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
2846 struct rq *rq; 2851 struct rq *rq;
2847 2852
2848 rq = task_rq_lock(p, &flags); 2853 rq = task_rq_lock(p, &flags);
2849 if (!cpu_isset(dest_cpu, p->cpus_allowed) 2854 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
2850 || unlikely(!cpu_active(dest_cpu))) 2855 || unlikely(!cpu_active(dest_cpu)))
2851 goto out; 2856 goto out;
2852 2857
@@ -2911,7 +2916,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
2911 * 2) cannot be migrated to this CPU due to cpus_allowed, or 2916 * 2) cannot be migrated to this CPU due to cpus_allowed, or
2912 * 3) are cache-hot on their current CPU. 2917 * 3) are cache-hot on their current CPU.
2913 */ 2918 */
2914 if (!cpu_isset(this_cpu, p->cpus_allowed)) { 2919 if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
2915 schedstat_inc(p, se.nr_failed_migrations_affine); 2920 schedstat_inc(p, se.nr_failed_migrations_affine);
2916 return 0; 2921 return 0;
2917 } 2922 }
@@ -3086,7 +3091,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
3086static struct sched_group * 3091static struct sched_group *
3087find_busiest_group(struct sched_domain *sd, int this_cpu, 3092find_busiest_group(struct sched_domain *sd, int this_cpu,
3088 unsigned long *imbalance, enum cpu_idle_type idle, 3093 unsigned long *imbalance, enum cpu_idle_type idle,
3089 int *sd_idle, const cpumask_t *cpus, int *balance) 3094 int *sd_idle, const struct cpumask *cpus, int *balance)
3090{ 3095{
3091 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; 3096 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
3092 unsigned long max_load, avg_load, total_load, this_load, total_pwr; 3097 unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -3122,10 +3127,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3122 unsigned long sum_avg_load_per_task; 3127 unsigned long sum_avg_load_per_task;
3123 unsigned long avg_load_per_task; 3128 unsigned long avg_load_per_task;
3124 3129
3125 local_group = cpu_isset(this_cpu, group->cpumask); 3130 local_group = cpumask_test_cpu(this_cpu,
3131 sched_group_cpus(group));
3126 3132
3127 if (local_group) 3133 if (local_group)
3128 balance_cpu = first_cpu(group->cpumask); 3134 balance_cpu = cpumask_first(sched_group_cpus(group));
3129 3135
3130 /* Tally up the load of all CPUs in the group */ 3136 /* Tally up the load of all CPUs in the group */
3131 sum_weighted_load = sum_nr_running = avg_load = 0; 3137 sum_weighted_load = sum_nr_running = avg_load = 0;
@@ -3134,13 +3140,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3134 max_cpu_load = 0; 3140 max_cpu_load = 0;
3135 min_cpu_load = ~0UL; 3141 min_cpu_load = ~0UL;
3136 3142
3137 for_each_cpu_mask_nr(i, group->cpumask) { 3143 for_each_cpu_and(i, sched_group_cpus(group), cpus) {
3138 struct rq *rq; 3144 struct rq *rq = cpu_rq(i);
3139
3140 if (!cpu_isset(i, *cpus))
3141 continue;
3142
3143 rq = cpu_rq(i);
3144 3145
3145 if (*sd_idle && rq->nr_running) 3146 if (*sd_idle && rq->nr_running)
3146 *sd_idle = 0; 3147 *sd_idle = 0;
@@ -3251,8 +3252,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3251 */ 3252 */
3252 if ((sum_nr_running < min_nr_running) || 3253 if ((sum_nr_running < min_nr_running) ||
3253 (sum_nr_running == min_nr_running && 3254 (sum_nr_running == min_nr_running &&
3254 first_cpu(group->cpumask) < 3255 cpumask_first(sched_group_cpus(group)) >
3255 first_cpu(group_min->cpumask))) { 3256 cpumask_first(sched_group_cpus(group_min)))) {
3256 group_min = group; 3257 group_min = group;
3257 min_nr_running = sum_nr_running; 3258 min_nr_running = sum_nr_running;
3258 min_load_per_task = sum_weighted_load / 3259 min_load_per_task = sum_weighted_load /
@@ -3267,8 +3268,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3267 if (sum_nr_running <= group_capacity - 1) { 3268 if (sum_nr_running <= group_capacity - 1) {
3268 if (sum_nr_running > leader_nr_running || 3269 if (sum_nr_running > leader_nr_running ||
3269 (sum_nr_running == leader_nr_running && 3270 (sum_nr_running == leader_nr_running &&
3270 first_cpu(group->cpumask) > 3271 cpumask_first(sched_group_cpus(group)) <
3271 first_cpu(group_leader->cpumask))) { 3272 cpumask_first(sched_group_cpus(group_leader)))) {
3272 group_leader = group; 3273 group_leader = group;
3273 leader_nr_running = sum_nr_running; 3274 leader_nr_running = sum_nr_running;
3274 } 3275 }
@@ -3394,6 +3395,10 @@ out_balanced:
3394 3395
3395 if (this == group_leader && group_leader != group_min) { 3396 if (this == group_leader && group_leader != group_min) {
3396 *imbalance = min_load_per_task; 3397 *imbalance = min_load_per_task;
3398 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
3399 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
3400 cpumask_first(sched_group_cpus(group_leader));
3401 }
3397 return group_min; 3402 return group_min;
3398 } 3403 }
3399#endif 3404#endif
@@ -3407,16 +3412,16 @@ ret:
3407 */ 3412 */
3408static struct rq * 3413static struct rq *
3409find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, 3414find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3410 unsigned long imbalance, const cpumask_t *cpus) 3415 unsigned long imbalance, const struct cpumask *cpus)
3411{ 3416{
3412 struct rq *busiest = NULL, *rq; 3417 struct rq *busiest = NULL, *rq;
3413 unsigned long max_load = 0; 3418 unsigned long max_load = 0;
3414 int i; 3419 int i;
3415 3420
3416 for_each_cpu_mask_nr(i, group->cpumask) { 3421 for_each_cpu(i, sched_group_cpus(group)) {
3417 unsigned long wl; 3422 unsigned long wl;
3418 3423
3419 if (!cpu_isset(i, *cpus)) 3424 if (!cpumask_test_cpu(i, cpus))
3420 continue; 3425 continue;
3421 3426
3422 rq = cpu_rq(i); 3427 rq = cpu_rq(i);
@@ -3446,7 +3451,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3446 */ 3451 */
3447static int load_balance(int this_cpu, struct rq *this_rq, 3452static int load_balance(int this_cpu, struct rq *this_rq,
3448 struct sched_domain *sd, enum cpu_idle_type idle, 3453 struct sched_domain *sd, enum cpu_idle_type idle,
3449 int *balance, cpumask_t *cpus) 3454 int *balance, struct cpumask *cpus)
3450{ 3455{
3451 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 3456 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
3452 struct sched_group *group; 3457 struct sched_group *group;
@@ -3454,7 +3459,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3454 struct rq *busiest; 3459 struct rq *busiest;
3455 unsigned long flags; 3460 unsigned long flags;
3456 3461
3457 cpus_setall(*cpus); 3462 cpumask_setall(cpus);
3458 3463
3459 /* 3464 /*
3460 * When power savings policy is enabled for the parent domain, idle 3465 * When power savings policy is enabled for the parent domain, idle
@@ -3514,8 +3519,8 @@ redo:
3514 3519
3515 /* All tasks on this runqueue were pinned by CPU affinity */ 3520 /* All tasks on this runqueue were pinned by CPU affinity */
3516 if (unlikely(all_pinned)) { 3521 if (unlikely(all_pinned)) {
3517 cpu_clear(cpu_of(busiest), *cpus); 3522 cpumask_clear_cpu(cpu_of(busiest), cpus);
3518 if (!cpus_empty(*cpus)) 3523 if (!cpumask_empty(cpus))
3519 goto redo; 3524 goto redo;
3520 goto out_balanced; 3525 goto out_balanced;
3521 } 3526 }
@@ -3532,7 +3537,8 @@ redo:
3532 /* don't kick the migration_thread, if the curr 3537 /* don't kick the migration_thread, if the curr
3533 * task on busiest cpu can't be moved to this_cpu 3538 * task on busiest cpu can't be moved to this_cpu
3534 */ 3539 */
3535 if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { 3540 if (!cpumask_test_cpu(this_cpu,
3541 &busiest->curr->cpus_allowed)) {
3536 spin_unlock_irqrestore(&busiest->lock, flags); 3542 spin_unlock_irqrestore(&busiest->lock, flags);
3537 all_pinned = 1; 3543 all_pinned = 1;
3538 goto out_one_pinned; 3544 goto out_one_pinned;
@@ -3607,7 +3613,7 @@ out:
3607 */ 3613 */
3608static int 3614static int
3609load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, 3615load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3610 cpumask_t *cpus) 3616 struct cpumask *cpus)
3611{ 3617{
3612 struct sched_group *group; 3618 struct sched_group *group;
3613 struct rq *busiest = NULL; 3619 struct rq *busiest = NULL;
@@ -3616,7 +3622,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3616 int sd_idle = 0; 3622 int sd_idle = 0;
3617 int all_pinned = 0; 3623 int all_pinned = 0;
3618 3624
3619 cpus_setall(*cpus); 3625 cpumask_setall(cpus);
3620 3626
3621 /* 3627 /*
3622 * When power savings policy is enabled for the parent domain, idle 3628 * When power savings policy is enabled for the parent domain, idle
@@ -3660,17 +3666,71 @@ redo:
3660 double_unlock_balance(this_rq, busiest); 3666 double_unlock_balance(this_rq, busiest);
3661 3667
3662 if (unlikely(all_pinned)) { 3668 if (unlikely(all_pinned)) {
3663 cpu_clear(cpu_of(busiest), *cpus); 3669 cpumask_clear_cpu(cpu_of(busiest), cpus);
3664 if (!cpus_empty(*cpus)) 3670 if (!cpumask_empty(cpus))
3665 goto redo; 3671 goto redo;
3666 } 3672 }
3667 } 3673 }
3668 3674
3669 if (!ld_moved) { 3675 if (!ld_moved) {
3676 int active_balance = 0;
3677
3670 schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); 3678 schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
3671 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 3679 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
3672 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) 3680 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3673 return -1; 3681 return -1;
3682
3683 if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
3684 return -1;
3685
3686 if (sd->nr_balance_failed++ < 2)
3687 return -1;
3688
3689 /*
3690 * The only task running in a non-idle cpu can be moved to this
3691 * cpu in an attempt to completely freeup the other CPU
3692 * package. The same method used to move task in load_balance()
3693 * have been extended for load_balance_newidle() to speedup
3694 * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2)
3695 *
3696 * The package power saving logic comes from
3697 * find_busiest_group(). If there are no imbalance, then
3698 * f_b_g() will return NULL. However when sched_mc={1,2} then
3699 * f_b_g() will select a group from which a running task may be
3700 * pulled to this cpu in order to make the other package idle.
3701 * If there is no opportunity to make a package idle and if
3702 * there are no imbalance, then f_b_g() will return NULL and no
3703 * action will be taken in load_balance_newidle().
3704 *
3705 * Under normal task pull operation due to imbalance, there
3706 * will be more than one task in the source run queue and
3707 * move_tasks() will succeed. ld_moved will be true and this
3708 * active balance code will not be triggered.
3709 */
3710
3711 /* Lock busiest in correct order while this_rq is held */
3712 double_lock_balance(this_rq, busiest);
3713
3714 /*
3715 * don't kick the migration_thread, if the curr
3716 * task on busiest cpu can't be moved to this_cpu
3717 */
3718 if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
3719 double_unlock_balance(this_rq, busiest);
3720 all_pinned = 1;
3721 return ld_moved;
3722 }
3723
3724 if (!busiest->active_balance) {
3725 busiest->active_balance = 1;
3726 busiest->push_cpu = this_cpu;
3727 active_balance = 1;
3728 }
3729
3730 double_unlock_balance(this_rq, busiest);
3731 if (active_balance)
3732 wake_up_process(busiest->migration_thread);
3733
3674 } else 3734 } else
3675 sd->nr_balance_failed = 0; 3735 sd->nr_balance_failed = 0;
3676 3736
@@ -3696,7 +3756,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3696 struct sched_domain *sd; 3756 struct sched_domain *sd;
3697 int pulled_task = 0; 3757 int pulled_task = 0;
3698 unsigned long next_balance = jiffies + HZ; 3758 unsigned long next_balance = jiffies + HZ;
3699 cpumask_t tmpmask; 3759 cpumask_var_t tmpmask;
3760
3761 if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
3762 return;
3700 3763
3701 for_each_domain(this_cpu, sd) { 3764 for_each_domain(this_cpu, sd) {
3702 unsigned long interval; 3765 unsigned long interval;
@@ -3707,7 +3770,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3707 if (sd->flags & SD_BALANCE_NEWIDLE) 3770 if (sd->flags & SD_BALANCE_NEWIDLE)
3708 /* If we've pulled tasks over stop searching: */ 3771 /* If we've pulled tasks over stop searching: */
3709 pulled_task = load_balance_newidle(this_cpu, this_rq, 3772 pulled_task = load_balance_newidle(this_cpu, this_rq,
3710 sd, &tmpmask); 3773 sd, tmpmask);
3711 3774
3712 interval = msecs_to_jiffies(sd->balance_interval); 3775 interval = msecs_to_jiffies(sd->balance_interval);
3713 if (time_after(next_balance, sd->last_balance + interval)) 3776 if (time_after(next_balance, sd->last_balance + interval))
@@ -3722,6 +3785,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3722 */ 3785 */
3723 this_rq->next_balance = next_balance; 3786 this_rq->next_balance = next_balance;
3724 } 3787 }
3788 free_cpumask_var(tmpmask);
3725} 3789}
3726 3790
3727/* 3791/*
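
idle_balance() and rebalance_domains() replace large on-stack cpumask_t temporaries with cpumask_var_t allocated with GFP_ATOMIC (both run in contexts that cannot sleep) and simply bail out when the allocation fails, treating a skipped balance pass as harmless. The skeleton of that pattern as a standalone sketch:

#include <linux/cpumask.h>
#include <linux/gfp.h>

static void demo_atomic_scratch_mask(void)
{
        cpumask_var_t tmp;

        /* Atomic context: no sleeping allocation, and failure is tolerated. */
        if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
                return;

        cpumask_copy(tmp, cpu_online_mask);
        /* ... use tmp as scratch space ... */

        free_cpumask_var(tmp);
}
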
@@ -3759,7 +3823,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3759 /* Search for an sd spanning us and the target CPU. */ 3823 /* Search for an sd spanning us and the target CPU. */
3760 for_each_domain(target_cpu, sd) { 3824 for_each_domain(target_cpu, sd) {
3761 if ((sd->flags & SD_LOAD_BALANCE) && 3825 if ((sd->flags & SD_LOAD_BALANCE) &&
3762 cpu_isset(busiest_cpu, sd->span)) 3826 cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
3763 break; 3827 break;
3764 } 3828 }
3765 3829
@@ -3778,10 +3842,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3778#ifdef CONFIG_NO_HZ 3842#ifdef CONFIG_NO_HZ
3779static struct { 3843static struct {
3780 atomic_t load_balancer; 3844 atomic_t load_balancer;
3781 cpumask_t cpu_mask; 3845 cpumask_var_t cpu_mask;
3782} nohz ____cacheline_aligned = { 3846} nohz ____cacheline_aligned = {
3783 .load_balancer = ATOMIC_INIT(-1), 3847 .load_balancer = ATOMIC_INIT(-1),
3784 .cpu_mask = CPU_MASK_NONE,
3785}; 3848};
3786 3849
3787/* 3850/*
@@ -3809,7 +3872,7 @@ int select_nohz_load_balancer(int stop_tick)
3809 int cpu = smp_processor_id(); 3872 int cpu = smp_processor_id();
3810 3873
3811 if (stop_tick) { 3874 if (stop_tick) {
3812 cpu_set(cpu, nohz.cpu_mask); 3875 cpumask_set_cpu(cpu, nohz.cpu_mask);
3813 cpu_rq(cpu)->in_nohz_recently = 1; 3876 cpu_rq(cpu)->in_nohz_recently = 1;
3814 3877
3815 /* 3878 /*
@@ -3823,7 +3886,7 @@ int select_nohz_load_balancer(int stop_tick)
3823 } 3886 }
3824 3887
3825 /* time for ilb owner also to sleep */ 3888 /* time for ilb owner also to sleep */
3826 if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) { 3889 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
3827 if (atomic_read(&nohz.load_balancer) == cpu) 3890 if (atomic_read(&nohz.load_balancer) == cpu)
3828 atomic_set(&nohz.load_balancer, -1); 3891 atomic_set(&nohz.load_balancer, -1);
3829 return 0; 3892 return 0;
@@ -3836,10 +3899,10 @@ int select_nohz_load_balancer(int stop_tick)
3836 } else if (atomic_read(&nohz.load_balancer) == cpu) 3899 } else if (atomic_read(&nohz.load_balancer) == cpu)
3837 return 1; 3900 return 1;
3838 } else { 3901 } else {
3839 if (!cpu_isset(cpu, nohz.cpu_mask)) 3902 if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
3840 return 0; 3903 return 0;
3841 3904
3842 cpu_clear(cpu, nohz.cpu_mask); 3905 cpumask_clear_cpu(cpu, nohz.cpu_mask);
3843 3906
3844 if (atomic_read(&nohz.load_balancer) == cpu) 3907 if (atomic_read(&nohz.load_balancer) == cpu)
3845 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3908 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
@@ -3867,7 +3930,11 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3867 unsigned long next_balance = jiffies + 60*HZ; 3930 unsigned long next_balance = jiffies + 60*HZ;
3868 int update_next_balance = 0; 3931 int update_next_balance = 0;
3869 int need_serialize; 3932 int need_serialize;
3870 cpumask_t tmp; 3933 cpumask_var_t tmp;
3934
3935 /* Fails alloc? Rebalancing probably not a priority right now. */
3936 if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
3937 return;
3871 3938
3872 for_each_domain(cpu, sd) { 3939 for_each_domain(cpu, sd) {
3873 if (!(sd->flags & SD_LOAD_BALANCE)) 3940 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3892,7 +3959,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3892 } 3959 }
3893 3960
3894 if (time_after_eq(jiffies, sd->last_balance + interval)) { 3961 if (time_after_eq(jiffies, sd->last_balance + interval)) {
3895 if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { 3962 if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
3896 /* 3963 /*
3897 * We've pulled tasks over so either we're no 3964 * We've pulled tasks over so either we're no
3898 * longer idle, or one of our SMT siblings is 3965 * longer idle, or one of our SMT siblings is
@@ -3926,6 +3993,8 @@ out:
3926 */ 3993 */
3927 if (likely(update_next_balance)) 3994 if (likely(update_next_balance))
3928 rq->next_balance = next_balance; 3995 rq->next_balance = next_balance;
3996
3997 free_cpumask_var(tmp);
3929} 3998}
3930 3999
3931/* 4000/*
@@ -3950,12 +4019,13 @@ static void run_rebalance_domains(struct softirq_action *h)
3950 */ 4019 */
3951 if (this_rq->idle_at_tick && 4020 if (this_rq->idle_at_tick &&
3952 atomic_read(&nohz.load_balancer) == this_cpu) { 4021 atomic_read(&nohz.load_balancer) == this_cpu) {
3953 cpumask_t cpus = nohz.cpu_mask;
3954 struct rq *rq; 4022 struct rq *rq;
3955 int balance_cpu; 4023 int balance_cpu;
3956 4024
3957 cpu_clear(this_cpu, cpus); 4025 for_each_cpu(balance_cpu, nohz.cpu_mask) {
3958 for_each_cpu_mask_nr(balance_cpu, cpus) { 4026 if (balance_cpu == this_cpu)
4027 continue;
4028
3959 /* 4029 /*
3960 * If this cpu gets work to do, stop the load balancing 4030 * If this cpu gets work to do, stop the load balancing
3961 * work being done for other cpus. Next load 4031 * work being done for other cpus. Next load
@@ -3993,7 +4063,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
3993 rq->in_nohz_recently = 0; 4063 rq->in_nohz_recently = 0;
3994 4064
3995 if (atomic_read(&nohz.load_balancer) == cpu) { 4065 if (atomic_read(&nohz.load_balancer) == cpu) {
3996 cpu_clear(cpu, nohz.cpu_mask); 4066 cpumask_clear_cpu(cpu, nohz.cpu_mask);
3997 atomic_set(&nohz.load_balancer, -1); 4067 atomic_set(&nohz.load_balancer, -1);
3998 } 4068 }
3999 4069
@@ -4006,7 +4076,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4006 * TBD: Traverse the sched domains and nominate 4076 * TBD: Traverse the sched domains and nominate
4007 * the nearest cpu in the nohz.cpu_mask. 4077 * the nearest cpu in the nohz.cpu_mask.
4008 */ 4078 */
4009 int ilb = first_cpu(nohz.cpu_mask); 4079 int ilb = cpumask_first(nohz.cpu_mask);
4010 4080
4011 if (ilb < nr_cpu_ids) 4081 if (ilb < nr_cpu_ids)
4012 resched_cpu(ilb); 4082 resched_cpu(ilb);
@@ -4018,7 +4088,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4018 * cpus with ticks stopped, is it time for that to stop? 4088 * cpus with ticks stopped, is it time for that to stop?
4019 */ 4089 */
4020 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && 4090 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
4021 cpus_weight(nohz.cpu_mask) == num_online_cpus()) { 4091 cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
4022 resched_cpu(cpu); 4092 resched_cpu(cpu);
4023 return; 4093 return;
4024 } 4094 }
@@ -4028,7 +4098,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4028 * someone else, then no need raise the SCHED_SOFTIRQ 4098 * someone else, then no need raise the SCHED_SOFTIRQ
4029 */ 4099 */
4030 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && 4100 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
4031 cpu_isset(cpu, nohz.cpu_mask)) 4101 cpumask_test_cpu(cpu, nohz.cpu_mask))
4032 return; 4102 return;
4033#endif 4103#endif
4034 if (time_after_eq(jiffies, rq->next_balance)) 4104 if (time_after_eq(jiffies, rq->next_balance))
@@ -5438,10 +5508,9 @@ out_unlock:
5438 return retval; 5508 return retval;
5439} 5509}
5440 5510
5441long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) 5511long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5442{ 5512{
5443 cpumask_t cpus_allowed; 5513 cpumask_var_t cpus_allowed, new_mask;
5444 cpumask_t new_mask = *in_mask;
5445 struct task_struct *p; 5514 struct task_struct *p;
5446 int retval; 5515 int retval;
5447 5516
@@ -5463,6 +5532,14 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
5463 get_task_struct(p); 5532 get_task_struct(p);
5464 read_unlock(&tasklist_lock); 5533 read_unlock(&tasklist_lock);
5465 5534
5535 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
5536 retval = -ENOMEM;
5537 goto out_put_task;
5538 }
5539 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
5540 retval = -ENOMEM;
5541 goto out_free_cpus_allowed;
5542 }
5466 retval = -EPERM; 5543 retval = -EPERM;
5467 if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) 5544 if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
5468 goto out_unlock; 5545 goto out_unlock;
@@ -5471,37 +5548,41 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
5471 if (retval) 5548 if (retval)
5472 goto out_unlock; 5549 goto out_unlock;
5473 5550
5474 cpuset_cpus_allowed(p, &cpus_allowed); 5551 cpuset_cpus_allowed(p, cpus_allowed);
5475 cpus_and(new_mask, new_mask, cpus_allowed); 5552 cpumask_and(new_mask, in_mask, cpus_allowed);
5476 again: 5553 again:
5477 retval = set_cpus_allowed_ptr(p, &new_mask); 5554 retval = set_cpus_allowed_ptr(p, new_mask);
5478 5555
5479 if (!retval) { 5556 if (!retval) {
5480 cpuset_cpus_allowed(p, &cpus_allowed); 5557 cpuset_cpus_allowed(p, cpus_allowed);
5481 if (!cpus_subset(new_mask, cpus_allowed)) { 5558 if (!cpumask_subset(new_mask, cpus_allowed)) {
5482 /* 5559 /*
5483 * We must have raced with a concurrent cpuset 5560 * We must have raced with a concurrent cpuset
5484 * update. Just reset the cpus_allowed to the 5561 * update. Just reset the cpus_allowed to the
5485 * cpuset's cpus_allowed 5562 * cpuset's cpus_allowed
5486 */ 5563 */
5487 new_mask = cpus_allowed; 5564 cpumask_copy(new_mask, cpus_allowed);
5488 goto again; 5565 goto again;
5489 } 5566 }
5490 } 5567 }
5491out_unlock: 5568out_unlock:
5569 free_cpumask_var(new_mask);
5570out_free_cpus_allowed:
5571 free_cpumask_var(cpus_allowed);
5572out_put_task:
5492 put_task_struct(p); 5573 put_task_struct(p);
5493 put_online_cpus(); 5574 put_online_cpus();
5494 return retval; 5575 return retval;
5495} 5576}
5496 5577
5497static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, 5578static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5498 cpumask_t *new_mask) 5579 struct cpumask *new_mask)
5499{ 5580{
5500 if (len < sizeof(cpumask_t)) { 5581 if (len < cpumask_size())
5501 memset(new_mask, 0, sizeof(cpumask_t)); 5582 cpumask_clear(new_mask);
5502 } else if (len > sizeof(cpumask_t)) { 5583 else if (len > cpumask_size())
5503 len = sizeof(cpumask_t); 5584 len = cpumask_size();
5504 } 5585
5505 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; 5586 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
5506} 5587}
5507 5588
@@ -5514,17 +5595,20 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5514asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, 5595asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
5515 unsigned long __user *user_mask_ptr) 5596 unsigned long __user *user_mask_ptr)
5516{ 5597{
5517 cpumask_t new_mask; 5598 cpumask_var_t new_mask;
5518 int retval; 5599 int retval;
5519 5600
5520 retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); 5601 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
5521 if (retval) 5602 return -ENOMEM;
5522 return retval;
5523 5603
5524 return sched_setaffinity(pid, &new_mask); 5604 retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
5605 if (retval == 0)
5606 retval = sched_setaffinity(pid, new_mask);
5607 free_cpumask_var(new_mask);
5608 return retval;
5525} 5609}
5526 5610
5527long sched_getaffinity(pid_t pid, cpumask_t *mask) 5611long sched_getaffinity(pid_t pid, struct cpumask *mask)
5528{ 5612{
5529 struct task_struct *p; 5613 struct task_struct *p;
5530 int retval; 5614 int retval;
@@ -5541,7 +5625,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
5541 if (retval) 5625 if (retval)
5542 goto out_unlock; 5626 goto out_unlock;
5543 5627
5544 cpus_and(*mask, p->cpus_allowed, cpu_online_map); 5628 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
5545 5629
5546out_unlock: 5630out_unlock:
5547 read_unlock(&tasklist_lock); 5631 read_unlock(&tasklist_lock);
@@ -5560,19 +5644,24 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
5560 unsigned long __user *user_mask_ptr) 5644 unsigned long __user *user_mask_ptr)
5561{ 5645{
5562 int ret; 5646 int ret;
5563 cpumask_t mask; 5647 cpumask_var_t mask;
5564 5648
5565 if (len < sizeof(cpumask_t)) 5649 if (len < cpumask_size())
5566 return -EINVAL; 5650 return -EINVAL;
5567 5651
5568 ret = sched_getaffinity(pid, &mask); 5652 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
5569 if (ret < 0) 5653 return -ENOMEM;
5570 return ret;
5571 5654
5572 if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) 5655 ret = sched_getaffinity(pid, mask);
5573 return -EFAULT; 5656 if (ret == 0) {
5657 if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
5658 ret = -EFAULT;
5659 else
5660 ret = cpumask_size();
5661 }
5662 free_cpumask_var(mask);
5574 5663
5575 return sizeof(cpumask_t); 5664 return ret;
5576} 5665}
5577 5666
5578/** 5667/**
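
sys_sched_getaffinity() now copies out and reports cpumask_size() bytes on success instead of sizeof(cpumask_t), while the userspace calling convention is unchanged. For reference, a small userspace sketch of the two syscalls this code services, written against the ordinary glibc wrappers (this is application code, not part of the patch):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(0, &set);                       /* run only on CPU 0 */
        if (sched_setaffinity(0, sizeof(set), &set) != 0)
                perror("sched_setaffinity");

        if (sched_getaffinity(0, sizeof(set), &set) == 0)
                printf("bound to %d CPU(s)\n", CPU_COUNT(&set));
        return 0;
}
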
@@ -5914,7 +6003,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5914 idle->se.exec_start = sched_clock(); 6003 idle->se.exec_start = sched_clock();
5915 6004
5916 idle->prio = idle->normal_prio = MAX_PRIO; 6005 idle->prio = idle->normal_prio = MAX_PRIO;
5917 idle->cpus_allowed = cpumask_of_cpu(cpu); 6006 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
5918 __set_task_cpu(idle, cpu); 6007 __set_task_cpu(idle, cpu);
5919 6008
5920 rq->curr = rq->idle = idle; 6009 rq->curr = rq->idle = idle;
@@ -5941,9 +6030,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5941 * indicates which cpus entered this state. This is used 6030 * indicates which cpus entered this state. This is used
5942 * in the rcu update to wait only for active cpus. For system 6031 * in the rcu update to wait only for active cpus. For system
5943 * which do not switch off the HZ timer nohz_cpu_mask should 6032 * which do not switch off the HZ timer nohz_cpu_mask should
5944 * always be CPU_MASK_NONE. 6033 * always be CPU_BITS_NONE.
5945 */ 6034 */
5946cpumask_t nohz_cpu_mask = CPU_MASK_NONE; 6035cpumask_var_t nohz_cpu_mask;
5947 6036
5948/* 6037/*
5949 * Increase the granularity value when there are more CPUs, 6038 * Increase the granularity value when there are more CPUs,
@@ -5998,7 +6087,7 @@ static inline void sched_init_granularity(void)
5998 * task must not exit() & deallocate itself prematurely. The 6087 * task must not exit() & deallocate itself prematurely. The
5999 * call is not atomic; no spinlocks may be held. 6088 * call is not atomic; no spinlocks may be held.
6000 */ 6089 */
6001int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) 6090int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
6002{ 6091{
6003 struct migration_req req; 6092 struct migration_req req;
6004 unsigned long flags; 6093 unsigned long flags;
@@ -6006,13 +6095,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
6006 int ret = 0; 6095 int ret = 0;
6007 6096
6008 rq = task_rq_lock(p, &flags); 6097 rq = task_rq_lock(p, &flags);
6009 if (!cpus_intersects(*new_mask, cpu_online_map)) { 6098 if (!cpumask_intersects(new_mask, cpu_online_mask)) {
6010 ret = -EINVAL; 6099 ret = -EINVAL;
6011 goto out; 6100 goto out;
6012 } 6101 }
6013 6102
6014 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && 6103 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
6015 !cpus_equal(p->cpus_allowed, *new_mask))) { 6104 !cpumask_equal(&p->cpus_allowed, new_mask))) {
6016 ret = -EINVAL; 6105 ret = -EINVAL;
6017 goto out; 6106 goto out;
6018 } 6107 }
@@ -6020,15 +6109,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
6020 if (p->sched_class->set_cpus_allowed) 6109 if (p->sched_class->set_cpus_allowed)
6021 p->sched_class->set_cpus_allowed(p, new_mask); 6110 p->sched_class->set_cpus_allowed(p, new_mask);
6022 else { 6111 else {
6023 p->cpus_allowed = *new_mask; 6112 cpumask_copy(&p->cpus_allowed, new_mask);
6024 p->rt.nr_cpus_allowed = cpus_weight(*new_mask); 6113 p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
6025 } 6114 }
6026 6115
6027 /* Can the task run on the task's current CPU? If so, we're done */ 6116 /* Can the task run on the task's current CPU? If so, we're done */
6028 if (cpu_isset(task_cpu(p), *new_mask)) 6117 if (cpumask_test_cpu(task_cpu(p), new_mask))
6029 goto out; 6118 goto out;
6030 6119
6031 if (migrate_task(p, any_online_cpu(*new_mask), &req)) { 6120 if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
6032 /* Need help from migration thread: drop lock and wait. */ 6121 /* Need help from migration thread: drop lock and wait. */
6033 task_rq_unlock(rq, &flags); 6122 task_rq_unlock(rq, &flags);
6034 wake_up_process(rq->migration_thread); 6123 wake_up_process(rq->migration_thread);
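The set_cpus_allowed_ptr() hunk above is typical of the mechanical side of the conversion: every cpus_* operation that took a cpumask_t by value becomes a cpumask_* operation on struct cpumask pointers (cpus_intersects -> cpumask_intersects, cpu_isset -> cpumask_test_cpu, cpus_weight -> cpumask_weight, any_online_cpu(m) -> cpumask_any_and(cpu_online_mask, &m)). A hedged sketch of an affinity check written against the new pointer-based API; the helper name is invented for illustration:

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Illustrative only: validate a requested mask the way the hunk above does. */
static int example_pick_target(struct task_struct *p,
                               const struct cpumask *new_mask)
{
        /* Reject masks that contain no online CPU at all. */
        if (!cpumask_intersects(new_mask, cpu_online_mask))
                return -EINVAL;

        /* Nothing to migrate if the task already runs inside the mask. */
        if (cpumask_test_cpu(task_cpu(p), new_mask))
                return task_cpu(p);

        /* Otherwise pick some allowed, online CPU as the migration target. */
        return cpumask_any_and(cpu_online_mask, new_mask);
}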
@@ -6070,7 +6159,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
6070 if (task_cpu(p) != src_cpu) 6159 if (task_cpu(p) != src_cpu)
6071 goto done; 6160 goto done;
6072 /* Affinity changed (again). */ 6161 /* Affinity changed (again). */
6073 if (!cpu_isset(dest_cpu, p->cpus_allowed)) 6162 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
6074 goto fail; 6163 goto fail;
6075 6164
6076 on_rq = p->se.on_rq; 6165 on_rq = p->se.on_rq;
@@ -6167,50 +6256,43 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
6167 */ 6256 */
6168static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) 6257static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
6169{ 6258{
6170 unsigned long flags;
6171 cpumask_t mask;
6172 struct rq *rq;
6173 int dest_cpu; 6259 int dest_cpu;
6260 /* FIXME: Use cpumask_of_node here. */
6261 cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
6262 const struct cpumask *nodemask = &_nodemask;
6263
6264again:
6265 /* Look for allowed, online CPU in same node. */
6266 for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
6267 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
6268 goto move;
6269
6270 /* Any allowed, online CPU? */
6271 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
6272 if (dest_cpu < nr_cpu_ids)
6273 goto move;
6274
6275 /* No more Mr. Nice Guy. */
6276 if (dest_cpu >= nr_cpu_ids) {
6277 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
6278 dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
6174 6279
6175 do { 6280 /*
6176 /* On same node? */ 6281 * Don't tell them about moving exiting tasks or
6177 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 6282 * kernel threads (both mm NULL), since they never
6178 cpus_and(mask, mask, p->cpus_allowed); 6283 * leave kernel.
6179 dest_cpu = any_online_cpu(mask); 6284 */
6180 6285 if (p->mm && printk_ratelimit()) {
6181 /* On any allowed CPU? */ 6286 printk(KERN_INFO "process %d (%s) no "
6182 if (dest_cpu >= nr_cpu_ids) 6287 "longer affine to cpu%d\n",
6183 dest_cpu = any_online_cpu(p->cpus_allowed); 6288 task_pid_nr(p), p->comm, dead_cpu);
6184
6185 /* No more Mr. Nice Guy. */
6186 if (dest_cpu >= nr_cpu_ids) {
6187 cpumask_t cpus_allowed;
6188
6189 cpuset_cpus_allowed_locked(p, &cpus_allowed);
6190 /*
6191 * Try to stay on the same cpuset, where the
6192 * current cpuset may be a subset of all cpus.
6193 * The cpuset_cpus_allowed_locked() variant of
6194 * cpuset_cpus_allowed() will not block. It must be
6195 * called within calls to cpuset_lock/cpuset_unlock.
6196 */
6197 rq = task_rq_lock(p, &flags);
6198 p->cpus_allowed = cpus_allowed;
6199 dest_cpu = any_online_cpu(p->cpus_allowed);
6200 task_rq_unlock(rq, &flags);
6201
6202 /*
6203 * Don't tell them about moving exiting tasks or
6204 * kernel threads (both mm NULL), since they never
6205 * leave kernel.
6206 */
6207 if (p->mm && printk_ratelimit()) {
6208 printk(KERN_INFO "process %d (%s) no "
6209 "longer affine to cpu%d\n",
6210 task_pid_nr(p), p->comm, dead_cpu);
6211 }
6212 } 6289 }
6213 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); 6290 }
6291
6292move:
6293 /* It can have affinity changed while we were choosing. */
6294 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
6295 goto again;
6214} 6296}
6215 6297
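The rewritten move_task_off_dead_cpu() above replaces the old do/while retry loop with an explicit three-step fallback: prefer an allowed online CPU on the victim's node, then any allowed online CPU, and only then widen p->cpus_allowed through the cpuset before retrying the migration. A condensed sketch of just that selection order, with the locking, printk and retry logic stripped out (the function name is illustrative):

#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/sched.h>
#include <linux/topology.h>

/* Illustrative only: the destination-CPU fallback order used above. */
static int example_pick_fallback_cpu(struct task_struct *p, int dead_cpu)
{
        /* Same shim as the patch; cpumask_of_node() is the flagged follow-up. */
        cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
        const struct cpumask *nodemask = &_nodemask;
        int dest_cpu;

        /* 1) An allowed, online CPU on the same node as the dead CPU. */
        for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
                if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
                        return dest_cpu;

        /* 2) Any allowed, online CPU. */
        dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
        if (dest_cpu < nr_cpu_ids)
                return dest_cpu;

        /* 3) No more Mr. Nice Guy: widen cpus_allowed via the cpuset. */
        cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
        return cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
}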
6216/* 6298/*
@@ -6222,7 +6304,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
6222 */ 6304 */
6223static void migrate_nr_uninterruptible(struct rq *rq_src) 6305static void migrate_nr_uninterruptible(struct rq *rq_src)
6224{ 6306{
6225 struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); 6307 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
6226 unsigned long flags; 6308 unsigned long flags;
6227 6309
6228 local_irq_save(flags); 6310 local_irq_save(flags);
@@ -6512,7 +6594,7 @@ static void set_rq_online(struct rq *rq)
6512 if (!rq->online) { 6594 if (!rq->online) {
6513 const struct sched_class *class; 6595 const struct sched_class *class;
6514 6596
6515 cpu_set(rq->cpu, rq->rd->online); 6597 cpumask_set_cpu(rq->cpu, rq->rd->online);
6516 rq->online = 1; 6598 rq->online = 1;
6517 6599
6518 for_each_class(class) { 6600 for_each_class(class) {
@@ -6532,7 +6614,7 @@ static void set_rq_offline(struct rq *rq)
6532 class->rq_offline(rq); 6614 class->rq_offline(rq);
6533 } 6615 }
6534 6616
6535 cpu_clear(rq->cpu, rq->rd->online); 6617 cpumask_clear_cpu(rq->cpu, rq->rd->online);
6536 rq->online = 0; 6618 rq->online = 0;
6537 } 6619 }
6538} 6620}
@@ -6573,7 +6655,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6573 rq = cpu_rq(cpu); 6655 rq = cpu_rq(cpu);
6574 spin_lock_irqsave(&rq->lock, flags); 6656 spin_lock_irqsave(&rq->lock, flags);
6575 if (rq->rd) { 6657 if (rq->rd) {
6576 BUG_ON(!cpu_isset(cpu, rq->rd->span)); 6658 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6577 6659
6578 set_rq_online(rq); 6660 set_rq_online(rq);
6579 } 6661 }
@@ -6587,7 +6669,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6587 break; 6669 break;
6588 /* Unbind it from offline cpu so it can run. Fall thru. */ 6670 /* Unbind it from offline cpu so it can run. Fall thru. */
6589 kthread_bind(cpu_rq(cpu)->migration_thread, 6671 kthread_bind(cpu_rq(cpu)->migration_thread,
6590 any_online_cpu(cpu_online_map)); 6672 cpumask_any(cpu_online_mask));
6591 kthread_stop(cpu_rq(cpu)->migration_thread); 6673 kthread_stop(cpu_rq(cpu)->migration_thread);
6592 cpu_rq(cpu)->migration_thread = NULL; 6674 cpu_rq(cpu)->migration_thread = NULL;
6593 break; 6675 break;
@@ -6637,7 +6719,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6637 rq = cpu_rq(cpu); 6719 rq = cpu_rq(cpu);
6638 spin_lock_irqsave(&rq->lock, flags); 6720 spin_lock_irqsave(&rq->lock, flags);
6639 if (rq->rd) { 6721 if (rq->rd) {
6640 BUG_ON(!cpu_isset(cpu, rq->rd->span)); 6722 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6641 set_rq_offline(rq); 6723 set_rq_offline(rq);
6642 } 6724 }
6643 spin_unlock_irqrestore(&rq->lock, flags); 6725 spin_unlock_irqrestore(&rq->lock, flags);
@@ -6676,13 +6758,13 @@ early_initcall(migration_init);
6676#ifdef CONFIG_SCHED_DEBUG 6758#ifdef CONFIG_SCHED_DEBUG
6677 6759
6678static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, 6760static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6679 cpumask_t *groupmask) 6761 struct cpumask *groupmask)
6680{ 6762{
6681 struct sched_group *group = sd->groups; 6763 struct sched_group *group = sd->groups;
6682 char str[256]; 6764 char str[256];
6683 6765
6684 cpulist_scnprintf(str, sizeof(str), sd->span); 6766 cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
6685 cpus_clear(*groupmask); 6767 cpumask_clear(groupmask);
6686 6768
6687 printk(KERN_DEBUG "%*s domain %d: ", level, "", level); 6769 printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
6688 6770
@@ -6696,11 +6778,11 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6696 6778
6697 printk(KERN_CONT "span %s level %s\n", str, sd->name); 6779 printk(KERN_CONT "span %s level %s\n", str, sd->name);
6698 6780
6699 if (!cpu_isset(cpu, sd->span)) { 6781 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
6700 printk(KERN_ERR "ERROR: domain->span does not contain " 6782 printk(KERN_ERR "ERROR: domain->span does not contain "
6701 "CPU%d\n", cpu); 6783 "CPU%d\n", cpu);
6702 } 6784 }
6703 if (!cpu_isset(cpu, group->cpumask)) { 6785 if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
6704 printk(KERN_ERR "ERROR: domain->groups does not contain" 6786 printk(KERN_ERR "ERROR: domain->groups does not contain"
6705 " CPU%d\n", cpu); 6787 " CPU%d\n", cpu);
6706 } 6788 }
@@ -6720,31 +6802,32 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6720 break; 6802 break;
6721 } 6803 }
6722 6804
6723 if (!cpus_weight(group->cpumask)) { 6805 if (!cpumask_weight(sched_group_cpus(group))) {
6724 printk(KERN_CONT "\n"); 6806 printk(KERN_CONT "\n");
6725 printk(KERN_ERR "ERROR: empty group\n"); 6807 printk(KERN_ERR "ERROR: empty group\n");
6726 break; 6808 break;
6727 } 6809 }
6728 6810
6729 if (cpus_intersects(*groupmask, group->cpumask)) { 6811 if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
6730 printk(KERN_CONT "\n"); 6812 printk(KERN_CONT "\n");
6731 printk(KERN_ERR "ERROR: repeated CPUs\n"); 6813 printk(KERN_ERR "ERROR: repeated CPUs\n");
6732 break; 6814 break;
6733 } 6815 }
6734 6816
6735 cpus_or(*groupmask, *groupmask, group->cpumask); 6817 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
6736 6818
6737 cpulist_scnprintf(str, sizeof(str), group->cpumask); 6819 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
6738 printk(KERN_CONT " %s", str); 6820 printk(KERN_CONT " %s", str);
6739 6821
6740 group = group->next; 6822 group = group->next;
6741 } while (group != sd->groups); 6823 } while (group != sd->groups);
6742 printk(KERN_CONT "\n"); 6824 printk(KERN_CONT "\n");
6743 6825
6744 if (!cpus_equal(sd->span, *groupmask)) 6826 if (!cpumask_equal(sched_domain_span(sd), groupmask))
6745 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); 6827 printk(KERN_ERR "ERROR: groups don't span domain->span\n");
6746 6828
6747 if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) 6829 if (sd->parent &&
6830 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
6748 printk(KERN_ERR "ERROR: parent span is not a superset " 6831 printk(KERN_ERR "ERROR: parent span is not a superset "
6749 "of domain->span\n"); 6832 "of domain->span\n");
6750 return 0; 6833 return 0;
@@ -6752,7 +6835,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6752 6835
6753static void sched_domain_debug(struct sched_domain *sd, int cpu) 6836static void sched_domain_debug(struct sched_domain *sd, int cpu)
6754{ 6837{
6755 cpumask_t *groupmask; 6838 cpumask_var_t groupmask;
6756 int level = 0; 6839 int level = 0;
6757 6840
6758 if (!sd) { 6841 if (!sd) {
@@ -6762,8 +6845,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6762 6845
6763 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); 6846 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
6764 6847
6765 groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 6848 if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
6766 if (!groupmask) {
6767 printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); 6849 printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
6768 return; 6850 return;
6769 } 6851 }
@@ -6776,7 +6858,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6776 if (!sd) 6858 if (!sd)
6777 break; 6859 break;
6778 } 6860 }
6779 kfree(groupmask); 6861 free_cpumask_var(groupmask);
6780} 6862}
6781#else /* !CONFIG_SCHED_DEBUG */ 6863#else /* !CONFIG_SCHED_DEBUG */
6782# define sched_domain_debug(sd, cpu) do { } while (0) 6864# define sched_domain_debug(sd, cpu) do { } while (0)
@@ -6784,7 +6866,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6784 6866
6785static int sd_degenerate(struct sched_domain *sd) 6867static int sd_degenerate(struct sched_domain *sd)
6786{ 6868{
6787 if (cpus_weight(sd->span) == 1) 6869 if (cpumask_weight(sched_domain_span(sd)) == 1)
6788 return 1; 6870 return 1;
6789 6871
6790 /* Following flags need at least 2 groups */ 6872 /* Following flags need at least 2 groups */
@@ -6815,7 +6897,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
6815 if (sd_degenerate(parent)) 6897 if (sd_degenerate(parent))
6816 return 1; 6898 return 1;
6817 6899
6818 if (!cpus_equal(sd->span, parent->span)) 6900 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
6819 return 0; 6901 return 0;
6820 6902
6821 /* Does parent contain flags not in child? */ 6903 /* Does parent contain flags not in child? */
@@ -6839,6 +6921,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
6839 return 1; 6921 return 1;
6840} 6922}
6841 6923
6924static void free_rootdomain(struct root_domain *rd)
6925{
6926 cpupri_cleanup(&rd->cpupri);
6927
6928 free_cpumask_var(rd->rto_mask);
6929 free_cpumask_var(rd->online);
6930 free_cpumask_var(rd->span);
6931 kfree(rd);
6932}
6933
6842static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6934static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6843{ 6935{
6844 unsigned long flags; 6936 unsigned long flags;
@@ -6848,38 +6940,63 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6848 if (rq->rd) { 6940 if (rq->rd) {
6849 struct root_domain *old_rd = rq->rd; 6941 struct root_domain *old_rd = rq->rd;
6850 6942
6851 if (cpu_isset(rq->cpu, old_rd->online)) 6943 if (cpumask_test_cpu(rq->cpu, old_rd->online))
6852 set_rq_offline(rq); 6944 set_rq_offline(rq);
6853 6945
6854 cpu_clear(rq->cpu, old_rd->span); 6946 cpumask_clear_cpu(rq->cpu, old_rd->span);
6855 6947
6856 if (atomic_dec_and_test(&old_rd->refcount)) 6948 if (atomic_dec_and_test(&old_rd->refcount))
6857 kfree(old_rd); 6949 free_rootdomain(old_rd);
6858 } 6950 }
6859 6951
6860 atomic_inc(&rd->refcount); 6952 atomic_inc(&rd->refcount);
6861 rq->rd = rd; 6953 rq->rd = rd;
6862 6954
6863 cpu_set(rq->cpu, rd->span); 6955 cpumask_set_cpu(rq->cpu, rd->span);
6864 if (cpu_isset(rq->cpu, cpu_online_map)) 6956 if (cpumask_test_cpu(rq->cpu, cpu_online_mask))
6865 set_rq_online(rq); 6957 set_rq_online(rq);
6866 6958
6867 spin_unlock_irqrestore(&rq->lock, flags); 6959 spin_unlock_irqrestore(&rq->lock, flags);
6868} 6960}
6869 6961
6870static void init_rootdomain(struct root_domain *rd) 6962static int init_rootdomain(struct root_domain *rd, bool bootmem)
6871{ 6963{
6872 memset(rd, 0, sizeof(*rd)); 6964 memset(rd, 0, sizeof(*rd));
6873 6965
6874 cpus_clear(rd->span); 6966 if (bootmem) {
6875 cpus_clear(rd->online); 6967 alloc_bootmem_cpumask_var(&def_root_domain.span);
6968 alloc_bootmem_cpumask_var(&def_root_domain.online);
6969 alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
6970 cpupri_init(&rd->cpupri, true);
6971 return 0;
6972 }
6973
6974 if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
6975 goto free_rd;
6976 if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
6977 goto free_span;
6978 if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
6979 goto free_online;
6980
6981 if (cpupri_init(&rd->cpupri, false) != 0)
6982 goto free_rto_mask;
6983 return 0;
6876 6984
6877 cpupri_init(&rd->cpupri); 6985free_rto_mask:
6986 free_cpumask_var(rd->rto_mask);
6987free_online:
6988 free_cpumask_var(rd->online);
6989free_span:
6990 free_cpumask_var(rd->span);
6991free_rd:
6992 kfree(rd);
6993 return -ENOMEM;
6878} 6994}
6879 6995
6880static void init_defrootdomain(void) 6996static void init_defrootdomain(void)
6881{ 6997{
6882 init_rootdomain(&def_root_domain); 6998 init_rootdomain(&def_root_domain, true);
6999
6883 atomic_set(&def_root_domain.refcount, 1); 7000 atomic_set(&def_root_domain.refcount, 1);
6884} 7001}
6885 7002
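init_rootdomain() above now has to allocate three cpumask_var_t fields (plus the cpupri state) and therefore gains the usual goto ladder that releases them in reverse order on failure; __build_sched_domains() later in the patch grows the same shape. A minimal sketch of the idiom, with invented names:

#include <linux/cpumask.h>
#include <linux/gfp.h>

/* Illustrative only: unwind several cpumask_var_t allocations in reverse. */
static int example_alloc_masks(cpumask_var_t *span, cpumask_var_t *online,
                               cpumask_var_t *rto)
{
        if (!alloc_cpumask_var(span, GFP_KERNEL))
                goto fail;
        if (!alloc_cpumask_var(online, GFP_KERNEL))
                goto free_span;
        if (!alloc_cpumask_var(rto, GFP_KERNEL))
                goto free_online;
        return 0;

free_online:
        free_cpumask_var(*online);
free_span:
        free_cpumask_var(*span);
fail:
        return -ENOMEM;
}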
@@ -6891,7 +7008,10 @@ static struct root_domain *alloc_rootdomain(void)
6891 if (!rd) 7008 if (!rd)
6892 return NULL; 7009 return NULL;
6893 7010
6894 init_rootdomain(rd); 7011 if (init_rootdomain(rd, false) != 0) {
7012 kfree(rd);
7013 return NULL;
7014 }
6895 7015
6896 return rd; 7016 return rd;
6897} 7017}
@@ -6933,19 +7053,12 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
6933} 7053}
6934 7054
6935/* cpus with isolated domains */ 7055/* cpus with isolated domains */
6936static cpumask_t cpu_isolated_map = CPU_MASK_NONE; 7056static cpumask_var_t cpu_isolated_map;
6937 7057
6938/* Setup the mask of cpus configured for isolated domains */ 7058/* Setup the mask of cpus configured for isolated domains */
6939static int __init isolated_cpu_setup(char *str) 7059static int __init isolated_cpu_setup(char *str)
6940{ 7060{
6941 static int __initdata ints[NR_CPUS]; 7061 cpulist_parse(str, cpu_isolated_map);
6942 int i;
6943
6944 str = get_options(str, ARRAY_SIZE(ints), ints);
6945 cpus_clear(cpu_isolated_map);
6946 for (i = 1; i <= ints[0]; i++)
6947 if (ints[i] < NR_CPUS)
6948 cpu_set(ints[i], cpu_isolated_map);
6949 return 1; 7062 return 1;
6950} 7063}
6951 7064
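With the get_options() table gone, the isolcpus= argument above is handed straight to cpulist_parse(), which understands the usual comma-separated list and range syntax (for example "isolcpus=1,5-7"). A hedged usage sketch, with an invented destination mask and helper name:

#include <linux/cpumask.h>

/* Illustrative only: turn a CPU list string such as "1,5-7" into a mask. */
static int example_parse_cpu_list(const char *arg, struct cpumask *dst)
{
        int err = cpulist_parse(arg, dst);

        if (err)
                cpumask_clear(dst);     /* treat malformed input as "no CPUs" */
        return err;
}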
@@ -6954,42 +7067,43 @@ __setup("isolcpus=", isolated_cpu_setup);
6954/* 7067/*
6955 * init_sched_build_groups takes the cpumask we wish to span, and a pointer 7068 * init_sched_build_groups takes the cpumask we wish to span, and a pointer
6956 * to a function which identifies what group(along with sched group) a CPU 7069 * to a function which identifies what group(along with sched group) a CPU
6957 * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS 7070 * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
6958 * (due to the fact that we keep track of groups covered with a cpumask_t). 7071 * (due to the fact that we keep track of groups covered with a struct cpumask).
6959 * 7072 *
6960 * init_sched_build_groups will build a circular linked list of the groups 7073 * init_sched_build_groups will build a circular linked list of the groups
6961 * covered by the given span, and will set each group's ->cpumask correctly, 7074 * covered by the given span, and will set each group's ->cpumask correctly,
6962 * and ->cpu_power to 0. 7075 * and ->cpu_power to 0.
6963 */ 7076 */
6964static void 7077static void
6965init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, 7078init_sched_build_groups(const struct cpumask *span,
6966 int (*group_fn)(int cpu, const cpumask_t *cpu_map, 7079 const struct cpumask *cpu_map,
7080 int (*group_fn)(int cpu, const struct cpumask *cpu_map,
6967 struct sched_group **sg, 7081 struct sched_group **sg,
6968 cpumask_t *tmpmask), 7082 struct cpumask *tmpmask),
6969 cpumask_t *covered, cpumask_t *tmpmask) 7083 struct cpumask *covered, struct cpumask *tmpmask)
6970{ 7084{
6971 struct sched_group *first = NULL, *last = NULL; 7085 struct sched_group *first = NULL, *last = NULL;
6972 int i; 7086 int i;
6973 7087
6974 cpus_clear(*covered); 7088 cpumask_clear(covered);
6975 7089
6976 for_each_cpu_mask_nr(i, *span) { 7090 for_each_cpu(i, span) {
6977 struct sched_group *sg; 7091 struct sched_group *sg;
6978 int group = group_fn(i, cpu_map, &sg, tmpmask); 7092 int group = group_fn(i, cpu_map, &sg, tmpmask);
6979 int j; 7093 int j;
6980 7094
6981 if (cpu_isset(i, *covered)) 7095 if (cpumask_test_cpu(i, covered))
6982 continue; 7096 continue;
6983 7097
6984 cpus_clear(sg->cpumask); 7098 cpumask_clear(sched_group_cpus(sg));
6985 sg->__cpu_power = 0; 7099 sg->__cpu_power = 0;
6986 7100
6987 for_each_cpu_mask_nr(j, *span) { 7101 for_each_cpu(j, span) {
6988 if (group_fn(j, cpu_map, NULL, tmpmask) != group) 7102 if (group_fn(j, cpu_map, NULL, tmpmask) != group)
6989 continue; 7103 continue;
6990 7104
6991 cpu_set(j, *covered); 7105 cpumask_set_cpu(j, covered);
6992 cpu_set(j, sg->cpumask); 7106 cpumask_set_cpu(j, sched_group_cpus(sg));
6993 } 7107 }
6994 if (!first) 7108 if (!first)
6995 first = sg; 7109 first = sg;
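The loop above keeps a scratch "covered" mask so that each CPU in the span is assigned to exactly one group even though the outer walk visits every CPU. A reduced sketch of that idiom, with a hypothetical group_of() callback standing in for group_fn and the sched_group bookkeeping dropped:

#include <linux/cpumask.h>

/* Illustrative only: the "covered" idiom from init_sched_build_groups(). */
static void example_cover_span(const struct cpumask *span,
                               struct cpumask *covered,
                               int (*group_of)(int cpu))
{
        int i, j;

        cpumask_clear(covered);
        for_each_cpu(i, span) {
                if (cpumask_test_cpu(i, covered))
                        continue;       /* CPU i's group already exists */

                /* Mark every member of i's group so it is built only once. */
                for_each_cpu(j, span)
                        if (group_of(j) == group_of(i))
                                cpumask_set_cpu(j, covered);
        }
}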
@@ -7053,9 +7167,10 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
7053 * should be one that prevents unnecessary balancing, but also spreads tasks 7167 * should be one that prevents unnecessary balancing, but also spreads tasks
7054 * out optimally. 7168 * out optimally.
7055 */ 7169 */
7056static void sched_domain_node_span(int node, cpumask_t *span) 7170static void sched_domain_node_span(int node, struct cpumask *span)
7057{ 7171{
7058 nodemask_t used_nodes; 7172 nodemask_t used_nodes;
7173 /* FIXME: use cpumask_of_node() */
7059 node_to_cpumask_ptr(nodemask, node); 7174 node_to_cpumask_ptr(nodemask, node);
7060 int i; 7175 int i;
7061 7176
@@ -7077,18 +7192,33 @@ static void sched_domain_node_span(int node, cpumask_t *span)
7077int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 7192int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
7078 7193
7079/* 7194/*
7195 * The cpus mask in sched_group and sched_domain hangs off the end.
7196 * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
7197 * for nr_cpu_ids < CONFIG_NR_CPUS.
7198 */
7199struct static_sched_group {
7200 struct sched_group sg;
7201 DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
7202};
7203
7204struct static_sched_domain {
7205 struct sched_domain sd;
7206 DECLARE_BITMAP(span, CONFIG_NR_CPUS);
7207};
7208
7209/*
7080 * SMT sched-domains: 7210 * SMT sched-domains:
7081 */ 7211 */
7082#ifdef CONFIG_SCHED_SMT 7212#ifdef CONFIG_SCHED_SMT
7083static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 7213static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
7084static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); 7214static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
7085 7215
7086static int 7216static int
7087cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7217cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
7088 cpumask_t *unused) 7218 struct sched_group **sg, struct cpumask *unused)
7089{ 7219{
7090 if (sg) 7220 if (sg)
7091 *sg = &per_cpu(sched_group_cpus, cpu); 7221 *sg = &per_cpu(sched_group_cpus, cpu).sg;
7092 return cpu; 7222 return cpu;
7093} 7223}
7094#endif /* CONFIG_SCHED_SMT */ 7224#endif /* CONFIG_SCHED_SMT */
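The static_sched_group/static_sched_domain wrappers and the per-cpu declarations above exist because sched_group and sched_domain now keep their cpumask in storage that hangs off the end of the structure, reached through sched_group_cpus() and sched_domain_span(); statically defined instances must reserve that trailing storage themselves with DECLARE_BITMAP. A generic sketch of the layout, using invented type names rather than the scheduler's:

#include <linux/cpumask.h>

/* Illustrative only: a structure whose cpumask storage trails the struct. */
struct example_obj {
        int id;
        unsigned long cpus[0];                  /* bitmap storage follows */
};

/* Static instances supply the trailing bitmap explicitly. */
struct example_obj_static {
        struct example_obj obj;
        DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
};

static inline struct cpumask *example_obj_cpus(struct example_obj *o)
{
        return to_cpumask(o->cpus);
}

Dynamically allocated instances get the extra space by allocating sizeof(struct sched_group) + cpumask_size(), which is why the NUMA group allocations later in this patch grow by cpumask_size().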
@@ -7097,56 +7227,55 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
7097 * multi-core sched-domains: 7227 * multi-core sched-domains:
7098 */ 7228 */
7099#ifdef CONFIG_SCHED_MC 7229#ifdef CONFIG_SCHED_MC
7100static DEFINE_PER_CPU(struct sched_domain, core_domains); 7230static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
7101static DEFINE_PER_CPU(struct sched_group, sched_group_core); 7231static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
7102#endif /* CONFIG_SCHED_MC */ 7232#endif /* CONFIG_SCHED_MC */
7103 7233
7104#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) 7234#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
7105static int 7235static int
7106cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7236cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
7107 cpumask_t *mask) 7237 struct sched_group **sg, struct cpumask *mask)
7108{ 7238{
7109 int group; 7239 int group;
7110 7240
7111 *mask = per_cpu(cpu_sibling_map, cpu); 7241 cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
7112 cpus_and(*mask, *mask, *cpu_map); 7242 group = cpumask_first(mask);
7113 group = first_cpu(*mask);
7114 if (sg) 7243 if (sg)
7115 *sg = &per_cpu(sched_group_core, group); 7244 *sg = &per_cpu(sched_group_core, group).sg;
7116 return group; 7245 return group;
7117} 7246}
7118#elif defined(CONFIG_SCHED_MC) 7247#elif defined(CONFIG_SCHED_MC)
7119static int 7248static int
7120cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7249cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
7121 cpumask_t *unused) 7250 struct sched_group **sg, struct cpumask *unused)
7122{ 7251{
7123 if (sg) 7252 if (sg)
7124 *sg = &per_cpu(sched_group_core, cpu); 7253 *sg = &per_cpu(sched_group_core, cpu).sg;
7125 return cpu; 7254 return cpu;
7126} 7255}
7127#endif 7256#endif
7128 7257
7129static DEFINE_PER_CPU(struct sched_domain, phys_domains); 7258static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
7130static DEFINE_PER_CPU(struct sched_group, sched_group_phys); 7259static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
7131 7260
7132static int 7261static int
7133cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7262cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
7134 cpumask_t *mask) 7263 struct sched_group **sg, struct cpumask *mask)
7135{ 7264{
7136 int group; 7265 int group;
7137#ifdef CONFIG_SCHED_MC 7266#ifdef CONFIG_SCHED_MC
7267 /* FIXME: Use cpu_coregroup_mask. */
7138 *mask = cpu_coregroup_map(cpu); 7268 *mask = cpu_coregroup_map(cpu);
7139 cpus_and(*mask, *mask, *cpu_map); 7269 cpus_and(*mask, *mask, *cpu_map);
7140 group = first_cpu(*mask); 7270 group = cpumask_first(mask);
7141#elif defined(CONFIG_SCHED_SMT) 7271#elif defined(CONFIG_SCHED_SMT)
7142 *mask = per_cpu(cpu_sibling_map, cpu); 7272 cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
7143 cpus_and(*mask, *mask, *cpu_map); 7273 group = cpumask_first(mask);
7144 group = first_cpu(*mask);
7145#else 7274#else
7146 group = cpu; 7275 group = cpu;
7147#endif 7276#endif
7148 if (sg) 7277 if (sg)
7149 *sg = &per_cpu(sched_group_phys, group); 7278 *sg = &per_cpu(sched_group_phys, group).sg;
7150 return group; 7279 return group;
7151} 7280}
7152 7281
@@ -7160,19 +7289,21 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains);
7160static struct sched_group ***sched_group_nodes_bycpu; 7289static struct sched_group ***sched_group_nodes_bycpu;
7161 7290
7162static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); 7291static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
7163static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); 7292static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
7164 7293
7165static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, 7294static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
7166 struct sched_group **sg, cpumask_t *nodemask) 7295 struct sched_group **sg,
7296 struct cpumask *nodemask)
7167{ 7297{
7168 int group; 7298 int group;
7299 /* FIXME: use cpumask_of_node */
7300 node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
7169 7301
7170 *nodemask = node_to_cpumask(cpu_to_node(cpu)); 7302 cpumask_and(nodemask, pnodemask, cpu_map);
7171 cpus_and(*nodemask, *nodemask, *cpu_map); 7303 group = cpumask_first(nodemask);
7172 group = first_cpu(*nodemask);
7173 7304
7174 if (sg) 7305 if (sg)
7175 *sg = &per_cpu(sched_group_allnodes, group); 7306 *sg = &per_cpu(sched_group_allnodes, group).sg;
7176 return group; 7307 return group;
7177} 7308}
7178 7309
@@ -7184,11 +7315,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
7184 if (!sg) 7315 if (!sg)
7185 return; 7316 return;
7186 do { 7317 do {
7187 for_each_cpu_mask_nr(j, sg->cpumask) { 7318 for_each_cpu(j, sched_group_cpus(sg)) {
7188 struct sched_domain *sd; 7319 struct sched_domain *sd;
7189 7320
7190 sd = &per_cpu(phys_domains, j); 7321 sd = &per_cpu(phys_domains, j).sd;
7191 if (j != first_cpu(sd->groups->cpumask)) { 7322 if (j != cpumask_first(sched_group_cpus(sd->groups))) {
7192 /* 7323 /*
7193 * Only add "power" once for each 7324 * Only add "power" once for each
7194 * physical package. 7325 * physical package.
@@ -7205,11 +7336,12 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
7205 7336
7206#ifdef CONFIG_NUMA 7337#ifdef CONFIG_NUMA
7207/* Free memory allocated for various sched_group structures */ 7338/* Free memory allocated for various sched_group structures */
7208static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) 7339static void free_sched_groups(const struct cpumask *cpu_map,
7340 struct cpumask *nodemask)
7209{ 7341{
7210 int cpu, i; 7342 int cpu, i;
7211 7343
7212 for_each_cpu_mask_nr(cpu, *cpu_map) { 7344 for_each_cpu(cpu, cpu_map) {
7213 struct sched_group **sched_group_nodes 7345 struct sched_group **sched_group_nodes
7214 = sched_group_nodes_bycpu[cpu]; 7346 = sched_group_nodes_bycpu[cpu];
7215 7347
@@ -7218,10 +7350,11 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
7218 7350
7219 for (i = 0; i < nr_node_ids; i++) { 7351 for (i = 0; i < nr_node_ids; i++) {
7220 struct sched_group *oldsg, *sg = sched_group_nodes[i]; 7352 struct sched_group *oldsg, *sg = sched_group_nodes[i];
7353 /* FIXME: Use cpumask_of_node */
7354 node_to_cpumask_ptr(pnodemask, i);
7221 7355
7222 *nodemask = node_to_cpumask(i); 7356 cpus_and(*nodemask, *pnodemask, *cpu_map);
7223 cpus_and(*nodemask, *nodemask, *cpu_map); 7357 if (cpumask_empty(nodemask))
7224 if (cpus_empty(*nodemask))
7225 continue; 7358 continue;
7226 7359
7227 if (sg == NULL) 7360 if (sg == NULL)
@@ -7239,7 +7372,8 @@ next_sg:
7239 } 7372 }
7240} 7373}
7241#else /* !CONFIG_NUMA */ 7374#else /* !CONFIG_NUMA */
7242static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) 7375static void free_sched_groups(const struct cpumask *cpu_map,
7376 struct cpumask *nodemask)
7243{ 7377{
7244} 7378}
7245#endif /* CONFIG_NUMA */ 7379#endif /* CONFIG_NUMA */
@@ -7265,7 +7399,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
7265 7399
7266 WARN_ON(!sd || !sd->groups); 7400 WARN_ON(!sd || !sd->groups);
7267 7401
7268 if (cpu != first_cpu(sd->groups->cpumask)) 7402 if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
7269 return; 7403 return;
7270 7404
7271 child = sd->child; 7405 child = sd->child;
@@ -7330,48 +7464,6 @@ SD_INIT_FUNC(CPU)
7330 SD_INIT_FUNC(MC) 7464 SD_INIT_FUNC(MC)
7331#endif 7465#endif
7332 7466
7333/*
7334 * To minimize stack usage kmalloc room for cpumasks and share the
7335 * space as the usage in build_sched_domains() dictates. Used only
7336 * if the amount of space is significant.
7337 */
7338struct allmasks {
7339 cpumask_t tmpmask; /* make this one first */
7340 union {
7341 cpumask_t nodemask;
7342 cpumask_t this_sibling_map;
7343 cpumask_t this_core_map;
7344 };
7345 cpumask_t send_covered;
7346
7347#ifdef CONFIG_NUMA
7348 cpumask_t domainspan;
7349 cpumask_t covered;
7350 cpumask_t notcovered;
7351#endif
7352};
7353
7354#if NR_CPUS > 128
7355#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
7356static inline void sched_cpumask_alloc(struct allmasks **masks)
7357{
7358 *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
7359}
7360static inline void sched_cpumask_free(struct allmasks *masks)
7361{
7362 kfree(masks);
7363}
7364#else
7365#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
7366static inline void sched_cpumask_alloc(struct allmasks **masks)
7367{ }
7368static inline void sched_cpumask_free(struct allmasks *masks)
7369{ }
7370#endif
7371
7372#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
7373 ((unsigned long)(a) + offsetof(struct allmasks, v))
7374
7375static int default_relax_domain_level = -1; 7467static int default_relax_domain_level = -1;
7376 7468
7377static int __init setup_relax_domain_level(char *str) 7469static int __init setup_relax_domain_level(char *str)
@@ -7411,17 +7503,38 @@ static void set_domain_attribute(struct sched_domain *sd,
7411 * Build sched domains for a given set of cpus and attach the sched domains 7503 * Build sched domains for a given set of cpus and attach the sched domains
7412 * to the individual cpus 7504 * to the individual cpus
7413 */ 7505 */
7414static int __build_sched_domains(const cpumask_t *cpu_map, 7506static int __build_sched_domains(const struct cpumask *cpu_map,
7415 struct sched_domain_attr *attr) 7507 struct sched_domain_attr *attr)
7416{ 7508{
7417 int i; 7509 int i, err = -ENOMEM;
7418 struct root_domain *rd; 7510 struct root_domain *rd;
7419 SCHED_CPUMASK_DECLARE(allmasks); 7511 cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
7420 cpumask_t *tmpmask; 7512 tmpmask;
7421#ifdef CONFIG_NUMA 7513#ifdef CONFIG_NUMA
7514 cpumask_var_t domainspan, covered, notcovered;
7422 struct sched_group **sched_group_nodes = NULL; 7515 struct sched_group **sched_group_nodes = NULL;
7423 int sd_allnodes = 0; 7516 int sd_allnodes = 0;
7424 7517
7518 if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
7519 goto out;
7520 if (!alloc_cpumask_var(&covered, GFP_KERNEL))
7521 goto free_domainspan;
7522 if (!alloc_cpumask_var(&notcovered, GFP_KERNEL))
7523 goto free_covered;
7524#endif
7525
7526 if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
7527 goto free_notcovered;
7528 if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
7529 goto free_nodemask;
7530 if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
7531 goto free_this_sibling_map;
7532 if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
7533 goto free_this_core_map;
7534 if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
7535 goto free_send_covered;
7536
7537#ifdef CONFIG_NUMA
7425 /* 7538 /*
7426 * Allocate the per-node list of sched groups 7539 * Allocate the per-node list of sched groups
7427 */ 7540 */
@@ -7429,54 +7542,37 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7429 GFP_KERNEL); 7542 GFP_KERNEL);
7430 if (!sched_group_nodes) { 7543 if (!sched_group_nodes) {
7431 printk(KERN_WARNING "Can not alloc sched group node list\n"); 7544 printk(KERN_WARNING "Can not alloc sched group node list\n");
7432 return -ENOMEM; 7545 goto free_tmpmask;
7433 } 7546 }
7434#endif 7547#endif
7435 7548
7436 rd = alloc_rootdomain(); 7549 rd = alloc_rootdomain();
7437 if (!rd) { 7550 if (!rd) {
7438 printk(KERN_WARNING "Cannot alloc root domain\n"); 7551 printk(KERN_WARNING "Cannot alloc root domain\n");
7439#ifdef CONFIG_NUMA 7552 goto free_sched_groups;
7440 kfree(sched_group_nodes);
7441#endif
7442 return -ENOMEM;
7443 } 7553 }
7444 7554
7445 /* get space for all scratch cpumask variables */
7446 sched_cpumask_alloc(&allmasks);
7447 if (!allmasks) {
7448 printk(KERN_WARNING "Cannot alloc cpumask array\n");
7449 kfree(rd);
7450#ifdef CONFIG_NUMA 7555#ifdef CONFIG_NUMA
7451 kfree(sched_group_nodes); 7556 sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
7452#endif
7453 return -ENOMEM;
7454 }
7455
7456 tmpmask = (cpumask_t *)allmasks;
7457
7458
7459#ifdef CONFIG_NUMA
7460 sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
7461#endif 7557#endif
7462 7558
7463 /* 7559 /*
7464 * Set up domains for cpus specified by the cpu_map. 7560 * Set up domains for cpus specified by the cpu_map.
7465 */ 7561 */
7466 for_each_cpu_mask_nr(i, *cpu_map) { 7562 for_each_cpu(i, cpu_map) {
7467 struct sched_domain *sd = NULL, *p; 7563 struct sched_domain *sd = NULL, *p;
7468 SCHED_CPUMASK_VAR(nodemask, allmasks);
7469 7564
7565 /* FIXME: use cpumask_of_node */
7470 *nodemask = node_to_cpumask(cpu_to_node(i)); 7566 *nodemask = node_to_cpumask(cpu_to_node(i));
7471 cpus_and(*nodemask, *nodemask, *cpu_map); 7567 cpus_and(*nodemask, *nodemask, *cpu_map);
7472 7568
7473#ifdef CONFIG_NUMA 7569#ifdef CONFIG_NUMA
7474 if (cpus_weight(*cpu_map) > 7570 if (cpumask_weight(cpu_map) >
7475 SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { 7571 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
7476 sd = &per_cpu(allnodes_domains, i); 7572 sd = &per_cpu(allnodes_domains, i);
7477 SD_INIT(sd, ALLNODES); 7573 SD_INIT(sd, ALLNODES);
7478 set_domain_attribute(sd, attr); 7574 set_domain_attribute(sd, attr);
7479 sd->span = *cpu_map; 7575 cpumask_copy(sched_domain_span(sd), cpu_map);
7480 cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); 7576 cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
7481 p = sd; 7577 p = sd;
7482 sd_allnodes = 1; 7578 sd_allnodes = 1;
@@ -7486,18 +7582,19 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7486 sd = &per_cpu(node_domains, i); 7582 sd = &per_cpu(node_domains, i);
7487 SD_INIT(sd, NODE); 7583 SD_INIT(sd, NODE);
7488 set_domain_attribute(sd, attr); 7584 set_domain_attribute(sd, attr);
7489 sched_domain_node_span(cpu_to_node(i), &sd->span); 7585 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
7490 sd->parent = p; 7586 sd->parent = p;
7491 if (p) 7587 if (p)
7492 p->child = sd; 7588 p->child = sd;
7493 cpus_and(sd->span, sd->span, *cpu_map); 7589 cpumask_and(sched_domain_span(sd),
7590 sched_domain_span(sd), cpu_map);
7494#endif 7591#endif
7495 7592
7496 p = sd; 7593 p = sd;
7497 sd = &per_cpu(phys_domains, i); 7594 sd = &per_cpu(phys_domains, i).sd;
7498 SD_INIT(sd, CPU); 7595 SD_INIT(sd, CPU);
7499 set_domain_attribute(sd, attr); 7596 set_domain_attribute(sd, attr);
7500 sd->span = *nodemask; 7597 cpumask_copy(sched_domain_span(sd), nodemask);
7501 sd->parent = p; 7598 sd->parent = p;
7502 if (p) 7599 if (p)
7503 p->child = sd; 7600 p->child = sd;
@@ -7505,11 +7602,12 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7505 7602
7506#ifdef CONFIG_SCHED_MC 7603#ifdef CONFIG_SCHED_MC
7507 p = sd; 7604 p = sd;
7508 sd = &per_cpu(core_domains, i); 7605 sd = &per_cpu(core_domains, i).sd;
7509 SD_INIT(sd, MC); 7606 SD_INIT(sd, MC);
7510 set_domain_attribute(sd, attr); 7607 set_domain_attribute(sd, attr);
7511 sd->span = cpu_coregroup_map(i); 7608 *sched_domain_span(sd) = cpu_coregroup_map(i);
7512 cpus_and(sd->span, sd->span, *cpu_map); 7609 cpumask_and(sched_domain_span(sd),
7610 sched_domain_span(sd), cpu_map);
7513 sd->parent = p; 7611 sd->parent = p;
7514 p->child = sd; 7612 p->child = sd;
7515 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); 7613 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7517,11 +7615,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7517 7615
7518#ifdef CONFIG_SCHED_SMT 7616#ifdef CONFIG_SCHED_SMT
7519 p = sd; 7617 p = sd;
7520 sd = &per_cpu(cpu_domains, i); 7618 sd = &per_cpu(cpu_domains, i).sd;
7521 SD_INIT(sd, SIBLING); 7619 SD_INIT(sd, SIBLING);
7522 set_domain_attribute(sd, attr); 7620 set_domain_attribute(sd, attr);
7523 sd->span = per_cpu(cpu_sibling_map, i); 7621 cpumask_and(sched_domain_span(sd),
7524 cpus_and(sd->span, sd->span, *cpu_map); 7622 &per_cpu(cpu_sibling_map, i), cpu_map);
7525 sd->parent = p; 7623 sd->parent = p;
7526 p->child = sd; 7624 p->child = sd;
7527 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); 7625 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7530,13 +7628,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7530 7628
7531#ifdef CONFIG_SCHED_SMT 7629#ifdef CONFIG_SCHED_SMT
7532 /* Set up CPU (sibling) groups */ 7630 /* Set up CPU (sibling) groups */
7533 for_each_cpu_mask_nr(i, *cpu_map) { 7631 for_each_cpu(i, cpu_map) {
7534 SCHED_CPUMASK_VAR(this_sibling_map, allmasks); 7632 cpumask_and(this_sibling_map,
7535 SCHED_CPUMASK_VAR(send_covered, allmasks); 7633 &per_cpu(cpu_sibling_map, i), cpu_map);
7536 7634 if (i != cpumask_first(this_sibling_map))
7537 *this_sibling_map = per_cpu(cpu_sibling_map, i);
7538 cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
7539 if (i != first_cpu(*this_sibling_map))
7540 continue; 7635 continue;
7541 7636
7542 init_sched_build_groups(this_sibling_map, cpu_map, 7637 init_sched_build_groups(this_sibling_map, cpu_map,
@@ -7547,13 +7642,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7547 7642
7548#ifdef CONFIG_SCHED_MC 7643#ifdef CONFIG_SCHED_MC
7549 /* Set up multi-core groups */ 7644 /* Set up multi-core groups */
7550 for_each_cpu_mask_nr(i, *cpu_map) { 7645 for_each_cpu(i, cpu_map) {
7551 SCHED_CPUMASK_VAR(this_core_map, allmasks); 7646 /* FIXME: Use cpu_coregroup_mask */
7552 SCHED_CPUMASK_VAR(send_covered, allmasks);
7553
7554 *this_core_map = cpu_coregroup_map(i); 7647 *this_core_map = cpu_coregroup_map(i);
7555 cpus_and(*this_core_map, *this_core_map, *cpu_map); 7648 cpus_and(*this_core_map, *this_core_map, *cpu_map);
7556 if (i != first_cpu(*this_core_map)) 7649 if (i != cpumask_first(this_core_map))
7557 continue; 7650 continue;
7558 7651
7559 init_sched_build_groups(this_core_map, cpu_map, 7652 init_sched_build_groups(this_core_map, cpu_map,
@@ -7564,12 +7657,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7564 7657
7565 /* Set up physical groups */ 7658 /* Set up physical groups */
7566 for (i = 0; i < nr_node_ids; i++) { 7659 for (i = 0; i < nr_node_ids; i++) {
7567 SCHED_CPUMASK_VAR(nodemask, allmasks); 7660 /* FIXME: Use cpumask_of_node */
7568 SCHED_CPUMASK_VAR(send_covered, allmasks);
7569
7570 *nodemask = node_to_cpumask(i); 7661 *nodemask = node_to_cpumask(i);
7571 cpus_and(*nodemask, *nodemask, *cpu_map); 7662 cpus_and(*nodemask, *nodemask, *cpu_map);
7572 if (cpus_empty(*nodemask)) 7663 if (cpumask_empty(nodemask))
7573 continue; 7664 continue;
7574 7665
7575 init_sched_build_groups(nodemask, cpu_map, 7666 init_sched_build_groups(nodemask, cpu_map,
@@ -7580,8 +7671,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7580#ifdef CONFIG_NUMA 7671#ifdef CONFIG_NUMA
7581 /* Set up node groups */ 7672 /* Set up node groups */
7582 if (sd_allnodes) { 7673 if (sd_allnodes) {
7583 SCHED_CPUMASK_VAR(send_covered, allmasks);
7584
7585 init_sched_build_groups(cpu_map, cpu_map, 7674 init_sched_build_groups(cpu_map, cpu_map,
7586 &cpu_to_allnodes_group, 7675 &cpu_to_allnodes_group,
7587 send_covered, tmpmask); 7676 send_covered, tmpmask);
@@ -7590,58 +7679,58 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7590 for (i = 0; i < nr_node_ids; i++) { 7679 for (i = 0; i < nr_node_ids; i++) {
7591 /* Set up node groups */ 7680 /* Set up node groups */
7592 struct sched_group *sg, *prev; 7681 struct sched_group *sg, *prev;
7593 SCHED_CPUMASK_VAR(nodemask, allmasks);
7594 SCHED_CPUMASK_VAR(domainspan, allmasks);
7595 SCHED_CPUMASK_VAR(covered, allmasks);
7596 int j; 7682 int j;
7597 7683
7684 /* FIXME: Use cpumask_of_node */
7598 *nodemask = node_to_cpumask(i); 7685 *nodemask = node_to_cpumask(i);
7599 cpus_clear(*covered); 7686 cpumask_clear(covered);
7600 7687
7601 cpus_and(*nodemask, *nodemask, *cpu_map); 7688 cpus_and(*nodemask, *nodemask, *cpu_map);
7602 if (cpus_empty(*nodemask)) { 7689 if (cpumask_empty(nodemask)) {
7603 sched_group_nodes[i] = NULL; 7690 sched_group_nodes[i] = NULL;
7604 continue; 7691 continue;
7605 } 7692 }
7606 7693
7607 sched_domain_node_span(i, domainspan); 7694 sched_domain_node_span(i, domainspan);
7608 cpus_and(*domainspan, *domainspan, *cpu_map); 7695 cpumask_and(domainspan, domainspan, cpu_map);
7609 7696
7610 sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); 7697 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
7698 GFP_KERNEL, i);
7611 if (!sg) { 7699 if (!sg) {
7612 printk(KERN_WARNING "Can not alloc domain group for " 7700 printk(KERN_WARNING "Can not alloc domain group for "
7613 "node %d\n", i); 7701 "node %d\n", i);
7614 goto error; 7702 goto error;
7615 } 7703 }
7616 sched_group_nodes[i] = sg; 7704 sched_group_nodes[i] = sg;
7617 for_each_cpu_mask_nr(j, *nodemask) { 7705 for_each_cpu(j, nodemask) {
7618 struct sched_domain *sd; 7706 struct sched_domain *sd;
7619 7707
7620 sd = &per_cpu(node_domains, j); 7708 sd = &per_cpu(node_domains, j);
7621 sd->groups = sg; 7709 sd->groups = sg;
7622 } 7710 }
7623 sg->__cpu_power = 0; 7711 sg->__cpu_power = 0;
7624 sg->cpumask = *nodemask; 7712 cpumask_copy(sched_group_cpus(sg), nodemask);
7625 sg->next = sg; 7713 sg->next = sg;
7626 cpus_or(*covered, *covered, *nodemask); 7714 cpumask_or(covered, covered, nodemask);
7627 prev = sg; 7715 prev = sg;
7628 7716
7629 for (j = 0; j < nr_node_ids; j++) { 7717 for (j = 0; j < nr_node_ids; j++) {
7630 SCHED_CPUMASK_VAR(notcovered, allmasks);
7631 int n = (i + j) % nr_node_ids; 7718 int n = (i + j) % nr_node_ids;
7719 /* FIXME: Use cpumask_of_node */
7632 node_to_cpumask_ptr(pnodemask, n); 7720 node_to_cpumask_ptr(pnodemask, n);
7633 7721
7634 cpus_complement(*notcovered, *covered); 7722 cpumask_complement(notcovered, covered);
7635 cpus_and(*tmpmask, *notcovered, *cpu_map); 7723 cpumask_and(tmpmask, notcovered, cpu_map);
7636 cpus_and(*tmpmask, *tmpmask, *domainspan); 7724 cpumask_and(tmpmask, tmpmask, domainspan);
7637 if (cpus_empty(*tmpmask)) 7725 if (cpumask_empty(tmpmask))
7638 break; 7726 break;
7639 7727
7640 cpus_and(*tmpmask, *tmpmask, *pnodemask); 7728 cpumask_and(tmpmask, tmpmask, pnodemask);
7641 if (cpus_empty(*tmpmask)) 7729 if (cpumask_empty(tmpmask))
7642 continue; 7730 continue;
7643 7731
7644 sg = kmalloc_node(sizeof(struct sched_group), 7732 sg = kmalloc_node(sizeof(struct sched_group) +
7733 cpumask_size(),
7645 GFP_KERNEL, i); 7734 GFP_KERNEL, i);
7646 if (!sg) { 7735 if (!sg) {
7647 printk(KERN_WARNING 7736 printk(KERN_WARNING
@@ -7649,9 +7738,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7649 goto error; 7738 goto error;
7650 } 7739 }
7651 sg->__cpu_power = 0; 7740 sg->__cpu_power = 0;
7652 sg->cpumask = *tmpmask; 7741 cpumask_copy(sched_group_cpus(sg), tmpmask);
7653 sg->next = prev->next; 7742 sg->next = prev->next;
7654 cpus_or(*covered, *covered, *tmpmask); 7743 cpumask_or(covered, covered, tmpmask);
7655 prev->next = sg; 7744 prev->next = sg;
7656 prev = sg; 7745 prev = sg;
7657 } 7746 }
@@ -7660,22 +7749,22 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7660 7749
7661 /* Calculate CPU power for physical packages and nodes */ 7750 /* Calculate CPU power for physical packages and nodes */
7662#ifdef CONFIG_SCHED_SMT 7751#ifdef CONFIG_SCHED_SMT
7663 for_each_cpu_mask_nr(i, *cpu_map) { 7752 for_each_cpu(i, cpu_map) {
7664 struct sched_domain *sd = &per_cpu(cpu_domains, i); 7753 struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
7665 7754
7666 init_sched_groups_power(i, sd); 7755 init_sched_groups_power(i, sd);
7667 } 7756 }
7668#endif 7757#endif
7669#ifdef CONFIG_SCHED_MC 7758#ifdef CONFIG_SCHED_MC
7670 for_each_cpu_mask_nr(i, *cpu_map) { 7759 for_each_cpu(i, cpu_map) {
7671 struct sched_domain *sd = &per_cpu(core_domains, i); 7760 struct sched_domain *sd = &per_cpu(core_domains, i).sd;
7672 7761
7673 init_sched_groups_power(i, sd); 7762 init_sched_groups_power(i, sd);
7674 } 7763 }
7675#endif 7764#endif
7676 7765
7677 for_each_cpu_mask_nr(i, *cpu_map) { 7766 for_each_cpu(i, cpu_map) {
7678 struct sched_domain *sd = &per_cpu(phys_domains, i); 7767 struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
7679 7768
7680 init_sched_groups_power(i, sd); 7769 init_sched_groups_power(i, sd);
7681 } 7770 }
@@ -7687,53 +7776,78 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7687 if (sd_allnodes) { 7776 if (sd_allnodes) {
7688 struct sched_group *sg; 7777 struct sched_group *sg;
7689 7778
7690 cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, 7779 cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
7691 tmpmask); 7780 tmpmask);
7692 init_numa_sched_groups_power(sg); 7781 init_numa_sched_groups_power(sg);
7693 } 7782 }
7694#endif 7783#endif
7695 7784
7696 /* Attach the domains */ 7785 /* Attach the domains */
7697 for_each_cpu_mask_nr(i, *cpu_map) { 7786 for_each_cpu(i, cpu_map) {
7698 struct sched_domain *sd; 7787 struct sched_domain *sd;
7699#ifdef CONFIG_SCHED_SMT 7788#ifdef CONFIG_SCHED_SMT
7700 sd = &per_cpu(cpu_domains, i); 7789 sd = &per_cpu(cpu_domains, i).sd;
7701#elif defined(CONFIG_SCHED_MC) 7790#elif defined(CONFIG_SCHED_MC)
7702 sd = &per_cpu(core_domains, i); 7791 sd = &per_cpu(core_domains, i).sd;
7703#else 7792#else
7704 sd = &per_cpu(phys_domains, i); 7793 sd = &per_cpu(phys_domains, i).sd;
7705#endif 7794#endif
7706 cpu_attach_domain(sd, rd, i); 7795 cpu_attach_domain(sd, rd, i);
7707 } 7796 }
7708 7797
7709 sched_cpumask_free(allmasks); 7798 err = 0;
7710 return 0; 7799
7800free_tmpmask:
7801 free_cpumask_var(tmpmask);
7802free_send_covered:
7803 free_cpumask_var(send_covered);
7804free_this_core_map:
7805 free_cpumask_var(this_core_map);
7806free_this_sibling_map:
7807 free_cpumask_var(this_sibling_map);
7808free_nodemask:
7809 free_cpumask_var(nodemask);
7810free_notcovered:
7811#ifdef CONFIG_NUMA
7812 free_cpumask_var(notcovered);
7813free_covered:
7814 free_cpumask_var(covered);
7815free_domainspan:
7816 free_cpumask_var(domainspan);
7817out:
7818#endif
7819 return err;
7820
7821free_sched_groups:
7822#ifdef CONFIG_NUMA
7823 kfree(sched_group_nodes);
7824#endif
7825 goto free_tmpmask;
7711 7826
7712#ifdef CONFIG_NUMA 7827#ifdef CONFIG_NUMA
7713error: 7828error:
7714 free_sched_groups(cpu_map, tmpmask); 7829 free_sched_groups(cpu_map, tmpmask);
7715 sched_cpumask_free(allmasks); 7830 free_rootdomain(rd);
7716 kfree(rd); 7831 goto free_tmpmask;
7717 return -ENOMEM;
7718#endif 7832#endif
7719} 7833}
7720 7834
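Note how __build_sched_domains() above funnels success and failure through the same chain of free_* labels: the scratch masks are always released, and only err distinguishes the outcome (it is set to 0 just before the fall-through, while error paths jump in with -ENOMEM still set). A compact sketch of that shared-exit idiom with invented names:

#include <linux/cpumask.h>
#include <linux/gfp.h>

/* Illustrative only: success and failure share one cleanup path. */
static int example_build(void)
{
        int err = -ENOMEM;
        cpumask_var_t scratch_a, scratch_b;

        if (!alloc_cpumask_var(&scratch_a, GFP_KERNEL))
                goto out;
        if (!alloc_cpumask_var(&scratch_b, GFP_KERNEL))
                goto free_a;

        /* ... the real function would build domains here ... */

        err = 0;                /* success still falls through the frees */
        free_cpumask_var(scratch_b);
free_a:
        free_cpumask_var(scratch_a);
out:
        return err;
}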
7721static int build_sched_domains(const cpumask_t *cpu_map) 7835static int build_sched_domains(const struct cpumask *cpu_map)
7722{ 7836{
7723 return __build_sched_domains(cpu_map, NULL); 7837 return __build_sched_domains(cpu_map, NULL);
7724} 7838}
7725 7839
7726static cpumask_t *doms_cur; /* current sched domains */ 7840static struct cpumask *doms_cur; /* current sched domains */
7727static int ndoms_cur; /* number of sched domains in 'doms_cur' */ 7841static int ndoms_cur; /* number of sched domains in 'doms_cur' */
7728static struct sched_domain_attr *dattr_cur; 7842static struct sched_domain_attr *dattr_cur;
7729 /* attribues of custom domains in 'doms_cur' */ 7843 /* attribues of custom domains in 'doms_cur' */
7730 7844
7731/* 7845/*
7732 * Special case: If a kmalloc of a doms_cur partition (array of 7846 * Special case: If a kmalloc of a doms_cur partition (array of
7733 * cpumask_t) fails, then fallback to a single sched domain, 7847 * cpumask) fails, then fallback to a single sched domain,
7734 * as determined by the single cpumask_t fallback_doms. 7848 * as determined by the single cpumask fallback_doms.
7735 */ 7849 */
7736static cpumask_t fallback_doms; 7850static cpumask_var_t fallback_doms;
7737 7851
7738/* 7852/*
7739 * arch_update_cpu_topology lets virtualized architectures update the 7853 * arch_update_cpu_topology lets virtualized architectures update the
@@ -7750,16 +7864,16 @@ int __attribute__((weak)) arch_update_cpu_topology(void)
7750 * For now this just excludes isolated cpus, but could be used to 7864 * For now this just excludes isolated cpus, but could be used to
7751 * exclude other special cases in the future. 7865 * exclude other special cases in the future.
7752 */ 7866 */
7753static int arch_init_sched_domains(const cpumask_t *cpu_map) 7867static int arch_init_sched_domains(const struct cpumask *cpu_map)
7754{ 7868{
7755 int err; 7869 int err;
7756 7870
7757 arch_update_cpu_topology(); 7871 arch_update_cpu_topology();
7758 ndoms_cur = 1; 7872 ndoms_cur = 1;
7759 doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 7873 doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
7760 if (!doms_cur) 7874 if (!doms_cur)
7761 doms_cur = &fallback_doms; 7875 doms_cur = fallback_doms;
7762 cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); 7876 cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
7763 dattr_cur = NULL; 7877 dattr_cur = NULL;
7764 err = build_sched_domains(doms_cur); 7878 err = build_sched_domains(doms_cur);
7765 register_sched_domain_sysctl(); 7879 register_sched_domain_sysctl();
@@ -7767,8 +7881,8 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
7767 return err; 7881 return err;
7768} 7882}
7769 7883
7770static void arch_destroy_sched_domains(const cpumask_t *cpu_map, 7884static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
7771 cpumask_t *tmpmask) 7885 struct cpumask *tmpmask)
7772{ 7886{
7773 free_sched_groups(cpu_map, tmpmask); 7887 free_sched_groups(cpu_map, tmpmask);
7774} 7888}
@@ -7777,15 +7891,16 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
7777 * Detach sched domains from a group of cpus specified in cpu_map 7891 * Detach sched domains from a group of cpus specified in cpu_map
7778 * These cpus will now be attached to the NULL domain 7892 * These cpus will now be attached to the NULL domain
7779 */ 7893 */
7780static void detach_destroy_domains(const cpumask_t *cpu_map) 7894static void detach_destroy_domains(const struct cpumask *cpu_map)
7781{ 7895{
7782 cpumask_t tmpmask; 7896 /* Save because hotplug lock held. */
7897 static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
7783 int i; 7898 int i;
7784 7899
7785 for_each_cpu_mask_nr(i, *cpu_map) 7900 for_each_cpu(i, cpu_map)
7786 cpu_attach_domain(NULL, &def_root_domain, i); 7901 cpu_attach_domain(NULL, &def_root_domain, i);
7787 synchronize_sched(); 7902 synchronize_sched();
7788 arch_destroy_sched_domains(cpu_map, &tmpmask); 7903 arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
7789} 7904}
7790 7905
7791/* handle null as "default" */ 7906/* handle null as "default" */
@@ -7810,7 +7925,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7810 * doms_new[] to the current sched domain partitioning, doms_cur[]. 7925 * doms_new[] to the current sched domain partitioning, doms_cur[].
7811 * It destroys each deleted domain and builds each new domain. 7926 * It destroys each deleted domain and builds each new domain.
7812 * 7927 *
7813 * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. 7928 * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
7814 * The masks don't intersect (don't overlap.) We should setup one 7929 * The masks don't intersect (don't overlap.) We should setup one
7815 * sched domain for each mask. CPUs not in any of the cpumasks will 7930 * sched domain for each mask. CPUs not in any of the cpumasks will
7816 * not be load balanced. If the same cpumask appears both in the 7931 * not be load balanced. If the same cpumask appears both in the
@@ -7824,13 +7939,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7824 * the single partition 'fallback_doms', it also forces the domains 7939 * the single partition 'fallback_doms', it also forces the domains
7825 * to be rebuilt. 7940 * to be rebuilt.
7826 * 7941 *
7827 * If doms_new == NULL it will be replaced with cpu_online_map. 7942 * If doms_new == NULL it will be replaced with cpu_online_mask.
7828 * ndoms_new == 0 is a special case for destroying existing domains, 7943 * ndoms_new == 0 is a special case for destroying existing domains,
7829 * and it will not create the default domain. 7944 * and it will not create the default domain.
7830 * 7945 *
7831 * Call with hotplug lock held 7946 * Call with hotplug lock held
7832 */ 7947 */
7833void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, 7948/* FIXME: Change to struct cpumask *doms_new[] */
7949void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
7834 struct sched_domain_attr *dattr_new) 7950 struct sched_domain_attr *dattr_new)
7835{ 7951{
7836 int i, j, n; 7952 int i, j, n;
@@ -7849,7 +7965,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
7849 /* Destroy deleted domains */ 7965 /* Destroy deleted domains */
7850 for (i = 0; i < ndoms_cur; i++) { 7966 for (i = 0; i < ndoms_cur; i++) {
7851 for (j = 0; j < n && !new_topology; j++) { 7967 for (j = 0; j < n && !new_topology; j++) {
7852 if (cpus_equal(doms_cur[i], doms_new[j]) 7968 if (cpumask_equal(&doms_cur[i], &doms_new[j])
7853 && dattrs_equal(dattr_cur, i, dattr_new, j)) 7969 && dattrs_equal(dattr_cur, i, dattr_new, j))
7854 goto match1; 7970 goto match1;
7855 } 7971 }
@@ -7861,15 +7977,15 @@ match1:
7861 7977
7862 if (doms_new == NULL) { 7978 if (doms_new == NULL) {
7863 ndoms_cur = 0; 7979 ndoms_cur = 0;
7864 doms_new = &fallback_doms; 7980 doms_new = fallback_doms;
7865 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); 7981 cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
7866 WARN_ON_ONCE(dattr_new); 7982 WARN_ON_ONCE(dattr_new);
7867 } 7983 }
7868 7984
7869 /* Build new domains */ 7985 /* Build new domains */
7870 for (i = 0; i < ndoms_new; i++) { 7986 for (i = 0; i < ndoms_new; i++) {
7871 for (j = 0; j < ndoms_cur && !new_topology; j++) { 7987 for (j = 0; j < ndoms_cur && !new_topology; j++) {
7872 if (cpus_equal(doms_new[i], doms_cur[j]) 7988 if (cpumask_equal(&doms_new[i], &doms_cur[j])
7873 && dattrs_equal(dattr_new, i, dattr_cur, j)) 7989 && dattrs_equal(dattr_new, i, dattr_cur, j))
7874 goto match2; 7990 goto match2;
7875 } 7991 }
@@ -7881,7 +7997,7 @@ match2:
7881 } 7997 }
7882 7998
7883 /* Remember the new sched domains */ 7999 /* Remember the new sched domains */
7884 if (doms_cur != &fallback_doms) 8000 if (doms_cur != fallback_doms)
7885 kfree(doms_cur); 8001 kfree(doms_cur);
7886 kfree(dattr_cur); /* kfree(NULL) is safe */ 8002 kfree(dattr_cur); /* kfree(NULL) is safe */
7887 doms_cur = doms_new; 8003 doms_cur = doms_new;
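
partition_sched_domains() now walks its domain-mask arrays with the pointer-based helpers (cpumask_equal(), cpumask_andnot()) rather than the by-value cpus_* macros. A hedged sketch of the comparison idiom behind the match1/match2 loops, with invented names:

#include <linux/cpumask.h>
#include <linux/types.h>

/* True if mask i of 'cur' already appears among the first n masks of 'cand'. */
static bool mask_already_present(const struct cpumask *cur, int i,
                                 const struct cpumask *cand, int n)
{
        int j;

        for (j = 0; j < n; j++)
                if (cpumask_equal(&cur[i], &cand[j]))
                        return true;
        return false;
}

The FIXME in the hunk points at the remaining wart: doms_new is still an array of struct cpumask rather than an array of pointers, which is why callers index it directly.
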
@@ -7910,14 +8026,25 @@ int arch_reinit_sched_domains(void)
7910static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) 8026static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
7911{ 8027{
7912 int ret; 8028 int ret;
8029 unsigned int level = 0;
7913 8030
7914 if (buf[0] != '0' && buf[0] != '1') 8031 if (sscanf(buf, "%u", &level) != 1)
8032 return -EINVAL;
8033
8034 /*
 8035	 * level is always positive, so don't check for
 8036	 * level < POWERSAVINGS_BALANCE_NONE, which is 0.
 8037	 * What happens on a 0 or 1 byte write?
 8038	 * Do we need to check count as well?
8039 */
8040
8041 if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
7915 return -EINVAL; 8042 return -EINVAL;
7916 8043
7917 if (smt) 8044 if (smt)
7918 sched_smt_power_savings = (buf[0] == '1'); 8045 sched_smt_power_savings = level;
7919 else 8046 else
7920 sched_mc_power_savings = (buf[0] == '1'); 8047 sched_mc_power_savings = level;
7921 8048
7922 ret = arch_reinit_sched_domains(); 8049 ret = arch_reinit_sched_domains();
7923 8050
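
sched_power_savings_store() no longer accepts just '0' or '1': it parses an unsigned level with sscanf() and rejects anything at or above MAX_POWERSAVINGS_BALANCE_LEVELS, so multi-level power-savings policies can be selected from sysfs. A minimal sketch of that parse-and-range-check shape; the limit macro below is a stand-in, not the kernel's:

#include <linux/kernel.h>
#include <linux/errno.h>

#define DEMO_MAX_LEVELS 3       /* stand-in for MAX_POWERSAVINGS_BALANCE_LEVELS */

static int parse_level(const char *buf, unsigned int *level)
{
        unsigned int val = 0;

        if (sscanf(buf, "%u", &val) != 1)
                return -EINVAL;
        if (val >= DEMO_MAX_LEVELS)     /* 0 is a valid "no savings" level */
                return -EINVAL;

        *level = val;
        return 0;
}
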
@@ -8021,7 +8148,9 @@ static int update_runtime(struct notifier_block *nfb,
8021 8148
8022void __init sched_init_smp(void) 8149void __init sched_init_smp(void)
8023{ 8150{
8024 cpumask_t non_isolated_cpus; 8151 cpumask_var_t non_isolated_cpus;
8152
8153 alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
8025 8154
8026#if defined(CONFIG_NUMA) 8155#if defined(CONFIG_NUMA)
8027 sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), 8156 sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
@@ -8030,10 +8159,10 @@ void __init sched_init_smp(void)
8030#endif 8159#endif
8031 get_online_cpus(); 8160 get_online_cpus();
8032 mutex_lock(&sched_domains_mutex); 8161 mutex_lock(&sched_domains_mutex);
8033 arch_init_sched_domains(&cpu_online_map); 8162 arch_init_sched_domains(cpu_online_mask);
8034 cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); 8163 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
8035 if (cpus_empty(non_isolated_cpus)) 8164 if (cpumask_empty(non_isolated_cpus))
8036 cpu_set(smp_processor_id(), non_isolated_cpus); 8165 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
8037 mutex_unlock(&sched_domains_mutex); 8166 mutex_unlock(&sched_domains_mutex);
8038 put_online_cpus(); 8167 put_online_cpus();
8039 8168
@@ -8048,9 +8177,13 @@ void __init sched_init_smp(void)
8048 init_hrtick(); 8177 init_hrtick();
8049 8178
8050 /* Move init over to a non-isolated CPU */ 8179 /* Move init over to a non-isolated CPU */
8051 if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) 8180 if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
8052 BUG(); 8181 BUG();
8053 sched_init_granularity(); 8182 sched_init_granularity();
8183 free_cpumask_var(non_isolated_cpus);
8184
8185 alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
8186 init_sched_rt_class();
8054} 8187}
8055#else 8188#else
8056void __init sched_init_smp(void) 8189void __init sched_init_smp(void)
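
sched_init_smp() now keeps non_isolated_cpus in a cpumask_var_t: with CONFIG_CPUMASK_OFFSTACK=y that is a real allocation which must be freed, otherwise alloc/free compile away and the mask lives in static storage as before. A sketch of the general alloc/use/free pattern; the helper below is hypothetical and, unlike the early-boot path in the commit, it checks the allocation result:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* Pick the first CPU in 'allowed' that is not in 'isolated'. */
static int pick_first_allowed(const struct cpumask *allowed,
                              const struct cpumask *isolated)
{
        cpumask_var_t keep;
        int cpu;

        if (!alloc_cpumask_var(&keep, GFP_KERNEL))
                return -ENOMEM;

        cpumask_andnot(keep, allowed, isolated);
        if (cpumask_empty(keep))
                cpumask_copy(keep, allowed);    /* fall back to the full set */

        cpu = cpumask_first(keep);
        free_cpumask_var(keep);
        return cpu;
}
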
@@ -8365,6 +8498,15 @@ void __init sched_init(void)
8365 */ 8498 */
8366 current->sched_class = &fair_sched_class; 8499 current->sched_class = &fair_sched_class;
8367 8500
8501 /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
8502 alloc_bootmem_cpumask_var(&nohz_cpu_mask);
8503#ifdef CONFIG_SMP
8504#ifdef CONFIG_NO_HZ
8505 alloc_bootmem_cpumask_var(&nohz.cpu_mask);
8506#endif
8507 alloc_bootmem_cpumask_var(&cpu_isolated_map);
8508#endif /* SMP */
8509
8368 scheduler_running = 1; 8510 scheduler_running = 1;
8369} 8511}
8370 8512
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 52154fefab7e..018b7be1db2e 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -67,24 +67,21 @@ static int convert_prio(int prio)
67 * Returns: (int)bool - CPUs were found 67 * Returns: (int)bool - CPUs were found
68 */ 68 */
69int cpupri_find(struct cpupri *cp, struct task_struct *p, 69int cpupri_find(struct cpupri *cp, struct task_struct *p,
70 cpumask_t *lowest_mask) 70 struct cpumask *lowest_mask)
71{ 71{
72 int idx = 0; 72 int idx = 0;
73 int task_pri = convert_prio(p->prio); 73 int task_pri = convert_prio(p->prio);
74 74
75 for_each_cpupri_active(cp->pri_active, idx) { 75 for_each_cpupri_active(cp->pri_active, idx) {
76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; 76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
77 cpumask_t mask;
78 77
79 if (idx >= task_pri) 78 if (idx >= task_pri)
80 break; 79 break;
81 80
82 cpus_and(mask, p->cpus_allowed, vec->mask); 81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
83
84 if (cpus_empty(mask))
85 continue; 82 continue;
86 83
87 *lowest_mask = mask; 84 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
88 return 1; 85 return 1;
89 } 86 }
90 87
@@ -126,7 +123,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
126 vec->count--; 123 vec->count--;
127 if (!vec->count) 124 if (!vec->count)
128 clear_bit(oldpri, cp->pri_active); 125 clear_bit(oldpri, cp->pri_active);
129 cpu_clear(cpu, vec->mask); 126 cpumask_clear_cpu(cpu, vec->mask);
130 127
131 spin_unlock_irqrestore(&vec->lock, flags); 128 spin_unlock_irqrestore(&vec->lock, flags);
132 } 129 }
@@ -136,7 +133,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
136 133
137 spin_lock_irqsave(&vec->lock, flags); 134 spin_lock_irqsave(&vec->lock, flags);
138 135
139 cpu_set(cpu, vec->mask); 136 cpumask_set_cpu(cpu, vec->mask);
140 vec->count++; 137 vec->count++;
141 if (vec->count == 1) 138 if (vec->count == 1)
142 set_bit(newpri, cp->pri_active); 139 set_bit(newpri, cp->pri_active);
@@ -150,10 +147,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
150/** 147/**
151 * cpupri_init - initialize the cpupri structure 148 * cpupri_init - initialize the cpupri structure
152 * @cp: The cpupri context 149 * @cp: The cpupri context
150 * @bootmem: true if allocations need to use bootmem
153 * 151 *
154 * Returns: (void) 152 * Returns: -ENOMEM if memory allocation fails.
155 */ 153 */
156void cpupri_init(struct cpupri *cp) 154int cpupri_init(struct cpupri *cp, bool bootmem)
157{ 155{
158 int i; 156 int i;
159 157
@@ -164,11 +162,30 @@ void cpupri_init(struct cpupri *cp)
164 162
165 spin_lock_init(&vec->lock); 163 spin_lock_init(&vec->lock);
166 vec->count = 0; 164 vec->count = 0;
167 cpus_clear(vec->mask); 165 if (bootmem)
166 alloc_bootmem_cpumask_var(&vec->mask);
167 else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
168 goto cleanup;
168 } 169 }
169 170
170 for_each_possible_cpu(i) 171 for_each_possible_cpu(i)
171 cp->cpu_to_pri[i] = CPUPRI_INVALID; 172 cp->cpu_to_pri[i] = CPUPRI_INVALID;
173 return 0;
174
175cleanup:
176 for (i--; i >= 0; i--)
177 free_cpumask_var(cp->pri_to_cpu[i].mask);
178 return -ENOMEM;
172} 179}
173 180
181/**
182 * cpupri_cleanup - clean up the cpupri structure
183 * @cp: The cpupri context
184 */
185void cpupri_cleanup(struct cpupri *cp)
186{
187 int i;
174 188
189 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
190 free_cpumask_var(cp->pri_to_cpu[i].mask);
191}
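
Two idioms from the cpupri conversion are worth noting. First, an intersection test no longer needs a temporary: cpumask_any_and() returns a CPU number below nr_cpu_ids only when the two masks overlap. Second, cpupri_init() now returns -ENOMEM and unwinds the vectors it already allocated when a later alloc_cpumask_var() fails. A hedged sketch of the first idiom, with invented names:

#include <linux/cpumask.h>
#include <linux/types.h>

/* True if 'a' and 'b' share at least one CPU; no scratch mask required. */
static bool masks_overlap(const struct cpumask *a, const struct cpumask *b)
{
        return cpumask_any_and(a, b) < nr_cpu_ids;
}

cpumask_intersects() expresses the same test directly and is usually the clearer choice when the chosen CPU number itself is not needed.
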
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index f25811b0f931..642a94ef8a0a 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -14,7 +14,7 @@
14struct cpupri_vec { 14struct cpupri_vec {
15 spinlock_t lock; 15 spinlock_t lock;
16 int count; 16 int count;
17 cpumask_t mask; 17 cpumask_var_t mask;
18}; 18};
19 19
20struct cpupri { 20struct cpupri {
@@ -27,7 +27,8 @@ struct cpupri {
27int cpupri_find(struct cpupri *cp, 27int cpupri_find(struct cpupri *cp,
28 struct task_struct *p, cpumask_t *lowest_mask); 28 struct task_struct *p, cpumask_t *lowest_mask);
29void cpupri_set(struct cpupri *cp, int cpu, int pri); 29void cpupri_set(struct cpupri *cp, int cpu, int pri);
30void cpupri_init(struct cpupri *cp); 30int cpupri_init(struct cpupri *cp, bool bootmem);
31void cpupri_cleanup(struct cpupri *cp);
31#else 32#else
32#define cpupri_set(cp, cpu, pri) do { } while (0) 33#define cpupri_set(cp, cpu, pri) do { } while (0)
33#define cpupri_init() do { } while (0) 34#define cpupri_init() do { } while (0)
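
struct cpupri_vec now embeds a cpumask_var_t. With CONFIG_CPUMASK_OFFSTACK=y that member is a pointer which has to be allocated and freed, which is why cpupri grows an init/cleanup pair; without it, the member is a one-element array and the calls cost almost nothing. A sketch of embedding one in a structure, with made-up names:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

struct tracked_set {
        int count;
        cpumask_var_t cpus;     /* pointer with OFFSTACK=y, array otherwise */
};

static int tracked_set_init(struct tracked_set *ts)
{
        if (!alloc_cpumask_var(&ts->cpus, GFP_KERNEL))
                return -ENOMEM;
        cpumask_clear(ts->cpus);
        ts->count = 0;
        return 0;
}

static void tracked_set_destroy(struct tracked_set *ts)
{
        free_cpumask_var(ts->cpus);     /* safe even when it is a no-op */
}
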
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5ad4440f0fc4..56c0efe902a7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1019,16 +1019,33 @@ static void yield_task_fair(struct rq *rq)
1019 * search starts with cpus closest then further out as needed, 1019 * search starts with cpus closest then further out as needed,
1020 * so we always favor a closer, idle cpu. 1020 * so we always favor a closer, idle cpu.
1021 * Domains may include CPUs that are not usable for migration, 1021 * Domains may include CPUs that are not usable for migration,
1022 * hence we need to mask them out (cpu_active_map) 1022 * hence we need to mask them out (cpu_active_mask)
1023 * 1023 *
1024 * Returns the CPU we should wake onto. 1024 * Returns the CPU we should wake onto.
1025 */ 1025 */
1026#if defined(ARCH_HAS_SCHED_WAKE_IDLE) 1026#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1027static int wake_idle(int cpu, struct task_struct *p) 1027static int wake_idle(int cpu, struct task_struct *p)
1028{ 1028{
1029 cpumask_t tmp;
1030 struct sched_domain *sd; 1029 struct sched_domain *sd;
1031 int i; 1030 int i;
1031 unsigned int chosen_wakeup_cpu;
1032 int this_cpu;
1033
1034 /*
1035 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
1036 * are idle and this is not a kernel thread and this task's affinity
1037 * allows it to be moved to preferred cpu, then just move!
1038 */
1039
1040 this_cpu = smp_processor_id();
1041 chosen_wakeup_cpu =
1042 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
1043
1044 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
1045 idle_cpu(cpu) && idle_cpu(this_cpu) &&
1046 p->mm && !(p->flags & PF_KTHREAD) &&
1047 cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
1048 return chosen_wakeup_cpu;
1032 1049
1033 /* 1050 /*
1034 * If it is idle, then it is the best cpu to run this task. 1051 * If it is idle, then it is the best cpu to run this task.
@@ -1046,10 +1063,9 @@ static int wake_idle(int cpu, struct task_struct *p)
1046 if ((sd->flags & SD_WAKE_IDLE) 1063 if ((sd->flags & SD_WAKE_IDLE)
1047 || ((sd->flags & SD_WAKE_IDLE_FAR) 1064 || ((sd->flags & SD_WAKE_IDLE_FAR)
1048 && !task_hot(p, task_rq(p)->clock, sd))) { 1065 && !task_hot(p, task_rq(p)->clock, sd))) {
1049 cpus_and(tmp, sd->span, p->cpus_allowed); 1066 for_each_cpu_and(i, sched_domain_span(sd),
1050 cpus_and(tmp, tmp, cpu_active_map); 1067 &p->cpus_allowed) {
1051 for_each_cpu_mask_nr(i, tmp) { 1068 if (cpu_active(i) && idle_cpu(i)) {
1052 if (idle_cpu(i)) {
1053 if (i != task_cpu(p)) { 1069 if (i != task_cpu(p)) {
1054 schedstat_inc(p, 1070 schedstat_inc(p,
1055 se.nr_wakeups_idle); 1071 se.nr_wakeups_idle);
@@ -1242,13 +1258,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
1242 * this_cpu and prev_cpu are present in: 1258 * this_cpu and prev_cpu are present in:
1243 */ 1259 */
1244 for_each_domain(this_cpu, sd) { 1260 for_each_domain(this_cpu, sd) {
1245 if (cpu_isset(prev_cpu, sd->span)) { 1261 if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
1246 this_sd = sd; 1262 this_sd = sd;
1247 break; 1263 break;
1248 } 1264 }
1249 } 1265 }
1250 1266
1251 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) 1267 if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
1252 goto out; 1268 goto out;
1253 1269
1254 /* 1270 /*
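
wake_idle() drops its on-stack tmp mask entirely: instead of AND-ing sd->span, p->cpus_allowed and the active map into a temporary, it iterates with for_each_cpu_and() and checks cpu_active() per CPU. A sketch of that iterator pattern; the predicate callback is a stand-in for whatever per-CPU test the caller needs:

#include <linux/cpumask.h>
#include <linux/types.h>

/* Return the first CPU present in both masks that also passes 'ok', or -1. */
static int first_matching_cpu(const struct cpumask *a,
                              const struct cpumask *b,
                              bool (*ok)(int cpu))
{
        int cpu;

        for_each_cpu_and(cpu, a, b)
                if (ok(cpu))
                        return cpu;
        return -1;
}
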
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 51d2af3e6191..833b6d44483c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq)
15 if (!rq->online) 15 if (!rq->online)
16 return; 16 return;
17 17
18 cpu_set(rq->cpu, rq->rd->rto_mask); 18 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
19 /* 19 /*
20 * Make sure the mask is visible before we set 20 * Make sure the mask is visible before we set
21 * the overload count. That is checked to determine 21 * the overload count. That is checked to determine
@@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq)
34 34
35 /* the order here really doesn't matter */ 35 /* the order here really doesn't matter */
36 atomic_dec(&rq->rd->rto_count); 36 atomic_dec(&rq->rd->rto_count);
37 cpu_clear(rq->cpu, rq->rd->rto_mask); 37 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
38} 38}
39 39
40static void update_rt_migration(struct rq *rq) 40static void update_rt_migration(struct rq *rq)
@@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
139} 139}
140 140
141#ifdef CONFIG_SMP 141#ifdef CONFIG_SMP
142static inline cpumask_t sched_rt_period_mask(void) 142static inline const struct cpumask *sched_rt_period_mask(void)
143{ 143{
144 return cpu_rq(smp_processor_id())->rd->span; 144 return cpu_rq(smp_processor_id())->rd->span;
145} 145}
146#else 146#else
147static inline cpumask_t sched_rt_period_mask(void) 147static inline const struct cpumask *sched_rt_period_mask(void)
148{ 148{
149 return cpu_online_map; 149 return cpu_online_mask;
150} 150}
151#endif 151#endif
152 152
@@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
212 return rt_rq->rt_throttled; 212 return rt_rq->rt_throttled;
213} 213}
214 214
215static inline cpumask_t sched_rt_period_mask(void) 215static inline const struct cpumask *sched_rt_period_mask(void)
216{ 216{
217 return cpu_online_map; 217 return cpu_online_mask;
218} 218}
219 219
220static inline 220static inline
@@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
241 int i, weight, more = 0; 241 int i, weight, more = 0;
242 u64 rt_period; 242 u64 rt_period;
243 243
244 weight = cpus_weight(rd->span); 244 weight = cpumask_weight(rd->span);
245 245
246 spin_lock(&rt_b->rt_runtime_lock); 246 spin_lock(&rt_b->rt_runtime_lock);
247 rt_period = ktime_to_ns(rt_b->rt_period); 247 rt_period = ktime_to_ns(rt_b->rt_period);
248 for_each_cpu_mask_nr(i, rd->span) { 248 for_each_cpu(i, rd->span) {
249 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 249 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
250 s64 diff; 250 s64 diff;
251 251
@@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq)
324 /* 324 /*
325 * Greedy reclaim, take back as much as we can. 325 * Greedy reclaim, take back as much as we can.
326 */ 326 */
327 for_each_cpu_mask(i, rd->span) { 327 for_each_cpu(i, rd->span) {
328 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 328 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
329 s64 diff; 329 s64 diff;
330 330
@@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq)
429static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) 429static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
430{ 430{
431 int i, idle = 1; 431 int i, idle = 1;
432 cpumask_t span; 432 const struct cpumask *span;
433 433
434 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) 434 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
435 return 1; 435 return 1;
436 436
437 span = sched_rt_period_mask(); 437 span = sched_rt_period_mask();
438 for_each_cpu_mask(i, span) { 438 for_each_cpu(i, span) {
439 int enqueue = 0; 439 int enqueue = 0;
440 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 440 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
441 struct rq *rq = rq_of_rt_rq(rt_rq); 441 struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -805,17 +805,20 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
805 805
806static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 806static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
807{ 807{
808 cpumask_t mask; 808 cpumask_var_t mask;
809 809
810 if (rq->curr->rt.nr_cpus_allowed == 1) 810 if (rq->curr->rt.nr_cpus_allowed == 1)
811 return; 811 return;
812 812
813 if (p->rt.nr_cpus_allowed != 1 813 if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
814 && cpupri_find(&rq->rd->cpupri, p, &mask))
815 return; 814 return;
816 815
817 if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) 816 if (p->rt.nr_cpus_allowed != 1
818 return; 817 && cpupri_find(&rq->rd->cpupri, p, mask))
818 goto free;
819
820 if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
821 goto free;
819 822
820 /* 823 /*
821 * There appears to be other cpus that can accept 824 * There appears to be other cpus that can accept
@@ -824,6 +827,8 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
824 */ 827 */
825 requeue_task_rt(rq, p, 1); 828 requeue_task_rt(rq, p, 1);
826 resched_task(rq->curr); 829 resched_task(rq->curr);
830free:
831 free_cpumask_var(mask);
827} 832}
828 833
829#endif /* CONFIG_SMP */ 834#endif /* CONFIG_SMP */
@@ -914,7 +919,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
914static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 919static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
915{ 920{
916 if (!task_running(rq, p) && 921 if (!task_running(rq, p) &&
917 (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && 922 (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
918 (p->rt.nr_cpus_allowed > 1)) 923 (p->rt.nr_cpus_allowed > 1))
919 return 1; 924 return 1;
920 return 0; 925 return 0;
@@ -953,7 +958,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
953 return next; 958 return next;
954} 959}
955 960
956static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); 961static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
957 962
958static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) 963static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
959{ 964{
@@ -973,7 +978,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
973static int find_lowest_rq(struct task_struct *task) 978static int find_lowest_rq(struct task_struct *task)
974{ 979{
975 struct sched_domain *sd; 980 struct sched_domain *sd;
976 cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); 981 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
977 int this_cpu = smp_processor_id(); 982 int this_cpu = smp_processor_id();
978 int cpu = task_cpu(task); 983 int cpu = task_cpu(task);
979 984
@@ -988,7 +993,7 @@ static int find_lowest_rq(struct task_struct *task)
988 * I guess we might want to change cpupri_find() to ignore those 993 * I guess we might want to change cpupri_find() to ignore those
989 * in the first place. 994 * in the first place.
990 */ 995 */
991 cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); 996 cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
992 997
993 /* 998 /*
994 * At this point we have built a mask of cpus representing the 999 * At this point we have built a mask of cpus representing the
@@ -998,7 +1003,7 @@ static int find_lowest_rq(struct task_struct *task)
998 * We prioritize the last cpu that the task executed on since 1003 * We prioritize the last cpu that the task executed on since
999 * it is most likely cache-hot in that location. 1004 * it is most likely cache-hot in that location.
1000 */ 1005 */
1001 if (cpu_isset(cpu, *lowest_mask)) 1006 if (cpumask_test_cpu(cpu, lowest_mask))
1002 return cpu; 1007 return cpu;
1003 1008
1004 /* 1009 /*
@@ -1013,7 +1018,8 @@ static int find_lowest_rq(struct task_struct *task)
1013 cpumask_t domain_mask; 1018 cpumask_t domain_mask;
1014 int best_cpu; 1019 int best_cpu;
1015 1020
1016 cpus_and(domain_mask, sd->span, *lowest_mask); 1021 cpumask_and(&domain_mask, sched_domain_span(sd),
1022 lowest_mask);
1017 1023
1018 best_cpu = pick_optimal_cpu(this_cpu, 1024 best_cpu = pick_optimal_cpu(this_cpu,
1019 &domain_mask); 1025 &domain_mask);
@@ -1054,8 +1060,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1054 * Also make sure that it wasn't scheduled on its rq. 1060 * Also make sure that it wasn't scheduled on its rq.
1055 */ 1061 */
1056 if (unlikely(task_rq(task) != rq || 1062 if (unlikely(task_rq(task) != rq ||
1057 !cpu_isset(lowest_rq->cpu, 1063 !cpumask_test_cpu(lowest_rq->cpu,
1058 task->cpus_allowed) || 1064 &task->cpus_allowed) ||
1059 task_running(rq, task) || 1065 task_running(rq, task) ||
1060 !task->se.on_rq)) { 1066 !task->se.on_rq)) {
1061 1067
@@ -1176,7 +1182,7 @@ static int pull_rt_task(struct rq *this_rq)
1176 1182
1177 next = pick_next_task_rt(this_rq); 1183 next = pick_next_task_rt(this_rq);
1178 1184
1179 for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { 1185 for_each_cpu(cpu, this_rq->rd->rto_mask) {
1180 if (this_cpu == cpu) 1186 if (this_cpu == cpu)
1181 continue; 1187 continue;
1182 1188
@@ -1305,9 +1311,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
1305} 1311}
1306 1312
1307static void set_cpus_allowed_rt(struct task_struct *p, 1313static void set_cpus_allowed_rt(struct task_struct *p,
1308 const cpumask_t *new_mask) 1314 const struct cpumask *new_mask)
1309{ 1315{
1310 int weight = cpus_weight(*new_mask); 1316 int weight = cpumask_weight(new_mask);
1311 1317
1312 BUG_ON(!rt_task(p)); 1318 BUG_ON(!rt_task(p));
1313 1319
@@ -1328,7 +1334,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
1328 update_rt_migration(rq); 1334 update_rt_migration(rq);
1329 } 1335 }
1330 1336
1331 p->cpus_allowed = *new_mask; 1337 cpumask_copy(&p->cpus_allowed, new_mask);
1332 p->rt.nr_cpus_allowed = weight; 1338 p->rt.nr_cpus_allowed = weight;
1333} 1339}
1334 1340
@@ -1371,6 +1377,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
1371 if (!rq->rt.rt_nr_running) 1377 if (!rq->rt.rt_nr_running)
1372 pull_rt_task(rq); 1378 pull_rt_task(rq);
1373} 1379}
1380
1381static inline void init_sched_rt_class(void)
1382{
1383 unsigned int i;
1384
1385 for_each_possible_cpu(i)
1386 alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
1387}
1374#endif /* CONFIG_SMP */ 1388#endif /* CONFIG_SMP */
1375 1389
1376/* 1390/*
@@ -1541,3 +1555,4 @@ static void print_rt_stats(struct seq_file *m, int cpu)
1541 rcu_read_unlock(); 1555 rcu_read_unlock();
1542} 1556}
1543#endif /* CONFIG_SCHED_DEBUG */ 1557#endif /* CONFIG_SCHED_DEBUG */
1558
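
sched_rt.c turns the per-CPU local_cpu_mask into a per-CPU cpumask_var_t and adds init_sched_rt_class() to allocate each instance once the slab allocator is up. A sketch of that per-CPU pattern, using this kernel generation's __get_cpu_var() accessor and a hypothetical mask name:

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(cpumask_var_t, demo_scratch_mask);

static void __init demo_scratch_init(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu)
                alloc_cpumask_var(&per_cpu(demo_scratch_mask, cpu), GFP_KERNEL);
}

static struct cpumask *demo_scratch(void)
{
        /* Valid only after demo_scratch_init(); caller must stay on this CPU. */
        return __get_cpu_var(demo_scratch_mask);
}
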
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 3b01098164c8..f2773b5d1226 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -42,7 +42,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
42 for_each_domain(cpu, sd) { 42 for_each_domain(cpu, sd) {
43 enum cpu_idle_type itype; 43 enum cpu_idle_type itype;
44 44
45 cpumask_scnprintf(mask_str, mask_len, sd->span); 45 cpumask_scnprintf(mask_str, mask_len,
46 sched_domain_span(sd));
46 seq_printf(seq, "domain%d %s", dcount++, mask_str); 47 seq_printf(seq, "domain%d %s", dcount++, mask_str);
47 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; 48 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
48 itype++) { 49 itype++) {
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 466e75ce271a..670c1eca47ec 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -784,3 +784,23 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
784} 784}
785EXPORT_SYMBOL(on_each_cpu); 785EXPORT_SYMBOL(on_each_cpu);
786#endif 786#endif
787
788/*
789 * [ These __weak aliases are kept in a separate compilation unit, so that
790 * GCC does not inline them incorrectly. ]
791 */
792
793int __init __weak early_irq_init(void)
794{
795 return 0;
796}
797
798int __init __weak arch_early_irq_init(void)
799{
800 return 0;
801}
802
803int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
804{
805 return 0;
806}
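
These defaults rely on __weak linkage: the generic kernel provides a do-nothing definition, and an architecture (or the sparse-irq code) can supply a strong definition with the same signature that silently wins at link time. A tiny illustration with a hypothetical hook:

#include <linux/init.h>

/* Generic fallback; an arch that defines its own copy overrides this one. */
int __init __weak demo_arch_hook_init(void)
{
        return 0;
}

An architecture would simply define int __init demo_arch_hook_init(void) without __weak; no ifdefs or registration calls are needed.
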
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index bd6be76303cf..6d7dc4ec4aa5 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -352,7 +352,7 @@ static int parse(struct nlattr *na, cpumask_t *mask)
352 if (!data) 352 if (!data)
353 return -ENOMEM; 353 return -ENOMEM;
354 nla_strlcpy(data, na, len); 354 nla_strlcpy(data, na, len);
355 ret = cpulist_parse(data, *mask); 355 ret = cpulist_parse(data, mask);
356 kfree(data); 356 kfree(data);
357 return ret; 357 return ret;
358} 358}
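
taskstats' parse() now hands cpulist_parse() the destination mask by pointer. In this API generation cpulist_parse() takes a human-readable list such as "0-3,5" and fills a struct cpumask, returning a negative errno on malformed input. A hedged sketch with an invented helper name:

#include <linux/cpumask.h>
#include <linux/errno.h>

static int mask_from_list(const char *list, struct cpumask *mask)
{
        int ret;

        ret = cpulist_parse(list, mask);        /* e.g. "0-3,5" */
        if (ret < 0)
                return ret;
        if (cpumask_empty(mask))
                return -EINVAL; /* treat an empty list as an error */
        return 0;
}
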
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f8d968063cea..ea2f48af83cf 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -166,6 +166,8 @@ static void clockevents_notify_released(void)
166void clockevents_register_device(struct clock_event_device *dev) 166void clockevents_register_device(struct clock_event_device *dev)
167{ 167{
168 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 168 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
169 BUG_ON(!dev->cpumask);
170
169 /* 171 /*
170 * A nsec2cyc multiplicator of 0 is invalid and we'd crash 172 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
171 * on it, so fix it up and emit a warning: 173 * on it, so fix it up and emit a warning:
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f98a1b7b16e9..9590af2327be 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -150,7 +150,7 @@ static void tick_do_broadcast(cpumask_t mask)
150 */ 150 */
151 cpu = first_cpu(mask); 151 cpu = first_cpu(mask);
152 td = &per_cpu(tick_cpu_device, cpu); 152 td = &per_cpu(tick_cpu_device, cpu);
153 td->evtdev->broadcast(mask); 153 td->evtdev->broadcast(&mask);
154 } 154 }
155} 155}
156 156
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index df12434b43ca..f8372be74122 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -136,7 +136,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
136 */ 136 */
137static void tick_setup_device(struct tick_device *td, 137static void tick_setup_device(struct tick_device *td,
138 struct clock_event_device *newdev, int cpu, 138 struct clock_event_device *newdev, int cpu,
139 const cpumask_t *cpumask) 139 const struct cpumask *cpumask)
140{ 140{
141 ktime_t next_event; 141 ktime_t next_event;
142 void (*handler)(struct clock_event_device *) = NULL; 142 void (*handler)(struct clock_event_device *) = NULL;
@@ -171,8 +171,8 @@ static void tick_setup_device(struct tick_device *td,
171 * When the device is not per cpu, pin the interrupt to the 171 * When the device is not per cpu, pin the interrupt to the
172 * current cpu: 172 * current cpu:
173 */ 173 */
174 if (!cpus_equal(newdev->cpumask, *cpumask)) 174 if (!cpumask_equal(newdev->cpumask, cpumask))
175 irq_set_affinity(newdev->irq, *cpumask); 175 irq_set_affinity(newdev->irq, cpumask);
176 176
177 /* 177 /*
178 * When global broadcasting is active, check if the current 178 * When global broadcasting is active, check if the current
@@ -202,14 +202,14 @@ static int tick_check_new_device(struct clock_event_device *newdev)
202 spin_lock_irqsave(&tick_device_lock, flags); 202 spin_lock_irqsave(&tick_device_lock, flags);
203 203
204 cpu = smp_processor_id(); 204 cpu = smp_processor_id();
205 if (!cpu_isset(cpu, newdev->cpumask)) 205 if (!cpumask_test_cpu(cpu, newdev->cpumask))
206 goto out_bc; 206 goto out_bc;
207 207
208 td = &per_cpu(tick_cpu_device, cpu); 208 td = &per_cpu(tick_cpu_device, cpu);
209 curdev = td->evtdev; 209 curdev = td->evtdev;
210 210
211 /* cpu local device ? */ 211 /* cpu local device ? */
212 if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) { 212 if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
213 213
214 /* 214 /*
215 * If the cpu affinity of the device interrupt can not 215 * If the cpu affinity of the device interrupt can not
@@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
222 * If we have a cpu local device already, do not replace it 222 * If we have a cpu local device already, do not replace it
223 * by a non cpu local device 223 * by a non cpu local device
224 */ 224 */
225 if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu))) 225 if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
226 goto out_bc; 226 goto out_bc;
227 } 227 }
228 228
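
tick-common now compares a clock event device's affinity against cpumask_of(cpu), a constant single-CPU mask, using cpumask_equal(); the older cpumask_of_cpu() produced a full cpumask_t by value. A sketch of the per-CPU check, with an invented helper name:

#include <linux/cpumask.h>
#include <linux/types.h>

/* True if 'affinity' covers exactly one CPU, namely 'cpu'. */
static bool is_cpu_local(const struct cpumask *affinity, int cpu)
{
        return cpumask_equal(affinity, cpumask_of(cpu));
}
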
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 611fa4c0baab..1b6c05bd0d0a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void)
144 if (!ts->tick_stopped) 144 if (!ts->tick_stopped)
145 return; 145 return;
146 146
147 cpu_clear(cpu, nohz_cpu_mask); 147 cpumask_clear_cpu(cpu, nohz_cpu_mask);
148 now = ktime_get(); 148 now = ktime_get();
149 ts->idle_waketime = now; 149 ts->idle_waketime = now;
150 150
@@ -301,7 +301,7 @@ void tick_nohz_stop_sched_tick(int inidle)
301 tick_do_timer_cpu = TICK_DO_TIMER_NONE; 301 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
302 302
303 if (delta_jiffies > 1) 303 if (delta_jiffies > 1)
304 cpu_set(cpu, nohz_cpu_mask); 304 cpumask_set_cpu(cpu, nohz_cpu_mask);
305 305
306 /* Skip reprogram of event if its not changed */ 306 /* Skip reprogram of event if its not changed */
307 if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) 307 if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
@@ -319,7 +319,7 @@ void tick_nohz_stop_sched_tick(int inidle)
319 /* 319 /*
320 * sched tick not stopped! 320 * sched tick not stopped!
321 */ 321 */
322 cpu_clear(cpu, nohz_cpu_mask); 322 cpumask_clear_cpu(cpu, nohz_cpu_mask);
323 goto out; 323 goto out;
324 } 324 }
325 325
@@ -361,7 +361,7 @@ void tick_nohz_stop_sched_tick(int inidle)
361 * softirq. 361 * softirq.
362 */ 362 */
363 tick_do_update_jiffies64(ktime_get()); 363 tick_do_update_jiffies64(ktime_get());
364 cpu_clear(cpu, nohz_cpu_mask); 364 cpumask_clear_cpu(cpu, nohz_cpu_mask);
365 } 365 }
366 raise_softirq_irqoff(TIMER_SOFTIRQ); 366 raise_softirq_irqoff(TIMER_SOFTIRQ);
367out: 367out:
@@ -441,7 +441,7 @@ void tick_nohz_restart_sched_tick(void)
441 select_nohz_load_balancer(0); 441 select_nohz_load_balancer(0);
442 now = ktime_get(); 442 now = ktime_get();
443 tick_do_update_jiffies64(now); 443 tick_do_update_jiffies64(now);
444 cpu_clear(cpu, nohz_cpu_mask); 444 cpumask_clear_cpu(cpu, nohz_cpu_mask);
445 445
446#ifndef CONFIG_VIRT_CPU_ACCOUNTING 446#ifndef CONFIG_VIRT_CPU_ACCOUNTING
447 /* 447 /*
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4185d5221633..0e91f43b6baf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2674,7 +2674,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf,
2674 2674
2675 mutex_lock(&tracing_cpumask_update_lock); 2675 mutex_lock(&tracing_cpumask_update_lock);
2676 2676
2677 len = cpumask_scnprintf(mask_str, count, tracing_cpumask); 2677 len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
2678 if (count - len < 2) { 2678 if (count - len < 2) {
2679 count = -EINVAL; 2679 count = -EINVAL;
2680 goto out_err; 2680 goto out_err;
@@ -2695,7 +2695,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2695 int err, cpu; 2695 int err, cpu;
2696 2696
2697 mutex_lock(&tracing_cpumask_update_lock); 2697 mutex_lock(&tracing_cpumask_update_lock);
2698 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 2698 err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
2699 if (err) 2699 if (err)
2700 goto err_unlock; 2700 goto err_unlock;
2701 2701