-rw-r--r--  Documentation/cpu-hotplug.txt | 17
-rw-r--r--  arch/ia64/include/asm/topology.h | 2
-rw-r--r--  arch/mips/include/asm/mach-ip27/topology.h | 1
-rw-r--r--  arch/powerpc/include/asm/topology.h | 1
-rw-r--r--  arch/sh/include/asm/topology.h | 1
-rw-r--r--  arch/x86/Kconfig | 13
-rw-r--r--  arch/x86/include/asm/bigsmp/apic.h | 32
-rw-r--r--  arch/x86/include/asm/bigsmp/ipi.h | 13
-rw-r--r--  arch/x86/include/asm/desc.h | 10
-rw-r--r--  arch/x86/include/asm/es7000/apic.h | 82
-rw-r--r--  arch/x86/include/asm/es7000/ipi.h | 12
-rw-r--r--  arch/x86/include/asm/genapic_32.h | 13
-rw-r--r--  arch/x86/include/asm/genapic_64.h | 14
-rw-r--r--  arch/x86/include/asm/ipi.h | 23
-rw-r--r--  arch/x86/include/asm/irq.h | 3
-rw-r--r--  arch/x86/include/asm/mach-default/mach_apic.h | 28
-rw-r--r--  arch/x86/include/asm/mach-default/mach_ipi.h | 18
-rw-r--r--  arch/x86/include/asm/mach-generic/mach_apic.h | 1
-rw-r--r--  arch/x86/include/asm/numaq/apic.h | 12
-rw-r--r--  arch/x86/include/asm/numaq/ipi.h | 13
-rw-r--r--  arch/x86/include/asm/smp.h | 6
-rw-r--r--  arch/x86/include/asm/summit/apic.h | 55
-rw-r--r--  arch/x86/include/asm/summit/ipi.h | 9
-rw-r--r--  arch/x86/include/asm/topology.h | 2
-rw-r--r--  arch/x86/kernel/apic.c | 34
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 41
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 108
-rw-r--r--  arch/x86/kernel/genapic_flat_64.c | 107
-rw-r--r--  arch/x86/kernel/genx2apic_cluster.c | 81
-rw-r--r--  arch/x86/kernel/genx2apic_phys.c | 74
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 61
-rw-r--r--  arch/x86/kernel/io_apic.c | 358
-rw-r--r--  arch/x86/kernel/ipi.c | 28
-rw-r--r--  arch/x86/kernel/irq.c | 3
-rw-r--r--  arch/x86/kernel/irq_32.c | 13
-rw-r--r--  arch/x86/kernel/irq_64.c | 15
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 16
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 13
-rw-r--r--  arch/x86/kernel/reboot.c | 5
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 17
-rw-r--r--  arch/x86/kernel/smp.c | 8
-rw-r--r--  arch/x86/kernel/smpboot.c | 27
-rw-r--r--  arch/x86/kernel/tlb_32.c | 2
-rw-r--r--  arch/x86/kernel/tlb_64.c | 2
-rw-r--r--  arch/x86/kernel/traps.c | 12
-rw-r--r--  arch/x86/mach-generic/bigsmp.c | 5
-rw-r--r--  arch/x86/mach-generic/es7000.c | 5
-rw-r--r--  arch/x86/mach-generic/numaq.c | 5
-rw-r--r--  arch/x86/mach-generic/summit.c | 5
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c | 2
-rw-r--r--  arch/x86/mm/numa_64.c | 4
-rw-r--r--  arch/x86/mm/srat_64.c | 2
-rw-r--r--  arch/x86/xen/mmu.c | 20
-rw-r--r--  arch/x86/xen/smp.c | 27
-rw-r--r--  arch/x86/xen/suspend.c | 3
-rw-r--r--  arch/x86/xen/xen-ops.h | 2
-rw-r--r--  drivers/lguest/interrupts_and_traps.c | 13
-rw-r--r--  include/linux/sched.h | 92
-rw-r--r--  include/linux/topology.h | 6
-rw-r--r--  kernel/rcuclassic.c | 3
-rw-r--r--  kernel/sched.c | 974
-rw-r--r--  kernel/sched_cpupri.c | 39
-rw-r--r--  kernel/sched_cpupri.h | 5
-rw-r--r--  kernel/sched_fair.c | 32
-rw-r--r--  kernel/sched_rt.c | 73
-rw-r--r--  kernel/sched_stats.h | 3
-rw-r--r--  kernel/time/tick-sched.c | 10
67 files changed, 1679 insertions, 1057 deletions
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 94bbc27ddd4f..9d620c153b04 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -50,16 +50,17 @@ additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets
50 cpu_possible_map = cpu_present_map + additional_cpus 50 cpu_possible_map = cpu_present_map + additional_cpus
51 51
52(*) Option valid only for following architectures 52(*) Option valid only for following architectures
53- x86_64, ia64 53- ia64
54 54
55ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT 55ia64 uses the number of disabled local apics in ACPI tables MADT to
56to determine the number of potentially hot-pluggable cpus. The implementation 56determine the number of potentially hot-pluggable cpus. The implementation
57should only rely on this to count the # of cpus, but *MUST* not rely on the 57should only rely on this to count the # of cpus, but *MUST* not rely
58apicid values in those tables for disabled apics. In the event BIOS doesn't 58on the apicid values in those tables for disabled apics. In the event
59mark such hot-pluggable cpus as disabled entries, one could use this 59BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
60parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map. 60use this parameter "additional_cpus=x" to represent those cpus in the
61cpu_possible_map.
61 62
62possible_cpus=n [s390 only] use this to set hotpluggable cpus. 63possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus.
63 This option sets possible_cpus bits in 64 This option sets possible_cpus bits in
64 cpu_possible_map. Thus keeping the numbers of bits set 65 cpu_possible_map. Thus keeping the numbers of bits set
65 constant even if the machine gets rebooted. 66 constant even if the machine gets rebooted.
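For illustration only (the numbers are arbitrary), the two options described above are plain kernel command-line parameters, e.g.:

	additional_cpus=2    (ia64: cpu_possible_map = cpu_present_map + 2)
	possible_cpus=4      (s390, x86_64: exactly four bits set in cpu_possible_map,
	                      independent of how many cpus are present at boot)
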
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 97ae7f509109..76a33a91ca69 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -56,7 +56,6 @@
56void build_cpu_to_node_map(void); 56void build_cpu_to_node_map(void);
57 57
58#define SD_CPU_INIT (struct sched_domain) { \ 58#define SD_CPU_INIT (struct sched_domain) { \
59 .span = CPU_MASK_NONE, \
60 .parent = NULL, \ 59 .parent = NULL, \
61 .child = NULL, \ 60 .child = NULL, \
62 .groups = NULL, \ 61 .groups = NULL, \
@@ -81,7 +80,6 @@ void build_cpu_to_node_map(void);
81 80
82/* sched_domains SD_NODE_INIT for IA64 NUMA machines */ 81/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
83#define SD_NODE_INIT (struct sched_domain) { \ 82#define SD_NODE_INIT (struct sched_domain) { \
84 .span = CPU_MASK_NONE, \
85 .parent = NULL, \ 83 .parent = NULL, \
86 .child = NULL, \ 84 .child = NULL, \
87 .groups = NULL, \ 85 .groups = NULL, \
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index c1c3f5b2f18f..55d481569a1f 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -39,7 +39,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
39 39
40/* sched_domains SD_NODE_INIT for SGI IP27 machines */ 40/* sched_domains SD_NODE_INIT for SGI IP27 machines */
41#define SD_NODE_INIT (struct sched_domain) { \ 41#define SD_NODE_INIT (struct sched_domain) { \
42 .span = CPU_MASK_NONE, \
43 .parent = NULL, \ 42 .parent = NULL, \
44 .child = NULL, \ 43 .child = NULL, \
45 .groups = NULL, \ 44 .groups = NULL, \
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 236dae1cd29f..375258559ae6 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -52,7 +52,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
52 52
53/* sched_domains SD_NODE_INIT for PPC64 machines */ 53/* sched_domains SD_NODE_INIT for PPC64 machines */
54#define SD_NODE_INIT (struct sched_domain) { \ 54#define SD_NODE_INIT (struct sched_domain) { \
55 .span = CPU_MASK_NONE, \
56 .parent = NULL, \ 55 .parent = NULL, \
57 .child = NULL, \ 56 .child = NULL, \
58 .groups = NULL, \ 57 .groups = NULL, \
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 9aa160d0efe5..066f0fba590e 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -5,7 +5,6 @@
5 5
6/* sched_domains SD_NODE_INIT for sh machines */ 6/* sched_domains SD_NODE_INIT for sh machines */
7#define SD_NODE_INIT (struct sched_domain) { \ 7#define SD_NODE_INIT (struct sched_domain) { \
8 .span = CPU_MASK_NONE, \
9 .parent = NULL, \ 8 .parent = NULL, \
10 .child = NULL, \ 9 .child = NULL, \
11 .groups = NULL, \ 10 .groups = NULL, \
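The four hunks above all drop the same line: SD_CPU_INIT/SD_NODE_INIT can no longer initialise .span statically because, elsewhere in this series, the sched_domain span stops being a plain cpumask_t member and is reached through an accessor instead. A minimal sketch of reading a domain's span under that assumption (sched_domain_span() comes from include/linux/sched.h in this series, not from the hunks shown here):

	static void walk_domain(struct sched_domain *sd)
	{
		int cpu;

		/* iterate every CPU covered by this scheduling domain */
		for_each_cpu(cpu, sched_domain_span(sd))
			pr_info("domain covers cpu %d\n", cpu);
	}
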
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0f44add3e0b7..249d1e0824b5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -601,19 +601,20 @@ config IOMMU_HELPER
601 601
602config MAXSMP 602config MAXSMP
603 bool "Configure Maximum number of SMP Processors and NUMA Nodes" 603 bool "Configure Maximum number of SMP Processors and NUMA Nodes"
604 depends on X86_64 && SMP && BROKEN 604 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
605 select CPUMASK_OFFSTACK
605 default n 606 default n
606 help 607 help
607 Configure maximum number of CPUS and NUMA Nodes for this architecture. 608 Configure maximum number of CPUS and NUMA Nodes for this architecture.
608 If unsure, say N. 609 If unsure, say N.
609 610
610config NR_CPUS 611config NR_CPUS
611 int "Maximum number of CPUs (2-512)" if !MAXSMP 612 int "Maximum number of CPUs" if SMP && !MAXSMP
612 range 2 512 613 range 2 512 if SMP && !MAXSMP
613 depends on SMP 614 default "1" if !SMP
614 default "4096" if MAXSMP 615 default "4096" if MAXSMP
615 default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 616 default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
616 default "8" 617 default "8" if SMP
617 help 618 help
618 This allows you to specify the maximum number of CPUs which this 619 This allows you to specify the maximum number of CPUs which this
619 kernel will support. The maximum supported value is 512 and the 620 kernel will support. The maximum supported value is 512 and the
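MAXSMP now selects CPUMASK_OFFSTACK, which is why the rest of this patch stops passing cpumask_t by value and stops building temporary masks on the stack: with NR_CPUS=4096 a cpumask_t is 512 bytes. A minimal sketch of the cpumask_var_t idiom that CPUMASK_OFFSTACK enables (illustrative only; with CPUMASK_OFFSTACK=n the "allocation" is a no-op and the mask lives in the variable itself):

	static int example(void)
	{
		cpumask_var_t tmp;

		if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
			return -ENOMEM;	/* can only fail when CPUMASK_OFFSTACK=y */

		cpumask_and(tmp, cpu_online_mask, cpu_present_mask);
		/* ... use tmp ... */

		free_cpumask_var(tmp);
		return 0;
	}
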
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index ce547f24a1cd..d8dd9f537911 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline const cpumask_t *target_cpus(void)
13{ 13{
14#ifdef CONFIG_SMP 14#ifdef CONFIG_SMP
15 return cpu_online_map; 15 return &cpu_online_map;
16#else 16#else
17 return cpumask_of_cpu(0); 17 return &cpumask_of_cpu(0);
18#endif 18#endif
19} 19}
20 20
@@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid)
79 79
80static inline int cpu_present_to_apicid(int mps_cpu) 80static inline int cpu_present_to_apicid(int mps_cpu)
81{ 81{
82 if (mps_cpu < NR_CPUS) 82 if (mps_cpu < nr_cpu_ids)
83 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); 83 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
84 84
85 return BAD_APICID; 85 return BAD_APICID;
@@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[];
94/* Mapping from cpu number to logical apicid */ 94/* Mapping from cpu number to logical apicid */
95static inline int cpu_to_logical_apicid(int cpu) 95static inline int cpu_to_logical_apicid(int cpu)
96{ 96{
97 if (cpu >= NR_CPUS) 97 if (cpu >= nr_cpu_ids)
98 return BAD_APICID; 98 return BAD_APICID;
99 return cpu_physical_id(cpu); 99 return cpu_physical_id(cpu);
100} 100}
@@ -119,16 +119,34 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
119} 119}
120 120
121/* As we are using single CPU as destination, pick only one CPU here */ 121/* As we are using single CPU as destination, pick only one CPU here */
122static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 122static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
123{ 123{
124 int cpu; 124 int cpu;
125 int apicid; 125 int apicid;
126 126
127 cpu = first_cpu(cpumask); 127 cpu = first_cpu(*cpumask);
128 apicid = cpu_to_logical_apicid(cpu); 128 apicid = cpu_to_logical_apicid(cpu);
129 return apicid; 129 return apicid;
130} 130}
131 131
132static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 const struct cpumask *andmask)
134{
135 int cpu;
136
137 /*
138 * We're using fixed IRQ delivery, can only return one phys APIC ID.
139 * May as well be the first.
140 */
141 for_each_cpu_and(cpu, cpumask, andmask)
142 if (cpumask_test_cpu(cpu, cpu_online_mask))
143 break;
144 if (cpu < nr_cpu_ids)
145 return cpu_to_logical_apicid(cpu);
146
147 return BAD_APICID;
148}
149
132static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) 150static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
133{ 151{
134 return cpuid_apic >> index_msb; 152 return cpuid_apic >> index_msb;
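cpu_mask_to_apicid_and() is the new companion to cpu_mask_to_apicid(): it lets a caller combine two masks (typically an irq's requested affinity and the vector's allocation domain) without first materialising the intersection in a temporary cpumask. A hedged sketch of a call site; the surrounding IO-APIC code is not part of this excerpt, so the function and parameter names below are illustrative only:

	static unsigned int pick_irq_dest(const struct cpumask *affinity,
					  const struct cpumask *domain)
	{
		/* for the bigsmp variant above: APIC ID of the first online
		 * cpu present in both masks, or BAD_APICID if there is none */
		return cpu_mask_to_apicid_and(affinity, domain);
	}
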
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 9404c535b7ec..27fcd01b3ae6 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,25 +1,22 @@
1#ifndef __ASM_MACH_IPI_H 1#ifndef __ASM_MACH_IPI_H
2#define __ASM_MACH_IPI_H 2#define __ASM_MACH_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
5void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const struct cpumask *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
10 11
11static inline void send_IPI_allbutself(int vector) 12static inline void send_IPI_allbutself(int vector)
12{ 13{
13 cpumask_t mask = cpu_online_map; 14 send_IPI_mask_allbutself(cpu_online_mask, vector);
14 cpu_clear(smp_processor_id(), mask);
15
16 if (!cpus_empty(mask))
17 send_IPI_mask(mask, vector);
18} 15}
19 16
20static inline void send_IPI_all(int vector) 17static inline void send_IPI_all(int vector)
21{ 18{
22 send_IPI_mask(cpu_online_map, vector); 19 send_IPI_mask(cpu_online_mask, vector);
23} 20}
24 21
25#endif /* __ASM_MACH_IPI_H */ 22#endif /* __ASM_MACH_IPI_H */
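The same three-line change repeats in every subarch ipi.h below; the point is stack footprint. With NR_CPUS=4096 the old code copied a 512-byte cpumask_t onto the stack just to clear its own bit, whereas the new helper walks the mask and simply skips the local CPU. Side by side, as a sketch:

	/* old: copy the whole online map, clear ourselves, maybe send */
	cpumask_t mask = cpu_online_map;

	cpu_clear(smp_processor_id(), mask);
	if (!cpus_empty(mask))
		send_IPI_mask(mask, vector);

	/* new: no on-stack copy; the helper skips smp_processor_id() while iterating */
	send_IPI_mask_allbutself(cpu_online_mask, vector);
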
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b17b072..dc27705f5443 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr)
320 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); 320 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
321} 321}
322 322
323#define SYS_VECTOR_FREE 0
324#define SYS_VECTOR_ALLOCED 1
325
326extern int first_system_vector; 323extern int first_system_vector;
327extern char system_vectors[]; 324/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
325extern unsigned long used_vectors[];
328 326
329static inline void alloc_system_vector(int vector) 327static inline void alloc_system_vector(int vector)
330{ 328{
331 if (system_vectors[vector] == SYS_VECTOR_FREE) { 329 if (!test_bit(vector, used_vectors)) {
332 system_vectors[vector] = SYS_VECTOR_ALLOCED; 330 set_bit(vector, used_vectors);
333 if (first_system_vector > vector) 331 if (first_system_vector > vector)
334 first_system_vector = vector; 332 first_system_vector = vector;
335 } else 333 } else
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index e24ef876915f..51ac1230294e 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,14 +9,14 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus_cluster(void) 12static inline const cpumask_t *target_cpus_cluster(void)
13{ 13{
14 return CPU_MASK_ALL; 14 return &CPU_MASK_ALL;
15} 15}
16 16
17static inline cpumask_t target_cpus(void) 17static inline const cpumask_t *target_cpus(void)
18{ 18{
19 return cpumask_of_cpu(smp_processor_id()); 19 return &cpumask_of_cpu(smp_processor_id());
20} 20}
21 21
22#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) 22#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
@@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS];
80static inline void setup_apic_routing(void) 80static inline void setup_apic_routing(void)
81{ 81{
82 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); 82 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
83 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", 83 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
84 (apic_version[apic] == 0x14) ? 84 (apic_version[apic] == 0x14) ?
85 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); 85 "Physical Cluster" : "Logical Cluster",
86 nr_ioapics, cpus_addr(*target_cpus())[0]);
86} 87}
87 88
88static inline int multi_timer_check(int apic, int irq) 89static inline int multi_timer_check(int apic, int irq)
@@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu)
100{ 101{
101 if (!mps_cpu) 102 if (!mps_cpu)
102 return boot_cpu_physical_apicid; 103 return boot_cpu_physical_apicid;
103 else if (mps_cpu < NR_CPUS) 104 else if (mps_cpu < nr_cpu_ids)
104 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); 105 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
105 else 106 else
106 return BAD_APICID; 107 return BAD_APICID;
@@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[];
120static inline int cpu_to_logical_apicid(int cpu) 121static inline int cpu_to_logical_apicid(int cpu)
121{ 122{
122#ifdef CONFIG_SMP 123#ifdef CONFIG_SMP
123 if (cpu >= NR_CPUS) 124 if (cpu >= nr_cpu_ids)
124 return BAD_APICID; 125 return BAD_APICID;
125 return (int)cpu_2_logical_apicid[cpu]; 126 return (int)cpu_2_logical_apicid[cpu];
126#else 127#else
127 return logical_smp_processor_id(); 128 return logical_smp_processor_id();
128#endif 129#endif
@@ -146,14 +147,15 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
146 return (1); 147 return (1);
147} 148}
148 149
149static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) 150static inline unsigned int
151cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
150{ 152{
151 int num_bits_set; 153 int num_bits_set;
152 int cpus_found = 0; 154 int cpus_found = 0;
153 int cpu; 155 int cpu;
154 int apicid; 156 int apicid;
155 157
156 num_bits_set = cpus_weight(cpumask); 158 num_bits_set = cpumask_weight(cpumask);
157 /* Return id to all */ 159 /* Return id to all */
158 if (num_bits_set == NR_CPUS) 160 if (num_bits_set == NR_CPUS)
159 return 0xFF; 161 return 0xFF;
@@ -161,10 +163,10 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
161 * The cpus in the mask must all be on the apic cluster. If are not 163 * The cpus in the mask must all be on the apic cluster. If are not
162 * on the same apicid cluster return default value of TARGET_CPUS. 164 * on the same apicid cluster return default value of TARGET_CPUS.
163 */ 165 */
164 cpu = first_cpu(cpumask); 166 cpu = cpumask_first(cpumask);
165 apicid = cpu_to_logical_apicid(cpu); 167 apicid = cpu_to_logical_apicid(cpu);
166 while (cpus_found < num_bits_set) { 168 while (cpus_found < num_bits_set) {
167 if (cpu_isset(cpu, cpumask)) { 169 if (cpumask_test_cpu(cpu, cpumask)) {
168 int new_apicid = cpu_to_logical_apicid(cpu); 170 int new_apicid = cpu_to_logical_apicid(cpu);
169 if (apicid_cluster(apicid) != 171 if (apicid_cluster(apicid) !=
170 apicid_cluster(new_apicid)){ 172 apicid_cluster(new_apicid)){
@@ -179,14 +181,14 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
179 return apicid; 181 return apicid;
180} 182}
181 183
182static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 184static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
183{ 185{
184 int num_bits_set; 186 int num_bits_set;
185 int cpus_found = 0; 187 int cpus_found = 0;
186 int cpu; 188 int cpu;
187 int apicid; 189 int apicid;
188 190
189 num_bits_set = cpus_weight(cpumask); 191 num_bits_set = cpus_weight(*cpumask);
190 /* Return id to all */ 192 /* Return id to all */
191 if (num_bits_set == NR_CPUS) 193 if (num_bits_set == NR_CPUS)
192 return cpu_to_logical_apicid(0); 194 return cpu_to_logical_apicid(0);
@@ -194,10 +196,52 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
194 * The cpus in the mask must all be on the apic cluster. If are not 196 * The cpus in the mask must all be on the apic cluster. If are not
195 * on the same apicid cluster return default value of TARGET_CPUS. 197 * on the same apicid cluster return default value of TARGET_CPUS.
196 */ 198 */
197 cpu = first_cpu(cpumask); 199 cpu = first_cpu(*cpumask);
200 apicid = cpu_to_logical_apicid(cpu);
201 while (cpus_found < num_bits_set) {
202 if (cpu_isset(cpu, *cpumask)) {
203 int new_apicid = cpu_to_logical_apicid(cpu);
204 if (apicid_cluster(apicid) !=
205 apicid_cluster(new_apicid)){
206 printk ("%s: Not a valid mask!\n", __func__);
207 return cpu_to_logical_apicid(0);
208 }
209 apicid = new_apicid;
210 cpus_found++;
211 }
212 cpu++;
213 }
214 return apicid;
215}
216
217
218static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
219 const struct cpumask *andmask)
220{
221 int num_bits_set;
222 int cpus_found = 0;
223 int cpu;
224 int apicid = cpu_to_logical_apicid(0);
225 cpumask_var_t cpumask;
226
227 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
228 return apicid;
229
230 cpumask_and(cpumask, inmask, andmask);
231 cpumask_and(cpumask, cpumask, cpu_online_mask);
232
233 num_bits_set = cpumask_weight(cpumask);
234 /* Return id to all */
235 if (num_bits_set == NR_CPUS)
236 goto exit;
237 /*
238 * The cpus in the mask must all be on the apic cluster. If are not
239 * on the same apicid cluster return default value of TARGET_CPUS.
240 */
241 cpu = cpumask_first(cpumask);
198 apicid = cpu_to_logical_apicid(cpu); 242 apicid = cpu_to_logical_apicid(cpu);
199 while (cpus_found < num_bits_set) { 243 while (cpus_found < num_bits_set) {
200 if (cpu_isset(cpu, cpumask)) { 244 if (cpumask_test_cpu(cpu, cpumask)) {
201 int new_apicid = cpu_to_logical_apicid(cpu); 245 int new_apicid = cpu_to_logical_apicid(cpu);
202 if (apicid_cluster(apicid) != 246 if (apicid_cluster(apicid) !=
203 apicid_cluster(new_apicid)){ 247 apicid_cluster(new_apicid)){
@@ -209,6 +253,8 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
209 } 253 }
210 cpu++; 254 cpu++;
211 } 255 }
256exit:
257 free_cpumask_var(cpumask);
212 return apicid; 258 return apicid;
213} 259}
214 260
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 632a955fcc0a..7e8ed24d4b8a 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,24 +1,22 @@
1#ifndef __ASM_ES7000_IPI_H 1#ifndef __ASM_ES7000_IPI_H
2#define __ASM_ES7000_IPI_H 2#define __ASM_ES7000_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
5void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const struct cpumask *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
10 11
11static inline void send_IPI_allbutself(int vector) 12static inline void send_IPI_allbutself(int vector)
12{ 13{
13 cpumask_t mask = cpu_online_map; 14 send_IPI_mask_allbutself(cpu_online_mask, vector);
14 cpu_clear(smp_processor_id(), mask);
15 if (!cpus_empty(mask))
16 send_IPI_mask(mask, vector);
17} 15}
18 16
19static inline void send_IPI_all(int vector) 17static inline void send_IPI_all(int vector)
20{ 18{
21 send_IPI_mask(cpu_online_map, vector); 19 send_IPI_mask(cpu_online_mask, vector);
22} 20}
23 21
24#endif /* __ASM_ES7000_IPI_H */ 22#endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 0ac17d33a8c7..746f37a7963a 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -24,7 +24,7 @@ struct genapic {
24 int (*probe)(void); 24 int (*probe)(void);
25 25
26 int (*apic_id_registered)(void); 26 int (*apic_id_registered)(void);
27 cpumask_t (*target_cpus)(void); 27 const struct cpumask *(*target_cpus)(void);
28 int int_delivery_mode; 28 int int_delivery_mode;
29 int int_dest_mode; 29 int int_dest_mode;
30 int ESR_DISABLE; 30 int ESR_DISABLE;
@@ -57,12 +57,16 @@ struct genapic {
57 57
58 unsigned (*get_apic_id)(unsigned long x); 58 unsigned (*get_apic_id)(unsigned long x);
59 unsigned long apic_id_mask; 59 unsigned long apic_id_mask;
60 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 60 unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
61 cpumask_t (*vector_allocation_domain)(int cpu); 61 unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
62 const struct cpumask *andmask);
63 void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
62 64
63#ifdef CONFIG_SMP 65#ifdef CONFIG_SMP
64 /* ipi */ 66 /* ipi */
65 void (*send_IPI_mask)(cpumask_t mask, int vector); 67 void (*send_IPI_mask)(const struct cpumask *mask, int vector);
68 void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
69 int vector);
66 void (*send_IPI_allbutself)(int vector); 70 void (*send_IPI_allbutself)(int vector);
67 void (*send_IPI_all)(int vector); 71 void (*send_IPI_all)(int vector);
68#endif 72#endif
@@ -114,6 +118,7 @@ struct genapic {
114 APICFUNC(get_apic_id) \ 118 APICFUNC(get_apic_id) \
115 .apic_id_mask = APIC_ID_MASK, \ 119 .apic_id_mask = APIC_ID_MASK, \
116 APICFUNC(cpu_mask_to_apicid) \ 120 APICFUNC(cpu_mask_to_apicid) \
121 APICFUNC(cpu_mask_to_apicid_and) \
117 APICFUNC(vector_allocation_domain) \ 122 APICFUNC(vector_allocation_domain) \
118 APICFUNC(acpi_madt_oem_check) \ 123 APICFUNC(acpi_madt_oem_check) \
119 IPIFUNC(send_IPI_mask) \ 124 IPIFUNC(send_IPI_mask) \
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 2cae011668b7..adf32fb56aa6 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_GENAPIC_64_H 1#ifndef _ASM_X86_GENAPIC_64_H
2#define _ASM_X86_GENAPIC_64_H 2#define _ASM_X86_GENAPIC_64_H
3 3
4#include <linux/cpumask.h>
5
4/* 6/*
5 * Copyright 2004 James Cleverdon, IBM. 7 * Copyright 2004 James Cleverdon, IBM.
6 * Subject to the GNU Public License, v.2 8 * Subject to the GNU Public License, v.2
@@ -18,16 +20,20 @@ struct genapic {
18 u32 int_delivery_mode; 20 u32 int_delivery_mode;
19 u32 int_dest_mode; 21 u32 int_dest_mode;
20 int (*apic_id_registered)(void); 22 int (*apic_id_registered)(void);
21 cpumask_t (*target_cpus)(void); 23 const struct cpumask *(*target_cpus)(void);
22 cpumask_t (*vector_allocation_domain)(int cpu); 24 void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
23 void (*init_apic_ldr)(void); 25 void (*init_apic_ldr)(void);
24 /* ipi */ 26 /* ipi */
25 void (*send_IPI_mask)(cpumask_t mask, int vector); 27 void (*send_IPI_mask)(const struct cpumask *mask, int vector);
28 void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
29 int vector);
26 void (*send_IPI_allbutself)(int vector); 30 void (*send_IPI_allbutself)(int vector);
27 void (*send_IPI_all)(int vector); 31 void (*send_IPI_all)(int vector);
28 void (*send_IPI_self)(int vector); 32 void (*send_IPI_self)(int vector);
29 /* */ 33 /* */
30 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 34 unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
35 unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
36 const struct cpumask *andmask);
31 unsigned int (*phys_pkg_id)(int index_msb); 37 unsigned int (*phys_pkg_id)(int index_msb);
32 unsigned int (*get_apic_id)(unsigned long x); 38 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id); 39 unsigned long (*set_apic_id)(unsigned int id);
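vector_allocation_domain() now fills a caller-provided mask instead of returning a cpumask_t by value, for the same stack-size reason. A sketch of how a 64-bit caller adapts, assuming a cpumask_var_t scratch mask; the real caller is the vector-assignment path in io_apic.c, which is listed in the diffstat but not shown in this excerpt:

	static int example_pick_vector(int cpu)
	{
		cpumask_var_t tmp_mask;

		if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
			return -ENOMEM;

		/* old form was: tmp_mask = vector_allocation_domain(cpu);  (by-value copy) */
		genapic->vector_allocation_domain(cpu, tmp_mask);

		/* ... scan tmp_mask for a free vector here ... */

		free_cpumask_var(tmp_mask);
		return 0;
	}
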
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f89dffb28aa9..c745a306f7d3 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
117 native_apic_mem_write(APIC_ICR, cfg); 117 native_apic_mem_write(APIC_ICR, cfg);
118} 118}
119 119
120static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) 120static inline void send_IPI_mask_sequence(const struct cpumask *mask,
121 int vector)
121{ 122{
122 unsigned long flags; 123 unsigned long flags;
123 unsigned long query_cpu; 124 unsigned long query_cpu;
@@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
128 * - mbligh 129 * - mbligh
129 */ 130 */
130 local_irq_save(flags); 131 local_irq_save(flags);
131 for_each_cpu_mask_nr(query_cpu, mask) { 132 for_each_cpu(query_cpu, mask) {
132 __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), 133 __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
133 vector, APIC_DEST_PHYSICAL); 134 vector, APIC_DEST_PHYSICAL);
134 } 135 }
135 local_irq_restore(flags); 136 local_irq_restore(flags);
136} 137}
137 138
139static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
140 int vector)
141{
142 unsigned long flags;
143 unsigned int query_cpu;
144 unsigned int this_cpu = smp_processor_id();
145
146 /* See Hack comment above */
147
148 local_irq_save(flags);
149 for_each_cpu(query_cpu, mask)
150 if (query_cpu != this_cpu)
151 __send_IPI_dest_field(
152 per_cpu(x86_cpu_to_apicid, query_cpu),
153 vector, APIC_DEST_PHYSICAL);
154 local_irq_restore(flags);
155}
156
138#endif /* _ASM_X86_IPI_H */ 157#endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 28e409fc73f3..592688ed04d3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq)
33 33
34#ifdef CONFIG_HOTPLUG_CPU 34#ifdef CONFIG_HOTPLUG_CPU
35#include <linux/cpumask.h> 35#include <linux/cpumask.h>
36extern void fixup_irqs(cpumask_t map); 36extern void fixup_irqs(void);
37#endif 37#endif
38 38
39extern unsigned int do_IRQ(struct pt_regs *regs); 39extern unsigned int do_IRQ(struct pt_regs *regs);
@@ -42,5 +42,6 @@ extern void native_init_IRQ(void);
42 42
43/* Interrupt vector management */ 43/* Interrupt vector management */
44extern DECLARE_BITMAP(used_vectors, NR_VECTORS); 44extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
45extern int vector_used_by_percpu_irq(unsigned int vector);
45 46
46#endif /* _ASM_X86_IRQ_H */ 47#endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index 6cb3a467e067..cc09cbbee27e 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
8 8
9#define APIC_DFR_VALUE (APIC_DFR_FLAT) 9#define APIC_DFR_VALUE (APIC_DFR_FLAT)
10 10
11static inline cpumask_t target_cpus(void) 11static inline const struct cpumask *target_cpus(void)
12{ 12{
13#ifdef CONFIG_SMP 13#ifdef CONFIG_SMP
14 return cpu_online_map; 14 return cpu_online_mask;
15#else 15#else
16 return cpumask_of_cpu(0); 16 return cpumask_of(0);
17#endif 17#endif
18} 18}
19 19
@@ -28,6 +28,7 @@ static inline cpumask_t target_cpus(void)
28#define apic_id_registered (genapic->apic_id_registered) 28#define apic_id_registered (genapic->apic_id_registered)
29#define init_apic_ldr (genapic->init_apic_ldr) 29#define init_apic_ldr (genapic->init_apic_ldr)
30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
31#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
31#define phys_pkg_id (genapic->phys_pkg_id) 32#define phys_pkg_id (genapic->phys_pkg_id)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 33#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) 34#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
@@ -61,9 +62,19 @@ static inline int apic_id_registered(void)
61 return physid_isset(read_apic_id(), phys_cpu_present_map); 62 return physid_isset(read_apic_id(), phys_cpu_present_map);
62} 63}
63 64
64static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 65static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
65{ 66{
66 return cpus_addr(cpumask)[0]; 67 return cpumask_bits(cpumask)[0];
68}
69
70static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
71 const struct cpumask *andmask)
72{
73 unsigned long mask1 = cpumask_bits(cpumask)[0];
74 unsigned long mask2 = cpumask_bits(andmask)[0];
75 unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
76
77 return (unsigned int)(mask1 & mask2 & mask3);
67} 78}
68 79
69static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) 80static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -88,7 +99,7 @@ static inline int apicid_to_node(int logical_apicid)
88#endif 99#endif
89} 100}
90 101
91static inline cpumask_t vector_allocation_domain(int cpu) 102static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
92{ 103{
93 /* Careful. Some cpus do not strictly honor the set of cpus 104 /* Careful. Some cpus do not strictly honor the set of cpus
94 * specified in the interrupt destination when using lowest 105 * specified in the interrupt destination when using lowest
@@ -98,8 +109,7 @@ static inline cpumask_t vector_allocation_domain(int cpu)
98 * deliver interrupts to the wrong hyperthread when only one 109 * deliver interrupts to the wrong hyperthread when only one
99 * hyperthread was specified in the interrupt desitination. 110 * hyperthread was specified in the interrupt desitination.
100 */ 111 */
101 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 112 *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
102 return domain;
103} 113}
104#endif 114#endif
105 115
@@ -131,7 +141,7 @@ static inline int cpu_to_logical_apicid(int cpu)
131 141
132static inline int cpu_present_to_apicid(int mps_cpu) 142static inline int cpu_present_to_apicid(int mps_cpu)
133{ 143{
134 if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) 144 if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
135 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); 145 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
136 else 146 else
137 return BAD_APICID; 147 return BAD_APICID;
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index fabca01ebacf..191312d155da 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,7 +4,8 @@
4/* Avoid include hell */ 4/* Avoid include hell */
5#define NMI_VECTOR 0x02 5#define NMI_VECTOR 0x02
6 6
7void send_IPI_mask_bitmask(cpumask_t mask, int vector); 7void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
8void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
8void __send_IPI_shortcut(unsigned int shortcut, int vector); 9void __send_IPI_shortcut(unsigned int shortcut, int vector);
9 10
10extern int no_broadcast; 11extern int no_broadcast;
@@ -12,28 +13,27 @@ extern int no_broadcast;
12#ifdef CONFIG_X86_64 13#ifdef CONFIG_X86_64
13#include <asm/genapic.h> 14#include <asm/genapic.h>
14#define send_IPI_mask (genapic->send_IPI_mask) 15#define send_IPI_mask (genapic->send_IPI_mask)
16#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
15#else 17#else
16static inline void send_IPI_mask(cpumask_t mask, int vector) 18static inline void send_IPI_mask(const struct cpumask *mask, int vector)
17{ 19{
18 send_IPI_mask_bitmask(mask, vector); 20 send_IPI_mask_bitmask(mask, vector);
19} 21}
22void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
20#endif 23#endif
21 24
22static inline void __local_send_IPI_allbutself(int vector) 25static inline void __local_send_IPI_allbutself(int vector)
23{ 26{
24 if (no_broadcast || vector == NMI_VECTOR) { 27 if (no_broadcast || vector == NMI_VECTOR)
25 cpumask_t mask = cpu_online_map; 28 send_IPI_mask_allbutself(cpu_online_mask, vector);
26 29 else
27 cpu_clear(smp_processor_id(), mask);
28 send_IPI_mask(mask, vector);
29 } else
30 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); 30 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
31} 31}
32 32
33static inline void __local_send_IPI_all(int vector) 33static inline void __local_send_IPI_all(int vector)
34{ 34{
35 if (no_broadcast || vector == NMI_VECTOR) 35 if (no_broadcast || vector == NMI_VECTOR)
36 send_IPI_mask(cpu_online_map, vector); 36 send_IPI_mask(cpu_online_mask, vector);
37 else 37 else
38 __send_IPI_shortcut(APIC_DEST_ALLINC, vector); 38 __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
39} 39}
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index e430f47df667..48553e958ad5 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -24,6 +24,7 @@
24#define check_phys_apicid_present (genapic->check_phys_apicid_present) 24#define check_phys_apicid_present (genapic->check_phys_apicid_present)
25#define check_apicid_used (genapic->check_apicid_used) 25#define check_apicid_used (genapic->check_apicid_used)
26#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 26#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
27#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
27#define vector_allocation_domain (genapic->vector_allocation_domain) 28#define vector_allocation_domain (genapic->vector_allocation_domain)
28#define enable_apic_mode (genapic->enable_apic_mode) 29#define enable_apic_mode (genapic->enable_apic_mode)
29#define phys_pkg_id (genapic->phys_pkg_id) 30#define phys_pkg_id (genapic->phys_pkg_id)
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 0bf2a06b7a4e..c80f00d29965 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -7,9 +7,9 @@
7 7
8#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 8#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
9 9
10static inline cpumask_t target_cpus(void) 10static inline const cpumask_t *target_cpus(void)
11{ 11{
12 return CPU_MASK_ALL; 12 return &CPU_MASK_ALL;
13} 13}
14 14
15#define NO_BALANCE_IRQ (1) 15#define NO_BALANCE_IRQ (1)
@@ -122,7 +122,13 @@ static inline void enable_apic_mode(void)
122 * We use physical apicids here, not logical, so just return the default 122 * We use physical apicids here, not logical, so just return the default
123 * physical broadcast to stop people from breaking us 123 * physical broadcast to stop people from breaking us
124 */ 124 */
125static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 125static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
126{
127 return (int) 0xF;
128}
129
130static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
131 const struct cpumask *andmask)
126{ 132{
127 return (int) 0xF; 133 return (int) 0xF;
128} 134}
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index 935588d286cf..a8374c652778 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,25 +1,22 @@
1#ifndef __ASM_NUMAQ_IPI_H 1#ifndef __ASM_NUMAQ_IPI_H
2#define __ASM_NUMAQ_IPI_H 2#define __ASM_NUMAQ_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t, int vector); 4void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
5void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const struct cpumask *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
10 11
11static inline void send_IPI_allbutself(int vector) 12static inline void send_IPI_allbutself(int vector)
12{ 13{
13 cpumask_t mask = cpu_online_map; 14 send_IPI_mask_allbutself(cpu_online_mask, vector);
14 cpu_clear(smp_processor_id(), mask);
15
16 if (!cpus_empty(mask))
17 send_IPI_mask(mask, vector);
18} 15}
19 16
20static inline void send_IPI_all(int vector) 17static inline void send_IPI_all(int vector)
21{ 18{
22 send_IPI_mask(cpu_online_map, vector); 19 send_IPI_mask(cpu_online_mask, vector);
23} 20}
24 21
25#endif /* __ASM_NUMAQ_IPI_H */ 22#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index d12811ce51d9..830b9fcb6427 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
60 void (*cpu_die)(unsigned int cpu); 60 void (*cpu_die)(unsigned int cpu);
61 void (*play_dead)(void); 61 void (*play_dead)(void);
62 62
63 void (*send_call_func_ipi)(cpumask_t mask); 63 void (*send_call_func_ipi)(const struct cpumask *mask);
64 void (*send_call_func_single_ipi)(int cpu); 64 void (*send_call_func_single_ipi)(int cpu);
65}; 65};
66 66
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
125 125
126static inline void arch_send_call_function_ipi(cpumask_t mask) 126static inline void arch_send_call_function_ipi(cpumask_t mask)
127{ 127{
128 smp_ops.send_call_func_ipi(mask); 128 smp_ops.send_call_func_ipi(&mask);
129} 129}
130 130
131void cpu_disable_common(void); 131void cpu_disable_common(void);
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
138void native_play_dead(void); 138void native_play_dead(void);
139void play_dead_common(void); 139void play_dead_common(void);
140 140
141void native_send_call_func_ipi(cpumask_t mask); 141void native_send_call_func_ipi(const struct cpumask *mask);
142void native_send_call_func_single_ipi(int cpu); 142void native_send_call_func_single_ipi(int cpu);
143 143
144extern void prefill_possible_map(void); 144extern void prefill_possible_map(void);
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 9b3070f1c2ac..99327d1be49f 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -14,13 +14,13 @@
14 14
15#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 15#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
16 16
17static inline cpumask_t target_cpus(void) 17static inline const cpumask_t *target_cpus(void)
18{ 18{
19 /* CPU_MASK_ALL (0xff) has undefined behaviour with 19 /* CPU_MASK_ALL (0xff) has undefined behaviour with
20 * dest_LowestPrio mode logical clustered apic interrupt routing 20 * dest_LowestPrio mode logical clustered apic interrupt routing
21 * Just start on cpu 0. IRQ balancing will spread load 21 * Just start on cpu 0. IRQ balancing will spread load
22 */ 22 */
23 return cpumask_of_cpu(0); 23 return &cpumask_of_cpu(0);
24} 24}
25 25
26#define INT_DELIVERY_MODE (dest_LowestPrio) 26#define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -137,14 +137,14 @@ static inline void enable_apic_mode(void)
137{ 137{
138} 138}
139 139
140static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 140static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
141{ 141{
142 int num_bits_set; 142 int num_bits_set;
143 int cpus_found = 0; 143 int cpus_found = 0;
144 int cpu; 144 int cpu;
145 int apicid; 145 int apicid;
146 146
147 num_bits_set = cpus_weight(cpumask); 147 num_bits_set = cpus_weight(*cpumask);
148 /* Return id to all */ 148 /* Return id to all */
149 if (num_bits_set == NR_CPUS) 149 if (num_bits_set == NR_CPUS)
150 return (int) 0xFF; 150 return (int) 0xFF;
@@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
152 * The cpus in the mask must all be on the apic cluster. If are not 152 * The cpus in the mask must all be on the apic cluster. If are not
153 * on the same apicid cluster return default value of TARGET_CPUS. 153 * on the same apicid cluster return default value of TARGET_CPUS.
154 */ 154 */
155 cpu = first_cpu(cpumask); 155 cpu = first_cpu(*cpumask);
156 apicid = cpu_to_logical_apicid(cpu); 156 apicid = cpu_to_logical_apicid(cpu);
157 while (cpus_found < num_bits_set) { 157 while (cpus_found < num_bits_set) {
158 if (cpu_isset(cpu, cpumask)) { 158 if (cpu_isset(cpu, *cpumask)) {
159 int new_apicid = cpu_to_logical_apicid(cpu); 159 int new_apicid = cpu_to_logical_apicid(cpu);
160 if (apicid_cluster(apicid) != 160 if (apicid_cluster(apicid) !=
161 apicid_cluster(new_apicid)){ 161 apicid_cluster(new_apicid)){
@@ -170,6 +170,49 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
170 return apicid; 170 return apicid;
171} 171}
172 172
173static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
174 const struct cpumask *andmask)
175{
176 int num_bits_set;
177 int cpus_found = 0;
178 int cpu;
179 int apicid = 0xFF;
180 cpumask_var_t cpumask;
181
182 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
183 return (int) 0xFF;
184
185 cpumask_and(cpumask, inmask, andmask);
186 cpumask_and(cpumask, cpumask, cpu_online_mask);
187
188 num_bits_set = cpumask_weight(cpumask);
189 /* Return id to all */
190 if (num_bits_set == nr_cpu_ids)
191 goto exit;
192 /*
193 * The cpus in the mask must all be on the apic cluster. If are not
194 * on the same apicid cluster return default value of TARGET_CPUS.
195 */
196 cpu = cpumask_first(cpumask);
197 apicid = cpu_to_logical_apicid(cpu);
198 while (cpus_found < num_bits_set) {
199 if (cpumask_test_cpu(cpu, cpumask)) {
200 int new_apicid = cpu_to_logical_apicid(cpu);
201 if (apicid_cluster(apicid) !=
202 apicid_cluster(new_apicid)){
203 printk ("%s: Not a valid mask!\n", __func__);
204 return 0xFF;
205 }
206 apicid = apicid | new_apicid;
207 cpus_found++;
208 }
209 cpu++;
210 }
211exit:
212 free_cpumask_var(cpumask);
213 return apicid;
214}
215
173/* cpuid returns the value latched in the HW at reset, not the APIC ID 216/* cpuid returns the value latched in the HW at reset, not the APIC ID
174 * register's value. For any box whose BIOS changes APIC IDs, like 217 * register's value. For any box whose BIOS changes APIC IDs, like
175 * clustered APIC systems, we must use hard_smp_processor_id. 218 * clustered APIC systems, we must use hard_smp_processor_id.
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
index 53bd1e7bd7b4..a8a2c24f50cc 100644
--- a/arch/x86/include/asm/summit/ipi.h
+++ b/arch/x86/include/asm/summit/ipi.h
@@ -1,9 +1,10 @@
1#ifndef __ASM_SUMMIT_IPI_H 1#ifndef __ASM_SUMMIT_IPI_H
2#define __ASM_SUMMIT_IPI_H 2#define __ASM_SUMMIT_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
5void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const cpumask_t *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
14 cpu_clear(smp_processor_id(), mask); 15 cpu_clear(smp_processor_id(), mask);
15 16
16 if (!cpus_empty(mask)) 17 if (!cpus_empty(mask))
17 send_IPI_mask(mask, vector); 18 send_IPI_mask(&mask, vector);
18} 19}
19 20
20static inline void send_IPI_all(int vector) 21static inline void send_IPI_all(int vector)
21{ 22{
22 send_IPI_mask(cpu_online_map, vector); 23 send_IPI_mask(&cpu_online_map, vector);
23} 24}
24 25
25#endif /* __ASM_SUMMIT_IPI_H */ 26#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 168203c0c316..4e2f2e0aab27 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -238,6 +238,8 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
238#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) 238#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
239#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) 239#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
240#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) 240#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
241#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
242#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
241 243
242/* indicates that pointers to the topology cpumask_t maps are valid */ 244/* indicates that pointers to the topology cpumask_t maps are valid */
243#define arch_provides_topology_pointers yes 245#define arch_provides_topology_pointers yes
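The two added macros hand out pointers into the existing per-cpu sibling maps so that generic code can walk them without copying a cpumask_t. Purely as an illustration:

	static void show_package_siblings(int cpu)
	{
		int sibling;

		/* cpus in the same physical package as 'cpu' */
		for_each_cpu(sibling, topology_core_cpumask(cpu))
			pr_debug("cpu %d shares a package with cpu %d\n", cpu, sibling);
	}
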
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 6107b41da9a5..6b7f824db160 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -119,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
119 119
120int first_system_vector = 0xfe; 120int first_system_vector = 0xfe;
121 121
122char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
123
124/* 122/*
125 * Debug level, exported for io_apic.c 123 * Debug level, exported for io_apic.c
126 */ 124 */
@@ -142,7 +140,7 @@ static int lapic_next_event(unsigned long delta,
142 struct clock_event_device *evt); 140 struct clock_event_device *evt);
143static void lapic_timer_setup(enum clock_event_mode mode, 141static void lapic_timer_setup(enum clock_event_mode mode,
144 struct clock_event_device *evt); 142 struct clock_event_device *evt);
145static void lapic_timer_broadcast(const struct cpumask *mask); 143static void lapic_timer_broadcast(const cpumask_t *mask);
146static void apic_pm_activate(void); 144static void apic_pm_activate(void);
147 145
148/* 146/*
@@ -455,10 +453,10 @@ static void lapic_timer_setup(enum clock_event_mode mode,
455/* 453/*
456 * Local APIC timer broadcast function 454 * Local APIC timer broadcast function
457 */ 455 */
458static void lapic_timer_broadcast(const struct cpumask *mask) 456static void lapic_timer_broadcast(const cpumask_t *mask)
459{ 457{
460#ifdef CONFIG_SMP 458#ifdef CONFIG_SMP
461 send_IPI_mask(*mask, LOCAL_TIMER_VECTOR); 459 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
462#endif 460#endif
463} 461}
464 462
@@ -1807,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1807void __cpuinit generic_processor_info(int apicid, int version) 1805void __cpuinit generic_processor_info(int apicid, int version)
1808{ 1806{
1809 int cpu; 1807 int cpu;
1810 cpumask_t tmp_map;
1811 1808
1812 /* 1809 /*
1813 * Validate version 1810 * Validate version
1814 */ 1811 */
1815 if (version == 0x0) { 1812 if (version == 0x0) {
1816 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " 1813 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
1817 "fixing up to 0x10. (tell your hw vendor)\n", 1814 "fixing up to 0x10. (tell your hw vendor)\n",
1818 version); 1815 version);
1819 version = 0x10; 1816 version = 0x10;
1820 } 1817 }
1821 apic_version[apicid] = version; 1818 apic_version[apicid] = version;
1822 1819
1823 if (num_processors >= NR_CPUS) { 1820 if (num_processors >= nr_cpu_ids) {
1824 pr_warning("WARNING: NR_CPUS limit of %i reached." 1821 int max = nr_cpu_ids;
1825 " Processor ignored.\n", NR_CPUS); 1822 int thiscpu = max + disabled_cpus;
1823
1824 pr_warning(
1825 "ACPI: NR_CPUS/possible_cpus limit of %i reached."
1826 " Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
1827
1828 disabled_cpus++;
1826 return; 1829 return;
1827 } 1830 }
1828 1831
1829 num_processors++; 1832 num_processors++;
1830 cpus_complement(tmp_map, cpu_present_map); 1833 cpu = cpumask_next_zero(-1, cpu_present_mask);
1831 cpu = first_cpu(tmp_map);
1832 1834
1833 physid_set(apicid, phys_cpu_present_map); 1835 physid_set(apicid, phys_cpu_present_map);
1834 if (apicid == boot_cpu_physical_apicid) { 1836 if (apicid == boot_cpu_physical_apicid) {
@@ -1878,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1878 } 1880 }
1879#endif 1881#endif
1880 1882
1881 cpu_set(cpu, cpu_possible_map); 1883 set_cpu_possible(cpu, true);
1882 cpu_set(cpu, cpu_present_map); 1884 set_cpu_present(cpu, true);
1883} 1885}
1884 1886
1885#ifdef CONFIG_X86_64 1887#ifdef CONFIG_X86_64
@@ -2081,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void)
2081 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); 2083 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
2082 bitmap_zero(clustermap, NUM_APIC_CLUSTERS); 2084 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
2083 2085
2084 for (i = 0; i < NR_CPUS; i++) { 2086 for (i = 0; i < nr_cpu_ids; i++) {
2085 /* are we being called early in kernel startup? */ 2087 /* are we being called early in kernel startup? */
2086 if (bios_cpu_apicid) { 2088 if (bios_cpu_apicid) {
2087 id = bios_cpu_apicid[i]; 2089 id = bios_cpu_apicid[i];
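Two substitutions recur in this file and throughout the patch: loops bounded by NR_CPUS (the compile-time ceiling, 4096 under MAXSMP) now stop at nr_cpu_ids (one past the highest possible CPU id on the running machine), and the free slot in the present map is found with cpumask_next_zero() rather than complementing the whole map into a temporary. A small sketch of the bound change (count_set() is illustrative only; real code would just call cpumask_weight()):

	static int count_set(const struct cpumask *mask)
	{
		int i, n = 0;

		/* e.g. 8 iterations on an 8-cpu box, not 4096 */
		for (i = 0; i < nr_cpu_ids; i++)
			if (cpumask_test_cpu(i, mask))
				n++;
		return n;
	}
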
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 15cf14e9bf26..48533d77be78 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
534 per_cpu(cpuid4_info, cpu) = NULL; 534 per_cpu(cpuid4_info, cpu) = NULL;
535} 535}
536 536
537static int __cpuinit detect_cache_attributes(unsigned int cpu) 537static void __cpuinit get_cpu_leaves(void *_retval)
538{ 538{
539 struct _cpuid4_info *this_leaf; 539 int j, *retval = _retval, cpu = smp_processor_id();
540 unsigned long j;
541 int retval;
542 cpumask_t oldmask;
543
544 if (num_cache_leaves == 0)
545 return -ENOENT;
546
547 per_cpu(cpuid4_info, cpu) = kzalloc(
548 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
549 if (per_cpu(cpuid4_info, cpu) == NULL)
550 return -ENOMEM;
551
552 oldmask = current->cpus_allowed;
553 retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
554 if (retval)
555 goto out;
556 540
557 /* Do cpuid and store the results */ 541 /* Do cpuid and store the results */
558 for (j = 0; j < num_cache_leaves; j++) { 542 for (j = 0; j < num_cache_leaves; j++) {
543 struct _cpuid4_info *this_leaf;
559 this_leaf = CPUID4_INFO_IDX(cpu, j); 544 this_leaf = CPUID4_INFO_IDX(cpu, j);
560 retval = cpuid4_cache_lookup(j, this_leaf); 545 *retval = cpuid4_cache_lookup(j, this_leaf);
561 if (unlikely(retval < 0)) { 546 if (unlikely(*retval < 0)) {
562 int i; 547 int i;
563 548
564 for (i = 0; i < j; i++) 549 for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
567 } 552 }
568 cache_shared_cpu_map_setup(cpu, j); 553 cache_shared_cpu_map_setup(cpu, j);
569 } 554 }
570 set_cpus_allowed_ptr(current, &oldmask); 555}
556
557static int __cpuinit detect_cache_attributes(unsigned int cpu)
558{
559 int retval;
560
561 if (num_cache_leaves == 0)
562 return -ENOENT;
563
564 per_cpu(cpuid4_info, cpu) = kzalloc(
565 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
566 if (per_cpu(cpuid4_info, cpu) == NULL)
567 return -ENOMEM;
571 568
572out: 569 smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
573 if (retval) { 570 if (retval) {
574 kfree(per_cpu(cpuid4_info, cpu)); 571 kfree(per_cpu(cpuid4_info, cpu));
575 per_cpu(cpuid4_info, cpu) = NULL; 572 per_cpu(cpuid4_info, cpu) = NULL;
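Instead of rebinding current with set_cpus_allowed_ptr(), running the CPUID reads, and then restoring the old affinity, detect_cache_attributes() now asks the target CPU to execute get_cpu_leaves() via smp_call_function_single(). The general pattern, sketched with a made-up MSR-reading payload (struct rdmsr_req, do_rdmsr and read_msr_on are illustrative names, not part of this patch):

	struct rdmsr_req {
		u32 msr;
		u64 val;
	};

	static void do_rdmsr(void *info)
	{
		struct rdmsr_req *req = info;

		rdmsrl(req->msr, req->val);	/* runs on the target CPU, in IPI context */
	}

	static u64 read_msr_on(int cpu, u32 msr)
	{
		struct rdmsr_req req = { .msr = msr };

		/* final argument 1: wait until the remote function has completed */
		smp_call_function_single(cpu, do_rdmsr, &req, 1);
		return req.val;
	}
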
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 748c8f9e7a05..a5a5e0530370 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
83 * CPU Initialization 83 * CPU Initialization
84 */ 84 */
85 85
86struct thresh_restart {
87 struct threshold_block *b;
88 int reset;
89 u16 old_limit;
90};
91
86/* must be called with correct cpu affinity */ 92/* must be called with correct cpu affinity */
87static void threshold_restart_bank(struct threshold_block *b, 93static long threshold_restart_bank(void *_tr)
88 int reset, u16 old_limit)
89{ 94{
95 struct thresh_restart *tr = _tr;
90 u32 mci_misc_hi, mci_misc_lo; 96 u32 mci_misc_hi, mci_misc_lo;
91 97
92 rdmsr(b->address, mci_misc_lo, mci_misc_hi); 98 rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
93 99
94 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 100 if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
95 reset = 1; /* limit cannot be lower than err count */ 101 tr->reset = 1; /* limit cannot be lower than err count */
96 102
97 if (reset) { /* reset err count and overflow bit */ 103 if (tr->reset) { /* reset err count and overflow bit */
98 mci_misc_hi = 104 mci_misc_hi =
99 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 105 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
100 (THRESHOLD_MAX - b->threshold_limit); 106 (THRESHOLD_MAX - tr->b->threshold_limit);
101 } else if (old_limit) { /* change limit w/o reset */ 107 } else if (tr->old_limit) { /* change limit w/o reset */
102 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 108 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
103 (old_limit - b->threshold_limit); 109 (tr->old_limit - tr->b->threshold_limit);
104 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 110 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
105 (new_count & THRESHOLD_MAX); 111 (new_count & THRESHOLD_MAX);
106 } 112 }
107 113
108 b->interrupt_enable ? 114 tr->b->interrupt_enable ?
109 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 115 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
110 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 116 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
111 117
112 mci_misc_hi |= MASK_COUNT_EN_HI; 118 mci_misc_hi |= MASK_COUNT_EN_HI;
113 wrmsr(b->address, mci_misc_lo, mci_misc_hi); 119 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
120 return 0;
114} 121}
115 122
116/* cpu init entry point, called from mce.c with preempt off */ 123/* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
120 unsigned int cpu = smp_processor_id(); 127 unsigned int cpu = smp_processor_id();
121 u8 lvt_off; 128 u8 lvt_off;
122 u32 low = 0, high = 0, address = 0; 129 u32 low = 0, high = 0, address = 0;
130 struct thresh_restart tr;
123 131
124 for (bank = 0; bank < NR_BANKS; ++bank) { 132 for (bank = 0; bank < NR_BANKS; ++bank) {
125 for (block = 0; block < NR_BLOCKS; ++block) { 133 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
162 wrmsr(address, low, high); 170 wrmsr(address, low, high);
163 171
164 threshold_defaults.address = address; 172 threshold_defaults.address = address;
165 threshold_restart_bank(&threshold_defaults, 0, 0); 173 tr.b = &threshold_defaults;
174 tr.reset = 0;
175 tr.old_limit = 0;
176 threshold_restart_bank(&tr);
166 } 177 }
167 } 178 }
168} 179}
@@ -251,20 +262,6 @@ struct threshold_attr {
251 ssize_t(*store) (struct threshold_block *, const char *, size_t count); 262 ssize_t(*store) (struct threshold_block *, const char *, size_t count);
252}; 263};
253 264
254static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
255 cpumask_t *newmask)
256{
257 *oldmask = current->cpus_allowed;
258 cpus_clear(*newmask);
259 cpu_set(cpu, *newmask);
260 set_cpus_allowed_ptr(current, newmask);
261}
262
263static void affinity_restore(const cpumask_t *oldmask)
264{
265 set_cpus_allowed_ptr(current, oldmask);
266}
267
268#define SHOW_FIELDS(name) \ 265#define SHOW_FIELDS(name) \
269static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ 266static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
270{ \ 267{ \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
277 const char *buf, size_t count) 274 const char *buf, size_t count)
278{ 275{
279 char *end; 276 char *end;
280 cpumask_t oldmask, newmask; 277 struct thresh_restart tr;
281 unsigned long new = simple_strtoul(buf, &end, 0); 278 unsigned long new = simple_strtoul(buf, &end, 0);
282 if (end == buf) 279 if (end == buf)
283 return -EINVAL; 280 return -EINVAL;
284 b->interrupt_enable = !!new; 281 b->interrupt_enable = !!new;
285 282
286 affinity_set(b->cpu, &oldmask, &newmask); 283 tr.b = b;
287 threshold_restart_bank(b, 0, 0); 284 tr.reset = 0;
288 affinity_restore(&oldmask); 285 tr.old_limit = 0;
286 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
289 287
290 return end - buf; 288 return end - buf;
291} 289}
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
294 const char *buf, size_t count) 292 const char *buf, size_t count)
295{ 293{
296 char *end; 294 char *end;
297 cpumask_t oldmask, newmask; 295 struct thresh_restart tr;
298 u16 old;
299 unsigned long new = simple_strtoul(buf, &end, 0); 296 unsigned long new = simple_strtoul(buf, &end, 0);
300 if (end == buf) 297 if (end == buf)
301 return -EINVAL; 298 return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
303 new = THRESHOLD_MAX; 300 new = THRESHOLD_MAX;
304 if (new < 1) 301 if (new < 1)
305 new = 1; 302 new = 1;
306 old = b->threshold_limit; 303 tr.old_limit = b->threshold_limit;
307 b->threshold_limit = new; 304 b->threshold_limit = new;
305 tr.b = b;
306 tr.reset = 0;
308 307
309 affinity_set(b->cpu, &oldmask, &newmask); 308 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
310 threshold_restart_bank(b, 0, old);
311 affinity_restore(&oldmask);
312 309
313 return end - buf; 310 return end - buf;
314} 311}
315 312
316static ssize_t show_error_count(struct threshold_block *b, char *buf) 313static long local_error_count(void *_b)
317{ 314{
318 u32 high, low; 315 struct threshold_block *b = _b;
319 cpumask_t oldmask, newmask; 316 u32 low, high;
320 affinity_set(b->cpu, &oldmask, &newmask); 317
321 rdmsr(b->address, low, high); 318 rdmsr(b->address, low, high);
322 affinity_restore(&oldmask); 319 return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
323 return sprintf(buf, "%x\n", 320}
324 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); 321
322static ssize_t show_error_count(struct threshold_block *b, char *buf)
323{
324 return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
325} 325}
326 326
327static ssize_t store_error_count(struct threshold_block *b, 327static ssize_t store_error_count(struct threshold_block *b,
328 const char *buf, size_t count) 328 const char *buf, size_t count)
329{ 329{
330 cpumask_t oldmask, newmask; 330 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
331 affinity_set(b->cpu, &oldmask, &newmask); 331
332 threshold_restart_bank(b, 1, 0); 332 work_on_cpu(b->cpu, threshold_restart_bank, &tr);
333 affinity_restore(&oldmask);
334 return 1; 333 return 1;
335} 334}
336 335
@@ -463,12 +462,19 @@ out_free:
463 return err; 462 return err;
464} 463}
465 464
465static long local_allocate_threshold_blocks(void *_bank)
466{
467 unsigned int *bank = _bank;
468
469 return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
470 MSR_IA32_MC0_MISC + *bank * 4);
471}
472
466/* symlinks sibling shared banks to first core. first core owns dir/files. */ 473/* symlinks sibling shared banks to first core. first core owns dir/files. */
467static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) 474static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
468{ 475{
469 int i, err = 0; 476 int i, err = 0;
470 struct threshold_bank *b = NULL; 477 struct threshold_bank *b = NULL;
471 cpumask_t oldmask, newmask;
472 char name[32]; 478 char name[32];
473 479
474 sprintf(name, "threshold_bank%i", bank); 480 sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
519 525
520 per_cpu(threshold_banks, cpu)[bank] = b; 526 per_cpu(threshold_banks, cpu)[bank] = b;
521 527
522 affinity_set(cpu, &oldmask, &newmask); 528 err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
523 err = allocate_threshold_blocks(cpu, bank, 0,
524 MSR_IA32_MC0_MISC + bank * 4);
525 affinity_restore(&oldmask);
526
527 if (err) 529 if (err)
528 goto out_free; 530 goto out_free;
529 531
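
The MCE threshold code above follows the same idea: arguments are packed into a struct (struct thresh_restart) and handed to work_on_cpu(), whose callbacks take a void * and return a long, replacing the affinity_set()/affinity_restore() juggling. A hedged sketch of that calling convention; demo_args, demo_read_msr() and demo_rdmsr_on_cpu() are illustrative names only, and work_on_cpu() may sleep, so it cannot be used from atomic context:

#include <linux/types.h>
#include <linux/smp.h>
#include <linux/workqueue.h>
#include <asm/msr.h>

struct demo_args {
	u32 msr;
	u64 value;
};

/* work_on_cpu() callback: runs in process context on the chosen CPU */
static long demo_read_msr(void *_args)
{
	struct demo_args *args = _args;
	u32 lo, hi;

	rdmsr(args->msr, lo, hi);
	args->value = ((u64)hi << 32) | lo;
	return 0;
}

static long demo_rdmsr_on_cpu(unsigned int cpu, u32 msr, u64 *value)
{
	struct demo_args args = { .msr = msr };
	long err;

	err = work_on_cpu(cpu, demo_read_msr, &args);	/* waits for completion */
	*value = args.value;
	return err;
}
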
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda4..34185488e4fb 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
30 return 1; 30 return 1;
31} 31}
32 32
33static cpumask_t flat_target_cpus(void) 33static const struct cpumask *flat_target_cpus(void)
34{ 34{
35 return cpu_online_map; 35 return cpu_online_mask;
36} 36}
37 37
38static cpumask_t flat_vector_allocation_domain(int cpu) 38static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
39{ 39{
40 /* Careful. Some cpus do not strictly honor the set of cpus 40 /* Careful. Some cpus do not strictly honor the set of cpus
41 * specified in the interrupt destination when using lowest 41 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
45 * deliver interrupts to the wrong hyperthread when only one 45 * deliver interrupts to the wrong hyperthread when only one
 46 * hyperthread was specified in the interrupt destination. 46 * hyperthread was specified in the interrupt destination.
47 */ 47 */
48 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 48 cpumask_clear(retmask);
49 return domain; 49 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
50} 50}
51 51
52/* 52/*
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void)
69 apic_write(APIC_LDR, val); 69 apic_write(APIC_LDR, val);
70} 70}
71 71
72static void flat_send_IPI_mask(cpumask_t cpumask, int vector) 72static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
73{ 73{
74 unsigned long mask = cpus_addr(cpumask)[0];
75 unsigned long flags; 74 unsigned long flags;
76 75
77 local_irq_save(flags); 76 local_irq_save(flags);
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
79 local_irq_restore(flags); 78 local_irq_restore(flags);
80} 79}
81 80
81static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
82{
83 unsigned long mask = cpumask_bits(cpumask)[0];
84
85 _flat_send_IPI_mask(mask, vector);
86}
87
88static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
89 int vector)
90{
91 unsigned long mask = cpumask_bits(cpumask)[0];
92 int cpu = smp_processor_id();
93
94 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask);
96 _flat_send_IPI_mask(mask, vector);
97}
98
82static void flat_send_IPI_allbutself(int vector) 99static void flat_send_IPI_allbutself(int vector)
83{ 100{
101 int cpu = smp_processor_id();
84#ifdef CONFIG_HOTPLUG_CPU 102#ifdef CONFIG_HOTPLUG_CPU
85 int hotplug = 1; 103 int hotplug = 1;
86#else 104#else
87 int hotplug = 0; 105 int hotplug = 0;
88#endif 106#endif
89 if (hotplug || vector == NMI_VECTOR) { 107 if (hotplug || vector == NMI_VECTOR) {
90 cpumask_t allbutme = cpu_online_map; 108 if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
109 unsigned long mask = cpumask_bits(cpu_online_mask)[0];
91 110
92 cpu_clear(smp_processor_id(), allbutme); 111 if (cpu < BITS_PER_LONG)
112 clear_bit(cpu, &mask);
93 113
94 if (!cpus_empty(allbutme)) 114 _flat_send_IPI_mask(mask, vector);
95 flat_send_IPI_mask(allbutme, vector); 115 }
96 } else if (num_online_cpus() > 1) { 116 } else if (num_online_cpus() > 1) {
97 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); 117 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
98 } 118 }
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
101static void flat_send_IPI_all(int vector) 121static void flat_send_IPI_all(int vector)
102{ 122{
103 if (vector == NMI_VECTOR) 123 if (vector == NMI_VECTOR)
104 flat_send_IPI_mask(cpu_online_map, vector); 124 flat_send_IPI_mask(cpu_online_mask, vector);
105 else 125 else
106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 126 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
107} 127}
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void)
135 return physid_isset(read_xapic_id(), phys_cpu_present_map); 155 return physid_isset(read_xapic_id(), phys_cpu_present_map);
136} 156}
137 157
138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 158static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
159{
160 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
161}
162
163static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
164 const struct cpumask *andmask)
139{ 165{
140 return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; 166 unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
167 unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
168
169 return mask1 & mask2;
141} 170}
142 171
143static unsigned int phys_pkg_id(int index_msb) 172static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@ struct genapic apic_flat = {
157 .send_IPI_all = flat_send_IPI_all, 186 .send_IPI_all = flat_send_IPI_all,
158 .send_IPI_allbutself = flat_send_IPI_allbutself, 187 .send_IPI_allbutself = flat_send_IPI_allbutself,
159 .send_IPI_mask = flat_send_IPI_mask, 188 .send_IPI_mask = flat_send_IPI_mask,
189 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
160 .send_IPI_self = apic_send_IPI_self, 190 .send_IPI_self = apic_send_IPI_self,
161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 191 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
192 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
162 .phys_pkg_id = phys_pkg_id, 193 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id, 194 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id, 195 .set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
188 return 0; 219 return 0;
189} 220}
190 221
191static cpumask_t physflat_target_cpus(void) 222static const struct cpumask *physflat_target_cpus(void)
192{ 223{
193 return cpu_online_map; 224 return cpu_online_mask;
194} 225}
195 226
196static cpumask_t physflat_vector_allocation_domain(int cpu) 227static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
197{ 228{
198 return cpumask_of_cpu(cpu); 229 cpumask_clear(retmask);
230 cpumask_set_cpu(cpu, retmask);
199} 231}
200 232
201static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) 233static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
202{ 234{
203 send_IPI_mask_sequence(cpumask, vector); 235 send_IPI_mask_sequence(cpumask, vector);
204} 236}
205 237
206static void physflat_send_IPI_allbutself(int vector) 238static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
239 int vector)
207{ 240{
208 cpumask_t allbutme = cpu_online_map; 241 send_IPI_mask_allbutself(cpumask, vector);
242}
209 243
210 cpu_clear(smp_processor_id(), allbutme); 244static void physflat_send_IPI_allbutself(int vector)
211 physflat_send_IPI_mask(allbutme, vector); 245{
246 send_IPI_mask_allbutself(cpu_online_mask, vector);
212} 247}
213 248
214static void physflat_send_IPI_all(int vector) 249static void physflat_send_IPI_all(int vector)
215{ 250{
216 physflat_send_IPI_mask(cpu_online_map, vector); 251 physflat_send_IPI_mask(cpu_online_mask, vector);
217} 252}
218 253
219static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) 254static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
220{ 255{
221 int cpu; 256 int cpu;
222 257
@@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
224 * We're using fixed IRQ delivery, can only return one phys APIC ID. 259 * We're using fixed IRQ delivery, can only return one phys APIC ID.
225 * May as well be the first. 260 * May as well be the first.
226 */ 261 */
227 cpu = first_cpu(cpumask); 262 cpu = cpumask_first(cpumask);
228 if ((unsigned)cpu < nr_cpu_ids) 263 if ((unsigned)cpu < nr_cpu_ids)
229 return per_cpu(x86_cpu_to_apicid, cpu); 264 return per_cpu(x86_cpu_to_apicid, cpu);
230 else 265 else
231 return BAD_APICID; 266 return BAD_APICID;
232} 267}
233 268
269static unsigned int
270physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
271 const struct cpumask *andmask)
272{
273 int cpu;
274
275 /*
276 * We're using fixed IRQ delivery, can only return one phys APIC ID.
277 * May as well be the first.
278 */
279 for_each_cpu_and(cpu, cpumask, andmask)
280 if (cpumask_test_cpu(cpu, cpu_online_mask))
281 break;
282 if (cpu < nr_cpu_ids)
283 return per_cpu(x86_cpu_to_apicid, cpu);
284 return BAD_APICID;
285}
286
234struct genapic apic_physflat = { 287struct genapic apic_physflat = {
235 .name = "physical flat", 288 .name = "physical flat",
236 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 289 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +296,10 @@ struct genapic apic_physflat = {
243 .send_IPI_all = physflat_send_IPI_all, 296 .send_IPI_all = physflat_send_IPI_all,
244 .send_IPI_allbutself = physflat_send_IPI_allbutself, 297 .send_IPI_allbutself = physflat_send_IPI_allbutself,
245 .send_IPI_mask = physflat_send_IPI_mask, 298 .send_IPI_mask = physflat_send_IPI_mask,
299 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
246 .send_IPI_self = apic_send_IPI_self, 300 .send_IPI_self = apic_send_IPI_self,
247 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 301 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
302 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
248 .phys_pkg_id = phys_pkg_id, 303 .phys_pkg_id = phys_pkg_id,
249 .get_apic_id = get_apic_id, 304 .get_apic_id = get_apic_id,
250 .set_apic_id = set_apic_id, 305 .set_apic_id = set_apic_id,
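
The new ->cpu_mask_to_apicid_and() hook above lets callers combine two masks without building a temporary cpumask_t on the stack. A sketch of the underlying idiom, with demo_pick_dest_cpu() as a hypothetical helper rather than anything from the patch:

#include <linux/cpumask.h>

/* First online CPU present in both masks, or -1; no temporary mask needed. */
static int demo_pick_dest_cpu(const struct cpumask *a, const struct cpumask *b)
{
	int cpu;

	for_each_cpu_and(cpu, a, b)
		if (cpumask_test_cpu(cpu, cpu_online_mask))
			break;

	return cpu < nr_cpu_ids ? cpu : -1;
}
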
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a6..6ce497cc372d 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
22 22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24 24
25static cpumask_t x2apic_target_cpus(void) 25static const struct cpumask *x2apic_target_cpus(void)
26{ 26{
27 return cpumask_of_cpu(0); 27 return cpumask_of(0);
28} 28}
29 29
30/* 30/*
31 * for now each logical cpu is in its own vector allocation domain. 31 * for now each logical cpu is in its own vector allocation domain.
32 */ 32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu) 33static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
34{ 34{
35 cpumask_t domain = CPU_MASK_NONE; 35 cpumask_clear(retmask);
36 cpu_set(cpu, domain); 36 cpumask_set_cpu(cpu, retmask);
37 return domain;
38} 37}
39 38
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 39static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register 55 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes. 56 * writes.
58 */ 57 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
60{ 59{
61 unsigned long flags; 60 unsigned long flags;
62 unsigned long query_cpu; 61 unsigned long query_cpu;
63 62
64 local_irq_save(flags); 63 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) { 64 for_each_cpu(query_cpu, mask)
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), 65 __x2apic_send_IPI_dest(
67 vector, APIC_DEST_LOGICAL); 66 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
68 } 67 vector, APIC_DEST_LOGICAL);
69 local_irq_restore(flags); 68 local_irq_restore(flags);
70} 69}
71 70
72static void x2apic_send_IPI_allbutself(int vector) 71static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
72 int vector)
73{ 73{
74 cpumask_t mask = cpu_online_map; 74 unsigned long flags;
75 unsigned long query_cpu;
76 unsigned long this_cpu = smp_processor_id();
75 77
76 cpu_clear(smp_processor_id(), mask); 78 local_irq_save(flags);
79 for_each_cpu(query_cpu, mask)
80 if (query_cpu != this_cpu)
81 __x2apic_send_IPI_dest(
82 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
83 vector, APIC_DEST_LOGICAL);
84 local_irq_restore(flags);
85}
86
87static void x2apic_send_IPI_allbutself(int vector)
88{
89 unsigned long flags;
90 unsigned long query_cpu;
91 unsigned long this_cpu = smp_processor_id();
77 92
78 if (!cpus_empty(mask)) 93 local_irq_save(flags);
79 x2apic_send_IPI_mask(mask, vector); 94 for_each_online_cpu(query_cpu)
95 if (query_cpu != this_cpu)
96 __x2apic_send_IPI_dest(
97 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
98 vector, APIC_DEST_LOGICAL);
99 local_irq_restore(flags);
80} 100}
81 101
82static void x2apic_send_IPI_all(int vector) 102static void x2apic_send_IPI_all(int vector)
83{ 103{
84 x2apic_send_IPI_mask(cpu_online_map, vector); 104 x2apic_send_IPI_mask(cpu_online_mask, vector);
85} 105}
86 106
87static int x2apic_apic_id_registered(void) 107static int x2apic_apic_id_registered(void)
@@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void)
89 return 1; 109 return 1;
90} 110}
91 111
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
93{ 113{
94 int cpu; 114 int cpu;
95 115
96 /* 116 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID. 117 * We're using fixed IRQ delivery, can only return one logical APIC ID.
98 * May as well be the first. 118 * May as well be the first.
99 */ 119 */
100 cpu = first_cpu(cpumask); 120 cpu = cpumask_first(cpumask);
101 if ((unsigned)cpu < NR_CPUS) 121 if ((unsigned)cpu < nr_cpu_ids)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu); 122 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else 123 else
104 return BAD_APICID; 124 return BAD_APICID;
105} 125}
106 126
127static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
128 const struct cpumask *andmask)
129{
130 int cpu;
131
132 /*
133 * We're using fixed IRQ delivery, can only return one logical APIC ID.
134 * May as well be the first.
135 */
136 for_each_cpu_and(cpu, cpumask, andmask)
137 if (cpumask_test_cpu(cpu, cpu_online_mask))
138 break;
139 if (cpu < nr_cpu_ids)
140 return per_cpu(x86_cpu_to_logical_apicid, cpu);
141 return BAD_APICID;
142}
143
107static unsigned int get_apic_id(unsigned long x) 144static unsigned int get_apic_id(unsigned long x)
108{ 145{
109 unsigned int id; 146 unsigned int id;
@@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = {
150 .send_IPI_all = x2apic_send_IPI_all, 187 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 188 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask, 189 .send_IPI_mask = x2apic_send_IPI_mask,
190 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
153 .send_IPI_self = x2apic_send_IPI_self, 191 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 192 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
193 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
155 .phys_pkg_id = phys_pkg_id, 194 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id, 195 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id, 196 .set_apic_id = set_apic_id,
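
The cluster variant above (and the physical and UV variants below) replaces the old "copy cpu_online_map, clear our own bit" approach with a loop that simply skips the sending CPU. Roughly, assuming kernel context with interrupts disabled around the send; the demo_* names are placeholders:

#include <linux/cpumask.h>
#include <linux/smp.h>

static void demo_send_one(unsigned int cpu, int vector)
{
	/* stand-in for a per-CPU IPI write such as __x2apic_send_IPI_dest() */
}

static void demo_send_to_all_but_self(const struct cpumask *mask, int vector)
{
	unsigned int cpu;
	unsigned int this_cpu = smp_processor_id();

	for_each_cpu(cpu, mask)
		if (cpu != this_cpu)
			demo_send_one(cpu, vector);
}
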
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b7..62895cf315ff 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31 31
32static cpumask_t x2apic_target_cpus(void) 32static const struct cpumask *x2apic_target_cpus(void)
33{ 33{
34 return cpumask_of_cpu(0); 34 return cpumask_of(0);
35} 35}
36 36
37static cpumask_t x2apic_vector_allocation_domain(int cpu) 37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
38{ 38{
39 cpumask_t domain = CPU_MASK_NONE; 39 cpumask_clear(retmask);
40 cpu_set(cpu, domain); 40 cpumask_set_cpu(cpu, retmask);
41 return domain;
42} 41}
43 42
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 43static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
54 x2apic_icr_write(cfg, apicid); 53 x2apic_icr_write(cfg, apicid);
55} 54}
56 55
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
58{ 57{
59 unsigned long flags; 58 unsigned long flags;
60 unsigned long query_cpu; 59 unsigned long query_cpu;
61 60
62 local_irq_save(flags); 61 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) { 62 for_each_cpu(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), 63 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL); 64 vector, APIC_DEST_PHYSICAL);
66 } 65 }
67 local_irq_restore(flags); 66 local_irq_restore(flags);
68} 67}
69 68
70static void x2apic_send_IPI_allbutself(int vector) 69static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
70 int vector)
71{ 71{
72 cpumask_t mask = cpu_online_map; 72 unsigned long flags;
73 unsigned long query_cpu;
74 unsigned long this_cpu = smp_processor_id();
75
76 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) {
78 if (query_cpu != this_cpu)
79 __x2apic_send_IPI_dest(
80 per_cpu(x86_cpu_to_apicid, query_cpu),
81 vector, APIC_DEST_PHYSICAL);
82 }
83 local_irq_restore(flags);
84}
73 85
74 cpu_clear(smp_processor_id(), mask); 86static void x2apic_send_IPI_allbutself(int vector)
87{
88 unsigned long flags;
89 unsigned long query_cpu;
90 unsigned long this_cpu = smp_processor_id();
75 91
76 if (!cpus_empty(mask)) 92 local_irq_save(flags);
77 x2apic_send_IPI_mask(mask, vector); 93 for_each_online_cpu(query_cpu)
94 if (query_cpu != this_cpu)
95 __x2apic_send_IPI_dest(
96 per_cpu(x86_cpu_to_apicid, query_cpu),
97 vector, APIC_DEST_PHYSICAL);
98 local_irq_restore(flags);
78} 99}
79 100
80static void x2apic_send_IPI_all(int vector) 101static void x2apic_send_IPI_all(int vector)
81{ 102{
82 x2apic_send_IPI_mask(cpu_online_map, vector); 103 x2apic_send_IPI_mask(cpu_online_mask, vector);
83} 104}
84 105
85static int x2apic_apic_id_registered(void) 106static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void)
87 return 1; 108 return 1;
88} 109}
89 110
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 111static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
91{ 112{
92 int cpu; 113 int cpu;
93 114
@@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
95 * We're using fixed IRQ delivery, can only return one phys APIC ID. 116 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first. 117 * May as well be the first.
97 */ 118 */
98 cpu = first_cpu(cpumask); 119 cpu = cpumask_first(cpumask);
99 if ((unsigned)cpu < NR_CPUS) 120 if ((unsigned)cpu < nr_cpu_ids)
100 return per_cpu(x86_cpu_to_apicid, cpu); 121 return per_cpu(x86_cpu_to_apicid, cpu);
101 else 122 else
102 return BAD_APICID; 123 return BAD_APICID;
103} 124}
104 125
126static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
127 const struct cpumask *andmask)
128{
129 int cpu;
130
131 /*
132 * We're using fixed IRQ delivery, can only return one phys APIC ID.
133 * May as well be the first.
134 */
135 for_each_cpu_and(cpu, cpumask, andmask)
136 if (cpumask_test_cpu(cpu, cpu_online_mask))
137 break;
138 if (cpu < nr_cpu_ids)
139 return per_cpu(x86_cpu_to_apicid, cpu);
140 return BAD_APICID;
141}
142
105static unsigned int get_apic_id(unsigned long x) 143static unsigned int get_apic_id(unsigned long x)
106{ 144{
107 unsigned int id; 145 unsigned int id;
@@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = {
145 .send_IPI_all = x2apic_send_IPI_all, 183 .send_IPI_all = x2apic_send_IPI_all,
146 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 184 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
147 .send_IPI_mask = x2apic_send_IPI_mask, 185 .send_IPI_mask = x2apic_send_IPI_mask,
186 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
148 .send_IPI_self = x2apic_send_IPI_self, 187 .send_IPI_self = x2apic_send_IPI_self,
149 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 188 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
189 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
150 .phys_pkg_id = phys_pkg_id, 190 .phys_pkg_id = phys_pkg_id,
151 .get_apic_id = get_apic_id, 191 .get_apic_id = get_apic_id,
152 .set_apic_id = set_apic_id, 192 .set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index dece17289731..b193e082f6ce 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
79 79
80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
81 81
82static cpumask_t uv_target_cpus(void) 82static const struct cpumask *uv_target_cpus(void)
83{ 83{
84 return cpumask_of_cpu(0); 84 return cpumask_of(0);
85} 85}
86 86
87static cpumask_t uv_vector_allocation_domain(int cpu) 87static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
88{ 88{
89 cpumask_t domain = CPU_MASK_NONE; 89 cpumask_clear(retmask);
90 cpu_set(cpu, domain); 90 cpumask_set_cpu(cpu, retmask);
91 return domain;
92} 91}
93 92
94int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) 93int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector)
127 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 126 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
128} 127}
129 128
130static void uv_send_IPI_mask(cpumask_t mask, int vector) 129static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
131{ 130{
132 unsigned int cpu; 131 unsigned int cpu;
133 132
134 for_each_possible_cpu(cpu) 133 for_each_cpu(cpu, mask)
135 if (cpu_isset(cpu, mask)) 134 uv_send_IPI_one(cpu, vector);
135}
136
137static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
138{
139 unsigned int cpu;
140 unsigned int this_cpu = smp_processor_id();
141
142 for_each_cpu(cpu, mask)
143 if (cpu != this_cpu)
136 uv_send_IPI_one(cpu, vector); 144 uv_send_IPI_one(cpu, vector);
137} 145}
138 146
139static void uv_send_IPI_allbutself(int vector) 147static void uv_send_IPI_allbutself(int vector)
140{ 148{
141 cpumask_t mask = cpu_online_map; 149 unsigned int cpu;
142 150 unsigned int this_cpu = smp_processor_id();
143 cpu_clear(smp_processor_id(), mask);
144 151
145 if (!cpus_empty(mask)) 152 for_each_online_cpu(cpu)
146 uv_send_IPI_mask(mask, vector); 153 if (cpu != this_cpu)
154 uv_send_IPI_one(cpu, vector);
147} 155}
148 156
149static void uv_send_IPI_all(int vector) 157static void uv_send_IPI_all(int vector)
150{ 158{
151 uv_send_IPI_mask(cpu_online_map, vector); 159 uv_send_IPI_mask(cpu_online_mask, vector);
152} 160}
153 161
154static int uv_apic_id_registered(void) 162static int uv_apic_id_registered(void)
@@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void)
160{ 168{
161} 169}
162 170
163static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 171static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
164{ 172{
165 int cpu; 173 int cpu;
166 174
@@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
168 * We're using fixed IRQ delivery, can only return one phys APIC ID. 176 * We're using fixed IRQ delivery, can only return one phys APIC ID.
169 * May as well be the first. 177 * May as well be the first.
170 */ 178 */
171 cpu = first_cpu(cpumask); 179 cpu = cpumask_first(cpumask);
172 if ((unsigned)cpu < nr_cpu_ids) 180 if ((unsigned)cpu < nr_cpu_ids)
173 return per_cpu(x86_cpu_to_apicid, cpu); 181 return per_cpu(x86_cpu_to_apicid, cpu);
174 else 182 else
175 return BAD_APICID; 183 return BAD_APICID;
176} 184}
177 185
186static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
187 const struct cpumask *andmask)
188{
189 int cpu;
190
191 /*
192 * We're using fixed IRQ delivery, can only return one phys APIC ID.
193 * May as well be the first.
194 */
195 for_each_cpu_and(cpu, cpumask, andmask)
196 if (cpumask_test_cpu(cpu, cpu_online_mask))
197 break;
198 if (cpu < nr_cpu_ids)
199 return per_cpu(x86_cpu_to_apicid, cpu);
200 return BAD_APICID;
201}
202
178static unsigned int get_apic_id(unsigned long x) 203static unsigned int get_apic_id(unsigned long x)
179{ 204{
180 unsigned int id; 205 unsigned int id;
@@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = {
222 .send_IPI_all = uv_send_IPI_all, 247 .send_IPI_all = uv_send_IPI_all,
223 .send_IPI_allbutself = uv_send_IPI_allbutself, 248 .send_IPI_allbutself = uv_send_IPI_allbutself,
224 .send_IPI_mask = uv_send_IPI_mask, 249 .send_IPI_mask = uv_send_IPI_mask,
250 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
225 .send_IPI_self = uv_send_IPI_self, 251 .send_IPI_self = uv_send_IPI_self,
226 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 252 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
253 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
227 .phys_pkg_id = phys_pkg_id, 254 .phys_pkg_id = phys_pkg_id,
228 .get_apic_id = get_apic_id, 255 .get_apic_id = get_apic_id,
229 .set_apic_id = set_apic_id, 256 .set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index e7745961ed31..3e070bb961d7 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
136 136
137struct irq_cfg { 137struct irq_cfg {
138 struct irq_pin_list *irq_2_pin; 138 struct irq_pin_list *irq_2_pin;
139 cpumask_t domain; 139 cpumask_var_t domain;
140 cpumask_t old_domain; 140 cpumask_var_t old_domain;
141 unsigned move_cleanup_count; 141 unsigned move_cleanup_count;
142 u8 vector; 142 u8 vector;
143 u8 move_in_progress : 1; 143 u8 move_in_progress : 1;
@@ -152,22 +152,22 @@ static struct irq_cfg irq_cfgx[] = {
152#else 152#else
153static struct irq_cfg irq_cfgx[NR_IRQS] = { 153static struct irq_cfg irq_cfgx[NR_IRQS] = {
154#endif 154#endif
155 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 155 [0] = { .vector = IRQ0_VECTOR, },
156 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 156 [1] = { .vector = IRQ1_VECTOR, },
157 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 157 [2] = { .vector = IRQ2_VECTOR, },
158 [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, 158 [3] = { .vector = IRQ3_VECTOR, },
159 [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, 159 [4] = { .vector = IRQ4_VECTOR, },
160 [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, 160 [5] = { .vector = IRQ5_VECTOR, },
161 [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, 161 [6] = { .vector = IRQ6_VECTOR, },
162 [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, 162 [7] = { .vector = IRQ7_VECTOR, },
163 [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, 163 [8] = { .vector = IRQ8_VECTOR, },
164 [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, 164 [9] = { .vector = IRQ9_VECTOR, },
165 [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, 165 [10] = { .vector = IRQ10_VECTOR, },
166 [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, 166 [11] = { .vector = IRQ11_VECTOR, },
167 [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, 167 [12] = { .vector = IRQ12_VECTOR, },
168 [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, 168 [13] = { .vector = IRQ13_VECTOR, },
169 [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, 169 [14] = { .vector = IRQ14_VECTOR, },
170 [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, 170 [15] = { .vector = IRQ15_VECTOR, },
171}; 171};
172 172
173void __init arch_early_irq_init(void) 173void __init arch_early_irq_init(void)
@@ -183,6 +183,10 @@ void __init arch_early_irq_init(void)
183 for (i = 0; i < count; i++) { 183 for (i = 0; i < count; i++) {
184 desc = irq_to_desc(i); 184 desc = irq_to_desc(i);
185 desc->chip_data = &cfg[i]; 185 desc->chip_data = &cfg[i];
186 alloc_bootmem_cpumask_var(&cfg[i].domain);
187 alloc_bootmem_cpumask_var(&cfg[i].old_domain);
188 if (i < NR_IRQS_LEGACY)
189 cpumask_setall(cfg[i].domain);
186 } 190 }
187} 191}
188 192
@@ -207,6 +211,20 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
207 node = cpu_to_node(cpu); 211 node = cpu_to_node(cpu);
208 212
209 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); 213 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
214 if (cfg) {
215 /* FIXME: needs alloc_cpumask_var_node() */
216 if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
217 kfree(cfg);
218 cfg = NULL;
219 } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
220 free_cpumask_var(cfg->domain);
221 kfree(cfg);
222 cfg = NULL;
223 } else {
224 cpumask_clear(cfg->domain);
225 cpumask_clear(cfg->old_domain);
226 }
227 }
210 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); 228 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
211 229
212 return cfg; 230 return cfg;
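
irq_cfg now carries cpumask_var_t members, which are heap-allocated when CONFIG_CPUMASK_OFFSTACK is set, so each allocation can fail and must be balanced with free_cpumask_var(). A minimal sketch of that error handling, with demo_cfg and demo_alloc_two_masks() as made-up names:

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/errno.h>

struct demo_cfg {
	cpumask_var_t domain;
	cpumask_var_t old_domain;
};

static int demo_alloc_two_masks(struct demo_cfg *cfg)
{
	if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC))
		return -ENOMEM;
	if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
		free_cpumask_var(cfg->domain);	/* undo the first allocation */
		return -ENOMEM;
	}
	cpumask_clear(cfg->domain);
	cpumask_clear(cfg->old_domain);
	return 0;
}
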
@@ -329,13 +347,14 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
329 } 347 }
330} 348}
331 349
332static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) 350static void
351set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
333{ 352{
334 struct irq_cfg *cfg = desc->chip_data; 353 struct irq_cfg *cfg = desc->chip_data;
335 354
336 if (!cfg->move_in_progress) { 355 if (!cfg->move_in_progress) {
337 /* it means that domain is not changed */ 356 /* it means that domain is not changed */
338 if (!cpus_intersects(desc->affinity, mask)) 357 if (!cpumask_intersects(&desc->affinity, mask))
339 cfg->move_desc_pending = 1; 358 cfg->move_desc_pending = 1;
340 } 359 }
341} 360}
@@ -350,7 +369,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
350#endif 369#endif
351 370
352#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC 371#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
353static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) 372static inline void
373set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
354{ 374{
355} 375}
356#endif 376#endif
@@ -481,6 +501,26 @@ static void ioapic_mask_entry(int apic, int pin)
481} 501}
482 502
483#ifdef CONFIG_SMP 503#ifdef CONFIG_SMP
504static void send_cleanup_vector(struct irq_cfg *cfg)
505{
506 cpumask_var_t cleanup_mask;
507
508 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
509 unsigned int i;
510 cfg->move_cleanup_count = 0;
511 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
512 cfg->move_cleanup_count++;
513 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
514 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
515 } else {
516 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
517 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
518 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
519 free_cpumask_var(cleanup_mask);
520 }
521 cfg->move_in_progress = 0;
522}
523
484static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) 524static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
485{ 525{
486 int apic, pin; 526 int apic, pin;
@@ -516,48 +556,61 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
516 } 556 }
517} 557}
518 558
519static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); 559static int
560assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
520 561
521static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) 562/*
563 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
564 * of that, or returns BAD_APICID and leaves desc->affinity untouched.
565 */
566static unsigned int
567set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
522{ 568{
523 struct irq_cfg *cfg; 569 struct irq_cfg *cfg;
524 unsigned long flags;
525 unsigned int dest;
526 cpumask_t tmp;
527 unsigned int irq; 570 unsigned int irq;
528 571
529 cpus_and(tmp, mask, cpu_online_map); 572 if (!cpumask_intersects(mask, cpu_online_mask))
530 if (cpus_empty(tmp)) 573 return BAD_APICID;
531 return;
532 574
533 irq = desc->irq; 575 irq = desc->irq;
534 cfg = desc->chip_data; 576 cfg = desc->chip_data;
535 if (assign_irq_vector(irq, cfg, mask)) 577 if (assign_irq_vector(irq, cfg, mask))
536 return; 578 return BAD_APICID;
537 579
580 cpumask_and(&desc->affinity, cfg->domain, mask);
538 set_extra_move_desc(desc, mask); 581 set_extra_move_desc(desc, mask);
582 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
583}
539 584
540 cpus_and(tmp, cfg->domain, mask); 585static void
541 dest = cpu_mask_to_apicid(tmp); 586set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
542 /* 587{
543 * Only the high 8 bits are valid. 588 struct irq_cfg *cfg;
544 */ 589 unsigned long flags;
545 dest = SET_APIC_LOGICAL_ID(dest); 590 unsigned int dest;
591 unsigned int irq;
592
593 irq = desc->irq;
594 cfg = desc->chip_data;
546 595
547 spin_lock_irqsave(&ioapic_lock, flags); 596 spin_lock_irqsave(&ioapic_lock, flags);
548 __target_IO_APIC_irq(irq, dest, cfg); 597 dest = set_desc_affinity(desc, mask);
549 desc->affinity = mask; 598 if (dest != BAD_APICID) {
599 /* Only the high 8 bits are valid. */
600 dest = SET_APIC_LOGICAL_ID(dest);
601 __target_IO_APIC_irq(irq, dest, cfg);
602 }
550 spin_unlock_irqrestore(&ioapic_lock, flags); 603 spin_unlock_irqrestore(&ioapic_lock, flags);
551} 604}
552 605
553static void set_ioapic_affinity_irq(unsigned int irq, 606static void
554 const struct cpumask *mask) 607set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
555{ 608{
556 struct irq_desc *desc; 609 struct irq_desc *desc;
557 610
558 desc = irq_to_desc(irq); 611 desc = irq_to_desc(irq);
559 612
560 set_ioapic_affinity_irq_desc(desc, *mask); 613 set_ioapic_affinity_irq_desc(desc, mask);
561} 614}
562#endif /* CONFIG_SMP */ 615#endif /* CONFIG_SMP */
563 616
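
The set_desc_affinity() helper introduced above centralises the "validate mask, reassign vector, update desc->affinity" steps and signals failure via BAD_APICID, so an affinity handler reduces to roughly the shape below (compare set_ioapic_affinity_irq_desc(); demo_set_affinity() and demo_program_hw() are illustrative, not part of the patch):

#include <linux/irq.h>

static void demo_program_hw(unsigned int irq, unsigned int dest)
{
	/* placeholder for the chip-specific route update (MSI msg, RTE, ...) */
}

static void demo_set_affinity(struct irq_desc *desc, const struct cpumask *mask)
{
	unsigned int dest;

	dest = set_desc_affinity(desc, mask);	/* BAD_APICID if mask is unusable */
	if (dest == BAD_APICID)
		return;				/* desc->affinity left untouched */

	demo_program_hw(desc->irq, dest);
}
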
@@ -1219,7 +1272,8 @@ void unlock_vector_lock(void)
1219 spin_unlock(&vector_lock); 1272 spin_unlock(&vector_lock);
1220} 1273}
1221 1274
1222static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) 1275static int
1276__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1223{ 1277{
1224 /* 1278 /*
1225 * NOTE! The local APIC isn't very good at handling 1279 * NOTE! The local APIC isn't very good at handling
@@ -1234,49 +1288,49 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
1234 */ 1288 */
1235 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1289 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1236 unsigned int old_vector; 1290 unsigned int old_vector;
1237 int cpu; 1291 int cpu, err;
1292 cpumask_var_t tmp_mask;
1238 1293
1239 if ((cfg->move_in_progress) || cfg->move_cleanup_count) 1294 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1240 return -EBUSY; 1295 return -EBUSY;
1241 1296
1242 /* Only try and allocate irqs on cpus that are present */ 1297 if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
1243 cpus_and(mask, mask, cpu_online_map); 1298 return -ENOMEM;
1244 1299
1245 old_vector = cfg->vector; 1300 old_vector = cfg->vector;
1246 if (old_vector) { 1301 if (old_vector) {
1247 cpumask_t tmp; 1302 cpumask_and(tmp_mask, mask, cpu_online_mask);
1248 cpus_and(tmp, cfg->domain, mask); 1303 cpumask_and(tmp_mask, cfg->domain, tmp_mask);
1249 if (!cpus_empty(tmp)) 1304 if (!cpumask_empty(tmp_mask)) {
1305 free_cpumask_var(tmp_mask);
1250 return 0; 1306 return 0;
1307 }
1251 } 1308 }
1252 1309
1253 for_each_cpu_mask_nr(cpu, mask) { 1310 /* Only try and allocate irqs on cpus that are present */
1254 cpumask_t domain, new_mask; 1311 err = -ENOSPC;
1312 for_each_cpu_and(cpu, mask, cpu_online_mask) {
1255 int new_cpu; 1313 int new_cpu;
1256 int vector, offset; 1314 int vector, offset;
1257 1315
1258 domain = vector_allocation_domain(cpu); 1316 vector_allocation_domain(cpu, tmp_mask);
1259 cpus_and(new_mask, domain, cpu_online_map);
1260 1317
1261 vector = current_vector; 1318 vector = current_vector;
1262 offset = current_offset; 1319 offset = current_offset;
1263next: 1320next:
1264 vector += 8; 1321 vector += 8;
1265 if (vector >= first_system_vector) { 1322 if (vector >= first_system_vector) {
1266 /* If we run out of vectors on large boxen, must share them. */ 1323 /* If out of vectors on large boxen, must share them. */
1267 offset = (offset + 1) % 8; 1324 offset = (offset + 1) % 8;
1268 vector = FIRST_DEVICE_VECTOR + offset; 1325 vector = FIRST_DEVICE_VECTOR + offset;
1269 } 1326 }
1270 if (unlikely(current_vector == vector)) 1327 if (unlikely(current_vector == vector))
1271 continue; 1328 continue;
1272#ifdef CONFIG_X86_64 1329
1273 if (vector == IA32_SYSCALL_VECTOR) 1330 if (test_bit(vector, used_vectors))
1274 goto next;
1275#else
1276 if (vector == SYSCALL_VECTOR)
1277 goto next; 1331 goto next;
1278#endif 1332
1279 for_each_cpu_mask_nr(new_cpu, new_mask) 1333 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1280 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1334 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1281 goto next; 1335 goto next;
1282 /* Found one! */ 1336 /* Found one! */
@@ -1284,18 +1338,21 @@ next:
1284 current_offset = offset; 1338 current_offset = offset;
1285 if (old_vector) { 1339 if (old_vector) {
1286 cfg->move_in_progress = 1; 1340 cfg->move_in_progress = 1;
1287 cfg->old_domain = cfg->domain; 1341 cpumask_copy(cfg->old_domain, cfg->domain);
1288 } 1342 }
1289 for_each_cpu_mask_nr(new_cpu, new_mask) 1343 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1290 per_cpu(vector_irq, new_cpu)[vector] = irq; 1344 per_cpu(vector_irq, new_cpu)[vector] = irq;
1291 cfg->vector = vector; 1345 cfg->vector = vector;
1292 cfg->domain = domain; 1346 cpumask_copy(cfg->domain, tmp_mask);
1293 return 0; 1347 err = 0;
1348 break;
1294 } 1349 }
1295 return -ENOSPC; 1350 free_cpumask_var(tmp_mask);
1351 return err;
1296} 1352}
1297 1353
1298static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) 1354static int
1355assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1299{ 1356{
1300 int err; 1357 int err;
1301 unsigned long flags; 1358 unsigned long flags;
@@ -1308,23 +1365,20 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
1308 1365
1309static void __clear_irq_vector(int irq, struct irq_cfg *cfg) 1366static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1310{ 1367{
1311 cpumask_t mask;
1312 int cpu, vector; 1368 int cpu, vector;
1313 1369
1314 BUG_ON(!cfg->vector); 1370 BUG_ON(!cfg->vector);
1315 1371
1316 vector = cfg->vector; 1372 vector = cfg->vector;
1317 cpus_and(mask, cfg->domain, cpu_online_map); 1373 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1318 for_each_cpu_mask_nr(cpu, mask)
1319 per_cpu(vector_irq, cpu)[vector] = -1; 1374 per_cpu(vector_irq, cpu)[vector] = -1;
1320 1375
1321 cfg->vector = 0; 1376 cfg->vector = 0;
1322 cpus_clear(cfg->domain); 1377 cpumask_clear(cfg->domain);
1323 1378
1324 if (likely(!cfg->move_in_progress)) 1379 if (likely(!cfg->move_in_progress))
1325 return; 1380 return;
1326 cpus_and(mask, cfg->old_domain, cpu_online_map); 1381 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1327 for_each_cpu_mask_nr(cpu, mask) {
1328 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; 1382 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1329 vector++) { 1383 vector++) {
1330 if (per_cpu(vector_irq, cpu)[vector] != irq) 1384 if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1349,7 +1403,7 @@ void __setup_vector_irq(int cpu)
1349 if (!desc) 1403 if (!desc)
1350 continue; 1404 continue;
1351 cfg = desc->chip_data; 1405 cfg = desc->chip_data;
1352 if (!cpu_isset(cpu, cfg->domain)) 1406 if (!cpumask_test_cpu(cpu, cfg->domain))
1353 continue; 1407 continue;
1354 vector = cfg->vector; 1408 vector = cfg->vector;
1355 per_cpu(vector_irq, cpu)[vector] = irq; 1409 per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1361,7 +1415,7 @@ void __setup_vector_irq(int cpu)
1361 continue; 1415 continue;
1362 1416
1363 cfg = irq_cfg(irq); 1417 cfg = irq_cfg(irq);
1364 if (!cpu_isset(cpu, cfg->domain)) 1418 if (!cpumask_test_cpu(cpu, cfg->domain))
1365 per_cpu(vector_irq, cpu)[vector] = -1; 1419 per_cpu(vector_irq, cpu)[vector] = -1;
1366 } 1420 }
1367} 1421}
@@ -1497,18 +1551,17 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1497{ 1551{
1498 struct irq_cfg *cfg; 1552 struct irq_cfg *cfg;
1499 struct IO_APIC_route_entry entry; 1553 struct IO_APIC_route_entry entry;
1500 cpumask_t mask; 1554 unsigned int dest;
1501 1555
1502 if (!IO_APIC_IRQ(irq)) 1556 if (!IO_APIC_IRQ(irq))
1503 return; 1557 return;
1504 1558
1505 cfg = desc->chip_data; 1559 cfg = desc->chip_data;
1506 1560
1507 mask = TARGET_CPUS; 1561 if (assign_irq_vector(irq, cfg, TARGET_CPUS))
1508 if (assign_irq_vector(irq, cfg, mask))
1509 return; 1562 return;
1510 1563
1511 cpus_and(mask, cfg->domain, mask); 1564 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
1512 1565
1513 apic_printk(APIC_VERBOSE,KERN_DEBUG 1566 apic_printk(APIC_VERBOSE,KERN_DEBUG
1514 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1567 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1518,8 +1571,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1518 1571
1519 1572
1520 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1573 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
1521 cpu_mask_to_apicid(mask), trigger, polarity, 1574 dest, trigger, polarity, cfg->vector)) {
1522 cfg->vector)) {
1523 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1575 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1524 mp_ioapics[apic].mp_apicid, pin); 1576 mp_ioapics[apic].mp_apicid, pin);
1525 __clear_irq_vector(irq, cfg); 1577 __clear_irq_vector(irq, cfg);
@@ -2241,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2241 unsigned long flags; 2293 unsigned long flags;
2242 2294
2243 spin_lock_irqsave(&vector_lock, flags); 2295 spin_lock_irqsave(&vector_lock, flags);
2244 send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); 2296 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2245 spin_unlock_irqrestore(&vector_lock, flags); 2297 spin_unlock_irqrestore(&vector_lock, flags);
2246 2298
2247 return 1; 2299 return 1;
@@ -2290,18 +2342,17 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2290 * as simple as edge triggered migration and we can do the irq migration 2342 * as simple as edge triggered migration and we can do the irq migration
2291 * with a simple atomic update to IO-APIC RTE. 2343 * with a simple atomic update to IO-APIC RTE.
2292 */ 2344 */
2293static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) 2345static void
2346migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2294{ 2347{
2295 struct irq_cfg *cfg; 2348 struct irq_cfg *cfg;
2296 cpumask_t tmp, cleanup_mask;
2297 struct irte irte; 2349 struct irte irte;
2298 int modify_ioapic_rte; 2350 int modify_ioapic_rte;
2299 unsigned int dest; 2351 unsigned int dest;
2300 unsigned long flags; 2352 unsigned long flags;
2301 unsigned int irq; 2353 unsigned int irq;
2302 2354
2303 cpus_and(tmp, mask, cpu_online_map); 2355 if (!cpumask_intersects(mask, cpu_online_mask))
2304 if (cpus_empty(tmp))
2305 return; 2356 return;
2306 2357
2307 irq = desc->irq; 2358 irq = desc->irq;
@@ -2314,8 +2365,7 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
2314 2365
2315 set_extra_move_desc(desc, mask); 2366 set_extra_move_desc(desc, mask);
2316 2367
2317 cpus_and(tmp, cfg->domain, mask); 2368 dest = cpu_mask_to_apicid_and(cfg->domain, mask);
2318 dest = cpu_mask_to_apicid(tmp);
2319 2369
2320 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2370 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2321 if (modify_ioapic_rte) { 2371 if (modify_ioapic_rte) {
@@ -2332,14 +2382,10 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
2332 */ 2382 */
2333 modify_irte(irq, &irte); 2383 modify_irte(irq, &irte);
2334 2384
2335 if (cfg->move_in_progress) { 2385 if (cfg->move_in_progress)
2336 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2386 send_cleanup_vector(cfg);
2337 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2338 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2339 cfg->move_in_progress = 0;
2340 }
2341 2387
2342 desc->affinity = mask; 2388 cpumask_copy(&desc->affinity, mask);
2343} 2389}
2344 2390
2345static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2391static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2361,11 +2407,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2361 } 2407 }
2362 2408
 2363 /* everything is clear. we have right of way */ 2409 /* everything is clear. we have right of way */
2364 migrate_ioapic_irq_desc(desc, desc->pending_mask); 2410 migrate_ioapic_irq_desc(desc, &desc->pending_mask);
2365 2411
2366 ret = 0; 2412 ret = 0;
2367 desc->status &= ~IRQ_MOVE_PENDING; 2413 desc->status &= ~IRQ_MOVE_PENDING;
2368 cpus_clear(desc->pending_mask); 2414 cpumask_clear(&desc->pending_mask);
2369 2415
2370unmask: 2416unmask:
2371 unmask_IO_APIC_irq_desc(desc); 2417 unmask_IO_APIC_irq_desc(desc);
@@ -2402,11 +2448,12 @@ static void ir_irq_migration(struct work_struct *work)
2402/* 2448/*
2403 * Migrates the IRQ destination in the process context. 2449 * Migrates the IRQ destination in the process context.
2404 */ 2450 */
2405static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) 2451static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2452 const struct cpumask *mask)
2406{ 2453{
2407 if (desc->status & IRQ_LEVEL) { 2454 if (desc->status & IRQ_LEVEL) {
2408 desc->status |= IRQ_MOVE_PENDING; 2455 desc->status |= IRQ_MOVE_PENDING;
2409 desc->pending_mask = mask; 2456 cpumask_copy(&desc->pending_mask, mask);
2410 migrate_irq_remapped_level_desc(desc); 2457 migrate_irq_remapped_level_desc(desc);
2411 return; 2458 return;
2412 } 2459 }
@@ -2418,7 +2465,7 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq,
2418{ 2465{
2419 struct irq_desc *desc = irq_to_desc(irq); 2466 struct irq_desc *desc = irq_to_desc(irq);
2420 2467
2421 set_ir_ioapic_affinity_irq_desc(desc, *mask); 2468 set_ir_ioapic_affinity_irq_desc(desc, mask);
2422} 2469}
2423#endif 2470#endif
2424 2471
@@ -2449,7 +2496,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2449 if (!cfg->move_cleanup_count) 2496 if (!cfg->move_cleanup_count)
2450 goto unlock; 2497 goto unlock;
2451 2498
2452 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) 2499 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2453 goto unlock; 2500 goto unlock;
2454 2501
2455 __get_cpu_var(vector_irq)[vector] = -1; 2502 __get_cpu_var(vector_irq)[vector] = -1;
@@ -2486,20 +2533,14 @@ static void irq_complete_move(struct irq_desc **descp)
2486 2533
2487 vector = ~get_irq_regs()->orig_ax; 2534 vector = ~get_irq_regs()->orig_ax;
2488 me = smp_processor_id(); 2535 me = smp_processor_id();
2489 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
2490 cpumask_t cleanup_mask;
2491
2492#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC 2536#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2493 *descp = desc = move_irq_desc(desc, me); 2537 *descp = desc = move_irq_desc(desc, me);
2494 /* get the new one */ 2538 /* get the new one */
2495 cfg = desc->chip_data; 2539 cfg = desc->chip_data;
2496#endif 2540#endif
2497 2541
2498 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2542 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2499 cfg->move_cleanup_count = cpus_weight(cleanup_mask); 2543 send_cleanup_vector(cfg);
2500 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2501 cfg->move_in_progress = 0;
2502 }
2503} 2544}
2504#else 2545#else
2505static inline void irq_complete_move(struct irq_desc **descp) {} 2546static inline void irq_complete_move(struct irq_desc **descp) {}
@@ -3224,16 +3265,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3224 struct irq_cfg *cfg; 3265 struct irq_cfg *cfg;
3225 int err; 3266 int err;
3226 unsigned dest; 3267 unsigned dest;
3227 cpumask_t tmp;
3228 3268
3229 cfg = irq_cfg(irq); 3269 cfg = irq_cfg(irq);
3230 tmp = TARGET_CPUS; 3270 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3231 err = assign_irq_vector(irq, cfg, tmp);
3232 if (err) 3271 if (err)
3233 return err; 3272 return err;
3234 3273
3235 cpus_and(tmp, cfg->domain, tmp); 3274 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3236 dest = cpu_mask_to_apicid(tmp);
3237 3275
3238#ifdef CONFIG_INTR_REMAP 3276#ifdef CONFIG_INTR_REMAP
3239 if (irq_remapped(irq)) { 3277 if (irq_remapped(irq)) {
@@ -3293,19 +3331,12 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3293 struct irq_cfg *cfg; 3331 struct irq_cfg *cfg;
3294 struct msi_msg msg; 3332 struct msi_msg msg;
3295 unsigned int dest; 3333 unsigned int dest;
3296 cpumask_t tmp;
3297 3334
3298 if (!cpumask_intersects(mask, cpu_online_mask)) 3335 dest = set_desc_affinity(desc, mask);
3336 if (dest == BAD_APICID)
3299 return; 3337 return;
3300 3338
3301 cfg = desc->chip_data; 3339 cfg = desc->chip_data;
3302 if (assign_irq_vector(irq, cfg, *mask))
3303 return;
3304
3305 set_extra_move_desc(desc, *mask);
3306
3307 cpumask_and(&tmp, &cfg->domain, mask);
3308 dest = cpu_mask_to_apicid(tmp);
3309 3340
3310 read_msi_msg_desc(desc, &msg); 3341 read_msi_msg_desc(desc, &msg);
3311 3342
@@ -3315,37 +3346,27 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3315 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3346 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3316 3347
3317 write_msi_msg_desc(desc, &msg); 3348 write_msi_msg_desc(desc, &msg);
3318 cpumask_copy(&desc->affinity, mask);
3319} 3349}
3320#ifdef CONFIG_INTR_REMAP 3350#ifdef CONFIG_INTR_REMAP
3321/* 3351/*
3322 * Migrate the MSI irq to another cpumask. This migration is 3352 * Migrate the MSI irq to another cpumask. This migration is
3323 * done in the process context using interrupt-remapping hardware. 3353 * done in the process context using interrupt-remapping hardware.
3324 */ 3354 */
3325static void ir_set_msi_irq_affinity(unsigned int irq, 3355static void
3326 const struct cpumask *mask) 3356ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3327{ 3357{
3328 struct irq_desc *desc = irq_to_desc(irq); 3358 struct irq_desc *desc = irq_to_desc(irq);
3329 struct irq_cfg *cfg; 3359 struct irq_cfg *cfg = desc->chip_data;
3330 unsigned int dest; 3360 unsigned int dest;
3331 cpumask_t tmp, cleanup_mask;
3332 struct irte irte; 3361 struct irte irte;
3333 3362
3334 if (!cpumask_intersects(mask, cpu_online_mask))
3335 return;
3336
3337 if (get_irte(irq, &irte)) 3363 if (get_irte(irq, &irte))
3338 return; 3364 return;
3339 3365
3340 cfg = desc->chip_data; 3366 dest = set_desc_affinity(desc, mask);
3341 if (assign_irq_vector(irq, cfg, *mask)) 3367 if (dest == BAD_APICID)
3342 return; 3368 return;
3343 3369
3344 set_extra_move_desc(desc, *mask);
3345
3346 cpumask_and(&tmp, &cfg->domain, mask);
3347 dest = cpu_mask_to_apicid(tmp);
3348
3349 irte.vector = cfg->vector; 3370 irte.vector = cfg->vector;
3350 irte.dest_id = IRTE_DEST(dest); 3371 irte.dest_id = IRTE_DEST(dest);
3351 3372
@@ -3359,14 +3380,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq,
3359 * at the new destination. So, time to cleanup the previous 3380 * at the new destination. So, time to cleanup the previous
3360 * vector allocation. 3381 * vector allocation.
3361 */ 3382 */
3362 if (cfg->move_in_progress) { 3383 if (cfg->move_in_progress)
3363 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 3384 send_cleanup_vector(cfg);
3364 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
3365 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
3366 cfg->move_in_progress = 0;
3367 }
3368
3369 cpumask_copy(&desc->affinity, mask);
3370} 3385}
3371 3386
3372#endif 3387#endif
@@ -3563,19 +3578,12 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3563 struct irq_cfg *cfg; 3578 struct irq_cfg *cfg;
3564 struct msi_msg msg; 3579 struct msi_msg msg;
3565 unsigned int dest; 3580 unsigned int dest;
3566 cpumask_t tmp;
3567 3581
3568 if (!cpumask_intersects(mask, cpu_online_mask)) 3582 dest = set_desc_affinity(desc, mask);
3583 if (dest == BAD_APICID)
3569 return; 3584 return;
3570 3585
3571 cfg = desc->chip_data; 3586 cfg = desc->chip_data;
3572 if (assign_irq_vector(irq, cfg, *mask))
3573 return;
3574
3575 set_extra_move_desc(desc, *mask);
3576
3577 cpumask_and(&tmp, &cfg->domain, mask);
3578 dest = cpu_mask_to_apicid(tmp);
3579 3587
3580 dmar_msi_read(irq, &msg); 3588 dmar_msi_read(irq, &msg);
3581 3589
@@ -3585,7 +3593,6 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3585 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3593 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3586 3594
3587 dmar_msi_write(irq, &msg); 3595 dmar_msi_write(irq, &msg);
3588 cpumask_copy(&desc->affinity, mask);
3589} 3596}
3590 3597
3591#endif /* CONFIG_SMP */ 3598#endif /* CONFIG_SMP */
@@ -3625,19 +3632,12 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3625 struct irq_cfg *cfg; 3632 struct irq_cfg *cfg;
3626 struct msi_msg msg; 3633 struct msi_msg msg;
3627 unsigned int dest; 3634 unsigned int dest;
3628 cpumask_t tmp;
3629 3635
3630 if (!cpumask_intersects(mask, cpu_online_mask)) 3636 dest = set_desc_affinity(desc, mask);
3637 if (dest == BAD_APICID)
3631 return; 3638 return;
3632 3639
3633 cfg = desc->chip_data; 3640 cfg = desc->chip_data;
3634 if (assign_irq_vector(irq, cfg, *mask))
3635 return;
3636
3637 set_extra_move_desc(desc, *mask);
3638
3639 cpumask_and(&tmp, &cfg->domain, mask);
3640 dest = cpu_mask_to_apicid(tmp);
3641 3641
3642 hpet_msi_read(irq, &msg); 3642 hpet_msi_read(irq, &msg);
3643 3643
@@ -3647,7 +3647,6 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3647 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3647 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3648 3648
3649 hpet_msi_write(irq, &msg); 3649 hpet_msi_write(irq, &msg);
3650 cpumask_copy(&desc->affinity, mask);
3651} 3650}
3652 3651
3653#endif /* CONFIG_SMP */ 3652#endif /* CONFIG_SMP */
@@ -3707,22 +3706,14 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3707 struct irq_desc *desc = irq_to_desc(irq); 3706 struct irq_desc *desc = irq_to_desc(irq);
3708 struct irq_cfg *cfg; 3707 struct irq_cfg *cfg;
3709 unsigned int dest; 3708 unsigned int dest;
3710 cpumask_t tmp;
3711 3709
3712 if (!cpumask_intersects(mask, cpu_online_mask)) 3710 dest = set_desc_affinity(desc, mask);
3711 if (dest == BAD_APICID)
3713 return; 3712 return;
3714 3713
3715 cfg = desc->chip_data; 3714 cfg = desc->chip_data;
3716 if (assign_irq_vector(irq, cfg, *mask))
3717 return;
3718
3719 set_extra_move_desc(desc, *mask);
3720
3721 cpumask_and(&tmp, &cfg->domain, mask);
3722 dest = cpu_mask_to_apicid(tmp);
3723 3715
3724 target_ht_irq(irq, dest, cfg->vector); 3716 target_ht_irq(irq, dest, cfg->vector);
3725 cpumask_copy(&desc->affinity, mask);
3726} 3717}
3727 3718
3728#endif 3719#endif
@@ -3742,17 +3733,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3742{ 3733{
3743 struct irq_cfg *cfg; 3734 struct irq_cfg *cfg;
3744 int err; 3735 int err;
3745 cpumask_t tmp;
3746 3736
3747 cfg = irq_cfg(irq); 3737 cfg = irq_cfg(irq);
3748 tmp = TARGET_CPUS; 3738 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3749 err = assign_irq_vector(irq, cfg, tmp);
3750 if (!err) { 3739 if (!err) {
3751 struct ht_irq_msg msg; 3740 struct ht_irq_msg msg;
3752 unsigned dest; 3741 unsigned dest;
3753 3742
3754 cpus_and(tmp, cfg->domain, tmp); 3743 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3755 dest = cpu_mask_to_apicid(tmp);
3756 3744
3757 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3745 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3758 3746
@@ -3788,7 +3776,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3788int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, 3776int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3789 unsigned long mmr_offset) 3777 unsigned long mmr_offset)
3790{ 3778{
3791 const cpumask_t *eligible_cpu = get_cpu_mask(cpu); 3779 const struct cpumask *eligible_cpu = cpumask_of(cpu);
3792 struct irq_cfg *cfg; 3780 struct irq_cfg *cfg;
3793 int mmr_pnode; 3781 int mmr_pnode;
3794 unsigned long mmr_value; 3782 unsigned long mmr_value;
@@ -3798,7 +3786,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3798 3786
3799 cfg = irq_cfg(irq); 3787 cfg = irq_cfg(irq);
3800 3788
3801 err = assign_irq_vector(irq, cfg, *eligible_cpu); 3789 err = assign_irq_vector(irq, cfg, eligible_cpu);
3802 if (err != 0) 3790 if (err != 0)
3803 return err; 3791 return err;
3804 3792
@@ -3817,7 +3805,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3817 entry->polarity = 0; 3805 entry->polarity = 0;
3818 entry->trigger = 0; 3806 entry->trigger = 0;
3819 entry->mask = 0; 3807 entry->mask = 0;
3820 entry->dest = cpu_mask_to_apicid(*eligible_cpu); 3808 entry->dest = cpu_mask_to_apicid(eligible_cpu);
3821 3809
3822 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3810 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3823 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3811 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -4028,7 +4016,7 @@ void __init setup_ioapic_dest(void)
4028 int pin, ioapic, irq, irq_entry; 4016 int pin, ioapic, irq, irq_entry;
4029 struct irq_desc *desc; 4017 struct irq_desc *desc;
4030 struct irq_cfg *cfg; 4018 struct irq_cfg *cfg;
4031 cpumask_t mask; 4019 const struct cpumask *mask;
4032 4020
4033 if (skip_ioapic_setup == 1) 4021 if (skip_ioapic_setup == 1)
4034 return; 4022 return;
@@ -4059,7 +4047,7 @@ void __init setup_ioapic_dest(void)
4059 */ 4047 */
4060 if (desc->status & 4048 if (desc->status &
4061 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4049 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4062 mask = desc->affinity; 4050 mask = &desc->affinity;
4063 else 4051 else
4064 mask = TARGET_CPUS; 4052 mask = TARGET_CPUS;
4065 4053
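The ->set_affinity handlers above all follow the same new pattern: the open-coded online-mask check, vector assignment and desc->affinity bookkeeping are delegated to a common set_desc_affinity() helper, and the handler bails out when it reports BAD_APICID. The helper itself is introduced elsewhere in this series; judging purely from these call sites it plausibly looks like the sketch below (the function name is real, the body and its ordering are an assumption, not quoted from the patch):

	/* Sketch only -- reconstructed from the callers, not the patch itself. */
	static unsigned int
	set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
	{
		struct irq_cfg *cfg = desc->chip_data;

		/* Nothing online in the requested mask: report failure. */
		if (!cpumask_intersects(mask, cpu_online_mask))
			return BAD_APICID;

		if (assign_irq_vector(desc->irq, cfg, mask))
			return BAD_APICID;

		/* Record the new affinity and pick a destination APIC id. */
		cpumask_copy(&desc->affinity, mask);
		return cpu_mask_to_apicid_and(cfg->domain, mask);
	}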
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f35..285bbf8831fa 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
116/* 116/*
117 * This is only used on smaller machines. 117 * This is only used on smaller machines.
118 */ 118 */
119void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) 119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
120{ 120{
121 unsigned long mask = cpus_addr(cpumask)[0]; 121 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags; 122 unsigned long flags;
123 123
124 local_irq_save(flags); 124 local_irq_save(flags);
125 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); 125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector); 126 __send_IPI_dest_field(mask, vector);
127 local_irq_restore(flags); 127 local_irq_restore(flags);
128} 128}
129 129
130void send_IPI_mask_sequence(cpumask_t mask, int vector) 130void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
131{ 131{
132 unsigned long flags; 132 unsigned long flags;
133 unsigned int query_cpu; 133 unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
139 */ 139 */
140 140
141 local_irq_save(flags); 141 local_irq_save(flags);
142 for_each_possible_cpu(query_cpu) { 142 for_each_cpu(query_cpu, mask)
143 if (cpu_isset(query_cpu, mask)) { 143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145}
146
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
148{
149 unsigned long flags;
150 unsigned int query_cpu;
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154
155 local_irq_save(flags);
156 for_each_cpu(query_cpu, mask)
157 if (query_cpu != this_cpu)
144 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), 158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
145 vector); 159 vector);
146 }
147 }
148 local_irq_restore(flags); 160 local_irq_restore(flags);
149} 161}
150 162
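With the mask now passed by pointer, the senders walk only the bits that are actually set (for_each_cpu()), and the new send_IPI_mask_allbutself() additionally skips the sending CPU. Callers simply hand in an existing mask or cpumask_of(); the calls below are illustrative only, mirroring the prototypes introduced here:

	/* Illustrative calls against the new const struct cpumask * API. */
	send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
	send_IPI_mask_allbutself(cpu_online_mask, CALL_FUNCTION_VECTOR);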
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3f1d9d18df67..bce53e1352a0 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
9#include <asm/apic.h> 9#include <asm/apic.h>
10#include <asm/io_apic.h> 10#include <asm/io_apic.h>
11#include <asm/smp.h> 11#include <asm/smp.h>
12#include <asm/irq.h>
12 13
13atomic_t irq_err_count; 14atomic_t irq_err_count;
14 15
@@ -190,3 +191,5 @@ u64 arch_irq_stat(void)
190#endif 191#endif
191 return sum; 192 return sum;
192} 193}
194
195EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 9cf9cbbf7a02..9dc5588f336a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
233#ifdef CONFIG_HOTPLUG_CPU 233#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h> 234#include <mach_apic.h>
235 235
236void fixup_irqs(cpumask_t map) 236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void)
237{ 238{
238 unsigned int irq; 239 unsigned int irq;
239 static int warned; 240 static int warned;
240 struct irq_desc *desc; 241 struct irq_desc *desc;
241 242
242 for_each_irq_desc(irq, desc) { 243 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 244 const struct cpumask *affinity;
244 245
245 if (!desc) 246 if (!desc)
246 continue; 247 continue;
247 if (irq == 2) 248 if (irq == 2)
248 continue; 249 continue;
249 250
250 cpus_and(mask, desc->affinity, map); 251 affinity = &desc->affinity;
251 if (any_online_cpu(mask) == NR_CPUS) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
252 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
253 mask = map; 254 affinity = cpu_all_mask;
254 } 255 }
255 if (desc->chip->set_affinity) 256 if (desc->chip->set_affinity)
256 desc->chip->set_affinity(irq, &mask); 257 desc->chip->set_affinity(irq, affinity);
257 else if (desc->action && !(warned++)) 258 else if (desc->action && !(warned++))
258 printk("Cannot set affinity for irq %i\n", irq); 259 printk("Cannot set affinity for irq %i\n", irq);
259 } 260 }
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 54c69d47a771..6383d50f82ea 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -80,16 +80,17 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
80} 80}
81 81
82#ifdef CONFIG_HOTPLUG_CPU 82#ifdef CONFIG_HOTPLUG_CPU
83void fixup_irqs(cpumask_t map) 83/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
84void fixup_irqs(void)
84{ 85{
85 unsigned int irq; 86 unsigned int irq;
86 static int warned; 87 static int warned;
87 struct irq_desc *desc; 88 struct irq_desc *desc;
88 89
89 for_each_irq_desc(irq, desc) { 90 for_each_irq_desc(irq, desc) {
90 cpumask_t mask;
91 int break_affinity = 0; 91 int break_affinity = 0;
92 int set_affinity = 1; 92 int set_affinity = 1;
93 const struct cpumask *affinity;
93 94
94 if (!desc) 95 if (!desc)
95 continue; 96 continue;
@@ -99,23 +100,23 @@ void fixup_irqs(cpumask_t map)
99 /* interrupt's are disabled at this point */ 100 /* interrupt's are disabled at this point */
100 spin_lock(&desc->lock); 101 spin_lock(&desc->lock);
101 102
103 affinity = &desc->affinity;
102 if (!irq_has_action(irq) || 104 if (!irq_has_action(irq) ||
103 cpus_equal(desc->affinity, map)) { 105 cpumask_equal(affinity, cpu_online_mask)) {
104 spin_unlock(&desc->lock); 106 spin_unlock(&desc->lock);
105 continue; 107 continue;
106 } 108 }
107 109
108 cpus_and(mask, desc->affinity, map); 110 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
109 if (cpus_empty(mask)) {
110 break_affinity = 1; 111 break_affinity = 1;
111 mask = map; 112 affinity = cpu_all_mask;
112 } 113 }
113 114
114 if (desc->chip->mask) 115 if (desc->chip->mask)
115 desc->chip->mask(irq); 116 desc->chip->mask(irq);
116 117
117 if (desc->chip->set_affinity) 118 if (desc->chip->set_affinity)
118 desc->chip->set_affinity(irq, &mask); 119 desc->chip->set_affinity(irq, affinity);
119 else if (!(warned++)) 120 else if (!(warned++))
120 set_affinity = 0; 121 set_affinity = 0;
121 122
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 203384ed2b5d..84723295f88a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
110 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 110 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
111}; 111};
112 112
113int vector_used_by_percpu_irq(unsigned int vector)
114{
115 int cpu;
116
117 for_each_online_cpu(cpu) {
118 if (per_cpu(vector_irq, cpu)[vector] != -1)
119 return 1;
120 }
121
122 return 0;
123}
124
113/* Overridden in paravirt.c */ 125/* Overridden in paravirt.c */
114void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 126void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
115 127
@@ -146,10 +158,12 @@ void __init native_init_IRQ(void)
146 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 158 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
147 159
148 /* IPI for single call function */ 160 /* IPI for single call function */
149 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); 161 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
162 call_function_single_interrupt);
150 163
151 /* Low priority IPI to cleanup after moving an irq */ 164 /* Low priority IPI to cleanup after moving an irq */
152 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 165 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
166 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
153#endif 167#endif
154 168
155#ifdef CONFIG_X86_LOCAL_APIC 169#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 6190e6ef546c..31ebfe38e96c 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -69,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
70}; 70};
71 71
72int vector_used_by_percpu_irq(unsigned int vector)
73{
74 int cpu;
75
76 for_each_online_cpu(cpu) {
77 if (per_cpu(vector_irq, cpu)[vector] != -1)
78 return 1;
79 }
80
81 return 0;
82}
83
72void __init init_ISA_irqs(void) 84void __init init_ISA_irqs(void)
73{ 85{
74 int i; 86 int i;
@@ -121,6 +133,7 @@ static void __init smp_intr_init(void)
121 133
122 /* Low priority IPI to cleanup after moving an irq */ 134 /* Low priority IPI to cleanup after moving an irq */
123 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 135 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
136 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
124#endif 137#endif
125} 138}
126 139
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 61f718df6eec..ba7b9a0e6063 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -592,10 +592,7 @@ static int crash_nmi_callback(struct notifier_block *self,
592 592
593static void smp_send_nmi_allbutself(void) 593static void smp_send_nmi_allbutself(void)
594{ 594{
595 cpumask_t mask = cpu_online_map; 595 send_IPI_allbutself(NMI_VECTOR);
596 cpu_clear(safe_smp_processor_id(), mask);
597 if (!cpus_empty(mask))
598 send_IPI_mask(mask, NMI_VECTOR);
599} 596}
600 597
601static struct notifier_block crash_nmi_nb = { 598static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 8e8b1193add5..49f3f709ee1f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void)
152 old_size = PERCPU_ENOUGH_ROOM; 152 old_size = PERCPU_ENOUGH_ROOM;
153 align = max_t(unsigned long, PAGE_SIZE, align); 153 align = max_t(unsigned long, PAGE_SIZE, align);
154 size = roundup(old_size, align); 154 size = roundup(old_size, align);
155
156 printk(KERN_INFO
157 "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
158 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
159
155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 160 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
156 size); 161 size);
157 162
@@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void)
168 "cpu %d has no node %d or node-local memory\n", 173 "cpu %d has no node %d or node-local memory\n",
169 cpu, node); 174 cpu, node);
170 if (ptr) 175 if (ptr)
171 printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", 176 printk(KERN_DEBUG
177 "per cpu data for cpu%d at %016lx\n",
172 cpu, __pa(ptr)); 178 cpu, __pa(ptr));
173 } 179 }
174 else { 180 else {
175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 181 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
176 __pa(MAX_DMA_ADDRESS)); 182 __pa(MAX_DMA_ADDRESS));
177 if (ptr) 183 if (ptr)
178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 184 printk(KERN_DEBUG
179 cpu, node, __pa(ptr)); 185 "per cpu data for cpu%d on node%d "
186 "at %016lx\n",
187 cpu, node, __pa(ptr));
180 } 188 }
181#endif 189#endif
182 per_cpu_offset(cpu) = ptr - __per_cpu_start; 190 per_cpu_offset(cpu) = ptr - __per_cpu_start;
183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 191 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
184 } 192 }
185 193
186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
187 NR_CPUS, nr_cpu_ids, nr_node_ids);
188
189 /* Setup percpu data maps */ 194 /* Setup percpu data maps */
190 setup_per_cpu_maps(); 195 setup_per_cpu_maps();
191 196
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7e558db362c1..beea2649a240 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 118 WARN_ON(1);
119 return; 119 return;
120 } 120 }
121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 121 send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 122}
123 123
124void native_send_call_func_single_ipi(int cpu) 124void native_send_call_func_single_ipi(int cpu)
125{ 125{
126 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); 126 send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
127} 127}
128 128
129void native_send_call_func_ipi(cpumask_t mask) 129void native_send_call_func_ipi(const struct cpumask *mask)
130{ 130{
131 cpumask_t allbutself; 131 cpumask_t allbutself;
132 132
133 allbutself = cpu_online_map; 133 allbutself = cpu_online_map;
134 cpu_clear(smp_processor_id(), allbutself); 134 cpu_clear(smp_processor_id(), allbutself);
135 135
136 if (cpus_equal(mask, allbutself) && 136 if (cpus_equal(*mask, allbutself) &&
137 cpus_equal(cpu_online_map, cpu_callout_map)) 137 cpus_equal(cpu_online_map, cpu_callout_map))
138 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 138 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
139 else 139 else
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c5392058cd07..9e177a4077ee 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1259,6 +1259,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1259 check_nmi_watchdog(); 1259 check_nmi_watchdog();
1260} 1260}
1261 1261
1262static int __initdata setup_possible_cpus = -1;
1263static int __init _setup_possible_cpus(char *str)
1264{
1265 get_option(&str, &setup_possible_cpus);
1266 return 0;
1267}
1268early_param("possible_cpus", _setup_possible_cpus);
1269
1270
1262/* 1271/*
1263 * cpu_possible_map should be static, it cannot change as cpu's 1272 * cpu_possible_map should be static, it cannot change as cpu's
1264 * are onlined, or offlined. The reason is per-cpu data-structures 1273 * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1271,7 +1280,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1271 * 1280 *
1272 * Three ways to find out the number of additional hotplug CPUs: 1281 * Three ways to find out the number of additional hotplug CPUs:
1273 * - If the BIOS specified disabled CPUs in ACPI/mptables use that. 1282 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
1274 * - The user can overwrite it with additional_cpus=NUM 1283 * - The user can overwrite it with possible_cpus=NUM
1275 * - Otherwise don't reserve additional CPUs. 1284 * - Otherwise don't reserve additional CPUs.
1276 * We do this because additional CPUs waste a lot of memory. 1285 * We do this because additional CPUs waste a lot of memory.
1277 * -AK 1286 * -AK
@@ -1284,9 +1293,17 @@ __init void prefill_possible_map(void)
1284 if (!num_processors) 1293 if (!num_processors)
1285 num_processors = 1; 1294 num_processors = 1;
1286 1295
1287 possible = num_processors + disabled_cpus; 1296 if (setup_possible_cpus == -1)
1288 if (possible > NR_CPUS) 1297 possible = num_processors + disabled_cpus;
1289 possible = NR_CPUS; 1298 else
1299 possible = setup_possible_cpus;
1300
1301 if (possible > CONFIG_NR_CPUS) {
1302 printk(KERN_WARNING
1303 "%d Processors exceeds NR_CPUS limit of %d\n",
1304 possible, CONFIG_NR_CPUS);
1305 possible = CONFIG_NR_CPUS;
1306 }
1290 1307
1291 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1308 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
1292 possible, max_t(int, possible - num_processors, 0)); 1309 possible, max_t(int, possible - num_processors, 0));
@@ -1351,7 +1368,7 @@ void cpu_disable_common(void)
1351 lock_vector_lock(); 1368 lock_vector_lock();
1352 remove_cpu_from_maps(cpu); 1369 remove_cpu_from_maps(cpu);
1353 unlock_vector_lock(); 1370 unlock_vector_lock();
1354 fixup_irqs(cpu_online_map); 1371 fixup_irqs();
1355} 1372}
1356 1373
1357int native_cpu_disable(void) 1374int native_cpu_disable(void)
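prefill_possible_map() now honours a new possible_cpus= early parameter and clamps the result to CONFIG_NR_CPUS with a warning instead of silently truncating. For example, booting with the following on the kernel command line (the value 4 is only an example) sizes cpu_possible_map, and therefore every per-cpu allocation, for four CPUs regardless of how many are present at boot:

	possible_cpus=4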
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 8da059f949be..ce5054642247 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
163 * We have to send the IPI only to 163 * We have to send the IPI only to
164 * CPUs affected. 164 * CPUs affected.
165 */ 165 */
166 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); 166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
167 167
168 while (!cpus_empty(flush_cpumask)) 168 while (!cpus_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */ 169 /* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 29887d7081a9..f8be6f1d2e48 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 191 * We have to send the IPI only to
192 * CPUs affected. 192 * CPUs affected.
193 */ 193 */
194 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); 194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
195 195
196 while (!cpus_empty(f->flush_cpumask)) 196 while (!cpus_empty(f->flush_cpumask))
197 cpu_relax(); 197 cpu_relax();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 141907ab6e22..2d1f4c7e4052 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,9 +72,6 @@
72 72
73#include "cpu/mcheck/mce.h" 73#include "cpu/mcheck/mce.h"
74 74
75DECLARE_BITMAP(used_vectors, NR_VECTORS);
76EXPORT_SYMBOL_GPL(used_vectors);
77
78asmlinkage int system_call(void); 75asmlinkage int system_call(void);
79 76
80/* Do we ignore FPU interrupts ? */ 77/* Do we ignore FPU interrupts ? */
@@ -89,6 +86,9 @@ gate_desc idt_table[256]
89 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 86 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
90#endif 87#endif
91 88
89DECLARE_BITMAP(used_vectors, NR_VECTORS);
90EXPORT_SYMBOL_GPL(used_vectors);
91
92static int ignore_nmis; 92static int ignore_nmis;
93 93
94static inline void conditional_sti(struct pt_regs *regs) 94static inline void conditional_sti(struct pt_regs *regs)
@@ -941,9 +941,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
941 941
942void __init trap_init(void) 942void __init trap_init(void)
943{ 943{
944#ifdef CONFIG_X86_32
945 int i; 944 int i;
946#endif
947 945
948#ifdef CONFIG_EISA 946#ifdef CONFIG_EISA
949 void __iomem *p = early_ioremap(0x0FFFD9, 4); 947 void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1000,11 +998,15 @@ void __init trap_init(void)
1000 } 998 }
1001 999
1002 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 1000 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
1001#endif
1003 1002
1004 /* Reserve all the builtin and the syscall vector: */ 1003 /* Reserve all the builtin and the syscall vector: */
1005 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 1004 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
1006 set_bit(i, used_vectors); 1005 set_bit(i, used_vectors);
1007 1006
1007#ifdef CONFIG_X86_64
1008 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
1009#else
1008 set_bit(SYSCALL_VECTOR, used_vectors); 1010 set_bit(SYSCALL_VECTOR, used_vectors);
1009#endif 1011#endif
1010 /* 1012 /*
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3624a364b7f3..bc4c7840b2a8 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
42 { } 42 { }
43}; 43};
44 44
45static cpumask_t vector_allocation_domain(int cpu) 45static void vector_allocation_domain(int cpu, cpumask_t *retmask)
46{ 46{
47 return cpumask_of_cpu(cpu); 47 cpus_clear(*retmask);
48 cpu_set(cpu, *retmask);
48} 49}
49 50
50static int probe_bigsmp(void) 51static int probe_bigsmp(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 7b4e6d0d1690..4ba5ccaa1584 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
87} 87}
88#endif 88#endif
89 89
90static cpumask_t vector_allocation_domain(int cpu) 90static void vector_allocation_domain(int cpu, cpumask_t *retmask)
91{ 91{
92 /* Careful. Some cpus do not strictly honor the set of cpus 92 /* Careful. Some cpus do not strictly honor the set of cpus
93 * specified in the interrupt destination when using lowest 93 * specified in the interrupt destination when using lowest
@@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu)
97 * deliver interrupts to the wrong hyperthread when only one 97 * deliver interrupts to the wrong hyperthread when only one
98 * hyperthread was specified in the interrupt desitination. 98 * hyperthread was specified in the interrupt desitination.
99 */ 99 */
100 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 100 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
101 return domain;
102} 101}
103 102
104struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); 103struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 71a309b122e6..511d7941364f 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
38 return 0; 38 return 0;
39} 39}
40 40
41static cpumask_t vector_allocation_domain(int cpu) 41static void vector_allocation_domain(int cpu, cpumask_t *retmask)
42{ 42{
43 /* Careful. Some cpus do not strictly honor the set of cpus 43 /* Careful. Some cpus do not strictly honor the set of cpus
44 * specified in the interrupt destination when using lowest 44 * specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu)
48 * deliver interrupts to the wrong hyperthread when only one 48 * deliver interrupts to the wrong hyperthread when only one
49 * hyperthread was specified in the interrupt desitination. 49 * hyperthread was specified in the interrupt desitination.
50 */ 50 */
51 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 51 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
52 return domain;
53} 52}
54 53
55struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); 54struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 2c6d234e0009..2821ffc188b5 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -24,7 +24,7 @@ static int probe_summit(void)
24 return 0; 24 return 0;
25} 25}
26 26
27static cpumask_t vector_allocation_domain(int cpu) 27static void vector_allocation_domain(int cpu, cpumask_t *retmask)
28{ 28{
29 /* Careful. Some cpus do not strictly honor the set of cpus 29 /* Careful. Some cpus do not strictly honor the set of cpus
30 * specified in the interrupt destination when using lowest 30 * specified in the interrupt destination when using lowest
@@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu)
34 * deliver interrupts to the wrong hyperthread when only one 34 * deliver interrupts to the wrong hyperthread when only one
35 * hyperthread was specified in the interrupt desitination. 35 * hyperthread was specified in the interrupt desitination.
36 */ 36 */
37 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 37 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
38 return domain;
39} 38}
40 39
41struct genapic apic_summit = APIC_INIT("summit", probe_summit); 40struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9c990185e9f2..a5bc05492b1e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -672,7 +672,7 @@ void __init smp_boot_cpus(void)
672 672
673 /* loop over all the extended VIC CPUs and boot them. The 673 /* loop over all the extended VIC CPUs and boot them. The
674 * Quad CPUs must be bootstrapped by their extended VIC cpu */ 674 * Quad CPUs must be bootstrapped by their extended VIC cpu */
675 for (i = 0; i < NR_CPUS; i++) { 675 for (i = 0; i < nr_cpu_ids; i++) {
676 if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) 676 if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
677 continue; 677 continue;
678 do_boot_cpu(i); 678 do_boot_cpu(i);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf152d46..71a14f89f89e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
278 int rr, i; 278 int rr, i;
279 279
280 rr = first_node(node_online_map); 280 rr = first_node(node_online_map);
281 for (i = 0; i < NR_CPUS; i++) { 281 for (i = 0; i < nr_cpu_ids; i++) {
282 if (early_cpu_to_node(i) != NUMA_NO_NODE) 282 if (early_cpu_to_node(i) != NUMA_NO_NODE)
283 continue; 283 continue;
284 numa_set_node(i, rr); 284 numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
549 memnodemap[0] = 0; 549 memnodemap[0] = 0;
550 node_set_online(0); 550 node_set_online(0);
551 node_set(0, node_possible_map); 551 node_set(0, node_possible_map);
552 for (i = 0; i < NR_CPUS; i++) 552 for (i = 0; i < nr_cpu_ids; i++)
553 numa_set_node(i, 0); 553 numa_set_node(i, 0);
554 e820_register_active_regions(0, start_pfn, last_pfn); 554 e820_register_active_regions(0, start_pfn, last_pfn);
555 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); 555 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2fc14fe..09737c8af074 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
382 if (!node_online(i)) 382 if (!node_online(i))
383 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 383 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
384 384
385 for (i = 0; i < NR_CPUS; i++) { 385 for (i = 0; i < nr_cpu_ids; i++) {
386 int node = early_cpu_to_node(i); 386 int node = early_cpu_to_node(i);
387 387
388 if (node == NUMA_NO_NODE) 388 if (node == NUMA_NO_NODE)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 773d68d3e912..503c240e26c7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info)
1082 1082
1083static void xen_drop_mm_ref(struct mm_struct *mm) 1083static void xen_drop_mm_ref(struct mm_struct *mm)
1084{ 1084{
1085 cpumask_t mask; 1085 cpumask_var_t mask;
1086 unsigned cpu; 1086 unsigned cpu;
1087 1087
1088 if (current->active_mm == mm) { 1088 if (current->active_mm == mm) {
@@ -1094,7 +1094,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1094 } 1094 }
1095 1095
1096 /* Get the "official" set of cpus referring to our pagetable. */ 1096 /* Get the "official" set of cpus referring to our pagetable. */
1097 mask = mm->cpu_vm_mask; 1097 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
1098 for_each_online_cpu(cpu) {
1099 if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
1100 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
1101 continue;
1102 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
1103 }
1104 return;
1105 }
1106 cpumask_copy(mask, &mm->cpu_vm_mask);
1098 1107
1099 /* It's possible that a vcpu may have a stale reference to our 1108 /* It's possible that a vcpu may have a stale reference to our
1100 cr3, because its in lazy mode, and it hasn't yet flushed 1109 cr3, because its in lazy mode, and it hasn't yet flushed
@@ -1103,11 +1112,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1103 if needed. */ 1112 if needed. */
1104 for_each_online_cpu(cpu) { 1113 for_each_online_cpu(cpu) {
1105 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) 1114 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
1106 cpu_set(cpu, mask); 1115 cpumask_set_cpu(cpu, mask);
1107 } 1116 }
1108 1117
1109 if (!cpus_empty(mask)) 1118 if (!cpumask_empty(mask))
1110 smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); 1119 smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
1120 free_cpumask_var(mask);
1111} 1121}
1112#else 1122#else
1113static void xen_drop_mm_ref(struct mm_struct *mm) 1123static void xen_drop_mm_ref(struct mm_struct *mm)
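xen_drop_mm_ref() is converted to the cpumask_var_t API: the scratch mask is allocated with alloc_cpumask_var(GFP_ATOMIC) instead of living on the stack, with a slower per-cpu fallback if the allocation fails. The same allocate/copy/iterate/free pattern, shown here as a stand-alone sketch with a made-up function name, is:

	/* Minimal sketch of the cpumask_var_t pattern; example_* is hypothetical. */
	static void example_scan_users(struct mm_struct *mm)
	{
		cpumask_var_t scratch;
		unsigned int cpu;

		if (!alloc_cpumask_var(&scratch, GFP_ATOMIC))
			return;	/* real callers need a fallback path, as above */

		cpumask_copy(scratch, &mm->cpu_vm_mask);
		for_each_cpu(cpu, scratch)
			pr_debug("mm %p is in use on cpu %u\n", mm, cpu);

		free_cpumask_var(scratch);
	}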
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index acd9b6705e02..c44e2069c7c7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -33,7 +33,7 @@
33#include "xen-ops.h" 33#include "xen-ops.h"
34#include "mmu.h" 34#include "mmu.h"
35 35
36cpumask_t xen_cpu_initialized_map; 36cpumask_var_t xen_cpu_initialized_map;
37 37
38static DEFINE_PER_CPU(int, resched_irq); 38static DEFINE_PER_CPU(int, resched_irq);
39static DEFINE_PER_CPU(int, callfunc_irq); 39static DEFINE_PER_CPU(int, callfunc_irq);
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void)
158{ 158{
159 int i, rc; 159 int i, rc;
160 160
161 for (i = 0; i < NR_CPUS; i++) { 161 for (i = 0; i < nr_cpu_ids; i++) {
162 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); 162 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
163 if (rc >= 0) { 163 if (rc >= 0) {
164 num_processors++; 164 num_processors++;
@@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
192 if (xen_smp_intr_init(0)) 192 if (xen_smp_intr_init(0))
193 BUG(); 193 BUG();
194 194
195 xen_cpu_initialized_map = cpumask_of_cpu(0); 195 if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
196 panic("could not allocate xen_cpu_initialized_map\n");
197
198 cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
196 199
197 /* Restrict the possible_map according to max_cpus. */ 200 /* Restrict the possible_map according to max_cpus. */
198 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { 201 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
199 for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) 202 for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
200 continue; 203 continue;
201 cpu_clear(cpu, cpu_possible_map); 204 cpu_clear(cpu, cpu_possible_map);
202 } 205 }
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
221 struct vcpu_guest_context *ctxt; 224 struct vcpu_guest_context *ctxt;
222 struct desc_struct *gdt; 225 struct desc_struct *gdt;
223 226
224 if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) 227 if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
225 return 0; 228 return 0;
226 229
227 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); 230 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
@@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu)
408 xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); 411 xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
409} 412}
410 413
411static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) 414static void xen_send_IPI_mask(const struct cpumask *mask,
415 enum ipi_vector vector)
412{ 416{
413 unsigned cpu; 417 unsigned cpu;
414 418
415 cpus_and(mask, mask, cpu_online_map); 419 for_each_cpu_and(cpu, mask, cpu_online_mask)
416
417 for_each_cpu_mask_nr(cpu, mask)
418 xen_send_IPI_one(cpu, vector); 420 xen_send_IPI_one(cpu, vector);
419} 421}
420 422
421static void xen_smp_send_call_function_ipi(cpumask_t mask) 423static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
422{ 424{
423 int cpu; 425 int cpu;
424 426
425 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); 427 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
426 428
427 /* Make sure other vcpus get a chance to run if they need to. */ 429 /* Make sure other vcpus get a chance to run if they need to. */
428 for_each_cpu_mask_nr(cpu, mask) { 430 for_each_cpu(cpu, mask) {
429 if (xen_vcpu_stolen(cpu)) { 431 if (xen_vcpu_stolen(cpu)) {
430 HYPERVISOR_sched_op(SCHEDOP_yield, 0); 432 HYPERVISOR_sched_op(SCHEDOP_yield, 0);
431 break; 433 break;
@@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask)
435 437
436static void xen_smp_send_call_function_single_ipi(int cpu) 438static void xen_smp_send_call_function_single_ipi(int cpu)
437{ 439{
438 xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); 440 xen_send_IPI_mask(cpumask_of(cpu),
441 XEN_CALL_FUNCTION_SINGLE_VECTOR);
439} 442}
440 443
441static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) 444static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 2a234db5949b..212ffe012b76 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled)
35 pfn_to_mfn(xen_start_info->console.domU.mfn); 35 pfn_to_mfn(xen_start_info->console.domU.mfn);
36 } else { 36 } else {
37#ifdef CONFIG_SMP 37#ifdef CONFIG_SMP
38 xen_cpu_initialized_map = cpu_online_map; 38 BUG_ON(xen_cpu_initialized_map == NULL);
39 cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
39#endif 40#endif
40 xen_vcpu_restore(); 41 xen_vcpu_restore();
41 } 42 }
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9e1afae8461f..c1f8faf0a2c5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void);
58__cpuinit void xen_init_lock_cpu(int cpu); 58__cpuinit void xen_init_lock_cpu(int cpu);
59void xen_uninit_lock_cpu(int cpu); 59void xen_uninit_lock_cpu(int cpu);
60 60
61extern cpumask_t xen_cpu_initialized_map; 61extern cpumask_var_t xen_cpu_initialized_map;
62#else 62#else
63static inline void xen_smp_init(void) {} 63static inline void xen_smp_init(void) {}
64#endif 64#endif
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a1039068f95c..415fab0125ac 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -222,11 +222,16 @@ bool check_syscall_vector(struct lguest *lg)
222int init_interrupts(void) 222int init_interrupts(void)
223{ 223{
224 /* If they want some strange system call vector, reserve it now */ 224 /* If they want some strange system call vector, reserve it now */
225 if (syscall_vector != SYSCALL_VECTOR 225 if (syscall_vector != SYSCALL_VECTOR) {
226 && test_and_set_bit(syscall_vector, used_vectors)) { 226 if (test_bit(syscall_vector, used_vectors) ||
227 printk("lg: couldn't reserve syscall %u\n", syscall_vector); 227 vector_used_by_percpu_irq(syscall_vector)) {
228 return -EBUSY; 228 printk(KERN_ERR "lg: couldn't reserve syscall %u\n",
229 syscall_vector);
230 return -EBUSY;
231 }
232 set_bit(syscall_vector, used_vectors);
229 } 233 }
234
230 return 0; 235 return 0;
231} 236}
232 237
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8395e715809d..158d53d07765 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -250,7 +250,7 @@ extern void init_idle_bootup_task(struct task_struct *idle);
250extern int runqueue_is_locked(void); 250extern int runqueue_is_locked(void);
251extern void task_rq_unlock_wait(struct task_struct *p); 251extern void task_rq_unlock_wait(struct task_struct *p);
252 252
253extern cpumask_t nohz_cpu_mask; 253extern cpumask_var_t nohz_cpu_mask;
254#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) 254#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
255extern int select_nohz_load_balancer(int cpu); 255extern int select_nohz_load_balancer(int cpu);
256#else 256#else
@@ -758,20 +758,51 @@ enum cpu_idle_type {
758#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ 758#define SD_SERIALIZE 1024 /* Only a single load balancing instance */
759#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ 759#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */
760 760
761#define BALANCE_FOR_MC_POWER \ 761enum powersavings_balance_level {
762 (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) 762 POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */
763 POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package
764 * first for long running threads
765 */
766 POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle
767 * cpu package for power savings
768 */
769 MAX_POWERSAVINGS_BALANCE_LEVELS
770};
763 771
764#define BALANCE_FOR_PKG_POWER \ 772extern int sched_mc_power_savings, sched_smt_power_savings;
765 ((sched_mc_power_savings || sched_smt_power_savings) ? \
766 SD_POWERSAVINGS_BALANCE : 0)
767 773
768#define test_sd_parent(sd, flag) ((sd->parent && \ 774static inline int sd_balance_for_mc_power(void)
769 (sd->parent->flags & flag)) ? 1 : 0) 775{
776 if (sched_smt_power_savings)
777 return SD_POWERSAVINGS_BALANCE;
770 778
779 return 0;
780}
781
782static inline int sd_balance_for_package_power(void)
783{
784 if (sched_mc_power_savings | sched_smt_power_savings)
785 return SD_POWERSAVINGS_BALANCE;
786
787 return 0;
788}
789
790/*
791 * Optimise SD flags for power savings:
792 * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
793 * Keep default SD flags if sched_{smt,mc}_power_saving=0
794 */
795
796static inline int sd_power_saving_flags(void)
797{
798 if (sched_mc_power_savings | sched_smt_power_savings)
799 return SD_BALANCE_NEWIDLE;
800
801 return 0;
802}
771 803
772struct sched_group { 804struct sched_group {
773 struct sched_group *next; /* Must be a circular list */ 805 struct sched_group *next; /* Must be a circular list */
774 cpumask_t cpumask;
775 806
776 /* 807 /*
777 * CPU power of this group, SCHED_LOAD_SCALE being max power for a 808 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@ -784,8 +815,15 @@ struct sched_group {
784 * (see include/linux/reciprocal_div.h) 815 * (see include/linux/reciprocal_div.h)
785 */ 816 */
786 u32 reciprocal_cpu_power; 817 u32 reciprocal_cpu_power;
818
819 unsigned long cpumask[];
787}; 820};
788 821
822static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
823{
824 return to_cpumask(sg->cpumask);
825}
826
789enum sched_domain_level { 827enum sched_domain_level {
790 SD_LV_NONE = 0, 828 SD_LV_NONE = 0,
791 SD_LV_SIBLING, 829 SD_LV_SIBLING,
@@ -809,7 +847,6 @@ struct sched_domain {
809 struct sched_domain *parent; /* top domain must be null terminated */ 847 struct sched_domain *parent; /* top domain must be null terminated */
810 struct sched_domain *child; /* bottom domain must be null terminated */ 848 struct sched_domain *child; /* bottom domain must be null terminated */
811 struct sched_group *groups; /* the balancing groups of the domain */ 849 struct sched_group *groups; /* the balancing groups of the domain */
812 cpumask_t span; /* span of all CPUs in this domain */
813 unsigned long min_interval; /* Minimum balance interval ms */ 850 unsigned long min_interval; /* Minimum balance interval ms */
814 unsigned long max_interval; /* Maximum balance interval ms */ 851 unsigned long max_interval; /* Maximum balance interval ms */
815 unsigned int busy_factor; /* less balancing by factor if busy */ 852 unsigned int busy_factor; /* less balancing by factor if busy */
@@ -864,18 +901,35 @@ struct sched_domain {
864#ifdef CONFIG_SCHED_DEBUG 901#ifdef CONFIG_SCHED_DEBUG
865 char *name; 902 char *name;
866#endif 903#endif
904
905 /* span of all CPUs in this domain */
906 unsigned long span[];
867}; 907};
868 908
869extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, 909static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
910{
911 return to_cpumask(sd->span);
912}
913
914extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
870 struct sched_domain_attr *dattr_new); 915 struct sched_domain_attr *dattr_new);
871extern int arch_reinit_sched_domains(void); 916extern int arch_reinit_sched_domains(void);
872 917
918/* Test a flag in parent sched domain */
919static inline int test_sd_parent(struct sched_domain *sd, int flag)
920{
921 if (sd->parent && (sd->parent->flags & flag))
922 return 1;
923
924 return 0;
925}
926
873#else /* CONFIG_SMP */ 927#else /* CONFIG_SMP */
874 928
875struct sched_domain_attr; 929struct sched_domain_attr;
876 930
877static inline void 931static inline void
878partition_sched_domains(int ndoms_new, cpumask_t *doms_new, 932partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
879 struct sched_domain_attr *dattr_new) 933 struct sched_domain_attr *dattr_new)
880{ 934{
881} 935}
@@ -926,7 +980,7 @@ struct sched_class {
926 void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); 980 void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
927 981
928 void (*set_cpus_allowed)(struct task_struct *p, 982 void (*set_cpus_allowed)(struct task_struct *p,
929 const cpumask_t *newmask); 983 const struct cpumask *newmask);
930 984
931 void (*rq_online)(struct rq *rq); 985 void (*rq_online)(struct rq *rq);
932 void (*rq_offline)(struct rq *rq); 986 void (*rq_offline)(struct rq *rq);
@@ -1579,12 +1633,12 @@ extern cputime_t task_gtime(struct task_struct *p);
1579 1633
1580#ifdef CONFIG_SMP 1634#ifdef CONFIG_SMP
1581extern int set_cpus_allowed_ptr(struct task_struct *p, 1635extern int set_cpus_allowed_ptr(struct task_struct *p,
1582 const cpumask_t *new_mask); 1636 const struct cpumask *new_mask);
1583#else 1637#else
1584static inline int set_cpus_allowed_ptr(struct task_struct *p, 1638static inline int set_cpus_allowed_ptr(struct task_struct *p,
1585 const cpumask_t *new_mask) 1639 const struct cpumask *new_mask)
1586{ 1640{
1587 if (!cpu_isset(0, *new_mask)) 1641 if (!cpumask_test_cpu(0, new_mask))
1588 return -EINVAL; 1642 return -EINVAL;
1589 return 0; 1643 return 0;
1590} 1644}
@@ -2195,10 +2249,8 @@ __trace_special(void *__tr, void *__data,
2195} 2249}
2196#endif 2250#endif
2197 2251
2198extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); 2252extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2199extern long sched_getaffinity(pid_t pid, cpumask_t *mask); 2253extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2200
2201extern int sched_mc_power_savings, sched_smt_power_savings;
2202 2254
2203extern void normalize_rt_tasks(void); 2255extern void normalize_rt_tasks(void);
2204 2256
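struct sched_group and struct sched_domain now keep their CPU masks in a trailing unsigned long [] flexible array, reached through sched_group_cpus() and sched_domain_span(), so the mask can be sized at allocation time rather than being a fixed NR_CPUS-sized cpumask_t embedded in the structure. Whoever allocates these structures must therefore reserve room for the mask as well; a minimal sketch of that sizing (not the scheduler's actual allocation code) looks like:

	/* Sketch: allocate a sched_group plus space for one cpumask. */
	struct sched_group *sg;

	sg = kzalloc(sizeof(*sg) + cpumask_size(), GFP_KERNEL);
	if (sg)
		cpumask_copy(sched_group_cpus(sg), cpu_online_mask);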
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 0c5b5ac36d8e..e632d29f0544 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -125,7 +125,8 @@ int arch_update_cpu_topology(void);
125 | SD_WAKE_AFFINE \ 125 | SD_WAKE_AFFINE \
126 | SD_WAKE_BALANCE \ 126 | SD_WAKE_BALANCE \
127 | SD_SHARE_PKG_RESOURCES\ 127 | SD_SHARE_PKG_RESOURCES\
128 | BALANCE_FOR_MC_POWER, \ 128 | sd_balance_for_mc_power()\
129 | sd_power_saving_flags(),\
129 .last_balance = jiffies, \ 130 .last_balance = jiffies, \
130 .balance_interval = 1, \ 131 .balance_interval = 1, \
131} 132}
@@ -150,7 +151,8 @@ int arch_update_cpu_topology(void);
150 | SD_BALANCE_FORK \ 151 | SD_BALANCE_FORK \
151 | SD_WAKE_AFFINE \ 152 | SD_WAKE_AFFINE \
152 | SD_WAKE_BALANCE \ 153 | SD_WAKE_BALANCE \
153 | BALANCE_FOR_PKG_POWER,\ 154 | sd_balance_for_package_power()\
155 | sd_power_saving_flags(),\
154 .last_balance = jiffies, \ 156 .last_balance = jiffies, \
155 .balance_interval = 1, \ 157 .balance_interval = 1, \
156} 158}
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 0ff9b05706a6..6ec495f60ead 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -394,8 +394,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
394 * unnecessarily. 394 * unnecessarily.
395 */ 395 */
396 smp_mb(); 396 smp_mb();
397 cpumask_andnot(to_cpumask(rcp->cpumask), 397 cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask);
398 cpu_online_mask, &nohz_cpu_mask);
399 398
400 rcp->signaled = 0; 399 rcp->signaled = 0;
401 } 400 }
diff --git a/kernel/sched.c b/kernel/sched.c
index f2095660efec..27ba1d642f0f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -498,18 +498,26 @@ struct rt_rq {
498 */ 498 */
499struct root_domain { 499struct root_domain {
500 atomic_t refcount; 500 atomic_t refcount;
501 cpumask_t span; 501 cpumask_var_t span;
502 cpumask_t online; 502 cpumask_var_t online;
503 503
504 /* 504 /*
505 * The "RT overload" flag: it gets set if a CPU has more than 505 * The "RT overload" flag: it gets set if a CPU has more than
506 * one runnable RT task. 506 * one runnable RT task.
507 */ 507 */
508 cpumask_t rto_mask; 508 cpumask_var_t rto_mask;
509 atomic_t rto_count; 509 atomic_t rto_count;
510#ifdef CONFIG_SMP 510#ifdef CONFIG_SMP
511 struct cpupri cpupri; 511 struct cpupri cpupri;
512#endif 512#endif
513#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
514 /*
515 * Preferred wake up cpu nominated by sched_mc balance that will be
516 * used when most cpus are idle in the system indicating overall very
517 * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
518 */
519 unsigned int sched_mc_preferred_wakeup_cpu;
520#endif
513}; 521};
514 522
515/* 523/*
@@ -1514,7 +1522,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1514 struct sched_domain *sd = data; 1522 struct sched_domain *sd = data;
1515 int i; 1523 int i;
1516 1524
1517 for_each_cpu_mask(i, sd->span) { 1525 for_each_cpu(i, sched_domain_span(sd)) {
1518 /* 1526 /*
1519 * If there are currently no tasks on the cpu pretend there 1527 * If there are currently no tasks on the cpu pretend there
1520 * is one of average load so that when a new task gets to 1528 * is one of average load so that when a new task gets to
@@ -1535,7 +1543,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1535 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) 1543 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
1536 shares = tg->shares; 1544 shares = tg->shares;
1537 1545
1538 for_each_cpu_mask(i, sd->span) 1546 for_each_cpu(i, sched_domain_span(sd))
1539 update_group_shares_cpu(tg, i, shares, rq_weight); 1547 update_group_shares_cpu(tg, i, shares, rq_weight);
1540 1548
1541 return 0; 1549 return 0;
@@ -2101,15 +2109,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2101 int i; 2109 int i;
2102 2110
2103 /* Skip over this group if it has no CPUs allowed */ 2111 /* Skip over this group if it has no CPUs allowed */
2104 if (!cpus_intersects(group->cpumask, p->cpus_allowed)) 2112 if (!cpumask_intersects(sched_group_cpus(group),
2113 &p->cpus_allowed))
2105 continue; 2114 continue;
2106 2115
2107 local_group = cpu_isset(this_cpu, group->cpumask); 2116 local_group = cpumask_test_cpu(this_cpu,
2117 sched_group_cpus(group));
2108 2118
2109 /* Tally up the load of all CPUs in the group */ 2119 /* Tally up the load of all CPUs in the group */
2110 avg_load = 0; 2120 avg_load = 0;
2111 2121
2112 for_each_cpu_mask_nr(i, group->cpumask) { 2122 for_each_cpu(i, sched_group_cpus(group)) {
2113 /* Bias balancing toward cpus of our domain */ 2123 /* Bias balancing toward cpus of our domain */
2114 if (local_group) 2124 if (local_group)
2115 load = source_load(i, load_idx); 2125 load = source_load(i, load_idx);
@@ -2141,17 +2151,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2141 * find_idlest_cpu - find the idlest cpu among the cpus in group. 2151 * find_idlest_cpu - find the idlest cpu among the cpus in group.
2142 */ 2152 */
2143static int 2153static int
2144find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, 2154find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
2145 cpumask_t *tmp)
2146{ 2155{
2147 unsigned long load, min_load = ULONG_MAX; 2156 unsigned long load, min_load = ULONG_MAX;
2148 int idlest = -1; 2157 int idlest = -1;
2149 int i; 2158 int i;
2150 2159
2151 /* Traverse only the allowed CPUs */ 2160 /* Traverse only the allowed CPUs */
2152 cpus_and(*tmp, group->cpumask, p->cpus_allowed); 2161 for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
2153
2154 for_each_cpu_mask_nr(i, *tmp) {
2155 load = weighted_cpuload(i); 2162 load = weighted_cpuload(i);
2156 2163
2157 if (load < min_load || (load == min_load && i == this_cpu)) { 2164 if (load < min_load || (load == min_load && i == this_cpu)) {
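
The find_idlest_cpu() hunk above shows the second recurring transformation: rather than building a scratch cpumask_t with cpus_and() and then walking it, the intersection is iterated directly with for_each_cpu_and(), so the temporary mask (and the tmp argument that carried it) disappears entirely; the same rewrite removes the span/tmpmask locals from sched_balance_self() a few hunks below. Old versus new, as a hedged fragment rather than literal code from either column of the diff:

    /* before: a full NR_CPUS-bit temporary on the stack */
    cpumask_t tmp;
    cpus_and(tmp, group->cpumask, p->cpus_allowed);
    for_each_cpu_mask_nr(i, tmp)
        load = min(load, weighted_cpuload(i));

    /* after: walk the intersection in place, no scratch mask at all */
    for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed)
        load = min(load, weighted_cpuload(i));
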
@@ -2193,7 +2200,6 @@ static int sched_balance_self(int cpu, int flag)
2193 update_shares(sd); 2200 update_shares(sd);
2194 2201
2195 while (sd) { 2202 while (sd) {
2196 cpumask_t span, tmpmask;
2197 struct sched_group *group; 2203 struct sched_group *group;
2198 int new_cpu, weight; 2204 int new_cpu, weight;
2199 2205
@@ -2202,14 +2208,13 @@ static int sched_balance_self(int cpu, int flag)
2202 continue; 2208 continue;
2203 } 2209 }
2204 2210
2205 span = sd->span;
2206 group = find_idlest_group(sd, t, cpu); 2211 group = find_idlest_group(sd, t, cpu);
2207 if (!group) { 2212 if (!group) {
2208 sd = sd->child; 2213 sd = sd->child;
2209 continue; 2214 continue;
2210 } 2215 }
2211 2216
2212 new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); 2217 new_cpu = find_idlest_cpu(group, t, cpu);
2213 if (new_cpu == -1 || new_cpu == cpu) { 2218 if (new_cpu == -1 || new_cpu == cpu) {
2214 /* Now try balancing at a lower domain level of cpu */ 2219 /* Now try balancing at a lower domain level of cpu */
2215 sd = sd->child; 2220 sd = sd->child;
@@ -2218,10 +2223,10 @@ static int sched_balance_self(int cpu, int flag)
2218 2223
2219 /* Now try balancing at a lower domain level of new_cpu */ 2224 /* Now try balancing at a lower domain level of new_cpu */
2220 cpu = new_cpu; 2225 cpu = new_cpu;
2226 weight = cpumask_weight(sched_domain_span(sd));
2221 sd = NULL; 2227 sd = NULL;
2222 weight = cpus_weight(span);
2223 for_each_domain(cpu, tmp) { 2228 for_each_domain(cpu, tmp) {
2224 if (weight <= cpus_weight(tmp->span)) 2229 if (weight <= cpumask_weight(sched_domain_span(tmp)))
2225 break; 2230 break;
2226 if (tmp->flags & flag) 2231 if (tmp->flags & flag)
2227 sd = tmp; 2232 sd = tmp;
@@ -2266,7 +2271,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2266 cpu = task_cpu(p); 2271 cpu = task_cpu(p);
2267 2272
2268 for_each_domain(this_cpu, sd) { 2273 for_each_domain(this_cpu, sd) {
2269 if (cpu_isset(cpu, sd->span)) { 2274 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2270 update_shares(sd); 2275 update_shares(sd);
2271 break; 2276 break;
2272 } 2277 }
@@ -2315,7 +2320,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2315 else { 2320 else {
2316 struct sched_domain *sd; 2321 struct sched_domain *sd;
2317 for_each_domain(this_cpu, sd) { 2322 for_each_domain(this_cpu, sd) {
2318 if (cpu_isset(cpu, sd->span)) { 2323 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2319 schedstat_inc(sd, ttwu_wake_remote); 2324 schedstat_inc(sd, ttwu_wake_remote);
2320 break; 2325 break;
2321 } 2326 }
@@ -2846,7 +2851,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
2846 struct rq *rq; 2851 struct rq *rq;
2847 2852
2848 rq = task_rq_lock(p, &flags); 2853 rq = task_rq_lock(p, &flags);
2849 if (!cpu_isset(dest_cpu, p->cpus_allowed) 2854 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
2850 || unlikely(!cpu_active(dest_cpu))) 2855 || unlikely(!cpu_active(dest_cpu)))
2851 goto out; 2856 goto out;
2852 2857
@@ -2911,7 +2916,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
2911 * 2) cannot be migrated to this CPU due to cpus_allowed, or 2916 * 2) cannot be migrated to this CPU due to cpus_allowed, or
2912 * 3) are cache-hot on their current CPU. 2917 * 3) are cache-hot on their current CPU.
2913 */ 2918 */
2914 if (!cpu_isset(this_cpu, p->cpus_allowed)) { 2919 if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
2915 schedstat_inc(p, se.nr_failed_migrations_affine); 2920 schedstat_inc(p, se.nr_failed_migrations_affine);
2916 return 0; 2921 return 0;
2917 } 2922 }
@@ -3086,7 +3091,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
3086static struct sched_group * 3091static struct sched_group *
3087find_busiest_group(struct sched_domain *sd, int this_cpu, 3092find_busiest_group(struct sched_domain *sd, int this_cpu,
3088 unsigned long *imbalance, enum cpu_idle_type idle, 3093 unsigned long *imbalance, enum cpu_idle_type idle,
3089 int *sd_idle, const cpumask_t *cpus, int *balance) 3094 int *sd_idle, const struct cpumask *cpus, int *balance)
3090{ 3095{
3091 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; 3096 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
3092 unsigned long max_load, avg_load, total_load, this_load, total_pwr; 3097 unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -3122,10 +3127,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3122 unsigned long sum_avg_load_per_task; 3127 unsigned long sum_avg_load_per_task;
3123 unsigned long avg_load_per_task; 3128 unsigned long avg_load_per_task;
3124 3129
3125 local_group = cpu_isset(this_cpu, group->cpumask); 3130 local_group = cpumask_test_cpu(this_cpu,
3131 sched_group_cpus(group));
3126 3132
3127 if (local_group) 3133 if (local_group)
3128 balance_cpu = first_cpu(group->cpumask); 3134 balance_cpu = cpumask_first(sched_group_cpus(group));
3129 3135
3130 /* Tally up the load of all CPUs in the group */ 3136 /* Tally up the load of all CPUs in the group */
3131 sum_weighted_load = sum_nr_running = avg_load = 0; 3137 sum_weighted_load = sum_nr_running = avg_load = 0;
@@ -3134,13 +3140,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3134 max_cpu_load = 0; 3140 max_cpu_load = 0;
3135 min_cpu_load = ~0UL; 3141 min_cpu_load = ~0UL;
3136 3142
3137 for_each_cpu_mask_nr(i, group->cpumask) { 3143 for_each_cpu_and(i, sched_group_cpus(group), cpus) {
3138 struct rq *rq; 3144 struct rq *rq = cpu_rq(i);
3139
3140 if (!cpu_isset(i, *cpus))
3141 continue;
3142
3143 rq = cpu_rq(i);
3144 3145
3145 if (*sd_idle && rq->nr_running) 3146 if (*sd_idle && rq->nr_running)
3146 *sd_idle = 0; 3147 *sd_idle = 0;
@@ -3251,8 +3252,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3251 */ 3252 */
3252 if ((sum_nr_running < min_nr_running) || 3253 if ((sum_nr_running < min_nr_running) ||
3253 (sum_nr_running == min_nr_running && 3254 (sum_nr_running == min_nr_running &&
3254 first_cpu(group->cpumask) < 3255 cpumask_first(sched_group_cpus(group)) >
3255 first_cpu(group_min->cpumask))) { 3256 cpumask_first(sched_group_cpus(group_min)))) {
3256 group_min = group; 3257 group_min = group;
3257 min_nr_running = sum_nr_running; 3258 min_nr_running = sum_nr_running;
3258 min_load_per_task = sum_weighted_load / 3259 min_load_per_task = sum_weighted_load /
@@ -3267,8 +3268,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3267 if (sum_nr_running <= group_capacity - 1) { 3268 if (sum_nr_running <= group_capacity - 1) {
3268 if (sum_nr_running > leader_nr_running || 3269 if (sum_nr_running > leader_nr_running ||
3269 (sum_nr_running == leader_nr_running && 3270 (sum_nr_running == leader_nr_running &&
3270 first_cpu(group->cpumask) > 3271 cpumask_first(sched_group_cpus(group)) <
3271 first_cpu(group_leader->cpumask))) { 3272 cpumask_first(sched_group_cpus(group_leader)))) {
3272 group_leader = group; 3273 group_leader = group;
3273 leader_nr_running = sum_nr_running; 3274 leader_nr_running = sum_nr_running;
3274 } 3275 }
@@ -3394,6 +3395,10 @@ out_balanced:
3394 3395
3395 if (this == group_leader && group_leader != group_min) { 3396 if (this == group_leader && group_leader != group_min) {
3396 *imbalance = min_load_per_task; 3397 *imbalance = min_load_per_task;
3398 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
3399 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
3400 cpumask_first(sched_group_cpus(group_leader));
3401 }
3397 return group_min; 3402 return group_min;
3398 } 3403 }
3399#endif 3404#endif
@@ -3407,16 +3412,16 @@ ret:
3407 */ 3412 */
3408static struct rq * 3413static struct rq *
3409find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, 3414find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3410 unsigned long imbalance, const cpumask_t *cpus) 3415 unsigned long imbalance, const struct cpumask *cpus)
3411{ 3416{
3412 struct rq *busiest = NULL, *rq; 3417 struct rq *busiest = NULL, *rq;
3413 unsigned long max_load = 0; 3418 unsigned long max_load = 0;
3414 int i; 3419 int i;
3415 3420
3416 for_each_cpu_mask_nr(i, group->cpumask) { 3421 for_each_cpu(i, sched_group_cpus(group)) {
3417 unsigned long wl; 3422 unsigned long wl;
3418 3423
3419 if (!cpu_isset(i, *cpus)) 3424 if (!cpumask_test_cpu(i, cpus))
3420 continue; 3425 continue;
3421 3426
3422 rq = cpu_rq(i); 3427 rq = cpu_rq(i);
@@ -3446,7 +3451,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3446 */ 3451 */
3447static int load_balance(int this_cpu, struct rq *this_rq, 3452static int load_balance(int this_cpu, struct rq *this_rq,
3448 struct sched_domain *sd, enum cpu_idle_type idle, 3453 struct sched_domain *sd, enum cpu_idle_type idle,
3449 int *balance, cpumask_t *cpus) 3454 int *balance, struct cpumask *cpus)
3450{ 3455{
3451 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 3456 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
3452 struct sched_group *group; 3457 struct sched_group *group;
@@ -3454,7 +3459,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3454 struct rq *busiest; 3459 struct rq *busiest;
3455 unsigned long flags; 3460 unsigned long flags;
3456 3461
3457 cpus_setall(*cpus); 3462 cpumask_setall(cpus);
3458 3463
3459 /* 3464 /*
3460 * When power savings policy is enabled for the parent domain, idle 3465 * When power savings policy is enabled for the parent domain, idle
@@ -3514,8 +3519,8 @@ redo:
3514 3519
3515 /* All tasks on this runqueue were pinned by CPU affinity */ 3520 /* All tasks on this runqueue were pinned by CPU affinity */
3516 if (unlikely(all_pinned)) { 3521 if (unlikely(all_pinned)) {
3517 cpu_clear(cpu_of(busiest), *cpus); 3522 cpumask_clear_cpu(cpu_of(busiest), cpus);
3518 if (!cpus_empty(*cpus)) 3523 if (!cpumask_empty(cpus))
3519 goto redo; 3524 goto redo;
3520 goto out_balanced; 3525 goto out_balanced;
3521 } 3526 }
@@ -3532,7 +3537,8 @@ redo:
3532 /* don't kick the migration_thread, if the curr 3537 /* don't kick the migration_thread, if the curr
3533 * task on busiest cpu can't be moved to this_cpu 3538 * task on busiest cpu can't be moved to this_cpu
3534 */ 3539 */
3535 if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { 3540 if (!cpumask_test_cpu(this_cpu,
3541 &busiest->curr->cpus_allowed)) {
3536 spin_unlock_irqrestore(&busiest->lock, flags); 3542 spin_unlock_irqrestore(&busiest->lock, flags);
3537 all_pinned = 1; 3543 all_pinned = 1;
3538 goto out_one_pinned; 3544 goto out_one_pinned;
@@ -3607,7 +3613,7 @@ out:
3607 */ 3613 */
3608static int 3614static int
3609load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, 3615load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3610 cpumask_t *cpus) 3616 struct cpumask *cpus)
3611{ 3617{
3612 struct sched_group *group; 3618 struct sched_group *group;
3613 struct rq *busiest = NULL; 3619 struct rq *busiest = NULL;
@@ -3616,7 +3622,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3616 int sd_idle = 0; 3622 int sd_idle = 0;
3617 int all_pinned = 0; 3623 int all_pinned = 0;
3618 3624
3619 cpus_setall(*cpus); 3625 cpumask_setall(cpus);
3620 3626
3621 /* 3627 /*
3622 * When power savings policy is enabled for the parent domain, idle 3628 * When power savings policy is enabled for the parent domain, idle
@@ -3660,17 +3666,71 @@ redo:
3660 double_unlock_balance(this_rq, busiest); 3666 double_unlock_balance(this_rq, busiest);
3661 3667
3662 if (unlikely(all_pinned)) { 3668 if (unlikely(all_pinned)) {
3663 cpu_clear(cpu_of(busiest), *cpus); 3669 cpumask_clear_cpu(cpu_of(busiest), cpus);
3664 if (!cpus_empty(*cpus)) 3670 if (!cpumask_empty(cpus))
3665 goto redo; 3671 goto redo;
3666 } 3672 }
3667 } 3673 }
3668 3674
3669 if (!ld_moved) { 3675 if (!ld_moved) {
3676 int active_balance = 0;
3677
3670 schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); 3678 schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
3671 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 3679 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
3672 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) 3680 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3673 return -1; 3681 return -1;
3682
3683 if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
3684 return -1;
3685
3686 if (sd->nr_balance_failed++ < 2)
3687 return -1;
3688
3689 /*
3690 * The only task running in a non-idle cpu can be moved to this
3691 * cpu in an attempt to completely free up the other CPU
3692 * package. The same method used to move a task in load_balance()
3693 * has been extended for load_balance_newidle() to speed up
3694 * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2)
3695 *
3696 * The package power saving logic comes from
3697 * find_busiest_group(). If there is no imbalance, then
3698 * f_b_g() will return NULL. However, when sched_mc={1,2},
3699 * f_b_g() will select a group from which a running task may be
3700 * pulled to this cpu in order to make the other package idle.
3701 * If there is no opportunity to make a package idle and if
3702 * there is no imbalance, then f_b_g() will return NULL and no
3703 * action will be taken in load_balance_newidle().
3704 *
3705 * Under normal task pull operation due to imbalance, there
3706 * will be more than one task in the source run queue and
3707 * move_tasks() will succeed. ld_moved will be true and this
3708 * active balance code will not be triggered.
3709 */
3710
3711 /* Lock busiest in correct order while this_rq is held */
3712 double_lock_balance(this_rq, busiest);
3713
3714 /*
3715 * don't kick the migration_thread, if the curr
3716 * task on busiest cpu can't be moved to this_cpu
3717 */
3718 if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
3719 double_unlock_balance(this_rq, busiest);
3720 all_pinned = 1;
3721 return ld_moved;
3722 }
3723
3724 if (!busiest->active_balance) {
3725 busiest->active_balance = 1;
3726 busiest->push_cpu = this_cpu;
3727 active_balance = 1;
3728 }
3729
3730 double_unlock_balance(this_rq, busiest);
3731 if (active_balance)
3732 wake_up_process(busiest->migration_thread);
3733
3674 } else 3734 } else
3675 sd->nr_balance_failed = 0; 3735 sd->nr_balance_failed = 0;
3676 3736
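
The block added to load_balance_newidle() above is the core of the POWERSAVINGS_BALANCE_WAKEUP behaviour: when a newly idle cpu could not pull anything the ordinary way and sched_mc is at level 2, it asks the busiest runqueue's migration thread to actively push its last running task over so the other package can be emptied. A condensed paraphrase of that hunk's control flow, for readability only (the real version's separate early returns are folded into one condition; the calls themselves are the ones in the hunk):

    int active_balance = 0;

    if (!ld_moved &&
        sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
        sd->nr_balance_failed++ >= 2) {
        double_lock_balance(this_rq, busiest);
        if (cpu_isset(this_cpu, busiest->curr->cpus_allowed) &&
            !busiest->active_balance) {
            busiest->active_balance = 1;    /* request an active push...    */
            busiest->push_cpu = this_cpu;   /* ...towards this now-idle cpu */
            active_balance = 1;
        }
        double_unlock_balance(this_rq, busiest);
        if (active_balance)
            wake_up_process(busiest->migration_thread);
    }
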
@@ -3696,7 +3756,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3696 struct sched_domain *sd; 3756 struct sched_domain *sd;
3697 int pulled_task = 0; 3757 int pulled_task = 0;
3698 unsigned long next_balance = jiffies + HZ; 3758 unsigned long next_balance = jiffies + HZ;
3699 cpumask_t tmpmask; 3759 cpumask_var_t tmpmask;
3760
3761 if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
3762 return;
3700 3763
3701 for_each_domain(this_cpu, sd) { 3764 for_each_domain(this_cpu, sd) {
3702 unsigned long interval; 3765 unsigned long interval;
@@ -3707,7 +3770,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3707 if (sd->flags & SD_BALANCE_NEWIDLE) 3770 if (sd->flags & SD_BALANCE_NEWIDLE)
3708 /* If we've pulled tasks over stop searching: */ 3771 /* If we've pulled tasks over stop searching: */
3709 pulled_task = load_balance_newidle(this_cpu, this_rq, 3772 pulled_task = load_balance_newidle(this_cpu, this_rq,
3710 sd, &tmpmask); 3773 sd, tmpmask);
3711 3774
3712 interval = msecs_to_jiffies(sd->balance_interval); 3775 interval = msecs_to_jiffies(sd->balance_interval);
3713 if (time_after(next_balance, sd->last_balance + interval)) 3776 if (time_after(next_balance, sd->last_balance + interval))
@@ -3722,6 +3785,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3722 */ 3785 */
3723 this_rq->next_balance = next_balance; 3786 this_rq->next_balance = next_balance;
3724 } 3787 }
3788 free_cpumask_var(tmpmask);
3725} 3789}
3726 3790
3727/* 3791/*
@@ -3759,7 +3823,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3759 /* Search for an sd spanning us and the target CPU. */ 3823 /* Search for an sd spanning us and the target CPU. */
3760 for_each_domain(target_cpu, sd) { 3824 for_each_domain(target_cpu, sd) {
3761 if ((sd->flags & SD_LOAD_BALANCE) && 3825 if ((sd->flags & SD_LOAD_BALANCE) &&
3762 cpu_isset(busiest_cpu, sd->span)) 3826 cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
3763 break; 3827 break;
3764 } 3828 }
3765 3829
@@ -3778,10 +3842,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3778#ifdef CONFIG_NO_HZ 3842#ifdef CONFIG_NO_HZ
3779static struct { 3843static struct {
3780 atomic_t load_balancer; 3844 atomic_t load_balancer;
3781 cpumask_t cpu_mask; 3845 cpumask_var_t cpu_mask;
3782} nohz ____cacheline_aligned = { 3846} nohz ____cacheline_aligned = {
3783 .load_balancer = ATOMIC_INIT(-1), 3847 .load_balancer = ATOMIC_INIT(-1),
3784 .cpu_mask = CPU_MASK_NONE,
3785}; 3848};
3786 3849
3787/* 3850/*
@@ -3809,7 +3872,7 @@ int select_nohz_load_balancer(int stop_tick)
3809 int cpu = smp_processor_id(); 3872 int cpu = smp_processor_id();
3810 3873
3811 if (stop_tick) { 3874 if (stop_tick) {
3812 cpu_set(cpu, nohz.cpu_mask); 3875 cpumask_set_cpu(cpu, nohz.cpu_mask);
3813 cpu_rq(cpu)->in_nohz_recently = 1; 3876 cpu_rq(cpu)->in_nohz_recently = 1;
3814 3877
3815 /* 3878 /*
@@ -3823,7 +3886,7 @@ int select_nohz_load_balancer(int stop_tick)
3823 } 3886 }
3824 3887
3825 /* time for ilb owner also to sleep */ 3888 /* time for ilb owner also to sleep */
3826 if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) { 3889 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
3827 if (atomic_read(&nohz.load_balancer) == cpu) 3890 if (atomic_read(&nohz.load_balancer) == cpu)
3828 atomic_set(&nohz.load_balancer, -1); 3891 atomic_set(&nohz.load_balancer, -1);
3829 return 0; 3892 return 0;
@@ -3836,10 +3899,10 @@ int select_nohz_load_balancer(int stop_tick)
3836 } else if (atomic_read(&nohz.load_balancer) == cpu) 3899 } else if (atomic_read(&nohz.load_balancer) == cpu)
3837 return 1; 3900 return 1;
3838 } else { 3901 } else {
3839 if (!cpu_isset(cpu, nohz.cpu_mask)) 3902 if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
3840 return 0; 3903 return 0;
3841 3904
3842 cpu_clear(cpu, nohz.cpu_mask); 3905 cpumask_clear_cpu(cpu, nohz.cpu_mask);
3843 3906
3844 if (atomic_read(&nohz.load_balancer) == cpu) 3907 if (atomic_read(&nohz.load_balancer) == cpu)
3845 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3908 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
@@ -3867,7 +3930,11 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3867 unsigned long next_balance = jiffies + 60*HZ; 3930 unsigned long next_balance = jiffies + 60*HZ;
3868 int update_next_balance = 0; 3931 int update_next_balance = 0;
3869 int need_serialize; 3932 int need_serialize;
3870 cpumask_t tmp; 3933 cpumask_var_t tmp;
3934
3935 /* Fails alloc? Rebalancing probably not a priority right now. */
3936 if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
3937 return;
3871 3938
3872 for_each_domain(cpu, sd) { 3939 for_each_domain(cpu, sd) {
3873 if (!(sd->flags & SD_LOAD_BALANCE)) 3940 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3892,7 +3959,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3892 } 3959 }
3893 3960
3894 if (time_after_eq(jiffies, sd->last_balance + interval)) { 3961 if (time_after_eq(jiffies, sd->last_balance + interval)) {
3895 if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { 3962 if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
3896 /* 3963 /*
3897 * We've pulled tasks over so either we're no 3964 * We've pulled tasks over so either we're no
3898 * longer idle, or one of our SMT siblings is 3965 * longer idle, or one of our SMT siblings is
@@ -3926,6 +3993,8 @@ out:
3926 */ 3993 */
3927 if (likely(update_next_balance)) 3994 if (likely(update_next_balance))
3928 rq->next_balance = next_balance; 3995 rq->next_balance = next_balance;
3996
3997 free_cpumask_var(tmp);
3929} 3998}
3930 3999
3931/* 4000/*
@@ -3950,12 +4019,13 @@ static void run_rebalance_domains(struct softirq_action *h)
3950 */ 4019 */
3951 if (this_rq->idle_at_tick && 4020 if (this_rq->idle_at_tick &&
3952 atomic_read(&nohz.load_balancer) == this_cpu) { 4021 atomic_read(&nohz.load_balancer) == this_cpu) {
3953 cpumask_t cpus = nohz.cpu_mask;
3954 struct rq *rq; 4022 struct rq *rq;
3955 int balance_cpu; 4023 int balance_cpu;
3956 4024
3957 cpu_clear(this_cpu, cpus); 4025 for_each_cpu(balance_cpu, nohz.cpu_mask) {
3958 for_each_cpu_mask_nr(balance_cpu, cpus) { 4026 if (balance_cpu == this_cpu)
4027 continue;
4028
3959 /* 4029 /*
3960 * If this cpu gets work to do, stop the load balancing 4030 * If this cpu gets work to do, stop the load balancing
3961 * work being done for other cpus. Next load 4031 * work being done for other cpus. Next load
@@ -3993,7 +4063,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
3993 rq->in_nohz_recently = 0; 4063 rq->in_nohz_recently = 0;
3994 4064
3995 if (atomic_read(&nohz.load_balancer) == cpu) { 4065 if (atomic_read(&nohz.load_balancer) == cpu) {
3996 cpu_clear(cpu, nohz.cpu_mask); 4066 cpumask_clear_cpu(cpu, nohz.cpu_mask);
3997 atomic_set(&nohz.load_balancer, -1); 4067 atomic_set(&nohz.load_balancer, -1);
3998 } 4068 }
3999 4069
@@ -4006,7 +4076,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4006 * TBD: Traverse the sched domains and nominate 4076 * TBD: Traverse the sched domains and nominate
4007 * the nearest cpu in the nohz.cpu_mask. 4077 * the nearest cpu in the nohz.cpu_mask.
4008 */ 4078 */
4009 int ilb = first_cpu(nohz.cpu_mask); 4079 int ilb = cpumask_first(nohz.cpu_mask);
4010 4080
4011 if (ilb < nr_cpu_ids) 4081 if (ilb < nr_cpu_ids)
4012 resched_cpu(ilb); 4082 resched_cpu(ilb);
@@ -4018,7 +4088,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4018 * cpus with ticks stopped, is it time for that to stop? 4088 * cpus with ticks stopped, is it time for that to stop?
4019 */ 4089 */
4020 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && 4090 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
4021 cpus_weight(nohz.cpu_mask) == num_online_cpus()) { 4091 cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
4022 resched_cpu(cpu); 4092 resched_cpu(cpu);
4023 return; 4093 return;
4024 } 4094 }
@@ -4028,7 +4098,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4028 * someone else, then no need raise the SCHED_SOFTIRQ 4098 * someone else, then no need raise the SCHED_SOFTIRQ
4029 */ 4099 */
4030 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && 4100 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
4031 cpu_isset(cpu, nohz.cpu_mask)) 4101 cpumask_test_cpu(cpu, nohz.cpu_mask))
4032 return; 4102 return;
4033#endif 4103#endif
4034 if (time_after_eq(jiffies, rq->next_balance)) 4104 if (time_after_eq(jiffies, rq->next_balance))
@@ -5401,10 +5471,9 @@ out_unlock:
5401 return retval; 5471 return retval;
5402} 5472}
5403 5473
5404long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) 5474long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5405{ 5475{
5406 cpumask_t cpus_allowed; 5476 cpumask_var_t cpus_allowed, new_mask;
5407 cpumask_t new_mask = *in_mask;
5408 struct task_struct *p; 5477 struct task_struct *p;
5409 int retval; 5478 int retval;
5410 5479
@@ -5426,6 +5495,14 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
5426 get_task_struct(p); 5495 get_task_struct(p);
5427 read_unlock(&tasklist_lock); 5496 read_unlock(&tasklist_lock);
5428 5497
5498 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
5499 retval = -ENOMEM;
5500 goto out_put_task;
5501 }
5502 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
5503 retval = -ENOMEM;
5504 goto out_free_cpus_allowed;
5505 }
5429 retval = -EPERM; 5506 retval = -EPERM;
5430 if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) 5507 if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
5431 goto out_unlock; 5508 goto out_unlock;
@@ -5434,37 +5511,41 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
5434 if (retval) 5511 if (retval)
5435 goto out_unlock; 5512 goto out_unlock;
5436 5513
5437 cpuset_cpus_allowed(p, &cpus_allowed); 5514 cpuset_cpus_allowed(p, cpus_allowed);
5438 cpus_and(new_mask, new_mask, cpus_allowed); 5515 cpumask_and(new_mask, in_mask, cpus_allowed);
5439 again: 5516 again:
5440 retval = set_cpus_allowed_ptr(p, &new_mask); 5517 retval = set_cpus_allowed_ptr(p, new_mask);
5441 5518
5442 if (!retval) { 5519 if (!retval) {
5443 cpuset_cpus_allowed(p, &cpus_allowed); 5520 cpuset_cpus_allowed(p, cpus_allowed);
5444 if (!cpus_subset(new_mask, cpus_allowed)) { 5521 if (!cpumask_subset(new_mask, cpus_allowed)) {
5445 /* 5522 /*
5446 * We must have raced with a concurrent cpuset 5523 * We must have raced with a concurrent cpuset
5447 * update. Just reset the cpus_allowed to the 5524 * update. Just reset the cpus_allowed to the
5448 * cpuset's cpus_allowed 5525 * cpuset's cpus_allowed
5449 */ 5526 */
5450 new_mask = cpus_allowed; 5527 cpumask_copy(new_mask, cpus_allowed);
5451 goto again; 5528 goto again;
5452 } 5529 }
5453 } 5530 }
5454out_unlock: 5531out_unlock:
5532 free_cpumask_var(new_mask);
5533out_free_cpus_allowed:
5534 free_cpumask_var(cpus_allowed);
5535out_put_task:
5455 put_task_struct(p); 5536 put_task_struct(p);
5456 put_online_cpus(); 5537 put_online_cpus();
5457 return retval; 5538 return retval;
5458} 5539}
5459 5540
5460static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, 5541static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5461 cpumask_t *new_mask) 5542 struct cpumask *new_mask)
5462{ 5543{
5463 if (len < sizeof(cpumask_t)) { 5544 if (len < cpumask_size())
5464 memset(new_mask, 0, sizeof(cpumask_t)); 5545 cpumask_clear(new_mask);
5465 } else if (len > sizeof(cpumask_t)) { 5546 else if (len > cpumask_size())
5466 len = sizeof(cpumask_t); 5547 len = cpumask_size();
5467 } 5548
5468 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; 5549 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
5469} 5550}
5470 5551
@@ -5477,17 +5558,20 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5477asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, 5558asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
5478 unsigned long __user *user_mask_ptr) 5559 unsigned long __user *user_mask_ptr)
5479{ 5560{
5480 cpumask_t new_mask; 5561 cpumask_var_t new_mask;
5481 int retval; 5562 int retval;
5482 5563
5483 retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); 5564 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
5484 if (retval) 5565 return -ENOMEM;
5485 return retval;
5486 5566
5487 return sched_setaffinity(pid, &new_mask); 5567 retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
5568 if (retval == 0)
5569 retval = sched_setaffinity(pid, new_mask);
5570 free_cpumask_var(new_mask);
5571 return retval;
5488} 5572}
5489 5573
5490long sched_getaffinity(pid_t pid, cpumask_t *mask) 5574long sched_getaffinity(pid_t pid, struct cpumask *mask)
5491{ 5575{
5492 struct task_struct *p; 5576 struct task_struct *p;
5493 int retval; 5577 int retval;
@@ -5504,7 +5588,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
5504 if (retval) 5588 if (retval)
5505 goto out_unlock; 5589 goto out_unlock;
5506 5590
5507 cpus_and(*mask, p->cpus_allowed, cpu_online_map); 5591 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
5508 5592
5509out_unlock: 5593out_unlock:
5510 read_unlock(&tasklist_lock); 5594 read_unlock(&tasklist_lock);
@@ -5523,19 +5607,24 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
5523 unsigned long __user *user_mask_ptr) 5607 unsigned long __user *user_mask_ptr)
5524{ 5608{
5525 int ret; 5609 int ret;
5526 cpumask_t mask; 5610 cpumask_var_t mask;
5527 5611
5528 if (len < sizeof(cpumask_t)) 5612 if (len < cpumask_size())
5529 return -EINVAL; 5613 return -EINVAL;
5530 5614
5531 ret = sched_getaffinity(pid, &mask); 5615 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
5532 if (ret < 0) 5616 return -ENOMEM;
5533 return ret;
5534 5617
5535 if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) 5618 ret = sched_getaffinity(pid, mask);
5536 return -EFAULT; 5619 if (ret == 0) {
5620 if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
5621 ret = -EFAULT;
5622 else
5623 ret = cpumask_size();
5624 }
5625 free_cpumask_var(mask);
5537 5626
5538 return sizeof(cpumask_t); 5627 return ret;
5539} 5628}
5540 5629
5541/** 5630/**
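
sys_sched_getaffinity() keeps its slightly unusual success value, now cpumask_size() instead of sizeof(cpumask_t): the raw syscall reports how many bytes of mask the kernel filled in, while the glibc sched_getaffinity() wrapper hides this and returns 0. Probing that width from user space is occasionally handy; a hedged example using the raw syscall (standard glibc/syscall interfaces, not part of this commit):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        unsigned long buf[128] = { 0 };      /* 8192 bits, plenty */
        long ret = syscall(SYS_sched_getaffinity, 0, sizeof(buf), buf);

        if (ret < 0) {
            perror("sched_getaffinity");
            return 1;
        }
        /* ret is the number of bytes of cpumask the kernel filled in */
        printf("kernel cpumask: %ld bytes (%ld bits)\n", ret, ret * 8);
        return 0;
    }
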
@@ -5877,7 +5966,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5877 idle->se.exec_start = sched_clock(); 5966 idle->se.exec_start = sched_clock();
5878 5967
5879 idle->prio = idle->normal_prio = MAX_PRIO; 5968 idle->prio = idle->normal_prio = MAX_PRIO;
5880 idle->cpus_allowed = cpumask_of_cpu(cpu); 5969 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
5881 __set_task_cpu(idle, cpu); 5970 __set_task_cpu(idle, cpu);
5882 5971
5883 rq->curr = rq->idle = idle; 5972 rq->curr = rq->idle = idle;
@@ -5904,9 +5993,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5904 * indicates which cpus entered this state. This is used 5993 * indicates which cpus entered this state. This is used
5905 * in the rcu update to wait only for active cpus. For system 5994 * in the rcu update to wait only for active cpus. For system
5906 * which do not switch off the HZ timer nohz_cpu_mask should 5995 * which do not switch off the HZ timer nohz_cpu_mask should
5907 * always be CPU_MASK_NONE. 5996 * always be CPU_BITS_NONE.
5908 */ 5997 */
5909cpumask_t nohz_cpu_mask = CPU_MASK_NONE; 5998cpumask_var_t nohz_cpu_mask;
5910 5999
5911/* 6000/*
5912 * Increase the granularity value when there are more CPUs, 6001 * Increase the granularity value when there are more CPUs,
@@ -5961,7 +6050,7 @@ static inline void sched_init_granularity(void)
5961 * task must not exit() & deallocate itself prematurely. The 6050 * task must not exit() & deallocate itself prematurely. The
5962 * call is not atomic; no spinlocks may be held. 6051 * call is not atomic; no spinlocks may be held.
5963 */ 6052 */
5964int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) 6053int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5965{ 6054{
5966 struct migration_req req; 6055 struct migration_req req;
5967 unsigned long flags; 6056 unsigned long flags;
@@ -5969,13 +6058,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
5969 int ret = 0; 6058 int ret = 0;
5970 6059
5971 rq = task_rq_lock(p, &flags); 6060 rq = task_rq_lock(p, &flags);
5972 if (!cpus_intersects(*new_mask, cpu_online_map)) { 6061 if (!cpumask_intersects(new_mask, cpu_online_mask)) {
5973 ret = -EINVAL; 6062 ret = -EINVAL;
5974 goto out; 6063 goto out;
5975 } 6064 }
5976 6065
5977 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && 6066 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
5978 !cpus_equal(p->cpus_allowed, *new_mask))) { 6067 !cpumask_equal(&p->cpus_allowed, new_mask))) {
5979 ret = -EINVAL; 6068 ret = -EINVAL;
5980 goto out; 6069 goto out;
5981 } 6070 }
@@ -5983,15 +6072,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
5983 if (p->sched_class->set_cpus_allowed) 6072 if (p->sched_class->set_cpus_allowed)
5984 p->sched_class->set_cpus_allowed(p, new_mask); 6073 p->sched_class->set_cpus_allowed(p, new_mask);
5985 else { 6074 else {
5986 p->cpus_allowed = *new_mask; 6075 cpumask_copy(&p->cpus_allowed, new_mask);
5987 p->rt.nr_cpus_allowed = cpus_weight(*new_mask); 6076 p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
5988 } 6077 }
5989 6078
5990 /* Can the task run on the task's current CPU? If so, we're done */ 6079 /* Can the task run on the task's current CPU? If so, we're done */
5991 if (cpu_isset(task_cpu(p), *new_mask)) 6080 if (cpumask_test_cpu(task_cpu(p), new_mask))
5992 goto out; 6081 goto out;
5993 6082
5994 if (migrate_task(p, any_online_cpu(*new_mask), &req)) { 6083 if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
5995 /* Need help from migration thread: drop lock and wait. */ 6084 /* Need help from migration thread: drop lock and wait. */
5996 task_rq_unlock(rq, &flags); 6085 task_rq_unlock(rq, &flags);
5997 wake_up_process(rq->migration_thread); 6086 wake_up_process(rq->migration_thread);
@@ -6033,7 +6122,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
6033 if (task_cpu(p) != src_cpu) 6122 if (task_cpu(p) != src_cpu)
6034 goto done; 6123 goto done;
6035 /* Affinity changed (again). */ 6124 /* Affinity changed (again). */
6036 if (!cpu_isset(dest_cpu, p->cpus_allowed)) 6125 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
6037 goto fail; 6126 goto fail;
6038 6127
6039 on_rq = p->se.on_rq; 6128 on_rq = p->se.on_rq;
@@ -6130,50 +6219,43 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
6130 */ 6219 */
6131static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) 6220static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
6132{ 6221{
6133 unsigned long flags;
6134 cpumask_t mask;
6135 struct rq *rq;
6136 int dest_cpu; 6222 int dest_cpu;
6223 /* FIXME: Use cpumask_of_node here. */
6224 cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
6225 const struct cpumask *nodemask = &_nodemask;
6226
6227again:
6228 /* Look for allowed, online CPU in same node. */
6229 for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
6230 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
6231 goto move;
6232
6233 /* Any allowed, online CPU? */
6234 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
6235 if (dest_cpu < nr_cpu_ids)
6236 goto move;
6237
6238 /* No more Mr. Nice Guy. */
6239 if (dest_cpu >= nr_cpu_ids) {
6240 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
6241 dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
6137 6242
6138 do { 6243 /*
6139 /* On same node? */ 6244 * Don't tell them about moving exiting tasks or
6140 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 6245 * kernel threads (both mm NULL), since they never
6141 cpus_and(mask, mask, p->cpus_allowed); 6246 * leave kernel.
6142 dest_cpu = any_online_cpu(mask); 6247 */
6143 6248 if (p->mm && printk_ratelimit()) {
6144 /* On any allowed CPU? */ 6249 printk(KERN_INFO "process %d (%s) no "
6145 if (dest_cpu >= nr_cpu_ids) 6250 "longer affine to cpu%d\n",
6146 dest_cpu = any_online_cpu(p->cpus_allowed); 6251 task_pid_nr(p), p->comm, dead_cpu);
6147
6148 /* No more Mr. Nice Guy. */
6149 if (dest_cpu >= nr_cpu_ids) {
6150 cpumask_t cpus_allowed;
6151
6152 cpuset_cpus_allowed_locked(p, &cpus_allowed);
6153 /*
6154 * Try to stay on the same cpuset, where the
6155 * current cpuset may be a subset of all cpus.
6156 * The cpuset_cpus_allowed_locked() variant of
6157 * cpuset_cpus_allowed() will not block. It must be
6158 * called within calls to cpuset_lock/cpuset_unlock.
6159 */
6160 rq = task_rq_lock(p, &flags);
6161 p->cpus_allowed = cpus_allowed;
6162 dest_cpu = any_online_cpu(p->cpus_allowed);
6163 task_rq_unlock(rq, &flags);
6164
6165 /*
6166 * Don't tell them about moving exiting tasks or
6167 * kernel threads (both mm NULL), since they never
6168 * leave kernel.
6169 */
6170 if (p->mm && printk_ratelimit()) {
6171 printk(KERN_INFO "process %d (%s) no "
6172 "longer affine to cpu%d\n",
6173 task_pid_nr(p), p->comm, dead_cpu);
6174 }
6175 } 6252 }
6176 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); 6253 }
6254
6255move:
6256 /* It can have affinity changed while we were choosing. */
6257 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
6258 goto again;
6177} 6259}
6178 6260
6179/* 6261/*
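
The rewritten move_task_off_dead_cpu() above trades the old do/while retry loop for an explicit cascade: first any allowed, online cpu in the dead cpu's node, then any allowed online cpu at all, and only then the cpuset fallback plus the "no longer affine" message. Compressed to show just that ordering (same calls as the hunk, reflowed for readability only):

    again:
        for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)   /* 1. same node */
            if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
                goto move;

        /* 2. any allowed, online cpu */
        dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);

        if (dest_cpu >= nr_cpu_ids) {
            /* 3. widen to the cpuset's mask; warn if p has an mm */
            cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
            dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
        }
    move:
        if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
            goto again;     /* affinity changed while we were choosing */
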
@@ -6185,7 +6267,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
6185 */ 6267 */
6186static void migrate_nr_uninterruptible(struct rq *rq_src) 6268static void migrate_nr_uninterruptible(struct rq *rq_src)
6187{ 6269{
6188 struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); 6270 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
6189 unsigned long flags; 6271 unsigned long flags;
6190 6272
6191 local_irq_save(flags); 6273 local_irq_save(flags);
@@ -6475,7 +6557,7 @@ static void set_rq_online(struct rq *rq)
6475 if (!rq->online) { 6557 if (!rq->online) {
6476 const struct sched_class *class; 6558 const struct sched_class *class;
6477 6559
6478 cpu_set(rq->cpu, rq->rd->online); 6560 cpumask_set_cpu(rq->cpu, rq->rd->online);
6479 rq->online = 1; 6561 rq->online = 1;
6480 6562
6481 for_each_class(class) { 6563 for_each_class(class) {
@@ -6495,7 +6577,7 @@ static void set_rq_offline(struct rq *rq)
6495 class->rq_offline(rq); 6577 class->rq_offline(rq);
6496 } 6578 }
6497 6579
6498 cpu_clear(rq->cpu, rq->rd->online); 6580 cpumask_clear_cpu(rq->cpu, rq->rd->online);
6499 rq->online = 0; 6581 rq->online = 0;
6500 } 6582 }
6501} 6583}
@@ -6536,7 +6618,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6536 rq = cpu_rq(cpu); 6618 rq = cpu_rq(cpu);
6537 spin_lock_irqsave(&rq->lock, flags); 6619 spin_lock_irqsave(&rq->lock, flags);
6538 if (rq->rd) { 6620 if (rq->rd) {
6539 BUG_ON(!cpu_isset(cpu, rq->rd->span)); 6621 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6540 6622
6541 set_rq_online(rq); 6623 set_rq_online(rq);
6542 } 6624 }
@@ -6550,7 +6632,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6550 break; 6632 break;
6551 /* Unbind it from offline cpu so it can run. Fall thru. */ 6633 /* Unbind it from offline cpu so it can run. Fall thru. */
6552 kthread_bind(cpu_rq(cpu)->migration_thread, 6634 kthread_bind(cpu_rq(cpu)->migration_thread,
6553 any_online_cpu(cpu_online_map)); 6635 cpumask_any(cpu_online_mask));
6554 kthread_stop(cpu_rq(cpu)->migration_thread); 6636 kthread_stop(cpu_rq(cpu)->migration_thread);
6555 cpu_rq(cpu)->migration_thread = NULL; 6637 cpu_rq(cpu)->migration_thread = NULL;
6556 break; 6638 break;
@@ -6600,7 +6682,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6600 rq = cpu_rq(cpu); 6682 rq = cpu_rq(cpu);
6601 spin_lock_irqsave(&rq->lock, flags); 6683 spin_lock_irqsave(&rq->lock, flags);
6602 if (rq->rd) { 6684 if (rq->rd) {
6603 BUG_ON(!cpu_isset(cpu, rq->rd->span)); 6685 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6604 set_rq_offline(rq); 6686 set_rq_offline(rq);
6605 } 6687 }
6606 spin_unlock_irqrestore(&rq->lock, flags); 6688 spin_unlock_irqrestore(&rq->lock, flags);
@@ -6639,13 +6721,13 @@ early_initcall(migration_init);
6639#ifdef CONFIG_SCHED_DEBUG 6721#ifdef CONFIG_SCHED_DEBUG
6640 6722
6641static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, 6723static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6642 cpumask_t *groupmask) 6724 struct cpumask *groupmask)
6643{ 6725{
6644 struct sched_group *group = sd->groups; 6726 struct sched_group *group = sd->groups;
6645 char str[256]; 6727 char str[256];
6646 6728
6647 cpulist_scnprintf(str, sizeof(str), &sd->span); 6729 cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
6648 cpus_clear(*groupmask); 6730 cpumask_clear(groupmask);
6649 6731
6650 printk(KERN_DEBUG "%*s domain %d: ", level, "", level); 6732 printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
6651 6733
@@ -6659,11 +6741,11 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6659 6741
6660 printk(KERN_CONT "span %s level %s\n", str, sd->name); 6742 printk(KERN_CONT "span %s level %s\n", str, sd->name);
6661 6743
6662 if (!cpu_isset(cpu, sd->span)) { 6744 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
6663 printk(KERN_ERR "ERROR: domain->span does not contain " 6745 printk(KERN_ERR "ERROR: domain->span does not contain "
6664 "CPU%d\n", cpu); 6746 "CPU%d\n", cpu);
6665 } 6747 }
6666 if (!cpu_isset(cpu, group->cpumask)) { 6748 if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
6667 printk(KERN_ERR "ERROR: domain->groups does not contain" 6749 printk(KERN_ERR "ERROR: domain->groups does not contain"
6668 " CPU%d\n", cpu); 6750 " CPU%d\n", cpu);
6669 } 6751 }
@@ -6683,31 +6765,32 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6683 break; 6765 break;
6684 } 6766 }
6685 6767
6686 if (!cpus_weight(group->cpumask)) { 6768 if (!cpumask_weight(sched_group_cpus(group))) {
6687 printk(KERN_CONT "\n"); 6769 printk(KERN_CONT "\n");
6688 printk(KERN_ERR "ERROR: empty group\n"); 6770 printk(KERN_ERR "ERROR: empty group\n");
6689 break; 6771 break;
6690 } 6772 }
6691 6773
6692 if (cpus_intersects(*groupmask, group->cpumask)) { 6774 if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
6693 printk(KERN_CONT "\n"); 6775 printk(KERN_CONT "\n");
6694 printk(KERN_ERR "ERROR: repeated CPUs\n"); 6776 printk(KERN_ERR "ERROR: repeated CPUs\n");
6695 break; 6777 break;
6696 } 6778 }
6697 6779
6698 cpus_or(*groupmask, *groupmask, group->cpumask); 6780 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
6699 6781
6700 cpulist_scnprintf(str, sizeof(str), &group->cpumask); 6782 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
6701 printk(KERN_CONT " %s", str); 6783 printk(KERN_CONT " %s", str);
6702 6784
6703 group = group->next; 6785 group = group->next;
6704 } while (group != sd->groups); 6786 } while (group != sd->groups);
6705 printk(KERN_CONT "\n"); 6787 printk(KERN_CONT "\n");
6706 6788
6707 if (!cpus_equal(sd->span, *groupmask)) 6789 if (!cpumask_equal(sched_domain_span(sd), groupmask))
6708 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); 6790 printk(KERN_ERR "ERROR: groups don't span domain->span\n");
6709 6791
6710 if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) 6792 if (sd->parent &&
6793 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
6711 printk(KERN_ERR "ERROR: parent span is not a superset " 6794 printk(KERN_ERR "ERROR: parent span is not a superset "
6712 "of domain->span\n"); 6795 "of domain->span\n");
6713 return 0; 6796 return 0;
@@ -6715,7 +6798,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6715 6798
6716static void sched_domain_debug(struct sched_domain *sd, int cpu) 6799static void sched_domain_debug(struct sched_domain *sd, int cpu)
6717{ 6800{
6718 cpumask_t *groupmask; 6801 cpumask_var_t groupmask;
6719 int level = 0; 6802 int level = 0;
6720 6803
6721 if (!sd) { 6804 if (!sd) {
@@ -6725,8 +6808,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6725 6808
6726 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); 6809 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
6727 6810
6728 groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 6811 if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
6729 if (!groupmask) {
6730 printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); 6812 printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
6731 return; 6813 return;
6732 } 6814 }
@@ -6739,7 +6821,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6739 if (!sd) 6821 if (!sd)
6740 break; 6822 break;
6741 } 6823 }
6742 kfree(groupmask); 6824 free_cpumask_var(groupmask);
6743} 6825}
6744#else /* !CONFIG_SCHED_DEBUG */ 6826#else /* !CONFIG_SCHED_DEBUG */
6745# define sched_domain_debug(sd, cpu) do { } while (0) 6827# define sched_domain_debug(sd, cpu) do { } while (0)
@@ -6747,7 +6829,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6747 6829
6748static int sd_degenerate(struct sched_domain *sd) 6830static int sd_degenerate(struct sched_domain *sd)
6749{ 6831{
6750 if (cpus_weight(sd->span) == 1) 6832 if (cpumask_weight(sched_domain_span(sd)) == 1)
6751 return 1; 6833 return 1;
6752 6834
6753 /* Following flags need at least 2 groups */ 6835 /* Following flags need at least 2 groups */
@@ -6778,7 +6860,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
6778 if (sd_degenerate(parent)) 6860 if (sd_degenerate(parent))
6779 return 1; 6861 return 1;
6780 6862
6781 if (!cpus_equal(sd->span, parent->span)) 6863 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
6782 return 0; 6864 return 0;
6783 6865
6784 /* Does parent contain flags not in child? */ 6866 /* Does parent contain flags not in child? */
@@ -6802,6 +6884,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
6802 return 1; 6884 return 1;
6803} 6885}
6804 6886
6887static void free_rootdomain(struct root_domain *rd)
6888{
6889 cpupri_cleanup(&rd->cpupri);
6890
6891 free_cpumask_var(rd->rto_mask);
6892 free_cpumask_var(rd->online);
6893 free_cpumask_var(rd->span);
6894 kfree(rd);
6895}
6896
6805static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6897static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6806{ 6898{
6807 unsigned long flags; 6899 unsigned long flags;
@@ -6811,38 +6903,63 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6811 if (rq->rd) { 6903 if (rq->rd) {
6812 struct root_domain *old_rd = rq->rd; 6904 struct root_domain *old_rd = rq->rd;
6813 6905
6814 if (cpu_isset(rq->cpu, old_rd->online)) 6906 if (cpumask_test_cpu(rq->cpu, old_rd->online))
6815 set_rq_offline(rq); 6907 set_rq_offline(rq);
6816 6908
6817 cpu_clear(rq->cpu, old_rd->span); 6909 cpumask_clear_cpu(rq->cpu, old_rd->span);
6818 6910
6819 if (atomic_dec_and_test(&old_rd->refcount)) 6911 if (atomic_dec_and_test(&old_rd->refcount))
6820 kfree(old_rd); 6912 free_rootdomain(old_rd);
6821 } 6913 }
6822 6914
6823 atomic_inc(&rd->refcount); 6915 atomic_inc(&rd->refcount);
6824 rq->rd = rd; 6916 rq->rd = rd;
6825 6917
6826 cpu_set(rq->cpu, rd->span); 6918 cpumask_set_cpu(rq->cpu, rd->span);
6827 if (cpu_isset(rq->cpu, cpu_online_map)) 6919 if (cpumask_test_cpu(rq->cpu, cpu_online_mask))
6828 set_rq_online(rq); 6920 set_rq_online(rq);
6829 6921
6830 spin_unlock_irqrestore(&rq->lock, flags); 6922 spin_unlock_irqrestore(&rq->lock, flags);
6831} 6923}
6832 6924
6833static void init_rootdomain(struct root_domain *rd) 6925static int init_rootdomain(struct root_domain *rd, bool bootmem)
6834{ 6926{
6835 memset(rd, 0, sizeof(*rd)); 6927 memset(rd, 0, sizeof(*rd));
6836 6928
6837 cpus_clear(rd->span); 6929 if (bootmem) {
6838 cpus_clear(rd->online); 6930 alloc_bootmem_cpumask_var(&def_root_domain.span);
6931 alloc_bootmem_cpumask_var(&def_root_domain.online);
6932 alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
6933 cpupri_init(&rd->cpupri, true);
6934 return 0;
6935 }
6936
6937 if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
6938 goto free_rd;
6939 if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
6940 goto free_span;
6941 if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
6942 goto free_online;
6943
6944 if (cpupri_init(&rd->cpupri, false) != 0)
6945 goto free_rto_mask;
6946 return 0;
6839 6947
6840 cpupri_init(&rd->cpupri); 6948free_rto_mask:
6949 free_cpumask_var(rd->rto_mask);
6950free_online:
6951 free_cpumask_var(rd->online);
6952free_span:
6953 free_cpumask_var(rd->span);
6954free_rd:
6955 kfree(rd);
6956 return -ENOMEM;
6841} 6957}
6842 6958
6843static void init_defrootdomain(void) 6959static void init_defrootdomain(void)
6844{ 6960{
6845 init_rootdomain(&def_root_domain); 6961 init_rootdomain(&def_root_domain, true);
6962
6846 atomic_set(&def_root_domain.refcount, 1); 6963 atomic_set(&def_root_domain.refcount, 1);
6847} 6964}
6848 6965
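
init_rootdomain() now has four steps that can fail (three cpumask allocations plus cpupri_init()), unwound with the usual goto ladder so every exit path frees exactly what was already set up, with free_rootdomain() mirroring the same order in reverse. The idiom in isolation, as a standalone sketch with invented names:

    #include <stdlib.h>

    struct ctx { void *a, *b, *c; };

    static int ctx_init(struct ctx *ctx)
    {
        ctx->a = malloc(64);
        if (!ctx->a)
            goto fail;
        ctx->b = malloc(64);
        if (!ctx->b)
            goto free_a;
        ctx->c = malloc(64);
        if (!ctx->c)
            goto free_b;
        return 0;                  /* fully initialised */

    free_b:
        free(ctx->b);
    free_a:
        free(ctx->a);
    fail:
        return -1;                 /* caller sees a clean failure */
    }

    static void ctx_exit(struct ctx *ctx)    /* the free_rootdomain() analogue */
    {
        free(ctx->c);
        free(ctx->b);
        free(ctx->a);
    }

    int main(void)
    {
        struct ctx ctx;

        if (ctx_init(&ctx))
            return 1;
        ctx_exit(&ctx);
        return 0;
    }
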
@@ -6854,7 +6971,10 @@ static struct root_domain *alloc_rootdomain(void)
6854 if (!rd) 6971 if (!rd)
6855 return NULL; 6972 return NULL;
6856 6973
6857 init_rootdomain(rd); 6974 if (init_rootdomain(rd, false) != 0) {
6975 kfree(rd);
6976 return NULL;
6977 }
6858 6978
6859 return rd; 6979 return rd;
6860} 6980}
@@ -6896,19 +7016,12 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
6896} 7016}
6897 7017
6898/* cpus with isolated domains */ 7018/* cpus with isolated domains */
6899static cpumask_t cpu_isolated_map = CPU_MASK_NONE; 7019static cpumask_var_t cpu_isolated_map;
6900 7020
6901/* Setup the mask of cpus configured for isolated domains */ 7021/* Setup the mask of cpus configured for isolated domains */
6902static int __init isolated_cpu_setup(char *str) 7022static int __init isolated_cpu_setup(char *str)
6903{ 7023{
6904 static int __initdata ints[NR_CPUS]; 7024 cpulist_parse(str, cpu_isolated_map);
6905 int i;
6906
6907 str = get_options(str, ARRAY_SIZE(ints), ints);
6908 cpus_clear(cpu_isolated_map);
6909 for (i = 1; i <= ints[0]; i++)
6910 if (ints[i] < NR_CPUS)
6911 cpu_set(ints[i], cpu_isolated_map);
6912 return 1; 7025 return 1;
6913} 7026}
6914 7027
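
isolated_cpu_setup() now simply calls cpulist_parse(), so isolcpus= takes the standard cpulist syntax (e.g. "0,3,5-7") instead of filling a fixed NR_CPUS-sized array of ints. For reference, a small user-space parser for the same comma/range syntax; it is purely illustrative, the kernel's own implementation lives in lib/bitmap.c:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define LONG_BITS (8 * sizeof(unsigned long))

    /* Parse "0,3,5-7" style lists into a word-array bitmap. */
    static int parse_cpulist(const char *s, unsigned long *bits, int nbits)
    {
        char *dup = strdup(s), *save = NULL, *tok;

        if (!dup)
            return -1;
        for (tok = strtok_r(dup, ",", &save); tok;
             tok = strtok_r(NULL, ",", &save)) {
            int lo, hi, n = sscanf(tok, "%d-%d", &lo, &hi);

            if (n == 1)
                hi = lo;
            else if (n != 2) {
                free(dup);
                return -1;
            }
            for (; lo <= hi && lo < nbits; lo++)
                if (lo >= 0)
                    bits[lo / LONG_BITS] |= 1UL << (lo % LONG_BITS);
        }
        free(dup);
        return 0;
    }

    int main(void)
    {
        unsigned long map[2] = { 0, 0 };

        if (parse_cpulist("0,3,5-7", map, 128) == 0)
            printf("mask: %#lx\n", map[0]);   /* prints 0xe9 */
        return 0;
    }
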
@@ -6917,42 +7030,43 @@ __setup("isolcpus=", isolated_cpu_setup);
6917/* 7030/*
6918 * init_sched_build_groups takes the cpumask we wish to span, and a pointer 7031 * init_sched_build_groups takes the cpumask we wish to span, and a pointer
6919 * to a function which identifies what group(along with sched group) a CPU 7032 * to a function which identifies what group(along with sched group) a CPU
6920 * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS 7033 * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
6921 * (due to the fact that we keep track of groups covered with a cpumask_t). 7034 * (due to the fact that we keep track of groups covered with a struct cpumask).
6922 * 7035 *
6923 * init_sched_build_groups will build a circular linked list of the groups 7036 * init_sched_build_groups will build a circular linked list of the groups
6924 * covered by the given span, and will set each group's ->cpumask correctly, 7037 * covered by the given span, and will set each group's ->cpumask correctly,
6925 * and ->cpu_power to 0. 7038 * and ->cpu_power to 0.
6926 */ 7039 */
6927static void 7040static void
6928init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, 7041init_sched_build_groups(const struct cpumask *span,
6929 int (*group_fn)(int cpu, const cpumask_t *cpu_map, 7042 const struct cpumask *cpu_map,
7043 int (*group_fn)(int cpu, const struct cpumask *cpu_map,
6930 struct sched_group **sg, 7044 struct sched_group **sg,
6931 cpumask_t *tmpmask), 7045 struct cpumask *tmpmask),
6932 cpumask_t *covered, cpumask_t *tmpmask) 7046 struct cpumask *covered, struct cpumask *tmpmask)
6933{ 7047{
6934 struct sched_group *first = NULL, *last = NULL; 7048 struct sched_group *first = NULL, *last = NULL;
6935 int i; 7049 int i;
6936 7050
6937 cpus_clear(*covered); 7051 cpumask_clear(covered);
6938 7052
6939 for_each_cpu_mask_nr(i, *span) { 7053 for_each_cpu(i, span) {
6940 struct sched_group *sg; 7054 struct sched_group *sg;
6941 int group = group_fn(i, cpu_map, &sg, tmpmask); 7055 int group = group_fn(i, cpu_map, &sg, tmpmask);
6942 int j; 7056 int j;
6943 7057
6944 if (cpu_isset(i, *covered)) 7058 if (cpumask_test_cpu(i, covered))
6945 continue; 7059 continue;
6946 7060
6947 cpus_clear(sg->cpumask); 7061 cpumask_clear(sched_group_cpus(sg));
6948 sg->__cpu_power = 0; 7062 sg->__cpu_power = 0;
6949 7063
6950 for_each_cpu_mask_nr(j, *span) { 7064 for_each_cpu(j, span) {
6951 if (group_fn(j, cpu_map, NULL, tmpmask) != group) 7065 if (group_fn(j, cpu_map, NULL, tmpmask) != group)
6952 continue; 7066 continue;
6953 7067
6954 cpu_set(j, *covered); 7068 cpumask_set_cpu(j, covered);
6955 cpu_set(j, sg->cpumask); 7069 cpumask_set_cpu(j, sched_group_cpus(sg));
6956 } 7070 }
6957 if (!first) 7071 if (!first)
6958 first = sg; 7072 first = sg;
@@ -7016,9 +7130,10 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
7016 * should be one that prevents unnecessary balancing, but also spreads tasks 7130 * should be one that prevents unnecessary balancing, but also spreads tasks
7017 * out optimally. 7131 * out optimally.
7018 */ 7132 */
7019static void sched_domain_node_span(int node, cpumask_t *span) 7133static void sched_domain_node_span(int node, struct cpumask *span)
7020{ 7134{
7021 nodemask_t used_nodes; 7135 nodemask_t used_nodes;
7136 /* FIXME: use cpumask_of_node() */
7022 node_to_cpumask_ptr(nodemask, node); 7137 node_to_cpumask_ptr(nodemask, node);
7023 int i; 7138 int i;
7024 7139
@@ -7040,18 +7155,33 @@ static void sched_domain_node_span(int node, cpumask_t *span)
7040int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 7155int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
7041 7156
7042/* 7157/*
7158 * The cpus mask in sched_group and sched_domain hangs off the end.
7159 * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
7160 * for nr_cpu_ids < CONFIG_NR_CPUS.
7161 */
7162struct static_sched_group {
7163 struct sched_group sg;
7164 DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
7165};
7166
7167struct static_sched_domain {
7168 struct sched_domain sd;
7169 DECLARE_BITMAP(span, CONFIG_NR_CPUS);
7170};
7171
7172/*
7043 * SMT sched-domains: 7173 * SMT sched-domains:
7044 */ 7174 */
7045#ifdef CONFIG_SCHED_SMT 7175#ifdef CONFIG_SCHED_SMT
7046static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 7176static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
7047static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); 7177static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
7048 7178
7049static int 7179static int
7050cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7180cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
7051 cpumask_t *unused) 7181 struct sched_group **sg, struct cpumask *unused)
7052{ 7182{
7053 if (sg) 7183 if (sg)
7054 *sg = &per_cpu(sched_group_cpus, cpu); 7184 *sg = &per_cpu(sched_group_cpus, cpu).sg;
7055 return cpu; 7185 return cpu;
7056} 7186}
7057#endif /* CONFIG_SCHED_SMT */ 7187#endif /* CONFIG_SCHED_SMT */
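
The static_sched_group/static_sched_domain wrappers above exist because sched_group and sched_domain now end in a flexible cpumask that is normally sized for nr_cpu_ids bits at runtime; statically defined per-cpu instances still need backing storage, so the wrapper reserves a full CONFIG_NR_CPUS bitmap right behind the struct (hence the FIXME about wasted space). The underlying layout trick, shown as a standalone user-space sketch with invented names, is simply "allocate the struct and exactly as many mask bits as you need in one block":

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define LONG_BITS        (8 * sizeof(unsigned long))
    #define BITS_TO_LONGS(n) (((n) + LONG_BITS - 1) / LONG_BITS)

    struct group {
        unsigned int power;
        unsigned long cpus[];    /* mask storage follows the struct */
    };

    static struct group *alloc_group(int nbits)
    {
        size_t masksz = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
        struct group *g = malloc(sizeof(*g) + masksz);

        if (g) {
            g->power = 0;
            memset(g->cpus, 0, masksz);
        }
        return g;
    }

    int main(void)
    {
        struct group *g = alloc_group(128);   /* think nr_cpu_ids == 128 */

        if (!g)
            return 1;
        g->cpus[0] |= 1UL << 3;               /* "cpu 3 is in this group" */
        printf("word0=%#lx\n", g->cpus[0]);
        free(g);
        return 0;
    }
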
@@ -7060,56 +7190,55 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
7060 * multi-core sched-domains: 7190 * multi-core sched-domains:
7061 */ 7191 */
7062#ifdef CONFIG_SCHED_MC 7192#ifdef CONFIG_SCHED_MC
7063static DEFINE_PER_CPU(struct sched_domain, core_domains); 7193static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
7064static DEFINE_PER_CPU(struct sched_group, sched_group_core); 7194static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
7065#endif /* CONFIG_SCHED_MC */ 7195#endif /* CONFIG_SCHED_MC */
7066 7196
7067#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) 7197#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
7068static int 7198static int
7069cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7199cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
7070 cpumask_t *mask) 7200 struct sched_group **sg, struct cpumask *mask)
7071{ 7201{
7072 int group; 7202 int group;
7073 7203
7074 *mask = per_cpu(cpu_sibling_map, cpu); 7204 cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
7075 cpus_and(*mask, *mask, *cpu_map); 7205 group = cpumask_first(mask);
7076 group = first_cpu(*mask);
7077 if (sg) 7206 if (sg)
7078 *sg = &per_cpu(sched_group_core, group); 7207 *sg = &per_cpu(sched_group_core, group).sg;
7079 return group; 7208 return group;
7080} 7209}
7081#elif defined(CONFIG_SCHED_MC) 7210#elif defined(CONFIG_SCHED_MC)
7082static int 7211static int
7083cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7212cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
7084 cpumask_t *unused) 7213 struct sched_group **sg, struct cpumask *unused)
7085{ 7214{
7086 if (sg) 7215 if (sg)
7087 *sg = &per_cpu(sched_group_core, cpu); 7216 *sg = &per_cpu(sched_group_core, cpu).sg;
7088 return cpu; 7217 return cpu;
7089} 7218}
7090#endif 7219#endif
7091 7220
7092static DEFINE_PER_CPU(struct sched_domain, phys_domains); 7221static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
7093static DEFINE_PER_CPU(struct sched_group, sched_group_phys); 7222static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
7094 7223
7095static int 7224static int
7096cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7225cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
7097 cpumask_t *mask) 7226 struct sched_group **sg, struct cpumask *mask)
7098{ 7227{
7099 int group; 7228 int group;
7100#ifdef CONFIG_SCHED_MC 7229#ifdef CONFIG_SCHED_MC
7101 *mask = *cpu_coregroup_mask(cpu); 7230 /* FIXME: Use cpu_coregroup_mask. */
7231 *mask = cpu_coregroup_map(cpu);
7102 cpus_and(*mask, *mask, *cpu_map); 7232 cpus_and(*mask, *mask, *cpu_map);
7103 group = first_cpu(*mask); 7233 group = cpumask_first(mask);
7104#elif defined(CONFIG_SCHED_SMT) 7234#elif defined(CONFIG_SCHED_SMT)
7105 *mask = per_cpu(cpu_sibling_map, cpu); 7235 cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
7106 cpus_and(*mask, *mask, *cpu_map); 7236 group = cpumask_first(mask);
7107 group = first_cpu(*mask);
7108#else 7237#else
7109 group = cpu; 7238 group = cpu;
7110#endif 7239#endif
7111 if (sg) 7240 if (sg)
7112 *sg = &per_cpu(sched_group_phys, group); 7241 *sg = &per_cpu(sched_group_phys, group).sg;
7113 return group; 7242 return group;
7114} 7243}
7115 7244
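These group helpers now take struct cpumask * scratch space from the caller and operate on it in place, instead of copying a whole cpumask_t onto the stack and masking afterwards. The idiom change, side by side (a sketch using the sibling map as in the hunk above):

    /* old idiom: two full-mask copies plus a scan */
    cpumask_t tmp = per_cpu(cpu_sibling_map, cpu);
    cpus_and(tmp, tmp, *cpu_map);
    group = first_cpu(tmp);

    /* new idiom: one AND into caller-provided scratch, then a scan */
    cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
    group = cpumask_first(mask);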
@@ -7123,19 +7252,21 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains);
7123static struct sched_group ***sched_group_nodes_bycpu; 7252static struct sched_group ***sched_group_nodes_bycpu;
7124 7253
7125static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); 7254static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
7126static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); 7255static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
7127 7256
7128static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, 7257static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
7129 struct sched_group **sg, cpumask_t *nodemask) 7258 struct sched_group **sg,
7259 struct cpumask *nodemask)
7130{ 7260{
7131 int group; 7261 int group;
7262 /* FIXME: use cpumask_of_node */
7263 node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
7132 7264
7133 *nodemask = node_to_cpumask(cpu_to_node(cpu)); 7265 cpumask_and(nodemask, pnodemask, cpu_map);
7134 cpus_and(*nodemask, *nodemask, *cpu_map); 7266 group = cpumask_first(nodemask);
7135 group = first_cpu(*nodemask);
7136 7267
7137 if (sg) 7268 if (sg)
7138 *sg = &per_cpu(sched_group_allnodes, group); 7269 *sg = &per_cpu(sched_group_allnodes, group).sg;
7139 return group; 7270 return group;
7140} 7271}
7141 7272
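The FIXME comments mark node_to_cpumask_ptr() as a stop-gap: on configurations without a per-node mask array it may expand to a full local cpumask_t copy that then still has to be ANDed with cpu_map. Once cpumask_of_node() can be used here, the presumed end state is a single operation on the node's constant mask (illustrative only):

    /* presumed follow-up once the FIXME is resolved */
    cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
    group = cpumask_first(nodemask);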
@@ -7147,11 +7278,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
7147 if (!sg) 7278 if (!sg)
7148 return; 7279 return;
7149 do { 7280 do {
7150 for_each_cpu_mask_nr(j, sg->cpumask) { 7281 for_each_cpu(j, sched_group_cpus(sg)) {
7151 struct sched_domain *sd; 7282 struct sched_domain *sd;
7152 7283
7153 sd = &per_cpu(phys_domains, j); 7284 sd = &per_cpu(phys_domains, j).sd;
7154 if (j != first_cpu(sd->groups->cpumask)) { 7285 if (j != cpumask_first(sched_group_cpus(sd->groups))) {
7155 /* 7286 /*
7156 * Only add "power" once for each 7287 * Only add "power" once for each
7157 * physical package. 7288 * physical package.
@@ -7168,11 +7299,12 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
7168 7299
7169#ifdef CONFIG_NUMA 7300#ifdef CONFIG_NUMA
7170/* Free memory allocated for various sched_group structures */ 7301/* Free memory allocated for various sched_group structures */
7171static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) 7302static void free_sched_groups(const struct cpumask *cpu_map,
7303 struct cpumask *nodemask)
7172{ 7304{
7173 int cpu, i; 7305 int cpu, i;
7174 7306
7175 for_each_cpu_mask_nr(cpu, *cpu_map) { 7307 for_each_cpu(cpu, cpu_map) {
7176 struct sched_group **sched_group_nodes 7308 struct sched_group **sched_group_nodes
7177 = sched_group_nodes_bycpu[cpu]; 7309 = sched_group_nodes_bycpu[cpu];
7178 7310
@@ -7181,10 +7313,11 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
7181 7313
7182 for (i = 0; i < nr_node_ids; i++) { 7314 for (i = 0; i < nr_node_ids; i++) {
7183 struct sched_group *oldsg, *sg = sched_group_nodes[i]; 7315 struct sched_group *oldsg, *sg = sched_group_nodes[i];
7316 /* FIXME: Use cpumask_of_node */
7317 node_to_cpumask_ptr(pnodemask, i);
7184 7318
7185 *nodemask = node_to_cpumask(i); 7319 cpus_and(*nodemask, *pnodemask, *cpu_map);
7186 cpus_and(*nodemask, *nodemask, *cpu_map); 7320 if (cpumask_empty(nodemask))
7187 if (cpus_empty(*nodemask))
7188 continue; 7321 continue;
7189 7322
7190 if (sg == NULL) 7323 if (sg == NULL)
@@ -7202,7 +7335,8 @@ next_sg:
7202 } 7335 }
7203} 7336}
7204#else /* !CONFIG_NUMA */ 7337#else /* !CONFIG_NUMA */
7205static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) 7338static void free_sched_groups(const struct cpumask *cpu_map,
7339 struct cpumask *nodemask)
7206{ 7340{
7207} 7341}
7208#endif /* CONFIG_NUMA */ 7342#endif /* CONFIG_NUMA */
@@ -7228,7 +7362,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
7228 7362
7229 WARN_ON(!sd || !sd->groups); 7363 WARN_ON(!sd || !sd->groups);
7230 7364
7231 if (cpu != first_cpu(sd->groups->cpumask)) 7365 if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
7232 return; 7366 return;
7233 7367
7234 child = sd->child; 7368 child = sd->child;
@@ -7293,48 +7427,6 @@ SD_INIT_FUNC(CPU)
7293 SD_INIT_FUNC(MC) 7427 SD_INIT_FUNC(MC)
7294#endif 7428#endif
7295 7429
7296/*
7297 * To minimize stack usage kmalloc room for cpumasks and share the
7298 * space as the usage in build_sched_domains() dictates. Used only
7299 * if the amount of space is significant.
7300 */
7301struct allmasks {
7302 cpumask_t tmpmask; /* make this one first */
7303 union {
7304 cpumask_t nodemask;
7305 cpumask_t this_sibling_map;
7306 cpumask_t this_core_map;
7307 };
7308 cpumask_t send_covered;
7309
7310#ifdef CONFIG_NUMA
7311 cpumask_t domainspan;
7312 cpumask_t covered;
7313 cpumask_t notcovered;
7314#endif
7315};
7316
7317#if NR_CPUS > 128
7318#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
7319static inline void sched_cpumask_alloc(struct allmasks **masks)
7320{
7321 *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
7322}
7323static inline void sched_cpumask_free(struct allmasks *masks)
7324{
7325 kfree(masks);
7326}
7327#else
7328#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
7329static inline void sched_cpumask_alloc(struct allmasks **masks)
7330{ }
7331static inline void sched_cpumask_free(struct allmasks *masks)
7332{ }
7333#endif
7334
7335#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
7336 ((unsigned long)(a) + offsetof(struct allmasks, v))
7337
7338static int default_relax_domain_level = -1; 7430static int default_relax_domain_level = -1;
7339 7431
7340static int __init setup_relax_domain_level(char *str) 7432static int __init setup_relax_domain_level(char *str)
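The kmalloc'd struct allmasks bundle and the SCHED_CPUMASK_* macros removed above are being replaced by individually allocated cpumask_var_t variables. The point of cpumask_var_t is that its representation depends on CONFIG_CPUMASK_OFFSTACK, so large NR_CPUS kernels keep the masks off the stack while small ones pay nothing. A sketch of the pattern, with a hypothetical helper (see include/linux/cpumask.h for the real definitions):

    /*
     * Rough shape of the API adopted here:
     *
     *   #ifdef CONFIG_CPUMASK_OFFSTACK
     *   typedef struct cpumask *cpumask_var_t;      (heap allocated, can fail)
     *   #else
     *   typedef struct cpumask cpumask_var_t[1];    (plain storage, never fails)
     *   #endif
     */
    static int example_mask_user(void)               /* hypothetical */
    {
            cpumask_var_t mask;

            if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                    return -ENOMEM;
            cpumask_and(mask, cpu_online_mask, cpu_active_mask);
            /* ... use mask ... */
            free_cpumask_var(mask);
            return 0;
    }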
@@ -7374,17 +7466,38 @@ static void set_domain_attribute(struct sched_domain *sd,
7374 * Build sched domains for a given set of cpus and attach the sched domains 7466 * Build sched domains for a given set of cpus and attach the sched domains
7375 * to the individual cpus 7467 * to the individual cpus
7376 */ 7468 */
7377static int __build_sched_domains(const cpumask_t *cpu_map, 7469static int __build_sched_domains(const struct cpumask *cpu_map,
7378 struct sched_domain_attr *attr) 7470 struct sched_domain_attr *attr)
7379{ 7471{
7380 int i; 7472 int i, err = -ENOMEM;
7381 struct root_domain *rd; 7473 struct root_domain *rd;
7382 SCHED_CPUMASK_DECLARE(allmasks); 7474 cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
7383 cpumask_t *tmpmask; 7475 tmpmask;
7384#ifdef CONFIG_NUMA 7476#ifdef CONFIG_NUMA
7477 cpumask_var_t domainspan, covered, notcovered;
7385 struct sched_group **sched_group_nodes = NULL; 7478 struct sched_group **sched_group_nodes = NULL;
7386 int sd_allnodes = 0; 7479 int sd_allnodes = 0;
7387 7480
7481 if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
7482 goto out;
7483 if (!alloc_cpumask_var(&covered, GFP_KERNEL))
7484 goto free_domainspan;
7485 if (!alloc_cpumask_var(&notcovered, GFP_KERNEL))
7486 goto free_covered;
7487#endif
7488
7489 if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
7490 goto free_notcovered;
7491 if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
7492 goto free_nodemask;
7493 if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
7494 goto free_this_sibling_map;
7495 if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
7496 goto free_this_core_map;
7497 if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
7498 goto free_send_covered;
7499
7500#ifdef CONFIG_NUMA
7388 /* 7501 /*
7389 * Allocate the per-node list of sched groups 7502 * Allocate the per-node list of sched groups
7390 */ 7503 */
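Each alloc_cpumask_var() above that fails jumps to a label that unwinds only what has already been allocated; the matching free_* labels appear near the end of the function, further down in this patch. Reduced to two masks, the pattern looks like this (a sketch, not the function's actual labels):

    static int example_build(void)                   /* hypothetical */
    {
            cpumask_var_t a, b;
            int err = -ENOMEM;

            if (!alloc_cpumask_var(&a, GFP_KERNEL))
                    goto out;
            if (!alloc_cpumask_var(&b, GFP_KERNEL))
                    goto free_a;

            /* ... do the real work ... */
            err = 0;

            free_cpumask_var(b);
    free_a:
            free_cpumask_var(a);
    out:
            return err;
    }

On !CONFIG_CPUMASK_OFFSTACK kernels the allocations are no-ops that always succeed, so the error branches are effectively compiled away.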
@@ -7392,54 +7505,37 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7392 GFP_KERNEL); 7505 GFP_KERNEL);
7393 if (!sched_group_nodes) { 7506 if (!sched_group_nodes) {
7394 printk(KERN_WARNING "Can not alloc sched group node list\n"); 7507 printk(KERN_WARNING "Can not alloc sched group node list\n");
7395 return -ENOMEM; 7508 goto free_tmpmask;
7396 } 7509 }
7397#endif 7510#endif
7398 7511
7399 rd = alloc_rootdomain(); 7512 rd = alloc_rootdomain();
7400 if (!rd) { 7513 if (!rd) {
7401 printk(KERN_WARNING "Cannot alloc root domain\n"); 7514 printk(KERN_WARNING "Cannot alloc root domain\n");
7402#ifdef CONFIG_NUMA 7515 goto free_sched_groups;
7403 kfree(sched_group_nodes);
7404#endif
7405 return -ENOMEM;
7406 } 7516 }
7407 7517
7408 /* get space for all scratch cpumask variables */
7409 sched_cpumask_alloc(&allmasks);
7410 if (!allmasks) {
7411 printk(KERN_WARNING "Cannot alloc cpumask array\n");
7412 kfree(rd);
7413#ifdef CONFIG_NUMA 7518#ifdef CONFIG_NUMA
7414 kfree(sched_group_nodes); 7519 sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
7415#endif
7416 return -ENOMEM;
7417 }
7418
7419 tmpmask = (cpumask_t *)allmasks;
7420
7421
7422#ifdef CONFIG_NUMA
7423 sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
7424#endif 7520#endif
7425 7521
7426 /* 7522 /*
7427 * Set up domains for cpus specified by the cpu_map. 7523 * Set up domains for cpus specified by the cpu_map.
7428 */ 7524 */
7429 for_each_cpu_mask_nr(i, *cpu_map) { 7525 for_each_cpu(i, cpu_map) {
7430 struct sched_domain *sd = NULL, *p; 7526 struct sched_domain *sd = NULL, *p;
7431 SCHED_CPUMASK_VAR(nodemask, allmasks);
7432 7527
7528 /* FIXME: use cpumask_of_node */
7433 *nodemask = node_to_cpumask(cpu_to_node(i)); 7529 *nodemask = node_to_cpumask(cpu_to_node(i));
7434 cpus_and(*nodemask, *nodemask, *cpu_map); 7530 cpus_and(*nodemask, *nodemask, *cpu_map);
7435 7531
7436#ifdef CONFIG_NUMA 7532#ifdef CONFIG_NUMA
7437 if (cpus_weight(*cpu_map) > 7533 if (cpumask_weight(cpu_map) >
7438 SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { 7534 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
7439 sd = &per_cpu(allnodes_domains, i); 7535 sd = &per_cpu(allnodes_domains, i);
7440 SD_INIT(sd, ALLNODES); 7536 SD_INIT(sd, ALLNODES);
7441 set_domain_attribute(sd, attr); 7537 set_domain_attribute(sd, attr);
7442 sd->span = *cpu_map; 7538 cpumask_copy(sched_domain_span(sd), cpu_map);
7443 cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); 7539 cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
7444 p = sd; 7540 p = sd;
7445 sd_allnodes = 1; 7541 sd_allnodes = 1;
@@ -7449,18 +7545,19 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7449 sd = &per_cpu(node_domains, i); 7545 sd = &per_cpu(node_domains, i);
7450 SD_INIT(sd, NODE); 7546 SD_INIT(sd, NODE);
7451 set_domain_attribute(sd, attr); 7547 set_domain_attribute(sd, attr);
7452 sched_domain_node_span(cpu_to_node(i), &sd->span); 7548 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
7453 sd->parent = p; 7549 sd->parent = p;
7454 if (p) 7550 if (p)
7455 p->child = sd; 7551 p->child = sd;
7456 cpus_and(sd->span, sd->span, *cpu_map); 7552 cpumask_and(sched_domain_span(sd),
7553 sched_domain_span(sd), cpu_map);
7457#endif 7554#endif
7458 7555
7459 p = sd; 7556 p = sd;
7460 sd = &per_cpu(phys_domains, i); 7557 sd = &per_cpu(phys_domains, i).sd;
7461 SD_INIT(sd, CPU); 7558 SD_INIT(sd, CPU);
7462 set_domain_attribute(sd, attr); 7559 set_domain_attribute(sd, attr);
7463 sd->span = *nodemask; 7560 cpumask_copy(sched_domain_span(sd), nodemask);
7464 sd->parent = p; 7561 sd->parent = p;
7465 if (p) 7562 if (p)
7466 p->child = sd; 7563 p->child = sd;
@@ -7468,11 +7565,12 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7468 7565
7469#ifdef CONFIG_SCHED_MC 7566#ifdef CONFIG_SCHED_MC
7470 p = sd; 7567 p = sd;
7471 sd = &per_cpu(core_domains, i); 7568 sd = &per_cpu(core_domains, i).sd;
7472 SD_INIT(sd, MC); 7569 SD_INIT(sd, MC);
7473 set_domain_attribute(sd, attr); 7570 set_domain_attribute(sd, attr);
7474 sd->span = *cpu_coregroup_mask(i); 7571 *sched_domain_span(sd) = cpu_coregroup_map(i);
7475 cpus_and(sd->span, sd->span, *cpu_map); 7572 cpumask_and(sched_domain_span(sd),
7573 sched_domain_span(sd), cpu_map);
7476 sd->parent = p; 7574 sd->parent = p;
7477 p->child = sd; 7575 p->child = sd;
7478 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); 7576 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7480,11 +7578,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7480 7578
7481#ifdef CONFIG_SCHED_SMT 7579#ifdef CONFIG_SCHED_SMT
7482 p = sd; 7580 p = sd;
7483 sd = &per_cpu(cpu_domains, i); 7581 sd = &per_cpu(cpu_domains, i).sd;
7484 SD_INIT(sd, SIBLING); 7582 SD_INIT(sd, SIBLING);
7485 set_domain_attribute(sd, attr); 7583 set_domain_attribute(sd, attr);
7486 sd->span = per_cpu(cpu_sibling_map, i); 7584 cpumask_and(sched_domain_span(sd),
7487 cpus_and(sd->span, sd->span, *cpu_map); 7585 &per_cpu(cpu_sibling_map, i), cpu_map);
7488 sd->parent = p; 7586 sd->parent = p;
7489 p->child = sd; 7587 p->child = sd;
7490 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); 7588 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7493,13 +7591,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7493 7591
7494#ifdef CONFIG_SCHED_SMT 7592#ifdef CONFIG_SCHED_SMT
7495 /* Set up CPU (sibling) groups */ 7593 /* Set up CPU (sibling) groups */
7496 for_each_cpu_mask_nr(i, *cpu_map) { 7594 for_each_cpu(i, cpu_map) {
7497 SCHED_CPUMASK_VAR(this_sibling_map, allmasks); 7595 cpumask_and(this_sibling_map,
7498 SCHED_CPUMASK_VAR(send_covered, allmasks); 7596 &per_cpu(cpu_sibling_map, i), cpu_map);
7499 7597 if (i != cpumask_first(this_sibling_map))
7500 *this_sibling_map = per_cpu(cpu_sibling_map, i);
7501 cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
7502 if (i != first_cpu(*this_sibling_map))
7503 continue; 7598 continue;
7504 7599
7505 init_sched_build_groups(this_sibling_map, cpu_map, 7600 init_sched_build_groups(this_sibling_map, cpu_map,
@@ -7510,13 +7605,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7510 7605
7511#ifdef CONFIG_SCHED_MC 7606#ifdef CONFIG_SCHED_MC
7512 /* Set up multi-core groups */ 7607 /* Set up multi-core groups */
7513 for_each_cpu_mask_nr(i, *cpu_map) { 7608 for_each_cpu(i, cpu_map) {
7514 SCHED_CPUMASK_VAR(this_core_map, allmasks); 7609 /* FIXME: Use cpu_coregroup_mask */
7515 SCHED_CPUMASK_VAR(send_covered, allmasks); 7610 *this_core_map = cpu_coregroup_map(i);
7516
7517 *this_core_map = *cpu_coregroup_mask(i);
7518 cpus_and(*this_core_map, *this_core_map, *cpu_map); 7611 cpus_and(*this_core_map, *this_core_map, *cpu_map);
7519 if (i != first_cpu(*this_core_map)) 7612 if (i != cpumask_first(this_core_map))
7520 continue; 7613 continue;
7521 7614
7522 init_sched_build_groups(this_core_map, cpu_map, 7615 init_sched_build_groups(this_core_map, cpu_map,
@@ -7527,12 +7620,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7527 7620
7528 /* Set up physical groups */ 7621 /* Set up physical groups */
7529 for (i = 0; i < nr_node_ids; i++) { 7622 for (i = 0; i < nr_node_ids; i++) {
7530 SCHED_CPUMASK_VAR(nodemask, allmasks); 7623 /* FIXME: Use cpumask_of_node */
7531 SCHED_CPUMASK_VAR(send_covered, allmasks);
7532
7533 *nodemask = node_to_cpumask(i); 7624 *nodemask = node_to_cpumask(i);
7534 cpus_and(*nodemask, *nodemask, *cpu_map); 7625 cpus_and(*nodemask, *nodemask, *cpu_map);
7535 if (cpus_empty(*nodemask)) 7626 if (cpumask_empty(nodemask))
7536 continue; 7627 continue;
7537 7628
7538 init_sched_build_groups(nodemask, cpu_map, 7629 init_sched_build_groups(nodemask, cpu_map,
@@ -7543,8 +7634,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7543#ifdef CONFIG_NUMA 7634#ifdef CONFIG_NUMA
7544 /* Set up node groups */ 7635 /* Set up node groups */
7545 if (sd_allnodes) { 7636 if (sd_allnodes) {
7546 SCHED_CPUMASK_VAR(send_covered, allmasks);
7547
7548 init_sched_build_groups(cpu_map, cpu_map, 7637 init_sched_build_groups(cpu_map, cpu_map,
7549 &cpu_to_allnodes_group, 7638 &cpu_to_allnodes_group,
7550 send_covered, tmpmask); 7639 send_covered, tmpmask);
@@ -7553,58 +7642,58 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7553 for (i = 0; i < nr_node_ids; i++) { 7642 for (i = 0; i < nr_node_ids; i++) {
7554 /* Set up node groups */ 7643 /* Set up node groups */
7555 struct sched_group *sg, *prev; 7644 struct sched_group *sg, *prev;
7556 SCHED_CPUMASK_VAR(nodemask, allmasks);
7557 SCHED_CPUMASK_VAR(domainspan, allmasks);
7558 SCHED_CPUMASK_VAR(covered, allmasks);
7559 int j; 7645 int j;
7560 7646
7647 /* FIXME: Use cpumask_of_node */
7561 *nodemask = node_to_cpumask(i); 7648 *nodemask = node_to_cpumask(i);
7562 cpus_clear(*covered); 7649 cpumask_clear(covered);
7563 7650
7564 cpus_and(*nodemask, *nodemask, *cpu_map); 7651 cpus_and(*nodemask, *nodemask, *cpu_map);
7565 if (cpus_empty(*nodemask)) { 7652 if (cpumask_empty(nodemask)) {
7566 sched_group_nodes[i] = NULL; 7653 sched_group_nodes[i] = NULL;
7567 continue; 7654 continue;
7568 } 7655 }
7569 7656
7570 sched_domain_node_span(i, domainspan); 7657 sched_domain_node_span(i, domainspan);
7571 cpus_and(*domainspan, *domainspan, *cpu_map); 7658 cpumask_and(domainspan, domainspan, cpu_map);
7572 7659
7573 sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); 7660 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
7661 GFP_KERNEL, i);
7574 if (!sg) { 7662 if (!sg) {
7575 printk(KERN_WARNING "Can not alloc domain group for " 7663 printk(KERN_WARNING "Can not alloc domain group for "
7576 "node %d\n", i); 7664 "node %d\n", i);
7577 goto error; 7665 goto error;
7578 } 7666 }
7579 sched_group_nodes[i] = sg; 7667 sched_group_nodes[i] = sg;
7580 for_each_cpu_mask_nr(j, *nodemask) { 7668 for_each_cpu(j, nodemask) {
7581 struct sched_domain *sd; 7669 struct sched_domain *sd;
7582 7670
7583 sd = &per_cpu(node_domains, j); 7671 sd = &per_cpu(node_domains, j);
7584 sd->groups = sg; 7672 sd->groups = sg;
7585 } 7673 }
7586 sg->__cpu_power = 0; 7674 sg->__cpu_power = 0;
7587 sg->cpumask = *nodemask; 7675 cpumask_copy(sched_group_cpus(sg), nodemask);
7588 sg->next = sg; 7676 sg->next = sg;
7589 cpus_or(*covered, *covered, *nodemask); 7677 cpumask_or(covered, covered, nodemask);
7590 prev = sg; 7678 prev = sg;
7591 7679
7592 for (j = 0; j < nr_node_ids; j++) { 7680 for (j = 0; j < nr_node_ids; j++) {
7593 SCHED_CPUMASK_VAR(notcovered, allmasks);
7594 int n = (i + j) % nr_node_ids; 7681 int n = (i + j) % nr_node_ids;
7682 /* FIXME: Use cpumask_of_node */
7595 node_to_cpumask_ptr(pnodemask, n); 7683 node_to_cpumask_ptr(pnodemask, n);
7596 7684
7597 cpus_complement(*notcovered, *covered); 7685 cpumask_complement(notcovered, covered);
7598 cpus_and(*tmpmask, *notcovered, *cpu_map); 7686 cpumask_and(tmpmask, notcovered, cpu_map);
7599 cpus_and(*tmpmask, *tmpmask, *domainspan); 7687 cpumask_and(tmpmask, tmpmask, domainspan);
7600 if (cpus_empty(*tmpmask)) 7688 if (cpumask_empty(tmpmask))
7601 break; 7689 break;
7602 7690
7603 cpus_and(*tmpmask, *tmpmask, *pnodemask); 7691 cpumask_and(tmpmask, tmpmask, pnodemask);
7604 if (cpus_empty(*tmpmask)) 7692 if (cpumask_empty(tmpmask))
7605 continue; 7693 continue;
7606 7694
7607 sg = kmalloc_node(sizeof(struct sched_group), 7695 sg = kmalloc_node(sizeof(struct sched_group) +
7696 cpumask_size(),
7608 GFP_KERNEL, i); 7697 GFP_KERNEL, i);
7609 if (!sg) { 7698 if (!sg) {
7610 printk(KERN_WARNING 7699 printk(KERN_WARNING
@@ -7612,9 +7701,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7612 goto error; 7701 goto error;
7613 } 7702 }
7614 sg->__cpu_power = 0; 7703 sg->__cpu_power = 0;
7615 sg->cpumask = *tmpmask; 7704 cpumask_copy(sched_group_cpus(sg), tmpmask);
7616 sg->next = prev->next; 7705 sg->next = prev->next;
7617 cpus_or(*covered, *covered, *tmpmask); 7706 cpumask_or(covered, covered, tmpmask);
7618 prev->next = sg; 7707 prev->next = sg;
7619 prev = sg; 7708 prev = sg;
7620 } 7709 }
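The extra cpumask_size() in these kmalloc_node() calls suggests that struct sched_group now ends in a flexible bitmap rather than embedding a cpumask_t, with sched_group_cpus() converting that storage to a struct cpumask *. A sketch of the presumed arrangement (the real struct and accessor are defined elsewhere in this patch):

    struct sched_group {
            struct sched_group *next;
            unsigned int __cpu_power;
            /* ... */
            unsigned long cpumask[];          /* bits stored right after the struct */
    };

    static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
    {
            return to_cpumask(sg->cpumask);
    }

    /* hypothetical helper mirroring the allocations above */
    static struct sched_group *alloc_node_sched_group(int node)
    {
            return kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
                                GFP_KERNEL, node);
    }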
@@ -7623,22 +7712,22 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7623 7712
7624 /* Calculate CPU power for physical packages and nodes */ 7713 /* Calculate CPU power for physical packages and nodes */
7625#ifdef CONFIG_SCHED_SMT 7714#ifdef CONFIG_SCHED_SMT
7626 for_each_cpu_mask_nr(i, *cpu_map) { 7715 for_each_cpu(i, cpu_map) {
7627 struct sched_domain *sd = &per_cpu(cpu_domains, i); 7716 struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
7628 7717
7629 init_sched_groups_power(i, sd); 7718 init_sched_groups_power(i, sd);
7630 } 7719 }
7631#endif 7720#endif
7632#ifdef CONFIG_SCHED_MC 7721#ifdef CONFIG_SCHED_MC
7633 for_each_cpu_mask_nr(i, *cpu_map) { 7722 for_each_cpu(i, cpu_map) {
7634 struct sched_domain *sd = &per_cpu(core_domains, i); 7723 struct sched_domain *sd = &per_cpu(core_domains, i).sd;
7635 7724
7636 init_sched_groups_power(i, sd); 7725 init_sched_groups_power(i, sd);
7637 } 7726 }
7638#endif 7727#endif
7639 7728
7640 for_each_cpu_mask_nr(i, *cpu_map) { 7729 for_each_cpu(i, cpu_map) {
7641 struct sched_domain *sd = &per_cpu(phys_domains, i); 7730 struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
7642 7731
7643 init_sched_groups_power(i, sd); 7732 init_sched_groups_power(i, sd);
7644 } 7733 }
@@ -7650,53 +7739,78 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7650 if (sd_allnodes) { 7739 if (sd_allnodes) {
7651 struct sched_group *sg; 7740 struct sched_group *sg;
7652 7741
7653 cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, 7742 cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
7654 tmpmask); 7743 tmpmask);
7655 init_numa_sched_groups_power(sg); 7744 init_numa_sched_groups_power(sg);
7656 } 7745 }
7657#endif 7746#endif
7658 7747
7659 /* Attach the domains */ 7748 /* Attach the domains */
7660 for_each_cpu_mask_nr(i, *cpu_map) { 7749 for_each_cpu(i, cpu_map) {
7661 struct sched_domain *sd; 7750 struct sched_domain *sd;
7662#ifdef CONFIG_SCHED_SMT 7751#ifdef CONFIG_SCHED_SMT
7663 sd = &per_cpu(cpu_domains, i); 7752 sd = &per_cpu(cpu_domains, i).sd;
7664#elif defined(CONFIG_SCHED_MC) 7753#elif defined(CONFIG_SCHED_MC)
7665 sd = &per_cpu(core_domains, i); 7754 sd = &per_cpu(core_domains, i).sd;
7666#else 7755#else
7667 sd = &per_cpu(phys_domains, i); 7756 sd = &per_cpu(phys_domains, i).sd;
7668#endif 7757#endif
7669 cpu_attach_domain(sd, rd, i); 7758 cpu_attach_domain(sd, rd, i);
7670 } 7759 }
7671 7760
7672 sched_cpumask_free(allmasks); 7761 err = 0;
7673 return 0; 7762
7763free_tmpmask:
7764 free_cpumask_var(tmpmask);
7765free_send_covered:
7766 free_cpumask_var(send_covered);
7767free_this_core_map:
7768 free_cpumask_var(this_core_map);
7769free_this_sibling_map:
7770 free_cpumask_var(this_sibling_map);
7771free_nodemask:
7772 free_cpumask_var(nodemask);
7773free_notcovered:
7774#ifdef CONFIG_NUMA
7775 free_cpumask_var(notcovered);
7776free_covered:
7777 free_cpumask_var(covered);
7778free_domainspan:
7779 free_cpumask_var(domainspan);
7780out:
7781#endif
7782 return err;
7783
7784free_sched_groups:
7785#ifdef CONFIG_NUMA
7786 kfree(sched_group_nodes);
7787#endif
7788 goto free_tmpmask;
7674 7789
7675#ifdef CONFIG_NUMA 7790#ifdef CONFIG_NUMA
7676error: 7791error:
7677 free_sched_groups(cpu_map, tmpmask); 7792 free_sched_groups(cpu_map, tmpmask);
7678 sched_cpumask_free(allmasks); 7793 free_rootdomain(rd);
7679 kfree(rd); 7794 goto free_tmpmask;
7680 return -ENOMEM;
7681#endif 7795#endif
7682} 7796}
7683 7797
7684static int build_sched_domains(const cpumask_t *cpu_map) 7798static int build_sched_domains(const struct cpumask *cpu_map)
7685{ 7799{
7686 return __build_sched_domains(cpu_map, NULL); 7800 return __build_sched_domains(cpu_map, NULL);
7687} 7801}
7688 7802
7689static cpumask_t *doms_cur; /* current sched domains */ 7803static struct cpumask *doms_cur; /* current sched domains */
7690static int ndoms_cur; /* number of sched domains in 'doms_cur' */ 7804static int ndoms_cur; /* number of sched domains in 'doms_cur' */
7691static struct sched_domain_attr *dattr_cur; 7805static struct sched_domain_attr *dattr_cur;
7692 /* attribues of custom domains in 'doms_cur' */ 7806 /* attribues of custom domains in 'doms_cur' */
7693 7807
7694/* 7808/*
7695 * Special case: If a kmalloc of a doms_cur partition (array of 7809 * Special case: If a kmalloc of a doms_cur partition (array of
7696 * cpumask_t) fails, then fallback to a single sched domain, 7810 * cpumask) fails, then fallback to a single sched domain,
7697 * as determined by the single cpumask_t fallback_doms. 7811 * as determined by the single cpumask fallback_doms.
7698 */ 7812 */
7699static cpumask_t fallback_doms; 7813static cpumask_var_t fallback_doms;
7700 7814
7701/* 7815/*
7702 * arch_update_cpu_topology lets virtualized architectures update the 7816 * arch_update_cpu_topology lets virtualized architectures update the
@@ -7713,16 +7827,16 @@ int __attribute__((weak)) arch_update_cpu_topology(void)
7713 * For now this just excludes isolated cpus, but could be used to 7827 * For now this just excludes isolated cpus, but could be used to
7714 * exclude other special cases in the future. 7828 * exclude other special cases in the future.
7715 */ 7829 */
7716static int arch_init_sched_domains(const cpumask_t *cpu_map) 7830static int arch_init_sched_domains(const struct cpumask *cpu_map)
7717{ 7831{
7718 int err; 7832 int err;
7719 7833
7720 arch_update_cpu_topology(); 7834 arch_update_cpu_topology();
7721 ndoms_cur = 1; 7835 ndoms_cur = 1;
7722 doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 7836 doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
7723 if (!doms_cur) 7837 if (!doms_cur)
7724 doms_cur = &fallback_doms; 7838 doms_cur = fallback_doms;
7725 cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); 7839 cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
7726 dattr_cur = NULL; 7840 dattr_cur = NULL;
7727 err = build_sched_domains(doms_cur); 7841 err = build_sched_domains(doms_cur);
7728 register_sched_domain_sysctl(); 7842 register_sched_domain_sysctl();
@@ -7730,8 +7844,8 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
7730 return err; 7844 return err;
7731} 7845}
7732 7846
7733static void arch_destroy_sched_domains(const cpumask_t *cpu_map, 7847static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
7734 cpumask_t *tmpmask) 7848 struct cpumask *tmpmask)
7735{ 7849{
7736 free_sched_groups(cpu_map, tmpmask); 7850 free_sched_groups(cpu_map, tmpmask);
7737} 7851}
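A few hunks up, doms_cur is retyped as struct cpumask * and sized with cpumask_size() instead of sizeof(cpumask_t), while the single-domain fallback becomes a cpumask_var_t that sched_init_smp() allocates later in this patch. Indexing doms_cur[i] / doms_new[i] as a flat array only works while every mask has the same compile-time size, which is what the later "FIXME: Change to struct cpumask *doms_new[]" is about. A sketch of the current allocation scheme, generalised to ndoms masks (hypothetical helper):

    static struct cpumask *example_alloc_doms(int ndoms)    /* hypothetical */
    {
            struct cpumask *doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);

            /* fall back to the single preallocated mask on failure */
            return doms ? doms : fallback_doms;
    }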
@@ -7740,15 +7854,16 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
7740 * Detach sched domains from a group of cpus specified in cpu_map 7854 * Detach sched domains from a group of cpus specified in cpu_map
7741 * These cpus will now be attached to the NULL domain 7855 * These cpus will now be attached to the NULL domain
7742 */ 7856 */
7743static void detach_destroy_domains(const cpumask_t *cpu_map) 7857static void detach_destroy_domains(const struct cpumask *cpu_map)
7744{ 7858{
7745 cpumask_t tmpmask; 7859 /* Save because hotplug lock held. */
7860 static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
7746 int i; 7861 int i;
7747 7862
7748 for_each_cpu_mask_nr(i, *cpu_map) 7863 for_each_cpu(i, cpu_map)
7749 cpu_attach_domain(NULL, &def_root_domain, i); 7864 cpu_attach_domain(NULL, &def_root_domain, i);
7750 synchronize_sched(); 7865 synchronize_sched();
7751 arch_destroy_sched_domains(cpu_map, &tmpmask); 7866 arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
7752} 7867}
7753 7868
7754/* handle null as "default" */ 7869/* handle null as "default" */
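detach_destroy_domains() can no longer keep a cpumask_t on its stack, and it must not fail, so it switches to a function-local static bitmap; that is only safe because, as the new comment notes, callers hold the hotplug lock and therefore cannot race on the shared buffer. The idiom in isolation (hypothetical function, same technique):

    static void example_serialized_scratch(void)
    {
            /* safe only while callers are serialized (hotplug lock here) */
            static DECLARE_BITMAP(scratch, CONFIG_NR_CPUS);
            struct cpumask *mask = to_cpumask(scratch);

            cpumask_clear(mask);
            cpumask_set_cpu(smp_processor_id(), mask);
    }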
@@ -7773,7 +7888,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7773 * doms_new[] to the current sched domain partitioning, doms_cur[]. 7888 * doms_new[] to the current sched domain partitioning, doms_cur[].
7774 * It destroys each deleted domain and builds each new domain. 7889 * It destroys each deleted domain and builds each new domain.
7775 * 7890 *
7776 * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. 7891 * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
7777 * The masks don't intersect (don't overlap.) We should setup one 7892 * The masks don't intersect (don't overlap.) We should setup one
7778 * sched domain for each mask. CPUs not in any of the cpumasks will 7893 * sched domain for each mask. CPUs not in any of the cpumasks will
7779 * not be load balanced. If the same cpumask appears both in the 7894 * not be load balanced. If the same cpumask appears both in the
@@ -7787,13 +7902,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7787 * the single partition 'fallback_doms', it also forces the domains 7902 * the single partition 'fallback_doms', it also forces the domains
7788 * to be rebuilt. 7903 * to be rebuilt.
7789 * 7904 *
7790 * If doms_new == NULL it will be replaced with cpu_online_map. 7905 * If doms_new == NULL it will be replaced with cpu_online_mask.
7791 * ndoms_new == 0 is a special case for destroying existing domains, 7906 * ndoms_new == 0 is a special case for destroying existing domains,
7792 * and it will not create the default domain. 7907 * and it will not create the default domain.
7793 * 7908 *
7794 * Call with hotplug lock held 7909 * Call with hotplug lock held
7795 */ 7910 */
7796void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, 7911/* FIXME: Change to struct cpumask *doms_new[] */
7912void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
7797 struct sched_domain_attr *dattr_new) 7913 struct sched_domain_attr *dattr_new)
7798{ 7914{
7799 int i, j, n; 7915 int i, j, n;
@@ -7812,7 +7928,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
7812 /* Destroy deleted domains */ 7928 /* Destroy deleted domains */
7813 for (i = 0; i < ndoms_cur; i++) { 7929 for (i = 0; i < ndoms_cur; i++) {
7814 for (j = 0; j < n && !new_topology; j++) { 7930 for (j = 0; j < n && !new_topology; j++) {
7815 if (cpus_equal(doms_cur[i], doms_new[j]) 7931 if (cpumask_equal(&doms_cur[i], &doms_new[j])
7816 && dattrs_equal(dattr_cur, i, dattr_new, j)) 7932 && dattrs_equal(dattr_cur, i, dattr_new, j))
7817 goto match1; 7933 goto match1;
7818 } 7934 }
@@ -7824,15 +7940,15 @@ match1:
7824 7940
7825 if (doms_new == NULL) { 7941 if (doms_new == NULL) {
7826 ndoms_cur = 0; 7942 ndoms_cur = 0;
7827 doms_new = &fallback_doms; 7943 doms_new = fallback_doms;
7828 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); 7944 cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
7829 WARN_ON_ONCE(dattr_new); 7945 WARN_ON_ONCE(dattr_new);
7830 } 7946 }
7831 7947
7832 /* Build new domains */ 7948 /* Build new domains */
7833 for (i = 0; i < ndoms_new; i++) { 7949 for (i = 0; i < ndoms_new; i++) {
7834 for (j = 0; j < ndoms_cur && !new_topology; j++) { 7950 for (j = 0; j < ndoms_cur && !new_topology; j++) {
7835 if (cpus_equal(doms_new[i], doms_cur[j]) 7951 if (cpumask_equal(&doms_new[i], &doms_cur[j])
7836 && dattrs_equal(dattr_new, i, dattr_cur, j)) 7952 && dattrs_equal(dattr_new, i, dattr_cur, j))
7837 goto match2; 7953 goto match2;
7838 } 7954 }
@@ -7844,7 +7960,7 @@ match2:
7844 } 7960 }
7845 7961
7846 /* Remember the new sched domains */ 7962 /* Remember the new sched domains */
7847 if (doms_cur != &fallback_doms) 7963 if (doms_cur != fallback_doms)
7848 kfree(doms_cur); 7964 kfree(doms_cur);
7849 kfree(dattr_cur); /* kfree(NULL) is safe */ 7965 kfree(dattr_cur); /* kfree(NULL) is safe */
7850 doms_cur = doms_new; 7966 doms_cur = doms_new;
@@ -7873,14 +7989,25 @@ int arch_reinit_sched_domains(void)
7873static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) 7989static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
7874{ 7990{
7875 int ret; 7991 int ret;
7992 unsigned int level = 0;
7876 7993
7877 if (buf[0] != '0' && buf[0] != '1') 7994 if (sscanf(buf, "%u", &level) != 1)
7995 return -EINVAL;
7996
7997 /*
7998 * level is always be positive so don't check for
7999 * level < POWERSAVINGS_BALANCE_NONE which is 0
8000 * What happens on 0 or 1 byte write,
8001 * need to check for count as well?
8002 */
8003
8004 if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
7878 return -EINVAL; 8005 return -EINVAL;
7879 8006
7880 if (smt) 8007 if (smt)
7881 sched_smt_power_savings = (buf[0] == '1'); 8008 sched_smt_power_savings = level;
7882 else 8009 else
7883 sched_mc_power_savings = (buf[0] == '1'); 8010 sched_mc_power_savings = level;
7884 8011
7885 ret = arch_reinit_sched_domains(); 8012 ret = arch_reinit_sched_domains();
7886 8013
@@ -7984,7 +8111,9 @@ static int update_runtime(struct notifier_block *nfb,
7984 8111
7985void __init sched_init_smp(void) 8112void __init sched_init_smp(void)
7986{ 8113{
7987 cpumask_t non_isolated_cpus; 8114 cpumask_var_t non_isolated_cpus;
8115
8116 alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
7988 8117
7989#if defined(CONFIG_NUMA) 8118#if defined(CONFIG_NUMA)
7990 sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), 8119 sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
@@ -7993,10 +8122,10 @@ void __init sched_init_smp(void)
7993#endif 8122#endif
7994 get_online_cpus(); 8123 get_online_cpus();
7995 mutex_lock(&sched_domains_mutex); 8124 mutex_lock(&sched_domains_mutex);
7996 arch_init_sched_domains(&cpu_online_map); 8125 arch_init_sched_domains(cpu_online_mask);
7997 cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); 8126 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
7998 if (cpus_empty(non_isolated_cpus)) 8127 if (cpumask_empty(non_isolated_cpus))
7999 cpu_set(smp_processor_id(), non_isolated_cpus); 8128 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
8000 mutex_unlock(&sched_domains_mutex); 8129 mutex_unlock(&sched_domains_mutex);
8001 put_online_cpus(); 8130 put_online_cpus();
8002 8131
@@ -8011,9 +8140,13 @@ void __init sched_init_smp(void)
8011 init_hrtick(); 8140 init_hrtick();
8012 8141
8013 /* Move init over to a non-isolated CPU */ 8142 /* Move init over to a non-isolated CPU */
8014 if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) 8143 if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
8015 BUG(); 8144 BUG();
8016 sched_init_granularity(); 8145 sched_init_granularity();
8146 free_cpumask_var(non_isolated_cpus);
8147
8148 alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
8149 init_sched_rt_class();
8017} 8150}
8018#else 8151#else
8019void __init sched_init_smp(void) 8152void __init sched_init_smp(void)
@@ -8328,6 +8461,15 @@ void __init sched_init(void)
8328 */ 8461 */
8329 current->sched_class = &fair_sched_class; 8462 current->sched_class = &fair_sched_class;
8330 8463
8464 /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
8465 alloc_bootmem_cpumask_var(&nohz_cpu_mask);
8466#ifdef CONFIG_SMP
8467#ifdef CONFIG_NO_HZ
8468 alloc_bootmem_cpumask_var(&nohz.cpu_mask);
8469#endif
8470 alloc_bootmem_cpumask_var(&cpu_isolated_map);
8471#endif /* SMP */
8472
8331 scheduler_running = 1; 8473 scheduler_running = 1;
8332} 8474}
8333 8475
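nohz_cpu_mask, nohz.cpu_mask and cpu_isolated_map evidently become cpumask_var_t elsewhere in this patch, and they are needed before the slab allocator is up, so sched_init() uses the bootmem variant of the allocator. A sketch of the same pattern with a hypothetical mask:

    static cpumask_var_t example_early_mask;         /* hypothetical */

    void __init example_early_setup(void)
    {
            /* no error path: bootmem allocations are expected to succeed
             * (and panic otherwise), hence no return value to check */
            alloc_bootmem_cpumask_var(&example_early_mask);
    }

On !CONFIG_CPUMASK_OFFSTACK configurations the call is effectively a no-op, since the storage already exists.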
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 52154fefab7e..018b7be1db2e 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -67,24 +67,21 @@ static int convert_prio(int prio)
67 * Returns: (int)bool - CPUs were found 67 * Returns: (int)bool - CPUs were found
68 */ 68 */
69int cpupri_find(struct cpupri *cp, struct task_struct *p, 69int cpupri_find(struct cpupri *cp, struct task_struct *p,
70 cpumask_t *lowest_mask) 70 struct cpumask *lowest_mask)
71{ 71{
72 int idx = 0; 72 int idx = 0;
73 int task_pri = convert_prio(p->prio); 73 int task_pri = convert_prio(p->prio);
74 74
75 for_each_cpupri_active(cp->pri_active, idx) { 75 for_each_cpupri_active(cp->pri_active, idx) {
76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; 76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
77 cpumask_t mask;
78 77
79 if (idx >= task_pri) 78 if (idx >= task_pri)
80 break; 79 break;
81 80
82 cpus_and(mask, p->cpus_allowed, vec->mask); 81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
83
84 if (cpus_empty(mask))
85 continue; 82 continue;
86 83
87 *lowest_mask = mask; 84 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
88 return 1; 85 return 1;
89 } 86 }
90 87
@@ -126,7 +123,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
126 vec->count--; 123 vec->count--;
127 if (!vec->count) 124 if (!vec->count)
128 clear_bit(oldpri, cp->pri_active); 125 clear_bit(oldpri, cp->pri_active);
129 cpu_clear(cpu, vec->mask); 126 cpumask_clear_cpu(cpu, vec->mask);
130 127
131 spin_unlock_irqrestore(&vec->lock, flags); 128 spin_unlock_irqrestore(&vec->lock, flags);
132 } 129 }
@@ -136,7 +133,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
136 133
137 spin_lock_irqsave(&vec->lock, flags); 134 spin_lock_irqsave(&vec->lock, flags);
138 135
139 cpu_set(cpu, vec->mask); 136 cpumask_set_cpu(cpu, vec->mask);
140 vec->count++; 137 vec->count++;
141 if (vec->count == 1) 138 if (vec->count == 1)
142 set_bit(newpri, cp->pri_active); 139 set_bit(newpri, cp->pri_active);
@@ -150,10 +147,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
150/** 147/**
151 * cpupri_init - initialize the cpupri structure 148 * cpupri_init - initialize the cpupri structure
152 * @cp: The cpupri context 149 * @cp: The cpupri context
150 * @bootmem: true if allocations need to use bootmem
153 * 151 *
154 * Returns: (void) 152 * Returns: -ENOMEM if memory fails.
155 */ 153 */
156void cpupri_init(struct cpupri *cp) 154int cpupri_init(struct cpupri *cp, bool bootmem)
157{ 155{
158 int i; 156 int i;
159 157
@@ -164,11 +162,30 @@ void cpupri_init(struct cpupri *cp)
164 162
165 spin_lock_init(&vec->lock); 163 spin_lock_init(&vec->lock);
166 vec->count = 0; 164 vec->count = 0;
167 cpus_clear(vec->mask); 165 if (bootmem)
166 alloc_bootmem_cpumask_var(&vec->mask);
167 else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
168 goto cleanup;
168 } 169 }
169 170
170 for_each_possible_cpu(i) 171 for_each_possible_cpu(i)
171 cp->cpu_to_pri[i] = CPUPRI_INVALID; 172 cp->cpu_to_pri[i] = CPUPRI_INVALID;
173 return 0;
174
175cleanup:
176 for (i--; i >= 0; i--)
177 free_cpumask_var(cp->pri_to_cpu[i].mask);
178 return -ENOMEM;
172} 179}
173 180
181/**
182 * cpupri_cleanup - clean up the cpupri structure
183 * @cp: The cpupri context
184 */
185void cpupri_cleanup(struct cpupri *cp)
186{
187 int i;
174 188
189 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
190 free_cpumask_var(cp->pri_to_cpu[i].mask);
191}
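cpupri_init() can now fail because every priority vector's mask is a cpumask_var_t, so it gains a return value, a bootmem flag for the early boot caller, and a matching cpupri_cleanup() for teardown. A hypothetical caller showing the new pairing (the real callers are presumably the root-domain init/free paths in kernel/sched.c):

    static int example_cpupri_user(struct cpupri *cp, bool early)    /* hypothetical */
    {
            if (cpupri_init(cp, early))
                    return -ENOMEM;
            /* ... use cp ... */
            cpupri_cleanup(cp);
            return 0;
    }

Note the cleanup loop inside cpupri_init() itself: on a mid-way allocation failure it frees only the vectors already set up before returning -ENOMEM.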
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index f25811b0f931..642a94ef8a0a 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -14,7 +14,7 @@
14struct cpupri_vec { 14struct cpupri_vec {
15 spinlock_t lock; 15 spinlock_t lock;
16 int count; 16 int count;
17 cpumask_t mask; 17 cpumask_var_t mask;
18}; 18};
19 19
20struct cpupri { 20struct cpupri {
@@ -27,7 +27,8 @@ struct cpupri {
27int cpupri_find(struct cpupri *cp, 27int cpupri_find(struct cpupri *cp,
28 struct task_struct *p, cpumask_t *lowest_mask); 28 struct task_struct *p, cpumask_t *lowest_mask);
29void cpupri_set(struct cpupri *cp, int cpu, int pri); 29void cpupri_set(struct cpupri *cp, int cpu, int pri);
30void cpupri_init(struct cpupri *cp); 30int cpupri_init(struct cpupri *cp, bool bootmem);
31void cpupri_cleanup(struct cpupri *cp);
31#else 32#else
32#define cpupri_set(cp, cpu, pri) do { } while (0) 33#define cpupri_set(cp, cpu, pri) do { } while (0)
33#define cpupri_init() do { } while (0) 34#define cpupri_init() do { } while (0)
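The header keeps cpupri_find()'s cpumask_t *lowest_mask prototype even though the definition now says struct cpumask *; the two still match because cpumask_t is just a typedef for struct cpumask (roughly: typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;). The stub macros in the #else branch are left alone, which stays harmless only as long as that configuration never expands cpupri_init() with the new two-argument form.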
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5ad4440f0fc4..56c0efe902a7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1019,16 +1019,33 @@ static void yield_task_fair(struct rq *rq)
1019 * search starts with cpus closest then further out as needed, 1019 * search starts with cpus closest then further out as needed,
1020 * so we always favor a closer, idle cpu. 1020 * so we always favor a closer, idle cpu.
1021 * Domains may include CPUs that are not usable for migration, 1021 * Domains may include CPUs that are not usable for migration,
1022 * hence we need to mask them out (cpu_active_map) 1022 * hence we need to mask them out (cpu_active_mask)
1023 * 1023 *
1024 * Returns the CPU we should wake onto. 1024 * Returns the CPU we should wake onto.
1025 */ 1025 */
1026#if defined(ARCH_HAS_SCHED_WAKE_IDLE) 1026#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1027static int wake_idle(int cpu, struct task_struct *p) 1027static int wake_idle(int cpu, struct task_struct *p)
1028{ 1028{
1029 cpumask_t tmp;
1030 struct sched_domain *sd; 1029 struct sched_domain *sd;
1031 int i; 1030 int i;
1031 unsigned int chosen_wakeup_cpu;
1032 int this_cpu;
1033
1034 /*
1035 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
1036 * are idle and this is not a kernel thread and this task's affinity
1037 * allows it to be moved to preferred cpu, then just move!
1038 */
1039
1040 this_cpu = smp_processor_id();
1041 chosen_wakeup_cpu =
1042 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
1043
1044 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
1045 idle_cpu(cpu) && idle_cpu(this_cpu) &&
1046 p->mm && !(p->flags & PF_KTHREAD) &&
1047 cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
1048 return chosen_wakeup_cpu;
1032 1049
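This early-return block is a behavioural addition rather than API churn: at the highest power-savings level, if both the waking CPU and the task's previous CPU are idle and the task is a user task whose affinity permits it, wake_idle() short-circuits to the root domain's preferred wakeup CPU (sched_mc_preferred_wakeup_cpu, presumably maintained elsewhere in this series by the load balancer), so wakeups get consolidated onto fewer packages instead of spreading out.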
1033 /* 1050 /*
1034 * If it is idle, then it is the best cpu to run this task. 1051 * If it is idle, then it is the best cpu to run this task.
@@ -1046,10 +1063,9 @@ static int wake_idle(int cpu, struct task_struct *p)
1046 if ((sd->flags & SD_WAKE_IDLE) 1063 if ((sd->flags & SD_WAKE_IDLE)
1047 || ((sd->flags & SD_WAKE_IDLE_FAR) 1064 || ((sd->flags & SD_WAKE_IDLE_FAR)
1048 && !task_hot(p, task_rq(p)->clock, sd))) { 1065 && !task_hot(p, task_rq(p)->clock, sd))) {
1049 cpus_and(tmp, sd->span, p->cpus_allowed); 1066 for_each_cpu_and(i, sched_domain_span(sd),
1050 cpus_and(tmp, tmp, cpu_active_map); 1067 &p->cpus_allowed) {
1051 for_each_cpu_mask_nr(i, tmp) { 1068 if (cpu_active(i) && idle_cpu(i)) {
1052 if (idle_cpu(i)) {
1053 if (i != task_cpu(p)) { 1069 if (i != task_cpu(p)) {
1054 schedstat_inc(p, 1070 schedstat_inc(p,
1055 se.nr_wakeups_idle); 1071 se.nr_wakeups_idle);
@@ -1242,13 +1258,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
1242 * this_cpu and prev_cpu are present in: 1258 * this_cpu and prev_cpu are present in:
1243 */ 1259 */
1244 for_each_domain(this_cpu, sd) { 1260 for_each_domain(this_cpu, sd) {
1245 if (cpu_isset(prev_cpu, sd->span)) { 1261 if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
1246 this_sd = sd; 1262 this_sd = sd;
1247 break; 1263 break;
1248 } 1264 }
1249 } 1265 }
1250 1266
1251 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) 1267 if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
1252 goto out; 1268 goto out;
1253 1269
1254 /* 1270 /*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 51d2af3e6191..833b6d44483c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq)
15 if (!rq->online) 15 if (!rq->online)
16 return; 16 return;
17 17
18 cpu_set(rq->cpu, rq->rd->rto_mask); 18 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
19 /* 19 /*
20 * Make sure the mask is visible before we set 20 * Make sure the mask is visible before we set
21 * the overload count. That is checked to determine 21 * the overload count. That is checked to determine
@@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq)
34 34
35 /* the order here really doesn't matter */ 35 /* the order here really doesn't matter */
36 atomic_dec(&rq->rd->rto_count); 36 atomic_dec(&rq->rd->rto_count);
37 cpu_clear(rq->cpu, rq->rd->rto_mask); 37 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
38} 38}
39 39
40static void update_rt_migration(struct rq *rq) 40static void update_rt_migration(struct rq *rq)
@@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
139} 139}
140 140
141#ifdef CONFIG_SMP 141#ifdef CONFIG_SMP
142static inline cpumask_t sched_rt_period_mask(void) 142static inline const struct cpumask *sched_rt_period_mask(void)
143{ 143{
144 return cpu_rq(smp_processor_id())->rd->span; 144 return cpu_rq(smp_processor_id())->rd->span;
145} 145}
146#else 146#else
147static inline cpumask_t sched_rt_period_mask(void) 147static inline const struct cpumask *sched_rt_period_mask(void)
148{ 148{
149 return cpu_online_map; 149 return cpu_online_mask;
150} 150}
151#endif 151#endif
152 152
@@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
212 return rt_rq->rt_throttled; 212 return rt_rq->rt_throttled;
213} 213}
214 214
215static inline cpumask_t sched_rt_period_mask(void) 215static inline const struct cpumask *sched_rt_period_mask(void)
216{ 216{
217 return cpu_online_map; 217 return cpu_online_mask;
218} 218}
219 219
220static inline 220static inline
@@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
241 int i, weight, more = 0; 241 int i, weight, more = 0;
242 u64 rt_period; 242 u64 rt_period;
243 243
244 weight = cpus_weight(rd->span); 244 weight = cpumask_weight(rd->span);
245 245
246 spin_lock(&rt_b->rt_runtime_lock); 246 spin_lock(&rt_b->rt_runtime_lock);
247 rt_period = ktime_to_ns(rt_b->rt_period); 247 rt_period = ktime_to_ns(rt_b->rt_period);
248 for_each_cpu_mask_nr(i, rd->span) { 248 for_each_cpu(i, rd->span) {
249 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 249 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
250 s64 diff; 250 s64 diff;
251 251
@@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq)
324 /* 324 /*
325 * Greedy reclaim, take back as much as we can. 325 * Greedy reclaim, take back as much as we can.
326 */ 326 */
327 for_each_cpu_mask(i, rd->span) { 327 for_each_cpu(i, rd->span) {
328 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 328 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
329 s64 diff; 329 s64 diff;
330 330
@@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq)
429static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) 429static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
430{ 430{
431 int i, idle = 1; 431 int i, idle = 1;
432 cpumask_t span; 432 const struct cpumask *span;
433 433
434 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) 434 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
435 return 1; 435 return 1;
436 436
437 span = sched_rt_period_mask(); 437 span = sched_rt_period_mask();
438 for_each_cpu_mask(i, span) { 438 for_each_cpu(i, span) {
439 int enqueue = 0; 439 int enqueue = 0;
440 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 440 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
441 struct rq *rq = rq_of_rt_rq(rt_rq); 441 struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -805,17 +805,20 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
805 805
806static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 806static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
807{ 807{
808 cpumask_t mask; 808 cpumask_var_t mask;
809 809
810 if (rq->curr->rt.nr_cpus_allowed == 1) 810 if (rq->curr->rt.nr_cpus_allowed == 1)
811 return; 811 return;
812 812
813 if (p->rt.nr_cpus_allowed != 1 813 if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
814 && cpupri_find(&rq->rd->cpupri, p, &mask))
815 return; 814 return;
816 815
817 if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) 816 if (p->rt.nr_cpus_allowed != 1
818 return; 817 && cpupri_find(&rq->rd->cpupri, p, mask))
818 goto free;
819
820 if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
821 goto free;
819 822
820 /* 823 /*
821 * There appears to be other cpus that can accept 824 * There appears to be other cpus that can accept
@@ -824,6 +827,8 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
824 */ 827 */
825 requeue_task_rt(rq, p, 1); 828 requeue_task_rt(rq, p, 1);
826 resched_task(rq->curr); 829 resched_task(rq->curr);
830free:
831 free_cpumask_var(mask);
827} 832}
828 833
829#endif /* CONFIG_SMP */ 834#endif /* CONFIG_SMP */
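check_preempt_equal_prio() used to keep a cpumask_t on its stack; it now allocates one with GFP_ATOMIC (it is called from the wakeup/preemption path, which cannot sleep) and simply bails out if the allocation fails, treating the push-away optimisation as best-effort. The shape of that pattern (hypothetical function, same idea):

    static void example_best_effort(struct task_struct *p)    /* hypothetical */
    {
            cpumask_var_t mask;

            if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
                    return;                  /* optimisation skipped, not an error */

            cpumask_copy(mask, &p->cpus_allowed);
            /* ... decide whether to requeue and reschedule ... */

            free_cpumask_var(mask);
    }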
@@ -914,7 +919,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
914static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 919static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
915{ 920{
916 if (!task_running(rq, p) && 921 if (!task_running(rq, p) &&
917 (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && 922 (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
918 (p->rt.nr_cpus_allowed > 1)) 923 (p->rt.nr_cpus_allowed > 1))
919 return 1; 924 return 1;
920 return 0; 925 return 0;
@@ -953,7 +958,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
953 return next; 958 return next;
954} 959}
955 960
956static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); 961static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
957 962
958static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) 963static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
959{ 964{
@@ -973,7 +978,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
973static int find_lowest_rq(struct task_struct *task) 978static int find_lowest_rq(struct task_struct *task)
974{ 979{
975 struct sched_domain *sd; 980 struct sched_domain *sd;
976 cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); 981 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
977 int this_cpu = smp_processor_id(); 982 int this_cpu = smp_processor_id();
978 int cpu = task_cpu(task); 983 int cpu = task_cpu(task);
979 984
@@ -988,7 +993,7 @@ static int find_lowest_rq(struct task_struct *task)
988 * I guess we might want to change cpupri_find() to ignore those 993 * I guess we might want to change cpupri_find() to ignore those
989 * in the first place. 994 * in the first place.
990 */ 995 */
991 cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); 996 cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
992 997
993 /* 998 /*
994 * At this point we have built a mask of cpus representing the 999 * At this point we have built a mask of cpus representing the
@@ -998,7 +1003,7 @@ static int find_lowest_rq(struct task_struct *task)
998 * We prioritize the last cpu that the task executed on since 1003 * We prioritize the last cpu that the task executed on since
999 * it is most likely cache-hot in that location. 1004 * it is most likely cache-hot in that location.
1000 */ 1005 */
1001 if (cpu_isset(cpu, *lowest_mask)) 1006 if (cpumask_test_cpu(cpu, lowest_mask))
1002 return cpu; 1007 return cpu;
1003 1008
1004 /* 1009 /*
@@ -1013,7 +1018,8 @@ static int find_lowest_rq(struct task_struct *task)
1013 cpumask_t domain_mask; 1018 cpumask_t domain_mask;
1014 int best_cpu; 1019 int best_cpu;
1015 1020
1016 cpus_and(domain_mask, sd->span, *lowest_mask); 1021 cpumask_and(&domain_mask, sched_domain_span(sd),
1022 lowest_mask);
1017 1023
1018 best_cpu = pick_optimal_cpu(this_cpu, 1024 best_cpu = pick_optimal_cpu(this_cpu,
1019 &domain_mask); 1025 &domain_mask);
@@ -1054,8 +1060,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1054 * Also make sure that it wasn't scheduled on its rq. 1060 * Also make sure that it wasn't scheduled on its rq.
1055 */ 1061 */
1056 if (unlikely(task_rq(task) != rq || 1062 if (unlikely(task_rq(task) != rq ||
1057 !cpu_isset(lowest_rq->cpu, 1063 !cpumask_test_cpu(lowest_rq->cpu,
1058 task->cpus_allowed) || 1064 &task->cpus_allowed) ||
1059 task_running(rq, task) || 1065 task_running(rq, task) ||
1060 !task->se.on_rq)) { 1066 !task->se.on_rq)) {
1061 1067
@@ -1176,7 +1182,7 @@ static int pull_rt_task(struct rq *this_rq)
1176 1182
1177 next = pick_next_task_rt(this_rq); 1183 next = pick_next_task_rt(this_rq);
1178 1184
1179 for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { 1185 for_each_cpu(cpu, this_rq->rd->rto_mask) {
1180 if (this_cpu == cpu) 1186 if (this_cpu == cpu)
1181 continue; 1187 continue;
1182 1188
@@ -1305,9 +1311,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
1305} 1311}
1306 1312
1307static void set_cpus_allowed_rt(struct task_struct *p, 1313static void set_cpus_allowed_rt(struct task_struct *p,
1308 const cpumask_t *new_mask) 1314 const struct cpumask *new_mask)
1309{ 1315{
1310 int weight = cpus_weight(*new_mask); 1316 int weight = cpumask_weight(new_mask);
1311 1317
1312 BUG_ON(!rt_task(p)); 1318 BUG_ON(!rt_task(p));
1313 1319
@@ -1328,7 +1334,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
1328 update_rt_migration(rq); 1334 update_rt_migration(rq);
1329 } 1335 }
1330 1336
1331 p->cpus_allowed = *new_mask; 1337 cpumask_copy(&p->cpus_allowed, new_mask);
1332 p->rt.nr_cpus_allowed = weight; 1338 p->rt.nr_cpus_allowed = weight;
1333} 1339}
1334 1340
@@ -1371,6 +1377,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
1371 if (!rq->rt.rt_nr_running) 1377 if (!rq->rt.rt_nr_running)
1372 pull_rt_task(rq); 1378 pull_rt_task(rq);
1373} 1379}
1380
1381static inline void init_sched_rt_class(void)
1382{
1383 unsigned int i;
1384
1385 for_each_possible_cpu(i)
1386 alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
1387}
1374#endif /* CONFIG_SMP */ 1388#endif /* CONFIG_SMP */
1375 1389
1376/* 1390/*
@@ -1541,3 +1555,4 @@ static void print_rt_stats(struct seq_file *m, int cpu)
1541 rcu_read_unlock(); 1555 rcu_read_unlock();
1542} 1556}
1543#endif /* CONFIG_SCHED_DEBUG */ 1557#endif /* CONFIG_SCHED_DEBUG */
1558
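local_cpu_mask above becomes a per-cpu cpumask_var_t, so it needs an explicit allocation pass: the new init_sched_rt_class() walks all possible CPUs and allocates one mask each, and sched_init_smp() (earlier in this patch) calls it once the allocators are available. A sketch of the same per-cpu pattern with a hypothetical variable:

    static DEFINE_PER_CPU(cpumask_var_t, example_scratch);    /* hypothetical */

    static void __init example_percpu_alloc(void)
    {
            unsigned int cpu;

            for_each_possible_cpu(cpu)
                    alloc_cpumask_var(&per_cpu(example_scratch, cpu), GFP_KERNEL);
    }

The return value is ignored here just as in init_sched_rt_class(); presumably the expectation is that a boot-time GFP_KERNEL allocation of a few hundred bytes per CPU does not fail.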
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index b59fd9cdc1b6..f2773b5d1226 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -42,7 +42,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
42 for_each_domain(cpu, sd) { 42 for_each_domain(cpu, sd) {
43 enum cpu_idle_type itype; 43 enum cpu_idle_type itype;
44 44
45 cpumask_scnprintf(mask_str, mask_len, &sd->span); 45 cpumask_scnprintf(mask_str, mask_len,
46 sched_domain_span(sd));
46 seq_printf(seq, "domain%d %s", dcount++, mask_str); 47 seq_printf(seq, "domain%d %s", dcount++, mask_str);
47 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; 48 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
48 itype++) { 49 itype++) {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8f3fc2582d38..76a574bbef97 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void)
144 if (!ts->tick_stopped) 144 if (!ts->tick_stopped)
145 return; 145 return;
146 146
147 cpu_clear(cpu, nohz_cpu_mask); 147 cpumask_clear_cpu(cpu, nohz_cpu_mask);
148 now = ktime_get(); 148 now = ktime_get();
149 ts->idle_waketime = now; 149 ts->idle_waketime = now;
150 150
@@ -301,7 +301,7 @@ void tick_nohz_stop_sched_tick(int inidle)
301 tick_do_timer_cpu = TICK_DO_TIMER_NONE; 301 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
302 302
303 if (delta_jiffies > 1) 303 if (delta_jiffies > 1)
304 cpu_set(cpu, nohz_cpu_mask); 304 cpumask_set_cpu(cpu, nohz_cpu_mask);
305 305
306 /* Skip reprogram of event if its not changed */ 306 /* Skip reprogram of event if its not changed */
307 if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) 307 if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
@@ -319,7 +319,7 @@ void tick_nohz_stop_sched_tick(int inidle)
319 /* 319 /*
320 * sched tick not stopped! 320 * sched tick not stopped!
321 */ 321 */
322 cpu_clear(cpu, nohz_cpu_mask); 322 cpumask_clear_cpu(cpu, nohz_cpu_mask);
323 goto out; 323 goto out;
324 } 324 }
325 325
@@ -361,7 +361,7 @@ void tick_nohz_stop_sched_tick(int inidle)
361 * softirq. 361 * softirq.
362 */ 362 */
363 tick_do_update_jiffies64(ktime_get()); 363 tick_do_update_jiffies64(ktime_get());
364 cpu_clear(cpu, nohz_cpu_mask); 364 cpumask_clear_cpu(cpu, nohz_cpu_mask);
365 } 365 }
366 raise_softirq_irqoff(TIMER_SOFTIRQ); 366 raise_softirq_irqoff(TIMER_SOFTIRQ);
367out: 367out:
@@ -439,7 +439,7 @@ void tick_nohz_restart_sched_tick(void)
439 select_nohz_load_balancer(0); 439 select_nohz_load_balancer(0);
440 now = ktime_get(); 440 now = ktime_get();
441 tick_do_update_jiffies64(now); 441 tick_do_update_jiffies64(now);
442 cpu_clear(cpu, nohz_cpu_mask); 442 cpumask_clear_cpu(cpu, nohz_cpu_mask);
443 443
444 /* 444 /*
445 * We stopped the tick in idle. Update process times would miss the 445 * We stopped the tick in idle. Update process times would miss the