author	Ingo Molnar <mingo@elte.hu>	2009-01-02 16:41:36 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-01-02 16:41:36 -0500
commit	923a789b49c7269a0245d5af6afe486188d940df (patch)
tree	c3f168427372e64f7467a794f313416da5086ba0 /arch/x86/kernel
parent	103ceffb9501531f6931df6aebc11a05189201f0 (diff)
parent	b840d79631c882786925303c2b0f4fefc31845ed (diff)
Merge branch 'linus' into x86/cleanups
Conflicts:
	arch/x86/kernel/reboot.c
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/Makefile	3
-rw-r--r--	arch/x86/kernel/apic.c	34
-rw-r--r--	arch/x86/kernel/cpu/intel_cacheinfo.c	45
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce_amd_64.c	108
-rw-r--r--	arch/x86/kernel/cpu/mtrr/generic.c	12
-rw-r--r--	arch/x86/kernel/cpu/mtrr/main.c	4
-rw-r--r--	arch/x86/kernel/cpu/mtrr/mtrr.h	18
-rw-r--r--	arch/x86/kernel/crash.c	18
-rw-r--r--	arch/x86/kernel/genapic_flat_64.c	107
-rw-r--r--	arch/x86/kernel/genx2apic_cluster.c	81
-rw-r--r--	arch/x86/kernel/genx2apic_phys.c	74
-rw-r--r--	arch/x86/kernel/genx2apic_uv_x.c	61
-rw-r--r--	arch/x86/kernel/hpet.c	15
-rw-r--r--	arch/x86/kernel/i8253.c	2
-rw-r--r--	arch/x86/kernel/init_task.c	1
-rw-r--r--	arch/x86/kernel/io_apic.c	1021
-rw-r--r--	arch/x86/kernel/ipi.c	28
-rw-r--r--	arch/x86/kernel/irq.c	6
-rw-r--r--	arch/x86/kernel/irq_32.c	15
-rw-r--r--	arch/x86/kernel/irq_64.c	17
-rw-r--r--	arch/x86/kernel/irqinit_32.c	19
-rw-r--r--	arch/x86/kernel/irqinit_64.c	16
-rw-r--r--	arch/x86/kernel/kvmclock.c	10
-rw-r--r--	arch/x86/kernel/mfgpt_32.c	2
-rw-r--r--	arch/x86/kernel/pci-dma.c	13
-rw-r--r--	arch/x86/kernel/pci-swiotlb_64.c	29
-rw-r--r--	arch/x86/kernel/quirks.c	2
-rw-r--r--	arch/x86/kernel/reboot.c	67
-rw-r--r--	arch/x86/kernel/setup.c	2
-rw-r--r--	arch/x86/kernel/setup_percpu.c	19
-rw-r--r--	arch/x86/kernel/smp.c	8
-rw-r--r--	arch/x86/kernel/smpboot.c	33
-rw-r--r--	arch/x86/kernel/tlb_32.c	2
-rw-r--r--	arch/x86/kernel/tlb_64.c	2
-rw-r--r--	arch/x86/kernel/traps.c	12
-rw-r--r--	arch/x86/kernel/vmiclock_32.c	2
36 files changed, 1235 insertions(+), 673 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 88dd768eab6d..d364df03c1d6 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -109,6 +109,8 @@ obj-$(CONFIG_MICROCODE) += microcode.o
 
 obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)	+= check.o
 
+obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb_64.o # NB rename without _64
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
@@ -122,7 +124,6 @@ ifeq ($(CONFIG_X86_64),y)
 	obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
 	obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
 	obj-$(CONFIG_AMD_IOMMU)		+= amd_iommu_init.o amd_iommu.o
-	obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb_64.o
 
 	obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 endif
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 66198cbe464d..d652515e2855 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -119,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
 int first_system_vector = 0xfe;
 
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
 /*
  * Debug level, exported for io_apic.c
  */
@@ -142,7 +140,7 @@ static int lapic_next_event(unsigned long delta,
 			   struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const cpumask_t *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -455,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const cpumask_t *mask)
 {
 #ifdef CONFIG_SMP
 	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@ -471,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+	levt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(levt);
 }
@@ -1807,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup)
 void __cpuinit generic_processor_info(int apicid, int version)
 {
 	int cpu;
-	cpumask_t tmp_map;
 
 	/*
 	 * Validate version
 	 */
 	if (version == 0x0) {
 		pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
 			   "fixing up to 0x10. (tell your hw vendor)\n",
 			   version);
 		version = 0x10;
 	}
 	apic_version[apicid] = version;
 
-	if (num_processors >= NR_CPUS) {
-		pr_warning("WARNING: NR_CPUS limit of %i reached."
-			   " Processor ignored.\n", NR_CPUS);
+	if (num_processors >= nr_cpu_ids) {
+		int max = nr_cpu_ids;
+		int thiscpu = max + disabled_cpus;
+
+		pr_warning(
+			"ACPI: NR_CPUS/possible_cpus limit of %i reached."
+			" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+		disabled_cpus++;
 		return;
 	}
 
 	num_processors++;
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
+	cpu = cpumask_next_zero(-1, cpu_present_mask);
 
 	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
@@ -1878,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	}
 #endif
 
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
+	set_cpu_possible(cpu, true);
+	set_cpu_present(cpu, true);
 }
 
 #ifdef CONFIG_X86_64
@@ -2081,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void)
 	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		/* are we being called early in kernel startup? */
 		if (bios_cpu_apicid) {
 			id = bios_cpu_apicid[i];
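
The apic.c hunks above replace fixed NR_CPUS bounds and on-stack cpumask_t scratch copies with operators that respect the boot-time nr_cpu_ids limit. A minimal sketch of the new enumeration pattern, assuming only the 2.6.28-era <linux/cpumask.h> API; enumerate_new_cpu() is a hypothetical wrapper for illustration, not a function from this patch:

	#include <linux/cpumask.h>
	#include <linux/errno.h>

	/* Pick the first CPU number that is not yet present, the way
	 * generic_processor_info() now does, without a temporary mask. */
	static int enumerate_new_cpu(void)
	{
		/* cpumask_next_zero(-1, mask) scans from bit 0 and returns
		 * the first clear bit, i.e. the first free CPU id. */
		int cpu = cpumask_next_zero(-1, cpu_present_mask);

		if (cpu >= nr_cpu_ids)	/* honor the possible_cpus/NR_CPUS limit */
			return -ENOSPC;

		set_cpu_possible(cpu, true);
		set_cpu_present(cpu, true);
		return cpu;
	}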
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 68b5d8681cbb..c6ecda64f5f1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	per_cpu(cpuid4_info, cpu) = NULL;
 }
 
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
+static void get_cpu_leaves(void *_retval)
 {
-	struct _cpuid4_info	*this_leaf;
-	unsigned long		j;
-	int			retval;
-	cpumask_t		oldmask;
-
-	if (num_cache_leaves == 0)
-		return -ENOENT;
-
-	per_cpu(cpuid4_info, cpu) = kzalloc(
-	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-	if (per_cpu(cpuid4_info, cpu) == NULL)
-		return -ENOMEM;
-
-	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (retval)
-		goto out;
+	int j, *retval = _retval, cpu = smp_processor_id();
 
 	/* Do cpuid and store the results */
 	for (j = 0; j < num_cache_leaves; j++) {
+		struct _cpuid4_info *this_leaf;
 		this_leaf = CPUID4_INFO_IDX(cpu, j);
-		retval = cpuid4_cache_lookup(j, this_leaf);
-		if (unlikely(retval < 0)) {
+		*retval = cpuid4_cache_lookup(j, this_leaf);
+		if (unlikely(*retval < 0)) {
 			int i;
 
 			for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 		}
 		cache_shared_cpu_map_setup(cpu, j);
 	}
-	set_cpus_allowed_ptr(current, &oldmask);
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+	int retval;
+
+	if (num_cache_leaves == 0)
+		return -ENOENT;
+
+	per_cpu(cpuid4_info, cpu) = kzalloc(
+	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+	if (per_cpu(cpuid4_info, cpu) == NULL)
+		return -ENOMEM;
 
-out:
+	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 	if (retval) {
 		kfree(per_cpu(cpuid4_info, cpu));
 		per_cpu(cpuid4_info, cpu) = NULL;
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 		cpumask_t *mask = &this_leaf->shared_cpu_map;
 
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
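
The intel_cacheinfo.c rework above swaps the old "rebind current to the target CPU with set_cpus_allowed_ptr(), then run cpuid" trick for smp_call_function_single(), which runs a callback on the target CPU and can wait for it to finish. The same pattern in isolation, a sketch against the 2.6.28 API; do_rdmsr()/read_msr_on() and their MSR plumbing are illustrative, not taken from the patch:

	#include <linux/smp.h>
	#include <asm/msr.h>

	struct msr_req {
		u32 msr;
		u64 val;
	};

	/* Runs on the target CPU in IPI context: keep it short,
	 * no sleeping allowed here. */
	static void do_rdmsr(void *_req)
	{
		struct msr_req *req = _req;

		rdmsrl(req->msr, req->val);
	}

	static u64 read_msr_on(int cpu, u32 msr)
	{
		struct msr_req req = { .msr = msr };

		/* wait=1: req.val is stable once this returns */
		smp_call_function_single(cpu, do_rdmsr, &req, 1);
		return req.val;
	}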
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 748c8f9e7a05..a5a5e0530370 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
  * CPU Initialization
  */
 
+struct thresh_restart {
+	struct threshold_block *b;
+	int reset;
+	u16 old_limit;
+};
+
 /* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_block *b,
-				   int reset, u16 old_limit)
+static long threshold_restart_bank(void *_tr)
 {
+	struct thresh_restart *tr = _tr;
 	u32 mci_misc_hi, mci_misc_lo;
 
-	rdmsr(b->address, mci_misc_lo, mci_misc_hi);
+	rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
 
-	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
-		reset = 1;	/* limit cannot be lower than err count */
+	if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+		tr->reset = 1;	/* limit cannot be lower than err count */
 
-	if (reset) {		/* reset err count and overflow bit */
+	if (tr->reset) {	/* reset err count and overflow bit */
 		mci_misc_hi =
 		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
-		    (THRESHOLD_MAX - b->threshold_limit);
-	} else if (old_limit) {	/* change limit w/o reset */
+		    (THRESHOLD_MAX - tr->b->threshold_limit);
+	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
-		    (old_limit - b->threshold_limit);
+		    (tr->old_limit - tr->b->threshold_limit);
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
 
-	b->interrupt_enable ?
+	tr->b->interrupt_enable ?
 	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
 	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
 
 	mci_misc_hi |= MASK_COUNT_EN_HI;
-	wrmsr(b->address, mci_misc_lo, mci_misc_hi);
+	wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	return 0;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 	unsigned int cpu = smp_processor_id();
 	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
+	struct thresh_restart tr;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 			wrmsr(address, low, high);
 
 			threshold_defaults.address = address;
-			threshold_restart_bank(&threshold_defaults, 0, 0);
+			tr.b = &threshold_defaults;
+			tr.reset = 0;
+			tr.old_limit = 0;
+			threshold_restart_bank(&tr);
 		}
 	}
 }
@@ -251,20 +262,6 @@ struct threshold_attr {
 	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
-					   cpumask_t *newmask)
-{
-	*oldmask = current->cpus_allowed;
-	cpus_clear(*newmask);
-	cpu_set(cpu, *newmask);
-	set_cpus_allowed_ptr(current, newmask);
-}
-
-static void affinity_restore(const cpumask_t *oldmask)
-{
-	set_cpus_allowed_ptr(current, oldmask);
-}
-
 #define SHOW_FIELDS(name) \
 static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
 { \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
 				      const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
 	b->interrupt_enable = !!new;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, 0);
-	affinity_restore(&oldmask);
+	tr.b = b;
+	tr.reset = 0;
+	tr.old_limit = 0;
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 				     const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
-	u16 old;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
-	old = b->threshold_limit;
+	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
+	tr.b = b;
+	tr.reset = 0;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, old);
-	affinity_restore(&oldmask);
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
 
-static ssize_t show_error_count(struct threshold_block *b, char *buf)
+static long local_error_count(void *_b)
 {
-	u32 high, low;
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
+	struct threshold_block *b = _b;
+	u32 low, high;
+
 	rdmsr(b->address, low, high);
-	affinity_restore(&oldmask);
-	return sprintf(buf, "%x\n",
-		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+	return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+}
+
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
+{
+	return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
 }
 
 static ssize_t store_error_count(struct threshold_block *b,
 				 const char *buf, size_t count)
 {
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 1, 0);
-	affinity_restore(&oldmask);
+	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 	return 1;
 }
 
@@ -463,12 +462,19 @@ out_free:
 	return err;
 }
 
+static long local_allocate_threshold_blocks(void *_bank)
+{
+	unsigned int *bank = _bank;
+
+	return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
+					 MSR_IA32_MC0_MISC + *bank * 4);
+}
+
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
 static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
 	int i, err = 0;
 	struct threshold_bank *b = NULL;
-	cpumask_t oldmask, newmask;
 	char name[32];
 
 	sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
 
-	affinity_set(cpu, &oldmask, &newmask);
-	err = allocate_threshold_blocks(cpu, bank, 0,
-					MSR_IA32_MC0_MISC + bank * 4);
-	affinity_restore(&oldmask);
-
+	err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
 	if (err)
 		goto out_free;
 
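
The mce_amd_64.c conversion above packs each operation's arguments into a struct (thresh_restart) and runs the work through work_on_cpu(), which executes the callback in process context on the chosen CPU and returns its long result, so the sysfs handlers no longer rewrite current->cpus_allowed. The calling convention in miniature, assuming the work_on_cpu() prototype this merge window introduces; init_bank()/setup_bank() are illustrative names, not from the patch:

	#include <linux/workqueue.h>

	struct bank_arg {
		unsigned int bank;
	};

	/* Executes in process context on the chosen CPU; may sleep. */
	static long init_bank(void *_arg)
	{
		struct bank_arg *arg = _arg;

		/* ... program the per-CPU MSRs for arg->bank here ... */
		return 0;
	}

	static int setup_bank(unsigned int cpu, unsigned int bank)
	{
		struct bank_arg arg = { .bank = bank };

		/* Blocks until init_bank() has run on 'cpu'. */
		return work_on_cpu(cpu, init_bank, &arg);
	}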
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 4e8d77f01eeb..b59ddcc88cd8 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,14 +14,6 @@
 #include <asm/pat.h>
 #include "mtrr.h"
 
-struct mtrr_state {
-	struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
-	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
-	unsigned char enabled;
-	unsigned char have_fixed;
-	mtrr_type def_type;
-};
-
 struct fixed_range_block {
 	int base_msr; /* start address of an MTRR block */
 	int ranges;   /* number of MTRRs in this block */
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = {
 };
 
 static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
 static int mtrr_state_set;
 u64 mtrr_tom2;
 
+struct mtrr_state_type mtrr_state = {};
+EXPORT_SYMBOL_GPL(mtrr_state);
+
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 44fcb237bd52..d259e5d2e054 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
 
 u32 num_var_ranges = 0;
 
-unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
 	unsigned long lsize;
 };
 
-static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
+static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
 
 static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
 {
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2dc4ec656b23..ffd60409cc6d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,11 +8,6 @@
 #define MTRRcap_MSR     0x0fe
 #define MTRRdefType_MSR 0x2ff
 
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-#define NUM_FIXED_RANGES 88
-#define MAX_VAR_RANGES 256
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -29,11 +24,7 @@
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
 
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
-   an 8 bit field: */
-typedef u8 mtrr_type;
-
-extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 
 struct mtrr_ops {
 	u32 vendor;
@@ -70,13 +61,6 @@ struct set_mtrr_context {
 	u32 ccr3;
 };
 
-struct mtrr_var_range {
-	u32 base_lo;
-	u32 base_hi;
-	u32 mask_lo;
-	u32 mask_hi;
-};
-
 void set_mtrr_done(struct set_mtrr_context *ctxt);
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index d84a852e4cd7..c689d19e35ab 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,6 +26,7 @@
 #include <linux/kdebug.h>
 #include <asm/smp.h>
 #include <asm/reboot.h>
+#include <asm/virtext.h>
 
 #include <mach_ipi.h>
 
@@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
 #endif
 	crash_save_cpu(regs, cpu);
 
+	/* Disable VMX or SVM if needed.
+	 *
+	 * We need to disable virtualization on all CPUs.
+	 * Having VMX or SVM enabled on any CPU may break rebooting
+	 * after the kdump kernel has finished its task.
+	 */
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+
 	disable_local_APIC();
 }
 
@@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	local_irq_disable();
 
 	kdump_nmi_shootdown_cpus();
+
+	/* Booting kdump kernel with VMX or SVM enabled won't work,
+	 * because (among other limitations) we can't disable paging
+	 * with the virt flags.
+	 */
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+
 	lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
 	disable_IO_APIC();
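
The crash.c hunks must get every CPU out of VMX/SVM operation before the kdump kernel boots. A rough sketch of what the VMX side of these helpers amounts to, assuming the <asm/virtext.h> primitives this series adds; this is the idea only, not the exact helper bodies:

	#include <asm/processor-flags.h>
	#include <asm/system.h>		/* read_cr4() on 2.6.28-era x86 */

	static inline void emergency_vmxoff_sketch(void)
	{
		/* CR4.VMXE is set only while VMX may be in use; issuing
		 * VMXOFF when VMX was never enabled would fault. */
		if (read_cr4() & X86_CR4_VMXE)
			asm volatile ("vmxoff");	/* leave VMX root operation */
	}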
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda4..34185488e4fb 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static cpumask_t flat_target_cpus(void)
+static const struct cpumask *flat_target_cpus(void)
 {
-	return cpu_online_map;
+	return cpu_online_mask;
 }
 
-static cpumask_t flat_vector_allocation_domain(int cpu)
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	cpumask_clear(retmask);
+	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
 }
 
 /*
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
 {
-	unsigned long mask = cpus_addr(cpumask)[0];
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
 	local_irq_restore(flags);
 }
 
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+	unsigned long mask = cpumask_bits(cpumask)[0];
+
+	_flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+					  int vector)
+{
+	unsigned long mask = cpumask_bits(cpumask)[0];
+	int cpu = smp_processor_id();
+
+	if (cpu < BITS_PER_LONG)
+		clear_bit(cpu, &mask);
+	_flat_send_IPI_mask(mask, vector);
+}
+
 static void flat_send_IPI_allbutself(int vector)
 {
+	int cpu = smp_processor_id();
 #ifdef	CONFIG_HOTPLUG_CPU
 	int hotplug = 1;
 #else
 	int hotplug = 0;
 #endif
 	if (hotplug || vector == NMI_VECTOR) {
-		cpumask_t allbutme = cpu_online_map;
+		if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+			unsigned long mask = cpumask_bits(cpu_online_mask)[0];
 
-		cpu_clear(smp_processor_id(), allbutme);
+			if (cpu < BITS_PER_LONG)
+				clear_bit(cpu, &mask);
 
-		if (!cpus_empty(allbutme))
-			flat_send_IPI_mask(allbutme, vector);
+			_flat_send_IPI_mask(mask, vector);
+		}
 	} else if (num_online_cpus() > 1) {
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
 	}
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
 static void flat_send_IPI_all(int vector)
 {
 	if (vector == NMI_VECTOR)
-		flat_send_IPI_mask(cpu_online_map, vector);
+		flat_send_IPI_mask(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void)
 	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						const struct cpumask *andmask)
 {
-	return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
+
+	return mask1 & mask2;
 }
 
 static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@ struct genapic apic_flat = {
 	.send_IPI_all = flat_send_IPI_all,
 	.send_IPI_allbutself = flat_send_IPI_allbutself,
 	.send_IPI_mask = flat_send_IPI_mask,
+	.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static cpumask_t physflat_target_cpus(void)
+static const struct cpumask *physflat_target_cpus(void)
 {
-	return cpu_online_map;
+	return cpu_online_mask;
 }
 
-static cpumask_t physflat_vector_allocation_domain(int cpu)
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	return cpumask_of_cpu(cpu);
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
 	send_IPI_mask_sequence(cpumask, vector);
 }
 
-static void physflat_send_IPI_allbutself(int vector)
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+					      int vector)
 {
-	cpumask_t allbutme = cpu_online_map;
+	send_IPI_mask_allbutself(cpumask, vector);
+}
 
-	cpu_clear(smp_processor_id(), allbutme);
-	physflat_send_IPI_mask(allbutme, vector);
+static void physflat_send_IPI_allbutself(int vector)
+{
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static void physflat_send_IPI_all(int vector)
 {
-	physflat_send_IPI_mask(cpu_online_map, vector);
+	physflat_send_IPI_mask(cpu_online_mask, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
 }
 
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 struct genapic apic_physflat = {
 	.name = "physical flat",
 	.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +296,10 @@ struct genapic apic_physflat = {
 	.send_IPI_all = physflat_send_IPI_all,
 	.send_IPI_allbutself = physflat_send_IPI_allbutself,
 	.send_IPI_mask = physflat_send_IPI_mask,
+	.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
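
Both cpu_mask_to_apicid_and() flavors above reduce to "first online CPU in the intersection of two masks", computed without materializing a temporary cpumask_t on the stack. The core loop in isolation, assuming the 2.6.28 for_each_cpu_and() iterator; first_online_cpu_and() is an illustrative name:

	#include <linux/cpumask.h>

	/* Return the first CPU present in both masks and online,
	 * or nr_cpu_ids if the intersection is empty. */
	static int first_online_cpu_and(const struct cpumask *a,
					const struct cpumask *b)
	{
		int cpu;

		for_each_cpu_and(cpu, a, b)
			if (cpumask_test_cpu(cpu, cpu_online_mask))
				return cpu;
		return nr_cpu_ids;
	}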
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a6..6ce497cc372d 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
 /*
  * for now each logical cpu is in its own vector allocation domain.
  */
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
  * at once. We have 16 cpu's in a cluster. This will minimize IPI register
  * writes.
  */
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
-		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
-				       vector, APIC_DEST_LOGICAL);
-	}
+	for_each_cpu(query_cpu, mask)
+		__x2apic_send_IPI_dest(
+			per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, APIC_DEST_LOGICAL);
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	cpu_clear(smp_processor_id(), mask);
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				vector, APIC_DEST_LOGICAL);
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+	local_irq_save(flags);
+	for_each_online_cpu(query_cpu)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				vector, APIC_DEST_LOGICAL);
+	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
 	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
-	if ((unsigned)cpu < NR_CPUS)
+	cpu = cpumask_first(cpumask);
+	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_logical_apicid, cpu);
 	else
 		return BAD_APICID;
 }
 
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one logical APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_logical_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = {
 	.send_IPI_all = x2apic_send_IPI_all,
 	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
 	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index a177c7880ab5..21bcc0e098ba 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
 
-static cpumask_t x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return cpumask_of(0);
 }
 
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	cpumask_t domain = CPU_MASK_NONE;
-	cpu_set(cpu, domain);
-	return domain;
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
 }
 
 static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
 	x2apic_icr_write(cfg, apicid);
 }
 
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
 
 	local_irq_save(flags);
-	for_each_cpu_mask(query_cpu, mask) {
+	for_each_cpu(query_cpu, mask) {
 		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
 				       vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
 {
-	cpumask_t mask = cpu_online_map;
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask) {
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
 
-	cpu_clear(smp_processor_id(), mask);
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+	unsigned long this_cpu = smp_processor_id();
 
-	if (!cpus_empty(mask))
-		x2apic_send_IPI_mask(mask, vector);
+	local_irq_save(flags);
+	for_each_online_cpu(query_cpu)
+		if (query_cpu != this_cpu)
+			__x2apic_send_IPI_dest(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-	x2apic_send_IPI_mask(cpu_online_map, vector);
+	x2apic_send_IPI_mask(cpu_online_mask, vector);
 }
 
 static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
-	if ((unsigned)cpu < NR_CPUS)
+	cpu = cpumask_first(cpumask);
+	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
 }
 
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = {
 	.send_IPI_all = x2apic_send_IPI_all,
 	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
 	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index dece17289731..b193e082f6ce 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
79 79
80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 80/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
81 81
82static cpumask_t uv_target_cpus(void) 82static const struct cpumask *uv_target_cpus(void)
83{ 83{
84 return cpumask_of_cpu(0); 84 return cpumask_of(0);
85} 85}
86 86
87static cpumask_t uv_vector_allocation_domain(int cpu) 87static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
88{ 88{
89 cpumask_t domain = CPU_MASK_NONE; 89 cpumask_clear(retmask);
90 cpu_set(cpu, domain); 90 cpumask_set_cpu(cpu, retmask);
91 return domain;
92} 91}
93 92
94int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) 93int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector)
127 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 126 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
128} 127}
129 128
130static void uv_send_IPI_mask(cpumask_t mask, int vector) 129static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
131{ 130{
132 unsigned int cpu; 131 unsigned int cpu;
133 132
134 for_each_possible_cpu(cpu) 133 for_each_cpu(cpu, mask)
135 if (cpu_isset(cpu, mask)) 134 uv_send_IPI_one(cpu, vector);
135}
136
137static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
138{
139 unsigned int cpu;
140 unsigned int this_cpu = smp_processor_id();
141
142 for_each_cpu(cpu, mask)
143 if (cpu != this_cpu)
136 uv_send_IPI_one(cpu, vector); 144 uv_send_IPI_one(cpu, vector);
137} 145}
138 146
139static void uv_send_IPI_allbutself(int vector) 147static void uv_send_IPI_allbutself(int vector)
140{ 148{
141 cpumask_t mask = cpu_online_map; 149 unsigned int cpu;
142 150 unsigned int this_cpu = smp_processor_id();
143 cpu_clear(smp_processor_id(), mask);
144 151
145 if (!cpus_empty(mask)) 152 for_each_online_cpu(cpu)
146 uv_send_IPI_mask(mask, vector); 153 if (cpu != this_cpu)
154 uv_send_IPI_one(cpu, vector);
147} 155}
148 156
149static void uv_send_IPI_all(int vector) 157static void uv_send_IPI_all(int vector)
150{ 158{
151 uv_send_IPI_mask(cpu_online_map, vector); 159 uv_send_IPI_mask(cpu_online_mask, vector);
152} 160}
153 161
154static int uv_apic_id_registered(void) 162static int uv_apic_id_registered(void)
@@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void)
 {
 }
 
-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
 	int cpu;
 
@@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
 	 * May as well be the first.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = cpumask_first(cpumask);
 	if ((unsigned)cpu < nr_cpu_ids)
 		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
 }
 
+static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+					      const struct cpumask *andmask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask)
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	if (cpu < nr_cpu_ids)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	return BAD_APICID;
+}
+
 static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
@@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = {
 	.send_IPI_all = uv_send_IPI_all,
 	.send_IPI_allbutself = uv_send_IPI_allbutself,
 	.send_IPI_mask = uv_send_IPI_mask,
+	.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
 	.send_IPI_self = uv_send_IPI_self,
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
 	.phys_pkg_id = phys_pkg_id,
 	.get_apic_id = get_apic_id,
 	.set_apic_id = set_apic_id,
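
Every new send_IPI_mask_allbutself() above has the same shape: walk the caller's mask read-only and skip the sending CPU, instead of copying cpu_online_map into a stack cpumask_t and clearing one bit. The shape in isolation; hit_cpu() merely stands in for the per-CPU send primitive (uv_send_IPI_one() and friends) and is left as a stub:

	#include <linux/cpumask.h>
	#include <linux/smp.h>

	static void hit_cpu(int cpu, int vector);	/* illustrative stub */

	static void send_allbutself(const struct cpumask *mask, int vector)
	{
		unsigned int this_cpu = smp_processor_id();
		unsigned int cpu;

		/* No cpumask copy: iterate the caller's mask directly. */
		for_each_cpu(cpu, mask)
			if (cpu != this_cpu)
				hit_cpu(cpu, vector);
	}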
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3f0a3edf0a57..cd759ad90690 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void)
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
-	hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
 	clockevents_register_device(&hpet_clockevent);
 	global_clock_event = &hpet_clockevent;
 	printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
 		struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
 		hpet_setup_msi_irq(hdev->irq);
 		disable_irq(hdev->irq);
-		irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+		irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 		enable_irq(hdev->irq);
 	}
 	break;
@@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 		return -1;
 
 	disable_irq(dev->irq);
-	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+	irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
 	enable_irq(dev->irq);
 
 	printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 	/* 5 usec minimum reprogramming delta. */
 	evt->min_delta_ns = 5000;
 
-	evt->cpumask = cpumask_of_cpu(hdev->cpu);
+	evt->cpumask = cpumask_of(hdev->cpu);
 	clockevents_register_device(evt);
 }
 
@@ -813,7 +813,7 @@ int __init hpet_enable(void)
 
 out_nohpet:
 	hpet_clear_mapping();
-	boot_hpet_disable = 1;
+	hpet_address = 0;
 	return 0;
 }
 
@@ -836,10 +836,11 @@ static __init int hpet_late_init(void)
 
 		hpet_address = force_hpet_address;
 		hpet_enable();
-		if (!hpet_virt_address)
-			return -ENODEV;
 	}
 
+	if (!hpet_virt_address)
+		return -ENODEV;
+
 	hpet_reserve_platform_timers(hpet_readl(HPET_ID));
 
 	for_each_online_cpu(cpu) {
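
The hpet.c substitutions all follow from one API change: cpumask_of_cpu() built a whole cpumask_t by value, while cpumask_of() returns a const struct cpumask * into a shared constant table, which is what clockevent .cpumask and irq_set_affinity() now take. Usage in miniature, assuming a 2.6.28-era clock_event_device whose .cpumask field is already a pointer in this tree; my_evt and register_on_this_cpu() are illustrative:

	#include <linux/clockchips.h>
	#include <linux/cpumask.h>
	#include <linux/smp.h>

	static struct clock_event_device my_evt;	/* illustrative device */

	static void register_on_this_cpu(void)
	{
		/* cpumask_of(cpu) is O(1); no mask is built on the stack. */
		my_evt.cpumask = cpumask_of(smp_processor_id());
		clockevents_register_device(&my_evt);
	}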
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f2..10f92fb532f3 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	pit_clockevent.cpumask = cpumask_of(smp_processor_id());
 	pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 				     pit_clockevent.shift);
 	pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d39918076bb4..df3bf269beab 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index b8c8a8e99341..69911722b9d3 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,94 +108,277 @@ static int __init parse_noapic(char *str)
 early_param("noapic", parse_noapic);
 
 struct irq_pin_list;
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{
+	struct irq_pin_list *pin;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+
+	return pin;
+}
+
 struct irq_cfg {
-	unsigned int irq;
 	struct irq_pin_list *irq_2_pin;
-	cpumask_t domain;
-	cpumask_t old_domain;
+	cpumask_var_t domain;
+	cpumask_var_t old_domain;
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+	u8 move_desc_pending : 1;
+#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
 static struct irq_cfg irq_cfgx[NR_IRQS] = {
-	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+#endif
+	[0]  = { .vector = IRQ0_VECTOR,  },
+	[1]  = { .vector = IRQ1_VECTOR,  },
+	[2]  = { .vector = IRQ2_VECTOR,  },
+	[3]  = { .vector = IRQ3_VECTOR,  },
+	[4]  = { .vector = IRQ4_VECTOR,  },
+	[5]  = { .vector = IRQ5_VECTOR,  },
+	[6]  = { .vector = IRQ6_VECTOR,  },
+	[7]  = { .vector = IRQ7_VECTOR,  },
+	[8]  = { .vector = IRQ8_VECTOR,  },
+	[9]  = { .vector = IRQ9_VECTOR,  },
+	[10] = { .vector = IRQ10_VECTOR, },
+	[11] = { .vector = IRQ11_VECTOR, },
+	[12] = { .vector = IRQ12_VECTOR, },
+	[13] = { .vector = IRQ13_VECTOR, },
+	[14] = { .vector = IRQ14_VECTOR, },
+	[15] = { .vector = IRQ15_VECTOR, },
 };
 
-#define for_each_irq_cfg(irq, cfg)		\
-	for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+int __init arch_early_irq_init(void)
+{
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	int count;
+	int i;
+
+	cfg = irq_cfgx;
+	count = ARRAY_SIZE(irq_cfgx);
+
+	for (i = 0; i < count; i++) {
+		desc = irq_to_desc(i);
+		desc->chip_data = &cfg[i];
+		alloc_bootmem_cpumask_var(&cfg[i].domain);
+		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		if (i < NR_IRQS_LEGACY)
+			cpumask_setall(cfg[i].domain);
+	}
+
+	return 0;
+}
 
+#ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
+	struct irq_cfg *cfg = NULL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	if (desc)
+		cfg = desc->chip_data;
+
+	return cfg;
 }
 
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 {
-	return irq_cfg(irq);
+	struct irq_cfg *cfg;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+	if (cfg) {
+		/* FIXME: needs alloc_cpumask_var_node() */
218 if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
219 kfree(cfg);
220 cfg = NULL;
221 } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
222 free_cpumask_var(cfg->domain);
223 kfree(cfg);
224 cfg = NULL;
225 } else {
226 cpumask_clear(cfg->domain);
227 cpumask_clear(cfg->old_domain);
228 }
229 }
230 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
231
232 return cfg;
152} 233}
153 234
154/* 235int arch_init_chip_data(struct irq_desc *desc, int cpu)
155 * Rough estimation of how many shared IRQs there are, can be changed 236{
156 * anytime. 237 struct irq_cfg *cfg;
157 */
158#define MAX_PLUS_SHARED_IRQS NR_IRQS
159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
160 238
161/* 239 cfg = desc->chip_data;
162 * This is performance-critical, we want to do it O(1) 240 if (!cfg) {
163 * 241 desc->chip_data = get_one_free_irq_cfg(cpu);
164 * the indexing order of this array favors 1:1 mappings 242 if (!desc->chip_data) {
165 * between pins and IRQs. 243 printk(KERN_ERR "can not alloc irq_cfg\n");
166 */ 244 BUG_ON(1);
245 }
246 }
167 247
168struct irq_pin_list { 248 return 0;
169 int apic, pin; 249}
170 struct irq_pin_list *next;
171};
172 250
173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; 251#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
174static struct irq_pin_list *irq_2_pin_ptr;
175 252
176static void __init irq_2_pin_init(void) 253static void
254init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
177{ 255{
178 struct irq_pin_list *pin = irq_2_pin_head; 256 struct irq_pin_list *old_entry, *head, *tail, *entry;
179 int i; 257
258 cfg->irq_2_pin = NULL;
259 old_entry = old_cfg->irq_2_pin;
260 if (!old_entry)
261 return;
262
263 entry = get_one_free_irq_2_pin(cpu);
264 if (!entry)
265 return;
180 266
181 for (i = 1; i < PIN_MAP_SIZE; i++) 267 entry->apic = old_entry->apic;
182 pin[i-1].next = &pin[i]; 268 entry->pin = old_entry->pin;
269 head = entry;
270 tail = entry;
271 old_entry = old_entry->next;
272 while (old_entry) {
273 entry = get_one_free_irq_2_pin(cpu);
274 if (!entry) {
275 entry = head;
276 while (entry) {
277 head = entry->next;
278 kfree(entry);
279 entry = head;
280 }
281 /* still use the old one */
282 return;
283 }
284 entry->apic = old_entry->apic;
285 entry->pin = old_entry->pin;
286 tail->next = entry;
287 tail = entry;
288 old_entry = old_entry->next;
289 }
183 290
184 irq_2_pin_ptr = &pin[0]; 291 tail->next = NULL;
292 cfg->irq_2_pin = head;
185} 293}
186 294
187static struct irq_pin_list *get_one_free_irq_2_pin(void) 295static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
188{ 296{
189 struct irq_pin_list *pin = irq_2_pin_ptr; 297 struct irq_pin_list *entry, *next;
190 298
191 if (!pin) 299 if (old_cfg->irq_2_pin == cfg->irq_2_pin)
192 panic("can not get more irq_2_pin\n"); 300 return;
193 301
194 irq_2_pin_ptr = pin->next; 302 entry = old_cfg->irq_2_pin;
195 pin->next = NULL; 303
196 return pin; 304 while (entry) {
305 next = entry->next;
306 kfree(entry);
307 entry = next;
308 }
309 old_cfg->irq_2_pin = NULL;
310}
311
312void arch_init_copy_chip_data(struct irq_desc *old_desc,
313 struct irq_desc *desc, int cpu)
314{
315 struct irq_cfg *cfg;
316 struct irq_cfg *old_cfg;
317
318 cfg = get_one_free_irq_cfg(cpu);
319
320 if (!cfg)
321 return;
322
323 desc->chip_data = cfg;
324
325 old_cfg = old_desc->chip_data;
326
327 memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
328
329 init_copy_irq_2_pin(old_cfg, cfg, cpu);
197} 330}
198 331
332static void free_irq_cfg(struct irq_cfg *old_cfg)
333{
334 kfree(old_cfg);
335}
336
337void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
338{
339 struct irq_cfg *old_cfg, *cfg;
340
341 old_cfg = old_desc->chip_data;
342 cfg = desc->chip_data;
343
344 if (old_cfg == cfg)
345 return;
346
347 if (old_cfg) {
348 free_irq_2_pin(old_cfg, cfg);
349 free_irq_cfg(old_cfg);
350 old_desc->chip_data = NULL;
351 }
352}
353
354static void
355set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
356{
357 struct irq_cfg *cfg = desc->chip_data;
358
359 if (!cfg->move_in_progress) {
360 /* it means that domain is not changed */
361 if (!cpumask_intersects(&desc->affinity, mask))
362 cfg->move_desc_pending = 1;
363 }
364}
365#endif
366
367#else
368static struct irq_cfg *irq_cfg(unsigned int irq)
369{
370 return irq < nr_irqs ? irq_cfgx + irq : NULL;
371}
372
373#endif
374
375#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
376static inline void
377set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
378{
379}
380#endif
381
199struct io_apic { 382struct io_apic {
200 unsigned int index; 383 unsigned int index;
201 unsigned int unused[3]; 384 unsigned int unused[3];
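The heart of this hunk: struct irq_cfg loses its irq field and is reached through the descriptor's chip_data pointer, so under CONFIG_SPARSE_IRQ only the sixteen legacy entries stay static and everything else can be allocated on demand on the right node. A toy model of that lookup path (a flat array stands in for the kernel's sparse descriptor lookup):

#include <stdio.h>
#include <stddef.h>

#define NR_IRQS_LEGACY 16

struct irq_cfg { unsigned char vector; };
struct irq_desc { struct irq_cfg *chip_data; };

static struct irq_cfg legacy_cfg[NR_IRQS_LEGACY];
static struct irq_desc legacy_desc[NR_IRQS_LEGACY];

static struct irq_desc *irq_to_desc(unsigned int irq)
{
        /* the kernel consults a sparse structure here; an array is
         * enough to show the chip_data indirection */
        return irq < NR_IRQS_LEGACY ? &legacy_desc[irq] : NULL;
}

static struct irq_cfg *irq_cfg(unsigned int irq)
{
        struct irq_desc *desc = irq_to_desc(irq);

        return desc ? desc->chip_data : NULL;
}

int main(void)
{
        for (int i = 0; i < NR_IRQS_LEGACY; i++)
                legacy_desc[i].chip_data = &legacy_cfg[i];

        legacy_cfg[4].vector = 0x34;
        printf("irq 4 -> vector %#x\n", irq_cfg(4)->vector);
        printf("irq 99 -> cfg %p\n", (void *)irq_cfg(99));
        return 0;
}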
@@ -237,11 +420,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
237 writel(value, &io_apic->data); 420 writel(value, &io_apic->data);
238} 421}
239 422
240static bool io_apic_level_ack_pending(unsigned int irq) 423static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
241{ 424{
242 struct irq_pin_list *entry; 425 struct irq_pin_list *entry;
243 unsigned long flags; 426 unsigned long flags;
244 struct irq_cfg *cfg = irq_cfg(irq);
245 427
246 spin_lock_irqsave(&ioapic_lock, flags); 428 spin_lock_irqsave(&ioapic_lock, flags);
247 entry = cfg->irq_2_pin; 429 entry = cfg->irq_2_pin;
@@ -323,13 +505,32 @@ static void ioapic_mask_entry(int apic, int pin)
323} 505}
324 506
325#ifdef CONFIG_SMP 507#ifdef CONFIG_SMP
326static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) 508static void send_cleanup_vector(struct irq_cfg *cfg)
509{
510 cpumask_var_t cleanup_mask;
511
512 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
513 unsigned int i;
514 cfg->move_cleanup_count = 0;
515 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
516 cfg->move_cleanup_count++;
517 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
518 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
519 } else {
520 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
521 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
522 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
523 free_cpumask_var(cleanup_mask);
524 }
525 cfg->move_in_progress = 0;
526}
527
528static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
327{ 529{
328 int apic, pin; 530 int apic, pin;
329 struct irq_cfg *cfg;
330 struct irq_pin_list *entry; 531 struct irq_pin_list *entry;
532 u8 vector = cfg->vector;
331 533
332 cfg = irq_cfg(irq);
333 entry = cfg->irq_2_pin; 534 entry = cfg->irq_2_pin;
334 for (;;) { 535 for (;;) {
335 unsigned int reg; 536 unsigned int reg;
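send_cleanup_vector() above needs two paths because cpumask_var_t allocation can fail with CONFIG_CPUMASK_OFFSTACK: with a scratch mask it intersects old_domain with the online map once, without one it walks the intersection twice, first counting acks to expect and then sending one IPI per cpu. A compact model of the fallback arithmetic (single-word masks, printf in place of the IPI):

#include <stdio.h>

#define NR_CPUS 8

static unsigned long old_domain = 0x06;  /* cpus 1 and 2, say */
static unsigned long cpu_online = 0x07;  /* cpus 0..2 online */

static void send_ipi(int cpu)
{
        printf("cleanup IPI -> cpu %d\n", cpu);
}

int main(void)
{
        int move_cleanup_count = 0;

        /* pass 1: how many acks to wait for */
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (old_domain & cpu_online & (1UL << cpu))
                        move_cleanup_count++;

        /* pass 2: one IPI per cpu, no temporary mask needed */
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (old_domain & cpu_online & (1UL << cpu))
                        send_ipi(cpu);

        printf("move_cleanup_count = %d\n", move_cleanup_count);
        return 0;
}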
@@ -359,36 +560,61 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
359 } 560 }
360} 561}
361 562
362static int assign_irq_vector(int irq, cpumask_t mask); 563static int
564assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
363 565
364static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 566/*
567 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
568 * of that, or returns BAD_APICID and leaves desc->affinity untouched.
569 */
570static unsigned int
571set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
572{
573 struct irq_cfg *cfg;
574 unsigned int irq;
575
576 if (!cpumask_intersects(mask, cpu_online_mask))
577 return BAD_APICID;
578
579 irq = desc->irq;
580 cfg = desc->chip_data;
581 if (assign_irq_vector(irq, cfg, mask))
582 return BAD_APICID;
583
584 cpumask_and(&desc->affinity, cfg->domain, mask);
585 set_extra_move_desc(desc, mask);
586 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
587}
588
589static void
590set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
365{ 591{
366 struct irq_cfg *cfg; 592 struct irq_cfg *cfg;
367 unsigned long flags; 593 unsigned long flags;
368 unsigned int dest; 594 unsigned int dest;
369 cpumask_t tmp; 595 unsigned int irq;
370 struct irq_desc *desc;
371 596
372 cpus_and(tmp, mask, cpu_online_map); 597 irq = desc->irq;
373 if (cpus_empty(tmp)) 598 cfg = desc->chip_data;
374 return;
375 599
376 cfg = irq_cfg(irq); 600 spin_lock_irqsave(&ioapic_lock, flags);
377 if (assign_irq_vector(irq, mask)) 601 dest = set_desc_affinity(desc, mask);
378 return; 602 if (dest != BAD_APICID) {
603 /* Only the high 8 bits are valid. */
604 dest = SET_APIC_LOGICAL_ID(dest);
605 __target_IO_APIC_irq(irq, dest, cfg);
606 }
607 spin_unlock_irqrestore(&ioapic_lock, flags);
608}
379 609
380 cpus_and(tmp, cfg->domain, mask); 610static void
381 dest = cpu_mask_to_apicid(tmp); 611set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
382 /* 612{
383 * Only the high 8 bits are valid. 613 struct irq_desc *desc;
384 */
385 dest = SET_APIC_LOGICAL_ID(dest);
386 614
387 desc = irq_to_desc(irq); 615 desc = irq_to_desc(irq);
388 spin_lock_irqsave(&ioapic_lock, flags); 616
389 __target_IO_APIC_irq(irq, dest, cfg->vector); 617 set_ioapic_affinity_irq_desc(desc, mask);
390 desc->affinity = mask;
391 spin_unlock_irqrestore(&ioapic_lock, flags);
392} 618}
393#endif /* CONFIG_SMP */ 619#endif /* CONFIG_SMP */
394 620
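set_desc_affinity() becomes the shared front half of every set_affinity method in this file: check that the mask touches an online cpu, try to re-vector, and only on success update desc->affinity; BAD_APICID tells the caller nothing changed. A sketch of that contract with one-word masks (the BAD_APICID value and the returned id are illustrative):

#include <stdio.h>

#define BAD_APICID 0xffu

static unsigned long cpu_online = 0x0f;
static unsigned long domain = 0x03;      /* maintained by the allocator */

struct irq_desc { unsigned long affinity; };

static int assign_irq_vector_model(unsigned long mask)
{
        if (!(domain & mask))
                return -1;               /* no vector on those cpus */
        domain &= mask;                  /* pretend the re-vector worked */
        return 0;
}

static unsigned int set_desc_affinity_model(struct irq_desc *desc,
                                            unsigned long mask)
{
        if (!(mask & cpu_online))
                return BAD_APICID;       /* affinity left untouched */
        if (assign_irq_vector_model(mask))
                return BAD_APICID;
        desc->affinity = domain & mask;
        return (unsigned int)desc->affinity; /* stands in for the apicid */
}

int main(void)
{
        struct irq_desc desc = { .affinity = 0x0f };

        printf("dest = %#x\n", set_desc_affinity_model(&desc, 0x01));
        printf("dest = %#x\n", set_desc_affinity_model(&desc, 0xf0));
        return 0;
}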
@@ -397,16 +623,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
397 * shared ISA-space IRQs, so we have to support them. We are super 623 * shared ISA-space IRQs, so we have to support them. We are super
398 * fast in the common case, and fast for shared ISA-space IRQs. 624 * fast in the common case, and fast for shared ISA-space IRQs.
399 */ 625 */
400static void add_pin_to_irq(unsigned int irq, int apic, int pin) 626static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
401{ 627{
402 struct irq_cfg *cfg;
403 struct irq_pin_list *entry; 628 struct irq_pin_list *entry;
404 629
405 /* first time to refer irq_cfg, so with new */
406 cfg = irq_cfg_alloc(irq);
407 entry = cfg->irq_2_pin; 630 entry = cfg->irq_2_pin;
408 if (!entry) { 631 if (!entry) {
409 entry = get_one_free_irq_2_pin(); 632 entry = get_one_free_irq_2_pin(cpu);
633 if (!entry) {
634 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
635 apic, pin);
636 return;
637 }
410 cfg->irq_2_pin = entry; 638 cfg->irq_2_pin = entry;
411 entry->apic = apic; 639 entry->apic = apic;
412 entry->pin = pin; 640 entry->pin = pin;
@@ -421,7 +649,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
421 entry = entry->next; 649 entry = entry->next;
422 } 650 }
423 651
424 entry->next = get_one_free_irq_2_pin(); 652 entry->next = get_one_free_irq_2_pin(cpu);
425 entry = entry->next; 653 entry = entry->next;
426 entry->apic = apic; 654 entry->apic = apic;
427 entry->pin = pin; 655 entry->pin = pin;
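add_pin_to_irq_cpu(), spread over the two hunks above, gives an empty list a head node and otherwise walks to the tail, bailing if the (apic, pin) pair is already recorded. The same list discipline as a standalone program (calloc() standing in for kzalloc_node(); the kernel additionally logs when allocation fails):

#include <stdio.h>
#include <stdlib.h>

struct irq_pin_list {
        int apic, pin;
        struct irq_pin_list *next;
};

static void add_pin(struct irq_pin_list **head, int apic, int pin)
{
        struct irq_pin_list **pp = head;

        while (*pp) {
                if ((*pp)->apic == apic && (*pp)->pin == pin)
                        return;          /* already registered */
                pp = &(*pp)->next;
        }

        *pp = calloc(1, sizeof(**pp));
        if (!*pp)
                return;                  /* kernel: printk + give up */
        (*pp)->apic = apic;
        (*pp)->pin = pin;
}

int main(void)
{
        struct irq_pin_list *head = NULL;

        add_pin(&head, 0, 3);
        add_pin(&head, 0, 3);            /* duplicate, ignored */
        add_pin(&head, 1, 5);
        for (struct irq_pin_list *e = head; e; e = e->next)
                printf("apic %d pin %d\n", e->apic, e->pin);
        return 0;
}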
@@ -430,11 +658,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
430/* 658/*
431 * Reroute an IRQ to a different pin. 659 * Reroute an IRQ to a different pin.
432 */ 660 */
433static void __init replace_pin_at_irq(unsigned int irq, 661static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
434 int oldapic, int oldpin, 662 int oldapic, int oldpin,
435 int newapic, int newpin) 663 int newapic, int newpin)
436{ 664{
437 struct irq_cfg *cfg = irq_cfg(irq);
438 struct irq_pin_list *entry = cfg->irq_2_pin; 665 struct irq_pin_list *entry = cfg->irq_2_pin;
439 int replaced = 0; 666 int replaced = 0;
440 667
@@ -451,18 +678,16 @@ static void __init replace_pin_at_irq(unsigned int irq,
451 678
452 /* why? call replace before add? */ 679 /* why? call replace before add? */
453 if (!replaced) 680 if (!replaced)
454 add_pin_to_irq(irq, newapic, newpin); 681 add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
455} 682}
456 683
457static inline void io_apic_modify_irq(unsigned int irq, 684static inline void io_apic_modify_irq(struct irq_cfg *cfg,
458 int mask_and, int mask_or, 685 int mask_and, int mask_or,
459 void (*final)(struct irq_pin_list *entry)) 686 void (*final)(struct irq_pin_list *entry))
460{ 687{
461 int pin; 688 int pin;
462 struct irq_cfg *cfg;
463 struct irq_pin_list *entry; 689 struct irq_pin_list *entry;
464 690
465 cfg = irq_cfg(irq);
466 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { 691 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
467 unsigned int reg; 692 unsigned int reg;
468 pin = entry->pin; 693 pin = entry->pin;
@@ -475,9 +700,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
475 } 700 }
476} 701}
477 702
478static void __unmask_IO_APIC_irq(unsigned int irq) 703static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
479{ 704{
480 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); 705 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
481} 706}
482 707
483#ifdef CONFIG_X86_64 708#ifdef CONFIG_X86_64
@@ -492,47 +717,64 @@ static void io_apic_sync(struct irq_pin_list *entry)
492 readl(&io_apic->data); 717 readl(&io_apic->data);
493} 718}
494 719
495static void __mask_IO_APIC_irq(unsigned int irq) 720static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
496{ 721{
497 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 722 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
498} 723}
499#else /* CONFIG_X86_32 */ 724#else /* CONFIG_X86_32 */
500static void __mask_IO_APIC_irq(unsigned int irq) 725static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
501{ 726{
502 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); 727 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
503} 728}
504 729
505static void __mask_and_edge_IO_APIC_irq(unsigned int irq) 730static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
506{ 731{
507 io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, 732 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
508 IO_APIC_REDIR_MASKED, NULL); 733 IO_APIC_REDIR_MASKED, NULL);
509} 734}
510 735
511static void __unmask_and_level_IO_APIC_irq(unsigned int irq) 736static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
512{ 737{
513 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 738 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
514 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 739 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
515} 740}
516#endif /* CONFIG_X86_32 */ 741#endif /* CONFIG_X86_32 */
517 742
518static void mask_IO_APIC_irq (unsigned int irq) 743static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
519{ 744{
745 struct irq_cfg *cfg = desc->chip_data;
520 unsigned long flags; 746 unsigned long flags;
521 747
748 BUG_ON(!cfg);
749
522 spin_lock_irqsave(&ioapic_lock, flags); 750 spin_lock_irqsave(&ioapic_lock, flags);
523 __mask_IO_APIC_irq(irq); 751 __mask_IO_APIC_irq(cfg);
524 spin_unlock_irqrestore(&ioapic_lock, flags); 752 spin_unlock_irqrestore(&ioapic_lock, flags);
525} 753}
526 754
527static void unmask_IO_APIC_irq (unsigned int irq) 755static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
528{ 756{
757 struct irq_cfg *cfg = desc->chip_data;
529 unsigned long flags; 758 unsigned long flags;
530 759
531 spin_lock_irqsave(&ioapic_lock, flags); 760 spin_lock_irqsave(&ioapic_lock, flags);
532 __unmask_IO_APIC_irq(irq); 761 __unmask_IO_APIC_irq(cfg);
533 spin_unlock_irqrestore(&ioapic_lock, flags); 762 spin_unlock_irqrestore(&ioapic_lock, flags);
534} 763}
535 764
765static void mask_IO_APIC_irq(unsigned int irq)
766{
767 struct irq_desc *desc = irq_to_desc(irq);
768
769 mask_IO_APIC_irq_desc(desc);
770}
771static void unmask_IO_APIC_irq(unsigned int irq)
772{
773 struct irq_desc *desc = irq_to_desc(irq);
774
775 unmask_IO_APIC_irq_desc(desc);
776}
777
536static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 778static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
537{ 779{
538 struct IO_APIC_route_entry entry; 780 struct IO_APIC_route_entry entry;
@@ -809,7 +1051,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
809 */ 1051 */
810static int EISA_ELCR(unsigned int irq) 1052static int EISA_ELCR(unsigned int irq)
811{ 1053{
812 if (irq < 16) { 1054 if (irq < NR_IRQS_LEGACY) {
813 unsigned int port = 0x4d0 + (irq >> 3); 1055 unsigned int port = 0x4d0 + (irq >> 3);
814 return (inb(port) >> (irq & 7)) & 1; 1056 return (inb(port) >> (irq & 7)) & 1;
815 } 1057 }
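Only the bound changes here (16 becomes NR_IRQS_LEGACY), but the addressing deserves a note: the two ELCR registers at 0x4d0 and 0x4d1 carry one level-trigger bit per legacy IRQ, eight to a port. The port/bit arithmetic in isolation (printing where the kernel would inb()):

#include <stdio.h>

#define NR_IRQS_LEGACY 16

int main(void)
{
        for (unsigned int irq = 0; irq < NR_IRQS_LEGACY; irq++) {
                unsigned int port = 0x4d0 + (irq >> 3); /* 8 irqs per byte */
                unsigned int bit = irq & 7;

                printf("irq %2u -> port %#x bit %u\n", irq, port, bit);
        }
        return 0;
}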
@@ -1034,7 +1276,8 @@ void unlock_vector_lock(void)
1034 spin_unlock(&vector_lock); 1276 spin_unlock(&vector_lock);
1035} 1277}
1036 1278
1037static int __assign_irq_vector(int irq, cpumask_t mask) 1279static int
1280__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1038{ 1281{
1039 /* 1282 /*
1040 * NOTE! The local APIC isn't very good at handling 1283 * NOTE! The local APIC isn't very good at handling
@@ -1049,52 +1292,49 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
1049 */ 1292 */
1050 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1293 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1051 unsigned int old_vector; 1294 unsigned int old_vector;
1052 int cpu; 1295 int cpu, err;
1053 struct irq_cfg *cfg; 1296 cpumask_var_t tmp_mask;
1054
1055 cfg = irq_cfg(irq);
1056
1057 /* Only try and allocate irqs on cpus that are present */
1058 cpus_and(mask, mask, cpu_online_map);
1059 1297
1060 if ((cfg->move_in_progress) || cfg->move_cleanup_count) 1298 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1061 return -EBUSY; 1299 return -EBUSY;
1062 1300
1301 if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
1302 return -ENOMEM;
1303
1063 old_vector = cfg->vector; 1304 old_vector = cfg->vector;
1064 if (old_vector) { 1305 if (old_vector) {
1065 cpumask_t tmp; 1306 cpumask_and(tmp_mask, mask, cpu_online_mask);
1066 cpus_and(tmp, cfg->domain, mask); 1307 cpumask_and(tmp_mask, cfg->domain, tmp_mask);
1067 if (!cpus_empty(tmp)) 1308 if (!cpumask_empty(tmp_mask)) {
1309 free_cpumask_var(tmp_mask);
1068 return 0; 1310 return 0;
1311 }
1069 } 1312 }
1070 1313
1071 for_each_cpu_mask_nr(cpu, mask) { 1314 /* Only try and allocate irqs on cpus that are present */
1072 cpumask_t domain, new_mask; 1315 err = -ENOSPC;
1316 for_each_cpu_and(cpu, mask, cpu_online_mask) {
1073 int new_cpu; 1317 int new_cpu;
1074 int vector, offset; 1318 int vector, offset;
1075 1319
1076 domain = vector_allocation_domain(cpu); 1320 vector_allocation_domain(cpu, tmp_mask);
1077 cpus_and(new_mask, domain, cpu_online_map);
1078 1321
1079 vector = current_vector; 1322 vector = current_vector;
1080 offset = current_offset; 1323 offset = current_offset;
1081next: 1324next:
1082 vector += 8; 1325 vector += 8;
1083 if (vector >= first_system_vector) { 1326 if (vector >= first_system_vector) {
1084 /* If we run out of vectors on large boxen, must share them. */ 1327 /* If out of vectors on large boxen, must share them. */
1085 offset = (offset + 1) % 8; 1328 offset = (offset + 1) % 8;
1086 vector = FIRST_DEVICE_VECTOR + offset; 1329 vector = FIRST_DEVICE_VECTOR + offset;
1087 } 1330 }
1088 if (unlikely(current_vector == vector)) 1331 if (unlikely(current_vector == vector))
1089 continue; 1332 continue;
1090#ifdef CONFIG_X86_64 1333
1091 if (vector == IA32_SYSCALL_VECTOR) 1334 if (test_bit(vector, used_vectors))
1092 goto next;
1093#else
1094 if (vector == SYSCALL_VECTOR)
1095 goto next; 1335 goto next;
1096#endif 1336
1097 for_each_cpu_mask_nr(new_cpu, new_mask) 1337 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1098 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1338 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1099 goto next; 1339 goto next;
1100 /* Found one! */ 1340 /* Found one! */
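This loop is the allocator proper: vectors are handed out eight apart so successive IRQs land in different priority classes, the walk restarts one class higher when it reaches first_system_vector, and arriving back at current_vector means every slot was tried. A self-contained model of the search (the vector constants are illustrative; a single used_vectors[] array replaces the per-cpu tables):

#include <stdio.h>
#include <stdbool.h>

#define FIRST_DEVICE_VECTOR 0x31
#define FIRST_SYSTEM_VECTOR 0xef

static bool used_vectors[256];

static int alloc_vector(void)
{
        static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
        int vector = current_vector, offset = current_offset;

        for (;;) {
                vector += 8;
                if (vector >= FIRST_SYSTEM_VECTOR) {
                        offset = (offset + 1) % 8; /* next priority class */
                        vector = FIRST_DEVICE_VECTOR + offset;
                }
                if (vector == current_vector)
                        return -1;                 /* wrapped: all in use */
                if (used_vectors[vector])
                        continue;

                used_vectors[vector] = true;       /* found one */
                current_vector = vector;
                current_offset = offset;
                return vector;
        }
}

int main(void)
{
        for (int i = 0; i < 4; i++)
                printf("vector %#x\n", alloc_vector());
        return 0;
}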
@@ -1102,49 +1342,47 @@ next:
1102 current_offset = offset; 1342 current_offset = offset;
1103 if (old_vector) { 1343 if (old_vector) {
1104 cfg->move_in_progress = 1; 1344 cfg->move_in_progress = 1;
1105 cfg->old_domain = cfg->domain; 1345 cpumask_copy(cfg->old_domain, cfg->domain);
1106 } 1346 }
1107 for_each_cpu_mask_nr(new_cpu, new_mask) 1347 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1108 per_cpu(vector_irq, new_cpu)[vector] = irq; 1348 per_cpu(vector_irq, new_cpu)[vector] = irq;
1109 cfg->vector = vector; 1349 cfg->vector = vector;
1110 cfg->domain = domain; 1350 cpumask_copy(cfg->domain, tmp_mask);
1111 return 0; 1351 err = 0;
1352 break;
1112 } 1353 }
1113 return -ENOSPC; 1354 free_cpumask_var(tmp_mask);
1355 return err;
1114} 1356}
1115 1357
1116static int assign_irq_vector(int irq, cpumask_t mask) 1358static int
1359assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1117{ 1360{
1118 int err; 1361 int err;
1119 unsigned long flags; 1362 unsigned long flags;
1120 1363
1121 spin_lock_irqsave(&vector_lock, flags); 1364 spin_lock_irqsave(&vector_lock, flags);
1122 err = __assign_irq_vector(irq, mask); 1365 err = __assign_irq_vector(irq, cfg, mask);
1123 spin_unlock_irqrestore(&vector_lock, flags); 1366 spin_unlock_irqrestore(&vector_lock, flags);
1124 return err; 1367 return err;
1125} 1368}
1126 1369
1127static void __clear_irq_vector(int irq) 1370static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1128{ 1371{
1129 struct irq_cfg *cfg;
1130 cpumask_t mask;
1131 int cpu, vector; 1372 int cpu, vector;
1132 1373
1133 cfg = irq_cfg(irq);
1134 BUG_ON(!cfg->vector); 1374 BUG_ON(!cfg->vector);
1135 1375
1136 vector = cfg->vector; 1376 vector = cfg->vector;
1137 cpus_and(mask, cfg->domain, cpu_online_map); 1377 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1138 for_each_cpu_mask_nr(cpu, mask)
1139 per_cpu(vector_irq, cpu)[vector] = -1; 1378 per_cpu(vector_irq, cpu)[vector] = -1;
1140 1379
1141 cfg->vector = 0; 1380 cfg->vector = 0;
1142 cpus_clear(cfg->domain); 1381 cpumask_clear(cfg->domain);
1143 1382
1144 if (likely(!cfg->move_in_progress)) 1383 if (likely(!cfg->move_in_progress))
1145 return; 1384 return;
1146 cpus_and(mask, cfg->old_domain, cpu_online_map); 1385 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1147 for_each_cpu_mask_nr(cpu, mask) {
1148 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; 1386 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1149 vector++) { 1387 vector++) {
1150 if (per_cpu(vector_irq, cpu)[vector] != irq) 1388 if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1162,10 +1400,12 @@ void __setup_vector_irq(int cpu)
1162 /* This function must be called with vector_lock held */ 1400 /* This function must be called with vector_lock held */
1163 int irq, vector; 1401 int irq, vector;
1164 struct irq_cfg *cfg; 1402 struct irq_cfg *cfg;
1403 struct irq_desc *desc;
1165 1404
1166 /* Mark the inuse vectors */ 1405 /* Mark the inuse vectors */
1167 for_each_irq_cfg(irq, cfg) { 1406 for_each_irq_desc(irq, desc) {
1168 if (!cpu_isset(cpu, cfg->domain)) 1407 cfg = desc->chip_data;
1408 if (!cpumask_test_cpu(cpu, cfg->domain))
1169 continue; 1409 continue;
1170 vector = cfg->vector; 1410 vector = cfg->vector;
1171 per_cpu(vector_irq, cpu)[vector] = irq; 1411 per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1177,7 +1417,7 @@ void __setup_vector_irq(int cpu)
1177 continue; 1417 continue;
1178 1418
1179 cfg = irq_cfg(irq); 1419 cfg = irq_cfg(irq);
1180 if (!cpu_isset(cpu, cfg->domain)) 1420 if (!cpumask_test_cpu(cpu, cfg->domain))
1181 per_cpu(vector_irq, cpu)[vector] = -1; 1421 per_cpu(vector_irq, cpu)[vector] = -1;
1182 } 1422 }
1183} 1423}
@@ -1215,11 +1455,8 @@ static inline int IO_APIC_irq_trigger(int irq)
1215} 1455}
1216#endif 1456#endif
1217 1457
1218static void ioapic_register_intr(int irq, unsigned long trigger) 1458static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
1219{ 1459{
1220 struct irq_desc *desc;
1221
1222 desc = irq_to_desc(irq);
1223 1460
1224 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1461 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1225 trigger == IOAPIC_LEVEL) 1462 trigger == IOAPIC_LEVEL)
@@ -1311,23 +1548,22 @@ static int setup_ioapic_entry(int apic, int irq,
1311 return 0; 1548 return 0;
1312} 1549}
1313 1550
1314static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1551static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
1315 int trigger, int polarity) 1552 int trigger, int polarity)
1316{ 1553{
1317 struct irq_cfg *cfg; 1554 struct irq_cfg *cfg;
1318 struct IO_APIC_route_entry entry; 1555 struct IO_APIC_route_entry entry;
1319 cpumask_t mask; 1556 unsigned int dest;
1320 1557
1321 if (!IO_APIC_IRQ(irq)) 1558 if (!IO_APIC_IRQ(irq))
1322 return; 1559 return;
1323 1560
1324 cfg = irq_cfg(irq); 1561 cfg = desc->chip_data;
1325 1562
1326 mask = TARGET_CPUS; 1563 if (assign_irq_vector(irq, cfg, TARGET_CPUS))
1327 if (assign_irq_vector(irq, mask))
1328 return; 1564 return;
1329 1565
1330 cpus_and(mask, cfg->domain, mask); 1566 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
1331 1567
1332 apic_printk(APIC_VERBOSE,KERN_DEBUG 1568 apic_printk(APIC_VERBOSE,KERN_DEBUG
1333 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1569 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1337,16 +1573,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1337 1573
1338 1574
1339 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1575 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
1340 cpu_mask_to_apicid(mask), trigger, polarity, 1576 dest, trigger, polarity, cfg->vector)) {
1341 cfg->vector)) {
1342 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1577 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1343 mp_ioapics[apic].mp_apicid, pin); 1578 mp_ioapics[apic].mp_apicid, pin);
1344 __clear_irq_vector(irq); 1579 __clear_irq_vector(irq, cfg);
1345 return; 1580 return;
1346 } 1581 }
1347 1582
1348 ioapic_register_intr(irq, trigger); 1583 ioapic_register_intr(irq, desc, trigger);
1349 if (irq < 16) 1584 if (irq < NR_IRQS_LEGACY)
1350 disable_8259A_irq(irq); 1585 disable_8259A_irq(irq);
1351 1586
1352 ioapic_write_entry(apic, pin, entry); 1587 ioapic_write_entry(apic, pin, entry);
@@ -1356,6 +1591,9 @@ static void __init setup_IO_APIC_irqs(void)
1356{ 1591{
1357 int apic, pin, idx, irq; 1592 int apic, pin, idx, irq;
1358 int notcon = 0; 1593 int notcon = 0;
1594 struct irq_desc *desc;
1595 struct irq_cfg *cfg;
1596 int cpu = boot_cpu_id;
1359 1597
1360 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1598 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1361 1599
@@ -1387,9 +1625,15 @@ static void __init setup_IO_APIC_irqs(void)
1387 if (multi_timer_check(apic, irq)) 1625 if (multi_timer_check(apic, irq))
1388 continue; 1626 continue;
1389#endif 1627#endif
1390 add_pin_to_irq(irq, apic, pin); 1628 desc = irq_to_desc_alloc_cpu(irq, cpu);
1629 if (!desc) {
1630 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1631 continue;
1632 }
1633 cfg = desc->chip_data;
1634 add_pin_to_irq_cpu(cfg, cpu, apic, pin);
1391 1635
1392 setup_IO_APIC_irq(apic, pin, irq, 1636 setup_IO_APIC_irq(apic, pin, irq, desc,
1393 irq_trigger(idx), irq_polarity(idx)); 1637 irq_trigger(idx), irq_polarity(idx));
1394 } 1638 }
1395 } 1639 }
@@ -1448,6 +1692,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1448 union IO_APIC_reg_03 reg_03; 1692 union IO_APIC_reg_03 reg_03;
1449 unsigned long flags; 1693 unsigned long flags;
1450 struct irq_cfg *cfg; 1694 struct irq_cfg *cfg;
1695 struct irq_desc *desc;
1451 unsigned int irq; 1696 unsigned int irq;
1452 1697
1453 if (apic_verbosity == APIC_QUIET) 1698 if (apic_verbosity == APIC_QUIET)
@@ -1537,8 +1782,11 @@ __apicdebuginit(void) print_IO_APIC(void)
1537 } 1782 }
1538 } 1783 }
1539 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1784 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1540 for_each_irq_cfg(irq, cfg) { 1785 for_each_irq_desc(irq, desc) {
1541 struct irq_pin_list *entry = cfg->irq_2_pin; 1786 struct irq_pin_list *entry;
1787
1788 cfg = desc->chip_data;
1789 entry = cfg->irq_2_pin;
1542 if (!entry) 1790 if (!entry)
1543 continue; 1791 continue;
1544 printk(KERN_DEBUG "IRQ%d ", irq); 1792 printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2022,14 +2270,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2022{ 2270{
2023 int was_pending = 0; 2271 int was_pending = 0;
2024 unsigned long flags; 2272 unsigned long flags;
2273 struct irq_cfg *cfg;
2025 2274
2026 spin_lock_irqsave(&ioapic_lock, flags); 2275 spin_lock_irqsave(&ioapic_lock, flags);
2027 if (irq < 16) { 2276 if (irq < NR_IRQS_LEGACY) {
2028 disable_8259A_irq(irq); 2277 disable_8259A_irq(irq);
2029 if (i8259A_irq_pending(irq)) 2278 if (i8259A_irq_pending(irq))
2030 was_pending = 1; 2279 was_pending = 1;
2031 } 2280 }
2032 __unmask_IO_APIC_irq(irq); 2281 cfg = irq_cfg(irq);
2282 __unmask_IO_APIC_irq(cfg);
2033 spin_unlock_irqrestore(&ioapic_lock, flags); 2283 spin_unlock_irqrestore(&ioapic_lock, flags);
2034 2284
2035 return was_pending; 2285 return was_pending;
@@ -2043,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2043 unsigned long flags; 2293 unsigned long flags;
2044 2294
2045 spin_lock_irqsave(&vector_lock, flags); 2295 spin_lock_irqsave(&vector_lock, flags);
2046 send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); 2296 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2047 spin_unlock_irqrestore(&vector_lock, flags); 2297 spin_unlock_irqrestore(&vector_lock, flags);
2048 2298
2049 return 1; 2299 return 1;
@@ -2092,35 +2342,35 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2092 * as simple as edge triggered migration and we can do the irq migration 2342 * as simple as edge triggered migration and we can do the irq migration
2093 * with a simple atomic update to IO-APIC RTE. 2343 * with a simple atomic update to IO-APIC RTE.
2094 */ 2344 */
2095static void migrate_ioapic_irq(int irq, cpumask_t mask) 2345static void
2346migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2096{ 2347{
2097 struct irq_cfg *cfg; 2348 struct irq_cfg *cfg;
2098 struct irq_desc *desc;
2099 cpumask_t tmp, cleanup_mask;
2100 struct irte irte; 2349 struct irte irte;
2101 int modify_ioapic_rte; 2350 int modify_ioapic_rte;
2102 unsigned int dest; 2351 unsigned int dest;
2103 unsigned long flags; 2352 unsigned long flags;
2353 unsigned int irq;
2104 2354
2105 cpus_and(tmp, mask, cpu_online_map); 2355 if (!cpumask_intersects(mask, cpu_online_mask))
2106 if (cpus_empty(tmp))
2107 return; 2356 return;
2108 2357
2358 irq = desc->irq;
2109 if (get_irte(irq, &irte)) 2359 if (get_irte(irq, &irte))
2110 return; 2360 return;
2111 2361
2112 if (assign_irq_vector(irq, mask)) 2362 cfg = desc->chip_data;
2363 if (assign_irq_vector(irq, cfg, mask))
2113 return; 2364 return;
2114 2365
2115 cfg = irq_cfg(irq); 2366 set_extra_move_desc(desc, mask);
2116 cpus_and(tmp, cfg->domain, mask); 2367
2117 dest = cpu_mask_to_apicid(tmp); 2368 dest = cpu_mask_to_apicid_and(cfg->domain, mask);
2118 2369
2119 desc = irq_to_desc(irq);
2120 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2370 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2121 if (modify_ioapic_rte) { 2371 if (modify_ioapic_rte) {
2122 spin_lock_irqsave(&ioapic_lock, flags); 2372 spin_lock_irqsave(&ioapic_lock, flags);
2123 __target_IO_APIC_irq(irq, dest, cfg->vector); 2373 __target_IO_APIC_irq(irq, dest, cfg);
2124 spin_unlock_irqrestore(&ioapic_lock, flags); 2374 spin_unlock_irqrestore(&ioapic_lock, flags);
2125 } 2375 }
2126 2376
@@ -2132,24 +2382,20 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
2132 */ 2382 */
2133 modify_irte(irq, &irte); 2383 modify_irte(irq, &irte);
2134 2384
2135 if (cfg->move_in_progress) { 2385 if (cfg->move_in_progress)
2136 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2386 send_cleanup_vector(cfg);
2137 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2138 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2139 cfg->move_in_progress = 0;
2140 }
2141 2387
2142 desc->affinity = mask; 2388 cpumask_copy(&desc->affinity, mask);
2143} 2389}
2144 2390
2145static int migrate_irq_remapped_level(int irq) 2391static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2146{ 2392{
2147 int ret = -1; 2393 int ret = -1;
2148 struct irq_desc *desc = irq_to_desc(irq); 2394 struct irq_cfg *cfg = desc->chip_data;
2149 2395
2150 mask_IO_APIC_irq(irq); 2396 mask_IO_APIC_irq_desc(desc);
2151 2397
2152 if (io_apic_level_ack_pending(irq)) { 2398 if (io_apic_level_ack_pending(cfg)) {
2153 /* 2399 /*
2154 * Interrupt in progress. Migrating irq now will change the 2400 * Interrupt in progress. Migrating irq now will change the
2155 * vector information in the IO-APIC RTE and that will confuse 2401 * vector information in the IO-APIC RTE and that will confuse
@@ -2161,14 +2407,15 @@ static int migrate_irq_remapped_level(int irq)
2161 } 2407 }
2162 2408
2163 /* everything is clear. we have right of way */ 2409 /* everything is clear. we have right of way */
2164 migrate_ioapic_irq(irq, desc->pending_mask); 2410 migrate_ioapic_irq_desc(desc, &desc->pending_mask);
2165 2411
2166 ret = 0; 2412 ret = 0;
2167 desc->status &= ~IRQ_MOVE_PENDING; 2413 desc->status &= ~IRQ_MOVE_PENDING;
2168 cpus_clear(desc->pending_mask); 2414 cpumask_clear(&desc->pending_mask);
2169 2415
2170unmask: 2416unmask:
2171 unmask_IO_APIC_irq(irq); 2417 unmask_IO_APIC_irq_desc(desc);
2418
2172 return ret; 2419 return ret;
2173} 2420}
2174 2421
@@ -2189,7 +2436,7 @@ static void ir_irq_migration(struct work_struct *work)
2189 continue; 2436 continue;
2190 } 2437 }
2191 2438
2192 desc->chip->set_affinity(irq, desc->pending_mask); 2439 desc->chip->set_affinity(irq, &desc->pending_mask);
2193 spin_unlock_irqrestore(&desc->lock, flags); 2440 spin_unlock_irqrestore(&desc->lock, flags);
2194 } 2441 }
2195 } 2442 }
@@ -2198,18 +2445,24 @@ static void ir_irq_migration(struct work_struct *work)
2198/* 2445/*
2199 * Migrates the IRQ destination in the process context. 2446 * Migrates the IRQ destination in the process context.
2200 */ 2447 */
2201static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2448static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2449 const struct cpumask *mask)
2202{ 2450{
2203 struct irq_desc *desc = irq_to_desc(irq);
2204
2205 if (desc->status & IRQ_LEVEL) { 2451 if (desc->status & IRQ_LEVEL) {
2206 desc->status |= IRQ_MOVE_PENDING; 2452 desc->status |= IRQ_MOVE_PENDING;
2207 desc->pending_mask = mask; 2453 cpumask_copy(&desc->pending_mask, mask);
2208 migrate_irq_remapped_level(irq); 2454 migrate_irq_remapped_level_desc(desc);
2209 return; 2455 return;
2210 } 2456 }
2211 2457
2212 migrate_ioapic_irq(irq, mask); 2458 migrate_ioapic_irq_desc(desc, mask);
2459}
2460static void set_ir_ioapic_affinity_irq(unsigned int irq,
2461 const struct cpumask *mask)
2462{
2463 struct irq_desc *desc = irq_to_desc(irq);
2464
2465 set_ir_ioapic_affinity_irq_desc(desc, mask);
2213} 2466}
2214#endif 2467#endif
2215 2468
@@ -2228,6 +2481,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2228 struct irq_cfg *cfg; 2481 struct irq_cfg *cfg;
2229 irq = __get_cpu_var(vector_irq)[vector]; 2482 irq = __get_cpu_var(vector_irq)[vector];
2230 2483
2484 if (irq == -1)
2485 continue;
2486
2231 desc = irq_to_desc(irq); 2487 desc = irq_to_desc(irq);
2232 if (!desc) 2488 if (!desc)
2233 continue; 2489 continue;
@@ -2237,7 +2493,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2237 if (!cfg->move_cleanup_count) 2493 if (!cfg->move_cleanup_count)
2238 goto unlock; 2494 goto unlock;
2239 2495
2240 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) 2496 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2241 goto unlock; 2497 goto unlock;
2242 2498
2243 __get_cpu_var(vector_irq)[vector] = -1; 2499 __get_cpu_var(vector_irq)[vector] = -1;
@@ -2249,28 +2505,44 @@ unlock:
2249 irq_exit(); 2505 irq_exit();
2250} 2506}
2251 2507
2252static void irq_complete_move(unsigned int irq) 2508static void irq_complete_move(struct irq_desc **descp)
2253{ 2509{
2254 struct irq_cfg *cfg = irq_cfg(irq); 2510 struct irq_desc *desc = *descp;
2511 struct irq_cfg *cfg = desc->chip_data;
2255 unsigned vector, me; 2512 unsigned vector, me;
2256 2513
2257 if (likely(!cfg->move_in_progress)) 2514 if (likely(!cfg->move_in_progress)) {
2515#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2516 if (likely(!cfg->move_desc_pending))
2517 return;
2518
2519 /* domain has not changed, but affinity did */
2520 me = smp_processor_id();
2521 if (cpu_isset(me, desc->affinity)) {
2522 *descp = desc = move_irq_desc(desc, me);
2523 /* get the new one */
2524 cfg = desc->chip_data;
2525 cfg->move_desc_pending = 0;
2526 }
2527#endif
2258 return; 2528 return;
2529 }
2259 2530
2260 vector = ~get_irq_regs()->orig_ax; 2531 vector = ~get_irq_regs()->orig_ax;
2261 me = smp_processor_id(); 2532 me = smp_processor_id();
2262 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { 2533#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2263 cpumask_t cleanup_mask; 2534 *descp = desc = move_irq_desc(desc, me);
2535 /* get the new one */
2536 cfg = desc->chip_data;
2537#endif
2264 2538
2265 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2539 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2266 cfg->move_cleanup_count = cpus_weight(cleanup_mask); 2540 send_cleanup_vector(cfg);
2267 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2268 cfg->move_in_progress = 0;
2269 }
2270} 2541}
2271#else 2542#else
2272static inline void irq_complete_move(unsigned int irq) {} 2543static inline void irq_complete_move(struct irq_desc **descp) {}
2273#endif 2544#endif
2545
2274#ifdef CONFIG_INTR_REMAP 2546#ifdef CONFIG_INTR_REMAP
2275static void ack_x2apic_level(unsigned int irq) 2547static void ack_x2apic_level(unsigned int irq)
2276{ 2548{
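irq_complete_move() now takes struct irq_desc ** because, under CONFIG_NUMA_MIGRATE_IRQ_DESC, move_irq_desc() may return a different descriptor allocated near the cpu that now handles the interrupt, and the caller must continue with that one. The double-pointer pattern on its own (move_irq_desc() faked with a copy-and-free):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct irq_desc { int node; int irq; };

static struct irq_desc *move_irq_desc_model(struct irq_desc *old, int node)
{
        struct irq_desc *new = malloc(sizeof(*new));

        if (!new)
                return old;      /* keep the old one on failure */
        memcpy(new, old, sizeof(*new));
        new->node = node;        /* reallocated "near" the new cpu */
        free(old);
        return new;
}

static void irq_complete_move_model(struct irq_desc **descp, int me)
{
        *descp = move_irq_desc_model(*descp, me);
        /* from here on, callers dereferencing *descp see the new copy */
}

int main(void)
{
        struct irq_desc *desc = malloc(sizeof(*desc));

        desc->node = 0;
        desc->irq = 9;
        irq_complete_move_model(&desc, 1);
        printf("irq %d now on node %d\n", desc->irq, desc->node);
        free(desc);
        return 0;
}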
@@ -2281,11 +2553,14 @@ static void ack_x2apic_edge(unsigned int irq)
2281{ 2553{
2282 ack_x2APIC_irq(); 2554 ack_x2APIC_irq();
2283} 2555}
2556
2284#endif 2557#endif
2285 2558
2286static void ack_apic_edge(unsigned int irq) 2559static void ack_apic_edge(unsigned int irq)
2287{ 2560{
2288 irq_complete_move(irq); 2561 struct irq_desc *desc = irq_to_desc(irq);
2562
2563 irq_complete_move(&desc);
2289 move_native_irq(irq); 2564 move_native_irq(irq);
2290 ack_APIC_irq(); 2565 ack_APIC_irq();
2291} 2566}
@@ -2294,18 +2569,21 @@ atomic_t irq_mis_count;
2294 2569
2295static void ack_apic_level(unsigned int irq) 2570static void ack_apic_level(unsigned int irq)
2296{ 2571{
2572 struct irq_desc *desc = irq_to_desc(irq);
2573
2297#ifdef CONFIG_X86_32 2574#ifdef CONFIG_X86_32
2298 unsigned long v; 2575 unsigned long v;
2299 int i; 2576 int i;
2300#endif 2577#endif
2578 struct irq_cfg *cfg;
2301 int do_unmask_irq = 0; 2579 int do_unmask_irq = 0;
2302 2580
2303 irq_complete_move(irq); 2581 irq_complete_move(&desc);
2304#ifdef CONFIG_GENERIC_PENDING_IRQ 2582#ifdef CONFIG_GENERIC_PENDING_IRQ
2305 /* If we are moving the irq we need to mask it */ 2583 /* If we are moving the irq we need to mask it */
2306 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { 2584 if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
2307 do_unmask_irq = 1; 2585 do_unmask_irq = 1;
2308 mask_IO_APIC_irq(irq); 2586 mask_IO_APIC_irq_desc(desc);
2309 } 2587 }
2310#endif 2588#endif
2311 2589
@@ -2329,7 +2607,8 @@ static void ack_apic_level(unsigned int irq)
2329 * operation to prevent an edge-triggered interrupt escaping meanwhile. 2607 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2330 * The idea is from Manfred Spraul. --macro 2608 * The idea is from Manfred Spraul. --macro
2331 */ 2609 */
2332 i = irq_cfg(irq)->vector; 2610 cfg = desc->chip_data;
2611 i = cfg->vector;
2333 2612
2334 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2613 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2335#endif 2614#endif
@@ -2368,17 +2647,18 @@ static void ack_apic_level(unsigned int irq)
2368 * accurate and is causing problems then it is a hardware bug 2647 * accurate and is causing problems then it is a hardware bug
2369 * and you can go talk to the chipset vendor about it. 2648 * and you can go talk to the chipset vendor about it.
2370 */ 2649 */
2371 if (!io_apic_level_ack_pending(irq)) 2650 cfg = desc->chip_data;
2651 if (!io_apic_level_ack_pending(cfg))
2372 move_masked_irq(irq); 2652 move_masked_irq(irq);
2373 unmask_IO_APIC_irq(irq); 2653 unmask_IO_APIC_irq_desc(desc);
2374 } 2654 }
2375 2655
2376#ifdef CONFIG_X86_32 2656#ifdef CONFIG_X86_32
2377 if (!(v & (1 << (i & 0x1f)))) { 2657 if (!(v & (1 << (i & 0x1f)))) {
2378 atomic_inc(&irq_mis_count); 2658 atomic_inc(&irq_mis_count);
2379 spin_lock(&ioapic_lock); 2659 spin_lock(&ioapic_lock);
2380 __mask_and_edge_IO_APIC_irq(irq); 2660 __mask_and_edge_IO_APIC_irq(cfg);
2381 __unmask_and_level_IO_APIC_irq(irq); 2661 __unmask_and_level_IO_APIC_irq(cfg);
2382 spin_unlock(&ioapic_lock); 2662 spin_unlock(&ioapic_lock);
2383 } 2663 }
2384#endif 2664#endif
@@ -2429,20 +2709,19 @@ static inline void init_IO_APIC_traps(void)
2429 * Also, we've got to be careful not to trash gate 2709 * Also, we've got to be careful not to trash gate
2430 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2710 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2431 */ 2711 */
2432 for_each_irq_cfg(irq, cfg) { 2712 for_each_irq_desc(irq, desc) {
2433 if (IO_APIC_IRQ(irq) && !cfg->vector) { 2713 cfg = desc->chip_data;
2714 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2434 /* 2715 /*
2435 * Hmm.. We don't have an entry for this, 2716 * Hmm.. We don't have an entry for this,
2436 * so default to an old-fashioned 8259 2717 * so default to an old-fashioned 8259
2437 * interrupt if we can.. 2718 * interrupt if we can..
2438 */ 2719 */
2439 if (irq < 16) 2720 if (irq < NR_IRQS_LEGACY)
2440 make_8259A_irq(irq); 2721 make_8259A_irq(irq);
2441 else { 2722 else
2442 desc = irq_to_desc(irq);
2443 /* Strange. Oh, well.. */ 2723 /* Strange. Oh, well.. */
2444 desc->chip = &no_irq_chip; 2724 desc->chip = &no_irq_chip;
2445 }
2446 } 2725 }
2447 } 2726 }
2448} 2727}
@@ -2467,7 +2746,7 @@ static void unmask_lapic_irq(unsigned int irq)
2467 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2746 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2468} 2747}
2469 2748
2470static void ack_lapic_irq (unsigned int irq) 2749static void ack_lapic_irq(unsigned int irq)
2471{ 2750{
2472 ack_APIC_irq(); 2751 ack_APIC_irq();
2473} 2752}
@@ -2479,11 +2758,8 @@ static struct irq_chip lapic_chip __read_mostly = {
2479 .ack = ack_lapic_irq, 2758 .ack = ack_lapic_irq,
2480}; 2759};
2481 2760
2482static void lapic_register_intr(int irq) 2761static void lapic_register_intr(int irq, struct irq_desc *desc)
2483{ 2762{
2484 struct irq_desc *desc;
2485
2486 desc = irq_to_desc(irq);
2487 desc->status &= ~IRQ_LEVEL; 2763 desc->status &= ~IRQ_LEVEL;
2488 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2764 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2489 "edge"); 2765 "edge");
@@ -2587,7 +2863,9 @@ int timer_through_8259 __initdata;
2587 */ 2863 */
2588static inline void __init check_timer(void) 2864static inline void __init check_timer(void)
2589{ 2865{
2590 struct irq_cfg *cfg = irq_cfg(0); 2866 struct irq_desc *desc = irq_to_desc(0);
2867 struct irq_cfg *cfg = desc->chip_data;
2868 int cpu = boot_cpu_id;
2591 int apic1, pin1, apic2, pin2; 2869 int apic1, pin1, apic2, pin2;
2592 unsigned long flags; 2870 unsigned long flags;
2593 unsigned int ver; 2871 unsigned int ver;
@@ -2602,7 +2880,7 @@ static inline void __init check_timer(void)
2602 * get/set the timer IRQ vector: 2880 * get/set the timer IRQ vector:
2603 */ 2881 */
2604 disable_8259A_irq(0); 2882 disable_8259A_irq(0);
2605 assign_irq_vector(0, TARGET_CPUS); 2883 assign_irq_vector(0, cfg, TARGET_CPUS);
2606 2884
2607 /* 2885 /*
2608 * As IRQ0 is to be enabled in the 8259A, the virtual 2886 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2653,10 +2931,10 @@ static inline void __init check_timer(void)
2653 * Ok, does IRQ0 through the IOAPIC work? 2931 * Ok, does IRQ0 through the IOAPIC work?
2654 */ 2932 */
2655 if (no_pin1) { 2933 if (no_pin1) {
2656 add_pin_to_irq(0, apic1, pin1); 2934 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
2657 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2935 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2658 } 2936 }
2659 unmask_IO_APIC_irq(0); 2937 unmask_IO_APIC_irq_desc(desc);
2660 if (timer_irq_works()) { 2938 if (timer_irq_works()) {
2661 if (nmi_watchdog == NMI_IO_APIC) { 2939 if (nmi_watchdog == NMI_IO_APIC) {
2662 setup_nmi(); 2940 setup_nmi();
@@ -2682,9 +2960,9 @@ static inline void __init check_timer(void)
2682 /* 2960 /*
2683 * legacy devices should be connected to IO APIC #0 2961 * legacy devices should be connected to IO APIC #0
2684 */ 2962 */
2685 replace_pin_at_irq(0, apic1, pin1, apic2, pin2); 2963 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
2686 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2964 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2687 unmask_IO_APIC_irq(0); 2965 unmask_IO_APIC_irq_desc(desc);
2688 enable_8259A_irq(0); 2966 enable_8259A_irq(0);
2689 if (timer_irq_works()) { 2967 if (timer_irq_works()) {
2690 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2968 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2716,7 +2994,7 @@ static inline void __init check_timer(void)
2716 apic_printk(APIC_QUIET, KERN_INFO 2994 apic_printk(APIC_QUIET, KERN_INFO
2717 "...trying to set up timer as Virtual Wire IRQ...\n"); 2995 "...trying to set up timer as Virtual Wire IRQ...\n");
2718 2996
2719 lapic_register_intr(0); 2997 lapic_register_intr(0, desc);
2720 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 2998 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
2721 enable_8259A_irq(0); 2999 enable_8259A_irq(0);
2722 3000
@@ -2901,22 +3179,26 @@ unsigned int create_irq_nr(unsigned int irq_want)
2901 unsigned int irq; 3179 unsigned int irq;
2902 unsigned int new; 3180 unsigned int new;
2903 unsigned long flags; 3181 unsigned long flags;
2904 struct irq_cfg *cfg_new; 3182 struct irq_cfg *cfg_new = NULL;
2905 3183 int cpu = boot_cpu_id;
2906 irq_want = nr_irqs - 1; 3184 struct irq_desc *desc_new = NULL;
2907 3185
2908 irq = 0; 3186 irq = 0;
2909 spin_lock_irqsave(&vector_lock, flags); 3187 spin_lock_irqsave(&vector_lock, flags);
2910 for (new = irq_want; new > 0; new--) { 3188 for (new = irq_want; new < NR_IRQS; new++) {
2911 if (platform_legacy_irq(new)) 3189 if (platform_legacy_irq(new))
2912 continue; 3190 continue;
2913 cfg_new = irq_cfg(new); 3191
2914 if (cfg_new && cfg_new->vector != 0) 3192 desc_new = irq_to_desc_alloc_cpu(new, cpu);
3193 if (!desc_new) {
3194 printk(KERN_INFO "can not get irq_desc for %d\n", new);
2915 continue; 3195 continue;
2916 /* check if need to create one */ 3196 }
2917 if (!cfg_new) 3197 cfg_new = desc_new->chip_data;
2918 cfg_new = irq_cfg_alloc(new); 3198
2919 if (__assign_irq_vector(new, TARGET_CPUS) == 0) 3199 if (cfg_new->vector != 0)
3200 continue;
3201 if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
2920 irq = new; 3202 irq = new;
2921 break; 3203 break;
2922 } 3204 }
@@ -2924,15 +3206,21 @@ unsigned int create_irq_nr(unsigned int irq_want)
2924 3206
2925 if (irq > 0) { 3207 if (irq > 0) {
2926 dynamic_irq_init(irq); 3208 dynamic_irq_init(irq);
3209 /* restore it, in case dynamic_irq_init clear it */
3210 if (desc_new)
3211 desc_new->chip_data = cfg_new;
2927 } 3212 }
2928 return irq; 3213 return irq;
2929} 3214}
2930 3215
3216static int nr_irqs_gsi = NR_IRQS_LEGACY;
2931int create_irq(void) 3217int create_irq(void)
2932{ 3218{
3219 unsigned int irq_want;
2933 int irq; 3220 int irq;
2934 3221
2935 irq = create_irq_nr(nr_irqs - 1); 3222 irq_want = nr_irqs_gsi;
3223 irq = create_irq_nr(irq_want);
2936 3224
2937 if (irq == 0) 3225 if (irq == 0)
2938 irq = -1; 3226 irq = -1;
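create_irq_nr() used to count down from nr_irqs - 1; it now counts up from the caller's hint, and create_irq() seeds the hint with nr_irqs_gsi so dynamically created IRQs land just above the GSI range. A model of the upward scan over a small table (platform_legacy_irq() reduced to a plain irq < 16 test):

#include <stdio.h>
#include <stdbool.h>

#define NR_IRQS 32
static int nr_irqs_gsi = 24;              /* first number past the GSIs */
static bool vector_in_use[NR_IRQS];

static unsigned int create_irq_nr_model(unsigned int irq_want)
{
        for (unsigned int new = irq_want; new < NR_IRQS; new++) {
                if (new < 16)             /* skip legacy irqs */
                        continue;
                if (vector_in_use[new])   /* cfg->vector != 0 */
                        continue;
                vector_in_use[new] = true; /* __assign_irq_vector() ok */
                return new;
        }
        return 0;                         /* 0 doubles as "failed" */
}

int main(void)
{
        printf("irq %u\n", create_irq_nr_model(nr_irqs_gsi));
        printf("irq %u\n", create_irq_nr_model(nr_irqs_gsi));
        return 0;
}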
@@ -2943,14 +3231,22 @@ int create_irq(void)
2943void destroy_irq(unsigned int irq) 3231void destroy_irq(unsigned int irq)
2944{ 3232{
2945 unsigned long flags; 3233 unsigned long flags;
3234 struct irq_cfg *cfg;
3235 struct irq_desc *desc;
2946 3236
3237 /* store it, in case dynamic_irq_cleanup clear it */
3238 desc = irq_to_desc(irq);
3239 cfg = desc->chip_data;
2947 dynamic_irq_cleanup(irq); 3240 dynamic_irq_cleanup(irq);
3241 /* connect back irq_cfg */
3242 if (desc)
3243 desc->chip_data = cfg;
2948 3244
2949#ifdef CONFIG_INTR_REMAP 3245#ifdef CONFIG_INTR_REMAP
2950 free_irte(irq); 3246 free_irte(irq);
2951#endif 3247#endif
2952 spin_lock_irqsave(&vector_lock, flags); 3248 spin_lock_irqsave(&vector_lock, flags);
2953 __clear_irq_vector(irq); 3249 __clear_irq_vector(irq, cfg);
2954 spin_unlock_irqrestore(&vector_lock, flags); 3250 spin_unlock_irqrestore(&vector_lock, flags);
2955} 3251}
2956 3252
@@ -2963,16 +3259,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2963 struct irq_cfg *cfg; 3259 struct irq_cfg *cfg;
2964 int err; 3260 int err;
2965 unsigned dest; 3261 unsigned dest;
2966 cpumask_t tmp;
2967 3262
2968 tmp = TARGET_CPUS; 3263 cfg = irq_cfg(irq);
2969 err = assign_irq_vector(irq, tmp); 3264 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
2970 if (err) 3265 if (err)
2971 return err; 3266 return err;
2972 3267
2973 cfg = irq_cfg(irq); 3268 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
2974 cpus_and(tmp, cfg->domain, tmp);
2975 dest = cpu_mask_to_apicid(tmp);
2976 3269
2977#ifdef CONFIG_INTR_REMAP 3270#ifdef CONFIG_INTR_REMAP
2978 if (irq_remapped(irq)) { 3271 if (irq_remapped(irq)) {
@@ -3026,64 +3319,48 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3026} 3319}
3027 3320
3028#ifdef CONFIG_SMP 3321#ifdef CONFIG_SMP
3029static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3322static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3030{ 3323{
3324 struct irq_desc *desc = irq_to_desc(irq);
3031 struct irq_cfg *cfg; 3325 struct irq_cfg *cfg;
3032 struct msi_msg msg; 3326 struct msi_msg msg;
3033 unsigned int dest; 3327 unsigned int dest;
3034 cpumask_t tmp;
3035 struct irq_desc *desc;
3036 3328
3037 cpus_and(tmp, mask, cpu_online_map); 3329 dest = set_desc_affinity(desc, mask);
3038 if (cpus_empty(tmp)) 3330 if (dest == BAD_APICID)
3039 return; 3331 return;
3040 3332
3041 if (assign_irq_vector(irq, mask)) 3333 cfg = desc->chip_data;
3042 return;
3043 3334
3044 cfg = irq_cfg(irq); 3335 read_msi_msg_desc(desc, &msg);
3045 cpus_and(tmp, cfg->domain, mask);
3046 dest = cpu_mask_to_apicid(tmp);
3047
3048 read_msi_msg(irq, &msg);
3049 3336
3050 msg.data &= ~MSI_DATA_VECTOR_MASK; 3337 msg.data &= ~MSI_DATA_VECTOR_MASK;
3051 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3338 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3052 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3339 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3053 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3340 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3054 3341
3055 write_msi_msg(irq, &msg); 3342 write_msi_msg_desc(desc, &msg);
3056 desc = irq_to_desc(irq);
3057 desc->affinity = mask;
3058} 3343}
3059
3060#ifdef CONFIG_INTR_REMAP 3344#ifdef CONFIG_INTR_REMAP
3061/* 3345/*
3062 * Migrate the MSI irq to another cpumask. This migration is 3346 * Migrate the MSI irq to another cpumask. This migration is
3063 * done in the process context using interrupt-remapping hardware. 3347 * done in the process context using interrupt-remapping hardware.
3064 */ 3348 */
3065static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3349static void
3350ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3066{ 3351{
3067 struct irq_cfg *cfg; 3352 struct irq_desc *desc = irq_to_desc(irq);
3353 struct irq_cfg *cfg = desc->chip_data;
3068 unsigned int dest; 3354 unsigned int dest;
3069 cpumask_t tmp, cleanup_mask;
3070 struct irte irte; 3355 struct irte irte;
3071 struct irq_desc *desc;
3072
3073 cpus_and(tmp, mask, cpu_online_map);
3074 if (cpus_empty(tmp))
3075 return;
3076 3356
3077 if (get_irte(irq, &irte)) 3357 if (get_irte(irq, &irte))
3078 return; 3358 return;
3079 3359
3080 if (assign_irq_vector(irq, mask)) 3360 dest = set_desc_affinity(desc, mask);
3361 if (dest == BAD_APICID)
3081 return; 3362 return;
3082 3363
3083 cfg = irq_cfg(irq);
3084 cpus_and(tmp, cfg->domain, mask);
3085 dest = cpu_mask_to_apicid(tmp);
3086
3087 irte.vector = cfg->vector; 3364 irte.vector = cfg->vector;
3088 irte.dest_id = IRTE_DEST(dest); 3365 irte.dest_id = IRTE_DEST(dest);
3089 3366
@@ -3097,16 +3374,10 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3097 * at the new destination. So, time to cleanup the previous 3374 * at the new destination. So, time to cleanup the previous
3098 * vector allocation. 3375 * vector allocation.
3099 */ 3376 */
3100 if (cfg->move_in_progress) { 3377 if (cfg->move_in_progress)
3101 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 3378 send_cleanup_vector(cfg);
3102 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
3103 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
3104 cfg->move_in_progress = 0;
3105 }
3106
3107 desc = irq_to_desc(irq);
3108 desc->affinity = mask;
3109} 3379}
3380
3110#endif 3381#endif
3111#endif /* CONFIG_SMP */ 3382#endif /* CONFIG_SMP */
3112 3383
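set_msi_irq_affinity() is a read-modify-write on the cached MSI message: mask out the vector bits in msg.data and the destination id in msg.address_lo, or in the new values, and write the message back. The same mask-and-or shape in userspace (the field masks are simplified stand-ins for the real MSI layout):

#include <stdio.h>

#define MSI_DATA_VECTOR_MASK   0x000000ffu
#define MSI_ADDR_DEST_ID_MASK  0x000ff000u

struct msi_msg { unsigned int data, address_lo; };

static void retarget(struct msi_msg *msg, unsigned int vector,
                     unsigned int dest)
{
        msg->data &= ~MSI_DATA_VECTOR_MASK;
        msg->data |= vector & MSI_DATA_VECTOR_MASK;
        msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK;
        msg->address_lo |= (dest << 12) & MSI_ADDR_DEST_ID_MASK;
}

int main(void)
{
        struct msi_msg msg = { .data = 0x4131, .address_lo = 0xfee01000 };

        retarget(&msg, 0x51, 0x02);
        printf("data=%#x address_lo=%#x\n", msg.data, msg.address_lo);
        return 0;
}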
@@ -3165,7 +3436,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3165} 3436}
3166#endif 3437#endif
3167 3438
3168static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) 3439static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3169{ 3440{
3170 int ret; 3441 int ret;
3171 struct msi_msg msg; 3442 struct msi_msg msg;
@@ -3174,7 +3445,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3174 if (ret < 0) 3445 if (ret < 0)
3175 return ret; 3446 return ret;
3176 3447
3177 set_irq_msi(irq, desc); 3448 set_irq_msi(irq, msidesc);
3178 write_msi_msg(irq, &msg); 3449 write_msi_msg(irq, &msg);
3179 3450
3180#ifdef CONFIG_INTR_REMAP 3451#ifdef CONFIG_INTR_REMAP
@@ -3194,26 +3465,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3194 return 0; 3465 return 0;
3195} 3466}
3196 3467
3197static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) 3468int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3198{
3199 unsigned int irq;
3200
3201 irq = dev->bus->number;
3202 irq <<= 8;
3203 irq |= dev->devfn;
3204 irq <<= 12;
3205
3206 return irq;
3207}
3208
3209int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3210{ 3469{
3211 unsigned int irq; 3470 unsigned int irq;
3212 int ret; 3471 int ret;
3213 unsigned int irq_want; 3472 unsigned int irq_want;
3214 3473
3215 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3474 irq_want = nr_irqs_gsi;
3216
3217 irq = create_irq_nr(irq_want); 3475 irq = create_irq_nr(irq_want);
3218 if (irq == 0) 3476 if (irq == 0)
3219 return -1; 3477 return -1;
@@ -3227,7 +3485,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3227 goto error; 3485 goto error;
3228no_ir: 3486no_ir:
3229#endif 3487#endif
3230 ret = setup_msi_irq(dev, desc, irq); 3488 ret = setup_msi_irq(dev, msidesc, irq);
3231 if (ret < 0) { 3489 if (ret < 0) {
3232 destroy_irq(irq); 3490 destroy_irq(irq);
3233 return ret; 3491 return ret;
@@ -3245,7 +3503,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3245{ 3503{
3246 unsigned int irq; 3504 unsigned int irq;
3247 int ret, sub_handle; 3505 int ret, sub_handle;
3248 struct msi_desc *desc; 3506 struct msi_desc *msidesc;
3249 unsigned int irq_want; 3507 unsigned int irq_want;
3250 3508
3251#ifdef CONFIG_INTR_REMAP 3509#ifdef CONFIG_INTR_REMAP
@@ -3253,10 +3511,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3253 int index = 0; 3511 int index = 0;
3254#endif 3512#endif
3255 3513
3256 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3514 irq_want = nr_irqs_gsi;
3257 sub_handle = 0; 3515 sub_handle = 0;
3258 list_for_each_entry(desc, &dev->msi_list, list) { 3516 list_for_each_entry(msidesc, &dev->msi_list, list) {
3259 irq = create_irq_nr(irq_want--); 3517 irq = create_irq_nr(irq_want);
3518 irq_want++;
3260 if (irq == 0) 3519 if (irq == 0)
3261 return -1; 3520 return -1;
3262#ifdef CONFIG_INTR_REMAP 3521#ifdef CONFIG_INTR_REMAP
@@ -3288,7 +3547,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3288 } 3547 }
3289no_ir: 3548no_ir:
3290#endif 3549#endif
3291 ret = setup_msi_irq(dev, desc, irq); 3550 ret = setup_msi_irq(dev, msidesc, irq);
3292 if (ret < 0) 3551 if (ret < 0)
3293 goto error; 3552 goto error;
3294 sub_handle++; 3553 sub_handle++;
@@ -3307,24 +3566,18 @@ void arch_teardown_msi_irq(unsigned int irq)
3307 3566
3308#ifdef CONFIG_DMAR 3567#ifdef CONFIG_DMAR
3309#ifdef CONFIG_SMP 3568#ifdef CONFIG_SMP
3310static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3569static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3311{ 3570{
3571 struct irq_desc *desc = irq_to_desc(irq);
3312 struct irq_cfg *cfg; 3572 struct irq_cfg *cfg;
3313 struct msi_msg msg; 3573 struct msi_msg msg;
3314 unsigned int dest; 3574 unsigned int dest;
3315 cpumask_t tmp;
3316 struct irq_desc *desc;
3317 3575
3318 cpus_and(tmp, mask, cpu_online_map); 3576 dest = set_desc_affinity(desc, mask);
3319 if (cpus_empty(tmp)) 3577 if (dest == BAD_APICID)
3320 return; 3578 return;
3321 3579
3322 if (assign_irq_vector(irq, mask)) 3580 cfg = desc->chip_data;
3323 return;
3324
3325 cfg = irq_cfg(irq);
3326 cpus_and(tmp, cfg->domain, mask);
3327 dest = cpu_mask_to_apicid(tmp);
3328 3581
3329 dmar_msi_read(irq, &msg); 3582 dmar_msi_read(irq, &msg);
3330 3583
@@ -3334,9 +3587,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
3334 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3587 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3335 3588
3336 dmar_msi_write(irq, &msg); 3589 dmar_msi_write(irq, &msg);
3337 desc = irq_to_desc(irq);
3338 desc->affinity = mask;
3339} 3590}
3591
3340#endif /* CONFIG_SMP */ 3592#endif /* CONFIG_SMP */
3341 3593
3342struct irq_chip dmar_msi_type = { 3594struct irq_chip dmar_msi_type = {
@@ -3368,24 +3620,18 @@ int arch_setup_dmar_msi(unsigned int irq)
3368#ifdef CONFIG_HPET_TIMER 3620#ifdef CONFIG_HPET_TIMER
3369 3621
3370#ifdef CONFIG_SMP 3622#ifdef CONFIG_SMP
3371static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) 3623static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3372{ 3624{
3625 struct irq_desc *desc = irq_to_desc(irq);
3373 struct irq_cfg *cfg; 3626 struct irq_cfg *cfg;
3374 struct irq_desc *desc;
3375 struct msi_msg msg; 3627 struct msi_msg msg;
3376 unsigned int dest; 3628 unsigned int dest;
3377 cpumask_t tmp;
3378 3629
3379 cpus_and(tmp, mask, cpu_online_map); 3630 dest = set_desc_affinity(desc, mask);
3380 if (cpus_empty(tmp)) 3631 if (dest == BAD_APICID)
3381 return; 3632 return;
3382 3633
3383 if (assign_irq_vector(irq, mask)) 3634 cfg = desc->chip_data;
3384 return;
3385
3386 cfg = irq_cfg(irq);
3387 cpus_and(tmp, cfg->domain, mask);
3388 dest = cpu_mask_to_apicid(tmp);
3389 3635
3390 hpet_msi_read(irq, &msg); 3636 hpet_msi_read(irq, &msg);
3391 3637
@@ -3395,9 +3641,8 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3395 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3641 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3396 3642
3397 hpet_msi_write(irq, &msg); 3643 hpet_msi_write(irq, &msg);
3398 desc = irq_to_desc(irq);
3399 desc->affinity = mask;
3400} 3644}
3645
3401#endif /* CONFIG_SMP */ 3646#endif /* CONFIG_SMP */
3402 3647
3403struct irq_chip hpet_msi_type = { 3648struct irq_chip hpet_msi_type = {
@@ -3450,28 +3695,21 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3450 write_ht_irq_msg(irq, &msg); 3695 write_ht_irq_msg(irq, &msg);
3451} 3696}
3452 3697
3453static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3698static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3454{ 3699{
3700 struct irq_desc *desc = irq_to_desc(irq);
3455 struct irq_cfg *cfg; 3701 struct irq_cfg *cfg;
3456 unsigned int dest; 3702 unsigned int dest;
3457 cpumask_t tmp;
3458 struct irq_desc *desc;
3459 3703
3460 cpus_and(tmp, mask, cpu_online_map); 3704 dest = set_desc_affinity(desc, mask);
3461 if (cpus_empty(tmp)) 3705 if (dest == BAD_APICID)
3462 return; 3706 return;
3463 3707
3464 if (assign_irq_vector(irq, mask)) 3708 cfg = desc->chip_data;
3465 return;
3466
3467 cfg = irq_cfg(irq);
3468 cpus_and(tmp, cfg->domain, mask);
3469 dest = cpu_mask_to_apicid(tmp);
3470 3709
3471 target_ht_irq(irq, dest, cfg->vector); 3710 target_ht_irq(irq, dest, cfg->vector);
3472 desc = irq_to_desc(irq);
3473 desc->affinity = mask;
3474} 3711}
3712
3475#endif 3713#endif
3476 3714
3477static struct irq_chip ht_irq_chip = { 3715static struct irq_chip ht_irq_chip = {
@@ -3489,17 +3727,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3489{ 3727{
3490 struct irq_cfg *cfg; 3728 struct irq_cfg *cfg;
3491 int err; 3729 int err;
3492 cpumask_t tmp;
3493 3730
3494 tmp = TARGET_CPUS; 3731 cfg = irq_cfg(irq);
3495 err = assign_irq_vector(irq, tmp); 3732 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3496 if (!err) { 3733 if (!err) {
3497 struct ht_irq_msg msg; 3734 struct ht_irq_msg msg;
3498 unsigned dest; 3735 unsigned dest;
3499 3736
3500 cfg = irq_cfg(irq); 3737 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3501 cpus_and(tmp, cfg->domain, tmp);
3502 dest = cpu_mask_to_apicid(tmp);
3503 3738
3504 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3739 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3505 3740
@@ -3535,7 +3770,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3535int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, 3770int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3536 unsigned long mmr_offset) 3771 unsigned long mmr_offset)
3537{ 3772{
3538 const cpumask_t *eligible_cpu = get_cpu_mask(cpu); 3773 const struct cpumask *eligible_cpu = cpumask_of(cpu);
3539 struct irq_cfg *cfg; 3774 struct irq_cfg *cfg;
3540 int mmr_pnode; 3775 int mmr_pnode;
3541 unsigned long mmr_value; 3776 unsigned long mmr_value;
@@ -3543,7 +3778,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3543 unsigned long flags; 3778 unsigned long flags;
3544 int err; 3779 int err;
3545 3780
3546 err = assign_irq_vector(irq, *eligible_cpu); 3781 cfg = irq_cfg(irq);
3782
3783 err = assign_irq_vector(irq, cfg, eligible_cpu);
3547 if (err != 0) 3784 if (err != 0)
3548 return err; 3785 return err;
3549 3786
@@ -3552,8 +3789,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3552 irq_name); 3789 irq_name);
3553 spin_unlock_irqrestore(&vector_lock, flags); 3790 spin_unlock_irqrestore(&vector_lock, flags);
3554 3791
3555 cfg = irq_cfg(irq);
3556
3557 mmr_value = 0; 3792 mmr_value = 0;
3558 entry = (struct uv_IO_APIC_route_entry *)&mmr_value; 3793 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3559 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3794 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3564,7 +3799,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3564 entry->polarity = 0; 3799 entry->polarity = 0;
3565 entry->trigger = 0; 3800 entry->trigger = 0;
3566 entry->mask = 0; 3801 entry->mask = 0;
3567 entry->dest = cpu_mask_to_apicid(*eligible_cpu); 3802 entry->dest = cpu_mask_to_apicid(eligible_cpu);
3568 3803
3569 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3804 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3570 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3805 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3605,9 +3840,16 @@ int __init io_apic_get_redir_entries (int ioapic)
3605 return reg_01.bits.entries; 3840 return reg_01.bits.entries;
3606} 3841}
3607 3842
3608int __init probe_nr_irqs(void) 3843void __init probe_nr_irqs_gsi(void)
3609{ 3844{
3610 return NR_IRQS; 3845 int idx;
3846 int nr = 0;
3847
3848 for (idx = 0; idx < nr_ioapics; idx++)
3849 nr += io_apic_get_redir_entries(idx) + 1;
3850
3851 if (nr > nr_irqs_gsi)
3852 nr_irqs_gsi = nr;
3611} 3853}
3612 3854
3613/* -------------------------------------------------------------------------- 3855/* --------------------------------------------------------------------------
@@ -3706,19 +3948,31 @@ int __init io_apic_get_version(int ioapic)
3706 3948
3707int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3949int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
3708{ 3950{
3951 struct irq_desc *desc;
3952 struct irq_cfg *cfg;
3953 int cpu = boot_cpu_id;
3954
3709 if (!IO_APIC_IRQ(irq)) { 3955 if (!IO_APIC_IRQ(irq)) {
3710 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3956 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3711 ioapic); 3957 ioapic);
3712 return -EINVAL; 3958 return -EINVAL;
3713 } 3959 }
3714 3960
3961 desc = irq_to_desc_alloc_cpu(irq, cpu);
3962 if (!desc) {
3963 printk(KERN_INFO "cannot get irq_desc %d\n", irq);
3964 return 0;
3965 }
3966
3715 /* 3967 /*
3716 * IRQs < 16 are already in the irq_2_pin[] map 3968 * IRQs < 16 are already in the irq_2_pin[] map
3717 */ 3969 */
3718 if (irq >= 16) 3970 if (irq >= NR_IRQS_LEGACY) {
3719 add_pin_to_irq(irq, ioapic, pin); 3971 cfg = desc->chip_data;
3972 add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
3973 }
3720 3974
3721 setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); 3975 setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
3722 3976
3723 return 0; 3977 return 0;
3724} 3978}
@@ -3756,7 +4010,7 @@ void __init setup_ioapic_dest(void)
3756 int pin, ioapic, irq, irq_entry; 4010 int pin, ioapic, irq, irq_entry;
3757 struct irq_desc *desc; 4011 struct irq_desc *desc;
3758 struct irq_cfg *cfg; 4012 struct irq_cfg *cfg;
3759 cpumask_t mask; 4013 const struct cpumask *mask;
3760 4014
3761 if (skip_ioapic_setup == 1) 4015 if (skip_ioapic_setup == 1)
3762 return; 4016 return;
@@ -3772,9 +4026,10 @@ void __init setup_ioapic_dest(void)
3772 * when you have too many devices, because at that time only boot 4026 * when you have too many devices, because at that time only boot
3773 * cpu is online. 4027 * cpu is online.
3774 */ 4028 */
3775 cfg = irq_cfg(irq); 4029 desc = irq_to_desc(irq);
4030 cfg = desc->chip_data;
3776 if (!cfg->vector) { 4031 if (!cfg->vector) {
3777 setup_IO_APIC_irq(ioapic, pin, irq, 4032 setup_IO_APIC_irq(ioapic, pin, irq, desc,
3778 irq_trigger(irq_entry), 4033 irq_trigger(irq_entry),
3779 irq_polarity(irq_entry)); 4034 irq_polarity(irq_entry));
3780 continue; 4035 continue;
@@ -3784,19 +4039,18 @@ void __init setup_ioapic_dest(void)
3784 /* 4039 /*
3785 * Honour affinities which have been set in early boot 4040 * Honour affinities which have been set in early boot
3786 */ 4041 */
3787 desc = irq_to_desc(irq);
3788 if (desc->status & 4042 if (desc->status &
3789 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4043 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
3790 mask = desc->affinity; 4044 mask = &desc->affinity;
3791 else 4045 else
3792 mask = TARGET_CPUS; 4046 mask = TARGET_CPUS;
3793 4047
3794#ifdef CONFIG_INTR_REMAP 4048#ifdef CONFIG_INTR_REMAP
3795 if (intr_remapping_enabled) 4049 if (intr_remapping_enabled)
3796 set_ir_ioapic_affinity_irq(irq, mask); 4050 set_ir_ioapic_affinity_irq_desc(desc, mask);
3797 else 4051 else
3798#endif 4052#endif
3799 set_ioapic_affinity_irq(irq, mask); 4053 set_ioapic_affinity_irq_desc(desc, mask);
3800 } 4054 }
3801 4055
3802 } 4056 }
@@ -3845,7 +4099,6 @@ void __init ioapic_init_mappings(void)
3845 struct resource *ioapic_res; 4099 struct resource *ioapic_res;
3846 int i; 4100 int i;
3847 4101
3848 irq_2_pin_init();
3849 ioapic_res = ioapic_setup_resources(); 4102 ioapic_res = ioapic_setup_resources();
3850 for (i = 0; i < nr_ioapics; i++) { 4103 for (i = 0; i < nr_ioapics; i++) {
3851 if (smp_found_config) { 4104 if (smp_found_config) {
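
[Editor's note] The hunks above apply one refactor to every ->set_affinity handler in io_apic.c: the open-coded sequence (mask against cpu_online_map, assign_irq_vector(), cpu_mask_to_apicid(), then a late desc->affinity update) collapses into a single set_desc_affinity() call that does the validation and bookkeeping and hands back the APIC destination, or BAD_APICID on failure. A minimal sketch of the resulting shape, using only symbols visible in the diff (not a standalone program):

    static void example_set_affinity(unsigned int irq, const struct cpumask *mask)
    {
            struct irq_desc *desc = irq_to_desc(irq);
            struct irq_cfg *cfg;
            unsigned int dest;

            /* Validates the mask, assigns a vector, records desc->affinity
             * and returns the APIC destination id -- BAD_APICID on failure. */
            dest = set_desc_affinity(desc, mask);
            if (dest == BAD_APICID)
                    return;

            cfg = desc->chip_data;
            /* ...reprogram the hardware with cfg->vector and dest here,
             * e.g. via read/write_msi_msg_desc() for MSI... */
    }
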
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f35..285bbf8831fa 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
116/* 116/*
117 * This is only used on smaller machines. 117 * This is only used on smaller machines.
118 */ 118 */
119void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) 119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
120{ 120{
121 unsigned long mask = cpus_addr(cpumask)[0]; 121 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags; 122 unsigned long flags;
123 123
124 local_irq_save(flags); 124 local_irq_save(flags);
125 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); 125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector); 126 __send_IPI_dest_field(mask, vector);
127 local_irq_restore(flags); 127 local_irq_restore(flags);
128} 128}
129 129
130void send_IPI_mask_sequence(cpumask_t mask, int vector) 130void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
131{ 131{
132 unsigned long flags; 132 unsigned long flags;
133 unsigned int query_cpu; 133 unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
139 */ 139 */
140 140
141 local_irq_save(flags); 141 local_irq_save(flags);
142 for_each_possible_cpu(query_cpu) { 142 for_each_cpu(query_cpu, mask)
143 if (cpu_isset(query_cpu, mask)) { 143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145}
146
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
148{
149 unsigned long flags;
150 unsigned int query_cpu;
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154
155 local_irq_save(flags);
156 for_each_cpu(query_cpu, mask)
157 if (query_cpu != this_cpu)
144 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), 158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
145 vector); 159 vector);
146 }
147 }
148 local_irq_restore(flags); 160 local_irq_restore(flags);
149} 161}
150 162
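
[Editor's note] The ipi.c change illustrates the cpumask API migration running through this whole merge: masks are passed as const struct cpumask * instead of by value, and for_each_cpu() walks only the set bits rather than testing every possible CPU. A hedged before/after sketch of that loop transformation; send_one() is a hypothetical stand-in for __send_IPI_dest_field():

    /* Old shape: copy the whole mask by value, test every possible CPU. */
    void broadcast_old(cpumask_t mask, int vector)
    {
            unsigned int cpu;

            for_each_possible_cpu(cpu)
                    if (cpu_isset(cpu, mask))
                            send_one(cpu, vector);  /* hypothetical helper */
    }

    /* New shape: take a pointer, visit only the bits that are set. */
    void broadcast_new(const struct cpumask *mask, int vector)
    {
            unsigned int cpu;

            for_each_cpu(cpu, mask)
                    send_one(cpu, vector);
    }
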
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..bce53e1352a0 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
9#include <asm/apic.h> 9#include <asm/apic.h>
10#include <asm/io_apic.h> 10#include <asm/io_apic.h>
11#include <asm/smp.h> 11#include <asm/smp.h>
12#include <asm/irq.h>
12 13
13atomic_t irq_err_count; 14atomic_t irq_err_count;
14 15
@@ -118,6 +119,9 @@ int show_interrupts(struct seq_file *p, void *v)
118 } 119 }
119 120
120 desc = irq_to_desc(i); 121 desc = irq_to_desc(i);
122 if (!desc)
123 return 0;
124
121 spin_lock_irqsave(&desc->lock, flags); 125 spin_lock_irqsave(&desc->lock, flags);
122#ifndef CONFIG_SMP 126#ifndef CONFIG_SMP
123 any_count = kstat_irqs(i); 127 any_count = kstat_irqs(i);
@@ -187,3 +191,5 @@ u64 arch_irq_stat(void)
187#endif 191#endif
188 return sum; 192 return sum;
189} 193}
194
195EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..9dc5588f336a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,25 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
233#ifdef CONFIG_HOTPLUG_CPU 233#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h> 234#include <mach_apic.h>
235 235
236void fixup_irqs(cpumask_t map) 236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void)
237{ 238{
238 unsigned int irq; 239 unsigned int irq;
239 static int warned; 240 static int warned;
240 struct irq_desc *desc; 241 struct irq_desc *desc;
241 242
242 for_each_irq_desc(irq, desc) { 243 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 244 const struct cpumask *affinity;
244 245
246 if (!desc)
247 continue;
245 if (irq == 2) 248 if (irq == 2)
246 continue; 249 continue;
247 250
248 cpus_and(mask, desc->affinity, map); 251 affinity = &desc->affinity;
249 if (any_online_cpu(mask) == NR_CPUS) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
250 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
251 mask = map; 254 affinity = cpu_all_mask;
252 } 255 }
253 if (desc->chip->set_affinity) 256 if (desc->chip->set_affinity)
254 desc->chip->set_affinity(irq, mask); 257 desc->chip->set_affinity(irq, affinity);
255 else if (desc->action && !(warned++)) 258 else if (desc->action && !(warned++))
256 printk("Cannot set affinity for irq %i\n", irq); 259 printk("Cannot set affinity for irq %i\n", irq);
257 } 260 }
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1df869e5bd0b..6383d50f82ea 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -80,40 +80,43 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
80} 80}
81 81
82#ifdef CONFIG_HOTPLUG_CPU 82#ifdef CONFIG_HOTPLUG_CPU
83void fixup_irqs(cpumask_t map) 83/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
84void fixup_irqs(void)
84{ 85{
85 unsigned int irq; 86 unsigned int irq;
86 static int warned; 87 static int warned;
87 struct irq_desc *desc; 88 struct irq_desc *desc;
88 89
89 for_each_irq_desc(irq, desc) { 90 for_each_irq_desc(irq, desc) {
90 cpumask_t mask;
91 int break_affinity = 0; 91 int break_affinity = 0;
92 int set_affinity = 1; 92 int set_affinity = 1;
93 const struct cpumask *affinity;
93 94
95 if (!desc)
96 continue;
94 if (irq == 2) 97 if (irq == 2)
95 continue; 98 continue;
96 99
97 /* interrupts are disabled at this point */ 100 /* interrupts are disabled at this point */
98 spin_lock(&desc->lock); 101 spin_lock(&desc->lock);
99 102
103 affinity = &desc->affinity;
100 if (!irq_has_action(irq) || 104 if (!irq_has_action(irq) ||
101 cpus_equal(desc->affinity, map)) { 105 cpumask_equal(affinity, cpu_online_mask)) {
102 spin_unlock(&desc->lock); 106 spin_unlock(&desc->lock);
103 continue; 107 continue;
104 } 108 }
105 109
106 cpus_and(mask, desc->affinity, map); 110 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
107 if (cpus_empty(mask)) {
108 break_affinity = 1; 111 break_affinity = 1;
109 mask = map; 112 affinity = cpu_all_mask;
110 } 113 }
111 114
112 if (desc->chip->mask) 115 if (desc->chip->mask)
113 desc->chip->mask(irq); 116 desc->chip->mask(irq);
114 117
115 if (desc->chip->set_affinity) 118 if (desc->chip->set_affinity)
116 desc->chip->set_affinity(irq, mask); 119 desc->chip->set_affinity(irq, affinity);
117 else if (!(warned++)) 120 else if (!(warned++))
118 set_affinity = 0; 121 set_affinity = 0;
119 122
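
[Editor's note] Both fixup_irqs() variants now derive everything from cpu_online_mask instead of taking a snapshot mask argument. The core per-irq decision is the same on 32- and 64-bit; a sketch of it, assuming the kernel symbols shown in the hunks above:

    const struct cpumask *affinity = &desc->affinity;

    /* cpumask_any_and() returns >= nr_cpu_ids when the masks are disjoint,
     * i.e. no CPU in the irq's old affinity is still online. */
    if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
            printk("Breaking affinity for irq %i\n", irq);
            affinity = cpu_all_mask;        /* fall back to "anywhere" */
    }

    if (desc->chip->set_affinity)
            desc->chip->set_affinity(irq, affinity);
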
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 607db63044a5..84723295f88a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -68,8 +68,7 @@ void __init init_ISA_irqs (void)
68 /* 68 /*
69 * 16 old-style INTA-cycle interrupts: 69 * 16 old-style INTA-cycle interrupts:
70 */ 70 */
71 for (i = 0; i < 16; i++) { 71 for (i = 0; i < NR_IRQS_LEGACY; i++) {
72 /* first time call this irq_desc */
73 struct irq_desc *desc = irq_to_desc(i); 72 struct irq_desc *desc = irq_to_desc(i);
74 73
75 desc->status = IRQ_DISABLED; 74 desc->status = IRQ_DISABLED;
@@ -111,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
111 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 110 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
112}; 111};
113 112
113int vector_used_by_percpu_irq(unsigned int vector)
114{
115 int cpu;
116
117 for_each_online_cpu(cpu) {
118 if (per_cpu(vector_irq, cpu)[vector] != -1)
119 return 1;
120 }
121
122 return 0;
123}
124
114/* Overridden in paravirt.c */ 125/* Overridden in paravirt.c */
115void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 126void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
116 127
@@ -147,10 +158,12 @@ void __init native_init_IRQ(void)
147 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 158 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
148 159
149 /* IPI for single call function */ 160 /* IPI for single call function */
150 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); 161 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
162 call_function_single_interrupt);
151 163
152 /* Low priority IPI to cleanup after moving an irq */ 164 /* Low priority IPI to cleanup after moving an irq */
153 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 165 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
166 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
154#endif 167#endif
155 168
156#ifdef CONFIG_X86_LOCAL_APIC 169#ifdef CONFIG_X86_LOCAL_APIC
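
[Editor's note] vector_used_by_percpu_irq() answers one question: does any online CPU's vector_irq[] table still route this vector to an irq? A plausible use, sketched under the assumption that a caller wants to refuse a vector a per-cpu irq still occupies (the actual call site is not shown in this diff):

    /* Hypothetical guard in a vector-allocation path. */
    if (vector_used_by_percpu_irq(vector))
            return -EBUSY;  /* still mapped on some online CPU */
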
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 8670b3ce626e..31ebfe38e96c 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -69,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 69 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
70}; 70};
71 71
72int vector_used_by_percpu_irq(unsigned int vector)
73{
74 int cpu;
75
76 for_each_online_cpu(cpu) {
77 if (per_cpu(vector_irq, cpu)[vector] != -1)
78 return 1;
79 }
80
81 return 0;
82}
83
72void __init init_ISA_irqs(void) 84void __init init_ISA_irqs(void)
73{ 85{
74 int i; 86 int i;
@@ -76,8 +88,7 @@ void __init init_ISA_irqs(void)
76 init_bsp_APIC(); 88 init_bsp_APIC();
77 init_8259A(0); 89 init_8259A(0);
78 90
79 for (i = 0; i < 16; i++) { 91 for (i = 0; i < NR_IRQS_LEGACY; i++) {
80 /* first time call this irq_desc */
81 struct irq_desc *desc = irq_to_desc(i); 92 struct irq_desc *desc = irq_to_desc(i);
82 93
83 desc->status = IRQ_DISABLED; 94 desc->status = IRQ_DISABLED;
@@ -122,6 +133,7 @@ static void __init smp_intr_init(void)
122 133
123 /* Low priority IPI to cleanup after moving an irq */ 134 /* Low priority IPI to cleanup after moving an irq */
124 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 135 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
136 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
125#endif 137#endif
126} 138}
127 139
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e169ae9b6a62..652fce6d2cce 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
89 */ 89 */
90static unsigned long kvm_get_tsc_khz(void) 90static unsigned long kvm_get_tsc_khz(void)
91{ 91{
92 return preset_lpj; 92 struct pvclock_vcpu_time_info *src;
93 src = &per_cpu(hv_clock, 0);
94 return pvclock_tsc_khz(src);
93} 95}
94 96
95static void kvm_get_preset_lpj(void) 97static void kvm_get_preset_lpj(void)
96{ 98{
97 struct pvclock_vcpu_time_info *src;
98 unsigned long khz; 99 unsigned long khz;
99 u64 lpj; 100 u64 lpj;
100 101
101 src = &per_cpu(hv_clock, 0); 102 khz = kvm_get_tsc_khz();
102 khz = pvclock_tsc_khz(src);
103 103
104 lpj = ((u64)khz * 1000); 104 lpj = ((u64)khz * 1000);
105 do_div(lpj, HZ); 105 do_div(lpj, HZ);
@@ -194,5 +194,7 @@ void __init kvmclock_init(void)
194#endif 194#endif
195 kvm_get_preset_lpj(); 195 kvm_get_preset_lpj();
196 clocksource_register(&kvm_clock); 196 clocksource_register(&kvm_clock);
197 pv_info.paravirt_enabled = 1;
198 pv_info.name = "KVM";
197 } 199 }
198} 200}
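
[Editor's note] kvm_get_preset_lpj() now funnels through kvm_get_tsc_khz(), which reads the frequency the hypervisor published in CPU 0's pvclock page. The lpj arithmetic is worth making concrete; a standalone rerun of it, with an assumed 2.4 GHz TSC and HZ=1000:

    #include <stdio.h>
    #include <stdint.h>

    #define HZ 1000                          /* assumed tick rate for this example */

    int main(void)
    {
            unsigned long khz = 2400000;     /* 2.4 GHz, illustrative */
            uint64_t lpj = (uint64_t)khz * 1000; /* kHz -> loops per second */

            lpj /= HZ;                       /* same effect as do_div(lpj, HZ) */
            printf("preset lpj = %llu\n", (unsigned long long)lpj);
            /* prints: preset lpj = 2400000, i.e. TSC ticks per jiffy */
            return 0;
    }
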
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c322..c12314c9e86f 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
287 .set_mode = mfgpt_set_mode, 287 .set_mode = mfgpt_set_mode,
288 .set_next_event = mfgpt_next_event, 288 .set_next_event = mfgpt_next_event,
289 .rating = 250, 289 .rating = 250,
290 .cpumask = CPU_MASK_ALL, 290 .cpumask = cpu_all_mask,
291 .shift = 32 291 .shift = 32
292}; 292};
293 293
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 7a3dfceb90e4..19a1044a0cd9 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -101,11 +101,15 @@ static void __init dma32_free_bootmem(void)
101 dma32_bootmem_ptr = NULL; 101 dma32_bootmem_ptr = NULL;
102 dma32_bootmem_size = 0; 102 dma32_bootmem_size = 0;
103} 103}
104#endif
104 105
105void __init pci_iommu_alloc(void) 106void __init pci_iommu_alloc(void)
106{ 107{
108#ifdef CONFIG_X86_64
107 /* free the range so iommu could get some range less than 4G */ 109 /* free the range so iommu could get some range less than 4G */
108 dma32_free_bootmem(); 110 dma32_free_bootmem();
111#endif
112
109 /* 113 /*
110 * The order of these functions is important for 114 * The order of these functions is important for
111 * fall-back/fail-over reasons 115 * fall-back/fail-over reasons
@@ -121,15 +125,6 @@ void __init pci_iommu_alloc(void)
121 pci_swiotlb_init(); 125 pci_swiotlb_init();
122} 126}
123 127
124unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
125{
126 unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
127
128 return size >> PAGE_SHIFT;
129}
130EXPORT_SYMBOL(iommu_nr_pages);
131#endif
132
133void *dma_generic_alloc_coherent(struct device *dev, size_t size, 128void *dma_generic_alloc_coherent(struct device *dev, size_t size,
134 dma_addr_t *dma_addr, gfp_t flag) 129 dma_addr_t *dma_addr, gfp_t flag)
135{ 130{
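
[Editor's note] iommu_nr_pages() disappears from pci-dma.c; the hunk shows only the removal, not where the series relocates it. Its arithmetic -- page offset plus length, rounded up to whole pages -- recurs throughout IOMMU code, so here it is as a standalone check:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    /* Same arithmetic as the removed helper: how many pages does a
     * buffer of 'len' bytes starting at 'addr' touch? */
    static unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
    {
            unsigned long size = (addr & ~PAGE_MASK) + len; /* offset + length */

            return (size + PAGE_SIZE - 1) >> PAGE_SHIFT;    /* round up */
    }

    int main(void)
    {
            /* One byte at the end of a page plus one more spans two pages. */
            printf("%lu\n", iommu_nr_pages(0xfff, 2));      /* prints 2 */
            return 0;
    }
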
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 3c539d111abb..242c3440687f 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -3,6 +3,8 @@
3#include <linux/pci.h> 3#include <linux/pci.h>
4#include <linux/cache.h> 4#include <linux/cache.h>
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/swiotlb.h>
7#include <linux/bootmem.h>
6#include <linux/dma-mapping.h> 8#include <linux/dma-mapping.h>
7 9
8#include <asm/iommu.h> 10#include <asm/iommu.h>
@@ -11,6 +13,31 @@
11 13
12int swiotlb __read_mostly; 14int swiotlb __read_mostly;
13 15
16void *swiotlb_alloc_boot(size_t size, unsigned long nslabs)
17{
18 return alloc_bootmem_low_pages(size);
19}
20
21void *swiotlb_alloc(unsigned order, unsigned long nslabs)
22{
23 return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
24}
25
26dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
27{
28 return paddr;
29}
30
31phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
32{
33 return baddr;
34}
35
36int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
37{
38 return 0;
39}
40
14static dma_addr_t 41static dma_addr_t
15swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, 42swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
16 int direction) 43 int direction)
@@ -50,8 +77,10 @@ struct dma_mapping_ops swiotlb_dma_ops = {
50void __init pci_swiotlb_init(void) 77void __init pci_swiotlb_init(void)
51{ 78{
52 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 79 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
80#ifdef CONFIG_X86_64
53 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) 81 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
54 swiotlb = 1; 82 swiotlb = 1;
83#endif
55 if (swiotlb_force) 84 if (swiotlb_force)
56 swiotlb = 1; 85 swiotlb = 1;
57 if (swiotlb) { 86 if (swiotlb) {
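
[Editor's note] The new hooks let the architecture own swiotlb's memory and addressing decisions; on bare-metal x86 the bus address simply is the physical address, hence the identity pair above. A hypothetical call site inside the swiotlb core, only to show why the hooks exist (the core's code is not part of this diff):

    /* Hypothetical bootstrap fragment: the arch decides where bounce
     * memory comes from and how physical maps to bus addresses; a
     * virtualized arch could return something other than the paddr. */
    static void example_bootstrap(size_t bytes, unsigned long nslabs)
    {
            void *pool = swiotlb_alloc_boot(bytes, nslabs);
            dma_addr_t dma = swiotlb_phys_to_bus(__pa(pool));
            /* ...hand 'dma' to the device as the bounce-buffer address... */
    }
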
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 67465ed89310..309949e9e1c1 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -168,6 +168,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
168 ich_force_enable_hpet); 168 ich_force_enable_hpet);
169DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, 169DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
170 ich_force_enable_hpet); 170 ich_force_enable_hpet);
171DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4,
172 ich_force_enable_hpet);
171DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, 173DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
172 ich_force_enable_hpet); 174 ich_force_enable_hpet);
173 175
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index a90913cccfb7..bf088c61fa40 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -13,6 +13,7 @@
13#include <asm/reboot_fixups.h> 13#include <asm/reboot_fixups.h>
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h> 15#include <asm/pci_x86.h>
16#include <asm/virtext.h>
16 17
17#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
18# include <linux/dmi.h> 19# include <linux/dmi.h>
@@ -39,6 +40,12 @@ int reboot_force;
39static int reboot_cpu = -1; 40static int reboot_cpu = -1;
40#endif 41#endif
41 42
43/* This is set if we need to go through the 'emergency' path.
44 * When machine_emergency_restart() is called, we may be in
45 * an inconsistent state and unable to do a clean shutdown.
46 */
47static int reboot_emergency;
48
42/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 49/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
43bool port_cf9_safe = false; 50bool port_cf9_safe = false;
44 51
@@ -368,6 +375,48 @@ static inline void kb_wait(void)
368 } 375 }
369} 376}
370 377
378static void vmxoff_nmi(int cpu, struct die_args *args)
379{
380 cpu_emergency_vmxoff();
381}
382
383/* Use NMIs as IPIs to tell all CPUs to disable virtualization
384 */
385static void emergency_vmx_disable_all(void)
386{
387 /* Just make sure we won't change CPUs while doing this */
388 local_irq_disable();
389
390	/* We need to disable VMX on all CPUs before rebooting, otherwise
391	 * we risk hanging the machine, because CPUs ignore INIT
392	 * signals while VMX is enabled.
393	 *
394	 * We can't take any locks and we may be in an inconsistent
395	 * state, so we use NMIs as IPIs to tell the other CPUs to disable
396	 * VMX and halt.
397	 *
398	 * For safety, we avoid running the nmi_shootdown_cpus()
399	 * path unnecessarily, but we have no way to check
400	 * whether other CPUs have VMX enabled. So we call it only if the
401	 * CPU we are running on has VMX enabled.
402	 *
403	 * We will miss cases where VMX is not enabled on all CPUs. This
404	 * shouldn't do much harm because KVM always enables VMX on all
405	 * CPUs anyway. But we can miss it in the small window where KVM
406	 * is still enabling VMX.
407 */
408 if (cpu_has_vmx() && cpu_vmx_enabled()) {
409 /* Disable VMX on this CPU.
410 */
411 cpu_vmxoff();
412
413 /* Halt and disable VMX on the other CPUs */
414 nmi_shootdown_cpus(vmxoff_nmi);
415
416 }
417}
418
419
371void __attribute__((weak)) mach_reboot_fixups(void) 420void __attribute__((weak)) mach_reboot_fixups(void)
372{ 421{
373} 422}
@@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void)
376{ 425{
377 int i; 426 int i;
378 427
428 if (reboot_emergency)
429 emergency_vmx_disable_all();
430
379 /* Tell the BIOS if we want cold or warm reboot */ 431 /* Tell the BIOS if we want cold or warm reboot */
380 *((unsigned short *)__va(0x472)) = reboot_mode; 432 *((unsigned short *)__va(0x472)) = reboot_mode;
381 433
@@ -482,13 +534,19 @@ void native_machine_shutdown(void)
482#endif 534#endif
483} 535}
484 536
537static void __machine_emergency_restart(int emergency)
538{
539 reboot_emergency = emergency;
540 machine_ops.emergency_restart();
541}
542
485static void native_machine_restart(char *__unused) 543static void native_machine_restart(char *__unused)
486{ 544{
487 printk("machine restart\n"); 545 printk("machine restart\n");
488 546
489 if (!reboot_force) 547 if (!reboot_force)
490 machine_shutdown(); 548 machine_shutdown();
491 machine_emergency_restart(); 549 __machine_emergency_restart(0);
492} 550}
493 551
494static void native_machine_halt(void) 552static void native_machine_halt(void)
@@ -532,7 +590,7 @@ void machine_shutdown(void)
532 590
533void machine_emergency_restart(void) 591void machine_emergency_restart(void)
534{ 592{
535 machine_ops.emergency_restart(); 593 __machine_emergency_restart(1);
536} 594}
537 595
538void machine_restart(char *cmd) 596void machine_restart(char *cmd)
@@ -592,10 +650,7 @@ static int crash_nmi_callback(struct notifier_block *self,
592 650
593static void smp_send_nmi_allbutself(void) 651static void smp_send_nmi_allbutself(void)
594{ 652{
595 cpumask_t mask = cpu_online_map; 653 send_IPI_allbutself(NMI_VECTOR);
596 cpu_clear(safe_smp_processor_id(), mask);
597 if (!cpus_empty(mask))
598 send_IPI_mask(mask, NMI_VECTOR);
599} 654}
600 655
601static struct notifier_block crash_nmi_nb = { 656static struct notifier_block crash_nmi_nb = {
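
[Editor's note] The reboot_emergency flag threads one bit of context through machine_ops: whether this restart may run with the machine in an unknown state. The resulting control flow, sketched as comments since it spans several of the functions above:

    /* Orderly restart: shut down first, no VMX shootdown needed. */
    native_machine_restart(cmd);
            /* -> machine_shutdown(); __machine_emergency_restart(0) */

    /* Emergency restart (panic/crash paths): force VMX off first,
     * because CPUs ignore the INIT used to reset them while in VMX. */
    machine_emergency_restart();
            /* -> __machine_emergency_restart(1)
             *    -> emergency_vmx_disable_all()
             *       -> cpu_vmxoff() locally, then
             *       -> nmi_shootdown_cpus(vmxoff_nmi) for the rest */
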
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 08e02e8453c9..ae0d8042cf69 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -953,7 +953,7 @@ void __init setup_arch(char **cmdline_p)
953 ioapic_init_mappings(); 953 ioapic_init_mappings();
954 954
955 /* need to wait until io_apic is mapped */ 955 /* need to wait until io_apic is mapped */
956 nr_irqs = probe_nr_irqs(); 956 probe_nr_irqs_gsi();
957 957
958 kvm_guest_init(); 958 kvm_guest_init();
959 959
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..0b63b08e7530 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void)
152 old_size = PERCPU_ENOUGH_ROOM; 152 old_size = PERCPU_ENOUGH_ROOM;
153 align = max_t(unsigned long, PAGE_SIZE, align); 153 align = max_t(unsigned long, PAGE_SIZE, align);
154 size = roundup(old_size, align); 154 size = roundup(old_size, align);
155
156 printk(KERN_INFO
157 "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
158 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
159
155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 160 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
156 size); 161 size);
157 162
@@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void)
168 "cpu %d has no node %d or node-local memory\n", 173 "cpu %d has no node %d or node-local memory\n",
169 cpu, node); 174 cpu, node);
170 if (ptr) 175 if (ptr)
171 printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", 176 printk(KERN_DEBUG
177 "per cpu data for cpu%d at %016lx\n",
172 cpu, __pa(ptr)); 178 cpu, __pa(ptr));
173 } 179 }
174 else { 180 else {
175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 181 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
176 __pa(MAX_DMA_ADDRESS)); 182 __pa(MAX_DMA_ADDRESS));
177 if (ptr) 183 if (ptr)
178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 184 printk(KERN_DEBUG
179 cpu, node, __pa(ptr)); 185 "per cpu data for cpu%d on node%d "
186 "at %016lx\n",
187 cpu, node, __pa(ptr));
180 } 188 }
181#endif 189#endif
182 per_cpu_offset(cpu) = ptr - __per_cpu_start; 190 per_cpu_offset(cpu) = ptr - __per_cpu_start;
183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 191 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
184 } 192 }
185 193
186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
187 NR_CPUS, nr_cpu_ids, nr_node_ids);
188
189 /* Setup percpu data maps */ 194 /* Setup percpu data maps */
190 setup_per_cpu_maps(); 195 setup_per_cpu_maps();
191 196
@@ -282,7 +287,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
282 else 287 else
283 cpu_clear(cpu, *mask); 288 cpu_clear(cpu, *mask);
284 289
285 cpulist_scnprintf(buf, sizeof(buf), *mask); 290 cpulist_scnprintf(buf, sizeof(buf), mask);
286 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 291 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
287 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); 292 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
288 } 293 }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7e558db362c1..beea2649a240 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 118 WARN_ON(1);
119 return; 119 return;
120 } 120 }
121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 121 send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 122}
123 123
124void native_send_call_func_single_ipi(int cpu) 124void native_send_call_func_single_ipi(int cpu)
125{ 125{
126 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); 126 send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
127} 127}
128 128
129void native_send_call_func_ipi(cpumask_t mask) 129void native_send_call_func_ipi(const struct cpumask *mask)
130{ 130{
131 cpumask_t allbutself; 131 cpumask_t allbutself;
132 132
133 allbutself = cpu_online_map; 133 allbutself = cpu_online_map;
134 cpu_clear(smp_processor_id(), allbutself); 134 cpu_clear(smp_processor_id(), allbutself);
135 135
136 if (cpus_equal(mask, allbutself) && 136 if (cpus_equal(*mask, allbutself) &&
137 cpus_equal(cpu_online_map, cpu_callout_map)) 137 cpus_equal(cpu_online_map, cpu_callout_map))
138 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 138 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
139 else 139 else
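
[Editor's note] The cpumask_of_cpu() -> cpumask_of() switch is the single-CPU case of the same migration: the old macro materialized a full cpumask_t (NR_CPUS bits) on the stack, while the new helper returns a pointer to a constant mask. Side by side, as used in the hunk above:

    /* Old: copies an entire cpumask_t by value -- costly for big NR_CPUS. */
    send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);

    /* New: passes a pointer to a constant single-bit mask, no copy. */
    send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
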
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f8500c969442..31869bf5fabd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings);
102/* Last level cache ID of each logical CPU */ 102/* Last level cache ID of each logical CPU */
103DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; 103DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
104 104
105/* bitmap of online cpus */
106cpumask_t cpu_online_map __read_mostly;
107EXPORT_SYMBOL(cpu_online_map);
108
109cpumask_t cpu_callin_map; 105cpumask_t cpu_callin_map;
110cpumask_t cpu_callout_map; 106cpumask_t cpu_callout_map;
111cpumask_t cpu_possible_map;
112EXPORT_SYMBOL(cpu_possible_map);
113 107
114/* representing HT siblings of each logical CPU */ 108/* representing HT siblings of each logical CPU */
115DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); 109DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -1260,6 +1254,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1260 check_nmi_watchdog(); 1254 check_nmi_watchdog();
1261} 1255}
1262 1256
1257static int __initdata setup_possible_cpus = -1;
1258static int __init _setup_possible_cpus(char *str)
1259{
1260 get_option(&str, &setup_possible_cpus);
1261 return 0;
1262}
1263early_param("possible_cpus", _setup_possible_cpus);
1264
1265
1263/* 1266/*
1264 * cpu_possible_map should be static, it cannot change as cpu's 1267 * cpu_possible_map should be static, it cannot change as cpu's
1265 * are onlined, or offlined. The reason is per-cpu data-structures 1268 * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1272,7 +1275,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1272 * 1275 *
1273 * Three ways to find out the number of additional hotplug CPUs: 1276 * Three ways to find out the number of additional hotplug CPUs:
1274 * - If the BIOS specified disabled CPUs in ACPI/mptables use that. 1277 * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
1275 * - The user can overwrite it with additional_cpus=NUM 1278 * - The user can overwrite it with possible_cpus=NUM
1276 * - Otherwise don't reserve additional CPUs. 1279 * - Otherwise don't reserve additional CPUs.
1277 * We do this because additional CPUs waste a lot of memory. 1280 * We do this because additional CPUs waste a lot of memory.
1278 * -AK 1281 * -AK
@@ -1285,9 +1288,17 @@ __init void prefill_possible_map(void)
1285 if (!num_processors) 1288 if (!num_processors)
1286 num_processors = 1; 1289 num_processors = 1;
1287 1290
1288 possible = num_processors + disabled_cpus; 1291 if (setup_possible_cpus == -1)
1289 if (possible > NR_CPUS) 1292 possible = num_processors + disabled_cpus;
1290 possible = NR_CPUS; 1293 else
1294 possible = setup_possible_cpus;
1295
1296 if (possible > CONFIG_NR_CPUS) {
1297 printk(KERN_WARNING
1298 "%d Processors exceeds NR_CPUS limit of %d\n",
1299 possible, CONFIG_NR_CPUS);
1300 possible = CONFIG_NR_CPUS;
1301 }
1291 1302
1292 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1303 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
1293 possible, max_t(int, possible - num_processors, 0)); 1304 possible, max_t(int, possible - num_processors, 0));
@@ -1352,7 +1363,7 @@ void cpu_disable_common(void)
1352 lock_vector_lock(); 1363 lock_vector_lock();
1353 remove_cpu_from_maps(cpu); 1364 remove_cpu_from_maps(cpu);
1354 unlock_vector_lock(); 1365 unlock_vector_lock();
1355 fixup_irqs(cpu_online_map); 1366 fixup_irqs();
1356} 1367}
1357 1368
1358int native_cpu_disable(void) 1369int native_cpu_disable(void)
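
[Editor's note] prefill_possible_map() now honours a possible_cpus= boot parameter and clamps the result against CONFIG_NR_CPUS with a warning. A standalone rerun of that decision, with an assumed 4-processor box and an 8-CPU build limit:

    #include <stdio.h>

    #define CONFIG_NR_CPUS 8   /* assumed build-time limit for this example */

    /* Standalone replay of the choice prefill_possible_map() now makes. */
    static int possible_cpus(int setup_possible_cpus, int num_processors,
                             int disabled_cpus)
    {
            int possible = (setup_possible_cpus == -1)
                    ? num_processors + disabled_cpus
                    : setup_possible_cpus;

            if (possible > CONFIG_NR_CPUS) {
                    printf("%d Processors exceeds NR_CPUS limit of %d\n",
                           possible, CONFIG_NR_CPUS);
                    possible = CONFIG_NR_CPUS;
            }
            return possible;
    }

    int main(void)
    {
            printf("%d\n", possible_cpus(-1, 4, 0));  /* BIOS view: 4 */
            printf("%d\n", possible_cpus(16, 4, 0));  /* possible_cpus=16, clamped: 8 */
            return 0;
    }
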
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 8da059f949be..ce5054642247 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
163 * We have to send the IPI only to 163 * We have to send the IPI only to
164 * CPUs affected. 164 * CPUs affected.
165 */ 165 */
166 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); 166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
167 167
168 while (!cpus_empty(flush_cpumask)) 168 while (!cpus_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */ 169 /* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 29887d7081a9..f8be6f1d2e48 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 191 * We have to send the IPI only to
192 * CPUs affected. 192 * CPUs affected.
193 */ 193 */
194 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); 194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
195 195
196 while (!cpus_empty(f->flush_cpumask)) 196 while (!cpus_empty(f->flush_cpumask))
197 cpu_relax(); 197 cpu_relax();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 961e26a69d55..ce6650eb64e9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,9 +72,6 @@
72 72
73#include "cpu/mcheck/mce.h" 73#include "cpu/mcheck/mce.h"
74 74
75DECLARE_BITMAP(used_vectors, NR_VECTORS);
76EXPORT_SYMBOL_GPL(used_vectors);
77
78asmlinkage int system_call(void); 75asmlinkage int system_call(void);
79 76
80/* Do we ignore FPU interrupts ? */ 77/* Do we ignore FPU interrupts ? */
@@ -89,6 +86,9 @@ gate_desc idt_table[256]
89 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 86 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
90#endif 87#endif
91 88
89DECLARE_BITMAP(used_vectors, NR_VECTORS);
90EXPORT_SYMBOL_GPL(used_vectors);
91
92static int ignore_nmis; 92static int ignore_nmis;
93 93
94static inline void conditional_sti(struct pt_regs *regs) 94static inline void conditional_sti(struct pt_regs *regs)
@@ -946,9 +946,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
946 946
947void __init trap_init(void) 947void __init trap_init(void)
948{ 948{
949#ifdef CONFIG_X86_32
950 int i; 949 int i;
951#endif
952 950
953#ifdef CONFIG_EISA 951#ifdef CONFIG_EISA
954 void __iomem *p = early_ioremap(0x0FFFD9, 4); 952 void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1005,11 +1003,15 @@ void __init trap_init(void)
1005 } 1003 }
1006 1004
1007 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 1005 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
1006#endif
1008 1007
1009 /* Reserve all the builtin and the syscall vector: */ 1008 /* Reserve all the builtin and the syscall vector: */
1010 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 1009 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
1011 set_bit(i, used_vectors); 1010 set_bit(i, used_vectors);
1012 1011
1012#ifdef CONFIG_X86_64
1013 set_bit(IA32_SYSCALL_VECTOR, used_vectors);
1014#else
1013 set_bit(SYSCALL_VECTOR, used_vectors); 1015 set_bit(SYSCALL_VECTOR, used_vectors);
1014#endif 1016#endif
1015 /* 1017 /*
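
[Editor's note] trap_init() now reserves vectors in used_vectors on 64-bit too, marking the CPU-internal vectors below 0x20 and the compat syscall vector 0x80 as taken. A user-space stand-in for the bitmap bookkeeping; the kernel uses DECLARE_BITMAP/set_bit, and this sketch assumes a 64-bit unsigned long:

    #include <stdio.h>

    #define NR_VECTORS 256
    #define FIRST_EXTERNAL_VECTOR 0x20
    #define IA32_SYSCALL_VECTOR 0x80    /* x86-64 compat syscall vector */

    /* Stand-in for DECLARE_BITMAP(used_vectors, NR_VECTORS). */
    static unsigned long used_vectors[NR_VECTORS / 64];

    static void set_vec(int v)  { used_vectors[v / 64] |= 1UL << (v % 64); }
    static int  test_vec(int v) { return !!(used_vectors[v / 64] & (1UL << (v % 64))); }

    int main(void)
    {
            int i;

            /* Same reservations trap_init() now makes on 64-bit. */
            for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
                    set_vec(i);
            set_vec(IA32_SYSCALL_VECTOR);

            printf("0x80 reserved: %d\n", test_vec(IA32_SYSCALL_VECTOR)); /* 1 */
            printf("0x81 reserved: %d\n", test_vec(0x81));                /* 0 */
            return 0;
    }
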
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f8635..c4c1f9e09402 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
226 /* Upper bound is clockevent's use of ulong for cycle deltas. */ 226 /* Upper bound is clockevent's use of ulong for cycle deltas. */
227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); 227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
228 evt->min_delta_ns = clockevent_delta2ns(1, evt); 228 evt->min_delta_ns = clockevent_delta2ns(1, evt);
229 evt->cpumask = cpumask_of_cpu(cpu); 229 evt->cpumask = cpumask_of(cpu);
230 230
231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", 231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
232 evt->name, evt->mult, evt->shift); 232 evt->name, evt->mult, evt->shift);