Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                   |   2
-rw-r--r--  arch/x86/kernel/amd_iommu.c                |   4
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c           |  13
-rw-r--r--  arch/x86/kernel/apic/apic.c                |   2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c             |  30
-rw-r--r--  arch/x86/kernel/cpu/amd.c                  |   4
-rw-r--r--  arch/x86/kernel/cpu/common.c               |   3
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c  |  32
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h  |   3
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c           |  16
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c         |  30
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c     |   5
-rw-r--r--  arch/x86/kernel/dumpstack.c                |   1
-rw-r--r--  arch/x86/kernel/dumpstack_32.c             |   6
-rw-r--r--  arch/x86/kernel/dumpstack_64.c             |  22
-rw-r--r--  arch/x86/kernel/e820.c                     |  23
-rw-r--r--  arch/x86/kernel/kvm.c                      |   2
-rw-r--r--  arch/x86/kernel/pci-dma.c                  |   2
-rw-r--r--  arch/x86/kernel/pci-gart_64.c              |   2
-rw-r--r--  arch/x86/kernel/setup.c                    |  16
-rw-r--r--  arch/x86/kernel/setup_percpu.c             | 219
-rw-r--r--  arch/x86/kernel/tlb_uv.c                   |   9
-rw-r--r--  arch/x86/kernel/traps.c                    |   3
23 files changed, 288 insertions(+), 161 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6c327b852e23..430d5b24af7b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,6 +26,8 @@ CFLAGS_tsc.o := $(nostackp)
 CFLAGS_paravirt.o := $(nostackp)
 GCOV_PROFILE_vsyscall_64.o := n
 GCOV_PROFILE_hpet.o := n
+GCOV_PROFILE_tsc.o := n
+GCOV_PROFILE_paravirt.o := n
 
 obj-y := process_$(BITS).o signal.o entry_$(BITS).o
 obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 9372f0406ad4..6c99f5037801 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1192,7 +1192,7 @@ out:
         return 0;
 }
 
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
         .notifier_call = device_change_notifier,
 };
 
@@ -1763,7 +1763,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
         flag |= __GFP_ZERO;
         virt_addr = (void *)__get_free_pages(flag, get_order(size));
         if (!virt_addr)
-                return 0;
+                return NULL;
 
         paddr = virt_to_phys(virt_addr);
 
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 10b2accd12ea..c1b17e97252e 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -472,6 +472,8 @@ static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
         if (iommu->evt_buf == NULL)
                 return NULL;
 
+        iommu->evt_buf_size = EVT_BUFFER_SIZE;
+
         return iommu->evt_buf;
 }
 
@@ -691,6 +693,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 
                         devid = e->devid;
                         devid_to = e->ext >> 8;
+                        set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
                         set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
                         amd_iommu_alias_table[devid] = devid_to;
                         break;
@@ -749,11 +752,13 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
 
                         devid = e->devid;
                         for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
-                                if (alias)
+                                if (alias) {
                                         amd_iommu_alias_table[dev_i] = devid_to;
-                                set_dev_entry_from_acpi(iommu,
-                                                amd_iommu_alias_table[dev_i],
-                                                flags, ext_flags);
+                                        set_dev_entry_from_acpi(iommu,
+                                                        devid_to, flags, ext_flags);
+                                }
+                                set_dev_entry_from_acpi(iommu, dev_i,
+                                                flags, ext_flags);
                         }
                         break;
                 default:
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8c7c042ecad1..0a1c2830ec66 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -140,7 +140,6 @@ int x2apic_mode;
 #ifdef CONFIG_X86_X2APIC
 /* x2apic enabled before OS handover */
 static int x2apic_preenabled;
-static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
         if (x2apic_enabled()) {
@@ -149,7 +148,6 @@ static __init int setup_nox2apic(char *str)
                 return 0;
         }
 
-        disable_x2apic = 1;
         setup_clear_cpu_cap(X86_FEATURE_X2APIC);
         return 0;
 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4d0216fcb36c..90b5e6efa938 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1716,25 +1716,19 @@ __apicdebuginit(void) print_IO_APIC(void)
         return;
 }
 
-__apicdebuginit(void) print_APIC_bitfield(int base)
+__apicdebuginit(void) print_APIC_field(int base)
 {
-        unsigned int v;
-        int i, j;
+        int i;
 
         if (apic_verbosity == APIC_QUIET)
                 return;
 
-        printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-        for (i = 0; i < 8; i++) {
-                v = apic_read(base + i*0x10);
-                for (j = 0; j < 32; j++) {
-                        if (v & (1<<j))
-                                printk("1");
-                        else
-                                printk("0");
-                }
-                printk("\n");
-        }
+        printk(KERN_DEBUG);
+
+        for (i = 0; i < 8; i++)
+                printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+
+        printk(KERN_CONT "\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
@@ -1745,7 +1739,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
         if (apic_verbosity == APIC_QUIET)
                 return;
 
-        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+        printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                 smp_processor_id(), hard_smp_processor_id());
         v = apic_read(APIC_ID);
         printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
@@ -1786,11 +1780,11 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
         printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
         printk(KERN_DEBUG "... APIC ISR field:\n");
-        print_APIC_bitfield(APIC_ISR);
+        print_APIC_field(APIC_ISR);
         printk(KERN_DEBUG "... APIC TMR field:\n");
-        print_APIC_bitfield(APIC_TMR);
+        print_APIC_field(APIC_TMR);
         printk(KERN_DEBUG "... APIC IRR field:\n");
-        print_APIC_bitfield(APIC_IRR);
+        print_APIC_field(APIC_IRR);
 
         if (APIC_INTEGRATED(ver)) { /* !82489DX */
                 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e5b27d8f1b47..28e5f5956042 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -258,13 +258,15 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_HT
         unsigned bits;
+        int cpu = smp_processor_id();
 
         bits = c->x86_coreid_bits;
-
         /* Low order bits define the core id (index of core in socket) */
         c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
         /* Convert the initial APIC ID into the socket ID */
         c->phys_proc_id = c->initial_apicid >> bits;
+        /* use socket ID also for last level cache */
+        per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6b26d4deada0..f1961c07af9a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -848,9 +848,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
         numa_add_cpu(smp_processor_id());
 #endif
-
-        /* Cap the iomem address space to what is addressable on all CPUs */
-        iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1;
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 81cbe64ed6b4..2a50ef891000 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -299,7 +299,7 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
 static int transition_fid_vid(struct powernow_k8_data *data,
                 u32 reqfid, u32 reqvid)
 {
-        if (core_voltage_pre_transition(data, reqvid))
+        if (core_voltage_pre_transition(data, reqvid, reqfid))
                 return 1;
 
         if (core_frequency_transition(data, reqfid))
@@ -327,17 +327,20 @@ static int transition_fid_vid(struct powernow_k8_data *data,
 
 /* Phase 1 - core voltage transition ... setup voltage */
 static int core_voltage_pre_transition(struct powernow_k8_data *data,
-                u32 reqvid)
+                u32 reqvid, u32 reqfid)
 {
         u32 rvosteps = data->rvo;
         u32 savefid = data->currfid;
-        u32 maxvid, lo;
+        u32 maxvid, lo, rvomult = 1;
 
         dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
                 "reqvid 0x%x, rvo 0x%x\n",
                 smp_processor_id(),
                 data->currfid, data->currvid, reqvid, data->rvo);
 
+        if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
+                rvomult = 2;
+        rvosteps *= rvomult;
         rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
         maxvid = 0x1f & (maxvid >> 16);
         dprintk("ph1 maxvid=0x%x\n", maxvid);
@@ -351,7 +354,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
                 return 1;
         }
 
-        while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) {
+        while ((rvosteps > 0) &&
+                        ((rvomult * data->rvo + data->currvid) > reqvid)) {
                 if (data->currvid == maxvid) {
                         rvosteps = 0;
                 } else {
@@ -384,13 +388,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
         u32 vcoreqfid, vcocurrfid, vcofiddiff;
         u32 fid_interval, savevid = data->currvid;
 
-        if ((reqfid < HI_FID_TABLE_BOTTOM) &&
-            (data->currfid < HI_FID_TABLE_BOTTOM)) {
-                printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
-                        "0x%x 0x%x\n", reqfid, data->currfid);
-                return 1;
-        }
-
         if (data->currfid == reqfid) {
                 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
                         data->currfid);
@@ -407,6 +404,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
         vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
                 : vcoreqfid - vcocurrfid;
 
+        if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
+                vcofiddiff = 0;
+
         while (vcofiddiff > 2) {
                 (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
 
@@ -1081,14 +1081,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
                 return 0;
         }
 
-        if ((fid < HI_FID_TABLE_BOTTOM) &&
-            (data->currfid < HI_FID_TABLE_BOTTOM)) {
-                printk(KERN_ERR PFX
-                       "ignoring illegal change in lo freq table-%x to 0x%x\n",
-                       data->currfid, fid);
-                return 1;
-        }
-
         dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
                 smp_processor_id(), fid, vid);
         freqs.old = find_khz_freq_from_fid(data->currfid);
@@ -1267,7 +1259,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
         static const char ACPI_PSS_BIOS_BUG_MSG[] =
                 KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
-                KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n";
+                FW_BUG PFX "Try again with latest BIOS.\n";
         struct powernow_k8_data *data;
         struct init_on_cpu init_on_cpu;
         int rc;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index c9c1190b5e1f..02ce824073cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -215,7 +215,8 @@ struct pst_s {
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
 
-static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid);
+static int core_voltage_pre_transition(struct powernow_k8_data *data,
+                u32 reqvid, u32 regfid);
 static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
 static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 284d1de968bc..484c1e5f658e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -194,14 +194,14 @@ static void print_mce(struct mce *m)
                         m->cs, m->ip);
                 if (m->cs == __KERNEL_CS)
                         print_symbol("{%s}", m->ip);
-                printk("\n");
+                printk(KERN_CONT "\n");
         }
         printk(KERN_EMERG "TSC %llx ", m->tsc);
         if (m->addr)
-                printk("ADDR %llx ", m->addr);
+                printk(KERN_CONT "ADDR %llx ", m->addr);
         if (m->misc)
-                printk("MISC %llx ", m->misc);
-        printk("\n");
+                printk(KERN_CONT "MISC %llx ", m->misc);
+        printk(KERN_CONT "\n");
         printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
                 m->cpuvendor, m->cpuid, m->time, m->socketid,
                 m->apicid);
@@ -209,13 +209,13 @@ static void print_mce(struct mce *m)
 
 static void print_mce_head(void)
 {
-        printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n");
+        printk(KERN_EMERG "\nHARDWARE ERROR\n");
 }
 
 static void print_mce_tail(void)
 {
         printk(KERN_EMERG "This is not a software problem!\n"
-               KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n");
+               "Run through mcelog --ascii to decode and contact your hardware vendor\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -1117,7 +1117,7 @@ static void mcheck_timer(unsigned long data)
         *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
 
         t->expires = jiffies + *n;
-        add_timer(t);
+        add_timer_on(t, smp_processor_id());
 }
 
 static void mce_do_trigger(struct work_struct *work)
@@ -1321,7 +1321,7 @@ static void mce_init_timer(void)
                 return;
         setup_timer(t, mcheck_timer, smp_processor_id());
         t->expires = round_jiffies(jiffies + *n);
-        add_timer(t);
+        add_timer_on(t, smp_processor_id());
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 76dfef23f789..36c3dc7b8991 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -401,7 +401,7 @@ static const u64 amd_hw_cache_event_ids
                 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
         },
         [ C(OP_WRITE) ] = {
-                [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */
+                [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
                 [ C(RESULT_MISS) ] = 0,
         },
         [ C(OP_PREFETCH) ] = {
@@ -912,6 +912,8 @@ x86_perf_counter_set_period(struct perf_counter *counter,
         err = checking_wrmsrl(hwc->counter_base + idx,
                         (u64)(-left) & x86_pmu.counter_mask);
 
+        perf_counter_update_userpage(counter);
+
         return ret;
 }
 
@@ -969,13 +971,6 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
         if (!x86_pmu.num_counters_fixed)
                 return -1;
 
-        /*
-         * Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
-         */
-        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-            boot_cpu_data.x86_model == 28)
-                return -1;
-
         event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
         if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
@@ -1041,6 +1036,8 @@ try_generic:
         x86_perf_counter_set_period(counter, hwc, idx);
         x86_pmu.enable(hwc, idx);
 
+        perf_counter_update_userpage(counter);
+
         return 0;
 }
 
@@ -1133,6 +1130,8 @@ static void x86_pmu_disable(struct perf_counter *counter)
         x86_perf_counter_update(counter, hwc, idx);
         cpuc->counters[idx] = NULL;
         clear_bit(idx, cpuc->used_mask);
+
+        perf_counter_update_userpage(counter);
 }
 
 /*
@@ -1428,8 +1427,6 @@ static int intel_pmu_init(void)
          */
         x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
 
-        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
-
         /*
          * Install the hw-cache-events table:
          */
@@ -1499,21 +1496,22 @@ void __init init_hw_perf_counters(void)
         pr_cont("%s PMU driver.\n", x86_pmu.name);
 
         if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
-                x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
                 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
                      x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+                x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
         }
         perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
         perf_max_counters = x86_pmu.num_counters;
 
         if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
-                x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
                 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
                      x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+                x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
         }
 
         perf_counter_mask |=
                 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+        x86_pmu.intel_ctrl = perf_counter_mask;
 
         perf_counters_lapic_init();
         register_die_notifier(&perf_counter_nmi_notifier);
@@ -1563,6 +1561,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 
 static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
 static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
+static DEFINE_PER_CPU(int, in_nmi_frame);
 
 
 static void
@@ -1578,7 +1577,9 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-        /* Process all stacks: */
+        per_cpu(in_nmi_frame, smp_processor_id()) =
+                        x86_is_stack_id(NMI_STACK, name);
+
         return 0;
 }
 
@@ -1586,6 +1587,9 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
         struct perf_callchain_entry *entry = data;
 
+        if (per_cpu(in_nmi_frame, smp_processor_id()))
+                return;
+
         if (reliable)
                 callchain_store(entry, addr);
 }
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 5c481f6205bf..e60ed740d2b3 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -803,8 +803,3 @@ int __kprobes lapic_wd_event(unsigned nmi_hz)
         wd_ops->rearm(wd, nmi_hz);
         return 1;
 }
-
-int lapic_watchdog_ok(void)
-{
-        return wd_ops != NULL;
-}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 95ea5fa7d444..c8405718a4c3 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,6 +22,7 @@
 #include "dumpstack.h"
 
 int panic_on_unrecovered_nmi;
+int panic_on_io_nmi;
 unsigned int code_bytes = 64;
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index d593cd1f58dc..bca5fba91c9e 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -19,6 +19,12 @@
 
 #include "dumpstack.h"
 
+/* Just a stub for now */
+int x86_is_stack_id(int id, char *name)
+{
+        return 0;
+}
+
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
                 unsigned long *stack, unsigned long bp,
                 const struct stacktrace_ops *ops, void *data)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index d35db5993fd6..54b0a3276766 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -19,10 +19,8 @@
 
 #include "dumpstack.h"
 
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
-                                        unsigned *usedp, char **idp)
-{
-        static char ids[][8] = {
+
+static char x86_stack_ids[][8] = {
                 [DEBUG_STACK - 1] = "#DB",
                 [NMI_STACK - 1] = "NMI",
                 [DOUBLEFAULT_STACK - 1] = "#DF",
@@ -33,6 +31,15 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
                   N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
 #endif
         };
+
+int x86_is_stack_id(int id, char *name)
+{
+        return x86_stack_ids[id - 1] == name;
+}
+
+static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+                                        unsigned *usedp, char **idp)
+{
         unsigned k;
 
         /*
@@ -61,7 +68,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
                 if (*usedp & (1U << k))
                         break;
                 *usedp |= 1U << k;
-                *idp = ids[k];
+                *idp = x86_stack_ids[k];
                 return (unsigned long *)end;
         }
         /*
@@ -81,12 +88,13 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
                         do {
                                 ++j;
                                 end -= EXCEPTION_STKSZ;
-                                ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
+                                x86_stack_ids[j][4] = '1' +
+                                                (j - N_EXCEPTION_STACKS);
                         } while (stack < end - EXCEPTION_STKSZ);
                         if (*usedp & (1U << j))
                                 break;
                         *usedp |= 1U << j;
-                        *idp = ids[j];
+                        *idp = x86_stack_ids[j];
                         return (unsigned long *)end;
                 }
 #endif
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7271fa33d791..5cb5725b2bae 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -627,10 +627,9 @@ __init void e820_setup_gap(void)
 #ifdef CONFIG_X86_64
         if (!found) {
                 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
-                printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
-                       "address range\n"
-                       KERN_ERR "PCI: Unassigned devices with 32bit resource "
-                       "registers may break!\n");
+                printk(KERN_ERR
+        "PCI: Warning: Cannot find a gap in the 32bit address range\n"
+        "PCI: Unassigned devices with 32bit resource registers may break!\n");
         }
 #endif
 
@@ -1383,6 +1382,8 @@ static unsigned long ram_alignment(resource_size_t pos)
         return 32*1024*1024;
 }
 
+#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
+
 void __init e820_reserve_resources_late(void)
 {
         int i;
@@ -1400,17 +1401,19 @@ void __init e820_reserve_resources_late(void)
          * avoid stolen RAM:
          */
         for (i = 0; i < e820.nr_map; i++) {
-                struct e820entry *entry = &e820_saved.map[i];
-                resource_size_t start, end;
+                struct e820entry *entry = &e820.map[i];
+                u64 start, end;
 
                 if (entry->type != E820_RAM)
                         continue;
                 start = entry->addr + entry->size;
-                end = round_up(start, ram_alignment(start));
-                if (start == end)
+                end = round_up(start, ram_alignment(start)) - 1;
+                if (end > MAX_RESOURCE_SIZE)
+                        end = MAX_RESOURCE_SIZE;
+                if (start >= end)
                         continue;
-                reserve_region_with_split(&iomem_resource, start,
-                                          end - 1, "RAM buffer");
+                reserve_region_with_split(&iomem_resource, start, end,
+                                          "RAM buffer");
         }
 }
 
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a78ecad0c900..c664d515f613 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -200,7 +200,7 @@ static void kvm_leave_lazy_mmu(void)
         state->mode = paravirt_get_lazy_mode();
 }
 
-static void paravirt_ops_setup(void)
+static void __init paravirt_ops_setup(void)
 {
         pv_info.name = "KVM";
         pv_info.paravirt_enabled = 1;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 47630479b067..1a041bcf506b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -211,11 +211,11 @@ static __init int iommu_setup(char *p)
 #ifdef CONFIG_SWIOTLB
         if (!strncmp(p, "soft", 4))
                 swiotlb = 1;
+#endif
         if (!strncmp(p, "pt", 2)) {
                 iommu_pass_through = 1;
                 return 1;
         }
-#endif
 
         gart_parse_options(p);
 
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index cfd9f9063896..d2e56b8f48e7 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -675,7 +675,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 nommu:
         /* Should not happen anymore */
         printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
-               KERN_WARNING "falling back to iommu=soft.\n");
+               "falling back to iommu=soft.\n");
         return -1;
 }
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index be5ae80f897f..de2cab132844 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -289,6 +289,20 @@ void * __init extend_brk(size_t size, size_t align)
         return ret;
 }
 
+#ifdef CONFIG_X86_64
+static void __init init_gbpages(void)
+{
+        if (direct_gbpages && cpu_has_gbpages)
+                printk(KERN_INFO "Using GB pages for direct mapping\n");
+        else
+                direct_gbpages = 0;
+}
+#else
+static inline void init_gbpages(void)
+{
+}
+#endif
+
 static void __init reserve_brk(void)
 {
         if (_brk_end > _brk_start)
@@ -871,6 +885,8 @@ void __init setup_arch(char **cmdline_p)
 
         reserve_brk();
 
+        init_gbpages();
+
         /* max_pfn_mapped is updated here */
         max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
         max_pfn_mapped = max_low_pfn_mapped;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9c3f0823e6aa..29a3eef7cf4a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 }
 
 /*
- * Remap allocator
+ * Large page remap allocator
  *
  * This allocator uses PMD page as unit. A PMD page is allocated for
  * each cpu and each is remapped into vmalloc area using PMD mapping.
@@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
  * better than only using 4k mappings while still being NUMA friendly.
  */
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-static size_t pcpur_size __initdata;
-static void **pcpur_ptrs __initdata;
+struct pcpul_ent {
+        unsigned int cpu;
+        void *ptr;
+};
+
+static size_t pcpul_size;
+static struct pcpul_ent *pcpul_map;
+static struct vm_struct pcpul_vm;
 
-static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
+static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
 {
         size_t off = (size_t)pageno << PAGE_SHIFT;
 
-        if (off >= pcpur_size)
+        if (off >= pcpul_size)
                 return NULL;
 
-        return virt_to_page(pcpur_ptrs[cpu] + off);
+        return virt_to_page(pcpul_map[cpu].ptr + off);
 }
 
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
 {
-        static struct vm_struct vm;
-        size_t ptrs_size, dyn_size;
+        size_t map_size, dyn_size;
         unsigned int cpu;
+        int i, j;
         ssize_t ret;
 
-        /*
-         * If large page isn't supported, there's no benefit in doing
-         * this. Also, on non-NUMA, embedding is better.
-         *
-         * NOTE: disabled for now.
-         */
-        if (true || !cpu_has_pse || !pcpu_need_numa())
+        if (!chosen) {
+                size_t vm_size = VMALLOC_END - VMALLOC_START;
+                size_t tot_size = num_possible_cpus() * PMD_SIZE;
+
+                /* on non-NUMA, embedding is better */
+                if (!pcpu_need_numa())
+                        return -EINVAL;
+
+                /* don't consume more than 20% of vmalloc area */
+                if (tot_size > vm_size / 5) {
+                        pr_info("PERCPU: too large chunk size %zuMB for "
+                                "large page remap\n", tot_size >> 20);
+                        return -EINVAL;
+                }
+        }
+
+        /* need PSE */
+        if (!cpu_has_pse) {
+                pr_warning("PERCPU: lpage allocator requires PSE\n");
                 return -EINVAL;
+        }
 
         /*
          * Currently supports only single page. Supporting multiple
          * pages won't be too difficult if it ever becomes necessary.
          */
-        pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+        pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
                                PERCPU_DYNAMIC_RESERVE);
-        if (pcpur_size > PMD_SIZE) {
+        if (pcpul_size > PMD_SIZE) {
                 pr_warning("PERCPU: static data is larger than large page, "
                            "can't use large page\n");
                 return -EINVAL;
         }
-        dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
+        dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
 
         /* allocate pointer array and alloc large pages */
-        ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
-        pcpur_ptrs = alloc_bootmem(ptrs_size);
+        map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
+        pcpul_map = alloc_bootmem(map_size);
 
         for_each_possible_cpu(cpu) {
-                pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE);
-                if (!pcpur_ptrs[cpu])
+                pcpul_map[cpu].cpu = cpu;
+                pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
+                                                        PMD_SIZE);
+                if (!pcpul_map[cpu].ptr) {
+                        pr_warning("PERCPU: failed to allocate large page "
+                                   "for cpu%u\n", cpu);
                         goto enomem;
+                }
 
                 /*
-                 * Only use pcpur_size bytes and give back the rest.
+                 * Only use pcpul_size bytes and give back the rest.
                  *
                  * Ingo: The 2MB up-rounding bootmem is needed to make
                  * sure the partial 2MB page is still fully RAM - it's
                  * not well-specified to have a PAT-incompatible area
                  * (unmapped RAM, device memory, etc.) in that hole.
                  */
-                free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
-                             PMD_SIZE - pcpur_size);
+                free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
+                             PMD_SIZE - pcpul_size);
 
-                memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
+                memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
         }
 
         /* allocate address and map */
-        vm.flags = VM_ALLOC;
-        vm.size = num_possible_cpus() * PMD_SIZE;
-        vm_area_register_early(&vm, PMD_SIZE);
+        pcpul_vm.flags = VM_ALLOC;
+        pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
+        vm_area_register_early(&pcpul_vm, PMD_SIZE);
 
         for_each_possible_cpu(cpu) {
-                pmd_t *pmd;
+                pmd_t *pmd, pmd_v;
 
-                pmd = populate_extra_pmd((unsigned long)vm.addr
-                                         + cpu * PMD_SIZE);
-                set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])),
-                                     PAGE_KERNEL_LARGE));
+                pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
+                                         cpu * PMD_SIZE);
+                pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
+                                PAGE_KERNEL_LARGE);
+                set_pmd(pmd, pmd_v);
         }
 
         /* we're ready, commit */
         pr_info("PERCPU: Remapped at %p with large pages, static data "
-                "%zu bytes\n", vm.addr, static_size);
+                "%zu bytes\n", pcpul_vm.addr, static_size);
 
-        ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+        ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
                                      PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
-                                     PMD_SIZE, vm.addr, NULL);
-        goto out_free_ar;
+                                     PMD_SIZE, pcpul_vm.addr, NULL);
+
+        /* sort pcpul_map array for pcpu_lpage_remapped() */
+        for (i = 0; i < num_possible_cpus() - 1; i++)
+                for (j = i + 1; j < num_possible_cpus(); j++)
+                        if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
+                                struct pcpul_ent tmp = pcpul_map[i];
+                                pcpul_map[i] = pcpul_map[j];
+                                pcpul_map[j] = tmp;
+                        }
+
+        return ret;
 
 enomem:
         for_each_possible_cpu(cpu)
-                if (pcpur_ptrs[cpu])
-                        free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE);
-        ret = -ENOMEM;
-out_free_ar:
-        free_bootmem(__pa(pcpur_ptrs), ptrs_size);
-        return ret;
+                if (pcpul_map[cpu].ptr)
+                        free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
+        free_bootmem(__pa(pcpul_map), map_size);
+        return -ENOMEM;
+}
+
+/**
+ * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
+ * @kaddr: the kernel address in question
+ *
+ * Determine whether @kaddr falls in the pcpul recycled area. This is
+ * used by pageattr to detect VM aliases and break up the pcpu PMD
+ * mapping such that the same physical page is not mapped under
+ * different attributes.
+ *
+ * The recycled area is always at the tail of a partially used PMD
+ * page.
+ *
+ * RETURNS:
+ * Address of corresponding remapped pcpu address if match is found;
+ * otherwise, NULL.
+ */
+void *pcpu_lpage_remapped(void *kaddr)
+{
+        void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
+        unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
+        int left = 0, right = num_possible_cpus() - 1;
+        int pos;
+
+        /* pcpul in use at all? */
+        if (!pcpul_map)
+                return NULL;
+
+        /* okay, perform binary search */
+        while (left <= right) {
+                pos = (left + right) / 2;
+
+                if (pcpul_map[pos].ptr < pmd_addr)
+                        left = pos + 1;
+                else if (pcpul_map[pos].ptr > pmd_addr)
+                        right = pos - 1;
+                else {
+                        /* it shouldn't be in the area for the first chunk */
+                        WARN_ON(offset < pcpul_size);
+
+                        return pcpul_vm.addr +
+                                pcpul_map[pos].cpu * PMD_SIZE + offset;
+                }
+        }
+
+        return NULL;
 }
 #else
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
 {
         return -EINVAL;
 }
@@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
  * mapping so that it can use PMD mapping without additional TLB
  * pressure.
  */
-static ssize_t __init setup_pcpu_embed(size_t static_size)
+static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
 {
         size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
 
@@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
          * this. Also, embedding allocation doesn't play well with
          * NUMA.
          */
-        if (!cpu_has_pse || pcpu_need_numa())
+        if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
                 return -EINVAL;
 
         return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
@@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
                 void *ptr;
 
                 ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
-                if (!ptr)
+                if (!ptr) {
+                        pr_warning("PERCPU: failed to allocate "
+                                   "4k page for cpu%u\n", cpu);
                         goto enomem;
+                }
 
                 memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
                 pcpu4k_pages[j++] = virt_to_page(ptr);
@@ -333,6 +416,16 @@ out_free_ar:
         return ret;
 }
 
+/* for explicit first chunk allocator selection */
+static char pcpu_chosen_alloc[16] __initdata;
+
+static int __init percpu_alloc_setup(char *str)
+{
+        strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
+        return 0;
+}
+early_param("percpu_alloc", percpu_alloc_setup);
+
 static inline void setup_percpu_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu)
 #endif
 }
 
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
 void __init setup_per_cpu_areas(void)
 {
         size_t static_size = __per_cpu_end - __per_cpu_start;
@@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void)
          * of large page mappings. Please read comments on top of
          * each allocator for details.
          */
-        ret = setup_pcpu_remap(static_size);
-        if (ret < 0)
-                ret = setup_pcpu_embed(static_size);
+        ret = -EINVAL;
+        if (strlen(pcpu_chosen_alloc)) {
+                if (strcmp(pcpu_chosen_alloc, "4k")) {
+                        if (!strcmp(pcpu_chosen_alloc, "lpage"))
+                                ret = setup_pcpu_lpage(static_size, true);
+                        else if (!strcmp(pcpu_chosen_alloc, "embed"))
+                                ret = setup_pcpu_embed(static_size, true);
+                        else
+                                pr_warning("PERCPU: unknown allocator %s "
+                                           "specified\n", pcpu_chosen_alloc);
+                        if (ret < 0)
+                                pr_warning("PERCPU: %s allocator failed (%zd), "
+                                           "falling back to 4k\n",
+                                           pcpu_chosen_alloc, ret);
+                }
+        } else {
+                ret = setup_pcpu_lpage(static_size, false);
+                if (ret < 0)
+                        ret = setup_pcpu_embed(static_size, false);
+        }
         if (ret < 0)
                 ret = setup_pcpu_4k(static_size);
         if (ret < 0)
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 124d40c575df..8ccabb8a2f6a 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -711,7 +711,6 @@ uv_activation_descriptor_init(int node, int pnode)
         unsigned long pa;
         unsigned long m;
         unsigned long n;
-        unsigned long mmr_image;
         struct bau_desc *adp;
         struct bau_desc *ad2;
 
@@ -727,12 +726,8 @@ uv_activation_descriptor_init(int node, int pnode)
         n = pa >> uv_nshift;
         m = pa & uv_mmask;
 
-        mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE);
-        if (mmr_image) {
-                uv_write_global_mmr64(pnode, (unsigned long)
-                                      UVH_LB_BAU_SB_DESCRIPTOR_BASE,
-                                      (n << UV_DESC_BASE_PNODE_SHIFT | m));
-        }
+        uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
+                              (n << UV_DESC_BASE_PNODE_SHIFT | m));
 
         /*
          * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a0f48f5671c0..5204332f475d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -346,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
         printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
         show_registers(regs);
 
+        if (panic_on_io_nmi)
+                panic("NMI IOCK error: Not continuing");
+
         /* Re-enable the IOCK line, wait for a few seconds */
         reason = (reason & 0xf) | 8;
         outb(reason, 0x61);