aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c8
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c7
-rw-r--r--arch/x86/kernel/apic/io_apic.c4
-rw-r--r--arch/x86/kernel/apic/probe_64.c7
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c25
-rw-r--r--arch/x86/kernel/cpu/perf_event.c20
-rw-r--r--arch/x86/kernel/entry_32.S2
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/head_32.S16
-rw-r--r--arch/x86/kernel/hpet.c26
-rw-r--r--arch/x86/kernel/hw_breakpoint.c4
-rw-r--r--arch/x86/kernel/microcode_intel.c16
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c64
-rw-r--r--arch/x86/kernel/pvclock.c5
-rw-r--r--arch/x86/kernel/resource.c48
-rw-r--r--arch/x86/kernel/setup.c18
-rw-r--r--arch/x86/kernel/xsave.c3
18 files changed, 194 insertions, 82 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9e13763b6092..1e994754d323 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
46obj-y += tsc.o io_delay.o rtc.o 46obj-y += tsc.o io_delay.o rtc.o
47obj-y += pci-iommu_table.o 47obj-y += pci-iommu_table.o
48obj-y += resource.o
48 49
49obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 50obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
50obj-y += process.o 51obj-y += process.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3f838d537392..78218135b48e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1389,6 +1389,14 @@ void __cpuinit end_local_APIC_setup(void)
1389 1389
1390 setup_apic_nmi_watchdog(NULL); 1390 setup_apic_nmi_watchdog(NULL);
1391 apic_pm_activate(); 1391 apic_pm_activate();
1392
1393 /*
1394 * Now that local APIC setup is completed for BP, configure the fault
1395 * handling for interrupt remapping.
1396 */
1397 if (!smp_processor_id() && intr_remapping_enabled)
1398 enable_drhd_fault_handling();
1399
1392} 1400}
1393 1401
1394#ifdef CONFIG_X86_X2APIC 1402#ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..62f6e1e55b90 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,15 +17,16 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22
23u64 hw_nmi_get_sample_period(void) 20u64 hw_nmi_get_sample_period(void)
24{ 21{
25 return (u64)(cpu_khz) * 1000 * 60; 22 return (u64)(cpu_khz) * 1000 * 60;
26} 23}
27 24
28#ifdef ARCH_HAS_NMI_WATCHDOG 25#ifdef ARCH_HAS_NMI_WATCHDOG
26
27/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29
29void arch_trigger_all_cpu_backtrace(void) 30void arch_trigger_all_cpu_backtrace(void)
30{ 31{
31 int i; 32 int i;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a721f628..fadcd743a74f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2430,13 +2430,12 @@ static void ack_apic_level(struct irq_data *data)
2430{ 2430{
2431 struct irq_cfg *cfg = data->chip_data; 2431 struct irq_cfg *cfg = data->chip_data;
2432 int i, do_unmask_irq = 0, irq = data->irq; 2432 int i, do_unmask_irq = 0, irq = data->irq;
2433 struct irq_desc *desc = irq_to_desc(irq);
2434 unsigned long v; 2433 unsigned long v;
2435 2434
2436 irq_complete_move(cfg); 2435 irq_complete_move(cfg);
2437#ifdef CONFIG_GENERIC_PENDING_IRQ 2436#ifdef CONFIG_GENERIC_PENDING_IRQ
2438 /* If we are moving the irq we need to mask it */ 2437 /* If we are moving the irq we need to mask it */
2439 if (unlikely(desc->status & IRQ_MOVE_PENDING)) { 2438 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
2440 do_unmask_irq = 1; 2439 do_unmask_irq = 1;
2441 mask_ioapic(cfg); 2440 mask_ioapic(cfg);
2442 } 2441 }
@@ -3413,6 +3412,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3413 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3412 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3414 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3413 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3415 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3414 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3415 msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
3416 3416
3417 dmar_msi_write(irq, &msg); 3417 dmar_msi_write(irq, &msg);
3418 3418
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f9e4e6a54073..d8c4a6feb286 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void)
79 /* need to update phys_pkg_id */ 79 /* need to update phys_pkg_id */
80 apic->phys_pkg_id = apicid_phys_pkg_id; 80 apic->phys_pkg_id = apicid_phys_pkg_id;
81 } 81 }
82
83 /*
84 * Now that apic routing model is selected, configure the
85 * fault handling for intr remapping.
86 */
87 if (intr_remapping_enabled)
88 enable_drhd_fault_handling();
89} 82}
90 83
91/* Same for both flat and physical. */ 84/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 194539aea175..c1c52c341f40 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr;
44static union uvh_apicid uvh_apicid; 44static union uvh_apicid uvh_apicid;
45int uv_min_hub_revision_id; 45int uv_min_hub_revision_id;
46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 46EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
47unsigned int uv_apicid_hibits;
48EXPORT_SYMBOL_GPL(uv_apicid_hibits);
47static DEFINE_SPINLOCK(uv_nmi_lock); 49static DEFINE_SPINLOCK(uv_nmi_lock);
48 50
49static inline bool is_GRU_range(u64 start, u64 end) 51static inline bool is_GRU_range(u64 start, u64 end)
@@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void)
85 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; 87 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
86} 88}
87 89
90/*
91 * Add an extra bit as dictated by bios to the destination apicid of
92 * interrupts potentially passing through the UV HUB. This prevents
93 * a deadlock between interrupts and IO port operations.
94 */
95static void __init uv_set_apicid_hibit(void)
96{
97 union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
98 unsigned long *mmr;
99
100 mmr = early_ioremap(UV_LOCAL_MMR_BASE |
101 UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
102 apicid_mask.v = *mmr;
103 early_iounmap(mmr, sizeof(*mmr));
104 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
105}
106
88static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 107static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
89{ 108{
90 int nodeid; 109 int nodeid;
@@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
102 __get_cpu_var(x2apic_extra_bits) = 121 __get_cpu_var(x2apic_extra_bits) =
103 nodeid << (uvh_apicid.s.pnode_shift - 1); 122 nodeid << (uvh_apicid.s.pnode_shift - 1);
104 uv_system_type = UV_NON_UNIQUE_APIC; 123 uv_system_type = UV_NON_UNIQUE_APIC;
124 uv_set_apicid_hibit();
105 return 1; 125 return 1;
106 } 126 }
107 } 127 }
@@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
155 int pnode; 175 int pnode;
156 176
157 pnode = uv_apicid_to_pnode(phys_apicid); 177 pnode = uv_apicid_to_pnode(phys_apicid);
178 phys_apicid |= uv_apicid_hibits;
158 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 179 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
159 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 180 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
160 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 181 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
@@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
236 int cpu = cpumask_first(cpumask); 257 int cpu = cpumask_first(cpumask);
237 258
238 if ((unsigned)cpu < nr_cpu_ids) 259 if ((unsigned)cpu < nr_cpu_ids)
239 return per_cpu(x86_cpu_to_apicid, cpu); 260 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
240 else 261 else
241 return BAD_APICID; 262 return BAD_APICID;
242} 263}
@@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
255 if (cpumask_test_cpu(cpu, cpu_online_mask)) 276 if (cpumask_test_cpu(cpu, cpu_online_mask))
256 break; 277 break;
257 } 278 }
258 return per_cpu(x86_cpu_to_apicid, cpu); 279 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
259} 280}
260 281
261static unsigned int x2apic_get_apic_id(unsigned long x) 282static unsigned int x2apic_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed6310183efb..6d75b9145b13 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -381,6 +381,20 @@ static void release_pmc_hardware(void) {}
381 381
382#endif 382#endif
383 383
384static bool check_hw_exists(void)
385{
386 u64 val, val_new = 0;
387 int ret = 0;
388
389 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new)
393 return false;
394
395 return true;
396}
397
384static void reserve_ds_buffers(void); 398static void reserve_ds_buffers(void);
385static void release_ds_buffers(void); 399static void release_ds_buffers(void);
386 400
@@ -1372,6 +1386,12 @@ void __init init_hw_perf_events(void)
1372 1386
1373 pmu_check_apic(); 1387 pmu_check_apic();
1374 1388
1389 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) {
1391 pr_cont("Broken PMU hardware detected, software events only.\n");
1392 return;
1393 }
1394
1375 pr_cont("%s PMU driver.\n", x86_pmu.name); 1395 pr_cont("%s PMU driver.\n", x86_pmu.name);
1376 1396
1377 if (x86_pmu.quirks) 1397 if (x86_pmu.quirks)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 59e175e89599..591e60104278 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,7 +395,7 @@ sysenter_past_esp:
395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
396 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
397 */ 397 */
398 pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp) 398 pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
399 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
400 400
401 pushl_cfi %eax 401 pushl_cfi %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fe2690d71c0c..e3ba417e8697 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
295 .endm 295 .endm
296 296
297/* save partial stack frame */ 297/* save partial stack frame */
298 .pushsection .kprobes.text, "ax"
298ENTRY(save_args) 299ENTRY(save_args)
299 XCPT_FRAME 300 XCPT_FRAME
300 cld 301 cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
334 ret 335 ret
335 CFI_ENDPROC 336 CFI_ENDPROC
336END(save_args) 337END(save_args)
338 .popsection
337 339
338ENTRY(save_rest) 340ENTRY(save_rest)
339 PARTIAL_FRAME 1 REST_SKIP+8 341 PARTIAL_FRAME 1 REST_SKIP+8
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index bcece91dd311..c0dbd9ac24f0 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -60,16 +60,18 @@
60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) 60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
61#endif 61#endif
62 62
63/* Number of possible pages in the lowmem region */
64LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
65
63/* Enough space to fit pagetables for the low memory linear map */ 66/* Enough space to fit pagetables for the low memory linear map */
64MAPPING_BEYOND_END = \ 67MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
65 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
66 68
67/* 69/*
68 * Worst-case size of the kernel mapping we need to make: 70 * Worst-case size of the kernel mapping we need to make:
69 * the worst-case size of the kernel itself, plus the extra we need 71 * a relocatable kernel can live anywhere in lowmem, so we need to be able
70 * to map for the linear map. 72 * to map all of lowmem.
71 */ 73 */
72KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT 74KERNEL_PAGES = LOWMEM_PAGES
73 75
74INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm 76INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
75RESERVE_BRK(pagetables, INIT_MAP_SIZE) 77RESERVE_BRK(pagetables, INIT_MAP_SIZE)
@@ -620,13 +622,13 @@ ENTRY(initial_code)
620__PAGE_ALIGNED_BSS 622__PAGE_ALIGNED_BSS
621 .align PAGE_SIZE_asm 623 .align PAGE_SIZE_asm
622#ifdef CONFIG_X86_PAE 624#ifdef CONFIG_X86_PAE
623initial_pg_pmd: 625ENTRY(initial_pg_pmd)
624 .fill 1024*KPMDS,4,0 626 .fill 1024*KPMDS,4,0
625#else 627#else
626ENTRY(initial_page_table) 628ENTRY(initial_page_table)
627 .fill 1024,4,0 629 .fill 1024,4,0
628#endif 630#endif
629initial_pg_fixmap: 631ENTRY(initial_pg_fixmap)
630 .fill 1024,4,0 632 .fill 1024,4,0
631ENTRY(empty_zero_page) 633ENTRY(empty_zero_page)
632 .fill 4096,1,0 634 .fill 4096,1,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ae03cab4352e..4ff5968f12d2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -27,6 +27,9 @@
27#define HPET_DEV_FSB_CAP 0x1000 27#define HPET_DEV_FSB_CAP 0x1000
28#define HPET_DEV_PERI_CAP 0x2000 28#define HPET_DEV_PERI_CAP 0x2000
29 29
30#define HPET_MIN_CYCLES 128
31#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
32
30#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) 33#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
31 34
32/* 35/*
@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void)
299 /* Calculate the min / max delta */ 302 /* Calculate the min / max delta */
300 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, 303 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
301 &hpet_clockevent); 304 &hpet_clockevent);
302 /* 5 usec minimum reprogramming delta. */ 305 /* Setup minimum reprogramming delta. */
303 hpet_clockevent.min_delta_ns = 5000; 306 hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
307 &hpet_clockevent);
304 308
305 /* 309 /*
306 * Start hpet with the boot cpu mask and make it 310 * Start hpet with the boot cpu mask and make it
@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta,
393 * the wraparound into account) nor a simple count down event 397 * the wraparound into account) nor a simple count down event
394 * mode. Further the write to the comparator register is 398 * mode. Further the write to the comparator register is
395 * delayed internally up to two HPET clock cycles in certain 399 * delayed internally up to two HPET clock cycles in certain
396 * chipsets (ATI, ICH9,10). We worked around that by reading 400 * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even
397 * back the compare register, but that required another 401 * longer delays. We worked around that by reading back the
398 * workaround for ICH9,10 chips where the first readout after 402 * compare register, but that required another workaround for
399 * write can return the old stale value. We already have a 403 * ICH9,10 chips where the first readout after write can
400 * minimum delta of 5us enforced, but a NMI or SMI hitting 404 * return the old stale value. We already had a minimum
405 * programming delta of 5us enforced, but a NMI or SMI hitting
401 * between the counter readout and the comparator write can 406 * between the counter readout and the comparator write can
402 * move us behind that point easily. Now instead of reading 407 * move us behind that point easily. Now instead of reading
403 * the compare register back several times, we make the ETIME 408 * the compare register back several times, we make the ETIME
404 * decision based on the following: Return ETIME if the 409 * decision based on the following: Return ETIME if the
405 * counter value after the write is less than 8 HPET cycles 410 * counter value after the write is less than HPET_MIN_CYCLES
406 * away from the event or if the counter is already ahead of 411 * away from the event or if the counter is already ahead of
407 * the event. 412 * the event. The minimum programming delta for the generic
413 * clockevents code is set to 1.5 * HPET_MIN_CYCLES.
408 */ 414 */
409 res = (s32)(cnt - hpet_readl(HPET_COUNTER)); 415 res = (s32)(cnt - hpet_readl(HPET_COUNTER));
410 416
411 return res < 8 ? -ETIME : 0; 417 return res < HPET_MIN_CYCLES ? -ETIME : 0;
412} 418}
413 419
414static void hpet_legacy_set_mode(enum clock_event_mode mode, 420static void hpet_legacy_set_mode(enum clock_event_mode mode,
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff15c9dcc25d..42c594254507 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
433 dr6_p = (unsigned long *)ERR_PTR(args->err); 433 dr6_p = (unsigned long *)ERR_PTR(args->err);
434 dr6 = *dr6_p; 434 dr6 = *dr6_p;
435 435
436 /* If it's a single step, TRAP bits are random */
437 if (dr6 & DR_STEP)
438 return NOTIFY_DONE;
439
436 /* Do an early return if no trap bits are set in DR6 */ 440 /* Do an early return if no trap bits are set in DR6 */
437 if ((dr6 & DR_TRAP_BITS) == 0) 441 if ((dr6 & DR_TRAP_BITS) == 0)
438 return NOTIFY_DONE; 442 return NOTIFY_DONE;
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index dcb65cc0a053..1a1b606d3e92 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -364,8 +364,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
364 364
365 /* For performance reasons, reuse mc area when possible */ 365 /* For performance reasons, reuse mc area when possible */
366 if (!mc || mc_size > curr_mc_size) { 366 if (!mc || mc_size > curr_mc_size) {
367 if (mc) 367 vfree(mc);
368 vfree(mc);
369 mc = vmalloc(mc_size); 368 mc = vmalloc(mc_size);
370 if (!mc) 369 if (!mc)
371 break; 370 break;
@@ -374,13 +373,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
374 373
375 if (get_ucode_data(mc, ucode_ptr, mc_size) || 374 if (get_ucode_data(mc, ucode_ptr, mc_size) ||
376 microcode_sanity_check(mc) < 0) { 375 microcode_sanity_check(mc) < 0) {
377 vfree(mc);
378 break; 376 break;
379 } 377 }
380 378
381 if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { 379 if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
382 if (new_mc) 380 vfree(new_mc);
383 vfree(new_mc);
384 new_rev = mc_header.rev; 381 new_rev = mc_header.rev;
385 new_mc = mc; 382 new_mc = mc;
386 mc = NULL; /* trigger new vmalloc */ 383 mc = NULL; /* trigger new vmalloc */
@@ -390,12 +387,10 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
390 leftover -= mc_size; 387 leftover -= mc_size;
391 } 388 }
392 389
393 if (mc) 390 vfree(mc);
394 vfree(mc);
395 391
396 if (leftover) { 392 if (leftover) {
397 if (new_mc) 393 vfree(new_mc);
398 vfree(new_mc);
399 state = UCODE_ERROR; 394 state = UCODE_ERROR;
400 goto out; 395 goto out;
401 } 396 }
@@ -405,8 +400,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
405 goto out; 400 goto out;
406 } 401 }
407 402
408 if (uci->mc) 403 vfree(uci->mc);
409 vfree(uci->mc);
410 uci->mc = (struct microcode_intel *)new_mc; 404 uci->mc = (struct microcode_intel *)new_mc;
411 405
412 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", 406 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 6da143c2a6b8..ac861b8348e2 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -25,7 +25,6 @@ struct pci_hostbridge_probe {
25}; 25};
26 26
27static u64 __cpuinitdata fam10h_pci_mmconf_base; 27static u64 __cpuinitdata fam10h_pci_mmconf_base;
28static int __cpuinitdata fam10h_pci_mmconf_base_status;
29 28
30static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { 29static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
31 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, 30 { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
@@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2)
44 return start1 - start2; 43 return start1 - start2;
45} 44}
46 45
47/*[47:0] */ 46#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT)
48/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ 47#define MMCONF_MASK (~(MMCONF_UNIT - 1))
48#define MMCONF_SIZE (MMCONF_UNIT << 8)
49/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */
49#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) 50#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
50#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) 51#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40))
51static void __cpuinit get_fam10h_pci_mmconf_base(void) 52static void __cpuinit get_fam10h_pci_mmconf_base(void)
52{ 53{
53 int i; 54 int i;
@@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
64 struct range range[8]; 65 struct range range[8];
65 66
66 /* only try to get setting from BSP */ 67 /* only try to get setting from BSP */
67 /* -1 or 1 */ 68 if (fam10h_pci_mmconf_base)
68 if (fam10h_pci_mmconf_base_status)
69 return; 69 return;
70 70
71 if (!early_pci_allowed()) 71 if (!early_pci_allowed())
72 goto fail; 72 return;
73 73
74 found = 0; 74 found = 0;
75 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { 75 for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
@@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
91 } 91 }
92 92
93 if (!found) 93 if (!found)
94 goto fail; 94 return;
95 95
96 /* SYS_CFG */ 96 /* SYS_CFG */
97 address = MSR_K8_SYSCFG; 97 address = MSR_K8_SYSCFG;
@@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
99 99
100 /* TOP_MEM2 is not enabled? */ 100 /* TOP_MEM2 is not enabled? */
101 if (!(val & (1<<21))) { 101 if (!(val & (1<<21))) {
102 tom2 = 0; 102 tom2 = 1ULL << 32;
103 } else { 103 } else {
104 /* TOP_MEM2 */ 104 /* TOP_MEM2 */
105 address = MSR_K8_TOP_MEM2; 105 address = MSR_K8_TOP_MEM2;
106 rdmsrl(address, val); 106 rdmsrl(address, val);
107 tom2 = val & (0xffffULL<<32); 107 tom2 = max(val & 0xffffff800000ULL, 1ULL << 32);
108 } 108 }
109 109
110 if (base <= tom2) 110 if (base <= tom2)
111 base = tom2 + (1ULL<<32); 111 base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK;
112 112
113 /* 113 /*
114 * need to check if the range is in the high mmio range that is 114 * need to check if the range is in the high mmio range that is
@@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
123 if (!(reg & 3)) 123 if (!(reg & 3))
124 continue; 124 continue;
125 125
126 start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ 126 start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/
127 reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); 127 reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
128 end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ 128 end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/
129 129
130 if (!end) 130 if (end < tom2)
131 continue; 131 continue;
132 132
133 range[hi_mmio_num].start = start; 133 range[hi_mmio_num].start = start;
@@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
143 143
144 if (range[hi_mmio_num - 1].end < base) 144 if (range[hi_mmio_num - 1].end < base)
145 goto out; 145 goto out;
146 if (range[0].start > base) 146 if (range[0].start > base + MMCONF_SIZE)
147 goto out; 147 goto out;
148 148
149 /* need to find one window */ 149 /* need to find one window */
150 base = range[0].start - (1ULL << 32); 150 base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT;
151 if ((base > tom2) && BASE_VALID(base)) 151 if ((base > tom2) && BASE_VALID(base))
152 goto out; 152 goto out;
153 base = range[hi_mmio_num - 1].end + (1ULL << 32); 153 base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK;
154 if ((base > tom2) && BASE_VALID(base)) 154 if (BASE_VALID(base))
155 goto out; 155 goto out;
156 /* need to find window between ranges */ 156 /* need to find window between ranges */
157 if (hi_mmio_num > 1) 157 for (i = 1; i < hi_mmio_num; i++) {
158 for (i = 0; i < hi_mmio_num - 1; i++) { 158 base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK;
159 if (range[i + 1].start > (range[i].end + (1ULL << 32))) { 159 val = range[i].start & MMCONF_MASK;
160 base = range[i].end + (1ULL << 32); 160 if (val >= base + MMCONF_SIZE && BASE_VALID(base))
161 if ((base > tom2) && BASE_VALID(base)) 161 goto out;
162 goto out;
163 }
164 } 162 }
165
166fail:
167 fam10h_pci_mmconf_base_status = -1;
168 return; 163 return;
164
169out: 165out:
170 fam10h_pci_mmconf_base = base; 166 fam10h_pci_mmconf_base = base;
171 fam10h_pci_mmconf_base_status = 1;
172} 167}
173 168
174void __cpuinit fam10h_check_enable_mmcfg(void) 169void __cpuinit fam10h_check_enable_mmcfg(void)
@@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
190 185
191 /* only trust the one handle 256 buses, if acpi=off */ 186 /* only trust the one handle 256 buses, if acpi=off */
192 if (!acpi_pci_disabled || busnbits >= 8) { 187 if (!acpi_pci_disabled || busnbits >= 8) {
193 u64 base; 188 u64 base = val & MMCONF_MASK;
194 base = val & (0xffffULL << 32); 189
195 if (fam10h_pci_mmconf_base_status <= 0) { 190 if (!fam10h_pci_mmconf_base) {
196 fam10h_pci_mmconf_base = base; 191 fam10h_pci_mmconf_base = base;
197 fam10h_pci_mmconf_base_status = 1;
198 return; 192 return;
199 } else if (fam10h_pci_mmconf_base == base) 193 } else if (fam10h_pci_mmconf_base == base)
200 return; 194 return;
@@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
206 * with 256 buses 200 * with 256 buses
207 */ 201 */
208 get_fam10h_pci_mmconf_base(); 202 get_fam10h_pci_mmconf_base();
209 if (fam10h_pci_mmconf_base_status <= 0) 203 if (!fam10h_pci_mmconf_base) {
204 pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
210 return; 205 return;
206 }
211 207
212 printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); 208 printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
213 val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | 209 val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 008b91eefa18..42eb3300dfc6 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -83,6 +83,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
83 83
84static atomic64_t last_value = ATOMIC64_INIT(0); 84static atomic64_t last_value = ATOMIC64_INIT(0);
85 85
86void pvclock_resume(void)
87{
88 atomic64_set(&last_value, 0);
89}
90
86cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 91cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
87{ 92{
88 struct pvclock_shadow_time shadow; 93 struct pvclock_shadow_time shadow;
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
new file mode 100644
index 000000000000..2a26819bb6a8
--- /dev/null
+++ b/arch/x86/kernel/resource.c
@@ -0,0 +1,48 @@
1#include <linux/ioport.h>
2#include <asm/e820.h>
3
4static void resource_clip(struct resource *res, resource_size_t start,
5 resource_size_t end)
6{
7 resource_size_t low = 0, high = 0;
8
9 if (res->end < start || res->start > end)
10 return; /* no conflict */
11
12 if (res->start < start)
13 low = start - res->start;
14
15 if (res->end > end)
16 high = res->end - end;
17
18 /* Keep the area above or below the conflict, whichever is larger */
19 if (low > high)
20 res->end = start - 1;
21 else
22 res->start = end + 1;
23}
24
25static void remove_e820_regions(struct resource *avail)
26{
27 int i;
28 struct e820entry *entry;
29
30 for (i = 0; i < e820.nr_map; i++) {
31 entry = &e820.map[i];
32
33 resource_clip(avail, entry->addr,
34 entry->addr + entry->size - 1);
35 }
36}
37
38void arch_remove_reservations(struct resource *avail)
39{
40 /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */
41 if (avail->flags & IORESOURCE_MEM) {
42 if (avail->start < BIOS_END)
43 avail->start = BIOS_END;
44 resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
45
46 remove_e820_regions(avail);
47 }
48}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 21c6746338af..a0f52af256a0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -501,7 +501,18 @@ static inline unsigned long long get_total_mem(void)
501 return total << PAGE_SHIFT; 501 return total << PAGE_SHIFT;
502} 502}
503 503
504#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF 504/*
505 * Keep the crash kernel below this limit. On 32 bits earlier kernels
506 * would limit the kernel to the low 512 MiB due to mapping restrictions.
507 * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
508 * limit once kexec-tools are fixed.
509 */
510#ifdef CONFIG_X86_32
511# define CRASH_KERNEL_ADDR_MAX (512 << 20)
512#else
513# define CRASH_KERNEL_ADDR_MAX (896 << 20)
514#endif
515
505static void __init reserve_crashkernel(void) 516static void __init reserve_crashkernel(void)
506{ 517{
507 unsigned long long total_mem; 518 unsigned long long total_mem;
@@ -520,10 +531,10 @@ static void __init reserve_crashkernel(void)
520 const unsigned long long alignment = 16<<20; /* 16M */ 531 const unsigned long long alignment = 16<<20; /* 16M */
521 532
522 /* 533 /*
523 * kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX 534 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
524 */ 535 */
525 crash_base = memblock_find_in_range(alignment, 536 crash_base = memblock_find_in_range(alignment,
526 DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment); 537 CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
527 538
528 if (crash_base == MEMBLOCK_ERROR) { 539 if (crash_base == MEMBLOCK_ERROR) {
529 pr_info("crashkernel reservation failed - No suitable area found.\n"); 540 pr_info("crashkernel reservation failed - No suitable area found.\n");
@@ -769,7 +780,6 @@ void __init setup_arch(char **cmdline_p)
769 780
770 x86_init.oem.arch_setup(); 781 x86_init.oem.arch_setup();
771 782
772 resource_alloc_from_bottom = 0;
773 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; 783 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
774 setup_memory_map(); 784 setup_memory_map();
775 parse_setup_data(); 785 parse_setup_data();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 9c253bd65e24..547128546cc3 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -394,7 +394,8 @@ static void __init setup_xstate_init(void)
394 * Setup init_xstate_buf to represent the init state of 394 * Setup init_xstate_buf to represent the init state of
395 * all the features managed by the xsave 395 * all the features managed by the xsave
396 */ 396 */
397 init_xstate_buf = alloc_bootmem(xstate_size); 397 init_xstate_buf = alloc_bootmem_align(xstate_size,
398 __alignof__(struct xsave_struct));
398 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; 399 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
399 400
400 clts(); 401 clts();