aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2011-05-10 11:05:24 -0400
committerIngo Molnar <mingo@elte.hu>2011-05-10 11:05:45 -0400
commit932fed4e2e42c3d730c01bb63b1c4f812c533d5b (patch)
tree11b1afac3a40d253cdb905c42901edfaae5e196e /arch/x86
parent57d524154ffe99d27fb55e0e30ddbad9f4c35806 (diff)
parent693d92a1bbc9e42681c42ed190bd42b636ca876f (diff)
Merge commit 'v2.6.39-rc7' into perf/core
Merge reason: pull in the latest fixes. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/memory.c2
-rw-r--r--arch/x86/include/asm/io_apic.h2
-rw-r--r--arch/x86/kernel/apic/io_apic.c10
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c87
-rw-r--r--arch/x86/kernel/devicetree.c2
-rw-r--r--arch/x86/kernel/ptrace.c36
-rw-r--r--arch/x86/kernel/reboot_32.S12
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts6
-rw-r--r--arch/x86/xen/mmu.c125
11 files changed, 221 insertions, 65 deletions
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index cae3feb1035e..db75d07c3645 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -91,7 +91,7 @@ static int detect_memory_e801(void)
91 if (oreg.ax > 15*1024) { 91 if (oreg.ax > 15*1024) {
92 return -1; /* Bogus! */ 92 return -1; /* Bogus! */
93 } else if (oreg.ax == 15*1024) { 93 } else if (oreg.ax == 15*1024) {
94 boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax; 94 boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
95 } else { 95 } else {
96 /* 96 /*
97 * This ignores memory above 16MB if we have a memory 97 * This ignores memory above 16MB if we have a memory
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index c4bd267dfc50..a97a240f67f3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -150,7 +150,7 @@ void setup_IO_APIC_irq_extra(u32 gsi);
150extern void ioapic_and_gsi_init(void); 150extern void ioapic_and_gsi_init(void);
151extern void ioapic_insert_resources(void); 151extern void ioapic_insert_resources(void);
152 152
153int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr); 153int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
154 154
155extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); 155extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
156extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); 156extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 68df09bba92e..45fd33d1fd3a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -128,8 +128,8 @@ static int __init parse_noapic(char *str)
128} 128}
129early_param("noapic", parse_noapic); 129early_param("noapic", parse_noapic);
130 130
131static int io_apic_setup_irq_pin_once(unsigned int irq, int node, 131static int io_apic_setup_irq_pin(unsigned int irq, int node,
132 struct io_apic_irq_attr *attr); 132 struct io_apic_irq_attr *attr);
133 133
134/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ 134/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
135void mp_save_irq(struct mpc_intsrc *m) 135void mp_save_irq(struct mpc_intsrc *m)
@@ -3570,7 +3570,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3570} 3570}
3571#endif /* CONFIG_HT_IRQ */ 3571#endif /* CONFIG_HT_IRQ */
3572 3572
3573int 3573static int
3574io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) 3574io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
3575{ 3575{
3576 struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); 3576 struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
@@ -3585,8 +3585,8 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
3585 return ret; 3585 return ret;
3586} 3586}
3587 3587
3588static int io_apic_setup_irq_pin_once(unsigned int irq, int node, 3588int io_apic_setup_irq_pin_once(unsigned int irq, int node,
3589 struct io_apic_irq_attr *attr) 3589 struct io_apic_irq_attr *attr)
3590{ 3590{
3591 unsigned int id = attr->ioapic, pin = attr->ioapic_pin; 3591 unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
3592 int ret; 3592 int ret;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3532d3bf8105..bb9eb29a52dd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
698 */ 698 */
699 699
700const int amd_erratum_400[] = 700const int amd_erratum_400[] =
701 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), 701 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
702 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); 702 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
703EXPORT_SYMBOL_GPL(amd_erratum_400); 703EXPORT_SYMBOL_GPL(amd_erratum_400);
704 704
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 61cbf4831d71..41178c826c48 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
184 }, 184 },
185 }, 185 },
186 [ C(LL ) ] = { 186 [ C(LL ) ] = {
187 /*
188 * TBD: Need Off-core Response Performance Monitoring support
189 */
190 [ C(OP_READ) ] = { 187 [ C(OP_READ) ] = {
191 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ 188 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
192 [ C(RESULT_ACCESS) ] = 0x01b7, 189 [ C(RESULT_ACCESS) ] = 0x01b7,
193 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ 190 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
194 [ C(RESULT_MISS) ] = 0x01bb, 191 [ C(RESULT_MISS) ] = 0x01b7,
195 }, 192 },
196 [ C(OP_WRITE) ] = { 193 [ C(OP_WRITE) ] = {
197 /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ 194 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
198 [ C(RESULT_ACCESS) ] = 0x01b7, 195 [ C(RESULT_ACCESS) ] = 0x01b7,
199 /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ 196 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
200 [ C(RESULT_MISS) ] = 0x01bb, 197 [ C(RESULT_MISS) ] = 0x01b7,
201 }, 198 },
202 [ C(OP_PREFETCH) ] = { 199 [ C(OP_PREFETCH) ] = {
203 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ 200 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
204 [ C(RESULT_ACCESS) ] = 0x01b7, 201 [ C(RESULT_ACCESS) ] = 0x01b7,
205 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ 202 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
206 [ C(RESULT_MISS) ] = 0x01bb, 203 [ C(RESULT_MISS) ] = 0x01b7,
207 }, 204 },
208 }, 205 },
209 [ C(DTLB) ] = { 206 [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
285 }, 282 },
286 [ C(LL ) ] = { 283 [ C(LL ) ] = {
287 [ C(OP_READ) ] = { 284 [ C(OP_READ) ] = {
288 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ 285 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
289 [ C(RESULT_ACCESS) ] = 0x01b7, 286 [ C(RESULT_ACCESS) ] = 0x01b7,
290 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ 287 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
291 [ C(RESULT_MISS) ] = 0x01bb, 288 [ C(RESULT_MISS) ] = 0x01b7,
292 }, 289 },
293 /* 290 /*
294 * Use RFO, not WRITEBACK, because a write miss would typically occur 291 * Use RFO, not WRITEBACK, because a write miss would typically occur
295 * on RFO. 292 * on RFO.
296 */ 293 */
297 [ C(OP_WRITE) ] = { 294 [ C(OP_WRITE) ] = {
298 /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ 295 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
299 [ C(RESULT_ACCESS) ] = 0x01bb, 296 [ C(RESULT_ACCESS) ] = 0x01b7,
300 /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ 297 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
301 [ C(RESULT_MISS) ] = 0x01b7, 298 [ C(RESULT_MISS) ] = 0x01b7,
302 }, 299 },
303 [ C(OP_PREFETCH) ] = { 300 [ C(OP_PREFETCH) ] = {
304 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ 301 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
305 [ C(RESULT_ACCESS) ] = 0x01b7, 302 [ C(RESULT_ACCESS) ] = 0x01b7,
306 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ 303 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
307 [ C(RESULT_MISS) ] = 0x01bb, 304 [ C(RESULT_MISS) ] = 0x01b7,
308 }, 305 },
309 }, 306 },
310 [ C(DTLB) ] = { 307 [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
352}; 349};
353 350
354/* 351/*
355 * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 352 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
353 * See IA32 SDM Vol 3B 30.6.1.3
356 */ 354 */
357 355
358#define DMND_DATA_RD (1 << 0) 356#define NHM_DMND_DATA_RD (1 << 0)
359#define DMND_RFO (1 << 1) 357#define NHM_DMND_RFO (1 << 1)
360#define DMND_WB (1 << 3) 358#define NHM_DMND_IFETCH (1 << 2)
361#define PF_DATA_RD (1 << 4) 359#define NHM_DMND_WB (1 << 3)
362#define PF_DATA_RFO (1 << 5) 360#define NHM_PF_DATA_RD (1 << 4)
363#define RESP_UNCORE_HIT (1 << 8) 361#define NHM_PF_DATA_RFO (1 << 5)
364#define RESP_MISS (0xf600) /* non uncore hit */ 362#define NHM_PF_IFETCH (1 << 6)
363#define NHM_OFFCORE_OTHER (1 << 7)
364#define NHM_UNCORE_HIT (1 << 8)
365#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
366#define NHM_OTHER_CORE_HITM (1 << 10)
367 /* reserved */
368#define NHM_REMOTE_CACHE_FWD (1 << 12)
369#define NHM_REMOTE_DRAM (1 << 13)
370#define NHM_LOCAL_DRAM (1 << 14)
371#define NHM_NON_DRAM (1 << 15)
372
373#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
374
375#define NHM_DMND_READ (NHM_DMND_DATA_RD)
376#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
377#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
378
379#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
380#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
381#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
365 382
366static __initconst const u64 nehalem_hw_cache_extra_regs 383static __initconst const u64 nehalem_hw_cache_extra_regs
367 [PERF_COUNT_HW_CACHE_MAX] 384 [PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
370{ 387{
371 [ C(LL ) ] = { 388 [ C(LL ) ] = {
372 [ C(OP_READ) ] = { 389 [ C(OP_READ) ] = {
373 [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, 390 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
374 [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, 391 [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
375 }, 392 },
376 [ C(OP_WRITE) ] = { 393 [ C(OP_WRITE) ] = {
377 [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, 394 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
378 [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, 395 [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
379 }, 396 },
380 [ C(OP_PREFETCH) ] = { 397 [ C(OP_PREFETCH) ] = {
381 [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, 398 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
382 [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, 399 [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
383 }, 400 },
384 } 401 }
385}; 402};
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 706a9fb46a58..e90f08458e6b 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -391,7 +391,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
391 391
392 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); 392 set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
393 393
394 return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr); 394 return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
395} 395}
396 396
397static void __init ioapic_add_ofnode(struct device_node *np) 397static void __init ioapic_add_ofnode(struct device_node *np)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72a..f65e5b521dbd 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
608 unsigned len, type; 608 unsigned len, type;
609 struct perf_event *bp; 609 struct perf_event *bp;
610 610
611 if (ptrace_get_breakpoints(tsk) < 0)
612 return -ESRCH;
613
611 data &= ~DR_CONTROL_RESERVED; 614 data &= ~DR_CONTROL_RESERVED;
612 old_dr7 = ptrace_get_dr7(thread->ptrace_bps); 615 old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
613restore: 616restore:
@@ -655,6 +658,9 @@ restore:
655 } 658 }
656 goto restore; 659 goto restore;
657 } 660 }
661
662 ptrace_put_breakpoints(tsk);
663
658 return ((orig_ret < 0) ? orig_ret : rc); 664 return ((orig_ret < 0) ? orig_ret : rc);
659} 665}
660 666
@@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
668 674
669 if (n < HBP_NUM) { 675 if (n < HBP_NUM) {
670 struct perf_event *bp; 676 struct perf_event *bp;
677
678 if (ptrace_get_breakpoints(tsk) < 0)
679 return -ESRCH;
680
671 bp = thread->ptrace_bps[n]; 681 bp = thread->ptrace_bps[n];
672 if (!bp) 682 if (!bp)
673 return 0; 683 val = 0;
674 val = bp->hw.info.address; 684 else
685 val = bp->hw.info.address;
686
687 ptrace_put_breakpoints(tsk);
675 } else if (n == 6) { 688 } else if (n == 6) {
676 val = thread->debugreg6; 689 val = thread->debugreg6;
677 } else if (n == 7) { 690 } else if (n == 7) {
@@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
686 struct perf_event *bp; 699 struct perf_event *bp;
687 struct thread_struct *t = &tsk->thread; 700 struct thread_struct *t = &tsk->thread;
688 struct perf_event_attr attr; 701 struct perf_event_attr attr;
702 int err = 0;
703
704 if (ptrace_get_breakpoints(tsk) < 0)
705 return -ESRCH;
689 706
690 if (!t->ptrace_bps[nr]) { 707 if (!t->ptrace_bps[nr]) {
691 ptrace_breakpoint_init(&attr); 708 ptrace_breakpoint_init(&attr);
@@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
709 * writing for the user. And anyway this is the previous 726 * writing for the user. And anyway this is the previous
710 * behaviour. 727 * behaviour.
711 */ 728 */
712 if (IS_ERR(bp)) 729 if (IS_ERR(bp)) {
713 return PTR_ERR(bp); 730 err = PTR_ERR(bp);
731 goto put;
732 }
714 733
715 t->ptrace_bps[nr] = bp; 734 t->ptrace_bps[nr] = bp;
716 } else { 735 } else {
717 int err;
718
719 bp = t->ptrace_bps[nr]; 736 bp = t->ptrace_bps[nr];
720 737
721 attr = bp->attr; 738 attr = bp->attr;
722 attr.bp_addr = addr; 739 attr.bp_addr = addr;
723 err = modify_user_hw_breakpoint(bp, &attr); 740 err = modify_user_hw_breakpoint(bp, &attr);
724 if (err)
725 return err;
726 } 741 }
727 742
728 743put:
729 return 0; 744 ptrace_put_breakpoints(tsk);
745 return err;
730} 746}
731 747
732/* 748/*
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S
index 29092b38d816..1d5c46df0d78 100644
--- a/arch/x86/kernel/reboot_32.S
+++ b/arch/x86/kernel/reboot_32.S
@@ -21,26 +21,26 @@ r_base = .
21 /* Get our own relocated address */ 21 /* Get our own relocated address */
22 call 1f 22 call 1f
231: popl %ebx 231: popl %ebx
24 subl $1b, %ebx 24 subl $(1b - r_base), %ebx
25 25
26 /* Compute the equivalent real-mode segment */ 26 /* Compute the equivalent real-mode segment */
27 movl %ebx, %ecx 27 movl %ebx, %ecx
28 shrl $4, %ecx 28 shrl $4, %ecx
29 29
30 /* Patch post-real-mode segment jump */ 30 /* Patch post-real-mode segment jump */
31 movw dispatch_table(%ebx,%eax,2),%ax 31 movw (dispatch_table - r_base)(%ebx,%eax,2),%ax
32 movw %ax, 101f(%ebx) 32 movw %ax, (101f - r_base)(%ebx)
33 movw %cx, 102f(%ebx) 33 movw %cx, (102f - r_base)(%ebx)
34 34
35 /* Set up the IDT for real mode. */ 35 /* Set up the IDT for real mode. */
36 lidtl machine_real_restart_idt(%ebx) 36 lidtl (machine_real_restart_idt - r_base)(%ebx)
37 37
38 /* 38 /*
39 * Set up a GDT from which we can load segment descriptors for real 39 * Set up a GDT from which we can load segment descriptors for real
40 * mode. The GDT is not used in real mode; it is just needed here to 40 * mode. The GDT is not used in real mode; it is just needed here to
41 * prepare the descriptors. 41 * prepare the descriptors.
42 */ 42 */
43 lgdtl machine_real_restart_gdt(%ebx) 43 lgdtl (machine_real_restart_gdt - r_base)(%ebx)
44 44
45 /* 45 /*
46 * Load the data segment registers with 16-bit compatible values 46 * Load the data segment registers with 16-bit compatible values
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc72033..85b52fc03084 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -306,7 +306,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
306 bi->end = min(bi->end, high); 306 bi->end = min(bi->end, high);
307 307
308 /* and there's no empty block */ 308 /* and there's no empty block */
309 if (bi->start == bi->end) { 309 if (bi->start >= bi->end) {
310 numa_remove_memblk_from(i--, mi); 310 numa_remove_memblk_from(i--, mi);
311 continue; 311 continue;
312 } 312 }
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index 2d6d226f2b10..e70be38ce039 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -347,7 +347,7 @@
347 "pciclass0c03"; 347 "pciclass0c03";
348 348
349 reg = <0x16800 0x0 0x0 0x0 0x0>; 349 reg = <0x16800 0x0 0x0 0x0 0x0>;
350 interrupts = <22 3>; 350 interrupts = <22 1>;
351 }; 351 };
352 352
353 usb@d,1 { 353 usb@d,1 {
@@ -357,7 +357,7 @@
357 "pciclass0c03"; 357 "pciclass0c03";
358 358
359 reg = <0x16900 0x0 0x0 0x0 0x0>; 359 reg = <0x16900 0x0 0x0 0x0 0x0>;
360 interrupts = <22 3>; 360 interrupts = <22 1>;
361 }; 361 };
362 362
363 sata@e,0 { 363 sata@e,0 {
@@ -367,7 +367,7 @@
367 "pciclass0106"; 367 "pciclass0106";
368 368
369 reg = <0x17000 0x0 0x0 0x0 0x0>; 369 reg = <0x17000 0x0 0x0 0x0 0x0>;
370 interrupts = <23 3>; 370 interrupts = <23 1>;
371 }; 371 };
372 372
373 flash@f,0 { 373 flash@f,0 {
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aef7af92b28b..55c965b38c27 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1463,6 +1463,119 @@ static int xen_pgd_alloc(struct mm_struct *mm)
1463 return ret; 1463 return ret;
1464} 1464}
1465 1465
1466#ifdef CONFIG_X86_64
1467static __initdata u64 __last_pgt_set_rw = 0;
1468static __initdata u64 __pgt_buf_start = 0;
1469static __initdata u64 __pgt_buf_end = 0;
1470static __initdata u64 __pgt_buf_top = 0;
1471/*
1472 * As a consequence of the commit:
1473 *
1474 * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
1475 * Author: Yinghai Lu <yinghai@kernel.org>
1476 * Date: Fri Dec 17 16:58:28 2010 -0800
1477 *
1478 * x86-64, mm: Put early page table high
1479 *
1480 * at some point init_memory_mapping is going to reach the pagetable pages
1481 * area and map those pages too (mapping them as normal memory that falls
1482 * in the range of addresses passed to init_memory_mapping as argument).
1483 * Some of those pages are already pagetable pages (they are in the range
1484 * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
1485 * everything is fine.
1486 * Some of these pages are not pagetable pages yet (they fall in the range
1487 * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
1488 * are going to be mapped RW. When these pages become pagetable pages and
1489 * are hooked into the pagetable, xen will find that the guest has already
1490 * a RW mapping of them somewhere and fail the operation.
1491 * The reason Xen requires pagetables to be RO is that the hypervisor needs
1492 * to verify that the pagetables are valid before using them. The validation
1493 * operations are called "pinning".
1494 *
1495 * In order to fix the issue we mark all the pages in the entire range
1496 * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
1497 * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
1498 * init_memory_mapping. Hence the kernel is going to crash as soon as one
1499 * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
1500 * ranges are RO).
1501 *
1502 * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
1503 * the init_memory_mapping has completed (in a perfect world we would
1504 * call this function from init_memory_mapping, but lets ignore that).
1505 *
1506 * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
1507 * end,top] have all changed to new values (b/c init_memory_mapping
1508 * is called and setting up another new page-table). Hence, the first time
1509 * we enter this function, we save away the pgt_buf_start value and update
1510 * the pgt_buf_[end,top].
1511 *
1512 * When we detect that the "old" pgt_buf_start through pgt_buf_end
1513 * PFNs have been reserved (so memblock_x86_reserve_range has been called),
1514 * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
1515 *
1516 * And then we update those "old" pgt_buf_[end|top] with the new ones
1517 * so that we can redo this on the next pagetable.
1518 */
1519static __init void mark_rw_past_pgt(void) {
1520
1521 if (pgt_buf_end > pgt_buf_start) {
1522 u64 addr, size;
1523
1524 /* Save it away. */
1525 if (!__pgt_buf_start) {
1526 __pgt_buf_start = pgt_buf_start;
1527 __pgt_buf_end = pgt_buf_end;
1528 __pgt_buf_top = pgt_buf_top;
1529 return;
1530 }
1531 /* If we get the range that starts at __pgt_buf_end that means
1532 * the range is reserved, and that in 'init_memory_mapping'
1533 * the 'memblock_x86_reserve_range' has been called with the
1534 * outdated __pgt_buf_start, __pgt_buf_end (the "new"
1535 * pgt_buf_[start|end|top] refer now to a new pagetable.
1536 * Note: we are called _after_ the pgt_buf_[..] have been
1537 * updated.*/
1538
1539 addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
1540 &size, PAGE_SIZE);
1541
1542 /* Still not reserved, meaning 'memblock_x86_reserve_range'
1543 * hasn't been called yet. Update the _end and _top.*/
1544 if (addr == PFN_PHYS(__pgt_buf_start)) {
1545 __pgt_buf_end = pgt_buf_end;
1546 __pgt_buf_top = pgt_buf_top;
1547 return;
1548 }
1549
1550 /* OK, the area is reserved, meaning it is time for us to
1551 * set RW for the old end->top PFNs. */
1552
1553 /* ..unless we had already done this. */
1554 if (__pgt_buf_end == __last_pgt_set_rw)
1555 return;
1556
1557 addr = PFN_PHYS(__pgt_buf_end);
1558
1559 /* set as RW the rest */
1560 printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
1561 PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
1562
1563 while (addr < PFN_PHYS(__pgt_buf_top)) {
1564 make_lowmem_page_readwrite(__va(addr));
1565 addr += PAGE_SIZE;
1566 }
1567 /* And update everything so that we are ready for the next
1568 * pagetable (the one created for regions past 4GB) */
1569 __last_pgt_set_rw = __pgt_buf_end;
1570 __pgt_buf_start = pgt_buf_start;
1571 __pgt_buf_end = pgt_buf_end;
1572 __pgt_buf_top = pgt_buf_top;
1573 }
1574 return;
1575}
1576#else
1577static __init void mark_rw_past_pgt(void) { }
1578#endif
1466static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) 1579static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1467{ 1580{
1468#ifdef CONFIG_X86_64 1581#ifdef CONFIG_X86_64
@@ -1489,13 +1602,21 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1489 unsigned long pfn = pte_pfn(pte); 1602 unsigned long pfn = pte_pfn(pte);
1490 1603
1491 /* 1604 /*
1605 * A bit of optimization. We do not need to call the workaround
1606 * when xen_set_pte_init is called with a PTE with 0 as PFN.
1607 * That is b/c the pagetable at that point are just being populated
1608 * with empty values and we can save some cycles by not calling
1609 * the 'memblock' code.*/
1610 if (pfn)
1611 mark_rw_past_pgt();
1612 /*
1492 * If the new pfn is within the range of the newly allocated 1613 * If the new pfn is within the range of the newly allocated
1493 * kernel pagetable, and it isn't being mapped into an 1614 * kernel pagetable, and it isn't being mapped into an
1494 * early_ioremap fixmap slot as a freshly allocated page, make sure 1615 * early_ioremap fixmap slot as a freshly allocated page, make sure
1495 * it is RO. 1616 * it is RO.
1496 */ 1617 */
1497 if (((!is_early_ioremap_ptep(ptep) && 1618 if (((!is_early_ioremap_ptep(ptep) &&
1498 pfn >= pgt_buf_start && pfn < pgt_buf_end)) || 1619 pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
1499 (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) 1620 (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
1500 pte = pte_wrprotect(pte); 1621 pte = pte_wrprotect(pte);
1501 1622
@@ -1997,6 +2118,8 @@ __init void xen_ident_map_ISA(void)
1997 2118
1998static __init void xen_post_allocator_init(void) 2119static __init void xen_post_allocator_init(void)
1999{ 2120{
2121 mark_rw_past_pgt();
2122
2000#ifdef CONFIG_XEN_DEBUG 2123#ifdef CONFIG_XEN_DEBUG
2001 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); 2124 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
2002#endif 2125#endif