Diffstat (limited to 'arch/x86')
 arch/x86/boot/compressed/mkpiggy.c         |   7
 arch/x86/include/asm/acpi.h                |   1
 arch/x86/include/asm/ce4100.h              |   6
 arch/x86/include/asm/msr-index.h           |   5
 arch/x86/include/asm/perf_event_p4.h       |   1
 arch/x86/include/asm/smpboot_hooks.h       |   2
 arch/x86/include/asm/uv/uv_bau.h           |   2
 arch/x86/include/asm/xen/page.h            |  47
 arch/x86/kernel/acpi/boot.c                |  14
 arch/x86/kernel/apb_timer.c                |   2
 arch/x86/kernel/check.c                    |   8
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c  |   6
 arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c  |   2
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c  |  13
 arch/x86/kernel/cpu/perf_event_p4.c        |  11
 arch/x86/kernel/early-quirks.c             |  16
 arch/x86/kernel/reboot.c                   |   8
 arch/x86/kvm/svm.c                         |   2
 arch/x86/mm/fault.c                        |  14
 arch/x86/mm/init_64.c                      |   6
 arch/x86/mm/numa_64.c                      |   6
 arch/x86/mm/pageattr.c                     |  18
 arch/x86/mm/pgtable.c                      |  11
 arch/x86/pci/ce4100.c                      |   7
 arch/x86/platform/ce4100/ce4100.c          |   2
 arch/x86/platform/olpc/olpc_dt.c           |   3
 arch/x86/platform/uv/tlb_uv.c              |   4
 arch/x86/xen/Kconfig                       |   8
 arch/x86/xen/mmu.c                         |  82
 arch/x86/xen/p2m.c                         | 324
 arch/x86/xen/setup.c                       |  59
 31 files changed, 603 insertions(+), 94 deletions(-)
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 646aa78ba5fd..46a823882437 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -62,7 +62,12 @@ int main(int argc, char *argv[])
 	if (fseek(f, -4L, SEEK_END)) {
 		perror(argv[1]);
 	}
-	fread(&olen, sizeof olen, 1, f);
+
+	if (fread(&olen, sizeof(olen), 1, f) != 1) {
+		perror(argv[1]);
+		return 1;
+	}
+
 	ilen = ftell(f);
 	olen = getle32(&olen);
 	fclose(f);
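The fix adopts the standard stdio idiom: fread() returns the number of complete items transferred, so anything other than 1 here means a short read or an I/O error. A stand-alone sketch of the same pattern (read_le32_at_end is a hypothetical helper, not part of mkpiggy):

    #include <stdio.h>

    /* Read the trailing 4 bytes of a file, failing loudly on a
     * short read instead of silently using an uninitialized value. */
    static int read_le32_at_end(const char *path, unsigned int *out)
    {
    	FILE *f = fopen(path, "rb");

    	if (!f) {
    		perror(path);
    		return -1;
    	}
    	if (fseek(f, -4L, SEEK_END)) {
    		perror(path);
    		fclose(f);
    		return -1;
    	}
    	if (fread(out, sizeof(*out), 1, f) != 1) {	/* short read? */
    		perror(path);
    		fclose(f);
    		return -1;
    	}
    	fclose(f);
    	return 0;
    }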
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 211ca3f7fd16..4ea15ca89b2b 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -88,6 +88,7 @@ extern int acpi_disabled;
 extern int acpi_pci_disabled;
 extern int acpi_skip_timer_override;
 extern int acpi_use_timer_override;
+extern int acpi_fix_pin2_polarity;
 
 extern u8 acpi_sci_flags;
 extern int acpi_sci_override_gsi;
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
new file mode 100644
index 000000000000..e656ad8c0a2e
--- /dev/null
+++ b/arch/x86/include/asm/ce4100.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_CE4100_H_
+#define _ASM_CE4100_H_
+
+int ce4100_pci_init(void);
+
+#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0d998e..43a18c77676d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,6 +36,11 @@
 #define MSR_IA32_PERFCTR1		0x000000c2
 #define MSR_FSB_FREQ			0x000000cd
 
+#define MSR_NHM_SNB_PKG_CST_CFG_CTL	0x000000e2
+#define NHM_C3_AUTO_DEMOTE		(1UL << 25)
+#define NHM_C1_AUTO_DEMOTE		(1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE		(1UL << 25)
+
 #define MSR_MTRRcap			0x000000fe
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index e2f6a99f14ab..cc29086e30cd 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -22,6 +22,7 @@
 
 #define ARCH_P4_CNTRVAL_BITS	(40)
 #define ARCH_P4_CNTRVAL_MASK	((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
+#define ARCH_P4_UNFLAGGED_BIT	((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
 
 #define P4_ESCR_EVENT_MASK	0x7e000000U
 #define P4_ESCR_EVENT_SHIFT	25
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 6c22bf353f26..725b77831993 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
 	 */
 	CMOS_WRITE(0, 0xf);
 
-	*((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0;
+	*((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0;
 }
 
 static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index ce1d54c8a433..3e094af443c3 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -176,7 +176,7 @@ struct bau_msg_payload {
 struct bau_msg_header {
 	unsigned int dest_subnodeid:6;	/* must be 0x10, for the LB */
 	/* bits 5:0 */
-	unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */
+	unsigned int base_dest_nodeid:15; /* nasid of the */
 	/* bits 20:6 */			  /* first bit in uvhub map */
 	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf238a33..c61934fbf22a 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {
 
 /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
 #define INVALID_P2M_ENTRY	(~0UL)
-#define FOREIGN_FRAME_BIT	(1UL<<31)
+#define FOREIGN_FRAME_BIT	(1UL<<(BITS_PER_LONG-1))
+#define IDENTITY_FRAME_BIT	(1UL<<(BITS_PER_LONG-2))
 #define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
+#define IDENTITY_FRAME(m)	((m) | IDENTITY_FRAME_BIT)
 
 /* Maximum amount of memory we can handle in a domain in pages */
 #define MAX_DOMAIN_PAGES	\
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order;
 
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern unsigned long set_phys_range_identity(unsigned long pfn_s,
+					     unsigned long pfn_e);
 
 extern int m2p_add_override(unsigned long mfn, struct page *page);
 extern int m2p_remove_override(struct page *page);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
+#ifdef CONFIG_XEN_DEBUG_FS
+extern int p2m_dump_show(struct seq_file *m, void *v);
+#endif
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
 	unsigned long mfn;
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
 	mfn = get_phys_to_machine(pfn);
 
 	if (mfn != INVALID_P2M_ENTRY)
-		mfn &= ~FOREIGN_FRAME_BIT;
+		mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
 
 	return mfn;
 }
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
 	unsigned long pfn;
+	int ret = 0;
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return mfn;
 
+	if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+		pfn = ~0;
+		goto try_override;
+	}
 	pfn = 0;
 	/*
 	 * The array access can fail (e.g., device space beyond end of RAM).
 	 * In such cases it doesn't matter what we return (we return garbage),
 	 * but we must handle the fault without crashing!
 	 */
-	__get_user(pfn, &machine_to_phys_mapping[mfn]);
-
-	/*
-	 * If this appears to be a foreign mfn (because the pfn
-	 * doesn't map back to the mfn), then check the local override
-	 * table to see if there's a better pfn to use.
+	ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+try_override:
+	/* ret might be < 0 if there are no entries in the m2p for mfn */
+	if (ret < 0)
+		pfn = ~0;
+	else if (get_phys_to_machine(pfn) != mfn)
+		/*
+		 * If this appears to be a foreign mfn (because the pfn
+		 * doesn't map back to the mfn), then check the local override
+		 * table to see if there's a better pfn to use.
+		 *
+		 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
+		 */
+		pfn = m2p_find_override_pfn(mfn, ~0);
+
+	/*
+	 * pfn is ~0 if there are no entries in the m2p for mfn or if the
+	 * entry doesn't map back to the mfn and m2p_override doesn't have a
+	 * valid entry for it.
 	 */
-	if (get_phys_to_machine(pfn) != mfn)
-		pfn = m2p_find_override_pfn(mfn, pfn);
+	if (pfn == ~0 &&
+	    get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+		pfn = mfn;
 
 	return pfn;
 }
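The two new tag bits live in the top of the entry word, so a single unsigned long can encode a normal MFN, a foreign frame, or an identity-mapped PFN. A stand-alone illustration of the encoding (plain C, not kernel code; assumes the 64-bit case):

    #include <stdio.h>

    #define BITS_PER_LONG		64
    #define INVALID_P2M_ENTRY	(~0UL)
    #define FOREIGN_FRAME_BIT	(1UL << (BITS_PER_LONG - 1))
    #define IDENTITY_FRAME_BIT	(1UL << (BITS_PER_LONG - 2))
    #define IDENTITY_FRAME(m)	((m) | IDENTITY_FRAME_BIT)

    int main(void)
    {
    	unsigned long pfn = 0xc0000;	/* some PCI-hole page */
    	unsigned long entry = IDENTITY_FRAME(pfn);

    	/* INVALID_P2M_ENTRY has all bits set (including both tag
    	 * bits), so it must be tested before masking the tags off: */
    	if (entry != INVALID_P2M_ENTRY)
    		entry &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);

    	printf("entry maps back to pfn 0x%lx\n", entry);	/* 0xc0000 */
    	return 0;
    }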
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a71137983a..3e6e2d68f761 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,6 +72,7 @@ u8 acpi_sci_flags __initdata;
 int acpi_sci_override_gsi __initdata;
 int acpi_skip_timer_override __initdata;
 int acpi_use_timer_override __initdata;
+int acpi_fix_pin2_polarity __initdata;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -415,10 +416,15 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
 		return 0;
 	}
 
-	if (acpi_skip_timer_override &&
-	    intsrc->source_irq == 0 && intsrc->global_irq == 2) {
-		printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
-		return 0;
+	if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
+		if (acpi_skip_timer_override) {
+			printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+			return 0;
+		}
+		if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
+			intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
+			printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
+		}
 	}
 
 	mp_override_legacy_irq(intsrc->source_irq,
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 51ef31a89be9..51d4e1663066 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -284,7 +284,7 @@ static int __init apbt_clockevent_register(void)
 	memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
 
 	if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
-		apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
+		adev->evt.rating = APBT_CLOCKEVENT_RATING - 100;
 		global_clock_event = &adev->evt;
 		printk(KERN_DEBUG "%s clockevent registered as global\n",
 		       global_clock_event->name);
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 13a389179514..452932d34730 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void)
 		addr += size;
 	}
 
-	printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
-	       num_scan_areas);
+	if (num_scan_areas)
+		printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas);
 }
 
 
@@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy)
 {
 	check_for_bios_corruption();
 	schedule_delayed_work(&bios_check_work,
 		round_jiffies_relative(corruption_check_period*HZ));
 }
 
 static int start_periodic_check_for_corruption(void)
 {
-	if (!memory_corruption_check || corruption_check_period == 0)
+	if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0)
 		return 0;
 
 	printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index bd1cac747f67..52c93648e492 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -158,9 +158,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x06) {
 		if (cpu_has(c, X86_FEATURE_EST))
-			printk(KERN_WARNING PFX "Warning: EST-capable CPU "
-			       "detected. The acpi-cpufreq module offers "
-			       "voltage scaling in addition of frequency "
+			printk_once(KERN_WARNING PFX "Warning: EST-capable "
+			       "CPU detected. The acpi-cpufreq module offers "
+			       "voltage scaling in addition to frequency "
 			       "scaling. You should use that instead of "
 			       "p4-clockmod, if possible.\n");
 		switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679f2799..4a5a42b842ad 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
 cmd_incomplete:
 	iowrite16(0, &pcch_hdr->status);
 	spin_unlock(&pcc_lock);
-	return -EINVAL;
+	return 0;
 }
 
 static int pcc_cpufreq_target(struct cpufreq_policy *policy,
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 35c7e65e59be..c567dec854f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1537,6 +1537,7 @@ static struct notifier_block cpb_nb = {
 static int __cpuinit powernowk8_init(void)
 {
 	unsigned int i, supported_cpus = 0, cpu;
+	int rv;
 
 	for_each_online_cpu(i) {
 		int rc;
@@ -1555,14 +1556,14 @@ static int __cpuinit powernowk8_init(void)
 
 	cpb_capable = true;
 
-	register_cpu_notifier(&cpb_nb);
-
 	msrs = msrs_alloc();
 	if (!msrs) {
 		printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
 		return -ENOMEM;
 	}
 
+	register_cpu_notifier(&cpb_nb);
+
 	rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
 
 	for_each_cpu(cpu, cpu_online_mask) {
@@ -1574,7 +1575,13 @@ static int __cpuinit powernowk8_init(void)
 			(cpb_enabled ? "on" : "off"));
 	}
 
-	return cpufreq_register_driver(&cpufreq_amd64_driver);
+	rv = cpufreq_register_driver(&cpufreq_amd64_driver);
+	if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
+		unregister_cpu_notifier(&cpb_nb);
+		msrs_free(msrs);
+		msrs = NULL;
+	}
+	return rv;
 }
 
 /* driver entry point for term */
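The reordering matters for the error path: resources are now acquired as allocate, then register-notifier, then register-driver, and a late failure unwinds in reverse so nothing leaks and no notifier outlives the module. A userspace sketch of that ordering (all names illustrative, simulating a failed final registration):

    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative stand-ins, not the driver's real API. */
    static void *msrs;
    static int  register_driver(void)     { return -1; /* simulate failure */ }
    static void register_notifier(void)   { }
    static void unregister_notifier(void) { }

    static int example_init(void)
    {
    	int rv;

    	msrs = malloc(64);		/* step 1: allocate */
    	if (!msrs)
    		return -1;

    	register_notifier();		/* step 2: register notifier */

    	rv = register_driver();		/* step 3: can fail */
    	if (rv < 0) {
    		unregister_notifier();	/* undo step 2 */
    		free(msrs);		/* undo step 1 */
    		msrs = NULL;
    	}
    	return rv;
    }

    int main(void)
    {
    	printf("init: %d\n", example_init());
    	return 0;
    }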
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f7a0993c1e7c..ff751a9f182b 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -770,9 +770,14 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 		return 1;
 	}
 
-	/* it might be unflagged overflow */
-	rdmsrl(hwc->event_base + hwc->idx, v);
-	if (!(v & ARCH_P4_CNTRVAL_MASK))
+	/*
+	 * In some circumstances the overflow might issue an NMI but did
+	 * not set P4_CCCR_OVF bit. Because a counter holds a negative value
+	 * we simply check for high bit being set, if it's cleared it means
+	 * the counter has reached zero value and continued counting before
+	 * real NMI signal was received:
+	 */
+	if (!(v & ARCH_P4_UNFLAGGED_BIT))
 		return 1;
 
 	return 0;
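The idea behind ARCH_P4_UNFLAGGED_BIT: P4 counters are 40 bits wide and are armed with a negative value, so while still counting toward overflow the top counter bit (bit 39) stays set; seeing it cleared means the counter crossed zero. A self-contained illustration of that check (plain C, not the PMU code):

    #include <stdio.h>

    #define ARCH_P4_CNTRVAL_BITS	(40)
    #define ARCH_P4_CNTRVAL_MASK	((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
    #define ARCH_P4_UNFLAGGED_BIT	((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))

    int main(void)
    {
    	/* Counter armed to overflow after 1000 events: -1000 in
    	 * 40-bit two's complement, so bit 39 is set. */
    	unsigned long long v = (0ULL - 1000) & ARCH_P4_CNTRVAL_MASK;

    	printf("armed:   overflowed=%d\n", !(v & ARCH_P4_UNFLAGGED_BIT));

    	/* After 1000 events the counter wraps past zero; bit 39
    	 * clears, which is the "unflagged overflow" condition. */
    	v = (v + 1000) & ARCH_P4_CNTRVAL_MASK;
    	printf("wrapped: overflowed=%d\n", !(v & ARCH_P4_UNFLAGGED_BIT));
    	return 0;
    }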
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd953dee..9efbdcc56425 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -143,15 +143,10 @@ static void __init ati_bugs(int num, int slot, int func)
 
 static u32 __init ati_sbx00_rev(int num, int slot, int func)
 {
-	u32 old, d;
+	u32 d;
 
-	d = read_pci_config(num, slot, func, 0x70);
-	old = d;
-	d &= ~(1<<8);
-	write_pci_config(num, slot, func, 0x70, d);
 	d = read_pci_config(num, slot, func, 0x8);
 	d &= 0xff;
-	write_pci_config(num, slot, func, 0x70, old);
 
 	return d;
 }
@@ -160,13 +155,16 @@ static void __init ati_bugs_contd(int num, int slot, int func)
 {
 	u32 d, rev;
 
-	if (acpi_use_timer_override)
-		return;
-
 	rev = ati_sbx00_rev(num, slot, func);
+	if (rev >= 0x40)
+		acpi_fix_pin2_polarity = 1;
+
 	if (rev > 0x13)
 		return;
 
+	if (acpi_use_timer_override)
+		return;
+
 	/* check for IRQ0 interrupt swap */
 	d = read_pci_config(num, slot, func, 0x64);
 	if (!(d & (1<<14)))
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index fc7aae1e2bc7..715037caeb43 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -285,6 +285,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
+	{	/* Handle problems with rebooting on VersaLogic Menlow boards */
+		.callback = set_bios_reboot,
+		.ident = "VersaLogic Menlow based board",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
+			DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
+		},
+	},
 	{ }
 };
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 54ce246a383e..63fec1531e89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2777,6 +2777,8 @@ static int dr_interception(struct vcpu_svm *svm)
 		kvm_register_write(&svm->vcpu, reg, val);
 	}
 
+	skip_emulated_instruction(&svm->vcpu);
+
 	return 1;
 }
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a4..20e3f8702d1e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
 	for (address = VMALLOC_START & PMD_MASK;
 	     address >= TASK_SIZE && address < FIXADDR_TOP;
 	     address += PMD_SIZE) {
-
-		unsigned long flags;
 		struct page *page;
 
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			spinlock_t *pgt_lock;
 			pmd_t *ret;
 
+			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 
 			spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
 			if (!ret)
 				break;
 		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 }
 
253 252
@@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	       unsigned long address, unsigned int fault)
 {
 	if (fault & VM_FAULT_OOM) {
+		/* Kernel mode? Handle exceptions or die: */
+		if (!(error_code & PF_USER)) {
+			up_read(&current->mm->mmap_sem);
+			no_context(regs, error_code, address);
+			return;
+		}
+
 		out_of_memory(regs, error_code, address);
 	} else {
 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af80..c14a5422e152 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 
 	for (address = start; address <= end; address += PGDIR_SIZE) {
 		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
 		struct page *page;
 
 		if (pgd_none(*pgd_ref))
 			continue;
 
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			spinlock_t *pgt_lock;
 
 			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
 
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 
 			spin_unlock(pgt_lock);
 		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 }
 
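This hunk and the following pageattr.c, pgtable.c, and xen changes share one theme: pgd_lock is no longer taken from interrupt context, so the _irqsave/_irqrestore variants (which save flags and disable local interrupts) can be dropped for the plain forms. A fragment contrasting the two idioms (illustrative only, not from this patch):

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    static void demo(void)
    {
    	unsigned long flags;

    	/* Needed only when the lock may also be taken in IRQ context: */
    	spin_lock_irqsave(&demo_lock, flags);
    	spin_unlock_irqrestore(&demo_lock, flags);

    	/* Sufficient (and cheaper) for process-context-only locks: */
    	spin_lock(&demo_lock);
    	spin_unlock(&demo_lock);
    }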
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eebc..1337c51b07d7 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -780,11 +780,7 @@ void __cpuinit numa_add_cpu(int cpu)
 	int physnid;
 	int nid = NUMA_NO_NODE;
 
-	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-	if (apicid != BAD_APICID)
-		nid = apicid_to_node[apicid];
-	if (nid == NUMA_NO_NODE)
-		nid = early_cpu_to_node(cpu);
+	nid = early_cpu_to_node(cpu);
 	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
 
 	/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c81f3c..90825f2eb0f4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
 
 void update_page_count(int level, unsigned long pages)
 {
-	unsigned long flags;
-
 	/* Protect against CPA */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	direct_pages_count[level] += pages;
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
-	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+	unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	if (cpa->force_split)
 		return 1;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	}
 
 out_unlock:
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return do_split;
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	unsigned long flags, pfn, pfninc = 1;
+	unsigned long pfn, pfninc = 1;
 	unsigned int i, level;
 	pte_t *pbase, *tmp;
 	pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (!base)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -591,7 +589,7 @@ out_unlock:
 	 */
 	if (base)
 		__free_page(base);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return 0;
 }
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d3c96d..0113d19c8aa6 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 
 static void pgd_dtor(pgd_t *pgd)
 {
-	unsigned long flags; /* can be called from interrupt context */
-
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	pgd_list_del(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 /*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
-	unsigned long flags;
 
 	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
 
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 * respect to anything walking the pgd_list, so that they
 	 * never see a partially populated pgd.
 	 */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return pgd;
 
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 85b68ef5e809..9260b3eb18d4 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -34,6 +34,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 
+#include <asm/ce4100.h>
 #include <asm/pci_x86.h>
 
 struct sim_reg {
@@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = {
 	.write = ce4100_conf_write,
 };
 
-static int __init ce4100_pci_init(void)
+int __init ce4100_pci_init(void)
 {
 	init_sim_regs();
 	raw_pci_ops = &ce4100_pci_conf;
-	return 0;
+	/* Indicate caller that it should invoke pci_legacy_init() */
+	return 1;
 }
-subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index d2c0d51a7178..cd6f184c3b3f 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,6 +15,7 @@
 #include <linux/serial_reg.h>
 #include <linux/serial_8250.h>
 
+#include <asm/ce4100.h>
 #include <asm/setup.h>
 #include <asm/io.h>
 
@@ -129,4 +130,5 @@ void __init x86_ce4100_early_setup(void)
 	x86_init.resources.probe_roms = x86_init_noop;
 	x86_init.mpparse.get_smp_config = x86_init_uint_noop;
 	x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+	x86_init.pci.init = ce4100_pci_init;
 }
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index dab874647530..044bda5b3174 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -140,8 +140,7 @@ void * __init prom_early_alloc(unsigned long size)
 	 * wasted bootmem) and hand off chunks of it to callers.
 	 */
 	res = alloc_bootmem(chunk_size);
-	if (!res)
-		return NULL;
+	BUG_ON(!res);
 	prom_early_allocated += chunk_size;
 	memset(res, 0, chunk_size);
 	free_mem = chunk_size;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index df58e9cad96a..a7b38d35c29a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode)
 		memset(bd2, 0, sizeof(struct bau_desc));
 		bd2->header.sw_ack_flag = 1;
 		/*
-		 * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub
+		 * base_dest_nodeid is the nasid of the first uvhub
 		 * in the partition. The bit map will indicate uvhub numbers,
 		 * which are 0-N in a partition. Pnodes are unique system-wide.
 		 */
-		bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
+		bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
 		bd2->header.dest_subnodeid = 0x10; /* the LB */
 		bd2->header.command = UV_NET_ENDPOINT_INTD;
 		bd2->header.int_both = 1;
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892e4bc3..e4343fe488ed 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
 	help
 	  Enable statistics output and various tuning options in debugfs.
 	  Enabling this option may incur a significant performance overhead.
+
+config XEN_DEBUG
+	bool "Enable Xen debug checks"
+	depends on XEN
+	default n
+	help
+	  Enable various WARN_ON checks in the Xen MMU code.
+	  Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad574..832765c0fb8c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
 #include <linux/module.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
+#include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 	if (val & _PAGE_PRESENT) {
 		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
 		pteval_t flags = val & PTE_FLAGS_MASK;
-		unsigned long mfn = pfn_to_mfn(pfn);
+		unsigned long mfn;
 
+		if (!xen_feature(XENFEAT_auto_translated_physmap))
+			mfn = get_phys_to_machine(pfn);
+		else
+			mfn = pfn;
 		/*
 		 * If there's no mfn for the pfn, then just create an
 		 * empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
 			mfn = 0;
 			flags = 0;
+		} else {
+			/*
+			 * Paramount to do this test _after_ the
+			 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
+			 * IDENTITY_FRAME_BIT resolves to true.
+			 */
+			mfn &= ~FOREIGN_FRAME_BIT;
+			if (mfn & IDENTITY_FRAME_BIT) {
+				mfn &= ~IDENTITY_FRAME_BIT;
+				flags |= _PAGE_IOMAP;
+			}
 		}
-
 		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
 	}
 
@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
 
+#ifdef CONFIG_XEN_DEBUG
+pte_t xen_make_pte_debug(pteval_t pte)
+{
+	phys_addr_t addr = (pte & PTE_PFN_MASK);
+	phys_addr_t other_addr;
+	bool io_page = false;
+	pte_t _pte;
+
+	if (pte & _PAGE_IOMAP)
+		io_page = true;
+
+	_pte = xen_make_pte(pte);
+
+	if (!addr)
+		return _pte;
+
+	if (io_page &&
+	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+		other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
+		WARN(addr != other_addr,
+			"0x%lx is using VM_IO, but it is 0x%lx!\n",
+			(unsigned long)addr, (unsigned long)other_addr);
+	} else {
+		pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
+		other_addr = (_pte.pte & PTE_PFN_MASK);
+		WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+			"0x%lx is missing VM_IO (and wasn't fixed)!\n",
+			(unsigned long)addr);
+	}
+
+	return _pte;
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
+#endif
+
 pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
  */
 void xen_mm_pin_all(void)
 {
-	unsigned long flags;
 	struct page *page;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (!PagePinned(page)) {
@@ -998,7 +1047,7 @@ void xen_mm_pin_all(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 /*
@@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
  */
 void xen_mm_unpin_all(void)
 {
-	unsigned long flags;
 	struct page *page;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
@@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void)
 
 static __init void xen_post_allocator_init(void)
 {
+#ifdef CONFIG_XEN_DEBUG
+	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
+#endif
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
 		in_frames[i] = virt_to_mfn(vaddr);
 
 		MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
-		set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+		__set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
 
 		if (out_frames)
 			out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
 #ifdef CONFIG_XEN_DEBUG_FS
 
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+	.open		= p2m_dump_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static struct dentry *d_mmu_debug;
 
 static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void)
 	debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
 			   &mmu_stats.prot_commit_batched);
 
+	debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
 	return 0;
 }
 fs_initcall(xen_mmu_debugfs);
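The p2m debugfs hook uses the stock seq_file single_open pattern: one show callback renders the whole file, and seq_read/seq_lseek/single_release come straight from the seq_file library. A minimal sketch of the same wiring for a hypothetical file (demo_* names are illustrative):

    #include <linux/debugfs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *m, void *v)
    {
    	seq_printf(m, "hello from debugfs\n");
    	return 0;
    }

    static int demo_open(struct inode *inode, struct file *filp)
    {
    	return single_open(filp, demo_show, NULL);
    }

    static const struct file_operations demo_fops = {
    	.open		= demo_open,
    	.read		= seq_read,
    	.llseek		= seq_lseek,
    	.release	= single_release,
    };

    /* Registration, e.g. from an initcall:
     *	debugfs_create_file("demo", 0400, NULL, NULL, &demo_fops);
     */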
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7ce7ff9..00fe5604c593 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -23,6 +23,129 @@
  * P2M_PER_PAGE depends on the architecture, as a mfn is always
  * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
  * 512 and 1024 entries respectively.
+ *
+ * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
+ *
+ * However not all entries are filled with MFNs. Specifically for all other
+ * leaf entries, or for the top root, or middle one, for which there is a void
+ * entry, we assume it is "missing". So (for example)
+ *  pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
+ *
+ * We also have the possibility of setting 1-1 mappings on certain regions, so
+ * that:
+ *  pfn_to_mfn(0xc0000)=0xc0000
+ *
+ * The benefit of this is, that we can assume for non-RAM regions (think
+ * PCI BARs, or ACPI spaces), we can create mappings easily because we
+ * get the PFN value to match the MFN.
+ *
+ * For this to work efficiently we have one new page p2m_identity and
+ * allocate (via reserved_brk) any other pages we need to cover the sides
+ * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
+ * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
+ * no other fancy value).
+ *
+ * On lookup we spot that the entry points to p2m_identity and return the
+ * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
+ * If the entry points to an allocated page, we just proceed as before and
+ * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
+ * the appropriate functions (pfn_to_mfn).
+ *
+ * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
+ * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
+ * non-identity pfn. To protect ourselves against that, we elect to set (and
+ * get) the IDENTITY_FRAME_BIT on all identity mapped PFNs.
+ *
+ * This simplistic diagram is used to explain the more subtle piece of code.
+ * There is also a diagram of the P2M at the end that can help.
+ * Imagine your E820 looking as so:
+ *
+ *                    1GB                                           2GB
+ * /-------------------+---------\/----\         /----------\    /---+-----\
+ * | System RAM        | Sys RAM ||ACPI|         | reserved |    | Sys RAM |
+ * \-------------------+---------/\----/         \----------/    \---+-----/
+ *                               ^- 1029MB                       ^- 2001MB
+ *
+ * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
+ *  2048MB = 524288 (0x80000)]
+ *
+ * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
+ * is actually not present (would have to kick the balloon driver to put it in).
+ *
+ * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
+ * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
+ * of the PFN and the end PFN (263424 and 512256 respectively). The first step
+ * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
+ * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
+ * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
+ * to end pfn. We reserve_brk top leaf pages if they are missing (means they
+ * point to p2m_mid_missing).
+ *
+ * With the E820 example above, 263424 is not 1GB aligned so we allocate a
+ * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
+ * Each entry in the allocated page is "missing" (points to p2m_missing).
+ *
+ * Next stage is to determine if we need to do a more granular boundary check
+ * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
+ * We check if the start pfn and end pfn violate that boundary check, and if
+ * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
+ * granularity of setting which PFNs are missing and which ones are identity.
+ * In our example 263424 and 512256 both fail the check so we reserve_brk two
+ * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
+ * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
+ *
+ * At this point we would at minimum reserve_brk one page, but could be up to
+ * three. Each call to set_phys_range_identity has at maximum a three page
+ * cost. If we were to query the P2M at this stage, all those entries from
+ * start PFN through end PFN (so 1029MB -> 2001MB) would return
+ * INVALID_P2M_ENTRY ("missing").
+ *
+ * The next step is to walk from the start pfn to the end pfn setting
+ * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
+ * If we find that the middle leaf is pointing to p2m_missing we can swap it
+ * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
+ * point we do not need to worry about boundary alignment (so no need to
+ * reserve_brk a middle page, figure out which PFNs are "missing" and which
+ * ones are identity), as that has been done earlier. If we find that the
+ * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
+ * that page (which covers 512 PFNs) and set the appropriate PFN with
+ * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
+ * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
+ * IDENTITY_FRAME_BIT set.
+ *
+ * All other regions that are void (or not filled) either point to p2m_missing
+ * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
+ * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
+ * contain the INVALID_P2M_ENTRY value and are considered "missing."
+ *
+ * This is what the p2m ends up looking (for the E820 above) with this
+ * fabulous drawing:
+ *
+ *    p2m         /--------------\
+ *  /-----\       | &mfn_list[0],|                           /-----------------\
+ *  |  0  |------>| &mfn_list[1],|    /---------------\      | ~0, ~0, ..      |
+ *  |-----|       |  ..., ~0, ~0 |    | ~0, ~0, [x]---+----->| IDENTITY [@256] |
+ *  |  1  |---\   \--------------/    | [p2m_identity]+\     | IDENTITY [@257] |
+ *  |-----|    \                      | [p2m_identity]+\\    | ....            |
+ *  |  2  |--\  \-------------------->|  ...          | \\   \----------------/
+ *  |-----|   \                       \---------------/  \\
+ *  |  3  |\   \                                          \\  p2m_identity
+ *  |-----| \   \-------------------->/---------------\   /-----------------\
+ *  | ..  +->+                        | [p2m_identity]+-->| ~0, ~0, ~0, ... |
+ *  \-----/ /                         | [p2m_identity]+-->| ..., ~0         |
+ *         / /---------------\        | ....          |   \-----------------/
+ *        /  | IDENTITY[@0]  |      /-+-[x], ~0, ~0.. |
+ *       /   | IDENTITY[@256]|<----/  \---------------/
+ *      /    | ~0, ~0, ....  |
+ *     |     \---------------/
+ *     |
+ *     p2m_missing             p2m_missing
+ * /------------------\     /------------\
+ * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
+ * | [p2m_mid_missing]+---->| ..., ~0    |
+ * \------------------/     \------------/
+ *
+ * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
  */
 
 #include <linux/init.h>
@@ -30,6 +153,7 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/sched.h>
+#include <linux/seq_file.h>
 
 #include <asm/cache.h>
 #include <asm/setup.h>
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
 static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
 static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
 
+static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
+
 RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
 RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
 
+/* We might hit two boundary violations at the start and end, at max each
+ * boundary violation will require three middle nodes. */
+RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
 	BUG_ON(pfn >= MAX_P2M_PFN);
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
 	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
 	p2m_top_init(p2m_top);
 
+	p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+	p2m_init(p2m_identity);
+
 	/*
 	 * The domain builder gives us a pre-constructed p2m array in
 	 * mfn_list for all the pages initially given to us, so we just
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
 	mididx = p2m_mid_index(pfn);
 	idx = p2m_index(pfn);
 
+	/*
+	 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
+	 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
+	 * would be wrong.
+	 */
+	if (p2m_top[topidx][mididx] == p2m_identity)
+		return IDENTITY_FRAME(pfn);
+
 	return p2m_top[topidx][mididx][idx];
 }
 EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn)
 		p2m_top_mfn_p[topidx] = mid_mfn;
 	}
 
-	if (p2m_top[topidx][mididx] == p2m_missing) {
+	if (p2m_top[topidx][mididx] == p2m_identity ||
+	    p2m_top[topidx][mididx] == p2m_missing) {
 		/* p2m leaf page is missing */
 		unsigned long *p2m;
+		unsigned long *p2m_orig = p2m_top[topidx][mididx];
 
 		p2m = alloc_p2m_page();
 		if (!p2m)
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn)
 
 		p2m_init(p2m);
 
-		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+		if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
 			free_p2m_page(p2m);
 		else
 			mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn)
 	return true;
 }
 
+bool __early_alloc_p2m(unsigned long pfn)
+{
+	unsigned topidx, mididx, idx;
+
+	topidx = p2m_top_index(pfn);
+	mididx = p2m_mid_index(pfn);
+	idx = p2m_index(pfn);
+
+	/* Pfff.. No boundary cross-over, lets get out. */
+	if (!idx)
+		return false;
+
+	WARN(p2m_top[topidx][mididx] == p2m_identity,
+		"P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
+		topidx, mididx);
+
+	/*
+	 * Could be done by xen_build_dynamic_phys_to_machine..
+	 */
+	if (p2m_top[topidx][mididx] != p2m_missing)
+		return false;
+
+	/* Boundary cross-over for the edges: */
+	if (idx) {
+		unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+		p2m_init(p2m);
+
+		p2m_top[topidx][mididx] = p2m;
+
+	}
+	return idx != 0;
+}
+unsigned long set_phys_range_identity(unsigned long pfn_s,
+				      unsigned long pfn_e)
+{
+	unsigned long pfn;
+
+	if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
+		return 0;
+
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
+		return pfn_e - pfn_s;
+
+	if (pfn_s > pfn_e)
+		return 0;
+
+	for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
+		pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
+		pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
+	{
+		unsigned topidx = p2m_top_index(pfn);
+		if (p2m_top[topidx] == p2m_mid_missing) {
+			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+			p2m_mid_init(mid);
+
+			p2m_top[topidx] = mid;
+		}
+	}
+
+	__early_alloc_p2m(pfn_s);
+	__early_alloc_p2m(pfn_e);
+
+	for (pfn = pfn_s; pfn < pfn_e; pfn++)
+		if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
+			break;
+
+	if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
+		"Identity mapping failed. We are %ld short of 1-1 mappings!\n",
+		(pfn_e - pfn_s) - (pfn - pfn_s)))
+		printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
+
+	return pfn - pfn_s;
+}
+
 /* Try to install p2m mapping; fail if intermediate bits missing */
 bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
 	unsigned topidx, mididx, idx;
 
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return true;
+	}
 	if (unlikely(pfn >= MAX_P2M_PFN)) {
 		BUG_ON(mfn != INVALID_P2M_ENTRY);
 		return true;
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 	mididx = p2m_mid_index(pfn);
 	idx = p2m_index(pfn);
 
+	/* For sparse holes where the p2m leaf has real PFN along with
+	 * PCI holes, stick in the PFN as the MFN value.
+	 */
+	if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
+		if (p2m_top[topidx][mididx] == p2m_identity)
+			return true;
+
+		/* Swap over from MISSING to IDENTITY if needed. */
+		if (p2m_top[topidx][mididx] == p2m_missing) {
+			WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
+				p2m_identity) != p2m_missing);
+			return true;
+		}
+	}
+
 	if (p2m_top[topidx][mididx] == p2m_missing)
 		return mfn == INVALID_P2M_ENTRY;
 
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 
 bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return true;
-	}
-
 	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
 		if (!alloc_p2m(pfn))
 			return false;
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+int p2m_dump_show(struct seq_file *m, void *v)
+{
+	static const char * const level_name[] = { "top", "middle",
+						"entry", "abnormal" };
+	static const char * const type_name[] = { "identity", "missing",
+						"pfn", "abnormal"};
+#define TYPE_IDENTITY 0
+#define TYPE_MISSING 1
+#define TYPE_PFN 2
+#define TYPE_UNKNOWN 3
+	unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
+	unsigned int uninitialized_var(prev_level);
+	unsigned int uninitialized_var(prev_type);
+
+	if (!p2m_top)
+		return 0;
+
+	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx = p2m_mid_index(pfn);
+		unsigned idx = p2m_index(pfn);
+		unsigned lvl, type;
+
+		lvl = 4;
+		type = TYPE_UNKNOWN;
+		if (p2m_top[topidx] == p2m_mid_missing) {
+			lvl = 0; type = TYPE_MISSING;
+		} else if (p2m_top[topidx] == NULL) {
+			lvl = 0; type = TYPE_UNKNOWN;
+		} else if (p2m_top[topidx][mididx] == NULL) {
+			lvl = 1; type = TYPE_UNKNOWN;
+		} else if (p2m_top[topidx][mididx] == p2m_identity) {
+			lvl = 1; type = TYPE_IDENTITY;
+		} else if (p2m_top[topidx][mididx] == p2m_missing) {
+			lvl = 1; type = TYPE_MISSING;
+		} else if (p2m_top[topidx][mididx][idx] == 0) {
+			lvl = 2; type = TYPE_UNKNOWN;
+		} else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
+			lvl = 2; type = TYPE_IDENTITY;
+		} else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
+			lvl = 2; type = TYPE_MISSING;
+		} else if (p2m_top[topidx][mididx][idx] == pfn) {
+			lvl = 2; type = TYPE_PFN;
+		} else if (p2m_top[topidx][mididx][idx] != pfn) {
+			lvl = 2; type = TYPE_PFN;
+		}
+		if (pfn == 0) {
+			prev_level = lvl;
+			prev_type = type;
+		}
+		if (pfn == MAX_DOMAIN_PAGES-1) {
+			lvl = 3;
+			type = TYPE_UNKNOWN;
+		}
+		if (prev_type != type) {
+			seq_printf(m, " [0x%lx->0x%lx] %s\n",
+				prev_pfn_type, pfn, type_name[prev_type]);
+			prev_pfn_type = pfn;
+			prev_type = type;
+		}
+		if (prev_level != lvl) {
+			seq_printf(m, " [0x%lx->0x%lx] level %s\n",
+				prev_pfn_level, pfn, level_name[prev_level]);
+			prev_pfn_level = pfn;
+			prev_level = lvl;
+		}
+	}
+	return 0;
+#undef TYPE_IDENTITY
+#undef TYPE_MISSING
+#undef TYPE_PFN
+#undef TYPE_UNKNOWN
+}
+#endif
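The lookup math behind p2m[topidx][mididx][idx] on 64-bit splits a PFN into three indices of 512 entries each, and the example PFN from the comment block lands exactly where it says. A stand-alone check (plain C, mirroring the kernel's p2m_*_index helpers):

    #include <stdio.h>

    #define P2M_PER_PAGE		512
    #define P2M_MID_PER_PAGE	512

    static unsigned p2m_top_index(unsigned long pfn)
    {
    	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
    }

    static unsigned p2m_mid_index(unsigned long pfn)
    {
    	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
    }

    static unsigned p2m_index(unsigned long pfn)
    {
    	return pfn % P2M_PER_PAGE;
    }

    int main(void)
    {
    	unsigned long pfn = 263424;	/* 1029MB, from the example above */

    	printf("p2m[%u][%u][%u]\n",
    	       p2m_top_index(pfn), p2m_mid_index(pfn), p2m_index(pfn));
    	/* prints p2m[1][2][256], matching the commentary */
    	return 0;
    }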
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 010ba2e62de5..fa0269a99377 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
 
 static __init void xen_add_extra_mem(unsigned long pages)
 {
+	unsigned long pfn;
+
 	u64 size = (u64)pages * PAGE_SIZE;
 	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
 
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
 	xen_extra_mem_size += size;
 
 	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+
+	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 }
 
 static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
 		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
 		     start, end, ret);
 		if (ret == 1) {
-			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 			len++;
 		}
 	}
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
 	return released;
 }
 
+static unsigned long __init xen_set_identity(const struct e820entry *list,
+					     ssize_t map_size)
+{
+	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
+	phys_addr_t start_pci = last;
+	const struct e820entry *entry;
+	unsigned long identity = 0;
+	int i;
+
+	for (i = 0, entry = list; i < map_size; i++, entry++) {
+		phys_addr_t start = entry->addr;
+		phys_addr_t end = start + entry->size;
+
+		if (start < last)
+			start = last;
+
+		if (end <= start)
+			continue;
+
+		/* Skip over the 1MB region. */
+		if (last > end)
+			continue;
+
+		if (entry->type == E820_RAM) {
+			if (start > start_pci)
+				identity += set_phys_range_identity(
+						PFN_UP(start_pci), PFN_DOWN(start));
+
+			/* Without saving 'last' we would gobble RAM too
+			 * at the end of the loop. */
+			last = end;
+			start_pci = end;
+			continue;
+		}
+		start_pci = min(start, start_pci);
+		last = end;
+	}
+	if (last > start_pci)
+		identity += set_phys_range_identity(
+					PFN_UP(start_pci), PFN_DOWN(last));
+	return identity;
+}
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
 char * __init xen_memory_setup(void)
 {
 	static struct e820entry map[E820MAX] __initdata;
+	static struct e820entry map_raw[E820MAX] __initdata;
 
 	unsigned long max_pfn = xen_start_info->nr_pages;
 	unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
 	struct xen_memory_map memmap;
 	unsigned long extra_pages = 0;
 	unsigned long extra_limit;
+	unsigned long identity_pages = 0;
 	int i;
 	int op;
 
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
 	}
 	BUG_ON(rc);
 
+	memcpy(map_raw, map, sizeof(map));
 	e820.nr_map = 0;
 	xen_extra_mem_start = mem_end;
 	for (i = 0; i < memmap.nr_entries; i++) {
@@ -260,6 +310,13 @@ char * __init xen_memory_setup(void)
 
 	xen_add_extra_mem(extra_pages);
 
+	/*
+	 * Set P2M for all non-RAM pages and E820 gaps to be identity
+	 * type PFNs. We supply it with the non-sanitized version
+	 * of the E820.
+	 */
+	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
 	return "Xen";
 }
 
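xen_set_identity effectively inverts the E820: every byte not covered by a RAM entry, including the gaps between entries, becomes a 1-1 mapping candidate. A toy version of that cursor walk (made-up map and types for illustration, same logic):

    #include <stdio.h>

    struct region { unsigned long start, end; int is_ram; };

    int main(void)
    {
    	/* Loosely mirrors the example in the p2m.c commentary: RAM to
    	 * 1029MB, an ACPI block, a gap, reserved, RAM from 2001MB. */
    	struct region map[] = {
    		{ 0x00000000, 0x40500000, 1 },	/* System RAM */
    		{ 0x40500000, 0x40600000, 0 },	/* ACPI       */
    		{ 0x58000000, 0x59000000, 0 },	/* reserved   */
    		{ 0x7d100000, 0x80000000, 1 },	/* System RAM */
    	};
    	unsigned long cursor = 0;
    	int i;

    	for (i = 0; i < 4; i++) {
    		if (map[i].is_ram) {
    			if (map[i].start > cursor)	/* non-RAM run ends */
    				printf("identity: %#lx-%#lx\n",
    				       cursor, map[i].start);
    			cursor = map[i].end;
    		}
    	}
    	return 0;
    }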