diff options
author | Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> | 2010-11-16 14:06:22 -0500 |
---|---|---|
committer | Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> | 2010-11-16 14:06:22 -0500 |
commit | 20b4755e4fbb226eb42951bd40b53fcbce9ef944 (patch) | |
tree | 43da70e0b32ee423d3643ecd422821383411ab72 /arch/x86/xen | |
parent | 744f9f104ea262de1dc3e29265870c649f0d9473 (diff) | |
parent | e53beacd23d9cb47590da6a7a7f6d417b941a994 (diff) |
Merge commit 'v2.6.37-rc2' into upstream/xenfs
* commit 'v2.6.37-rc2': (10093 commits)
Linux 2.6.37-rc2
capabilities/syslog: open code cap_syslog logic to fix build failure
i2c: Sanity checks on adapter registration
i2c: Mark i2c_adapter.id as deprecated
i2c: Drivers shouldn't include <linux/i2c-id.h>
i2c: Delete unused adapter IDs
i2c: Remove obsolete cleanup for clientdata
include/linux/kernel.h: Move logging bits to include/linux/printk.h
Fix gcc 4.5.1 miscompiling drivers/char/i8k.c (again)
hwmon: (w83795) Check for BEEP pin availability
hwmon: (w83795) Clear intrusion alarm immediately
hwmon: (w83795) Read the intrusion state properly
hwmon: (w83795) Print the actual temperature channels as sources
hwmon: (w83795) List all usable temperature sources
hwmon: (w83795) Expose fan control method
hwmon: (w83795) Fix fan control mode attributes
hwmon: (lm95241) Check validity of input values
hwmon: Change mail address of Hans J. Koch
PCI: sysfs: fix printk warnings
GFS2: Fix inode deallocation race
...
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 21 | ||||
-rw-r--r-- | arch/x86/xen/debugfs.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 27 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 504 | ||||
-rw-r--r-- | arch/x86/xen/mmu.h | 1 | ||||
-rw-r--r-- | arch/x86/xen/pci-swiotlb-xen.c | 9 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 134 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 32 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 3 |
10 files changed, 613 insertions, 121 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 68128a1b401a..5b54892e4bc3 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -13,21 +13,28 @@ config XEN | |||
13 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
14 | Xen hypervisor. | 14 | Xen hypervisor. |
15 | 15 | ||
16 | config XEN_DOM0 | ||
17 | def_bool y | ||
18 | depends on XEN && PCI_XEN && SWIOTLB_XEN | ||
19 | depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI | ||
20 | |||
21 | # Dummy symbol since people have come to rely on the PRIVILEGED_GUEST | ||
22 | # name in tools. | ||
23 | config XEN_PRIVILEGED_GUEST | ||
24 | def_bool XEN_DOM0 | ||
25 | |||
16 | config XEN_PVHVM | 26 | config XEN_PVHVM |
17 | def_bool y | 27 | def_bool y |
18 | depends on XEN | 28 | depends on XEN |
19 | depends on X86_LOCAL_APIC | 29 | depends on X86_LOCAL_APIC |
20 | 30 | ||
21 | config XEN_MAX_DOMAIN_MEMORY | 31 | config XEN_MAX_DOMAIN_MEMORY |
22 | int "Maximum allowed size of a domain in gigabytes" | 32 | int |
23 | default 8 if X86_32 | 33 | default 128 |
24 | default 32 if X86_64 | ||
25 | depends on XEN | 34 | depends on XEN |
26 | help | 35 | help |
27 | The pseudo-physical to machine address array is sized | 36 | This only affects the sizing of some bss arrays, the unused |
28 | according to the maximum possible memory size of a Xen | 37 | portions of which are freed. |
29 | domain. This array uses 1 page per gigabyte, so there's no | ||
30 | need to be too stingy here. | ||
31 | 38 | ||
32 | config XEN_SAVE_RESTORE | 39 | config XEN_SAVE_RESTORE |
33 | bool | 40 | bool |
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index 1304bcec8ee5..7c0fedd98ea0 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c | |||
@@ -106,6 +106,7 @@ static const struct file_operations u32_array_fops = { | |||
106 | .open = u32_array_open, | 106 | .open = u32_array_open, |
107 | .release= xen_array_release, | 107 | .release= xen_array_release, |
108 | .read = u32_array_read, | 108 | .read = u32_array_read, |
109 | .llseek = no_llseek, | ||
109 | }; | 110 | }; |
110 | 111 | ||
111 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | 112 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7d46c8441418..235c0f4d3861 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/console.h> | 30 | #include <linux/console.h> |
31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/memblock.h> | ||
33 | 34 | ||
34 | #include <xen/xen.h> | 35 | #include <xen/xen.h> |
35 | #include <xen/interface/xen.h> | 36 | #include <xen/interface/xen.h> |
@@ -45,6 +46,7 @@ | |||
45 | #include <asm/paravirt.h> | 46 | #include <asm/paravirt.h> |
46 | #include <asm/apic.h> | 47 | #include <asm/apic.h> |
47 | #include <asm/page.h> | 48 | #include <asm/page.h> |
49 | #include <asm/xen/pci.h> | ||
48 | #include <asm/xen/hypercall.h> | 50 | #include <asm/xen/hypercall.h> |
49 | #include <asm/xen/hypervisor.h> | 51 | #include <asm/xen/hypervisor.h> |
50 | #include <asm/fixmap.h> | 52 | #include <asm/fixmap.h> |
@@ -58,7 +60,6 @@ | |||
58 | #include <asm/pgtable.h> | 60 | #include <asm/pgtable.h> |
59 | #include <asm/tlbflush.h> | 61 | #include <asm/tlbflush.h> |
60 | #include <asm/reboot.h> | 62 | #include <asm/reboot.h> |
61 | #include <asm/setup.h> | ||
62 | #include <asm/stackprotector.h> | 63 | #include <asm/stackprotector.h> |
63 | #include <asm/hypervisor.h> | 64 | #include <asm/hypervisor.h> |
64 | 65 | ||
@@ -135,9 +136,6 @@ static void xen_vcpu_setup(int cpu) | |||
135 | info.mfn = arbitrary_virt_to_mfn(vcpup); | 136 | info.mfn = arbitrary_virt_to_mfn(vcpup); |
136 | info.offset = offset_in_page(vcpup); | 137 | info.offset = offset_in_page(vcpup); |
137 | 138 | ||
138 | printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", | ||
139 | cpu, vcpup, info.mfn, info.offset); | ||
140 | |||
141 | /* Check to see if the hypervisor will put the vcpu_info | 139 | /* Check to see if the hypervisor will put the vcpu_info |
142 | structure where we want it, which allows direct access via | 140 | structure where we want it, which allows direct access via |
143 | a percpu-variable. */ | 141 | a percpu-variable. */ |
@@ -151,9 +149,6 @@ static void xen_vcpu_setup(int cpu) | |||
151 | /* This cpu is using the registered vcpu info, even if | 149 | /* This cpu is using the registered vcpu info, even if |
152 | later ones fail to. */ | 150 | later ones fail to. */ |
153 | per_cpu(xen_vcpu, cpu) = vcpup; | 151 | per_cpu(xen_vcpu, cpu) = vcpup; |
154 | |||
155 | printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n", | ||
156 | cpu, vcpup); | ||
157 | } | 152 | } |
158 | } | 153 | } |
159 | 154 | ||
@@ -242,6 +237,7 @@ static __init void xen_init_cpuid_mask(void) | |||
242 | cpuid_leaf1_edx_mask = | 237 | cpuid_leaf1_edx_mask = |
243 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ | 238 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ |
244 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | 239 | (1 << X86_FEATURE_MCA) | /* disable MCA */ |
240 | (1 << X86_FEATURE_MTRR) | /* disable MTRR */ | ||
245 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | 241 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ |
246 | 242 | ||
247 | if (!xen_initial_domain()) | 243 | if (!xen_initial_domain()) |
@@ -835,6 +831,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | |||
835 | Xen console noise. */ | 831 | Xen console noise. */ |
836 | break; | 832 | break; |
837 | 833 | ||
834 | case MSR_IA32_CR_PAT: | ||
835 | if (smp_processor_id() == 0) | ||
836 | xen_set_pat(((u64)high << 32) | low); | ||
837 | break; | ||
838 | |||
838 | default: | 839 | default: |
839 | ret = native_write_msr_safe(msr, low, high); | 840 | ret = native_write_msr_safe(msr, low, high); |
840 | } | 841 | } |
@@ -873,8 +874,6 @@ void xen_setup_vcpu_info_placement(void) | |||
873 | /* xen_vcpu_setup managed to place the vcpu_info within the | 874 | /* xen_vcpu_setup managed to place the vcpu_info within the |
874 | percpu area for all cpus, so make use of it */ | 875 | percpu area for all cpus, so make use of it */ |
875 | if (have_vcpu_info_placement) { | 876 | if (have_vcpu_info_placement) { |
876 | printk(KERN_INFO "Xen: using vcpu_info placement\n"); | ||
877 | |||
878 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); | 877 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); |
879 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); | 878 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); |
880 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); | 879 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); |
@@ -1018,7 +1017,7 @@ static void xen_reboot(int reason) | |||
1018 | struct sched_shutdown r = { .reason = reason }; | 1017 | struct sched_shutdown r = { .reason = reason }; |
1019 | 1018 | ||
1020 | #ifdef CONFIG_SMP | 1019 | #ifdef CONFIG_SMP |
1021 | smp_send_stop(); | 1020 | stop_other_cpus(); |
1022 | #endif | 1021 | #endif |
1023 | 1022 | ||
1024 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) | 1023 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) |
@@ -1183,8 +1182,14 @@ asmlinkage void __init xen_start_kernel(void) | |||
1183 | local_irq_disable(); | 1182 | local_irq_disable(); |
1184 | early_boot_irqs_off(); | 1183 | early_boot_irqs_off(); |
1185 | 1184 | ||
1185 | memblock_init(); | ||
1186 | |||
1186 | xen_raw_console_write("mapping kernel into physical memory\n"); | 1187 | xen_raw_console_write("mapping kernel into physical memory\n"); |
1187 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); | 1188 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); |
1189 | xen_ident_map_ISA(); | ||
1190 | |||
1191 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | ||
1192 | xen_build_mfn_list_list(); | ||
1188 | 1193 | ||
1189 | init_mm.pgd = pgd; | 1194 | init_mm.pgd = pgd; |
1190 | 1195 | ||
@@ -1220,6 +1225,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1220 | add_preferred_console("xenboot", 0, NULL); | 1225 | add_preferred_console("xenboot", 0, NULL); |
1221 | add_preferred_console("tty", 0, NULL); | 1226 | add_preferred_console("tty", 0, NULL); |
1222 | add_preferred_console("hvc", 0, NULL); | 1227 | add_preferred_console("hvc", 0, NULL); |
1228 | if (pci_xen) | ||
1229 | x86_init.pci.arch_init = pci_xen_init; | ||
1223 | } else { | 1230 | } else { |
1224 | /* Make sure ACS will be enabled */ | 1231 | /* Make sure ACS will be enabled */ |
1225 | pci_request_acs(); | 1232 | pci_request_acs(); |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 792de4349c79..276c67bba5aa 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/vmalloc.h> | 45 | #include <linux/vmalloc.h> |
46 | #include <linux/module.h> | 46 | #include <linux/module.h> |
47 | #include <linux/gfp.h> | 47 | #include <linux/gfp.h> |
48 | #include <linux/memblock.h> | ||
48 | 49 | ||
49 | #include <asm/pgtable.h> | 50 | #include <asm/pgtable.h> |
50 | #include <asm/tlbflush.h> | 51 | #include <asm/tlbflush.h> |
@@ -55,6 +56,8 @@ | |||
55 | #include <asm/e820.h> | 56 | #include <asm/e820.h> |
56 | #include <asm/linkage.h> | 57 | #include <asm/linkage.h> |
57 | #include <asm/page.h> | 58 | #include <asm/page.h> |
59 | #include <asm/init.h> | ||
60 | #include <asm/pat.h> | ||
58 | 61 | ||
59 | #include <asm/xen/hypercall.h> | 62 | #include <asm/xen/hypercall.h> |
60 | #include <asm/xen/hypervisor.h> | 63 | #include <asm/xen/hypervisor.h> |
@@ -138,7 +141,8 @@ static inline void check_zero(void) | |||
138 | * large enough to allocate page table pages to allocate the rest. | 141 | * large enough to allocate page table pages to allocate the rest. |
139 | * Each page can map 2MB. | 142 | * Each page can map 2MB. |
140 | */ | 143 | */ |
141 | static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; | 144 | #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) |
145 | static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); | ||
142 | 146 | ||
143 | #ifdef CONFIG_X86_64 | 147 | #ifdef CONFIG_X86_64 |
144 | /* l3 pud for userspace vsyscall mapping */ | 148 | /* l3 pud for userspace vsyscall mapping */ |
@@ -169,49 +173,182 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | |||
169 | */ | 173 | */ |
170 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) | 174 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) |
171 | 175 | ||
176 | /* | ||
177 | * Xen leaves the responsibility for maintaining p2m mappings to the | ||
178 | * guests themselves, but it must also access and update the p2m array | ||
179 | * during suspend/resume when all the pages are reallocated. | ||
180 | * | ||
181 | * The p2m table is logically a flat array, but we implement it as a | ||
182 | * three-level tree to allow the address space to be sparse. | ||
183 | * | ||
184 | * Xen | ||
185 | * | | ||
186 | * p2m_top p2m_top_mfn | ||
187 | * / \ / \ | ||
188 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | ||
189 | * / \ / \ / / | ||
190 | * p2m p2m p2m p2m p2m p2m p2m ... | ||
191 | * | ||
192 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | ||
193 | * | ||
194 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | ||
195 | * maximum representable pseudo-physical address space is: | ||
196 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | ||
197 | * | ||
198 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | ||
199 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | ||
200 | * 512 and 1024 entries respectively. | ||
201 | */ | ||
202 | |||
203 | unsigned long xen_max_p2m_pfn __read_mostly; | ||
172 | 204 | ||
173 | #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | 205 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) |
174 | #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) | 206 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) |
207 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
175 | 208 | ||
176 | /* Placeholder for holes in the address space */ | 209 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) |
177 | static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data = | ||
178 | { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; | ||
179 | 210 | ||
180 | /* Array of pointers to pages containing p2m entries */ | 211 | /* Placeholders for holes in the address space */ |
181 | static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data = | 212 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); |
182 | { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; | 213 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); |
214 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
183 | 215 | ||
184 | /* Arrays of p2m arrays expressed in mfns used for save/restore */ | 216 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); |
185 | static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss; | 217 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); |
218 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
186 | 219 | ||
187 | static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] | 220 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); |
188 | __page_aligned_bss; | 221 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); |
189 | 222 | ||
190 | static inline unsigned p2m_top_index(unsigned long pfn) | 223 | static inline unsigned p2m_top_index(unsigned long pfn) |
191 | { | 224 | { |
192 | BUG_ON(pfn >= MAX_DOMAIN_PAGES); | 225 | BUG_ON(pfn >= MAX_P2M_PFN); |
193 | return pfn / P2M_ENTRIES_PER_PAGE; | 226 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); |
227 | } | ||
228 | |||
229 | static inline unsigned p2m_mid_index(unsigned long pfn) | ||
230 | { | ||
231 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | ||
194 | } | 232 | } |
195 | 233 | ||
196 | static inline unsigned p2m_index(unsigned long pfn) | 234 | static inline unsigned p2m_index(unsigned long pfn) |
197 | { | 235 | { |
198 | return pfn % P2M_ENTRIES_PER_PAGE; | 236 | return pfn % P2M_PER_PAGE; |
237 | } | ||
238 | |||
239 | static void p2m_top_init(unsigned long ***top) | ||
240 | { | ||
241 | unsigned i; | ||
242 | |||
243 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
244 | top[i] = p2m_mid_missing; | ||
245 | } | ||
246 | |||
247 | static void p2m_top_mfn_init(unsigned long *top) | ||
248 | { | ||
249 | unsigned i; | ||
250 | |||
251 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
252 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | ||
253 | } | ||
254 | |||
255 | static void p2m_top_mfn_p_init(unsigned long **top) | ||
256 | { | ||
257 | unsigned i; | ||
258 | |||
259 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
260 | top[i] = p2m_mid_missing_mfn; | ||
199 | } | 261 | } |
200 | 262 | ||
201 | /* Build the parallel p2m_top_mfn structures */ | 263 | static void p2m_mid_init(unsigned long **mid) |
264 | { | ||
265 | unsigned i; | ||
266 | |||
267 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
268 | mid[i] = p2m_missing; | ||
269 | } | ||
270 | |||
271 | static void p2m_mid_mfn_init(unsigned long *mid) | ||
272 | { | ||
273 | unsigned i; | ||
274 | |||
275 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
276 | mid[i] = virt_to_mfn(p2m_missing); | ||
277 | } | ||
278 | |||
279 | static void p2m_init(unsigned long *p2m) | ||
280 | { | ||
281 | unsigned i; | ||
282 | |||
283 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
284 | p2m[i] = INVALID_P2M_ENTRY; | ||
285 | } | ||
286 | |||
287 | /* | ||
288 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | ||
289 | * | ||
290 | * This is called both at boot time, and after resuming from suspend: | ||
291 | * - At boot time we're called very early, and must use extend_brk() | ||
292 | * to allocate memory. | ||
293 | * | ||
294 | * - After resume we're called from within stop_machine, but the mfn | ||
295 | * tree should already be completely allocated. | ||
296 | */ | ||
202 | void xen_build_mfn_list_list(void) | 297 | void xen_build_mfn_list_list(void) |
203 | { | 298 | { |
204 | unsigned pfn, idx; | 299 | unsigned long pfn; |
205 | 300 | ||
206 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { | 301 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
207 | unsigned topidx = p2m_top_index(pfn); | 302 | if (p2m_top_mfn == NULL) { |
303 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
304 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
305 | |||
306 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
307 | p2m_top_mfn_p_init(p2m_top_mfn_p); | ||
208 | 308 | ||
209 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); | 309 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); |
310 | p2m_top_mfn_init(p2m_top_mfn); | ||
311 | } else { | ||
312 | /* Reinitialise, mfn's all change after migration */ | ||
313 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
210 | } | 314 | } |
211 | 315 | ||
212 | for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { | 316 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { |
213 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | 317 | unsigned topidx = p2m_top_index(pfn); |
214 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | 318 | unsigned mididx = p2m_mid_index(pfn); |
319 | unsigned long **mid; | ||
320 | unsigned long *mid_mfn_p; | ||
321 | |||
322 | mid = p2m_top[topidx]; | ||
323 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
324 | |||
325 | /* Don't bother allocating any mfn mid levels if | ||
326 | * they're just missing, just update the stored mfn, | ||
327 | * since all could have changed over a migrate. | ||
328 | */ | ||
329 | if (mid == p2m_mid_missing) { | ||
330 | BUG_ON(mididx); | ||
331 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
332 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | ||
333 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | ||
334 | continue; | ||
335 | } | ||
336 | |||
337 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
338 | /* | ||
339 | * XXX boot-time only! We should never find | ||
340 | * missing parts of the mfn tree after | ||
341 | * runtime. extend_brk() will BUG if we call | ||
342 | * it too late. | ||
343 | */ | ||
344 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
345 | p2m_mid_mfn_init(mid_mfn_p); | ||
346 | |||
347 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
348 | } | ||
349 | |||
350 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
351 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | ||
215 | } | 352 | } |
216 | } | 353 | } |
217 | 354 | ||
@@ -220,8 +357,8 @@ void xen_setup_mfn_list_list(void) | |||
220 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | 357 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
221 | 358 | ||
222 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | 359 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = |
223 | virt_to_mfn(p2m_top_mfn_list); | 360 | virt_to_mfn(p2m_top_mfn); |
224 | HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; | 361 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; |
225 | } | 362 | } |
226 | 363 | ||
227 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | 364 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
@@ -229,98 +366,176 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
229 | { | 366 | { |
230 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | 367 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; |
231 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | 368 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); |
232 | unsigned pfn; | 369 | unsigned long pfn; |
370 | |||
371 | xen_max_p2m_pfn = max_pfn; | ||
372 | |||
373 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
374 | p2m_init(p2m_missing); | ||
375 | |||
376 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
377 | p2m_mid_init(p2m_mid_missing); | ||
378 | |||
379 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
380 | p2m_top_init(p2m_top); | ||
233 | 381 | ||
234 | for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { | 382 | /* |
383 | * The domain builder gives us a pre-constructed p2m array in | ||
384 | * mfn_list for all the pages initially given to us, so we just | ||
385 | * need to graft that into our tree structure. | ||
386 | */ | ||
387 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
235 | unsigned topidx = p2m_top_index(pfn); | 388 | unsigned topidx = p2m_top_index(pfn); |
389 | unsigned mididx = p2m_mid_index(pfn); | ||
236 | 390 | ||
237 | p2m_top[topidx] = &mfn_list[pfn]; | 391 | if (p2m_top[topidx] == p2m_mid_missing) { |
238 | } | 392 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); |
393 | p2m_mid_init(mid); | ||
239 | 394 | ||
240 | xen_build_mfn_list_list(); | 395 | p2m_top[topidx] = mid; |
396 | } | ||
397 | |||
398 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
399 | } | ||
241 | } | 400 | } |
242 | 401 | ||
243 | unsigned long get_phys_to_machine(unsigned long pfn) | 402 | unsigned long get_phys_to_machine(unsigned long pfn) |
244 | { | 403 | { |
245 | unsigned topidx, idx; | 404 | unsigned topidx, mididx, idx; |
246 | 405 | ||
247 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) | 406 | if (unlikely(pfn >= MAX_P2M_PFN)) |
248 | return INVALID_P2M_ENTRY; | 407 | return INVALID_P2M_ENTRY; |
249 | 408 | ||
250 | topidx = p2m_top_index(pfn); | 409 | topidx = p2m_top_index(pfn); |
410 | mididx = p2m_mid_index(pfn); | ||
251 | idx = p2m_index(pfn); | 411 | idx = p2m_index(pfn); |
252 | return p2m_top[topidx][idx]; | 412 | |
413 | return p2m_top[topidx][mididx][idx]; | ||
253 | } | 414 | } |
254 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | 415 | EXPORT_SYMBOL_GPL(get_phys_to_machine); |
255 | 416 | ||
256 | /* install a new p2m_top page */ | 417 | static void *alloc_p2m_page(void) |
257 | bool install_p2mtop_page(unsigned long pfn, unsigned long *p) | ||
258 | { | 418 | { |
259 | unsigned topidx = p2m_top_index(pfn); | 419 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); |
260 | unsigned long **pfnp, *mfnp; | 420 | } |
261 | unsigned i; | ||
262 | 421 | ||
263 | pfnp = &p2m_top[topidx]; | 422 | static void free_p2m_page(void *p) |
264 | mfnp = &p2m_top_mfn[topidx]; | 423 | { |
424 | free_page((unsigned long)p); | ||
425 | } | ||
265 | 426 | ||
266 | for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | 427 | /* |
267 | p[i] = INVALID_P2M_ENTRY; | 428 | * Fully allocate the p2m structure for a given pfn. We need to check |
429 | * that both the top and mid levels are allocated, and make sure the | ||
430 | * parallel mfn tree is kept in sync. We may race with other cpus, so | ||
431 | * the new pages are installed with cmpxchg; if we lose the race then | ||
432 | * simply free the page we allocated and use the one that's there. | ||
433 | */ | ||
434 | static bool alloc_p2m(unsigned long pfn) | ||
435 | { | ||
436 | unsigned topidx, mididx; | ||
437 | unsigned long ***top_p, **mid; | ||
438 | unsigned long *top_mfn_p, *mid_mfn; | ||
268 | 439 | ||
269 | if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { | 440 | topidx = p2m_top_index(pfn); |
270 | *mfnp = virt_to_mfn(p); | 441 | mididx = p2m_mid_index(pfn); |
271 | return true; | 442 | |
443 | top_p = &p2m_top[topidx]; | ||
444 | mid = *top_p; | ||
445 | |||
446 | if (mid == p2m_mid_missing) { | ||
447 | /* Mid level is missing, allocate a new one */ | ||
448 | mid = alloc_p2m_page(); | ||
449 | if (!mid) | ||
450 | return false; | ||
451 | |||
452 | p2m_mid_init(mid); | ||
453 | |||
454 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
455 | free_p2m_page(mid); | ||
272 | } | 456 | } |
273 | 457 | ||
274 | return false; | 458 | top_mfn_p = &p2m_top_mfn[topidx]; |
275 | } | 459 | mid_mfn = p2m_top_mfn_p[topidx]; |
276 | 460 | ||
277 | static void alloc_p2m(unsigned long pfn) | 461 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); |
278 | { | ||
279 | unsigned long *p; | ||
280 | 462 | ||
281 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | 463 | if (mid_mfn == p2m_mid_missing_mfn) { |
282 | BUG_ON(p == NULL); | 464 | /* Separately check the mid mfn level */ |
465 | unsigned long missing_mfn; | ||
466 | unsigned long mid_mfn_mfn; | ||
283 | 467 | ||
284 | if (!install_p2mtop_page(pfn, p)) | 468 | mid_mfn = alloc_p2m_page(); |
285 | free_page((unsigned long)p); | 469 | if (!mid_mfn) |
470 | return false; | ||
471 | |||
472 | p2m_mid_mfn_init(mid_mfn); | ||
473 | |||
474 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | ||
475 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | ||
476 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | ||
477 | free_p2m_page(mid_mfn); | ||
478 | else | ||
479 | p2m_top_mfn_p[topidx] = mid_mfn; | ||
480 | } | ||
481 | |||
482 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
483 | /* p2m leaf page is missing */ | ||
484 | unsigned long *p2m; | ||
485 | |||
486 | p2m = alloc_p2m_page(); | ||
487 | if (!p2m) | ||
488 | return false; | ||
489 | |||
490 | p2m_init(p2m); | ||
491 | |||
492 | if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) | ||
493 | free_p2m_page(p2m); | ||
494 | else | ||
495 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
496 | } | ||
497 | |||
498 | return true; | ||
286 | } | 499 | } |
287 | 500 | ||
288 | /* Try to install p2m mapping; fail if intermediate bits missing */ | 501 | /* Try to install p2m mapping; fail if intermediate bits missing */ |
289 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 502 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
290 | { | 503 | { |
291 | unsigned topidx, idx; | 504 | unsigned topidx, mididx, idx; |
292 | 505 | ||
293 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { | 506 | if (unlikely(pfn >= MAX_P2M_PFN)) { |
294 | BUG_ON(mfn != INVALID_P2M_ENTRY); | 507 | BUG_ON(mfn != INVALID_P2M_ENTRY); |
295 | return true; | 508 | return true; |
296 | } | 509 | } |
297 | 510 | ||
298 | topidx = p2m_top_index(pfn); | 511 | topidx = p2m_top_index(pfn); |
299 | if (p2m_top[topidx] == p2m_missing) { | 512 | mididx = p2m_mid_index(pfn); |
300 | if (mfn == INVALID_P2M_ENTRY) | ||
301 | return true; | ||
302 | return false; | ||
303 | } | ||
304 | |||
305 | idx = p2m_index(pfn); | 513 | idx = p2m_index(pfn); |
306 | p2m_top[topidx][idx] = mfn; | 514 | |
515 | if (p2m_top[topidx][mididx] == p2m_missing) | ||
516 | return mfn == INVALID_P2M_ENTRY; | ||
517 | |||
518 | p2m_top[topidx][mididx][idx] = mfn; | ||
307 | 519 | ||
308 | return true; | 520 | return true; |
309 | } | 521 | } |
310 | 522 | ||
311 | void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 523 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
312 | { | 524 | { |
313 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | 525 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { |
314 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | 526 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); |
315 | return; | 527 | return true; |
316 | } | 528 | } |
317 | 529 | ||
318 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | 530 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
319 | alloc_p2m(pfn); | 531 | if (!alloc_p2m(pfn)) |
532 | return false; | ||
320 | 533 | ||
321 | if (!__set_phys_to_machine(pfn, mfn)) | 534 | if (!__set_phys_to_machine(pfn, mfn)) |
322 | BUG(); | 535 | return false; |
323 | } | 536 | } |
537 | |||
538 | return true; | ||
324 | } | 539 | } |
325 | 540 | ||
326 | unsigned long arbitrary_virt_to_mfn(void *vaddr) | 541 | unsigned long arbitrary_virt_to_mfn(void *vaddr) |
@@ -359,7 +574,8 @@ void make_lowmem_page_readonly(void *vaddr) | |||
359 | unsigned int level; | 574 | unsigned int level; |
360 | 575 | ||
361 | pte = lookup_address(address, &level); | 576 | pte = lookup_address(address, &level); |
362 | BUG_ON(pte == NULL); | 577 | if (pte == NULL) |
578 | return; /* vaddr missing */ | ||
363 | 579 | ||
364 | ptev = pte_wrprotect(*pte); | 580 | ptev = pte_wrprotect(*pte); |
365 | 581 | ||
@@ -374,7 +590,8 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
374 | unsigned int level; | 590 | unsigned int level; |
375 | 591 | ||
376 | pte = lookup_address(address, &level); | 592 | pte = lookup_address(address, &level); |
377 | BUG_ON(pte == NULL); | 593 | if (pte == NULL) |
594 | return; /* vaddr missing */ | ||
378 | 595 | ||
379 | ptev = pte_mkwrite(*pte); | 596 | ptev = pte_mkwrite(*pte); |
380 | 597 | ||
@@ -563,7 +780,20 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) | |||
563 | if (val & _PAGE_PRESENT) { | 780 | if (val & _PAGE_PRESENT) { |
564 | unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; | 781 | unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; |
565 | pteval_t flags = val & PTE_FLAGS_MASK; | 782 | pteval_t flags = val & PTE_FLAGS_MASK; |
566 | val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | 783 | unsigned long mfn = pfn_to_mfn(pfn); |
784 | |||
785 | /* | ||
786 | * If there's no mfn for the pfn, then just create an | ||
787 | * empty non-present pte. Unfortunately this loses | ||
788 | * information about the original pfn, so | ||
789 | * pte_mfn_to_pfn is asymmetric. | ||
790 | */ | ||
791 | if (unlikely(mfn == INVALID_P2M_ENTRY)) { | ||
792 | mfn = 0; | ||
793 | flags = 0; | ||
794 | } | ||
795 | |||
796 | val = ((pteval_t)mfn << PAGE_SHIFT) | flags; | ||
567 | } | 797 | } |
568 | 798 | ||
569 | return val; | 799 | return val; |
@@ -585,10 +815,18 @@ static pteval_t iomap_pte(pteval_t val) | |||
585 | 815 | ||
586 | pteval_t xen_pte_val(pte_t pte) | 816 | pteval_t xen_pte_val(pte_t pte) |
587 | { | 817 | { |
588 | if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP)) | 818 | pteval_t pteval = pte.pte; |
589 | return pte.pte; | 819 | |
820 | /* If this is a WC pte, convert back from Xen WC to Linux WC */ | ||
821 | if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { | ||
822 | WARN_ON(!pat_enabled); | ||
823 | pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; | ||
824 | } | ||
825 | |||
826 | if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) | ||
827 | return pteval; | ||
590 | 828 | ||
591 | return pte_mfn_to_pfn(pte.pte); | 829 | return pte_mfn_to_pfn(pteval); |
592 | } | 830 | } |
593 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); | 831 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); |
594 | 832 | ||
@@ -598,10 +836,48 @@ pgdval_t xen_pgd_val(pgd_t pgd) | |||
598 | } | 836 | } |
599 | PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); | 837 | PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); |
600 | 838 | ||
839 | /* | ||
840 | * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7 | ||
841 | * are reserved for now, to correspond to the Intel-reserved PAT | ||
842 | * types. | ||
843 | * | ||
844 | * We expect Linux's PAT set as follows: | ||
845 | * | ||
846 | * Idx PTE flags Linux Xen Default | ||
847 | * 0 WB WB WB | ||
848 | * 1 PWT WC WT WT | ||
849 | * 2 PCD UC- UC- UC- | ||
850 | * 3 PCD PWT UC UC UC | ||
851 | * 4 PAT WB WC WB | ||
852 | * 5 PAT PWT WC WP WT | ||
853 | * 6 PAT PCD UC- UC UC- | ||
854 | * 7 PAT PCD PWT UC UC UC | ||
855 | */ | ||
856 | |||
857 | void xen_set_pat(u64 pat) | ||
858 | { | ||
859 | /* We expect Linux to use a PAT setting of | ||
860 | * UC UC- WC WB (ignoring the PAT flag) */ | ||
861 | WARN_ON(pat != 0x0007010600070106ull); | ||
862 | } | ||
863 | |||
601 | pte_t xen_make_pte(pteval_t pte) | 864 | pte_t xen_make_pte(pteval_t pte) |
602 | { | 865 | { |
603 | phys_addr_t addr = (pte & PTE_PFN_MASK); | 866 | phys_addr_t addr = (pte & PTE_PFN_MASK); |
604 | 867 | ||
868 | /* If Linux is trying to set a WC pte, then map to the Xen WC. | ||
869 | * If _PAGE_PAT is set, then it probably means it is really | ||
870 | * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope | ||
871 | * things work out OK... | ||
872 | * | ||
873 | * (We should never see kernel mappings with _PAGE_PSE set, | ||
874 | * but we could see hugetlbfs mappings, I think.). | ||
875 | */ | ||
876 | if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) { | ||
877 | if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) | ||
878 | pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; | ||
879 | } | ||
880 | |||
605 | /* | 881 | /* |
606 | * Unprivileged domains are allowed to do IOMAPpings for | 882 | * Unprivileged domains are allowed to do IOMAPpings for |
607 | * PCI passthrough, but not map ISA space. The ISA | 883 | * PCI passthrough, but not map ISA space. The ISA |
@@ -1514,13 +1790,25 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
1514 | #endif | 1790 | #endif |
1515 | } | 1791 | } |
1516 | 1792 | ||
1517 | #ifdef CONFIG_X86_32 | ||
1518 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | 1793 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) |
1519 | { | 1794 | { |
1795 | unsigned long pfn = pte_pfn(pte); | ||
1796 | |||
1797 | #ifdef CONFIG_X86_32 | ||
1520 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ | 1798 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ |
1521 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) | 1799 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) |
1522 | pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & | 1800 | pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & |
1523 | pte_val_ma(pte)); | 1801 | pte_val_ma(pte)); |
1802 | #endif | ||
1803 | |||
1804 | /* | ||
1805 | * If the new pfn is within the range of the newly allocated | ||
1806 | * kernel pagetable, and it isn't being mapped into an | ||
1807 | * early_ioremap fixmap slot, make sure it is RO. | ||
1808 | */ | ||
1809 | if (!is_early_ioremap_ptep(ptep) && | ||
1810 | pfn >= e820_table_start && pfn < e820_table_end) | ||
1811 | pte = pte_wrprotect(pte); | ||
1524 | 1812 | ||
1525 | return pte; | 1813 | return pte; |
1526 | } | 1814 | } |
@@ -1533,7 +1821,6 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
1533 | 1821 | ||
1534 | xen_set_pte(ptep, pte); | 1822 | xen_set_pte(ptep, pte); |
1535 | } | 1823 | } |
1536 | #endif | ||
1537 | 1824 | ||
1538 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1825 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
1539 | { | 1826 | { |
@@ -1688,6 +1975,7 @@ static void *m2v(phys_addr_t maddr) | |||
1688 | return __ka(m2p(maddr)); | 1975 | return __ka(m2p(maddr)); |
1689 | } | 1976 | } |
1690 | 1977 | ||
1978 | /* Set the page permissions on an identity-mapped pages */ | ||
1691 | static void set_page_prot(void *addr, pgprot_t prot) | 1979 | static void set_page_prot(void *addr, pgprot_t prot) |
1692 | { | 1980 | { |
1693 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | 1981 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; |
@@ -1703,6 +1991,9 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1703 | unsigned ident_pte; | 1991 | unsigned ident_pte; |
1704 | unsigned long pfn; | 1992 | unsigned long pfn; |
1705 | 1993 | ||
1994 | level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES, | ||
1995 | PAGE_SIZE); | ||
1996 | |||
1706 | ident_pte = 0; | 1997 | ident_pte = 0; |
1707 | pfn = 0; | 1998 | pfn = 0; |
1708 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { | 1999 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { |
@@ -1713,7 +2004,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1713 | pte_page = m2v(pmd[pmdidx].pmd); | 2004 | pte_page = m2v(pmd[pmdidx].pmd); |
1714 | else { | 2005 | else { |
1715 | /* Check for free pte pages */ | 2006 | /* Check for free pte pages */ |
1716 | if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) | 2007 | if (ident_pte == LEVEL1_IDENT_ENTRIES) |
1717 | break; | 2008 | break; |
1718 | 2009 | ||
1719 | pte_page = &level1_ident_pgt[ident_pte]; | 2010 | pte_page = &level1_ident_pgt[ident_pte]; |
@@ -1820,7 +2111,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1820 | __xen_write_cr3(true, __pa(pgd)); | 2111 | __xen_write_cr3(true, __pa(pgd)); |
1821 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 2112 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1822 | 2113 | ||
1823 | reserve_early(__pa(xen_start_info->pt_base), | 2114 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
1824 | __pa(xen_start_info->pt_base + | 2115 | __pa(xen_start_info->pt_base + |
1825 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 2116 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
1826 | "XEN PAGETABLES"); | 2117 | "XEN PAGETABLES"); |
@@ -1828,13 +2119,15 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1828 | return pgd; | 2119 | return pgd; |
1829 | } | 2120 | } |
1830 | #else /* !CONFIG_X86_64 */ | 2121 | #else /* !CONFIG_X86_64 */ |
1831 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; | 2122 | static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); |
1832 | 2123 | ||
1833 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 2124 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
1834 | unsigned long max_pfn) | 2125 | unsigned long max_pfn) |
1835 | { | 2126 | { |
1836 | pmd_t *kernel_pmd; | 2127 | pmd_t *kernel_pmd; |
1837 | 2128 | ||
2129 | level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | ||
2130 | |||
1838 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + | 2131 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + |
1839 | xen_start_info->nr_pt_frames * PAGE_SIZE + | 2132 | xen_start_info->nr_pt_frames * PAGE_SIZE + |
1840 | 512*1024); | 2133 | 512*1024); |
@@ -1858,7 +2151,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1858 | 2151 | ||
1859 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); | 2152 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); |
1860 | 2153 | ||
1861 | reserve_early(__pa(xen_start_info->pt_base), | 2154 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
1862 | __pa(xen_start_info->pt_base + | 2155 | __pa(xen_start_info->pt_base + |
1863 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 2156 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
1864 | "XEN PAGETABLES"); | 2157 | "XEN PAGETABLES"); |
@@ -1867,6 +2160,8 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1867 | } | 2160 | } |
1868 | #endif /* CONFIG_X86_64 */ | 2161 | #endif /* CONFIG_X86_64 */ |
1869 | 2162 | ||
2163 | static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; | ||
2164 | |||
1870 | static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | 2165 | static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) |
1871 | { | 2166 | { |
1872 | pte_t pte; | 2167 | pte_t pte; |
@@ -1887,15 +2182,28 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1887 | #else | 2182 | #else |
1888 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: | 2183 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: |
1889 | #endif | 2184 | #endif |
1890 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1891 | case FIX_APIC_BASE: /* maps dummy local APIC */ | ||
1892 | #endif | ||
1893 | case FIX_TEXT_POKE0: | 2185 | case FIX_TEXT_POKE0: |
1894 | case FIX_TEXT_POKE1: | 2186 | case FIX_TEXT_POKE1: |
1895 | /* All local page mappings */ | 2187 | /* All local page mappings */ |
1896 | pte = pfn_pte(phys, prot); | 2188 | pte = pfn_pte(phys, prot); |
1897 | break; | 2189 | break; |
1898 | 2190 | ||
2191 | #ifdef CONFIG_X86_LOCAL_APIC | ||
2192 | case FIX_APIC_BASE: /* maps dummy local APIC */ | ||
2193 | pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); | ||
2194 | break; | ||
2195 | #endif | ||
2196 | |||
2197 | #ifdef CONFIG_X86_IO_APIC | ||
2198 | case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END: | ||
2199 | /* | ||
2200 | * We just don't map the IO APIC - all access is via | ||
2201 | * hypercalls. Keep the address in the pte for reference. | ||
2202 | */ | ||
2203 | pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); | ||
2204 | break; | ||
2205 | #endif | ||
2206 | |||
1899 | case FIX_PARAVIRT_BOOTMAP: | 2207 | case FIX_PARAVIRT_BOOTMAP: |
1900 | /* This is an MFN, but it isn't an IO mapping from the | 2208 | /* This is an MFN, but it isn't an IO mapping from the |
1901 | IO domain */ | 2209 | IO domain */ |
@@ -1920,6 +2228,29 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1920 | #endif | 2228 | #endif |
1921 | } | 2229 | } |
1922 | 2230 | ||
2231 | __init void xen_ident_map_ISA(void) | ||
2232 | { | ||
2233 | unsigned long pa; | ||
2234 | |||
2235 | /* | ||
2236 | * If we're dom0, then linear map the ISA machine addresses into | ||
2237 | * the kernel's address space. | ||
2238 | */ | ||
2239 | if (!xen_initial_domain()) | ||
2240 | return; | ||
2241 | |||
2242 | xen_raw_printk("Xen: setup ISA identity maps\n"); | ||
2243 | |||
2244 | for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { | ||
2245 | pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); | ||
2246 | |||
2247 | if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) | ||
2248 | BUG(); | ||
2249 | } | ||
2250 | |||
2251 | xen_flush_tlb(); | ||
2252 | } | ||
2253 | |||
1923 | static __init void xen_post_allocator_init(void) | 2254 | static __init void xen_post_allocator_init(void) |
1924 | { | 2255 | { |
1925 | pv_mmu_ops.set_pte = xen_set_pte; | 2256 | pv_mmu_ops.set_pte = xen_set_pte; |
@@ -1975,14 +2306,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1975 | .alloc_pte = xen_alloc_pte_init, | 2306 | .alloc_pte = xen_alloc_pte_init, |
1976 | .release_pte = xen_release_pte_init, | 2307 | .release_pte = xen_release_pte_init, |
1977 | .alloc_pmd = xen_alloc_pmd_init, | 2308 | .alloc_pmd = xen_alloc_pmd_init, |
1978 | .alloc_pmd_clone = paravirt_nop, | ||
1979 | .release_pmd = xen_release_pmd_init, | 2309 | .release_pmd = xen_release_pmd_init, |
1980 | 2310 | ||
1981 | #ifdef CONFIG_X86_64 | ||
1982 | .set_pte = xen_set_pte, | ||
1983 | #else | ||
1984 | .set_pte = xen_set_pte_init, | 2311 | .set_pte = xen_set_pte_init, |
1985 | #endif | ||
1986 | .set_pte_at = xen_set_pte_at, | 2312 | .set_pte_at = xen_set_pte_at, |
1987 | .set_pmd = xen_set_pmd_hyper, | 2313 | .set_pmd = xen_set_pmd_hyper, |
1988 | 2314 | ||
@@ -2033,6 +2359,8 @@ void __init xen_init_mmu_ops(void) | |||
2033 | pv_mmu_ops = xen_mmu_ops; | 2359 | pv_mmu_ops = xen_mmu_ops; |
2034 | 2360 | ||
2035 | vmap_lazy_unmap = false; | 2361 | vmap_lazy_unmap = false; |
2362 | |||
2363 | memset(dummy_mapping, 0xff, PAGE_SIZE); | ||
2036 | } | 2364 | } |
2037 | 2365 | ||
2038 | /* Protected by xen_reservation_lock. */ | 2366 | /* Protected by xen_reservation_lock. */ |
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index fa938c4aa2f7..537bb9aab777 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -12,7 +12,6 @@ enum pt_level { | |||
12 | 12 | ||
13 | 13 | ||
14 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 14 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
15 | bool install_p2mtop_page(unsigned long pfn, unsigned long *p); | ||
16 | 15 | ||
17 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 16 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
18 | 17 | ||
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index a013ec9d0c54..bfd0632fe65e 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c | |||
@@ -1,10 +1,12 @@ | |||
1 | /* Glue code to lib/swiotlb-xen.c */ | 1 | /* Glue code to lib/swiotlb-xen.c */ |
2 | 2 | ||
3 | #include <linux/dma-mapping.h> | 3 | #include <linux/dma-mapping.h> |
4 | #include <linux/pci.h> | ||
4 | #include <xen/swiotlb-xen.h> | 5 | #include <xen/swiotlb-xen.h> |
5 | 6 | ||
6 | #include <asm/xen/hypervisor.h> | 7 | #include <asm/xen/hypervisor.h> |
7 | #include <xen/xen.h> | 8 | #include <xen/xen.h> |
9 | #include <asm/iommu_table.h> | ||
8 | 10 | ||
9 | int xen_swiotlb __read_mostly; | 11 | int xen_swiotlb __read_mostly; |
10 | 12 | ||
@@ -54,5 +56,12 @@ void __init pci_xen_swiotlb_init(void) | |||
54 | if (xen_swiotlb) { | 56 | if (xen_swiotlb) { |
55 | xen_swiotlb_init(1); | 57 | xen_swiotlb_init(1); |
56 | dma_ops = &xen_swiotlb_dma_ops; | 58 | dma_ops = &xen_swiotlb_dma_ops; |
59 | |||
60 | /* Make sure ACS will be enabled */ | ||
61 | pci_request_acs(); | ||
57 | } | 62 | } |
58 | } | 63 | } |
64 | IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, | ||
65 | 0, | ||
66 | pci_xen_swiotlb_init, | ||
67 | 0); | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 328b00305426..769c4b01fa32 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/pm.h> | 10 | #include <linux/pm.h> |
11 | #include <linux/memblock.h> | ||
11 | 12 | ||
12 | #include <asm/elf.h> | 13 | #include <asm/elf.h> |
13 | #include <asm/vdso.h> | 14 | #include <asm/vdso.h> |
@@ -17,8 +18,10 @@ | |||
17 | #include <asm/xen/hypervisor.h> | 18 | #include <asm/xen/hypervisor.h> |
18 | #include <asm/xen/hypercall.h> | 19 | #include <asm/xen/hypercall.h> |
19 | 20 | ||
21 | #include <xen/xen.h> | ||
20 | #include <xen/page.h> | 22 | #include <xen/page.h> |
21 | #include <xen/interface/callback.h> | 23 | #include <xen/interface/callback.h> |
24 | #include <xen/interface/memory.h> | ||
22 | #include <xen/interface/physdev.h> | 25 | #include <xen/interface/physdev.h> |
23 | #include <xen/interface/memory.h> | 26 | #include <xen/interface/memory.h> |
24 | #include <xen/features.h> | 27 | #include <xen/features.h> |
@@ -33,6 +36,39 @@ extern void xen_sysenter_target(void); | |||
33 | extern void xen_syscall_target(void); | 36 | extern void xen_syscall_target(void); |
34 | extern void xen_syscall32_target(void); | 37 | extern void xen_syscall32_target(void); |
35 | 38 | ||
39 | /* Amount of extra memory space we add to the e820 ranges */ | ||
40 | phys_addr_t xen_extra_mem_start, xen_extra_mem_size; | ||
41 | |||
42 | /* | ||
43 | * The maximum amount of extra memory compared to the base size. The | ||
44 | * main scaling factor is the size of struct page. At extreme ratios | ||
45 | * of base:extra, all the base memory can be filled with page | ||
46 | * structures for the extra memory, leaving no space for anything | ||
47 | * else. | ||
48 | * | ||
49 | * 10x seems like a reasonable balance between scaling flexibility and | ||
50 | * leaving a practically usable system. | ||
51 | */ | ||
52 | #define EXTRA_MEM_RATIO (10) | ||
53 | |||
54 | static __init void xen_add_extra_mem(unsigned long pages) | ||
55 | { | ||
56 | u64 size = (u64)pages * PAGE_SIZE; | ||
57 | u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; | ||
58 | |||
59 | if (!pages) | ||
60 | return; | ||
61 | |||
62 | e820_add_region(extra_start, size, E820_RAM); | ||
63 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
64 | |||
65 | memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA"); | ||
66 | |||
67 | xen_extra_mem_size += size; | ||
68 | |||
69 | xen_max_p2m_pfn = PFN_DOWN(extra_start + size); | ||
70 | } | ||
71 | |||
36 | static unsigned long __init xen_release_chunk(phys_addr_t start_addr, | 72 | static unsigned long __init xen_release_chunk(phys_addr_t start_addr, |
37 | phys_addr_t end_addr) | 73 | phys_addr_t end_addr) |
38 | { | 74 | { |
@@ -82,16 +118,18 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, | |||
82 | const struct e820map *e820) | 118 | const struct e820map *e820) |
83 | { | 119 | { |
84 | phys_addr_t max_addr = PFN_PHYS(max_pfn); | 120 | phys_addr_t max_addr = PFN_PHYS(max_pfn); |
85 | phys_addr_t last_end = 0; | 121 | phys_addr_t last_end = ISA_END_ADDRESS; |
86 | unsigned long released = 0; | 122 | unsigned long released = 0; |
87 | int i; | 123 | int i; |
88 | 124 | ||
125 | /* Free any unused memory above the low 1Mbyte. */ | ||
89 | for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { | 126 | for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { |
90 | phys_addr_t end = e820->map[i].addr; | 127 | phys_addr_t end = e820->map[i].addr; |
91 | end = min(max_addr, end); | 128 | end = min(max_addr, end); |
92 | 129 | ||
93 | released += xen_release_chunk(last_end, end); | 130 | if (last_end < end) |
94 | last_end = e820->map[i].addr + e820->map[i].size; | 131 | released += xen_release_chunk(last_end, end); |
132 | last_end = max(last_end, e820->map[i].addr + e820->map[i].size); | ||
95 | } | 133 | } |
96 | 134 | ||
97 | if (last_end < max_addr) | 135 | if (last_end < max_addr) |
@@ -104,21 +142,75 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, | |||
104 | /** | 142 | /** |
105 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 143 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
106 | **/ | 144 | **/ |
107 | |||
108 | char * __init xen_memory_setup(void) | 145 | char * __init xen_memory_setup(void) |
109 | { | 146 | { |
147 | static struct e820entry map[E820MAX] __initdata; | ||
148 | |||
110 | unsigned long max_pfn = xen_start_info->nr_pages; | 149 | unsigned long max_pfn = xen_start_info->nr_pages; |
150 | unsigned long long mem_end; | ||
151 | int rc; | ||
152 | struct xen_memory_map memmap; | ||
153 | unsigned long extra_pages = 0; | ||
154 | unsigned long extra_limit; | ||
155 | int i; | ||
156 | int op; | ||
111 | 157 | ||
112 | max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); | 158 | max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); |
159 | mem_end = PFN_PHYS(max_pfn); | ||
160 | |||
161 | memmap.nr_entries = E820MAX; | ||
162 | set_xen_guest_handle(memmap.buffer, map); | ||
163 | |||
164 | op = xen_initial_domain() ? | ||
165 | XENMEM_machine_memory_map : | ||
166 | XENMEM_memory_map; | ||
167 | rc = HYPERVISOR_memory_op(op, &memmap); | ||
168 | if (rc == -ENOSYS) { | ||
169 | BUG_ON(xen_initial_domain()); | ||
170 | memmap.nr_entries = 1; | ||
171 | map[0].addr = 0ULL; | ||
172 | map[0].size = mem_end; | ||
173 | /* 8MB slack (to balance backend allocations). */ | ||
174 | map[0].size += 8ULL << 20; | ||
175 | map[0].type = E820_RAM; | ||
176 | rc = 0; | ||
177 | } | ||
178 | BUG_ON(rc); | ||
113 | 179 | ||
114 | e820.nr_map = 0; | 180 | e820.nr_map = 0; |
181 | xen_extra_mem_start = mem_end; | ||
182 | for (i = 0; i < memmap.nr_entries; i++) { | ||
183 | unsigned long long end = map[i].addr + map[i].size; | ||
184 | |||
185 | if (map[i].type == E820_RAM) { | ||
186 | if (map[i].addr < mem_end && end > mem_end) { | ||
187 | /* Truncate region to max_mem. */ | ||
188 | u64 delta = end - mem_end; | ||
115 | 189 | ||
116 | e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM); | 190 | map[i].size -= delta; |
191 | extra_pages += PFN_DOWN(delta); | ||
192 | |||
193 | end = mem_end; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | if (end > xen_extra_mem_start) | ||
198 | xen_extra_mem_start = end; | ||
199 | |||
200 | /* If region is non-RAM or below mem_end, add what remains */ | ||
201 | if ((map[i].type != E820_RAM || map[i].addr < mem_end) && | ||
202 | map[i].size > 0) | ||
203 | e820_add_region(map[i].addr, map[i].size, map[i].type); | ||
204 | } | ||
117 | 205 | ||
118 | /* | 206 | /* |
119 | * Even though this is normal, usable memory under Xen, reserve | 207 | * In domU, the ISA region is normal, usable memory, but we |
120 | * ISA memory anyway because too many things think they can poke | 208 | * reserve ISA memory anyway because too many things poke |
121 | * about in there. | 209 | * about in there. |
210 | * | ||
211 | * In Dom0, the host E820 information can leave gaps in the | ||
212 | * ISA range, which would cause us to release those pages. To | ||
213 | * avoid this, we unconditionally reserve them here. | ||
122 | */ | 214 | */ |
123 | e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, | 215 | e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, |
124 | E820_RESERVED); | 216 | E820_RESERVED); |
@@ -129,13 +221,35 @@ char * __init xen_memory_setup(void) | |||
129 | * - xen_start_info | 221 | * - xen_start_info |
130 | * See comment above "struct start_info" in <xen/interface/xen.h> | 222 | * See comment above "struct start_info" in <xen/interface/xen.h> |
131 | */ | 223 | */ |
132 | reserve_early(__pa(xen_start_info->mfn_list), | 224 | memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), |
133 | __pa(xen_start_info->pt_base), | 225 | __pa(xen_start_info->pt_base), |
134 | "XEN START INFO"); | 226 | "XEN START INFO"); |
135 | 227 | ||
136 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 228 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
137 | 229 | ||
138 | xen_return_unused_memory(xen_start_info->nr_pages, &e820); | 230 | extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820); |
231 | |||
232 | /* | ||
233 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO | ||
234 | * factor the base size. On non-highmem systems, the base | ||
235 | * size is the full initial memory allocation; on highmem it | ||
236 | * is limited to the max size of lowmem, so that it doesn't | ||
237 | * get completely filled. | ||
238 | * | ||
239 | * In principle there could be a problem in lowmem systems if | ||
240 | * the initial memory is also very large with respect to | ||
241 | * lowmem, but we won't try to deal with that here. | ||
242 | */ | ||
243 | extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), | ||
244 | max_pfn + extra_pages); | ||
245 | |||
246 | if (extra_limit >= max_pfn) | ||
247 | extra_pages = extra_limit - max_pfn; | ||
248 | else | ||
249 | extra_pages = 0; | ||
250 | |||
251 | if (!xen_initial_domain()) | ||
252 | xen_add_extra_mem(extra_pages); | ||
139 | 253 | ||
140 | return "Xen"; | 254 | return "Xen"; |
141 | } | 255 | } |
@@ -260,7 +374,5 @@ void __init xen_arch_setup(void) | |||
260 | 374 | ||
261 | pm_idle = xen_idle; | 375 | pm_idle = xen_idle; |
262 | 376 | ||
263 | paravirt_disable_iospace(); | ||
264 | |||
265 | fiddle_vdso(); | 377 | fiddle_vdso(); |
266 | } | 378 | } |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 25f232b18a82..72a4c7959045 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/xen/interface.h> | 28 | #include <asm/xen/interface.h> |
29 | #include <asm/xen/hypercall.h> | 29 | #include <asm/xen/hypercall.h> |
30 | 30 | ||
31 | #include <xen/xen.h> | ||
31 | #include <xen/page.h> | 32 | #include <xen/page.h> |
32 | #include <xen/events.h> | 33 | #include <xen/events.h> |
33 | 34 | ||
@@ -156,11 +157,35 @@ static void __init xen_fill_possible_map(void) | |||
156 | { | 157 | { |
157 | int i, rc; | 158 | int i, rc; |
158 | 159 | ||
160 | if (xen_initial_domain()) | ||
161 | return; | ||
162 | |||
163 | for (i = 0; i < nr_cpu_ids; i++) { | ||
164 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | ||
165 | if (rc >= 0) { | ||
166 | num_processors++; | ||
167 | set_cpu_possible(i, true); | ||
168 | } | ||
169 | } | ||
170 | } | ||
171 | |||
172 | static void __init xen_filter_cpu_maps(void) | ||
173 | { | ||
174 | int i, rc; | ||
175 | |||
176 | if (!xen_initial_domain()) | ||
177 | return; | ||
178 | |||
179 | num_processors = 0; | ||
180 | disabled_cpus = 0; | ||
159 | for (i = 0; i < nr_cpu_ids; i++) { | 181 | for (i = 0; i < nr_cpu_ids; i++) { |
160 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | 182 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); |
161 | if (rc >= 0) { | 183 | if (rc >= 0) { |
162 | num_processors++; | 184 | num_processors++; |
163 | set_cpu_possible(i, true); | 185 | set_cpu_possible(i, true); |
186 | } else { | ||
187 | set_cpu_possible(i, false); | ||
188 | set_cpu_present(i, false); | ||
164 | } | 189 | } |
165 | } | 190 | } |
166 | } | 191 | } |
@@ -174,6 +199,7 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
174 | old memory can be recycled */ | 199 | old memory can be recycled */ |
175 | make_lowmem_page_readwrite(xen_initial_gdt); | 200 | make_lowmem_page_readwrite(xen_initial_gdt); |
176 | 201 | ||
202 | xen_filter_cpu_maps(); | ||
177 | xen_setup_vcpu_info_placement(); | 203 | xen_setup_vcpu_info_placement(); |
178 | } | 204 | } |
179 | 205 | ||
@@ -400,9 +426,9 @@ static void stop_self(void *v) | |||
400 | BUG(); | 426 | BUG(); |
401 | } | 427 | } |
402 | 428 | ||
403 | static void xen_smp_send_stop(void) | 429 | static void xen_stop_other_cpus(int wait) |
404 | { | 430 | { |
405 | smp_call_function(stop_self, NULL, 0); | 431 | smp_call_function(stop_self, NULL, wait); |
406 | } | 432 | } |
407 | 433 | ||
408 | static void xen_smp_send_reschedule(int cpu) | 434 | static void xen_smp_send_reschedule(int cpu) |
@@ -470,7 +496,7 @@ static const struct smp_ops xen_smp_ops __initdata = { | |||
470 | .cpu_disable = xen_cpu_disable, | 496 | .cpu_disable = xen_cpu_disable, |
471 | .play_dead = xen_play_dead, | 497 | .play_dead = xen_play_dead, |
472 | 498 | ||
473 | .smp_send_stop = xen_smp_send_stop, | 499 | .stop_other_cpus = xen_stop_other_cpus, |
474 | .smp_send_reschedule = xen_smp_send_reschedule, | 500 | .smp_send_reschedule = xen_smp_send_reschedule, |
475 | 501 | ||
476 | .send_call_func_ipi = xen_smp_send_call_function_ipi, | 502 | .send_call_func_ipi = xen_smp_send_call_function_ipi, |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index e0500646585d..23e061b9327b 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -224,7 +224,7 @@ static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enab | |||
224 | goto out; | 224 | goto out; |
225 | } | 225 | } |
226 | 226 | ||
227 | flags = __raw_local_save_flags(); | 227 | flags = arch_local_save_flags(); |
228 | if (irq_enable) { | 228 | if (irq_enable) { |
229 | ADD_STATS(taken_slow_irqenable, 1); | 229 | ADD_STATS(taken_slow_irqenable, 1); |
230 | raw_local_irq_enable(); | 230 | raw_local_irq_enable(); |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 7c8ab86163e9..64044747348e 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -30,6 +30,9 @@ void xen_setup_machphys_mapping(void); | |||
30 | pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | 30 | pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); |
31 | void xen_ident_map_ISA(void); | 31 | void xen_ident_map_ISA(void); |
32 | void xen_reserve_top(void); | 32 | void xen_reserve_top(void); |
33 | extern unsigned long xen_max_p2m_pfn; | ||
34 | |||
35 | void xen_set_pat(u64); | ||
33 | 36 | ||
34 | char * __init xen_memory_setup(void); | 37 | char * __init xen_memory_setup(void); |
35 | void __init xen_arch_setup(void); | 38 | void __init xen_arch_setup(void); |