diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/x86/xen | |
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) |
Patched in Tegra support.
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 16 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/xen/apic.c | 34 | ||||
-rw-r--r-- | arch/x86/xen/debugfs.c | 104 | ||||
-rw-r--r-- | arch/x86/xen/debugfs.h | 4 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 409 | ||||
-rw-r--r-- | arch/x86/xen/grant-table.c | 46 | ||||
-rw-r--r-- | arch/x86/xen/irq.c | 9 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 433 | ||||
-rw-r--r-- | arch/x86/xen/multicalls.h | 2 | ||||
-rw-r--r-- | arch/x86/xen/p2m.c | 449 | ||||
-rw-r--r-- | arch/x86/xen/pci-swiotlb-xen.c | 56 | ||||
-rw-r--r-- | arch/x86/xen/platform-pci-unplug.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 442 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 169 | ||||
-rw-r--r-- | arch/x86/xen/smp.h | 12 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 39 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 2 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 16 | ||||
-rw-r--r-- | arch/x86/xen/vga.c | 7 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm.S | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm_32.S | 6 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 56 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 11 |
24 files changed, 653 insertions, 1674 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 131dacd2748..5cc821cb2e0 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -6,9 +6,8 @@ config XEN | |||
6 | bool "Xen guest support" | 6 | bool "Xen guest support" |
7 | select PARAVIRT | 7 | select PARAVIRT |
8 | select PARAVIRT_CLOCK | 8 | select PARAVIRT_CLOCK |
9 | select XEN_HAVE_PVMMU | ||
10 | depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) | 9 | depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) |
11 | depends on X86_TSC | 10 | depends on X86_CMPXCHG && X86_TSC |
12 | help | 11 | help |
13 | This is the Linux Xen port. Enabling this will allow the | 12 | This is the Linux Xen port. Enabling this will allow the |
14 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
@@ -26,12 +25,12 @@ config XEN_PRIVILEGED_GUEST | |||
26 | 25 | ||
27 | config XEN_PVHVM | 26 | config XEN_PVHVM |
28 | def_bool y | 27 | def_bool y |
29 | depends on XEN && PCI && X86_LOCAL_APIC | 28 | depends on XEN |
29 | depends on X86_LOCAL_APIC | ||
30 | 30 | ||
31 | config XEN_MAX_DOMAIN_MEMORY | 31 | config XEN_MAX_DOMAIN_MEMORY |
32 | int | 32 | int |
33 | default 500 if X86_64 | 33 | default 128 |
34 | default 64 if X86_32 | ||
35 | depends on XEN | 34 | depends on XEN |
36 | help | 35 | help |
37 | This only affects the sizing of some bss arrays, the unused | 36 | This only affects the sizing of some bss arrays, the unused |
@@ -51,3 +50,10 @@ config XEN_DEBUG_FS | |||
51 | Enable statistics output and various tuning options in debugfs. | 50 | Enable statistics output and various tuning options in debugfs. |
52 | Enabling this option may incur a significant performance overhead. | 51 | Enabling this option may incur a significant performance overhead. |
53 | 52 | ||
53 | config XEN_DEBUG | ||
54 | bool "Enable Xen debug checks" | ||
55 | depends on XEN | ||
56 | default n | ||
57 | help | ||
58 | Enable various WARN_ON checks in the Xen MMU code. | ||
59 | Enabling this option WILL incur a significant performance overhead. | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 96ab2c09cb6..add2c2d729c 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -20,5 +20,5 @@ obj-$(CONFIG_EVENT_TRACING) += trace.o | |||
20 | obj-$(CONFIG_SMP) += smp.o | 20 | obj-$(CONFIG_SMP) += smp.o |
21 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 21 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
22 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o | 22 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o |
23 | obj-$(CONFIG_XEN_DOM0) += apic.o vga.o | 23 | obj-$(CONFIG_XEN_DOM0) += vga.o |
24 | obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o | 24 | obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o |
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c deleted file mode 100644 index 7005ced5d1a..00000000000 --- a/arch/x86/xen/apic.c +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | |||
3 | #include <asm/x86_init.h> | ||
4 | #include <asm/apic.h> | ||
5 | #include <asm/xen/hypercall.h> | ||
6 | |||
7 | #include <xen/xen.h> | ||
8 | #include <xen/interface/physdev.h> | ||
9 | #include "xen-ops.h" | ||
10 | |||
11 | static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) | ||
12 | { | ||
13 | struct physdev_apic apic_op; | ||
14 | int ret; | ||
15 | |||
16 | apic_op.apic_physbase = mpc_ioapic_addr(apic); | ||
17 | apic_op.reg = reg; | ||
18 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); | ||
19 | if (!ret) | ||
20 | return apic_op.value; | ||
21 | |||
22 | /* fallback to return an emulated IO_APIC values */ | ||
23 | if (reg == 0x1) | ||
24 | return 0x00170020; | ||
25 | else if (reg == 0x0) | ||
26 | return apic << 24; | ||
27 | |||
28 | return 0xfd; | ||
29 | } | ||
30 | |||
31 | void __init xen_init_apic(void) | ||
32 | { | ||
33 | x86_io_apic_ops.read = xen_io_apic_read; | ||
34 | } | ||
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index c8377fb26cd..7c0fedd98ea 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c | |||
@@ -19,3 +19,107 @@ struct dentry * __init xen_init_debugfs(void) | |||
19 | return d_xen_debug; | 19 | return d_xen_debug; |
20 | } | 20 | } |
21 | 21 | ||
22 | struct array_data | ||
23 | { | ||
24 | void *array; | ||
25 | unsigned elements; | ||
26 | }; | ||
27 | |||
28 | static int u32_array_open(struct inode *inode, struct file *file) | ||
29 | { | ||
30 | file->private_data = NULL; | ||
31 | return nonseekable_open(inode, file); | ||
32 | } | ||
33 | |||
34 | static size_t format_array(char *buf, size_t bufsize, const char *fmt, | ||
35 | u32 *array, unsigned array_size) | ||
36 | { | ||
37 | size_t ret = 0; | ||
38 | unsigned i; | ||
39 | |||
40 | for(i = 0; i < array_size; i++) { | ||
41 | size_t len; | ||
42 | |||
43 | len = snprintf(buf, bufsize, fmt, array[i]); | ||
44 | len++; /* ' ' or '\n' */ | ||
45 | ret += len; | ||
46 | |||
47 | if (buf) { | ||
48 | buf += len; | ||
49 | bufsize -= len; | ||
50 | buf[-1] = (i == array_size-1) ? '\n' : ' '; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | ret++; /* \0 */ | ||
55 | if (buf) | ||
56 | *buf = '\0'; | ||
57 | |||
58 | return ret; | ||
59 | } | ||
60 | |||
61 | static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size) | ||
62 | { | ||
63 | size_t len = format_array(NULL, 0, fmt, array, array_size); | ||
64 | char *ret; | ||
65 | |||
66 | ret = kmalloc(len, GFP_KERNEL); | ||
67 | if (ret == NULL) | ||
68 | return NULL; | ||
69 | |||
70 | format_array(ret, len, fmt, array, array_size); | ||
71 | return ret; | ||
72 | } | ||
73 | |||
74 | static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, | ||
75 | loff_t *ppos) | ||
76 | { | ||
77 | struct inode *inode = file->f_path.dentry->d_inode; | ||
78 | struct array_data *data = inode->i_private; | ||
79 | size_t size; | ||
80 | |||
81 | if (*ppos == 0) { | ||
82 | if (file->private_data) { | ||
83 | kfree(file->private_data); | ||
84 | file->private_data = NULL; | ||
85 | } | ||
86 | |||
87 | file->private_data = format_array_alloc("%u", data->array, data->elements); | ||
88 | } | ||
89 | |||
90 | size = 0; | ||
91 | if (file->private_data) | ||
92 | size = strlen(file->private_data); | ||
93 | |||
94 | return simple_read_from_buffer(buf, len, ppos, file->private_data, size); | ||
95 | } | ||
96 | |||
97 | static int xen_array_release(struct inode *inode, struct file *file) | ||
98 | { | ||
99 | kfree(file->private_data); | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static const struct file_operations u32_array_fops = { | ||
105 | .owner = THIS_MODULE, | ||
106 | .open = u32_array_open, | ||
107 | .release= xen_array_release, | ||
108 | .read = u32_array_read, | ||
109 | .llseek = no_llseek, | ||
110 | }; | ||
111 | |||
112 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | ||
113 | struct dentry *parent, | ||
114 | u32 *array, unsigned elements) | ||
115 | { | ||
116 | struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); | ||
117 | |||
118 | if (data == NULL) | ||
119 | return NULL; | ||
120 | |||
121 | data->array = array; | ||
122 | data->elements = elements; | ||
123 | |||
124 | return debugfs_create_file(name, mode, parent, data, &u32_array_fops); | ||
125 | } | ||
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h index 12ebf3325c7..e2813208483 100644 --- a/arch/x86/xen/debugfs.h +++ b/arch/x86/xen/debugfs.h | |||
@@ -3,4 +3,8 @@ | |||
3 | 3 | ||
4 | struct dentry * __init xen_init_debugfs(void); | 4 | struct dentry * __init xen_init_debugfs(void); |
5 | 5 | ||
6 | struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, | ||
7 | struct dentry *parent, | ||
8 | u32 *array, unsigned elements); | ||
9 | |||
6 | #endif /* _XEN_DEBUGFS_H */ | 10 | #endif /* _XEN_DEBUGFS_H */ |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 138e5667409..46c8069ae98 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -33,18 +33,15 @@ | |||
33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
34 | 34 | ||
35 | #include <xen/xen.h> | 35 | #include <xen/xen.h> |
36 | #include <xen/events.h> | ||
37 | #include <xen/interface/xen.h> | 36 | #include <xen/interface/xen.h> |
38 | #include <xen/interface/version.h> | 37 | #include <xen/interface/version.h> |
39 | #include <xen/interface/physdev.h> | 38 | #include <xen/interface/physdev.h> |
40 | #include <xen/interface/vcpu.h> | 39 | #include <xen/interface/vcpu.h> |
41 | #include <xen/interface/memory.h> | 40 | #include <xen/interface/memory.h> |
42 | #include <xen/interface/xen-mca.h> | ||
43 | #include <xen/features.h> | 41 | #include <xen/features.h> |
44 | #include <xen/page.h> | 42 | #include <xen/page.h> |
45 | #include <xen/hvm.h> | 43 | #include <xen/hvm.h> |
46 | #include <xen/hvc-console.h> | 44 | #include <xen/hvc-console.h> |
47 | #include <xen/acpi.h> | ||
48 | 45 | ||
49 | #include <asm/paravirt.h> | 46 | #include <asm/paravirt.h> |
50 | #include <asm/apic.h> | 47 | #include <asm/apic.h> |
@@ -65,20 +62,9 @@ | |||
65 | #include <asm/reboot.h> | 62 | #include <asm/reboot.h> |
66 | #include <asm/stackprotector.h> | 63 | #include <asm/stackprotector.h> |
67 | #include <asm/hypervisor.h> | 64 | #include <asm/hypervisor.h> |
68 | #include <asm/mwait.h> | ||
69 | #include <asm/pci_x86.h> | ||
70 | |||
71 | #ifdef CONFIG_ACPI | ||
72 | #include <linux/acpi.h> | ||
73 | #include <asm/acpi.h> | ||
74 | #include <acpi/pdc_intel.h> | ||
75 | #include <acpi/processor.h> | ||
76 | #include <xen/interface/platform.h> | ||
77 | #endif | ||
78 | 65 | ||
79 | #include "xen-ops.h" | 66 | #include "xen-ops.h" |
80 | #include "mmu.h" | 67 | #include "mmu.h" |
81 | #include "smp.h" | ||
82 | #include "multicalls.h" | 68 | #include "multicalls.h" |
83 | 69 | ||
84 | EXPORT_SYMBOL_GPL(hypercall_page); | 70 | EXPORT_SYMBOL_GPL(hypercall_page); |
@@ -109,7 +95,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); | |||
109 | * Point at some empty memory to start with. We map the real shared_info | 95 | * Point at some empty memory to start with. We map the real shared_info |
110 | * page as soon as fixmap is up and running. | 96 | * page as soon as fixmap is up and running. |
111 | */ | 97 | */ |
112 | struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; | 98 | struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; |
113 | 99 | ||
114 | /* | 100 | /* |
115 | * Flag to determine whether vcpu info placement is available on all | 101 | * Flag to determine whether vcpu info placement is available on all |
@@ -126,19 +112,6 @@ struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; | |||
126 | */ | 112 | */ |
127 | static int have_vcpu_info_placement = 1; | 113 | static int have_vcpu_info_placement = 1; |
128 | 114 | ||
129 | struct tls_descs { | ||
130 | struct desc_struct desc[3]; | ||
131 | }; | ||
132 | |||
133 | /* | ||
134 | * Updating the 3 TLS descriptors in the GDT on every task switch is | ||
135 | * surprisingly expensive so we avoid updating them if they haven't | ||
136 | * changed. Since Xen writes different descriptors than the one | ||
137 | * passed in the update_descriptor hypercall we keep shadow copies to | ||
138 | * compare against. | ||
139 | */ | ||
140 | static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); | ||
141 | |||
142 | static void clamp_max_cpus(void) | 115 | static void clamp_max_cpus(void) |
143 | { | 116 | { |
144 | #ifdef CONFIG_SMP | 117 | #ifdef CONFIG_SMP |
@@ -193,11 +166,10 @@ void xen_vcpu_restore(void) | |||
193 | { | 166 | { |
194 | int cpu; | 167 | int cpu; |
195 | 168 | ||
196 | for_each_possible_cpu(cpu) { | 169 | for_each_online_cpu(cpu) { |
197 | bool other_cpu = (cpu != smp_processor_id()); | 170 | bool other_cpu = (cpu != smp_processor_id()); |
198 | bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL); | ||
199 | 171 | ||
200 | if (other_cpu && is_up && | 172 | if (other_cpu && |
201 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) | 173 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) |
202 | BUG(); | 174 | BUG(); |
203 | 175 | ||
@@ -206,7 +178,7 @@ void xen_vcpu_restore(void) | |||
206 | if (have_vcpu_info_placement) | 178 | if (have_vcpu_info_placement) |
207 | xen_vcpu_setup(cpu); | 179 | xen_vcpu_setup(cpu); |
208 | 180 | ||
209 | if (other_cpu && is_up && | 181 | if (other_cpu && |
210 | HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) | 182 | HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) |
211 | BUG(); | 183 | BUG(); |
212 | } | 184 | } |
@@ -224,39 +196,17 @@ static void __init xen_banner(void) | |||
224 | version >> 16, version & 0xffff, extra.extraversion, | 196 | version >> 16, version & 0xffff, extra.extraversion, |
225 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | 197 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); |
226 | } | 198 | } |
227 | /* Check if running on Xen version (major, minor) or later */ | ||
228 | bool | ||
229 | xen_running_on_version_or_later(unsigned int major, unsigned int minor) | ||
230 | { | ||
231 | unsigned int version; | ||
232 | |||
233 | if (!xen_domain()) | ||
234 | return false; | ||
235 | |||
236 | version = HYPERVISOR_xen_version(XENVER_version, NULL); | ||
237 | if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) || | ||
238 | ((version >> 16) > major)) | ||
239 | return true; | ||
240 | return false; | ||
241 | } | ||
242 | |||
243 | #define CPUID_THERM_POWER_LEAF 6 | ||
244 | #define APERFMPERF_PRESENT 0 | ||
245 | 199 | ||
246 | static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; | 200 | static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; |
247 | static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; | 201 | static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; |
248 | 202 | ||
249 | static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask; | ||
250 | static __read_mostly unsigned int cpuid_leaf5_ecx_val; | ||
251 | static __read_mostly unsigned int cpuid_leaf5_edx_val; | ||
252 | |||
253 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, | 203 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, |
254 | unsigned int *cx, unsigned int *dx) | 204 | unsigned int *cx, unsigned int *dx) |
255 | { | 205 | { |
256 | unsigned maskebx = ~0; | 206 | unsigned maskebx = ~0; |
257 | unsigned maskecx = ~0; | 207 | unsigned maskecx = ~0; |
258 | unsigned maskedx = ~0; | 208 | unsigned maskedx = ~0; |
259 | unsigned setecx = 0; | 209 | |
260 | /* | 210 | /* |
261 | * Mask out inconvenient features, to try and disable as many | 211 | * Mask out inconvenient features, to try and disable as many |
262 | * unsupported kernel subsystems as possible. | 212 | * unsupported kernel subsystems as possible. |
@@ -264,23 +214,9 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
264 | switch (*ax) { | 214 | switch (*ax) { |
265 | case 1: | 215 | case 1: |
266 | maskecx = cpuid_leaf1_ecx_mask; | 216 | maskecx = cpuid_leaf1_ecx_mask; |
267 | setecx = cpuid_leaf1_ecx_set_mask; | ||
268 | maskedx = cpuid_leaf1_edx_mask; | 217 | maskedx = cpuid_leaf1_edx_mask; |
269 | break; | 218 | break; |
270 | 219 | ||
271 | case CPUID_MWAIT_LEAF: | ||
272 | /* Synthesize the values.. */ | ||
273 | *ax = 0; | ||
274 | *bx = 0; | ||
275 | *cx = cpuid_leaf5_ecx_val; | ||
276 | *dx = cpuid_leaf5_edx_val; | ||
277 | return; | ||
278 | |||
279 | case CPUID_THERM_POWER_LEAF: | ||
280 | /* Disabling APERFMPERF for kernel usage */ | ||
281 | maskecx = ~(1 << APERFMPERF_PRESENT); | ||
282 | break; | ||
283 | |||
284 | case 0xb: | 220 | case 0xb: |
285 | /* Suppress extended topology stuff */ | 221 | /* Suppress extended topology stuff */ |
286 | maskebx = 0; | 222 | maskebx = 0; |
@@ -296,89 +232,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
296 | 232 | ||
297 | *bx &= maskebx; | 233 | *bx &= maskebx; |
298 | *cx &= maskecx; | 234 | *cx &= maskecx; |
299 | *cx |= setecx; | ||
300 | *dx &= maskedx; | 235 | *dx &= maskedx; |
301 | |||
302 | } | 236 | } |
303 | 237 | ||
304 | static bool __init xen_check_mwait(void) | ||
305 | { | ||
306 | #ifdef CONFIG_ACPI | ||
307 | struct xen_platform_op op = { | ||
308 | .cmd = XENPF_set_processor_pminfo, | ||
309 | .u.set_pminfo.id = -1, | ||
310 | .u.set_pminfo.type = XEN_PM_PDC, | ||
311 | }; | ||
312 | uint32_t buf[3]; | ||
313 | unsigned int ax, bx, cx, dx; | ||
314 | unsigned int mwait_mask; | ||
315 | |||
316 | /* We need to determine whether it is OK to expose the MWAIT | ||
317 | * capability to the kernel to harvest deeper than C3 states from ACPI | ||
318 | * _CST using the processor_harvest_xen.c module. For this to work, we | ||
319 | * need to gather the MWAIT_LEAF values (which the cstate.c code | ||
320 | * checks against). The hypervisor won't expose the MWAIT flag because | ||
321 | * it would break backwards compatibility; so we will find out directly | ||
322 | * from the hardware and hypercall. | ||
323 | */ | ||
324 | if (!xen_initial_domain()) | ||
325 | return false; | ||
326 | |||
327 | /* | ||
328 | * When running under platform earlier than Xen4.2, do not expose | ||
329 | * mwait, to avoid the risk of loading native acpi pad driver | ||
330 | */ | ||
331 | if (!xen_running_on_version_or_later(4, 2)) | ||
332 | return false; | ||
333 | |||
334 | ax = 1; | ||
335 | cx = 0; | ||
336 | |||
337 | native_cpuid(&ax, &bx, &cx, &dx); | ||
338 | |||
339 | mwait_mask = (1 << (X86_FEATURE_EST % 32)) | | ||
340 | (1 << (X86_FEATURE_MWAIT % 32)); | ||
341 | |||
342 | if ((cx & mwait_mask) != mwait_mask) | ||
343 | return false; | ||
344 | |||
345 | /* We need to emulate the MWAIT_LEAF and for that we need both | ||
346 | * ecx and edx. The hypercall provides only partial information. | ||
347 | */ | ||
348 | |||
349 | ax = CPUID_MWAIT_LEAF; | ||
350 | bx = 0; | ||
351 | cx = 0; | ||
352 | dx = 0; | ||
353 | |||
354 | native_cpuid(&ax, &bx, &cx, &dx); | ||
355 | |||
356 | /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so, | ||
357 | * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. | ||
358 | */ | ||
359 | buf[0] = ACPI_PDC_REVISION_ID; | ||
360 | buf[1] = 1; | ||
361 | buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP); | ||
362 | |||
363 | set_xen_guest_handle(op.u.set_pminfo.pdc, buf); | ||
364 | |||
365 | if ((HYPERVISOR_dom0_op(&op) == 0) && | ||
366 | (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { | ||
367 | cpuid_leaf5_ecx_val = cx; | ||
368 | cpuid_leaf5_edx_val = dx; | ||
369 | } | ||
370 | return true; | ||
371 | #else | ||
372 | return false; | ||
373 | #endif | ||
374 | } | ||
375 | static void __init xen_init_cpuid_mask(void) | 238 | static void __init xen_init_cpuid_mask(void) |
376 | { | 239 | { |
377 | unsigned int ax, bx, cx, dx; | 240 | unsigned int ax, bx, cx, dx; |
378 | unsigned int xsave_mask; | 241 | unsigned int xsave_mask; |
379 | 242 | ||
380 | cpuid_leaf1_edx_mask = | 243 | cpuid_leaf1_edx_mask = |
381 | ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ | 244 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ |
245 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | ||
246 | (1 << X86_FEATURE_MTRR) | /* disable MTRR */ | ||
382 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | 247 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ |
383 | 248 | ||
384 | if (!xen_initial_domain()) | 249 | if (!xen_initial_domain()) |
@@ -386,7 +251,6 @@ static void __init xen_init_cpuid_mask(void) | |||
386 | ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ | 251 | ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ |
387 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ | 252 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ |
388 | ax = 1; | 253 | ax = 1; |
389 | cx = 0; | ||
390 | xen_cpuid(&ax, &bx, &cx, &dx); | 254 | xen_cpuid(&ax, &bx, &cx, &dx); |
391 | 255 | ||
392 | xsave_mask = | 256 | xsave_mask = |
@@ -396,8 +260,6 @@ static void __init xen_init_cpuid_mask(void) | |||
396 | /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ | 260 | /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ |
397 | if ((cx & xsave_mask) != xsave_mask) | 261 | if ((cx & xsave_mask) != xsave_mask) |
398 | cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ | 262 | cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ |
399 | if (xen_check_mwait()) | ||
400 | cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); | ||
401 | } | 263 | } |
402 | 264 | ||
403 | static void xen_set_debugreg(int reg, unsigned long val) | 265 | static void xen_set_debugreg(int reg, unsigned long val) |
@@ -575,28 +437,12 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) | |||
575 | BUG(); | 437 | BUG(); |
576 | } | 438 | } |
577 | 439 | ||
578 | static inline bool desc_equal(const struct desc_struct *d1, | ||
579 | const struct desc_struct *d2) | ||
580 | { | ||
581 | return d1->a == d2->a && d1->b == d2->b; | ||
582 | } | ||
583 | |||
584 | static void load_TLS_descriptor(struct thread_struct *t, | 440 | static void load_TLS_descriptor(struct thread_struct *t, |
585 | unsigned int cpu, unsigned int i) | 441 | unsigned int cpu, unsigned int i) |
586 | { | 442 | { |
587 | struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i]; | 443 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); |
588 | struct desc_struct *gdt; | 444 | xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); |
589 | xmaddr_t maddr; | 445 | struct multicall_space mc = __xen_mc_entry(0); |
590 | struct multicall_space mc; | ||
591 | |||
592 | if (desc_equal(shadow, &t->tls_array[i])) | ||
593 | return; | ||
594 | |||
595 | *shadow = t->tls_array[i]; | ||
596 | |||
597 | gdt = get_cpu_gdt_table(cpu); | ||
598 | maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | ||
599 | mc = __xen_mc_entry(0); | ||
600 | 446 | ||
601 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); | 447 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); |
602 | } | 448 | } |
@@ -678,8 +524,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
678 | /* | 524 | /* |
679 | * Look for known traps using IST, and substitute them | 525 | * Look for known traps using IST, and substitute them |
680 | * appropriately. The debugger ones are the only ones we care | 526 | * appropriately. The debugger ones are the only ones we care |
681 | * about. Xen will handle faults like double_fault, | 527 | * about. Xen will handle faults like double_fault and |
682 | * so we should never see them. Warn if | 528 | * machine_check, so we should never see them. Warn if |
683 | * there's an unexpected IST-using fault handler. | 529 | * there's an unexpected IST-using fault handler. |
684 | */ | 530 | */ |
685 | if (addr == (unsigned long)debug) | 531 | if (addr == (unsigned long)debug) |
@@ -694,11 +540,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
694 | return 0; | 540 | return 0; |
695 | #ifdef CONFIG_X86_MCE | 541 | #ifdef CONFIG_X86_MCE |
696 | } else if (addr == (unsigned long)machine_check) { | 542 | } else if (addr == (unsigned long)machine_check) { |
697 | /* | 543 | return 0; |
698 | * when xen hypervisor inject vMCE to guest, | ||
699 | * use native mce handler to handle it | ||
700 | */ | ||
701 | ; | ||
702 | #endif | 544 | #endif |
703 | } else { | 545 | } else { |
704 | /* Some other trap using IST? */ | 546 | /* Some other trap using IST? */ |
@@ -875,40 +717,9 @@ static void xen_io_delay(void) | |||
875 | } | 717 | } |
876 | 718 | ||
877 | #ifdef CONFIG_X86_LOCAL_APIC | 719 | #ifdef CONFIG_X86_LOCAL_APIC |
878 | static unsigned long xen_set_apic_id(unsigned int x) | ||
879 | { | ||
880 | WARN_ON(1); | ||
881 | return x; | ||
882 | } | ||
883 | static unsigned int xen_get_apic_id(unsigned long x) | ||
884 | { | ||
885 | return ((x)>>24) & 0xFFu; | ||
886 | } | ||
887 | static u32 xen_apic_read(u32 reg) | 720 | static u32 xen_apic_read(u32 reg) |
888 | { | 721 | { |
889 | struct xen_platform_op op = { | 722 | return 0; |
890 | .cmd = XENPF_get_cpuinfo, | ||
891 | .interface_version = XENPF_INTERFACE_VERSION, | ||
892 | .u.pcpu_info.xen_cpuid = 0, | ||
893 | }; | ||
894 | int ret = 0; | ||
895 | |||
896 | /* Shouldn't need this as APIC is turned off for PV, and we only | ||
897 | * get called on the bootup processor. But just in case. */ | ||
898 | if (!xen_initial_domain() || smp_processor_id()) | ||
899 | return 0; | ||
900 | |||
901 | if (reg == APIC_LVR) | ||
902 | return 0x10; | ||
903 | |||
904 | if (reg != APIC_ID) | ||
905 | return 0; | ||
906 | |||
907 | ret = HYPERVISOR_dom0_op(&op); | ||
908 | if (ret) | ||
909 | return 0; | ||
910 | |||
911 | return op.u.pcpu_info.apic_id << 24; | ||
912 | } | 723 | } |
913 | 724 | ||
914 | static void xen_apic_write(u32 reg, u32 val) | 725 | static void xen_apic_write(u32 reg, u32 val) |
@@ -946,16 +757,6 @@ static void set_xen_basic_apic_ops(void) | |||
946 | apic->icr_write = xen_apic_icr_write; | 757 | apic->icr_write = xen_apic_icr_write; |
947 | apic->wait_icr_idle = xen_apic_wait_icr_idle; | 758 | apic->wait_icr_idle = xen_apic_wait_icr_idle; |
948 | apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; | 759 | apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; |
949 | apic->set_apic_id = xen_set_apic_id; | ||
950 | apic->get_apic_id = xen_get_apic_id; | ||
951 | |||
952 | #ifdef CONFIG_SMP | ||
953 | apic->send_IPI_allbutself = xen_send_IPI_allbutself; | ||
954 | apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself; | ||
955 | apic->send_IPI_mask = xen_send_IPI_mask; | ||
956 | apic->send_IPI_all = xen_send_IPI_all; | ||
957 | apic->send_IPI_self = xen_send_IPI_self; | ||
958 | #endif | ||
959 | } | 760 | } |
960 | 761 | ||
961 | #endif | 762 | #endif |
@@ -975,11 +776,11 @@ static DEFINE_PER_CPU(unsigned long, xen_cr0_value); | |||
975 | 776 | ||
976 | static unsigned long xen_read_cr0(void) | 777 | static unsigned long xen_read_cr0(void) |
977 | { | 778 | { |
978 | unsigned long cr0 = this_cpu_read(xen_cr0_value); | 779 | unsigned long cr0 = percpu_read(xen_cr0_value); |
979 | 780 | ||
980 | if (unlikely(cr0 == 0)) { | 781 | if (unlikely(cr0 == 0)) { |
981 | cr0 = native_read_cr0(); | 782 | cr0 = native_read_cr0(); |
982 | this_cpu_write(xen_cr0_value, cr0); | 783 | percpu_write(xen_cr0_value, cr0); |
983 | } | 784 | } |
984 | 785 | ||
985 | return cr0; | 786 | return cr0; |
@@ -989,7 +790,7 @@ static void xen_write_cr0(unsigned long cr0) | |||
989 | { | 790 | { |
990 | struct multicall_space mcs; | 791 | struct multicall_space mcs; |
991 | 792 | ||
992 | this_cpu_write(xen_cr0_value, cr0); | 793 | percpu_write(xen_cr0_value, cr0); |
993 | 794 | ||
994 | /* Only pay attention to cr0.TS; everything else is | 795 | /* Only pay attention to cr0.TS; everything else is |
995 | ignored. */ | 796 | ignored. */ |
@@ -1007,16 +808,7 @@ static void xen_write_cr4(unsigned long cr4) | |||
1007 | 808 | ||
1008 | native_write_cr4(cr4); | 809 | native_write_cr4(cr4); |
1009 | } | 810 | } |
1010 | #ifdef CONFIG_X86_64 | 811 | |
1011 | static inline unsigned long xen_read_cr8(void) | ||
1012 | { | ||
1013 | return 0; | ||
1014 | } | ||
1015 | static inline void xen_write_cr8(unsigned long val) | ||
1016 | { | ||
1017 | BUG_ON(val); | ||
1018 | } | ||
1019 | #endif | ||
1020 | static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | 812 | static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) |
1021 | { | 813 | { |
1022 | int ret; | 814 | int ret; |
@@ -1083,7 +875,7 @@ void xen_setup_shared_info(void) | |||
1083 | xen_setup_mfn_list_list(); | 875 | xen_setup_mfn_list_list(); |
1084 | } | 876 | } |
1085 | 877 | ||
1086 | /* This is called once we have the cpu_possible_mask */ | 878 | /* This is called once we have the cpu_possible_map */ |
1087 | void xen_setup_vcpu_info_placement(void) | 879 | void xen_setup_vcpu_info_placement(void) |
1088 | { | 880 | { |
1089 | int cpu; | 881 | int cpu; |
@@ -1185,21 +977,13 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { | |||
1185 | .read_cr4_safe = native_read_cr4_safe, | 977 | .read_cr4_safe = native_read_cr4_safe, |
1186 | .write_cr4 = xen_write_cr4, | 978 | .write_cr4 = xen_write_cr4, |
1187 | 979 | ||
1188 | #ifdef CONFIG_X86_64 | ||
1189 | .read_cr8 = xen_read_cr8, | ||
1190 | .write_cr8 = xen_write_cr8, | ||
1191 | #endif | ||
1192 | |||
1193 | .wbinvd = native_wbinvd, | 980 | .wbinvd = native_wbinvd, |
1194 | 981 | ||
1195 | .read_msr = native_read_msr_safe, | 982 | .read_msr = native_read_msr_safe, |
1196 | .write_msr = xen_write_msr_safe, | 983 | .write_msr = xen_write_msr_safe, |
1197 | |||
1198 | .read_tsc = native_read_tsc, | 984 | .read_tsc = native_read_tsc, |
1199 | .read_pmc = native_read_pmc, | 985 | .read_pmc = native_read_pmc, |
1200 | 986 | ||
1201 | .read_tscp = native_read_tscp, | ||
1202 | |||
1203 | .iret = xen_iret, | 987 | .iret = xen_iret, |
1204 | .irq_enable_sysexit = xen_sysexit, | 988 | .irq_enable_sysexit = xen_sysexit, |
1205 | #ifdef CONFIG_X86_64 | 989 | #ifdef CONFIG_X86_64 |
@@ -1327,6 +1111,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1327 | { | 1111 | { |
1328 | struct physdev_set_iopl set_iopl; | 1112 | struct physdev_set_iopl set_iopl; |
1329 | int rc; | 1113 | int rc; |
1114 | pgd_t *pgd; | ||
1330 | 1115 | ||
1331 | if (!xen_start_info) | 1116 | if (!xen_start_info) |
1332 | return; | 1117 | return; |
@@ -1355,9 +1140,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1355 | 1140 | ||
1356 | /* Prevent unwanted bits from being set in PTEs. */ | 1141 | /* Prevent unwanted bits from being set in PTEs. */ |
1357 | __supported_pte_mask &= ~_PAGE_GLOBAL; | 1142 | __supported_pte_mask &= ~_PAGE_GLOBAL; |
1358 | #if 0 | ||
1359 | if (!xen_initial_domain()) | 1143 | if (!xen_initial_domain()) |
1360 | #endif | ||
1361 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | 1144 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); |
1362 | 1145 | ||
1363 | __supported_pte_mask |= _PAGE_IOMAP; | 1146 | __supported_pte_mask |= _PAGE_IOMAP; |
@@ -1418,6 +1201,12 @@ asmlinkage void __init xen_start_kernel(void) | |||
1418 | acpi_numa = -1; | 1201 | acpi_numa = -1; |
1419 | #endif | 1202 | #endif |
1420 | 1203 | ||
1204 | pgd = (pgd_t *)xen_start_info->pt_base; | ||
1205 | |||
1206 | if (!xen_initial_domain()) | ||
1207 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | ||
1208 | |||
1209 | __supported_pte_mask |= _PAGE_IOMAP; | ||
1421 | /* Don't do the full vcpu_info placement stuff until we have a | 1210 | /* Don't do the full vcpu_info placement stuff until we have a |
1422 | possible map and a non-dummy shared_info. */ | 1211 | possible map and a non-dummy shared_info. */ |
1423 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; | 1212 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; |
@@ -1425,8 +1214,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1425 | local_irq_disable(); | 1214 | local_irq_disable(); |
1426 | early_boot_irqs_disabled = true; | 1215 | early_boot_irqs_disabled = true; |
1427 | 1216 | ||
1217 | memblock_init(); | ||
1218 | |||
1428 | xen_raw_console_write("mapping kernel into physical memory\n"); | 1219 | xen_raw_console_write("mapping kernel into physical memory\n"); |
1429 | xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); | 1220 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); |
1221 | xen_ident_map_ISA(); | ||
1430 | 1222 | ||
1431 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | 1223 | /* Allocate and initialize top and mid mfn levels for p2m structure */ |
1432 | xen_build_mfn_list_list(); | 1224 | xen_build_mfn_list_list(); |
@@ -1477,34 +1269,16 @@ asmlinkage void __init xen_start_kernel(void) | |||
1477 | const struct dom0_vga_console_info *info = | 1269 | const struct dom0_vga_console_info *info = |
1478 | (void *)((char *)xen_start_info + | 1270 | (void *)((char *)xen_start_info + |
1479 | xen_start_info->console.dom0.info_off); | 1271 | xen_start_info->console.dom0.info_off); |
1480 | struct xen_platform_op op = { | ||
1481 | .cmd = XENPF_firmware_info, | ||
1482 | .interface_version = XENPF_INTERFACE_VERSION, | ||
1483 | .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, | ||
1484 | }; | ||
1485 | 1272 | ||
1486 | xen_init_vga(info, xen_start_info->console.dom0.info_size); | 1273 | xen_init_vga(info, xen_start_info->console.dom0.info_size); |
1487 | xen_start_info->console.domU.mfn = 0; | 1274 | xen_start_info->console.domU.mfn = 0; |
1488 | xen_start_info->console.domU.evtchn = 0; | 1275 | xen_start_info->console.domU.evtchn = 0; |
1489 | 1276 | ||
1490 | if (HYPERVISOR_dom0_op(&op) == 0) | ||
1491 | boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; | ||
1492 | |||
1493 | xen_init_apic(); | ||
1494 | |||
1495 | /* Make sure ACS will be enabled */ | 1277 | /* Make sure ACS will be enabled */ |
1496 | pci_request_acs(); | 1278 | pci_request_acs(); |
1497 | |||
1498 | xen_acpi_sleep_register(); | ||
1499 | |||
1500 | /* Avoid searching for BIOS MP tables */ | ||
1501 | x86_init.mpparse.find_smp_config = x86_init_noop; | ||
1502 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | ||
1503 | } | 1279 | } |
1504 | #ifdef CONFIG_PCI | 1280 | |
1505 | /* PCI BIOS service won't work from a PV guest. */ | 1281 | |
1506 | pci_probe &= ~PCI_PROBE_BIOS; | ||
1507 | #endif | ||
1508 | xen_raw_console_write("about to get started...\n"); | 1282 | xen_raw_console_write("about to get started...\n"); |
1509 | 1283 | ||
1510 | xen_setup_runstate_info(0); | 1284 | xen_setup_runstate_info(0); |
@@ -1517,84 +1291,64 @@ asmlinkage void __init xen_start_kernel(void) | |||
1517 | #endif | 1291 | #endif |
1518 | } | 1292 | } |
1519 | 1293 | ||
1520 | #ifdef CONFIG_XEN_PVHVM | 1294 | static int init_hvm_pv_info(int *major, int *minor) |
1521 | #define HVM_SHARED_INFO_ADDR 0xFE700000UL | 1295 | { |
1522 | static struct shared_info *xen_hvm_shared_info; | 1296 | uint32_t eax, ebx, ecx, edx, pages, msr, base; |
1523 | static unsigned long xen_hvm_sip_phys; | 1297 | u64 pfn; |
1524 | static int xen_major, xen_minor; | 1298 | |
1299 | base = xen_cpuid_base(); | ||
1300 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | ||
1301 | |||
1302 | *major = eax >> 16; | ||
1303 | *minor = eax & 0xffff; | ||
1304 | printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); | ||
1305 | |||
1306 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | ||
1307 | |||
1308 | pfn = __pa(hypercall_page); | ||
1309 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | ||
1310 | |||
1311 | xen_setup_features(); | ||
1312 | |||
1313 | pv_info.name = "Xen HVM"; | ||
1314 | |||
1315 | xen_domain_type = XEN_HVM_DOMAIN; | ||
1316 | |||
1317 | return 0; | ||
1318 | } | ||
1525 | 1319 | ||
1526 | static void xen_hvm_connect_shared_info(unsigned long pfn) | 1320 | void __ref xen_hvm_init_shared_info(void) |
1527 | { | 1321 | { |
1322 | int cpu; | ||
1528 | struct xen_add_to_physmap xatp; | 1323 | struct xen_add_to_physmap xatp; |
1324 | static struct shared_info *shared_info_page = 0; | ||
1529 | 1325 | ||
1326 | if (!shared_info_page) | ||
1327 | shared_info_page = (struct shared_info *) | ||
1328 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1530 | xatp.domid = DOMID_SELF; | 1329 | xatp.domid = DOMID_SELF; |
1531 | xatp.idx = 0; | 1330 | xatp.idx = 0; |
1532 | xatp.space = XENMAPSPACE_shared_info; | 1331 | xatp.space = XENMAPSPACE_shared_info; |
1533 | xatp.gpfn = pfn; | 1332 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; |
1534 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | 1333 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
1535 | BUG(); | 1334 | BUG(); |
1536 | 1335 | ||
1537 | } | 1336 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; |
1538 | static void __init xen_hvm_set_shared_info(struct shared_info *sip) | ||
1539 | { | ||
1540 | int cpu; | ||
1541 | |||
1542 | HYPERVISOR_shared_info = sip; | ||
1543 | 1337 | ||
1544 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | 1338 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
1545 | * page, we use it in the event channel upcall and in some pvclock | 1339 | * page, we use it in the event channel upcall and in some pvclock |
1546 | * related functions. We don't need the vcpu_info placement | 1340 | * related functions. We don't need the vcpu_info placement |
1547 | * optimizations because we don't use any pv_mmu or pv_irq op on | 1341 | * optimizations because we don't use any pv_mmu or pv_irq op on |
1548 | * HVM. */ | 1342 | * HVM. |
1549 | for_each_online_cpu(cpu) | 1343 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is |
1344 | * online but xen_hvm_init_shared_info is run at resume time too and | ||
1345 | * in that case multiple vcpus might be online. */ | ||
1346 | for_each_online_cpu(cpu) { | ||
1550 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1347 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
1551 | } | ||
1552 | |||
1553 | /* Reconnect the shared_info pfn to a (new) mfn */ | ||
1554 | void xen_hvm_resume_shared_info(void) | ||
1555 | { | ||
1556 | xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); | ||
1557 | } | ||
1558 | |||
1559 | /* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage. | ||
1560 | * On these old tools the shared info page will be placed in E820_Ram. | ||
1561 | * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects | ||
1562 | * that nothing is mapped up to HVM_SHARED_INFO_ADDR. | ||
1563 | * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used | ||
1564 | * here for the shared info page. */ | ||
1565 | static void __init xen_hvm_init_shared_info(void) | ||
1566 | { | ||
1567 | if (xen_major < 4) { | ||
1568 | xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1569 | xen_hvm_sip_phys = __pa(xen_hvm_shared_info); | ||
1570 | } else { | ||
1571 | xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR; | ||
1572 | set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys); | ||
1573 | xen_hvm_shared_info = | ||
1574 | (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); | ||
1575 | } | 1348 | } |
1576 | xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); | ||
1577 | xen_hvm_set_shared_info(xen_hvm_shared_info); | ||
1578 | } | ||
1579 | |||
1580 | static void __init init_hvm_pv_info(void) | ||
1581 | { | ||
1582 | uint32_t ecx, edx, pages, msr, base; | ||
1583 | u64 pfn; | ||
1584 | |||
1585 | base = xen_cpuid_base(); | ||
1586 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | ||
1587 | |||
1588 | pfn = __pa(hypercall_page); | ||
1589 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | ||
1590 | |||
1591 | xen_setup_features(); | ||
1592 | |||
1593 | pv_info.name = "Xen HVM"; | ||
1594 | |||
1595 | xen_domain_type = XEN_HVM_DOMAIN; | ||
1596 | } | 1349 | } |
1597 | 1350 | ||
1351 | #ifdef CONFIG_XEN_PVHVM | ||
1598 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | 1352 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, |
1599 | unsigned long action, void *hcpu) | 1353 | unsigned long action, void *hcpu) |
1600 | { | 1354 | { |
@@ -1617,7 +1371,12 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { | |||
1617 | 1371 | ||
1618 | static void __init xen_hvm_guest_init(void) | 1372 | static void __init xen_hvm_guest_init(void) |
1619 | { | 1373 | { |
1620 | init_hvm_pv_info(); | 1374 | int r; |
1375 | int major, minor; | ||
1376 | |||
1377 | r = init_hvm_pv_info(&major, &minor); | ||
1378 | if (r < 0) | ||
1379 | return; | ||
1621 | 1380 | ||
1622 | xen_hvm_init_shared_info(); | 1381 | xen_hvm_init_shared_info(); |
1623 | 1382 | ||
@@ -1633,22 +1392,12 @@ static void __init xen_hvm_guest_init(void) | |||
1633 | 1392 | ||
1634 | static bool __init xen_hvm_platform(void) | 1393 | static bool __init xen_hvm_platform(void) |
1635 | { | 1394 | { |
1636 | uint32_t eax, ebx, ecx, edx, base; | ||
1637 | |||
1638 | if (xen_pv_domain()) | 1395 | if (xen_pv_domain()) |
1639 | return false; | 1396 | return false; |
1640 | 1397 | ||
1641 | base = xen_cpuid_base(); | 1398 | if (!xen_cpuid_base()) |
1642 | if (!base) | ||
1643 | return false; | 1399 | return false; |
1644 | 1400 | ||
1645 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | ||
1646 | |||
1647 | xen_major = eax >> 16; | ||
1648 | xen_minor = eax & 0xffff; | ||
1649 | |||
1650 | printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor); | ||
1651 | |||
1652 | return true; | 1401 | return true; |
1653 | } | 1402 | } |
1654 | 1403 | ||
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 3a5f55d5190..49ba9b5224d 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c | |||
@@ -54,20 +54,6 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page, | |||
54 | return 0; | 54 | return 0; |
55 | } | 55 | } |
56 | 56 | ||
57 | /* | ||
58 | * This function is used to map shared frames to store grant status. It is | ||
59 | * different from map_pte_fn above, the frames type here is uint64_t. | ||
60 | */ | ||
61 | static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, | ||
62 | unsigned long addr, void *data) | ||
63 | { | ||
64 | uint64_t **frames = (uint64_t **)data; | ||
65 | |||
66 | set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); | ||
67 | (*frames)++; | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | 57 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, |
72 | unsigned long addr, void *data) | 58 | unsigned long addr, void *data) |
73 | { | 59 | { |
@@ -78,14 +64,14 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | |||
78 | 64 | ||
79 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | 65 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, |
80 | unsigned long max_nr_gframes, | 66 | unsigned long max_nr_gframes, |
81 | void **__shared) | 67 | struct grant_entry **__shared) |
82 | { | 68 | { |
83 | int rc; | 69 | int rc; |
84 | void *shared = *__shared; | 70 | struct grant_entry *shared = *__shared; |
85 | 71 | ||
86 | if (shared == NULL) { | 72 | if (shared == NULL) { |
87 | struct vm_struct *area = | 73 | struct vm_struct *area = |
88 | alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); | 74 | xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes); |
89 | BUG_ON(area == NULL); | 75 | BUG_ON(area == NULL); |
90 | shared = area->addr; | 76 | shared = area->addr; |
91 | *__shared = shared; | 77 | *__shared = shared; |
@@ -97,30 +83,8 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | |||
97 | return rc; | 83 | return rc; |
98 | } | 84 | } |
99 | 85 | ||
100 | int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, | 86 | void arch_gnttab_unmap_shared(struct grant_entry *shared, |
101 | unsigned long max_nr_gframes, | 87 | unsigned long nr_gframes) |
102 | grant_status_t **__shared) | ||
103 | { | ||
104 | int rc; | ||
105 | grant_status_t *shared = *__shared; | ||
106 | |||
107 | if (shared == NULL) { | ||
108 | /* No need to pass in PTE as we are going to do it | ||
109 | * in apply_to_page_range anyhow. */ | ||
110 | struct vm_struct *area = | ||
111 | alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); | ||
112 | BUG_ON(area == NULL); | ||
113 | shared = area->addr; | ||
114 | *__shared = shared; | ||
115 | } | ||
116 | |||
117 | rc = apply_to_page_range(&init_mm, (unsigned long)shared, | ||
118 | PAGE_SIZE * nr_gframes, | ||
119 | map_pte_fn_status, &frames); | ||
120 | return rc; | ||
121 | } | ||
122 | |||
123 | void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) | ||
124 | { | 88 | { |
125 | apply_to_page_range(&init_mm, (unsigned long)shared, | 89 | apply_to_page_range(&init_mm, (unsigned long)shared, |
126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); | 90 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 01a4dc015ae..8bbb465b6f0 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <xen/interface/xen.h> | 5 | #include <xen/interface/xen.h> |
6 | #include <xen/interface/sched.h> | 6 | #include <xen/interface/sched.h> |
7 | #include <xen/interface/vcpu.h> | 7 | #include <xen/interface/vcpu.h> |
8 | #include <xen/events.h> | ||
9 | 8 | ||
10 | #include <asm/xen/hypercall.h> | 9 | #include <asm/xen/hypercall.h> |
11 | #include <asm/xen/hypervisor.h> | 10 | #include <asm/xen/hypervisor.h> |
@@ -27,7 +26,7 @@ static unsigned long xen_save_fl(void) | |||
27 | struct vcpu_info *vcpu; | 26 | struct vcpu_info *vcpu; |
28 | unsigned long flags; | 27 | unsigned long flags; |
29 | 28 | ||
30 | vcpu = this_cpu_read(xen_vcpu); | 29 | vcpu = percpu_read(xen_vcpu); |
31 | 30 | ||
32 | /* flag has opposite sense of mask */ | 31 | /* flag has opposite sense of mask */ |
33 | flags = !vcpu->evtchn_upcall_mask; | 32 | flags = !vcpu->evtchn_upcall_mask; |
@@ -51,7 +50,7 @@ static void xen_restore_fl(unsigned long flags) | |||
51 | make sure we're don't switch CPUs between getting the vcpu | 50 | make sure we're don't switch CPUs between getting the vcpu |
52 | pointer and updating the mask. */ | 51 | pointer and updating the mask. */ |
53 | preempt_disable(); | 52 | preempt_disable(); |
54 | vcpu = this_cpu_read(xen_vcpu); | 53 | vcpu = percpu_read(xen_vcpu); |
55 | vcpu->evtchn_upcall_mask = flags; | 54 | vcpu->evtchn_upcall_mask = flags; |
56 | preempt_enable_no_resched(); | 55 | preempt_enable_no_resched(); |
57 | 56 | ||
@@ -73,7 +72,7 @@ static void xen_irq_disable(void) | |||
73 | make sure we're don't switch CPUs between getting the vcpu | 72 | make sure we're don't switch CPUs between getting the vcpu |
74 | pointer and updating the mask. */ | 73 | pointer and updating the mask. */ |
75 | preempt_disable(); | 74 | preempt_disable(); |
76 | this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1; | 75 | percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; |
77 | preempt_enable_no_resched(); | 76 | preempt_enable_no_resched(); |
78 | } | 77 | } |
79 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); | 78 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); |
@@ -87,7 +86,7 @@ static void xen_irq_enable(void) | |||
87 | the caller is confused and is trying to re-enable interrupts | 86 | the caller is confused and is trying to re-enable interrupts |
88 | on an indeterminate processor. */ | 87 | on an indeterminate processor. */ |
89 | 88 | ||
90 | vcpu = this_cpu_read(xen_vcpu); | 89 | vcpu = percpu_read(xen_vcpu); |
91 | vcpu->evtchn_upcall_mask = 0; | 90 | vcpu->evtchn_upcall_mask = 0; |
92 | 91 | ||
93 | /* Doesn't matter if we get preempted here, because any | 92 | /* Doesn't matter if we get preempted here, because any |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 01de35c7722..3dd53f997b1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -47,7 +47,6 @@ | |||
47 | #include <linux/gfp.h> | 47 | #include <linux/gfp.h> |
48 | #include <linux/memblock.h> | 48 | #include <linux/memblock.h> |
49 | #include <linux/seq_file.h> | 49 | #include <linux/seq_file.h> |
50 | #include <linux/crash_dump.h> | ||
51 | 50 | ||
52 | #include <trace/events/xen.h> | 51 | #include <trace/events/xen.h> |
53 | 52 | ||
@@ -85,7 +84,6 @@ | |||
85 | */ | 84 | */ |
86 | DEFINE_SPINLOCK(xen_reservation_lock); | 85 | DEFINE_SPINLOCK(xen_reservation_lock); |
87 | 86 | ||
88 | #ifdef CONFIG_X86_32 | ||
89 | /* | 87 | /* |
90 | * Identity map, in addition to plain kernel map. This needs to be | 88 | * Identity map, in addition to plain kernel map. This needs to be |
91 | * large enough to allocate page table pages to allocate the rest. | 89 | * large enough to allocate page table pages to allocate the rest. |
@@ -93,7 +91,7 @@ DEFINE_SPINLOCK(xen_reservation_lock); | |||
93 | */ | 91 | */ |
94 | #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) | 92 | #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) |
95 | static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); | 93 | static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); |
96 | #endif | 94 | |
97 | #ifdef CONFIG_X86_64 | 95 | #ifdef CONFIG_X86_64 |
98 | /* l3 pud for userspace vsyscall mapping */ | 96 | /* l3 pud for userspace vsyscall mapping */ |
99 | static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; | 97 | static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; |
@@ -310,20 +308,8 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) | |||
310 | 308 | ||
311 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) | 309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) |
312 | { | 310 | { |
313 | if (!xen_batched_set_pte(ptep, pteval)) { | 311 | if (!xen_batched_set_pte(ptep, pteval)) |
314 | /* | 312 | native_set_pte(ptep, pteval); |
315 | * Could call native_set_pte() here and trap and | ||
316 | * emulate the PTE write but with 32-bit guests this | ||
317 | * needs two traps (one for each of the two 32-bit | ||
318 | * words in the PTE) so do one hypercall directly | ||
319 | * instead. | ||
320 | */ | ||
321 | struct mmu_update u; | ||
322 | |||
323 | u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; | ||
324 | u.val = pte_val_ma(pteval); | ||
325 | HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF); | ||
326 | } | ||
327 | } | 313 | } |
328 | 314 | ||
329 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | 315 | static void xen_set_pte(pte_t *ptep, pte_t pteval) |
@@ -367,13 +353,8 @@ static pteval_t pte_mfn_to_pfn(pteval_t val) | |||
367 | { | 353 | { |
368 | if (val & _PAGE_PRESENT) { | 354 | if (val & _PAGE_PRESENT) { |
369 | unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; | 355 | unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; |
370 | unsigned long pfn = mfn_to_pfn(mfn); | ||
371 | |||
372 | pteval_t flags = val & PTE_FLAGS_MASK; | 356 | pteval_t flags = val & PTE_FLAGS_MASK; |
373 | if (unlikely(pfn == ~0)) | 357 | val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; |
374 | val = flags & ~_PAGE_PRESENT; | ||
375 | else | ||
376 | val = ((pteval_t)pfn << PAGE_SHIFT) | flags; | ||
377 | } | 358 | } |
378 | 359 | ||
379 | return val; | 360 | return val; |
@@ -434,13 +415,13 @@ static pteval_t iomap_pte(pteval_t val) | |||
434 | static pteval_t xen_pte_val(pte_t pte) | 415 | static pteval_t xen_pte_val(pte_t pte) |
435 | { | 416 | { |
436 | pteval_t pteval = pte.pte; | 417 | pteval_t pteval = pte.pte; |
437 | #if 0 | 418 | |
438 | /* If this is a WC pte, convert back from Xen WC to Linux WC */ | 419 | /* If this is a WC pte, convert back from Xen WC to Linux WC */ |
439 | if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { | 420 | if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { |
440 | WARN_ON(!pat_enabled); | 421 | WARN_ON(!pat_enabled); |
441 | pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; | 422 | pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; |
442 | } | 423 | } |
443 | #endif | 424 | |
444 | if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) | 425 | if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) |
445 | return pteval; | 426 | return pteval; |
446 | 427 | ||
@@ -482,7 +463,7 @@ void xen_set_pat(u64 pat) | |||
482 | static pte_t xen_make_pte(pteval_t pte) | 463 | static pte_t xen_make_pte(pteval_t pte) |
483 | { | 464 | { |
484 | phys_addr_t addr = (pte & PTE_PFN_MASK); | 465 | phys_addr_t addr = (pte & PTE_PFN_MASK); |
485 | #if 0 | 466 | |
486 | /* If Linux is trying to set a WC pte, then map to the Xen WC. | 467 | /* If Linux is trying to set a WC pte, then map to the Xen WC. |
487 | * If _PAGE_PAT is set, then it probably means it is really | 468 | * If _PAGE_PAT is set, then it probably means it is really |
488 | * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope | 469 | * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope |
@@ -495,7 +476,7 @@ static pte_t xen_make_pte(pteval_t pte) | |||
495 | if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) | 476 | if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) |
496 | pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; | 477 | pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; |
497 | } | 478 | } |
498 | #endif | 479 | |
499 | /* | 480 | /* |
500 | * Unprivileged domains are allowed to do IOMAPpings for | 481 | * Unprivileged domains are allowed to do IOMAPpings for |
501 | * PCI passthrough, but not map ISA space. The ISA | 482 | * PCI passthrough, but not map ISA space. The ISA |
@@ -514,6 +495,41 @@ static pte_t xen_make_pte(pteval_t pte) | |||
514 | } | 495 | } |
515 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); | 496 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); |
516 | 497 | ||
498 | #ifdef CONFIG_XEN_DEBUG | ||
499 | pte_t xen_make_pte_debug(pteval_t pte) | ||
500 | { | ||
501 | phys_addr_t addr = (pte & PTE_PFN_MASK); | ||
502 | phys_addr_t other_addr; | ||
503 | bool io_page = false; | ||
504 | pte_t _pte; | ||
505 | |||
506 | if (pte & _PAGE_IOMAP) | ||
507 | io_page = true; | ||
508 | |||
509 | _pte = xen_make_pte(pte); | ||
510 | |||
511 | if (!addr) | ||
512 | return _pte; | ||
513 | |||
514 | if (io_page && | ||
515 | (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { | ||
516 | other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; | ||
517 | WARN_ONCE(addr != other_addr, | ||
518 | "0x%lx is using VM_IO, but it is 0x%lx!\n", | ||
519 | (unsigned long)addr, (unsigned long)other_addr); | ||
520 | } else { | ||
521 | pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; | ||
522 | other_addr = (_pte.pte & PTE_PFN_MASK); | ||
523 | WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set), | ||
524 | "0x%lx is missing VM_IO (and wasn't fixed)!\n", | ||
525 | (unsigned long)addr); | ||
526 | } | ||
527 | |||
528 | return _pte; | ||
529 | } | ||
530 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); | ||
531 | #endif | ||
532 | |||
517 | static pgd_t xen_make_pgd(pgdval_t pgd) | 533 | static pgd_t xen_make_pgd(pgdval_t pgd) |
518 | { | 534 | { |
519 | pgd = pte_pfn_to_mfn(pgd); | 535 | pgd = pte_pfn_to_mfn(pgd); |
@@ -1090,14 +1106,14 @@ static void drop_other_mm_ref(void *info) | |||
1090 | struct mm_struct *mm = info; | 1106 | struct mm_struct *mm = info; |
1091 | struct mm_struct *active_mm; | 1107 | struct mm_struct *active_mm; |
1092 | 1108 | ||
1093 | active_mm = this_cpu_read(cpu_tlbstate.active_mm); | 1109 | active_mm = percpu_read(cpu_tlbstate.active_mm); |
1094 | 1110 | ||
1095 | if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) | 1111 | if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) |
1096 | leave_mm(smp_processor_id()); | 1112 | leave_mm(smp_processor_id()); |
1097 | 1113 | ||
1098 | /* If this cpu still has a stale cr3 reference, then make sure | 1114 | /* If this cpu still has a stale cr3 reference, then make sure |
1099 | it has been flushed. */ | 1115 | it has been flushed. */ |
1100 | if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd)) | 1116 | if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) |
1101 | load_cr3(swapper_pg_dir); | 1117 | load_cr3(swapper_pg_dir); |
1102 | } | 1118 | } |
1103 | 1119 | ||
@@ -1176,7 +1192,9 @@ static void xen_exit_mmap(struct mm_struct *mm) | |||
1176 | spin_unlock(&mm->page_table_lock); | 1192 | spin_unlock(&mm->page_table_lock); |
1177 | } | 1193 | } |
1178 | 1194 | ||
1179 | static void xen_post_allocator_init(void); | 1195 | static void __init xen_pagetable_setup_start(pgd_t *base) |
1196 | { | ||
1197 | } | ||
1180 | 1198 | ||
1181 | static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) | 1199 | static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) |
1182 | { | 1200 | { |
@@ -1192,121 +1210,29 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) | |||
1192 | } | 1210 | } |
1193 | } | 1211 | } |
1194 | 1212 | ||
1195 | #ifdef CONFIG_X86_64 | 1213 | static void xen_post_allocator_init(void); |
1196 | static void __init xen_cleanhighmap(unsigned long vaddr, | ||
1197 | unsigned long vaddr_end) | ||
1198 | { | ||
1199 | unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; | ||
1200 | pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr); | ||
1201 | 1214 | ||
1202 | /* NOTE: The loop is more greedy than the cleanup_highmap variant. | 1215 | static void __init xen_pagetable_setup_done(pgd_t *base) |
1203 | * We include the PMD passed in on _both_ boundaries. */ | ||
1204 | for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); | ||
1205 | pmd++, vaddr += PMD_SIZE) { | ||
1206 | if (pmd_none(*pmd)) | ||
1207 | continue; | ||
1208 | if (vaddr < (unsigned long) _text || vaddr > kernel_end) | ||
1209 | set_pmd(pmd, __pmd(0)); | ||
1210 | } | ||
1211 | /* In case we did something silly, we should crash in this function | ||
1212 | * instead of somewhere later and be confusing. */ | ||
1213 | xen_mc_flush(); | ||
1214 | } | ||
1215 | #endif | ||
1216 | static void __init xen_pagetable_init(void) | ||
1217 | { | 1216 | { |
1218 | #ifdef CONFIG_X86_64 | ||
1219 | unsigned long size; | ||
1220 | unsigned long addr; | ||
1221 | #endif | ||
1222 | paging_init(); | ||
1223 | xen_setup_shared_info(); | 1217 | xen_setup_shared_info(); |
1224 | #ifdef CONFIG_X86_64 | ||
1225 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1226 | unsigned long new_mfn_list; | ||
1227 | |||
1228 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | ||
1229 | |||
1230 | /* On 32-bit, we get zero so this never gets executed. */ | ||
1231 | new_mfn_list = xen_revector_p2m_tree(); | ||
1232 | if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { | ||
1233 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ | ||
1234 | memset((void *)xen_start_info->mfn_list, 0xff, size); | ||
1235 | |||
1236 | /* We should be in __ka space. */ | ||
1237 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); | ||
1238 | addr = xen_start_info->mfn_list; | ||
1239 | /* We roundup to the PMD, which means that if anybody at this stage is | ||
1240 | * using the __ka address of xen_start_info or xen_start_info->shared_info | ||
1241 | * they are in going to crash. Fortunatly we have already revectored | ||
1242 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ | ||
1243 | size = roundup(size, PMD_SIZE); | ||
1244 | xen_cleanhighmap(addr, addr + size); | ||
1245 | |||
1246 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | ||
1247 | memblock_free(__pa(xen_start_info->mfn_list), size); | ||
1248 | /* And revector! Bye bye old array */ | ||
1249 | xen_start_info->mfn_list = new_mfn_list; | ||
1250 | } else | ||
1251 | goto skip; | ||
1252 | } | ||
1253 | /* At this stage, cleanup_highmap has already cleaned __ka space | ||
1254 | * from _brk_limit way up to the max_pfn_mapped (which is the end of | ||
1255 | * the ramdisk). We continue on, erasing PMD entries that point to page | ||
1256 | * tables - do note that they are accessible at this stage via __va. | ||
1257 | * For good measure we also round up to the PMD - which means that if | ||
1258 | * anybody is using __ka address to the initial boot-stack - and try | ||
1259 | * to use it - they are going to crash. The xen_start_info has been | ||
1260 | * taken care of already in xen_setup_kernel_pagetable. */ | ||
1261 | addr = xen_start_info->pt_base; | ||
1262 | size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); | ||
1263 | |||
1264 | xen_cleanhighmap(addr, addr + size); | ||
1265 | xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); | ||
1266 | #ifdef DEBUG | ||
1267 | /* This is superflous and is not neccessary, but you know what | ||
1268 | * lets do it. The MODULES_VADDR -> MODULES_END should be clear of | ||
1269 | * anything at this stage. */ | ||
1270 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); | ||
1271 | #endif | ||
1272 | skip: | ||
1273 | #endif | ||
1274 | xen_post_allocator_init(); | 1218 | xen_post_allocator_init(); |
1275 | } | 1219 | } |
1220 | |||
1276 | static void xen_write_cr2(unsigned long cr2) | 1221 | static void xen_write_cr2(unsigned long cr2) |
1277 | { | 1222 | { |
1278 | this_cpu_read(xen_vcpu)->arch.cr2 = cr2; | 1223 | percpu_read(xen_vcpu)->arch.cr2 = cr2; |
1279 | } | 1224 | } |
1280 | 1225 | ||
1281 | static unsigned long xen_read_cr2(void) | 1226 | static unsigned long xen_read_cr2(void) |
1282 | { | 1227 | { |
1283 | return this_cpu_read(xen_vcpu)->arch.cr2; | 1228 | return percpu_read(xen_vcpu)->arch.cr2; |
1284 | } | 1229 | } |
1285 | 1230 | ||
1286 | unsigned long xen_read_cr2_direct(void) | 1231 | unsigned long xen_read_cr2_direct(void) |
1287 | { | 1232 | { |
1288 | return this_cpu_read(xen_vcpu_info.arch.cr2); | 1233 | return percpu_read(xen_vcpu_info.arch.cr2); |
1289 | } | 1234 | } |
1290 | 1235 | ||
1291 | void xen_flush_tlb_all(void) | ||
1292 | { | ||
1293 | struct mmuext_op *op; | ||
1294 | struct multicall_space mcs; | ||
1295 | |||
1296 | trace_xen_mmu_flush_tlb_all(0); | ||
1297 | |||
1298 | preempt_disable(); | ||
1299 | |||
1300 | mcs = xen_mc_entry(sizeof(*op)); | ||
1301 | |||
1302 | op = mcs.args; | ||
1303 | op->cmd = MMUEXT_TLB_FLUSH_ALL; | ||
1304 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | ||
1305 | |||
1306 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
1307 | |||
1308 | preempt_enable(); | ||
1309 | } | ||
1310 | static void xen_flush_tlb(void) | 1236 | static void xen_flush_tlb(void) |
1311 | { | 1237 | { |
1312 | struct mmuext_op *op; | 1238 | struct mmuext_op *op; |
@@ -1348,8 +1274,7 @@ static void xen_flush_tlb_single(unsigned long addr) | |||
1348 | } | 1274 | } |
1349 | 1275 | ||
1350 | static void xen_flush_tlb_others(const struct cpumask *cpus, | 1276 | static void xen_flush_tlb_others(const struct cpumask *cpus, |
1351 | struct mm_struct *mm, unsigned long start, | 1277 | struct mm_struct *mm, unsigned long va) |
1352 | unsigned long end) | ||
1353 | { | 1278 | { |
1354 | struct { | 1279 | struct { |
1355 | struct mmuext_op op; | 1280 | struct mmuext_op op; |
@@ -1361,7 +1286,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1361 | } *args; | 1286 | } *args; |
1362 | struct multicall_space mcs; | 1287 | struct multicall_space mcs; |
1363 | 1288 | ||
1364 | trace_xen_mmu_flush_tlb_others(cpus, mm, start, end); | 1289 | trace_xen_mmu_flush_tlb_others(cpus, mm, va); |
1365 | 1290 | ||
1366 | if (cpumask_empty(cpus)) | 1291 | if (cpumask_empty(cpus)) |
1367 | return; /* nothing to do */ | 1292 | return; /* nothing to do */ |
@@ -1374,10 +1299,11 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1374 | cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); | 1299 | cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); |
1375 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); | 1300 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); |
1376 | 1301 | ||
1377 | args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; | 1302 | if (va == TLB_FLUSH_ALL) { |
1378 | if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { | 1303 | args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; |
1304 | } else { | ||
1379 | args->op.cmd = MMUEXT_INVLPG_MULTI; | 1305 | args->op.cmd = MMUEXT_INVLPG_MULTI; |
1380 | args->op.arg1.linear_addr = start; | 1306 | args->op.arg1.linear_addr = va; |
1381 | } | 1307 | } |
1382 | 1308 | ||
1383 | MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); | 1309 | MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); |
@@ -1387,12 +1313,12 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1387 | 1313 | ||
1388 | static unsigned long xen_read_cr3(void) | 1314 | static unsigned long xen_read_cr3(void) |
1389 | { | 1315 | { |
1390 | return this_cpu_read(xen_cr3); | 1316 | return percpu_read(xen_cr3); |
1391 | } | 1317 | } |
1392 | 1318 | ||
1393 | static void set_current_cr3(void *v) | 1319 | static void set_current_cr3(void *v) |
1394 | { | 1320 | { |
1395 | this_cpu_write(xen_current_cr3, (unsigned long)v); | 1321 | percpu_write(xen_current_cr3, (unsigned long)v); |
1396 | } | 1322 | } |
1397 | 1323 | ||
1398 | static void __xen_write_cr3(bool kernel, unsigned long cr3) | 1324 | static void __xen_write_cr3(bool kernel, unsigned long cr3) |
@@ -1415,7 +1341,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) | |||
1415 | xen_extend_mmuext_op(&op); | 1341 | xen_extend_mmuext_op(&op); |
1416 | 1342 | ||
1417 | if (kernel) { | 1343 | if (kernel) { |
1418 | this_cpu_write(xen_cr3, cr3); | 1344 | percpu_write(xen_cr3, cr3); |
1419 | 1345 | ||
1420 | /* Update xen_current_cr3 once the batch has actually | 1346 | /* Update xen_current_cr3 once the batch has actually |
1421 | been submitted. */ | 1347 | been submitted. */ |
@@ -1431,7 +1357,7 @@ static void xen_write_cr3(unsigned long cr3) | |||
1431 | 1357 | ||
1432 | /* Update while interrupts are disabled, so its atomic with | 1358 | /* Update while interrupts are disabled, so its atomic with |
1433 | respect to ipis */ | 1359 | respect to ipis */ |
1434 | this_cpu_write(xen_cr3, cr3); | 1360 | percpu_write(xen_cr3, cr3); |
1435 | 1361 | ||
1436 | __xen_write_cr3(true, cr3); | 1362 | __xen_write_cr3(true, cr3); |
1437 | 1363 | ||
@@ -1520,28 +1446,13 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1520 | } | 1446 | } |
1521 | #endif /* CONFIG_X86_64 */ | 1447 | #endif /* CONFIG_X86_64 */ |
1522 | 1448 | ||
1523 | /* | 1449 | /* Init-time set_pte while constructing initial pagetables, which |
1524 | * Init-time set_pte while constructing initial pagetables, which | 1450 | doesn't allow RO pagetable pages to be remapped RW */ |
1525 | * doesn't allow RO page table pages to be remapped RW. | ||
1526 | * | ||
1527 | * If there is no MFN for this PFN then this page is initially | ||
1528 | * ballooned out so clear the PTE (as in decrease_reservation() in | ||
1529 | * drivers/xen/balloon.c). | ||
1530 | * | ||
1531 | * Many of these PTE updates are done on unpinned and writable pages | ||
1532 | * and doing a hypercall for these is unnecessary and expensive. At | ||
1533 | * this point it is not possible to tell if a page is pinned or not, | ||
1534 | * so always write the PTE directly and rely on Xen trapping and | ||
1535 | * emulating any updates as necessary. | ||
1536 | */ | ||
1537 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) | 1451 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1538 | { | 1452 | { |
1539 | if (pte_mfn(pte) != INVALID_P2M_ENTRY) | 1453 | pte = mask_rw_pte(ptep, pte); |
1540 | pte = mask_rw_pte(ptep, pte); | ||
1541 | else | ||
1542 | pte = __pte_ma(0); | ||
1543 | 1454 | ||
1544 | native_set_pte(ptep, pte); | 1455 | xen_set_pte(ptep, pte); |
1545 | } | 1456 | } |
1546 | 1457 | ||
1547 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1458 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
@@ -1747,7 +1658,7 @@ static void set_page_prot(void *addr, pgprot_t prot) | |||
1747 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) | 1658 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) |
1748 | BUG(); | 1659 | BUG(); |
1749 | } | 1660 | } |
1750 | #ifdef CONFIG_X86_32 | 1661 | |
1751 | static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | 1662 | static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) |
1752 | { | 1663 | { |
1753 | unsigned pmdidx, pteidx; | 1664 | unsigned pmdidx, pteidx; |
@@ -1798,7 +1709,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1798 | 1709 | ||
1799 | set_page_prot(pmd, PAGE_KERNEL_RO); | 1710 | set_page_prot(pmd, PAGE_KERNEL_RO); |
1800 | } | 1711 | } |
1801 | #endif | 1712 | |
1802 | void __init xen_setup_machphys_mapping(void) | 1713 | void __init xen_setup_machphys_mapping(void) |
1803 | { | 1714 | { |
1804 | struct xen_machphys_mapping mapping; | 1715 | struct xen_machphys_mapping mapping; |
@@ -1826,20 +1737,7 @@ static void convert_pfn_mfn(void *v) | |||
1826 | for (i = 0; i < PTRS_PER_PTE; i++) | 1737 | for (i = 0; i < PTRS_PER_PTE; i++) |
1827 | pte[i] = xen_make_pte(pte[i].pte); | 1738 | pte[i] = xen_make_pte(pte[i].pte); |
1828 | } | 1739 | } |
1829 | static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, | 1740 | |
1830 | unsigned long addr) | ||
1831 | { | ||
1832 | if (*pt_base == PFN_DOWN(__pa(addr))) { | ||
1833 | set_page_prot((void *)addr, PAGE_KERNEL); | ||
1834 | clear_page((void *)addr); | ||
1835 | (*pt_base)++; | ||
1836 | } | ||
1837 | if (*pt_end == PFN_DOWN(__pa(addr))) { | ||
1838 | set_page_prot((void *)addr, PAGE_KERNEL); | ||
1839 | clear_page((void *)addr); | ||
1840 | (*pt_end)--; | ||
1841 | } | ||
1842 | } | ||
1843 | /* | 1741 | /* |
1844 | * Set up the initial kernel pagetable. | 1742 | * Set up the initial kernel pagetable. |
1845 | * | 1743 | * |
@@ -1851,13 +1749,11 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, | |||
1851 | * of the physical mapping once some sort of allocator has been set | 1749 | * of the physical mapping once some sort of allocator has been set |
1852 | * up. | 1750 | * up. |
1853 | */ | 1751 | */ |
1854 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1752 | pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, |
1753 | unsigned long max_pfn) | ||
1855 | { | 1754 | { |
1856 | pud_t *l3; | 1755 | pud_t *l3; |
1857 | pmd_t *l2; | 1756 | pmd_t *l2; |
1858 | unsigned long addr[3]; | ||
1859 | unsigned long pt_base, pt_end; | ||
1860 | unsigned i; | ||
1861 | 1757 | ||
1862 | /* max_pfn_mapped is the last pfn mapped in the initial memory | 1758 | /* max_pfn_mapped is the last pfn mapped in the initial memory |
1863 | * mappings. Considering that on Xen after the kernel mappings we | 1759 | * mappings. Considering that on Xen after the kernel mappings we |
@@ -1865,53 +1761,32 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1865 | * set max_pfn_mapped to the last real pfn mapped. */ | 1761 | * set max_pfn_mapped to the last real pfn mapped. */ |
1866 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); | 1762 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); |
1867 | 1763 | ||
1868 | pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); | ||
1869 | pt_end = pt_base + xen_start_info->nr_pt_frames; | ||
1870 | |||
1871 | /* Zap identity mapping */ | 1764 | /* Zap identity mapping */ |
1872 | init_level4_pgt[0] = __pgd(0); | 1765 | init_level4_pgt[0] = __pgd(0); |
1873 | 1766 | ||
1874 | /* Pre-constructed entries are in pfn, so convert to mfn */ | 1767 | /* Pre-constructed entries are in pfn, so convert to mfn */ |
1875 | /* L4[272] -> level3_ident_pgt | ||
1876 | * L4[511] -> level3_kernel_pgt */ | ||
1877 | convert_pfn_mfn(init_level4_pgt); | 1768 | convert_pfn_mfn(init_level4_pgt); |
1878 | |||
1879 | /* L3_i[0] -> level2_ident_pgt */ | ||
1880 | convert_pfn_mfn(level3_ident_pgt); | 1769 | convert_pfn_mfn(level3_ident_pgt); |
1881 | /* L3_k[510] -> level2_kernel_pgt | ||
1882 | * L3_i[511] -> level2_fixmap_pgt */ | ||
1883 | convert_pfn_mfn(level3_kernel_pgt); | 1770 | convert_pfn_mfn(level3_kernel_pgt); |
1884 | 1771 | ||
1885 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ | ||
1886 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); | 1772 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); |
1887 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); | 1773 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); |
1888 | 1774 | ||
1889 | addr[0] = (unsigned long)pgd; | 1775 | memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); |
1890 | addr[1] = (unsigned long)l3; | 1776 | memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); |
1891 | addr[2] = (unsigned long)l2; | 1777 | |
1892 | /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: | ||
1893 | * Both L4[272][0] and L4[511][511] have entries that point to the same | ||
1894 | * L2 (PMD) tables. Meaning that if you modify it in __va space | ||
1895 | * it will be also modified in the __ka space! (But if you just | ||
1896 | * modify the PMD table to point to other PTE's or none, then you | ||
1897 | * are OK - which is what cleanup_highmap does) */ | ||
1898 | copy_page(level2_ident_pgt, l2); | ||
1899 | /* Graft it onto L4[511][511] */ | ||
1900 | copy_page(level2_kernel_pgt, l2); | ||
1901 | |||
1902 | /* Get [511][510] and graft that in level2_fixmap_pgt */ | ||
1903 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); | 1778 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); |
1904 | l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); | 1779 | l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); |
1905 | copy_page(level2_fixmap_pgt, l2); | 1780 | memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); |
1906 | /* Note that we don't do anything with level1_fixmap_pgt which | 1781 | |
1907 | * we don't need. */ | 1782 | /* Set up identity map */ |
1783 | xen_map_identity_early(level2_ident_pgt, max_pfn); | ||
1908 | 1784 | ||
1909 | /* Make pagetable pieces RO */ | 1785 | /* Make pagetable pieces RO */ |
1910 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | 1786 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); |
1911 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | 1787 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); |
1912 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | 1788 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); |
1913 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | 1789 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); |
1914 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | ||
1915 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1790 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); |
1916 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | 1791 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); |
1917 | 1792 | ||
@@ -1922,28 +1797,24 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1922 | /* Unpin Xen-provided one */ | 1797 | /* Unpin Xen-provided one */ |
1923 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 1798 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
1924 | 1799 | ||
1800 | /* Switch over */ | ||
1801 | pgd = init_level4_pgt; | ||
1802 | |||
1925 | /* | 1803 | /* |
1926 | * At this stage there can be no user pgd, and no page | 1804 | * At this stage there can be no user pgd, and no page |
1927 | * structure to attach it to, so make sure we just set kernel | 1805 | * structure to attach it to, so make sure we just set kernel |
1928 | * pgd. | 1806 | * pgd. |
1929 | */ | 1807 | */ |
1930 | xen_mc_batch(); | 1808 | xen_mc_batch(); |
1931 | __xen_write_cr3(true, __pa(init_level4_pgt)); | 1809 | __xen_write_cr3(true, __pa(pgd)); |
1932 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 1810 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1933 | 1811 | ||
1934 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are | 1812 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
1935 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for | 1813 | __pa(xen_start_info->pt_base + |
1936 | * the initial domain. For guests using the toolstack, they are in: | 1814 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
1937 | * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only | 1815 | "XEN PAGETABLES"); |
1938 | * rip out the [L4] (pgd), but for guests we shave off three pages. | ||
1939 | */ | ||
1940 | for (i = 0; i < ARRAY_SIZE(addr); i++) | ||
1941 | check_pt_base(&pt_base, &pt_end, addr[i]); | ||
1942 | 1816 | ||
1943 | /* Our (by three pages) smaller Xen pagetable that we are using */ | 1817 | return pgd; |
1944 | memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); | ||
1945 | /* Revector the xen_start_info */ | ||
1946 | xen_start_info = (struct start_info *)__va(__pa(xen_start_info)); | ||
1947 | } | 1818 | } |
1948 | #else /* !CONFIG_X86_64 */ | 1819 | #else /* !CONFIG_X86_64 */ |
1949 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); | 1820 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); |
@@ -1968,7 +1839,8 @@ static void __init xen_write_cr3_init(unsigned long cr3) | |||
1968 | */ | 1839 | */ |
1969 | swapper_kernel_pmd = | 1840 | swapper_kernel_pmd = |
1970 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | 1841 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); |
1971 | copy_page(swapper_kernel_pmd, initial_kernel_pmd); | 1842 | memcpy(swapper_kernel_pmd, initial_kernel_pmd, |
1843 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
1972 | swapper_pg_dir[KERNEL_PGD_BOUNDARY] = | 1844 | swapper_pg_dir[KERNEL_PGD_BOUNDARY] = |
1973 | __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); | 1845 | __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); |
1974 | set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); | 1846 | set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); |
@@ -1985,7 +1857,8 @@ static void __init xen_write_cr3_init(unsigned long cr3) | |||
1985 | pv_mmu_ops.write_cr3 = &xen_write_cr3; | 1857 | pv_mmu_ops.write_cr3 = &xen_write_cr3; |
1986 | } | 1858 | } |
1987 | 1859 | ||
1988 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1860 | pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, |
1861 | unsigned long max_pfn) | ||
1989 | { | 1862 | { |
1990 | pmd_t *kernel_pmd; | 1863 | pmd_t *kernel_pmd; |
1991 | 1864 | ||
@@ -1997,11 +1870,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1997 | 512*1024); | 1870 | 512*1024); |
1998 | 1871 | ||
1999 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); | 1872 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); |
2000 | copy_page(initial_kernel_pmd, kernel_pmd); | 1873 | memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); |
2001 | 1874 | ||
2002 | xen_map_identity_early(initial_kernel_pmd, max_pfn); | 1875 | xen_map_identity_early(initial_kernel_pmd, max_pfn); |
2003 | 1876 | ||
2004 | copy_page(initial_page_table, pgd); | 1877 | memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); |
2005 | initial_page_table[KERNEL_PGD_BOUNDARY] = | 1878 | initial_page_table[KERNEL_PGD_BOUNDARY] = |
2006 | __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); | 1879 | __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); |
2007 | 1880 | ||
@@ -2015,8 +1888,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
2015 | PFN_DOWN(__pa(initial_page_table))); | 1888 | PFN_DOWN(__pa(initial_page_table))); |
2016 | xen_write_cr3(__pa(initial_page_table)); | 1889 | xen_write_cr3(__pa(initial_page_table)); |
2017 | 1890 | ||
2018 | memblock_reserve(__pa(xen_start_info->pt_base), | 1891 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
2019 | xen_start_info->nr_pt_frames * PAGE_SIZE); | 1892 | __pa(xen_start_info->pt_base + |
1893 | xen_start_info->nr_pt_frames * PAGE_SIZE), | ||
1894 | "XEN PAGETABLES"); | ||
1895 | |||
1896 | return initial_page_table; | ||
2020 | } | 1897 | } |
2021 | #endif /* CONFIG_X86_64 */ | 1898 | #endif /* CONFIG_X86_64 */ |
2022 | 1899 | ||
@@ -2090,8 +1967,34 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
2090 | #endif | 1967 | #endif |
2091 | } | 1968 | } |
2092 | 1969 | ||
1970 | void __init xen_ident_map_ISA(void) | ||
1971 | { | ||
1972 | unsigned long pa; | ||
1973 | |||
1974 | /* | ||
1975 | * If we're dom0, then linear map the ISA machine addresses into | ||
1976 | * the kernel's address space. | ||
1977 | */ | ||
1978 | if (!xen_initial_domain()) | ||
1979 | return; | ||
1980 | |||
1981 | xen_raw_printk("Xen: setup ISA identity maps\n"); | ||
1982 | |||
1983 | for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { | ||
1984 | pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); | ||
1985 | |||
1986 | if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) | ||
1987 | BUG(); | ||
1988 | } | ||
1989 | |||
1990 | xen_flush_tlb(); | ||
1991 | } | ||
1992 | |||
2093 | static void __init xen_post_allocator_init(void) | 1993 | static void __init xen_post_allocator_init(void) |
2094 | { | 1994 | { |
1995 | #ifdef CONFIG_XEN_DEBUG | ||
1996 | pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); | ||
1997 | #endif | ||
2095 | pv_mmu_ops.set_pte = xen_set_pte; | 1998 | pv_mmu_ops.set_pte = xen_set_pte; |
2096 | pv_mmu_ops.set_pmd = xen_set_pmd; | 1999 | pv_mmu_ops.set_pmd = xen_set_pmd; |
2097 | pv_mmu_ops.set_pud = xen_set_pud; | 2000 | pv_mmu_ops.set_pud = xen_set_pud; |
@@ -2198,7 +2101,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2198 | void __init xen_init_mmu_ops(void) | 2101 | void __init xen_init_mmu_ops(void) |
2199 | { | 2102 | { |
2200 | x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; | 2103 | x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; |
2201 | x86_init.paging.pagetable_init = xen_pagetable_init; | 2104 | x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; |
2105 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; | ||
2202 | pv_mmu_ops = xen_mmu_ops; | 2106 | pv_mmu_ops = xen_mmu_ops; |
2203 | 2107 | ||
2204 | memset(dummy_mapping, 0xff, PAGE_SIZE); | 2108 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
@@ -2401,43 +2305,6 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) | |||
2401 | EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); | 2305 | EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); |
2402 | 2306 | ||
2403 | #ifdef CONFIG_XEN_PVHVM | 2307 | #ifdef CONFIG_XEN_PVHVM |
2404 | #ifdef CONFIG_PROC_VMCORE | ||
2405 | /* | ||
2406 | * This function is used in two contexts: | ||
2407 | * - the kdump kernel has to check whether a pfn of the crashed kernel | ||
2408 | * was a ballooned page. vmcore is using this function to decide | ||
2409 | * whether to access a pfn of the crashed kernel. | ||
2410 | * - the kexec kernel has to check whether a pfn was ballooned by the | ||
2411 | * previous kernel. If the pfn is ballooned, handle it properly. | ||
2412 | * Returns 0 if the pfn is not backed by a RAM page, the caller may | ||
2413 | * handle the pfn special in this case. | ||
2414 | */ | ||
2415 | static int xen_oldmem_pfn_is_ram(unsigned long pfn) | ||
2416 | { | ||
2417 | struct xen_hvm_get_mem_type a = { | ||
2418 | .domid = DOMID_SELF, | ||
2419 | .pfn = pfn, | ||
2420 | }; | ||
2421 | int ram; | ||
2422 | |||
2423 | if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) | ||
2424 | return -ENXIO; | ||
2425 | |||
2426 | switch (a.mem_type) { | ||
2427 | case HVMMEM_mmio_dm: | ||
2428 | ram = 0; | ||
2429 | break; | ||
2430 | case HVMMEM_ram_rw: | ||
2431 | case HVMMEM_ram_ro: | ||
2432 | default: | ||
2433 | ram = 1; | ||
2434 | break; | ||
2435 | } | ||
2436 | |||
2437 | return ram; | ||
2438 | } | ||
2439 | #endif | ||
2440 | |||
2441 | static void xen_hvm_exit_mmap(struct mm_struct *mm) | 2308 | static void xen_hvm_exit_mmap(struct mm_struct *mm) |
2442 | { | 2309 | { |
2443 | struct xen_hvm_pagetable_dying a; | 2310 | struct xen_hvm_pagetable_dying a; |
@@ -2468,9 +2335,6 @@ void __init xen_hvm_init_mmu_ops(void) | |||
2468 | { | 2335 | { |
2469 | if (is_pagetable_dying_supported()) | 2336 | if (is_pagetable_dying_supported()) |
2470 | pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; | 2337 | pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; |
2471 | #ifdef CONFIG_PROC_VMCORE | ||
2472 | register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram); | ||
2473 | #endif | ||
2474 | } | 2338 | } |
2475 | #endif | 2339 | #endif |
2476 | 2340 | ||
@@ -2497,10 +2361,8 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, | |||
2497 | 2361 | ||
2498 | int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | 2362 | int xen_remap_domain_mfn_range(struct vm_area_struct *vma, |
2499 | unsigned long addr, | 2363 | unsigned long addr, |
2500 | xen_pfn_t mfn, int nr, | 2364 | unsigned long mfn, int nr, |
2501 | pgprot_t prot, unsigned domid, | 2365 | pgprot_t prot, unsigned domid) |
2502 | struct page **pages) | ||
2503 | |||
2504 | { | 2366 | { |
2505 | struct remap_data rmd; | 2367 | struct remap_data rmd; |
2506 | struct mmu_update mmu_update[REMAP_BATCH_SIZE]; | 2368 | struct mmu_update mmu_update[REMAP_BATCH_SIZE]; |
@@ -2508,12 +2370,10 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | |||
2508 | unsigned long range; | 2370 | unsigned long range; |
2509 | int err = 0; | 2371 | int err = 0; |
2510 | 2372 | ||
2511 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
2512 | return -EINVAL; | ||
2513 | |||
2514 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); | 2373 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); |
2515 | 2374 | ||
2516 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); | 2375 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == |
2376 | (VM_PFNMAP | VM_RESERVED | VM_IO))); | ||
2517 | 2377 | ||
2518 | rmd.mfn = mfn; | 2378 | rmd.mfn = mfn; |
2519 | rmd.prot = prot; | 2379 | rmd.prot = prot; |
@@ -2528,8 +2388,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | |||
2528 | if (err) | 2388 | if (err) |
2529 | goto out; | 2389 | goto out; |
2530 | 2390 | ||
2531 | err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); | 2391 | err = -EFAULT; |
2532 | if (err < 0) | 2392 | if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) |
2533 | goto out; | 2393 | goto out; |
2534 | 2394 | ||
2535 | nr -= batch; | 2395 | nr -= batch; |
@@ -2539,19 +2399,22 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | |||
2539 | err = 0; | 2399 | err = 0; |
2540 | out: | 2400 | out: |
2541 | 2401 | ||
2542 | xen_flush_tlb_all(); | 2402 | flush_tlb_all(); |
2543 | 2403 | ||
2544 | return err; | 2404 | return err; |
2545 | } | 2405 | } |
2546 | EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); | 2406 | EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); |
2547 | 2407 | ||
2548 | /* Returns: 0 success */ | 2408 | #ifdef CONFIG_XEN_DEBUG_FS |
2549 | int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, | 2409 | static int p2m_dump_open(struct inode *inode, struct file *filp) |
2550 | int numpgs, struct page **pages) | ||
2551 | { | 2410 | { |
2552 | if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) | 2411 | return single_open(filp, p2m_dump_show, NULL); |
2553 | return 0; | ||
2554 | |||
2555 | return -EINVAL; | ||
2556 | } | 2412 | } |
2557 | EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); | 2413 | |
2414 | static const struct file_operations p2m_dump_fops = { | ||
2415 | .open = p2m_dump_open, | ||
2416 | .read = seq_read, | ||
2417 | .llseek = seq_lseek, | ||
2418 | .release = single_release, | ||
2419 | }; | ||
2420 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 9c2e74f9096..dee79b78a90 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h | |||
@@ -47,7 +47,7 @@ static inline void xen_mc_issue(unsigned mode) | |||
47 | xen_mc_flush(); | 47 | xen_mc_flush(); |
48 | 48 | ||
49 | /* restore flags saved in xen_mc_batch */ | 49 | /* restore flags saved in xen_mc_batch */ |
50 | local_irq_restore(this_cpu_read(xen_mc_irq_flags)); | 50 | local_irq_restore(percpu_read(xen_mc_irq_flags)); |
51 | } | 51 | } |
52 | 52 | ||
53 | /* Set up a callback to be called when the current batch is flushed */ | 53 | /* Set up a callback to be called when the current batch is flushed */ |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa5927..58efeb9d544 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -22,7 +22,7 @@ | |||
22 | * | 22 | * |
23 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | 23 | * P2M_PER_PAGE depends on the architecture, as a mfn is always |
24 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | 24 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to |
25 | * 512 and 1024 entries respectively. | 25 | * 512 and 1024 entries respectively. |
26 | * | 26 | * |
27 | * In short, these structures contain the Machine Frame Number (MFN) of the PFN. | 27 | * In short, these structures contain the Machine Frame Number (MFN) of the PFN. |
28 | * | 28 | * |
@@ -139,11 +139,11 @@ | |||
139 | * / | ~0, ~0, .... | | 139 | * / | ~0, ~0, .... | |
140 | * | \---------------/ | 140 | * | \---------------/ |
141 | * | | 141 | * | |
142 | * p2m_mid_missing p2m_missing | 142 | * p2m_missing p2m_missing |
143 | * /-----------------\ /------------\ | 143 | * /------------------\ /------------\ |
144 | * | [p2m_missing] +---->| ~0, ~0, ~0 | | 144 | * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | |
145 | * | [p2m_missing] +---->| ..., ~0 | | 145 | * | [p2m_mid_missing]+---->| ..., ~0 | |
146 | * \-----------------/ \------------/ | 146 | * \------------------/ \------------/ |
147 | * | 147 | * |
148 | * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) | 148 | * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) |
149 | */ | 149 | */ |
@@ -161,9 +161,7 @@ | |||
161 | #include <asm/xen/page.h> | 161 | #include <asm/xen/page.h> |
162 | #include <asm/xen/hypercall.h> | 162 | #include <asm/xen/hypercall.h> |
163 | #include <asm/xen/hypervisor.h> | 163 | #include <asm/xen/hypervisor.h> |
164 | #include <xen/grant_table.h> | ||
165 | 164 | ||
166 | #include "multicalls.h" | ||
167 | #include "xen-ops.h" | 165 | #include "xen-ops.h" |
168 | 166 | ||
169 | static void __init m2p_override_init(void); | 167 | static void __init m2p_override_init(void); |
@@ -194,13 +192,6 @@ RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID | |||
194 | * boundary violation will require three middle nodes. */ | 192 | * boundary violation will require three middle nodes. */ |
195 | RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); | 193 | RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); |
196 | 194 | ||
197 | /* When we populate back during bootup, the amount of pages can vary. The | ||
198 | * max we have is seen is 395979, but that does not mean it can't be more. | ||
199 | * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle | ||
200 | * it can re-use Xen provided mfn_list array, so we only need to allocate at | ||
201 | * most three P2M top nodes. */ | ||
202 | RESERVE_BRK(p2m_populated, PAGE_SIZE * 3); | ||
203 | |||
204 | static inline unsigned p2m_top_index(unsigned long pfn) | 195 | static inline unsigned p2m_top_index(unsigned long pfn) |
205 | { | 196 | { |
206 | BUG_ON(pfn >= MAX_P2M_PFN); | 197 | BUG_ON(pfn >= MAX_P2M_PFN); |
@@ -396,85 +387,7 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
396 | 387 | ||
397 | m2p_override_init(); | 388 | m2p_override_init(); |
398 | } | 389 | } |
399 | #ifdef CONFIG_X86_64 | ||
400 | #include <linux/bootmem.h> | ||
401 | unsigned long __init xen_revector_p2m_tree(void) | ||
402 | { | ||
403 | unsigned long va_start; | ||
404 | unsigned long va_end; | ||
405 | unsigned long pfn; | ||
406 | unsigned long pfn_free = 0; | ||
407 | unsigned long *mfn_list = NULL; | ||
408 | unsigned long size; | ||
409 | |||
410 | va_start = xen_start_info->mfn_list; | ||
411 | /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), | ||
412 | * so make sure it is rounded up to that */ | ||
413 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | ||
414 | va_end = va_start + size; | ||
415 | |||
416 | /* If we were revectored already, don't do it again. */ | ||
417 | if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) | ||
418 | return 0; | ||
419 | |||
420 | mfn_list = alloc_bootmem_align(size, PAGE_SIZE); | ||
421 | if (!mfn_list) { | ||
422 | pr_warn("Could not allocate space for a new P2M tree!\n"); | ||
423 | return xen_start_info->mfn_list; | ||
424 | } | ||
425 | /* Fill it out with INVALID_P2M_ENTRY value */ | ||
426 | memset(mfn_list, 0xFF, size); | ||
427 | |||
428 | for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { | ||
429 | unsigned topidx = p2m_top_index(pfn); | ||
430 | unsigned mididx; | ||
431 | unsigned long *mid_p; | ||
432 | |||
433 | if (!p2m_top[topidx]) | ||
434 | continue; | ||
435 | |||
436 | if (p2m_top[topidx] == p2m_mid_missing) | ||
437 | continue; | ||
438 | |||
439 | mididx = p2m_mid_index(pfn); | ||
440 | mid_p = p2m_top[topidx][mididx]; | ||
441 | if (!mid_p) | ||
442 | continue; | ||
443 | if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) | ||
444 | continue; | ||
445 | |||
446 | if ((unsigned long)mid_p == INVALID_P2M_ENTRY) | ||
447 | continue; | ||
448 | |||
449 | /* The old va. Rebase it on mfn_list */ | ||
450 | if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { | ||
451 | unsigned long *new; | ||
452 | 390 | ||
453 | if (pfn_free > (size / sizeof(unsigned long))) { | ||
454 | WARN(1, "Only allocated for %ld pages, but we want %ld!\n", | ||
455 | size / sizeof(unsigned long), pfn_free); | ||
456 | return 0; | ||
457 | } | ||
458 | new = &mfn_list[pfn_free]; | ||
459 | |||
460 | copy_page(new, mid_p); | ||
461 | p2m_top[topidx][mididx] = &mfn_list[pfn_free]; | ||
462 | p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]); | ||
463 | |||
464 | pfn_free += P2M_PER_PAGE; | ||
465 | |||
466 | } | ||
467 | /* This should be the leafs allocated for identity from _brk. */ | ||
468 | } | ||
469 | return (unsigned long)mfn_list; | ||
470 | |||
471 | } | ||
472 | #else | ||
473 | unsigned long __init xen_revector_p2m_tree(void) | ||
474 | { | ||
475 | return 0; | ||
476 | } | ||
477 | #endif | ||
478 | unsigned long get_phys_to_machine(unsigned long pfn) | 391 | unsigned long get_phys_to_machine(unsigned long pfn) |
479 | { | 392 | { |
480 | unsigned topidx, mididx, idx; | 393 | unsigned topidx, mididx, idx; |
@@ -508,7 +421,7 @@ static void free_p2m_page(void *p) | |||
508 | free_page((unsigned long)p); | 421 | free_page((unsigned long)p); |
509 | } | 422 | } |
510 | 423 | ||
511 | /* | 424 | /* |
512 | * Fully allocate the p2m structure for a given pfn. We need to check | 425 | * Fully allocate the p2m structure for a given pfn. We need to check |
513 | * that both the top and mid levels are allocated, and make sure the | 426 | * that both the top and mid levels are allocated, and make sure the |
514 | * parallel mfn tree is kept in sync. We may race with other cpus, so | 427 | * parallel mfn tree is kept in sync. We may race with other cpus, so |
@@ -584,18 +497,16 @@ static bool alloc_p2m(unsigned long pfn) | |||
584 | return true; | 497 | return true; |
585 | } | 498 | } |
586 | 499 | ||
587 | static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary) | 500 | static bool __init __early_alloc_p2m(unsigned long pfn) |
588 | { | 501 | { |
589 | unsigned topidx, mididx, idx; | 502 | unsigned topidx, mididx, idx; |
590 | unsigned long *p2m; | ||
591 | unsigned long *mid_mfn_p; | ||
592 | 503 | ||
593 | topidx = p2m_top_index(pfn); | 504 | topidx = p2m_top_index(pfn); |
594 | mididx = p2m_mid_index(pfn); | 505 | mididx = p2m_mid_index(pfn); |
595 | idx = p2m_index(pfn); | 506 | idx = p2m_index(pfn); |
596 | 507 | ||
597 | /* Pfff.. No boundary cross-over, lets get out. */ | 508 | /* Pfff.. No boundary cross-over, lets get out. */ |
598 | if (!idx && check_boundary) | 509 | if (!idx) |
599 | return false; | 510 | return false; |
600 | 511 | ||
601 | WARN(p2m_top[topidx][mididx] == p2m_identity, | 512 | WARN(p2m_top[topidx][mididx] == p2m_identity, |
@@ -609,153 +520,24 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary | |||
609 | return false; | 520 | return false; |
610 | 521 | ||
611 | /* Boundary cross-over for the edges: */ | 522 | /* Boundary cross-over for the edges: */ |
612 | p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); | 523 | if (idx) { |
613 | 524 | unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); | |
614 | p2m_init(p2m); | 525 | unsigned long *mid_mfn_p; |
615 | |||
616 | p2m_top[topidx][mididx] = p2m; | ||
617 | |||
618 | /* For save/restore we need to MFN of the P2M saved */ | ||
619 | |||
620 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
621 | WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), | ||
622 | "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", | ||
623 | topidx, mididx); | ||
624 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
625 | |||
626 | return true; | ||
627 | } | ||
628 | |||
629 | static bool __init early_alloc_p2m(unsigned long pfn) | ||
630 | { | ||
631 | unsigned topidx = p2m_top_index(pfn); | ||
632 | unsigned long *mid_mfn_p; | ||
633 | unsigned long **mid; | ||
634 | |||
635 | mid = p2m_top[topidx]; | ||
636 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
637 | if (mid == p2m_mid_missing) { | ||
638 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
639 | |||
640 | p2m_mid_init(mid); | ||
641 | |||
642 | p2m_top[topidx] = mid; | ||
643 | |||
644 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
645 | } | ||
646 | /* And the save/restore P2M tables.. */ | ||
647 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
648 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
649 | p2m_mid_mfn_init(mid_mfn_p); | ||
650 | |||
651 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
652 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
653 | /* Note: we don't set mid_mfn_p[midix] here, | ||
654 | * look in early_alloc_p2m_middle */ | ||
655 | } | ||
656 | return true; | ||
657 | } | ||
658 | |||
659 | /* | ||
660 | * Skim over the P2M tree looking at pages that are either filled with | ||
661 | * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and | ||
662 | * replace the P2M leaf with a p2m_missing or p2m_identity. | ||
663 | * Stick the old page in the new P2M tree location. | ||
664 | */ | ||
665 | bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) | ||
666 | { | ||
667 | unsigned topidx; | ||
668 | unsigned mididx; | ||
669 | unsigned ident_pfns; | ||
670 | unsigned inv_pfns; | ||
671 | unsigned long *p2m; | ||
672 | unsigned long *mid_mfn_p; | ||
673 | unsigned idx; | ||
674 | unsigned long pfn; | ||
675 | |||
676 | /* We only look when this entails a P2M middle layer */ | ||
677 | if (p2m_index(set_pfn)) | ||
678 | return false; | ||
679 | |||
680 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { | ||
681 | topidx = p2m_top_index(pfn); | ||
682 | |||
683 | if (!p2m_top[topidx]) | ||
684 | continue; | ||
685 | |||
686 | if (p2m_top[topidx] == p2m_mid_missing) | ||
687 | continue; | ||
688 | |||
689 | mididx = p2m_mid_index(pfn); | ||
690 | p2m = p2m_top[topidx][mididx]; | ||
691 | if (!p2m) | ||
692 | continue; | ||
693 | |||
694 | if ((p2m == p2m_missing) || (p2m == p2m_identity)) | ||
695 | continue; | ||
696 | |||
697 | if ((unsigned long)p2m == INVALID_P2M_ENTRY) | ||
698 | continue; | ||
699 | |||
700 | ident_pfns = 0; | ||
701 | inv_pfns = 0; | ||
702 | for (idx = 0; idx < P2M_PER_PAGE; idx++) { | ||
703 | /* IDENTITY_PFNs are 1:1 */ | ||
704 | if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) | ||
705 | ident_pfns++; | ||
706 | else if (p2m[idx] == INVALID_P2M_ENTRY) | ||
707 | inv_pfns++; | ||
708 | else | ||
709 | break; | ||
710 | } | ||
711 | if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) | ||
712 | goto found; | ||
713 | } | ||
714 | return false; | ||
715 | found: | ||
716 | /* Found one, replace old with p2m_identity or p2m_missing */ | ||
717 | p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); | ||
718 | /* And the other for save/restore.. */ | ||
719 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
720 | /* NOTE: Even if it is a p2m_identity it should still be point to | ||
721 | * a page filled with INVALID_P2M_ENTRY entries. */ | ||
722 | mid_mfn_p[mididx] = virt_to_mfn(p2m_missing); | ||
723 | |||
724 | /* Reset where we want to stick the old page in. */ | ||
725 | topidx = p2m_top_index(set_pfn); | ||
726 | mididx = p2m_mid_index(set_pfn); | ||
727 | |||
728 | /* This shouldn't happen */ | ||
729 | if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) | ||
730 | early_alloc_p2m(set_pfn); | ||
731 | |||
732 | if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) | ||
733 | return false; | ||
734 | |||
735 | p2m_init(p2m); | ||
736 | p2m_top[topidx][mididx] = p2m; | ||
737 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
738 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
739 | 526 | ||
740 | return true; | 527 | p2m_init(p2m); |
741 | } | ||
742 | bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
743 | { | ||
744 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
745 | if (!early_alloc_p2m(pfn)) | ||
746 | return false; | ||
747 | 528 | ||
748 | if (early_can_reuse_p2m_middle(pfn, mfn)) | 529 | p2m_top[topidx][mididx] = p2m; |
749 | return __set_phys_to_machine(pfn, mfn); | ||
750 | 530 | ||
751 | if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) | 531 | /* For save/restore we need to MFN of the P2M saved */ |
752 | return false; | 532 | |
533 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
534 | WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), | ||
535 | "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", | ||
536 | topidx, mididx); | ||
537 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
753 | 538 | ||
754 | if (!__set_phys_to_machine(pfn, mfn)) | ||
755 | return false; | ||
756 | } | 539 | } |
757 | 540 | return idx != 0; | |
758 | return true; | ||
759 | } | 541 | } |
760 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, | 542 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
761 | unsigned long pfn_e) | 543 | unsigned long pfn_e) |
@@ -775,11 +557,35 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, | |||
775 | pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); | 557 | pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); |
776 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) | 558 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) |
777 | { | 559 | { |
778 | WARN_ON(!early_alloc_p2m(pfn)); | 560 | unsigned topidx = p2m_top_index(pfn); |
561 | unsigned long *mid_mfn_p; | ||
562 | unsigned long **mid; | ||
563 | |||
564 | mid = p2m_top[topidx]; | ||
565 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
566 | if (mid == p2m_mid_missing) { | ||
567 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
568 | |||
569 | p2m_mid_init(mid); | ||
570 | |||
571 | p2m_top[topidx] = mid; | ||
572 | |||
573 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
574 | } | ||
575 | /* And the save/restore P2M tables.. */ | ||
576 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
577 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
578 | p2m_mid_mfn_init(mid_mfn_p); | ||
579 | |||
580 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
581 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
582 | /* Note: we don't set mid_mfn_p[midix] here, | ||
583 | * look in __early_alloc_p2m */ | ||
584 | } | ||
779 | } | 585 | } |
780 | 586 | ||
781 | early_alloc_p2m_middle(pfn_s, true); | 587 | __early_alloc_p2m(pfn_s); |
782 | early_alloc_p2m_middle(pfn_e, true); | 588 | __early_alloc_p2m(pfn_e); |
783 | 589 | ||
784 | for (pfn = pfn_s; pfn < pfn_e; pfn++) | 590 | for (pfn = pfn_s; pfn < pfn_e; pfn++) |
785 | if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) | 591 | if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) |
@@ -870,15 +676,13 @@ static unsigned long mfn_hash(unsigned long mfn) | |||
870 | } | 676 | } |
871 | 677 | ||
872 | /* Add an MFN override for a particular page */ | 678 | /* Add an MFN override for a particular page */ |
873 | int m2p_add_override(unsigned long mfn, struct page *page, | 679 | int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) |
874 | struct gnttab_map_grant_ref *kmap_op) | ||
875 | { | 680 | { |
876 | unsigned long flags; | 681 | unsigned long flags; |
877 | unsigned long pfn; | 682 | unsigned long pfn; |
878 | unsigned long uninitialized_var(address); | 683 | unsigned long uninitialized_var(address); |
879 | unsigned level; | 684 | unsigned level; |
880 | pte_t *ptep = NULL; | 685 | pte_t *ptep = NULL; |
881 | int ret = 0; | ||
882 | 686 | ||
883 | pfn = page_to_pfn(page); | 687 | pfn = page_to_pfn(page); |
884 | if (!PageHighMem(page)) { | 688 | if (!PageHighMem(page)) { |
@@ -888,52 +692,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
888 | "m2p_add_override: pfn %lx not mapped", pfn)) | 692 | "m2p_add_override: pfn %lx not mapped", pfn)) |
889 | return -EINVAL; | 693 | return -EINVAL; |
890 | } | 694 | } |
891 | WARN_ON(PagePrivate(page)); | 695 | |
892 | SetPagePrivate(page); | 696 | page->private = mfn; |
893 | set_page_private(page, mfn); | ||
894 | page->index = pfn_to_mfn(pfn); | 697 | page->index = pfn_to_mfn(pfn); |
895 | 698 | ||
896 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) | 699 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) |
897 | return -ENOMEM; | 700 | return -ENOMEM; |
898 | 701 | ||
899 | if (kmap_op != NULL) { | 702 | if (clear_pte && !PageHighMem(page)) |
900 | if (!PageHighMem(page)) { | 703 | /* Just zap old mapping for now */ |
901 | struct multicall_space mcs = | 704 | pte_clear(&init_mm, address, ptep); |
902 | xen_mc_entry(sizeof(*kmap_op)); | ||
903 | |||
904 | MULTI_grant_table_op(mcs.mc, | ||
905 | GNTTABOP_map_grant_ref, kmap_op, 1); | ||
906 | |||
907 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
908 | } | ||
909 | } | ||
910 | spin_lock_irqsave(&m2p_override_lock, flags); | 705 | spin_lock_irqsave(&m2p_override_lock, flags); |
911 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); | 706 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); |
912 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 707 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
913 | 708 | ||
914 | /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in | ||
915 | * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other | ||
916 | * pfn so that the following mfn_to_pfn(mfn) calls will return the | ||
917 | * pfn from the m2p_override (the backend pfn) instead. | ||
918 | * We need to do this because the pages shared by the frontend | ||
919 | * (xen-blkfront) can be already locked (lock_page, called by | ||
920 | * do_read_cache_page); when the userspace backend tries to use them | ||
921 | * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so | ||
922 | * do_blockdev_direct_IO is going to try to lock the same pages | ||
923 | * again resulting in a deadlock. | ||
924 | * As a side effect get_user_pages_fast might not be safe on the | ||
925 | * frontend pages while they are being shared with the backend, | ||
926 | * because mfn_to_pfn (that ends up being called by GUPF) will | ||
927 | * return the backend pfn rather than the frontend pfn. */ | ||
928 | ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); | ||
929 | if (ret == 0 && get_phys_to_machine(pfn) == mfn) | ||
930 | set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); | ||
931 | |||
932 | return 0; | 709 | return 0; |
933 | } | 710 | } |
934 | EXPORT_SYMBOL_GPL(m2p_add_override); | 711 | EXPORT_SYMBOL_GPL(m2p_add_override); |
935 | int m2p_remove_override(struct page *page, | 712 | int m2p_remove_override(struct page *page, bool clear_pte) |
936 | struct gnttab_map_grant_ref *kmap_op) | ||
937 | { | 713 | { |
938 | unsigned long flags; | 714 | unsigned long flags; |
939 | unsigned long mfn; | 715 | unsigned long mfn; |
@@ -941,7 +717,6 @@ int m2p_remove_override(struct page *page, | |||
941 | unsigned long uninitialized_var(address); | 717 | unsigned long uninitialized_var(address); |
942 | unsigned level; | 718 | unsigned level; |
943 | pte_t *ptep = NULL; | 719 | pte_t *ptep = NULL; |
944 | int ret = 0; | ||
945 | 720 | ||
946 | pfn = page_to_pfn(page); | 721 | pfn = page_to_pfn(page); |
947 | mfn = get_phys_to_machine(pfn); | 722 | mfn = get_phys_to_machine(pfn); |
@@ -960,69 +735,13 @@ int m2p_remove_override(struct page *page, | |||
960 | spin_lock_irqsave(&m2p_override_lock, flags); | 735 | spin_lock_irqsave(&m2p_override_lock, flags); |
961 | list_del(&page->lru); | 736 | list_del(&page->lru); |
962 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 737 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
963 | WARN_ON(!PagePrivate(page)); | ||
964 | ClearPagePrivate(page); | ||
965 | |||
966 | set_phys_to_machine(pfn, page->index); | 738 | set_phys_to_machine(pfn, page->index); |
967 | if (kmap_op != NULL) { | ||
968 | if (!PageHighMem(page)) { | ||
969 | struct multicall_space mcs; | ||
970 | struct gnttab_unmap_grant_ref *unmap_op; | ||
971 | |||
972 | /* | ||
973 | * It might be that we queued all the m2p grant table | ||
974 | * hypercalls in a multicall, then m2p_remove_override | ||
975 | * get called before the multicall has actually been | ||
976 | * issued. In this case handle is going to -1 because | ||
977 | * it hasn't been modified yet. | ||
978 | */ | ||
979 | if (kmap_op->handle == -1) | ||
980 | xen_mc_flush(); | ||
981 | /* | ||
982 | * Now if kmap_op->handle is negative it means that the | ||
983 | * hypercall actually returned an error. | ||
984 | */ | ||
985 | if (kmap_op->handle == GNTST_general_error) { | ||
986 | printk(KERN_WARNING "m2p_remove_override: " | ||
987 | "pfn %lx mfn %lx, failed to modify kernel mappings", | ||
988 | pfn, mfn); | ||
989 | return -1; | ||
990 | } | ||
991 | |||
992 | mcs = xen_mc_entry( | ||
993 | sizeof(struct gnttab_unmap_grant_ref)); | ||
994 | unmap_op = mcs.args; | ||
995 | unmap_op->host_addr = kmap_op->host_addr; | ||
996 | unmap_op->handle = kmap_op->handle; | ||
997 | unmap_op->dev_bus_addr = 0; | ||
998 | |||
999 | MULTI_grant_table_op(mcs.mc, | ||
1000 | GNTTABOP_unmap_grant_ref, unmap_op, 1); | ||
1001 | |||
1002 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
1003 | |||
1004 | set_pte_at(&init_mm, address, ptep, | ||
1005 | pfn_pte(pfn, PAGE_KERNEL)); | ||
1006 | __flush_tlb_single(address); | ||
1007 | kmap_op->host_addr = 0; | ||
1008 | } | ||
1009 | } | ||
1010 | 739 | ||
1011 | /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present | 740 | if (clear_pte && !PageHighMem(page)) |
1012 | * somewhere in this domain, even before being added to the | 741 | set_pte_at(&init_mm, address, ptep, |
1013 | * m2p_override (see comment above in m2p_add_override). | 742 | pfn_pte(pfn, PAGE_KERNEL)); |
1014 | * If there are no other entries in the m2p_override corresponding | 743 | /* No tlb flush necessary because the caller already |
1015 | * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for | 744 | * left the pte unmapped. */ |
1016 | * the original pfn (the one shared by the frontend): the backend | ||
1017 | * cannot do any IO on this page anymore because it has been | ||
1018 | * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of | ||
1019 | * the original pfn causes mfn_to_pfn(mfn) to return the frontend | ||
1020 | * pfn again. */ | ||
1021 | mfn &= ~FOREIGN_FRAME_BIT; | ||
1022 | ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); | ||
1023 | if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && | ||
1024 | m2p_find_override(mfn) == NULL) | ||
1025 | set_phys_to_machine(pfn, mfn); | ||
1026 | 745 | ||
1027 | return 0; | 746 | return 0; |
1028 | } | 747 | } |
@@ -1039,7 +758,7 @@ struct page *m2p_find_override(unsigned long mfn) | |||
1039 | spin_lock_irqsave(&m2p_override_lock, flags); | 758 | spin_lock_irqsave(&m2p_override_lock, flags); |
1040 | 759 | ||
1041 | list_for_each_entry(p, bucket, lru) { | 760 | list_for_each_entry(p, bucket, lru) { |
1042 | if (page_private(p) == mfn) { | 761 | if (p->private == mfn) { |
1043 | ret = p; | 762 | ret = p; |
1044 | break; | 763 | break; |
1045 | } | 764 | } |
@@ -1063,21 +782,17 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) | |||
1063 | EXPORT_SYMBOL_GPL(m2p_find_override_pfn); | 782 | EXPORT_SYMBOL_GPL(m2p_find_override_pfn); |
1064 | 783 | ||
1065 | #ifdef CONFIG_XEN_DEBUG_FS | 784 | #ifdef CONFIG_XEN_DEBUG_FS |
1066 | #include <linux/debugfs.h> | 785 | |
1067 | #include "debugfs.h" | 786 | int p2m_dump_show(struct seq_file *m, void *v) |
1068 | static int p2m_dump_show(struct seq_file *m, void *v) | ||
1069 | { | 787 | { |
1070 | static const char * const level_name[] = { "top", "middle", | 788 | static const char * const level_name[] = { "top", "middle", |
1071 | "entry", "abnormal", "error"}; | 789 | "entry", "abnormal" }; |
790 | static const char * const type_name[] = { "identity", "missing", | ||
791 | "pfn", "abnormal"}; | ||
1072 | #define TYPE_IDENTITY 0 | 792 | #define TYPE_IDENTITY 0 |
1073 | #define TYPE_MISSING 1 | 793 | #define TYPE_MISSING 1 |
1074 | #define TYPE_PFN 2 | 794 | #define TYPE_PFN 2 |
1075 | #define TYPE_UNKNOWN 3 | 795 | #define TYPE_UNKNOWN 3 |
1076 | static const char * const type_name[] = { | ||
1077 | [TYPE_IDENTITY] = "identity", | ||
1078 | [TYPE_MISSING] = "missing", | ||
1079 | [TYPE_PFN] = "pfn", | ||
1080 | [TYPE_UNKNOWN] = "abnormal"}; | ||
1081 | unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; | 796 | unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; |
1082 | unsigned int uninitialized_var(prev_level); | 797 | unsigned int uninitialized_var(prev_level); |
1083 | unsigned int uninitialized_var(prev_type); | 798 | unsigned int uninitialized_var(prev_type); |
@@ -1141,32 +856,4 @@ static int p2m_dump_show(struct seq_file *m, void *v) | |||
1141 | #undef TYPE_PFN | 856 | #undef TYPE_PFN |
1142 | #undef TYPE_UNKNOWN | 857 | #undef TYPE_UNKNOWN |
1143 | } | 858 | } |
1144 | 859 | #endif | |
1145 | static int p2m_dump_open(struct inode *inode, struct file *filp) | ||
1146 | { | ||
1147 | return single_open(filp, p2m_dump_show, NULL); | ||
1148 | } | ||
1149 | |||
1150 | static const struct file_operations p2m_dump_fops = { | ||
1151 | .open = p2m_dump_open, | ||
1152 | .read = seq_read, | ||
1153 | .llseek = seq_lseek, | ||
1154 | .release = single_release, | ||
1155 | }; | ||
1156 | |||
1157 | static struct dentry *d_mmu_debug; | ||
1158 | |||
1159 | static int __init xen_p2m_debugfs(void) | ||
1160 | { | ||
1161 | struct dentry *d_xen = xen_init_debugfs(); | ||
1162 | |||
1163 | if (d_xen == NULL) | ||
1164 | return -ENOMEM; | ||
1165 | |||
1166 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); | ||
1167 | |||
1168 | debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); | ||
1169 | return 0; | ||
1170 | } | ||
1171 | fs_initcall(xen_p2m_debugfs); | ||
1172 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 969570491c3..b480d4207a4 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c | |||
@@ -8,20 +8,12 @@ | |||
8 | #include <xen/xen.h> | 8 | #include <xen/xen.h> |
9 | #include <asm/iommu_table.h> | 9 | #include <asm/iommu_table.h> |
10 | 10 | ||
11 | |||
12 | #include <asm/xen/swiotlb-xen.h> | ||
13 | #ifdef CONFIG_X86_64 | ||
14 | #include <asm/iommu.h> | ||
15 | #include <asm/dma.h> | ||
16 | #endif | ||
17 | #include <linux/export.h> | ||
18 | |||
19 | int xen_swiotlb __read_mostly; | 11 | int xen_swiotlb __read_mostly; |
20 | 12 | ||
21 | static struct dma_map_ops xen_swiotlb_dma_ops = { | 13 | static struct dma_map_ops xen_swiotlb_dma_ops = { |
22 | .mapping_error = xen_swiotlb_dma_mapping_error, | 14 | .mapping_error = xen_swiotlb_dma_mapping_error, |
23 | .alloc = xen_swiotlb_alloc_coherent, | 15 | .alloc_coherent = xen_swiotlb_alloc_coherent, |
24 | .free = xen_swiotlb_free_coherent, | 16 | .free_coherent = xen_swiotlb_free_coherent, |
25 | .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, | 17 | .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, |
26 | .sync_single_for_device = xen_swiotlb_sync_single_for_device, | 18 | .sync_single_for_device = xen_swiotlb_sync_single_for_device, |
27 | .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, | 19 | .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, |
@@ -42,64 +34,34 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { | |||
42 | int __init pci_xen_swiotlb_detect(void) | 34 | int __init pci_xen_swiotlb_detect(void) |
43 | { | 35 | { |
44 | 36 | ||
45 | if (!xen_pv_domain()) | ||
46 | return 0; | ||
47 | |||
48 | /* If running as PV guest, either iommu=soft, or swiotlb=force will | 37 | /* If running as PV guest, either iommu=soft, or swiotlb=force will |
49 | * activate this IOMMU. If running as PV privileged, activate it | 38 | * activate this IOMMU. If running as PV privileged, activate it |
50 | * irregardless. | 39 | * irregardless. |
51 | */ | 40 | */ |
52 | if ((xen_initial_domain() || swiotlb || swiotlb_force)) | 41 | if ((xen_initial_domain() || swiotlb || swiotlb_force) && |
42 | (xen_pv_domain())) | ||
53 | xen_swiotlb = 1; | 43 | xen_swiotlb = 1; |
54 | 44 | ||
55 | /* If we are running under Xen, we MUST disable the native SWIOTLB. | 45 | /* If we are running under Xen, we MUST disable the native SWIOTLB. |
56 | * Don't worry about swiotlb_force flag activating the native, as | 46 | * Don't worry about swiotlb_force flag activating the native, as |
57 | * the 'swiotlb' flag is the only one turning it on. */ | 47 | * the 'swiotlb' flag is the only one turning it on. */ |
58 | swiotlb = 0; | 48 | if (xen_pv_domain()) |
49 | swiotlb = 0; | ||
59 | 50 | ||
60 | #ifdef CONFIG_X86_64 | ||
61 | /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 | ||
62 | * (so no iommu=X command line over-writes). | ||
63 | * Considering that PV guests do not want the *native SWIOTLB* but | ||
64 | * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here. | ||
65 | */ | ||
66 | if (max_pfn > MAX_DMA32_PFN) | ||
67 | no_iommu = 1; | ||
68 | #endif | ||
69 | return xen_swiotlb; | 51 | return xen_swiotlb; |
70 | } | 52 | } |
71 | 53 | ||
72 | void __init pci_xen_swiotlb_init(void) | 54 | void __init pci_xen_swiotlb_init(void) |
73 | { | 55 | { |
74 | if (xen_swiotlb) { | 56 | if (xen_swiotlb) { |
75 | xen_swiotlb_init(1, true /* early */); | 57 | xen_swiotlb_init(1); |
76 | dma_ops = &xen_swiotlb_dma_ops; | 58 | dma_ops = &xen_swiotlb_dma_ops; |
77 | 59 | ||
78 | /* Make sure ACS will be enabled */ | 60 | /* Make sure ACS will be enabled */ |
79 | pci_request_acs(); | 61 | pci_request_acs(); |
80 | } | 62 | } |
81 | } | 63 | } |
82 | |||
83 | int pci_xen_swiotlb_init_late(void) | ||
84 | { | ||
85 | int rc; | ||
86 | |||
87 | if (xen_swiotlb) | ||
88 | return 0; | ||
89 | |||
90 | rc = xen_swiotlb_init(1, false /* late */); | ||
91 | if (rc) | ||
92 | return rc; | ||
93 | |||
94 | dma_ops = &xen_swiotlb_dma_ops; | ||
95 | /* Make sure ACS will be enabled */ | ||
96 | pci_request_acs(); | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); | ||
101 | |||
102 | IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, | 64 | IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, |
103 | NULL, | 65 | 0, |
104 | pci_xen_swiotlb_init, | 66 | pci_xen_swiotlb_init, |
105 | NULL); | 67 | 0); |
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 0a7852483ff..ffcf2615640 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/module.h> | 24 | #include <linux/module.h> |
25 | 25 | ||
26 | #include <xen/platform_pci.h> | 26 | #include <xen/platform_pci.h> |
27 | #include "xen-ops.h" | ||
28 | 27 | ||
29 | #define XEN_PLATFORM_ERR_MAGIC -1 | 28 | #define XEN_PLATFORM_ERR_MAGIC -1 |
30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 | 29 | #define XEN_PLATFORM_ERR_PROTOCOL -2 |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8971a26d21a..e1913024687 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -10,14 +10,12 @@ | |||
10 | #include <linux/pm.h> | 10 | #include <linux/pm.h> |
11 | #include <linux/memblock.h> | 11 | #include <linux/memblock.h> |
12 | #include <linux/cpuidle.h> | 12 | #include <linux/cpuidle.h> |
13 | #include <linux/cpufreq.h> | ||
14 | 13 | ||
15 | #include <asm/elf.h> | 14 | #include <asm/elf.h> |
16 | #include <asm/vdso.h> | 15 | #include <asm/vdso.h> |
17 | #include <asm/e820.h> | 16 | #include <asm/e820.h> |
18 | #include <asm/setup.h> | 17 | #include <asm/setup.h> |
19 | #include <asm/acpi.h> | 18 | #include <asm/acpi.h> |
20 | #include <asm/numa.h> | ||
21 | #include <asm/xen/hypervisor.h> | 19 | #include <asm/xen/hypervisor.h> |
22 | #include <asm/xen/hypercall.h> | 20 | #include <asm/xen/hypercall.h> |
23 | 21 | ||
@@ -27,6 +25,7 @@ | |||
27 | #include <xen/interface/memory.h> | 25 | #include <xen/interface/memory.h> |
28 | #include <xen/interface/physdev.h> | 26 | #include <xen/interface/physdev.h> |
29 | #include <xen/features.h> | 27 | #include <xen/features.h> |
28 | |||
30 | #include "xen-ops.h" | 29 | #include "xen-ops.h" |
31 | #include "vdso.h" | 30 | #include "vdso.h" |
32 | 31 | ||
@@ -38,10 +37,7 @@ extern void xen_syscall_target(void); | |||
38 | extern void xen_syscall32_target(void); | 37 | extern void xen_syscall32_target(void); |
39 | 38 | ||
40 | /* Amount of extra memory space we add to the e820 ranges */ | 39 | /* Amount of extra memory space we add to the e820 ranges */ |
41 | struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; | 40 | phys_addr_t xen_extra_mem_start, xen_extra_mem_size; |
42 | |||
43 | /* Number of pages released from the initial allocation. */ | ||
44 | unsigned long xen_released_pages; | ||
45 | 41 | ||
46 | /* | 42 | /* |
47 | * The maximum amount of extra memory compared to the base size. The | 43 | * The maximum amount of extra memory compared to the base size. The |
@@ -55,225 +51,138 @@ unsigned long xen_released_pages; | |||
55 | */ | 51 | */ |
56 | #define EXTRA_MEM_RATIO (10) | 52 | #define EXTRA_MEM_RATIO (10) |
57 | 53 | ||
58 | static void __init xen_add_extra_mem(u64 start, u64 size) | 54 | static void __init xen_add_extra_mem(unsigned long pages) |
59 | { | 55 | { |
60 | unsigned long pfn; | 56 | unsigned long pfn; |
61 | int i; | ||
62 | 57 | ||
63 | for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { | 58 | u64 size = (u64)pages * PAGE_SIZE; |
64 | /* Add new region. */ | 59 | u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; |
65 | if (xen_extra_mem[i].size == 0) { | ||
66 | xen_extra_mem[i].start = start; | ||
67 | xen_extra_mem[i].size = size; | ||
68 | break; | ||
69 | } | ||
70 | /* Append to existing region. */ | ||
71 | if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) { | ||
72 | xen_extra_mem[i].size += size; | ||
73 | break; | ||
74 | } | ||
75 | } | ||
76 | if (i == XEN_EXTRA_MEM_MAX_REGIONS) | ||
77 | printk(KERN_WARNING "Warning: not enough extra memory regions\n"); | ||
78 | 60 | ||
79 | memblock_reserve(start, size); | 61 | if (!pages) |
62 | return; | ||
80 | 63 | ||
81 | xen_max_p2m_pfn = PFN_DOWN(start + size); | 64 | e820_add_region(extra_start, size, E820_RAM); |
82 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | 65 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
83 | unsigned long mfn = pfn_to_mfn(pfn); | ||
84 | 66 | ||
85 | if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) | 67 | memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA"); |
86 | continue; | ||
87 | WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", | ||
88 | pfn, mfn); | ||
89 | 68 | ||
69 | xen_extra_mem_size += size; | ||
70 | |||
71 | xen_max_p2m_pfn = PFN_DOWN(extra_start + size); | ||
72 | |||
73 | for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++) | ||
90 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 74 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
91 | } | ||
92 | } | 75 | } |
93 | 76 | ||
94 | static unsigned long __init xen_do_chunk(unsigned long start, | 77 | static unsigned long __init xen_release_chunk(phys_addr_t start_addr, |
95 | unsigned long end, bool release) | 78 | phys_addr_t end_addr) |
96 | { | 79 | { |
97 | struct xen_memory_reservation reservation = { | 80 | struct xen_memory_reservation reservation = { |
98 | .address_bits = 0, | 81 | .address_bits = 0, |
99 | .extent_order = 0, | 82 | .extent_order = 0, |
100 | .domid = DOMID_SELF | 83 | .domid = DOMID_SELF |
101 | }; | 84 | }; |
85 | unsigned long start, end; | ||
102 | unsigned long len = 0; | 86 | unsigned long len = 0; |
103 | unsigned long pfn; | 87 | unsigned long pfn; |
104 | int ret; | 88 | int ret; |
105 | 89 | ||
106 | for (pfn = start; pfn < end; pfn++) { | 90 | start = PFN_UP(start_addr); |
107 | unsigned long frame; | 91 | end = PFN_DOWN(end_addr); |
92 | |||
93 | if (end <= start) | ||
94 | return 0; | ||
95 | |||
96 | for(pfn = start; pfn < end; pfn++) { | ||
108 | unsigned long mfn = pfn_to_mfn(pfn); | 97 | unsigned long mfn = pfn_to_mfn(pfn); |
109 | 98 | ||
110 | if (release) { | 99 | /* Make sure pfn exists to start with */ |
111 | /* Make sure pfn exists to start with */ | 100 | if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) |
112 | if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) | 101 | continue; |
113 | continue; | 102 | |
114 | frame = mfn; | 103 | set_xen_guest_handle(reservation.extent_start, &mfn); |
115 | } else { | ||
116 | if (mfn != INVALID_P2M_ENTRY) | ||
117 | continue; | ||
118 | frame = pfn; | ||
119 | } | ||
120 | set_xen_guest_handle(reservation.extent_start, &frame); | ||
121 | reservation.nr_extents = 1; | 104 | reservation.nr_extents = 1; |
122 | 105 | ||
123 | ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, | 106 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, |
124 | &reservation); | 107 | &reservation); |
125 | WARN(ret != 1, "Failed to %s pfn %lx err=%d\n", | 108 | WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); |
126 | release ? "release" : "populate", pfn, ret); | ||
127 | |||
128 | if (ret == 1) { | 109 | if (ret == 1) { |
129 | if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { | 110 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
130 | if (release) | ||
131 | break; | ||
132 | set_xen_guest_handle(reservation.extent_start, &frame); | ||
133 | reservation.nr_extents = 1; | ||
134 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
135 | &reservation); | ||
136 | break; | ||
137 | } | ||
138 | len++; | 111 | len++; |
139 | } else | 112 | } |
140 | break; | ||
141 | } | 113 | } |
142 | if (len) | 114 | printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", |
143 | printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", | 115 | start, end, len); |
144 | release ? "Freeing" : "Populating", | ||
145 | start, end, len, | ||
146 | release ? "freed" : "added"); | ||
147 | 116 | ||
148 | return len; | 117 | return len; |
149 | } | 118 | } |
150 | 119 | ||
151 | static unsigned long __init xen_release_chunk(unsigned long start, | 120 | static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, |
152 | unsigned long end) | 121 | const struct e820map *e820) |
153 | { | 122 | { |
154 | return xen_do_chunk(start, end, true); | 123 | phys_addr_t max_addr = PFN_PHYS(max_pfn); |
155 | } | 124 | phys_addr_t last_end = ISA_END_ADDRESS; |
156 | 125 | unsigned long released = 0; | |
157 | static unsigned long __init xen_populate_chunk( | 126 | int i; |
158 | const struct e820entry *list, size_t map_size, | ||
159 | unsigned long max_pfn, unsigned long *last_pfn, | ||
160 | unsigned long credits_left) | ||
161 | { | ||
162 | const struct e820entry *entry; | ||
163 | unsigned int i; | ||
164 | unsigned long done = 0; | ||
165 | unsigned long dest_pfn; | ||
166 | |||
167 | for (i = 0, entry = list; i < map_size; i++, entry++) { | ||
168 | unsigned long s_pfn; | ||
169 | unsigned long e_pfn; | ||
170 | unsigned long pfns; | ||
171 | long capacity; | ||
172 | |||
173 | if (credits_left <= 0) | ||
174 | break; | ||
175 | |||
176 | if (entry->type != E820_RAM) | ||
177 | continue; | ||
178 | |||
179 | e_pfn = PFN_DOWN(entry->addr + entry->size); | ||
180 | |||
181 | /* We only care about E820 after the xen_start_info->nr_pages */ | ||
182 | if (e_pfn <= max_pfn) | ||
183 | continue; | ||
184 | |||
185 | s_pfn = PFN_UP(entry->addr); | ||
186 | /* If the E820 falls within the nr_pages, we want to start | ||
187 | * at the nr_pages PFN. | ||
188 | * If that would mean going past the E820 entry, skip it | ||
189 | */ | ||
190 | if (s_pfn <= max_pfn) { | ||
191 | capacity = e_pfn - max_pfn; | ||
192 | dest_pfn = max_pfn; | ||
193 | } else { | ||
194 | capacity = e_pfn - s_pfn; | ||
195 | dest_pfn = s_pfn; | ||
196 | } | ||
197 | 127 | ||
198 | if (credits_left < capacity) | 128 | /* Free any unused memory above the low 1Mbyte. */ |
199 | capacity = credits_left; | 129 | for (i = 0; i < e820->nr_map && last_end < max_addr; i++) { |
130 | phys_addr_t end = e820->map[i].addr; | ||
131 | end = min(max_addr, end); | ||
200 | 132 | ||
201 | pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false); | 133 | if (last_end < end) |
202 | done += pfns; | 134 | released += xen_release_chunk(last_end, end); |
203 | *last_pfn = (dest_pfn + pfns); | 135 | last_end = max(last_end, e820->map[i].addr + e820->map[i].size); |
204 | if (pfns < capacity) | ||
205 | break; | ||
206 | credits_left -= pfns; | ||
207 | } | 136 | } |
208 | return done; | ||
209 | } | ||
210 | 137 | ||
211 | static void __init xen_set_identity_and_release_chunk( | 138 | if (last_end < max_addr) |
212 | unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, | 139 | released += xen_release_chunk(last_end, max_addr); |
213 | unsigned long *released, unsigned long *identity) | ||
214 | { | ||
215 | unsigned long pfn; | ||
216 | 140 | ||
217 | /* | 141 | printk(KERN_INFO "released %lu pages of unused memory\n", released); |
218 | * If the PFNs are currently mapped, the VA mapping also needs | 142 | return released; |
219 | * to be updated to be 1:1. | ||
220 | */ | ||
221 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) | ||
222 | (void)HYPERVISOR_update_va_mapping( | ||
223 | (unsigned long)__va(pfn << PAGE_SHIFT), | ||
224 | mfn_pte(pfn, PAGE_KERNEL_IO), 0); | ||
225 | |||
226 | if (start_pfn < nr_pages) | ||
227 | *released += xen_release_chunk( | ||
228 | start_pfn, min(end_pfn, nr_pages)); | ||
229 | |||
230 | *identity += set_phys_range_identity(start_pfn, end_pfn); | ||
231 | } | 143 | } |
232 | 144 | ||
233 | static unsigned long __init xen_set_identity_and_release( | 145 | static unsigned long __init xen_set_identity(const struct e820entry *list, |
234 | const struct e820entry *list, size_t map_size, unsigned long nr_pages) | 146 | ssize_t map_size) |
235 | { | 147 | { |
236 | phys_addr_t start = 0; | 148 | phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS; |
237 | unsigned long released = 0; | 149 | phys_addr_t start_pci = last; |
238 | unsigned long identity = 0; | ||
239 | const struct e820entry *entry; | 150 | const struct e820entry *entry; |
151 | unsigned long identity = 0; | ||
240 | int i; | 152 | int i; |
241 | 153 | ||
242 | /* | ||
243 | * Combine non-RAM regions and gaps until a RAM region (or the | ||
244 | * end of the map) is reached, then set the 1:1 map and | ||
245 | * release the pages (if available) in those non-RAM regions. | ||
246 | * | ||
247 | * The combined non-RAM regions are rounded to a whole number | ||
248 | * of pages so any partial pages are accessible via the 1:1 | ||
249 | * mapping. This is needed for some BIOSes that put (for | ||
250 | * example) the DMI tables in a reserved region that begins on | ||
251 | * a non-page boundary. | ||
252 | */ | ||
253 | for (i = 0, entry = list; i < map_size; i++, entry++) { | 154 | for (i = 0, entry = list; i < map_size; i++, entry++) { |
254 | phys_addr_t end = entry->addr + entry->size; | 155 | phys_addr_t start = entry->addr; |
255 | if (entry->type == E820_RAM || i == map_size - 1) { | 156 | phys_addr_t end = start + entry->size; |
256 | unsigned long start_pfn = PFN_DOWN(start); | ||
257 | unsigned long end_pfn = PFN_UP(end); | ||
258 | 157 | ||
259 | if (entry->type == E820_RAM) | 158 | if (start < last) |
260 | end_pfn = PFN_UP(entry->addr); | 159 | start = last; |
261 | 160 | ||
262 | if (start_pfn < end_pfn) | 161 | if (end <= start) |
263 | xen_set_identity_and_release_chunk( | 162 | continue; |
264 | start_pfn, end_pfn, nr_pages, | ||
265 | &released, &identity); | ||
266 | 163 | ||
267 | start = end; | 164 | /* Skip over the 1MB region. */ |
268 | } | 165 | if (last > end) |
269 | } | 166 | continue; |
270 | 167 | ||
271 | if (released) | 168 | if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) { |
272 | printk(KERN_INFO "Released %lu pages of unused memory\n", released); | 169 | if (start > start_pci) |
273 | if (identity) | 170 | identity += set_phys_range_identity( |
274 | printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); | 171 | PFN_UP(start_pci), PFN_DOWN(start)); |
275 | 172 | ||
276 | return released; | 173 | /* Without saving 'last' we would gooble RAM too |
174 | * at the end of the loop. */ | ||
175 | last = end; | ||
176 | start_pci = end; | ||
177 | continue; | ||
178 | } | ||
179 | start_pci = min(start, start_pci); | ||
180 | last = end; | ||
181 | } | ||
182 | if (last > start_pci) | ||
183 | identity += set_phys_range_identity( | ||
184 | PFN_UP(start_pci), PFN_DOWN(last)); | ||
185 | return identity; | ||
277 | } | 186 | } |
278 | 187 | ||
279 | static unsigned long __init xen_get_max_pages(void) | 188 | static unsigned long __init xen_get_max_pages(void) |
@@ -300,34 +209,21 @@ static unsigned long __init xen_get_max_pages(void) | |||
300 | return min(max_pages, MAX_DOMAIN_PAGES); | 209 | return min(max_pages, MAX_DOMAIN_PAGES); |
301 | } | 210 | } |
302 | 211 | ||
303 | static void xen_align_and_add_e820_region(u64 start, u64 size, int type) | ||
304 | { | ||
305 | u64 end = start + size; | ||
306 | |||
307 | /* Align RAM regions to page boundaries. */ | ||
308 | if (type == E820_RAM) { | ||
309 | start = PAGE_ALIGN(start); | ||
310 | end &= ~((u64)PAGE_SIZE - 1); | ||
311 | } | ||
312 | |||
313 | e820_add_region(start, end - start, type); | ||
314 | } | ||
315 | |||
316 | /** | 212 | /** |
317 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 213 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
318 | **/ | 214 | **/ |
319 | char * __init xen_memory_setup(void) | 215 | char * __init xen_memory_setup(void) |
320 | { | 216 | { |
321 | static struct e820entry map[E820MAX] __initdata; | 217 | static struct e820entry map[E820MAX] __initdata; |
218 | static struct e820entry map_raw[E820MAX] __initdata; | ||
322 | 219 | ||
323 | unsigned long max_pfn = xen_start_info->nr_pages; | 220 | unsigned long max_pfn = xen_start_info->nr_pages; |
324 | unsigned long long mem_end; | 221 | unsigned long long mem_end; |
325 | int rc; | 222 | int rc; |
326 | struct xen_memory_map memmap; | 223 | struct xen_memory_map memmap; |
327 | unsigned long max_pages; | ||
328 | unsigned long last_pfn = 0; | ||
329 | unsigned long extra_pages = 0; | 224 | unsigned long extra_pages = 0; |
330 | unsigned long populated; | 225 | unsigned long extra_limit; |
226 | unsigned long identity_pages = 0; | ||
331 | int i; | 227 | int i; |
332 | int op; | 228 | int op; |
333 | 229 | ||
@@ -353,34 +249,84 @@ char * __init xen_memory_setup(void) | |||
353 | } | 249 | } |
354 | BUG_ON(rc); | 250 | BUG_ON(rc); |
355 | 251 | ||
356 | /* Make sure the Xen-supplied memory map is well-ordered. */ | 252 | memcpy(map_raw, map, sizeof(map)); |
357 | sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries); | 253 | e820.nr_map = 0; |
254 | xen_extra_mem_start = mem_end; | ||
255 | for (i = 0; i < memmap.nr_entries; i++) { | ||
256 | unsigned long long end; | ||
257 | |||
258 | /* Guard against non-page aligned E820 entries. */ | ||
259 | if (map[i].type == E820_RAM) | ||
260 | map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE; | ||
261 | |||
262 | end = map[i].addr + map[i].size; | ||
263 | if (map[i].type == E820_RAM && end > mem_end) { | ||
264 | /* RAM off the end - may be partially included */ | ||
265 | u64 delta = min(map[i].size, end - mem_end); | ||
266 | |||
267 | map[i].size -= delta; | ||
268 | end -= delta; | ||
269 | |||
270 | extra_pages += PFN_DOWN(delta); | ||
271 | /* | ||
272 | * Set RAM below 4GB that is not for us to be unusable. | ||
273 | * This prevents "System RAM" address space from being | ||
274 | * used as potential resource for I/O address (happens | ||
275 | * when 'allocate_resource' is called). | ||
276 | */ | ||
277 | if (delta && | ||
278 | (xen_initial_domain() && end < 0x100000000ULL)) | ||
279 | e820_add_region(end, delta, E820_UNUSABLE); | ||
280 | } | ||
281 | |||
282 | if (map[i].size > 0 && end > xen_extra_mem_start) | ||
283 | xen_extra_mem_start = end; | ||
358 | 284 | ||
359 | max_pages = xen_get_max_pages(); | 285 | /* Add region if any remains */ |
360 | if (max_pages > max_pfn) | 286 | if (map[i].size > 0) |
361 | extra_pages += max_pages - max_pfn; | 287 | e820_add_region(map[i].addr, map[i].size, map[i].type); |
288 | } | ||
289 | /* Align the balloon area so that max_low_pfn does not get set | ||
290 | * to be at the _end_ of the PCI gap at the far end (fee01000). | ||
291 | * Note that xen_extra_mem_start gets set in the loop above to be | ||
292 | * past the last E820 region. */ | ||
293 | if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32))) | ||
294 | xen_extra_mem_start = (1ULL<<32); | ||
362 | 295 | ||
363 | /* | 296 | /* |
364 | * Set P2M for all non-RAM pages and E820 gaps to be identity | 297 | * In domU, the ISA region is normal, usable memory, but we |
365 | * type PFNs. Any RAM pages that would be made inaccesible by | 298 | * reserve ISA memory anyway because too many things poke |
366 | * this are first released. | 299 | * about in there. |
300 | * | ||
301 | * In Dom0, the host E820 information can leave gaps in the | ||
302 | * ISA range, which would cause us to release those pages. To | ||
303 | * avoid this, we unconditionally reserve them here. | ||
367 | */ | 304 | */ |
368 | xen_released_pages = xen_set_identity_and_release( | 305 | e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, |
369 | map, memmap.nr_entries, max_pfn); | 306 | E820_RESERVED); |
370 | 307 | ||
371 | /* | 308 | /* |
372 | * Populate back the non-RAM pages and E820 gaps that had been | 309 | * Reserve Xen bits: |
373 | * released. */ | 310 | * - mfn_list |
374 | populated = xen_populate_chunk(map, memmap.nr_entries, | 311 | * - xen_start_info |
375 | max_pfn, &last_pfn, xen_released_pages); | 312 | * See comment above "struct start_info" in <xen/interface/xen.h> |
313 | */ | ||
314 | memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), | ||
315 | __pa(xen_start_info->pt_base), | ||
316 | "XEN START INFO"); | ||
376 | 317 | ||
377 | xen_released_pages -= populated; | 318 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
378 | extra_pages += xen_released_pages; | ||
379 | 319 | ||
380 | if (last_pfn > max_pfn) { | 320 | extra_limit = xen_get_max_pages(); |
381 | max_pfn = min(MAX_DOMAIN_PAGES, last_pfn); | 321 | if (max_pfn + extra_pages > extra_limit) { |
382 | mem_end = PFN_PHYS(max_pfn); | 322 | if (extra_limit > max_pfn) |
323 | extra_pages = extra_limit - max_pfn; | ||
324 | else | ||
325 | extra_pages = 0; | ||
383 | } | 326 | } |
327 | |||
328 | extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820); | ||
329 | |||
384 | /* | 330 | /* |
385 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO | 331 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO |
386 | * factor the base size. On non-highmem systems, the base | 332 | * factor the base size. On non-highmem systems, the base |
@@ -392,70 +338,23 @@ char * __init xen_memory_setup(void) | |||
392 | * the initial memory is also very large with respect to | 338 | * the initial memory is also very large with respect to |
393 | * lowmem, but we won't try to deal with that here. | 339 | * lowmem, but we won't try to deal with that here. |
394 | */ | 340 | */ |
395 | extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), | 341 | extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), |
396 | extra_pages); | 342 | max_pfn + extra_pages); |
397 | i = 0; | ||
398 | while (i < memmap.nr_entries) { | ||
399 | u64 addr = map[i].addr; | ||
400 | u64 size = map[i].size; | ||
401 | u32 type = map[i].type; | ||
402 | |||
403 | if (type == E820_RAM) { | ||
404 | if (addr < mem_end) { | ||
405 | size = min(size, mem_end - addr); | ||
406 | } else if (extra_pages) { | ||
407 | size = min(size, (u64)extra_pages * PAGE_SIZE); | ||
408 | extra_pages -= size / PAGE_SIZE; | ||
409 | xen_add_extra_mem(addr, size); | ||
410 | } else | ||
411 | type = E820_UNUSABLE; | ||
412 | } | ||
413 | 343 | ||
414 | xen_align_and_add_e820_region(addr, size, type); | 344 | if (extra_limit >= max_pfn) |
345 | extra_pages = extra_limit - max_pfn; | ||
346 | else | ||
347 | extra_pages = 0; | ||
415 | 348 | ||
416 | map[i].addr += size; | 349 | xen_add_extra_mem(extra_pages); |
417 | map[i].size -= size; | ||
418 | if (map[i].size == 0) | ||
419 | i++; | ||
420 | } | ||
421 | 350 | ||
422 | /* | 351 | /* |
423 | * In domU, the ISA region is normal, usable memory, but we | 352 | * Set P2M for all non-RAM pages and E820 gaps to be identity |
424 | * reserve ISA memory anyway because too many things poke | 353 | * type PFNs. We supply it with the non-sanitized version |
425 | * about in there. | 354 | * of the E820. |
426 | */ | ||
427 | e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, | ||
428 | E820_RESERVED); | ||
429 | |||
430 | /* | ||
431 | * Reserve Xen bits: | ||
432 | * - mfn_list | ||
433 | * - xen_start_info | ||
434 | * See comment above "struct start_info" in <xen/interface/xen.h> | ||
435 | * We tried to make the the memblock_reserve more selective so | ||
436 | * that it would be clear what region is reserved. Sadly we ran | ||
437 | * in the problem wherein on a 64-bit hypervisor with a 32-bit | ||
438 | * initial domain, the pt_base has the cr3 value which is not | ||
439 | * neccessarily where the pagetable starts! As Jan put it: " | ||
440 | * Actually, the adjustment turns out to be correct: The page | ||
441 | * tables for a 32-on-64 dom0 get allocated in the order "first L1", | ||
442 | * "first L2", "first L3", so the offset to the page table base is | ||
443 | * indeed 2. When reading xen/include/public/xen.h's comment | ||
444 | * very strictly, this is not a violation (since there nothing is said | ||
445 | * that the first thing in the page table space is pointed to by | ||
446 | * pt_base; I admit that this seems to be implied though, namely | ||
447 | * do I think that it is implied that the page table space is the | ||
448 | * range [pt_base, pt_base + nt_pt_frames), whereas that | ||
449 | * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames), | ||
450 | * which - without a priori knowledge - the kernel would have | ||
451 | * difficulty to figure out)." - so lets just fall back to the | ||
452 | * easy way and reserve the whole region. | ||
453 | */ | 355 | */ |
454 | memblock_reserve(__pa(xen_start_info->mfn_list), | 356 | identity_pages = xen_set_identity(map_raw, memmap.nr_entries); |
455 | xen_start_info->pt_base - xen_start_info->mfn_list); | 357 | printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages); |
456 | |||
457 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
458 | |||
459 | return "Xen"; | 358 | return "Xen"; |
460 | } | 359 | } |
461 | 360 | ||
@@ -560,10 +459,7 @@ void __init xen_arch_setup(void) | |||
560 | boot_cpu_data.hlt_works_ok = 1; | 459 | boot_cpu_data.hlt_works_ok = 1; |
561 | #endif | 460 | #endif |
562 | disable_cpuidle(); | 461 | disable_cpuidle(); |
563 | disable_cpufreq(); | 462 | boot_option_idle_override = IDLE_HALT; |
564 | WARN_ON(set_pm_idle_to_default()); | 463 | WARN_ON(set_pm_idle_to_default()); |
565 | fiddle_vdso(); | 464 | fiddle_vdso(); |
566 | #ifdef CONFIG_NUMA | ||
567 | numa_off = 1; | ||
568 | #endif | ||
569 | } | 465 | } |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4f7d2599b48..041d4fe9dfe 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/irq_work.h> | ||
20 | 19 | ||
21 | #include <asm/paravirt.h> | 20 | #include <asm/paravirt.h> |
22 | #include <asm/desc.h> | 21 | #include <asm/desc.h> |
@@ -42,12 +41,10 @@ cpumask_var_t xen_cpu_initialized_map; | |||
42 | static DEFINE_PER_CPU(int, xen_resched_irq); | 41 | static DEFINE_PER_CPU(int, xen_resched_irq); |
43 | static DEFINE_PER_CPU(int, xen_callfunc_irq); | 42 | static DEFINE_PER_CPU(int, xen_callfunc_irq); |
44 | static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); | 43 | static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); |
45 | static DEFINE_PER_CPU(int, xen_irq_work); | ||
46 | static DEFINE_PER_CPU(int, xen_debug_irq) = -1; | 44 | static DEFINE_PER_CPU(int, xen_debug_irq) = -1; |
47 | 45 | ||
48 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | 46 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); |
49 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); | 47 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); |
50 | static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); | ||
51 | 48 | ||
52 | /* | 49 | /* |
53 | * Reschedule call back. | 50 | * Reschedule call back. |
@@ -62,7 +59,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | |||
62 | 59 | ||
63 | static void __cpuinit cpu_bringup(void) | 60 | static void __cpuinit cpu_bringup(void) |
64 | { | 61 | { |
65 | int cpu; | 62 | int cpu = smp_processor_id(); |
66 | 63 | ||
67 | cpu_init(); | 64 | cpu_init(); |
68 | touch_softlockup_watchdog(); | 65 | touch_softlockup_watchdog(); |
@@ -78,12 +75,8 @@ static void __cpuinit cpu_bringup(void) | |||
78 | 75 | ||
79 | xen_setup_cpu_clockevents(); | 76 | xen_setup_cpu_clockevents(); |
80 | 77 | ||
81 | notify_cpu_starting(cpu); | ||
82 | |||
83 | set_cpu_online(cpu, true); | 78 | set_cpu_online(cpu, true); |
84 | 79 | percpu_write(cpu_state, CPU_ONLINE); | |
85 | this_cpu_write(cpu_state, CPU_ONLINE); | ||
86 | |||
87 | wmb(); | 80 | wmb(); |
88 | 81 | ||
89 | /* We can take interrupts now: we're officially "up". */ | 82 | /* We can take interrupts now: we're officially "up". */ |
@@ -144,17 +137,6 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
144 | goto fail; | 137 | goto fail; |
145 | per_cpu(xen_callfuncsingle_irq, cpu) = rc; | 138 | per_cpu(xen_callfuncsingle_irq, cpu) = rc; |
146 | 139 | ||
147 | callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu); | ||
148 | rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR, | ||
149 | cpu, | ||
150 | xen_irq_work_interrupt, | ||
151 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
152 | callfunc_name, | ||
153 | NULL); | ||
154 | if (rc < 0) | ||
155 | goto fail; | ||
156 | per_cpu(xen_irq_work, cpu) = rc; | ||
157 | |||
158 | return 0; | 140 | return 0; |
159 | 141 | ||
160 | fail: | 142 | fail: |
@@ -167,8 +149,6 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
167 | if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) | 149 | if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) |
168 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), | 150 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), |
169 | NULL); | 151 | NULL); |
170 | if (per_cpu(xen_irq_work, cpu) >= 0) | ||
171 | unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); | ||
172 | 152 | ||
173 | return rc; | 153 | return rc; |
174 | } | 154 | } |
@@ -192,7 +172,6 @@ static void __init xen_fill_possible_map(void) | |||
192 | static void __init xen_filter_cpu_maps(void) | 172 | static void __init xen_filter_cpu_maps(void) |
193 | { | 173 | { |
194 | int i, rc; | 174 | int i, rc; |
195 | unsigned int subtract = 0; | ||
196 | 175 | ||
197 | if (!xen_initial_domain()) | 176 | if (!xen_initial_domain()) |
198 | return; | 177 | return; |
@@ -207,22 +186,8 @@ static void __init xen_filter_cpu_maps(void) | |||
207 | } else { | 186 | } else { |
208 | set_cpu_possible(i, false); | 187 | set_cpu_possible(i, false); |
209 | set_cpu_present(i, false); | 188 | set_cpu_present(i, false); |
210 | subtract++; | ||
211 | } | 189 | } |
212 | } | 190 | } |
213 | #ifdef CONFIG_HOTPLUG_CPU | ||
214 | /* This is akin to using 'nr_cpus' on the Linux command line. | ||
215 | * Which is OK as when we use 'dom0_max_vcpus=X' we can only | ||
216 | * have up to X, while nr_cpu_ids is greater than X. This | ||
217 | * normally is not a problem, except when CPU hotplugging | ||
218 | * is involved and then there might be more than X CPUs | ||
219 | * in the guest - which will not work as there is no | ||
220 | * hypercall to expand the max number of VCPUs an already | ||
221 | * running guest has. So cap it up to X. */ | ||
222 | if (subtract) | ||
223 | nr_cpu_ids = nr_cpu_ids - subtract; | ||
224 | #endif | ||
225 | |||
226 | } | 191 | } |
227 | 192 | ||
228 | static void __init xen_smp_prepare_boot_cpu(void) | 193 | static void __init xen_smp_prepare_boot_cpu(void) |
@@ -254,7 +219,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
254 | } | 219 | } |
255 | xen_init_lock_cpu(0); | 220 | xen_init_lock_cpu(0); |
256 | 221 | ||
257 | smp_store_boot_cpu_info(); | 222 | smp_store_cpu_info(0); |
258 | cpu_data(0).x86_max_cores = 1; | 223 | cpu_data(0).x86_max_cores = 1; |
259 | 224 | ||
260 | for_each_possible_cpu(i) { | 225 | for_each_possible_cpu(i) { |
@@ -279,8 +244,18 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
279 | set_cpu_possible(cpu, false); | 244 | set_cpu_possible(cpu, false); |
280 | } | 245 | } |
281 | 246 | ||
282 | for_each_possible_cpu(cpu) | 247 | for_each_possible_cpu (cpu) { |
248 | struct task_struct *idle; | ||
249 | |||
250 | if (cpu == 0) | ||
251 | continue; | ||
252 | |||
253 | idle = fork_idle(cpu); | ||
254 | if (IS_ERR(idle)) | ||
255 | panic("failed fork for CPU %d", cpu); | ||
256 | |||
283 | set_cpu_present(cpu, true); | 257 | set_cpu_present(cpu, true); |
258 | } | ||
284 | } | 259 | } |
285 | 260 | ||
286 | static int __cpuinit | 261 | static int __cpuinit |
@@ -350,8 +325,9 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
350 | return 0; | 325 | return 0; |
351 | } | 326 | } |
352 | 327 | ||
353 | static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) | 328 | static int __cpuinit xen_cpu_up(unsigned int cpu) |
354 | { | 329 | { |
330 | struct task_struct *idle = idle_task(cpu); | ||
355 | int rc; | 331 | int rc; |
356 | 332 | ||
357 | per_cpu(current_task, cpu) = idle; | 333 | per_cpu(current_task, cpu) = idle; |
@@ -377,8 +353,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
377 | return rc; | 353 | return rc; |
378 | 354 | ||
379 | if (num_online_cpus() == 1) | 355 | if (num_online_cpus() == 1) |
380 | /* Just in case we booted with a single CPU. */ | 356 | alternatives_smp_switch(1); |
381 | alternatives_enable_smp(); | ||
382 | 357 | ||
383 | rc = xen_smp_intr_init(cpu); | 358 | rc = xen_smp_intr_init(cpu); |
384 | if (rc) | 359 | if (rc) |
@@ -422,9 +397,11 @@ static void xen_cpu_die(unsigned int cpu) | |||
422 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); | 397 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); |
423 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); | 398 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); |
424 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); | 399 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); |
425 | unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); | ||
426 | xen_uninit_lock_cpu(cpu); | 400 | xen_uninit_lock_cpu(cpu); |
427 | xen_teardown_timer(cpu); | 401 | xen_teardown_timer(cpu); |
402 | |||
403 | if (num_online_cpus() == 1) | ||
404 | alternatives_smp_switch(0); | ||
428 | } | 405 | } |
429 | 406 | ||
430 | static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ | 407 | static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ |
@@ -432,13 +409,6 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ | |||
432 | play_dead_common(); | 409 | play_dead_common(); |
433 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | 410 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); |
434 | cpu_bringup(); | 411 | cpu_bringup(); |
435 | /* | ||
436 | * Balance out the preempt calls - as we are running in cpu_idle | ||
437 | * loop which has been called at bootup from cpu_bringup_and_idle. | ||
438 | * The cpucpu_bringup_and_idle called cpu_bringup which made a | ||
439 | * preempt_disable() So this preempt_enable will balance it out. | ||
440 | */ | ||
441 | preempt_enable(); | ||
442 | } | 412 | } |
443 | 413 | ||
444 | #else /* !CONFIG_HOTPLUG_CPU */ | 414 | #else /* !CONFIG_HOTPLUG_CPU */ |
@@ -482,8 +452,8 @@ static void xen_smp_send_reschedule(int cpu) | |||
482 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | 452 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); |
483 | } | 453 | } |
484 | 454 | ||
485 | static void __xen_send_IPI_mask(const struct cpumask *mask, | 455 | static void xen_send_IPI_mask(const struct cpumask *mask, |
486 | int vector) | 456 | enum ipi_vector vector) |
487 | { | 457 | { |
488 | unsigned cpu; | 458 | unsigned cpu; |
489 | 459 | ||
@@ -495,7 +465,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) | |||
495 | { | 465 | { |
496 | int cpu; | 466 | int cpu; |
497 | 467 | ||
498 | __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | 468 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); |
499 | 469 | ||
500 | /* Make sure other vcpus get a chance to run if they need to. */ | 470 | /* Make sure other vcpus get a chance to run if they need to. */ |
501 | for_each_cpu(cpu, mask) { | 471 | for_each_cpu(cpu, mask) { |
@@ -508,86 +478,10 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) | |||
508 | 478 | ||
509 | static void xen_smp_send_call_function_single_ipi(int cpu) | 479 | static void xen_smp_send_call_function_single_ipi(int cpu) |
510 | { | 480 | { |
511 | __xen_send_IPI_mask(cpumask_of(cpu), | 481 | xen_send_IPI_mask(cpumask_of(cpu), |
512 | XEN_CALL_FUNCTION_SINGLE_VECTOR); | 482 | XEN_CALL_FUNCTION_SINGLE_VECTOR); |
513 | } | 483 | } |
514 | 484 | ||
515 | static inline int xen_map_vector(int vector) | ||
516 | { | ||
517 | int xen_vector; | ||
518 | |||
519 | switch (vector) { | ||
520 | case RESCHEDULE_VECTOR: | ||
521 | xen_vector = XEN_RESCHEDULE_VECTOR; | ||
522 | break; | ||
523 | case CALL_FUNCTION_VECTOR: | ||
524 | xen_vector = XEN_CALL_FUNCTION_VECTOR; | ||
525 | break; | ||
526 | case CALL_FUNCTION_SINGLE_VECTOR: | ||
527 | xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR; | ||
528 | break; | ||
529 | case IRQ_WORK_VECTOR: | ||
530 | xen_vector = XEN_IRQ_WORK_VECTOR; | ||
531 | break; | ||
532 | default: | ||
533 | xen_vector = -1; | ||
534 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", | ||
535 | vector); | ||
536 | } | ||
537 | |||
538 | return xen_vector; | ||
539 | } | ||
540 | |||
541 | void xen_send_IPI_mask(const struct cpumask *mask, | ||
542 | int vector) | ||
543 | { | ||
544 | int xen_vector = xen_map_vector(vector); | ||
545 | |||
546 | if (xen_vector >= 0) | ||
547 | __xen_send_IPI_mask(mask, xen_vector); | ||
548 | } | ||
549 | |||
550 | void xen_send_IPI_all(int vector) | ||
551 | { | ||
552 | int xen_vector = xen_map_vector(vector); | ||
553 | |||
554 | if (xen_vector >= 0) | ||
555 | __xen_send_IPI_mask(cpu_online_mask, xen_vector); | ||
556 | } | ||
557 | |||
558 | void xen_send_IPI_self(int vector) | ||
559 | { | ||
560 | int xen_vector = xen_map_vector(vector); | ||
561 | |||
562 | if (xen_vector >= 0) | ||
563 | xen_send_IPI_one(smp_processor_id(), xen_vector); | ||
564 | } | ||
565 | |||
566 | void xen_send_IPI_mask_allbutself(const struct cpumask *mask, | ||
567 | int vector) | ||
568 | { | ||
569 | unsigned cpu; | ||
570 | unsigned int this_cpu = smp_processor_id(); | ||
571 | |||
572 | if (!(num_online_cpus() > 1)) | ||
573 | return; | ||
574 | |||
575 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | ||
576 | if (this_cpu == cpu) | ||
577 | continue; | ||
578 | |||
579 | xen_smp_send_call_function_single_ipi(cpu); | ||
580 | } | ||
581 | } | ||
582 | |||
583 | void xen_send_IPI_allbutself(int vector) | ||
584 | { | ||
585 | int xen_vector = xen_map_vector(vector); | ||
586 | |||
587 | if (xen_vector >= 0) | ||
588 | xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector); | ||
589 | } | ||
590 | |||
591 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) | 485 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
592 | { | 486 | { |
593 | irq_enter(); | 487 | irq_enter(); |
@@ -608,16 +502,6 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) | |||
608 | return IRQ_HANDLED; | 502 | return IRQ_HANDLED; |
609 | } | 503 | } |
610 | 504 | ||
611 | static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) | ||
612 | { | ||
613 | irq_enter(); | ||
614 | irq_work_run(); | ||
615 | inc_irq_stat(apic_irq_work_irqs); | ||
616 | irq_exit(); | ||
617 | |||
618 | return IRQ_HANDLED; | ||
619 | } | ||
620 | |||
621 | static const struct smp_ops xen_smp_ops __initconst = { | 505 | static const struct smp_ops xen_smp_ops __initconst = { |
622 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | 506 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, |
623 | .smp_prepare_cpus = xen_smp_prepare_cpus, | 507 | .smp_prepare_cpus = xen_smp_prepare_cpus, |
@@ -650,10 +534,10 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | |||
650 | xen_init_lock_cpu(0); | 534 | xen_init_lock_cpu(0); |
651 | } | 535 | } |
652 | 536 | ||
653 | static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) | 537 | static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) |
654 | { | 538 | { |
655 | int rc; | 539 | int rc; |
656 | rc = native_cpu_up(cpu, tidle); | 540 | rc = native_cpu_up(cpu); |
657 | WARN_ON (xen_smp_intr_init(cpu)); | 541 | WARN_ON (xen_smp_intr_init(cpu)); |
658 | return rc; | 542 | return rc; |
659 | } | 543 | } |
@@ -664,7 +548,6 @@ static void xen_hvm_cpu_die(unsigned int cpu) | |||
664 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); | 548 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); |
665 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); | 549 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); |
666 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); | 550 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); |
667 | unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); | ||
668 | native_cpu_die(cpu); | 551 | native_cpu_die(cpu); |
669 | } | 552 | } |
670 | 553 | ||
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h deleted file mode 100644 index 8981a76d081..00000000000 --- a/arch/x86/xen/smp.h +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | #ifndef _XEN_SMP_H | ||
2 | |||
3 | extern void xen_send_IPI_mask(const struct cpumask *mask, | ||
4 | int vector); | ||
5 | extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, | ||
6 | int vector); | ||
7 | extern void xen_send_IPI_allbutself(int vector); | ||
8 | extern void physflat_send_IPI_allbutself(int vector); | ||
9 | extern void xen_send_IPI_all(int vector); | ||
10 | extern void xen_send_IPI_self(int vector); | ||
11 | |||
12 | #endif | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 83e866d714c..cc9b1e182fc 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -116,26 +116,9 @@ static inline void spin_time_accum_blocked(u64 start) | |||
116 | } | 116 | } |
117 | #endif /* CONFIG_XEN_DEBUG_FS */ | 117 | #endif /* CONFIG_XEN_DEBUG_FS */ |
118 | 118 | ||
119 | /* | ||
120 | * Size struct xen_spinlock so it's the same as arch_spinlock_t. | ||
121 | */ | ||
122 | #if NR_CPUS < 256 | ||
123 | typedef u8 xen_spinners_t; | ||
124 | # define inc_spinners(xl) \ | ||
125 | asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory"); | ||
126 | # define dec_spinners(xl) \ | ||
127 | asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory"); | ||
128 | #else | ||
129 | typedef u16 xen_spinners_t; | ||
130 | # define inc_spinners(xl) \ | ||
131 | asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory"); | ||
132 | # define dec_spinners(xl) \ | ||
133 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); | ||
134 | #endif | ||
135 | |||
136 | struct xen_spinlock { | 119 | struct xen_spinlock { |
137 | unsigned char lock; /* 0 -> free; 1 -> locked */ | 120 | unsigned char lock; /* 0 -> free; 1 -> locked */ |
138 | xen_spinners_t spinners; /* count of waiting cpus */ | 121 | unsigned short spinners; /* count of waiting cpus */ |
139 | }; | 122 | }; |
140 | 123 | ||
141 | static int xen_spin_is_locked(struct arch_spinlock *lock) | 124 | static int xen_spin_is_locked(struct arch_spinlock *lock) |
@@ -181,7 +164,8 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | |||
181 | 164 | ||
182 | wmb(); /* set lock of interest before count */ | 165 | wmb(); /* set lock of interest before count */ |
183 | 166 | ||
184 | inc_spinners(xl); | 167 | asm(LOCK_PREFIX " incw %0" |
168 | : "+m" (xl->spinners) : : "memory"); | ||
185 | 169 | ||
186 | return prev; | 170 | return prev; |
187 | } | 171 | } |
@@ -192,7 +176,8 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | |||
192 | */ | 176 | */ |
193 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) | 177 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) |
194 | { | 178 | { |
195 | dec_spinners(xl); | 179 | asm(LOCK_PREFIX " decw %0" |
180 | : "+m" (xl->spinners) : : "memory"); | ||
196 | wmb(); /* decrement count before restoring lock */ | 181 | wmb(); /* decrement count before restoring lock */ |
197 | __this_cpu_write(lock_spinners, prev); | 182 | __this_cpu_write(lock_spinners, prev); |
198 | } | 183 | } |
@@ -388,8 +373,6 @@ void xen_uninit_lock_cpu(int cpu) | |||
388 | 373 | ||
389 | void __init xen_init_spinlocks(void) | 374 | void __init xen_init_spinlocks(void) |
390 | { | 375 | { |
391 | BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); | ||
392 | |||
393 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | 376 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; |
394 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | 377 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; |
395 | pv_lock_ops.spin_lock = xen_spin_lock; | 378 | pv_lock_ops.spin_lock = xen_spin_lock; |
@@ -440,12 +423,12 @@ static int __init xen_spinlock_debugfs(void) | |||
440 | debugfs_create_u64("time_total", 0444, d_spin_debug, | 423 | debugfs_create_u64("time_total", 0444, d_spin_debug, |
441 | &spinlock_stats.time_total); | 424 | &spinlock_stats.time_total); |
442 | 425 | ||
443 | debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | 426 | xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug, |
444 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | 427 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); |
445 | debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | 428 | xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, |
446 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | 429 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); |
447 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | 430 | xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, |
448 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | 431 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); |
449 | 432 | ||
450 | return 0; | 433 | return 0; |
451 | } | 434 | } |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index ae8a00c39de..45329c8c226 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) | |||
30 | { | 30 | { |
31 | #ifdef CONFIG_XEN_PVHVM | 31 | #ifdef CONFIG_XEN_PVHVM |
32 | int cpu; | 32 | int cpu; |
33 | xen_hvm_resume_shared_info(); | 33 | xen_hvm_init_shared_info(); |
34 | xen_callback_vector(); | 34 | xen_callback_vector(); |
35 | xen_unplug_emulated_devices(); | 35 | xen_unplug_emulated_devices(); |
36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | 36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0296a952250..163b4679556 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -201,22 +201,8 @@ static unsigned long xen_get_wallclock(void) | |||
201 | 201 | ||
202 | static int xen_set_wallclock(unsigned long now) | 202 | static int xen_set_wallclock(unsigned long now) |
203 | { | 203 | { |
204 | struct xen_platform_op op; | ||
205 | int rc; | ||
206 | |||
207 | /* do nothing for domU */ | 204 | /* do nothing for domU */ |
208 | if (!xen_initial_domain()) | 205 | return -1; |
209 | return -1; | ||
210 | |||
211 | op.cmd = XENPF_settime; | ||
212 | op.u.settime.secs = now; | ||
213 | op.u.settime.nsecs = 0; | ||
214 | op.u.settime.system_time = xen_clocksource_read(); | ||
215 | |||
216 | rc = HYPERVISOR_dom0_op(&op); | ||
217 | WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); | ||
218 | |||
219 | return rc; | ||
220 | } | 206 | } |
221 | 207 | ||
222 | static struct clocksource xen_clocksource __read_mostly = { | 208 | static struct clocksource xen_clocksource __read_mostly = { |
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index 6722e3733f0..1cd7f4d11e2 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c | |||
@@ -35,7 +35,6 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) | |||
35 | info->u.text_mode_3.font_height; | 35 | info->u.text_mode_3.font_height; |
36 | break; | 36 | break; |
37 | 37 | ||
38 | case XEN_VGATYPE_EFI_LFB: | ||
39 | case XEN_VGATYPE_VESA_LFB: | 38 | case XEN_VGATYPE_VESA_LFB: |
40 | if (size < offsetof(struct dom0_vga_console_info, | 39 | if (size < offsetof(struct dom0_vga_console_info, |
41 | u.vesa_lfb.gbl_caps)) | 40 | u.vesa_lfb.gbl_caps)) |
@@ -55,12 +54,6 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) | |||
55 | screen_info->blue_pos = info->u.vesa_lfb.blue_pos; | 54 | screen_info->blue_pos = info->u.vesa_lfb.blue_pos; |
56 | screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; | 55 | screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; |
57 | screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; | 56 | screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; |
58 | |||
59 | if (info->video_type == XEN_VGATYPE_EFI_LFB) { | ||
60 | screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; | ||
61 | break; | ||
62 | } | ||
63 | |||
64 | if (size >= offsetof(struct dom0_vga_console_info, | 57 | if (size >= offsetof(struct dom0_vga_console_info, |
65 | u.vesa_lfb.gbl_caps) | 58 | u.vesa_lfb.gbl_caps) |
66 | + sizeof(info->u.vesa_lfb.gbl_caps)) | 59 | + sizeof(info->u.vesa_lfb.gbl_caps)) |
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 3e45aa00071..79d7362ad6d 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S | |||
@@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct) | |||
96 | 96 | ||
97 | /* check for unmasked and pending */ | 97 | /* check for unmasked and pending */ |
98 | cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending | 98 | cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending |
99 | jnz 1f | 99 | jz 1f |
100 | 2: call check_events | 100 | 2: call check_events |
101 | 1: | 101 | 1: |
102 | ENDPATCH(xen_restore_fl_direct) | 102 | ENDPATCH(xen_restore_fl_direct) |
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index f9643fc50de..b040b0e518c 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <asm/thread_info.h> | 14 | #include <asm/thread_info.h> |
15 | #include <asm/processor-flags.h> | 15 | #include <asm/processor-flags.h> |
16 | #include <asm/segment.h> | 16 | #include <asm/segment.h> |
17 | #include <asm/asm.h> | ||
18 | 17 | ||
19 | #include <xen/interface/xen.h> | 18 | #include <xen/interface/xen.h> |
20 | 19 | ||
@@ -138,7 +137,10 @@ iret_restore_end: | |||
138 | 137 | ||
139 | 1: iret | 138 | 1: iret |
140 | xen_iret_end_crit: | 139 | xen_iret_end_crit: |
141 | _ASM_EXTABLE(1b, iret_exc) | 140 | .section __ex_table, "a" |
141 | .align 4 | ||
142 | .long 1b, iret_exc | ||
143 | .previous | ||
142 | 144 | ||
143 | hyper_iret: | 145 | hyper_iret: |
144 | /* put this out of line since its very rarely used */ | 146 | /* put this out of line since its very rarely used */ |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7faed5869e5..aaa7291c925 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -28,61 +28,9 @@ ENTRY(startup_xen) | |||
28 | __FINIT | 28 | __FINIT |
29 | 29 | ||
30 | .pushsection .text | 30 | .pushsection .text |
31 | .balign PAGE_SIZE | 31 | .align PAGE_SIZE |
32 | ENTRY(hypercall_page) | 32 | ENTRY(hypercall_page) |
33 | #define NEXT_HYPERCALL(x) \ | 33 | .skip PAGE_SIZE |
34 | ENTRY(xen_hypercall_##x) \ | ||
35 | .skip 32 | ||
36 | |||
37 | NEXT_HYPERCALL(set_trap_table) | ||
38 | NEXT_HYPERCALL(mmu_update) | ||
39 | NEXT_HYPERCALL(set_gdt) | ||
40 | NEXT_HYPERCALL(stack_switch) | ||
41 | NEXT_HYPERCALL(set_callbacks) | ||
42 | NEXT_HYPERCALL(fpu_taskswitch) | ||
43 | NEXT_HYPERCALL(sched_op_compat) | ||
44 | NEXT_HYPERCALL(platform_op) | ||
45 | NEXT_HYPERCALL(set_debugreg) | ||
46 | NEXT_HYPERCALL(get_debugreg) | ||
47 | NEXT_HYPERCALL(update_descriptor) | ||
48 | NEXT_HYPERCALL(ni) | ||
49 | NEXT_HYPERCALL(memory_op) | ||
50 | NEXT_HYPERCALL(multicall) | ||
51 | NEXT_HYPERCALL(update_va_mapping) | ||
52 | NEXT_HYPERCALL(set_timer_op) | ||
53 | NEXT_HYPERCALL(event_channel_op_compat) | ||
54 | NEXT_HYPERCALL(xen_version) | ||
55 | NEXT_HYPERCALL(console_io) | ||
56 | NEXT_HYPERCALL(physdev_op_compat) | ||
57 | NEXT_HYPERCALL(grant_table_op) | ||
58 | NEXT_HYPERCALL(vm_assist) | ||
59 | NEXT_HYPERCALL(update_va_mapping_otherdomain) | ||
60 | NEXT_HYPERCALL(iret) | ||
61 | NEXT_HYPERCALL(vcpu_op) | ||
62 | NEXT_HYPERCALL(set_segment_base) | ||
63 | NEXT_HYPERCALL(mmuext_op) | ||
64 | NEXT_HYPERCALL(xsm_op) | ||
65 | NEXT_HYPERCALL(nmi_op) | ||
66 | NEXT_HYPERCALL(sched_op) | ||
67 | NEXT_HYPERCALL(callback_op) | ||
68 | NEXT_HYPERCALL(xenoprof_op) | ||
69 | NEXT_HYPERCALL(event_channel_op) | ||
70 | NEXT_HYPERCALL(physdev_op) | ||
71 | NEXT_HYPERCALL(hvm_op) | ||
72 | NEXT_HYPERCALL(sysctl) | ||
73 | NEXT_HYPERCALL(domctl) | ||
74 | NEXT_HYPERCALL(kexec_op) | ||
75 | NEXT_HYPERCALL(tmem_op) /* 38 */ | ||
76 | ENTRY(xen_hypercall_rsvr) | ||
77 | .skip 320 | ||
78 | NEXT_HYPERCALL(mca) /* 48 */ | ||
79 | NEXT_HYPERCALL(arch_1) | ||
80 | NEXT_HYPERCALL(arch_2) | ||
81 | NEXT_HYPERCALL(arch_3) | ||
82 | NEXT_HYPERCALL(arch_4) | ||
83 | NEXT_HYPERCALL(arch_5) | ||
84 | NEXT_HYPERCALL(arch_6) | ||
85 | .balign PAGE_SIZE | ||
86 | .popsection | 34 | .popsection |
87 | 35 | ||
88 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") | 36 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d2e73d19d36..b095739ccd4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -27,7 +27,8 @@ void xen_setup_mfn_list_list(void); | |||
27 | void xen_setup_shared_info(void); | 27 | void xen_setup_shared_info(void); |
28 | void xen_build_mfn_list_list(void); | 28 | void xen_build_mfn_list_list(void); |
29 | void xen_setup_machphys_mapping(void); | 29 | void xen_setup_machphys_mapping(void); |
30 | void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | 30 | pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); |
31 | void xen_ident_map_ISA(void); | ||
31 | void xen_reserve_top(void); | 32 | void xen_reserve_top(void); |
32 | extern unsigned long xen_max_p2m_pfn; | 33 | extern unsigned long xen_max_p2m_pfn; |
33 | 34 | ||
@@ -35,16 +36,16 @@ void xen_set_pat(u64); | |||
35 | 36 | ||
36 | char * __init xen_memory_setup(void); | 37 | char * __init xen_memory_setup(void); |
37 | void __init xen_arch_setup(void); | 38 | void __init xen_arch_setup(void); |
39 | void __init xen_init_IRQ(void); | ||
38 | void xen_enable_sysenter(void); | 40 | void xen_enable_sysenter(void); |
39 | void xen_enable_syscall(void); | 41 | void xen_enable_syscall(void); |
40 | void xen_vcpu_restore(void); | 42 | void xen_vcpu_restore(void); |
41 | 43 | ||
42 | void xen_callback_vector(void); | 44 | void xen_callback_vector(void); |
43 | void xen_hvm_resume_shared_info(void); | 45 | void xen_hvm_init_shared_info(void); |
44 | void xen_unplug_emulated_devices(void); | 46 | void xen_unplug_emulated_devices(void); |
45 | 47 | ||
46 | void __init xen_build_dynamic_phys_to_machine(void); | 48 | void __init xen_build_dynamic_phys_to_machine(void); |
47 | unsigned long __init xen_revector_p2m_tree(void); | ||
48 | 49 | ||
49 | void xen_init_irq_ops(void); | 50 | void xen_init_irq_ops(void); |
50 | void xen_setup_timer(int cpu); | 51 | void xen_setup_timer(int cpu); |
@@ -91,15 +92,11 @@ struct dom0_vga_console_info; | |||
91 | 92 | ||
92 | #ifdef CONFIG_XEN_DOM0 | 93 | #ifdef CONFIG_XEN_DOM0 |
93 | void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); | 94 | void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); |
94 | void __init xen_init_apic(void); | ||
95 | #else | 95 | #else |
96 | static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, | 96 | static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, |
97 | size_t size) | 97 | size_t size) |
98 | { | 98 | { |
99 | } | 99 | } |
100 | static inline void __init xen_init_apic(void) | ||
101 | { | ||
102 | } | ||
103 | #endif | 100 | #endif |
104 | 101 | ||
105 | /* Declare an asm function, along with symbols needed to make it | 102 | /* Declare an asm function, along with symbols needed to make it |