 arch/x86/kernel/Makefile              |    4
 arch/x86/kernel/ldt.c                 |    9
 arch/x86/kernel/paravirt-spinlocks.c  |   37
 arch/x86/kernel/paravirt.c            |   27
 arch/x86/xen/Kconfig                  |   10
 arch/x86/xen/Makefile                 |   12
 arch/x86/xen/debugfs.c                |  123
 arch/x86/xen/debugfs.h                |   10
 arch/x86/xen/enlighten.c              |  187
 arch/x86/xen/irq.c                    |  143
 arch/x86/xen/mmu.c                    |  268
 arch/x86/xen/multicalls.c             |  115
 arch/x86/xen/smp.c                    |  167
 arch/x86/xen/spinlock.c               |  423
 arch/x86/xen/time.c                   |    4
 arch/x86/xen/xen-asm_32.S             |    2
 arch/x86/xen/xen-asm_64.S             |    2
 arch/x86/xen/xen-ops.h                |    6
 drivers/block/xen-blkfront.c          |    2
 drivers/char/hvc_xen.c                |    6
 drivers/input/xen-kbdfront.c          |    4
 drivers/net/xen-netfront.c            |    6
 drivers/video/xen-fbfront.c           |    4
 drivers/xen/balloon.c                 |  176
 drivers/xen/events.c                  |   36
 drivers/xen/grant-table.c             |    2
 drivers/xen/xenbus/xenbus_probe.c     |    8
 include/asm-x86/desc.h                |   15
 include/asm-x86/paravirt.h            |   20
 include/asm-x86/spinlock.h            |    9
 include/asm-x86/xen/hypervisor.h      |   14
 include/linux/kernel.h                |    2
 include/xen/events.h                  |    2
 lib/cmdline.c                         |    2
 34 files changed, 1305 insertions(+), 552 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..d679cb2c79b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
-CFLAGS_REMOVE_paravirt.o = -pg
+CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
 endif
 
 #
@@ -89,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)	+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)	+= kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f06f4f..6e388412a854 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -51,6 +51,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 	memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
 	       (mincount - oldsize) * LDT_ENTRY_SIZE);
 
+	paravirt_alloc_ldt(newldt, mincount);
+
 #ifdef CONFIG_X86_64
 	/* CHECKME: Do we really need this ? */
 	wmb();
@@ -73,6 +75,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 #endif
 	}
 	if (oldsize) {
+		paravirt_free_ldt(oldldt, oldsize);
 		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
 			vfree(oldldt);
 		else
@@ -84,10 +87,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 {
 	int err = alloc_ldt(new, old->size, 0);
+	int i;
 
 	if (err < 0)
 		return err;
-	memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
+
+	for(i = 0; i < old->size; i++)
+		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
 	return 0;
 }
 
@@ -124,6 +130,7 @@ void destroy_context(struct mm_struct *mm)
 	if (mm == current->active_mm)
 		clear_LDT();
 #endif
+	paravirt_free_ldt(mm->context.ldt, mm->context.size);
 	if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
 		vfree(mm->context.ldt);
 	else
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
new file mode 100644
index 000000000000..0e9f1982b1dd
--- /dev/null
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -0,0 +1,37 @@
+/*
+ * Split spinlock implementation out into its own file, so it can be
+ * compiled in a FTRACE-compatible way.
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+
+#include <asm/paravirt.h>
+
+static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+{
+	__raw_spin_lock(lock);
+}
+
+struct pv_lock_ops pv_lock_ops = {
+#ifdef CONFIG_SMP
+	.spin_is_locked = __ticket_spin_is_locked,
+	.spin_is_contended = __ticket_spin_is_contended,
+
+	.spin_lock = __ticket_spin_lock,
+	.spin_lock_flags = default_spin_lock_flags,
+	.spin_trylock = __ticket_spin_trylock,
+	.spin_unlock = __ticket_spin_unlock,
+#endif
+};
+EXPORT_SYMBOL(pv_lock_ops);
+
+void __init paravirt_use_bytelocks(void)
+{
+#ifdef CONFIG_SMP
+	pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
+	pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
+	pv_lock_ops.spin_lock = __byte_spin_lock;
+	pv_lock_ops.spin_trylock = __byte_spin_trylock;
+	pv_lock_ops.spin_unlock = __byte_spin_unlock;
+#endif
+}
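
[Reviewer note, not part of the patch: every raw_spinlock operation now
dispatches through the pv_lock_ops table defined above. As a minimal sketch
of that indirection (the real wrappers in include/asm-x86/paravirt.h go
through the PVOP_* call macros rather than a plain indirect call, and the
_sketch names here are hypothetical):

	static inline void raw_spin_lock_sketch(struct raw_spinlock *lock)
	{
		/* ticket locks by default; Xen installs its own ops in
		   spinlock.c, and paravirt_use_bytelocks() swaps in the
		   byte-lock variants */
		pv_lock_ops.spin_lock(lock);
	}

	static inline int raw_spin_trylock_sketch(struct raw_spinlock *lock)
	{
		return pv_lock_ops.spin_trylock(lock);
	}

Splitting this table out of paravirt.o lets paravirt.o itself stay profiled
under FTRACE while the lock paths, which can run inside the tracer, are
built with -pg removed (see the Makefile hunk above).]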
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 300da17e61cb..7faea1817d05 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	return __get_cpu_var(paravirt_lazy_mode);
 }
 
-void __init paravirt_use_bytelocks(void)
-{
-#ifdef CONFIG_SMP
-	pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
-	pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
-	pv_lock_ops.spin_lock = __byte_spin_lock;
-	pv_lock_ops.spin_trylock = __byte_spin_trylock;
-	pv_lock_ops.spin_unlock = __byte_spin_unlock;
-#endif
-}
-
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
@@ -348,6 +337,10 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_ldt_entry = native_write_ldt_entry,
 	.write_gdt_entry = native_write_gdt_entry,
 	.write_idt_entry = native_write_idt_entry,
+
+	.alloc_ldt = paravirt_nop,
+	.free_ldt = paravirt_nop,
+
 	.load_sp0 = native_load_sp0,
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@@ -461,18 +454,6 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.set_fixmap = native_set_fixmap,
 };
 
-struct pv_lock_ops pv_lock_ops = {
-#ifdef CONFIG_SMP
-	.spin_is_locked = __ticket_spin_is_locked,
-	.spin_is_contended = __ticket_spin_is_contended,
-
-	.spin_lock = __ticket_spin_lock,
-	.spin_trylock = __ticket_spin_trylock,
-	.spin_unlock = __ticket_spin_unlock,
-#endif
-};
-EXPORT_SYMBOL(pv_lock_ops);
-
 EXPORT_SYMBOL_GPL(pv_time_ops);
 EXPORT_SYMBOL (pv_cpu_ops);
 EXPORT_SYMBOL (pv_mmu_ops);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 3815e425f470..d3e68465ace9 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -27,4 +27,12 @@ config XEN_MAX_DOMAIN_MEMORY
 config XEN_SAVE_RESTORE
 	bool
 	depends on PM
-	default y
\ No newline at end of file
+	default y
+
+config XEN_DEBUG_FS
+	bool "Enable Xen debug and tuning parameters in debugfs"
+	depends on XEN && DEBUG_FS
+	default n
+	help
+	  Enable statistics output and various tuning options in debugfs.
+	  Enabling this option may incur a significant performance overhead.
\ No newline at end of file
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 59c1e539aed2..313947940a1a 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,12 @@
-obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
+ifdef CONFIG_FTRACE
+# Do not profile debug and lowlevel utilities
+CFLAGS_REMOVE_spinlock.o = -pg
+CFLAGS_REMOVE_time.o = -pg
+CFLAGS_REMOVE_irq.o = -pg
+endif
+
+obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm_$(BITS).o grant-table.o suspend.o
 
-obj-$(CONFIG_SMP)	+= smp.o
+obj-$(CONFIG_SMP)		+= smp.o spinlock.o
+obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
\ No newline at end of file
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
new file mode 100644
index 000000000000..b53225d2cac3
--- /dev/null
+++ b/arch/x86/xen/debugfs.c
@@ -0,0 +1,123 @@
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "debugfs.h"
+
+static struct dentry *d_xen_debug;
+
+struct dentry * __init xen_init_debugfs(void)
+{
+	if (!d_xen_debug) {
+		d_xen_debug = debugfs_create_dir("xen", NULL);
+
+		if (!d_xen_debug)
+			pr_warning("Could not create 'xen' debugfs directory\n");
+	}
+
+	return d_xen_debug;
+}
+
+struct array_data
+{
+	void *array;
+	unsigned elements;
+};
+
+static int u32_array_open(struct inode *inode, struct file *file)
+{
+	file->private_data = NULL;
+	return nonseekable_open(inode, file);
+}
+
+static size_t format_array(char *buf, size_t bufsize, const char *fmt,
+			   u32 *array, unsigned array_size)
+{
+	size_t ret = 0;
+	unsigned i;
+
+	for(i = 0; i < array_size; i++) {
+		size_t len;
+
+		len = snprintf(buf, bufsize, fmt, array[i]);
+		len++;	/* ' ' or '\n' */
+		ret += len;
+
+		if (buf) {
+			buf += len;
+			bufsize -= len;
+			buf[-1] = (i == array_size-1) ? '\n' : ' ';
+		}
+	}
+
+	ret++;		/* \0 */
+	if (buf)
+		*buf = '\0';
+
+	return ret;
+}
+
+static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size)
+{
+	size_t len = format_array(NULL, 0, fmt, array, array_size);
+	char *ret;
+
+	ret = kmalloc(len, GFP_KERNEL);
+	if (ret == NULL)
+		return NULL;
+
+	format_array(ret, len, fmt, array, array_size);
+	return ret;
+}
+
+static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
+			      loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct array_data *data = inode->i_private;
+	size_t size;
+
+	if (*ppos == 0) {
+		if (file->private_data) {
+			kfree(file->private_data);
+			file->private_data = NULL;
+		}
+
+		file->private_data = format_array_alloc("%u", data->array, data->elements);
+	}
+
+	size = 0;
+	if (file->private_data)
+		size = strlen(file->private_data);
+
+	return simple_read_from_buffer(buf, len, ppos, file->private_data, size);
+}
+
+static int xen_array_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+
+	return 0;
+}
+
+static struct file_operations u32_array_fops = {
+	.owner	= THIS_MODULE,
+	.open	= u32_array_open,
+	.release= xen_array_release,
+	.read	= u32_array_read,
+};
+
+struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
+					    struct dentry *parent,
+					    u32 *array, unsigned elements)
+{
+	struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
+
+	if (data == NULL)
+		return NULL;
+
+	data->array = array;
+	data->elements = elements;
+
+	return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
+}
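
[Reviewer note, not part of the patch: a typical use of the helper above is
the call mmu.c makes later in this series, exposing a u32 histogram as one
read-only debugfs file:

	/* d_mmu_debug is a dentry obtained from xen_init_debugfs() plus
	   debugfs_create_dir(); see the mmu.c hunk below */
	xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
				     mmu_stats.mmu_update_histo, 20);

Reading the file returns the array rendered by format_array(): each element
printed with "%u", space-separated and newline-terminated. The buffer is
built on the first read from offset 0 and freed on release.]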
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h
new file mode 100644
index 000000000000..e28132084832
--- /dev/null
+++ b/arch/x86/xen/debugfs.h
@@ -0,0 +1,10 @@
+#ifndef _XEN_DEBUGFS_H
+#define _XEN_DEBUGFS_H
+
+struct dentry * __init xen_init_debugfs(void);
+
+struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
+					    struct dentry *parent,
+					    u32 *array, unsigned elements);
+
+#endif /* _XEN_DEBUGFS_H */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3cbf08f..b106e825d266 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -30,7 +30,6 @@
 #include <xen/interface/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
-#include <xen/interface/sched.h>
 #include <xen/features.h>
 #include <xen/page.h>
 #include <xen/hvc-console.h>
@@ -57,6 +56,9 @@ EXPORT_SYMBOL_GPL(hypercall_page);
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
+enum xen_domain_type xen_domain_type = XEN_NATIVE;
+EXPORT_SYMBOL_GPL(xen_domain_type);
+
 /*
  * Identity map, in addition to plain kernel map.  This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -226,103 +228,68 @@ static unsigned long xen_get_debugreg(int reg)
 	return HYPERVISOR_get_debugreg(reg);
 }
 
-static unsigned long xen_save_fl(void)
-{
-	struct vcpu_info *vcpu;
-	unsigned long flags;
-
-	vcpu = x86_read_percpu(xen_vcpu);
-
-	/* flag has opposite sense of mask */
-	flags = !vcpu->evtchn_upcall_mask;
-
-	/* convert to IF type flag
-	   -0 -> 0x00000000
-	   -1 -> 0xffffffff
-	*/
-	return (-flags) & X86_EFLAGS_IF;
-}
-
-static void xen_restore_fl(unsigned long flags)
-{
-	struct vcpu_info *vcpu;
-
-	/* convert from IF type flag */
-	flags = !(flags & X86_EFLAGS_IF);
-
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
-	preempt_disable();
-	vcpu = x86_read_percpu(xen_vcpu);
-	vcpu->evtchn_upcall_mask = flags;
-	preempt_enable_no_resched();
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
-
-	if (flags == 0) {
-		preempt_check_resched();
-		barrier(); /* unmask then check (avoid races) */
-		if (unlikely(vcpu->evtchn_upcall_pending))
-			force_evtchn_callback();
-	}
-}
-
-static void xen_irq_disable(void)
-{
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
-	preempt_disable();
-	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
-	preempt_enable_no_resched();
-}
-
-static void xen_irq_enable(void)
-{
-	struct vcpu_info *vcpu;
-
-	/* We don't need to worry about being preempted here, since
-	   either a) interrupts are disabled, so no preemption, or b)
-	   the caller is confused and is trying to re-enable interrupts
-	   on an indeterminate processor. */
-
-	vcpu = x86_read_percpu(xen_vcpu);
-	vcpu->evtchn_upcall_mask = 0;
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
-
-	barrier(); /* unmask then check (avoid races) */
-	if (unlikely(vcpu->evtchn_upcall_pending))
-		force_evtchn_callback();
-}
-
-static void xen_safe_halt(void)
-{
-	/* Blocking includes an implicit local_irq_enable(). */
-	if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
-		BUG();
-}
-
-static void xen_halt(void)
-{
-	if (irqs_disabled())
-		HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
-	else
-		xen_safe_halt();
-}
-
 static void xen_leave_lazy(void)
 {
 	paravirt_leave_lazy(paravirt_get_lazy_mode());
 	xen_mc_flush();
 }
 
 static unsigned long xen_store_tr(void)
 {
 	return 0;
 }
 
+/*
+ * Set the page permissions for a particular virtual address.  If the
+ * address is a vmalloc mapping (or other non-linear mapping), then
+ * find the linear mapping of the page and also set its protections to
+ * match.
+ */
+static void set_aliased_prot(void *v, pgprot_t prot)
+{
+	int level;
+	pte_t *ptep;
+	pte_t pte;
+	unsigned long pfn;
+	struct page *page;
+
+	ptep = lookup_address((unsigned long)v, &level);
+	BUG_ON(ptep == NULL);
+
+	pfn = pte_pfn(*ptep);
+	page = pfn_to_page(pfn);
+
+	pte = pfn_pte(pfn, prot);
+
+	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
+		BUG();
+
+	if (!PageHighMem(page)) {
+		void *av = __va(PFN_PHYS(pfn));
+
+		if (av != v)
+			if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
+				BUG();
+	} else
+		kmap_flush_unused();
+}
+
+static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
+	int i;
+
+	for(i = 0; i < entries; i += entries_per_page)
+		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
+}
+
+static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
+	int i;
+
+	for(i = 0; i < entries; i += entries_per_page)
+		set_aliased_prot(ldt + i, PAGE_KERNEL);
+}
+
 static void xen_set_ldt(const void *addr, unsigned entries)
@@ -425,8 +392,7 @@ static void xen_load_gs_index(unsigned int idx)
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
 {
-	unsigned long lp = (unsigned long)&dt[entrynum];
-	xmaddr_t mach_lp = virt_to_machine(lp);
+	xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
 	u64 entry = *(u64 *)ptr;
 
 	preempt_disable();
@@ -559,7 +525,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 }
 
 static void xen_load_sp0(struct tss_struct *tss,
-			  struct thread_struct *thread)
+			 struct thread_struct *thread)
 {
 	struct multicall_space mcs = xen_mc_entry(0);
 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
@@ -803,6 +769,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 		ret = -EFAULT;
 		break;
 #endif
+
+	case MSR_STAR:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+	case MSR_SYSCALL_MASK:
+	case MSR_IA32_SYSENTER_CS:
+	case MSR_IA32_SYSENTER_ESP:
+	case MSR_IA32_SYSENTER_EIP:
+		/* Fast syscall setup is all done in hypercalls, so
+		   these are all ignored.  Stub them out here to stop
+		   Xen console noise. */
+		break;
+
 	default:
 		ret = native_write_msr_safe(msr, low, high);
 	}
@@ -846,7 +825,7 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
 	SetPagePinned(page);
 
 	if (!PageHighMem(page)) {
-		make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+		make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
 		if (level == PT_PTE)
 			pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
 	} else
@@ -1220,6 +1199,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.load_gs_index = xen_load_gs_index,
 #endif
 
+	.alloc_ldt = xen_alloc_ldt,
+	.free_ldt = xen_free_ldt,
+
 	.store_gdt = native_store_gdt,
 	.store_idt = native_store_idt,
 	.store_tr = xen_store_tr,
@@ -1241,36 +1223,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	},
 };
 
-static void __init __xen_init_IRQ(void)
-{
-#ifdef CONFIG_X86_64
-	int i;
-
-	/* Create identity vector->irq map */
-	for(i = 0; i < NR_VECTORS; i++) {
-		int cpu;
-
-		for_each_possible_cpu(cpu)
-			per_cpu(vector_irq, cpu)[i] = i;
-	}
-#endif	/* CONFIG_X86_64 */
-
-	xen_init_IRQ();
-}
-
-static const struct pv_irq_ops xen_irq_ops __initdata = {
-	.init_IRQ = __xen_init_IRQ,
-	.save_fl = xen_save_fl,
-	.restore_fl = xen_restore_fl,
-	.irq_disable = xen_irq_disable,
-	.irq_enable = xen_irq_enable,
-	.safe_halt = xen_safe_halt,
-	.halt = xen_halt,
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = xen_adjust_exception_frame,
-#endif
-};
-
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.apic_write = xen_apic_write,
@@ -1324,7 +1276,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
 
 	.pte_val = xen_pte_val,
-	.pte_flags = native_pte_val,
+	.pte_flags = native_pte_flags,
 	.pgd_val = xen_pgd_val,
 
 	.make_pte = xen_make_pte,
@@ -1664,6 +1616,8 @@ asmlinkage void __init xen_start_kernel(void)
 	if (!xen_start_info)
 		return;
 
+	xen_domain_type = XEN_PV_DOMAIN;
+
 	BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
 
 	xen_setup_features();
@@ -1673,10 +1627,11 @@ asmlinkage void __init xen_start_kernel(void)
 	pv_init_ops = xen_init_ops;
 	pv_time_ops = xen_time_ops;
 	pv_cpu_ops = xen_cpu_ops;
-	pv_irq_ops = xen_irq_ops;
 	pv_apic_ops = xen_apic_ops;
 	pv_mmu_ops = xen_mmu_ops;
 
+	xen_init_irq_ops();
+
 	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
 		pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
 		pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
@@ -1700,7 +1655,7 @@ asmlinkage void __init xen_start_kernel(void)
 
 	/* Prevent unwanted bits from being set in PTEs. */
 	__supported_pte_mask &= ~_PAGE_GLOBAL;
-	if (!is_initial_xendomain())
+	if (!xen_initial_domain())
 		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
 
 	/* Don't do the full vcpu_info placement stuff until we have a
@@ -1735,7 +1690,7 @@ asmlinkage void __init xen_start_kernel(void)
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
 
-	if (!is_initial_xendomain()) {
+	if (!xen_initial_domain()) {
 		add_preferred_console("xenboot", 0, NULL);
 		add_preferred_console("tty", 0, NULL);
 		add_preferred_console("hvc", 0, NULL);
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
new file mode 100644
index 000000000000..28b85ab8422e
--- /dev/null
+++ b/arch/x86/xen/irq.c
@@ -0,0 +1,143 @@
+#include <linux/hardirq.h>
+
+#include <xen/interface/xen.h>
+#include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
+
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xen-ops.h"
+
+/*
+ * Force a proper event-channel callback from Xen after clearing the
+ * callback mask. We do this in a very simple manner, by making a call
+ * down into Xen. The pending flag will be checked by Xen on return.
+ */
+void xen_force_evtchn_callback(void)
+{
+	(void)HYPERVISOR_xen_version(0, NULL);
+}
+
+static void __init __xen_init_IRQ(void)
+{
+#ifdef CONFIG_X86_64
+	int i;
+
+	/* Create identity vector->irq map */
+	for(i = 0; i < NR_VECTORS; i++) {
+		int cpu;
+
+		for_each_possible_cpu(cpu)
+			per_cpu(vector_irq, cpu)[i] = i;
+	}
+#endif	/* CONFIG_X86_64 */
+
+	xen_init_IRQ();
+}
+
+static unsigned long xen_save_fl(void)
+{
+	struct vcpu_info *vcpu;
+	unsigned long flags;
+
+	vcpu = x86_read_percpu(xen_vcpu);
+
+	/* flag has opposite sense of mask */
+	flags = !vcpu->evtchn_upcall_mask;
+
+	/* convert to IF type flag
+	   -0 -> 0x00000000
+	   -1 -> 0xffffffff
+	*/
+	return (-flags) & X86_EFLAGS_IF;
+}
+
+static void xen_restore_fl(unsigned long flags)
+{
+	struct vcpu_info *vcpu;
+
+	/* convert from IF type flag */
+	flags = !(flags & X86_EFLAGS_IF);
+
+	/* There's a one instruction preempt window here.  We need to
+	   make sure we're don't switch CPUs between getting the vcpu
+	   pointer and updating the mask. */
+	preempt_disable();
+	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu->evtchn_upcall_mask = flags;
+	preempt_enable_no_resched();
+
+	/* Doesn't matter if we get preempted here, because any
+	   pending event will get dealt with anyway. */
+
+	if (flags == 0) {
+		preempt_check_resched();
+		barrier(); /* unmask then check (avoid races) */
+		if (unlikely(vcpu->evtchn_upcall_pending))
+			xen_force_evtchn_callback();
+	}
+}
+
+static void xen_irq_disable(void)
+{
+	/* There's a one instruction preempt window here.  We need to
+	   make sure we're don't switch CPUs between getting the vcpu
+	   pointer and updating the mask. */
+	preempt_disable();
+	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
+	preempt_enable_no_resched();
+}
+
+static void xen_irq_enable(void)
+{
+	struct vcpu_info *vcpu;
+
+	/* We don't need to worry about being preempted here, since
+	   either a) interrupts are disabled, so no preemption, or b)
+	   the caller is confused and is trying to re-enable interrupts
+	   on an indeterminate processor. */
+
+	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu->evtchn_upcall_mask = 0;
+
+	/* Doesn't matter if we get preempted here, because any
+	   pending event will get dealt with anyway. */
+
+	barrier(); /* unmask then check (avoid races) */
+	if (unlikely(vcpu->evtchn_upcall_pending))
+		xen_force_evtchn_callback();
+}
+
+static void xen_safe_halt(void)
+{
+	/* Blocking includes an implicit local_irq_enable(). */
+	if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
+		BUG();
+}
+
+static void xen_halt(void)
+{
+	if (irqs_disabled())
+		HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+	else
+		xen_safe_halt();
+}
+
+static const struct pv_irq_ops xen_irq_ops __initdata = {
+	.init_IRQ = __xen_init_IRQ,
+	.save_fl = xen_save_fl,
+	.restore_fl = xen_restore_fl,
+	.irq_disable = xen_irq_disable,
+	.irq_enable = xen_irq_enable,
+	.safe_halt = xen_safe_halt,
+	.halt = xen_halt,
+#ifdef CONFIG_X86_64
+	.adjust_exception_frame = xen_adjust_exception_frame,
+#endif
+};
+
+void __init xen_init_irq_ops()
+{
+	pv_irq_ops = xen_irq_ops;
+}
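
[Reviewer note, not part of the patch: xen_save_fl()/xen_restore_fl() map
between the vcpu's evtchn_upcall_mask, where 1 means "events masked", and
the x86 IF flag, where 1 means "interrupts enabled" -- opposite senses. The
conversion trick (negate a 0/1 value to get all-zeros/all-ones, then mask
with X86_EFLAGS_IF) can be checked in isolation; a hypothetical userspace
sketch:

	#include <assert.h>

	#define X86_EFLAGS_IF 0x00000200UL

	static unsigned long sketch_save_fl(unsigned char upcall_mask)
	{
		unsigned long flags = !upcall_mask; /* invert the sense */

		/* -0 -> 0x00000000, -1 -> 0xffffffff; keep only IF */
		return (-flags) & X86_EFLAGS_IF;
	}

	static unsigned char sketch_restore_fl(unsigned long flags)
	{
		return !(flags & X86_EFLAGS_IF); /* IF set -> unmasked */
	}

	int main(void)
	{
		assert(sketch_save_fl(0) == X86_EFLAGS_IF); /* events open */
		assert(sketch_save_fl(1) == 0);             /* events masked */
		assert(sketch_restore_fl(X86_EFLAGS_IF) == 0);
		assert(sketch_restore_fl(0) == 1);
		return 0;
	}
]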
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aa37469da696..f5af913fd7b0 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -40,6 +40,7 @@
  */
 #include <linux/sched.h>
 #include <linux/highmem.h>
+#include <linux/debugfs.h>
 #include <linux/bug.h>
 
 #include <asm/pgtable.h>
@@ -57,6 +58,61 @@
 
 #include "multicalls.h"
 #include "mmu.h"
+#include "debugfs.h"
+
+#define MMU_UPDATE_HISTO	30
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+static struct {
+	u32 pgd_update;
+	u32 pgd_update_pinned;
+	u32 pgd_update_batched;
+
+	u32 pud_update;
+	u32 pud_update_pinned;
+	u32 pud_update_batched;
+
+	u32 pmd_update;
+	u32 pmd_update_pinned;
+	u32 pmd_update_batched;
+
+	u32 pte_update;
+	u32 pte_update_pinned;
+	u32 pte_update_batched;
+
+	u32 mmu_update;
+	u32 mmu_update_extended;
+	u32 mmu_update_histo[MMU_UPDATE_HISTO];
+
+	u32 prot_commit;
+	u32 prot_commit_batched;
+
+	u32 set_pte_at;
+	u32 set_pte_at_batched;
+	u32 set_pte_at_pinned;
+	u32 set_pte_at_current;
+	u32 set_pte_at_kernel;
+} mmu_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+	if (unlikely(zero_stats)) {
+		memset(&mmu_stats, 0, sizeof(mmu_stats));
+		zero_stats = 0;
+	}
+}
+
+#define ADD_STATS(elem, val)			\
+	do { check_zero(); mmu_stats.elem += (val); } while(0)
+
+#else  /* !CONFIG_XEN_DEBUG_FS */
+
+#define ADD_STATS(elem, val)	do { (void)(val); } while(0)
+
+#endif /* CONFIG_XEN_DEBUG_FS */
 
 /*
  * Just beyond the highest usermode address.  STACK_TOP_MAX has a
@@ -229,25 +285,35 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
-static bool page_pinned(void *ptr)
+static bool xen_page_pinned(void *ptr)
 {
 	struct page *page = virt_to_page(ptr);
 
 	return PagePinned(page);
 }
 
-static void extend_mmu_update(const struct mmu_update *update)
+static void xen_extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
 	struct mmu_update *u;
 
 	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
 
-	if (mcs.mc != NULL)
+	if (mcs.mc != NULL) {
+		ADD_STATS(mmu_update_extended, 1);
+		ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
+
 		mcs.mc->args[1]++;
-	else {
+
+		if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
+			ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
+		else
+			ADD_STATS(mmu_update_histo[0], 1);
+	} else {
+		ADD_STATS(mmu_update, 1);
 		mcs = __xen_mc_entry(sizeof(*u));
 		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+		ADD_STATS(mmu_update_histo[1], 1);
 	}
 
 	u = mcs.args;
@@ -265,7 +331,9 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 	/* ptr may be ioremapped for 64-bit pagetable setup */
 	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pmd_val_ma(val);
-	extend_mmu_update(&u);
+	xen_extend_mmu_update(&u);
+
+	ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
@@ -274,13 +342,17 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 
 void xen_set_pmd(pmd_t *ptr, pmd_t val)
 {
+	ADD_STATS(pmd_update, 1);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
-	if (!page_pinned(ptr)) {
+	if (!xen_page_pinned(ptr)) {
 		*ptr = val;
 		return;
 	}
 
+	ADD_STATS(pmd_update_pinned, 1);
+
 	xen_set_pmd_hyper(ptr, val);
 }
 
286 358
@@ -300,12 +372,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
300 if (mm == &init_mm) 372 if (mm == &init_mm)
301 preempt_disable(); 373 preempt_disable();
302 374
375 ADD_STATS(set_pte_at, 1);
376// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
377 ADD_STATS(set_pte_at_current, mm == current->mm);
378 ADD_STATS(set_pte_at_kernel, mm == &init_mm);
379
303 if (mm == current->mm || mm == &init_mm) { 380 if (mm == current->mm || mm == &init_mm) {
304 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { 381 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
305 struct multicall_space mcs; 382 struct multicall_space mcs;
306 mcs = xen_mc_entry(0); 383 mcs = xen_mc_entry(0);
307 384
308 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); 385 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
386 ADD_STATS(set_pte_at_batched, 1);
309 xen_mc_issue(PARAVIRT_LAZY_MMU); 387 xen_mc_issue(PARAVIRT_LAZY_MMU);
310 goto out; 388 goto out;
311 } else 389 } else
@@ -334,7 +412,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 
 	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
 	u.val = pte_val_ma(pte);
-	extend_mmu_update(&u);
+	xen_extend_mmu_update(&u);
+
+	ADD_STATS(prot_commit, 1);
+	ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
@@ -400,7 +481,9 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 	/* ptr may be ioremapped for 64-bit pagetable setup */
 	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
 	u.val = pud_val_ma(val);
-	extend_mmu_update(&u);
+	xen_extend_mmu_update(&u);
+
+	ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
@@ -409,18 +492,26 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 
 void xen_set_pud(pud_t *ptr, pud_t val)
 {
+	ADD_STATS(pud_update, 1);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
-	if (!page_pinned(ptr)) {
+	if (!xen_page_pinned(ptr)) {
 		*ptr = val;
 		return;
 	}
 
+	ADD_STATS(pud_update_pinned, 1);
+
 	xen_set_pud_hyper(ptr, val);
 }
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+	ADD_STATS(pte_update, 1);
+//	ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
+	ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
+
 #ifdef CONFIG_X86_PAE
 	ptep->pte_high = pte.pte_high;
 	smp_wmb();
@@ -490,7 +581,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
 
 	u.ptr = virt_to_machine(ptr).maddr;
 	u.val = pgd_val_ma(val);
-	extend_mmu_update(&u);
+	xen_extend_mmu_update(&u);
 }
 
 /*
@@ -517,17 +608,22 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
 {
 	pgd_t *user_ptr = xen_get_user_pgd(ptr);
 
+	ADD_STATS(pgd_update, 1);
+
 	/* If page is not pinned, we can just update the entry
 	   directly */
-	if (!page_pinned(ptr)) {
+	if (!xen_page_pinned(ptr)) {
 		*ptr = val;
 		if (user_ptr) {
-			WARN_ON(page_pinned(user_ptr));
+			WARN_ON(xen_page_pinned(user_ptr));
 			*user_ptr = val;
 		}
 		return;
 	}
 
+	ADD_STATS(pgd_update_pinned, 1);
+	ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
+
 	/* If it's pinned, then we can at least batch the kernel and
 	   user updates together. */
 	xen_mc_batch();
@@ -555,8 +651,8 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
  * For 64-bit, we must skip the Xen hole in the middle of the address
  * space, just after the big x86-64 virtual hole.
  */
-static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
-		    unsigned long limit)
+static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
+			unsigned long limit)
 {
 	int flush = 0;
 	unsigned hole_low, hole_high;
@@ -590,8 +686,6 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		pmdidx_limit = 0;
 #endif
 
-	flush |= (*func)(virt_to_page(pgd), PT_PGD);
-
 	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
 		pud_t *pud;
 
@@ -637,12 +731,18 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 			}
 		}
 	}
+
 out:
+	/* Do the top level last, so that the callbacks can use it as
+	   a cue to do final things like tlb flushes. */
+	flush |= (*func)(virt_to_page(pgd), PT_PGD);
 
 	return flush;
 }
 
-static spinlock_t *lock_pte(struct page *page)
+/* If we're using split pte locks, then take the page's lock and
+   return a pointer to it.  Otherwise return NULL. */
+static spinlock_t *xen_pte_lock(struct page *page)
 {
 	spinlock_t *ptl = NULL;
 
@@ -654,7 +754,7 @@ static spinlock_t *lock_pte(struct page *page)
 	return ptl;
 }
 
-static void do_unlock(void *v)
+static void xen_pte_unlock(void *v)
 {
 	spinlock_t *ptl = v;
 	spin_unlock(ptl);
@@ -672,7 +772,7 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 }
 
-static int pin_page(struct page *page, enum pt_level level)
+static int xen_pin_page(struct page *page, enum pt_level level)
 {
 	unsigned pgfl = TestSetPagePinned(page);
 	int flush;
@@ -691,21 +791,40 @@ static int pin_page(struct page *page, enum pt_level level)
 
 		flush = 0;
 
+		/*
+		 * We need to hold the pagetable lock between the time
+		 * we make the pagetable RO and when we actually pin
+		 * it.  If we don't, then other users may come in and
+		 * attempt to update the pagetable by writing it,
+		 * which will fail because the memory is RO but not
+		 * pinned, so Xen won't do the trap'n'emulate.
+		 *
+		 * If we're using split pte locks, we can't hold the
+		 * entire pagetable's worth of locks during the
+		 * traverse, because we may wrap the preempt count (8
+		 * bits).  The solution is to mark RO and pin each PTE
+		 * page while holding the lock.  This means the number
+		 * of locks we end up holding is never more than a
+		 * batch size (~32 entries, at present).
+		 *
+		 * If we're not using split pte locks, we needn't pin
+		 * the PTE pages independently, because we're
+		 * protected by the overall pagetable lock.
+		 */
 		ptl = NULL;
 		if (level == PT_PTE)
-			ptl = lock_pte(page);
+			ptl = xen_pte_lock(page);
 
 		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
 					pfn_pte(pfn, PAGE_KERNEL_RO),
 					level == PT_PGD ? UVMF_TLB_FLUSH : 0);
 
-		if (level == PT_PTE)
+		if (ptl) {
 			xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
 
-		if (ptl) {
 			/* Queue a deferred unlock for when this batch
 			   is completed. */
-			xen_mc_callback(do_unlock, ptl);
+			xen_mc_callback(xen_pte_unlock, ptl);
 		}
 	}
 
@@ -719,7 +838,7 @@ void xen_pgd_pin(pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
+	if (xen_pgd_walk(pgd, xen_pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
@@ -733,14 +852,14 @@ void xen_pgd_pin(pgd_t *pgd)
 	xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
 
 	if (user_pgd) {
-		pin_page(virt_to_page(user_pgd), PT_PGD);
+		xen_pin_page(virt_to_page(user_pgd), PT_PGD);
 		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
 	}
 }
 #else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
-	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 #endif /* CONFIG_X86_64 */
@@ -775,7 +894,7 @@ void xen_mm_pin_all(void)
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now.
  */
-static __init int mark_pinned(struct page *page, enum pt_level level)
+static __init int xen_mark_pinned(struct page *page, enum pt_level level)
 {
 	SetPagePinned(page);
 	return 0;
@@ -783,10 +902,10 @@ static __init int mark_pinned(struct page *page, enum pt_level level)
 
 void __init xen_mark_init_mm_pinned(void)
 {
-	pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
+	xen_pgd_walk(init_mm.pgd, xen_mark_pinned, FIXADDR_TOP);
 }
 
-static int unpin_page(struct page *page, enum pt_level level)
+static int xen_unpin_page(struct page *page, enum pt_level level)
 {
 	unsigned pgfl = TestClearPagePinned(page);
 
@@ -796,10 +915,18 @@ static int unpin_page(struct page *page, enum pt_level level)
 		spinlock_t *ptl = NULL;
 		struct multicall_space mcs;
 
+		/*
+		 * Do the converse to pin_page.  If we're using split
+		 * pte locks, we must be holding the lock for while
+		 * the pte page is unpinned but still RO to prevent
+		 * concurrent updates from seeing it in this
+		 * partially-pinned state.
+		 */
 		if (level == PT_PTE) {
-			ptl = lock_pte(page);
+			ptl = xen_pte_lock(page);
 
-			xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
+			if (ptl)
+				xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
 		}
 
 		mcs = __xen_mc_entry(0);
@@ -810,7 +937,7 @@ static int unpin_page(struct page *page, enum pt_level level)
 
 		if (ptl) {
 			/* unlock when batch completed */
-			xen_mc_callback(do_unlock, ptl);
+			xen_mc_callback(xen_pte_unlock, ptl);
 		}
 	}
 
@@ -830,17 +957,17 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	if (user_pgd) {
 		xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
-		unpin_page(virt_to_page(user_pgd), PT_PGD);
+		xen_unpin_page(virt_to_page(user_pgd), PT_PGD);
 	}
 }
 #endif
 
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
-	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
 #endif
 
-	pgd_walk(pgd, unpin_page, USER_LIMIT);
+	xen_pgd_walk(pgd, xen_unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
@@ -907,7 +1034,7 @@ static void drop_other_mm_ref(void *info)
 	}
 }
 
-static void drop_mm_ref(struct mm_struct *mm)
+static void xen_drop_mm_ref(struct mm_struct *mm)
 {
 	cpumask_t mask;
 	unsigned cpu;
@@ -937,7 +1064,7 @@ static void drop_mm_ref(struct mm_struct *mm)
 	smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 }
 #else
-static void drop_mm_ref(struct mm_struct *mm)
+static void xen_drop_mm_ref(struct mm_struct *mm)
 {
 	if (current->active_mm == mm)
 		load_cr3(swapper_pg_dir);
@@ -961,14 +1088,77 @@ static void drop_mm_ref(struct mm_struct *mm)
 void xen_exit_mmap(struct mm_struct *mm)
 {
 	get_cpu();		/* make sure we don't move around */
-	drop_mm_ref(mm);
+	xen_drop_mm_ref(mm);
 	put_cpu();
 
 	spin_lock(&mm->page_table_lock);
 
 	/* pgd may not be pinned in the error exit path of execve */
-	if (page_pinned(mm->pgd))
+	if (xen_page_pinned(mm->pgd))
 		xen_pgd_unpin(mm->pgd);
 
 	spin_unlock(&mm->page_table_lock);
 }
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_mmu_debugfs(void)
+{
+	struct dentry *d_xen = xen_init_debugfs();
+
+	if (d_xen == NULL)
+		return -ENOMEM;
+
+	d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+	debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats);
+
+	debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
+	debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
+			   &mmu_stats.pgd_update_pinned);
+	debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
+			   &mmu_stats.pgd_update_pinned);
+
+	debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
+	debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
+			   &mmu_stats.pud_update_pinned);
+	debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
+			   &mmu_stats.pud_update_pinned);
+
+	debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
+	debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
+			   &mmu_stats.pmd_update_pinned);
+	debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
+			   &mmu_stats.pmd_update_pinned);
+
+	debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
+//	debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
+//			   &mmu_stats.pte_update_pinned);
+	debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
+			   &mmu_stats.pte_update_pinned);
+
+	debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
+	debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
+			   &mmu_stats.mmu_update_extended);
+	xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
+				     mmu_stats.mmu_update_histo, 20);
+
+	debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
+	debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
+			   &mmu_stats.set_pte_at_batched);
+	debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
+			   &mmu_stats.set_pte_at_current);
+	debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
+			   &mmu_stats.set_pte_at_kernel);
+
+	debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
+	debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
+			   &mmu_stats.prot_commit_batched);
+
+	return 0;
+}
+fs_initcall(xen_mmu_debugfs);
+
+#endif	/* CONFIG_XEN_DEBUG_FS */
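
[Reviewer note, not part of the patch: the zero_stats/check_zero idiom above
works because the counters are purely advisory. Writing 1 to the
"zero_stats" debugfs file only arms a flag; the next ADD_STATS() from any
update path clears the whole stats struct before counting, so no extra
locking is needed beyond what the callers already hold. A self-contained
sketch of the idiom, with hypothetical counter names:

	#include <stdio.h>
	#include <string.h>

	static struct { unsigned pte_update, pmd_update; } stats;
	static unsigned char zero_stats; /* set via debugfs in the patch */

	static void check_zero(void)
	{
		if (zero_stats) {
			memset(&stats, 0, sizeof(stats));
			zero_stats = 0;
		}
	}

	#define ADD_STATS(elem, val) \
		do { check_zero(); stats.elem += (val); } while (0)

	int main(void)
	{
		ADD_STATS(pte_update, 3);
		zero_stats = 1;           /* "echo 1 > zero_stats" */
		ADD_STATS(pmd_update, 1); /* zeroes first, then counts */
		printf("%u %u\n", stats.pte_update, stats.pmd_update);
		return 0;                 /* prints "0 1" */
	}
]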
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 9efd1c6c9776..8ea8a0d0b0de 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -21,16 +21,20 @@
  */
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/debugfs.h>
 
 #include <asm/xen/hypercall.h>
 
 #include "multicalls.h"
+#include "debugfs.h"
+
+#define MC_BATCH	32
 
 #define MC_DEBUG	1
 
-#define MC_BATCH	32
 #define MC_ARGS		(MC_BATCH * 16)
 
+
 struct mc_buffer {
 	struct multicall_entry entries[MC_BATCH];
 #if MC_DEBUG
@@ -47,6 +51,76 @@ struct mc_buffer {
 static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
 DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
 
+/* flush reasons 0- slots, 1- args, 2- callbacks */
+enum flush_reasons
+{
+	FL_SLOTS,
+	FL_ARGS,
+	FL_CALLBACKS,
+
+	FL_N_REASONS
+};
+
+#ifdef CONFIG_XEN_DEBUG_FS
+#define NHYPERCALLS	40		/* not really */
+
+static struct {
+	unsigned histo[MC_BATCH+1];
+
+	unsigned issued;
+	unsigned arg_total;
+	unsigned hypercalls;
+	unsigned histo_hypercalls[NHYPERCALLS];
+
+	unsigned flush[FL_N_REASONS];
+} mc_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+	if (unlikely(zero_stats)) {
+		memset(&mc_stats, 0, sizeof(mc_stats));
+		zero_stats = 0;
+	}
+}
+
+static void mc_add_stats(const struct mc_buffer *mc)
+{
+	int i;
+
+	check_zero();
+
+	mc_stats.issued++;
+	mc_stats.hypercalls += mc->mcidx;
+	mc_stats.arg_total += mc->argidx;
+
+	mc_stats.histo[mc->mcidx]++;
+	for(i = 0; i < mc->mcidx; i++) {
+		unsigned op = mc->entries[i].op;
+		if (op < NHYPERCALLS)
+			mc_stats.histo_hypercalls[op]++;
+	}
+}
+
+static void mc_stats_flush(enum flush_reasons idx)
+{
+	check_zero();
+
+	mc_stats.flush[idx]++;
+}
+
+#else  /* !CONFIG_XEN_DEBUG_FS */
+
+static inline void mc_add_stats(const struct mc_buffer *mc)
+{
+}
+
+static inline void mc_stats_flush(enum flush_reasons idx)
+{
+}
+#endif	/* CONFIG_XEN_DEBUG_FS */
+
 void xen_mc_flush(void)
 {
 	struct mc_buffer *b = &__get_cpu_var(mc_buffer);
@@ -60,6 +134,8 @@ void xen_mc_flush(void)
 	   something in the middle */
 	local_irq_save(flags);
 
+	mc_add_stats(b);
+
 	if (b->mcidx) {
 #if MC_DEBUG
 		memcpy(b->debug, b->entries,
@@ -115,6 +191,7 @@ struct multicall_space __xen_mc_entry(size_t args)
 
 	if (b->mcidx == MC_BATCH ||
 	    (argidx + args) > MC_ARGS) {
+		mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS);
 		xen_mc_flush();
 		argidx = roundup(b->argidx, sizeof(u64));
 	}
@@ -158,10 +235,44 @@ void xen_mc_callback(void (*fn)(void *), void *data)
 	struct mc_buffer *b = &__get_cpu_var(mc_buffer);
 	struct callback *cb;
 
-	if (b->cbidx == MC_BATCH)
+	if (b->cbidx == MC_BATCH) {
+		mc_stats_flush(FL_CALLBACKS);
 		xen_mc_flush();
+	}
 
 	cb = &b->callbacks[b->cbidx++];
 	cb->fn = fn;
 	cb->data = data;
 }
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+static struct dentry *d_mc_debug;
+
+static int __init xen_mc_debugfs(void)
+{
+	struct dentry *d_xen = xen_init_debugfs();
+
+	if (d_xen == NULL)
+		return -ENOMEM;
+
+	d_mc_debug = debugfs_create_dir("multicalls", d_xen);
+
+	debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats);
+
+	debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued);
+	debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls);
+	debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total);
+
+	xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug,
+				     mc_stats.histo, MC_BATCH);
+	xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug,
+				     mc_stats.histo_hypercalls, NHYPERCALLS);
+	xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug,
+				     mc_stats.flush, FL_N_REASONS);
+
+	return 0;
+}
+fs_initcall(xen_mc_debugfs);
+
+#endif	/* CONFIG_XEN_DEBUG_FS */
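
[Reviewer note, not part of the patch: the flush_reasons enum records why a
multicall batch had to be submitted before xen_mc_issue(): the entry slots
filled up (FL_SLOTS), the shared argument area would overflow (FL_ARGS), or
the callback table filled (FL_CALLBACKS). The first two come from the test
in __xen_mc_entry(); a hypothetical standalone sketch of that decision:

	#include <stdio.h>

	#define MC_BATCH 32
	#define MC_ARGS  (MC_BATCH * 16)

	enum flush_reasons { FL_SLOTS, FL_ARGS, FL_CALLBACKS, FL_N_REASONS };

	/* mirrors the overflow test in __xen_mc_entry() */
	static int needs_flush(unsigned mcidx, unsigned argidx, unsigned args,
			       enum flush_reasons *why)
	{
		if (mcidx == MC_BATCH || (argidx + args) > MC_ARGS) {
			*why = (mcidx == MC_BATCH) ? FL_SLOTS : FL_ARGS;
			return 1;
		}
		return 0;
	}

	int main(void)
	{
		enum flush_reasons why;

		if (needs_flush(MC_BATCH, 0, 16, &why))    /* slots full */
			printf("flush reason %d\n", why);  /* 0 = FL_SLOTS */
		if (needs_flush(3, MC_ARGS - 8, 16, &why)) /* args full */
			printf("flush reason %d\n", why);  /* 1 = FL_ARGS */
		return 0;
	}
]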
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d8faf79a0a1d..baca7f2fbd8a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,7 +15,6 @@
  * This does not handle HOTPLUG_CPU yet.
  */
 #include <linux/sched.h>
-#include <linux/kernel_stat.h>
 #include <linux/err.h>
 #include <linux/smp.h>
 
@@ -36,8 +35,6 @@
 #include "xen-ops.h"
 #include "mmu.h"
 
-static void __cpuinit xen_init_lock_cpu(int cpu);
-
 cpumask_t xen_cpu_initialized_map;
 
 static DEFINE_PER_CPU(int, resched_irq);
@@ -419,170 +416,6 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-struct xen_spinlock {
-	unsigned char lock;		/* 0 -> free; 1 -> locked */
-	unsigned short spinners;	/* count of waiting cpus */
-};
-
-static int xen_spin_is_locked(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	/* Not strictly true; this is only the count of contended
-	   lock-takers entering the slow path. */
-	return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	u8 old = 1;
-
-	asm("xchgb %b0,%1"
-	    : "+q" (old), "+m" (xl->lock) : : "memory");
-
-	return old == 0;
-}
-
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
-
-static inline void spinning_lock(struct xen_spinlock *xl)
-{
-	__get_cpu_var(lock_spinners) = xl;
-	wmb();			/* set lock of interest before count */
-	asm(LOCK_PREFIX " incw %0"
-	    : "+m" (xl->spinners) : : "memory");
-}
-
-static inline void unspinning_lock(struct xen_spinlock *xl)
-{
-	asm(LOCK_PREFIX " decw %0"
-	    : "+m" (xl->spinners) : : "memory");
-	wmb();			/* decrement count before clearing lock */
-	__get_cpu_var(lock_spinners) = NULL;
-}
-
-static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	int irq = __get_cpu_var(lock_kicker_irq);
-	int ret;
-
-	/* If kicker interrupts not initialized yet, just spin */
-	if (irq == -1)
-		return 0;
-
-	/* announce we're spinning */
-	spinning_lock(xl);
-
-	/* clear pending */
-	xen_clear_irq_pending(irq);
-
-	/* check again make sure it didn't become free while
-	   we weren't looking  */
-	ret = xen_spin_trylock(lock);
-	if (ret)
-		goto out;
-
-	/* block until irq becomes pending */
-	xen_poll_irq(irq);
-	kstat_this_cpu.irqs[irq]++;
-
-out:
-	unspinning_lock(xl);
-	return ret;
-}
-
-static void xen_spin_lock(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	int timeout;
-	u8 oldval;
-
-	do {
-		timeout = 1 << 10;
-
-		asm("1: xchgb %1,%0\n"
-		    " testb %1,%1\n"
-		    " jz 3f\n"
-		    "2: rep;nop\n"
-		    " cmpb $0,%0\n"
-		    " je 1b\n"
-		    " dec %2\n"
-		    " jnz 2b\n"
-		    "3:\n"
-		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-		    : "1" (1)
-		    : "memory");
-
-	} while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		/* XXX should mix up next cpu selection */
-		if (per_cpu(lock_spinners, cpu) == xl) {
-			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
-			break;
-		}
-	}
-}
-
-static void xen_spin_unlock(struct raw_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	smp_wmb();		/* make sure no writes get moved after unlock */
-	xl->lock = 0;		/* release lock */
-
-	/* make sure unlock happens before kick */
-	barrier();
-
-	if (unlikely(xl->spinners))
-		xen_spin_unlock_slow(xl);
-}
-
-static __cpuinit void xen_init_lock_cpu(int cpu)
-{
-	int irq;
-	const char *name;
-
-	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
-	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
-				     cpu,
-				     xen_reschedule_interrupt,
-				     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
-				     name,
-				     NULL);
-
-	if (irq >= 0) {
-		disable_irq(irq); /* make sure it's never delivered */
-		per_cpu(lock_kicker_irq, cpu) = irq;
-	}
-
-	printk("cpu %d spinlock event irq %d\n", cpu, irq);
-}
-
-static void __init xen_init_spinlocks(void)
-{
-	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
580 pv_lock_ops.spin_is_contended = xen_spin_is_contended;
581 pv_lock_ops.spin_lock = xen_spin_lock;
582 pv_lock_ops.spin_trylock = xen_spin_trylock;
583 pv_lock_ops.spin_unlock = xen_spin_unlock;
584}
585
586static const struct smp_ops xen_smp_ops __initdata = { 419static const struct smp_ops xen_smp_ops __initdata = {
587 .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, 420 .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
588 .smp_prepare_cpus = xen_smp_prepare_cpus, 421 .smp_prepare_cpus = xen_smp_prepare_cpus,
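
Besides moving the implementation out of smp.c, the replacement file below fixes a latent nesting bug in the code removed above: spinning_lock() blindly overwrote the per-cpu lock_spinners pointer, so an interrupt that entered the slow path for a second lock left the CPU unregistered for the first one, and that lock's unlocker could never find it to kick. A condensed sketch of the save/restore discipline the new file adopts (names follow the hunks below):

	struct xen_spinlock *prev;

	prev = __get_cpu_var(lock_spinners);	/* may already be spinning on lock A */
	__get_cpu_var(lock_spinners) = xl;	/* interrupt now spins on lock B */
	/* ... poll for the kicker irq ... */
	__get_cpu_var(lock_spinners) = prev;	/* restore A, not NULL, on exit */
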
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
new file mode 100644
index 000000000000..d072823bc06d
--- /dev/null
+++ b/arch/x86/xen/spinlock.c
@@ -0,0 +1,423 @@
1/*
2 * Split spinlock implementation out into its own file, so it can be
3 * compiled in a FTRACE-compatible way.
4 */
5#include <linux/kernel_stat.h>
6#include <linux/spinlock.h>
7#include <linux/debugfs.h>
8#include <linux/log2.h>
9
10#include <asm/paravirt.h>
11
12#include <xen/interface/xen.h>
13#include <xen/events.h>
14
15#include "xen-ops.h"
16#include "debugfs.h"
17
18#ifdef CONFIG_XEN_DEBUG_FS
19static struct xen_spinlock_stats
20{
21 u64 taken;
22 u32 taken_slow;
23 u32 taken_slow_nested;
24 u32 taken_slow_pickup;
25 u32 taken_slow_spurious;
26 u32 taken_slow_irqenable;
27
28 u64 released;
29 u32 released_slow;
30 u32 released_slow_kicked;
31
32#define HISTO_BUCKETS 30
33 u32 histo_spin_total[HISTO_BUCKETS+1];
34 u32 histo_spin_spinning[HISTO_BUCKETS+1];
35 u32 histo_spin_blocked[HISTO_BUCKETS+1];
36
37 u64 time_total;
38 u64 time_spinning;
39 u64 time_blocked;
40} spinlock_stats;
41
42static u8 zero_stats;
43
44static unsigned lock_timeout = 1 << 10;
45#define TIMEOUT lock_timeout
46
47static inline void check_zero(void)
48{
49 if (unlikely(zero_stats)) {
50 memset(&spinlock_stats, 0, sizeof(spinlock_stats));
51 zero_stats = 0;
52 }
53}
54
55#define ADD_STATS(elem, val) \
56 do { check_zero(); spinlock_stats.elem += (val); } while(0)
57
58static inline u64 spin_time_start(void)
59{
60 return xen_clocksource_read();
61}
62
63static void __spin_time_accum(u64 delta, u32 *array)
64{
65 unsigned index = ilog2(delta);
66
67 check_zero();
68
69 if (index < HISTO_BUCKETS)
70 array[index]++;
71 else
72 array[HISTO_BUCKETS]++;
73}
74
75static inline void spin_time_accum_spinning(u64 start)
76{
77 u32 delta = xen_clocksource_read() - start;
78
79 __spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
80 spinlock_stats.time_spinning += delta;
81}
82
83static inline void spin_time_accum_total(u64 start)
84{
85 u32 delta = xen_clocksource_read() - start;
86
87 __spin_time_accum(delta, spinlock_stats.histo_spin_total);
88 spinlock_stats.time_total += delta;
89}
90
91static inline void spin_time_accum_blocked(u64 start)
92{
93 u32 delta = xen_clocksource_read() - start;
94
95 __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
96 spinlock_stats.time_blocked += delta;
97}
98#else /* !CONFIG_XEN_DEBUG_FS */
99#define TIMEOUT (1 << 10)
100#define ADD_STATS(elem, val) do { (void)(val); } while(0)
101
102static inline u64 spin_time_start(void)
103{
104 return 0;
105}
106
107static inline void spin_time_accum_total(u64 start)
108{
109}
110static inline void spin_time_accum_spinning(u64 start)
111{
112}
113static inline void spin_time_accum_blocked(u64 start)
114{
115}
116#endif /* CONFIG_XEN_DEBUG_FS */
117
118struct xen_spinlock {
119 unsigned char lock; /* 0 -> free; 1 -> locked */
120 unsigned short spinners; /* count of waiting cpus */
121};
122
123static int xen_spin_is_locked(struct raw_spinlock *lock)
124{
125 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
126
127 return xl->lock != 0;
128}
129
130static int xen_spin_is_contended(struct raw_spinlock *lock)
131{
132 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
133
134 /* Not strictly true; this is only the count of contended
135 lock-takers entering the slow path. */
136 return xl->spinners != 0;
137}
138
139static int xen_spin_trylock(struct raw_spinlock *lock)
140{
141 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
142 u8 old = 1;
143
144 asm("xchgb %b0,%1"
145 : "+q" (old), "+m" (xl->lock) : : "memory");
146
147 return old == 0;
148}
149
150static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
151static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
152
153/*
154 * Mark a cpu as interested in a lock. Returns the CPU's previous
155 * lock of interest, in case we got preempted by an interrupt.
156 */
157static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
158{
159 struct xen_spinlock *prev;
160
161 prev = __get_cpu_var(lock_spinners);
162 __get_cpu_var(lock_spinners) = xl;
163
164 wmb(); /* set lock of interest before count */
165
166 asm(LOCK_PREFIX " incw %0"
167 : "+m" (xl->spinners) : : "memory");
168
169 return prev;
170}
171
172/*
173 * Mark a cpu as no longer interested in a lock. Restores previous
174 * lock of interest (NULL for none).
175 */
176static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
177{
178 asm(LOCK_PREFIX " decw %0"
179 : "+m" (xl->spinners) : : "memory");
180 wmb(); /* decrement count before restoring lock */
181 __get_cpu_var(lock_spinners) = prev;
182}
183
184static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable)
185{
186 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
187 struct xen_spinlock *prev;
188 int irq = __get_cpu_var(lock_kicker_irq);
189 int ret;
190 unsigned long flags;
191 u64 start;
192
193 /* If kicker interrupts not initialized yet, just spin */
194 if (irq == -1)
195 return 0;
196
197 start = spin_time_start();
198
199 /* announce we're spinning */
200 prev = spinning_lock(xl);
201
202 flags = __raw_local_save_flags();
203 if (irq_enable) {
204 ADD_STATS(taken_slow_irqenable, 1);
205 raw_local_irq_enable();
206 }
207
208 ADD_STATS(taken_slow, 1);
209 ADD_STATS(taken_slow_nested, prev != NULL);
210
211 do {
212 /* clear pending */
213 xen_clear_irq_pending(irq);
214
215 /* check again to make sure it didn't become free while
216 we weren't looking */
217 ret = xen_spin_trylock(lock);
218 if (ret) {
219 ADD_STATS(taken_slow_pickup, 1);
220
221 /*
222 * If we interrupted another spinlock while it
223 * was blocking, make sure it doesn't block
224 * without rechecking the lock.
225 */
226 if (prev != NULL)
227 xen_set_irq_pending(irq);
228 goto out;
229 }
230
231 /*
232 * Block until irq becomes pending. If we're
233 * interrupted at this point (after the trylock but
234 * before entering the block), then the nested lock
235 * handler guarantees that the irq will be left
236 * pending if there's any chance the lock became free;
237 * xen_poll_irq() returns immediately if the irq is
238 * pending.
239 */
240 xen_poll_irq(irq);
241 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
242 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
243
244 kstat_this_cpu.irqs[irq]++;
245
246out:
247 raw_local_irq_restore(flags);
248 unspinning_lock(xl, prev);
249 spin_time_accum_blocked(start);
250
251 return ret;
252}
253
254static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
255{
256 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
257 unsigned timeout;
258 u8 oldval;
259 u64 start_spin;
260
261 ADD_STATS(taken, 1);
262
263 start_spin = spin_time_start();
264
265 do {
266 u64 start_spin_fast = spin_time_start();
267
268 timeout = TIMEOUT;
269
270 asm("1: xchgb %1,%0\n"
271 " testb %1,%1\n"
272 " jz 3f\n"
273 "2: rep;nop\n"
274 " cmpb $0,%0\n"
275 " je 1b\n"
276 " dec %2\n"
277 " jnz 2b\n"
278 "3:\n"
279 : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
280 : "1" (1)
281 : "memory");
282
283 spin_time_accum_spinning(start_spin_fast);
284
285 } while (unlikely(oldval != 0 &&
286 (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
287
288 spin_time_accum_total(start_spin);
289}
290
291static void xen_spin_lock(struct raw_spinlock *lock)
292{
293 __xen_spin_lock(lock, false);
294}
295
296static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
297{
298 __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
299}
300
301static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
302{
303 int cpu;
304
305 ADD_STATS(released_slow, 1);
306
307 for_each_online_cpu(cpu) {
308 /* XXX should mix up next cpu selection */
309 if (per_cpu(lock_spinners, cpu) == xl) {
310 ADD_STATS(released_slow_kicked, 1);
311 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
312 break;
313 }
314 }
315}
316
317static void xen_spin_unlock(struct raw_spinlock *lock)
318{
319 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
320
321 ADD_STATS(released, 1);
322
323 smp_wmb(); /* make sure no writes get moved after unlock */
324 xl->lock = 0; /* release lock */
325
326 /* make sure unlock happens before kick */
327 barrier();
328
329 if (unlikely(xl->spinners))
330 xen_spin_unlock_slow(xl);
331}
332
333static irqreturn_t dummy_handler(int irq, void *dev_id)
334{
335 BUG();
336 return IRQ_HANDLED;
337}
338
339void __cpuinit xen_init_lock_cpu(int cpu)
340{
341 int irq;
342 const char *name;
343
344 name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
345 irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
346 cpu,
347 dummy_handler,
348 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
349 name,
350 NULL);
351
352 if (irq >= 0) {
353 disable_irq(irq); /* make sure it's never delivered */
354 per_cpu(lock_kicker_irq, cpu) = irq;
355 }
356
357 printk(KERN_DEBUG "cpu %d spinlock event irq %d\n", cpu, irq);
358}
359
360void __init xen_init_spinlocks(void)
361{
362 pv_lock_ops.spin_is_locked = xen_spin_is_locked;
363 pv_lock_ops.spin_is_contended = xen_spin_is_contended;
364 pv_lock_ops.spin_lock = xen_spin_lock;
365 pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
366 pv_lock_ops.spin_trylock = xen_spin_trylock;
367 pv_lock_ops.spin_unlock = xen_spin_unlock;
368}
369
370#ifdef CONFIG_XEN_DEBUG_FS
371
372static struct dentry *d_spin_debug;
373
374static int __init xen_spinlock_debugfs(void)
375{
376 struct dentry *d_xen = xen_init_debugfs();
377
378 if (d_xen == NULL)
379 return -ENOMEM;
380
381 d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
382
383 debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
384
385 debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
386
387 debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
388 debugfs_create_u32("taken_slow", 0444, d_spin_debug,
389 &spinlock_stats.taken_slow);
390 debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
391 &spinlock_stats.taken_slow_nested);
392 debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
393 &spinlock_stats.taken_slow_pickup);
394 debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
395 &spinlock_stats.taken_slow_spurious);
396 debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
397 &spinlock_stats.taken_slow_irqenable);
398
399 debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
400 debugfs_create_u32("released_slow", 0444, d_spin_debug,
401 &spinlock_stats.released_slow);
402 debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
403 &spinlock_stats.released_slow_kicked);
404
405 debugfs_create_u64("time_spinning", 0444, d_spin_debug,
406 &spinlock_stats.time_spinning);
407 debugfs_create_u64("time_blocked", 0444, d_spin_debug,
408 &spinlock_stats.time_blocked);
409 debugfs_create_u64("time_total", 0444, d_spin_debug,
410 &spinlock_stats.time_total);
411
412 xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
413 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
414 xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
415 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
416 xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
417 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
418
419 return 0;
420}
421fs_initcall(xen_spinlock_debugfs);
422
423#endif /* CONFIG_XEN_DEBUG_FS */
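
The inline assembly in __xen_spin_lock() is easier to audit next to a C rendering. Roughly (a sketch, not a drop-in replacement: the real loop is kept in one asm block so its behaviour is independent of compiler choices and of ftrace instrumentation):

	u8 oldval;

	do {
		oldval = xchg(&xl->lock, 1);	/* 1: xchgb - try to take it */
		if (oldval == 0)
			break;			/* jz 3f - lock was free, we own it */
		while (xl->lock != 0 && --timeout)
			cpu_relax();		/* 2: rep;nop - wait while held */
	} while (timeout);			/* exhausted: fall through to slow path */
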
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 685b77470fc3..20182d9072c4 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -30,8 +30,6 @@
30#define TIMER_SLOP 100000 30#define TIMER_SLOP 100000
31#define NS_PER_TICK (1000000000LL / HZ) 31#define NS_PER_TICK (1000000000LL / HZ)
32 32
33static cycle_t xen_clocksource_read(void);
34
35/* runstate info updated by Xen */ 33/* runstate info updated by Xen */
36static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); 34static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
37 35
@@ -213,7 +211,7 @@ unsigned long xen_tsc_khz(void)
213 return xen_khz; 211 return xen_khz;
214} 212}
215 213
216static cycle_t xen_clocksource_read(void) 214cycle_t xen_clocksource_read(void)
217{ 215{
218 struct pvclock_vcpu_time_info *src; 216 struct pvclock_vcpu_time_info *src;
219 cycle_t ret; 217 cycle_t ret;
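
xen_clocksource_read() loses its static qualifier so spinlock.c can timestamp lock acquisition. The spin-time histograms bucket each delta by ilog2(); note that ilog2(0) is undefined, so a zero-nanosecond delta is an implicit assumption there. A sketch of the accounting pattern with a defensive guard added (the guard is this note's addition, not the patch's):

	static void account(u64 start, u32 *histo, u64 *total)
	{
		u64 delta = xen_clocksource_read() - start;	/* nanoseconds */
		unsigned idx = ilog2(delta | 1);		/* guard against ilog2(0) */

		histo[idx < HISTO_BUCKETS ? idx : HISTO_BUCKETS]++;
		*total += delta;
	}
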
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 2497a30f41de..42786f59d9c0 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -298,7 +298,7 @@ check_events:
298 push %eax 298 push %eax
299 push %ecx 299 push %ecx
300 push %edx 300 push %edx
301 call force_evtchn_callback 301 call xen_force_evtchn_callback
302 pop %edx 302 pop %edx
303 pop %ecx 303 pop %ecx
304 pop %eax 304 pop %eax
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 7f58304fafb3..3b9bda46487a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -122,7 +122,7 @@ check_events:
122 push %r9 122 push %r9
123 push %r10 123 push %r10
124 push %r11 124 push %r11
125 call force_evtchn_callback 125 call xen_force_evtchn_callback
126 pop %r11 126 pop %r11
127 pop %r10 127 pop %r10
128 pop %r9 128 pop %r9
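
Both assembly stubs now call xen_force_evtchn_callback; the un-prefixed definition is deleted from drivers/xen/events.c further down. Presumably the renamed helper lands in the new arch/x86/xen/irq.c (its hunks are earlier in this patch); judging from the removed body, it stays a one-liner:

	void xen_force_evtchn_callback(void)
	{
		/* any hypercall will do; Xen rechecks pending events on return */
		(void)HYPERVISOR_xen_version(0, NULL);
	}
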
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index dd3c23152a2e..1e8bfdaa20d3 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -2,6 +2,7 @@
2#define XEN_OPS_H 2#define XEN_OPS_H
3 3
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/clocksource.h>
5#include <linux/irqreturn.h> 6#include <linux/irqreturn.h>
6#include <xen/xen-ops.h> 7#include <xen/xen-ops.h>
7 8
@@ -31,7 +32,9 @@ void xen_vcpu_restore(void);
31 32
32void __init xen_build_dynamic_phys_to_machine(void); 33void __init xen_build_dynamic_phys_to_machine(void);
33 34
35void xen_init_irq_ops(void);
34void xen_setup_timer(int cpu); 36void xen_setup_timer(int cpu);
37cycle_t xen_clocksource_read(void);
35void xen_setup_cpu_clockevents(void); 38void xen_setup_cpu_clockevents(void);
36unsigned long xen_tsc_khz(void); 39unsigned long xen_tsc_khz(void);
37void __init xen_time_init(void); 40void __init xen_time_init(void);
@@ -50,6 +53,9 @@ void __init xen_setup_vcpu_info_placement(void);
50#ifdef CONFIG_SMP 53#ifdef CONFIG_SMP
51void xen_smp_init(void); 54void xen_smp_init(void);
52 55
56void __init xen_init_spinlocks(void);
57__cpuinit void xen_init_lock_cpu(int cpu);
58
53extern cpumask_t xen_cpu_initialized_map; 59extern cpumask_t xen_cpu_initialized_map;
54#else 60#else
55static inline void xen_smp_init(void) {} 61static inline void xen_smp_init(void) {}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3ca643cafccd..d5e753255153 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1032,7 +1032,7 @@ static struct xenbus_driver blkfront = {
1032 1032
1033static int __init xlblk_init(void) 1033static int __init xlblk_init(void)
1034{ 1034{
1035 if (!is_running_on_xen()) 1035 if (!xen_domain())
1036 return -ENODEV; 1036 return -ENODEV;
1037 1037
1038 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { 1038 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index 6b70aa66a587..538ceea5e7df 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -108,8 +108,8 @@ static int __init xen_init(void)
108{ 108{
109 struct hvc_struct *hp; 109 struct hvc_struct *hp;
110 110
111 if (!is_running_on_xen() || 111 if (!xen_pv_domain() ||
112 is_initial_xendomain() || 112 xen_initial_domain() ||
113 !xen_start_info->console.domU.evtchn) 113 !xen_start_info->console.domU.evtchn)
114 return -ENODEV; 114 return -ENODEV;
115 115
@@ -142,7 +142,7 @@ static void __exit xen_fini(void)
142 142
143static int xen_cons_init(void) 143static int xen_cons_init(void)
144{ 144{
145 if (!is_running_on_xen()) 145 if (!xen_pv_domain())
146 return 0; 146 return 0;
147 147
148 hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); 148 hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
index 9ce3b3baf3a2..3ab6362f043c 100644
--- a/drivers/input/xen-kbdfront.c
+++ b/drivers/input/xen-kbdfront.c
@@ -335,11 +335,11 @@ static struct xenbus_driver xenkbd = {
335 335
336static int __init xenkbd_init(void) 336static int __init xenkbd_init(void)
337{ 337{
338 if (!is_running_on_xen()) 338 if (!xen_domain())
339 return -ENODEV; 339 return -ENODEV;
340 340
341 /* Nothing to do if running in dom0. */ 341 /* Nothing to do if running in dom0. */
342 if (is_initial_xendomain()) 342 if (xen_initial_domain())
343 return -ENODEV; 343 return -ENODEV;
344 344
345 return xenbus_register_frontend(&xenkbd); 345 return xenbus_register_frontend(&xenkbd);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index c749bdba214c..3c3dd403f5dd 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1794,10 +1794,10 @@ static struct xenbus_driver netfront = {
1794 1794
1795static int __init netif_init(void) 1795static int __init netif_init(void)
1796{ 1796{
1797 if (!is_running_on_xen()) 1797 if (!xen_domain())
1798 return -ENODEV; 1798 return -ENODEV;
1799 1799
1800 if (is_initial_xendomain()) 1800 if (xen_initial_domain())
1801 return 0; 1801 return 0;
1802 1802
1803 printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); 1803 printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
@@ -1809,7 +1809,7 @@ module_init(netif_init);
1809 1809
1810static void __exit netif_exit(void) 1810static void __exit netif_exit(void)
1811{ 1811{
1812 if (is_initial_xendomain()) 1812 if (xen_initial_domain())
1813 return; 1813 return;
1814 1814
1815 xenbus_unregister_driver(&netfront); 1815 xenbus_unregister_driver(&netfront);
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index 47ed39b52f9c..a463b3dd837b 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -680,11 +680,11 @@ static struct xenbus_driver xenfb = {
680 680
681static int __init xenfb_init(void) 681static int __init xenfb_init(void)
682{ 682{
683 if (!is_running_on_xen()) 683 if (!xen_domain())
684 return -ENODEV; 684 return -ENODEV;
685 685
686 /* Nothing to do if running in dom0. */ 686 /* Nothing to do if running in dom0. */
687 if (is_initial_xendomain()) 687 if (xen_initial_domain())
688 return -ENODEV; 688 return -ENODEV;
689 689
690 return xenbus_register_frontend(&xenfb); 690 return xenbus_register_frontend(&xenfb);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d4427cb86979..a51f3e17a5fd 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -60,7 +60,7 @@
60 60
61#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 61#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
62 62
63#define BALLOON_CLASS_NAME "memory" 63#define BALLOON_CLASS_NAME "xen_memory"
64 64
65struct balloon_stats { 65struct balloon_stats {
66 /* We aim for 'current allocation' == 'target allocation'. */ 66 /* We aim for 'current allocation' == 'target allocation'. */
@@ -226,9 +226,8 @@ static int increase_reservation(unsigned long nr_pages)
226 } 226 }
227 227
228 set_xen_guest_handle(reservation.extent_start, frame_list); 228 set_xen_guest_handle(reservation.extent_start, frame_list);
229 reservation.nr_extents = nr_pages; 229 reservation.nr_extents = nr_pages;
230 rc = HYPERVISOR_memory_op( 230 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
231 XENMEM_populate_physmap, &reservation);
232 if (rc < nr_pages) { 231 if (rc < nr_pages) {
233 if (rc > 0) { 232 if (rc > 0) {
234 int ret; 233 int ret;
@@ -236,7 +235,7 @@ static int increase_reservation(unsigned long nr_pages)
236 /* We hit the Xen hard limit: reprobe. */ 235 /* We hit the Xen hard limit: reprobe. */
237 reservation.nr_extents = rc; 236 reservation.nr_extents = rc;
238 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 237 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
239 &reservation); 238 &reservation);
240 BUG_ON(ret != rc); 239 BUG_ON(ret != rc);
241 } 240 }
242 if (rc >= 0) 241 if (rc >= 0)
@@ -420,7 +419,7 @@ static int __init balloon_init(void)
420 unsigned long pfn; 419 unsigned long pfn;
421 struct page *page; 420 struct page *page;
422 421
423 if (!is_running_on_xen()) 422 if (!xen_pv_domain())
424 return -ENODEV; 423 return -ENODEV;
425 424
426 pr_info("xen_balloon: Initialising balloon driver.\n"); 425 pr_info("xen_balloon: Initialising balloon driver.\n");
@@ -464,136 +463,13 @@ static void balloon_exit(void)
464 463
465module_exit(balloon_exit); 464module_exit(balloon_exit);
466 465
467static void balloon_update_driver_allowance(long delta) 466#define BALLOON_SHOW(name, format, args...) \
468{ 467 static ssize_t show_##name(struct sys_device *dev, \
469 unsigned long flags; 468 struct sysdev_attribute *attr, \
470 469 char *buf) \
471 spin_lock_irqsave(&balloon_lock, flags); 470 { \
472 balloon_stats.driver_pages += delta; 471 return sprintf(buf, format, ##args); \
473 spin_unlock_irqrestore(&balloon_lock, flags); 472 } \
474}
475
476static int dealloc_pte_fn(
477 pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
478{
479 unsigned long mfn = pte_mfn(*pte);
480 int ret;
481 struct xen_memory_reservation reservation = {
482 .nr_extents = 1,
483 .extent_order = 0,
484 .domid = DOMID_SELF
485 };
486 set_xen_guest_handle(reservation.extent_start, &mfn);
487 set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
488 set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
489 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
490 BUG_ON(ret != 1);
491 return 0;
492}
493
494static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
495{
496 unsigned long vaddr, flags;
497 struct page *page, **pagevec;
498 int i, ret;
499
500 pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
501 if (pagevec == NULL)
502 return NULL;
503
504 for (i = 0; i < nr_pages; i++) {
505 page = pagevec[i] = alloc_page(GFP_KERNEL);
506 if (page == NULL)
507 goto err;
508
509 vaddr = (unsigned long)page_address(page);
510
511 scrub_page(page);
512
513 spin_lock_irqsave(&balloon_lock, flags);
514
515 if (xen_feature(XENFEAT_auto_translated_physmap)) {
516 unsigned long gmfn = page_to_pfn(page);
517 struct xen_memory_reservation reservation = {
518 .nr_extents = 1,
519 .extent_order = 0,
520 .domid = DOMID_SELF
521 };
522 set_xen_guest_handle(reservation.extent_start, &gmfn);
523 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
524 &reservation);
525 if (ret == 1)
526 ret = 0; /* success */
527 } else {
528 ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
529 dealloc_pte_fn, NULL);
530 }
531
532 if (ret != 0) {
533 spin_unlock_irqrestore(&balloon_lock, flags);
534 __free_page(page);
535 goto err;
536 }
537
538 totalram_pages = --balloon_stats.current_pages;
539
540 spin_unlock_irqrestore(&balloon_lock, flags);
541 }
542
543 out:
544 schedule_work(&balloon_worker);
545 flush_tlb_all();
546 return pagevec;
547
548 err:
549 spin_lock_irqsave(&balloon_lock, flags);
550 while (--i >= 0)
551 balloon_append(pagevec[i]);
552 spin_unlock_irqrestore(&balloon_lock, flags);
553 kfree(pagevec);
554 pagevec = NULL;
555 goto out;
556}
557
558static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
559{
560 unsigned long flags;
561 int i;
562
563 if (pagevec == NULL)
564 return;
565
566 spin_lock_irqsave(&balloon_lock, flags);
567 for (i = 0; i < nr_pages; i++) {
568 BUG_ON(page_count(pagevec[i]) != 1);
569 balloon_append(pagevec[i]);
570 }
571 spin_unlock_irqrestore(&balloon_lock, flags);
572
573 kfree(pagevec);
574
575 schedule_work(&balloon_worker);
576}
577
578static void balloon_release_driver_page(struct page *page)
579{
580 unsigned long flags;
581
582 spin_lock_irqsave(&balloon_lock, flags);
583 balloon_append(page);
584 balloon_stats.driver_pages--;
585 spin_unlock_irqrestore(&balloon_lock, flags);
586
587 schedule_work(&balloon_worker);
588}
589
590
591#define BALLOON_SHOW(name, format, args...) \
592 static ssize_t show_##name(struct sys_device *dev, \
593 char *buf) \
594 { \
595 return sprintf(buf, format, ##args); \
596 } \
597 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) 473 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
598 474
599BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); 475BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
@@ -604,7 +480,8 @@ BALLOON_SHOW(hard_limit_kb,
604 (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); 480 (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
605BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); 481BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
606 482
607static ssize_t show_target_kb(struct sys_device *dev, char *buf) 483static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
484 char *buf)
608{ 485{
609 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); 486 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
610} 487}
@@ -614,19 +491,14 @@ static ssize_t store_target_kb(struct sys_device *dev,
614 const char *buf, 491 const char *buf,
615 size_t count) 492 size_t count)
616{ 493{
617 char memstring[64], *endchar; 494 char *endchar;
618 unsigned long long target_bytes; 495 unsigned long long target_bytes;
619 496
620 if (!capable(CAP_SYS_ADMIN)) 497 if (!capable(CAP_SYS_ADMIN))
621 return -EPERM; 498 return -EPERM;
622 499
623 if (count <= 1) 500 target_bytes = memparse(buf, &endchar);
624 return -EBADMSG; /* runt */
625 if (count > sizeof(memstring))
626 return -EFBIG; /* too long */
627 strcpy(memstring, buf);
628 501
629 target_bytes = memparse(memstring, &endchar);
630 balloon_set_new_target(target_bytes >> PAGE_SHIFT); 502 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
631 503
632 return count; 504 return count;
@@ -694,20 +566,4 @@ static int register_balloon(struct sys_device *sysdev)
694 return error; 566 return error;
695} 567}
696 568
697static void unregister_balloon(struct sys_device *sysdev)
698{
699 int i;
700
701 sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
702 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
703 sysdev_remove_file(sysdev, balloon_attrs[i]);
704 sysdev_unregister(sysdev);
705 sysdev_class_unregister(&balloon_sysdev_class);
706}
707
708static void balloon_sysfs_exit(void)
709{
710 unregister_balloon(&balloon_sysdev);
711}
712
713MODULE_LICENSE("GPL"); 569MODULE_LICENSE("GPL");
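
store_target_kb previously copied the user buffer into a fixed 64-byte array only because memparse() took a non-const pointer; with the const-ification at the bottom of this patch (lib/cmdline.c and include/linux/kernel.h), the copy and its -EBADMSG/-EFBIG length policing can go. Usage is now simply:

	char *endchar;
	unsigned long long target_bytes = memparse(buf, &endchar);
	/* "512M" -> 512 << 20; K/M/G suffixes are handled by memparse() */
	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
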
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0e0c28574af8..b6c2b8f16bee 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -84,17 +84,6 @@ static int irq_bindcount[NR_IRQS];
84/* Xen will never allocate port zero for any purpose. */ 84/* Xen will never allocate port zero for any purpose. */
85#define VALID_EVTCHN(chn) ((chn) != 0) 85#define VALID_EVTCHN(chn) ((chn) != 0)
86 86
87/*
88 * Force a proper event-channel callback from Xen after clearing the
89 * callback mask. We do this in a very simple manner, by making a call
90 * down into Xen. The pending flag will be checked by Xen on return.
91 */
92void force_evtchn_callback(void)
93{
94 (void)HYPERVISOR_xen_version(0, NULL);
95}
96EXPORT_SYMBOL_GPL(force_evtchn_callback);
97
98static struct irq_chip xen_dynamic_chip; 87static struct irq_chip xen_dynamic_chip;
99 88
100/* Constructor for packed IRQ information. */ 89/* Constructor for packed IRQ information. */
@@ -175,6 +164,12 @@ static inline void set_evtchn(int port)
175 sync_set_bit(port, &s->evtchn_pending[0]); 164 sync_set_bit(port, &s->evtchn_pending[0]);
176} 165}
177 166
167static inline int test_evtchn(int port)
168{
169 struct shared_info *s = HYPERVISOR_shared_info;
170 return sync_test_bit(port, &s->evtchn_pending[0]);
171}
172
178 173
179/** 174/**
180 * notify_remote_via_irq - send event to remote end of event channel via irq 175 * notify_remote_via_irq - send event to remote end of event channel via irq
@@ -743,6 +738,25 @@ void xen_clear_irq_pending(int irq)
743 clear_evtchn(evtchn); 738 clear_evtchn(evtchn);
744} 739}
745 740
741void xen_set_irq_pending(int irq)
742{
743 int evtchn = evtchn_from_irq(irq);
744
745 if (VALID_EVTCHN(evtchn))
746 set_evtchn(evtchn);
747}
748
749bool xen_test_irq_pending(int irq)
750{
751 int evtchn = evtchn_from_irq(irq);
752 bool ret = false;
753
754 if (VALID_EVTCHN(evtchn))
755 ret = test_evtchn(evtchn);
756
757 return ret;
758}
759
746/* Poll waiting for an irq to become pending. In the usual case, the 760/* Poll waiting for an irq to become pending. In the usual case, the
747 irq will be disabled so it won't deliver an interrupt. */ 761 irq will be disabled so it won't deliver an interrupt. */
748void xen_poll_irq(int irq) 762void xen_poll_irq(int irq)
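
xen_set_irq_pending() and xen_test_irq_pending() exist for the nested-spinner case in spinlock.c: if an interrupt's slow path steals the lock its parent context was polling for, it re-marks the shared kicker event pending so the parent's xen_poll_irq() returns immediately instead of sleeping on a kick that already happened. The resulting wait loop, condensed (a sketch of the spinlock.c logic above, not new API; "nested" stands in for the prev != NULL test there):

	do {
		xen_clear_irq_pending(irq);		/* arm for the next kick */
		if (xen_spin_trylock(lock)) {
			if (nested)			/* stole an outer poll's kick? */
				xen_set_irq_pending(irq);	/* replay it */
			break;
		}
		xen_poll_irq(irq);			/* block until kicked */
	} while (!xen_test_irq_pending(irq));		/* loop on spurious wakeup */
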
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index e9e11168616a..06592b9da83c 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -508,7 +508,7 @@ static int __devinit gnttab_init(void)
508 unsigned int max_nr_glist_frames, nr_glist_frames; 508 unsigned int max_nr_glist_frames, nr_glist_frames;
509 unsigned int nr_init_grefs; 509 unsigned int nr_init_grefs;
510 510
511 if (!is_running_on_xen()) 511 if (!xen_domain())
512 return -ENODEV; 512 return -ENODEV;
513 513
514 nr_grant_frames = 1; 514 nr_grant_frames = 1;
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 57ceb5346b74..7f24a98a446f 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -814,7 +814,7 @@ static int __init xenbus_probe_init(void)
814 DPRINTK(""); 814 DPRINTK("");
815 815
816 err = -ENODEV; 816 err = -ENODEV;
817 if (!is_running_on_xen()) 817 if (!xen_domain())
818 goto out_error; 818 goto out_error;
819 819
820 /* Register ourselves with the kernel bus subsystem */ 820 /* Register ourselves with the kernel bus subsystem */
@@ -829,7 +829,7 @@ static int __init xenbus_probe_init(void)
829 /* 829 /*
830 * Domain0 doesn't have a store_evtchn or store_mfn yet. 830 * Domain0 doesn't have a store_evtchn or store_mfn yet.
831 */ 831 */
832 if (is_initial_xendomain()) { 832 if (xen_initial_domain()) {
833 /* dom0 not yet supported */ 833 /* dom0 not yet supported */
834 } else { 834 } else {
835 xenstored_ready = 1; 835 xenstored_ready = 1;
@@ -846,7 +846,7 @@ static int __init xenbus_probe_init(void)
846 goto out_unreg_back; 846 goto out_unreg_back;
847 } 847 }
848 848
849 if (!is_initial_xendomain()) 849 if (!xen_initial_domain())
850 xenbus_probe(NULL); 850 xenbus_probe(NULL);
851 851
852 return 0; 852 return 0;
@@ -937,7 +937,7 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
937 unsigned long timeout = jiffies + 10*HZ; 937 unsigned long timeout = jiffies + 10*HZ;
938 struct device_driver *drv = xendrv ? &xendrv->driver : NULL; 938 struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
939 939
940 if (!ready_to_wait_for_devices || !is_running_on_xen()) 940 if (!ready_to_wait_for_devices || !xen_domain())
941 return; 941 return;
942 942
943 while (exists_disconnected_device(drv)) { 943 while (exists_disconnected_device(drv)) {
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index a44c4dc70590..06f786f4b4fb 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_struct *desc,
24 desc->d = info->seg_32bit; 24 desc->d = info->seg_32bit;
25 desc->g = info->limit_in_pages; 25 desc->g = info->limit_in_pages;
26 desc->base2 = (info->base_addr & 0xff000000) >> 24; 26 desc->base2 = (info->base_addr & 0xff000000) >> 24;
27 /*
28 * Don't allow setting of the lm bit. It is useless anyway
29 * because 64-bit system calls require __USER_CS.
30 */
31 desc->l = 0;
27} 32}
28 33
29extern struct desc_ptr idt_descr; 34extern struct desc_ptr idt_descr;
@@ -97,7 +102,15 @@ static inline int desc_empty(const void *ptr)
97 native_write_gdt_entry(dt, entry, desc, type) 102 native_write_gdt_entry(dt, entry, desc, type)
98#define write_idt_entry(dt, entry, g) \ 103#define write_idt_entry(dt, entry, g) \
99 native_write_idt_entry(dt, entry, g) 104 native_write_idt_entry(dt, entry, g)
100#endif 105
106static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
107{
108}
109
110static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
111{
112}
113#endif /* CONFIG_PARAVIRT */
101 114
102static inline void native_write_idt_entry(gate_desc *idt, int entry, 115static inline void native_write_idt_entry(gate_desc *idt, int entry,
103 const gate_desc *gate) 116 const gate_desc *gate)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index fbbde93f12d6..8e9b1266898c 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -124,6 +124,9 @@ struct pv_cpu_ops {
124 int entrynum, const void *desc, int size); 124 int entrynum, const void *desc, int size);
125 void (*write_idt_entry)(gate_desc *, 125 void (*write_idt_entry)(gate_desc *,
126 int entrynum, const gate_desc *gate); 126 int entrynum, const gate_desc *gate);
127 void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
128 void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
129
127 void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); 130 void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
128 131
129 void (*set_iopl_mask)(unsigned mask); 132 void (*set_iopl_mask)(unsigned mask);
@@ -330,6 +333,7 @@ struct pv_lock_ops {
330 int (*spin_is_locked)(struct raw_spinlock *lock); 333 int (*spin_is_locked)(struct raw_spinlock *lock);
331 int (*spin_is_contended)(struct raw_spinlock *lock); 334 int (*spin_is_contended)(struct raw_spinlock *lock);
332 void (*spin_lock)(struct raw_spinlock *lock); 335 void (*spin_lock)(struct raw_spinlock *lock);
336 void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
333 int (*spin_trylock)(struct raw_spinlock *lock); 337 int (*spin_trylock)(struct raw_spinlock *lock);
334 void (*spin_unlock)(struct raw_spinlock *lock); 338 void (*spin_unlock)(struct raw_spinlock *lock);
335}; 339};
@@ -824,6 +828,16 @@ do { \
824 (aux) = __aux; \ 828 (aux) = __aux; \
825} while (0) 829} while (0)
826 830
831static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
832{
833 PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
834}
835
836static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
837{
838 PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
839}
840
827static inline void load_TR_desc(void) 841static inline void load_TR_desc(void)
828{ 842{
829 PVOP_VCALL0(pv_cpu_ops.load_tr_desc); 843 PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
@@ -1401,6 +1415,12 @@ static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
1401 PVOP_VCALL1(pv_lock_ops.spin_lock, lock); 1415 PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
1402} 1416}
1403 1417
1418static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
1419 unsigned long flags)
1420{
1421 PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
1422}
1423
1404static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) 1424static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
1405{ 1425{
1406 return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); 1426 return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
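
The PVOP wrappers give every call site a single entry point while letting each backend override the hooks at boot. Illustratively, Xen's setup would look like the sketch below (xen_alloc_ldt/xen_free_ldt are presumably defined in the enlighten.c hunks earlier in this patch; the assignment style here is illustrative):

	/* boot-time, before any LDT is built */
	pv_cpu_ops.alloc_ldt = xen_alloc_ldt;	/* e.g. make LDT pages read-only */
	pv_cpu_ops.free_ldt  = xen_free_ldt;	/* make them writable again */
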
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index e39c790dbfd2..b755ea86367e 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -182,8 +182,6 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
182} 182}
183#endif 183#endif
184 184
185#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
186
187#ifdef CONFIG_PARAVIRT 185#ifdef CONFIG_PARAVIRT
188/* 186/*
189 * Define virtualization-friendly old-style lock byte lock, for use in 187 * Define virtualization-friendly old-style lock byte lock, for use in
@@ -272,6 +270,13 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
272{ 270{
273 __ticket_spin_unlock(lock); 271 __ticket_spin_unlock(lock);
274} 272}
273
274static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
275 unsigned long flags)
276{
277 __raw_spin_lock(lock);
278}
279
275#endif /* CONFIG_PARAVIRT */ 280#endif /* CONFIG_PARAVIRT */
276 281
277static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) 282static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
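
Moving __raw_spin_lock_flags from a shared #define into each branch lets the paravirt build route it through pv_lock_ops.spin_lock_flags while the native build keeps ignoring the flags. The point is interrupt latency: a caller such as spin_lock_irqsave() hands over the pre-disable flags, and a pv backend may re-enable interrupts while it blocks if they were enabled before, as xen_spin_lock_flags() does above. Caller shape, for reference:

	unsigned long flags;

	local_irq_save(flags);			/* flags captures prior IRQ state */
	__raw_spin_lock_flags(&lock->raw_lock, flags);
	/* native: spins with IRQs off; Xen: may re-enable IRQs while blocked */
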
diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h
index 04ee0610014a..fca066febc35 100644
--- a/include/asm-x86/xen/hypervisor.h
+++ b/include/asm-x86/xen/hypervisor.h
@@ -54,7 +54,6 @@
54/* arch/i386/kernel/setup.c */ 54/* arch/i386/kernel/setup.c */
55extern struct shared_info *HYPERVISOR_shared_info; 55extern struct shared_info *HYPERVISOR_shared_info;
56extern struct start_info *xen_start_info; 56extern struct start_info *xen_start_info;
57#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
58 57
59/* arch/i386/mach-xen/evtchn.c */ 58/* arch/i386/mach-xen/evtchn.c */
60/* Force a proper event-channel callback from Xen. */ 59/* Force a proper event-channel callback from Xen. */
@@ -67,6 +66,17 @@ u64 jiffies_to_st(unsigned long jiffies);
67#define MULTI_UVMFLAGS_INDEX 3 66#define MULTI_UVMFLAGS_INDEX 3
68#define MULTI_UVMDOMID_INDEX 4 67#define MULTI_UVMDOMID_INDEX 4
69 68
70#define is_running_on_xen() (xen_start_info ? 1 : 0) 69enum xen_domain_type {
70 XEN_NATIVE,
71 XEN_PV_DOMAIN,
72 XEN_HVM_DOMAIN,
73};
74
75extern enum xen_domain_type xen_domain_type;
76
77#define xen_domain() (xen_domain_type != XEN_NATIVE)
78#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN)
79#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
80#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN)
71 81
72#endif /* __HYPERVISOR_H__ */ 82#endif /* __HYPERVISOR_H__ */
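
These predicates are what replace is_running_on_xen()/is_initial_xendomain() in the driver hunks above, and they make the PV/HVM distinction explicit even though nothing sets XEN_HVM_DOMAIN yet in this patch. The frontend idiom they support (example_init/example_driver are placeholder names):

	static int __init example_init(void)
	{
		if (!xen_domain())		/* any Xen guest at all? */
			return -ENODEV;
		if (xen_initial_domain())	/* dom0 runs backends, not this */
			return -ENODEV;
		return xenbus_register_frontend(&example_driver);
	}
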
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2651f805ba6d..75d81f157d2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -182,7 +182,7 @@ extern int vsscanf(const char *, const char *, va_list)
182 182
183extern int get_option(char **str, int *pint); 183extern int get_option(char **str, int *pint);
184extern char *get_options(const char *str, int nints, int *ints); 184extern char *get_options(const char *str, int nints, int *ints);
185extern unsigned long long memparse(char *ptr, char **retptr); 185extern unsigned long long memparse(const char *ptr, char **retptr);
186 186
187extern int core_kernel_text(unsigned long addr); 187extern int core_kernel_text(unsigned long addr);
188extern int __kernel_text_address(unsigned long addr); 188extern int __kernel_text_address(unsigned long addr);
diff --git a/include/xen/events.h b/include/xen/events.h
index 4680ff3fbc91..0d5f1adc0363 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -46,6 +46,8 @@ extern void xen_irq_resume(void);
46 46
47/* Clear an irq's pending state, in preparation for polling on it */ 47/* Clear an irq's pending state, in preparation for polling on it */
48void xen_clear_irq_pending(int irq); 48void xen_clear_irq_pending(int irq);
49void xen_set_irq_pending(int irq);
50bool xen_test_irq_pending(int irq);
49 51
50/* Poll waiting for an irq to become pending. In the usual case, the 52/* Poll waiting for an irq to become pending. In the usual case, the
51 irq will be disabled so it won't deliver an interrupt. */ 53 irq will be disabled so it won't deliver an interrupt. */
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 5ba8a942a478..f5f3ad8b62ff 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -126,7 +126,7 @@ char *get_options(const char *str, int nints, int *ints)
126 * megabyte, or one gigabyte, respectively. 126 * megabyte, or one gigabyte, respectively.
127 */ 127 */
128 128
129unsigned long long memparse(char *ptr, char **retptr) 129unsigned long long memparse(const char *ptr, char **retptr)
130{ 130{
131 char *endptr; /* local pointer to end of parsed string */ 131 char *endptr; /* local pointer to end of parsed string */
132 132