author     Linus Torvalds <torvalds@linux-foundation.org>   2013-09-04 20:45:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-09-04 20:45:39 -0400
commit     cf39c8e5352b4fb9efedfe7e9acb566a85ed847c (patch)
tree       58d9f4b8c2ac48134264f1480cfc35b36462c4f4 /drivers/xen
parent     3398d252a4da80c47fe9b802184fa0a792387732 (diff)
parent     23b7eaf8220721892975610dd0ae5c846a34dcb4 (diff)
Merge tag 'stable/for-linus-3.12-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull Xen updates from Konrad Rzeszutek Wilk:
 "A couple of features and a ton of bug-fixes.

  There are also some maintainership changes. Jeremy is enjoying the
  full-time work at the startup and, as much as he would love to help,
  he can't find the time. I have a bunch of other things that I promised
  to work on - paravirt diet, get SWIOTLB working everywhere, etc - but
  haven't been able to find the time. As such, both David Vrabel and
  Boris Ostrovsky have graciously volunteered to help with the
  maintainership role. They will keep the lid on regressions, bug-fixes,
  etc. I will be in the background to help, but eventually there will be
  less of me doing the Xen GIT pulls and more of them. Stefano is still
  doing the ARM/ARM64 work and will continue to do so.

  Features:
   - Xen Trusted Platform Module (TPM) frontend driver, with the backend
     in MiniOS.
   - Scalability improvements in the event channel code.
   - Two extra Xen co-maintainers (David, Boris) and one going away
     (Jeremy).

  Bug-fixes:
   - Make the 1:1 mapping work during early bootup on selective regions.
   - Add a scratch page to the balloon driver to deal with unexpected
     code still holding on to stale pages.
   - Allow NMIs on PV guests (64-bit only).
   - Remove an unnecessary TLB flush in the M2P code.
   - Fix duplicate callbacks in the Xen grant-table code.
   - Fixes in the PRIVCMD_MMAPBATCH* ioctls to allow retries.
   - Fix for events being lost due to rescheduling on different VCPUs.
   - More documentation."

* tag 'stable/for-linus-3.12-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (23 commits)
  hvc_xen: Remove unnecessary __GFP_ZERO from kzalloc
  drivers/xen-tpmfront: Fix compile issue with missing option.
  xen/balloon: don't set P2M entry for auto translated guest
  xen/evtchn: double free on error
  Xen: Fix retry calls into PRIVCMD_MMAPBATCH*.
  xen/pvhvm: Initialize xen panic handler for PVHVM guests
  xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping
  xen: fix ARM build after 6efa20e4
  MAINTAINERS: Remove Jeremy from the Xen subsystem.
  xen/events: document behaviour when scanning the start word for events
  x86/xen: during early setup, only 1:1 map the ISA region
  x86/xen: disable premption when enabling local irqs
  swiotlb-xen: replace dma_length with sg_dma_len() macro
  swiotlb: replace dma_length with sg_dma_len() macro
  xen/balloon: set a mapping for ballooned out pages
  xen/evtchn: improve scalability by using per-user locks
  xen/p2m: avoid unneccesary TLB flush in m2p_remove_override()
  MAINTAINERS: Add in two extra co-maintainers of the Xen tree.
  MAINTAINERS: Update the Xen subsystem's with proper mailing list.
  xen: replace strict_strtoul() with kstrtoul()
  ...
Diffstat (limited to 'drivers/xen')
-rw-r--r--  drivers/xen/balloon.c           74
-rw-r--r--  drivers/xen/events.c            30
-rw-r--r--  drivers/xen/evtchn.c           191
-rw-r--r--  drivers/xen/gntdev.c            11
-rw-r--r--  drivers/xen/grant-table.c       13
-rw-r--r--  drivers/xen/privcmd.c           83
-rw-r--r--  drivers/xen/swiotlb-xen.c        8
-rw-r--r--  drivers/xen/xen-selfballoon.c   54
8 files changed, 322 insertions(+), 142 deletions(-)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 2a2ef97697b2..3101cf6daf56 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -38,6 +38,7 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
@@ -52,6 +53,7 @@
 #include <linux/notifier.h>
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
+#include <linux/percpu-defs.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats);
 
 /* We increase/decrease in batches which fit in a page */
 static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
+
 
 /* List of ballooned pages, threaded through the mem_map array. */
 static LIST_HEAD(ballooned_pages);
@@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
                if (xen_pv_domain() && !PageHighMem(page)) {
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
-                               __pte_ma(0), 0);
+                               pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
+                                       PAGE_KERNEL_RO), 0);
                        BUG_ON(ret);
                }
 #endif
@@ -425,7 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
        /* No more mappings: invalidate P2M and add to balloon. */
        for (i = 0; i < nr_pages; i++) {
                pfn = mfn_to_pfn(frame_list[i]);
-               __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       unsigned long p;
+                       struct page *pg;
+                       pg = __get_cpu_var(balloon_scratch_page);
+                       p = page_to_pfn(pg);
+                       __set_phys_to_machine(pfn, pfn_to_mfn(p));
+               }
                balloon_append(pfn_to_page(pfn));
        }
 
@@ -480,6 +491,18 @@ static void balloon_process(struct work_struct *work)
        mutex_unlock(&balloon_mutex);
 }
 
+struct page *get_balloon_scratch_page(void)
+{
+       struct page *ret = get_cpu_var(balloon_scratch_page);
+       BUG_ON(ret == NULL);
+       return ret;
+}
+
+void put_balloon_scratch_page(void)
+{
+       put_cpu_var(balloon_scratch_page);
+}
+
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 void balloon_set_new_target(unsigned long target)
 {
@@ -573,13 +596,47 @@ static void __init balloon_add_region(unsigned long start_pfn,
        }
 }
 
+static int __cpuinit balloon_cpu_notify(struct notifier_block *self,
+                                       unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+       switch (action) {
+       case CPU_UP_PREPARE:
+               if (per_cpu(balloon_scratch_page, cpu) != NULL)
+                       break;
+               per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+               if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+                       pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+                       return NOTIFY_BAD;
+               }
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block balloon_cpu_notifier __cpuinitdata = {
+       .notifier_call  = balloon_cpu_notify,
+};
+
 static int __init balloon_init(void)
 {
-       int i;
+       int i, cpu;
 
        if (!xen_domain())
                return -ENODEV;
 
+       for_each_online_cpu(cpu)
+       {
+               per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+               if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+                       pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+                       return -ENOMEM;
+               }
+       }
+       register_cpu_notifier(&balloon_cpu_notifier);
+
        pr_info("Initialising balloon driver\n");
 
        balloon_stats.current_pages = xen_pv_domain()
@@ -616,4 +673,15 @@ static int __init balloon_init(void)
 
 subsys_initcall(balloon_init);
 
+static int __init balloon_clear(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               per_cpu(balloon_scratch_page, cpu) = NULL;
+
+       return 0;
+}
+early_initcall(balloon_clear);
+
 MODULE_LICENSE("GPL");
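
Note on the balloon change above: a minimal sketch (not part of the patch) of how a PV-only caller could use the new accessors to point a kernel virtual address at the per-CPU scratch page, which is the same pattern decrease_reservation() now applies to ballooned-out frames. The helper name replace_with_scratch_page() is hypothetical; get_balloon_scratch_page(), put_balloon_scratch_page() and HYPERVISOR_update_va_mapping() are the interfaces shown in the diff.

static int replace_with_scratch_page(unsigned long vaddr)
{
        /* get_cpu_var() inside the accessor disables preemption. */
        struct page *scratch = get_balloon_scratch_page();
        pte_t pte = pfn_pte(page_to_pfn(scratch), PAGE_KERNEL_RO);
        int ret;

        /* Point vaddr at the read-only scratch page so stray accesses
         * hit a harmless frame instead of a hole. */
        ret = HYPERVISOR_update_va_mapping(vaddr, pte, 0);

        put_balloon_scratch_page();
        return ret;
}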
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 5e8be462aed5..4035e833ea26 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -56,6 +56,7 @@
 #include <xen/interface/hvm/params.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
 #include <asm/hw_irq.h>
 
 /*
@@ -1212,7 +1213,17 @@ EXPORT_SYMBOL_GPL(evtchn_put);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
-       int irq = per_cpu(ipi_to_irq, cpu)[vector];
+       int irq;
+
+#ifdef CONFIG_X86
+       if (unlikely(vector == XEN_NMI_VECTOR)) {
+               int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
+               if (rc < 0)
+                       printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
+               return;
+       }
+#endif
+       irq = per_cpu(ipi_to_irq, cpu)[vector];
        BUG_ON(irq < 0);
        notify_remote_via_irq(irq);
 }
@@ -1379,14 +1390,21 @@ static void __xen_evtchn_do_upcall(void)
 
                pending_bits = active_evtchns(cpu, s, word_idx);
                bit_idx = 0; /* usually scan entire word from start */
+               /*
+                * We scan the starting word in two parts.
+                *
+                * 1st time: start in the middle, scanning the
+                * upper bits.
+                *
+                * 2nd time: scan the whole word (not just the
+                * parts skipped in the first pass) -- if an
+                * event in the previously scanned bits is
+                * pending again it would just be scanned on
+                * the next loop anyway.
+                */
                if (word_idx == start_word_idx) {
-                       /* We scan the starting word in two parts */
                        if (i == 0)
-                               /* 1st time: start in the middle */
                                bit_idx = start_bit_idx;
-                       else
-                               /* 2nd time: mask bits done already */
-                               bit_idx &= (1UL << start_bit_idx) - 1;
                }
 
                do {
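
A standalone sketch of the two-pass start-word scan that the new comment describes (simplified user-space C, not the driver code; the driver's word/mask bookkeeping is omitted):

#include <stdio.h>

/* Scan one word of pending bits, starting at start_bit on the first
 * pass and from bit 0 on the second pass, as the comment describes. */
static void scan_word(unsigned long *pending, unsigned int start_bit, int pass)
{
        unsigned int bit = (pass == 0) ? start_bit : 0;

        while (*pending >> bit) {
                if ((*pending >> bit) & 1) {
                        printf("pass %d: handle event at bit %u\n", pass, bit);
                        *pending &= ~(1UL << bit);      /* handled: clear it */
                }
                bit++;
        }
}

int main(void)
{
        unsigned long pending = 0x81;   /* events pending at bits 0 and 7 */

        scan_word(&pending, 4, 0);      /* 1st pass: starts at bit 4, handles bit 7 */
        scan_word(&pending, 4, 1);      /* 2nd pass: rescans from bit 0, handles bit 0 */
        return 0;
}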
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b6165e047f48..8b3a69a06c39 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -57,6 +57,7 @@
 
 struct per_user_data {
        struct mutex bind_mutex; /* serialize bind/unbind operations */
+       struct rb_root evtchns;
 
        /* Notification ring, accessed via /dev/xen/evtchn. */
 #define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
@@ -64,6 +65,7 @@ struct per_user_data {
        evtchn_port_t *ring;
        unsigned int ring_cons, ring_prod, ring_overflow;
        struct mutex ring_cons_mutex; /* protect against concurrent readers */
+       spinlock_t ring_prod_lock; /* product against concurrent interrupts */
 
        /* Processes wait on this queue when ring is empty. */
        wait_queue_head_t evtchn_wait;
@@ -71,54 +73,79 @@ struct per_user_data {
        const char *name;
 };
 
-/*
- * Who's bound to each port? This is logically an array of struct
- * per_user_data *, but we encode the current enabled-state in bit 0.
- */
-static unsigned long *port_user;
-static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
-
-static inline struct per_user_data *get_port_user(unsigned port)
-{
-       return (struct per_user_data *)(port_user[port] & ~1);
-}
-
-static inline void set_port_user(unsigned port, struct per_user_data *u)
-{
-       port_user[port] = (unsigned long)u;
-}
-
-static inline bool get_port_enabled(unsigned port)
-{
-       return port_user[port] & 1;
-}
-
-static inline void set_port_enabled(unsigned port, bool enabled)
-{
-       if (enabled)
-               port_user[port] |= 1;
-       else
-               port_user[port] &= ~1;
-}
+struct user_evtchn {
+       struct rb_node node;
+       struct per_user_data *user;
+       unsigned port;
+       bool enabled;
+};
+
+static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
+{
+       struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
+
+       while (*new) {
+               struct user_evtchn *this;
+
+               this = container_of(*new, struct user_evtchn, node);
+
+               parent = *new;
+               if (this->port < evtchn->port)
+                       new = &((*new)->rb_left);
+               else if (this->port > evtchn->port)
+                       new = &((*new)->rb_right);
+               else
+                       return -EEXIST;
+       }
+
+       /* Add new node and rebalance tree. */
+       rb_link_node(&evtchn->node, parent, new);
+       rb_insert_color(&evtchn->node, &u->evtchns);
+
+       return 0;
+}
+
+static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
+{
+       rb_erase(&evtchn->node, &u->evtchns);
+       kfree(evtchn);
+}
+
+static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
+{
+       struct rb_node *node = u->evtchns.rb_node;
+
+       while (node) {
+               struct user_evtchn *evtchn;
+
+               evtchn = container_of(node, struct user_evtchn, node);
+
+               if (evtchn->port < port)
+                       node = node->rb_left;
+               else if (evtchn->port > port)
+                       node = node->rb_right;
+               else
+                       return evtchn;
+       }
+       return NULL;
+}
 
 static irqreturn_t evtchn_interrupt(int irq, void *data)
 {
-       unsigned int port = (unsigned long)data;
-       struct per_user_data *u;
-
-       spin_lock(&port_user_lock);
-
-       u = get_port_user(port);
+       struct user_evtchn *evtchn = data;
+       struct per_user_data *u = evtchn->user;
 
-       WARN(!get_port_enabled(port),
+       WARN(!evtchn->enabled,
             "Interrupt for port %d, but apparently not enabled; per-user %p\n",
-            port, u);
+            evtchn->port, u);
 
        disable_irq_nosync(irq);
-       set_port_enabled(port, false);
+       evtchn->enabled = false;
+
+       spin_lock(&u->ring_prod_lock);
 
        if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
-               u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+               u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
                wmb(); /* Ensure ring contents visible */
                if (u->ring_cons == u->ring_prod++) {
                        wake_up_interruptible(&u->evtchn_wait);
@@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
        } else
                u->ring_overflow = 1;
 
-       spin_unlock(&port_user_lock);
+       spin_unlock(&u->ring_prod_lock);
 
        return IRQ_HANDLED;
 }
@@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
        if (copy_from_user(kbuf, buf, count) != 0)
                goto out;
 
-       spin_lock_irq(&port_user_lock);
+       mutex_lock(&u->bind_mutex);
 
        for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
                unsigned port = kbuf[i];
+               struct user_evtchn *evtchn;
 
-               if (port < NR_EVENT_CHANNELS &&
-                   get_port_user(port) == u &&
-                   !get_port_enabled(port)) {
-                       set_port_enabled(port, true);
+               evtchn = find_evtchn(u, port);
+               if (evtchn && !evtchn->enabled) {
+                       evtchn->enabled = true;
                        enable_irq(irq_from_evtchn(port));
                }
        }
 
-       spin_unlock_irq(&port_user_lock);
+       mutex_unlock(&u->bind_mutex);
 
        rc = count;
 
@@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
+       struct user_evtchn *evtchn;
+       struct evtchn_close close;
        int rc = 0;
 
        /*
@@ -263,35 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
         * interrupt handler yet, and our caller has already
         * serialized bind operations.)
         */
-       BUG_ON(get_port_user(port) != NULL);
-       set_port_user(port, u);
-       set_port_enabled(port, true); /* start enabled */
+
+       evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
+       if (!evtchn)
+               return -ENOMEM;
+
+       evtchn->user = u;
+       evtchn->port = port;
+       evtchn->enabled = true; /* start enabled */
+
+       rc = add_evtchn(u, evtchn);
+       if (rc < 0)
+               goto err;
 
        rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
-                                      u->name, (void *)(unsigned long)port);
-       if (rc >= 0)
-               rc = evtchn_make_refcounted(port);
-       else {
-               /* bind failed, should close the port now */
-               struct evtchn_close close;
-               close.port = port;
-               if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-                       BUG();
-               set_port_user(port, NULL);
-       }
+                                      u->name, evtchn);
+       if (rc < 0)
+               goto err;
 
+       rc = evtchn_make_refcounted(port);
+       return rc;
+
+err:
+       /* bind failed, should close the port now */
+       close.port = port;
+       if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+               BUG();
+       del_evtchn(u, evtchn);
        return rc;
 }
 
-static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+static void evtchn_unbind_from_user(struct per_user_data *u,
+                                   struct user_evtchn *evtchn)
 {
-       int irq = irq_from_evtchn(port);
+       int irq = irq_from_evtchn(evtchn->port);
 
        BUG_ON(irq < 0);
 
-       unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+       unbind_from_irqhandler(irq, evtchn);
 
-       set_port_user(port, NULL);
+       del_evtchn(u, evtchn);
 }
 
 static long evtchn_ioctl(struct file *file,
@@ -370,6 +410,7 @@ static long evtchn_ioctl(struct file *file,
 
        case IOCTL_EVTCHN_UNBIND: {
                struct ioctl_evtchn_unbind unbind;
+               struct user_evtchn *evtchn;
 
                rc = -EFAULT;
                if (copy_from_user(&unbind, uarg, sizeof(unbind)))
@@ -380,29 +421,27 @@ static long evtchn_ioctl(struct file *file,
                        break;
 
                rc = -ENOTCONN;
-               if (get_port_user(unbind.port) != u)
+               evtchn = find_evtchn(u, unbind.port);
+               if (!evtchn)
                        break;
 
                disable_irq(irq_from_evtchn(unbind.port));
-
-               evtchn_unbind_from_user(u, unbind.port);
-
+               evtchn_unbind_from_user(u, evtchn);
                rc = 0;
                break;
        }
 
        case IOCTL_EVTCHN_NOTIFY: {
                struct ioctl_evtchn_notify notify;
+               struct user_evtchn *evtchn;
 
                rc = -EFAULT;
                if (copy_from_user(&notify, uarg, sizeof(notify)))
                        break;
 
-               if (notify.port >= NR_EVENT_CHANNELS) {
-                       rc = -EINVAL;
-               } else if (get_port_user(notify.port) != u) {
-                       rc = -ENOTCONN;
-               } else {
+               rc = -ENOTCONN;
+               evtchn = find_evtchn(u, notify.port);
+               if (evtchn) {
                        notify_remote_via_evtchn(notify.port);
                        rc = 0;
                }
@@ -412,9 +451,9 @@ static long evtchn_ioctl(struct file *file,
        case IOCTL_EVTCHN_RESET: {
                /* Initialise the ring to empty. Clear errors. */
                mutex_lock(&u->ring_cons_mutex);
-               spin_lock_irq(&port_user_lock);
+               spin_lock_irq(&u->ring_prod_lock);
                u->ring_cons = u->ring_prod = u->ring_overflow = 0;
-               spin_unlock_irq(&port_user_lock);
+               spin_unlock_irq(&u->ring_prod_lock);
                mutex_unlock(&u->ring_cons_mutex);
                rc = 0;
                break;
@@ -473,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
        mutex_init(&u->bind_mutex);
        mutex_init(&u->ring_cons_mutex);
+       spin_lock_init(&u->ring_prod_lock);
 
        filp->private_data = u;
 
@@ -481,15 +521,15 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
 static int evtchn_release(struct inode *inode, struct file *filp)
 {
-       int i;
        struct per_user_data *u = filp->private_data;
+       struct rb_node *node;
 
-       for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-               if (get_port_user(i) != u)
-                       continue;
+       while ((node = u->evtchns.rb_node)) {
+               struct user_evtchn *evtchn;
 
-               disable_irq(irq_from_evtchn(i));
-               evtchn_unbind_from_user(get_port_user(i), i);
+               evtchn = rb_entry(node, struct user_evtchn, node);
+               disable_irq(irq_from_evtchn(evtchn->port));
+               evtchn_unbind_from_user(u, evtchn);
        }
 
        free_page((unsigned long)u->ring);
@@ -523,12 +563,6 @@ static int __init evtchn_init(void)
        if (!xen_domain())
                return -ENODEV;
 
-       port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
-       if (port_user == NULL)
-               return -ENOMEM;
-
-       spin_lock_init(&port_user_lock);
-
        /* Create '/dev/xen/evtchn'. */
        err = misc_register(&evtchn_miscdev);
        if (err != 0) {
@@ -543,9 +577,6 @@ static int __init evtchn_init(void)
 
 static void __exit evtchn_cleanup(void)
 {
-       kfree(port_user);
-       port_user = NULL;
-
        misc_deregister(&evtchn_miscdev);
 }
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eab5427c75f5..e41c79c986ea 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map)
         * with find_grant_ptes.
         */
        for (i = 0; i < map->count; i++) {
-               unsigned level;
                unsigned long address = (unsigned long)
                        pfn_to_kaddr(page_to_pfn(map->pages[i]));
-               pte_t *ptep;
-               u64 pte_maddr = 0;
                BUG_ON(PageHighMem(map->pages[i]));
 
-               ptep = lookup_address(address, &level);
-               pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
-               gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
-                                 map->flags |
-                                 GNTMAP_host_map |
-                                 GNTMAP_contains_pte,
+               gnttab_set_map_op(&map->kmap_ops[i], address,
+                                 map->flags | GNTMAP_host_map,
                                  map->grants[i].ref,
                                  map->grants[i].domid);
        }
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 04cdeb8e3719..c4d2298893b1 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback,
                                  void (*fn)(void *), void *arg, u16 count)
 {
        unsigned long flags;
+       struct gnttab_free_callback *cb;
+
        spin_lock_irqsave(&gnttab_list_lock, flags);
-       if (callback->next)
-               goto out;
+
+       /* Check if the callback is already on the list */
+       cb = gnttab_free_callback_list;
+       while (cb) {
+               if (cb == callback)
+                       goto out;
+               cb = cb->next;
+       }
+
        callback->fn = fn;
        callback->arg = arg;
        callback->count = count;
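
A small sketch (plain user-space C, not the driver code) of what the list walk above catches that the old `if (callback->next)` test cannot: the tail element of a singly linked list has a NULL ->next even though it is already queued, so a non-NULL-next heuristic misclassifies it.

#include <stdio.h>
#include <stddef.h>

struct cb {
        struct cb *next;
};

static struct cb *free_list;    /* stand-in for gnttab_free_callback_list */

/* The approach the patch switches to: walk the list and compare pointers. */
static int already_queued(const struct cb *c)
{
        const struct cb *p;

        for (p = free_list; p; p = p->next)
                if (p == c)
                        return 1;
        return 0;
}

int main(void)
{
        struct cb a = { .next = NULL };

        free_list = &a;         /* 'a' is queued and happens to be the tail */

        /* Old heuristic: non-NULL ->next means "already queued". */
        printf("->next check: %s\n", a.next ? "queued" : "not queued (wrong)");
        /* New check: an explicit walk finds it. */
        printf("list walk:    %s\n", already_queued(&a) ? "queued" : "not queued");
        return 0;
}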
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index f8e5dd701ecb..8e74590fa1bb 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -43,9 +43,10 @@ MODULE_LICENSE("GPL");
 
 #define PRIV_VMA_LOCKED ((void *)1)
 
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
+static int privcmd_vma_range_is_mapped(
+               struct vm_area_struct *vma,
+               unsigned long addr,
+               unsigned long nr_pages);
 
 static long privcmd_ioctl_hypercall(void __user *udata)
 {
@@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata)
                vma = find_vma(mm, msg->va);
                rc = -EINVAL;
 
-               if (!vma || (msg->va != vma->vm_start) ||
-                   !privcmd_enforce_singleshot_mapping(vma))
+               if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
                        goto out_up;
+               vma->vm_private_data = PRIV_VMA_LOCKED;
        }
 
        state.va = vma->vm_start;
@@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
                kfree(pages);
                return -ENOMEM;
        }
-       BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+       BUG_ON(vma->vm_private_data != NULL);
        vma->vm_private_data = pages;
 
        return 0;
@@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 
        vma = find_vma(mm, m.addr);
        if (!vma ||
-           vma->vm_ops != &privcmd_vm_ops ||
-           (m.addr != vma->vm_start) ||
-           ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
-           !privcmd_enforce_singleshot_mapping(vma)) {
-               up_write(&mm->mmap_sem);
+           vma->vm_ops != &privcmd_vm_ops) {
                ret = -EINVAL;
-               goto out;
+               goto out_unlock;
        }
-       if (xen_feature(XENFEAT_auto_translated_physmap)) {
-               ret = alloc_empty_pages(vma, m.num);
-               if (ret < 0) {
-                       up_write(&mm->mmap_sem);
-                       goto out;
+
+       /*
+        * Caller must either:
+        *
+        * Map the whole VMA range, which will also allocate all the
+        * pages required for the auto_translated_physmap case.
+        *
+        * Or
+        *
+        * Map unmapped holes left from a previous map attempt (e.g.,
+        * because those foreign frames were previously paged out).
+        */
+       if (vma->vm_private_data == NULL) {
+               if (m.addr != vma->vm_start ||
+                   m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
+                       ret = -EINVAL;
+                       goto out_unlock;
+               }
+               if (xen_feature(XENFEAT_auto_translated_physmap)) {
+                       ret = alloc_empty_pages(vma, m.num);
+                       if (ret < 0)
+                               goto out_unlock;
+               } else
+                       vma->vm_private_data = PRIV_VMA_LOCKED;
+       } else {
+               if (m.addr < vma->vm_start ||
+                   m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
+                       ret = -EINVAL;
+                       goto out_unlock;
+               }
+               if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
+                       ret = -EINVAL;
+                       goto out_unlock;
                }
        }
 
@@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 
 out:
        free_page_list(&pagelist);
-
        return ret;
+
+out_unlock:
+       up_write(&mm->mmap_sem);
+       goto out;
 }
 
 static long privcmd_ioctl(struct file *file,
@@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
        return 0;
 }
 
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+/*
+ * For MMAPBATCH*. This allows asserting the singleshot mapping
+ * on a per pfn/pte basis. Mapping calls that fail with ENOENT
+ * can be then retried until success.
+ */
+static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
+                       unsigned long addr, void *data)
+{
+       return pte_none(*pte) ? 0 : -EBUSY;
+}
+
+static int privcmd_vma_range_is_mapped(
+               struct vm_area_struct *vma,
+               unsigned long addr,
+               unsigned long nr_pages)
 {
-       return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
+       return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
+                                  is_mapped_fn, NULL) != 0;
 }
 
 const struct file_operations xen_privcmd_fops = {
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index aadffcf7db9b..1b2277c311d2 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
                                   to do proper error handling. */
                                xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
                                                           attrs);
-                               sgl[0].dma_length = 0;
+                               sg_dma_len(sgl) = 0;
                                return DMA_ERROR_CODE;
                        }
                        sg->dma_address = xen_phys_to_bus(map);
                } else
                        sg->dma_address = dev_addr;
-               sg->dma_length = sg->length;
+               sg_dma_len(sg) = sg->length;
        }
        return nelems;
 }
@@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
        BUG_ON(dir == DMA_NONE);
 
        for_each_sg(sgl, sg, nelems, i)
-               xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+               xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
 
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
@@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 
        for_each_sg(sgl, sg, nelems, i)
                xen_swiotlb_sync_single(hwdev, sg->dma_address,
-                                       sg_dma_len(sg), dir, target);
 }
 
 void
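
For reference, a short sketch (not from the patch) of the accessor style the swiotlb hunks switch to. sg_dma_len() expands to sg->dma_length only on configurations that define CONFIG_NEED_SG_DMA_LENGTH and to sg->length otherwise, so open-coding sg->dma_length is not portable across architectures; the helper below is illustrative only.

#include <linux/scatterlist.h>
#include <linux/printk.h>

/* Dump a DMA-mapped scatterlist using the generic accessors. */
static void print_mapped_sg(struct scatterlist *sgl, int nelems)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i)
                pr_info("segment %d: dma addr %llx, dma len %u\n",
                        i, (unsigned long long)sg_dma_address(sg),
                        sg_dma_len(sg));
}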
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 02817a85f877..21e18c18c7a1 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       err = strict_strtoul(buf, 10, &tmp);
-       if (err || ((tmp != 0) && (tmp != 1)))
+       err = kstrtoul(buf, 10, &tmp);
+       if (err)
+               return err;
+       if ((tmp != 0) && (tmp != 1))
                return -EINVAL;
 
        xen_selfballooning_enabled = !!tmp;
@@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev,
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       err = strict_strtoul(buf, 10, &val);
-       if (err || val == 0)
+       err = kstrtoul(buf, 10, &val);
+       if (err)
+               return err;
+       if (val == 0)
                return -EINVAL;
        selfballoon_interval = val;
        return count;
@@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev,
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       err = strict_strtoul(buf, 10, &val);
-       if (err || val == 0)
+       err = kstrtoul(buf, 10, &val);
+       if (err)
+               return err;
+       if (val == 0)
                return -EINVAL;
        selfballoon_downhysteresis = val;
        return count;
@@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev,
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       err = strict_strtoul(buf, 10, &val);
-       if (err || val == 0)
+       err = kstrtoul(buf, 10, &val);
+       if (err)
+               return err;
+       if (val == 0)
                return -EINVAL;
        selfballoon_uphysteresis = val;
        return count;
@@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       err = strict_strtoul(buf, 10, &val);
-       if (err || val == 0)
+       err = kstrtoul(buf, 10, &val);
+       if (err)
+               return err;
+       if (val == 0)
                return -EINVAL;
        selfballoon_min_usable_mb = val;
        return count;
@@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev,
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       err = strict_strtoul(buf, 10, &val);
-       if (err || val == 0)
+       err = kstrtoul(buf, 10, &val);
+       if (err)
+               return err;
+       if (val == 0)
                return -EINVAL;
        selfballoon_reserved_mb = val;
        return count;
@@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev,
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       err = strict_strtoul(buf, 10, &tmp);
-       if (err || ((tmp != 0) && (tmp != 1)))
+       err = kstrtoul(buf, 10, &tmp);
+       if (err)
+               return err;
+       if ((tmp != 0) && (tmp != 1))
                return -EINVAL;
        frontswap_selfshrinking = !!tmp;
        if (!was_enabled && !xen_selfballooning_enabled &&
@@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev,
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       err = strict_strtoul(buf, 10, &val);
-       if (err || val == 0)
+       err = kstrtoul(buf, 10, &val);
+       if (err)
+               return err;
+       if (val == 0)
                return -EINVAL;
        frontswap_inertia = val;
        frontswap_inertia_counter = val;
@@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev,
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       err = strict_strtoul(buf, 10, &val);
-       if (err || val == 0)
+       err = kstrtoul(buf, 10, &val);
+       if (err)
+               return err;
+       if (val == 0)
                return -EINVAL;
        frontswap_hysteresis = val;
        return count;