aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorArnd Bergmann <arnd@arndb.de>2010-02-18 00:45:36 -0500
committerDavid S. Miller <davem@davemloft.net>2010-02-18 17:08:37 -0500
commit02df55d28c6001a3cdb7a997a34a0b01f01d015e (patch)
treeea1bf32f7464294ce90e288bf5440323e9ca6e7f /drivers
parent37ee3d5b3e979a168536e7e2f15bd1e769cb4122 (diff)
macvtap: rework object lifetime rules
This reworks the change done by the previous patch in a more complete way. The original macvtap code has a number of problems resulting from the use of RCU for protecting the access to struct macvtap_queue from open files. This includes - need for GFP_ATOMIC allocations for skbs - potential deadlocks when copy_*_user sleeps - inability to work with vhost-net Changing the lifetime of macvtap_queue to always depend on the open file solves all these. The RCU reference simply moves one step down to the reference on the macvlan_dev, which we only need for nonblocking operations. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Sridhar Samudrala <sri@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/macvtap.c183
1 files changed, 91 insertions, 92 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index fe7656bf68c6..705099749766 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -60,30 +60,19 @@ static struct cdev macvtap_cdev;
60 60
61/* 61/*
62 * RCU usage: 62 * RCU usage:
63 * The macvtap_queue is referenced both from the chardev struct file 63 * The macvtap_queue and the macvlan_dev are loosely coupled, the
64 * and from the struct macvlan_dev using rcu_read_lock. 64 * pointers from one to the other can only be read while rcu_read_lock
65 * or macvtap_lock is held.
65 * 66 *
66 * We never actually update the contents of a macvtap_queue atomically 67 * Both the file and the macvlan_dev hold a reference on the macvtap_queue
67 * with RCU but it is used for race-free destruction of a queue when 68 * through sock_hold(&q->sk). When the macvlan_dev goes away first,
68 * either the file or the macvlan_dev goes away. Pointers back to 69 * q->vlan becomes inaccessible. When the files gets closed,
69 * the dev and the file are implicitly valid as long as the queue 70 * macvtap_get_queue() fails.
70 * exists.
71 * 71 *
72 * The callbacks from macvlan are always done with rcu_read_lock held 72 * There may still be references to the struct sock inside of the
73 * already. For calls from file_operations, we use the rcu_read_lock_bh 73 * queue from outbound SKBs, but these never reference back to the
74 * to get a reference count on the socket and the device. 74 * file or the dev. The data structure is freed through __sk_free
75 * 75 * when both our references and any pending SKBs are gone.
76 * When destroying a queue, we remove the pointers from the file and
77 * from the dev and then synchronize_rcu to make sure no thread is
78 * still using the queue. There may still be references to the struct
79 * sock inside of the queue from outbound SKBs, but these never
80 * reference back to the file or the dev. The data structure is freed
81 * through __sk_free when both our references and any pending SKBs
82 * are gone.
83 *
84 * macvtap_lock is only used to prevent multiple concurrent open()
85 * calls to assign a new vlan->tap pointer. It could be moved into
86 * the macvlan_dev itself but is extremely rarely used.
87 */ 76 */
88static DEFINE_SPINLOCK(macvtap_lock); 77static DEFINE_SPINLOCK(macvtap_lock);
89 78
@@ -101,11 +90,12 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file,
101 goto out; 90 goto out;
102 91
103 err = 0; 92 err = 0;
104 q->vlan = vlan; 93 rcu_assign_pointer(q->vlan, vlan);
105 rcu_assign_pointer(vlan->tap, q); 94 rcu_assign_pointer(vlan->tap, q);
95 sock_hold(&q->sk);
106 96
107 q->file = file; 97 q->file = file;
108 rcu_assign_pointer(file->private_data, q); 98 file->private_data = q;
109 99
110out: 100out:
111 spin_unlock(&macvtap_lock); 101 spin_unlock(&macvtap_lock);
@@ -113,28 +103,25 @@ out:
113} 103}
114 104
115/* 105/*
116 * We must destroy each queue exactly once, when either 106 * The file owning the queue got closed, give up both
117 * the netdev or the file go away. 107 * the reference that the files holds as well as the
108 * one from the macvlan_dev if that still exists.
118 * 109 *
119 * Using the spinlock makes sure that we don't get 110 * Using the spinlock makes sure that we don't get
120 * to the queue again after destroying it. 111 * to the queue again after destroying it.
121 *
122 * synchronize_rcu serializes with the packet flow
123 * that uses rcu_read_lock.
124 */ 112 */
125static void macvtap_del_queue(struct macvtap_queue **qp) 113static void macvtap_put_queue(struct macvtap_queue *q)
126{ 114{
127 struct macvtap_queue *q; 115 struct macvlan_dev *vlan;
128 116
129 spin_lock(&macvtap_lock); 117 spin_lock(&macvtap_lock);
130 q = rcu_dereference(*qp); 118 vlan = rcu_dereference(q->vlan);
131 if (!q) { 119 if (vlan) {
132 spin_unlock(&macvtap_lock); 120 rcu_assign_pointer(vlan->tap, NULL);
133 return; 121 rcu_assign_pointer(q->vlan, NULL);
122 sock_put(&q->sk);
134 } 123 }
135 124
136 rcu_assign_pointer(q->vlan->tap, NULL);
137 rcu_assign_pointer(q->file->private_data, NULL);
138 spin_unlock(&macvtap_lock); 125 spin_unlock(&macvtap_lock);
139 126
140 synchronize_rcu(); 127 synchronize_rcu();
@@ -152,29 +139,29 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
152 return rcu_dereference(vlan->tap); 139 return rcu_dereference(vlan->tap);
153} 140}
154 141
142/*
143 * The net_device is going away, give up the reference
144 * that it holds on the queue (all the queues one day)
145 * and safely set the pointer from the queues to NULL.
146 */
155static void macvtap_del_queues(struct net_device *dev) 147static void macvtap_del_queues(struct net_device *dev)
156{ 148{
157 struct macvlan_dev *vlan = netdev_priv(dev); 149 struct macvlan_dev *vlan = netdev_priv(dev);
158 macvtap_del_queue(&vlan->tap);
159}
160
161static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file)
162{
163 struct macvtap_queue *q; 150 struct macvtap_queue *q;
164 rcu_read_lock_bh(); 151
165 q = rcu_dereference(file->private_data); 152 spin_lock(&macvtap_lock);
166 if (q) { 153 q = rcu_dereference(vlan->tap);
167 sock_hold(&q->sk); 154 if (!q) {
168 dev_hold(q->vlan->dev); 155 spin_unlock(&macvtap_lock);
156 return;
169 } 157 }
170 rcu_read_unlock_bh();
171 return q;
172}
173 158
174static inline void macvtap_file_put_queue(struct macvtap_queue *q) 159 rcu_assign_pointer(vlan->tap, NULL);
175{ 160 rcu_assign_pointer(q->vlan, NULL);
161 spin_unlock(&macvtap_lock);
162
163 synchronize_rcu();
176 sock_put(&q->sk); 164 sock_put(&q->sk);
177 dev_put(q->vlan->dev);
178} 165}
179 166
180/* 167/*
@@ -284,7 +271,6 @@ static int macvtap_open(struct inode *inode, struct file *file)
284 q->sock.type = SOCK_RAW; 271 q->sock.type = SOCK_RAW;
285 q->sock.state = SS_CONNECTED; 272 q->sock.state = SS_CONNECTED;
286 sock_init_data(&q->sock, &q->sk); 273 sock_init_data(&q->sock, &q->sk);
287 q->sk.sk_allocation = GFP_ATOMIC; /* for now */
288 q->sk.sk_write_space = macvtap_sock_write_space; 274 q->sk.sk_write_space = macvtap_sock_write_space;
289 275
290 err = macvtap_set_queue(dev, file, q); 276 err = macvtap_set_queue(dev, file, q);
@@ -300,13 +286,14 @@ out:
300 286
301static int macvtap_release(struct inode *inode, struct file *file) 287static int macvtap_release(struct inode *inode, struct file *file)
302{ 288{
303 macvtap_del_queue((struct macvtap_queue **)&file->private_data); 289 struct macvtap_queue *q = file->private_data;
290 macvtap_put_queue(q);
304 return 0; 291 return 0;
305} 292}
306 293
307static unsigned int macvtap_poll(struct file *file, poll_table * wait) 294static unsigned int macvtap_poll(struct file *file, poll_table * wait)
308{ 295{
309 struct macvtap_queue *q = macvtap_file_get_queue(file); 296 struct macvtap_queue *q = file->private_data;
310 unsigned int mask = POLLERR; 297 unsigned int mask = POLLERR;
311 298
312 if (!q) 299 if (!q)
@@ -323,7 +310,6 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
323 sock_writeable(&q->sk))) 310 sock_writeable(&q->sk)))
324 mask |= POLLOUT | POLLWRNORM; 311 mask |= POLLOUT | POLLWRNORM;
325 312
326 macvtap_file_put_queue(q);
327out: 313out:
328 return mask; 314 return mask;
329} 315}
@@ -334,6 +320,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
334 int noblock) 320 int noblock)
335{ 321{
336 struct sk_buff *skb; 322 struct sk_buff *skb;
323 struct macvlan_dev *vlan;
337 size_t len = count; 324 size_t len = count;
338 int err; 325 int err;
339 326
@@ -341,26 +328,37 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
341 return -EINVAL; 328 return -EINVAL;
342 329
343 skb = sock_alloc_send_skb(&q->sk, NET_IP_ALIGN + len, noblock, &err); 330 skb = sock_alloc_send_skb(&q->sk, NET_IP_ALIGN + len, noblock, &err);
344 331 if (!skb)
345 if (!skb) { 332 goto err;
346 macvlan_count_rx(q->vlan, 0, false, false);
347 return err;
348 }
349 333
350 skb_reserve(skb, NET_IP_ALIGN); 334 skb_reserve(skb, NET_IP_ALIGN);
351 skb_put(skb, count); 335 skb_put(skb, count);
352 336
353 if (skb_copy_datagram_from_iovec(skb, 0, iv, 0, len)) { 337 err = skb_copy_datagram_from_iovec(skb, 0, iv, 0, len);
354 macvlan_count_rx(q->vlan, 0, false, false); 338 if (err)
355 kfree_skb(skb); 339 goto err;
356 return -EFAULT;
357 }
358 340
359 skb_set_network_header(skb, ETH_HLEN); 341 skb_set_network_header(skb, ETH_HLEN);
360 342 rcu_read_lock_bh();
361 macvlan_start_xmit(skb, q->vlan->dev); 343 vlan = rcu_dereference(q->vlan);
344 if (vlan)
345 macvlan_start_xmit(skb, vlan->dev);
346 else
347 kfree_skb(skb);
348 rcu_read_unlock_bh();
362 349
363 return count; 350 return count;
351
352err:
353 rcu_read_lock_bh();
354 vlan = rcu_dereference(q->vlan);
355 if (vlan)
356 macvlan_count_rx(q->vlan, 0, false, false);
357 rcu_read_unlock_bh();
358
359 kfree_skb(skb);
360
361 return err;
364} 362}
365 363
366static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv, 364static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
@@ -368,15 +366,10 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
368{ 366{
369 struct file *file = iocb->ki_filp; 367 struct file *file = iocb->ki_filp;
370 ssize_t result = -ENOLINK; 368 ssize_t result = -ENOLINK;
371 struct macvtap_queue *q = macvtap_file_get_queue(file); 369 struct macvtap_queue *q = file->private_data;
372
373 if (!q)
374 goto out;
375 370
376 result = macvtap_get_user(q, iv, iov_length(iv, count), 371 result = macvtap_get_user(q, iv, iov_length(iv, count),
377 file->f_flags & O_NONBLOCK); 372 file->f_flags & O_NONBLOCK);
378 macvtap_file_put_queue(q);
379out:
380 return result; 373 return result;
381} 374}
382 375
@@ -385,14 +378,17 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
385 const struct sk_buff *skb, 378 const struct sk_buff *skb,
386 const struct iovec *iv, int len) 379 const struct iovec *iv, int len)
387{ 380{
388 struct macvlan_dev *vlan = q->vlan; 381 struct macvlan_dev *vlan;
389 int ret; 382 int ret;
390 383
391 len = min_t(int, skb->len, len); 384 len = min_t(int, skb->len, len);
392 385
393 ret = skb_copy_datagram_const_iovec(skb, 0, iv, 0, len); 386 ret = skb_copy_datagram_const_iovec(skb, 0, iv, 0, len);
394 387
388 rcu_read_lock_bh();
389 vlan = rcu_dereference(q->vlan);
395 macvlan_count_rx(vlan, len, ret == 0, 0); 390 macvlan_count_rx(vlan, len, ret == 0, 0);
391 rcu_read_unlock_bh();
396 392
397 return ret ? ret : len; 393 return ret ? ret : len;
398} 394}
@@ -401,14 +397,16 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
401 unsigned long count, loff_t pos) 397 unsigned long count, loff_t pos)
402{ 398{
403 struct file *file = iocb->ki_filp; 399 struct file *file = iocb->ki_filp;
404 struct macvtap_queue *q = macvtap_file_get_queue(file); 400 struct macvtap_queue *q = file->private_data;
405 401
406 DECLARE_WAITQUEUE(wait, current); 402 DECLARE_WAITQUEUE(wait, current);
407 struct sk_buff *skb; 403 struct sk_buff *skb;
408 ssize_t len, ret = 0; 404 ssize_t len, ret = 0;
409 405
410 if (!q) 406 if (!q) {
411 return -ENOLINK; 407 ret = -ENOLINK;
408 goto out;
409 }
412 410
413 len = iov_length(iv, count); 411 len = iov_length(iv, count);
414 if (len < 0) { 412 if (len < 0) {
@@ -444,7 +442,6 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
444 remove_wait_queue(q->sk.sk_sleep, &wait); 442 remove_wait_queue(q->sk.sk_sleep, &wait);
445 443
446out: 444out:
447 macvtap_file_put_queue(q);
448 return ret; 445 return ret;
449} 446}
450 447
@@ -454,12 +451,13 @@ out:
454static long macvtap_ioctl(struct file *file, unsigned int cmd, 451static long macvtap_ioctl(struct file *file, unsigned int cmd,
455 unsigned long arg) 452 unsigned long arg)
456{ 453{
457 struct macvtap_queue *q; 454 struct macvtap_queue *q = file->private_data;
455 struct macvlan_dev *vlan;
458 void __user *argp = (void __user *)arg; 456 void __user *argp = (void __user *)arg;
459 struct ifreq __user *ifr = argp; 457 struct ifreq __user *ifr = argp;
460 unsigned int __user *up = argp; 458 unsigned int __user *up = argp;
461 unsigned int u; 459 unsigned int u;
462 char devname[IFNAMSIZ]; 460 int ret;
463 461
464 switch (cmd) { 462 switch (cmd) {
465 case TUNSETIFF: 463 case TUNSETIFF:
@@ -471,16 +469,21 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
471 return 0; 469 return 0;
472 470
473 case TUNGETIFF: 471 case TUNGETIFF:
474 q = macvtap_file_get_queue(file); 472 rcu_read_lock_bh();
475 if (!q) 473 vlan = rcu_dereference(q->vlan);
474 if (vlan)
475 dev_hold(vlan->dev);
476 rcu_read_unlock_bh();
477
478 if (!vlan)
476 return -ENOLINK; 479 return -ENOLINK;
477 memcpy(devname, q->vlan->dev->name, sizeof(devname));
478 macvtap_file_put_queue(q);
479 480
481 ret = 0;
480 if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || 482 if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) ||
481 put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags)) 483 put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags))
482 return -EFAULT; 484 ret = -EFAULT;
483 return 0; 485 dev_put(vlan->dev);
486 return ret;
484 487
485 case TUNGETFEATURES: 488 case TUNGETFEATURES:
486 if (put_user((IFF_TAP | IFF_NO_PI), up)) 489 if (put_user((IFF_TAP | IFF_NO_PI), up))
@@ -491,11 +494,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
491 if (get_user(u, up)) 494 if (get_user(u, up))
492 return -EFAULT; 495 return -EFAULT;
493 496
494 q = macvtap_file_get_queue(file);
495 if (!q)
496 return -ENOLINK;
497 q->sk.sk_sndbuf = u; 497 q->sk.sk_sndbuf = u;
498 macvtap_file_put_queue(q);
499 return 0; 498 return 0;
500 499
501 case TUNSETOFFLOAD: 500 case TUNSETOFFLOAD: