about | summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-03-22 12:25:34 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-03-22 12:25:34 -0400
commit c62b3898636072de4bf9af36b6cd5a920ebfe896 (patch)
tree 5fcccbf7cb2c690f43aa9ee20577fafe0b0bcdac /drivers
parent eddecbb601c9ea3fab7e67d7892010fc9426d1e6 (diff)
parent 736561a01f11114146b1b7f82d486fa9c95828ef (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (33 commits)
  IPVS: Use global mutex in ip_vs_app.c
  ipvs: fix a typo in __ip_vs_control_init()
  veth: Fix the byte counters
  net ipv6: Fix duplicate /proc/sys/net/ipv6/neigh directory entries.
  macvlan: Fix use after free of struct macvlan_port.
  net: fix incorrect spelling in drop monitor protocol
  can: c_can: Do basic c_can configuration _before_ enabling the interrupts
  net/appletalk: fix atalk_release use after free
  ipx: fix ipx_release()
  snmp: SNMP_UPD_PO_STATS_BH() always called from softirq
  l2tp: fix possible oops on l2tp_eth module unload
  xfrm: Fix initialize repl field of struct xfrm_state
  netfilter: ipt_CLUSTERIP: fix buffer overflow
  netfilter: xtables: fix reentrancy
  netfilter: ipset: fix checking the type revision at create command
  netfilter: ipset: fix address ranges at hash:*port* types
  niu: Rename NIU parent platform device name to fix conflict.
  r8169: fix a bug in rtl8169_init_phy()
  bonding: fix a typo in a comment
  ftmac100: use resource_size()
  ...
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/net/bonding/bond_main.c    2
-rw-r--r--  drivers/net/can/c_can/c_can.c      6
-rw-r--r--  drivers/net/ftmac100.c             2
-rw-r--r--  drivers/net/gianfar.c             16
-rw-r--r--  drivers/net/gianfar.h              1
-rw-r--r--  drivers/net/macvlan.c             18
-rw-r--r--  drivers/net/niu.c                  2
-rw-r--r--  drivers/net/veth.c                 2
-rw-r--r--  drivers/vhost/net.c              159
-rw-r--r--  drivers/vhost/vhost.c             55
10 files changed, 98 insertions, 165 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1a6e9eb7af43..338bea147c64 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2130,7 +2130,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
2130} 2130}
2131 2131
2132/* 2132/*
2133* First release a slave and than destroy the bond if no more slaves are left. 2133* First release a slave and then destroy the bond if no more slaves are left.
2134* Must be under rtnl_lock when this function is called. 2134* Must be under rtnl_lock when this function is called.
2135*/ 2135*/
2136static int bond_release_and_destroy(struct net_device *bond_dev, 2136static int bond_release_and_destroy(struct net_device *bond_dev,
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 14050786218a..110eda01843c 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -633,9 +633,6 @@ static void c_can_start(struct net_device *dev)
633{ 633{
634 struct c_can_priv *priv = netdev_priv(dev); 634 struct c_can_priv *priv = netdev_priv(dev);
635 635
636 /* enable status change, error and module interrupts */
637 c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
638
639 /* basic c_can configuration */ 636 /* basic c_can configuration */
640 c_can_chip_config(dev); 637 c_can_chip_config(dev);
641 638
@@ -643,6 +640,9 @@ static void c_can_start(struct net_device *dev)
643 640
644 /* reset tx helper pointers */ 641 /* reset tx helper pointers */
645 priv->tx_next = priv->tx_echo = 0; 642 priv->tx_next = priv->tx_echo = 0;
643
644 /* enable status change, error and module interrupts */
645 c_can_enable_all_interrupts(priv, ENABLE_ALL_INTERRUPTS);
646} 646}
647 647
648static void c_can_stop(struct net_device *dev) 648static void c_can_stop(struct net_device *dev)
diff --git a/drivers/net/ftmac100.c b/drivers/net/ftmac100.c
index 1d6f4b8d393a..a31661948c42 100644
--- a/drivers/net/ftmac100.c
+++ b/drivers/net/ftmac100.c
@@ -1102,7 +1102,7 @@ static int ftmac100_probe(struct platform_device *pdev)
1102 goto err_req_mem; 1102 goto err_req_mem;
1103 } 1103 }
1104 1104
1105 priv->base = ioremap(res->start, res->end - res->start); 1105 priv->base = ioremap(res->start, resource_size(res));
1106 if (!priv->base) { 1106 if (!priv->base) {
1107 dev_err(&pdev->dev, "Failed to ioremap ethernet registers\n"); 1107 dev_err(&pdev->dev, "Failed to ioremap ethernet registers\n");
1108 err = -EIO; 1108 err = -EIO;
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index ccb231c4d933..2a0ad9a501bb 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -949,6 +949,11 @@ static void gfar_detect_errata(struct gfar_private *priv)
949 (pvr == 0x80861010 && (mod & 0xfff9) == 0x80c0)) 949 (pvr == 0x80861010 && (mod & 0xfff9) == 0x80c0))
950 priv->errata |= GFAR_ERRATA_A002; 950 priv->errata |= GFAR_ERRATA_A002;
951 951
952 /* MPC8313 Rev < 2.0, MPC8548 rev 2.0 */
953 if ((pvr == 0x80850010 && mod == 0x80b0 && rev < 0x0020) ||
954 (pvr == 0x80210020 && mod == 0x8030 && rev == 0x0020))
955 priv->errata |= GFAR_ERRATA_12;
956
952 if (priv->errata) 957 if (priv->errata)
953 dev_info(dev, "enabled errata workarounds, flags: 0x%x\n", 958 dev_info(dev, "enabled errata workarounds, flags: 0x%x\n",
954 priv->errata); 959 priv->errata);
@@ -2154,8 +2159,15 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
2154 /* Set up checksumming */ 2159 /* Set up checksumming */
2155 if (CHECKSUM_PARTIAL == skb->ip_summed) { 2160 if (CHECKSUM_PARTIAL == skb->ip_summed) {
2156 fcb = gfar_add_fcb(skb); 2161 fcb = gfar_add_fcb(skb);
2157 lstatus |= BD_LFLAG(TXBD_TOE); 2162 /* as specified by errata */
2158 gfar_tx_checksum(skb, fcb); 2163 if (unlikely(gfar_has_errata(priv, GFAR_ERRATA_12)
2164 && ((unsigned long)fcb % 0x20) > 0x18)) {
2165 __skb_pull(skb, GMAC_FCB_LEN);
2166 skb_checksum_help(skb);
2167 } else {
2168 lstatus |= BD_LFLAG(TXBD_TOE);
2169 gfar_tx_checksum(skb, fcb);
2170 }
2159 } 2171 }
2160 2172
2161 if (vlan_tx_tag_present(skb)) { 2173 if (vlan_tx_tag_present(skb)) {
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index 54de4135e932..ec5d595ce2e2 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -1039,6 +1039,7 @@ enum gfar_errata {
1039 GFAR_ERRATA_74 = 0x01, 1039 GFAR_ERRATA_74 = 0x01,
1040 GFAR_ERRATA_76 = 0x02, 1040 GFAR_ERRATA_76 = 0x02,
1041 GFAR_ERRATA_A002 = 0x04, 1041 GFAR_ERRATA_A002 = 0x04,
1042 GFAR_ERRATA_12 = 0x08, /* a.k.a errata eTSEC49 */
1042}; 1043};
1043 1044
1044/* Struct stolen almost completely (and shamelessly) from the FCC enet source 1045/* Struct stolen almost completely (and shamelessly) from the FCC enet source
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 5b37d3c191e4..78e34e9e4f00 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,8 +39,11 @@ struct macvlan_port {
39 struct list_head vlans; 39 struct list_head vlans;
40 struct rcu_head rcu; 40 struct rcu_head rcu;
41 bool passthru; 41 bool passthru;
42 int count;
42}; 43};
43 44
45static void macvlan_port_destroy(struct net_device *dev);
46
44#define macvlan_port_get_rcu(dev) \ 47#define macvlan_port_get_rcu(dev) \
45 ((struct macvlan_port *) rcu_dereference(dev->rx_handler_data)) 48 ((struct macvlan_port *) rcu_dereference(dev->rx_handler_data))
46#define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data) 49#define macvlan_port_get(dev) ((struct macvlan_port *) dev->rx_handler_data)
@@ -457,8 +460,13 @@ static int macvlan_init(struct net_device *dev)
457static void macvlan_uninit(struct net_device *dev) 460static void macvlan_uninit(struct net_device *dev)
458{ 461{
459 struct macvlan_dev *vlan = netdev_priv(dev); 462 struct macvlan_dev *vlan = netdev_priv(dev);
463 struct macvlan_port *port = vlan->port;
460 464
461 free_percpu(vlan->pcpu_stats); 465 free_percpu(vlan->pcpu_stats);
466
467 port->count -= 1;
468 if (!port->count)
469 macvlan_port_destroy(port->dev);
462} 470}
463 471
464static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev, 472static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
@@ -691,12 +699,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
691 vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); 699 vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]);
692 700
693 if (vlan->mode == MACVLAN_MODE_PASSTHRU) { 701 if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
694 if (!list_empty(&port->vlans)) 702 if (port->count)
695 return -EINVAL; 703 return -EINVAL;
696 port->passthru = true; 704 port->passthru = true;
697 memcpy(dev->dev_addr, lowerdev->dev_addr, ETH_ALEN); 705 memcpy(dev->dev_addr, lowerdev->dev_addr, ETH_ALEN);
698 } 706 }
699 707
708 port->count += 1;
700 err = register_netdevice(dev); 709 err = register_netdevice(dev);
701 if (err < 0) 710 if (err < 0)
702 goto destroy_port; 711 goto destroy_port;
@@ -707,7 +716,8 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
707 return 0; 716 return 0;
708 717
709destroy_port: 718destroy_port:
710 if (list_empty(&port->vlans)) 719 port->count -= 1;
720 if (!port->count)
711 macvlan_port_destroy(lowerdev); 721 macvlan_port_destroy(lowerdev);
712 722
713 return err; 723 return err;
@@ -725,13 +735,9 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev,
725void macvlan_dellink(struct net_device *dev, struct list_head *head) 735void macvlan_dellink(struct net_device *dev, struct list_head *head)
726{ 736{
727 struct macvlan_dev *vlan = netdev_priv(dev); 737 struct macvlan_dev *vlan = netdev_priv(dev);
728 struct macvlan_port *port = vlan->port;
729 738
730 list_del(&vlan->list); 739 list_del(&vlan->list);
731 unregister_netdevice_queue(dev, head); 740 unregister_netdevice_queue(dev, head);
732
733 if (list_empty(&port->vlans))
734 macvlan_port_destroy(port->dev);
735} 741}
736EXPORT_SYMBOL_GPL(macvlan_dellink); 742EXPORT_SYMBOL_GPL(macvlan_dellink);
737 743
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 40fa59e2fd5c..32678b6c6b39 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -9501,7 +9501,7 @@ static struct niu_parent * __devinit niu_new_parent(struct niu *np,
9501 struct niu_parent *p; 9501 struct niu_parent *p;
9502 int i; 9502 int i;
9503 9503
9504 plat_dev = platform_device_register_simple("niu", niu_parent_index, 9504 plat_dev = platform_device_register_simple("niu-board", niu_parent_index,
9505 NULL, 0); 9505 NULL, 0);
9506 if (IS_ERR(plat_dev)) 9506 if (IS_ERR(plat_dev))
9507 return NULL; 9507 return NULL;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 105d7f0630cc..2de9b90c5f8f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -171,7 +171,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
171 if (skb->ip_summed == CHECKSUM_NONE) 171 if (skb->ip_summed == CHECKSUM_NONE)
172 skb->ip_summed = rcv_priv->ip_summed; 172 skb->ip_summed = rcv_priv->ip_summed;
173 173
174 length = skb->len + ETH_HLEN; 174 length = skb->len;
175 if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS) 175 if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
176 goto rx_drop; 176 goto rx_drop;
177 177
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f616cefc95ba..2f7c76a85e53 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to,
60{ 60{
61 int seg = 0; 61 int seg = 0;
62 size_t size; 62 size_t size;
63
63 while (len && seg < iov_count) { 64 while (len && seg < iov_count) {
64 size = min(from->iov_len, len); 65 size = min(from->iov_len, len);
65 to->iov_base = from->iov_base; 66 to->iov_base = from->iov_base;
@@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
79{ 80{
80 int seg = 0; 81 int seg = 0;
81 size_t size; 82 size_t size;
83
82 while (len && seg < iovcount) { 84 while (len && seg < iovcount) {
83 size = min(from->iov_len, len); 85 size = min(from->iov_len, len);
84 to->iov_base = from->iov_base; 86 to->iov_base = from->iov_base;
@@ -211,12 +213,13 @@ static int peek_head_len(struct sock *sk)
211{ 213{
212 struct sk_buff *head; 214 struct sk_buff *head;
213 int len = 0; 215 int len = 0;
216 unsigned long flags;
214 217
215 lock_sock(sk); 218 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
216 head = skb_peek(&sk->sk_receive_queue); 219 head = skb_peek(&sk->sk_receive_queue);
217 if (head) 220 if (likely(head))
218 len = head->len; 221 len = head->len;
219 release_sock(sk); 222 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
220 return len; 223 return len;
221} 224}
222 225
@@ -227,6 +230,7 @@ static int peek_head_len(struct sock *sk)
227 * @iovcount - returned count of io vectors we fill 230 * @iovcount - returned count of io vectors we fill
228 * @log - vhost log 231 * @log - vhost log
229 * @log_num - log offset 232 * @log_num - log offset
233 * @quota - headcount quota, 1 for big buffer
230 * returns number of buffer heads allocated, negative on error 234 * returns number of buffer heads allocated, negative on error
231 */ 235 */
232static int get_rx_bufs(struct vhost_virtqueue *vq, 236static int get_rx_bufs(struct vhost_virtqueue *vq,
@@ -234,7 +238,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
234 int datalen, 238 int datalen,
235 unsigned *iovcount, 239 unsigned *iovcount,
236 struct vhost_log *log, 240 struct vhost_log *log,
237 unsigned *log_num) 241 unsigned *log_num,
242 unsigned int quota)
238{ 243{
239 unsigned int out, in; 244 unsigned int out, in;
240 int seg = 0; 245 int seg = 0;
@@ -242,7 +247,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
242 unsigned d; 247 unsigned d;
243 int r, nlogs = 0; 248 int r, nlogs = 0;
244 249
245 while (datalen > 0) { 250 while (datalen > 0 && headcount < quota) {
246 if (unlikely(seg >= UIO_MAXIOV)) { 251 if (unlikely(seg >= UIO_MAXIOV)) {
247 r = -ENOBUFS; 252 r = -ENOBUFS;
248 goto err; 253 goto err;
@@ -282,117 +287,7 @@ err:
282 287
283/* Expects to be always run from workqueue - which acts as 288/* Expects to be always run from workqueue - which acts as
284 * read-size critical section for our kind of RCU. */ 289 * read-size critical section for our kind of RCU. */
285static void handle_rx_big(struct vhost_net *net) 290static void handle_rx(struct vhost_net *net)
286{
287 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
288 unsigned out, in, log, s;
289 int head;
290 struct vhost_log *vq_log;
291 struct msghdr msg = {
292 .msg_name = NULL,
293 .msg_namelen = 0,
294 .msg_control = NULL, /* FIXME: get and handle RX aux data. */
295 .msg_controllen = 0,
296 .msg_iov = vq->iov,
297 .msg_flags = MSG_DONTWAIT,
298 };
299
300 struct virtio_net_hdr hdr = {
301 .flags = 0,
302 .gso_type = VIRTIO_NET_HDR_GSO_NONE
303 };
304
305 size_t len, total_len = 0;
306 int err;
307 size_t hdr_size;
308 /* TODO: check that we are running from vhost_worker? */
309 struct socket *sock = rcu_dereference_check(vq->private_data, 1);
310 if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
311 return;
312
313 mutex_lock(&vq->mutex);
314 vhost_disable_notify(vq);
315 hdr_size = vq->vhost_hlen;
316
317 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
318 vq->log : NULL;
319
320 for (;;) {
321 head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
322 ARRAY_SIZE(vq->iov),
323 &out, &in,
324 vq_log, &log);
325 /* On error, stop handling until the next kick. */
326 if (unlikely(head < 0))
327 break;
328 /* OK, now we need to know about added descriptors. */
329 if (head == vq->num) {
330 if (unlikely(vhost_enable_notify(vq))) {
331 /* They have slipped one in as we were
332 * doing that: check again. */
333 vhost_disable_notify(vq);
334 continue;
335 }
336 /* Nothing new? Wait for eventfd to tell us
337 * they refilled. */
338 break;
339 }
340 /* We don't need to be notified again. */
341 if (out) {
342 vq_err(vq, "Unexpected descriptor format for RX: "
343 "out %d, int %d\n",
344 out, in);
345 break;
346 }
347 /* Skip header. TODO: support TSO/mergeable rx buffers. */
348 s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
349 msg.msg_iovlen = in;
350 len = iov_length(vq->iov, in);
351 /* Sanity check */
352 if (!len) {
353 vq_err(vq, "Unexpected header len for RX: "
354 "%zd expected %zd\n",
355 iov_length(vq->hdr, s), hdr_size);
356 break;
357 }
358 err = sock->ops->recvmsg(NULL, sock, &msg,
359 len, MSG_DONTWAIT | MSG_TRUNC);
360 /* TODO: Check specific error and bomb out unless EAGAIN? */
361 if (err < 0) {
362 vhost_discard_vq_desc(vq, 1);
363 break;
364 }
365 /* TODO: Should check and handle checksum. */
366 if (err > len) {
367 pr_debug("Discarded truncated rx packet: "
368 " len %d > %zd\n", err, len);
369 vhost_discard_vq_desc(vq, 1);
370 continue;
371 }
372 len = err;
373 err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
374 if (err) {
375 vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
376 vq->iov->iov_base, err);
377 break;
378 }
379 len += hdr_size;
380 vhost_add_used_and_signal(&net->dev, vq, head, len);
381 if (unlikely(vq_log))
382 vhost_log_write(vq, vq_log, log, len);
383 total_len += len;
384 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
385 vhost_poll_queue(&vq->poll);
386 break;
387 }
388 }
389
390 mutex_unlock(&vq->mutex);
391}
392
393/* Expects to be always run from workqueue - which acts as
394 * read-size critical section for our kind of RCU. */
395static void handle_rx_mergeable(struct vhost_net *net)
396{ 291{
397 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; 292 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
398 unsigned uninitialized_var(in), log; 293 unsigned uninitialized_var(in), log;
@@ -405,19 +300,18 @@ static void handle_rx_mergeable(struct vhost_net *net)
405 .msg_iov = vq->iov, 300 .msg_iov = vq->iov,
406 .msg_flags = MSG_DONTWAIT, 301 .msg_flags = MSG_DONTWAIT,
407 }; 302 };
408
409 struct virtio_net_hdr_mrg_rxbuf hdr = { 303 struct virtio_net_hdr_mrg_rxbuf hdr = {
410 .hdr.flags = 0, 304 .hdr.flags = 0,
411 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE 305 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
412 }; 306 };
413
414 size_t total_len = 0; 307 size_t total_len = 0;
415 int err, headcount; 308 int err, headcount, mergeable;
416 size_t vhost_hlen, sock_hlen; 309 size_t vhost_hlen, sock_hlen;
417 size_t vhost_len, sock_len; 310 size_t vhost_len, sock_len;
418 /* TODO: check that we are running from vhost_worker? */ 311 /* TODO: check that we are running from vhost_worker? */
419 struct socket *sock = rcu_dereference_check(vq->private_data, 1); 312 struct socket *sock = rcu_dereference_check(vq->private_data, 1);
420 if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) 313
314 if (!sock)
421 return; 315 return;
422 316
423 mutex_lock(&vq->mutex); 317 mutex_lock(&vq->mutex);
@@ -427,12 +321,14 @@ static void handle_rx_mergeable(struct vhost_net *net)
427 321
428 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? 322 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
429 vq->log : NULL; 323 vq->log : NULL;
324 mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
430 325
431 while ((sock_len = peek_head_len(sock->sk))) { 326 while ((sock_len = peek_head_len(sock->sk))) {
432 sock_len += sock_hlen; 327 sock_len += sock_hlen;
433 vhost_len = sock_len + vhost_hlen; 328 vhost_len = sock_len + vhost_hlen;
434 headcount = get_rx_bufs(vq, vq->heads, vhost_len, 329 headcount = get_rx_bufs(vq, vq->heads, vhost_len,
435 &in, vq_log, &log); 330 &in, vq_log, &log,
331 likely(mergeable) ? UIO_MAXIOV : 1);
436 /* On error, stop handling until the next kick. */ 332 /* On error, stop handling until the next kick. */
437 if (unlikely(headcount < 0)) 333 if (unlikely(headcount < 0))
438 break; 334 break;
@@ -476,7 +372,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
476 break; 372 break;
477 } 373 }
478 /* TODO: Should check and handle checksum. */ 374 /* TODO: Should check and handle checksum. */
479 if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && 375 if (likely(mergeable) &&
480 memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, 376 memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
481 offsetof(typeof(hdr), num_buffers), 377 offsetof(typeof(hdr), num_buffers),
482 sizeof hdr.num_buffers)) { 378 sizeof hdr.num_buffers)) {
@@ -498,14 +394,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
498 mutex_unlock(&vq->mutex); 394 mutex_unlock(&vq->mutex);
499} 395}
500 396
501static void handle_rx(struct vhost_net *net)
502{
503 if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
504 handle_rx_mergeable(net);
505 else
506 handle_rx_big(net);
507}
508
509static void handle_tx_kick(struct vhost_work *work) 397static void handle_tx_kick(struct vhost_work *work)
510{ 398{
511 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, 399 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -654,6 +542,7 @@ static struct socket *get_raw_socket(int fd)
654 } uaddr; 542 } uaddr;
655 int uaddr_len = sizeof uaddr, r; 543 int uaddr_len = sizeof uaddr, r;
656 struct socket *sock = sockfd_lookup(fd, &r); 544 struct socket *sock = sockfd_lookup(fd, &r);
545
657 if (!sock) 546 if (!sock)
658 return ERR_PTR(-ENOTSOCK); 547 return ERR_PTR(-ENOTSOCK);
659 548
@@ -682,6 +571,7 @@ static struct socket *get_tap_socket(int fd)
682{ 571{
683 struct file *file = fget(fd); 572 struct file *file = fget(fd);
684 struct socket *sock; 573 struct socket *sock;
574
685 if (!file) 575 if (!file)
686 return ERR_PTR(-EBADF); 576 return ERR_PTR(-EBADF);
687 sock = tun_get_socket(file); 577 sock = tun_get_socket(file);
@@ -696,6 +586,7 @@ static struct socket *get_tap_socket(int fd)
696static struct socket *get_socket(int fd) 586static struct socket *get_socket(int fd)
697{ 587{
698 struct socket *sock; 588 struct socket *sock;
589
699 /* special case to disable backend */ 590 /* special case to disable backend */
700 if (fd == -1) 591 if (fd == -1)
701 return NULL; 592 return NULL;
@@ -741,9 +632,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
741 oldsock = rcu_dereference_protected(vq->private_data, 632 oldsock = rcu_dereference_protected(vq->private_data,
742 lockdep_is_held(&vq->mutex)); 633 lockdep_is_held(&vq->mutex));
743 if (sock != oldsock) { 634 if (sock != oldsock) {
744 vhost_net_disable_vq(n, vq); 635 vhost_net_disable_vq(n, vq);
745 rcu_assign_pointer(vq->private_data, sock); 636 rcu_assign_pointer(vq->private_data, sock);
746 vhost_net_enable_vq(n, vq); 637 vhost_net_enable_vq(n, vq);
747 } 638 }
748 639
749 mutex_unlock(&vq->mutex); 640 mutex_unlock(&vq->mutex);
@@ -768,6 +659,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
768 struct socket *tx_sock = NULL; 659 struct socket *tx_sock = NULL;
769 struct socket *rx_sock = NULL; 660 struct socket *rx_sock = NULL;
770 long err; 661 long err;
662
771 mutex_lock(&n->dev.mutex); 663 mutex_lock(&n->dev.mutex);
772 err = vhost_dev_check_owner(&n->dev); 664 err = vhost_dev_check_owner(&n->dev);
773 if (err) 665 if (err)
@@ -829,6 +721,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
829 struct vhost_vring_file backend; 721 struct vhost_vring_file backend;
830 u64 features; 722 u64 features;
831 int r; 723 int r;
724
832 switch (ioctl) { 725 switch (ioctl) {
833 case VHOST_NET_SET_BACKEND: 726 case VHOST_NET_SET_BACKEND:
834 if (copy_from_user(&backend, argp, sizeof backend)) 727 if (copy_from_user(&backend, argp, sizeof backend))
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ade0568c07a4..2ab291241635 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -41,8 +41,8 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
41 poll_table *pt) 41 poll_table *pt)
42{ 42{
43 struct vhost_poll *poll; 43 struct vhost_poll *poll;
44 poll = container_of(pt, struct vhost_poll, table);
45 44
45 poll = container_of(pt, struct vhost_poll, table);
46 poll->wqh = wqh; 46 poll->wqh = wqh;
47 add_wait_queue(wqh, &poll->wait); 47 add_wait_queue(wqh, &poll->wait);
48} 48}
@@ -85,6 +85,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
85void vhost_poll_start(struct vhost_poll *poll, struct file *file) 85void vhost_poll_start(struct vhost_poll *poll, struct file *file)
86{ 86{
87 unsigned long mask; 87 unsigned long mask;
88
88 mask = file->f_op->poll(file, &poll->table); 89 mask = file->f_op->poll(file, &poll->table);
89 if (mask) 90 if (mask)
90 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); 91 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
@@ -101,6 +102,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
101 unsigned seq) 102 unsigned seq)
102{ 103{
103 int left; 104 int left;
105
104 spin_lock_irq(&dev->work_lock); 106 spin_lock_irq(&dev->work_lock);
105 left = seq - work->done_seq; 107 left = seq - work->done_seq;
106 spin_unlock_irq(&dev->work_lock); 108 spin_unlock_irq(&dev->work_lock);
@@ -222,6 +224,7 @@ static int vhost_worker(void *data)
222static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) 224static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
223{ 225{
224 int i; 226 int i;
227
225 for (i = 0; i < dev->nvqs; ++i) { 228 for (i = 0; i < dev->nvqs; ++i) {
226 dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * 229 dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
227 UIO_MAXIOV, GFP_KERNEL); 230 UIO_MAXIOV, GFP_KERNEL);
@@ -235,6 +238,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
235 goto err_nomem; 238 goto err_nomem;
236 } 239 }
237 return 0; 240 return 0;
241
238err_nomem: 242err_nomem:
239 for (; i >= 0; --i) { 243 for (; i >= 0; --i) {
240 kfree(dev->vqs[i].indirect); 244 kfree(dev->vqs[i].indirect);
@@ -247,6 +251,7 @@ err_nomem:
247static void vhost_dev_free_iovecs(struct vhost_dev *dev) 251static void vhost_dev_free_iovecs(struct vhost_dev *dev)
248{ 252{
249 int i; 253 int i;
254
250 for (i = 0; i < dev->nvqs; ++i) { 255 for (i = 0; i < dev->nvqs; ++i) {
251 kfree(dev->vqs[i].indirect); 256 kfree(dev->vqs[i].indirect);
252 dev->vqs[i].indirect = NULL; 257 dev->vqs[i].indirect = NULL;
@@ -296,26 +301,28 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
296} 301}
297 302
298struct vhost_attach_cgroups_struct { 303struct vhost_attach_cgroups_struct {
299 struct vhost_work work; 304 struct vhost_work work;
300 struct task_struct *owner; 305 struct task_struct *owner;
301 int ret; 306 int ret;
302}; 307};
303 308
304static void vhost_attach_cgroups_work(struct vhost_work *work) 309static void vhost_attach_cgroups_work(struct vhost_work *work)
305{ 310{
306 struct vhost_attach_cgroups_struct *s; 311 struct vhost_attach_cgroups_struct *s;
307 s = container_of(work, struct vhost_attach_cgroups_struct, work); 312
308 s->ret = cgroup_attach_task_all(s->owner, current); 313 s = container_of(work, struct vhost_attach_cgroups_struct, work);
314 s->ret = cgroup_attach_task_all(s->owner, current);
309} 315}
310 316
311static int vhost_attach_cgroups(struct vhost_dev *dev) 317static int vhost_attach_cgroups(struct vhost_dev *dev)
312{ 318{
313 struct vhost_attach_cgroups_struct attach; 319 struct vhost_attach_cgroups_struct attach;
314 attach.owner = current; 320
315 vhost_work_init(&attach.work, vhost_attach_cgroups_work); 321 attach.owner = current;
316 vhost_work_queue(dev, &attach.work); 322 vhost_work_init(&attach.work, vhost_attach_cgroups_work);
317 vhost_work_flush(dev, &attach.work); 323 vhost_work_queue(dev, &attach.work);
318 return attach.ret; 324 vhost_work_flush(dev, &attach.work);
325 return attach.ret;
319} 326}
320 327
321/* Caller should have device mutex */ 328/* Caller should have device mutex */
@@ -323,11 +330,13 @@ static long vhost_dev_set_owner(struct vhost_dev *dev)
323{ 330{
324 struct task_struct *worker; 331 struct task_struct *worker;
325 int err; 332 int err;
333
326 /* Is there an owner already? */ 334 /* Is there an owner already? */
327 if (dev->mm) { 335 if (dev->mm) {
328 err = -EBUSY; 336 err = -EBUSY;
329 goto err_mm; 337 goto err_mm;
330 } 338 }
339
331 /* No owner, become one */ 340 /* No owner, become one */
332 dev->mm = get_task_mm(current); 341 dev->mm = get_task_mm(current);
333 worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); 342 worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
@@ -380,6 +389,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
380void vhost_dev_cleanup(struct vhost_dev *dev) 389void vhost_dev_cleanup(struct vhost_dev *dev)
381{ 390{
382 int i; 391 int i;
392
383 for (i = 0; i < dev->nvqs; ++i) { 393 for (i = 0; i < dev->nvqs; ++i) {
384 if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { 394 if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
385 vhost_poll_stop(&dev->vqs[i].poll); 395 vhost_poll_stop(&dev->vqs[i].poll);
@@ -421,6 +431,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
421static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) 431static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
422{ 432{
423 u64 a = addr / VHOST_PAGE_SIZE / 8; 433 u64 a = addr / VHOST_PAGE_SIZE / 8;
434
424 /* Make sure 64 bit math will not overflow. */ 435 /* Make sure 64 bit math will not overflow. */
425 if (a > ULONG_MAX - (unsigned long)log_base || 436 if (a > ULONG_MAX - (unsigned long)log_base ||
426 a + (unsigned long)log_base > ULONG_MAX) 437 a + (unsigned long)log_base > ULONG_MAX)
@@ -461,6 +472,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
461 int log_all) 472 int log_all)
462{ 473{
463 int i; 474 int i;
475
464 for (i = 0; i < d->nvqs; ++i) { 476 for (i = 0; i < d->nvqs; ++i) {
465 int ok; 477 int ok;
466 mutex_lock(&d->vqs[i].mutex); 478 mutex_lock(&d->vqs[i].mutex);
@@ -527,6 +539,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
527{ 539{
528 struct vhost_memory mem, *newmem, *oldmem; 540 struct vhost_memory mem, *newmem, *oldmem;
529 unsigned long size = offsetof(struct vhost_memory, regions); 541 unsigned long size = offsetof(struct vhost_memory, regions);
542
530 if (copy_from_user(&mem, m, size)) 543 if (copy_from_user(&mem, m, size))
531 return -EFAULT; 544 return -EFAULT;
532 if (mem.padding) 545 if (mem.padding)
@@ -544,7 +557,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
544 return -EFAULT; 557 return -EFAULT;
545 } 558 }
546 559
547 if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) { 560 if (!memory_access_ok(d, newmem,
561 vhost_has_feature(d, VHOST_F_LOG_ALL))) {
548 kfree(newmem); 562 kfree(newmem);
549 return -EFAULT; 563 return -EFAULT;
550 } 564 }
@@ -560,6 +574,7 @@ static int init_used(struct vhost_virtqueue *vq,
560 struct vring_used __user *used) 574 struct vring_used __user *used)
561{ 575{
562 int r = put_user(vq->used_flags, &used->flags); 576 int r = put_user(vq->used_flags, &used->flags);
577
563 if (r) 578 if (r)
564 return r; 579 return r;
565 return get_user(vq->last_used_idx, &used->idx); 580 return get_user(vq->last_used_idx, &used->idx);
@@ -849,6 +864,7 @@ static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
849{ 864{
850 struct vhost_memory_region *reg; 865 struct vhost_memory_region *reg;
851 int i; 866 int i;
867
852 /* linear search is not brilliant, but we really have on the order of 6 868 /* linear search is not brilliant, but we really have on the order of 6
853 * regions in practice */ 869 * regions in practice */
854 for (i = 0; i < mem->nregions; ++i) { 870 for (i = 0; i < mem->nregions; ++i) {
@@ -871,6 +887,7 @@ static int set_bit_to_user(int nr, void __user *addr)
871 void *base; 887 void *base;
872 int bit = nr + (log % PAGE_SIZE) * 8; 888 int bit = nr + (log % PAGE_SIZE) * 8;
873 int r; 889 int r;
890
874 r = get_user_pages_fast(log, 1, 1, &page); 891 r = get_user_pages_fast(log, 1, 1, &page);
875 if (r < 0) 892 if (r < 0)
876 return r; 893 return r;
@@ -888,6 +905,7 @@ static int log_write(void __user *log_base,
888{ 905{
889 u64 write_page = write_address / VHOST_PAGE_SIZE; 906 u64 write_page = write_address / VHOST_PAGE_SIZE;
890 int r; 907 int r;
908
891 if (!write_length) 909 if (!write_length)
892 return 0; 910 return 0;
893 write_length += write_address % VHOST_PAGE_SIZE; 911 write_length += write_address % VHOST_PAGE_SIZE;
@@ -1037,8 +1055,8 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
1037 i, count); 1055 i, count);
1038 return -EINVAL; 1056 return -EINVAL;
1039 } 1057 }
1040 if (unlikely(memcpy_fromiovec((unsigned char *)&desc, vq->indirect, 1058 if (unlikely(memcpy_fromiovec((unsigned char *)&desc,
1041 sizeof desc))) { 1059 vq->indirect, sizeof desc))) {
1042 vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", 1060 vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
1043 i, (size_t)indirect->addr + i * sizeof desc); 1061 i, (size_t)indirect->addr + i * sizeof desc);
1044 return -EINVAL; 1062 return -EINVAL;
@@ -1153,7 +1171,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
1153 i, vq->num, head); 1171 i, vq->num, head);
1154 return -EINVAL; 1172 return -EINVAL;
1155 } 1173 }
1156 ret = copy_from_user(&desc, vq->desc + i, sizeof desc); 1174 ret = __copy_from_user(&desc, vq->desc + i, sizeof desc);
1157 if (unlikely(ret)) { 1175 if (unlikely(ret)) {
1158 vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", 1176 vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
1159 i, vq->desc + i); 1177 i, vq->desc + i);
@@ -1317,6 +1335,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
1317void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) 1335void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
1318{ 1336{
1319 __u16 flags; 1337 __u16 flags;
1338
1320 /* Flush out used index updates. This is paired 1339 /* Flush out used index updates. This is paired
1321 * with the barrier that the Guest executes when enabling 1340 * with the barrier that the Guest executes when enabling
1322 * interrupts. */ 1341 * interrupts. */
@@ -1361,6 +1380,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
1361{ 1380{
1362 u16 avail_idx; 1381 u16 avail_idx;
1363 int r; 1382 int r;
1383
1364 if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) 1384 if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
1365 return false; 1385 return false;
1366 vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; 1386 vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
@@ -1387,6 +1407,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
1387void vhost_disable_notify(struct vhost_virtqueue *vq) 1407void vhost_disable_notify(struct vhost_virtqueue *vq)
1388{ 1408{
1389 int r; 1409 int r;
1410
1390 if (vq->used_flags & VRING_USED_F_NO_NOTIFY) 1411 if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
1391 return; 1412 return;
1392 vq->used_flags |= VRING_USED_F_NO_NOTIFY; 1413 vq->used_flags |= VRING_USED_F_NO_NOTIFY;