commit		e86322f611eef95aafaf726fd3965e5b211f1985
tree		28547e26df4fc6ae671dc8cc6912a53717e4db08
parent		b001a1b6aa960949a24c2cdc28257dfcc9428d74
parent		8948896c9e098c6fd31a6a698a598a7cbd7fa40e
author		J. Bruce Fields <bfields@citi.umich.edu>	2008-07-03 16:24:06 -0400
committer	J. Bruce Fields <bfields@citi.umich.edu>	2008-07-03 16:24:06 -0400

Merge branch 'for-bfields' of git://linux-nfs.org/~tomtucker/xprt-switch-2.6 into for-2.6.27
Diffstat (limited to 'net')
-rw-r--r--	net/8021q/vlan.c	28
-rw-r--r--	net/8021q/vlan_dev.c	2
-rw-r--r--	net/atm/br2684.c	78
-rw-r--r--	net/ax25/ax25_subr.c	11
-rw-r--r--	net/bluetooth/rfcomm/core.c	2
-rw-r--r--	net/bluetooth/rfcomm/tty.c	13
-rw-r--r--	net/core/dev.c	46
-rw-r--r--	net/core/neighbour.c	9
-rw-r--r--	net/core/net_namespace.c	3
-rw-r--r--	net/core/pktgen.c	4
-rw-r--r--	net/core/rtnetlink.c	3
-rw-r--r--	net/core/skbuff.c	5
-rw-r--r--	net/core/user_dma.c	2
-rw-r--r--	net/dccp/ackvec.c	29
-rw-r--r--	net/dccp/ccids/ccid3.c	27
-rw-r--r--	net/dccp/ccids/lib/tfrc.c	8
-rw-r--r--	net/dccp/ccids/lib/tfrc.h	25
-rw-r--r--	net/dccp/ccids/lib/tfrc_equation.c	8
-rw-r--r--	net/dccp/ipv4.c	7
-rw-r--r--	net/dccp/ipv6.c	1
-rw-r--r--	net/dccp/minisocks.c	8
-rw-r--r--	net/dccp/options.c	4
-rw-r--r--	net/dccp/output.c	2
-rw-r--r--	net/dccp/probe.c	2
-rw-r--r--	net/ipv4/arp.c	5
-rw-r--r--	net/ipv4/devinet.c	9
-rw-r--r--	net/ipv4/fib_frontend.c	1
-rw-r--r--	net/ipv4/fib_semantics.c	5
-rw-r--r--	net/ipv4/inet_connection_sock.c	11
-rw-r--r--	net/ipv4/ip_gre.c	146
-rw-r--r--	net/ipv4/ipip.c	130
-rw-r--r--	net/ipv4/netfilter/nf_nat_core.c	3
-rw-r--r--	net/ipv4/netfilter/nf_nat_snmp_basic.c	14
-rw-r--r--	net/ipv4/raw.c	11
-rw-r--r--	net/ipv4/route.c	4
-rw-r--r--	net/ipv4/syncookies.c	3
-rw-r--r--	net/ipv4/tcp.c	27
-rw-r--r--	net/ipv4/tcp_input.c	80
-rw-r--r--	net/ipv4/tcp_ipv4.c	14
-rw-r--r--	net/ipv4/tcp_minisocks.c	32
-rw-r--r--	net/ipv4/tcp_output.c	12
-rw-r--r--	net/ipv4/tcp_timer.c	5
-rw-r--r--	net/ipv4/tunnel4.c	2
-rw-r--r--	net/ipv4/udp.c	3
-rw-r--r--	net/ipv4/xfrm4_mode_tunnel.c	2
-rw-r--r--	net/ipv6/addrconf.c	142
-rw-r--r--	net/ipv6/af_inet6.c	2
-rw-r--r--	net/ipv6/datagram.c	50
-rw-r--r--	net/ipv6/ip6_flowlabel.c	2
-rw-r--r--	net/ipv6/ip6_input.c	9
-rw-r--r--	net/ipv6/ip6mr.c	2
-rw-r--r--	net/ipv6/ipv6_sockglue.c	44
-rw-r--r--	net/ipv6/ndisc.c	8
-rw-r--r--	net/ipv6/netfilter/nf_conntrack_reasm.c	8
-rw-r--r--	net/ipv6/raw.c	13
-rw-r--r--	net/ipv6/route.c	26
-rw-r--r--	net/ipv6/sit.c	133
-rw-r--r--	net/ipv6/syncookies.c	1
-rw-r--r--	net/ipv6/tcp_ipv6.c	1
-rw-r--r--	net/ipv6/tunnel6.c	2
-rw-r--r--	net/ipv6/udp.c	8
-rw-r--r--	net/irda/af_irda.c	12
-rw-r--r--	net/key/af_key.c	5
-rw-r--r--	net/llc/llc_sap.c	10
-rw-r--r--	net/mac80211/cfg.c	4
-rw-r--r--	net/mac80211/ieee80211_i.h	2
-rw-r--r--	net/mac80211/main.c	3
-rw-r--r--	net/mac80211/mlme.c	74
-rw-r--r--	net/mac80211/rx.c	4
-rw-r--r--	net/mac80211/tx.c	13
-rw-r--r--	net/mac80211/util.c	41
-rw-r--r--	net/mac80211/wext.c	28
-rw-r--r--	net/mac80211/wme.c	2
-rw-r--r--	net/netfilter/nf_conntrack_core.c	3
-rw-r--r--	net/netfilter/nf_conntrack_expect.c	4
-rw-r--r--	net/netfilter/nf_conntrack_extend.c	9
-rw-r--r--	net/netfilter/nf_conntrack_h323_main.c	22
-rw-r--r--	net/netfilter/nf_log.c	4
-rw-r--r--	net/netfilter/xt_connlimit.c	3
-rw-r--r--	net/netlink/attr.c	12
-rw-r--r--	net/netlink/genetlink.c	21
-rw-r--r--	net/sched/cls_api.c	2
-rw-r--r--	net/sched/sch_dsmark.c	6
-rw-r--r--	net/sched/sch_gred.c	3
-rw-r--r--	net/sched/sch_hfsc.c	2
-rw-r--r--	net/sched/sch_htb.c	23
-rw-r--r--	net/sched/sch_red.c	3
-rw-r--r--	net/sctp/associola.c	34
-rw-r--r--	net/sctp/ipv6.c	11
-rw-r--r--	net/sctp/output.c	2
-rw-r--r--	net/sctp/outqueue.c	120
-rw-r--r--	net/sctp/protocol.c	26
-rw-r--r--	net/sctp/socket.c	4
-rw-r--r--	net/sctp/transport.c	50
-rw-r--r--	net/sunrpc/auth_generic.c	8
-rw-r--r--	net/sunrpc/svc_xprt.c	23
-rw-r--r--	net/sunrpc/svcauth_unix.c	4
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma.c	35
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	186
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_sendto.c	177
-rw-r--r--	net/sunrpc/xprtrdma/svc_rdma_transport.c	419
-rw-r--r--	net/unix/af_unix.c	79
-rw-r--r--	net/wireless/nl80211.c	12
-rw-r--r--	net/xfrm/xfrm_algo.c	4
-rw-r--r--	net/xfrm/xfrm_user.c	11
105 files changed, 1391 insertions(+), 1426 deletions(-)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2a739adaa92b..ab2225da0ee2 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -382,6 +382,18 @@ static void vlan_sync_address(struct net_device *dev,
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
 
+static void vlan_transfer_features(struct net_device *dev,
+				   struct net_device *vlandev)
+{
+	unsigned long old_features = vlandev->features;
+
+	vlandev->features &= ~dev->vlan_features;
+	vlandev->features |= dev->features & dev->vlan_features;
+
+	if (old_features != vlandev->features)
+		netdev_features_change(vlandev);
+}
+
 static void __vlan_device_event(struct net_device *dev, unsigned long event)
 {
 	switch (event) {
@@ -410,10 +422,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	int i, flgs;
 	struct net_device *vlandev;
 
-	if (is_vlan_dev(dev)) {
+	if (is_vlan_dev(dev))
 		__vlan_device_event(dev, event);
-		goto out;
-	}
 
 	grp = __vlan_find_group(dev);
 	if (!grp)
@@ -450,6 +460,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		}
 		break;
 
+	case NETDEV_FEAT_CHANGE:
+		/* Propagate device features to underlying device */
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			vlandev = vlan_group_get_device(grp, i);
+			if (!vlandev)
+				continue;
+
+			vlan_transfer_features(dev, vlandev);
+		}
+
+		break;
+
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too.  */
 		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c961f0826005..5d055c242ed8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -663,6 +663,8 @@ static int vlan_dev_init(struct net_device *dev)
 					  (1<<__LINK_STATE_DORMANT))) |
 		   (1<<__LINK_STATE_PRESENT);
 
+	dev->features |= real_dev->features & real_dev->vlan_features;
+
 	/* ipv6 shared card related stuff */
 	dev->dev_id = real_dev->dev_id;
 
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 9d52ebfc1962..05fafdc2eea3 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -188,10 +188,13 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 				return 0;
 			}
 		}
-	} else {
-		skb_push(skb, 2);
-		if (brdev->payload == p_bridged)
+	} else { /* e_vc */
+		if (brdev->payload == p_bridged) {
+			skb_push(skb, 2);
 			memset(skb->data, 0, 2);
+		} else { /* p_routed */
+			skb_pull(skb, ETH_HLEN);
+		}
 	}
 	skb_debug(skb);
 
@@ -377,11 +380,8 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			    (skb->data + 6, ethertype_ipv4,
 			     sizeof(ethertype_ipv4)) == 0)
 				skb->protocol = __constant_htons(ETH_P_IP);
-			else {
-				brdev->stats.rx_errors++;
-				dev_kfree_skb(skb);
-				return;
-			}
+			else
+				goto error;
 			skb_pull(skb, sizeof(llc_oui_ipv4));
 			skb_reset_network_header(skb);
 			skb->pkt_type = PACKET_HOST;
@@ -394,44 +394,56 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			   (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) {
 			skb_pull(skb, sizeof(llc_oui_pid_pad));
 			skb->protocol = eth_type_trans(skb, net_dev);
-		} else {
-			brdev->stats.rx_errors++;
-			dev_kfree_skb(skb);
-			return;
-		}
+		} else
+			goto error;
 
-	} else {
-		/* first 2 chars should be 0 */
-		if (*((u16 *) (skb->data)) != 0) {
-			brdev->stats.rx_errors++;
-			dev_kfree_skb(skb);
-			return;
+	} else { /* e_vc */
+		if (brdev->payload == p_routed) {
+			struct iphdr *iph;
+
+			skb_reset_network_header(skb);
+			iph = ip_hdr(skb);
+			if (iph->version == 4)
+				skb->protocol = __constant_htons(ETH_P_IP);
+			else if (iph->version == 6)
+				skb->protocol = __constant_htons(ETH_P_IPV6);
+			else
+				goto error;
+			skb->pkt_type = PACKET_HOST;
+		} else { /* p_bridged */
+			/* first 2 chars should be 0 */
+			if (*((u16 *) (skb->data)) != 0)
+				goto error;
+			skb_pull(skb, BR2684_PAD_LEN);
+			skb->protocol = eth_type_trans(skb, net_dev);
 		}
-		skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN);	/* pad, dstmac, srcmac, ethtype */
-		skb->protocol = eth_type_trans(skb, net_dev);
 	}
 
 #ifdef CONFIG_ATM_BR2684_IPFILTER
-	if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) {
-		brdev->stats.rx_dropped++;
-		dev_kfree_skb(skb);
-		return;
-	}
+	if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb)))
+		goto dropped;
 #endif /* CONFIG_ATM_BR2684_IPFILTER */
 	skb->dev = net_dev;
 	ATM_SKB(skb)->vcc = atmvcc;	/* needed ? */
 	pr_debug("received packet's protocol: %x\n", ntohs(skb->protocol));
 	skb_debug(skb);
-	if (unlikely(!(net_dev->flags & IFF_UP))) {
-		/* sigh, interface is down */
-		brdev->stats.rx_dropped++;
-		dev_kfree_skb(skb);
-		return;
-	}
+	/* sigh, interface is down? */
+	if (unlikely(!(net_dev->flags & IFF_UP)))
+		goto dropped;
 	brdev->stats.rx_packets++;
 	brdev->stats.rx_bytes += skb->len;
 	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 	netif_rx(skb);
+	return;
+
+dropped:
+	brdev->stats.rx_dropped++;
+	goto free_skb;
+error:
+	brdev->stats.rx_errors++;
+free_skb:
+	dev_kfree_skb(skb);
+	return;
 }
 
 /*
@@ -518,9 +530,9 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 		struct sk_buff *next = skb->next;
 
 		skb->next = skb->prev = NULL;
+		br2684_push(atmvcc, skb);
 		BRPRIV(skb->dev)->stats.rx_bytes -= skb->len;
 		BRPRIV(skb->dev)->stats.rx_packets--;
-		br2684_push(atmvcc, skb);
 
 		skb = next;
 	}
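
Aside: the br2684_push() rewrite above replaces four copies of the stats-bump/dev_kfree_skb/return sequence with shared dropped:/error:/free_skb: exit labels. A self-contained sketch of that centralized-exit idiom, with hypothetical names standing in for the skb machinery:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct stats { unsigned rx_errors, rx_dropped; };

/* Centralized-exit error handling in the style of the br2684_push()
 * rewrite: every failure path funnels through a single free. */
static void consume(struct stats *st, char *buf, int bad_fmt, int iface_down)
{
	if (bad_fmt)
		goto error;
	if (iface_down)
		goto dropped;
	printf("delivered: %s\n", buf);
	free(buf);
	return;

dropped:
	st->rx_dropped++;
	goto free_buf;
error:
	st->rx_errors++;
free_buf:
	free(buf);
}

int main(void)
{
	struct stats st = { 0, 0 };
	consume(&st, strdup("ok"), 0, 0);
	consume(&st, strdup("bad"), 1, 0);
	consume(&st, strdup("down"), 0, 1);
	printf("errors=%u dropped=%u\n", st.rx_errors, st.rx_dropped);
	return 0;
}
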
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index d8f215733175..034aa10a5198 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -64,20 +64,15 @@ void ax25_frames_acked(ax25_cb *ax25, unsigned short nr)
 
 void ax25_requeue_frames(ax25_cb *ax25)
 {
-	struct sk_buff *skb, *skb_prev = NULL;
+	struct sk_buff *skb;
 
 	/*
 	 * Requeue all the un-ack-ed frames on the output queue to be picked
 	 * up by ax25_kick called from the timer. This arrangement handles the
 	 * possibility of an empty output queue.
 	 */
-	while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) {
-		if (skb_prev == NULL)
-			skb_queue_head(&ax25->write_queue, skb);
-		else
-			skb_append(skb_prev, skb, &ax25->write_queue);
-		skb_prev = skb;
-	}
+	while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL)
+		skb_queue_head(&ax25->write_queue, skb);
 }
 
 /*
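
Aside: the simplification works because draining one queue from its tail while pushing onto the head of another preserves the original order — exactly what the old skb_prev/skb_append dance achieved. A quick userspace check of that invariant, with arrays standing in for sk_buff queues:

#include <stdio.h>

int main(void)
{
	int ack_queue[] = { 1, 2, 3, 4 };	/* oldest first */
	int write_queue[8];
	int n = 4, top = 8;

	/* dequeue_tail + queue_head, as in the new ax25_requeue_frames() */
	while (n > 0)
		write_queue[--top] = ack_queue[--n];

	for (; top < 8; top++)
		printf("%d ", write_queue[top]);	/* prints 1 2 3 4 */
	printf("\n");
	return 0;
}
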
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index eb62558e9b09..0c2c93735e93 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -423,8 +423,8 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
 
 	rfcomm_dlc_lock(d);
 	d->state = BT_CLOSED;
-	rfcomm_dlc_unlock(d);
 	d->state_change(d, err);
+	rfcomm_dlc_unlock(d);
 
 	skb_queue_purge(&d->tx_queue);
 	rfcomm_dlc_unlink(d);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index c3f749abb2d0..c9191871c1e0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -566,11 +566,22 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
 	if (dlc->state == BT_CLOSED) {
 		if (!dev->tty) {
 			if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
-				if (rfcomm_dev_get(dev->id) == NULL)
+				/* Drop DLC lock here to avoid deadlock
+				 * 1. rfcomm_dev_get will take rfcomm_dev_lock
+				 *    but in rfcomm_dev_add there's lock order:
+				 *    rfcomm_dev_lock -> dlc lock
+				 * 2. rfcomm_dev_put will deadlock if it's
+				 *    the last reference
+				 */
+				rfcomm_dlc_unlock(dlc);
+				if (rfcomm_dev_get(dev->id) == NULL) {
+					rfcomm_dlc_lock(dlc);
 					return;
+				}
 
 				rfcomm_dev_del(dev);
 				rfcomm_dev_put(dev);
+				rfcomm_dlc_lock(dlc);
 			}
 		} else
 			tty_hangup(dev->tty);
diff --git a/net/core/dev.c b/net/core/dev.c
index a1607bc0cd4c..c421a1f8f0b9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -119,6 +119,7 @@
 #include <linux/err.h>
 #include <linux/ctype.h>
 #include <linux/if_arp.h>
+#include <linux/if_vlan.h>
 
 #include "net-sysfs.h"
 
@@ -903,7 +904,11 @@ int dev_change_name(struct net_device *dev, char *newname)
 		strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-	device_rename(&dev->dev, dev->name);
+	err = device_rename(&dev->dev, dev->name);
+	if (err) {
+		memcpy(dev->name, oldname, IFNAMSIZ);
+		return err;
+	}
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del(&dev->name_hlist);
@@ -1358,6 +1363,29 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+	return ((features & NETIF_F_GEN_CSUM) ||
+		((features & NETIF_F_IP_CSUM) &&
+		 protocol == htons(ETH_P_IP)) ||
+		((features & NETIF_F_IPV6_CSUM) &&
+		 protocol == htons(ETH_P_IPV6)));
+}
+
+static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
+{
+	if (can_checksum_protocol(dev->features, skb->protocol))
+		return true;
+
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+		if (can_checksum_protocol(dev->features & dev->vlan_features,
+					  veh->h_vlan_encapsulated_proto))
+			return true;
+	}
+
+	return false;
+}
 
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
@@ -1636,14 +1664,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		skb_set_transport_header(skb, skb->csum_start -
 					      skb_headroom(skb));
-
-		if (!(dev->features & NETIF_F_GEN_CSUM) &&
-		    !((dev->features & NETIF_F_IP_CSUM) &&
-		      skb->protocol == htons(ETH_P_IP)) &&
-		    !((dev->features & NETIF_F_IPV6_CSUM) &&
-		      skb->protocol == htons(ETH_P_IPV6)))
-			if (skb_checksum_help(skb))
-				goto out_kfree_skb;
+		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
+			goto out_kfree_skb;
 	}
 
 gso:
@@ -2055,6 +2077,10 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	rcu_read_lock();
 
+	/* Don't receive packets in an exiting network namespace */
+	if (!net_alive(dev_net(skb->dev)))
+		goto out;
+
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -3137,7 +3163,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 	 *	Load in the correct multicast list now the flags have changed.
 	 */
 
-	if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
+	if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
 		dev->change_rx_flags(dev, IFF_MULTICAST);
 
 	dev_set_rx_mode(dev);
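
Aside: dev_can_checksum() above factors the offload test out of dev_queue_xmit() and extends it to VLAN-tagged frames — for ETH_P_8021Q the decision is retried on the encapsulated protocol against dev->features & dev->vlan_features. A hedged userspace rendering of that decision (the constants are stand-ins for the real NETIF_F_*/ETH_P_* values):

#include <stdbool.h>
#include <stdio.h>

#define F_GEN_CSUM  0x1UL
#define F_IP_CSUM   0x2UL
#define F_IPV6_CSUM 0x4UL

enum proto { P_IP, P_IPV6, P_8021Q, P_OTHER };

static bool can_checksum(unsigned long features, enum proto proto)
{
	return (features & F_GEN_CSUM) ||
	       ((features & F_IP_CSUM) && proto == P_IP) ||
	       ((features & F_IPV6_CSUM) && proto == P_IPV6);
}

int main(void)
{
	unsigned long dev_features = F_IP_CSUM;
	unsigned long vlan_features = F_IP_CSUM;	/* dev->vlan_features */
	enum proto outer = P_8021Q, inner = P_IP;

	/* Outer check fails (802.1Q is neither IP nor IPv6)... */
	bool ok = can_checksum(dev_features, outer);
	/* ...so retry on the encapsulated protocol, VLAN-masked. */
	if (!ok && outer == P_8021Q)
		ok = can_checksum(dev_features & vlan_features, inner);
	printf("hw checksum %s\n", ok ? "possible" : "needs skb_checksum_help");
	return 0;
}
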
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5d9d7130bd6e..65f01f71b3f3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1714,7 +1714,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 	return nla_nest_end(skb, nest);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, nest);
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
 }
 
 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
@@ -2057,9 +2058,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
 			goto nla_put_failure;
 	}
 
-	ci.ndm_used	 = now - neigh->used;
-	ci.ndm_confirmed = now - neigh->confirmed;
-	ci.ndm_updated	 = now - neigh->updated;
+	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
+	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
+	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
 	read_unlock_bh(&neigh->lock);
 
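
Aside: the jiffies_to_clock_t() fix matters because ndm_used and friends are reported to userspace, which expects USER_HZ ticks rather than raw kernel jiffies; with HZ=1000 and USER_HZ=100 the raw delta would read ten times too large. A sketch of the conversion for the common case where HZ is a multiple of USER_HZ (the kernel helper also covers the other cases):

#include <stdio.h>

#define HZ      1000	/* kernel tick rate (build-time, varies) */
#define USER_HZ  100	/* what userspace sees via times()/proc */

/* Simplified jiffies_to_clock_t() for the case HZ % USER_HZ == 0. */
static unsigned long jiffies_to_clock_t(unsigned long j)
{
	return j / (HZ / USER_HZ);
}

int main(void)
{
	unsigned long now = 123456, used = 120456;	/* jiffies */
	/* 3000 jiffies at HZ=1000 is 3 s, i.e. 300 USER_HZ ticks. */
	printf("ndm_used = %lu\n", jiffies_to_clock_t(now - used));
	return 0;
}
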
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72b4c184dd84..7c52fe277b62 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work)
 	struct pernet_operations *ops;
 	struct net *net;
 
+	/* Be very certain incoming network packets will not find us */
+	rcu_barrier();
+
 	net = container_of(work, struct net, work);
 
 	mutex_lock(&net_mutex);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8dca21110493..fdf537707e51 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -390,6 +390,7 @@ struct pktgen_thread {
 	int cpu;
 
 	wait_queue_head_t queue;
+	struct completion start_done;
 };
 
 #define REMOVE 1
@@ -3414,6 +3415,7 @@ static int pktgen_thread_worker(void *arg)
 	BUG_ON(smp_processor_id() != cpu);
 
 	init_waitqueue_head(&t->queue);
+	complete(&t->start_done);
 
 	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
 
@@ -3615,6 +3617,7 @@ static int __init pktgen_create_thread(int cpu)
 	INIT_LIST_HEAD(&t->if_list);
 
 	list_add_tail(&t->th_list, &pktgen_threads);
+	init_completion(&t->start_done);
 
 	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
@@ -3639,6 +3642,7 @@ static int __init pktgen_create_thread(int cpu)
 	}
 
 	wake_up_process(p);
+	wait_for_completion(&t->start_done);
 
 	return 0;
 }
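
Aside: the completion added above closes a startup race — without it, pktgen_create_thread() could return before the worker had run init_waitqueue_head(), so an early wake-up could touch an uninitialized waitqueue. A userspace analogue of the same handshake using pthreads (a sketch, not the kernel API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t started = PTHREAD_COND_INITIALIZER;
static int start_done;	/* plays the role of struct completion */

static void *worker(void *arg)
{
	/* ... set up per-thread state (cf. init_waitqueue_head) ... */
	pthread_mutex_lock(&lock);
	start_done = 1;			/* complete(&t->start_done) */
	pthread_cond_signal(&started);
	pthread_mutex_unlock(&lock);
	/* ... main loop would run here ... */
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, worker, NULL);
	pthread_mutex_lock(&lock);	/* wait_for_completion() */
	while (!start_done)
		pthread_cond_wait(&started, &lock);
	pthread_mutex_unlock(&lock);
	puts("worker initialized; safe to poke it");
	pthread_join(t, NULL);
	return 0;
}
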
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cf857c4dc7b1..a9a77216310e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 	return nla_nest_end(skb, mx);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, mx);
+	nla_nest_cancel(skb, mx);
+	return -EMSGSIZE;
 }
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5c459f2b7985..1e556d312117 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1445,6 +1445,7 @@ done:
 
 	if (spd.nr_pages) {
 		int ret;
+		struct sock *sk = __skb->sk;
 
 		/*
 		 * Drop the socket lock, otherwise we have reverse
@@ -1455,9 +1456,9 @@ done:
 		 * we call into ->sendpage() with the i_mutex lock held
 		 * and networking will grab the socket lock.
 		 */
-		release_sock(__skb->sk);
+		release_sock(sk);
 		ret = splice_to_pipe(pipe, &spd);
-		lock_sock(__skb->sk);
+		lock_sock(sk);
 		return ret;
 	}
 
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 0ad1cd57bc39..c77aff9c6eb3 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -75,7 +75,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 
 		end = start + skb_shinfo(skb)->frags[i].size;
 		copy = end - offset;
-		if ((copy = end - offset) > 0) {
+		if (copy > 0) {
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 			struct page *page = frag->page;
 
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 6de4bd195d28..1e8be246ad15 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -290,12 +290,12 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 
 	while (1) {
 		const u8 len = dccp_ackvec_len(av, index);
-		const u8 state = dccp_ackvec_state(av, index);
+		const u8 av_state = dccp_ackvec_state(av, index);
 		/*
 		 * valid packets not yet in av_buf have a reserved
 		 * entry, with a len equal to 0.
 		 */
-		if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+		if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
 		    len == 0 && delta == 0) {		/* Found our
 							   reserved seat! */
 			dccp_pr_debug("Found %llu reserved seat!\n",
@@ -325,31 +325,6 @@ out_duplicate:
 	return -EILSEQ;
 }
 
-#ifdef CONFIG_IP_DCCP_DEBUG
-void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
-{
-	dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len,
-			  (unsigned long long)ackno);
-
-	while (len--) {
-		const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
-		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-
-		dccp_pr_debug_cat("%d,%d|", state, rl);
-		++vector;
-	}
-
-	dccp_pr_debug_cat("\n");
-}
-
-void dccp_ackvec_print(const struct dccp_ackvec *av)
-{
-	dccp_ackvector_print(av->av_buf_ackno,
-			     av->av_buf + av->av_buf_head,
-			     av->av_vec_len);
-}
-#endif
-
 static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
 				     struct dccp_ackvec_record *avr)
 {
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index cd61dea2eea1..a1929f33d703 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -159,8 +159,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 	} else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld)
 				- (s64)hctx->ccid3hctx_rtt >= 0) {
 
-		hctx->ccid3hctx_x =
-			max(min(2 * hctx->ccid3hctx_x, min_rate),
-			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
-				       hctx->ccid3hctx_rtt));
+		hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate);
+		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
+					scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
+						   hctx->ccid3hctx_rtt));
 		hctx->ccid3hctx_t_ld = now;
@@ -193,22 +193,17 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 
 /*
  * Update Window Counter using the algorithm from [RFC 4342, 8.1].
- * The algorithm is not applicable if RTT < 4 microseconds.
+ * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt().
  */
 static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
 						ktime_t now)
 {
-	u32 quarter_rtts;
-
-	if (unlikely(hctx->ccid3hctx_rtt < 4))	/* avoid divide-by-zero */
-		return;
-
-	quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
-	quarter_rtts /= hctx->ccid3hctx_rtt / 4;
+	u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count),
+	    quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt;
 
 	if (quarter_rtts > 0) {
 		hctx->ccid3hctx_t_last_win_count = now;
-		hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5);
+		hctx->ccid3hctx_last_win_count += min(quarter_rtts, 5U);
 		hctx->ccid3hctx_last_win_count &= 0xF;		/* mod 16 */
 	}
 }
@@ -334,8 +329,14 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			hctx->ccid3hctx_x = rfc3390_initial_rate(sk);
 			hctx->ccid3hctx_t_ld = now;
 		} else {
-			/* Sender does not have RTT sample: X_pps = 1 pkt/sec */
-			hctx->ccid3hctx_x = hctx->ccid3hctx_s;
+			/*
+			 * Sender does not have RTT sample:
+			 * - set fallback RTT (RFC 4340, 3.4) since a RTT value
+			 *   is needed in several parts (e.g. window counter);
+			 * - set sending rate X_pps = 1pps as per RFC 3448, 4.2.
+			 */
+			hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT;
+			hctx->ccid3hctx_x   = hctx->ccid3hctx_s;
 			hctx->ccid3hctx_x <<= 6;
 		}
 		ccid3_update_send_interval(hctx);
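
Aside: the new window-counter update is straight integer arithmetic over microseconds — quarter_rtts = (4 * delta) / rtt, with the counter advancing by at most 5 modulo 16 — and the fallback-RTT hunk above keeps rtt nonzero when no sample exists yet. Worked numbers (the values are made up for illustration):

#include <stdio.h>

int main(void)
{
	unsigned rtt = 100000;		/* 100 ms, in microseconds */
	unsigned delta = 260000;	/* time since last update, us */
	unsigned last_win_count = 14;

	unsigned quarter_rtts = (4 * delta) / rtt;	/* = 10 */
	if (quarter_rtts > 0) {
		last_win_count += quarter_rtts < 5 ? quarter_rtts : 5;
		last_win_count &= 0xF;	/* mod 16 */
	}
	printf("win_count = %u\n", last_win_count);	/* (14+5) & 15 = 3 */
	return 0;
}
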
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index d1dfbb8de64c..97ecec0a8e76 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -14,14 +14,6 @@ module_param(tfrc_debug, bool, 0444);
 MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
 #endif
 
-extern int  tfrc_tx_packet_history_init(void);
-extern void tfrc_tx_packet_history_exit(void);
-extern int  tfrc_rx_packet_history_init(void);
-extern void tfrc_rx_packet_history_exit(void);
-
-extern int  tfrc_li_init(void);
-extern void tfrc_li_exit(void);
-
 static int __init tfrc_module_init(void)
 {
 	int rc = tfrc_li_init();
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 1fb1187bbf1c..ed9857527acf 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -15,7 +15,7 @@
  *  (at your option) any later version.
  */
 #include <linux/types.h>
-#include <asm/div64.h>
+#include <linux/math64.h>
 #include "../../dccp.h"
 /* internal includes that this module exports: */
 #include "loss_interval.h"
@@ -29,21 +29,19 @@ extern int tfrc_debug;
 #endif
 
 /* integer-arithmetic divisions of type (a * 1000000)/b */
-static inline u64 scaled_div(u64 a, u32 b)
+static inline u64 scaled_div(u64 a, u64 b)
 {
 	BUG_ON(b==0);
-	a *= 1000000;
-	do_div(a, b);
-	return a;
+	return div64_u64(a * 1000000, b);
 }
 
-static inline u32 scaled_div32(u64 a, u32 b)
+static inline u32 scaled_div32(u64 a, u64 b)
 {
 	u64 result = scaled_div(a, b);
 
 	if (result > UINT_MAX) {
-		DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U",
-			  (unsigned long long)a, b);
+		DCCP_CRIT("Overflow: %llu/%llu > UINT_MAX",
+			  (unsigned long long)a, (unsigned long long)b);
 		return UINT_MAX;
 	}
 	return result;
@@ -58,7 +56,14 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
 	return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
 }
 
 extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
 extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
 
+extern int  tfrc_tx_packet_history_init(void);
+extern void tfrc_tx_packet_history_exit(void);
+extern int  tfrc_rx_packet_history_init(void);
+extern void tfrc_rx_packet_history_exit(void);
+
+extern int  tfrc_li_init(void);
+extern void tfrc_li_exit(void);
 #endif /* _TFRC_H_ */
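
Aside: widening the divisor to u64 and switching to div64_u64() removes the silent truncation that do_div() imposed on 32-bit divisors. A userspace demonstration of the scaled division and the UINT_MAX clamp (native 64-bit division stands in for div64_u64(); like the kernel helper, it ignores overflow of a * 1000000 for extreme a):

#include <stdint.h>
#include <stdio.h>
#include <limits.h>

static uint64_t scaled_div(uint64_t a, uint64_t b)
{
	return (a * 1000000) / b;	/* cf. div64_u64(a * 1000000, b) */
}

static uint32_t scaled_div32(uint64_t a, uint64_t b)
{
	uint64_t result = scaled_div(a, b);
	return result > UINT_MAX ? UINT_MAX : (uint32_t)result;
}

int main(void)
{
	/* A divisor above 2^32 was impossible with the old u32 parameter. */
	printf("%llu\n", (unsigned long long)scaled_div(10000, 5000000000ULL)); /* 2 */
	printf("%u\n", scaled_div32(1ULL << 40, 3));	/* clamps to UINT_MAX */
	return 0;
}
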
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index e4e64b76c10c..2f20a29cffe4 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -661,7 +661,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 
 EXPORT_SYMBOL_GPL(tfrc_calc_x);
 
-/*
+/**
  *  tfrc_calc_x_reverse_lookup  -  try to find p given f(p)
  *
  *  @fvalue: function value to match, scaled by 1000000
@@ -676,11 +676,11 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 
 	/* Error cases. */
 	if (fvalue < tfrc_calc_x_lookup[0][1]) {
-		DCCP_WARN("fvalue %d smaller than resolution\n", fvalue);
-		return tfrc_calc_x_lookup[0][1];
+		DCCP_WARN("fvalue %u smaller than resolution\n", fvalue);
+		return TFRC_SMALLEST_P;
 	}
 	if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) {
-		DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue);
+		DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue);
 		return 1000000;
 	}
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b348dd70c685..37d27bcb361f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -589,7 +589,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&dccp_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
@@ -605,7 +605,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->loc_addr = ip_hdr(skb)->daddr;
 	ireq->rmt_addr = ip_hdr(skb)->saddr;
-	ireq->opt = NULL;
 
 	/*
 	 * Step 3: Process LISTEN state
@@ -739,8 +738,8 @@ int dccp_invalid_packet(struct sk_buff *skb)
 	 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
 	 * has short sequence numbers), drop packet and return
 	 */
-	if (dh->dccph_type >= DCCP_PKT_DATA &&
-	    dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0)  {
+	if ((dh->dccph_type < DCCP_PKT_DATA ||
+	    dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0)  {
 		DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n",
 			  dccp_packet_name(dh->dccph_type));
 		return 1;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9b1129bb7ece..f7fe2a572d7b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -421,7 +421,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq6 = inet6_rsk(req);
 	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
-	ireq6->pktopts = NULL;
 
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 33ad48321b08..66dca5bba858 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -165,12 +165,12 @@ out_free:
 	/* See dccp_v4_conn_request */
 	newdmsk->dccpms_sequence_window = req->rcv_wnd;
 
-	newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
-	dccp_update_gsr(newsk, dreq->dreq_isr);
-
-	newdp->dccps_iss = dreq->dreq_iss;
+	newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
 	dccp_update_gss(newsk, dreq->dreq_iss);
 
+	newdp->dccps_isr = dreq->dreq_isr;
+	dccp_update_gsr(newsk, dreq->dreq_isr);
+
 	/*
 	 * SWL and AWL are initially adjusted so that they are not less than
 	 * the initial Sequence Numbers received and sent, respectively:
diff --git a/net/dccp/options.c b/net/dccp/options.c
index d2a84a2fecee..43bc24e761d0 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -107,9 +107,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		 *
 		 * CCID-specific options are ignored during connection setup, as
 		 * negotiation may still be in progress (see RFC 4340, 10.3).
+		 * The same applies to Ack Vectors, as these depend on the CCID.
 		 *
 		 */
-		if (dreq != NULL && opt >= 128)
+		if (dreq != NULL && (opt >= 128 ||
+		    opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
 			goto ignore_option;
 
 		switch (opt) {
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 1f8a9b64c083..fe20068c5d8e 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -508,6 +508,7 @@ void dccp_send_ack(struct sock *sk)
 
 EXPORT_SYMBOL_GPL(dccp_send_ack);
 
+#if 0
 /* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */
 void dccp_send_delayed_ack(struct sock *sk)
 {
@@ -538,6 +539,7 @@ void dccp_send_delayed_ack(struct sock *sk)
 	icsk->icsk_ack.timeout = timeout;
 	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
 }
+#endif
 
 void dccp_send_sync(struct sock *sk, const u64 ackno,
 		    const enum dccp_pkt_type pkt_type)
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 0bcdc9250279..81368a7f5379 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -42,7 +42,7 @@ static int bufsize = 64 * 1024;
 
 static const char procname[] = "dccpprobe";
 
-struct {
+static struct {
 	struct kfifo	  *fifo;
 	spinlock_t	  lock;
 	wait_queue_head_t wait;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 418862f1bf22..9b539fa9fe18 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1288,7 +1288,6 @@ static void arp_format_neigh_entry(struct seq_file *seq,
 				   struct neighbour *n)
 {
 	char hbuffer[HBUFFERLEN];
-	const char hexbuf[] = "0123456789ABCDEF";
 	int k, j;
 	char tbuf[16];
 	struct net_device *dev = n->dev;
@@ -1302,8 +1301,8 @@ static void arp_format_neigh_entry(struct seq_file *seq,
 	else {
 #endif
 	for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
-		hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15];
-		hbuffer[k++] = hexbuf[n->ha[j] & 15];
+		hbuffer[k++] = hex_asc_hi(n->ha[j]);
+		hbuffer[k++] = hex_asc_lo(n->ha[j]);
 		hbuffer[k++] = ':';
 	}
 	hbuffer[--k] = 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6848e4760f34..79a7ef6209ff 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LOCAL]     	= { .type = NLA_U32 },
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
-	[IFA_ANYCAST]   	= { .type = NLA_U32 },
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
 
@@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	if (tb[IFA_BROADCAST])
 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 
-	if (tb[IFA_ANYCAST])
-		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
-
 	if (tb[IFA_LABEL])
 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 	else
@@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			break;
 		inet_del_ifa(in_dev, ifap, 0);
 		ifa->ifa_broadcast = 0;
-		ifa->ifa_anycast = 0;
 		ifa->ifa_scope = 0;
 	}
 
@@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_ADDRESS */
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
-	       + nla_total_size(4) /* IFA_ANYCAST */
 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
 }
 
@@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	if (ifa->ifa_broadcast)
 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
 
-	if (ifa->ifa_anycast)
-		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
-
 	if (ifa->ifa_label[0])
 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0f1557a4ac7a..0b2ac6a3d903 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 	[RTA_PREFSRC]		= { .type = NLA_U32 },
 	[RTA_METRICS]		= { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
-	[RTA_PROTOINFO]		= { .type = NLA_U32 },
 	[RTA_FLOW]		= { .type = NLA_U32 },
 };
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3b83c34019fc..0d4d72827e4b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	rtm->rtm_dst_len = dst_len;
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = tos;
-	rtm->rtm_table = tb_id;
+	if (tb_id < 256)
+		rtm->rtm_table = tb_id;
+	else
+		rtm->rtm_table = RT_TABLE_COMPAT;
 	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff21..ec834480abe7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	struct inet_connection_sock *icsk = inet_csk(parent);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct listen_sock *lopt = queue->listen_opt;
-	int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
 	unsigned long now = jiffies;
 	struct request_sock **reqp, *req;
 	int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		}
 	}
 
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
 	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
 	i = lopt->clock_hand;
 
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp=&lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if (req->retrans < thresh &&
-				    !req->rsk_ops->rtx_syn_ack(parent, req)) {
+				if ((req->retrans < thresh ||
+				     (inet_rsk(req)->acked && req->retrans < max_retries))
+				    && !req->rsk_ops->rtx_syn_ack(parent, req)) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
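
Aside: this gives deferred-accept its own retry budget — a request that has already been ACKed survives up to max_retries (rskq_defer_accept when set) instead of the generic thresh. The keep-or-drop test in isolation:

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the retransmit decision in inet_csk_reqsk_queue_prune(). */
static bool keep_request(int retrans, bool acked, int thresh, int max_retries)
{
	return retrans < thresh || (acked && retrans < max_retries);
}

int main(void)
{
	int thresh = 5, defer_accept = 8;
	/* An un-ACKed request dies after thresh retransmits... */
	printf("%d\n", keep_request(5, false, thresh, defer_accept)); /* 0 */
	/* ...but an ACKed, deferred one is retried up to max_retries. */
	printf("%d\n", keep_request(5, true, thresh, defer_accept));  /* 1 */
	return 0;
}
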
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ada033406de..4342cba4ff82 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev)
 
 static void ipgre_err(struct sk_buff *skb, u32 info)
 {
-#ifndef I_WISH_WORLD_WERE_PERFECT
 
-/* It is not :-( All the routers (except for Linux) return only
+/* All the routers (except for Linux) return only
    8 bytes of packet payload. It means, that precise relaying of
    ICMP in the real Internet is absolutely infeasible.
 
@@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 out:
 	read_unlock(&ipgre_lock);
 	return;
-#else
-	struct iphdr *iph = (struct iphdr*)dp;
-	struct iphdr *eiph;
-	__be16	     *p = (__be16*)(dp+(iph->ihl<<2));
-	const int type = icmp_hdr(skb)->type;
-	const int code = icmp_hdr(skb)->code;
-	int rel_type = 0;
-	int rel_code = 0;
-	__be32 rel_info = 0;
-	__u32 n = 0;
-	__be16 flags;
-	int grehlen = (iph->ihl<<2) + 4;
-	struct sk_buff *skb2;
-	struct flowi fl;
-	struct rtable *rt;
-
-	if (p[1] != htons(ETH_P_IP))
-		return;
-
-	flags = p[0];
-	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
-		if (flags&(GRE_VERSION|GRE_ROUTING))
-			return;
-		if (flags&GRE_CSUM)
-			grehlen += 4;
-		if (flags&GRE_KEY)
-			grehlen += 4;
-		if (flags&GRE_SEQ)
-			grehlen += 4;
-	}
-	if (len < grehlen + sizeof(struct iphdr))
-		return;
-	eiph = (struct iphdr*)(dp + grehlen);
-
-	switch (type) {
-	default:
-		return;
-	case ICMP_PARAMETERPROB:
-		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
-		if (n < (iph->ihl<<2))
-			return;
-
-		/* So... This guy found something strange INSIDE encapsulated
-		   packet. Well, he is fool, but what can we do ?
-		 */
-		rel_type = ICMP_PARAMETERPROB;
-		n -= grehlen;
-		rel_info = htonl(n << 24);
-		break;
-
-	case ICMP_DEST_UNREACH:
-		switch (code) {
-		case ICMP_SR_FAILED:
-		case ICMP_PORT_UNREACH:
-			/* Impossible event. */
-			return;
-		case ICMP_FRAG_NEEDED:
-			/* And it is the only really necessary thing :-) */
-			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
-			if (n < grehlen+68)
-				return;
-			n -= grehlen;
-			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-			if (n > ntohs(eiph->tot_len))
-				return;
-			rel_info = htonl(n);
-			break;
-		default:
-			/* All others are translated to HOST_UNREACH.
-			   rfc2003 contains "deep thoughts" about NET_UNREACH,
-			   I believe, it is just ether pollution. --ANK
-			 */
-			rel_type = ICMP_DEST_UNREACH;
-			rel_code = ICMP_HOST_UNREACH;
-			break;
-		}
-		break;
-	case ICMP_TIME_EXCEEDED:
-		if (code != ICMP_EXC_TTL)
-			return;
-		break;
-	}
-
-	/* Prepare fake skb to feed it to icmp_send */
-	skb2 = skb_clone(skb, GFP_ATOMIC);
-	if (skb2 == NULL)
-		return;
-	dst_release(skb2->dst);
-	skb2->dst = NULL;
-	skb_pull(skb2, skb->data - (u8*)eiph);
-	skb_reset_network_header(skb2);
-
-	/* Try to guess incoming interface */
-	memset(&fl, 0, sizeof(fl));
-	fl.fl4_dst = eiph->saddr;
-	fl.fl4_tos = RT_TOS(eiph->tos);
-	fl.proto = IPPROTO_GRE;
-	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
-		kfree_skb(skb2);
-		return;
-	}
-	skb2->dev = rt->u.dst.dev;
-
-	/* route "incoming" packet */
-	if (rt->rt_flags&RTCF_LOCAL) {
-		ip_rt_put(rt);
-		rt = NULL;
-		fl.fl4_dst = eiph->daddr;
-		fl.fl4_src = eiph->saddr;
-		fl.fl4_tos = eiph->tos;
-		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
-		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
-			ip_rt_put(rt);
-			kfree_skb(skb2);
-			return;
-		}
-	} else {
-		ip_rt_put(rt);
-		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
-		    skb2->dst->dev->type != ARPHRD_IPGRE) {
-			kfree_skb(skb2);
-			return;
-		}
-	}
-
-	/* change mtu on this route */
-	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (n > dst_mtu(skb2->dst)) {
-			kfree_skb(skb2);
-			return;
-		}
-		skb2->dst->ops->update_pmtu(skb2->dst, n);
-	} else if (type == ICMP_TIME_EXCEEDED) {
-		struct ip_tunnel *t = netdev_priv(skb2->dev);
-		if (t->parms.iph.ttl) {
-			rel_type = ICMP_DEST_UNREACH;
-			rel_code = ICMP_HOST_UNREACH;
-		}
-	}
-
-	icmp_send(skb2, rel_type, rel_code, rel_info);
-	kfree_skb(skb2);
-#endif
 }
 
 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 149111f08e8d..af5cb53da5cc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -278,9 +278,8 @@ static void ipip_tunnel_uninit(struct net_device *dev)
 
 static int ipip_err(struct sk_buff *skb, u32 info)
 {
-#ifndef I_WISH_WORLD_WERE_PERFECT
 
-/* It is not :-( All the routers (except for Linux) return only
+/* All the routers (except for Linux) return only
    8 bytes of packet payload. It means, that precise relaying of
    ICMP in the real Internet is absolutely infeasible.
  */
@@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 out:
 	read_unlock(&ipip_lock);
 	return err;
-#else
-	struct iphdr *iph = (struct iphdr*)dp;
-	int hlen = iph->ihl<<2;
-	struct iphdr *eiph;
-	const int type = icmp_hdr(skb)->type;
-	const int code = icmp_hdr(skb)->code;
-	int rel_type = 0;
-	int rel_code = 0;
-	__be32 rel_info = 0;
-	__u32 n = 0;
-	struct sk_buff *skb2;
-	struct flowi fl;
-	struct rtable *rt;
-
-	if (len < hlen + sizeof(struct iphdr))
-		return 0;
-	eiph = (struct iphdr*)(dp + hlen);
-
-	switch (type) {
-	default:
-		return 0;
-	case ICMP_PARAMETERPROB:
-		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
-		if (n < hlen)
-			return 0;
-
-		/* So... This guy found something strange INSIDE encapsulated
-		   packet. Well, he is fool, but what can we do ?
-		 */
-		rel_type = ICMP_PARAMETERPROB;
-		rel_info = htonl((n - hlen) << 24);
-		break;
-
-	case ICMP_DEST_UNREACH:
-		switch (code) {
-		case ICMP_SR_FAILED:
-		case ICMP_PORT_UNREACH:
-			/* Impossible event. */
-			return 0;
-		case ICMP_FRAG_NEEDED:
-			/* And it is the only really necessary thing :-) */
-			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
-			if (n < hlen+68)
-				return 0;
-			n -= hlen;
-			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
-			if (n > ntohs(eiph->tot_len))
-				return 0;
-			rel_info = htonl(n);
-			break;
-		default:
-			/* All others are translated to HOST_UNREACH.
-			   rfc2003 contains "deep thoughts" about NET_UNREACH,
-			   I believe, it is just ether pollution. --ANK
-			 */
-			rel_type = ICMP_DEST_UNREACH;
-			rel_code = ICMP_HOST_UNREACH;
-			break;
-		}
-		break;
-	case ICMP_TIME_EXCEEDED:
-		if (code != ICMP_EXC_TTL)
-			return 0;
-		break;
-	}
-
-	/* Prepare fake skb to feed it to icmp_send */
-	skb2 = skb_clone(skb, GFP_ATOMIC);
-	if (skb2 == NULL)
-		return 0;
-	dst_release(skb2->dst);
-	skb2->dst = NULL;
-	skb_pull(skb2, skb->data - (u8*)eiph);
-	skb_reset_network_header(skb2);
-
-	/* Try to guess incoming interface */
-	memset(&fl, 0, sizeof(fl));
-	fl.fl4_daddr = eiph->saddr;
-	fl.fl4_tos = RT_TOS(eiph->tos);
-	fl.proto = IPPROTO_IPIP;
-	if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) {
-		kfree_skb(skb2);
-		return 0;
-	}
-	skb2->dev = rt->u.dst.dev;
-
-	/* route "incoming" packet */
-	if (rt->rt_flags&RTCF_LOCAL) {
-		ip_rt_put(rt);
-		rt = NULL;
-		fl.fl4_daddr = eiph->daddr;
-		fl.fl4_src = eiph->saddr;
-		fl.fl4_tos = eiph->tos;
-		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
-		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
-			ip_rt_put(rt);
-			kfree_skb(skb2);
-			return 0;
-		}
-	} else {
-		ip_rt_put(rt);
-		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
-		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
-			kfree_skb(skb2);
-			return 0;
-		}
-	}
-
-	/* change mtu on this route */
-	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		if (n > dst_mtu(skb2->dst)) {
-			kfree_skb(skb2);
-			return 0;
-		}
-		skb2->dst->ops->update_pmtu(skb2->dst, n);
-	} else if (type == ICMP_TIME_EXCEEDED) {
-		struct ip_tunnel *t = netdev_priv(skb2->dev);
-		if (t->parms.iph.ttl) {
-			rel_type = ICMP_DEST_UNREACH;
-			rel_code = ICMP_HOST_UNREACH;
-		}
-	}
-
-	icmp_send(skb2, rel_type, rel_code, rel_info);
-	kfree_skb(skb2);
-	return 0;
-#endif
 }
 
 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 04578593e100..d2a887fc8d9b 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -556,7 +556,6 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 
 	spin_lock_bh(&nf_nat_lock);
 	hlist_del_rcu(&nat->bysource);
-	nat->ct = NULL;
 	spin_unlock_bh(&nf_nat_lock);
 }
 
@@ -570,8 +569,8 @@ static void nf_nat_move_storage(void *new, void *old)
 		return;
 
 	spin_lock_bh(&nf_nat_lock);
-	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	new_nat->ct = ct;
+	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	spin_unlock_bh(&nf_nat_lock);
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5daefad3d193..7750c97fde7b 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
232 } 232 }
233 } 233 }
234 } 234 }
235
236 /* don't trust len bigger than ctx buffer */
237 if (*len > ctx->end - ctx->pointer)
238 return 0;
239
235 return 1; 240 return 1;
236} 241}
237 242
@@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
250 if (!asn1_length_decode(ctx, &def, &len)) 255 if (!asn1_length_decode(ctx, &def, &len))
251 return 0; 256 return 0;
252 257
258 /* primitive shall be definite, indefinite shall be constructed */
259 if (*con == ASN1_PRI && !def)
260 return 0;
261
253 if (def) 262 if (def)
254 *eoc = ctx->pointer + len; 263 *eoc = ctx->pointer + len;
255 else 264 else
@@ -434,6 +443,11 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
434 unsigned long *optr; 443 unsigned long *optr;
435 444
436 size = eoc - ctx->pointer + 1; 445 size = eoc - ctx->pointer + 1;
446
447 /* first subid actually encodes first two subids */
448 if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
449 return 0;
450
437 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); 451 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
438 if (*oid == NULL) { 452 if (*oid == NULL) {
439 if (net_ratelimit()) 453 if (net_ratelimit())
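All three SNMP-NAT hunks enforce the same rule: never trust a length decoded from the wire. The last one additionally guards the kmalloc() multiplication against wraparound. A self-contained model of that guard (function name illustrative, not the kernel's):

    #include <limits.h>
    #include <stdlib.h>

    /* refuse counts that are malformed or would overflow count*size
     * before they ever reach the allocator, as the new check does */
    static unsigned long *alloc_subids(size_t n)
    {
            if (n < 2 || n > ULONG_MAX / sizeof(unsigned long))
                    return NULL;
            return malloc(n * sizeof(unsigned long));
    }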
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index fead049daf43..37a1ecd9d600 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -608,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout)
608 sk_common_release(sk); 608 sk_common_release(sk);
609} 609}
610 610
611static int raw_destroy(struct sock *sk)
612{
613 lock_sock(sk);
614 ip_flush_pending_frames(sk);
615 release_sock(sk);
616 return 0;
617}
618
611/* This gets rid of all the nasties in af_inet. -DaveM */ 619/* This gets rid of all the nasties in af_inet. -DaveM */
612static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) 620static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
613{ 621{
@@ -820,6 +828,7 @@ struct proto raw_prot = {
820 .name = "RAW", 828 .name = "RAW",
821 .owner = THIS_MODULE, 829 .owner = THIS_MODULE,
822 .close = raw_close, 830 .close = raw_close,
831 .destroy = raw_destroy,
823 .connect = ip4_datagram_connect, 832 .connect = ip4_datagram_connect,
824 .disconnect = udp_disconnect, 833 .disconnect = udp_disconnect,
825 .ioctl = raw_ioctl, 834 .ioctl = raw_ioctl,
@@ -925,7 +934,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
925 srcp = inet->num; 934 srcp = inet->num;
926 935
927 seq_printf(seq, "%4d: %08X:%04X %08X:%04X" 936 seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
928 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", 937 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
929 i, src, srcp, dest, destp, sp->sk_state, 938 i, src, srcp, dest, destp, sp->sk_state,
930 atomic_read(&sp->sk_wmem_alloc), 939 atomic_read(&sp->sk_wmem_alloc),
931 atomic_read(&sp->sk_rmem_alloc), 940 atomic_read(&sp->sk_rmem_alloc),
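raw_close() tears the socket down, but data corked with MSG_MORE sits on the pending-frames list and was never freed, so each such close leaked memory; the new .destroy hook flushes it under the socket lock. A user-space trigger for the old leak (needs CAP_NET_RAW; the address and payload are illustrative):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
            struct sockaddr_in dst;
            char buf[8] = { 8 };    /* ICMP echo request stub */

            if (fd < 0)
                    return 1;
            memset(&dst, 0, sizeof(dst));
            dst.sin_family = AF_INET;
            dst.sin_addr.s_addr = inet_addr("127.0.0.1");
            /* MSG_MORE corks the frame instead of sending it */
            sendto(fd, buf, sizeof(buf), MSG_MORE,
                   (struct sockaddr *)&dst, sizeof(dst));
            close(fd);      /* previously left the corked frame allocated */
            return 0;
    }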
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92f90ae46f4a..96be336064fb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = {
160 .negative_advice = ipv4_negative_advice, 160 .negative_advice = ipv4_negative_advice,
161 .link_failure = ipv4_link_failure, 161 .link_failure = ipv4_link_failure,
162 .update_pmtu = ip_rt_update_pmtu, 162 .update_pmtu = ip_rt_update_pmtu,
163 .local_out = ip_local_out, 163 .local_out = __ip_local_out,
164 .entry_size = sizeof(struct rtable), 164 .entry_size = sizeof(struct rtable),
165 .entries = ATOMIC_INIT(0), 165 .entries = ATOMIC_INIT(0),
166}; 166};
@@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb,
1792 if (err) 1792 if (err)
1793 flags |= RTCF_DIRECTSRC; 1793 flags |= RTCF_DIRECTSRC;
1794 1794
1795 if (out_dev == in_dev && err && !(flags & RTCF_MASQ) && 1795 if (out_dev == in_dev && err &&
1796 (IN_DEV_SHARED_MEDIA(out_dev) || 1796 (IN_DEV_SHARED_MEDIA(out_dev) ||
1797 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) 1797 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1798 flags |= RTCF_DOREDIRECT; 1798 flags |= RTCF_DOREDIRECT;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d64..d182a2a26291 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
285 cookie_check_timestamp(&tcp_opt); 285 cookie_check_timestamp(&tcp_opt);
286 286
287 ret = NULL; 287 ret = NULL;
288 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 288 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
289 if (!req) 289 if (!req)
290 goto out; 290 goto out;
291 291
@@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
301 ireq->rmt_port = th->source; 301 ireq->rmt_port = th->source;
302 ireq->loc_addr = ip_hdr(skb)->daddr; 302 ireq->loc_addr = ip_hdr(skb)->daddr;
303 ireq->rmt_addr = ip_hdr(skb)->saddr; 303 ireq->rmt_addr = ip_hdr(skb)->saddr;
304 ireq->opt = NULL;
305 ireq->snd_wscale = tcp_opt.snd_wscale; 304 ireq->snd_wscale = tcp_opt.snd_wscale;
306 ireq->rcv_wscale = tcp_opt.rcv_wscale; 305 ireq->rcv_wscale = tcp_opt.rcv_wscale;
307 ireq->sack_ok = tcp_opt.sack_ok; 306 ireq->sack_ok = tcp_opt.sack_ok;
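Both reqsk_alloc() call sites (here and in tcp_v4_conn_request() below) had to remember to clear ireq->opt by hand; the deleted line shows how easy that was to miss. inet_reqsk_alloc() centralizes it. Its definition is not part of this diff; from the call sites it is presumably a thin wrapper along these lines:

    /* sketch, inferred from usage: allocate a request sock with the
     * IPv4-specific fields in a known-clean state */
    static inline struct request_sock *
    inet_reqsk_alloc(struct request_sock_ops *ops)
    {
            struct request_sock *req = reqsk_alloc(ops);

            if (req != NULL)
                    inet_rsk(req)->opt = NULL;      /* no IP options yet */
            return req;
    }

The IPv6 hunks at the end of this series drop the matching "pktopts = NULL" lines for the same reason.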
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f88653138621..fc54a48fde1e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1227,7 +1227,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1227 copied += used; 1227 copied += used;
1228 offset += used; 1228 offset += used;
1229 } 1229 }
1230 if (offset != skb->len) 1230 /*
1231 * If recv_actor drops the lock (e.g. TCP splice
1232 * receive) the skb pointer might be invalid when
1233 * getting here: tcp_collapse might have deleted it
1234 * while aggregating skbs from the socket queue.
1235 */
1236 skb = tcp_recv_skb(sk, seq-1, &offset);
1237 if (!skb || (offset+1 != skb->len))
1231 break; 1238 break;
1232 } 1239 }
1233 if (tcp_hdr(skb)->fin) { 1240 if (tcp_hdr(skb)->fin) {
@@ -2105,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2105 break; 2112 break;
2106 2113
2107 case TCP_DEFER_ACCEPT: 2114 case TCP_DEFER_ACCEPT:
2108 if (val < 0) { 2115 icsk->icsk_accept_queue.rskq_defer_accept = 0;
2109 err = -EINVAL; 2116 if (val > 0) {
2110 } else { 2117 /* Translate value in seconds to number of
2111 if (val > MAX_TCP_ACCEPT_DEFERRED) 2118 * retransmits */
2112 val = MAX_TCP_ACCEPT_DEFERRED; 2119 while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
2113 icsk->icsk_accept_queue.rskq_defer_accept = val; 2120 val > ((TCP_TIMEOUT_INIT / HZ) <<
2121 icsk->icsk_accept_queue.rskq_defer_accept))
2122 icsk->icsk_accept_queue.rskq_defer_accept++;
2123 icsk->icsk_accept_queue.rskq_defer_accept++;
2114 } 2124 }
2115 break; 2125 break;
2116 2126
@@ -2292,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2292 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2302 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2293 break; 2303 break;
2294 case TCP_DEFER_ACCEPT: 2304 case TCP_DEFER_ACCEPT:
2295 val = icsk->icsk_accept_queue.rskq_defer_accept; 2305 val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
2306 ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
2296 break; 2307 break;
2297 case TCP_WINDOW_CLAMP: 2308 case TCP_WINDOW_CLAMP:
2298 val = tp->window_clamp; 2309 val = tp->window_clamp;
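TCP_DEFER_ACCEPT changes representation here: instead of raw seconds, rskq_defer_accept now stores how many SYN-ACK retransmits (with exponentially growing spacing) cover the requested time, and getsockopt maps the count back to seconds, so a set/get round trip can report a larger value than was set. A runnable model of both directions, assuming the conventional TCP_TIMEOUT_INIT of 3 seconds:

    #include <stdio.h>

    #define TIMEOUT_INIT 3  /* TCP_TIMEOUT_INIT / HZ, assumed 3 s */

    /* seconds -> retransmit count, as in do_tcp_setsockopt() */
    static int defer_to_retrans(int secs)
    {
            int n = 0;

            if (secs <= 0)
                    return 0;
            while (n < 32 && secs > (TIMEOUT_INIT << n))
                    n++;
            return n + 1;
    }

    /* retransmit count -> effective seconds, as in do_tcp_getsockopt() */
    static int retrans_to_secs(int n)
    {
            return n ? TIMEOUT_INIT << (n - 1) : 0;
    }

    int main(void)
    {
            int s;

            for (s = 0; s <= 10; s++)
                    printf("%2d s -> %d rexmits -> %d s\n", s,
                           defer_to_retrans(s),
                           retrans_to_secs(defer_to_retrans(s)));
            return 0;
    }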
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b54d9d37b636..cad73b7dfef0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1392 1392
1393 if (before(next_dup->start_seq, skip_to_seq)) { 1393 if (before(next_dup->start_seq, skip_to_seq)) {
1394 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); 1394 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
1395 tcp_sacktag_walk(skb, sk, NULL, 1395 skb = tcp_sacktag_walk(skb, sk, NULL,
1396 next_dup->start_seq, next_dup->end_seq, 1396 next_dup->start_seq, next_dup->end_seq,
1397 1, fack_count, reord, flag); 1397 1, fack_count, reord, flag);
1398 } 1398 }
1399 1399
1400 return skb; 1400 return skb;
@@ -2483,6 +2483,20 @@ static inline void tcp_complete_cwr(struct sock *sk)
2483 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); 2483 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2484} 2484}
2485 2485
2486static void tcp_try_keep_open(struct sock *sk)
2487{
2488 struct tcp_sock *tp = tcp_sk(sk);
2489 int state = TCP_CA_Open;
2490
2491 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2492 state = TCP_CA_Disorder;
2493
2494 if (inet_csk(sk)->icsk_ca_state != state) {
2495 tcp_set_ca_state(sk, state);
2496 tp->high_seq = tp->snd_nxt;
2497 }
2498}
2499
2486static void tcp_try_to_open(struct sock *sk, int flag) 2500static void tcp_try_to_open(struct sock *sk, int flag)
2487{ 2501{
2488 struct tcp_sock *tp = tcp_sk(sk); 2502 struct tcp_sock *tp = tcp_sk(sk);
@@ -2496,15 +2510,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
2496 tcp_enter_cwr(sk, 1); 2510 tcp_enter_cwr(sk, 1);
2497 2511
2498 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2512 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2499 int state = TCP_CA_Open; 2513 tcp_try_keep_open(sk);
2500
2501 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2502 state = TCP_CA_Disorder;
2503
2504 if (inet_csk(sk)->icsk_ca_state != state) {
2505 tcp_set_ca_state(sk, state);
2506 tp->high_seq = tp->snd_nxt;
2507 }
2508 tcp_moderate_cwnd(tp); 2514 tcp_moderate_cwnd(tp);
2509 } else { 2515 } else {
2510 tcp_cwnd_down(sk, flag); 2516 tcp_cwnd_down(sk, flag);
@@ -3310,8 +3316,11 @@ no_queue:
3310 return 1; 3316 return 1;
3311 3317
3312old_ack: 3318old_ack:
3313 if (TCP_SKB_CB(skb)->sacked) 3319 if (TCP_SKB_CB(skb)->sacked) {
3314 tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3320 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3321 if (icsk->icsk_ca_state == TCP_CA_Open)
3322 tcp_try_keep_open(sk);
3323 }
3315 3324
3316uninteresting_ack: 3325uninteresting_ack:
3317 SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3326 SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -4532,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4532 } 4541 }
4533} 4542}
4534 4543
4535static int tcp_defer_accept_check(struct sock *sk)
4536{
4537 struct tcp_sock *tp = tcp_sk(sk);
4538
4539 if (tp->defer_tcp_accept.request) {
4540 int queued_data = tp->rcv_nxt - tp->copied_seq;
4541 int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
4542 tcp_hdr((struct sk_buff *)
4543 sk->sk_receive_queue.prev)->fin : 0;
4544
4545 if (queued_data && hasfin)
4546 queued_data--;
4547
4548 if (queued_data &&
4549 tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
4550 if (sock_flag(sk, SOCK_KEEPOPEN)) {
4551 inet_csk_reset_keepalive_timer(sk,
4552 keepalive_time_when(tp));
4553 } else {
4554 inet_csk_delete_keepalive_timer(sk);
4555 }
4556
4557 inet_csk_reqsk_queue_add(
4558 tp->defer_tcp_accept.listen_sk,
4559 tp->defer_tcp_accept.request,
4560 sk);
4561
4562 tp->defer_tcp_accept.listen_sk->sk_data_ready(
4563 tp->defer_tcp_accept.listen_sk, 0);
4564
4565 sock_put(tp->defer_tcp_accept.listen_sk);
4566 sock_put(sk);
4567 tp->defer_tcp_accept.listen_sk = NULL;
4568 tp->defer_tcp_accept.request = NULL;
4569 } else if (hasfin ||
4570 tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
4571 tcp_reset(sk);
4572 return -1;
4573 }
4574 }
4575 return 0;
4576}
4577
4578static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) 4544static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4579{ 4545{
4580 struct tcp_sock *tp = tcp_sk(sk); 4546 struct tcp_sock *tp = tcp_sk(sk);
@@ -4935,8 +4901,6 @@ step5:
4935 4901
4936 tcp_data_snd_check(sk); 4902 tcp_data_snd_check(sk);
4937 tcp_ack_snd_check(sk); 4903 tcp_ack_snd_check(sk);
4938
4939 tcp_defer_accept_check(sk);
4940 return 0; 4904 return 0;
4941 4905
4942csum_error: 4906csum_error:
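Three separate fixes above. The sacktag hunk is a lost-return-value bug: tcp_sacktag_walk() returns the skb where the walk stopped, and discarding it made the DSACK path resume from a stale cursor. The tcp_try_keep_open() extraction then lets the old_ack path reuse the Open/Disorder transition that tcp_try_to_open() already performed. The cursor bug in miniature, runnable:

    #include <stdio.h>

    struct node { int val; struct node *next; };

    /* advances a cursor and returns the new position; the caller
     * must keep the result, exactly like tcp_sacktag_walk()'s skb */
    static struct node *skip_below(struct node *n, int thresh)
    {
            while (n != NULL && n->val < thresh)
                    n = n->next;
            return n;
    }

    int main(void)
    {
            struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
            struct node *cur = &a;

            skip_below(cur, 3);             /* bug: cur still points at 1 */
            cur = skip_below(cur, 3);       /* fix: cur now points at 3 */
            printf("%d\n", cur->val);
            return 0;
    }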
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2f..12695be2c255 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,10 +85,6 @@
85int sysctl_tcp_tw_reuse __read_mostly; 85int sysctl_tcp_tw_reuse __read_mostly;
86int sysctl_tcp_low_latency __read_mostly; 86int sysctl_tcp_low_latency __read_mostly;
87 87
88/* Check TCP sequence numbers in ICMP packets. */
89#define ICMP_MIN_LENGTH 8
90
91void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
92 88
93#ifdef CONFIG_TCP_MD5SIG 89#ifdef CONFIG_TCP_MD5SIG
94static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, 90static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
@@ -1285,7 +1281,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1285 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 1281 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1286 goto drop; 1282 goto drop;
1287 1283
1288 req = reqsk_alloc(&tcp_request_sock_ops); 1284 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1289 if (!req) 1285 if (!req)
1290 goto drop; 1286 goto drop;
1291 1287
@@ -1918,14 +1914,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
1918 sk->sk_sndmsg_page = NULL; 1914 sk->sk_sndmsg_page = NULL;
1919 } 1915 }
1920 1916
1921 if (tp->defer_tcp_accept.request) {
1922 reqsk_free(tp->defer_tcp_accept.request);
1923 sock_put(tp->defer_tcp_accept.listen_sk);
1924 sock_put(sk);
1925 tp->defer_tcp_accept.listen_sk = NULL;
1926 tp->defer_tcp_accept.request = NULL;
1927 }
1928
1929 atomic_dec(&tcp_sockets_allocated); 1917 atomic_dec(&tcp_sockets_allocated);
1930 1918
1931 return 0; 1919 return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5cc..8245247a6ceb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
571 does sequence test, SYN is truncated, and thus we consider 571 does sequence test, SYN is truncated, and thus we consider

572 it a bare ACK. 572 it a bare ACK.
573 573
574 Both ends (listening sockets) accept the new incoming 574 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
575 connection and try to talk to each other. 8-) 575 bare ACK. Otherwise, we create an established connection. Both
576 ends (listening sockets) accept the new incoming connection and try
577 to talk to each other. 8-)
576 578
577 Note: This case is both harmless, and rare. Possibility is about the 579 Note: This case is both harmless, and rare. Possibility is about the
 578 same as us discovering intelligent life on another planet tomorrow. 580
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
640 if (!(flg & TCP_FLAG_ACK)) 642 if (!(flg & TCP_FLAG_ACK))
641 return NULL; 643 return NULL;
642 644
645 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
646 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
647 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
648 inet_rsk(req)->acked = 1;
649 return NULL;
650 }
651
643 /* OK, ACK is valid, create big socket and 652 /* OK, ACK is valid, create big socket and
644 * feed this segment to it. It will repeat all 653 * feed this segment to it. It will repeat all
645 * the tests. THIS SEGMENT MUST MOVE SOCKET TO 654 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
678 inet_csk_reqsk_queue_unlink(sk, req, prev); 687 inet_csk_reqsk_queue_unlink(sk, req, prev);
679 inet_csk_reqsk_queue_removed(sk, req); 688 inet_csk_reqsk_queue_removed(sk, req);
680 689
681 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 690 inet_csk_reqsk_queue_add(sk, req, child);
682 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
683
684 /* the accept queue handling is done is est recv slow
685 * path so lets make sure to start there
686 */
687 tcp_sk(child)->pred_flags = 0;
688 sock_hold(sk);
689 sock_hold(child);
690 tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
691 tcp_sk(child)->defer_tcp_accept.request = req;
692
693 inet_csk_reset_keepalive_timer(child,
694 inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
695 } else {
696 inet_csk_reqsk_queue_add(sk, req, child);
697 }
698
699 return child; 691 return child;
700 692
701 listen_overflow: 693 listen_overflow:
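The rewritten deferred-accept logic is now confined to tcp_check_req(): a pure third-handshake ACK ends exactly one past the peer's initial sequence number (the SYN consumes one sequence number and a bare ACK adds no data), so it is marked acked and discarded, and the child socket is created only when a segment with payload arrives. The test, stated standalone:

    #include <stdint.h>

    /* true for a data-less final-handshake ACK: the SYN consumed
     * one sequence number, so the segment ends at rcv_isn + 1 */
    static int is_bare_ack(uint32_t end_seq, uint32_t rcv_isn)
    {
            return end_seq == rcv_isn + 1;
    }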
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index debf23581606..ad993ecb4810 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1836{ 1836{
1837 struct tcp_sock *tp = tcp_sk(sk); 1837 struct tcp_sock *tp = tcp_sk(sk);
1838 struct inet_connection_sock *icsk = inet_csk(sk); 1838 struct inet_connection_sock *icsk = inet_csk(sk);
1839 unsigned int cur_mss = tcp_current_mss(sk, 0); 1839 unsigned int cur_mss;
1840 int err; 1840 int err;
1841 1841
 1842 /* Inconclusive MTU probe */ 1842 /* Inconclusive MTU probe */
@@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1858 return -ENOMEM; 1858 return -ENOMEM;
1859 } 1859 }
1860 1860
1861 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1862 return -EHOSTUNREACH; /* Routing failure or similar. */
1863
1864 cur_mss = tcp_current_mss(sk, 0);
1865
1861 /* If receiver has shrunk his window, and skb is out of 1866 /* If receiver has shrunk his window, and skb is out of
1862 * new window, do not retransmit it. The exception is the 1867 * new window, do not retransmit it. The exception is the
1863 * case, when window is shrunk to zero. In this case 1868 * case, when window is shrunk to zero. In this case
@@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1884 (sysctl_tcp_retrans_collapse != 0)) 1889 (sysctl_tcp_retrans_collapse != 0))
1885 tcp_retrans_try_collapse(sk, skb, cur_mss); 1890 tcp_retrans_try_collapse(sk, skb, cur_mss);
1886 1891
1887 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1888 return -EHOSTUNREACH; /* Routing failure or similar. */
1889
1890 /* Some Solaris stacks overoptimize and ignore the FIN on a 1892 /* Some Solaris stacks overoptimize and ignore the FIN on a
1891 * retransmit when old data is attached. So strip it off 1893 * retransmit when old data is attached. So strip it off
1892 * since it is cheap to do so and saves bytes on the network. 1894 * since it is cheap to do so and saves bytes on the network.
@@ -2129,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2129 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2131 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2130 if (tcp_transmit_skb(sk, skb, 0, priority)) 2132 if (tcp_transmit_skb(sk, skb, 0, priority))
2131 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); 2133 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
2134
2135 TCP_INC_STATS(TCP_MIB_OUTRSTS);
2132} 2136}
2133 2137
2134/* WARNING: This routine must only be called when we have already sent 2138/* WARNING: This routine must only be called when we have already sent
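Two independent changes above: active resets are now counted in TCP_MIB_OUTRSTS, and rebuild_header() moves ahead of the MSS computation. As the move suggests, tcp_current_mss() sizes the retransmit from the cached route, so the route must be (re)validated first or a retransmit after a routing change can be cut for the stale path. The fixed ordering, annotated:

    if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
            return -EHOSTUNREACH;   /* no route: no point sizing the skb */

    cur_mss = tcp_current_mss(sk, 0);       /* reads the revalidated dst */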
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2aa..63ed9d6830e7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
489 goto death; 489 goto death;
490 } 490 }
491 491
492 if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
493 tcp_send_active_reset(sk, GFP_ATOMIC);
494 goto death;
495 }
496
497 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) 492 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
498 goto out; 493 goto out;
499 494
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index d3b709a6f264..cb1f0e83830b 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
97{ 97{
98 struct xfrm_tunnel *handler; 98 struct xfrm_tunnel *handler;
99 99
100 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 100 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
101 goto drop; 101 goto drop;
102 102
103 for (handler = tunnel64_handlers; handler; handler = handler->next) 103 for (handler = tunnel64_handlers; handler; handler = handler->next)
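tunnel64 is the demultiplexer for IPv6 carried over IPv4, so the header its handlers will parse is the inner IPv6 one; pulling only sizeof(struct iphdr) left the tail of that header potentially non-linear. The general precaution:

    /* make the whole inner IPv6 header directly addressable; note that
     * pskb_may_pull() can move skb->data, so re-derive any cached
     * header pointers after a successful pull */
    if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
            goto drop;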
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d63..56fcda3694ba 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
420/* 420/*
421 * Throw away all pending data and cancel the corking. Socket is locked. 421 * Throw away all pending data and cancel the corking. Socket is locked.
422 */ 422 */
423static void udp_flush_pending_frames(struct sock *sk) 423void udp_flush_pending_frames(struct sock *sk)
424{ 424{
425 struct udp_sock *up = udp_sk(sk); 425 struct udp_sock *up = udp_sk(sk);
426 426
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
430 ip_flush_pending_frames(sk); 430 ip_flush_pending_frames(sk);
431 } 431 }
432} 432}
433EXPORT_SYMBOL(udp_flush_pending_frames);
433 434
434/** 435/**
435 * udp4_hwcsum_outgoing - handle outgoing HW checksumming 436 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 584e6d74e3a9..7135279f3f84 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -52,7 +52,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
52 IP_ECN_clear(top_iph); 52 IP_ECN_clear(top_iph);
53 53
54 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 54 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
55 0 : XFRM_MODE_SKB_CB(skb)->frag_off; 55 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
56 ip_select_ident(top_iph, dst->child, NULL); 56 ip_select_ident(top_iph, dst->child, NULL);
57 57
58 top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); 58 top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
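Previously the inner header's entire frag_off field, fragment offset and MF bit included, was copied into the outer header when PMTU discovery was on; only the DF bit is meaningful there. A runnable check of the mask (IP_DF value as in linux/ip.h):

    #include <arpa/inet.h>
    #include <assert.h>
    #include <stdint.h>

    #define IP_DF 0x4000    /* don't-fragment flag, host order */

    int main(void)
    {
            /* inner header of a middle fragment: MF set, offset 123 */
            uint16_t inner = htons(0x2000 | 123);

            assert((inner & htons(IP_DF)) == 0);    /* nothing leaks */

            inner = htons(IP_DF);                   /* DF propagates */
            assert((inner & htons(IP_DF)) == htons(IP_DF));
            return 0;
    }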
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e591e09e5e4e..147588f4c7c0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
731 onlink = -1; 731 onlink = -1;
732 732
733 spin_lock(&ifa->lock); 733 spin_lock(&ifa->lock);
734 lifetime = min_t(unsigned long, 734
735 ifa->valid_lft, 0x7fffffffUL/HZ); 735 lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
736 /*
737 * Note: Because this address is
738 * not permanent, lifetime <
739 * LONG_MAX / HZ here.
740 */
736 if (time_before(expires, 741 if (time_before(expires,
737 ifa->tstamp + lifetime * HZ)) 742 ifa->tstamp + lifetime * HZ))
738 expires = ifa->tstamp + lifetime * HZ; 743 expires = ifa->tstamp + lifetime * HZ;
@@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1722 __u32 valid_lft; 1727 __u32 valid_lft;
1723 __u32 prefered_lft; 1728 __u32 prefered_lft;
1724 int addr_type; 1729 int addr_type;
1725 unsigned long rt_expires;
1726 struct inet6_dev *in6_dev; 1730 struct inet6_dev *in6_dev;
1727 1731
1728 pinfo = (struct prefix_info *) opt; 1732 pinfo = (struct prefix_info *) opt;
@@ -1764,41 +1768,49 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1764 * 2) Configure prefixes with the auto flag set 1768 * 2) Configure prefixes with the auto flag set
1765 */ 1769 */
1766 1770
1767 /* Avoid arithmetic overflow. Really, we could
1768 save rt_expires in seconds, likely valid_lft,
1769 but it would require division in fib gc, that it
1770 not good.
1771 */
1772 if (valid_lft >= 0x7FFFFFFF/HZ)
1773 rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
1774 else
1775 rt_expires = valid_lft * HZ;
1776
1777 /*
1778 * We convert this (in jiffies) to clock_t later.
1779 * Avoid arithmetic overflow there as well.
1780 * Overflow can happen only if HZ < USER_HZ.
1781 */
1782 if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
1783 rt_expires = 0x7FFFFFFF / USER_HZ;
1784
1785 if (pinfo->onlink) { 1771 if (pinfo->onlink) {
1786 struct rt6_info *rt; 1772 struct rt6_info *rt;
1773 unsigned long rt_expires;
1774
1775 /* Avoid arithmetic overflow. Really, we could
1776 * save rt_expires in seconds, likely valid_lft,
 1777 * but it would require division in fib gc, that is
1778 * not good.
1779 */
1780 if (HZ > USER_HZ)
1781 rt_expires = addrconf_timeout_fixup(valid_lft, HZ);
1782 else
1783 rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ);
1784
1785 if (addrconf_finite_timeout(rt_expires))
1786 rt_expires *= HZ;
1787
1787 rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, 1788 rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
1788 dev->ifindex, 1); 1789 dev->ifindex, 1);
1789 1790
1790 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { 1791 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
1791 if (rt->rt6i_flags&RTF_EXPIRES) { 1792 /* Autoconf prefix route */
1792 if (valid_lft == 0) { 1793 if (valid_lft == 0) {
1793 ip6_del_rt(rt); 1794 ip6_del_rt(rt);
1794 rt = NULL; 1795 rt = NULL;
1795 } else { 1796 } else if (addrconf_finite_timeout(rt_expires)) {
1796 rt->rt6i_expires = jiffies + rt_expires; 1797 /* not infinity */
1797 } 1798 rt->rt6i_expires = jiffies + rt_expires;
1799 rt->rt6i_flags |= RTF_EXPIRES;
1800 } else {
1801 rt->rt6i_flags &= ~RTF_EXPIRES;
1802 rt->rt6i_expires = 0;
1798 } 1803 }
1799 } else if (valid_lft) { 1804 } else if (valid_lft) {
1805 clock_t expires = 0;
1806 int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
1807 if (addrconf_finite_timeout(rt_expires)) {
1808 /* not infinity */
1809 flags |= RTF_EXPIRES;
1810 expires = jiffies_to_clock_t(rt_expires);
1811 }
1800 addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, 1812 addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
1801 dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); 1813 dev, expires, flags);
1802 } 1814 }
1803 if (rt) 1815 if (rt)
1804 dst_release(&rt->u.dst); 1816 dst_release(&rt->u.dst);
@@ -2014,17 +2026,22 @@ err_exit:
2014 * Manual configuration of address on an interface 2026 * Manual configuration of address on an interface
2015 */ 2027 */
2016static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, 2028static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
2017 int plen, __u8 ifa_flags, __u32 prefered_lft, 2029 unsigned int plen, __u8 ifa_flags, __u32 prefered_lft,
2018 __u32 valid_lft) 2030 __u32 valid_lft)
2019{ 2031{
2020 struct inet6_ifaddr *ifp; 2032 struct inet6_ifaddr *ifp;
2021 struct inet6_dev *idev; 2033 struct inet6_dev *idev;
2022 struct net_device *dev; 2034 struct net_device *dev;
2023 int scope; 2035 int scope;
2024 u32 flags = RTF_EXPIRES; 2036 u32 flags;
2037 clock_t expires;
2038 unsigned long timeout;
2025 2039
2026 ASSERT_RTNL(); 2040 ASSERT_RTNL();
2027 2041
2042 if (plen > 128)
2043 return -EINVAL;
2044
2028 /* check the lifetime */ 2045 /* check the lifetime */
2029 if (!valid_lft || prefered_lft > valid_lft) 2046 if (!valid_lft || prefered_lft > valid_lft)
2030 return -EINVAL; 2047 return -EINVAL;
@@ -2038,17 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
2038 2055
2039 scope = ipv6_addr_scope(pfx); 2056 scope = ipv6_addr_scope(pfx);
2040 2057
2041 if (valid_lft == INFINITY_LIFE_TIME) { 2058 timeout = addrconf_timeout_fixup(valid_lft, HZ);
2042 ifa_flags |= IFA_F_PERMANENT; 2059 if (addrconf_finite_timeout(timeout)) {
2060 expires = jiffies_to_clock_t(timeout * HZ);
2061 valid_lft = timeout;
2062 flags = RTF_EXPIRES;
2063 } else {
2064 expires = 0;
2043 flags = 0; 2065 flags = 0;
2044 } else if (valid_lft >= 0x7FFFFFFF/HZ) 2066 ifa_flags |= IFA_F_PERMANENT;
2045 valid_lft = 0x7FFFFFFF/HZ; 2067 }
2046 2068
2047 if (prefered_lft == 0) 2069 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
2048 ifa_flags |= IFA_F_DEPRECATED; 2070 if (addrconf_finite_timeout(timeout)) {
2049 else if ((prefered_lft >= 0x7FFFFFFF/HZ) && 2071 if (timeout == 0)
2050 (prefered_lft != INFINITY_LIFE_TIME)) 2072 ifa_flags |= IFA_F_DEPRECATED;
2051 prefered_lft = 0x7FFFFFFF/HZ; 2073 prefered_lft = timeout;
2074 }
2052 2075
2053 ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); 2076 ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
2054 2077
@@ -2060,7 +2083,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
2060 spin_unlock_bh(&ifp->lock); 2083 spin_unlock_bh(&ifp->lock);
2061 2084
2062 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 2085 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
2063 jiffies_to_clock_t(valid_lft * HZ), flags); 2086 expires, flags);
2064 /* 2087 /*
2065 * Note that section 3.1 of RFC 4429 indicates 2088 * Note that section 3.1 of RFC 4429 indicates
2066 * that the Optimistic flag should not be set for 2089 * that the Optimistic flag should not be set for
@@ -2076,12 +2099,15 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
2076} 2099}
2077 2100
2078static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, 2101static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
2079 int plen) 2102 unsigned int plen)
2080{ 2103{
2081 struct inet6_ifaddr *ifp; 2104 struct inet6_ifaddr *ifp;
2082 struct inet6_dev *idev; 2105 struct inet6_dev *idev;
2083 struct net_device *dev; 2106 struct net_device *dev;
2084 2107
2108 if (plen > 128)
2109 return -EINVAL;
2110
2085 dev = __dev_get_by_index(net, ifindex); 2111 dev = __dev_get_by_index(net, ifindex);
2086 if (!dev) 2112 if (!dev)
2087 return -ENODEV; 2113 return -ENODEV;
@@ -3148,22 +3174,30 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
3148static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, 3174static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
3149 u32 prefered_lft, u32 valid_lft) 3175 u32 prefered_lft, u32 valid_lft)
3150{ 3176{
3151 u32 flags = RTF_EXPIRES; 3177 u32 flags;
3178 clock_t expires;
3179 unsigned long timeout;
3152 3180
3153 if (!valid_lft || (prefered_lft > valid_lft)) 3181 if (!valid_lft || (prefered_lft > valid_lft))
3154 return -EINVAL; 3182 return -EINVAL;
3155 3183
3156 if (valid_lft == INFINITY_LIFE_TIME) { 3184 timeout = addrconf_timeout_fixup(valid_lft, HZ);
3157 ifa_flags |= IFA_F_PERMANENT; 3185 if (addrconf_finite_timeout(timeout)) {
3186 expires = jiffies_to_clock_t(timeout * HZ);
3187 valid_lft = timeout;
3188 flags = RTF_EXPIRES;
3189 } else {
3190 expires = 0;
3158 flags = 0; 3191 flags = 0;
3159 } else if (valid_lft >= 0x7FFFFFFF/HZ) 3192 ifa_flags |= IFA_F_PERMANENT;
3160 valid_lft = 0x7FFFFFFF/HZ; 3193 }
3161 3194
3162 if (prefered_lft == 0) 3195 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
3163 ifa_flags |= IFA_F_DEPRECATED; 3196 if (addrconf_finite_timeout(timeout)) {
3164 else if ((prefered_lft >= 0x7FFFFFFF/HZ) && 3197 if (timeout == 0)
3165 (prefered_lft != INFINITY_LIFE_TIME)) 3198 ifa_flags |= IFA_F_DEPRECATED;
3166 prefered_lft = 0x7FFFFFFF/HZ; 3199 prefered_lft = timeout;
3200 }
3167 3201
3168 spin_lock_bh(&ifp->lock); 3202 spin_lock_bh(&ifp->lock);
3169 ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; 3203 ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
@@ -3176,7 +3210,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
3176 ipv6_ifa_notify(0, ifp); 3210 ipv6_ifa_notify(0, ifp);
3177 3211
3178 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, 3212 addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
3179 jiffies_to_clock_t(valid_lft * HZ), flags); 3213 expires, flags);
3180 addrconf_verify(0); 3214 addrconf_verify(0);
3181 3215
3182 return 0; 3216 return 0;
@@ -4242,7 +4276,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev)
4242 neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, 4276 neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
4243 NET_IPV6_NEIGH, "ipv6", 4277 NET_IPV6_NEIGH, "ipv6",
4244 &ndisc_ifinfo_sysctl_change, 4278 &ndisc_ifinfo_sysctl_change,
4245 NULL); 4279 ndisc_ifinfo_sysctl_strategy);
4246 __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, 4280 __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
4247 idev->dev->ifindex, idev, &idev->cnf); 4281 idev->dev->ifindex, idev, &idev->cnf);
4248} 4282}
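Nearly all of the addrconf churn above replaces open-coded 0x7FFFFFFF/HZ clamping with addrconf_timeout_fixup() and addrconf_finite_timeout(). Their definitions live outside this diff; from the call sites, a plausible sketch is the pair below (note the prefix-route path deliberately clamps against the larger of HZ and USER_HZ, since the value is later converted to clock_t):

    /* clamp a lifetime in seconds so lifetime * unit cannot overflow
     * a long; 0xffffffff remains the "infinite" sentinel (~0UL) */
    static inline unsigned long
    addrconf_timeout_fixup(u32 timeout, unsigned int unit)
    {
            if (timeout == 0xffffffff)
                    return ~0UL;
            if (timeout > LONG_MAX / unit)
                    return LONG_MAX / unit;
            return timeout;
    }

    /* finite unless it is the all-ones sentinel */
    static inline int addrconf_finite_timeout(unsigned long timeout)
    {
            return ~timeout != 0;
    }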
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3c6aafb02183..e84b3fd17fb4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -191,7 +191,7 @@ lookup_protocol:
191 np->mcast_hops = -1; 191 np->mcast_hops = -1;
192 np->mc_loop = 1; 192 np->mc_loop = 1;
193 np->pmtudisc = IPV6_PMTUDISC_WANT; 193 np->pmtudisc = IPV6_PMTUDISC_WANT;
194 np->ipv6only = init_net.ipv6.sysctl.bindv6only; 194 np->ipv6only = net->ipv6.sysctl.bindv6only;
195 195
196 /* Init the ipv4 part of the socket since we can have sockets 196 /* Init the ipv4 part of the socket since we can have sockets
197 * using v6 API for ipv4. 197 * using v6 API for ipv4.
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 94fa6ae77cfe..0f0f94a40335 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
496 return 0; 496 return 0;
497} 497}
498 498
499int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, 499int datagram_send_ctl(struct net *net,
500 struct msghdr *msg, struct flowi *fl,
500 struct ipv6_txoptions *opt, 501 struct ipv6_txoptions *opt,
501 int *hlimit, int *tclass) 502 int *hlimit, int *tclass)
502{ 503{
@@ -509,7 +510,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
509 510
510 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { 511 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
511 int addr_type; 512 int addr_type;
512 struct net_device *dev = NULL;
513 513
514 if (!CMSG_OK(msg, cmsg)) { 514 if (!CMSG_OK(msg, cmsg)) {
515 err = -EINVAL; 515 err = -EINVAL;
@@ -522,6 +522,9 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
522 switch (cmsg->cmsg_type) { 522 switch (cmsg->cmsg_type) {
523 case IPV6_PKTINFO: 523 case IPV6_PKTINFO:
524 case IPV6_2292PKTINFO: 524 case IPV6_2292PKTINFO:
525 {
526 struct net_device *dev = NULL;
527
525 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { 528 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
526 err = -EINVAL; 529 err = -EINVAL;
527 goto exit_f; 530 goto exit_f;
@@ -535,32 +538,32 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
535 fl->oif = src_info->ipi6_ifindex; 538 fl->oif = src_info->ipi6_ifindex;
536 } 539 }
537 540
538 addr_type = ipv6_addr_type(&src_info->ipi6_addr); 541 addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
539 542
540 if (addr_type == IPV6_ADDR_ANY) 543 if (fl->oif) {
541 break; 544 dev = dev_get_by_index(net, fl->oif);
545 if (!dev)
546 return -ENODEV;
547 } else if (addr_type & IPV6_ADDR_LINKLOCAL)
548 return -EINVAL;
542 549
543 if (addr_type & IPV6_ADDR_LINKLOCAL) { 550 if (addr_type != IPV6_ADDR_ANY) {
544 if (!src_info->ipi6_ifindex) 551 int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
545 return -EINVAL; 552 if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
546 else { 553 strict ? dev : NULL, 0))
547 dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); 554 err = -EINVAL;
548 if (!dev) 555 else
549 return -ENODEV; 556 ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
550 }
551 }
552 if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr,
553 dev, 0)) {
554 if (dev)
555 dev_put(dev);
556 err = -EINVAL;
557 goto exit_f;
558 } 557 }
558
559 if (dev) 559 if (dev)
560 dev_put(dev); 560 dev_put(dev);
561 561
562 ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); 562 if (err)
563 goto exit_f;
564
563 break; 565 break;
566 }
564 567
565 case IPV6_FLOWINFO: 568 case IPV6_FLOWINFO:
566 if (cmsg->cmsg_len < CMSG_LEN(4)) { 569 if (cmsg->cmsg_len < CMSG_LEN(4)) {
@@ -702,6 +705,11 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
702 } 705 }
703 706
704 *hlimit = *(int *)CMSG_DATA(cmsg); 707 *hlimit = *(int *)CMSG_DATA(cmsg);
708 if (*hlimit < -1 || *hlimit > 0xff) {
709 err = -EINVAL;
710 goto exit_f;
711 }
712
705 break; 713 break;
706 714
707 case IPV6_TCLASS: 715 case IPV6_TCLASS:
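The datagram_send_ctl() changes do three things: take the namespace from the caller instead of hardcoding init_net, validate IPV6_PKTINFO source addresses against the right device with scope-dependent strictness, and range-check IPV6_HOPLIMIT (the wire field is 8 bits and -1 means "use the route default", so anything outside -1..255 is now EINVAL). For reference, the user-side ancillary object that check applies to (helper name illustrative):

    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>

    /* attach an IPV6_HOPLIMIT cmsg; values outside -1..255 now get
     * EINVAL from sendmsg() instead of being accepted silently */
    static void set_hoplimit(struct msghdr *msg, char *ctl,
                             size_t ctlsz, int hops)
    {
            struct cmsghdr *cm;

            memset(ctl, 0, ctlsz);  /* ctlsz >= CMSG_SPACE(sizeof(int)) */
            msg->msg_control = ctl;
            msg->msg_controllen = CMSG_SPACE(sizeof(int));
            cm = CMSG_FIRSTHDR(msg);
            cm->cmsg_level = IPPROTO_IPV6;
            cm->cmsg_type = IPV6_HOPLIMIT;
            cm->cmsg_len = CMSG_LEN(sizeof(int));
            memcpy(CMSG_DATA(cm), &hops, sizeof(int));
    }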
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index eb7a940310f4..37a4e777e347 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
354 msg.msg_control = (void*)(fl->opt+1); 354 msg.msg_control = (void*)(fl->opt+1);
355 flowi.oif = 0; 355 flowi.oif = 0;
356 356
357 err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); 357 err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk);
358 if (err) 358 if (err)
359 goto done; 359 goto done;
360 err = -EINVAL; 360 err = -EINVAL;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 4e5c8615832c..17eb48b8e329 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
102 if (hdr->version != 6) 102 if (hdr->version != 6)
103 goto err; 103 goto err;
104 104
105 /*
106 * RFC4291 2.5.3
107 * A packet received on an interface with a destination address
108 * of loopback must be dropped.
109 */
110 if (!(dev->flags & IFF_LOOPBACK) &&
111 ipv6_addr_loopback(&hdr->daddr))
112 goto err;
113
105 skb->transport_header = skb->network_header + sizeof(*hdr); 114 skb->transport_header = skb->network_header + sizeof(*hdr);
106 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); 115 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
107 116
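RFC 4291 2.5.3 forbids ::1 as a destination on the wire, so ipv6_rcv() now drops such packets unless they arrive on a loopback device. The ipv6_addr_loopback() helper is not shown in this diff; it plausibly reduces to a branch-free compare against ::1:

    /* true iff a == ::1 (first three words zero, last word 1) */
    static inline int ipv6_addr_loopback(const struct in6_addr *a)
    {
            return (a->s6_addr32[0] | a->s6_addr32[1] |
                    a->s6_addr32[2] |
                    (a->s6_addr32[3] ^ htonl(1))) == 0;
    }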
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2de3c464fe75..14796181e8b5 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -197,7 +197,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
197 const char *name = vif->dev ? vif->dev->name : "none"; 197 const char *name = vif->dev ? vif->dev->name : "none";
198 198
199 seq_printf(seq, 199 seq_printf(seq,
200 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n", 200 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
201 vif - vif6_table, 201 vif - vif6_table,
202 name, vif->bytes_in, vif->pkt_in, 202 name, vif->bytes_in, vif->pkt_in,
203 vif->bytes_out, vif->pkt_out, 203 vif->bytes_out, vif->pkt_out,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 56d55fecf8ec..86e28a75267f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -67,7 +67,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
67 67
68 /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ 68 /* RA packet may be delivered ONLY to IPPROTO_RAW socket */
69 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW) 69 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
70 return -EINVAL; 70 return -ENOPROTOOPT;
71 71
72 new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 72 new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
73 73
@@ -161,9 +161,17 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
161 struct ipv6_txoptions *opt; 161 struct ipv6_txoptions *opt;
162 struct sk_buff *pktopt; 162 struct sk_buff *pktopt;
163 163
164 if (sk->sk_protocol != IPPROTO_UDP && 164 if (sk->sk_type == SOCK_RAW)
165 sk->sk_protocol != IPPROTO_UDPLITE && 165 break;
166 sk->sk_protocol != IPPROTO_TCP) 166
167 if (sk->sk_protocol == IPPROTO_UDP ||
168 sk->sk_protocol == IPPROTO_UDPLITE) {
169 struct udp_sock *up = udp_sk(sk);
170 if (up->pending == AF_INET6) {
171 retv = -EBUSY;
172 break;
173 }
174 } else if (sk->sk_protocol != IPPROTO_TCP)
167 break; 175 break;
168 176
169 if (sk->sk_state != TCP_ESTABLISHED) { 177 if (sk->sk_state != TCP_ESTABLISHED) {
@@ -337,18 +345,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
337 case IPV6_DSTOPTS: 345 case IPV6_DSTOPTS:
338 { 346 {
339 struct ipv6_txoptions *opt; 347 struct ipv6_txoptions *opt;
348
349 /* remove any sticky options header with a zero option
350 * length, per RFC3542.
351 */
340 if (optlen == 0) 352 if (optlen == 0)
341 optval = NULL; 353 optval = NULL;
354 else if (optlen < sizeof(struct ipv6_opt_hdr) ||
355 optlen & 0x7 || optlen > 8 * 255)
356 goto e_inval;
342 357
343 /* hop-by-hop / destination options are privileged option */ 358 /* hop-by-hop / destination options are privileged option */
344 retv = -EPERM; 359 retv = -EPERM;
345 if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) 360 if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
346 break; 361 break;
347 362
348 if (optlen < sizeof(struct ipv6_opt_hdr) ||
349 optlen & 0x7 || optlen > 8 * 255)
350 goto e_inval;
351
352 opt = ipv6_renew_options(sk, np->opt, optname, 363 opt = ipv6_renew_options(sk, np->opt, optname,
353 (struct ipv6_opt_hdr __user *)optval, 364 (struct ipv6_opt_hdr __user *)optval,
354 optlen); 365 optlen);
@@ -416,7 +427,7 @@ sticky_done:
416 msg.msg_controllen = optlen; 427 msg.msg_controllen = optlen;
417 msg.msg_control = (void*)(opt+1); 428 msg.msg_control = (void*)(opt+1);
418 429
419 retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); 430 retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk);
420 if (retv) 431 if (retv)
421 goto done; 432 goto done;
422update: 433update:
@@ -438,7 +449,7 @@ done:
438 449
439 case IPV6_MULTICAST_HOPS: 450 case IPV6_MULTICAST_HOPS:
440 if (sk->sk_type == SOCK_STREAM) 451 if (sk->sk_type == SOCK_STREAM)
441 goto e_inval; 452 break;
442 if (optlen < sizeof(int)) 453 if (optlen < sizeof(int))
443 goto e_inval; 454 goto e_inval;
444 if (val > 255 || val < -1) 455 if (val > 255 || val < -1)
@@ -450,13 +461,15 @@ done:
450 case IPV6_MULTICAST_LOOP: 461 case IPV6_MULTICAST_LOOP:
451 if (optlen < sizeof(int)) 462 if (optlen < sizeof(int))
452 goto e_inval; 463 goto e_inval;
464 if (val != valbool)
465 goto e_inval;
453 np->mc_loop = valbool; 466 np->mc_loop = valbool;
454 retv = 0; 467 retv = 0;
455 break; 468 break;
456 469
457 case IPV6_MULTICAST_IF: 470 case IPV6_MULTICAST_IF:
458 if (sk->sk_type == SOCK_STREAM) 471 if (sk->sk_type == SOCK_STREAM)
459 goto e_inval; 472 break;
460 if (optlen < sizeof(int)) 473 if (optlen < sizeof(int))
461 goto e_inval; 474 goto e_inval;
462 475
@@ -832,7 +845,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
832 len = min_t(unsigned int, len, ipv6_optlen(hdr)); 845 len = min_t(unsigned int, len, ipv6_optlen(hdr));
833 if (copy_to_user(optval, hdr, len)) 846 if (copy_to_user(optval, hdr, len))
834 return -EFAULT; 847 return -EFAULT;
835 return ipv6_optlen(hdr); 848 return len;
836} 849}
837 850
838static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, 851static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
@@ -852,7 +865,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
852 if (sk->sk_protocol != IPPROTO_UDP && 865 if (sk->sk_protocol != IPPROTO_UDP &&
853 sk->sk_protocol != IPPROTO_UDPLITE && 866 sk->sk_protocol != IPPROTO_UDPLITE &&
854 sk->sk_protocol != IPPROTO_TCP) 867 sk->sk_protocol != IPPROTO_TCP)
855 return -EINVAL; 868 return -ENOPROTOOPT;
856 if (sk->sk_state != TCP_ESTABLISHED) 869 if (sk->sk_state != TCP_ESTABLISHED)
857 return -ENOTCONN; 870 return -ENOTCONN;
858 val = sk->sk_family; 871 val = sk->sk_family;
@@ -866,6 +879,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
866 return -EINVAL; 879 return -EINVAL;
867 if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) 880 if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
868 return -EFAULT; 881 return -EFAULT;
882 if (gsf.gf_group.ss_family != AF_INET6)
883 return -EADDRNOTAVAIL;
869 lock_sock(sk); 884 lock_sock(sk);
870 err = ip6_mc_msfget(sk, &gsf, 885 err = ip6_mc_msfget(sk, &gsf,
871 (struct group_filter __user *)optval, optlen); 886 (struct group_filter __user *)optval, optlen);
@@ -975,6 +990,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
975 len = ipv6_getsockopt_sticky(sk, np->opt, 990 len = ipv6_getsockopt_sticky(sk, np->opt,
976 optname, optval, len); 991 optname, optval, len);
977 release_sock(sk); 992 release_sock(sk);
993 /* check if ipv6_getsockopt_sticky() returns err code */
994 if (len < 0)
995 return len;
978 return put_user(len, optlen); 996 return put_user(len, optlen);
979 } 997 }
980 998
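A cluster of sockopt-conformance fixes above: cases where the option does not apply now return ENOPROTOOPT rather than EINVAL, IPV6_ADDRFORM refuses a UDP socket with a corked send in flight (EBUSY), ipv6_getsockopt_sticky() reports the possibly truncated length it actually copied, and a negative return from it is propagated instead of being handed to put_user(). The sticky-option length test itself encodes RFC 3542: zero length removes the header; otherwise the length must cover a header, be a multiple of 8, and fit what the 8-bit hdrlen byte (counted in 8-octet units) can describe. Mirrored standalone:

    #include <stddef.h>

    struct ipv6_opt_hdr { unsigned char nexthdr, hdrlen; };

    static int sticky_optlen_ok(size_t optlen)
    {
            if (optlen == 0)
                    return 1;       /* zero length: delete the header */
            return optlen >= sizeof(struct ipv6_opt_hdr) &&
                   (optlen & 0x7) == 0 && optlen <= 8 * 255;
    }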
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a55fc05b8125..282fdb31f8ed 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1727,10 +1727,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
1727 return ret; 1727 return ret;
1728} 1728}
1729 1729
1730static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, 1730int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
1731 int nlen, void __user *oldval, 1731 int nlen, void __user *oldval,
1732 size_t __user *oldlenp, 1732 size_t __user *oldlenp,
1733 void __user *newval, size_t newlen) 1733 void __user *newval, size_t newlen)
1734{ 1734{
1735 struct net_device *dev = ctl->extra1; 1735 struct net_device *dev = ctl->extra1;
1736 struct inet6_dev *idev; 1736 struct inet6_dev *idev;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 2dccad48058c..e65e26e210ee 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -209,7 +209,9 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
209 arg.dst = dst; 209 arg.dst = dst;
210 hash = ip6qhashfn(id, src, dst); 210 hash = ip6qhashfn(id, src, dst);
211 211
212 local_bh_disable();
212 q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); 213 q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
214 local_bh_enable();
213 if (q == NULL) 215 if (q == NULL)
214 goto oom; 216 goto oom;
215 217
@@ -638,10 +640,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
638 goto ret_orig; 640 goto ret_orig;
639 } 641 }
640 642
641 spin_lock(&fq->q.lock); 643 spin_lock_bh(&fq->q.lock);
642 644
643 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { 645 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
644 spin_unlock(&fq->q.lock); 646 spin_unlock_bh(&fq->q.lock);
645 pr_debug("Can't insert skb to queue\n"); 647 pr_debug("Can't insert skb to queue\n");
646 fq_put(fq); 648 fq_put(fq);
647 goto ret_orig; 649 goto ret_orig;
@@ -653,7 +655,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
653 if (ret_skb == NULL) 655 if (ret_skb == NULL)
654 pr_debug("Can't reassemble fragmented packets\n"); 656 pr_debug("Can't reassemble fragmented packets\n");
655 } 657 }
656 spin_unlock(&fq->q.lock); 658 spin_unlock_bh(&fq->q.lock);
657 659
658 fq_put(fq); 660 fq_put(fq);
659 return ret_skb; 661 return ret_skb;
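These fragment queues and their hash are also manipulated from softirq context (the receive path and the expiry timer), while nf_ct_frag6_gather() can run in process context for locally generated packets. Taking the locks there with plain spin_lock() lets a softirq on the same CPU interrupt the holder and spin forever on the lock it owns, hence spin_lock_bh() and the local_bh_disable() bracket around inet_frag_find(). The rule:

    /* lock shared with softirq context: disable BH while holding it,
     * or an interrupting softirq can deadlock on this same lock */
    spin_lock_bh(&fq->q.lock);
    /* ... queue or reassemble fragments ... */
    spin_unlock_bh(&fq->q.lock);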
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 232e0dc45bf5..3aee12310d94 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -813,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
813 memset(opt, 0, sizeof(struct ipv6_txoptions)); 813 memset(opt, 0, sizeof(struct ipv6_txoptions));
814 opt->tot_len = sizeof(struct ipv6_txoptions); 814 opt->tot_len = sizeof(struct ipv6_txoptions);
815 815
816 err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); 816 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
817 if (err < 0) { 817 if (err < 0) {
818 fl6_sock_release(flowlabel); 818 fl6_sock_release(flowlabel);
819 return err; 819 return err;
@@ -1164,6 +1164,15 @@ static void rawv6_close(struct sock *sk, long timeout)
1164 sk_common_release(sk); 1164 sk_common_release(sk);
1165} 1165}
1166 1166
1167static int raw6_destroy(struct sock *sk)
1168{
1169 lock_sock(sk);
1170 ip6_flush_pending_frames(sk);
1171 release_sock(sk);
1172
1173 return inet6_destroy_sock(sk);
1174}
1175
1167static int rawv6_init_sk(struct sock *sk) 1176static int rawv6_init_sk(struct sock *sk)
1168{ 1177{
1169 struct raw6_sock *rp = raw6_sk(sk); 1178 struct raw6_sock *rp = raw6_sk(sk);
@@ -1187,11 +1196,11 @@ struct proto rawv6_prot = {
1187 .name = "RAWv6", 1196 .name = "RAWv6",
1188 .owner = THIS_MODULE, 1197 .owner = THIS_MODULE,
1189 .close = rawv6_close, 1198 .close = rawv6_close,
1199 .destroy = raw6_destroy,
1190 .connect = ip6_datagram_connect, 1200 .connect = ip6_datagram_connect,
1191 .disconnect = udp_disconnect, 1201 .disconnect = udp_disconnect,
1192 .ioctl = rawv6_ioctl, 1202 .ioctl = rawv6_ioctl,
1193 .init = rawv6_init_sk, 1203 .init = rawv6_init_sk,
1194 .destroy = inet6_destroy_sock,
1195 .setsockopt = rawv6_setsockopt, 1204 .setsockopt = rawv6_setsockopt,
1196 .getsockopt = rawv6_getsockopt, 1205 .getsockopt = rawv6_getsockopt,
1197 .sendmsg = rawv6_sendmsg, 1206 .sendmsg = rawv6_sendmsg,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 12bba0880345..d1f3e19b06c7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -109,7 +109,7 @@ static struct dst_ops ip6_dst_ops_template = {
109 .negative_advice = ip6_negative_advice, 109 .negative_advice = ip6_negative_advice,
110 .link_failure = ip6_link_failure, 110 .link_failure = ip6_link_failure,
111 .update_pmtu = ip6_rt_update_pmtu, 111 .update_pmtu = ip6_rt_update_pmtu,
112 .local_out = ip6_local_out, 112 .local_out = __ip6_local_out,
113 .entry_size = sizeof(struct rt6_info), 113 .entry_size = sizeof(struct rt6_info),
114 .entries = ATOMIC_INIT(0), 114 .entries = ATOMIC_INIT(0),
115}; 115};
@@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
446 struct route_info *rinfo = (struct route_info *) opt; 446 struct route_info *rinfo = (struct route_info *) opt;
447 struct in6_addr prefix_buf, *prefix; 447 struct in6_addr prefix_buf, *prefix;
448 unsigned int pref; 448 unsigned int pref;
449 u32 lifetime; 449 unsigned long lifetime;
450 struct rt6_info *rt; 450 struct rt6_info *rt;
451 451
452 if (len < sizeof(struct route_info)) { 452 if (len < sizeof(struct route_info)) {
@@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
472 if (pref == ICMPV6_ROUTER_PREF_INVALID) 472 if (pref == ICMPV6_ROUTER_PREF_INVALID)
473 pref = ICMPV6_ROUTER_PREF_MEDIUM; 473 pref = ICMPV6_ROUTER_PREF_MEDIUM;
474 474
475 lifetime = ntohl(rinfo->lifetime); 475 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
476 if (lifetime == 0xffffffff) {
477 /* infinity */
478 } else if (lifetime > 0x7fffffff/HZ) {
479 /* Avoid arithmetic overflow */
480 lifetime = 0x7fffffff/HZ - 1;
481 }
482 476
483 if (rinfo->length == 3) 477 if (rinfo->length == 3)
484 prefix = (struct in6_addr *)rinfo->prefix; 478 prefix = (struct in6_addr *)rinfo->prefix;
@@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
506 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 500 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
507 501
508 if (rt) { 502 if (rt) {
509 if (lifetime == 0xffffffff) { 503 if (!addrconf_finite_timeout(lifetime)) {
510 rt->rt6i_flags &= ~RTF_EXPIRES; 504 rt->rt6i_flags &= ~RTF_EXPIRES;
511 } else { 505 } else {
512 rt->rt6i_expires = jiffies + HZ * lifetime; 506 rt->rt6i_expires = jiffies + HZ * lifetime;
@@ -1106,7 +1100,9 @@ int ip6_route_add(struct fib6_config *cfg)
1106 } 1100 }
1107 1101
1108 rt->u.dst.obsolete = -1; 1102 rt->u.dst.obsolete = -1;
1109 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); 1103 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1104 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1105 0;
1110 1106
1111 if (cfg->fc_protocol == RTPROT_UNSPEC) 1107 if (cfg->fc_protocol == RTPROT_UNSPEC)
1112 cfg->fc_protocol = RTPROT_BOOT; 1108 cfg->fc_protocol = RTPROT_BOOT;
@@ -2200,7 +2196,13 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2200 2196
2201 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2197 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2202 2198
2203 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0; 2199 if (!(rt->rt6i_flags & RTF_EXPIRES))
2200 expires = 0;
2201 else if (rt->rt6i_expires - jiffies < INT_MAX)
2202 expires = rt->rt6i_expires - jiffies;
2203 else
2204 expires = INT_MAX;
2205
2204 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, 2206 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2205 expires, rt->u.dst.error) < 0) 2207 expires, rt->u.dst.error) < 0)
2206 goto nla_put_failure; 2208 goto nla_put_failure;
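Two related expiry fixes in the IPv6 routing code: rt6i_expires is now meaningful only when RTF_EXPIRES is set (ip6_route_add() stores 0 otherwise, and rt6_route_rcv() sets or clears the flag explicitly), and the value dumped to netlink is clamped so the remaining time cannot wrap the signed field. The dump-side computation, standalone (RTF_EXPIRES value as in linux/ipv6_route.h):

    #include <limits.h>

    #define RTF_EXPIRES 0x00400000

    /* 0 means "never expires"; otherwise remaining jiffies, clamped */
    static long rt_remaining(unsigned int flags, unsigned long expires,
                             unsigned long now)
    {
            if (!(flags & RTF_EXPIRES))
                    return 0;
            return expires - now < INT_MAX ? (long)(expires - now)
                                           : INT_MAX;
    }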
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5a6fab95569f..32e871a6c25a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -222,15 +222,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
222 222
223} 223}
224 224
225static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) 225static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
226 struct ip_tunnel_prl __user *a)
226{ 227{
227 struct ip_tunnel_prl *kp; 228 struct ip_tunnel_prl kprl, *kp;
228 struct ip_tunnel_prl_entry *prl; 229 struct ip_tunnel_prl_entry *prl;
229 unsigned int cmax, c = 0, ca, len; 230 unsigned int cmax, c = 0, ca, len;
230 int ret = 0; 231 int ret = 0;
231 232
232 cmax = a->datalen / sizeof(*a); 233 if (copy_from_user(&kprl, a, sizeof(kprl)))
233 if (cmax > 1 && a->addr != htonl(INADDR_ANY)) 234 return -EFAULT;
235 cmax = kprl.datalen / sizeof(kprl);
236 if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
234 cmax = 1; 237 cmax = 1;
235 238
236 /* For simple GET or for root users, 239 /* For simple GET or for root users,
@@ -261,26 +264,25 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
261 for (prl = t->prl; prl; prl = prl->next) { 264 for (prl = t->prl; prl; prl = prl->next) {
262 if (c > cmax) 265 if (c > cmax)
263 break; 266 break;
264 if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr) 267 if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
265 continue; 268 continue;
266 kp[c].addr = prl->addr; 269 kp[c].addr = prl->addr;
267 kp[c].flags = prl->flags; 270 kp[c].flags = prl->flags;
268 c++; 271 c++;
269 if (a->addr != htonl(INADDR_ANY)) 272 if (kprl.addr != htonl(INADDR_ANY))
270 break; 273 break;
271 } 274 }
272out: 275out:
273 read_unlock(&ipip6_lock); 276 read_unlock(&ipip6_lock);
274 277
275 len = sizeof(*kp) * c; 278 len = sizeof(*kp) * c;
276 ret = len ? copy_to_user(a->data, kp, len) : 0; 279 ret = 0;
280 if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
281 ret = -EFAULT;
277 282
278 kfree(kp); 283 kfree(kp);
279 if (ret)
280 return -EFAULT;
281 284
282 a->datalen = len; 285 return ret;
283 return 0;
284} 286}
285 287
286static int 288static int
@@ -403,9 +405,8 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
403 405
404static int ipip6_err(struct sk_buff *skb, u32 info) 406static int ipip6_err(struct sk_buff *skb, u32 info)
405{ 407{
406#ifndef I_WISH_WORLD_WERE_PERFECT
407 408
408/* It is not :-( All the routers (except for Linux) return only 409/* All the routers (except for Linux) return only
 409 8 bytes of packet payload. It means that precise relaying of 410 8 bytes of packet payload. It means that precise relaying of
410 ICMP in the real Internet is absolutely infeasible. 411 ICMP in the real Internet is absolutely infeasible.
411 */ 412 */
@@ -462,92 +463,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
462out: 463out:
463 read_unlock(&ipip6_lock); 464 read_unlock(&ipip6_lock);
464 return err; 465 return err;
465#else
466 struct iphdr *iph = (struct iphdr*)dp;
467 int hlen = iph->ihl<<2;
468 struct ipv6hdr *iph6;
469 const int type = icmp_hdr(skb)->type;
470 const int code = icmp_hdr(skb)->code;
471 int rel_type = 0;
472 int rel_code = 0;
473 int rel_info = 0;
474 struct sk_buff *skb2;
475 struct rt6_info *rt6i;
476
477 if (len < hlen + sizeof(struct ipv6hdr))
478 return;
479 iph6 = (struct ipv6hdr*)(dp + hlen);
480
481 switch (type) {
482 default:
483 return;
484 case ICMP_PARAMETERPROB:
485 if (icmp_hdr(skb)->un.gateway < hlen)
486 return;
487
488 /* So... This guy found something strange INSIDE encapsulated
 489 packet. Well, he is a fool, but what can we do?
490 */
491 rel_type = ICMPV6_PARAMPROB;
492 rel_info = icmp_hdr(skb)->un.gateway - hlen;
493 break;
494
495 case ICMP_DEST_UNREACH:
496 switch (code) {
497 case ICMP_SR_FAILED:
498 case ICMP_PORT_UNREACH:
499 /* Impossible event. */
500 return;
501 case ICMP_FRAG_NEEDED:
502 /* Too complicated case ... */
503 return;
504 default:
505 /* All others are translated to HOST_UNREACH.
506 rfc2003 contains "deep thoughts" about NET_UNREACH,
507 I believe, it is just ether pollution. --ANK
508 */
509 rel_type = ICMPV6_DEST_UNREACH;
510 rel_code = ICMPV6_ADDR_UNREACH;
511 break;
512 }
513 break;
514 case ICMP_TIME_EXCEEDED:
515 if (code != ICMP_EXC_TTL)
516 return;
517 rel_type = ICMPV6_TIME_EXCEED;
518 rel_code = ICMPV6_EXC_HOPLIMIT;
519 break;
520 }
521
522 /* Prepare fake skb to feed it to icmpv6_send */
523 skb2 = skb_clone(skb, GFP_ATOMIC);
524 if (skb2 == NULL)
525 return 0;
526 dst_release(skb2->dst);
527 skb2->dst = NULL;
528 skb_pull(skb2, skb->data - (u8*)iph6);
529 skb_reset_network_header(skb2);
530
531 /* Try to guess incoming interface */
532 rt6i = rt6_lookup(dev_net(skb->dev), &iph6->saddr, NULL, NULL, 0);
533 if (rt6i && rt6i->rt6i_dev) {
534 skb2->dev = rt6i->rt6i_dev;
535
536 rt6i = rt6_lookup(dev_net(skb->dev),
537 &iph6->daddr, &iph6->saddr, NULL, 0);
538
539 if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
540 struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
541 if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
542 rel_type = ICMPV6_DEST_UNREACH;
543 rel_code = ICMPV6_ADDR_UNREACH;
544 }
545 icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
546 }
547 }
548 kfree_skb(skb2);
549 return 0;
550#endif
551} 466}
552 467
553static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 468static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
@@ -960,11 +875,20 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
960 break; 875 break;
961 876
962 case SIOCGETPRL: 877 case SIOCGETPRL:
878 err = -EINVAL;
879 if (dev == sitn->fb_tunnel_dev)
880 goto done;
881 err = -ENOENT;
882 if (!(t = netdev_priv(dev)))
883 goto done;
884 err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
885 break;
886
963 case SIOCADDPRL: 887 case SIOCADDPRL:
964 case SIOCDELPRL: 888 case SIOCDELPRL:
965 case SIOCCHGPRL: 889 case SIOCCHGPRL:
966 err = -EPERM; 890 err = -EPERM;
967 if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN)) 891 if (!capable(CAP_NET_ADMIN))
968 goto done; 892 goto done;
969 err = -EINVAL; 893 err = -EINVAL;
970 if (dev == sitn->fb_tunnel_dev) 894 if (dev == sitn->fb_tunnel_dev)
@@ -977,12 +901,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
977 goto done; 901 goto done;
978 902
979 switch (cmd) { 903 switch (cmd) {
980 case SIOCGETPRL:
981 err = ipip6_tunnel_get_prl(t, &prl);
982 if (!err && copy_to_user(ifr->ifr_ifru.ifru_data,
983 &prl, sizeof(prl)))
984 err = -EFAULT;
985 break;
986 case SIOCDELPRL: 904 case SIOCDELPRL:
987 err = ipip6_tunnel_del_prl(t, &prl); 905 err = ipip6_tunnel_del_prl(t, &prl);
988 break; 906 break;
@@ -991,8 +909,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
991 err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); 909 err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
992 break; 910 break;
993 } 911 }
994 if (cmd != SIOCGETPRL) 912 netdev_state_change(dev);
995 netdev_state_change(dev);
996 break; 913 break;
997 914
998 default: 915 default:
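
The sit.c hunks above rework prl handling: SIOCGETPRL becomes its own ioctl case, and ipip6_tunnel_get_prl() now copies a variable-length array of potential-router entries to user space itself, placing the entries directly after the caller's struct ip_tunnel_prl header (a + 1) and writing the byte count back through put_user(len, &a->datalen), instead of returning a single entry by value. A minimal user-space sketch of the calling convention this implies — SIOCGETPRL and struct ip_tunnel_prl come from linux/if_tunnel.h, while the device name, capacity handling, and error paths here are placeholders, and header availability varies by kernel version:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <netinet/in.h>
    #include <linux/if_tunnel.h>

    int main(void)
    {
        struct ifreq ifr;
        size_t max = 16;    /* placeholder capacity */
        /* header struct followed by room for 'max' entries */
        struct ip_tunnel_prl *buf = calloc(1 + max, sizeof(*buf));
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0 || !buf)
            return 1;
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "sit1", IFNAMSIZ - 1);  /* placeholder device */
        buf->addr = htonl(INADDR_ANY);       /* INADDR_ANY: dump every entry */
        buf->datalen = max * sizeof(*buf);   /* capacity hint */
        ifr.ifr_ifru.ifru_data = (void *)buf;

        if (ioctl(fd, SIOCGETPRL, &ifr) == 0) {
            /* entries follow the header; the kernel wrote the byte
             * count back into buf->datalen */
            size_t i, n = buf->datalen / sizeof(*buf);
            for (i = 0; i < n; i++)
                printf("prl[%zu]: addr=%#x flags=%#x\n",
                       i, (buf + 1)[i].addr, (buf + 1)[i].flags);
        }
        free(buf);
        close(fd);
        return 0;
    }
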
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 938ce4ecde55..3ecc1157994e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -198,7 +198,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
198 ireq = inet_rsk(req); 198 ireq = inet_rsk(req);
199 ireq6 = inet6_rsk(req); 199 ireq6 = inet6_rsk(req);
200 treq = tcp_rsk(req); 200 treq = tcp_rsk(req);
201 ireq6->pktopts = NULL;
202 201
203 if (security_inet_conn_request(sk, skb, req)) { 202 if (security_inet_conn_request(sk, skb, req)) {
204 reqsk_free(req); 203 reqsk_free(req);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 715965f0fac0..cb46749d4c32 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1299,7 +1299,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1299 treq = inet6_rsk(req); 1299 treq = inet6_rsk(req);
1300 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); 1300 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1301 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); 1301 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1302 treq->pktopts = NULL;
1303 if (!want_cookie) 1302 if (!want_cookie)
1304 TCP_ECN_create_request(req, tcp_hdr(skb)); 1303 TCP_ECN_create_request(req, tcp_hdr(skb));
1305 1304
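
Both hunks above delete an explicit NULL store into the request sock's pktopts field; the initialization evidently moved to the common request-sock allocation path, so the per-call-site assignments became dead code. A trivial sketch of why a zeroing allocation makes such stores redundant:

    #include <assert.h>
    #include <stdlib.h>

    struct request {
        void *pktopts;
        int   state;
    };

    int main(void)
    {
        /* calloc(), like a zeroing allocation in the kernel, hands back
         * memory with every field already 0/NULL ... */
        struct request *req = calloc(1, sizeof(*req));

        assert(req && req->pktopts == NULL);  /* ... so an explicit
                                               * "req->pktopts = NULL;"
                                               * is dead code */
        free(req);
        return 0;
    }
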
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 6323921b40be..669f280989c3 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -109,7 +109,7 @@ static int tunnel46_rcv(struct sk_buff *skb)
109{ 109{
110 struct xfrm6_tunnel *handler; 110 struct xfrm6_tunnel *handler;
111 111
112 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 112 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
113 goto drop; 113 goto drop;
114 114
115 for (handler = tunnel46_handlers; handler; handler = handler->next) 115 for (handler = tunnel46_handlers; handler; handler = handler->next)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1fd784f3e2ec..dd309626ae9a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
534{ 534{
535 struct udp_sock *up = udp_sk(sk); 535 struct udp_sock *up = udp_sk(sk);
536 536
537 if (up->pending) { 537 if (up->pending == AF_INET)
538 udp_flush_pending_frames(sk);
539 else if (up->pending) {
538 up->len = 0; 540 up->len = 0;
539 up->pending = 0; 541 up->pending = 0;
540 ip6_flush_pending_frames(sk); 542 ip6_flush_pending_frames(sk);
@@ -731,7 +733,7 @@ do_udp_sendmsg:
731 memset(opt, 0, sizeof(struct ipv6_txoptions)); 733 memset(opt, 0, sizeof(struct ipv6_txoptions));
732 opt->tot_len = sizeof(*opt); 734 opt->tot_len = sizeof(*opt);
733 735
734 err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); 736 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
735 if (err < 0) { 737 if (err < 0) {
736 fl6_sock_release(flowlabel); 738 fl6_sock_release(flowlabel);
737 return err; 739 return err;
@@ -848,12 +850,14 @@ do_append_data:
848 } else { 850 } else {
849 dst_release(dst); 851 dst_release(dst);
850 } 852 }
853 dst = NULL;
851 } 854 }
852 855
853 if (err > 0) 856 if (err > 0)
854 err = np->recverr ? net_xmit_errno(err) : 0; 857 err = np->recverr ? net_xmit_errno(err) : 0;
855 release_sock(sk); 858 release_sock(sk);
856out: 859out:
860 dst_release(dst);
857 fl6_sock_release(flowlabel); 861 fl6_sock_release(flowlabel);
858 if (!err) 862 if (!err)
859 return len; 863 return len;
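
Two fixes sit in the udp.c hunks: udp_v6_flush_pending_frames() now hands an IPv4-corked socket (up->pending == AF_INET) to udp_flush_pending_frames() instead of wrongly flushing it through the IPv6 path, and the sendmsg path clears dst after consuming it so that a single dst_release() at the out label covers every exit without a double free (datagram_send_ctl() also grows a struct net argument). A sketch of that release-once pattern, with a toy refcount standing in for dst_entry:

    #include <stdio.h>
    #include <stdlib.h>

    struct dst { int refcnt; };

    /* dst_release()-style put: NULL-safe, frees at refcount zero */
    static void dst_put(struct dst *d)
    {
        if (d && --d->refcnt == 0)
            free(d);
    }

    static int send_path(int fail)
    {
        struct dst *dst = malloc(sizeof(*dst));

        if (!dst)
            return -1;
        dst->refcnt = 1;

        if (!fail) {
            dst_put(dst);   /* consumed on the success path ... */
            dst = NULL;     /* ... so mark it handed off */
        }
        /* error paths fall through still owning dst */

        dst_put(dst);       /* one release at the exit; no-op when NULL */
        return fail ? -1 : 0;
    }

    int main(void)
    {
        printf("%d %d\n", send_path(0), send_path(1));
        return 0;
    }
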
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index ae54b20d0470..3eb5bcc75f99 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1093,11 +1093,6 @@ static int irda_create(struct net *net, struct socket *sock, int protocol)
1093 1093
1094 init_waitqueue_head(&self->query_wait); 1094 init_waitqueue_head(&self->query_wait);
1095 1095
1096 /* Initialise networking socket struct */
1097 sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */
1098 sk->sk_family = PF_IRDA;
1099 sk->sk_protocol = protocol;
1100
1101 switch (sock->type) { 1096 switch (sock->type) {
1102 case SOCK_STREAM: 1097 case SOCK_STREAM:
1103 sock->ops = &irda_stream_ops; 1098 sock->ops = &irda_stream_ops;
@@ -1124,13 +1119,20 @@ static int irda_create(struct net *net, struct socket *sock, int protocol)
1124 self->max_sdu_size_rx = TTP_SAR_UNBOUND; 1119 self->max_sdu_size_rx = TTP_SAR_UNBOUND;
1125 break; 1120 break;
1126 default: 1121 default:
1122 sk_free(sk);
1127 return -ESOCKTNOSUPPORT; 1123 return -ESOCKTNOSUPPORT;
1128 } 1124 }
1129 break; 1125 break;
1130 default: 1126 default:
1127 sk_free(sk);
1131 return -ESOCKTNOSUPPORT; 1128 return -ESOCKTNOSUPPORT;
1132 } 1129 }
1133 1130
1131 /* Initialise networking socket struct */
1132 sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */
1133 sk->sk_family = PF_IRDA;
1134 sk->sk_protocol = protocol;
1135
1134 /* Register as a client with IrLMP */ 1136 /* Register as a client with IrLMP */
1135 self->ckey = irlmp_register_client(0, NULL, NULL, NULL); 1137 self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
1136 self->mask.word = 0xffff; 1138 self->mask.word = 0xffff;
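
The af_irda.c reorder closes a leak: the -ESOCKTNOSUPPORT branches used to return with the freshly allocated sk neither initialized nor freed. Validating the socket type first, freeing on rejection, and only then running sock_init_data() means the object is released exactly once on every path. A sketch of the validate-before-initialize shape (types 1 and 2 are placeholders for the supported kinds):

    #include <errno.h>
    #include <stdlib.h>

    struct sock { int type; };

    static int my_sock_create(int type, struct sock **out)
    {
        struct sock *sk = malloc(sizeof(*sk));

        if (!sk)
            return -ENOMEM;

        /* validate first: an unsupported type frees sk and bails ... */
        if (type != 1 && type != 2) {
            free(sk);
            return -ESOCKTNOSUPPORT;
        }

        /* ... and only a validated socket gets initialised */
        sk->type = type;
        *out = sk;
        return 0;
    }

    int main(void)
    {
        struct sock *sk = NULL;

        if (my_sock_create(1, &sk) != 0)
            return 1;
        free(sk);
        return my_sock_create(9, &sk) == -ESOCKTNOSUPPORT ? 0 : 1;
    }
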
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9e7236ff6bcc..7470e367272b 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1251,7 +1251,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
1251 x->sel.prefixlen_s = addr->sadb_address_prefixlen; 1251 x->sel.prefixlen_s = addr->sadb_address_prefixlen;
1252 } 1252 }
1253 1253
1254 if (x->props.mode == XFRM_MODE_TRANSPORT) 1254 if (!x->sel.family)
1255 x->sel.family = x->props.family; 1255 x->sel.family = x->props.family;
1256 1256
1257 if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { 1257 if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
@@ -3030,6 +3030,9 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
3030 3030
3031static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) 3031static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
3032{ 3032{
3033 if (atomic_read(&pfkey_socks_nr) == 0)
3034 return 0;
3035
3033 switch (c->event) { 3036 switch (c->event) {
3034 case XFRM_MSG_EXPIRE: 3037 case XFRM_MSG_EXPIRE:
3035 return key_notify_sa_expire(x, c); 3038 return key_notify_sa_expire(x, c);
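
Two independent af_key.c fixes: the selector family now falls back to the state's family whenever it is still unset, not only in transport mode, and pfkey_send_notify() returns immediately when pfkey_socks_nr shows no open PF_KEY socket, skipping message construction nobody would receive. A sketch of that early-out, assuming a listener count maintained at socket open/close (C11 atomics):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int listeners;  /* bumped on socket open, dropped on close */

    static int send_notify(const char *event)
    {
        /* nobody listening: skip the comparatively costly message build */
        if (atomic_load(&listeners) == 0)
            return 0;

        printf("built and queued notify: %s\n", event);
        return 0;
    }

    int main(void)
    {
        send_notify("expire");            /* no-op, zero listeners */
        atomic_fetch_add(&listeners, 1);
        send_notify("expire");            /* now actually built */
        return 0;
    }
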
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index e2ddde755019..008de1fc42ca 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -286,12 +286,14 @@ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb,
286 * 286 *
287 * Sends received pdus to the sap state machine. 287 * Sends received pdus to the sap state machine.
288 */ 288 */
289static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb) 289static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
290 struct sock *sk)
290{ 291{
291 struct llc_sap_state_ev *ev = llc_sap_ev(skb); 292 struct llc_sap_state_ev *ev = llc_sap_ev(skb);
292 293
293 ev->type = LLC_SAP_EV_TYPE_PDU; 294 ev->type = LLC_SAP_EV_TYPE_PDU;
294 ev->reason = 0; 295 ev->reason = 0;
296 skb->sk = sk;
295 llc_sap_state_process(sap, skb); 297 llc_sap_state_process(sap, skb);
296} 298}
297 299
@@ -360,8 +362,7 @@ static void llc_sap_mcast(struct llc_sap *sap,
360 break; 362 break;
361 363
362 sock_hold(sk); 364 sock_hold(sk);
363 skb_set_owner_r(skb1, sk); 365 llc_sap_rcv(sap, skb1, sk);
364 llc_sap_rcv(sap, skb1);
365 sock_put(sk); 366 sock_put(sk);
366 } 367 }
367 read_unlock_bh(&sap->sk_list.lock); 368 read_unlock_bh(&sap->sk_list.lock);
@@ -381,8 +382,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
381 } else { 382 } else {
382 struct sock *sk = llc_lookup_dgram(sap, &laddr); 383 struct sock *sk = llc_lookup_dgram(sap, &laddr);
383 if (sk) { 384 if (sk) {
384 skb_set_owner_r(skb, sk); 385 llc_sap_rcv(sap, skb, sk);
385 llc_sap_rcv(sap, skb);
386 sock_put(sk); 386 sock_put(sk);
387 } else 387 } else
388 kfree_skb(skb); 388 kfree_skb(skb);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 699d97b8de5e..a9fce4afdf21 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -672,7 +672,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
672 if (params->vlan) { 672 if (params->vlan) {
673 sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); 673 sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
674 674
675 if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN || 675 if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN &&
676 sdata->vif.type != IEEE80211_IF_TYPE_AP) 676 sdata->vif.type != IEEE80211_IF_TYPE_AP)
677 return -EINVAL; 677 return -EINVAL;
678 } else 678 } else
@@ -760,7 +760,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
760 if (params->vlan && params->vlan != sta->sdata->dev) { 760 if (params->vlan && params->vlan != sta->sdata->dev) {
761 vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); 761 vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
762 762
763 if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN || 763 if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN &&
764 vlansdata->vif.type != IEEE80211_IF_TYPE_AP) { 764 vlansdata->vif.type != IEEE80211_IF_TYPE_AP) {
765 rcu_read_unlock(); 765 rcu_read_unlock();
766 return -EINVAL; 766 return -EINVAL;
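
The cfg.c fix is a classic operator slip: "type is neither VLAN nor AP" needs &&. With ||, one of the two inequalities is always true — the type cannot equal both constants at once — so the check rejected every request. A two-assert demonstration:

    #include <assert.h>

    enum { IF_TYPE_VLAN = 1, IF_TYPE_AP = 2, IF_TYPE_STA = 3 };

    int main(void)
    {
        int t = IF_TYPE_AP;    /* a value that should be accepted */

        /* buggy form: true for every t, because t can't equal both */
        assert((t != IF_TYPE_VLAN || t != IF_TYPE_AP) == 1);

        /* fixed form: true only when t is neither VLAN nor AP */
        assert((t != IF_TYPE_VLAN && t != IF_TYPE_AP) == 0);
        return 0;
    }
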
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c7314bf4bec2..006486b26726 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -899,7 +899,7 @@ extern const struct iw_handler_def ieee80211_iw_handler_def;
899 899
900 900
901/* ieee80211_ioctl.c */ 901/* ieee80211_ioctl.c */
902int ieee80211_set_freq(struct ieee80211_local *local, int freq); 902int ieee80211_set_freq(struct net_device *dev, int freq);
903/* ieee80211_sta.c */ 903/* ieee80211_sta.c */
904void ieee80211_sta_timer(unsigned long data); 904void ieee80211_sta_timer(unsigned long data);
905void ieee80211_sta_work(struct work_struct *work); 905void ieee80211_sta_work(struct work_struct *work);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 915afadb0602..98c0b5e56ecc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -511,6 +511,7 @@ static int ieee80211_stop(struct net_device *dev)
511 case IEEE80211_IF_TYPE_STA: 511 case IEEE80211_IF_TYPE_STA:
512 case IEEE80211_IF_TYPE_IBSS: 512 case IEEE80211_IF_TYPE_IBSS:
513 sdata->u.sta.state = IEEE80211_DISABLED; 513 sdata->u.sta.state = IEEE80211_DISABLED;
514 memset(sdata->u.sta.bssid, 0, ETH_ALEN);
514 del_timer_sync(&sdata->u.sta.timer); 515 del_timer_sync(&sdata->u.sta.timer);
515 /* 516 /*
516 * When we get here, the interface is marked down. 517 * When we get here, the interface is marked down.
@@ -1313,7 +1314,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
1313 /* 1314 /*
1314 * Clear the TX filter mask for this STA when sending the next 1315 * Clear the TX filter mask for this STA when sending the next
1315 * packet. If the STA went to power save mode, this will happen 1316 * packet. If the STA went to power save mode, this will happen
1316 * happen when it wakes up for the next time. 1317 * when it wakes up for the next time.
1317 */ 1318 */
1318 sta->flags |= WLAN_STA_CLEAR_PS_FILT; 1319 sta->flags |= WLAN_STA_CLEAR_PS_FILT;
1319 1320
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4adba09e80ca..4d2b582dd055 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -44,7 +44,7 @@
44#define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) 44#define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ)
45#define IEEE80211_SCAN_INTERVAL (2 * HZ) 45#define IEEE80211_SCAN_INTERVAL (2 * HZ)
46#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) 46#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ)
47#define IEEE80211_IBSS_JOIN_TIMEOUT (20 * HZ) 47#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
48 48
49#define IEEE80211_PROBE_DELAY (HZ / 33) 49#define IEEE80211_PROBE_DELAY (HZ / 33)
50#define IEEE80211_CHANNEL_TIME (HZ / 33) 50#define IEEE80211_CHANNEL_TIME (HZ / 33)
@@ -730,7 +730,17 @@ static void ieee80211_send_assoc(struct net_device *dev,
730 if (bss->wmm_ie) { 730 if (bss->wmm_ie) {
731 wmm = 1; 731 wmm = 1;
732 } 732 }
733
734 /* get all rates supported by the device and the AP as
735 * some APs don't like getting a superset of their rates
736 * in the association request (e.g. D-Link DAP 1353 in
737 * b-only mode) */
738 rates_len = ieee80211_compatible_rates(bss, sband, &rates);
739
733 ieee80211_rx_bss_put(dev, bss); 740 ieee80211_rx_bss_put(dev, bss);
741 } else {
742 rates = ~0;
743 rates_len = sband->n_bitrates;
734 } 744 }
735 745
736 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); 746 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
@@ -761,10 +771,7 @@ static void ieee80211_send_assoc(struct net_device *dev,
761 *pos++ = ifsta->ssid_len; 771 *pos++ = ifsta->ssid_len;
762 memcpy(pos, ifsta->ssid, ifsta->ssid_len); 772 memcpy(pos, ifsta->ssid, ifsta->ssid_len);
763 773
764 /* all supported rates should be added here but some APs 774 /* add all rates which were marked to be used above */
765 * (e.g. D-Link DAP 1353 in b-only mode) don't like that
766 * Therefore only add rates the AP supports */
767 rates_len = ieee80211_compatible_rates(bss, sband, &rates);
768 supp_rates_len = rates_len; 775 supp_rates_len = rates_len;
769 if (supp_rates_len > 8) 776 if (supp_rates_len > 8)
770 supp_rates_len = 8; 777 supp_rates_len = 8;
@@ -1318,7 +1325,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
1318 1325
1319 /* prepare reordering buffer */ 1326 /* prepare reordering buffer */
1320 tid_agg_rx->reorder_buf = 1327 tid_agg_rx->reorder_buf =
1321 kmalloc(buf_size * sizeof(struct sk_buf *), GFP_ATOMIC); 1328 kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC);
1322 if (!tid_agg_rx->reorder_buf) { 1329 if (!tid_agg_rx->reorder_buf) {
1323 if (net_ratelimit()) 1330 if (net_ratelimit())
1324 printk(KERN_ERR "can not allocate reordering buffer " 1331 printk(KERN_ERR "can not allocate reordering buffer "
@@ -1327,7 +1334,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev,
1327 goto end; 1334 goto end;
1328 } 1335 }
1329 memset(tid_agg_rx->reorder_buf, 0, 1336 memset(tid_agg_rx->reorder_buf, 0,
1330 buf_size * sizeof(struct sk_buf *)); 1337 buf_size * sizeof(struct sk_buff *));
1331 1338
1332 if (local->ops->ampdu_action) 1339 if (local->ops->ampdu_action)
1333 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, 1340 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
@@ -1607,7 +1614,7 @@ void sta_addba_resp_timer_expired(unsigned long data)
1607 * only one argument, and both sta_info and TID are needed, so init 1614 * only one argument, and both sta_info and TID are needed, so init
1608 * flow in sta_info_create gives the TID as data, while the timer_to_id 1615 * flow in sta_info_create gives the TID as data, while the timer_to_id
1609 * array gives the sta through container_of */ 1616 * array gives the sta through container_of */
1610 u16 tid = *(int *)data; 1617 u16 tid = *(u8 *)data;
1611 struct sta_info *temp_sta = container_of((void *)data, 1618 struct sta_info *temp_sta = container_of((void *)data,
1612 struct sta_info, timer_to_tid[tid]); 1619 struct sta_info, timer_to_tid[tid]);
1613 1620
@@ -1655,7 +1662,7 @@ timer_expired_exit:
1655void sta_rx_agg_session_timer_expired(unsigned long data) 1662void sta_rx_agg_session_timer_expired(unsigned long data)
1656{ 1663{
1657 /* not an elegant detour, but there is no choice as the timer passes 1664 /* not an elegant detour, but there is no choice as the timer passes
1658 * only one argument, and verious sta_info are needed here, so init 1665 * only one argument, and various sta_info are needed here, so init
1659 * flow in sta_info_create gives the TID as data, while the timer_to_id 1666 * flow in sta_info_create gives the TID as data, while the timer_to_id
1660 * array gives the sta through container_of */ 1667 * array gives the sta through container_of */
1661 u8 *ptid = (u8 *)data; 1668 u8 *ptid = (u8 *)data;
@@ -2329,6 +2336,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
2329 u8 *pos; 2336 u8 *pos;
2330 struct ieee80211_sub_if_data *sdata; 2337 struct ieee80211_sub_if_data *sdata;
2331 struct ieee80211_supported_band *sband; 2338 struct ieee80211_supported_band *sband;
2339 union iwreq_data wrqu;
2332 2340
2333 sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; 2341 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
2334 2342
@@ -2351,13 +2359,10 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
2351 sdata->drop_unencrypted = bss->capability & 2359 sdata->drop_unencrypted = bss->capability &
2352 WLAN_CAPABILITY_PRIVACY ? 1 : 0; 2360 WLAN_CAPABILITY_PRIVACY ? 1 : 0;
2353 2361
2354 res = ieee80211_set_freq(local, bss->freq); 2362 res = ieee80211_set_freq(dev, bss->freq);
2355 2363
2356 if (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) { 2364 if (res)
2357 printk(KERN_DEBUG "%s: IBSS not allowed on frequency " 2365 return res;
2358 "%d MHz\n", dev->name, local->oper_channel->center_freq);
2359 return -1;
2360 }
2361 2366
2362 /* Set beacon template */ 2367 /* Set beacon template */
2363 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); 2368 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
@@ -2472,7 +2477,9 @@ static int ieee80211_sta_join_ibss(struct net_device *dev,
2472 ifsta->state = IEEE80211_IBSS_JOINED; 2477 ifsta->state = IEEE80211_IBSS_JOINED;
2473 mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); 2478 mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
2474 2479
2475 ieee80211_rx_bss_put(dev, bss); 2480 memset(&wrqu, 0, sizeof(wrqu));
2481 memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN);
2482 wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
2476 2483
2477 return res; 2484 return res;
2478} 2485}
@@ -3446,21 +3453,17 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
3446 struct ieee80211_sta_bss *bss, *selected = NULL; 3453 struct ieee80211_sta_bss *bss, *selected = NULL;
3447 int top_rssi = 0, freq; 3454 int top_rssi = 0, freq;
3448 3455
3449 if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
3450 IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) {
3451 ifsta->state = IEEE80211_AUTHENTICATE;
3452 ieee80211_sta_reset_auth(dev, ifsta);
3453 return 0;
3454 }
3455
3456 spin_lock_bh(&local->sta_bss_lock); 3456 spin_lock_bh(&local->sta_bss_lock);
3457 freq = local->oper_channel->center_freq; 3457 freq = local->oper_channel->center_freq;
3458 list_for_each_entry(bss, &local->sta_bss_list, list) { 3458 list_for_each_entry(bss, &local->sta_bss_list, list) {
3459 if (!(bss->capability & WLAN_CAPABILITY_ESS)) 3459 if (!(bss->capability & WLAN_CAPABILITY_ESS))
3460 continue; 3460 continue;
3461 3461
3462 if (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ 3462 if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
3463 !!sdata->default_key) 3463 IEEE80211_STA_AUTO_BSSID_SEL |
3464 IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
3465 (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
3466 !!sdata->default_key))
3464 continue; 3467 continue;
3465 3468
3466 if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && 3469 if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
@@ -3485,7 +3488,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
3485 spin_unlock_bh(&local->sta_bss_lock); 3488 spin_unlock_bh(&local->sta_bss_lock);
3486 3489
3487 if (selected) { 3490 if (selected) {
3488 ieee80211_set_freq(local, selected->freq); 3491 ieee80211_set_freq(dev, selected->freq);
3489 if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) 3492 if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
3490 ieee80211_sta_set_ssid(dev, selected->ssid, 3493 ieee80211_sta_set_ssid(dev, selected->ssid,
3491 selected->ssid_len); 3494 selected->ssid_len);
@@ -3520,6 +3523,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
3520 struct ieee80211_supported_band *sband; 3523 struct ieee80211_supported_band *sband;
3521 u8 bssid[ETH_ALEN], *pos; 3524 u8 bssid[ETH_ALEN], *pos;
3522 int i; 3525 int i;
3526 int ret;
3523 DECLARE_MAC_BUF(mac); 3527 DECLARE_MAC_BUF(mac);
3524 3528
3525#if 0 3529#if 0
@@ -3564,7 +3568,9 @@ static int ieee80211_sta_create_ibss(struct net_device *dev,
3564 *pos++ = (u8) (rate / 5); 3568 *pos++ = (u8) (rate / 5);
3565 } 3569 }
3566 3570
3567 return ieee80211_sta_join_ibss(dev, ifsta, bss); 3571 ret = ieee80211_sta_join_ibss(dev, ifsta, bss);
3572 ieee80211_rx_bss_put(dev, bss);
3573 return ret;
3568} 3574}
3569 3575
3570 3576
@@ -3612,10 +3618,13 @@ static int ieee80211_sta_find_ibss(struct net_device *dev,
3612 (bss = ieee80211_rx_bss_get(dev, bssid, 3618 (bss = ieee80211_rx_bss_get(dev, bssid,
3613 local->hw.conf.channel->center_freq, 3619 local->hw.conf.channel->center_freq,
3614 ifsta->ssid, ifsta->ssid_len))) { 3620 ifsta->ssid, ifsta->ssid_len))) {
3621 int ret;
3615 printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" 3622 printk(KERN_DEBUG "%s: Selected IBSS BSSID %s"
3616 " based on configured SSID\n", 3623 " based on configured SSID\n",
3617 dev->name, print_mac(mac, bssid)); 3624 dev->name, print_mac(mac, bssid));
3618 return ieee80211_sta_join_ibss(dev, ifsta, bss); 3625 ret = ieee80211_sta_join_ibss(dev, ifsta, bss);
3626 ieee80211_rx_bss_put(dev, bss);
3627 return ret;
3619 } 3628 }
3620#ifdef CONFIG_MAC80211_IBSS_DEBUG 3629#ifdef CONFIG_MAC80211_IBSS_DEBUG
3621 printk(KERN_DEBUG " did not try to join ibss\n"); 3630 printk(KERN_DEBUG " did not try to join ibss\n");
@@ -4092,18 +4101,17 @@ ieee80211_sta_scan_result(struct net_device *dev,
4092 4101
4093 memset(&iwe, 0, sizeof(iwe)); 4102 memset(&iwe, 0, sizeof(iwe));
4094 iwe.cmd = SIOCGIWFREQ; 4103 iwe.cmd = SIOCGIWFREQ;
4095 iwe.u.freq.m = bss->freq; 4104 iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq);
4096 iwe.u.freq.e = 6; 4105 iwe.u.freq.e = 0;
4097 current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, 4106 current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
4098 IW_EV_FREQ_LEN); 4107 IW_EV_FREQ_LEN);
4099 4108
4100 memset(&iwe, 0, sizeof(iwe)); 4109 memset(&iwe, 0, sizeof(iwe));
4101 iwe.cmd = SIOCGIWFREQ; 4110 iwe.cmd = SIOCGIWFREQ;
4102 iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); 4111 iwe.u.freq.m = bss->freq;
4103 iwe.u.freq.e = 0; 4112 iwe.u.freq.e = 6;
4104 current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, 4113 current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe,
4105 IW_EV_FREQ_LEN); 4114 IW_EV_FREQ_LEN);
4106
4107 memset(&iwe, 0, sizeof(iwe)); 4115 memset(&iwe, 0, sizeof(iwe));
4108 iwe.cmd = IWEVQUAL; 4116 iwe.cmd = IWEVQUAL;
4109 iwe.u.qual.qual = bss->signal; 4117 iwe.u.qual.qual = bss->signal;
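
The mlme.c hunks tighten reference handling and a few details: the compatible-rates calculation moves before ieee80211_rx_bss_put() so the bss is still held while it is read, ieee80211_sta_join_ibss() stops dropping a reference it does not own (its callers now pair their lookup with an explicit put), sizeof(struct sk_buf *) becomes the intended sizeof(struct sk_buff *), the IBSS join timeout drops from 20 to 7 seconds, and a SIOCGIWAP wireless event is sent on join. A toy sketch of the borrow-don't-consume refcount convention:

    #include <stdlib.h>

    struct bss { int refcnt; };

    static struct bss *bss_get(struct bss *b) { b->refcnt++; return b; }

    static void bss_put(struct bss *b)
    {
        if (--b->refcnt == 0)
            free(b);
    }

    /* the callee borrows the object: it neither takes nor drops a ref */
    static int join_ibss(struct bss *b)
    {
        return b->refcnt > 0 ? 0 : -1;
    }

    int main(void)
    {
        struct bss *b = malloc(sizeof(*b));
        int ret;

        if (!b)
            return 1;
        b->refcnt = 1;        /* creation reference */

        bss_get(b);           /* lookup takes its own reference ...   */
        ret = join_ibss(b);   /* ... the call merely borrows it ...   */
        bss_put(b);           /* ... and the caller releases it again */

        bss_put(b);           /* drop the creation reference */
        return ret;
    }
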
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1958bfb361c6..0941e5d6a522 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1091,7 +1091,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1091 u16 fc, hdrlen, ethertype; 1091 u16 fc, hdrlen, ethertype;
1092 u8 *payload; 1092 u8 *payload;
1093 u8 dst[ETH_ALEN]; 1093 u8 dst[ETH_ALEN];
1094 u8 src[ETH_ALEN]; 1094 u8 src[ETH_ALEN] __aligned(2);
1095 struct sk_buff *skb = rx->skb; 1095 struct sk_buff *skb = rx->skb;
1096 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1096 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1097 DECLARE_MAC_BUF(mac); 1097 DECLARE_MAC_BUF(mac);
@@ -1234,7 +1234,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1234 */ 1234 */
1235static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) 1235static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx)
1236{ 1236{
1237 static const u8 pae_group_addr[ETH_ALEN] 1237 static const u8 pae_group_addr[ETH_ALEN] __aligned(2)
1238 = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 }; 1238 = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 };
1239 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; 1239 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1240 1240
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1d7dd54aacef..c80d5899f279 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1132,7 +1132,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
1132 ieee80211_tx_handler *handler; 1132 ieee80211_tx_handler *handler;
1133 struct ieee80211_tx_data tx; 1133 struct ieee80211_tx_data tx;
1134 ieee80211_tx_result res = TX_DROP, res_prepare; 1134 ieee80211_tx_result res = TX_DROP, res_prepare;
1135 int ret, i; 1135 int ret, i, retries = 0;
1136 1136
1137 WARN_ON(__ieee80211_queue_pending(local, control->queue)); 1137 WARN_ON(__ieee80211_queue_pending(local, control->queue));
1138 1138
@@ -1216,6 +1216,13 @@ retry:
1216 if (!__ieee80211_queue_stopped(local, control->queue)) { 1216 if (!__ieee80211_queue_stopped(local, control->queue)) {
1217 clear_bit(IEEE80211_LINK_STATE_PENDING, 1217 clear_bit(IEEE80211_LINK_STATE_PENDING,
1218 &local->state[control->queue]); 1218 &local->state[control->queue]);
1219 retries++;
1220 /*
1221 * Driver bug, it's rejecting packets but
1222 * not stopping queues.
1223 */
1224 if (WARN_ON_ONCE(retries > 5))
1225 goto drop;
1219 goto retry; 1226 goto retry;
1220 } 1227 }
1221 memcpy(&store->control, control, 1228 memcpy(&store->control, control,
@@ -1562,13 +1569,13 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1562 * be cloned. This could happen, e.g., with Linux bridge code passing 1569 * be cloned. This could happen, e.g., with Linux bridge code passing
1563 * us broadcast frames. */ 1570 * us broadcast frames. */
1564 1571
1565 if (head_need > 0 || skb_header_cloned(skb)) { 1572 if (head_need > 0 || skb_cloned(skb)) {
1566#if 0 1573#if 0
1567 printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " 1574 printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes "
1568 "of headroom\n", dev->name, head_need); 1575 "of headroom\n", dev->name, head_need);
1569#endif 1576#endif
1570 1577
1571 if (skb_header_cloned(skb)) 1578 if (skb_cloned(skb))
1572 I802_DEBUG_INC(local->tx_expand_skb_head_cloned); 1579 I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
1573 else 1580 else
1574 I802_DEBUG_INC(local->tx_expand_skb_head); 1581 I802_DEBUG_INC(local->tx_expand_skb_head);
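
The tx.c changes bound a potential livelock — a buggy driver that rejects frames without stopping its queue would previously make ieee80211_tx() retry forever, and now gets a WARN_ON_ONCE and a drop after five attempts — and switch the headroom test from skb_header_cloned() to the stronger skb_cloned(), reallocating whenever the buffer is shared at all. A sketch of the bounded retry:

    #include <stdio.h>

    /* misbehaving driver: rejects every frame yet never stops the queue */
    static int driver_tx(void) { return -1; }

    static int tx_with_retry_cap(void)
    {
        int retries = 0;

        while (driver_tx() != 0) {
            /* without a bound this would spin forever */
            if (++retries > 5) {
                fprintf(stderr, "driver bug: dropping frame\n");
                return -1;
            }
        }
        return 0;
    }

    int main(void)
    {
        return tx_with_retry_cap() == -1 ? 0 : 1;
    }
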
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 24a465c4df09..4e97b266f907 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -34,11 +34,11 @@ void *mac80211_wiphy_privid = &mac80211_wiphy_privid;
34 34
35/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ 35/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
36/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ 36/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
37const unsigned char rfc1042_header[] = 37const unsigned char rfc1042_header[] __aligned(2) =
38 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; 38 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
39 39
40/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ 40/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */
41const unsigned char bridge_tunnel_header[] = 41const unsigned char bridge_tunnel_header[] __aligned(2) =
42 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 42 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
43 43
44 44
@@ -389,6 +389,41 @@ void ieee80211_iterate_active_interfaces(
389 struct ieee80211_local *local = hw_to_local(hw); 389 struct ieee80211_local *local = hw_to_local(hw);
390 struct ieee80211_sub_if_data *sdata; 390 struct ieee80211_sub_if_data *sdata;
391 391
392 rtnl_lock();
393
394 list_for_each_entry(sdata, &local->interfaces, list) {
395 switch (sdata->vif.type) {
396 case IEEE80211_IF_TYPE_INVALID:
397 case IEEE80211_IF_TYPE_MNTR:
398 case IEEE80211_IF_TYPE_VLAN:
399 continue;
400 case IEEE80211_IF_TYPE_AP:
401 case IEEE80211_IF_TYPE_STA:
402 case IEEE80211_IF_TYPE_IBSS:
403 case IEEE80211_IF_TYPE_WDS:
404 case IEEE80211_IF_TYPE_MESH_POINT:
405 break;
406 }
407 if (sdata->dev == local->mdev)
408 continue;
409 if (netif_running(sdata->dev))
410 iterator(data, sdata->dev->dev_addr,
411 &sdata->vif);
412 }
413
414 rtnl_unlock();
415}
416EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
417
418void ieee80211_iterate_active_interfaces_atomic(
419 struct ieee80211_hw *hw,
420 void (*iterator)(void *data, u8 *mac,
421 struct ieee80211_vif *vif),
422 void *data)
423{
424 struct ieee80211_local *local = hw_to_local(hw);
425 struct ieee80211_sub_if_data *sdata;
426
392 rcu_read_lock(); 427 rcu_read_lock();
393 428
394 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 429 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -413,4 +448,4 @@ void ieee80211_iterate_active_interfaces(
413 448
414 rcu_read_unlock(); 449 rcu_read_unlock();
415} 450}
416EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); 451EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
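
util.c splits the interface walk into two exported variants: ieee80211_iterate_active_interfaces() may sleep and holds the RTNL while calling back, while the new _atomic flavour keeps the old RCU read-side walk for callers in atomic context. A loose user-space analogy of the callback-iterator shape, with a pthread mutex standing in for the sleeping lock (build with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    struct iface { const char *name; int running; struct iface *next; };

    static struct iface sta0 = { "sta0", 1, NULL };
    static struct iface ap0  = { "ap0",  1, &sta0 };
    static struct iface *interfaces = &ap0;
    static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;

    typedef void (*iter_fn)(void *data, const char *name);

    /* may-sleep variant: holds the heavyweight configuration lock
     * (standing in for the RTNL) across the callbacks */
    static void iterate_active(iter_fn fn, void *data)
    {
        struct iface *i;

        pthread_mutex_lock(&cfg_lock);
        for (i = interfaces; i; i = i->next)
            if (i->running)
                fn(data, i->name);
        pthread_mutex_unlock(&cfg_lock);
    }

    /* an _atomic variant would walk the same list inside an RCU
     * read-side critical section instead of taking a sleeping lock */

    static void print_one(void *data, const char *name)
    {
        (void)data;
        printf("active: %s\n", name);
    }

    int main(void)
    {
        iterate_active(print_one, NULL);
        return 0;
    }
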
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 76e1de1dc735..6106cb79060c 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -209,7 +209,6 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev,
209 range->num_frequency = c; 209 range->num_frequency = c;
210 210
211 IW_EVENT_CAPA_SET_KERNEL(range->event_capa); 211 IW_EVENT_CAPA_SET_KERNEL(range->event_capa);
212 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY);
213 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); 212 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
214 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); 213 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
215 214
@@ -291,14 +290,22 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev,
291 return 0; 290 return 0;
292} 291}
293 292
294int ieee80211_set_freq(struct ieee80211_local *local, int freqMHz) 293int ieee80211_set_freq(struct net_device *dev, int freqMHz)
295{ 294{
296 int ret = -EINVAL; 295 int ret = -EINVAL;
297 struct ieee80211_channel *chan; 296 struct ieee80211_channel *chan;
297 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
298 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
298 299
299 chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); 300 chan = ieee80211_get_channel(local->hw.wiphy, freqMHz);
300 301
301 if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { 302 if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) {
303 if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS &&
304 chan->flags & IEEE80211_CHAN_NO_IBSS) {
305 printk(KERN_DEBUG "%s: IBSS not allowed on frequency "
306 "%d MHz\n", dev->name, chan->center_freq);
307 return ret;
308 }
302 local->oper_channel = chan; 309 local->oper_channel = chan;
303 310
304 if (local->sta_sw_scanning || local->sta_hw_scanning) 311 if (local->sta_sw_scanning || local->sta_hw_scanning)
@@ -316,7 +323,6 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
316 struct iw_request_info *info, 323 struct iw_request_info *info,
317 struct iw_freq *freq, char *extra) 324 struct iw_freq *freq, char *extra)
318{ 325{
319 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
320 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 326 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
321 327
322 if (sdata->vif.type == IEEE80211_IF_TYPE_STA) 328 if (sdata->vif.type == IEEE80211_IF_TYPE_STA)
@@ -330,14 +336,14 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev,
330 IEEE80211_STA_AUTO_CHANNEL_SEL; 336 IEEE80211_STA_AUTO_CHANNEL_SEL;
331 return 0; 337 return 0;
332 } else 338 } else
333 return ieee80211_set_freq(local, 339 return ieee80211_set_freq(dev,
334 ieee80211_channel_to_frequency(freq->m)); 340 ieee80211_channel_to_frequency(freq->m));
335 } else { 341 } else {
336 int i, div = 1000000; 342 int i, div = 1000000;
337 for (i = 0; i < freq->e; i++) 343 for (i = 0; i < freq->e; i++)
338 div /= 10; 344 div /= 10;
339 if (div > 0) 345 if (div > 0)
340 return ieee80211_set_freq(local, freq->m / div); 346 return ieee80211_set_freq(dev, freq->m / div);
341 else 347 else
342 return -EINVAL; 348 return -EINVAL;
343 } 349 }
@@ -490,9 +496,15 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
490 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 496 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
491 if (sdata->vif.type == IEEE80211_IF_TYPE_STA || 497 if (sdata->vif.type == IEEE80211_IF_TYPE_STA ||
492 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { 498 sdata->vif.type == IEEE80211_IF_TYPE_IBSS) {
493 ap_addr->sa_family = ARPHRD_ETHER; 499 if (sdata->u.sta.state == IEEE80211_ASSOCIATED ||
494 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); 500 sdata->u.sta.state == IEEE80211_IBSS_JOINED) {
495 return 0; 501 ap_addr->sa_family = ARPHRD_ETHER;
502 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN);
503 return 0;
504 } else {
505 memset(&ap_addr->sa_data, 0, ETH_ALEN);
506 return 0;
507 }
496 } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { 508 } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) {
497 ap_addr->sa_family = ARPHRD_ETHER; 509 ap_addr->sa_family = ARPHRD_ETHER;
498 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); 510 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN);
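
In wext.c, ieee80211_set_freq() now takes the net_device so it can refuse to put an IBSS interface on an IEEE80211_CHAN_NO_IBSS channel at tune time (the check previously lived in the join path), and SIOCGIWAP reports an all-zero BSSID instead of stale bytes when the STA is neither associated nor joined. A sketch of the up-front flag check (the flag constants are placeholders):

    #include <errno.h>
    #include <stdio.h>

    #define CHAN_DISABLED 0x1   /* placeholder flag bits */
    #define CHAN_NO_IBSS  0x2

    struct channel { int mhz; unsigned int flags; };

    /* reject the tune up front: an IBSS interface may not sit on a
     * no-IBSS channel, so don't wait for the join path to find out */
    static int set_freq(int is_ibss, const struct channel *chan)
    {
        if (!chan || (chan->flags & CHAN_DISABLED))
            return -EINVAL;
        if (is_ibss && (chan->flags & CHAN_NO_IBSS)) {
            fprintf(stderr, "IBSS not allowed on %d MHz\n", chan->mhz);
            return -EINVAL;
        }
        return 0;
    }

    int main(void)
    {
        struct channel c = { 5180, CHAN_NO_IBSS };

        return set_freq(1, &c) == -EINVAL ? 0 : 1;
    }
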
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index dc1598b86004..635b996c8c35 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -673,7 +673,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
673#ifdef CONFIG_MAC80211_HT_DEBUG 673#ifdef CONFIG_MAC80211_HT_DEBUG
674 if (net_ratelimit()) 674 if (net_ratelimit())
675 printk(KERN_DEBUG "allocated aggregation queue" 675 printk(KERN_DEBUG "allocated aggregation queue"
676 " %d tid %d addr %s pool=0x%lX", 676 " %d tid %d addr %s pool=0x%lX\n",
677 i, tid, print_mac(mac, sta->addr), 677 i, tid, print_mac(mac, sta->addr),
678 q->qdisc_pool[0]); 678 q->qdisc_pool[0]);
679#endif /* CONFIG_MAC80211_HT_DEBUG */ 679#endif /* CONFIG_MAC80211_HT_DEBUG */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4b1799da5d7..662c1ccfee26 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -196,8 +196,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
196 if (l4proto && l4proto->destroy) 196 if (l4proto && l4proto->destroy)
197 l4proto->destroy(ct); 197 l4proto->destroy(ct);
198 198
199 nf_ct_ext_destroy(ct);
200
201 rcu_read_unlock(); 199 rcu_read_unlock();
202 200
203 spin_lock_bh(&nf_conntrack_lock); 201 spin_lock_bh(&nf_conntrack_lock);
@@ -520,6 +518,7 @@ static void nf_conntrack_free_rcu(struct rcu_head *head)
520 518
521void nf_conntrack_free(struct nf_conn *ct) 519void nf_conntrack_free(struct nf_conn *ct)
522{ 520{
521 nf_ct_ext_destroy(ct);
523 call_rcu(&ct->rcu, nf_conntrack_free_rcu); 522 call_rcu(&ct->rcu, nf_conntrack_free_rcu);
524} 523}
525EXPORT_SYMBOL_GPL(nf_conntrack_free); 524EXPORT_SYMBOL_GPL(nf_conntrack_free);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index e31beeb33b2b..e8f0dead267f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -587,10 +587,10 @@ int __init nf_conntrack_expect_init(void)
587 return 0; 587 return 0;
588 588
589err3: 589err3:
590 kmem_cache_destroy(nf_ct_expect_cachep);
591err2:
590 nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, 592 nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
591 nf_ct_expect_hsize); 593 nf_ct_expect_hsize);
592err2:
593 kmem_cache_destroy(nf_ct_expect_cachep);
594err1: 594err1:
595 return err; 595 return err;
596} 596}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index bcc19fa4ed1e..8a3f8b34e466 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -59,12 +59,19 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
59 if (!*ext) 59 if (!*ext)
60 return NULL; 60 return NULL;
61 61
62 INIT_RCU_HEAD(&(*ext)->rcu);
62 (*ext)->offset[id] = off; 63 (*ext)->offset[id] = off;
63 (*ext)->len = len; 64 (*ext)->len = len;
64 65
65 return (void *)(*ext) + off; 66 return (void *)(*ext) + off;
66} 67}
67 68
69static void __nf_ct_ext_free_rcu(struct rcu_head *head)
70{
71 struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu);
72 kfree(ext);
73}
74
68void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) 75void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
69{ 76{
70 struct nf_ct_ext *new; 77 struct nf_ct_ext *new;
@@ -106,7 +113,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
106 (void *)ct->ext + ct->ext->offset[i]); 113 (void *)ct->ext + ct->ext->offset[i]);
107 rcu_read_unlock(); 114 rcu_read_unlock();
108 } 115 }
109 kfree(ct->ext); 116 call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu);
110 ct->ext = new; 117 ct->ext = new;
111 } 118 }
112 119
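
Together with the nf_conntrack_core.c hunk that moves nf_ct_ext_destroy() into nf_conntrack_free(), the extend.c change defers freeing a replaced extension block with call_rcu(): a reader inside an RCU read-side critical section may still hold a pointer into the old block, so kfree()ing it immediately risked a use-after-free. A loose pthread analogy (GCC-style atomics, build with -pthread), with the join standing in for the grace period:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct ext { int len; };
    static struct ext *shared;

    static void *reader(void *arg)
    {
        struct ext *e = __atomic_load_n(&shared, __ATOMIC_ACQUIRE);

        (void)arg;
        /* this snapshot must stay valid for as long as the reader
         * uses it; freeing the old block right after replacement is
         * exactly the use-after-free that call_rcu() avoids */
        printf("reader sees len=%d\n", e->len);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        struct ext *old = calloc(1, sizeof(*old));
        struct ext *new = calloc(1, sizeof(*new));

        if (!old || !new)
            return 1;
        old->len = 1;
        new->len = 2;
        shared = old;

        pthread_create(&t, NULL, reader, NULL);
        __atomic_store_n(&shared, new, __ATOMIC_RELEASE);  /* publish */

        pthread_join(t, NULL);  /* stand-in for the grace period: only
                                 * after no reader can still hold the
                                 * old pointer is it safe to free */
        free(old);
        free(new);
        return 0;
    }
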
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 95da1a24aab7..2f83c158934d 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -619,6 +619,7 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = {
619static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { 619static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = {
620 .name = "H.245", 620 .name = "H.245",
621 .me = THIS_MODULE, 621 .me = THIS_MODULE,
622 .tuple.src.l3num = AF_UNSPEC,
622 .tuple.dst.protonum = IPPROTO_UDP, 623 .tuple.dst.protonum = IPPROTO_UDP,
623 .help = h245_help, 624 .help = h245_help,
624 .expect_policy = &h245_exp_policy, 625 .expect_policy = &h245_exp_policy,
@@ -1765,6 +1766,7 @@ static void __exit nf_conntrack_h323_fini(void)
1765 nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); 1766 nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
1766 nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); 1767 nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
1767 nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); 1768 nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
1769 nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
1768 kfree(h323_buffer); 1770 kfree(h323_buffer);
1769 pr_debug("nf_ct_h323: fini\n"); 1771 pr_debug("nf_ct_h323: fini\n");
1770} 1772}
@@ -1777,28 +1779,34 @@ static int __init nf_conntrack_h323_init(void)
1777 h323_buffer = kmalloc(65536, GFP_KERNEL); 1779 h323_buffer = kmalloc(65536, GFP_KERNEL);
1778 if (!h323_buffer) 1780 if (!h323_buffer)
1779 return -ENOMEM; 1781 return -ENOMEM;
1780 ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); 1782 ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
1781 if (ret < 0) 1783 if (ret < 0)
1782 goto err1; 1784 goto err1;
1783 ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); 1785 ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]);
1784 if (ret < 0) 1786 if (ret < 0)
1785 goto err2; 1787 goto err2;
1786 ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); 1788 ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]);
1787 if (ret < 0) 1789 if (ret < 0)
1788 goto err3; 1790 goto err3;
1789 ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); 1791 ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]);
1790 if (ret < 0) 1792 if (ret < 0)
1791 goto err4; 1793 goto err4;
1794 ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]);
1795 if (ret < 0)
1796 goto err5;
1792 pr_debug("nf_ct_h323: init success\n"); 1797 pr_debug("nf_ct_h323: init success\n");
1793 return 0; 1798 return 0;
1794 1799
1795err4: 1800err5:
1796 nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); 1801 nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
1797err3: 1802err4:
1798 nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); 1803 nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
1799err2: 1804err3:
1800 nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); 1805 nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
1806err2:
1807 nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
1801err1: 1808err1:
1809 kfree(h323_buffer);
1802 return ret; 1810 return ret;
1803} 1811}
1804 1812
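
The h323 init path now registers the H.245 helper (and unregisters it on module exit — previously it was never hooked up), and the error ladder is rebuilt so unwinding happens strictly in reverse registration order, with h323_buffer freed on failure instead of leaked. The classic goto-unwind shape, reduced to three registrations:

    #include <errno.h>
    #include <stdlib.h>

    static int register_helper(const char *name) { (void)name; return 0; }
    static void unregister_helper(const char *name) { (void)name; }

    static char *h323_buffer;

    static int h323_init(void)
    {
        int ret;

        h323_buffer = malloc(65536);
        if (!h323_buffer)
            return -ENOMEM;

        ret = register_helper("h245");
        if (ret < 0)
            goto err1;
        ret = register_helper("q931[0]");
        if (ret < 0)
            goto err2;
        ret = register_helper("q931[1]");
        if (ret < 0)
            goto err3;
        return 0;

        /* unwind strictly in reverse registration order */
    err3:
        unregister_helper("q931[0]");
    err2:
        unregister_helper("h245");
    err1:
        free(h323_buffer);   /* the buffer must not leak on failure either */
        return ret;
    }

    int main(void)
    {
        return h323_init() ? 1 : 0;
    }
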
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index bc11d7092032..9fda6ee95a31 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -92,10 +92,6 @@ void nf_log_packet(int pf,
92 vsnprintf(prefix, sizeof(prefix), fmt, args); 92 vsnprintf(prefix, sizeof(prefix), fmt, args);
93 va_end(args); 93 va_end(args);
94 logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); 94 logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
95 } else if (net_ratelimit()) {
96 printk(KERN_WARNING "nf_log_packet: can\'t log since "
97 "no backend logging module loaded in! Please either "
98 "load one, or disable logging explicitly\n");
99 } 95 }
100 rcu_read_unlock(); 96 rcu_read_unlock();
101} 97}
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 2e89a00df92c..70907f6baac3 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -73,7 +73,8 @@ connlimit_iphash6(const union nf_inet_addr *addr,
73static inline bool already_closed(const struct nf_conn *conn) 73static inline bool already_closed(const struct nf_conn *conn)
74{ 74{
75 if (nf_ct_protonum(conn) == IPPROTO_TCP) 75 if (nf_ct_protonum(conn) == IPPROTO_TCP)
76 return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT; 76 return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
77 conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
77 else 78 else
78 return 0; 79 return 0;
79} 80}
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index feb326f4a752..47bbf45ae5d7 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -400,13 +400,13 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
400 * @attrlen: length of attribute payload 400 * @attrlen: length of attribute payload
401 * @data: head of attribute payload 401 * @data: head of attribute payload
402 * 402 *
403 * Returns -1 if the tailroom of the skb is insufficient to store 403 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
404 * the attribute header and payload. 404 * the attribute header and payload.
405 */ 405 */
406int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) 406int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
407{ 407{
408 if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) 408 if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
409 return -1; 409 return -EMSGSIZE;
410 410
411 __nla_put(skb, attrtype, attrlen, data); 411 __nla_put(skb, attrtype, attrlen, data);
412 return 0; 412 return 0;
@@ -418,13 +418,13 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
418 * @attrlen: length of attribute payload 418 * @attrlen: length of attribute payload
419 * @data: head of attribute payload 419 * @data: head of attribute payload
420 * 420 *
421 * Returns -1 if the tailroom of the skb is insufficient to store 421 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
422 * the attribute payload. 422 * the attribute payload.
423 */ 423 */
424int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) 424int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
425{ 425{
426 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) 426 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
427 return -1; 427 return -EMSGSIZE;
428 428
429 __nla_put_nohdr(skb, attrlen, data); 429 __nla_put_nohdr(skb, attrlen, data);
430 return 0; 430 return 0;
@@ -436,13 +436,13 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
436 * @attrlen: length of attribute payload 436 * @attrlen: length of attribute payload
437 * @data: head of attribute payload 437 * @data: head of attribute payload
438 * 438 *
439 * Returns -1 if the tailroom of the skb is insufficient to store 439 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
440 * the attribute payload. 440 * the attribute payload.
441 */ 441 */
442int nla_append(struct sk_buff *skb, int attrlen, const void *data) 442int nla_append(struct sk_buff *skb, int attrlen, const void *data)
443{ 443{
444 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) 444 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
445 return -1; 445 return -EMSGSIZE;
446 446
447 memcpy(skb_put(skb, attrlen), data, attrlen); 447 memcpy(skb_put(skb, attrlen), data, attrlen);
448 return 0; 448 return 0;
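
All three nla_put variants now report -EMSGSIZE rather than a bare -1, so callers can propagate a meaningful errno instead of translating the failure themselves. A small demonstration of the convention:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static int put_attr(size_t tailroom, size_t need)
    {
        if (tailroom < need)
            return -EMSGSIZE;   /* a real errno, not a bare -1 */
        return 0;
    }

    int main(void)
    {
        int err = put_attr(8, 64);

        if (err < 0)   /* callers can propagate err unchanged */
            printf("put failed: %s\n", strerror(-err));
        return 0;
    }
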
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d16929c9b4bc..3e1191cecaf0 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -444,8 +444,11 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
444 if (ops->dumpit == NULL) 444 if (ops->dumpit == NULL)
445 return -EOPNOTSUPP; 445 return -EOPNOTSUPP;
446 446
447 return netlink_dump_start(genl_sock, skb, nlh, 447 genl_unlock();
448 ops->dumpit, ops->done); 448 err = netlink_dump_start(genl_sock, skb, nlh,
449 ops->dumpit, ops->done);
450 genl_lock();
451 return err;
449 } 452 }
450 453
451 if (ops->doit == NULL) 454 if (ops->doit == NULL)
@@ -554,7 +557,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
554 return genlmsg_end(skb, hdr); 557 return genlmsg_end(skb, hdr);
555 558
556nla_put_failure: 559nla_put_failure:
557 return genlmsg_cancel(skb, hdr); 560 genlmsg_cancel(skb, hdr);
561 return -EMSGSIZE;
558} 562}
559 563
560static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, 564static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
@@ -590,7 +594,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
590 return genlmsg_end(skb, hdr); 594 return genlmsg_end(skb, hdr);
591 595
592nla_put_failure: 596nla_put_failure:
593 return genlmsg_cancel(skb, hdr); 597 genlmsg_cancel(skb, hdr);
598 return -EMSGSIZE;
594} 599}
595 600
596static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) 601static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
@@ -601,9 +606,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
601 int chains_to_skip = cb->args[0]; 606 int chains_to_skip = cb->args[0];
602 int fams_to_skip = cb->args[1]; 607 int fams_to_skip = cb->args[1];
603 608
604 if (chains_to_skip != 0)
605 genl_lock();
606
607 for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { 609 for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
608 if (i < chains_to_skip) 610 if (i < chains_to_skip)
609 continue; 611 continue;
@@ -621,9 +623,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
621 } 623 }
622 624
623errout: 625errout:
624 if (chains_to_skip != 0)
625 genl_unlock();
626
627 cb->args[0] = i; 626 cb->args[0] = i;
628 cb->args[1] = n; 627 cb->args[1] = n;
629 628
@@ -768,7 +767,7 @@ static int __init genl_init(void)
768 767
769 /* we'll bump the group number right afterwards */ 768 /* we'll bump the group number right afterwards */
770 genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0, 769 genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0,
771 genl_rcv, NULL, THIS_MODULE); 770 genl_rcv, &genl_mutex, THIS_MODULE);
772 if (genl_sock == NULL) 771 if (genl_sock == NULL)
773 panic("GENL: Cannot initialize generic netlink\n"); 772 panic("GENL: Cannot initialize generic netlink\n");
774 773
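
genl_sock is now created with genl_mutex as its cb_mutex, so generic-netlink callbacks run under the same lock the subsystem uses; that forces genl_rcv_msg() to drop the mutex around netlink_dump_start() (which takes it again internally) and lets ctrl_dumpfamily() shed its own conditional locking. A pthread sketch of dropping a non-recursive lock around a callback that reacquires it (build with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t subsys_lock = PTHREAD_MUTEX_INITIALIZER;

    /* the framework takes subsys_lock itself before running a dump */
    static int start_dump(void)
    {
        pthread_mutex_lock(&subsys_lock);
        puts("dumping under subsys_lock");
        pthread_mutex_unlock(&subsys_lock);
        return 0;
    }

    static int handle_request(void)
    {
        int err;

        pthread_mutex_lock(&subsys_lock);
        /* ... validate the request under the lock ... */

        pthread_mutex_unlock(&subsys_lock);  /* drop it: start_dump()
                                              * reacquires the same
                                              * non-recursive lock */
        err = start_dump();
        pthread_mutex_lock(&subsys_lock);
        /* ... finish under the lock again ... */
        pthread_mutex_unlock(&subsys_lock);

        return err;
    }

    int main(void)
    {
        return handle_request();
    }
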
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1086df7478bc..9360fc81e8c7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -220,7 +220,7 @@ replay:
220 tp = kzalloc(sizeof(*tp), GFP_KERNEL); 220 tp = kzalloc(sizeof(*tp), GFP_KERNEL);
221 if (tp == NULL) 221 if (tp == NULL)
222 goto errout; 222 goto errout;
223 err = -EINVAL; 223 err = -ENOENT;
224 tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); 224 tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
225 if (tp_ops == NULL) { 225 if (tp_ops == NULL) {
226#ifdef CONFIG_KMOD 226#ifdef CONFIG_KMOD
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0df911fd67b1..64465bacbe79 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -444,7 +444,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
444 return nla_nest_end(skb, opts); 444 return nla_nest_end(skb, opts);
445 445
446nla_put_failure: 446nla_put_failure:
447 return nla_nest_cancel(skb, opts); 447 nla_nest_cancel(skb, opts);
448 return -EMSGSIZE;
448} 449}
449 450
450static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) 451static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -466,7 +467,8 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
466 return nla_nest_end(skb, opts); 467 return nla_nest_end(skb, opts);
467 468
468nla_put_failure: 469nla_put_failure:
469 return nla_nest_cancel(skb, opts); 470 nla_nest_cancel(skb, opts);
471 return -EMSGSIZE;
470} 472}
471 473
472static const struct Qdisc_class_ops dsmark_class_ops = { 474static const struct Qdisc_class_ops dsmark_class_ops = {
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 3a9d226ff1e4..c89fba56db56 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -582,7 +582,8 @@ append_opt:
582 return nla_nest_end(skb, opts); 582 return nla_nest_end(skb, opts);
583 583
584nla_put_failure: 584nla_put_failure:
585 return nla_nest_cancel(skb, opts); 585 nla_nest_cancel(skb, opts);
586 return -EMSGSIZE;
586} 587}
587 588
588static void gred_destroy(struct Qdisc *sch) 589static void gred_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 87293d0db1d7..fdfaa3fcc16d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1360,7 +1360,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
1360 1360
1361 nla_put_failure: 1361 nla_put_failure:
1362 nla_nest_cancel(skb, nest); 1362 nla_nest_cancel(skb, nest);
1363 return -1; 1363 return -EMSGSIZE;
1364} 1364}
1365 1365
1366static int 1366static int
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5bc1ed490180..6807c97985a5 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -28,6 +28,7 @@
28 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ 28 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
29 */ 29 */
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/moduleparam.h>
31#include <linux/types.h> 32#include <linux/types.h>
32#include <linux/kernel.h> 33#include <linux/kernel.h>
33#include <linux/string.h> 34#include <linux/string.h>
@@ -53,13 +54,17 @@
53*/ 54*/
54 55
55#define HTB_HSIZE 16 /* classid hash size */ 56#define HTB_HSIZE 16 /* classid hash size */
56#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ 57static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
57#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ 58#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */
58 59
59#if HTB_VER >> 16 != TC_HTB_PROTOVER 60#if HTB_VER >> 16 != TC_HTB_PROTOVER
60#error "Mismatched sch_htb.c and pkt_sch.h" 61#error "Mismatched sch_htb.c and pkt_sch.h"
61#endif 62#endif
62 63
64/* Module parameter and sysfs export */
65module_param (htb_hysteresis, int, 0640);
66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
67
63/* used internaly to keep status of single class */ 68/* used internaly to keep status of single class */
64enum htb_cmode { 69enum htb_cmode {
65 HTB_CANT_SEND, /* class can't send and can't borrow */ 70 HTB_CANT_SEND, /* class can't send and can't borrow */
@@ -462,19 +467,21 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
462 htb_remove_class_from_row(q, cl, mask); 467 htb_remove_class_from_row(q, cl, mask);
463} 468}
464 469
465#if HTB_HYSTERESIS
466static inline long htb_lowater(const struct htb_class *cl) 470static inline long htb_lowater(const struct htb_class *cl)
467{ 471{
468 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; 472 if (htb_hysteresis)
473 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
474 else
475 return 0;
469} 476}
470static inline long htb_hiwater(const struct htb_class *cl) 477static inline long htb_hiwater(const struct htb_class *cl)
471{ 478{
472 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; 479 if (htb_hysteresis)
480 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
481 else
482 return 0;
473} 483}
474#else 484
475#define htb_lowater(cl) (0)
476#define htb_hiwater(cl) (0)
477#endif
478 485
479/** 486/**
480 * htb_class_mode - computes and returns current class mode 487 * htb_class_mode - computes and returns current class mode
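
sch_htb.c turns the hysteresis speedup from a compile-time #define into the htb_hysteresis module parameter, so the trade-off between CPU load and accuracy can be flipped at load time (or through sysfs, mode 0640) without rebuilding the module. A sketch of the same compile-time-to-runtime conversion, using an environment variable as a placeholder for module_param():

    #include <stdio.h>
    #include <stdlib.h>

    /* runtime knob standing in for
     * module_param(htb_hysteresis, int, 0640) */
    static int htb_hysteresis;

    static long lowater(int can_send, long cbuffer)
    {
        if (htb_hysteresis)
            return can_send ? -cbuffer : 0;
        return 0;
    }

    int main(void)
    {
        const char *env = getenv("HTB_HYSTERESIS");  /* placeholder knob */

        htb_hysteresis = env ? atoi(env) : 0;
        printf("lowater=%ld\n", lowater(1, 1000));
        return 0;
    }
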
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 3dcd493f4f4a..5c569853b9c0 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -281,7 +281,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
281 return nla_nest_end(skb, opts); 281 return nla_nest_end(skb, opts);
282 282
283nla_put_failure: 283nla_put_failure:
284 return nla_nest_cancel(skb, opts); 284 nla_nest_cancel(skb, opts);
285 return -EMSGSIZE;
285} 286}
286 287
287static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 288static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b4cd2b71953f..024c3ebd9661 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -474,6 +474,15 @@ static void sctp_association_destroy(struct sctp_association *asoc)
474void sctp_assoc_set_primary(struct sctp_association *asoc, 474void sctp_assoc_set_primary(struct sctp_association *asoc,
475 struct sctp_transport *transport) 475 struct sctp_transport *transport)
476{ 476{
477 int changeover = 0;
478
479 /* it's a changeover only if we already have a primary path
480 * that we are changing
481 */
482 if (asoc->peer.primary_path != NULL &&
483 asoc->peer.primary_path != transport)
484 changeover = 1 ;
485
477 asoc->peer.primary_path = transport; 486 asoc->peer.primary_path = transport;
478 487
479 /* Set a default msg_name for events. */ 488 /* Set a default msg_name for events. */
@@ -499,12 +508,12 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
499 * double switch to the same destination address. 508 * double switch to the same destination address.
500 */ 509 */
501 if (transport->cacc.changeover_active) 510 if (transport->cacc.changeover_active)
502 transport->cacc.cycling_changeover = 1; 511 transport->cacc.cycling_changeover = changeover;
503 512
504 /* 2) The sender MUST set CHANGEOVER_ACTIVE to indicate that 513 /* 2) The sender MUST set CHANGEOVER_ACTIVE to indicate that
505 * a changeover has occurred. 514 * a changeover has occurred.
506 */ 515 */
507 transport->cacc.changeover_active = 1; 516 transport->cacc.changeover_active = changeover;
508 517
509 /* 3) The sender MUST store the next TSN to be sent in 518 /* 3) The sender MUST store the next TSN to be sent in
510 * next_tsn_at_change. 519 * next_tsn_at_change.
@@ -1203,6 +1212,9 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1203 struct list_head *head = &asoc->peer.transport_addr_list; 1212 struct list_head *head = &asoc->peer.transport_addr_list;
1204 struct list_head *pos; 1213 struct list_head *pos;
1205 1214
1215 if (asoc->peer.transport_count == 1)
1216 return;
1217
1206 /* Find the next transport in a round-robin fashion. */ 1218 /* Find the next transport in a round-robin fashion. */
1207 t = asoc->peer.retran_path; 1219 t = asoc->peer.retran_path;
1208 pos = &t->transports; 1220 pos = &t->transports;
@@ -1217,6 +1229,15 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1217 1229
1218 t = list_entry(pos, struct sctp_transport, transports); 1230 t = list_entry(pos, struct sctp_transport, transports);
1219 1231
1232 /* We have exhausted the list, but didn't find any
1233 * other active transports; in that case, use the next
1234 * transport.
1235 */
1236 if (t == asoc->peer.retran_path) {
1237 t = next;
1238 break;
1239 }
1240
1220 /* Try to find an active transport. */ 1241 /* Try to find an active transport. */
1221 1242
1222 if ((t->state == SCTP_ACTIVE) || 1243 if ((t->state == SCTP_ACTIVE) ||
@@ -1229,15 +1250,6 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1229 if (!next) 1250 if (!next)
1230 next = t; 1251 next = t;
1231 } 1252 }
1232
1233 /* We have exhausted the list, but didn't find any
1234 * other active transports. If so, use the next
1235 * transport.
1236 */
1237 if (t == asoc->peer.retran_path) {
1238 t = next;
1239 break;
1240 }
1241 } 1253 }
1242 1254
1243 asoc->peer.retran_path = t; 1255 asoc->peer.retran_path = t;
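The two hunks above make the retransmit-path update bail out early for single-homed peers and move the list-exhaustion check ahead of the active-state test, so a fully inactive peer still gets a usable fallback. A simplified user-space sketch of that round-robin selection over a circular transport ring; the struct and helper here are illustrative stand-ins for the kernel's list_head-based walk.

#include <stdio.h>

enum state { INACTIVE, ACTIVE };

struct transport {
    enum state state;
    struct transport *next;   /* circular list of peer transports */
};

/* Pick the next retransmission path round-robin, preferring ACTIVE
 * transports; fall back to the transport after the current one when
 * no other active transport exists.  Mirrors the intent of
 * sctp_assoc_update_retran_path(), heavily simplified. */
static struct transport *next_retran_path(struct transport *cur, int count)
{
    struct transport *t = cur->next;
    struct transport *fallback = t;

    /* nothing to rotate to with a single path */
    if (count == 1)
        return cur;

    while (t != cur) {
        if (t->state == ACTIVE)
            return t;
        t = t->next;
    }
    /* exhausted the ring without finding another active peer */
    return fallback;
}

int main(void)
{
    struct transport a = { ACTIVE, 0 }, b = { INACTIVE, 0 }, c = { ACTIVE, 0 };
    a.next = &b; b.next = &c; c.next = &a;
    printf("from a -> %s\n", next_retran_path(&a, 3) == &c ? "c" : "other");
    return 0;
}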
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e45e44c60635..a2f4d4d51593 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
299/* Fills in the source address(saddr) based on the destination address(daddr) 299/* Fills in the source address(saddr) based on the destination address(daddr)
300 * and asoc's bind address list. 300 * and asoc's bind address list.
301 */ 301 */
302static void sctp_v6_get_saddr(struct sctp_association *asoc, 302static void sctp_v6_get_saddr(struct sctp_sock *sk,
303 struct sctp_association *asoc,
303 struct dst_entry *dst, 304 struct dst_entry *dst,
304 union sctp_addr *daddr, 305 union sctp_addr *daddr,
305 union sctp_addr *saddr) 306 union sctp_addr *saddr)
@@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
318 if (!asoc) { 319 if (!asoc) {
319 ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, 320 ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL,
320 &daddr->v6.sin6_addr, 321 &daddr->v6.sin6_addr,
321 inet6_sk(asoc->base.sk)->srcprefs, 322 inet6_sk(&sk->inet.sk)->srcprefs,
322 &saddr->v6.sin6_addr); 323 &saddr->v6.sin6_addr);
323 SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", 324 SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n",
324 NIP6(saddr->v6.sin6_addr)); 325 NIP6(saddr->v6.sin6_addr));
@@ -726,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
726 seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr)); 727 seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr));
727} 728}
728 729
730static void sctp_v6_ecn_capable(struct sock *sk)
731{
732 inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
733}
734
729/* Initialize a PF_INET6 socket msg_name. */ 735/* Initialize a PF_INET6 socket msg_name. */
730static void sctp_inet6_msgname(char *msgname, int *addr_len) 736static void sctp_inet6_msgname(char *msgname, int *addr_len)
731{ 737{
@@ -996,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = {
996 .skb_iif = sctp_v6_skb_iif, 1002 .skb_iif = sctp_v6_skb_iif,
997 .is_ce = sctp_v6_is_ce, 1003 .is_ce = sctp_v6_is_ce,
998 .seq_dump_addr = sctp_v6_seq_dump_addr, 1004 .seq_dump_addr = sctp_v6_seq_dump_addr,
1005 .ecn_capable = sctp_v6_ecn_capable,
999 .net_header_len = sizeof(struct ipv6hdr), 1006 .net_header_len = sizeof(struct ipv6hdr),
1000 .sockaddr_len = sizeof(struct sockaddr_in6), 1007 .sockaddr_len = sizeof(struct sockaddr_in6),
1001#ifdef CONFIG_COMPAT 1008#ifdef CONFIG_COMPAT
diff --git a/net/sctp/output.c b/net/sctp/output.c
index cf4f9fb6819d..6d45bae93b46 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
548 * Note: The IPv6 layer also checks this bit later 548 * Note: The IPv6 layer also checks this bit later
549 * in transmission. See IP6_ECN_flow_xmit(). 549 * in transmission. See IP6_ECN_flow_xmit().
550 */ 550 */
551 INET_ECN_xmit(nskb->sk); 551 (*tp->af_specific->ecn_capable)(nskb->sk);
552 552
553 /* Set up the IP options. */ 553 /* Set up the IP options. */
554 /* BUG: not implemented 554 /* BUG: not implemented
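Together with the sctp_af additions in ipv6.c and protocol.c, this change makes the transmit path mark ECN capability through a per-address-family hook instead of calling the IPv4 helper unconditionally. A small sketch of that ops-table dispatch follows; the types and the ECT(0) bit value are simplified assumptions, not the kernel's definitions.

#include <stdio.h>

/* stand-in for struct sock */
struct sock { int tclass; };

/* Per-address-family operations table, modeled on struct sctp_af.
 * Only the ecn_capable hook added by the patch is shown. */
struct af_ops {
    const char *name;
    void (*ecn_capable)(struct sock *sk);
};

static void v4_ecn_capable(struct sock *sk) { sk->tclass |= 0x02; /* ECT(0) */ }
static void v6_ecn_capable(struct sock *sk) { sk->tclass |= 0x02; }

static const struct af_ops af_inet  = { "ipv4", v4_ecn_capable };
static const struct af_ops af_inet6 = { "ipv6", v6_ecn_capable };

/* The transmit path calls through the table instead of hard-coding
 * the IPv4 helper, as sctp_packet_transmit() now does. */
static void transmit(const struct af_ops *af, struct sock *sk)
{
    af->ecn_capable(sk);
    printf("%s: tclass=0x%02x\n", af->name, sk->tclass);
}

int main(void)
{
    struct sock s4 = {0}, s6 = {0};
    transmit(&af_inet, &s4);
    transmit(&af_inet6, &s6);
    return 0;
}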
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 59edfd25a19c..ace6770e9048 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
208 INIT_LIST_HEAD(&q->sacked); 208 INIT_LIST_HEAD(&q->sacked);
209 INIT_LIST_HEAD(&q->abandoned); 209 INIT_LIST_HEAD(&q->abandoned);
210 210
211 q->fast_rtx = 0;
211 q->outstanding_bytes = 0; 212 q->outstanding_bytes = 0;
212 q->empty = 1; 213 q->empty = 1;
213 q->cork = 0; 214 q->cork = 0;
@@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
500 case SCTP_RTXR_FAST_RTX: 501 case SCTP_RTXR_FAST_RTX:
501 SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); 502 SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
502 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); 503 sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
504 q->fast_rtx = 1;
503 break; 505 break;
504 case SCTP_RTXR_PMTUD: 506 case SCTP_RTXR_PMTUD:
505 SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); 507 SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
@@ -518,9 +520,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
518 * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by 520 * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by
519 * following the procedures outlined in C1 - C5. 521 * following the procedures outlined in C1 - C5.
520 */ 522 */
521 sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); 523 if (reason == SCTP_RTXR_T3_RTX)
524 sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point);
522 525
523 error = sctp_outq_flush(q, /* rtx_timeout */ 1); 526 /* Flush the queues only on timeout, since fast_rtx is only
527 * triggered during sack processing and the queue
528 * will be flushed at the end.
529 */
530 if (reason != SCTP_RTXR_FAST_RTX)
531 error = sctp_outq_flush(q, /* rtx_timeout */ 1);
524 532
525 if (error) 533 if (error)
526 q->asoc->base.sk->sk_err = -error; 534 q->asoc->base.sk->sk_err = -error;
@@ -538,17 +546,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
538 int rtx_timeout, int *start_timer) 546 int rtx_timeout, int *start_timer)
539{ 547{
540 struct list_head *lqueue; 548 struct list_head *lqueue;
541 struct list_head *lchunk;
542 struct sctp_transport *transport = pkt->transport; 549 struct sctp_transport *transport = pkt->transport;
543 sctp_xmit_t status; 550 sctp_xmit_t status;
544 struct sctp_chunk *chunk, *chunk1; 551 struct sctp_chunk *chunk, *chunk1;
545 struct sctp_association *asoc; 552 struct sctp_association *asoc;
553 int fast_rtx;
546 int error = 0; 554 int error = 0;
555 int timer = 0;
556 int done = 0;
547 557
548 asoc = q->asoc; 558 asoc = q->asoc;
549 lqueue = &q->retransmit; 559 lqueue = &q->retransmit;
560 fast_rtx = q->fast_rtx;
550 561
551 /* RFC 2960 6.3.3 Handle T3-rtx Expiration 562 /* This loop handles time-out retransmissions, fast retransmissions,
563 * and retransmissions due to the opening of the window.
564 *
565 * RFC 2960 6.3.3 Handle T3-rtx Expiration
552 * 566 *
553 * E3) Determine how many of the earliest (i.e., lowest TSN) 567 * E3) Determine how many of the earliest (i.e., lowest TSN)
554 * outstanding DATA chunks for the address for which the 568 * outstanding DATA chunks for the address for which the
@@ -563,12 +577,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
563 * [Just to be painfully clear, if we are retransmitting 577 * [Just to be painfully clear, if we are retransmitting
564 * because a timeout just happened, we should send only ONE 578 * because a timeout just happened, we should send only ONE
565 * packet of retransmitted data.] 579 * packet of retransmitted data.]
580 *
581 * For fast retransmissions we also send only ONE packet. However,
582 * if we are just flushing the queue due to open window, we'll
583 * try to send as much as possible.
566 */ 584 */
567 lchunk = sctp_list_dequeue(lqueue); 585 list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) {
568
569 while (lchunk) {
570 chunk = list_entry(lchunk, struct sctp_chunk,
571 transmitted_list);
572 586
573 /* Make sure that Gap Acked TSNs are not retransmitted. A 587 /* Make sure that Gap Acked TSNs are not retransmitted. A
574 * simple approach is just to move such TSNs out of the 588 * simple approach is just to move such TSNs out of the
@@ -576,58 +590,60 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
576 * next chunk. 590 * next chunk.
577 */ 591 */
578 if (chunk->tsn_gap_acked) { 592 if (chunk->tsn_gap_acked) {
579 list_add_tail(lchunk, &transport->transmitted); 593 list_del(&chunk->transmitted_list);
580 lchunk = sctp_list_dequeue(lqueue); 594 list_add_tail(&chunk->transmitted_list,
595 &transport->transmitted);
581 continue; 596 continue;
582 } 597 }
583 598
599 /* If we are doing fast retransmit, ignore non-fast_retransmit
600 * chunks
601 */
602 if (fast_rtx && !chunk->fast_retransmit)
603 continue;
604
584 /* Attempt to append this chunk to the packet. */ 605 /* Attempt to append this chunk to the packet. */
585 status = sctp_packet_append_chunk(pkt, chunk); 606 status = sctp_packet_append_chunk(pkt, chunk);
586 607
587 switch (status) { 608 switch (status) {
588 case SCTP_XMIT_PMTU_FULL: 609 case SCTP_XMIT_PMTU_FULL:
589 /* Send this packet. */ 610 /* Send this packet. */
590 if ((error = sctp_packet_transmit(pkt)) == 0) 611 error = sctp_packet_transmit(pkt);
591 *start_timer = 1;
592 612
593 /* If we are retransmitting, we should only 613 /* If we are retransmitting, we should only
594 * send a single packet. 614 * send a single packet.
595 */ 615 */
596 if (rtx_timeout) { 616 if (rtx_timeout || fast_rtx)
597 list_add(lchunk, lqueue); 617 done = 1;
598 lchunk = NULL;
599 }
600 618
601 /* Bundle lchunk in the next round. */ 619 /* Bundle next chunk in the next round. */
602 break; 620 break;
603 621
604 case SCTP_XMIT_RWND_FULL: 622 case SCTP_XMIT_RWND_FULL:
605 /* Send this packet. */ 623 /* Send this packet. */
606 if ((error = sctp_packet_transmit(pkt)) == 0) 624 error = sctp_packet_transmit(pkt);
607 *start_timer = 1;
608 625
609 /* Stop sending DATA as there is no more room 626 /* Stop sending DATA as there is no more room
610 * at the receiver. 627 * at the receiver.
611 */ 628 */
612 list_add(lchunk, lqueue); 629 done = 1;
613 lchunk = NULL;
614 break; 630 break;
615 631
616 case SCTP_XMIT_NAGLE_DELAY: 632 case SCTP_XMIT_NAGLE_DELAY:
617 /* Send this packet. */ 633 /* Send this packet. */
618 if ((error = sctp_packet_transmit(pkt)) == 0) 634 error = sctp_packet_transmit(pkt);
619 *start_timer = 1;
620 635
621 /* Stop sending DATA because of nagle delay. */ 636 /* Stop sending DATA because of nagle delay. */
622 list_add(lchunk, lqueue); 637 done = 1;
623 lchunk = NULL;
624 break; 638 break;
625 639
626 default: 640 default:
627 /* The append was successful, so add this chunk to 641 /* The append was successful, so add this chunk to
628 * the transmitted list. 642 * the transmitted list.
629 */ 643 */
630 list_add_tail(lchunk, &transport->transmitted); 644 list_del(&chunk->transmitted_list);
645 list_add_tail(&chunk->transmitted_list,
646 &transport->transmitted);
631 647
632 /* Mark the chunk as ineligible for fast retransmit 648 /* Mark the chunk as ineligible for fast retransmit
633 * after it is retransmitted. 649 * after it is retransmitted.
@@ -635,27 +651,44 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
635 if (chunk->fast_retransmit > 0) 651 if (chunk->fast_retransmit > 0)
636 chunk->fast_retransmit = -1; 652 chunk->fast_retransmit = -1;
637 653
638 *start_timer = 1; 654 /* Force start T3-rtx timer when fast retransmitting
639 q->empty = 0; 655 * the earliest outstanding TSN
656 */
657 if (!timer && fast_rtx &&
658 ntohl(chunk->subh.data_hdr->tsn) ==
659 asoc->ctsn_ack_point + 1)
660 timer = 2;
640 661
641 /* Retrieve a new chunk to bundle. */ 662 q->empty = 0;
642 lchunk = sctp_list_dequeue(lqueue);
643 break; 663 break;
644 } 664 }
645 665
646 /* If we are here due to a retransmit timeout or a fast 666 /* Set the timer if there were no errors */
647 * retransmit and if there are any chunks left in the retransmit 667 if (!error && !timer)
648 * queue that could not fit in the PMTU sized packet, they need 668 timer = 1;
649 * to be marked as ineligible for a subsequent fast retransmit. 669
650 */ 670 if (done)
651 if (rtx_timeout && !lchunk) { 671 break;
652 list_for_each_entry(chunk1, lqueue, transmitted_list) { 672 }
653 if (chunk1->fast_retransmit > 0) 673
654 chunk1->fast_retransmit = -1; 674 /* If we are here due to a retransmit timeout or a fast
655 } 675 * retransmit and if there are any chunks left in the retransmit
676 * queue that could not fit in the PMTU sized packet, they need
677 * to be marked as ineligible for a subsequent fast retransmit.
678 */
679 if (rtx_timeout || fast_rtx) {
680 list_for_each_entry(chunk1, lqueue, transmitted_list) {
681 if (chunk1->fast_retransmit > 0)
682 chunk1->fast_retransmit = -1;
656 } 683 }
657 } 684 }
658 685
686 *start_timer = timer;
687
688 /* Clear fast retransmit hint */
689 if (fast_rtx)
690 q->fast_rtx = 0;
691
659 return error; 692 return error;
660} 693}
661 694
@@ -862,7 +895,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
862 rtx_timeout, &start_timer); 895 rtx_timeout, &start_timer);
863 896
864 if (start_timer) 897 if (start_timer)
865 sctp_transport_reset_timers(transport); 898 sctp_transport_reset_timers(transport,
899 start_timer-1);
866 900
867 /* This can happen on COOKIE-ECHO resend. Only 901 /* This can happen on COOKIE-ECHO resend. Only
868 * one chunk can get bundled with a COOKIE-ECHO. 902 * one chunk can get bundled with a COOKIE-ECHO.
@@ -977,7 +1011,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
977 list_add_tail(&chunk->transmitted_list, 1011 list_add_tail(&chunk->transmitted_list,
978 &transport->transmitted); 1012 &transport->transmitted);
979 1013
980 sctp_transport_reset_timers(transport); 1014 sctp_transport_reset_timers(transport, start_timer-1);
981 1015
982 q->empty = 0; 1016 q->empty = 0;
983 1017
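The rewritten retransmit flush above switches from dequeue/requeue to list_for_each_entry_safe() so the current chunk can be moved to the transmitted list mid-walk, and it sends at most one packet per fast retransmit. Below is a user-space sketch of that traversal discipline on a hand-rolled singly linked list (the kernel uses its intrusive list_head instead, so this is a model of the idea, not the API).

#include <stdio.h>

struct chunk {
    int tsn;
    int fast_retransmit;  /* marked by SACK processing */
    struct chunk *next;
};

/* Walk the retransmit list with a saved next pointer so the current
 * node may be unlinked mid-walk -- the same guarantee
 * list_for_each_entry_safe() gives the kernel code above.  In
 * fast-retransmit mode, only chunks marked fast_retransmit are sent,
 * and we stop after one packet's worth (here: one chunk). */
static void flush_rtx(struct chunk **rtx, struct chunk **done, int fast_rtx)
{
    struct chunk *c, *n;
    struct chunk **prev = rtx;

    for (c = *rtx; c; c = n) {
        n = c->next;
        if (fast_rtx && !c->fast_retransmit) {
            prev = &c->next;
            continue;
        }
        /* unlink from the retransmit list, append to transmitted */
        *prev = n;
        c->next = *done;
        *done = c;
        printf("retransmitted tsn %d\n", c->tsn);
        if (fast_rtx)
            break;  /* one packet only on fast retransmit */
    }
}

int main(void)
{
    struct chunk c3 = { 3, 1, NULL }, c2 = { 2, 0, &c3 }, c1 = { 1, 1, &c2 };
    struct chunk *rtx = &c1, *done = NULL;
    flush_rtx(&rtx, &done, 1);   /* sends only TSN 1 */
    return 0;
}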
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 0ec234b762c2..9258dfe784ae 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -108,14 +108,23 @@ static __init int sctp_proc_init(void)
108 } 108 }
109 109
110 if (sctp_snmp_proc_init()) 110 if (sctp_snmp_proc_init())
111 goto out_nomem; 111 goto out_snmp_proc_init;
112 if (sctp_eps_proc_init()) 112 if (sctp_eps_proc_init())
113 goto out_nomem; 113 goto out_eps_proc_init;
114 if (sctp_assocs_proc_init()) 114 if (sctp_assocs_proc_init())
115 goto out_nomem; 115 goto out_assocs_proc_init;
116 116
117 return 0; 117 return 0;
118 118
119out_assocs_proc_init:
120 sctp_eps_proc_exit();
121out_eps_proc_init:
122 sctp_snmp_proc_exit();
123out_snmp_proc_init:
124 if (proc_net_sctp) {
125 proc_net_sctp = NULL;
126 remove_proc_entry("sctp", init_net.proc_net);
127 }
119out_nomem: 128out_nomem:
120 return -ENOMEM; 129 return -ENOMEM;
121} 130}
@@ -470,11 +479,11 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
470 /* Walk through the bind address list and look for a bind 479 /* Walk through the bind address list and look for a bind
471 * address that matches the source address of the returned dst. 480 * address that matches the source address of the returned dst.
472 */ 481 */
482 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
473 rcu_read_lock(); 483 rcu_read_lock();
474 list_for_each_entry_rcu(laddr, &bp->address_list, list) { 484 list_for_each_entry_rcu(laddr, &bp->address_list, list) {
475 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) 485 if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
476 continue; 486 continue;
477 sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
478 if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) 487 if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
479 goto out_unlock; 488 goto out_unlock;
480 } 489 }
@@ -519,7 +528,8 @@ out:
519/* For v4, the source address is cached in the route entry(dst). So no need 528/* For v4, the source address is cached in the route entry(dst). So no need
520 * to cache it separately and hence this is an empty routine. 529 * to cache it separately and hence this is an empty routine.
521 */ 530 */
522static void sctp_v4_get_saddr(struct sctp_association *asoc, 531static void sctp_v4_get_saddr(struct sctp_sock *sk,
532 struct sctp_association *asoc,
523 struct dst_entry *dst, 533 struct dst_entry *dst,
524 union sctp_addr *daddr, 534 union sctp_addr *daddr,
525 union sctp_addr *saddr) 535 union sctp_addr *saddr)
@@ -616,6 +626,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
616 seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); 626 seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
617} 627}
618 628
629static void sctp_v4_ecn_capable(struct sock *sk)
630{
631 INET_ECN_xmit(sk);
632}
633
619/* Event handler for inet address addition/deletion events. 634/* Event handler for inet address addition/deletion events.
620 * The sctp_local_addr_list needs to be protected by a spin lock since 635 * The sctp_local_addr_list needs to be protected by a spin lock since
621 * multiple notifiers (say IPv4 and IPv6) may be running at the same 636 * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -934,6 +949,7 @@ static struct sctp_af sctp_af_inet = {
934 .skb_iif = sctp_v4_skb_iif, 949 .skb_iif = sctp_v4_skb_iif,
935 .is_ce = sctp_v4_is_ce, 950 .is_ce = sctp_v4_is_ce,
936 .seq_dump_addr = sctp_v4_seq_dump_addr, 951 .seq_dump_addr = sctp_v4_seq_dump_addr,
952 .ecn_capable = sctp_v4_ecn_capable,
937 .net_header_len = sizeof(struct iphdr), 953 .net_header_len = sizeof(struct iphdr),
938 .sockaddr_len = sizeof(struct sockaddr_in), 954 .sockaddr_len = sizeof(struct sockaddr_in),
939#ifdef CONFIG_COMPAT 955#ifdef CONFIG_COMPAT
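The sctp_proc_init() rework above unwinds exactly the proc entries that were created before the failure, in reverse order, instead of jumping to a single catch-all label. A self-contained sketch of that staged-goto pattern; the stage()/unstage() helpers are hypothetical stand-ins for the three proc-init/exit calls.

#include <stdio.h>
#include <stdlib.h>

#define ENOMEM 12

static void *snmp, *eps, *assocs;

static int stage(void **p)    { *p = malloc(16); return *p ? 0 : -1; }
static void unstage(void **p) { free(*p); *p = NULL; }

/* Each failing stage unwinds only the stages that already succeeded,
 * in reverse order of creation. */
static int proc_init_sketch(void)
{
    if (stage(&snmp))
        goto out_snmp;
    if (stage(&eps))
        goto out_eps;
    if (stage(&assocs))
        goto out_assocs;
    return 0;

out_assocs:
    unstage(&eps);
out_eps:
    unstage(&snmp);
out_snmp:
    return -ENOMEM;
}

int main(void)
{
    printf("init -> %d\n", proc_init_sketch());
    return 0;
}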
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e7e3baf7009e..0dbcde6758ea 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4401,7 +4401,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
4401 if (copy_from_user(&getaddrs, optval, len)) 4401 if (copy_from_user(&getaddrs, optval, len))
4402 return -EFAULT; 4402 return -EFAULT;
4403 4403
4404 if (getaddrs.addr_num <= 0) return -EINVAL; 4404 if (getaddrs.addr_num <= 0 ||
4405 getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr)))
4406 return -EINVAL;
4405 /* 4407 /*
4406 * For UDP-style sockets, id specifies the association to query. 4408 * For UDP-style sockets, id specifies the association to query.
4407 * If the id field is set to the value '0' then the locally bound 4409 * If the id field is set to the value '0' then the locally bound
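The getsockopt fix above bounds a user-supplied element count so the later addr_num * sizeof(union sctp_addr) size computation cannot overflow. A compact sketch of the same guard; the 28-byte stub is an assumed size for illustration, not the real union sctp_addr.

#include <stdio.h>
#include <limits.h>

#define EINVAL 22

union sctp_addr_stub { char pad[28]; };  /* stand-in for union sctp_addr */

/* Validate a user-supplied element count before using it in a size
 * computation: addr_num * sizeof(addr) must not overflow int. */
static int check_addr_num(int addr_num)
{
    if (addr_num <= 0 ||
        addr_num >= (int)(INT_MAX / sizeof(union sctp_addr_stub)))
        return -EINVAL;
    return 0;
}

int main(void)
{
    printf("%d -> %d\n", 4, check_addr_num(4));
    printf("%d -> %d\n", INT_MAX / 4, check_addr_num(INT_MAX / 4));
    return 0;
}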
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f4938f6c5abe..3f34f61221ec 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
79 peer->rttvar = 0; 79 peer->rttvar = 0;
80 peer->srtt = 0; 80 peer->srtt = 0;
81 peer->rto_pending = 0; 81 peer->rto_pending = 0;
82 peer->fast_recovery = 0;
82 83
83 peer->last_time_heard = jiffies; 84 peer->last_time_heard = jiffies;
84 peer->last_time_used = jiffies; 85 peer->last_time_used = jiffies;
@@ -190,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
190/* Start T3_rtx timer if it is not already running and update the heartbeat 191/* Start T3_rtx timer if it is not already running and update the heartbeat
191 * timer. This routine is called every time a DATA chunk is sent. 192 * timer. This routine is called every time a DATA chunk is sent.
192 */ 193 */
193void sctp_transport_reset_timers(struct sctp_transport *transport) 194void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
194{ 195{
195 /* RFC 2960 6.3.2 Retransmission Timer Rules 196 /* RFC 2960 6.3.2 Retransmission Timer Rules
196 * 197 *
@@ -200,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport)
200 * address. 201 * address.
201 */ 202 */
202 203
203 if (!timer_pending(&transport->T3_rtx_timer)) 204 if (force || !timer_pending(&transport->T3_rtx_timer))
204 if (!mod_timer(&transport->T3_rtx_timer, 205 if (!mod_timer(&transport->T3_rtx_timer,
205 jiffies + transport->rto)) 206 jiffies + transport->rto))
206 sctp_transport_hold(transport); 207 sctp_transport_hold(transport);
@@ -291,7 +292,7 @@ void sctp_transport_route(struct sctp_transport *transport,
291 if (saddr) 292 if (saddr)
292 memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); 293 memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
293 else 294 else
294 af->get_saddr(asoc, dst, daddr, &transport->saddr); 295 af->get_saddr(opt, asoc, dst, daddr, &transport->saddr);
295 296
296 transport->dst = dst; 297 transport->dst = dst;
297 if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { 298 if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
@@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
403 cwnd = transport->cwnd; 404 cwnd = transport->cwnd;
404 flight_size = transport->flight_size; 405 flight_size = transport->flight_size;
405 406
407 /* See if we need to exit Fast Recovery first */
408 if (transport->fast_recovery &&
409 TSN_lte(transport->fast_recovery_exit, sack_ctsn))
410 transport->fast_recovery = 0;
411
406 /* The appropriate cwnd increase algorithm is performed if, and only 412 /* The appropriate cwnd increase algorithm is performed if, and only
407 * if the cumulative TSN has advanced and the congestion window is 413 * if the cumulative TSN would advance and the congestion window is
408 * being fully utilized. 414 * being fully utilized.
409 */ 415 */
410 if ((transport->asoc->ctsn_ack_point >= sack_ctsn) || 416 if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) ||
411 (flight_size < cwnd)) 417 (flight_size < cwnd))
412 return; 418 return;
413 419
@@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
416 pmtu = transport->asoc->pathmtu; 422 pmtu = transport->asoc->pathmtu;
417 423
418 if (cwnd <= ssthresh) { 424 if (cwnd <= ssthresh) {
419 /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less 425 /* RFC 4960 7.2.1
420 * than or equal to ssthresh an SCTP endpoint MUST use the 426 * o When cwnd is less than or equal to ssthresh, an SCTP
421 * slow start algorithm to increase cwnd only if the current 427 * endpoint MUST use the slow-start algorithm to increase
422 * congestion window is being fully utilized and an incoming 428 * cwnd only if the current congestion window is being fully
423 * SACK advances the Cumulative TSN Ack Point. Only when these 429 * utilized, an incoming SACK advances the Cumulative TSN
424 * two conditions are met can the cwnd be increased otherwise 430 * Ack Point, and the data sender is not in Fast Recovery.
425 * the cwnd MUST not be increased. If these conditions are met 431 * Only when these three conditions are met can the cwnd be
426 * then cwnd MUST be increased by at most the lesser of 432 * increased; otherwise, the cwnd MUST not be increased.
427 * 1) the total size of the previously outstanding DATA 433 * If these conditions are met, then cwnd MUST be increased
428 * chunk(s) acknowledged, and 2) the destination's path MTU. 434 * by, at most, the lesser of 1) the total size of the
435 * previously outstanding DATA chunk(s) acknowledged, and
436 * 2) the destination's path MTU. This upper bound protects
437 * against the ACK-Splitting attack outlined in [SAVAGE99].
429 */ 438 */
439 if (transport->fast_recovery)
440 return;
441
430 if (bytes_acked > pmtu) 442 if (bytes_acked > pmtu)
431 cwnd += pmtu; 443 cwnd += pmtu;
432 else 444 else
@@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
502 * cwnd = ssthresh 514 * cwnd = ssthresh
503 * partial_bytes_acked = 0 515 * partial_bytes_acked = 0
504 */ 516 */
517 if (transport->fast_recovery)
518 return;
519
520 /* Mark Fast recovery */
521 transport->fast_recovery = 1;
522 transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
523
505 transport->ssthresh = max(transport->cwnd/2, 524 transport->ssthresh = max(transport->cwnd/2,
506 4*transport->asoc->pathmtu); 525 4*transport->asoc->pathmtu);
507 transport->cwnd = transport->ssthresh; 526 transport->cwnd = transport->ssthresh;
@@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t)
586 t->flight_size = 0; 605 t->flight_size = 0;
587 t->error_count = 0; 606 t->error_count = 0;
588 t->rto_pending = 0; 607 t->rto_pending = 0;
608 t->fast_recovery = 0;
589 609
590 /* Initialize the state information for SFR-CACC */ 610 /* Initialize the state information for SFR-CACC */
591 t->cacc.changeover_active = 0; 611 t->cacc.changeover_active = 0;
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d927d9f57412..744b79fdcb19 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -17,8 +17,8 @@
17# define RPCDBG_FACILITY RPCDBG_AUTH 17# define RPCDBG_FACILITY RPCDBG_AUTH
18#endif 18#endif
19 19
20#define RPC_ANONYMOUS_USERID ((uid_t)-2) 20#define RPC_MACHINE_CRED_USERID ((uid_t)0)
21#define RPC_ANONYMOUS_GROUPID ((gid_t)-2) 21#define RPC_MACHINE_CRED_GROUPID ((gid_t)0)
22 22
23struct generic_cred { 23struct generic_cred {
24 struct rpc_cred gc_base; 24 struct rpc_cred gc_base;
@@ -44,8 +44,8 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred);
44struct rpc_cred *rpc_lookup_machine_cred(void) 44struct rpc_cred *rpc_lookup_machine_cred(void)
45{ 45{
46 struct auth_cred acred = { 46 struct auth_cred acred = {
47 .uid = RPC_ANONYMOUS_USERID, 47 .uid = RPC_MACHINE_CRED_USERID,
48 .gid = RPC_ANONYMOUS_GROUPID, 48 .gid = RPC_MACHINE_CRED_GROUPID,
49 .machine_cred = 1, 49 .machine_cred = 1,
50 }; 50 };
51 51
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d8e8d79a8451..e46c825f4954 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,30 +6,9 @@
6 6
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/fcntl.h>
10#include <linux/net.h>
11#include <linux/in.h>
12#include <linux/inet.h>
13#include <linux/udp.h>
14#include <linux/tcp.h>
15#include <linux/unistd.h>
16#include <linux/slab.h>
17#include <linux/netdevice.h>
18#include <linux/skbuff.h>
19#include <linux/file.h>
20#include <linux/freezer.h> 9#include <linux/freezer.h>
21#include <linux/kthread.h> 10#include <linux/kthread.h>
22#include <net/sock.h> 11#include <net/sock.h>
23#include <net/checksum.h>
24#include <net/ip.h>
25#include <net/ipv6.h>
26#include <net/tcp_states.h>
27#include <linux/uaccess.h>
28#include <asm/ioctls.h>
29
30#include <linux/sunrpc/types.h>
31#include <linux/sunrpc/clnt.h>
32#include <linux/sunrpc/xdr.h>
33#include <linux/sunrpc/stats.h> 12#include <linux/sunrpc/stats.h>
34#include <linux/sunrpc/svc_xprt.h> 13#include <linux/sunrpc/svc_xprt.h>
35 14
@@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
296 if (!(xprt->xpt_flags & 275 if (!(xprt->xpt_flags &
297 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) 276 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
298 return; 277 return;
299 if (test_bit(XPT_DEAD, &xprt->xpt_flags))
300 return;
301 278
302 cpu = get_cpu(); 279 cpu = get_cpu();
303 pool = svc_pool_for_cpu(xprt->xpt_server, cpu); 280 pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 3f30ee6006ae..f24800f2c098 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -278,7 +278,7 @@ static int ip_map_show(struct seq_file *m,
278 dom = im->m_client->h.name; 278 dom = im->m_client->h.name;
279 279
280 if (ipv6_addr_v4mapped(&addr)) { 280 if (ipv6_addr_v4mapped(&addr)) {
281 seq_printf(m, "%s" NIPQUAD_FMT "%s\n", 281 seq_printf(m, "%s " NIPQUAD_FMT " %s\n",
282 im->m_class, 282 im->m_class,
283 ntohl(addr.s6_addr32[3]) >> 24 & 0xff, 283 ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
284 ntohl(addr.s6_addr32[3]) >> 16 & 0xff, 284 ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
@@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m,
286 ntohl(addr.s6_addr32[3]) >> 0 & 0xff, 286 ntohl(addr.s6_addr32[3]) >> 0 & 0xff,
287 dom); 287 dom);
288 } else { 288 } else {
289 seq_printf(m, "%s" NIP6_FMT "%s\n", 289 seq_printf(m, "%s " NIP6_FMT " %s\n",
290 im->m_class, NIP6(addr), dom); 290 im->m_class, NIP6(addr), dom);
291 } 291 }
292 return 0; 292 return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 88c0ca20bb1e..87101177825b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -69,6 +69,10 @@ atomic_t rdma_stat_rq_prod;
69atomic_t rdma_stat_sq_poll; 69atomic_t rdma_stat_sq_poll;
70atomic_t rdma_stat_sq_prod; 70atomic_t rdma_stat_sq_prod;
71 71
72/* Temporary NFS request map and context caches */
73struct kmem_cache *svc_rdma_map_cachep;
74struct kmem_cache *svc_rdma_ctxt_cachep;
75
72/* 76/*
73 * This function implements reading and resetting an atomic_t stat 77 * This function implements reading and resetting an atomic_t stat
74 * variable through read/write to a proc file. Any write to the file 78 * variable through read/write to a proc file. Any write to the file
@@ -236,11 +240,14 @@ static ctl_table svcrdma_root_table[] = {
236void svc_rdma_cleanup(void) 240void svc_rdma_cleanup(void)
237{ 241{
238 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 242 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
243 flush_scheduled_work();
239 if (svcrdma_table_header) { 244 if (svcrdma_table_header) {
240 unregister_sysctl_table(svcrdma_table_header); 245 unregister_sysctl_table(svcrdma_table_header);
241 svcrdma_table_header = NULL; 246 svcrdma_table_header = NULL;
242 } 247 }
243 svc_unreg_xprt_class(&svc_rdma_class); 248 svc_unreg_xprt_class(&svc_rdma_class);
249 kmem_cache_destroy(svc_rdma_map_cachep);
250 kmem_cache_destroy(svc_rdma_ctxt_cachep);
244} 251}
245 252
246int svc_rdma_init(void) 253int svc_rdma_init(void)
@@ -255,9 +262,37 @@ int svc_rdma_init(void)
255 svcrdma_table_header = 262 svcrdma_table_header =
256 register_sysctl_table(svcrdma_root_table); 263 register_sysctl_table(svcrdma_root_table);
257 264
265 /* Create the temporary map cache */
266 svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
267 sizeof(struct svc_rdma_req_map),
268 0,
269 SLAB_HWCACHE_ALIGN,
270 NULL);
271 if (!svc_rdma_map_cachep) {
272 printk(KERN_INFO "Could not allocate map cache.\n");
273 goto err0;
274 }
275
276 /* Create the temporary context cache */
277 svc_rdma_ctxt_cachep =
278 kmem_cache_create("svc_rdma_ctxt_cache",
279 sizeof(struct svc_rdma_op_ctxt),
280 0,
281 SLAB_HWCACHE_ALIGN,
282 NULL);
283 if (!svc_rdma_ctxt_cachep) {
284 printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
285 goto err1;
286 }
287
258 /* Register RDMA with the SVC transport switch */ 288 /* Register RDMA with the SVC transport switch */
259 svc_reg_xprt_class(&svc_rdma_class); 289 svc_reg_xprt_class(&svc_rdma_class);
260 return 0; 290 return 0;
291 err1:
292 kmem_cache_destroy(svc_rdma_map_cachep);
293 err0:
294 unregister_sysctl_table(svcrdma_table_header);
295 return -ENOMEM;
261} 296}
262MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 297MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
263MODULE_DESCRIPTION("SVC RDMA Transport"); 298MODULE_DESCRIPTION("SVC RDMA Transport");
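svc_rdma_init() now creates the two slab caches up front and tears down whichever ones succeeded when a later step fails, while svc_rdma_cleanup() destroys both at module exit. A user-space analog of that paired lifecycle; the malloc-backed obj_cache is a toy stand-in for kmem_cache, not the slab API.

#include <stdio.h>
#include <stdlib.h>

/* trivial malloc-backed analog of a kmem_cache handle */
struct obj_cache { const char *name; size_t size; };

static struct obj_cache *cache_create(const char *name, size_t size)
{
    struct obj_cache *c = malloc(sizeof(*c));
    if (c) { c->name = name; c->size = size; }
    return c;
}
static void cache_destroy(struct obj_cache *c) { free(c); }

static struct obj_cache *map_cache, *ctxt_cache;

static int svc_rdma_init_sketch(void)
{
    map_cache = cache_create("svc_rdma_map_cache", 128);
    if (!map_cache)
        goto err0;
    ctxt_cache = cache_create("svc_rdma_ctxt_cache", 256);
    if (!ctxt_cache)
        goto err1;
    return 0;
err1:
    cache_destroy(map_cache);   /* unwind the cache that did succeed */
err0:
    return -1;
}

static void svc_rdma_cleanup_sketch(void)
{
    /* module-exit teardown, mirroring svc_rdma_cleanup() */
    cache_destroy(map_cache);
    cache_destroy(ctxt_cache);
}

int main(void)
{
    if (svc_rdma_init_sketch() == 0)
        svc_rdma_cleanup_sketch();
    return 0;
}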
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c22d6b6f2db4..b4b17f44cb29 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -112,11 +112,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
112 rqstp->rq_arg.tail[0].iov_len = 0; 112 rqstp->rq_arg.tail[0].iov_len = 0;
113} 113}
114 114
115struct chunk_sge {
116 int start; /* sge no for this chunk */
117 int count; /* sge count for this chunk */
118};
119
120/* Encode a read-chunk-list as an array of IB SGE 115/* Encode a read-chunk-list as an array of IB SGE
121 * 116 *
122 * Assumptions: 117 * Assumptions:
@@ -134,8 +129,8 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
134 struct svc_rqst *rqstp, 129 struct svc_rqst *rqstp,
135 struct svc_rdma_op_ctxt *head, 130 struct svc_rdma_op_ctxt *head,
136 struct rpcrdma_msg *rmsgp, 131 struct rpcrdma_msg *rmsgp,
137 struct ib_sge *sge, 132 struct svc_rdma_req_map *rpl_map,
138 struct chunk_sge *ch_sge_ary, 133 struct svc_rdma_req_map *chl_map,
139 int ch_count, 134 int ch_count,
140 int byte_count) 135 int byte_count)
141{ 136{
@@ -156,22 +151,18 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
156 head->arg.head[0] = rqstp->rq_arg.head[0]; 151 head->arg.head[0] = rqstp->rq_arg.head[0];
157 head->arg.tail[0] = rqstp->rq_arg.tail[0]; 152 head->arg.tail[0] = rqstp->rq_arg.tail[0];
158 head->arg.pages = &head->pages[head->count]; 153 head->arg.pages = &head->pages[head->count];
159 head->sge[0].length = head->count; /* save count of hdr pages */ 154 head->hdr_count = head->count; /* save count of hdr pages */
160 head->arg.page_base = 0; 155 head->arg.page_base = 0;
161 head->arg.page_len = ch_bytes; 156 head->arg.page_len = ch_bytes;
162 head->arg.len = rqstp->rq_arg.len + ch_bytes; 157 head->arg.len = rqstp->rq_arg.len + ch_bytes;
163 head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; 158 head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
164 head->count++; 159 head->count++;
165 ch_sge_ary[0].start = 0; 160 chl_map->ch[0].start = 0;
166 while (byte_count) { 161 while (byte_count) {
162 rpl_map->sge[sge_no].iov_base =
163 page_address(rqstp->rq_arg.pages[page_no]) + page_off;
167 sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); 164 sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
168 sge[sge_no].addr = 165 rpl_map->sge[sge_no].iov_len = sge_bytes;
169 ib_dma_map_page(xprt->sc_cm_id->device,
170 rqstp->rq_arg.pages[page_no],
171 page_off, sge_bytes,
172 DMA_FROM_DEVICE);
173 sge[sge_no].length = sge_bytes;
174 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
175 /* 166 /*
176 * Don't bump head->count here because the same page 167 * Don't bump head->count here because the same page
177 * may be used by multiple SGE. 168 * may be used by multiple SGE.
@@ -187,11 +178,11 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
187 * SGE, move to the next SGE 178 * SGE, move to the next SGE
188 */ 179 */
189 if (ch_bytes == 0) { 180 if (ch_bytes == 0) {
190 ch_sge_ary[ch_no].count = 181 chl_map->ch[ch_no].count =
191 sge_no - ch_sge_ary[ch_no].start; 182 sge_no - chl_map->ch[ch_no].start;
192 ch_no++; 183 ch_no++;
193 ch++; 184 ch++;
194 ch_sge_ary[ch_no].start = sge_no; 185 chl_map->ch[ch_no].start = sge_no;
195 ch_bytes = ch->rc_target.rs_length; 186 ch_bytes = ch->rc_target.rs_length;
196 /* If bytes remaining account for next chunk */ 187 /* If bytes remaining account for next chunk */
197 if (byte_count) { 188 if (byte_count) {
@@ -220,18 +211,25 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
220 return sge_no; 211 return sge_no;
221} 212}
222 213
223static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, 214static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
224 struct ib_sge *sge, 215 struct svc_rdma_op_ctxt *ctxt,
216 struct kvec *vec,
225 u64 *sgl_offset, 217 u64 *sgl_offset,
226 int count) 218 int count)
227{ 219{
228 int i; 220 int i;
229 221
230 ctxt->count = count; 222 ctxt->count = count;
223 ctxt->direction = DMA_FROM_DEVICE;
231 for (i = 0; i < count; i++) { 224 for (i = 0; i < count; i++) {
232 ctxt->sge[i].addr = sge[i].addr; 225 atomic_inc(&xprt->sc_dma_used);
233 ctxt->sge[i].length = sge[i].length; 226 ctxt->sge[i].addr =
234 *sgl_offset = *sgl_offset + sge[i].length; 227 ib_dma_map_single(xprt->sc_cm_id->device,
228 vec[i].iov_base, vec[i].iov_len,
229 DMA_FROM_DEVICE);
230 ctxt->sge[i].length = vec[i].iov_len;
231 ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
232 *sgl_offset = *sgl_offset + vec[i].iov_len;
235 } 233 }
236} 234}
237 235
@@ -260,11 +258,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
260 * On our side, we need to read into a pagelist. The first page immediately 258 * On our side, we need to read into a pagelist. The first page immediately
261 * follows the RPC header. 259 * follows the RPC header.
262 * 260 *
263 * This function returns 1 to indicate success. The data is not yet in 261 * This function returns:
262 * 0 - No error and no read-list found.
263 *
264 * 1 - Successful read-list processing. The data is not yet in
264 * the pagelist and therefore the RPC request must be deferred. The 265 * the pagelist and therefore the RPC request must be deferred. The
265 * I/O completion will enqueue the transport again and 266 * I/O completion will enqueue the transport again and
266 * svc_rdma_recvfrom will complete the request. 267 * svc_rdma_recvfrom will complete the request.
267 * 268 *
269 * <0 - Error processing/posting read-list.
270 *
268 * NOTE: The ctxt must not be touched after the last WR has been posted 271 * NOTE: The ctxt must not be touched after the last WR has been posted
269 * because the I/O completion processing may occur on another 272 * because the I/O completion processing may occur on another
270 * processor and free / modify the context. Do not touch! 273 * processor and free / modify the context. Do not touch!
@@ -277,50 +280,38 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
277 struct ib_send_wr read_wr; 280 struct ib_send_wr read_wr;
278 int err = 0; 281 int err = 0;
279 int ch_no; 282 int ch_no;
280 struct ib_sge *sge;
281 int ch_count; 283 int ch_count;
282 int byte_count; 284 int byte_count;
283 int sge_count; 285 int sge_count;
284 u64 sgl_offset; 286 u64 sgl_offset;
285 struct rpcrdma_read_chunk *ch; 287 struct rpcrdma_read_chunk *ch;
286 struct svc_rdma_op_ctxt *ctxt = NULL; 288 struct svc_rdma_op_ctxt *ctxt = NULL;
287 struct svc_rdma_op_ctxt *head; 289 struct svc_rdma_req_map *rpl_map;
288 struct svc_rdma_op_ctxt *tmp_sge_ctxt; 290 struct svc_rdma_req_map *chl_map;
289 struct svc_rdma_op_ctxt *tmp_ch_ctxt;
290 struct chunk_sge *ch_sge_ary;
291 291
292 /* If no read list is present, return 0 */ 292 /* If no read list is present, return 0 */
293 ch = svc_rdma_get_read_chunk(rmsgp); 293 ch = svc_rdma_get_read_chunk(rmsgp);
294 if (!ch) 294 if (!ch)
295 return 0; 295 return 0;
296 296
297 /* Allocate temporary contexts to keep SGE */ 297 /* Allocate temporary reply and chunk maps */
298 BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge)); 298 rpl_map = svc_rdma_get_req_map();
299 tmp_sge_ctxt = svc_rdma_get_context(xprt); 299 chl_map = svc_rdma_get_req_map();
300 sge = tmp_sge_ctxt->sge;
301 tmp_ch_ctxt = svc_rdma_get_context(xprt);
302 ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
303 300
304 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); 301 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
302 if (ch_count > RPCSVC_MAXPAGES)
303 return -EINVAL;
305 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, 304 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
306 sge, ch_sge_ary, 305 rpl_map, chl_map,
307 ch_count, byte_count); 306 ch_count, byte_count);
308 head = svc_rdma_get_context(xprt);
309 sgl_offset = 0; 307 sgl_offset = 0;
310 ch_no = 0; 308 ch_no = 0;
311 309
312 for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; 310 for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
313 ch->rc_discrim != 0; ch++, ch_no++) { 311 ch->rc_discrim != 0; ch++, ch_no++) {
314next_sge: 312next_sge:
315 if (!ctxt) 313 ctxt = svc_rdma_get_context(xprt);
316 ctxt = head;
317 else {
318 ctxt->next = svc_rdma_get_context(xprt);
319 ctxt = ctxt->next;
320 }
321 ctxt->next = NULL;
322 ctxt->direction = DMA_FROM_DEVICE; 314 ctxt->direction = DMA_FROM_DEVICE;
323 clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
324 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 315 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
325 316
326 /* Prepare READ WR */ 317 /* Prepare READ WR */
@@ -333,50 +324,46 @@ next_sge:
333 read_wr.wr.rdma.remote_addr = 324 read_wr.wr.rdma.remote_addr =
334 get_unaligned(&(ch->rc_target.rs_offset)) + 325 get_unaligned(&(ch->rc_target.rs_offset)) +
335 sgl_offset; 326 sgl_offset;
336 read_wr.sg_list = &sge[ch_sge_ary[ch_no].start]; 327 read_wr.sg_list = ctxt->sge;
337 read_wr.num_sge = 328 read_wr.num_sge =
338 rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count); 329 rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
339 rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start], 330 rdma_set_ctxt_sge(xprt, ctxt,
331 &rpl_map->sge[chl_map->ch[ch_no].start],
340 &sgl_offset, 332 &sgl_offset,
341 read_wr.num_sge); 333 read_wr.num_sge);
342 if (((ch+1)->rc_discrim == 0) && 334 if (((ch+1)->rc_discrim == 0) &&
343 (read_wr.num_sge == ch_sge_ary[ch_no].count)) { 335 (read_wr.num_sge == chl_map->ch[ch_no].count)) {
344 /* 336 /*
345 * Mark the last RDMA_READ with a bit to 337 * Mark the last RDMA_READ with a bit to
346 * indicate all RPC data has been fetched from 338 * indicate all RPC data has been fetched from
347 * the client and the RPC needs to be enqueued. 339 * the client and the RPC needs to be enqueued.
348 */ 340 */
349 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 341 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
350 ctxt->next = hdr_ctxt; 342 ctxt->read_hdr = hdr_ctxt;
351 hdr_ctxt->next = head;
352 } 343 }
353 /* Post the read */ 344 /* Post the read */
354 err = svc_rdma_send(xprt, &read_wr); 345 err = svc_rdma_send(xprt, &read_wr);
355 if (err) { 346 if (err) {
356 printk(KERN_ERR "svcrdma: Error posting send = %d\n", 347 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
357 err); 348 err);
358 /* 349 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
359 * Break the circular list so free knows when 350 svc_rdma_put_context(ctxt, 0);
360 * to stop if the error happened to occur on
361 * the last read
362 */
363 ctxt->next = NULL;
364 goto out; 351 goto out;
365 } 352 }
366 atomic_inc(&rdma_stat_read); 353 atomic_inc(&rdma_stat_read);
367 354
368 if (read_wr.num_sge < ch_sge_ary[ch_no].count) { 355 if (read_wr.num_sge < chl_map->ch[ch_no].count) {
369 ch_sge_ary[ch_no].count -= read_wr.num_sge; 356 chl_map->ch[ch_no].count -= read_wr.num_sge;
370 ch_sge_ary[ch_no].start += read_wr.num_sge; 357 chl_map->ch[ch_no].start += read_wr.num_sge;
371 goto next_sge; 358 goto next_sge;
372 } 359 }
373 sgl_offset = 0; 360 sgl_offset = 0;
374 err = 0; 361 err = 1;
375 } 362 }
376 363
377 out: 364 out:
378 svc_rdma_put_context(tmp_sge_ctxt, 0); 365 svc_rdma_put_req_map(rpl_map);
379 svc_rdma_put_context(tmp_ch_ctxt, 0); 366 svc_rdma_put_req_map(chl_map);
380 367
381 /* Detach arg pages. svc_recv will replenish them */ 368 /* Detach arg pages. svc_recv will replenish them */
382 for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) 369 for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
@@ -389,25 +376,12 @@ next_sge:
389 while (rqstp->rq_resused) 376 while (rqstp->rq_resused)
390 rqstp->rq_respages[--rqstp->rq_resused] = NULL; 377 rqstp->rq_respages[--rqstp->rq_resused] = NULL;
391 378
392 if (err) { 379 return err;
393 printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
394 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
395 /* Free the linked list of read contexts */
396 while (head != NULL) {
397 ctxt = head->next;
398 svc_rdma_put_context(head, 1);
399 head = ctxt;
400 }
401 return 0;
402 }
403
404 return 1;
405} 380}
406 381
407static int rdma_read_complete(struct svc_rqst *rqstp, 382static int rdma_read_complete(struct svc_rqst *rqstp,
408 struct svc_rdma_op_ctxt *data) 383 struct svc_rdma_op_ctxt *head)
409{ 384{
410 struct svc_rdma_op_ctxt *head = data->next;
411 int page_no; 385 int page_no;
412 int ret; 386 int ret;
413 387
@@ -419,7 +393,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
419 rqstp->rq_pages[page_no] = head->pages[page_no]; 393 rqstp->rq_pages[page_no] = head->pages[page_no];
420 } 394 }
421 /* Point rq_arg.pages past header */ 395 /* Point rq_arg.pages past header */
422 rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length]; 396 rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
423 rqstp->rq_arg.page_len = head->arg.page_len; 397 rqstp->rq_arg.page_len = head->arg.page_len;
424 rqstp->rq_arg.page_base = head->arg.page_base; 398 rqstp->rq_arg.page_base = head->arg.page_base;
425 399
@@ -433,21 +407,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
433 rqstp->rq_arg.len = head->arg.len; 407 rqstp->rq_arg.len = head->arg.len;
434 rqstp->rq_arg.buflen = head->arg.buflen; 408 rqstp->rq_arg.buflen = head->arg.buflen;
435 409
410 /* Free the context */
411 svc_rdma_put_context(head, 0);
412
436 /* XXX: What should this be? */ 413 /* XXX: What should this be? */
437 rqstp->rq_prot = IPPROTO_MAX; 414 rqstp->rq_prot = IPPROTO_MAX;
438 415 svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
439 /*
440 * Free the contexts we used to build the RDMA_READ. We have
441 * to be careful here because the context list uses the same
442 * next pointer used to chain the contexts associated with the
443 * RDMA_READ
444 */
445 data->next = NULL; /* terminate circular list */
446 do {
447 data = head->next;
448 svc_rdma_put_context(head, 0);
449 head = data;
450 } while (head != NULL);
451 416
452 ret = rqstp->rq_arg.head[0].iov_len 417 ret = rqstp->rq_arg.head[0].iov_len
453 + rqstp->rq_arg.page_len 418 + rqstp->rq_arg.page_len
@@ -457,8 +422,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
457 ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, 422 ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
458 rqstp->rq_arg.head[0].iov_len); 423 rqstp->rq_arg.head[0].iov_len);
459 424
460 /* Indicate that we've consumed an RQ credit */
461 rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
462 svc_xprt_received(rqstp->rq_xprt); 425 svc_xprt_received(rqstp->rq_xprt);
463 return ret; 426 return ret;
464} 427}
@@ -480,13 +443,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
480 443
481 dprintk("svcrdma: rqstp=%p\n", rqstp); 444 dprintk("svcrdma: rqstp=%p\n", rqstp);
482 445
483 /*
484 * The rq_xprt_ctxt indicates if we've consumed an RQ credit
485 * or not. It is used in the rdma xpo_release_rqst function to
486 * determine whether or not to return an RQ WQE to the RQ.
487 */
488 rqstp->rq_xprt_ctxt = NULL;
489
490 spin_lock_bh(&rdma_xprt->sc_read_complete_lock); 446 spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
491 if (!list_empty(&rdma_xprt->sc_read_complete_q)) { 447 if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
492 ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, 448 ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
@@ -537,21 +493,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
537 /* If the request is invalid, reply with an error */ 493 /* If the request is invalid, reply with an error */
538 if (len < 0) { 494 if (len < 0) {
539 if (len == -ENOSYS) 495 if (len == -ENOSYS)
540 (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); 496 svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
541 goto close_out; 497 goto close_out;
542 } 498 }
543 499
544 /* Read read-list data. If we would need to wait, defer 500 /* Read read-list data. */
545 * it. Not that in this case, we don't return the RQ credit 501 ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
546 * until after the read completes. 502 if (ret > 0) {
547 */ 503 /* read-list posted, defer until data received from client. */
548 if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
549 svc_xprt_received(xprt); 504 svc_xprt_received(xprt);
550 return 0; 505 return 0;
551 } 506 }
552 507 if (ret < 0) {
553 /* Indicate we've consumed an RQ credit */ 508 /* Post of read-list failed, free context. */
554 rqstp->rq_xprt_ctxt = rqstp->rq_xprt; 509 svc_rdma_put_context(ctxt, 1);
510 return 0;
511 }
555 512
556 ret = rqstp->rq_arg.head[0].iov_len 513 ret = rqstp->rq_arg.head[0].iov_len
557 + rqstp->rq_arg.page_len 514 + rqstp->rq_arg.page_len
@@ -569,11 +526,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
569 return ret; 526 return ret;
570 527
571 close_out: 528 close_out:
572 if (ctxt) { 529 if (ctxt)
573 svc_rdma_put_context(ctxt, 1); 530 svc_rdma_put_context(ctxt, 1);
574 /* Indicate we've consumed an RQ credit */
575 rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
576 }
577 dprintk("svcrdma: transport %p is closing\n", xprt); 531 dprintk("svcrdma: transport %p is closing\n", xprt);
578 /* 532 /*
579 * Set the close bit and enqueue it. svc_recv will see the 533 * Set the close bit and enqueue it. svc_recv will see the
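rdma_read_xdr() now returns a tri-state result -- 0 for no read list, 1 for read-list posted (defer the RPC), negative for a posting error -- and svc_rdma_recvfrom() dispatches on it. A toy sketch of that return-code convention; all logic below is placeholder, not the RDMA code paths themselves.

#include <stdio.h>

/* tri-state return: 0 = nothing to do, 1 = deferred, <0 = error */
static int read_xdr_sketch(int has_read_list, int post_ok)
{
    if (!has_read_list)
        return 0;
    if (!post_ok)
        return -5;  /* -EIO-style posting failure */
    return 1;       /* RDMA_READs posted; complete later */
}

static int recvfrom_sketch(int has_read_list, int post_ok)
{
    int ret = read_xdr_sketch(has_read_list, post_ok);

    if (ret > 0) {
        printf("deferred until read completion\n");
        return 0;
    }
    if (ret < 0) {
        printf("posting failed, dropping context\n");
        return 0;
    }
    printf("inline request, processed immediately\n");
    return 1;
}

int main(void)
{
    recvfrom_sketch(0, 1);
    recvfrom_sketch(1, 1);
    recvfrom_sketch(1, 0);
    return 0;
}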
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 981f190c1b39..a19b22b452a3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -63,52 +63,44 @@
63 * SGE[2..sge_count-2] data from xdr->pages[] 63 * SGE[2..sge_count-2] data from xdr->pages[]
64 * SGE[sge_count-1] data from xdr->tail. 64 * SGE[sge_count-1] data from xdr->tail.
65 * 65 *
66 * The max SGE we need is the length of the XDR / pagesize + one for
67 * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
68 * reserves a page for both the request and the reply header, and this
69 * array is only concerned with the reply, we are assured that we have
70 * one extra page for the RPCRDMA header.
66 */ 71 */
67static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, 72static void xdr_to_sge(struct svcxprt_rdma *xprt,
68 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
69 struct ib_sge *sge, 74 struct svc_rdma_req_map *vec)
70 int *sge_count)
71{ 75{
72 /* Max we need is the length of the XDR / pagesize + one for
73 * head + one for tail + one for RPCRDMA header
74 */
75 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; 76 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
76 int sge_no; 77 int sge_no;
77 u32 byte_count = xdr->len;
78 u32 sge_bytes; 78 u32 sge_bytes;
79 u32 page_bytes; 79 u32 page_bytes;
80 int page_off; 80 u32 page_off;
81 int page_no; 81 int page_no;
82 82
83 BUG_ON(xdr->len !=
84 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
85
83 /* Skip the first sge, this is for the RPCRDMA header */ 86 /* Skip the first sge, this is for the RPCRDMA header */
84 sge_no = 1; 87 sge_no = 1;
85 88
86 /* Head SGE */ 89 /* Head SGE */
87 sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, 90 vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
88 xdr->head[0].iov_base, 91 vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
89 xdr->head[0].iov_len,
90 DMA_TO_DEVICE);
91 sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len);
92 byte_count -= sge_bytes;
93 sge[sge_no].length = sge_bytes;
94 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
95 sge_no++; 92 sge_no++;
96 93
97 /* pages SGE */ 94 /* pages SGE */
98 page_no = 0; 95 page_no = 0;
99 page_bytes = xdr->page_len; 96 page_bytes = xdr->page_len;
100 page_off = xdr->page_base; 97 page_off = xdr->page_base;
101 while (byte_count && page_bytes) { 98 while (page_bytes) {
102 sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); 99 vec->sge[sge_no].iov_base =
103 sge[sge_no].addr = 100 page_address(xdr->pages[page_no]) + page_off;
104 ib_dma_map_page(xprt->sc_cm_id->device, 101 sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
105 xdr->pages[page_no], page_off,
106 sge_bytes, DMA_TO_DEVICE);
107 sge_bytes = min(sge_bytes, page_bytes);
108 byte_count -= sge_bytes;
109 page_bytes -= sge_bytes; 102 page_bytes -= sge_bytes;
110 sge[sge_no].length = sge_bytes; 103 vec->sge[sge_no].iov_len = sge_bytes;
111 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
112 104
113 sge_no++; 105 sge_no++;
114 page_no++; 106 page_no++;
@@ -116,36 +108,24 @@ static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt,
116 } 108 }
117 109
118 /* Tail SGE */ 110 /* Tail SGE */
119 if (byte_count && xdr->tail[0].iov_len) { 111 if (xdr->tail[0].iov_len) {
120 sge[sge_no].addr = 112 vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
121 ib_dma_map_single(xprt->sc_cm_id->device, 113 vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
122 xdr->tail[0].iov_base,
123 xdr->tail[0].iov_len,
124 DMA_TO_DEVICE);
125 sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len);
126 byte_count -= sge_bytes;
127 sge[sge_no].length = sge_bytes;
128 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
129 sge_no++; 114 sge_no++;
130 } 115 }
131 116
132 BUG_ON(sge_no > sge_max); 117 BUG_ON(sge_no > sge_max);
133 BUG_ON(byte_count != 0); 118 vec->count = sge_no;
134
135 *sge_count = sge_no;
136 return sge;
137} 119}
138 120
139
140/* Assumptions: 121/* Assumptions:
141 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE 122 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
142 */ 123 */
143static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, 124static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
144 u32 rmr, u64 to, 125 u32 rmr, u64 to,
145 u32 xdr_off, int write_len, 126 u32 xdr_off, int write_len,
146 struct ib_sge *xdr_sge, int sge_count) 127 struct svc_rdma_req_map *vec)
147{ 128{
148 struct svc_rdma_op_ctxt *tmp_sge_ctxt;
149 struct ib_send_wr write_wr; 129 struct ib_send_wr write_wr;
150 struct ib_sge *sge; 130 struct ib_sge *sge;
151 int xdr_sge_no; 131 int xdr_sge_no;
@@ -154,25 +134,23 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
154 int sge_off; 134 int sge_off;
155 int bc; 135 int bc;
156 struct svc_rdma_op_ctxt *ctxt; 136 struct svc_rdma_op_ctxt *ctxt;
157 int ret = 0;
158 137
159 BUG_ON(sge_count > RPCSVC_MAXPAGES); 138 BUG_ON(vec->count > RPCSVC_MAXPAGES);
160 dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " 139 dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
161 "write_len=%d, xdr_sge=%p, sge_count=%d\n", 140 "write_len=%d, vec->sge=%p, vec->count=%lu\n",
162 rmr, (unsigned long long)to, xdr_off, 141 rmr, (unsigned long long)to, xdr_off,
163 write_len, xdr_sge, sge_count); 142 write_len, vec->sge, vec->count);
164 143
165 ctxt = svc_rdma_get_context(xprt); 144 ctxt = svc_rdma_get_context(xprt);
166 ctxt->count = 0; 145 ctxt->direction = DMA_TO_DEVICE;
167 tmp_sge_ctxt = svc_rdma_get_context(xprt); 146 sge = ctxt->sge;
168 sge = tmp_sge_ctxt->sge;
169 147
170 /* Find the SGE associated with xdr_off */ 148 /* Find the SGE associated with xdr_off */
171 for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; 149 for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
172 xdr_sge_no++) { 150 xdr_sge_no++) {
173 if (xdr_sge[xdr_sge_no].length > bc) 151 if (vec->sge[xdr_sge_no].iov_len > bc)
174 break; 152 break;
175 bc -= xdr_sge[xdr_sge_no].length; 153 bc -= vec->sge[xdr_sge_no].iov_len;
176 } 154 }
177 155
178 sge_off = bc; 156 sge_off = bc;
@@ -180,21 +158,28 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
180 sge_no = 0; 158 sge_no = 0;
181 159
182 /* Copy the remaining SGE */ 160 /* Copy the remaining SGE */
183 while (bc != 0 && xdr_sge_no < sge_count) { 161 while (bc != 0 && xdr_sge_no < vec->count) {
184 sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; 162 sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
185 sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey;
186 sge_bytes = min((size_t)bc, 163 sge_bytes = min((size_t)bc,
187 (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); 164 (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
188 sge[sge_no].length = sge_bytes; 165 sge[sge_no].length = sge_bytes;
189 166 atomic_inc(&xprt->sc_dma_used);
167 sge[sge_no].addr =
168 ib_dma_map_single(xprt->sc_cm_id->device,
169 (void *)
170 vec->sge[xdr_sge_no].iov_base + sge_off,
171 sge_bytes, DMA_TO_DEVICE);
172 if (dma_mapping_error(sge[sge_no].addr))
173 goto err;
190 sge_off = 0; 174 sge_off = 0;
191 sge_no++; 175 sge_no++;
176 ctxt->count++;
192 xdr_sge_no++; 177 xdr_sge_no++;
193 bc -= sge_bytes; 178 bc -= sge_bytes;
194 } 179 }
195 180
196 BUG_ON(bc != 0); 181 BUG_ON(bc != 0);
197 BUG_ON(xdr_sge_no > sge_count); 182 BUG_ON(xdr_sge_no > vec->count);
198 183
199 /* Prepare WRITE WR */ 184 /* Prepare WRITE WR */
200 memset(&write_wr, 0, sizeof write_wr); 185 memset(&write_wr, 0, sizeof write_wr);
@@ -209,21 +194,20 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
209 194
210 /* Post It */ 195 /* Post It */
211 atomic_inc(&rdma_stat_write); 196 atomic_inc(&rdma_stat_write);
212 if (svc_rdma_send(xprt, &write_wr)) { 197 if (svc_rdma_send(xprt, &write_wr))
213 svc_rdma_put_context(ctxt, 1); 198 goto err;
214 /* Fatal error, close transport */ 199 return 0;
215 ret = -EIO; 200 err:
216 } 201 svc_rdma_put_context(ctxt, 0);
217 svc_rdma_put_context(tmp_sge_ctxt, 0); 202 /* Fatal error, close transport */
218 return ret; 203 return -EIO;
219} 204}
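send_write() now maps each segment immediately before posting it and records the mapping count in the per-WR context, so the completion path can unmap exactly what was mapped. The core of the loop above, condensed (the bc/sge_off bookkeeping is elided):

    atomic_inc(&xprt->sc_dma_used);            /* paired with the dec in
                                                * svc_rdma_unmap_dma() */
    sge[sge_no].addr =
            ib_dma_map_single(xprt->sc_cm_id->device,
                              (void *)vec->sge[xdr_sge_no].iov_base + sge_off,
                              sge_bytes, DMA_TO_DEVICE);
    if (dma_mapping_error(sge[sge_no].addr))   /* one-argument form, as in
                                                * this kernel's API */
            goto err;                          /* put the ctxt, return -EIO */
    ctxt->count++;                             /* how many to unmap later */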
220 205
221static int send_write_chunks(struct svcxprt_rdma *xprt, 206static int send_write_chunks(struct svcxprt_rdma *xprt,
222 struct rpcrdma_msg *rdma_argp, 207 struct rpcrdma_msg *rdma_argp,
223 struct rpcrdma_msg *rdma_resp, 208 struct rpcrdma_msg *rdma_resp,
224 struct svc_rqst *rqstp, 209 struct svc_rqst *rqstp,
225 struct ib_sge *sge, 210 struct svc_rdma_req_map *vec)
226 int sge_count)
227{ 211{
228 u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 212 u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
229 int write_len; 213 int write_len;
@@ -269,8 +253,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
269 rs_offset + chunk_off, 253 rs_offset + chunk_off,
270 xdr_off, 254 xdr_off,
271 this_write, 255 this_write,
272 sge, 256 vec);
273 sge_count);
274 if (ret) { 257 if (ret) {
275 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 258 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
276 ret); 259 ret);
@@ -292,8 +275,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
292 struct rpcrdma_msg *rdma_argp, 275 struct rpcrdma_msg *rdma_argp,
293 struct rpcrdma_msg *rdma_resp, 276 struct rpcrdma_msg *rdma_resp,
294 struct svc_rqst *rqstp, 277 struct svc_rqst *rqstp,
295 struct ib_sge *sge, 278 struct svc_rdma_req_map *vec)
296 int sge_count)
297{ 279{
298 u32 xfer_len = rqstp->rq_res.len; 280 u32 xfer_len = rqstp->rq_res.len;
299 int write_len; 281 int write_len;
@@ -341,8 +323,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
341 rs_offset + chunk_off, 323 rs_offset + chunk_off,
342 xdr_off, 324 xdr_off,
343 this_write, 325 this_write,
344 sge, 326 vec);
345 sge_count);
346 if (ret) { 327 if (ret) {
347 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 328 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
348 ret); 329 ret);
@@ -380,7 +361,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
380 struct page *page, 361 struct page *page,
381 struct rpcrdma_msg *rdma_resp, 362 struct rpcrdma_msg *rdma_resp,
382 struct svc_rdma_op_ctxt *ctxt, 363 struct svc_rdma_op_ctxt *ctxt,
383 int sge_count, 364 struct svc_rdma_req_map *vec,
384 int byte_count) 365 int byte_count)
385{ 366{
386 struct ib_send_wr send_wr; 367 struct ib_send_wr send_wr;
@@ -389,11 +370,23 @@ static int send_reply(struct svcxprt_rdma *rdma,
389 int page_no; 370 int page_no;
390 int ret; 371 int ret;
391 372
373 /* Post a recv buffer to handle another request. */
374 ret = svc_rdma_post_recv(rdma);
375 if (ret) {
376 printk(KERN_INFO
377 "svcrdma: could not post a receive buffer, err=%d."
378 "Closing transport %p.\n", ret, rdma);
379 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
380 svc_rdma_put_context(ctxt, 0);
381 return -ENOTCONN;
382 }
383
392 /* Prepare the context */ 384 /* Prepare the context */
393 ctxt->pages[0] = page; 385 ctxt->pages[0] = page;
394 ctxt->count = 1; 386 ctxt->count = 1;
395 387
396 /* Prepare the SGE for the RPCRDMA Header */ 388 /* Prepare the SGE for the RPCRDMA Header */
389 atomic_inc(&rdma->sc_dma_used);
397 ctxt->sge[0].addr = 390 ctxt->sge[0].addr =
398 ib_dma_map_page(rdma->sc_cm_id->device, 391 ib_dma_map_page(rdma->sc_cm_id->device,
399 page, 0, PAGE_SIZE, DMA_TO_DEVICE); 392 page, 0, PAGE_SIZE, DMA_TO_DEVICE);
@@ -402,10 +395,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
402 ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; 395 ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
403 396
404 /* Determine how many of our SGE are to be transmitted */ 397 /* Determine how many of our SGE are to be transmitted */
405 for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { 398 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
406 sge_bytes = min((size_t)ctxt->sge[sge_no].length, 399 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
407 (size_t)byte_count);
408 byte_count -= sge_bytes; 400 byte_count -= sge_bytes;
401 atomic_inc(&rdma->sc_dma_used);
402 ctxt->sge[sge_no].addr =
403 ib_dma_map_single(rdma->sc_cm_id->device,
404 vec->sge[sge_no].iov_base,
405 sge_bytes, DMA_TO_DEVICE);
406 ctxt->sge[sge_no].length = sge_bytes;
407 ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
409 } 408 }
410 BUG_ON(byte_count != 0); 409 BUG_ON(byte_count != 0);
411 410
@@ -417,8 +416,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
417 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; 416 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
418 ctxt->count++; 417 ctxt->count++;
419 rqstp->rq_respages[page_no] = NULL; 418 rqstp->rq_respages[page_no] = NULL;
419 /* If there are more pages than SGE, terminate SGE list */
420 if (page_no+1 >= sge_no)
421 ctxt->sge[page_no+1].length = 0;
420 } 422 }
421
422 BUG_ON(sge_no > rdma->sc_max_sge); 423 BUG_ON(sge_no > rdma->sc_max_sge);
423 memset(&send_wr, 0, sizeof send_wr); 424 memset(&send_wr, 0, sizeof send_wr);
424 ctxt->wr_op = IB_WR_SEND; 425 ctxt->wr_op = IB_WR_SEND;
@@ -462,20 +463,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
462 enum rpcrdma_proc reply_type; 463 enum rpcrdma_proc reply_type;
463 int ret; 464 int ret;
464 int inline_bytes; 465 int inline_bytes;
465 struct ib_sge *sge;
466 int sge_count = 0;
467 struct page *res_page; 466 struct page *res_page;
468 struct svc_rdma_op_ctxt *ctxt; 467 struct svc_rdma_op_ctxt *ctxt;
468 struct svc_rdma_req_map *vec;
469 469
470 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); 470 dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
471 471
472 /* Get the RDMA request header. */ 472 /* Get the RDMA request header. */
473 rdma_argp = xdr_start(&rqstp->rq_arg); 473 rdma_argp = xdr_start(&rqstp->rq_arg);
474 474
472 /* Build an SGE for the XDR */ 475 /* Build a req vec for the XDR */
476 ctxt = svc_rdma_get_context(rdma); 476 ctxt = svc_rdma_get_context(rdma);
477 ctxt->direction = DMA_TO_DEVICE; 477 ctxt->direction = DMA_TO_DEVICE;
478 sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); 478 vec = svc_rdma_get_req_map();
479 xdr_to_sge(rdma, &rqstp->rq_res, vec);
479 480
480 inline_bytes = rqstp->rq_res.len; 481 inline_bytes = rqstp->rq_res.len;
481 482
@@ -492,7 +493,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
492 493
493 /* Send any write-chunk data and build resp write-list */ 494 /* Send any write-chunk data and build resp write-list */
494 ret = send_write_chunks(rdma, rdma_argp, rdma_resp, 495 ret = send_write_chunks(rdma, rdma_argp, rdma_resp,
495 rqstp, sge, sge_count); 496 rqstp, vec);
496 if (ret < 0) { 497 if (ret < 0) {
497 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", 498 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
498 ret); 499 ret);
@@ -502,7 +503,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
502 503
503 /* Send any reply-list data and update resp reply-list */ 504 /* Send any reply-list data and update resp reply-list */
504 ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, 505 ret = send_reply_chunks(rdma, rdma_argp, rdma_resp,
505 rqstp, sge, sge_count); 506 rqstp, vec);
506 if (ret < 0) { 507 if (ret < 0) {
507 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", 508 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
508 ret); 509 ret);
@@ -510,11 +511,13 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
510 } 511 }
511 inline_bytes -= ret; 512 inline_bytes -= ret;
512 513
513 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, 514 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
514 inline_bytes); 515 inline_bytes);
516 svc_rdma_put_req_map(vec);
515 dprintk("svcrdma: send_reply returns %d\n", ret); 517 dprintk("svcrdma: send_reply returns %d\n", ret);
516 return ret; 518 return ret;
517 error: 519 error:
520 svc_rdma_put_req_map(vec);
518 svc_rdma_put_context(ctxt, 0); 521 svc_rdma_put_context(ctxt, 0);
519 put_page(res_page); 522 put_page(res_page);
520 return ret; 523 return ret;
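Taken together, svc_rdma_sendto() now brackets the whole reply path with a get/put of the shared request map; note that both the success return and the error label release it. Roughly (error handling abbreviated, call sites exactly as in the hunks above):

    vec = svc_rdma_get_req_map();              /* may sleep until one is free */
    xdr_to_sge(rdma, &rqstp->rq_res, vec);     /* record kvecs, no DMA yet */
    ret = send_write_chunks(rdma, rdma_argp, rdma_resp, rqstp, vec);
    ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, rqstp, vec);
    ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
                     inline_bytes);
    svc_rdma_put_req_map(vec);                 /* on success and on error */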
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index af408fc12634..19ddc382b777 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -84,67 +84,37 @@ struct svc_xprt_class svc_rdma_class = {
84 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, 84 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
85}; 85};
86 86
87static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) 87/* WR context cache. Created in svc_rdma.c */
88extern struct kmem_cache *svc_rdma_ctxt_cachep;
89
90struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
88{ 91{
89 int target;
90 int at_least_one = 0;
91 struct svc_rdma_op_ctxt *ctxt; 92 struct svc_rdma_op_ctxt *ctxt;
92 93
93 target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump, 94 while (1) {
94 xprt->sc_ctxt_max); 95 ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
95 96 if (ctxt)
96 spin_lock_bh(&xprt->sc_ctxt_lock);
97 while (xprt->sc_ctxt_cnt < target) {
98 xprt->sc_ctxt_cnt++;
99 spin_unlock_bh(&xprt->sc_ctxt_lock);
100
101 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
102
103 spin_lock_bh(&xprt->sc_ctxt_lock);
104 if (ctxt) {
105 at_least_one = 1;
106 ctxt->next = xprt->sc_ctxt_head;
107 xprt->sc_ctxt_head = ctxt;
108 } else {
109 /* kmalloc failed...give up for now */
110 xprt->sc_ctxt_cnt--;
111 break; 97 break;
112 } 98 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
113 } 99 }
114 spin_unlock_bh(&xprt->sc_ctxt_lock); 100 ctxt->xprt = xprt;
115 dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n", 101 INIT_LIST_HEAD(&ctxt->dto_q);
116 xprt->sc_ctxt_max, xprt->sc_ctxt_cnt); 102 ctxt->count = 0;
117 return at_least_one; 103 atomic_inc(&xprt->sc_ctxt_used);
104 return ctxt;
118} 105}
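The per-transport free list and its spinlock are gone; contexts now come from a global slab, and the only bookkeeping left is the sc_ctxt_used counter that the teardown path sanity-checks. The allocation policy reduces to:

    /* Spin on the slab rather than fail the RPC: retry every 500ms
     * until kmem_cache_alloc() succeeds. */
    while (!(ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL)))
            schedule_timeout_uninterruptible(msecs_to_jiffies(500));
    atomic_inc(&xprt->sc_ctxt_used);   /* balanced in svc_rdma_put_context() */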
119 106
120struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 107static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
121{ 108{
122 struct svc_rdma_op_ctxt *ctxt; 109 struct svcxprt_rdma *xprt = ctxt->xprt;
123 110 int i;
124 while (1) { 111 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
125 spin_lock_bh(&xprt->sc_ctxt_lock); 112 atomic_dec(&xprt->sc_dma_used);
126 if (unlikely(xprt->sc_ctxt_head == NULL)) { 113 ib_dma_unmap_single(xprt->sc_cm_id->device,
127 /* Try to bump my cache. */ 114 ctxt->sge[i].addr,
128 spin_unlock_bh(&xprt->sc_ctxt_lock); 115 ctxt->sge[i].length,
129 116 ctxt->direction);
130 if (rdma_bump_context_cache(xprt))
131 continue;
132
133 printk(KERN_INFO "svcrdma: sleeping waiting for "
134 "context memory on xprt=%p\n",
135 xprt);
136 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
137 continue;
138 }
139 ctxt = xprt->sc_ctxt_head;
140 xprt->sc_ctxt_head = ctxt->next;
141 spin_unlock_bh(&xprt->sc_ctxt_lock);
142 ctxt->xprt = xprt;
143 INIT_LIST_HEAD(&ctxt->dto_q);
144 ctxt->count = 0;
145 break;
146 } 117 }
147 return ctxt;
148} 118}
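svc_rdma_unmap_dma() stops at the first zero-length SGE, which is why send_reply() above explicitly zeroes the entry after the last mapped one. Its callers, the CQ reap routines below, follow a fixed order so a context is never recycled while it still holds live mappings:

    svc_rdma_unmap_dma(ctxt);          /* drop DMA mappings first */
    if (wc.status != IB_WC_SUCCESS)
            set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
    svc_rdma_put_context(ctxt, 1);     /* then release pages and context */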
149 119
150void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) 120void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -158,15 +128,34 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
158 for (i = 0; i < ctxt->count; i++) 128 for (i = 0; i < ctxt->count; i++)
159 put_page(ctxt->pages[i]); 129 put_page(ctxt->pages[i]);
160 130
161 for (i = 0; i < ctxt->count; i++) 131 kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
162 dma_unmap_single(xprt->sc_cm_id->device->dma_device, 132 atomic_dec(&xprt->sc_ctxt_used);
163 ctxt->sge[i].addr, 133}
164 ctxt->sge[i].length, 134
165 ctxt->direction); 135/* Temporary NFS request map cache. Created in svc_rdma.c */
166 spin_lock_bh(&xprt->sc_ctxt_lock); 136extern struct kmem_cache *svc_rdma_map_cachep;
167 ctxt->next = xprt->sc_ctxt_head; 137
168 xprt->sc_ctxt_head = ctxt; 138/*
169 spin_unlock_bh(&xprt->sc_ctxt_lock); 139 * Temporary NFS req mappings are shared across all transport
140 * instances. These are short lived and should be bounded by the number
141 * of concurrent server threads * depth of the SQ.
142 */
143struct svc_rdma_req_map *svc_rdma_get_req_map(void)
144{
145 struct svc_rdma_req_map *map;
146 while (1) {
147 map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
148 if (map)
149 break;
150 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
151 }
152 map->count = 0;
153 return map;
154}
155
156void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
157{
158 kmem_cache_free(svc_rdma_map_cachep, map);
170} 159}
171 160
172/* ib_cq event handler */ 161/* ib_cq event handler */
@@ -228,23 +217,8 @@ static void dto_tasklet_func(unsigned long data)
228 list_del_init(&xprt->sc_dto_q); 217 list_del_init(&xprt->sc_dto_q);
229 spin_unlock_irqrestore(&dto_lock, flags); 218 spin_unlock_irqrestore(&dto_lock, flags);
230 219
231 if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { 220 rq_cq_reap(xprt);
232 ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); 221 sq_cq_reap(xprt);
233 rq_cq_reap(xprt);
234 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
235 /*
236 * If data arrived before established event,
237 * don't enqueue. This defers RPC I/O until the
238 * RDMA connection is complete.
239 */
240 if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
241 svc_xprt_enqueue(&xprt->sc_xprt);
242 }
243
244 if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) {
245 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
246 sq_cq_reap(xprt);
247 }
248 222
249 svc_xprt_put(&xprt->sc_xprt); 223 svc_xprt_put(&xprt->sc_xprt);
250 spin_lock_irqsave(&dto_lock, flags); 224 spin_lock_irqsave(&dto_lock, flags);
@@ -263,11 +237,15 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
263 struct svcxprt_rdma *xprt = cq_context; 237 struct svcxprt_rdma *xprt = cq_context;
264 unsigned long flags; 238 unsigned long flags;
265 239
240 /* Guard against unconditional flush call for destroyed QP */
241 if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount) == 0)
242 return;
243
266 /* 244 /*
267 * Set the bit regardless of whether or not it's on the list 245 * Set the bit regardless of whether or not it's on the list
268 * because it may be on the list already due to an SQ 246 * because it may be on the list already due to an SQ
269 * completion. 247 * completion.
270 */ 248 */
271 set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); 249 set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags);
272 250
273 /* 251 /*
@@ -290,6 +268,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
290 * 268 *
291 * Take all completing WC off the CQE and enqueue the associated DTO 269 * Take all completing WC off the CQE and enqueue the associated DTO
292 * context on the dto_q for the transport. 270 * context on the dto_q for the transport.
271 *
272 * Note that caller must hold a transport reference.
293 */ 273 */
294static void rq_cq_reap(struct svcxprt_rdma *xprt) 274static void rq_cq_reap(struct svcxprt_rdma *xprt)
295{ 275{
@@ -297,29 +277,48 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
297 struct ib_wc wc; 277 struct ib_wc wc;
298 struct svc_rdma_op_ctxt *ctxt = NULL; 278 struct svc_rdma_op_ctxt *ctxt = NULL;
299 279
280 if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
281 return;
282
283 ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
300 atomic_inc(&rdma_stat_rq_poll); 284 atomic_inc(&rdma_stat_rq_poll);
301 285
302 spin_lock_bh(&xprt->sc_rq_dto_lock);
303 while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { 286 while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
304 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; 287 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
305 ctxt->wc_status = wc.status; 288 ctxt->wc_status = wc.status;
306 ctxt->byte_len = wc.byte_len; 289 ctxt->byte_len = wc.byte_len;
290 svc_rdma_unmap_dma(ctxt);
307 if (wc.status != IB_WC_SUCCESS) { 291 if (wc.status != IB_WC_SUCCESS) {
308 /* Close the transport */ 292 /* Close the transport */
293 dprintk("svcrdma: transport closing, putting ctxt %p\n", ctxt);
309 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 294 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
310 svc_rdma_put_context(ctxt, 1); 295 svc_rdma_put_context(ctxt, 1);
296 svc_xprt_put(&xprt->sc_xprt);
311 continue; 297 continue;
312 } 298 }
299 spin_lock_bh(&xprt->sc_rq_dto_lock);
313 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); 300 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
301 spin_unlock_bh(&xprt->sc_rq_dto_lock);
302 svc_xprt_put(&xprt->sc_xprt);
314 } 303 }
315 spin_unlock_bh(&xprt->sc_rq_dto_lock);
316 304
317 if (ctxt) 305 if (ctxt)
318 atomic_inc(&rdma_stat_rq_prod); 306 atomic_inc(&rdma_stat_rq_prod);
307
308 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
309 /*
310 * If data arrived before established event,
311 * don't enqueue. This defers RPC I/O until the
312 * RDMA connection is complete.
313 */
314 if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
315 svc_xprt_enqueue(&xprt->sc_xprt);
319} 316}
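The pending-bit test and the CQ re-arm move out of the tasklet and into the reap routines themselves. The shape is the standard edge-triggered completion-queue idiom: re-arm before draining, so a completion that races the final poll still raises a fresh event. Sketch of the skeleton (handle() stands in for the per-WC body above and is not a real function):

    if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
            return;                                     /* nothing signalled */
    ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);  /* re-arm first... */
    while (ib_poll_cq(xprt->sc_rq_cq, 1, &wc) > 0)      /* ...then drain */
            handle(&wc);                 /* unmap, enqueue or close, per WC */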
320 317
321/* 318/*
322 * Send Queue Completion Handler - potentially called on interrupt context. 319 * Send Queue Completion Handler - potentially called on interrupt context.
320 *
321 * Note that caller must hold a transport reference.
323 */ 322 */
324static void sq_cq_reap(struct svcxprt_rdma *xprt) 323static void sq_cq_reap(struct svcxprt_rdma *xprt)
325{ 324{
@@ -328,11 +327,17 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
328 struct ib_cq *cq = xprt->sc_sq_cq; 327 struct ib_cq *cq = xprt->sc_sq_cq;
329 int ret; 328 int ret;
330 329
330
331 if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
332 return;
333
334 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
331 atomic_inc(&rdma_stat_sq_poll); 335 atomic_inc(&rdma_stat_sq_poll);
332 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { 336 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
333 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; 337 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
334 xprt = ctxt->xprt; 338 xprt = ctxt->xprt;
335 339
340 svc_rdma_unmap_dma(ctxt);
336 if (wc.status != IB_WC_SUCCESS) 341 if (wc.status != IB_WC_SUCCESS)
337 /* Close the transport */ 342 /* Close the transport */
338 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 343 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -343,20 +348,25 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
343 348
344 switch (ctxt->wr_op) { 349 switch (ctxt->wr_op) {
345 case IB_WR_SEND: 350 case IB_WR_SEND:
346 case IB_WR_RDMA_WRITE:
347 svc_rdma_put_context(ctxt, 1); 351 svc_rdma_put_context(ctxt, 1);
348 break; 352 break;
349 353
354 case IB_WR_RDMA_WRITE:
355 svc_rdma_put_context(ctxt, 0);
356 break;
357
350 case IB_WR_RDMA_READ: 358 case IB_WR_RDMA_READ:
351 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { 359 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
360 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
361 BUG_ON(!read_hdr);
352 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 362 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
353 set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
354 spin_lock_bh(&xprt->sc_read_complete_lock); 363 spin_lock_bh(&xprt->sc_read_complete_lock);
355 list_add_tail(&ctxt->dto_q, 364 list_add_tail(&read_hdr->dto_q,
356 &xprt->sc_read_complete_q); 365 &xprt->sc_read_complete_q);
357 spin_unlock_bh(&xprt->sc_read_complete_lock); 366 spin_unlock_bh(&xprt->sc_read_complete_lock);
358 svc_xprt_enqueue(&xprt->sc_xprt); 367 svc_xprt_enqueue(&xprt->sc_xprt);
359 } 368 }
369 svc_rdma_put_context(ctxt, 0);
360 break; 370 break;
361 371
362 default: 372 default:
@@ -365,6 +375,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
365 wc.opcode, wc.status); 375 wc.opcode, wc.status);
366 break; 376 break;
367 } 377 }
378 svc_xprt_put(&xprt->sc_xprt);
368 } 379 }
369 380
370 if (ctxt) 381 if (ctxt)
@@ -376,11 +387,15 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
376 struct svcxprt_rdma *xprt = cq_context; 387 struct svcxprt_rdma *xprt = cq_context;
377 unsigned long flags; 388 unsigned long flags;
378 389
390 /* Guard against unconditional flush call for destroyed QP */
391 if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount) == 0)
392 return;
393
379 /* 394 /*
380 * Set the bit regardless of whether or not it's on the list 395 * Set the bit regardless of whether or not it's on the list
381 * because it may be on the list already due to an RQ 396 * because it may be on the list already due to an RQ
382 * completion. 397 * completion.
383 */ 398 */
384 set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags); 399 set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
385 400
386 /* 401 /*
@@ -398,39 +413,6 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
398 tasklet_schedule(&dto_tasklet); 413 tasklet_schedule(&dto_tasklet);
399} 414}
400 415
401static void create_context_cache(struct svcxprt_rdma *xprt,
402 int ctxt_count, int ctxt_bump, int ctxt_max)
403{
404 struct svc_rdma_op_ctxt *ctxt;
405 int i;
406
407 xprt->sc_ctxt_max = ctxt_max;
408 xprt->sc_ctxt_bump = ctxt_bump;
409 xprt->sc_ctxt_cnt = 0;
410 xprt->sc_ctxt_head = NULL;
411 for (i = 0; i < ctxt_count; i++) {
412 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
413 if (ctxt) {
414 ctxt->next = xprt->sc_ctxt_head;
415 xprt->sc_ctxt_head = ctxt;
416 xprt->sc_ctxt_cnt++;
417 }
418 }
419}
420
421static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt)
422{
423 struct svc_rdma_op_ctxt *next;
424 if (!ctxt)
425 return;
426
427 do {
428 next = ctxt->next;
429 kfree(ctxt);
430 ctxt = next;
431 } while (next);
432}
433
434static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, 416static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
435 int listener) 417 int listener)
436{ 418{
@@ -447,7 +429,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
447 429
448 spin_lock_init(&cma_xprt->sc_lock); 430 spin_lock_init(&cma_xprt->sc_lock);
449 spin_lock_init(&cma_xprt->sc_read_complete_lock); 431 spin_lock_init(&cma_xprt->sc_read_complete_lock);
450 spin_lock_init(&cma_xprt->sc_ctxt_lock);
451 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 432 spin_lock_init(&cma_xprt->sc_rq_dto_lock);
452 433
453 cma_xprt->sc_ord = svcrdma_ord; 434 cma_xprt->sc_ord = svcrdma_ord;
@@ -456,21 +437,9 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
456 cma_xprt->sc_max_requests = svcrdma_max_requests; 437 cma_xprt->sc_max_requests = svcrdma_max_requests;
457 cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT; 438 cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
458 atomic_set(&cma_xprt->sc_sq_count, 0); 439 atomic_set(&cma_xprt->sc_sq_count, 0);
440 atomic_set(&cma_xprt->sc_ctxt_used, 0);
459 441
460 if (!listener) { 442 if (listener)
461 int reqs = cma_xprt->sc_max_requests;
462 create_context_cache(cma_xprt,
463 reqs << 1, /* starting size */
464 reqs, /* bump amount */
465 reqs +
466 cma_xprt->sc_sq_depth +
467 RPCRDMA_MAX_THREADS + 1); /* max */
468 if (!cma_xprt->sc_ctxt_head) {
469 kfree(cma_xprt);
470 return NULL;
471 }
472 clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
473 } else
474 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); 443 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
475 444
476 return cma_xprt; 445 return cma_xprt;
@@ -506,6 +475,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
506 BUG_ON(sge_no >= xprt->sc_max_sge); 475 BUG_ON(sge_no >= xprt->sc_max_sge);
507 page = svc_rdma_get_page(); 476 page = svc_rdma_get_page();
508 ctxt->pages[sge_no] = page; 477 ctxt->pages[sge_no] = page;
478 atomic_inc(&xprt->sc_dma_used);
509 pa = ib_dma_map_page(xprt->sc_cm_id->device, 479 pa = ib_dma_map_page(xprt->sc_cm_id->device,
510 page, 0, PAGE_SIZE, 480 page, 0, PAGE_SIZE,
511 DMA_FROM_DEVICE); 481 DMA_FROM_DEVICE);
@@ -520,7 +490,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
520 recv_wr.num_sge = ctxt->count; 490 recv_wr.num_sge = ctxt->count;
521 recv_wr.wr_id = (u64)(unsigned long)ctxt; 491 recv_wr.wr_id = (u64)(unsigned long)ctxt;
522 492
493 svc_xprt_get(&xprt->sc_xprt);
523 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); 494 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
495 if (ret) {
496 svc_xprt_put(&xprt->sc_xprt);
497 svc_rdma_put_context(ctxt, 1);
498 }
524 return ret; 499 return ret;
525} 500}
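Every posted receive WR now pins the transport: the reference taken here is dropped either immediately on a failed post, or in rq_cq_reap() once the WR completes. That is what allows the CQ handlers above to test xpt_ref to recognize flush completions from a destroyed QP.

    svc_xprt_get(&xprt->sc_xprt);                /* ref rides with the WR */
    ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
    if (ret) {
            svc_xprt_put(&xprt->sc_xprt);        /* WR never reached the HW */
            svc_rdma_put_context(ctxt, 1);
    }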
526 501
@@ -535,10 +510,11 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
535 * will call the recvfrom method on the listen xprt which will accept the new 510 * will call the recvfrom method on the listen xprt which will accept the new
536 * connection. 511 * connection.
537 */ 512 */
538static void handle_connect_req(struct rdma_cm_id *new_cma_id) 513static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
539{ 514{
540 struct svcxprt_rdma *listen_xprt = new_cma_id->context; 515 struct svcxprt_rdma *listen_xprt = new_cma_id->context;
541 struct svcxprt_rdma *newxprt; 516 struct svcxprt_rdma *newxprt;
517 struct sockaddr *sa;
542 518
543 /* Create a new transport */ 519 /* Create a new transport */
544 newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); 520 newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
@@ -551,6 +527,15 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
551 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", 527 dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
552 newxprt, newxprt->sc_cm_id, listen_xprt); 528 newxprt, newxprt->sc_cm_id, listen_xprt);
553 529
530 /* Save client advertised inbound read limit for use later in accept. */
531 newxprt->sc_ord = client_ird;
532
533 /* Set the local and remote addresses in the transport */
534 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
535 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
536 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
537 svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
538
554 /* 539 /*
555 * Enqueue the new transport on the accept queue of the listening 540 * Enqueue the new transport on the accept queue of the listening
556 * transport 541 * transport
@@ -581,7 +566,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
581 case RDMA_CM_EVENT_CONNECT_REQUEST: 566 case RDMA_CM_EVENT_CONNECT_REQUEST:
582 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " 567 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
583 "event=%d\n", cma_id, cma_id->context, event->event); 568 "event=%d\n", cma_id, cma_id->context, event->event);
584 handle_connect_req(cma_id); 569 handle_connect_req(cma_id,
570 event->param.conn.responder_resources);
585 break; 571 break;
586 572
587 case RDMA_CM_EVENT_ESTABLISHED: 573 case RDMA_CM_EVENT_ESTABLISHED:
@@ -627,6 +613,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
627 if (xprt) { 613 if (xprt) {
628 set_bit(XPT_CLOSE, &xprt->xpt_flags); 614 set_bit(XPT_CLOSE, &xprt->xpt_flags);
629 svc_xprt_enqueue(xprt); 615 svc_xprt_enqueue(xprt);
616 svc_xprt_put(xprt);
630 } 617 }
631 break; 618 break;
632 case RDMA_CM_EVENT_DEVICE_REMOVAL: 619 case RDMA_CM_EVENT_DEVICE_REMOVAL:
@@ -661,31 +648,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
661 648
662 cma_xprt = rdma_create_xprt(serv, 1); 649 cma_xprt = rdma_create_xprt(serv, 1);
663 if (!cma_xprt) 650 if (!cma_xprt)
664 return ERR_PTR(ENOMEM); 651 return ERR_PTR(-ENOMEM);
665 xprt = &cma_xprt->sc_xprt; 652 xprt = &cma_xprt->sc_xprt;
666 653
667 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); 654 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
668 if (IS_ERR(listen_id)) { 655 if (IS_ERR(listen_id)) {
669 svc_xprt_put(&cma_xprt->sc_xprt); 656 ret = PTR_ERR(listen_id);
670 dprintk("svcrdma: rdma_create_id failed = %ld\n", 657 dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
671 PTR_ERR(listen_id)); 658 goto err0;
672 return (void *)listen_id;
673 } 659 }
660
674 ret = rdma_bind_addr(listen_id, sa); 661 ret = rdma_bind_addr(listen_id, sa);
675 if (ret) { 662 if (ret) {
676 rdma_destroy_id(listen_id);
677 svc_xprt_put(&cma_xprt->sc_xprt);
678 dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); 663 dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
679 return ERR_PTR(ret); 664 goto err1;
680 } 665 }
681 cma_xprt->sc_cm_id = listen_id; 666 cma_xprt->sc_cm_id = listen_id;
682 667
683 ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); 668 ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
684 if (ret) { 669 if (ret) {
685 rdma_destroy_id(listen_id);
686 svc_xprt_put(&cma_xprt->sc_xprt);
687 dprintk("svcrdma: rdma_listen failed = %d\n", ret); 670 dprintk("svcrdma: rdma_listen failed = %d\n", ret);
688 return ERR_PTR(ret); 671 goto err1;
689 } 672 }
690 673
691 /* 674 /*
@@ -696,6 +679,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
696 svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen); 679 svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
697 680
698 return &cma_xprt->sc_xprt; 681 return &cma_xprt->sc_xprt;
682
683 err1:
684 rdma_destroy_id(listen_id);
685 err0:
686 kfree(cma_xprt);
687 return ERR_PTR(ret);
699} 688}
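Besides fixing the sign of the ENOMEM return (ERR_PTR() expects a negative errno), listener setup now unwinds through labels instead of repeating cleanup in every branch. kfree() is correct here because the xprt was never registered, so there is no reference to put:

    err1:                               /* rdma_create_id() succeeded */
            rdma_destroy_id(listen_id);
    err0:                               /* only the xprt itself to free */
            kfree(cma_xprt);            /* never registered: no svc_xprt_put() */
            return ERR_PTR(ret);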
700 689
701/* 690/*
@@ -716,7 +705,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
716 struct rdma_conn_param conn_param; 705 struct rdma_conn_param conn_param;
717 struct ib_qp_init_attr qp_attr; 706 struct ib_qp_init_attr qp_attr;
718 struct ib_device_attr devattr; 707 struct ib_device_attr devattr;
719 struct sockaddr *sa;
720 int ret; 708 int ret;
721 int i; 709 int i;
722 710
@@ -753,8 +741,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
753 (size_t)svcrdma_max_requests); 741 (size_t)svcrdma_max_requests);
754 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; 742 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
755 743
756 newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom, 744 /*
757 (size_t)svcrdma_ord); 745 * Limit ORD based on client limit, local device limit, and
746 * configured svcrdma limit.
747 */
748 newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
749 newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
758 750
759 newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); 751 newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
760 if (IS_ERR(newxprt->sc_pd)) { 752 if (IS_ERR(newxprt->sc_pd)) {
@@ -826,7 +818,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
826 newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; 818 newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
827 newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; 819 newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
828 } 820 }
829 svc_xprt_get(&newxprt->sc_xprt);
830 newxprt->sc_qp = newxprt->sc_cm_id->qp; 821 newxprt->sc_qp = newxprt->sc_cm_id->qp;
831 822
832 /* Register all of physical memory */ 823 /* Register all of physical memory */
@@ -850,6 +841,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
850 /* Swap out the handler */ 841 /* Swap out the handler */
851 newxprt->sc_cm_id->event_handler = rdma_cma_handler; 842 newxprt->sc_cm_id->event_handler = rdma_cma_handler;
852 843
844 /*
845 * Arm the CQs for the SQ and RQ before accepting so we can't
846 * miss the first message.
847 */
848 ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
849 ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
850
853 /* Accept Connection */ 851 /* Accept Connection */
854 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); 852 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
855 memset(&conn_param, 0, sizeof conn_param); 853 memset(&conn_param, 0, sizeof conn_param);
@@ -886,58 +884,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
886 newxprt->sc_max_requests, 884 newxprt->sc_max_requests,
887 newxprt->sc_ord); 885 newxprt->sc_ord);
888 886
889 /* Set the local and remote addresses in the transport */
890 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
891 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
892 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
893 svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
894
895 ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
896 ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
897 return &newxprt->sc_xprt; 887 return &newxprt->sc_xprt;
898 888
899 errout: 889 errout:
900 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); 890 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
901 /* Take a reference in case the DTO handler runs */ 891 /* Take a reference in case the DTO handler runs */
902 svc_xprt_get(&newxprt->sc_xprt); 892 svc_xprt_get(&newxprt->sc_xprt);
903 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) { 893 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
904 ib_destroy_qp(newxprt->sc_qp); 894 ib_destroy_qp(newxprt->sc_qp);
905 svc_xprt_put(&newxprt->sc_xprt);
906 }
907 rdma_destroy_id(newxprt->sc_cm_id); 895 rdma_destroy_id(newxprt->sc_cm_id);
908 /* This call to put will destroy the transport */ 896 /* This call to put will destroy the transport */
909 svc_xprt_put(&newxprt->sc_xprt); 897 svc_xprt_put(&newxprt->sc_xprt);
910 return NULL; 898 return NULL;
911} 899}
912 900
913/*
914 * Post an RQ WQE to the RQ when the rqst is being released. This
915 * effectively returns an RQ credit to the client. The rq_xprt_ctxt
916 * will be null if the request is deferred due to an RDMA_READ or the
917 * transport had no data ready (EAGAIN). Note that an RPC deferred in
918 * svc_process will still return the credit, this is because the data
919 * is copied and no longer consume a WQE/WC.
920 */
921static void svc_rdma_release_rqst(struct svc_rqst *rqstp) 901static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
922{ 902{
923 int err;
924 struct svcxprt_rdma *rdma =
925 container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
926 if (rqstp->rq_xprt_ctxt) {
927 BUG_ON(rqstp->rq_xprt_ctxt != rdma);
928 err = svc_rdma_post_recv(rdma);
929 if (err)
930 dprintk("svcrdma: failed to post an RQ WQE error=%d\n",
931 err);
932 }
933 rqstp->rq_xprt_ctxt = NULL;
934} 903}
935 904
936/* 905/*
937 * When connected, an svc_xprt has at least three references: 906 * When connected, an svc_xprt has at least two references:
938 *
939 * - A reference held by the QP. We still hold that here because this
940 * code deletes the QP and puts the reference.
941 * 907 *
942 * - A reference held by the cm_id between the ESTABLISHED and 908 * - A reference held by the cm_id between the ESTABLISHED and
943 * DISCONNECTED events. If the remote peer disconnected first, this 909 * DISCONNECTED events. If the remote peer disconnected first, this
@@ -946,7 +912,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
946 * - A reference held by the svc_recv code that called this function 912 * - A reference held by the svc_recv code that called this function
947 * as part of close processing. 913 * as part of close processing.
948 * 914 *
949 * At a minimum two references should still be held. 915 * At a minimum one reference should still be held.
950 */ 916 */
951static void svc_rdma_detach(struct svc_xprt *xprt) 917static void svc_rdma_detach(struct svc_xprt *xprt)
952{ 918{
@@ -956,23 +922,50 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
956 922
957 /* Disconnect and flush posted WQE */ 923 /* Disconnect and flush posted WQE */
958 rdma_disconnect(rdma->sc_cm_id); 924 rdma_disconnect(rdma->sc_cm_id);
959
960 /* Destroy the QP if present (not a listener) */
961 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
962 ib_destroy_qp(rdma->sc_qp);
963 svc_xprt_put(xprt);
964 }
965
966 /* Destroy the CM ID */
967 rdma_destroy_id(rdma->sc_cm_id);
968} 925}
969 926
970static void svc_rdma_free(struct svc_xprt *xprt) 927static void __svc_rdma_free(struct work_struct *work)
971{ 928{
972 struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; 929 struct svcxprt_rdma *rdma =
930 container_of(work, struct svcxprt_rdma, sc_work);
973 dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); 931 dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
932
974 /* We should only be called from kref_put */ 933 /* We should only be called from kref_put */
975 BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0); 934 BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
935
936 /*
937 * Destroy queued, but not processed read completions. Note
938 * that this cleanup has to be done before destroying the
939 * cm_id because the device ptr is needed to unmap the dma in
940 * svc_rdma_put_context.
941 */
942 while (!list_empty(&rdma->sc_read_complete_q)) {
943 struct svc_rdma_op_ctxt *ctxt;
944 ctxt = list_entry(rdma->sc_read_complete_q.next,
945 struct svc_rdma_op_ctxt,
946 dto_q);
947 list_del_init(&ctxt->dto_q);
948 svc_rdma_put_context(ctxt, 1);
949 }
950
951 /* Destroy queued, but not processed recv completions */
952 while (!list_empty(&rdma->sc_rq_dto_q)) {
953 struct svc_rdma_op_ctxt *ctxt;
954 ctxt = list_entry(rdma->sc_rq_dto_q.next,
955 struct svc_rdma_op_ctxt,
956 dto_q);
957 list_del_init(&ctxt->dto_q);
958 svc_rdma_put_context(ctxt, 1);
959 }
960
961 /* Warn if we leaked a resource or under-referenced */
962 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
963 WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
964
965 /* Destroy the QP if present (not a listener) */
966 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
967 ib_destroy_qp(rdma->sc_qp);
968
976 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) 969 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
977 ib_destroy_cq(rdma->sc_sq_cq); 970 ib_destroy_cq(rdma->sc_sq_cq);
978 971
@@ -985,10 +978,20 @@ static void svc_rdma_free(struct svc_xprt *xprt)
985 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) 978 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
986 ib_dealloc_pd(rdma->sc_pd); 979 ib_dealloc_pd(rdma->sc_pd);
987 980
988 destroy_context_cache(rdma->sc_ctxt_head); 981 /* Destroy the CM ID */
982 rdma_destroy_id(rdma->sc_cm_id);
983
989 kfree(rdma); 984 kfree(rdma);
990} 985}
991 986
987static void svc_rdma_free(struct svc_xprt *xprt)
988{
989 struct svcxprt_rdma *rdma =
990 container_of(xprt, struct svcxprt_rdma, sc_xprt);
991 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
992 schedule_work(&rdma->sc_work);
993}
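svc_rdma_free() can be reached from kref_put() in the dto tasklet, where ib_destroy_qp()/ib_destroy_cq() must not be called because they may sleep, so the actual teardown is pushed to process context:

    INIT_WORK(&rdma->sc_work, __svc_rdma_free);
    schedule_work(&rdma->sc_work);    /* __svc_rdma_free() may sleep safely */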
994
992static int svc_rdma_has_wspace(struct svc_xprt *xprt) 995static int svc_rdma_has_wspace(struct svc_xprt *xprt)
993{ 996{
994 struct svcxprt_rdma *rdma = 997 struct svcxprt_rdma *rdma =
@@ -1018,7 +1021,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1018 int ret; 1021 int ret;
1019 1022
1020 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1023 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1021 return 0; 1024 return -ENOTCONN;
1022 1025
1023 BUG_ON(wr->send_flags != IB_SEND_SIGNALED); 1026 BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
1024 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != 1027 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
@@ -1029,7 +1032,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1029 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { 1032 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
1030 spin_unlock_bh(&xprt->sc_lock); 1033 spin_unlock_bh(&xprt->sc_lock);
1031 atomic_inc(&rdma_stat_sq_starve); 1034 atomic_inc(&rdma_stat_sq_starve);
1032 /* See if we can reap some SQ WR */ 1035
1036 /* See if we can opportunistically reap SQ WR to make room */
1033 sq_cq_reap(xprt); 1037 sq_cq_reap(xprt);
1034 1038
1035 /* Wait until SQ WR available if SQ still full */ 1039 /* Wait until SQ WR available if SQ still full */
@@ -1041,22 +1045,25 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1041 continue; 1045 continue;
1042 } 1046 }
1043 /* Bumped used SQ WR count and post */ 1047 /* Bumped used SQ WR count and post */
1048 svc_xprt_get(&xprt->sc_xprt);
1044 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); 1049 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1045 if (!ret) 1050 if (!ret)
1046 atomic_inc(&xprt->sc_sq_count); 1051 atomic_inc(&xprt->sc_sq_count);
1047 else 1052 else {
1053 svc_xprt_put(&xprt->sc_xprt);
1048 dprintk("svcrdma: failed to post SQ WR rc=%d, " 1054 dprintk("svcrdma: failed to post SQ WR rc=%d, "
1049 "sc_sq_count=%d, sc_sq_depth=%d\n", 1055 "sc_sq_count=%d, sc_sq_depth=%d\n",
1050 ret, atomic_read(&xprt->sc_sq_count), 1056 ret, atomic_read(&xprt->sc_sq_count),
1051 xprt->sc_sq_depth); 1057 xprt->sc_sq_depth);
1058 }
1052 spin_unlock_bh(&xprt->sc_lock); 1059 spin_unlock_bh(&xprt->sc_lock);
1053 break; 1060 break;
1054 } 1061 }
1055 return ret; 1062 return ret;
1056} 1063}
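Send-side posting mirrors the receive side: a transport reference accompanies every WR handed to the hardware, and the matching svc_xprt_put() is the one added at the bottom of sq_cq_reap()'s loop above.

    svc_xprt_get(&xprt->sc_xprt);            /* put in sq_cq_reap() */
    ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
    if (!ret)
            atomic_inc(&xprt->sc_sq_count);  /* SQ slot consumed */
    else
            svc_xprt_put(&xprt->sc_xprt);    /* post failed, drop ref now */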
1057 1064
1058int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, 1065void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1059 enum rpcrdma_errcode err) 1066 enum rpcrdma_errcode err)
1060{ 1067{
1061 struct ib_send_wr err_wr; 1068 struct ib_send_wr err_wr;
1062 struct ib_sge sge; 1069 struct ib_sge sge;
@@ -1073,6 +1080,7 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1073 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1080 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1074 1081
1075 /* Prepare SGE for local address */ 1082 /* Prepare SGE for local address */
1083 atomic_inc(&xprt->sc_dma_used);
1076 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, 1084 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
1077 p, 0, PAGE_SIZE, DMA_FROM_DEVICE); 1085 p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1078 sge.lkey = xprt->sc_phys_mr->lkey; 1086 sge.lkey = xprt->sc_phys_mr->lkey;
@@ -1094,9 +1102,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1094 /* Post It */ 1102 /* Post It */
1095 ret = svc_rdma_send(xprt, &err_wr); 1103 ret = svc_rdma_send(xprt, &err_wr);
1096 if (ret) { 1104 if (ret) {
1097 dprintk("svcrdma: Error posting send = %d\n", ret); 1105 dprintk("svcrdma: Error %d posting send for protocol error\n",
1106 ret);
1098 svc_rdma_put_context(ctxt, 1); 1107 svc_rdma_put_context(ctxt, 1);
1099 } 1108 }
1100
1101 return ret;
1102} 1109}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e18cd3628db4..657835f227d3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -169,6 +169,11 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
169 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); 169 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
170} 170}
171 171
172static inline int unix_recvq_full(const struct sock *sk)
173{
174 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
175}
176
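The helper replaces three open-coded copies of the same comparison; note that it keeps the original strict '>' against sk_max_ack_backlog, so behaviour is unchanged. A typical call site, as rewritten below:

    if (unix_peer(other) != sk && unix_recvq_full(other)) {
            /* receiver is over its backlog: wait or fail with -EAGAIN */
            err = -EAGAIN;
    }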
172static struct sock *unix_peer_get(struct sock *s) 177static struct sock *unix_peer_get(struct sock *s)
173{ 178{
174 struct sock *peer; 179 struct sock *peer;
@@ -482,6 +487,8 @@ static int unix_socketpair(struct socket *, struct socket *);
482static int unix_accept(struct socket *, struct socket *, int); 487static int unix_accept(struct socket *, struct socket *, int);
483static int unix_getname(struct socket *, struct sockaddr *, int *, int); 488static int unix_getname(struct socket *, struct sockaddr *, int *, int);
484static unsigned int unix_poll(struct file *, struct socket *, poll_table *); 489static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490static unsigned int unix_datagram_poll(struct file *, struct socket *,
491 poll_table *);
485static int unix_ioctl(struct socket *, unsigned int, unsigned long); 492static int unix_ioctl(struct socket *, unsigned int, unsigned long);
486static int unix_shutdown(struct socket *, int); 493static int unix_shutdown(struct socket *, int);
487static int unix_stream_sendmsg(struct kiocb *, struct socket *, 494static int unix_stream_sendmsg(struct kiocb *, struct socket *,
@@ -527,7 +534,7 @@ static const struct proto_ops unix_dgram_ops = {
527 .socketpair = unix_socketpair, 534 .socketpair = unix_socketpair,
528 .accept = sock_no_accept, 535 .accept = sock_no_accept,
529 .getname = unix_getname, 536 .getname = unix_getname,
530 .poll = datagram_poll, 537 .poll = unix_datagram_poll,
531 .ioctl = unix_ioctl, 538 .ioctl = unix_ioctl,
532 .listen = sock_no_listen, 539 .listen = sock_no_listen,
533 .shutdown = unix_shutdown, 540 .shutdown = unix_shutdown,
@@ -548,7 +555,7 @@ static const struct proto_ops unix_seqpacket_ops = {
548 .socketpair = unix_socketpair, 555 .socketpair = unix_socketpair,
549 .accept = unix_accept, 556 .accept = unix_accept,
550 .getname = unix_getname, 557 .getname = unix_getname,
551 .poll = datagram_poll, 558 .poll = unix_datagram_poll,
552 .ioctl = unix_ioctl, 559 .ioctl = unix_ioctl,
553 .listen = unix_listen, 560 .listen = unix_listen,
554 .shutdown = unix_shutdown, 561 .shutdown = unix_shutdown,
@@ -983,8 +990,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
983 990
984 sched = !sock_flag(other, SOCK_DEAD) && 991 sched = !sock_flag(other, SOCK_DEAD) &&
985 !(other->sk_shutdown & RCV_SHUTDOWN) && 992 !(other->sk_shutdown & RCV_SHUTDOWN) &&
986 (skb_queue_len(&other->sk_receive_queue) > 993 unix_recvq_full(other);
987 other->sk_max_ack_backlog);
988 994
989 unix_state_unlock(other); 995 unix_state_unlock(other);
990 996
@@ -1058,8 +1064,7 @@ restart:
1058 if (other->sk_state != TCP_LISTEN) 1064 if (other->sk_state != TCP_LISTEN)
1059 goto out_unlock; 1065 goto out_unlock;
1060 1066
1061 if (skb_queue_len(&other->sk_receive_queue) > 1067 if (unix_recvq_full(other)) {
1062 other->sk_max_ack_backlog) {
1063 err = -EAGAIN; 1068 err = -EAGAIN;
1064 if (!timeo) 1069 if (!timeo)
1065 goto out_unlock; 1070 goto out_unlock;
@@ -1428,9 +1433,7 @@ restart:
1428 goto out_unlock; 1433 goto out_unlock;
1429 } 1434 }
1430 1435
1431 if (unix_peer(other) != sk && 1436 if (unix_peer(other) != sk && unix_recvq_full(other)) {
1432 (skb_queue_len(&other->sk_receive_queue) >
1433 other->sk_max_ack_backlog)) {
1434 if (!timeo) { 1437 if (!timeo) {
1435 err = -EAGAIN; 1438 err = -EAGAIN;
1436 goto out_unlock; 1439 goto out_unlock;
@@ -1991,6 +1994,64 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
1991 return mask; 1994 return mask;
1992} 1995}
1993 1996
1997static unsigned int unix_datagram_poll(struct file *file, struct socket *sock,
1998 poll_table *wait)
1999{
2000 struct sock *sk = sock->sk, *peer;
2001 unsigned int mask;
2002
2003 poll_wait(file, sk->sk_sleep, wait);
2004
2005 peer = unix_peer_get(sk);
2006 if (peer) {
2007 if (peer != sk) {
2008 /*
2009 * Writability of a connected socket additionally
2010 * depends on the state of the receive queue of the
2011 * peer.
2012 */
2013 poll_wait(file, &unix_sk(peer)->peer_wait, wait);
2014 } else {
2015 sock_put(peer);
2016 peer = NULL;
2017 }
2018 }
2019
2020 mask = 0;
2021
2022 /* exceptional events? */
2023 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2024 mask |= POLLERR;
2025 if (sk->sk_shutdown & RCV_SHUTDOWN)
2026 mask |= POLLRDHUP;
2027 if (sk->sk_shutdown == SHUTDOWN_MASK)
2028 mask |= POLLHUP;
2029
2030 /* readable? */
2031 if (!skb_queue_empty(&sk->sk_receive_queue) ||
2032 (sk->sk_shutdown & RCV_SHUTDOWN))
2033 mask |= POLLIN | POLLRDNORM;
2034
2035 /* Connection-based need to check for termination and startup */
2036 if (sk->sk_type == SOCK_SEQPACKET) {
2037 if (sk->sk_state == TCP_CLOSE)
2038 mask |= POLLHUP;
2039 /* connection hasn't started yet? */
2040 if (sk->sk_state == TCP_SYN_SENT)
2041 return mask;
2042 }
2043
2044 /* writable? */
2045 if (unix_writable(sk) && !(peer && unix_recvq_full(peer)))
2046 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2047 else
2048 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2049
2050 if (peer)
2051 sock_put(peer);
2052
2053 return mask;
2054}
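The new datagram poll differs from the generic datagram_poll() in exactly one respect: writability of a connected socket also depends on the peer's receive queue, which is why the function registers on unix_sk(peer)->peer_wait as well. The decisive check:

    /* Writable only with local sndbuf space AND room in the peer's queue */
    if (unix_writable(sk) && !(peer && unix_recvq_full(peer)))
            mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
    else
            set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);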
1994 2055
1995#ifdef CONFIG_PROC_FS 2056#ifdef CONFIG_PROC_FS
1996static struct sock *first_unix_socket(int *i) 2057static struct sock *first_unix_socket(int *i)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2bdd4dddc0e1..fb75f265b39c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -187,7 +187,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
187 return genlmsg_end(msg, hdr); 187 return genlmsg_end(msg, hdr);
188 188
189 nla_put_failure: 189 nla_put_failure:
190 return genlmsg_cancel(msg, hdr); 190 genlmsg_cancel(msg, hdr);
191 return -EMSGSIZE;
191} 192}
192 193
193static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) 194static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
@@ -273,7 +274,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags,
273 return genlmsg_end(msg, hdr); 274 return genlmsg_end(msg, hdr);
274 275
275 nla_put_failure: 276 nla_put_failure:
276 return genlmsg_cancel(msg, hdr); 277 genlmsg_cancel(msg, hdr);
278 return -EMSGSIZE;
277} 279}
278 280
279static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) 281static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb)
@@ -928,7 +930,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
928 return genlmsg_end(msg, hdr); 930 return genlmsg_end(msg, hdr);
929 931
930 nla_put_failure: 932 nla_put_failure:
931 return genlmsg_cancel(msg, hdr); 933 genlmsg_cancel(msg, hdr);
934 return -EMSGSIZE;
932} 935}
933 936
934static int nl80211_dump_station(struct sk_buff *skb, 937static int nl80211_dump_station(struct sk_buff *skb,
@@ -1267,7 +1270,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
1267 return genlmsg_end(msg, hdr); 1270 return genlmsg_end(msg, hdr);
1268 1271
1269 nla_put_failure: 1272 nla_put_failure:
1270 return genlmsg_cancel(msg, hdr); 1273 genlmsg_cancel(msg, hdr);
1274 return -EMSGSIZE;
1271} 1275}
1272 1276
1273static int nl80211_dump_mpath(struct sk_buff *skb, 1277static int nl80211_dump_mpath(struct sk_buff *skb,
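All four dump helpers shared the same bug: genlmsg_cancel() aborts the half-built message, but its return value is not a usable errno, so propagating it made a full skb look like success to the netlink dump machinery. The corrected idiom:

    nla_put_failure:
            genlmsg_cancel(msg, hdr);   /* drop the partially built message */
            return -EMSGSIZE;           /* "skb full, call the dump again" */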
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index ac765dd9c7f5..23a2cc04b8cd 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -200,8 +200,8 @@ static struct xfrm_algo_desc aalg_list[] = {
200 } 200 }
201}, 201},
202{ 202{
203 .name = "hmac(ripemd160)", 203 .name = "hmac(rmd160)",
204 .compat = "ripemd160", 204 .compat = "rmd160",
205 205
206 .uinfo = { 206 .uinfo = {
207 .auth = { 207 .auth = {
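The crypto layer registers RIPEMD-160 under the name "rmd160", so the old "hmac(ripemd160)" string could never resolve to a transform. A quick way to confirm a template name resolves, sketched with the current ahash interface (an assumption on my part; code of this vintage used crypto_alloc_hash()):

    struct crypto_ahash *tfm = crypto_alloc_ahash("hmac(rmd160)", 0, 0);
    if (IS_ERR(tfm))
            return PTR_ERR(tfm);    /* -ENOENT if the name doesn't resolve */
    crypto_free_ahash(tfm);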
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a1b0fbe3ea35..b976d9ed10e4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -50,19 +50,8 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
50 50
51 switch (type) { 51 switch (type) {
52 case XFRMA_ALG_AUTH: 52 case XFRMA_ALG_AUTH:
53 if (!algp->alg_key_len &&
54 strcmp(algp->alg_name, "digest_null") != 0)
55 return -EINVAL;
56 break;
57
58 case XFRMA_ALG_CRYPT: 53 case XFRMA_ALG_CRYPT:
59 if (!algp->alg_key_len &&
60 strcmp(algp->alg_name, "cipher_null") != 0)
61 return -EINVAL;
62 break;
63
64 case XFRMA_ALG_COMP: 54 case XFRMA_ALG_COMP:
65 /* Zero length keys are legal. */
66 break; 55 break;
67 56
68 default: 57 default: