author		Jaroslav Kysela <perex@hera.kernel.org>	2005-06-21 10:39:41 -0400
committer	Jaroslav Kysela <perex@hera.kernel.org>	2005-06-21 10:39:41 -0400
commit		fae6ec69c84d71b1d5bda9ede1a262c1681684aa (patch)
tree		eb4aff9a5c2b7d04ce09a3717bb1dd4a79fe7595 /net
parent		bbc0274e9bb2e3f1d724d445a2bd32566b9b66f7 (diff)
parent		1d345dac1f30af1cd9f3a1faa12f9f18f17f236e (diff)
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'net')
-rw-r--r--	net/appletalk/aarp.c | 2
-rw-r--r--	net/appletalk/ddp.c | 2
-rw-r--r--	net/bridge/br_device.c | 15
-rw-r--r--	net/bridge/br_if.c | 23
-rw-r--r--	net/bridge/br_input.c | 8
-rw-r--r--	net/bridge/br_notify.c | 9
-rw-r--r--	net/bridge/br_private.h | 1
-rw-r--r--	net/bridge/br_stp_bpdu.c | 3
-rw-r--r--	net/core/Makefile | 3
-rw-r--r--	net/core/dev.c | 13
-rw-r--r--	net/core/ethtool.c | 22
-rw-r--r--	net/core/neighbour.c | 333
-rw-r--r--	net/core/net-sysfs.c | 20
-rw-r--r--	net/core/request_sock.c | 64
-rw-r--r--	net/core/rtnetlink.c | 33
-rw-r--r--	net/core/sock.c | 35
-rw-r--r--	net/core/wireless.c | 74
-rw-r--r--	net/decnet/dn_dev.c | 9
-rw-r--r--	net/decnet/dn_neigh.c | 1
-rw-r--r--	net/decnet/dn_route.c | 11
-rw-r--r--	net/decnet/dn_rules.c | 7
-rw-r--r--	net/decnet/dn_table.c | 8
-rw-r--r--	net/ipv4/af_inet.c | 1
-rw-r--r--	net/ipv4/ah4.c | 2
-rw-r--r--	net/ipv4/devinet.c | 43
-rw-r--r--	net/ipv4/esp4.c | 4
-rw-r--r--	net/ipv4/fib_frontend.c | 55
-rw-r--r--	net/ipv4/fib_hash.c | 3
-rw-r--r--	net/ipv4/fib_lookup.h | 3
-rw-r--r--	net/ipv4/fib_rules.c | 7
-rw-r--r--	net/ipv4/fib_semantics.c | 10
-rw-r--r--	net/ipv4/icmp.c | 9
-rw-r--r--	net/ipv4/ip_sockglue.c | 6
-rw-r--r--	net/ipv4/ipcomp.c | 11
-rw-r--r--	net/ipv4/ipvs/Makefile | 2
-rw-r--r--	net/ipv4/ipvs/ip_vs_proto.c | 3
-rw-r--r--	net/ipv4/ipvs/ip_vs_proto_icmp.c | 182
-rw-r--r--	net/ipv4/multipath_drr.c | 20
-rw-r--r--	net/ipv4/multipath_random.c | 2
-rw-r--r--	net/ipv4/multipath_rr.c | 22
-rw-r--r--	net/ipv4/multipath_wrandom.c | 2
-rw-r--r--	net/ipv4/netfilter/ip_conntrack_standalone.c | 1
-rw-r--r--	net/ipv4/netfilter/ip_queue.c | 10
-rw-r--r--	net/ipv4/netfilter/ipt_recent.c | 10
-rw-r--r--	net/ipv4/raw.c | 22
-rw-r--r--	net/ipv4/route.c | 11
-rw-r--r--	net/ipv4/syncookies.c | 49
-rw-r--r--	net/ipv4/sysctl_net_ipv4.c | 9
-rw-r--r--	net/ipv4/tcp.c | 86
-rw-r--r--	net/ipv4/tcp_diag.c | 37
-rw-r--r--	net/ipv4/tcp_ipv4.c | 172
-rw-r--r--	net/ipv4/tcp_minisocks.c | 68
-rw-r--r--	net/ipv4/tcp_output.c | 27
-rw-r--r--	net/ipv4/tcp_timer.c | 18
-rw-r--r--	net/ipv4/udp.c | 12
-rw-r--r--	net/ipv4/xfrm4_output.c | 8
-rw-r--r--	net/ipv4/xfrm4_state.c | 9
-rw-r--r--	net/ipv4/xfrm4_tunnel.c | 2
-rw-r--r--	net/ipv6/addrconf.c | 58
-rw-r--r--	net/ipv6/ah6.c | 2
-rw-r--r--	net/ipv6/datagram.c | 6
-rw-r--r--	net/ipv6/esp6.c | 2
-rw-r--r--	net/ipv6/icmp.c | 14
-rw-r--r--	net/ipv6/ip6_flowlabel.c | 10
-rw-r--r--	net/ipv6/ip6_tunnel.c | 1
-rw-r--r--	net/ipv6/ipcomp6.c | 9
-rw-r--r--	net/ipv6/ipv6_syms.c | 1
-rw-r--r--	net/ipv6/raw.c | 8
-rw-r--r--	net/ipv6/route.c | 11
-rw-r--r--	net/ipv6/tcp_ipv6.c | 148
-rw-r--r--	net/ipv6/udp.c | 4
-rw-r--r--	net/ipv6/xfrm6_tunnel.c | 2
-rw-r--r--	net/key/af_key.c | 385
-rw-r--r--	net/netlink/af_netlink.c | 8
-rw-r--r--	net/sched/Kconfig | 2
-rw-r--r--	net/sched/act_api.c | 13
-rw-r--r--	net/sched/cls_api.c | 5
-rw-r--r--	net/sched/cls_basic.c | 3
-rw-r--r--	net/sched/em_meta.c | 295
-rw-r--r--	net/sched/sch_api.c | 10
-rw-r--r--	net/sched/sch_dsmark.c | 373
-rw-r--r--	net/sched/sch_fifo.c | 152
-rw-r--r--	net/sched/sch_generic.c | 84
-rw-r--r--	net/sctp/associola.c | 151
-rw-r--r--	net/sctp/endpointola.c | 1
-rw-r--r--	net/sctp/input.c | 51
-rw-r--r--	net/sctp/ipv6.c | 36
-rw-r--r--	net/sctp/outqueue.c | 11
-rw-r--r--	net/sctp/proc.c | 194
-rw-r--r--	net/sctp/protocol.c | 7
-rw-r--r--	net/sctp/sm_make_chunk.c | 20
-rw-r--r--	net/sctp/sm_sideeffect.c | 105
-rw-r--r--	net/sctp/sm_statefuns.c | 148
-rw-r--r--	net/sctp/sm_statetable.c | 6
-rw-r--r--	net/sctp/socket.c | 425
-rw-r--r--	net/sctp/transport.c | 4
-rw-r--r--	net/socket.c | 9
-rw-r--r--	net/xfrm/xfrm_policy.c | 9
-rw-r--r--	net/xfrm/xfrm_state.c | 118
-rw-r--r--	net/xfrm/xfrm_user.c | 297
100 files changed, 3228 insertions, 1604 deletions
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 54640c01b50c..10d040461021 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -565,7 +565,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
 		 * numbers we just happen to need. Now put the
 		 * length in the lower two.
 		 */
-		*((__u16 *)skb->data) = htons(skb->len);
+		*((__be16 *)skb->data) = htons(skb->len);
 		ft = 1;
 	}
 	/*
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 876dbac71060..192b529f86a4 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -401,7 +401,7 @@ out_err:
 }
 
 /* Find a match for a specific network:node pair */
-static struct atalk_iface *atalk_find_interface(int net, int node)
+static struct atalk_iface *atalk_find_interface(__be16 net, int node)
 {
 	struct atalk_iface *iface;
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index d9b72fde433c..f564ee99782d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -21,10 +21,7 @@
 
 static struct net_device_stats *br_dev_get_stats(struct net_device *dev)
 {
-	struct net_bridge *br;
-
-	br = dev->priv;
-
+	struct net_bridge *br = netdev_priv(dev);
 	return &br->statistics;
 }
 
@@ -54,9 +51,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int br_dev_open(struct net_device *dev)
 {
-	netif_start_queue(dev);
+	struct net_bridge *br = netdev_priv(dev);
 
-	br_stp_enable_bridge(dev->priv);
+	br_features_recompute(br);
+	netif_start_queue(dev);
+	br_stp_enable_bridge(br);
 
 	return 0;
 }
@@ -67,7 +66,7 @@ static void br_dev_set_multicast_list(struct net_device *dev)
 
 static int br_dev_stop(struct net_device *dev)
 {
-	br_stp_disable_bridge(dev->priv);
+	br_stp_disable_bridge(netdev_priv(dev));
 
 	netif_stop_queue(dev);
 
@@ -76,7 +75,7 @@ static int br_dev_stop(struct net_device *dev)
 
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
-	if ((new_mtu < 68) || new_mtu > br_min_mtu(dev->priv))
+	if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev)))
 		return -EINVAL;
 
 	dev->mtu = new_mtu;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 69872bf3b87e..91bb895375f4 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -314,6 +314,28 @@ int br_min_mtu(const struct net_bridge *br)
 	return mtu;
 }
 
+/*
+ * Recomputes features using slave's features
+ */
+void br_features_recompute(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+	unsigned long features, checksum;
+
+	features = NETIF_F_SG | NETIF_F_FRAGLIST
+		| NETIF_F_HIGHDMA | NETIF_F_TSO;
+	checksum = NETIF_F_IP_CSUM;	/* least common subset */
+
+	list_for_each_entry(p, &br->port_list, list) {
+		if (!(p->dev->features
+		      & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
+			checksum = 0;
+		features &= p->dev->features;
+	}
+
+	br->dev->features = features | checksum | NETIF_F_LLTX;
+}
+
 /* called with RTNL */
 int br_add_if(struct net_bridge *br, struct net_device *dev)
 {
@@ -368,6 +390,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 
 	spin_lock_bh(&br->lock);
 	br_stp_recalculate_bridge_id(br);
+	br_features_recompute(br);
 	spin_unlock_bh(&br->lock);
 
 	return 0;
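
An aside on the hunk above: br_features_recompute() reduces the bridge device's
advertised features to what every slave port supports — capability bits are
AND-ed across all ports, while the checksum bit degrades from the "least common
subset" (NETIF_F_IP_CSUM) to nothing as soon as any single port offers no
checksum offload at all. A minimal user-space sketch of the same reduction; the
F_* masks below are made-up illustration values, not the kernel's NETIF_F_*
constants:

	/* Illustrative masks only -- not the kernel's NETIF_F_* bit values. */
	#define F_SG	0x1UL
	#define F_TSO	0x2UL
	#define F_CSUM	0x4UL	/* stands in for IP_CSUM|NO_CSUM|HW_CSUM */

	/* Same reduction br_features_recompute() performs over br->port_list. */
	static unsigned long bridge_features(const unsigned long *port, int nports)
	{
		unsigned long features = F_SG | F_TSO;	/* capability superset */
		unsigned long checksum = F_CSUM;	/* least common subset */
		int i;

		for (i = 0; i < nports; i++) {
			if (!(port[i] & F_CSUM))	/* one port lacks csum offload */
				checksum = 0;
			features &= port[i];		/* AND across all slaves */
		}
		return features | checksum;
	}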
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 2b1cce46cab4..8f5f2e730992 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -26,7 +26,7 @@ static int br_pass_frame_up_finish(struct sk_buff *skb)
 #ifdef CONFIG_NETFILTER_DEBUG
 	skb->nf_debug = 0;
 #endif
-	netif_rx(skb);
+	netif_receive_skb(skb);
 
 	return 0;
 }
@@ -54,6 +54,9 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	struct net_bridge_fdb_entry *dst;
 	int passedup = 0;
 
+	/* insert into forwarding database after filtering to avoid spoofing */
+	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
+
 	if (br->dev->flags & IFF_PROMISC) {
 		struct sk_buff *skb2;
 
@@ -108,8 +111,7 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto err;
 
-	if (p->state == BR_STATE_LEARNING ||
-	    p->state == BR_STATE_FORWARDING)
+	if (p->state == BR_STATE_LEARNING)
 		br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
 
 	if (p->br->stp_enabled &&
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index f8fb49e34764..917311c6828b 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -65,6 +65,15 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		}
 		break;
 
+	case NETDEV_FEAT_CHANGE:
+		if (br->dev->flags & IFF_UP)
+			br_features_recompute(br);
+
+		/* could do recursive feature change notification
+		 * but who would care??
+		 */
+		break;
+
 	case NETDEV_DOWN:
 		if (br->dev->flags & IFF_UP)
 			br_stp_disable_port(p);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 54d63f1372a0..bdf95a74d8cd 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -174,6 +174,7 @@ extern int br_add_if(struct net_bridge *br,
 extern int br_del_if(struct net_bridge *br,
 		     struct net_device *dev);
 extern int br_min_mtu(const struct net_bridge *br);
+extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index b91a875aca01..d071f1c9ad0b 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -140,6 +140,9 @@ int br_stp_handle_bpdu(struct sk_buff *skb)
 	struct net_bridge *br = p->br;
 	unsigned char *buf;
 
+	/* insert into forwarding database after filtering to avoid spoofing */
+	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
+
 	/* need at least the 802 and STP headers */
 	if (!pskb_may_pull(skb, sizeof(header)+1) ||
 	    memcmp(skb->data, header, sizeof(header)))
diff --git a/net/core/Makefile b/net/core/Makefile
index 81f03243fe2f..5e0c56b7f607 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,8 @@
 # Makefile for the Linux networking core.
 #
 
-obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
+obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+	 gen_stats.o gen_estimator.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
diff --git a/net/core/dev.c b/net/core/dev.c
index d4d9e2680adb..ab935778ce81 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -761,6 +761,18 @@ int dev_change_name(struct net_device *dev, char *newname)
 }
 
 /**
+ *	netdev_features_change - device changes features
+ *	@dev: device to cause notification
+ *
+ *	Called to indicate a device has changed features.
+ */
+void netdev_features_change(struct net_device *dev)
+{
+	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL(netdev_features_change);
+
+/**
  *	netdev_state_change - device changes state
  *	@dev: device to cause notification
  *
@@ -1732,6 +1744,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 	unsigned long start_time = jiffies;
 
+	backlog_dev->weight = weight_p;
 	for (;;) {
 		struct sk_buff *skb;
 		struct net_device *dev;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f05fde97c43d..a3eeb88e1c81 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -29,7 +29,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
 
 u32 ethtool_op_get_tx_csum(struct net_device *dev)
 {
-	return (dev->features & NETIF_F_IP_CSUM) != 0;
+	return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
 }
 
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
@@ -42,6 +42,15 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
 	return 0;
 }
 
+int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
+{
+	if (data)
+		dev->features |= NETIF_F_HW_CSUM;
+	else
+		dev->features &= ~NETIF_F_HW_CSUM;
+
+	return 0;
+}
 u32 ethtool_op_get_sg(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_SG) != 0;
@@ -347,7 +356,7 @@ static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce;
 
-	if (!dev->ethtool_ops->get_coalesce)
+	if (!dev->ethtool_ops->set_coalesce)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
@@ -682,6 +691,7 @@ int dev_ethtool(struct ifreq *ifr)
 	void __user *useraddr = ifr->ifr_data;
 	u32 ethcmd;
 	int rc;
+	unsigned long old_features;
 
 	/*
 	 * XXX: This can be pushed down into the ethtool_* handlers that
@@ -703,6 +713,8 @@ int dev_ethtool(struct ifreq *ifr)
 		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
 			return rc;
 
+	old_features = dev->features;
+
 	switch (ethcmd) {
 	case ETHTOOL_GSET:
 		rc = ethtool_get_settings(dev, useraddr);
@@ -712,7 +724,6 @@ int dev_ethtool(struct ifreq *ifr)
 		break;
 	case ETHTOOL_GDRVINFO:
 		rc = ethtool_get_drvinfo(dev, useraddr);
-
 		break;
 	case ETHTOOL_GREGS:
 		rc = ethtool_get_regs(dev, useraddr);
@@ -801,6 +812,10 @@ int dev_ethtool(struct ifreq *ifr)
 
 	if(dev->ethtool_ops->complete)
 		dev->ethtool_ops->complete(dev);
+
+	if (old_features != dev->features)
+		netdev_features_change(dev);
+
 	return rc;
 
  ioctl:
@@ -817,3 +832,4 @@ EXPORT_SYMBOL(ethtool_op_get_tx_csum);
 EXPORT_SYMBOL(ethtool_op_set_sg);
 EXPORT_SYMBOL(ethtool_op_set_tso);
 EXPORT_SYMBOL(ethtool_op_set_tx_csum);
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 43bdc521e20d..f6bdcad47da6 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1276,9 +1276,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 	INIT_RCU_HEAD(&p->rcu_head);
 	p->reachable_time =
 			neigh_rand_reach_time(p->base_reachable_time);
-	if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
-		kfree(p);
-		return NULL;
+	if (dev) {
+		if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+			kfree(p);
+			return NULL;
+		}
+
+		dev_hold(dev);
+		p->dev = dev;
 	}
 	p->sysctl_table = NULL;
 	write_lock_bh(&tbl->lock);
@@ -1309,6 +1314,8 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 			*p = parms->next;
 			parms->dead = 1;
 			write_unlock_bh(&tbl->lock);
+			if (parms->dev)
+				dev_put(parms->dev);
 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
 			return;
 		}
@@ -1546,20 +1553,323 @@ out:
 	return err;
 }
 
+static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
+{
+	struct rtattr *nest = NULL;
+
+	nest = RTA_NEST(skb, NDTA_PARMS);
+
+	if (parms->dev)
+		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+	RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+		      parms->base_reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+
+	return RTA_NEST_END(skb, nest);
+
+rtattr_failure:
+	return RTA_NEST_CANCEL(skb, nest);
+}
+
+static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
+			      struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	struct ndtmsg *ndtmsg;
+
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
+
+	ndtmsg = NLMSG_DATA(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+
+	{
+		unsigned long now = jiffies;
+		unsigned int flush_delta = now - tbl->last_flush;
+		unsigned int rand_delta = now - tbl->last_rand;
+
+		struct ndt_config ndc = {
+			.ndtc_key_len = tbl->key_len,
+			.ndtc_entry_size = tbl->entry_size,
+			.ndtc_entries = atomic_read(&tbl->entries),
+			.ndtc_last_flush = jiffies_to_msecs(flush_delta),
+			.ndtc_last_rand = jiffies_to_msecs(rand_delta),
+			.ndtc_hash_rnd = tbl->hash_rnd,
+			.ndtc_hash_mask = tbl->hash_mask,
+			.ndtc_hash_chain_gc = tbl->hash_chain_gc,
+			.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
+		};
+
+		RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+	}
+
+	{
+		int cpu;
+		struct ndt_stats ndst;
+
+		memset(&ndst, 0, sizeof(ndst));
+
+		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			struct neigh_statistics *st;
+
+			if (!cpu_possible(cpu))
+				continue;
+
+			st = per_cpu_ptr(tbl->stats, cpu);
+			ndst.ndts_allocs += st->allocs;
+			ndst.ndts_destroys += st->destroys;
+			ndst.ndts_hash_grows += st->hash_grows;
+			ndst.ndts_res_failed += st->res_failed;
+			ndst.ndts_lookups += st->lookups;
+			ndst.ndts_hits += st->hits;
+			ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
+			ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
+			ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
+			ndst.ndts_forced_gc_runs += st->forced_gc_runs;
+		}
+
+		RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+	}
+
+	BUG_ON(tbl->parms.dev);
+	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+
+static int neightbl_fill_param_info(struct neigh_table *tbl,
+				    struct neigh_parms *parms,
+				    struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	struct ndtmsg *ndtmsg;
+	struct nlmsghdr *nlh;
+
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
+
+	ndtmsg = NLMSG_DATA(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+
+	if (neightbl_fill_parms(skb, parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+						      int ifindex)
+{
+	struct neigh_parms *p;
+
+	for (p = &tbl->parms; p; p = p->next)
+		if ((p->dev && p->dev->ifindex == ifindex) ||
+		    (!p->dev && !ifindex))
+			return p;
+
+	return NULL;
+}
+
+int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct neigh_table *tbl;
+	struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
+	struct rtattr **tb = arg;
+	int err = -EINVAL;
+
+	if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
+		return -EINVAL;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
+			continue;
+
+		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+			break;
+	}
+
+	if (tbl == NULL) {
+		err = -ENOENT;
+		goto errout;
+	}
+
+	/*
+	 * We acquire tbl->lock to be nice to the periodic timers and
+	 * make sure they always see a consistent set of values.
+	 */
+	write_lock_bh(&tbl->lock);
+
+	if (tb[NDTA_THRESH1 - 1])
+		tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
+
+	if (tb[NDTA_THRESH2 - 1])
+		tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
+
+	if (tb[NDTA_THRESH3 - 1])
+		tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
+
+	if (tb[NDTA_GC_INTERVAL - 1])
+		tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
+
+	if (tb[NDTA_PARMS - 1]) {
+		struct rtattr *tbp[NDTPA_MAX];
+		struct neigh_parms *p;
+		u32 ifindex = 0;
+
+		if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
+			goto rtattr_failure;
+
+		if (tbp[NDTPA_IFINDEX - 1])
+			ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+
+		p = lookup_neigh_params(tbl, ifindex);
+		if (p == NULL) {
+			err = -ENOENT;
+			goto rtattr_failure;
+		}
+
+		if (tbp[NDTPA_QUEUE_LEN - 1])
+			p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
+
+		if (tbp[NDTPA_PROXY_QLEN - 1])
+			p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
+
+		if (tbp[NDTPA_APP_PROBES - 1])
+			p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
+
+		if (tbp[NDTPA_UCAST_PROBES - 1])
+			p->ucast_probes =
+				RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_MCAST_PROBES - 1])
+			p->mcast_probes =
+				RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
+			p->base_reachable_time =
+				RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
+
+		if (tbp[NDTPA_GC_STALETIME - 1])
+			p->gc_staletime =
+				RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
+
+		if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
+			p->delay_probe_time =
+				RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
+
+		if (tbp[NDTPA_RETRANS_TIME - 1])
+			p->retrans_time =
+				RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
+
+		if (tbp[NDTPA_ANYCAST_DELAY - 1])
+			p->anycast_delay =
+				RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
+
+		if (tbp[NDTPA_PROXY_DELAY - 1])
+			p->proxy_delay =
+				RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
+
+		if (tbp[NDTPA_LOCKTIME - 1])
+			p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
+	}
+
+	err = 0;
+
+rtattr_failure:
+	write_unlock_bh(&tbl->lock);
+errout:
+	read_unlock(&neigh_tbl_lock);
+	return err;
+}
+
+int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, family;
+	int s_idx = cb->args[0];
+	struct neigh_table *tbl;
+
+	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+		struct neigh_parms *p;
+
+		if (idx < s_idx || (family && tbl->family != family))
+			continue;
+
+		if (neightbl_fill_info(tbl, skb, cb) <= 0)
+			break;
+
+		for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
+			if (idx < s_idx)
+				continue;
+
+			if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+				goto out;
+		}
+
+	}
+out:
+	read_unlock(&neigh_tbl_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
 
 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-			   u32 pid, u32 seq, int event)
+			   u32 pid, u32 seq, int event, unsigned int flags)
 {
 	unsigned long now = jiffies;
 	unsigned char *b = skb->tail;
 	struct nda_cacheinfo ci;
 	int locked = 0;
 	u32 probes;
-	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
-					 sizeof(struct ndmsg));
+	struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
+					 sizeof(struct ndmsg), flags);
 	struct ndmsg *ndm = NLMSG_DATA(nlh);
 
-	nlh->nlmsg_flags = pid ? NLM_F_MULTI : 0;
 	ndm->ndm_family = n->ops->family;
 	ndm->ndm_flags = n->flags;
 	ndm->ndm_type = n->type;
@@ -1609,7 +1919,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 			continue;
 		if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
 				    cb->nlh->nlmsg_seq,
-				    RTM_NEWNEIGH) <= 0) {
+				    RTM_NEWNEIGH,
+				    NLM_F_MULTI) <= 0) {
 			read_unlock_bh(&tbl->lock);
 			rc = -1;
 			goto out;
@@ -2018,7 +2329,7 @@ void neigh_app_ns(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2037,7 +2348,7 @@ static void neigh_app_notify(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2352,6 +2663,8 @@ EXPORT_SYMBOL(neigh_update);
 EXPORT_SYMBOL(neigh_update_hhs);
 EXPORT_SYMBOL(pneigh_enqueue);
 EXPORT_SYMBOL(pneigh_lookup);
+EXPORT_SYMBOL(neightbl_dump_info);
+EXPORT_SYMBOL(neightbl_set);
 
 #ifdef CONFIG_ARPD
 EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 060f703659e8..e2137f3e489d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
 #define to_net_dev(class) container_of(class, struct net_device, class_dev)
 
 static const char fmt_hex[] = "%#x\n";
+static const char fmt_long_hex[] = "%#lx\n";
 static const char fmt_dec[] = "%d\n";
 static const char fmt_ulong[] = "%lu\n";
 
@@ -91,7 +92,7 @@ static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL) \
 NETDEVICE_ATTR(addr_len, fmt_dec);
 NETDEVICE_ATTR(iflink, fmt_dec);
 NETDEVICE_ATTR(ifindex, fmt_dec);
-NETDEVICE_ATTR(features, fmt_hex);
+NETDEVICE_ATTR(features, fmt_long_hex);
 NETDEVICE_ATTR(type, fmt_dec);
 
 /* use same locking rules as GIFHWADDR ioctl's */
@@ -184,6 +185,22 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz
 static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
 			 store_tx_queue_len);
 
+NETDEVICE_SHOW(weight, fmt_dec);
+
+static int change_weight(struct net_device *net, unsigned long new_weight)
+{
+	net->weight = new_weight;
+	return 0;
+}
+
+static ssize_t store_weight(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_weight);
+}
+
+static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight,
+			 store_weight);
+
 
 static struct class_device_attribute *net_class_attributes[] = {
 	&class_device_attr_ifindex,
@@ -193,6 +210,7 @@ static struct class_device_attribute *net_class_attributes[] = {
 	&class_device_attr_features,
 	&class_device_attr_mtu,
 	&class_device_attr_flags,
+	&class_device_attr_weight,
 	&class_device_attr_type,
 	&class_device_attr_address,
 	&class_device_attr_broadcast,
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
new file mode 100644
index 000000000000..bb55675f0685
--- /dev/null
+++ b/net/core/request_sock.c
@@ -0,0 +1,64 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * From code originally in include/net/tcp.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <net/request_sock.h>
+
+/*
+ * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
+ * One SYN_RECV socket costs about 80bytes on a 32bit machine.
+ * It would be better to replace it with a global counter for all sockets
+ * but then some measure against one socket starving all other sockets
+ * would be needed.
+ *
+ * It was 128 by default. Experiments with real servers show, that
+ * it is absolutely not enough even at 100conn/sec. 256 cures most
+ * of problems. This value is adjusted to 128 for very small machines
+ * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
+ * Further increasing requires to change hash table size.
+ */
+int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
+
+int reqsk_queue_alloc(struct request_sock_queue *queue,
+		      const int nr_table_entries)
+{
+	const int lopt_size = sizeof(struct listen_sock) +
+			      nr_table_entries * sizeof(struct request_sock *);
+	struct listen_sock *lopt = kmalloc(lopt_size, GFP_KERNEL);
+
+	if (lopt == NULL)
+		return -ENOMEM;
+
+	memset(lopt, 0, lopt_size);
+
+	for (lopt->max_qlen_log = 6;
+	     (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
+	     lopt->max_qlen_log++);
+
+	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
+	rwlock_init(&queue->syn_wait_lock);
+	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	queue->listen_opt = lopt;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(reqsk_queue_alloc);
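
A note on the for-loop in reqsk_queue_alloc() above: it sizes max_qlen_log to
the smallest power of two that covers sysctl_max_syn_backlog, with a floor of
1 << 6 == 64; with the default backlog of 256 it settles on max_qlen_log = 8,
since 1 << 8 == 256. A standalone sketch of the same rounding (hypothetical
helper name, not part of the kernel API):

	/* same rounding as the max_qlen_log loop in reqsk_queue_alloc() */
	static unsigned int qlen_log(int max_backlog)
	{
		unsigned int log = 6;			/* floor: 1 << 6 == 64 */

		while ((1 << log) < max_backlog)	/* round up to a power of two */
			log++;
		return log;				/* 256 -> 8, 300 -> 9 */
	}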
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 00caf4b318b2..e013d836a7ab 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -100,6 +100,7 @@ static const int rtm_min[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -113,6 +114,7 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
+	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -176,14 +178,14 @@ rtattr_failure:
 
 
 static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-				 int type, u32 pid, u32 seq, u32 change)
+				 int type, u32 pid, u32 seq, u32 change,
+				 unsigned int flags)
 {
 	struct ifinfomsg *r;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
 	r->ifi_family = AF_UNSPEC;
 	r->ifi_type = dev->type;
@@ -273,7 +275,10 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
+		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					  NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, 0,
+					  NLM_F_MULTI) <= 0)
 			break;
 	}
 	read_unlock(&dev_base_lock);
@@ -447,7 +452,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	if (!skb)
 		return;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
+	if (rtnetlink_fill_ifinfo(skb, dev, type, current->pid, 0, change, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -649,14 +654,16 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
 	[RTM_GETLINK  - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
 	[RTM_SETLINK  - RTM_BASE] = { .doit   = do_setlink },
 	[RTM_GETADDR  - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
 	[RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
 	[RTM_NEWNEIGH - RTM_BASE] = { .doit   = neigh_add },
 	[RTM_DELNEIGH - RTM_BASE] = { .doit   = neigh_delete },
 	[RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info },
 	[RTM_GETRULE  - RTM_BASE] = { .dumpit = rtnetlink_dump_all },
+	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info },
+	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set },
 };
 
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
diff --git a/net/core/sock.c b/net/core/sock.c
index 96e00b08698f..a6ec3ada7f9e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,6 +118,7 @@
 #include <linux/netdevice.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
@@ -1363,6 +1364,7 @@ static LIST_HEAD(proto_list);
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	char *request_sock_slab_name;
 	int rc = -ENOBUFS;
 
 	if (alloc_slab) {
@@ -1374,6 +1376,25 @@ int proto_register(struct proto *prot, int alloc_slab)
 			       prot->name);
 			goto out;
 		}
+
+		if (prot->rsk_prot != NULL) {
+			static const char mask[] = "request_sock_%s";
+
+			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+			if (request_sock_slab_name == NULL)
+				goto out_free_sock_slab;
+
+			sprintf(request_sock_slab_name, mask, prot->name);
+			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+								 prot->rsk_prot->obj_size, 0,
+								 SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+			if (prot->rsk_prot->slab == NULL) {
+				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
+				       prot->name);
+				goto out_free_request_sock_slab_name;
+			}
+		}
 	}
 
 	write_lock(&proto_list_lock);
@@ -1382,6 +1403,12 @@ int proto_register(struct proto *prot, int alloc_slab)
 	rc = 0;
 out:
 	return rc;
+out_free_request_sock_slab_name:
+	kfree(request_sock_slab_name);
+out_free_sock_slab:
+	kmem_cache_destroy(prot->slab);
+	prot->slab = NULL;
+	goto out;
 }
 
 EXPORT_SYMBOL(proto_register);
@@ -1395,6 +1422,14 @@ void proto_unregister(struct proto *prot)
 		prot->slab = NULL;
 	}
 
+	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
+		const char *name = kmem_cache_name(prot->rsk_prot->slab);
+
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		kfree(name);
+		prot->rsk_prot->slab = NULL;
+	}
+
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 }
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 750cc5daeb03..b2fe378dfbf8 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -2,7 +2,7 @@
  * This file implement the Wireless Extensions APIs.
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2004 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 1997-2005 Jean Tourrilhes, All Rights Reserved.
  *
  * (As all part of the Linux kernel, this file is GPL)
  */
@@ -187,6 +187,12 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
+	[SIOCSIWMLME	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_mlme),
+		.max_tokens	= sizeof(struct iw_mlme),
+	},
 	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr) +
@@ -195,7 +201,10 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
 	[SIOCSIWSCAN	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= 0,
+		.max_tokens	= sizeof(struct iw_scan_req),
 	},
 	[SIOCGIWSCAN	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
@@ -273,6 +282,42 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 	[SIOCGIWPOWER	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
+	[SIOCSIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCGIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCSIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCGIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCSIWPMKSA - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_pmksa),
+		.max_tokens	= sizeof(struct iw_pmksa),
+	},
 };
 static const int standard_ioctl_num = (sizeof(standard_ioctl) /
 				       sizeof(struct iw_ioctl_description));
@@ -299,6 +344,31 @@ static const struct iw_ioctl_description standard_event[] = {
 	[IWEVEXPIRED	- IWEVFIRST] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
+	[IWEVGENIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVMICHAELMICFAILURE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_michaelmicfailure),
+	},
+	[IWEVASSOCREQIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVASSOCRESPIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVPMKIDCAND	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_pmkid_cand),
+	},
 };
 static const int standard_event_num = (sizeof(standard_event) /
 				       sizeof(struct iw_ioctl_description));
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index ee7bf46eb78a..00233ecbc9cb 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -716,13 +716,13 @@ static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *a
 }
 
 static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
-				u32 pid, u32 seq, int event)
+				u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
 	ifm = NLMSG_DATA(nlh);
 
 	ifm->ifa_family = AF_DECnet;
@@ -755,7 +755,7 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
 		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS);
 		return;
 	}
-	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+	if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL);
 		return;
@@ -790,7 +790,8 @@ static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 				if (dn_dev_fill_ifaddr(skb, ifa,
 						       NETLINK_CB(cb->skb).pid,
 						       cb->nlh->nlmsg_seq,
-						       RTM_NEWADDR) <= 0)
+						       RTM_NEWADDR,
+						       NLM_F_MULTI) <= 0)
 					goto done;
 		}
 	}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index f6dfe96f45b7..f32dba9e26fe 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -101,7 +101,6 @@ struct neigh_table dn_neigh_table = {
 	.id =			"dn_neigh_cache",
 	.parms ={
 		.tbl =			&dn_neigh_table,
-		.entries =		0,
 		.base_reachable_time =	30 * HZ,
 		.retrans_time =		1 * HZ,
 		.gc_staletime =		60 * HZ,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1e7b5c3ea215..2399fa8a3f86 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1465,7 +1465,8 @@ int dn_route_input(struct sk_buff *skb)
 	return dn_route_input_slow(skb);
 }
 
-static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait)
+static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+			   int event, int nowait, unsigned int flags)
 {
 	struct dn_route *rt = (struct dn_route *)skb->dst;
 	struct rtmsg *r;
@@ -1473,9 +1474,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int
 	unsigned char *b = skb->tail;
 	struct rta_cacheinfo ci;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
 	r->rtm_family = AF_DECnet;
 	r->rtm_dst_len = 16;
 	r->rtm_src_len = 0;
@@ -1596,7 +1596,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 
 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 
-	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0);
+	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
 
 	if (err == 0)
 		goto out_free;
@@ -1644,7 +1644,8 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		skb->dst = dst_clone(&rt->u.dst);
 		if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
-				cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) {
+				cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+				1, NLM_F_MULTI) <= 0) {
 			dst_release(xchg(&skb->dst, NULL));
 			rcu_read_unlock_bh();
 			goto done;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 597587d170d8..1060de70bc0c 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -342,14 +342,15 @@ static struct notifier_block dn_fib_rules_notifier = {
 	.notifier_call = dn_fib_rules_event,
 };
 
-static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, struct netlink_callback *cb)
+static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
+			    struct netlink_callback *cb, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
 
-	nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm));
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_DECnet;
 	rtm->rtm_dst_len = r->r_dst_len;
@@ -394,7 +395,7 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 	for(r = dn_fib_rules, idx = 0; r; r = r->r_next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (dn_fib_fill_rule(skb, r, cb) < 0)
+		if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
 			break;
 	}
 	read_unlock(&dn_fib_rules_lock);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index dad5603912be..28ba5777a25a 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -270,13 +270,13 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
 
 static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
-			struct dn_fib_info *fi)
+			struct dn_fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_DECnet;
 	rtm->rtm_dst_len = dst_len;
@@ -345,7 +345,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
 
 	if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
 			     f->fn_type, f->fn_scope, &f->fn_key, z,
-			     DN_FIB_INFO(f)) < 0) {
+			     DN_FIB_INFO(f), 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -377,7 +377,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 				tb->n,
 				(f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
 				f->fn_scope, &f->fn_key, dz->dz_order,
-				f->fn_info) < 0) {
+				f->fn_info, NLM_F_MULTI) < 0) {
 			cb->args[3] = i;
 			return -1;
 		}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b3cb49ce5fad..03942f133944 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1181,6 +1181,7 @@ EXPORT_SYMBOL(inet_stream_connect);
 EXPORT_SYMBOL(inet_stream_ops);
 EXPORT_SYMBOL(inet_unregister_protosw);
 EXPORT_SYMBOL(net_statistics);
+EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
 
 #ifdef INET_REFCNT_DEBUG
 EXPORT_SYMBOL(inet_sock_nr);
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 0e98f2235b6e..514c85b2631a 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -200,7 +200,7 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	xfrm_state_put(x);
 }
 
-static int ah_init_state(struct xfrm_state *x, void *args)
+static int ah_init_state(struct xfrm_state *x)
 {
 	struct ah_data *ahp = NULL;
 	struct xfrm_algo_desc *aalg_desc;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 3cc96730c4ed..650dcb12d9a1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -233,11 +233,14 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
233static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 233static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
234 int destroy) 234 int destroy)
235{ 235{
236 struct in_ifaddr *promote = NULL;
236 struct in_ifaddr *ifa1 = *ifap; 237 struct in_ifaddr *ifa1 = *ifap;
237 238
238 ASSERT_RTNL(); 239 ASSERT_RTNL();
239 240
 240 /* 1. Deleting primary ifaddr forces deletion all secondaries */ 241 /* 1. Deleting a primary ifaddr forces deletion of all secondaries
 242 * unless alias promotion is set
 243 */
241 244
242 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { 245 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
243 struct in_ifaddr *ifa; 246 struct in_ifaddr *ifa;
@@ -251,11 +254,16 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
251 continue; 254 continue;
252 } 255 }
253 256
254 *ifap1 = ifa->ifa_next; 257 if (!IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
258 *ifap1 = ifa->ifa_next;
255 259
256 rtmsg_ifa(RTM_DELADDR, ifa); 260 rtmsg_ifa(RTM_DELADDR, ifa);
257 notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); 261 notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
258 inet_free_ifa(ifa); 262 inet_free_ifa(ifa);
263 } else {
264 promote = ifa;
265 break;
266 }
259 } 267 }
260 } 268 }
261 269
@@ -281,6 +289,13 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
281 if (!in_dev->ifa_list) 289 if (!in_dev->ifa_list)
282 inetdev_destroy(in_dev); 290 inetdev_destroy(in_dev);
283 } 291 }
292
293 if (promote && IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
294 /* not sure if we should send a delete notify first? */
295 promote->ifa_flags &= ~IFA_F_SECONDARY;
296 rtmsg_ifa(RTM_NEWADDR, promote);
297 notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote);
298 }
284} 299}
285 300
286static int inet_insert_ifa(struct in_ifaddr *ifa) 301static int inet_insert_ifa(struct in_ifaddr *ifa)
@@ -1015,14 +1030,13 @@ static struct notifier_block ip_netdev_notifier = {
1015}; 1030};
1016 1031
1017static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1032static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1018 u32 pid, u32 seq, int event) 1033 u32 pid, u32 seq, int event, unsigned int flags)
1019{ 1034{
1020 struct ifaddrmsg *ifm; 1035 struct ifaddrmsg *ifm;
1021 struct nlmsghdr *nlh; 1036 struct nlmsghdr *nlh;
1022 unsigned char *b = skb->tail; 1037 unsigned char *b = skb->tail;
1023 1038
1024 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 1039 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
1025 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
1026 ifm = NLMSG_DATA(nlh); 1040 ifm = NLMSG_DATA(nlh);
1027 ifm->ifa_family = AF_INET; 1041 ifm->ifa_family = AF_INET;
1028 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1042 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
@@ -1075,7 +1089,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1075 continue; 1089 continue;
1076 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, 1090 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1077 cb->nlh->nlmsg_seq, 1091 cb->nlh->nlmsg_seq,
1078 RTM_NEWADDR) <= 0) { 1092 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1079 rcu_read_unlock(); 1093 rcu_read_unlock();
1080 goto done; 1094 goto done;
1081 } 1095 }
@@ -1098,7 +1112,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
1098 1112
1099 if (!skb) 1113 if (!skb)
1100 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); 1114 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
1101 else if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { 1115 else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
1102 kfree_skb(skb); 1116 kfree_skb(skb);
1103 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); 1117 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
1104 } else { 1118 } else {
@@ -1384,6 +1398,15 @@ static struct devinet_sysctl_table {
1384 .proc_handler = &ipv4_doint_and_flush, 1398 .proc_handler = &ipv4_doint_and_flush,
1385 .strategy = &ipv4_doint_and_flush_strategy, 1399 .strategy = &ipv4_doint_and_flush_strategy,
1386 }, 1400 },
1401 {
1402 .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1403 .procname = "promote_secondaries",
1404 .data = &ipv4_devconf.promote_secondaries,
1405 .maxlen = sizeof(int),
1406 .mode = 0644,
1407 .proc_handler = &ipv4_doint_and_flush,
1408 .strategy = &ipv4_doint_and_flush_strategy,
1409 },
1387 }, 1410 },
1388 .devinet_dev = { 1411 .devinet_dev = {
1389 { 1412 {
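
The promotion behaviour wired up above is off by default and is enabled per device through the sysctl entry this hunk registers. A hypothetical userspace snippet, assuming the usual /proc/sys/net/ipv4/conf/<dev>/ layout and an eth0 device (both assumptions for illustration):

#include <stdio.h>

/* Enable promotion of secondary addresses on eth0, so that deleting a
 * primary address promotes a secondary instead of flushing them all. */
int main(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/conf/eth0/promote_secondaries", "w");

        if (f == NULL) {
                perror("promote_secondaries");
                return 1;
        }
        fputs("1\n", f);
        fclose(f);
        return 0;
}
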
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 053a883247ba..ba57446d5d1f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -362,7 +362,7 @@ static void esp_destroy(struct xfrm_state *x)
362 kfree(esp); 362 kfree(esp);
363} 363}
364 364
365static int esp_init_state(struct xfrm_state *x, void *args) 365static int esp_init_state(struct xfrm_state *x)
366{ 366{
367 struct esp_data *esp = NULL; 367 struct esp_data *esp = NULL;
368 368
@@ -478,7 +478,7 @@ static int __init esp4_init(void)
478{ 478{
479 struct xfrm_decap_state decap; 479 struct xfrm_decap_state decap;
480 480
481 if (sizeof(struct esp_decap_data) < 481 if (sizeof(struct esp_decap_data) >
482 sizeof(decap.decap_data)) { 482 sizeof(decap.decap_data)) {
483 extern void decap_data_too_small(void); 483 extern void decap_data_too_small(void);
484 484
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 563e7d612706..cd8e45ab9580 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -516,6 +516,60 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
516#undef BRD1_OK 516#undef BRD1_OK
517} 517}
518 518
519static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
520{
521
522 struct fib_result res;
523 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
524 .fwmark = frn->fl_fwmark,
525 .tos = frn->fl_tos,
526 .scope = frn->fl_scope } } };
527 if (tb) {
528 local_bh_disable();
529
530 frn->tb_id = tb->tb_id;
531 frn->err = tb->tb_lookup(tb, &fl, &res);
532
533 if (!frn->err) {
534 frn->prefixlen = res.prefixlen;
535 frn->nh_sel = res.nh_sel;
536 frn->type = res.type;
537 frn->scope = res.scope;
538 }
539 local_bh_enable();
540 }
541}
542
543static void nl_fib_input(struct sock *sk, int len)
544{
545 struct sk_buff *skb = NULL;
546 struct nlmsghdr *nlh = NULL;
547 struct fib_result_nl *frn;
548 int err;
549 u32 pid;
550 struct fib_table *tb;
551
552 skb = skb_recv_datagram(sk, 0, 0, &err);
553 nlh = (struct nlmsghdr *)skb->data;
554
555 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
556 tb = fib_get_table(frn->tb_id_in);
557
558 nl_fib_lookup(frn, tb);
559
 560 pid = nlh->nlmsg_pid; /* pid of sending process */
561 NETLINK_CB(skb).groups = 0; /* not in mcast group */
562 NETLINK_CB(skb).pid = 0; /* from kernel */
563 NETLINK_CB(skb).dst_pid = pid;
564 NETLINK_CB(skb).dst_groups = 0; /* unicast */
565 netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
566}
567
568static void nl_fib_lookup_init(void)
569{
570 netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input);
571}
572
519static void fib_disable_ip(struct net_device *dev, int force) 573static void fib_disable_ip(struct net_device *dev, int force)
520{ 574{
521 if (fib_sync_down(0, dev, force)) 575 if (fib_sync_down(0, dev, force))
@@ -604,6 +658,7 @@ void __init ip_fib_init(void)
604 658
605 register_netdevice_notifier(&fib_netdev_notifier); 659 register_netdevice_notifier(&fib_netdev_notifier);
606 register_inetaddr_notifier(&fib_inetaddr_notifier); 660 register_inetaddr_notifier(&fib_inetaddr_notifier);
661 nl_fib_lookup_init();
607} 662}
608 663
609EXPORT_SYMBOL(inet_addr_type); 664EXPORT_SYMBOL(inet_addr_type);
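
nl_fib_input()/nl_fib_lookup() give userspace a way to run a FIB lookup over the new NETLINK_FIB_LOOKUP family without touching the routing table. A hedged client sketch follows; the struct layout is inferred from the fields the kernel code above reads and writes, and the address and table id are illustrative, so treat this as a sketch rather than an ABI reference.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/types.h>
#include <linux/netlink.h>

#ifndef NETLINK_FIB_LOOKUP
#define NETLINK_FIB_LOOKUP 10           /* matches the kernel-side define */
#endif

struct fib_result_nl {                  /* assumed layout, see note above */
        __u32 fl_addr;                  /* address to look up */
        __u32 fl_fwmark;
        unsigned char fl_tos, fl_scope, tb_id_in;
        unsigned char tb_id, prefixlen, nh_sel, type, scope;
        int err;
};

int main(void)
{
        struct {
                struct nlmsghdr nlh;
                struct fib_result_nl frn;
        } req;
        struct sockaddr_nl sa = { .nl_family = AF_NETLINK, .nl_pid = getpid() };
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_FIB_LOOKUP);

        if (fd < 0 || bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
                return 1;

        memset(&req, 0, sizeof(req));
        req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.frn));
        req.nlh.nlmsg_pid = getpid();   /* nl_fib_input() unicasts the reply here */
        req.frn.fl_addr = inet_addr("192.0.2.1");
        req.frn.tb_id_in = 254;         /* RT_TABLE_MAIN */

        if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0 ||
            recv(fd, &req, sizeof(req), 0) < 0)
                return 1;

        printf("err=%d table=%u prefixlen=%u\n",
               req.frn.err, req.frn.tb_id, req.frn.prefixlen);
        return 0;
}
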
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 6506dcc01b46..b10d6bb5ef3d 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -703,7 +703,8 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
703 &f->fn_key, 703 &f->fn_key,
704 fz->fz_order, 704 fz->fz_order,
705 fa->fa_tos, 705 fa->fa_tos,
706 fa->fa_info) < 0) { 706 fa->fa_info,
707 NLM_F_MULTI) < 0) {
707 cb->args[3] = i; 708 cb->args[3] = i;
708 return -1; 709 return -1;
709 } 710 }
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ac4485f75e97..b729d97cfa93 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,7 +30,8 @@ extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
30 struct kern_rta *rta, struct fib_info *fi); 30 struct kern_rta *rta, struct fib_info *fi);
31extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 31extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
32 u8 tb_id, u8 type, u8 scope, void *dst, 32 u8 tb_id, u8 type, u8 scope, void *dst,
33 int dst_len, u8 tos, struct fib_info *fi); 33 int dst_len, u8 tos, struct fib_info *fi,
34 unsigned int);
34extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, 35extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
35 int z, int tb_id, 36 int z, int tb_id,
36 struct nlmsghdr *n, struct netlink_skb_parms *req); 37 struct nlmsghdr *n, struct netlink_skb_parms *req);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 39d0aadb9a2a..0b298bbc1518 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -367,13 +367,14 @@ static struct notifier_block fib_rules_notifier = {
367 367
368static __inline__ int inet_fill_rule(struct sk_buff *skb, 368static __inline__ int inet_fill_rule(struct sk_buff *skb,
369 struct fib_rule *r, 369 struct fib_rule *r,
370 struct netlink_callback *cb) 370 struct netlink_callback *cb,
371 unsigned int flags)
371{ 372{
372 struct rtmsg *rtm; 373 struct rtmsg *rtm;
373 struct nlmsghdr *nlh; 374 struct nlmsghdr *nlh;
374 unsigned char *b = skb->tail; 375 unsigned char *b = skb->tail;
375 376
376 nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm)); 377 nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
377 rtm = NLMSG_DATA(nlh); 378 rtm = NLMSG_DATA(nlh);
378 rtm->rtm_family = AF_INET; 379 rtm->rtm_family = AF_INET;
379 rtm->rtm_dst_len = r->r_dst_len; 380 rtm->rtm_dst_len = r->r_dst_len;
@@ -422,7 +423,7 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
422 for (r=fib_rules, idx=0; r; r = r->r_next, idx++) { 423 for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
423 if (idx < s_idx) 424 if (idx < s_idx)
424 continue; 425 continue;
425 if (inet_fill_rule(skb, r, cb) < 0) 426 if (inet_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
426 break; 427 break;
427 } 428 }
428 read_unlock(&fib_rules_lock); 429 read_unlock(&fib_rules_lock);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 029362d66135..c886b28ba9f5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -276,7 +276,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
276 struct nlmsghdr *n, struct netlink_skb_parms *req) 276 struct nlmsghdr *n, struct netlink_skb_parms *req)
277{ 277{
278 struct sk_buff *skb; 278 struct sk_buff *skb;
279 u32 pid = req ? req->pid : 0; 279 u32 pid = req ? req->pid : n->nlmsg_pid;
280 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); 280 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
281 281
282 skb = alloc_skb(size, GFP_KERNEL); 282 skb = alloc_skb(size, GFP_KERNEL);
@@ -286,7 +286,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
286 if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, 286 if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
287 fa->fa_type, fa->fa_scope, &key, z, 287 fa->fa_type, fa->fa_scope, &key, z,
288 fa->fa_tos, 288 fa->fa_tos,
289 fa->fa_info) < 0) { 289 fa->fa_info, 0) < 0) {
290 kfree_skb(skb); 290 kfree_skb(skb);
291 return; 291 return;
292 } 292 }
@@ -932,13 +932,13 @@ u32 __fib_res_prefsrc(struct fib_result *res)
932int 932int
933fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 933fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
934 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, 934 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
935 struct fib_info *fi) 935 struct fib_info *fi, unsigned int flags)
936{ 936{
937 struct rtmsg *rtm; 937 struct rtmsg *rtm;
938 struct nlmsghdr *nlh; 938 struct nlmsghdr *nlh;
939 unsigned char *b = skb->tail; 939 unsigned char *b = skb->tail;
940 940
941 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm)); 941 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
942 rtm = NLMSG_DATA(nlh); 942 rtm = NLMSG_DATA(nlh);
943 rtm->rtm_family = AF_INET; 943 rtm->rtm_family = AF_INET;
944 rtm->rtm_dst_len = dst_len; 944 rtm->rtm_dst_len = dst_len;
@@ -1035,7 +1035,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1035 } 1035 }
1036 1036
1037 nl->nlmsg_flags = NLM_F_REQUEST; 1037 nl->nlmsg_flags = NLM_F_REQUEST;
1038 nl->nlmsg_pid = 0; 1038 nl->nlmsg_pid = current->pid;
1039 nl->nlmsg_seq = 0; 1039 nl->nlmsg_seq = 0;
1040 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); 1040 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1041 if (cmd == SIOCDELRT) { 1041 if (cmd == SIOCDELRT) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 85bf0d3e294b..cb759484979d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -207,6 +207,7 @@ int sysctl_icmp_ignore_bogus_error_responses;
207 207
208int sysctl_icmp_ratelimit = 1 * HZ; 208int sysctl_icmp_ratelimit = 1 * HZ;
209int sysctl_icmp_ratemask = 0x1818; 209int sysctl_icmp_ratemask = 0x1818;
210int sysctl_icmp_errors_use_inbound_ifaddr;
210 211
211/* 212/*
212 * ICMP control array. This specifies what to do with each ICMP. 213 * ICMP control array. This specifies what to do with each ICMP.
@@ -511,8 +512,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
511 */ 512 */
512 513
513 saddr = iph->daddr; 514 saddr = iph->daddr;
514 if (!(rt->rt_flags & RTCF_LOCAL)) 515 if (!(rt->rt_flags & RTCF_LOCAL)) {
515 saddr = 0; 516 if (sysctl_icmp_errors_use_inbound_ifaddr)
517 saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK);
518 else
519 saddr = 0;
520 }
516 521
517 tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | 522 tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
518 IPTOS_PREC_INTERNETCONTROL) : 523 IPTOS_PREC_INTERNETCONTROL) :
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 47012b93cad2..f8b172f89811 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -360,14 +360,14 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
360 err = copied; 360 err = copied;
361 361
362 /* Reset and regenerate socket error */ 362 /* Reset and regenerate socket error */
363 spin_lock_irq(&sk->sk_error_queue.lock); 363 spin_lock_bh(&sk->sk_error_queue.lock);
364 sk->sk_err = 0; 364 sk->sk_err = 0;
365 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { 365 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
366 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; 366 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
367 spin_unlock_irq(&sk->sk_error_queue.lock); 367 spin_unlock_bh(&sk->sk_error_queue.lock);
368 sk->sk_error_report(sk); 368 sk->sk_error_report(sk);
369 } else 369 } else
370 spin_unlock_irq(&sk->sk_error_queue.lock); 370 spin_unlock_bh(&sk->sk_error_queue.lock);
371 371
372out_free_skb: 372out_free_skb:
373 kfree_skb(skb); 373 kfree_skb(skb);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 1a23c5263b99..2065944fd9e5 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -236,15 +236,10 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
236 t->props.mode = 1; 236 t->props.mode = 1;
237 t->props.saddr.a4 = x->props.saddr.a4; 237 t->props.saddr.a4 = x->props.saddr.a4;
238 t->props.flags = x->props.flags; 238 t->props.flags = x->props.flags;
239 239
240 t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family); 240 if (xfrm_init_state(t))
241 if (t->type == NULL)
242 goto error;
243
244 if (t->type->init_state(t, NULL))
245 goto error; 241 goto error;
246 242
247 t->km.state = XFRM_STATE_VALID;
248 atomic_set(&t->tunnel_users, 1); 243 atomic_set(&t->tunnel_users, 1);
249out: 244out:
250 return t; 245 return t;
@@ -422,7 +417,7 @@ static void ipcomp_destroy(struct xfrm_state *x)
422 kfree(ipcd); 417 kfree(ipcd);
423} 418}
424 419
425static int ipcomp_init_state(struct xfrm_state *x, void *args) 420static int ipcomp_init_state(struct xfrm_state *x)
426{ 421{
427 int err; 422 int err;
428 struct ipcomp_data *ipcd; 423 struct ipcomp_data *ipcd;
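
The ah4/esp4/ipcomp hunks in this merge all reflect one interface change: init_state callbacks lose their unused void *args, and callers such as ipcomp_tunnel_create() now go through xfrm_init_state(), which resolves the type and marks the state valid in one place. Approximately (a sketch of the consolidation, not the exact kernel function):

int xfrm_init_state(struct xfrm_state *x)
{
        int err;

        x->type = xfrm_get_type(x->id.proto, x->props.family);
        if (x->type == NULL)
                return -ENOENT;

        err = x->type->init_state(x);
        if (err)
                return err;

        x->km.state = XFRM_STATE_VALID;
        return 0;
}
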
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
index a788461a40c9..30e85de9ffff 100644
--- a/net/ipv4/ipvs/Makefile
+++ b/net/ipv4/ipvs/Makefile
@@ -11,7 +11,7 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
11 11
12ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ 12ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
13 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ 13 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
14 ip_vs_est.o ip_vs_proto.o ip_vs_proto_icmp.o \ 14 ip_vs_est.o ip_vs_proto.o \
15 $(ip_vs_proto-objs-y) 15 $(ip_vs_proto-objs-y)
16 16
17 17
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 253c46252bd5..867d4e9c6594 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -216,9 +216,6 @@ int ip_vs_protocol_init(void)
216#ifdef CONFIG_IP_VS_PROTO_UDP 216#ifdef CONFIG_IP_VS_PROTO_UDP
217 REGISTER_PROTOCOL(&ip_vs_protocol_udp); 217 REGISTER_PROTOCOL(&ip_vs_protocol_udp);
218#endif 218#endif
219#ifdef CONFIG_IP_VS_PROTO_ICMP
220 REGISTER_PROTOCOL(&ip_vs_protocol_icmp);
221#endif
222#ifdef CONFIG_IP_VS_PROTO_AH 219#ifdef CONFIG_IP_VS_PROTO_AH
223 REGISTER_PROTOCOL(&ip_vs_protocol_ah); 220 REGISTER_PROTOCOL(&ip_vs_protocol_ah);
224#endif 221#endif
diff --git a/net/ipv4/ipvs/ip_vs_proto_icmp.c b/net/ipv4/ipvs/ip_vs_proto_icmp.c
deleted file mode 100644
index 191e94aa1c1f..000000000000
--- a/net/ipv4/ipvs/ip_vs_proto_icmp.c
+++ /dev/null
@@ -1,182 +0,0 @@
1/*
2 * ip_vs_proto_icmp.c: ICMP load balancing support for IP Virtual Server
3 *
4 * Authors: Julian Anastasov <ja@ssi.bg>, March 2002
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation;
9 *
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/icmp.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter_ipv4.h>
17
18#include <net/ip_vs.h>
19
20
21static int icmp_timeouts[1] = { 1*60*HZ };
22
23static char * icmp_state_name_table[1] = { "ICMP" };
24
25static struct ip_vs_conn *
26icmp_conn_in_get(const struct sk_buff *skb,
27 struct ip_vs_protocol *pp,
28 const struct iphdr *iph,
29 unsigned int proto_off,
30 int inverse)
31{
32#if 0
33 struct ip_vs_conn *cp;
34
35 if (likely(!inverse)) {
36 cp = ip_vs_conn_in_get(iph->protocol,
37 iph->saddr, 0,
38 iph->daddr, 0);
39 } else {
40 cp = ip_vs_conn_in_get(iph->protocol,
41 iph->daddr, 0,
42 iph->saddr, 0);
43 }
44
45 return cp;
46
47#else
48 return NULL;
49#endif
50}
51
52static struct ip_vs_conn *
53icmp_conn_out_get(const struct sk_buff *skb,
54 struct ip_vs_protocol *pp,
55 const struct iphdr *iph,
56 unsigned int proto_off,
57 int inverse)
58{
59#if 0
60 struct ip_vs_conn *cp;
61
62 if (likely(!inverse)) {
63 cp = ip_vs_conn_out_get(iph->protocol,
64 iph->saddr, 0,
65 iph->daddr, 0);
66 } else {
67 cp = ip_vs_conn_out_get(IPPROTO_UDP,
68 iph->daddr, 0,
69 iph->saddr, 0);
70 }
71
72 return cp;
73#else
74 return NULL;
75#endif
76}
77
78static int
79icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
80 int *verdict, struct ip_vs_conn **cpp)
81{
82 *verdict = NF_ACCEPT;
83 return 0;
84}
85
86static int
87icmp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
88{
89 if (!(skb->nh.iph->frag_off & __constant_htons(IP_OFFSET))) {
90 if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
91 if (ip_vs_checksum_complete(skb, skb->nh.iph->ihl * 4)) {
92 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "Failed checksum for");
93 return 0;
94 }
95 }
96 }
97 return 1;
98}
99
100static void
101icmp_debug_packet(struct ip_vs_protocol *pp,
102 const struct sk_buff *skb,
103 int offset,
104 const char *msg)
105{
106 char buf[256];
107 struct iphdr _iph, *ih;
108
109 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
110 if (ih == NULL)
111 sprintf(buf, "%s TRUNCATED", pp->name);
112 else if (ih->frag_off & __constant_htons(IP_OFFSET))
113 sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
114 pp->name, NIPQUAD(ih->saddr),
115 NIPQUAD(ih->daddr));
116 else {
117 struct icmphdr _icmph, *ic;
118
119 ic = skb_header_pointer(skb, offset + ih->ihl*4,
120 sizeof(_icmph), &_icmph);
121 if (ic == NULL)
122 sprintf(buf, "%s TRUNCATED to %u bytes\n",
123 pp->name, skb->len - offset);
124 else
125 sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d",
126 pp->name, NIPQUAD(ih->saddr),
127 NIPQUAD(ih->daddr),
128 ic->type, ic->code);
129 }
130 printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
131}
132
133static int
134icmp_state_transition(struct ip_vs_conn *cp, int direction,
135 const struct sk_buff *skb,
136 struct ip_vs_protocol *pp)
137{
138 cp->timeout = pp->timeout_table[IP_VS_ICMP_S_NORMAL];
139 return 1;
140}
141
142static int
143icmp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
144{
145 int num;
146 char **names;
147
148 num = IP_VS_ICMP_S_LAST;
149 names = icmp_state_name_table;
150 return ip_vs_set_state_timeout(pp->timeout_table, num, names, sname, to);
151}
152
153
154static void icmp_init(struct ip_vs_protocol *pp)
155{
156 pp->timeout_table = icmp_timeouts;
157}
158
159static void icmp_exit(struct ip_vs_protocol *pp)
160{
161}
162
163struct ip_vs_protocol ip_vs_protocol_icmp = {
164 .name = "ICMP",
165 .protocol = IPPROTO_ICMP,
166 .dont_defrag = 0,
167 .init = icmp_init,
168 .exit = icmp_exit,
169 .conn_schedule = icmp_conn_schedule,
170 .conn_in_get = icmp_conn_in_get,
171 .conn_out_get = icmp_conn_out_get,
172 .snat_handler = NULL,
173 .dnat_handler = NULL,
174 .csum_check = icmp_csum_check,
175 .state_transition = icmp_state_transition,
176 .register_app = NULL,
177 .unregister_app = NULL,
178 .app_conn_bind = NULL,
179 .debug_packet = icmp_debug_packet,
180 .timeout_change = NULL,
181 .set_state_timeout = icmp_set_state_timeout,
182};
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 9349686131fc..c9cf8726051d 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -31,6 +31,7 @@
31#include <linux/igmp.h> 31#include <linux/igmp.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/module.h>
34#include <linux/mroute.h> 35#include <linux/mroute.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <net/ip.h> 37#include <net/ip.h>
@@ -57,7 +58,6 @@ struct multipath_device {
57 58
58static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; 59static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
59static DEFINE_SPINLOCK(state_lock); 60static DEFINE_SPINLOCK(state_lock);
60static struct rtable *last_selection = NULL;
61 61
62static int inline __multipath_findslot(void) 62static int inline __multipath_findslot(void)
63{ 63{
@@ -111,11 +111,6 @@ struct notifier_block drr_dev_notifier = {
111 .notifier_call = drr_dev_event, 111 .notifier_call = drr_dev_event,
112}; 112};
113 113
114static void drr_remove(struct rtable *rt)
115{
116 if (last_selection == rt)
117 last_selection = NULL;
118}
119 114
120static void drr_safe_inc(atomic_t *usecount) 115static void drr_safe_inc(atomic_t *usecount)
121{ 116{
@@ -144,14 +139,6 @@ static void drr_select_route(const struct flowi *flp,
144 int devidx = -1; 139 int devidx = -1;
145 int cur_min_devidx = -1; 140 int cur_min_devidx = -1;
146 141
147 /* if necessary and possible utilize the old alternative */
148 if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
149 last_selection != NULL) {
150 result = last_selection;
151 *rp = result;
152 return;
153 }
154
155 /* 1. make sure all alt. nexthops have the same GC related data */ 142 /* 1. make sure all alt. nexthops have the same GC related data */
156 /* 2. determine the new candidate to be returned */ 143 /* 2. determine the new candidate to be returned */
157 result = NULL; 144 result = NULL;
@@ -229,12 +216,10 @@ static void drr_select_route(const struct flowi *flp,
229 } 216 }
230 217
231 *rp = result; 218 *rp = result;
232 last_selection = result;
233} 219}
234 220
235static struct ip_mp_alg_ops drr_ops = { 221static struct ip_mp_alg_ops drr_ops = {
236 .mp_alg_select_route = drr_select_route, 222 .mp_alg_select_route = drr_select_route,
237 .mp_alg_remove = drr_remove,
238}; 223};
239 224
240static int __init drr_init(void) 225static int __init drr_init(void)
@@ -244,7 +229,7 @@ static int __init drr_init(void)
244 if (err) 229 if (err)
245 return err; 230 return err;
246 231
247 err = multipath_alg_register(&drr_ops, IP_MP_ALG_RR); 232 err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
248 if (err) 233 if (err)
249 goto fail; 234 goto fail;
250 235
@@ -263,3 +248,4 @@ static void __exit drr_exit(void)
263 248
264module_init(drr_init); 249module_init(drr_init);
265module_exit(drr_exit); 250module_exit(drr_exit);
251MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
index 805a16e47de5..5249dbe7c559 100644
--- a/net/ipv4/multipath_random.c
+++ b/net/ipv4/multipath_random.c
@@ -31,6 +31,7 @@
31#include <linux/igmp.h> 31#include <linux/igmp.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/module.h>
34#include <linux/mroute.h> 35#include <linux/mroute.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <net/ip.h> 37#include <net/ip.h>
@@ -126,3 +127,4 @@ static void __exit random_exit(void)
126 127
127module_init(random_init); 128module_init(random_init);
128module_exit(random_exit); 129module_exit(random_exit);
130MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
index 554a82568160..b6cd2870478f 100644
--- a/net/ipv4/multipath_rr.c
+++ b/net/ipv4/multipath_rr.c
@@ -31,6 +31,7 @@
31#include <linux/igmp.h> 31#include <linux/igmp.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/module.h>
34#include <linux/mroute.h> 35#include <linux/mroute.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <net/ip.h> 37#include <net/ip.h>
@@ -47,29 +48,12 @@
47#include <net/checksum.h> 48#include <net/checksum.h>
48#include <net/ip_mp_alg.h> 49#include <net/ip_mp_alg.h>
49 50
50#define MULTIPATH_MAX_CANDIDATES 40
51
52static struct rtable* last_used = NULL;
53
54static void rr_remove(struct rtable *rt)
55{
56 if (last_used == rt)
57 last_used = NULL;
58}
59
60static void rr_select_route(const struct flowi *flp, 51static void rr_select_route(const struct flowi *flp,
61 struct rtable *first, struct rtable **rp) 52 struct rtable *first, struct rtable **rp)
62{ 53{
63 struct rtable *nh, *result, *min_use_cand = NULL; 54 struct rtable *nh, *result, *min_use_cand = NULL;
64 int min_use = -1; 55 int min_use = -1;
65 56
66 /* if necessary and possible utilize the old alternative */
67 if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 &&
68 last_used != NULL) {
69 result = last_used;
70 goto out;
71 }
72
73 /* 1. make sure all alt. nexthops have the same GC related data 57 /* 1. make sure all alt. nexthops have the same GC related data
74 * 2. determine the new candidate to be returned 58 * 2. determine the new candidate to be returned
75 */ 59 */
@@ -90,15 +74,12 @@ static void rr_select_route(const struct flowi *flp,
90 if (!result) 74 if (!result)
91 result = first; 75 result = first;
92 76
93out:
94 last_used = result;
95 result->u.dst.__use++; 77 result->u.dst.__use++;
96 *rp = result; 78 *rp = result;
97} 79}
98 80
99static struct ip_mp_alg_ops rr_ops = { 81static struct ip_mp_alg_ops rr_ops = {
100 .mp_alg_select_route = rr_select_route, 82 .mp_alg_select_route = rr_select_route,
101 .mp_alg_remove = rr_remove,
102}; 83};
103 84
104static int __init rr_init(void) 85static int __init rr_init(void)
@@ -113,3 +94,4 @@ static void __exit rr_exit(void)
113 94
114module_init(rr_init); 95module_init(rr_init);
115module_exit(rr_exit); 96module_exit(rr_exit);
97MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
index c3d2ca1a6781..bd7d75b6abe0 100644
--- a/net/ipv4/multipath_wrandom.c
+++ b/net/ipv4/multipath_wrandom.c
@@ -31,6 +31,7 @@
31#include <linux/igmp.h> 31#include <linux/igmp.h>
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/module.h>
34#include <linux/mroute.h> 35#include <linux/mroute.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <net/ip.h> 37#include <net/ip.h>
@@ -342,3 +343,4 @@ static void __exit wrandom_exit(void)
342 343
343module_init(wrandom_init); 344module_init(wrandom_init);
344module_exit(wrandom_exit); 345module_exit(wrandom_exit);
346MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 46ca45f74d85..bc59f7b39805 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -256,6 +256,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
256{ 256{
257 struct list_head *e = v; 257 struct list_head *e = v;
258 258
259 ++*pos;
259 e = e->next; 260 e = e->next;
260 261
261 if (e == &ip_conntrack_expect_list) 262 if (e == &ip_conntrack_expect_list)
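
The one-line ++*pos restores the seq_file iterator contract: a ->next() handler must advance *pos on every call, including the one that ends the walk, otherwise readers can see entries replayed. The patched iterator in full, with the tail reconstructed from context:

static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
        struct list_head *e = v;

        ++*pos;                 /* keep the reported position in step with the walk */
        e = e->next;

        if (e == &ip_conntrack_expect_list)
                return NULL;    /* end of list */
        return e;
}
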
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e5746b674413..eda1fba431a4 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -3,6 +3,7 @@
3 * communicating with userspace via netlink. 3 * communicating with userspace via netlink.
4 * 4 *
5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au> 5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
6 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -17,6 +18,7 @@
17 * 2005-01-10: Added /proc counter for dropped packets; fixed so 18 * 2005-01-10: Added /proc counter for dropped packets; fixed so
18 * packets aren't delivered to user space if they're going 19 * packets aren't delivered to user space if they're going
19 * to be dropped. 20 * to be dropped.
21 * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
20 * 22 *
21 */ 23 */
22#include <linux/module.h> 24#include <linux/module.h>
@@ -71,7 +73,15 @@ static DECLARE_MUTEX(ipqnl_sem);
71static void 73static void
72ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) 74ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
73{ 75{
 76 /* The TCP input path (and probably other bits) assumes it is called
 77 * from softirq context, not from syscall context the way
 78 * ipq_issue_verdict is, and can deadlock on locks also taken from
 79 * timer softirqs. We therefore emulate softirq context with local_bh_disable() */
80
81 local_bh_disable();
74 nf_reinject(entry->skb, entry->info, verdict); 82 nf_reinject(entry->skb, entry->info, verdict);
83 local_bh_enable();
84
75 kfree(entry); 85 kfree(entry);
76} 86}
77 87
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 25ab9fabdcba..2d44b07688af 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -223,7 +223,7 @@ static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned
223 curr_table->table[count].last_seen = 0; 223 curr_table->table[count].last_seen = 0;
224 curr_table->table[count].addr = 0; 224 curr_table->table[count].addr = 0;
225 curr_table->table[count].ttl = 0; 225 curr_table->table[count].ttl = 0;
226 memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); 226 memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
227 curr_table->table[count].oldest_pkt = 0; 227 curr_table->table[count].oldest_pkt = 0;
228 curr_table->table[count].time_pos = 0; 228 curr_table->table[count].time_pos = 0;
229 curr_table->time_info[count].position = count; 229 curr_table->time_info[count].position = count;
@@ -502,7 +502,7 @@ match(const struct sk_buff *skb,
502 location = time_info[curr_table->time_pos].position; 502 location = time_info[curr_table->time_pos].position;
503 hash_table[r_list[location].hash_entry] = -1; 503 hash_table[r_list[location].hash_entry] = -1;
504 hash_table[hash_result] = location; 504 hash_table[hash_result] = location;
505 memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); 505 memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
506 r_list[location].time_pos = curr_table->time_pos; 506 r_list[location].time_pos = curr_table->time_pos;
507 r_list[location].addr = addr; 507 r_list[location].addr = addr;
508 r_list[location].ttl = ttl; 508 r_list[location].ttl = ttl;
@@ -631,7 +631,7 @@ match(const struct sk_buff *skb,
631 r_list[location].last_seen = 0; 631 r_list[location].last_seen = 0;
632 r_list[location].addr = 0; 632 r_list[location].addr = 0;
633 r_list[location].ttl = 0; 633 r_list[location].ttl = 0;
634 memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); 634 memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
635 r_list[location].oldest_pkt = 0; 635 r_list[location].oldest_pkt = 0;
636 ans = !info->invert; 636 ans = !info->invert;
637 } 637 }
@@ -734,10 +734,10 @@ checkentry(const char *tablename,
734 memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot); 734 memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot);
735#ifdef DEBUG 735#ifdef DEBUG
736 if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n", 736 if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n",
737 sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot); 737 sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
738#endif 738#endif
739 739
740 hold = vmalloc(sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot); 740 hold = vmalloc(sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
741#ifdef DEBUG 741#ifdef DEBUG
742 if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n"); 742 if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n");
743#endif 743#endif
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5b1ec586bae6..d1835b1bc8c4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -259,7 +259,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
259 return 0; 259 return 0;
260} 260}
261 261
262static int raw_send_hdrinc(struct sock *sk, void *from, int length, 262static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
263 struct rtable *rt, 263 struct rtable *rt,
264 unsigned int flags) 264 unsigned int flags)
265{ 265{
@@ -298,7 +298,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, int length,
298 goto error_fault; 298 goto error_fault;
299 299
300 /* We don't modify invalid header */ 300 /* We don't modify invalid header */
301 if (length >= sizeof(*iph) && iph->ihl * 4 <= length) { 301 if (length >= sizeof(*iph) && iph->ihl * 4U <= length) {
302 if (!iph->saddr) 302 if (!iph->saddr)
303 iph->saddr = rt->rt_src; 303 iph->saddr = rt->rt_src;
304 iph->check = 0; 304 iph->check = 0;
@@ -332,7 +332,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
332 u8 __user *type = NULL; 332 u8 __user *type = NULL;
333 u8 __user *code = NULL; 333 u8 __user *code = NULL;
334 int probed = 0; 334 int probed = 0;
335 int i; 335 unsigned int i;
336 336
337 if (!msg->msg_iov) 337 if (!msg->msg_iov)
338 return; 338 return;
@@ -384,7 +384,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
384 int err; 384 int err;
385 385
386 err = -EMSGSIZE; 386 err = -EMSGSIZE;
387 if (len < 0 || len > 0xFFFF) 387 if (len > 0xFFFF)
388 goto out; 388 goto out;
389 389
390 /* 390 /*
@@ -514,7 +514,10 @@ done:
514 kfree(ipc.opt); 514 kfree(ipc.opt);
515 ip_rt_put(rt); 515 ip_rt_put(rt);
516 516
517out: return err < 0 ? err : len; 517out:
518 if (err < 0)
519 return err;
520 return len;
518 521
519do_confirm: 522do_confirm:
520 dst_confirm(&rt->u.dst); 523 dst_confirm(&rt->u.dst);
@@ -610,7 +613,10 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
610 copied = skb->len; 613 copied = skb->len;
611done: 614done:
612 skb_free_datagram(sk, skb); 615 skb_free_datagram(sk, skb);
613out: return err ? err : copied; 616out:
617 if (err)
618 return err;
619 return copied;
614} 620}
615 621
616static int raw_init(struct sock *sk) 622static int raw_init(struct sock *sk)
@@ -691,11 +697,11 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
691 struct sk_buff *skb; 697 struct sk_buff *skb;
692 int amount = 0; 698 int amount = 0;
693 699
694 spin_lock_irq(&sk->sk_receive_queue.lock); 700 spin_lock_bh(&sk->sk_receive_queue.lock);
695 skb = skb_peek(&sk->sk_receive_queue); 701 skb = skb_peek(&sk->sk_receive_queue);
696 if (skb != NULL) 702 if (skb != NULL)
697 amount = skb->len; 703 amount = skb->len;
698 spin_unlock_irq(&sk->sk_receive_queue.lock); 704 spin_unlock_bh(&sk->sk_receive_queue.lock);
699 return put_user(amount, (int __user *)arg); 705 return put_user(amount, (int __user *)arg);
700 } 706 }
701 707
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a682d28e247b..f4d53c919869 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2581,7 +2581,7 @@ int ip_route_output_key(struct rtable **rp, struct flowi *flp)
2581} 2581}
2582 2582
2583static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2583static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2584 int nowait) 2584 int nowait, unsigned int flags)
2585{ 2585{
2586 struct rtable *rt = (struct rtable*)skb->dst; 2586 struct rtable *rt = (struct rtable*)skb->dst;
2587 struct rtmsg *r; 2587 struct rtmsg *r;
@@ -2591,9 +2591,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2591#ifdef CONFIG_IP_MROUTE 2591#ifdef CONFIG_IP_MROUTE
2592 struct rtattr *eptr; 2592 struct rtattr *eptr;
2593#endif 2593#endif
2594 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); 2594 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
2595 r = NLMSG_DATA(nlh); 2595 r = NLMSG_DATA(nlh);
2596 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
2597 r->rtm_family = AF_INET; 2596 r->rtm_family = AF_INET;
2598 r->rtm_dst_len = 32; 2597 r->rtm_dst_len = 32;
2599 r->rtm_src_len = 0; 2598 r->rtm_src_len = 0;
@@ -2744,7 +2743,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2744 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; 2743 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2745 2744
2746 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2745 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2747 RTM_NEWROUTE, 0); 2746 RTM_NEWROUTE, 0, 0);
2748 if (!err) 2747 if (!err)
2749 goto out_free; 2748 goto out_free;
2750 if (err < 0) { 2749 if (err < 0) {
@@ -2781,8 +2780,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2781 continue; 2780 continue;
2782 skb->dst = dst_clone(&rt->u.dst); 2781 skb->dst = dst_clone(&rt->u.dst);
2783 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 2782 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
2784 cb->nlh->nlmsg_seq, 2783 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2785 RTM_NEWROUTE, 1) <= 0) { 2784 1, NLM_F_MULTI) <= 0) {
2786 dst_release(xchg(&skb->dst, NULL)); 2785 dst_release(xchg(&skb->dst, NULL));
2787 rcu_read_unlock_bh(); 2786 rcu_read_unlock_bh();
2788 goto done; 2787 goto done;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e923d2f021aa..72d014442185 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -169,10 +169,10 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
169 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 169 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
170} 170}
171 171
172extern struct or_calltable or_ipv4; 172extern struct request_sock_ops tcp_request_sock_ops;
173 173
174static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, 174static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
175 struct open_request *req, 175 struct request_sock *req,
176 struct dst_entry *dst) 176 struct dst_entry *dst)
177{ 177{
178 struct tcp_sock *tp = tcp_sk(sk); 178 struct tcp_sock *tp = tcp_sk(sk);
@@ -182,7 +182,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
182 if (child) 182 if (child)
183 tcp_acceptq_queue(sk, req, child); 183 tcp_acceptq_queue(sk, req, child);
184 else 184 else
185 tcp_openreq_free(req); 185 reqsk_free(req);
186 186
187 return child; 187 return child;
188} 188}
@@ -190,10 +190,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
190struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 190struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
191 struct ip_options *opt) 191 struct ip_options *opt)
192{ 192{
193 struct inet_request_sock *ireq;
194 struct tcp_request_sock *treq;
193 struct tcp_sock *tp = tcp_sk(sk); 195 struct tcp_sock *tp = tcp_sk(sk);
194 __u32 cookie = ntohl(skb->h.th->ack_seq) - 1; 196 __u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
195 struct sock *ret = sk; 197 struct sock *ret = sk;
196 struct open_request *req; 198 struct request_sock *req;
197 int mss; 199 int mss;
198 struct rtable *rt; 200 struct rtable *rt;
199 __u8 rcv_wscale; 201 __u8 rcv_wscale;
@@ -209,19 +211,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
209 211
210 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); 212 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
211 213
212 req = tcp_openreq_alloc();
213 ret = NULL; 214 ret = NULL;
215 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
214 if (!req) 216 if (!req)
215 goto out; 217 goto out;
216 218
217 req->rcv_isn = htonl(skb->h.th->seq) - 1; 219 ireq = inet_rsk(req);
218 req->snt_isn = cookie; 220 treq = tcp_rsk(req);
221 treq->rcv_isn = htonl(skb->h.th->seq) - 1;
222 treq->snt_isn = cookie;
219 req->mss = mss; 223 req->mss = mss;
220 req->rmt_port = skb->h.th->source; 224 ireq->rmt_port = skb->h.th->source;
221 req->af.v4_req.loc_addr = skb->nh.iph->daddr; 225 ireq->loc_addr = skb->nh.iph->daddr;
222 req->af.v4_req.rmt_addr = skb->nh.iph->saddr; 226 ireq->rmt_addr = skb->nh.iph->saddr;
223 req->class = &or_ipv4; /* for savety */ 227 ireq->opt = NULL;
224 req->af.v4_req.opt = NULL;
225 228
 226 /* We threw the options of the initial SYN away, so we hope 229
227 * the ACK carries the same options again (see RFC1122 4.2.3.8) 230 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -229,17 +232,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
229 if (opt && opt->optlen) { 232 if (opt && opt->optlen) {
230 int opt_size = sizeof(struct ip_options) + opt->optlen; 233 int opt_size = sizeof(struct ip_options) + opt->optlen;
231 234
232 req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC); 235 ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
233 if (req->af.v4_req.opt) { 236 if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) {
234 if (ip_options_echo(req->af.v4_req.opt, skb)) { 237 kfree(ireq->opt);
235 kfree(req->af.v4_req.opt); 238 ireq->opt = NULL;
236 req->af.v4_req.opt = NULL;
237 }
238 } 239 }
239 } 240 }
240 241
241 req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0; 242 ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
242 req->wscale_ok = req->sack_ok = 0; 243 ireq->wscale_ok = ireq->sack_ok = 0;
243 req->expires = 0UL; 244 req->expires = 0UL;
244 req->retrans = 0; 245 req->retrans = 0;
245 246
@@ -253,15 +254,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
253 struct flowi fl = { .nl_u = { .ip4_u = 254 struct flowi fl = { .nl_u = { .ip4_u =
254 { .daddr = ((opt && opt->srr) ? 255 { .daddr = ((opt && opt->srr) ?
255 opt->faddr : 256 opt->faddr :
256 req->af.v4_req.rmt_addr), 257 ireq->rmt_addr),
257 .saddr = req->af.v4_req.loc_addr, 258 .saddr = ireq->loc_addr,
258 .tos = RT_CONN_FLAGS(sk) } }, 259 .tos = RT_CONN_FLAGS(sk) } },
259 .proto = IPPROTO_TCP, 260 .proto = IPPROTO_TCP,
260 .uli_u = { .ports = 261 .uli_u = { .ports =
261 { .sport = skb->h.th->dest, 262 { .sport = skb->h.th->dest,
262 .dport = skb->h.th->source } } }; 263 .dport = skb->h.th->source } } };
263 if (ip_route_output_key(&rt, &fl)) { 264 if (ip_route_output_key(&rt, &fl)) {
264 tcp_openreq_free(req); 265 reqsk_free(req);
265 goto out; 266 goto out;
266 } 267 }
267 } 268 }
@@ -272,7 +273,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
272 &req->rcv_wnd, &req->window_clamp, 273 &req->rcv_wnd, &req->window_clamp,
273 0, &rcv_wscale); 274 0, &rcv_wscale);
274 /* BTW win scale with syncookies is 0 by definition */ 275 /* BTW win scale with syncookies is 0 by definition */
275 req->rcv_wscale = rcv_wscale; 276 ireq->rcv_wscale = rcv_wscale;
276 277
277 ret = get_cookie_sock(sk, skb, req, &rt->u.dst); 278 ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
278out: return ret; 279out: return ret;
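
Underlying this syncookies conversion (and the tcp.c/tcp_diag.c ones below) is the new layered request_sock: the generic struct sits at the front of the inet- and TCP-specific ones, so inet_rsk() and tcp_rsk() are plain casts. A trimmed sketch of the layout the converted code assumes; the field lists here are abbreviated, not complete:

struct request_sock {
        struct request_sock *dl_next;   /* hash-chain / accept-queue link */
        u16 mss;
        u8  retrans;
        unsigned long expires;
        /* ... window fields, ops pointer ... */
};

struct inet_request_sock {
        struct request_sock req;        /* must stay first */
        u32 loc_addr, rmt_addr;
        u16 rmt_port;
        /* snd_wscale, rcv_wscale, tstamp_ok, sack_ok, ... as bitfields */
        struct ip_options *opt;
};

struct tcp_request_sock {
        struct inet_request_sock req;   /* must stay first */
        u32 rcv_isn;
        u32 snt_isn;
};

static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
{
        return (struct inet_request_sock *)sk;
}

static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *sk)
{
        return (struct tcp_request_sock *)sk;
}
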
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3aafb298c1c1..23068bddbf0b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -23,6 +23,7 @@ extern int sysctl_ip_nonlocal_bind;
23extern int sysctl_icmp_echo_ignore_all; 23extern int sysctl_icmp_echo_ignore_all;
24extern int sysctl_icmp_echo_ignore_broadcasts; 24extern int sysctl_icmp_echo_ignore_broadcasts;
25extern int sysctl_icmp_ignore_bogus_error_responses; 25extern int sysctl_icmp_ignore_bogus_error_responses;
26extern int sysctl_icmp_errors_use_inbound_ifaddr;
26 27
27/* From ip_fragment.c */ 28/* From ip_fragment.c */
28extern int sysctl_ipfrag_low_thresh; 29extern int sysctl_ipfrag_low_thresh;
@@ -396,6 +397,14 @@ ctl_table ipv4_table[] = {
396 .proc_handler = &proc_dointvec 397 .proc_handler = &proc_dointvec
397 }, 398 },
398 { 399 {
400 .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
401 .procname = "icmp_errors_use_inbound_ifaddr",
402 .data = &sysctl_icmp_errors_use_inbound_ifaddr,
403 .maxlen = sizeof(int),
404 .mode = 0644,
405 .proc_handler = &proc_dointvec
406 },
407 {
399 .ctl_name = NET_IPV4_ROUTE, 408 .ctl_name = NET_IPV4_ROUTE,
400 .procname = "route", 409 .procname = "route",
401 .maxlen = 0, 410 .maxlen = 0,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a037bafcba3c..674bbd8cfd36 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,7 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
271 271
272DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); 272DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
273 273
274kmem_cache_t *tcp_openreq_cachep;
275kmem_cache_t *tcp_bucket_cachep; 274kmem_cache_t *tcp_bucket_cachep;
276kmem_cache_t *tcp_timewait_cachep; 275kmem_cache_t *tcp_timewait_cachep;
277 276
@@ -317,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
317static __inline__ unsigned int tcp_listen_poll(struct sock *sk, 316static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
318 poll_table *wait) 317 poll_table *wait)
319{ 318{
320 return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0; 319 return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
321} 320}
322 321
323/* 322/*
@@ -463,28 +462,15 @@ int tcp_listen_start(struct sock *sk)
463{ 462{
464 struct inet_sock *inet = inet_sk(sk); 463 struct inet_sock *inet = inet_sk(sk);
465 struct tcp_sock *tp = tcp_sk(sk); 464 struct tcp_sock *tp = tcp_sk(sk);
466 struct tcp_listen_opt *lopt; 465 int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
466
467 if (rc != 0)
468 return rc;
467 469
468 sk->sk_max_ack_backlog = 0; 470 sk->sk_max_ack_backlog = 0;
469 sk->sk_ack_backlog = 0; 471 sk->sk_ack_backlog = 0;
470 tp->accept_queue = tp->accept_queue_tail = NULL;
471 rwlock_init(&tp->syn_wait_lock);
472 tcp_delack_init(tp); 472 tcp_delack_init(tp);
473 473
474 lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
475 if (!lopt)
476 return -ENOMEM;
477
478 memset(lopt, 0, sizeof(struct tcp_listen_opt));
479 for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
480 if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
481 break;
482 get_random_bytes(&lopt->hash_rnd, 4);
483
484 write_lock_bh(&tp->syn_wait_lock);
485 tp->listen_opt = lopt;
486 write_unlock_bh(&tp->syn_wait_lock);
487
 488 /* There is a race window here: we announce ourselves listening, 474
 489 * but this transition is still not validated by get_port(). 475
 490 * It is OK, because this socket enters the hash table only 476
@@ -501,10 +487,7 @@ int tcp_listen_start(struct sock *sk)
501 } 487 }
502 488
503 sk->sk_state = TCP_CLOSE; 489 sk->sk_state = TCP_CLOSE;
504 write_lock_bh(&tp->syn_wait_lock); 490 reqsk_queue_destroy(&tp->accept_queue);
505 tp->listen_opt = NULL;
506 write_unlock_bh(&tp->syn_wait_lock);
507 kfree(lopt);
508 return -EADDRINUSE; 491 return -EADDRINUSE;
509} 492}
510 493
@@ -516,25 +499,23 @@ int tcp_listen_start(struct sock *sk)
516static void tcp_listen_stop (struct sock *sk) 499static void tcp_listen_stop (struct sock *sk)
517{ 500{
518 struct tcp_sock *tp = tcp_sk(sk); 501 struct tcp_sock *tp = tcp_sk(sk);
519 struct tcp_listen_opt *lopt = tp->listen_opt; 502 struct listen_sock *lopt;
520 struct open_request *acc_req = tp->accept_queue; 503 struct request_sock *acc_req;
521 struct open_request *req; 504 struct request_sock *req;
522 int i; 505 int i;
523 506
524 tcp_delete_keepalive_timer(sk); 507 tcp_delete_keepalive_timer(sk);
525 508
526 /* make all the listen_opt local to us */ 509 /* make all the listen_opt local to us */
527 write_lock_bh(&tp->syn_wait_lock); 510 lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
528 tp->listen_opt = NULL; 511 acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
529 write_unlock_bh(&tp->syn_wait_lock);
530 tp->accept_queue = tp->accept_queue_tail = NULL;
531 512
532 if (lopt->qlen) { 513 if (lopt->qlen) {
533 for (i = 0; i < TCP_SYNQ_HSIZE; i++) { 514 for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
534 while ((req = lopt->syn_table[i]) != NULL) { 515 while ((req = lopt->syn_table[i]) != NULL) {
535 lopt->syn_table[i] = req->dl_next; 516 lopt->syn_table[i] = req->dl_next;
536 lopt->qlen--; 517 lopt->qlen--;
537 tcp_openreq_free(req); 518 reqsk_free(req);
538 519
539 /* Following specs, it would be better either to send FIN 520 /* Following specs, it would be better either to send FIN
540 * (and enter FIN-WAIT-1, it is normal close) 521 * (and enter FIN-WAIT-1, it is normal close)
@@ -574,7 +555,7 @@ static void tcp_listen_stop (struct sock *sk)
574 sock_put(child); 555 sock_put(child);
575 556
576 sk_acceptq_removed(sk); 557 sk_acceptq_removed(sk);
577 tcp_openreq_fastfree(req); 558 __reqsk_free(req);
578 } 559 }
579 BUG_TRAP(!sk->sk_ack_backlog); 560 BUG_TRAP(!sk->sk_ack_backlog);
580} 561}
@@ -1345,7 +1326,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1345 1326
1346 cleanup_rbuf(sk, copied); 1327 cleanup_rbuf(sk, copied);
1347 1328
1348 if (tp->ucopy.task == user_recv) { 1329 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
1349 /* Install new reader */ 1330 /* Install new reader */
1350 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { 1331 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
1351 user_recv = current; 1332 user_recv = current;
@@ -1868,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
1868 prepare_to_wait_exclusive(sk->sk_sleep, &wait, 1849 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
1869 TASK_INTERRUPTIBLE); 1850 TASK_INTERRUPTIBLE);
1870 release_sock(sk); 1851 release_sock(sk);
1871 if (!tp->accept_queue) 1852 if (reqsk_queue_empty(&tp->accept_queue))
1872 timeo = schedule_timeout(timeo); 1853 timeo = schedule_timeout(timeo);
1873 lock_sock(sk); 1854 lock_sock(sk);
1874 err = 0; 1855 err = 0;
1875 if (tp->accept_queue) 1856 if (!reqsk_queue_empty(&tp->accept_queue))
1876 break; 1857 break;
1877 err = -EINVAL; 1858 err = -EINVAL;
1878 if (sk->sk_state != TCP_LISTEN) 1859 if (sk->sk_state != TCP_LISTEN)
@@ -1895,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo)
1895struct sock *tcp_accept(struct sock *sk, int flags, int *err) 1876struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1896{ 1877{
1897 struct tcp_sock *tp = tcp_sk(sk); 1878 struct tcp_sock *tp = tcp_sk(sk);
1898 struct open_request *req;
1899 struct sock *newsk; 1879 struct sock *newsk;
1900 int error; 1880 int error;
1901 1881
@@ -1906,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
1906 */ 1886 */
1907 error = -EINVAL; 1887 error = -EINVAL;
1908 if (sk->sk_state != TCP_LISTEN) 1888 if (sk->sk_state != TCP_LISTEN)
1909 goto out; 1889 goto out_err;
1910 1890
1911 /* Find already established connection */ 1891 /* Find already established connection */
1912 if (!tp->accept_queue) { 1892 if (reqsk_queue_empty(&tp->accept_queue)) {
1913 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 1893 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1914 1894
1915 /* If this is a non blocking socket don't sleep */ 1895 /* If this is a non blocking socket don't sleep */
1916 error = -EAGAIN; 1896 error = -EAGAIN;
1917 if (!timeo) 1897 if (!timeo)
1918 goto out; 1898 goto out_err;
1919 1899
1920 error = wait_for_connect(sk, timeo); 1900 error = wait_for_connect(sk, timeo);
1921 if (error) 1901 if (error)
1922 goto out; 1902 goto out_err;
1923 } 1903 }
1924 1904
1925 req = tp->accept_queue; 1905 newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
1926 if ((tp->accept_queue = req->dl_next) == NULL)
1927 tp->accept_queue_tail = NULL;
1928
1929 newsk = req->sk;
1930 sk_acceptq_removed(sk);
1931 tcp_openreq_fastfree(req);
1932 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); 1906 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
1933 release_sock(sk);
1934 return newsk;
1935
1936out: 1907out:
1937 release_sock(sk); 1908 release_sock(sk);
1909 return newsk;
1910out_err:
1911 newsk = NULL;
1938 *err = error; 1912 *err = error;
1939 return NULL; 1913 goto out;
1940} 1914}
1941 1915
1942/* 1916/*
@@ -2271,13 +2245,6 @@ void __init tcp_init(void)
2271 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb), 2245 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
2272 sizeof(skb->cb)); 2246 sizeof(skb->cb));
2273 2247
2274 tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
2275 sizeof(struct open_request),
2276 0, SLAB_HWCACHE_ALIGN,
2277 NULL, NULL);
2278 if (!tcp_openreq_cachep)
2279 panic("tcp_init: Cannot alloc open_request cache.");
2280
2281 tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", 2248 tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
2282 sizeof(struct tcp_bind_bucket), 2249 sizeof(struct tcp_bind_bucket),
2283 0, SLAB_HWCACHE_ALIGN, 2250 0, SLAB_HWCACHE_ALIGN,
@@ -2338,7 +2305,7 @@ void __init tcp_init(void)
2338 (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); 2305 (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
2339 order++) 2306 order++)
2340 ; 2307 ;
2341 if (order > 4) { 2308 if (order >= 4) {
2342 sysctl_local_port_range[0] = 32768; 2309 sysctl_local_port_range[0] = 32768;
2343 sysctl_local_port_range[1] = 61000; 2310 sysctl_local_port_range[1] = 61000;
2344 sysctl_tcp_max_tw_buckets = 180000; 2311 sysctl_tcp_max_tw_buckets = 180000;
@@ -2374,7 +2341,6 @@ EXPORT_SYMBOL(tcp_destroy_sock);
2374EXPORT_SYMBOL(tcp_disconnect); 2341EXPORT_SYMBOL(tcp_disconnect);
2375EXPORT_SYMBOL(tcp_getsockopt); 2342EXPORT_SYMBOL(tcp_getsockopt);
2376EXPORT_SYMBOL(tcp_ioctl); 2343EXPORT_SYMBOL(tcp_ioctl);
2377EXPORT_SYMBOL(tcp_openreq_cachep);
2378EXPORT_SYMBOL(tcp_poll); 2344EXPORT_SYMBOL(tcp_poll);
2379EXPORT_SYMBOL(tcp_read_sock); 2345EXPORT_SYMBOL(tcp_read_sock);
2380EXPORT_SYMBOL(tcp_recvmsg); 2346EXPORT_SYMBOL(tcp_recvmsg);
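
The tcp_listen_start()/tcp_listen_stop()/tcp_accept() rewrite above replaces the open-coded accept queue, syn_wait_lock and listen_opt with struct request_sock_queue and its helpers from include/net/request_sock.h. A trimmed sketch of the pieces the converted code calls (abbreviated, not the full definitions):

struct request_sock_queue {
        struct request_sock *rskq_accept_head;
        struct request_sock *rskq_accept_tail;
        rwlock_t             syn_wait_lock;
        struct listen_sock  *listen_opt;       /* SYN hash table, qlen, ... */
};

static inline int reqsk_queue_empty(struct request_sock_queue *queue)
{
        return queue->rskq_accept_head == NULL;
}

/* Detach listen_opt under the lock so the caller owns it exclusively,
 * as tcp_listen_stop() does above. */
static inline struct listen_sock *reqsk_queue_yank_listen_sk(
        struct request_sock_queue *queue)
{
        struct listen_sock *lopt;

        write_lock_bh(&queue->syn_wait_lock);
        lopt = queue->listen_opt;
        queue->listen_opt = NULL;
        write_unlock_bh(&queue->syn_wait_lock);
        return lopt;
}
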
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 8faa8948f75c..634befc07921 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -455,9 +455,10 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
455} 455}
456 456
457static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, 457static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
458 struct open_request *req, 458 struct request_sock *req,
459 u32 pid, u32 seq) 459 u32 pid, u32 seq)
460{ 460{
461 const struct inet_request_sock *ireq = inet_rsk(req);
461 struct inet_sock *inet = inet_sk(sk); 462 struct inet_sock *inet = inet_sk(sk);
462 unsigned char *b = skb->tail; 463 unsigned char *b = skb->tail;
463 struct tcpdiagmsg *r; 464 struct tcpdiagmsg *r;
@@ -482,9 +483,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
482 tmo = 0; 483 tmo = 0;
483 484
484 r->id.tcpdiag_sport = inet->sport; 485 r->id.tcpdiag_sport = inet->sport;
485 r->id.tcpdiag_dport = req->rmt_port; 486 r->id.tcpdiag_dport = ireq->rmt_port;
486 r->id.tcpdiag_src[0] = req->af.v4_req.loc_addr; 487 r->id.tcpdiag_src[0] = ireq->loc_addr;
487 r->id.tcpdiag_dst[0] = req->af.v4_req.rmt_addr; 488 r->id.tcpdiag_dst[0] = ireq->rmt_addr;
488 r->tcpdiag_expires = jiffies_to_msecs(tmo), 489 r->tcpdiag_expires = jiffies_to_msecs(tmo),
489 r->tcpdiag_rqueue = 0; 490 r->tcpdiag_rqueue = 0;
490 r->tcpdiag_wqueue = 0; 491 r->tcpdiag_wqueue = 0;
@@ -493,9 +494,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
493#ifdef CONFIG_IP_TCPDIAG_IPV6 494#ifdef CONFIG_IP_TCPDIAG_IPV6
494 if (r->tcpdiag_family == AF_INET6) { 495 if (r->tcpdiag_family == AF_INET6) {
495 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, 496 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
496 &req->af.v6_req.loc_addr); 497 &tcp6_rsk(req)->loc_addr);
497 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, 498 ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
498 &req->af.v6_req.rmt_addr); 499 &tcp6_rsk(req)->rmt_addr);
499 } 500 }
500#endif 501#endif
501 nlh->nlmsg_len = skb->tail - b; 502 nlh->nlmsg_len = skb->tail - b;
@@ -513,7 +514,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
513 struct tcpdiag_entry entry; 514 struct tcpdiag_entry entry;
514 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); 515 struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
515 struct tcp_sock *tp = tcp_sk(sk); 516 struct tcp_sock *tp = tcp_sk(sk);
516 struct tcp_listen_opt *lopt; 517 struct listen_sock *lopt;
517 struct rtattr *bc = NULL; 518 struct rtattr *bc = NULL;
518 struct inet_sock *inet = inet_sk(sk); 519 struct inet_sock *inet = inet_sk(sk);
519 int j, s_j; 520 int j, s_j;
@@ -528,9 +529,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
528 529
529 entry.family = sk->sk_family; 530 entry.family = sk->sk_family;
530 531
531 read_lock_bh(&tp->syn_wait_lock); 532 read_lock_bh(&tp->accept_queue.syn_wait_lock);
532 533
533 lopt = tp->listen_opt; 534 lopt = tp->accept_queue.listen_opt;
534 if (!lopt || !lopt->qlen) 535 if (!lopt || !lopt->qlen)
535 goto out; 536 goto out;
536 537
@@ -541,13 +542,15 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
541 } 542 }
542 543
543 for (j = s_j; j < TCP_SYNQ_HSIZE; j++) { 544 for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
544 struct open_request *req, *head = lopt->syn_table[j]; 545 struct request_sock *req, *head = lopt->syn_table[j];
545 546
546 reqnum = 0; 547 reqnum = 0;
547 for (req = head; req; reqnum++, req = req->dl_next) { 548 for (req = head; req; reqnum++, req = req->dl_next) {
549 struct inet_request_sock *ireq = inet_rsk(req);
550
548 if (reqnum < s_reqnum) 551 if (reqnum < s_reqnum)
549 continue; 552 continue;
550 if (r->id.tcpdiag_dport != req->rmt_port && 553 if (r->id.tcpdiag_dport != ireq->rmt_port &&
551 r->id.tcpdiag_dport) 554 r->id.tcpdiag_dport)
552 continue; 555 continue;
553 556
@@ -555,16 +558,16 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
555 entry.saddr = 558 entry.saddr =
556#ifdef CONFIG_IP_TCPDIAG_IPV6 559#ifdef CONFIG_IP_TCPDIAG_IPV6
557 (entry.family == AF_INET6) ? 560 (entry.family == AF_INET6) ?
558 req->af.v6_req.loc_addr.s6_addr32 : 561 tcp6_rsk(req)->loc_addr.s6_addr32 :
559#endif 562#endif
560 &req->af.v4_req.loc_addr; 563 &ireq->loc_addr;
561 entry.daddr = 564 entry.daddr =
562#ifdef CONFIG_IP_TCPDIAG_IPV6 565#ifdef CONFIG_IP_TCPDIAG_IPV6
563 (entry.family == AF_INET6) ? 566 (entry.family == AF_INET6) ?
564 req->af.v6_req.rmt_addr.s6_addr32 : 567 tcp6_rsk(req)->rmt_addr.s6_addr32 :
565#endif 568#endif
566 &req->af.v4_req.rmt_addr; 569 &ireq->rmt_addr;
567 entry.dport = ntohs(req->rmt_port); 570 entry.dport = ntohs(ireq->rmt_port);
568 571
569 if (!tcpdiag_bc_run(RTA_DATA(bc), 572 if (!tcpdiag_bc_run(RTA_DATA(bc),
570 RTA_PAYLOAD(bc), &entry)) 573 RTA_PAYLOAD(bc), &entry))
@@ -585,7 +588,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
585 } 588 }
586 589
587out: 590out:
588 read_unlock_bh(&tp->syn_wait_lock); 591 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
589 592
590 return err; 593 return err;
591} 594}
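Throughout tcp_diag.c the per-family fields move out of the old req->af union and behind accessor casts (inet_rsk(), tcp6_rsk()). A sketch of why such a cast is safe, with illustrative toy_* types standing in for the kernel's:

    /* The generic request is embedded as the first member of every
     * specialization, so a pointer to one is a pointer to the other. */
    struct toy_request {
            struct toy_request *dl_next;
    };

    struct toy_inet_request {
            struct toy_request req;       /* must remain the first member */
            unsigned int loc_addr;
            unsigned int rmt_addr;
            unsigned short rmt_port;
    };

    /* Analogue of inet_rsk(). */
    static inline struct toy_inet_request *toy_inet_rsk(struct toy_request *r)
    {
            return (struct toy_inet_request *)r;
    }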
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dad98e4a5043..2d41d5d6ad19 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -36,7 +36,7 @@
36 * ACK bit. 36 * ACK bit.
37 * Andi Kleen : Implemented fast path mtu discovery. 37 * Andi Kleen : Implemented fast path mtu discovery.
38 * Fixed many serious bugs in the 38 * Fixed many serious bugs in the
39 * open_request handling and moved 39 * request_sock handling and moved
40 * most of it into the af independent code. 40 * most of it into the af independent code.
41 * Added tail drop and some other bugfixes. 41 * Added tail drop and some other bugfixes.
 42 * Added new listen semantics. 42 * Added new listen semantics.
@@ -869,21 +869,23 @@ static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
869 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); 869 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
870} 870}
871 871
872static struct open_request *tcp_v4_search_req(struct tcp_sock *tp, 872static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
873 struct open_request ***prevp, 873 struct request_sock ***prevp,
874 __u16 rport, 874 __u16 rport,
875 __u32 raddr, __u32 laddr) 875 __u32 raddr, __u32 laddr)
876{ 876{
877 struct tcp_listen_opt *lopt = tp->listen_opt; 877 struct listen_sock *lopt = tp->accept_queue.listen_opt;
878 struct open_request *req, **prev; 878 struct request_sock *req, **prev;
879 879
880 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; 880 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
881 (req = *prev) != NULL; 881 (req = *prev) != NULL;
882 prev = &req->dl_next) { 882 prev = &req->dl_next) {
883 if (req->rmt_port == rport && 883 const struct inet_request_sock *ireq = inet_rsk(req);
884 req->af.v4_req.rmt_addr == raddr && 884
885 req->af.v4_req.loc_addr == laddr && 885 if (ireq->rmt_port == rport &&
886 TCP_INET_FAMILY(req->class->family)) { 886 ireq->rmt_addr == raddr &&
887 ireq->loc_addr == laddr &&
888 TCP_INET_FAMILY(req->rsk_ops->family)) {
887 BUG_TRAP(!req->sk); 889 BUG_TRAP(!req->sk);
888 *prevp = prev; 890 *prevp = prev;
889 break; 891 break;
@@ -893,21 +895,13 @@ static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
893 return req; 895 return req;
894} 896}
895 897
896static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) 898static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
897{ 899{
898 struct tcp_sock *tp = tcp_sk(sk); 900 struct tcp_sock *tp = tcp_sk(sk);
899 struct tcp_listen_opt *lopt = tp->listen_opt; 901 struct listen_sock *lopt = tp->accept_queue.listen_opt;
900 u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd); 902 u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
901
902 req->expires = jiffies + TCP_TIMEOUT_INIT;
903 req->retrans = 0;
904 req->sk = NULL;
905 req->dl_next = lopt->syn_table[h];
906
907 write_lock(&tp->syn_wait_lock);
908 lopt->syn_table[h] = req;
909 write_unlock(&tp->syn_wait_lock);
910 903
904 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
911 tcp_synq_added(sk); 905 tcp_synq_added(sk);
912} 906}
913 907
@@ -1050,7 +1044,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
1050 } 1044 }
1051 1045
1052 switch (sk->sk_state) { 1046 switch (sk->sk_state) {
1053 struct open_request *req, **prev; 1047 struct request_sock *req, **prev;
1054 case TCP_LISTEN: 1048 case TCP_LISTEN:
1055 if (sock_owned_by_user(sk)) 1049 if (sock_owned_by_user(sk))
1056 goto out; 1050 goto out;
@@ -1065,7 +1059,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
1065 */ 1059 */
1066 BUG_TRAP(!req->sk); 1060 BUG_TRAP(!req->sk);
1067 1061
1068 if (seq != req->snt_isn) { 1062 if (seq != tcp_rsk(req)->snt_isn) {
1069 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 1063 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
1070 goto out; 1064 goto out;
1071 } 1065 }
@@ -1254,28 +1248,29 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1254 tcp_tw_put(tw); 1248 tcp_tw_put(tw);
1255} 1249}
1256 1250
1257static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req) 1251static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1258{ 1252{
1259 tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd, 1253 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
1260 req->ts_recent); 1254 req->ts_recent);
1261} 1255}
1262 1256
1263static struct dst_entry* tcp_v4_route_req(struct sock *sk, 1257static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1264 struct open_request *req) 1258 struct request_sock *req)
1265{ 1259{
1266 struct rtable *rt; 1260 struct rtable *rt;
1267 struct ip_options *opt = req->af.v4_req.opt; 1261 const struct inet_request_sock *ireq = inet_rsk(req);
1262 struct ip_options *opt = inet_rsk(req)->opt;
1268 struct flowi fl = { .oif = sk->sk_bound_dev_if, 1263 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1269 .nl_u = { .ip4_u = 1264 .nl_u = { .ip4_u =
1270 { .daddr = ((opt && opt->srr) ? 1265 { .daddr = ((opt && opt->srr) ?
1271 opt->faddr : 1266 opt->faddr :
1272 req->af.v4_req.rmt_addr), 1267 ireq->rmt_addr),
1273 .saddr = req->af.v4_req.loc_addr, 1268 .saddr = ireq->loc_addr,
1274 .tos = RT_CONN_FLAGS(sk) } }, 1269 .tos = RT_CONN_FLAGS(sk) } },
1275 .proto = IPPROTO_TCP, 1270 .proto = IPPROTO_TCP,
1276 .uli_u = { .ports = 1271 .uli_u = { .ports =
1277 { .sport = inet_sk(sk)->sport, 1272 { .sport = inet_sk(sk)->sport,
1278 .dport = req->rmt_port } } }; 1273 .dport = ireq->rmt_port } } };
1279 1274
1280 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 1275 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1281 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 1276 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -1291,12 +1286,13 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1291 1286
1292/* 1287/*
1293 * Send a SYN-ACK after having received an ACK. 1288 * Send a SYN-ACK after having received an ACK.
1294 * This still operates on a open_request only, not on a big 1289 * This still operates on a request_sock only, not on a big
1295 * socket. 1290 * socket.
1296 */ 1291 */
1297static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, 1292static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
1298 struct dst_entry *dst) 1293 struct dst_entry *dst)
1299{ 1294{
1295 const struct inet_request_sock *ireq = inet_rsk(req);
1300 int err = -1; 1296 int err = -1;
1301 struct sk_buff * skb; 1297 struct sk_buff * skb;
1302 1298
@@ -1310,14 +1306,14 @@ static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
1310 struct tcphdr *th = skb->h.th; 1306 struct tcphdr *th = skb->h.th;
1311 1307
1312 th->check = tcp_v4_check(th, skb->len, 1308 th->check = tcp_v4_check(th, skb->len,
1313 req->af.v4_req.loc_addr, 1309 ireq->loc_addr,
1314 req->af.v4_req.rmt_addr, 1310 ireq->rmt_addr,
1315 csum_partial((char *)th, skb->len, 1311 csum_partial((char *)th, skb->len,
1316 skb->csum)); 1312 skb->csum));
1317 1313
1318 err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr, 1314 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
1319 req->af.v4_req.rmt_addr, 1315 ireq->rmt_addr,
1320 req->af.v4_req.opt); 1316 ireq->opt);
1321 if (err == NET_XMIT_CN) 1317 if (err == NET_XMIT_CN)
1322 err = 0; 1318 err = 0;
1323 } 1319 }
@@ -1328,12 +1324,12 @@ out:
1328} 1324}
1329 1325
1330/* 1326/*
1331 * IPv4 open_request destructor. 1327 * IPv4 request_sock destructor.
1332 */ 1328 */
1333static void tcp_v4_or_free(struct open_request *req) 1329static void tcp_v4_reqsk_destructor(struct request_sock *req)
1334{ 1330{
1335 if (req->af.v4_req.opt) 1331 if (inet_rsk(req)->opt)
1336 kfree(req->af.v4_req.opt); 1332 kfree(inet_rsk(req)->opt);
1337} 1333}
1338 1334
1339static inline void syn_flood_warning(struct sk_buff *skb) 1335static inline void syn_flood_warning(struct sk_buff *skb)
@@ -1349,7 +1345,7 @@ static inline void syn_flood_warning(struct sk_buff *skb)
1349} 1345}
1350 1346
1351/* 1347/*
1352 * Save and compile IPv4 options into the open_request if needed. 1348 * Save and compile IPv4 options into the request_sock if needed.
1353 */ 1349 */
1354static inline struct ip_options *tcp_v4_save_options(struct sock *sk, 1350static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1355 struct sk_buff *skb) 1351 struct sk_buff *skb)
@@ -1370,33 +1366,20 @@ static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1370 return dopt; 1366 return dopt;
1371} 1367}
1372 1368
1373/* 1369struct request_sock_ops tcp_request_sock_ops = {
1374 * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
1375 * One SYN_RECV socket costs about 80bytes on a 32bit machine.
1376 * It would be better to replace it with a global counter for all sockets
1377 * but then some measure against one socket starving all other sockets
1378 * would be needed.
1379 *
1380 * It was 128 by default. Experiments with real servers show, that
1381 * it is absolutely not enough even at 100conn/sec. 256 cures most
1382 * of problems. This value is adjusted to 128 for very small machines
1383 * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
1384 * Further increasing requires to change hash table size.
1385 */
1386int sysctl_max_syn_backlog = 256;
1387
1388struct or_calltable or_ipv4 = {
1389 .family = PF_INET, 1370 .family = PF_INET,
1371 .obj_size = sizeof(struct tcp_request_sock),
1390 .rtx_syn_ack = tcp_v4_send_synack, 1372 .rtx_syn_ack = tcp_v4_send_synack,
1391 .send_ack = tcp_v4_or_send_ack, 1373 .send_ack = tcp_v4_reqsk_send_ack,
1392 .destructor = tcp_v4_or_free, 1374 .destructor = tcp_v4_reqsk_destructor,
1393 .send_reset = tcp_v4_send_reset, 1375 .send_reset = tcp_v4_send_reset,
1394}; 1376};
1395 1377
1396int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1378int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1397{ 1379{
1380 struct inet_request_sock *ireq;
1398 struct tcp_options_received tmp_opt; 1381 struct tcp_options_received tmp_opt;
1399 struct open_request *req; 1382 struct request_sock *req;
1400 __u32 saddr = skb->nh.iph->saddr; 1383 __u32 saddr = skb->nh.iph->saddr;
1401 __u32 daddr = skb->nh.iph->daddr; 1384 __u32 daddr = skb->nh.iph->daddr;
1402 __u32 isn = TCP_SKB_CB(skb)->when; 1385 __u32 isn = TCP_SKB_CB(skb)->when;
@@ -1433,7 +1416,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1433 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) 1416 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1434 goto drop; 1417 goto drop;
1435 1418
1436 req = tcp_openreq_alloc(); 1419 req = reqsk_alloc(&tcp_request_sock_ops);
1437 if (!req) 1420 if (!req)
1438 goto drop; 1421 goto drop;
1439 1422
@@ -1461,10 +1444,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1461 1444
1462 tcp_openreq_init(req, &tmp_opt, skb); 1445 tcp_openreq_init(req, &tmp_opt, skb);
1463 1446
1464 req->af.v4_req.loc_addr = daddr; 1447 ireq = inet_rsk(req);
1465 req->af.v4_req.rmt_addr = saddr; 1448 ireq->loc_addr = daddr;
1466 req->af.v4_req.opt = tcp_v4_save_options(sk, skb); 1449 ireq->rmt_addr = saddr;
1467 req->class = &or_ipv4; 1450 ireq->opt = tcp_v4_save_options(sk, skb);
1468 if (!want_cookie) 1451 if (!want_cookie)
1469 TCP_ECN_create_request(req, skb->h.th); 1452 TCP_ECN_create_request(req, skb->h.th);
1470 1453
@@ -1523,20 +1506,20 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1523 1506
1524 isn = tcp_v4_init_sequence(sk, skb); 1507 isn = tcp_v4_init_sequence(sk, skb);
1525 } 1508 }
1526 req->snt_isn = isn; 1509 tcp_rsk(req)->snt_isn = isn;
1527 1510
1528 if (tcp_v4_send_synack(sk, req, dst)) 1511 if (tcp_v4_send_synack(sk, req, dst))
1529 goto drop_and_free; 1512 goto drop_and_free;
1530 1513
1531 if (want_cookie) { 1514 if (want_cookie) {
1532 tcp_openreq_free(req); 1515 reqsk_free(req);
1533 } else { 1516 } else {
1534 tcp_v4_synq_add(sk, req); 1517 tcp_v4_synq_add(sk, req);
1535 } 1518 }
1536 return 0; 1519 return 0;
1537 1520
1538drop_and_free: 1521drop_and_free:
1539 tcp_openreq_free(req); 1522 reqsk_free(req);
1540drop: 1523drop:
1541 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 1524 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1542 return 0; 1525 return 0;
@@ -1548,9 +1531,10 @@ drop:
1548 * now create the new socket. 1531 * now create the new socket.
1549 */ 1532 */
1550struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 1533struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1551 struct open_request *req, 1534 struct request_sock *req,
1552 struct dst_entry *dst) 1535 struct dst_entry *dst)
1553{ 1536{
1537 struct inet_request_sock *ireq;
1554 struct inet_sock *newinet; 1538 struct inet_sock *newinet;
1555 struct tcp_sock *newtp; 1539 struct tcp_sock *newtp;
1556 struct sock *newsk; 1540 struct sock *newsk;
@@ -1570,11 +1554,12 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1570 1554
1571 newtp = tcp_sk(newsk); 1555 newtp = tcp_sk(newsk);
1572 newinet = inet_sk(newsk); 1556 newinet = inet_sk(newsk);
1573 newinet->daddr = req->af.v4_req.rmt_addr; 1557 ireq = inet_rsk(req);
1574 newinet->rcv_saddr = req->af.v4_req.loc_addr; 1558 newinet->daddr = ireq->rmt_addr;
1575 newinet->saddr = req->af.v4_req.loc_addr; 1559 newinet->rcv_saddr = ireq->loc_addr;
1576 newinet->opt = req->af.v4_req.opt; 1560 newinet->saddr = ireq->loc_addr;
1577 req->af.v4_req.opt = NULL; 1561 newinet->opt = ireq->opt;
1562 ireq->opt = NULL;
1578 newinet->mc_index = tcp_v4_iif(skb); 1563 newinet->mc_index = tcp_v4_iif(skb);
1579 newinet->mc_ttl = skb->nh.iph->ttl; 1564 newinet->mc_ttl = skb->nh.iph->ttl;
1580 newtp->ext_header_len = 0; 1565 newtp->ext_header_len = 0;
@@ -1605,9 +1590,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1605 struct iphdr *iph = skb->nh.iph; 1590 struct iphdr *iph = skb->nh.iph;
1606 struct tcp_sock *tp = tcp_sk(sk); 1591 struct tcp_sock *tp = tcp_sk(sk);
1607 struct sock *nsk; 1592 struct sock *nsk;
1608 struct open_request **prev; 1593 struct request_sock **prev;
1609 /* Find possible connection requests. */ 1594 /* Find possible connection requests. */
1610 struct open_request *req = tcp_v4_search_req(tp, &prev, th->source, 1595 struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source,
1611 iph->saddr, iph->daddr); 1596 iph->saddr, iph->daddr);
1612 if (req) 1597 if (req)
1613 return tcp_check_req(sk, skb, req, prev); 1598 return tcp_check_req(sk, skb, req, prev);
@@ -2144,13 +2129,13 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2144 ++st->num; 2129 ++st->num;
2145 2130
2146 if (st->state == TCP_SEQ_STATE_OPENREQ) { 2131 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2147 struct open_request *req = cur; 2132 struct request_sock *req = cur;
2148 2133
2149 tp = tcp_sk(st->syn_wait_sk); 2134 tp = tcp_sk(st->syn_wait_sk);
2150 req = req->dl_next; 2135 req = req->dl_next;
2151 while (1) { 2136 while (1) {
2152 while (req) { 2137 while (req) {
2153 if (req->class->family == st->family) { 2138 if (req->rsk_ops->family == st->family) {
2154 cur = req; 2139 cur = req;
2155 goto out; 2140 goto out;
2156 } 2141 }
@@ -2159,17 +2144,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2159 if (++st->sbucket >= TCP_SYNQ_HSIZE) 2144 if (++st->sbucket >= TCP_SYNQ_HSIZE)
2160 break; 2145 break;
2161get_req: 2146get_req:
2162 req = tp->listen_opt->syn_table[st->sbucket]; 2147 req = tp->accept_queue.listen_opt->syn_table[st->sbucket];
2163 } 2148 }
2164 sk = sk_next(st->syn_wait_sk); 2149 sk = sk_next(st->syn_wait_sk);
2165 st->state = TCP_SEQ_STATE_LISTENING; 2150 st->state = TCP_SEQ_STATE_LISTENING;
2166 read_unlock_bh(&tp->syn_wait_lock); 2151 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2167 } else { 2152 } else {
2168 tp = tcp_sk(sk); 2153 tp = tcp_sk(sk);
2169 read_lock_bh(&tp->syn_wait_lock); 2154 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2170 if (tp->listen_opt && tp->listen_opt->qlen) 2155 if (reqsk_queue_len(&tp->accept_queue))
2171 goto start_req; 2156 goto start_req;
2172 read_unlock_bh(&tp->syn_wait_lock); 2157 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2173 sk = sk_next(sk); 2158 sk = sk_next(sk);
2174 } 2159 }
2175get_sk: 2160get_sk:
@@ -2179,8 +2164,8 @@ get_sk:
2179 goto out; 2164 goto out;
2180 } 2165 }
2181 tp = tcp_sk(sk); 2166 tp = tcp_sk(sk);
2182 read_lock_bh(&tp->syn_wait_lock); 2167 read_lock_bh(&tp->accept_queue.syn_wait_lock);
2183 if (tp->listen_opt && tp->listen_opt->qlen) { 2168 if (reqsk_queue_len(&tp->accept_queue)) {
2184start_req: 2169start_req:
2185 st->uid = sock_i_uid(sk); 2170 st->uid = sock_i_uid(sk);
2186 st->syn_wait_sk = sk; 2171 st->syn_wait_sk = sk;
@@ -2188,7 +2173,7 @@ start_req:
2188 st->sbucket = 0; 2173 st->sbucket = 0;
2189 goto get_req; 2174 goto get_req;
2190 } 2175 }
2191 read_unlock_bh(&tp->syn_wait_lock); 2176 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2192 } 2177 }
2193 if (++st->bucket < TCP_LHTABLE_SIZE) { 2178 if (++st->bucket < TCP_LHTABLE_SIZE) {
2194 sk = sk_head(&tcp_listening_hash[st->bucket]); 2179 sk = sk_head(&tcp_listening_hash[st->bucket]);
@@ -2375,7 +2360,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2375 case TCP_SEQ_STATE_OPENREQ: 2360 case TCP_SEQ_STATE_OPENREQ:
2376 if (v) { 2361 if (v) {
2377 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); 2362 struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
2378 read_unlock_bh(&tp->syn_wait_lock); 2363 read_unlock_bh(&tp->accept_queue.syn_wait_lock);
2379 } 2364 }
2380 case TCP_SEQ_STATE_LISTENING: 2365 case TCP_SEQ_STATE_LISTENING:
2381 if (v != SEQ_START_TOKEN) 2366 if (v != SEQ_START_TOKEN)
@@ -2451,18 +2436,19 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2451 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); 2436 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2452} 2437}
2453 2438
2454static void get_openreq4(struct sock *sk, struct open_request *req, 2439static void get_openreq4(struct sock *sk, struct request_sock *req,
2455 char *tmpbuf, int i, int uid) 2440 char *tmpbuf, int i, int uid)
2456{ 2441{
2442 const struct inet_request_sock *ireq = inet_rsk(req);
2457 int ttd = req->expires - jiffies; 2443 int ttd = req->expires - jiffies;
2458 2444
2459 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 2445 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2460 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p", 2446 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2461 i, 2447 i,
2462 req->af.v4_req.loc_addr, 2448 ireq->loc_addr,
2463 ntohs(inet_sk(sk)->sport), 2449 ntohs(inet_sk(sk)->sport),
2464 req->af.v4_req.rmt_addr, 2450 ireq->rmt_addr,
2465 ntohs(req->rmt_port), 2451 ntohs(ireq->rmt_port),
2466 TCP_SYN_RECV, 2452 TCP_SYN_RECV,
2467 0, 0, /* could print option size, but that is af dependent. */ 2453 0, 0, /* could print option size, but that is af dependent. */
2468 1, /* timers active (only the expire timer) */ 2454 1, /* timers active (only the expire timer) */
@@ -2618,6 +2604,7 @@ struct proto tcp_prot = {
2618 .sysctl_rmem = sysctl_tcp_rmem, 2604 .sysctl_rmem = sysctl_tcp_rmem,
2619 .max_header = MAX_TCP_HEADER, 2605 .max_header = MAX_TCP_HEADER,
2620 .obj_size = sizeof(struct tcp_sock), 2606 .obj_size = sizeof(struct tcp_sock),
2607 .rsk_prot = &tcp_request_sock_ops,
2621}; 2608};
2622 2609
2623 2610
@@ -2660,7 +2647,6 @@ EXPORT_SYMBOL(tcp_proc_register);
2660EXPORT_SYMBOL(tcp_proc_unregister); 2647EXPORT_SYMBOL(tcp_proc_unregister);
2661#endif 2648#endif
2662EXPORT_SYMBOL(sysctl_local_port_range); 2649EXPORT_SYMBOL(sysctl_local_port_range);
2663EXPORT_SYMBOL(sysctl_max_syn_backlog);
2664EXPORT_SYMBOL(sysctl_tcp_low_latency); 2650EXPORT_SYMBOL(sysctl_tcp_low_latency);
2665EXPORT_SYMBOL(sysctl_tcp_tw_reuse); 2651EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
2666 2652
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index eea1a17a9ac2..b3943e7562f3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -684,7 +684,7 @@ out:
684 * Actually, we could save lots of memory writes here. tp of listening 684 * Actually, we could save lots of memory writes here. tp of listening
685 * socket contains all necessary default parameters. 685 * socket contains all necessary default parameters.
686 */ 686 */
687struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb) 687struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
688{ 688{
689 /* allocate the newsk from the same slab of the master sock, 689 /* allocate the newsk from the same slab of the master sock,
690 * if not, at sk_free time we'll try to free it from the wrong 690 * if not, at sk_free time we'll try to free it from the wrong
@@ -692,6 +692,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
692 struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0); 692 struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0);
693 693
694 if(newsk != NULL) { 694 if(newsk != NULL) {
695 struct inet_request_sock *ireq = inet_rsk(req);
696 struct tcp_request_sock *treq = tcp_rsk(req);
695 struct tcp_sock *newtp; 697 struct tcp_sock *newtp;
696 struct sk_filter *filter; 698 struct sk_filter *filter;
697 699
@@ -703,7 +705,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
703 tcp_sk(newsk)->bind_hash = NULL; 705 tcp_sk(newsk)->bind_hash = NULL;
704 706
705 /* Clone the TCP header template */ 707 /* Clone the TCP header template */
706 inet_sk(newsk)->dport = req->rmt_port; 708 inet_sk(newsk)->dport = ireq->rmt_port;
707 709
708 sock_lock_init(newsk); 710 sock_lock_init(newsk);
709 bh_lock_sock(newsk); 711 bh_lock_sock(newsk);
@@ -739,14 +741,14 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
739 /* Now setup tcp_sock */ 741 /* Now setup tcp_sock */
740 newtp = tcp_sk(newsk); 742 newtp = tcp_sk(newsk);
741 newtp->pred_flags = 0; 743 newtp->pred_flags = 0;
742 newtp->rcv_nxt = req->rcv_isn + 1; 744 newtp->rcv_nxt = treq->rcv_isn + 1;
743 newtp->snd_nxt = req->snt_isn + 1; 745 newtp->snd_nxt = treq->snt_isn + 1;
744 newtp->snd_una = req->snt_isn + 1; 746 newtp->snd_una = treq->snt_isn + 1;
745 newtp->snd_sml = req->snt_isn + 1; 747 newtp->snd_sml = treq->snt_isn + 1;
746 748
747 tcp_prequeue_init(newtp); 749 tcp_prequeue_init(newtp);
748 750
749 tcp_init_wl(newtp, req->snt_isn, req->rcv_isn); 751 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
750 752
751 newtp->retransmits = 0; 753 newtp->retransmits = 0;
752 newtp->backoff = 0; 754 newtp->backoff = 0;
@@ -775,10 +777,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
775 tcp_set_ca_state(newtp, TCP_CA_Open); 777 tcp_set_ca_state(newtp, TCP_CA_Open);
776 tcp_init_xmit_timers(newsk); 778 tcp_init_xmit_timers(newsk);
777 skb_queue_head_init(&newtp->out_of_order_queue); 779 skb_queue_head_init(&newtp->out_of_order_queue);
778 newtp->rcv_wup = req->rcv_isn + 1; 780 newtp->rcv_wup = treq->rcv_isn + 1;
779 newtp->write_seq = req->snt_isn + 1; 781 newtp->write_seq = treq->snt_isn + 1;
780 newtp->pushed_seq = newtp->write_seq; 782 newtp->pushed_seq = newtp->write_seq;
781 newtp->copied_seq = req->rcv_isn + 1; 783 newtp->copied_seq = treq->rcv_isn + 1;
782 784
783 newtp->rx_opt.saw_tstamp = 0; 785 newtp->rx_opt.saw_tstamp = 0;
784 786
@@ -788,10 +790,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
788 newtp->probes_out = 0; 790 newtp->probes_out = 0;
789 newtp->rx_opt.num_sacks = 0; 791 newtp->rx_opt.num_sacks = 0;
790 newtp->urg_data = 0; 792 newtp->urg_data = 0;
791 newtp->listen_opt = NULL; 793 /* Deinitialize accept_queue to trap illegal accesses. */
792 newtp->accept_queue = newtp->accept_queue_tail = NULL; 794 memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue));
793 /* Deinitialize syn_wait_lock to trap illegal accesses. */
794 memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
795 795
796 /* Back to base struct sock members. */ 796 /* Back to base struct sock members. */
797 newsk->sk_err = 0; 797 newsk->sk_err = 0;
@@ -808,18 +808,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
808 newsk->sk_socket = NULL; 808 newsk->sk_socket = NULL;
809 newsk->sk_sleep = NULL; 809 newsk->sk_sleep = NULL;
810 810
811 newtp->rx_opt.tstamp_ok = req->tstamp_ok; 811 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
812 if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) { 812 if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
813 if (sysctl_tcp_fack) 813 if (sysctl_tcp_fack)
814 newtp->rx_opt.sack_ok |= 2; 814 newtp->rx_opt.sack_ok |= 2;
815 } 815 }
816 newtp->window_clamp = req->window_clamp; 816 newtp->window_clamp = req->window_clamp;
817 newtp->rcv_ssthresh = req->rcv_wnd; 817 newtp->rcv_ssthresh = req->rcv_wnd;
818 newtp->rcv_wnd = req->rcv_wnd; 818 newtp->rcv_wnd = req->rcv_wnd;
819 newtp->rx_opt.wscale_ok = req->wscale_ok; 819 newtp->rx_opt.wscale_ok = ireq->wscale_ok;
820 if (newtp->rx_opt.wscale_ok) { 820 if (newtp->rx_opt.wscale_ok) {
821 newtp->rx_opt.snd_wscale = req->snd_wscale; 821 newtp->rx_opt.snd_wscale = ireq->snd_wscale;
822 newtp->rx_opt.rcv_wscale = req->rcv_wscale; 822 newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
823 } else { 823 } else {
824 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; 824 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
825 newtp->window_clamp = min(newtp->window_clamp, 65535U); 825 newtp->window_clamp = min(newtp->window_clamp, 65535U);
@@ -851,12 +851,12 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
851 851
852/* 852/*
853 * Process an incoming packet for SYN_RECV sockets represented 853 * Process an incoming packet for SYN_RECV sockets represented
854 * as an open_request. 854 * as a request_sock.
855 */ 855 */
856 856
857struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, 857struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
858 struct open_request *req, 858 struct request_sock *req,
859 struct open_request **prev) 859 struct request_sock **prev)
860{ 860{
861 struct tcphdr *th = skb->h.th; 861 struct tcphdr *th = skb->h.th;
862 struct tcp_sock *tp = tcp_sk(sk); 862 struct tcp_sock *tp = tcp_sk(sk);
@@ -881,7 +881,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
881 } 881 }
882 882
883 /* Check for pure retransmitted SYN. */ 883 /* Check for pure retransmitted SYN. */
884 if (TCP_SKB_CB(skb)->seq == req->rcv_isn && 884 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
885 flg == TCP_FLAG_SYN && 885 flg == TCP_FLAG_SYN &&
886 !paws_reject) { 886 !paws_reject) {
887 /* 887 /*
@@ -901,7 +901,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
901 * Enforce "SYN-ACK" according to figure 8, figure 6 901 * Enforce "SYN-ACK" according to figure 8, figure 6
902 * of RFC793, fixed by RFC1122. 902 * of RFC793, fixed by RFC1122.
903 */ 903 */
904 req->class->rtx_syn_ack(sk, req, NULL); 904 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
905 return NULL; 905 return NULL;
906 } 906 }
907 907
@@ -959,7 +959,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
959 * Invalid ACK: reset will be sent by listening socket 959 * Invalid ACK: reset will be sent by listening socket
960 */ 960 */
961 if ((flg & TCP_FLAG_ACK) && 961 if ((flg & TCP_FLAG_ACK) &&
962 (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)) 962 (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
963 return sk; 963 return sk;
964 964
965 /* Also, it would not be a bad idea to check rcv_tsecr, which 965 /* Also, it would not be a bad idea to check rcv_tsecr, which
@@ -970,10 +970,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
970 /* RFC793: "first check sequence number". */ 970 /* RFC793: "first check sequence number". */
971 971
972 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, 972 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
973 req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) { 973 tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
974 /* Out of window: send ACK and drop. */ 974 /* Out of window: send ACK and drop. */
975 if (!(flg & TCP_FLAG_RST)) 975 if (!(flg & TCP_FLAG_RST))
976 req->class->send_ack(skb, req); 976 req->rsk_ops->send_ack(skb, req);
977 if (paws_reject) 977 if (paws_reject)
978 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 978 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
979 return NULL; 979 return NULL;
@@ -981,12 +981,12 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
981 981
982 /* In sequence, PAWS is OK. */ 982 /* In sequence, PAWS is OK. */
983 983
984 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) 984 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
985 req->ts_recent = tmp_opt.rcv_tsval; 985 req->ts_recent = tmp_opt.rcv_tsval;
986 986
987 if (TCP_SKB_CB(skb)->seq == req->rcv_isn) { 987 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
988 /* Truncate SYN, it is out of window starting 988 /* Truncate SYN, it is out of window starting
989 at req->rcv_isn+1. */ 989 at tcp_rsk(req)->rcv_isn + 1. */
990 flg &= ~TCP_FLAG_SYN; 990 flg &= ~TCP_FLAG_SYN;
991 } 991 }
992 992
@@ -1003,8 +1003,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1003 return NULL; 1003 return NULL;
1004 1004
1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ 1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) { 1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
1007 req->acked = 1; 1007 inet_rsk(req)->acked = 1;
1008 return NULL; 1008 return NULL;
1009 } 1009 }
1010 1010
@@ -1026,14 +1026,14 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1026 1026
1027 listen_overflow: 1027 listen_overflow:
1028 if (!sysctl_tcp_abort_on_overflow) { 1028 if (!sysctl_tcp_abort_on_overflow) {
1029 req->acked = 1; 1029 inet_rsk(req)->acked = 1;
1030 return NULL; 1030 return NULL;
1031 } 1031 }
1032 1032
1033 embryonic_reset: 1033 embryonic_reset:
1034 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); 1034 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
1035 if (!(flg & TCP_FLAG_RST)) 1035 if (!(flg & TCP_FLAG_RST))
1036 req->class->send_reset(skb); 1036 req->rsk_ops->send_reset(skb);
1037 1037
1038 tcp_synq_drop(sk, req, prev); 1038 tcp_synq_drop(sk, req, prev);
1039 return NULL; 1039 return NULL;
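tcp_check_req() above validates sequence numbers against the window derived from tcp_rsk(req)->rcv_isn and req->rcv_wnd using wrap-safe modulo-2^32 comparisons. Roughly what that acceptance test computes, as a hedged userspace sketch (toy_* names, edge cases of the real tcp_in_window() omitted):

    #include <stdint.h>

    static int toy_before(uint32_t a, uint32_t b)   /* a < b, mod 2^32 */
    {
            return (int32_t)(a - b) < 0;
    }

    static int toy_after(uint32_t a, uint32_t b)
    {
            return toy_before(b, a);
    }

    /* Segment [seq, end_seq] must touch the window [s_win, e_win],
     * e.g. s_win = rcv_isn + 1, e_win = rcv_isn + 1 + rcv_wnd. */
    static int toy_in_window(uint32_t seq, uint32_t end_seq,
                             uint32_t s_win, uint32_t e_win)
    {
            if (seq == s_win)
                    return 1;
            return toy_after(end_seq, s_win) && toy_before(seq, e_win);
    }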
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fa24e7ae1f40..f17c6577e337 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1356,8 +1356,9 @@ int tcp_send_synack(struct sock *sk)
1356 * Prepare a SYN-ACK. 1356 * Prepare a SYN-ACK.
1357 */ 1357 */
1358struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, 1358struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1359 struct open_request *req) 1359 struct request_sock *req)
1360{ 1360{
1361 struct inet_request_sock *ireq = inet_rsk(req);
1361 struct tcp_sock *tp = tcp_sk(sk); 1362 struct tcp_sock *tp = tcp_sk(sk);
1362 struct tcphdr *th; 1363 struct tcphdr *th;
1363 int tcp_header_size; 1364 int tcp_header_size;
@@ -1373,47 +1374,47 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1373 skb->dst = dst_clone(dst); 1374 skb->dst = dst_clone(dst);
1374 1375
1375 tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + 1376 tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
1376 (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + 1377 (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
1377 (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + 1378 (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
1378 /* SACK_PERM is in the place of NOP NOP of TS */ 1379 /* SACK_PERM is in the place of NOP NOP of TS */
1379 ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); 1380 ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
1380 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); 1381 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
1381 1382
1382 memset(th, 0, sizeof(struct tcphdr)); 1383 memset(th, 0, sizeof(struct tcphdr));
1383 th->syn = 1; 1384 th->syn = 1;
1384 th->ack = 1; 1385 th->ack = 1;
1385 if (dst->dev->features&NETIF_F_TSO) 1386 if (dst->dev->features&NETIF_F_TSO)
1386 req->ecn_ok = 0; 1387 ireq->ecn_ok = 0;
1387 TCP_ECN_make_synack(req, th); 1388 TCP_ECN_make_synack(req, th);
1388 th->source = inet_sk(sk)->sport; 1389 th->source = inet_sk(sk)->sport;
1389 th->dest = req->rmt_port; 1390 th->dest = ireq->rmt_port;
1390 TCP_SKB_CB(skb)->seq = req->snt_isn; 1391 TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
1391 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; 1392 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
1392 TCP_SKB_CB(skb)->sacked = 0; 1393 TCP_SKB_CB(skb)->sacked = 0;
1393 skb_shinfo(skb)->tso_segs = 1; 1394 skb_shinfo(skb)->tso_segs = 1;
1394 skb_shinfo(skb)->tso_size = 0; 1395 skb_shinfo(skb)->tso_size = 0;
1395 th->seq = htonl(TCP_SKB_CB(skb)->seq); 1396 th->seq = htonl(TCP_SKB_CB(skb)->seq);
1396 th->ack_seq = htonl(req->rcv_isn + 1); 1397 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
1397 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ 1398 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
1398 __u8 rcv_wscale; 1399 __u8 rcv_wscale;
1399 /* Set this up on the first call only */ 1400 /* Set this up on the first call only */
1400 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); 1401 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
1401 /* tcp_full_space because it is guaranteed to be the first packet */ 1402 /* tcp_full_space because it is guaranteed to be the first packet */
1402 tcp_select_initial_window(tcp_full_space(sk), 1403 tcp_select_initial_window(tcp_full_space(sk),
1403 dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 1404 dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
1404 &req->rcv_wnd, 1405 &req->rcv_wnd,
1405 &req->window_clamp, 1406 &req->window_clamp,
1406 req->wscale_ok, 1407 ireq->wscale_ok,
1407 &rcv_wscale); 1408 &rcv_wscale);
1408 req->rcv_wscale = rcv_wscale; 1409 ireq->rcv_wscale = rcv_wscale;
1409 } 1410 }
1410 1411
1411 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 1412 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
1412 th->window = htons(req->rcv_wnd); 1413 th->window = htons(req->rcv_wnd);
1413 1414
1414 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1415 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1415 tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok, 1416 tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
1416 req->sack_ok, req->wscale_ok, req->rcv_wscale, 1417 ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
1417 TCP_SKB_CB(skb)->when, 1418 TCP_SKB_CB(skb)->when,
1418 req->ts_recent); 1419 req->ts_recent);
1419 1420
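tcp_make_synack() above sizes the TCP header from the negotiated options: each block is reserved only if the corresponding ireq flag is set, and SACK-permitted reuses the two-NOP padding slot of the timestamp option. The same arithmetic as a standalone sketch (the aligned option lengths below match the usual TCPOLEN_* constants, assumed here rather than included):

    #include <stddef.h>

    #define TOY_TCPHDR              20
    #define TOY_OLEN_MSS             4
    #define TOY_OLEN_TSTAMP_ALIGNED 12
    #define TOY_OLEN_WSCALE_ALIGNED  4
    #define TOY_OLEN_SACKPERM        4

    static size_t toy_synack_header_size(int tstamp_ok, int wscale_ok,
                                         int sack_ok)
    {
            return TOY_TCPHDR + TOY_OLEN_MSS +
                   (tstamp_ok ? TOY_OLEN_TSTAMP_ALIGNED : 0) +
                   (wscale_ok ? TOY_OLEN_WSCALE_ALIGNED : 0) +
                   /* SACK_PERM sits where the TS NOP NOP would go */
                   ((sack_ok && !tstamp_ok) ? TOY_OLEN_SACKPERM : 0);
    }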
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 799ebe061e2c..b127b4498565 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -464,11 +464,11 @@ out_unlock:
464static void tcp_synack_timer(struct sock *sk) 464static void tcp_synack_timer(struct sock *sk)
465{ 465{
466 struct tcp_sock *tp = tcp_sk(sk); 466 struct tcp_sock *tp = tcp_sk(sk);
467 struct tcp_listen_opt *lopt = tp->listen_opt; 467 struct listen_sock *lopt = tp->accept_queue.listen_opt;
468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; 468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
469 int thresh = max_retries; 469 int thresh = max_retries;
470 unsigned long now = jiffies; 470 unsigned long now = jiffies;
471 struct open_request **reqp, *req; 471 struct request_sock **reqp, *req;
472 int i, budget; 472 int i, budget;
473 473
474 if (lopt == NULL || lopt->qlen == 0) 474 if (lopt == NULL || lopt->qlen == 0)
@@ -513,8 +513,8 @@ static void tcp_synack_timer(struct sock *sk)
513 while ((req = *reqp) != NULL) { 513 while ((req = *reqp) != NULL) {
514 if (time_after_eq(now, req->expires)) { 514 if (time_after_eq(now, req->expires)) {
515 if ((req->retrans < thresh || 515 if ((req->retrans < thresh ||
516 (req->acked && req->retrans < max_retries)) 516 (inet_rsk(req)->acked && req->retrans < max_retries))
517 && !req->class->rtx_syn_ack(sk, req, NULL)) { 517 && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) {
518 unsigned long timeo; 518 unsigned long timeo;
519 519
520 if (req->retrans++ == 0) 520 if (req->retrans++ == 0)
@@ -527,13 +527,9 @@ static void tcp_synack_timer(struct sock *sk)
527 } 527 }
528 528
529 /* Drop this request */ 529 /* Drop this request */
530 write_lock(&tp->syn_wait_lock); 530 tcp_synq_unlink(tp, req, reqp);
531 *reqp = req->dl_next; 531 reqsk_queue_removed(&tp->accept_queue, req);
532 write_unlock(&tp->syn_wait_lock); 532 reqsk_free(req);
533 lopt->qlen--;
534 if (req->retrans == 0)
535 lopt->qlen_young--;
536 tcp_openreq_free(req);
537 continue; 533 continue;
538 } 534 }
539 reqp = &req->dl_next; 535 reqp = &req->dl_next;
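In tcp_synack_timer() above, a request whose timer fired is either retransmitted (incrementing req->retrans) or dropped via tcp_synq_unlink() plus reqsk_queue_removed() and reqsk_free(). The retransmit interval follows the usual doubling backoff; a sketch of that schedule with illustrative constants (jiffies at HZ=100):

    #define TOY_TIMEOUT_INIT (3 * 100)      /* 3 s initial SYN-ACK RTO */
    #define TOY_RTO_MAX      (120 * 100)    /* clamp at 120 s */

    /* Timeout armed before the (retrans+1)-th retransmission. */
    static unsigned long toy_synack_timeout(unsigned int retrans)
    {
            unsigned long timeo = (unsigned long)TOY_TIMEOUT_INIT << retrans;

            return timeo < TOY_RTO_MAX ? timeo : TOY_RTO_MAX;
    }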
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4a6952e3fee9..7c24e64b443f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -738,7 +738,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
738 unsigned long amount; 738 unsigned long amount;
739 739
740 amount = 0; 740 amount = 0;
741 spin_lock_irq(&sk->sk_receive_queue.lock); 741 spin_lock_bh(&sk->sk_receive_queue.lock);
742 skb = skb_peek(&sk->sk_receive_queue); 742 skb = skb_peek(&sk->sk_receive_queue);
743 if (skb != NULL) { 743 if (skb != NULL) {
744 /* 744 /*
@@ -748,7 +748,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
748 */ 748 */
749 amount = skb->len - sizeof(struct udphdr); 749 amount = skb->len - sizeof(struct udphdr);
750 } 750 }
751 spin_unlock_irq(&sk->sk_receive_queue.lock); 751 spin_unlock_bh(&sk->sk_receive_queue.lock);
752 return put_user(amount, (int __user *)arg); 752 return put_user(amount, (int __user *)arg);
753 } 753 }
754 754
@@ -848,12 +848,12 @@ csum_copy_err:
848 /* Clear queue. */ 848 /* Clear queue. */
849 if (flags&MSG_PEEK) { 849 if (flags&MSG_PEEK) {
850 int clear = 0; 850 int clear = 0;
851 spin_lock_irq(&sk->sk_receive_queue.lock); 851 spin_lock_bh(&sk->sk_receive_queue.lock);
852 if (skb == skb_peek(&sk->sk_receive_queue)) { 852 if (skb == skb_peek(&sk->sk_receive_queue)) {
853 __skb_unlink(skb, &sk->sk_receive_queue); 853 __skb_unlink(skb, &sk->sk_receive_queue);
854 clear = 1; 854 clear = 1;
855 } 855 }
856 spin_unlock_irq(&sk->sk_receive_queue.lock); 856 spin_unlock_bh(&sk->sk_receive_queue.lock);
857 if (clear) 857 if (clear)
858 kfree_skb(skb); 858 kfree_skb(skb);
859 } 859 }
@@ -1334,7 +1334,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1334 struct sk_buff_head *rcvq = &sk->sk_receive_queue; 1334 struct sk_buff_head *rcvq = &sk->sk_receive_queue;
1335 struct sk_buff *skb; 1335 struct sk_buff *skb;
1336 1336
1337 spin_lock_irq(&rcvq->lock); 1337 spin_lock_bh(&rcvq->lock);
1338 while ((skb = skb_peek(rcvq)) != NULL) { 1338 while ((skb = skb_peek(rcvq)) != NULL) {
1339 if (udp_checksum_complete(skb)) { 1339 if (udp_checksum_complete(skb)) {
1340 UDP_INC_STATS_BH(UDP_MIB_INERRORS); 1340 UDP_INC_STATS_BH(UDP_MIB_INERRORS);
@@ -1345,7 +1345,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1345 break; 1345 break;
1346 } 1346 }
1347 } 1347 }
1348 spin_unlock_irq(&rcvq->lock); 1348 spin_unlock_bh(&rcvq->lock);
1349 1349
1350 /* nothing to see, move along */ 1350 /* nothing to see, move along */
1351 if (skb == NULL) 1351 if (skb == NULL)
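The udp.c hunks swap spin_lock_irq()/spin_unlock_irq() on the receive-queue lock for the _bh variants; blocking bottom halves is sufficient for a queue fed from softirq context. The SIOCINQ path touched above is visible from userspace; a small usage sketch (on Linux, FIONREAD is the same ioctl request as SIOCINQ for sockets):

    #include <stdio.h>
    #include <sys/ioctl.h>

    /* Report the payload length of the next pending UDP datagram,
     * i.e. skb->len minus the UDP header, as computed above. */
    static void toy_print_pending(int udp_fd)
    {
            int amount = 0;

            if (ioctl(udp_fd, FIONREAD, &amount) == 0)
                    printf("next datagram payload: %d bytes\n", amount);
    }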
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index af2392ae5769..66620a95942a 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -33,6 +33,7 @@ static void xfrm4_encap(struct sk_buff *skb)
33 struct dst_entry *dst = skb->dst; 33 struct dst_entry *dst = skb->dst;
34 struct xfrm_state *x = dst->xfrm; 34 struct xfrm_state *x = dst->xfrm;
35 struct iphdr *iph, *top_iph; 35 struct iphdr *iph, *top_iph;
36 int flags;
36 37
37 iph = skb->nh.iph; 38 iph = skb->nh.iph;
38 skb->h.ipiph = iph; 39 skb->h.ipiph = iph;
@@ -51,10 +52,13 @@ static void xfrm4_encap(struct sk_buff *skb)
51 52
52 /* DS disclosed */ 53 /* DS disclosed */
53 top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); 54 top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
54 if (x->props.flags & XFRM_STATE_NOECN) 55
56 flags = x->props.flags;
57 if (flags & XFRM_STATE_NOECN)
55 IP_ECN_clear(top_iph); 58 IP_ECN_clear(top_iph);
56 59
57 top_iph->frag_off = iph->frag_off & htons(IP_DF); 60 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
61 0 : (iph->frag_off & htons(IP_DF));
58 if (!top_iph->frag_off) 62 if (!top_iph->frag_off)
59 __ip_select_ident(top_iph, dst, 0); 63 __ip_select_ident(top_iph, dst, 0);
60 64
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 223a2e83853f..050611d7a967 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -7,12 +7,20 @@
7 * 7 *
8 */ 8 */
9 9
10#include <net/ip.h>
10#include <net/xfrm.h> 11#include <net/xfrm.h>
11#include <linux/pfkeyv2.h> 12#include <linux/pfkeyv2.h>
12#include <linux/ipsec.h> 13#include <linux/ipsec.h>
13 14
14static struct xfrm_state_afinfo xfrm4_state_afinfo; 15static struct xfrm_state_afinfo xfrm4_state_afinfo;
15 16
17static int xfrm4_init_flags(struct xfrm_state *x)
18{
19 if (ipv4_config.no_pmtu_disc)
20 x->props.flags |= XFRM_STATE_NOPMTUDISC;
21 return 0;
22}
23
16static void 24static void
17__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, 25__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
18 struct xfrm_tmpl *tmpl, 26 struct xfrm_tmpl *tmpl,
@@ -109,6 +117,7 @@ __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto,
109static struct xfrm_state_afinfo xfrm4_state_afinfo = { 117static struct xfrm_state_afinfo xfrm4_state_afinfo = {
110 .family = AF_INET, 118 .family = AF_INET,
111 .lock = RW_LOCK_UNLOCKED, 119 .lock = RW_LOCK_UNLOCKED,
120 .init_flags = xfrm4_init_flags,
112 .init_tempsel = __xfrm4_init_tempsel, 121 .init_tempsel = __xfrm4_init_tempsel,
113 .state_lookup = __xfrm4_state_lookup, 122 .state_lookup = __xfrm4_state_lookup,
114 .find_acq = __xfrm4_find_acq, 123 .find_acq = __xfrm4_find_acq,
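xfrm4_state.c gains an init_flags() hook in the afinfo table so per-family setup (here: inheriting no_pmtu_disc into XFRM_STATE_NOPMTUDISC) can run from af-independent code. A sketch of the optional-callback pattern this relies on, with simplified stand-in types:

    struct toy_afinfo {
            int (*init_flags)(void *state);   /* may be NULL */
    };

    /* Generic code only invokes the hook when the family set one. */
    static int toy_init_state_flags(const struct toy_afinfo *afinfo,
                                    void *state)
    {
            return afinfo->init_flags ? afinfo->init_flags(state) : 0;
    }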
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 413191f585f6..e1fe360ed27a 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -84,7 +84,7 @@ static void ipip_err(struct sk_buff *skb, u32 info)
84 handler->err_handler(skb, &arg); 84 handler->err_handler(skb, &arg);
85} 85}
86 86
87static int ipip_init_state(struct xfrm_state *x, void *args) 87static int ipip_init_state(struct xfrm_state *x)
88{ 88{
89 if (!x->props.mode) 89 if (!x->props.mode)
90 return -EINVAL; 90 return -EINVAL;
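ipip_init_state() here (and ah6_init_state() below) drop the unused void *args parameter, so every xfrm type's init_state now takes only the state. A before/after sketch of the migrated signature, with a toy stand-in for struct xfrm_state:

    struct toy_xfrm_state {
            int mode;                     /* 0 = transport, 1 = tunnel */
    };

    /* old: static int toy_init_state(struct toy_xfrm_state *x, void *args); */
    static int toy_init_state(struct toy_xfrm_state *x)
    {
            if (!x->mode)                 /* tunnel mode required, as in ipip */
                    return -1;            /* stands in for -EINVAL */
            return 0;
    }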
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7744a2592693..47a30c3188ea 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -131,7 +131,7 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
131 131
132static int addrconf_ifdown(struct net_device *dev, int how); 132static int addrconf_ifdown(struct net_device *dev, int how);
133 133
134static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags); 134static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
135static void addrconf_dad_timer(unsigned long data); 135static void addrconf_dad_timer(unsigned long data);
136static void addrconf_dad_completed(struct inet6_ifaddr *ifp); 136static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
137static void addrconf_rs_timer(unsigned long data); 137static void addrconf_rs_timer(unsigned long data);
@@ -372,6 +372,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
372 ndev->regen_timer.data = (unsigned long) ndev; 372 ndev->regen_timer.data = (unsigned long) ndev;
373 if ((dev->flags&IFF_LOOPBACK) || 373 if ((dev->flags&IFF_LOOPBACK) ||
374 dev->type == ARPHRD_TUNNEL || 374 dev->type == ARPHRD_TUNNEL ||
375 dev->type == ARPHRD_NONE ||
375 dev->type == ARPHRD_SIT) { 376 dev->type == ARPHRD_SIT) {
376 printk(KERN_INFO 377 printk(KERN_INFO
377 "Disabled Privacy Extensions on device %p(%s)\n", 378 "Disabled Privacy Extensions on device %p(%s)\n",
@@ -491,7 +492,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
491 492
492static struct inet6_ifaddr * 493static struct inet6_ifaddr *
493ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, 494ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
494 int scope, unsigned flags) 495 int scope, u32 flags)
495{ 496{
496 struct inet6_ifaddr *ifa = NULL; 497 struct inet6_ifaddr *ifa = NULL;
497 struct rt6_info *rt; 498 struct rt6_info *rt;
@@ -1319,7 +1320,7 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
1319 1320
1320static void 1321static void
1321addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, 1322addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1322 unsigned long expires, unsigned flags) 1323 unsigned long expires, u32 flags)
1323{ 1324{
1324 struct in6_rtmsg rtmsg; 1325 struct in6_rtmsg rtmsg;
1325 1326
@@ -2228,7 +2229,7 @@ out:
2228/* 2229/*
2229 * Duplicate Address Detection 2230 * Duplicate Address Detection
2230 */ 2231 */
2231static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags) 2232static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2232{ 2233{
2233 struct inet6_dev *idev = ifp->idev; 2234 struct inet6_dev *idev = ifp->idev;
2234 struct net_device *dev = idev->dev; 2235 struct net_device *dev = idev->dev;
@@ -2621,15 +2622,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2621} 2622}
2622 2623
2623static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, 2624static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
2624 u32 pid, u32 seq, int event) 2625 u32 pid, u32 seq, int event, unsigned int flags)
2625{ 2626{
2626 struct ifaddrmsg *ifm; 2627 struct ifaddrmsg *ifm;
2627 struct nlmsghdr *nlh; 2628 struct nlmsghdr *nlh;
2628 struct ifa_cacheinfo ci; 2629 struct ifa_cacheinfo ci;
2629 unsigned char *b = skb->tail; 2630 unsigned char *b = skb->tail;
2630 2631
2631 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2632 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2632 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2633 ifm = NLMSG_DATA(nlh); 2633 ifm = NLMSG_DATA(nlh);
2634 ifm->ifa_family = AF_INET6; 2634 ifm->ifa_family = AF_INET6;
2635 ifm->ifa_prefixlen = ifa->prefix_len; 2635 ifm->ifa_prefixlen = ifa->prefix_len;
@@ -2671,15 +2671,14 @@ rtattr_failure:
2671} 2671}
2672 2672
2673static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, 2673static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
2674 u32 pid, u32 seq, int event) 2674 u32 pid, u32 seq, int event, u16 flags)
2675{ 2675{
2676 struct ifaddrmsg *ifm; 2676 struct ifaddrmsg *ifm;
2677 struct nlmsghdr *nlh; 2677 struct nlmsghdr *nlh;
2678 struct ifa_cacheinfo ci; 2678 struct ifa_cacheinfo ci;
2679 unsigned char *b = skb->tail; 2679 unsigned char *b = skb->tail;
2680 2680
2681 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2681 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2682 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2683 ifm = NLMSG_DATA(nlh); 2682 ifm = NLMSG_DATA(nlh);
2684 ifm->ifa_family = AF_INET6; 2683 ifm->ifa_family = AF_INET6;
2685 ifm->ifa_prefixlen = 128; 2684 ifm->ifa_prefixlen = 128;
@@ -2708,15 +2707,14 @@ rtattr_failure:
2708} 2707}
2709 2708
2710static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, 2709static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
2711 u32 pid, u32 seq, int event) 2710 u32 pid, u32 seq, int event, unsigned int flags)
2712{ 2711{
2713 struct ifaddrmsg *ifm; 2712 struct ifaddrmsg *ifm;
2714 struct nlmsghdr *nlh; 2713 struct nlmsghdr *nlh;
2715 struct ifa_cacheinfo ci; 2714 struct ifa_cacheinfo ci;
2716 unsigned char *b = skb->tail; 2715 unsigned char *b = skb->tail;
2717 2716
2718 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2717 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2719 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2720 ifm = NLMSG_DATA(nlh); 2718 ifm = NLMSG_DATA(nlh);
2721 ifm->ifa_family = AF_INET6; 2719 ifm->ifa_family = AF_INET6;
2722 ifm->ifa_prefixlen = 128; 2720 ifm->ifa_prefixlen = 128;
@@ -2785,7 +2783,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2785 continue; 2783 continue;
2786 if ((err = inet6_fill_ifaddr(skb, ifa, 2784 if ((err = inet6_fill_ifaddr(skb, ifa,
2787 NETLINK_CB(cb->skb).pid, 2785 NETLINK_CB(cb->skb).pid,
2788 cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) 2786 cb->nlh->nlmsg_seq, RTM_NEWADDR,
2787 NLM_F_MULTI)) <= 0)
2789 goto done; 2788 goto done;
2790 } 2789 }
2791 /* temp addr */ 2790 /* temp addr */
@@ -2796,7 +2795,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2796 continue; 2795 continue;
2797 if ((err = inet6_fill_ifaddr(skb, ifa, 2796 if ((err = inet6_fill_ifaddr(skb, ifa,
2798 NETLINK_CB(cb->skb).pid, 2797 NETLINK_CB(cb->skb).pid,
2799 cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) 2798 cb->nlh->nlmsg_seq, RTM_NEWADDR,
2799 NLM_F_MULTI)) <= 0)
2800 goto done; 2800 goto done;
2801 } 2801 }
2802#endif 2802#endif
@@ -2809,7 +2809,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2809 continue; 2809 continue;
2810 if ((err = inet6_fill_ifmcaddr(skb, ifmca, 2810 if ((err = inet6_fill_ifmcaddr(skb, ifmca,
2811 NETLINK_CB(cb->skb).pid, 2811 NETLINK_CB(cb->skb).pid,
2812 cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0) 2812 cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
2813 NLM_F_MULTI)) <= 0)
2813 goto done; 2814 goto done;
2814 } 2815 }
2815 break; 2816 break;
@@ -2821,7 +2822,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2821 continue; 2822 continue;
2822 if ((err = inet6_fill_ifacaddr(skb, ifaca, 2823 if ((err = inet6_fill_ifacaddr(skb, ifaca,
2823 NETLINK_CB(cb->skb).pid, 2824 NETLINK_CB(cb->skb).pid,
2824 cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0) 2825 cb->nlh->nlmsg_seq, RTM_GETANYCAST,
2826 NLM_F_MULTI)) <= 0)
2825 goto done; 2827 goto done;
2826 } 2828 }
2827 break; 2829 break;
@@ -2871,7 +2873,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
2871 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); 2873 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
2872 return; 2874 return;
2873 } 2875 }
2874 if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { 2876 if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
2875 kfree_skb(skb); 2877 kfree_skb(skb);
2876 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); 2878 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
2877 return; 2879 return;
@@ -2906,7 +2908,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
2906} 2908}
2907 2909
2908static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 2910static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
2909 u32 pid, u32 seq, int event) 2911 u32 pid, u32 seq, int event, unsigned int flags)
2910{ 2912{
2911 struct net_device *dev = idev->dev; 2913 struct net_device *dev = idev->dev;
2912 __s32 *array = NULL; 2914 __s32 *array = NULL;
@@ -2917,8 +2919,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
2917 __u32 mtu = dev->mtu; 2919 __u32 mtu = dev->mtu;
2918 struct ifla_cacheinfo ci; 2920 struct ifla_cacheinfo ci;
2919 2921
2920 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); 2922 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
2921 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2922 r = NLMSG_DATA(nlh); 2923 r = NLMSG_DATA(nlh);
2923 r->ifi_family = AF_INET6; 2924 r->ifi_family = AF_INET6;
2924 r->ifi_type = dev->type; 2925 r->ifi_type = dev->type;
@@ -2985,7 +2986,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
2985 if ((idev = in6_dev_get(dev)) == NULL) 2986 if ((idev = in6_dev_get(dev)) == NULL)
2986 continue; 2987 continue;
2987 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, 2988 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
2988 cb->nlh->nlmsg_seq, RTM_NEWLINK); 2989 cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
2989 in6_dev_put(idev); 2990 in6_dev_put(idev);
2990 if (err <= 0) 2991 if (err <= 0)
2991 break; 2992 break;
@@ -3007,7 +3008,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3007 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); 3008 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
3008 return; 3009 return;
3009 } 3010 }
3010 if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) { 3011 if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
3011 kfree_skb(skb); 3012 kfree_skb(skb);
3012 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); 3013 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
3013 return; 3014 return;
@@ -3017,18 +3018,15 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3017} 3018}
3018 3019
3019static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, 3020static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
3020 struct prefix_info *pinfo, u32 pid, u32 seq, int event) 3021 struct prefix_info *pinfo, u32 pid, u32 seq,
3022 int event, unsigned int flags)
3021{ 3023{
3022 struct prefixmsg *pmsg; 3024 struct prefixmsg *pmsg;
3023 struct nlmsghdr *nlh; 3025 struct nlmsghdr *nlh;
3024 unsigned char *b = skb->tail; 3026 unsigned char *b = skb->tail;
3025 struct prefix_cacheinfo ci; 3027 struct prefix_cacheinfo ci;
3026 3028
3027 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg)); 3029 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags);
3028
3029 if (pid)
3030 nlh->nlmsg_flags |= NLM_F_MULTI;
3031
3032 pmsg = NLMSG_DATA(nlh); 3030 pmsg = NLMSG_DATA(nlh);
3033 pmsg->prefix_family = AF_INET6; 3031 pmsg->prefix_family = AF_INET6;
3034 pmsg->prefix_ifindex = idev->dev->ifindex; 3032 pmsg->prefix_ifindex = idev->dev->ifindex;
@@ -3067,7 +3065,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3067 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); 3065 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
3068 return; 3066 return;
3069 } 3067 }
3070 if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) { 3068 if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
3071 kfree_skb(skb); 3069 kfree_skb(skb);
3072 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); 3070 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
3073 return; 3071 return;
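
Throughout the addrconf hunks above, NLMSG_PUT(...) becomes NLMSG_NEW(..., flags): the multipart flag is supplied when the header is created instead of being patched in afterwards with "if (pid) nlh->nlmsg_flags |= NLM_F_MULTI", dump callbacks pass NLM_F_MULTI explicitly, and one-shot notifications now stamp current->pid with flags 0. A sketch of how an NLMSG_NEW-style wrapper could sit over __nlmsg_put(); the definition below is inferred from the call sites in this diff, not quoted from the patch:

	/* Assumed shape of NLMSG_NEW: reserve tailroom, or bail to a
	 * caller-provided nlmsg_failure label, then write the header
	 * with the given flags. */
	#define NLMSG_NEW(skb, pid, seq, type, len, flags)		\
	({	if (skb_tailroom(skb) < (int)NLMSG_SPACE(len))		\
			goto nlmsg_failure;				\
		__nlmsg_put(skb, pid, seq, type, len, flags); })

Passing the flag as an argument retires the old heuristic that any message built for a nonzero pid must belong to a multipart dump.
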
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index e3ecf626cbf7..986fdfdccbcd 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -339,7 +339,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
339 xfrm_state_put(x); 339 xfrm_state_put(x);
340} 340}
341 341
342static int ah6_init_state(struct xfrm_state *x, void *args) 342static int ah6_init_state(struct xfrm_state *x)
343{ 343{
344 struct ah_data *ahp = NULL; 344 struct ah_data *ahp = NULL;
345 struct xfrm_algo_desc *aalg_desc; 345 struct xfrm_algo_desc *aalg_desc;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 65b9375df57d..5229365cd8b4 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -353,14 +353,14 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
353 err = copied; 353 err = copied;
354 354
355 /* Reset and regenerate socket error */ 355 /* Reset and regenerate socket error */
356 spin_lock_irq(&sk->sk_error_queue.lock); 356 spin_lock_bh(&sk->sk_error_queue.lock);
357 sk->sk_err = 0; 357 sk->sk_err = 0;
358 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { 358 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
359 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; 359 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
360 spin_unlock_irq(&sk->sk_error_queue.lock); 360 spin_unlock_bh(&sk->sk_error_queue.lock);
361 sk->sk_error_report(sk); 361 sk->sk_error_report(sk);
362 } else { 362 } else {
363 spin_unlock_irq(&sk->sk_error_queue.lock); 363 spin_unlock_bh(&sk->sk_error_queue.lock);
364 } 364 }
365 365
366out_free_skb: 366out_free_skb:
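
The sk_error_queue lock is taken from process context and from softirq (network RX) context, never from hard-IRQ context, so disabling bottom halves is enough; spin_lock_irq() was stronger than needed, and its unlock side unconditionally re-enables interrupts, which is unsafe if a caller ever held them disabled. The same conversion repeats in raw.c and udp.c below. A minimal sketch of the pattern, with illustrative names not taken from the patch:

	#include <linux/skbuff.h>
	#include <linux/spinlock.h>

	/* Sketch: a queue shared only between process context and
	 * softirq context needs BH, not IRQ, protection. */
	static void drain_one(struct sk_buff_head *q)
	{
		struct sk_buff *skb;

		spin_lock_bh(&q->lock);		/* masks local softirqs */
		skb = __skb_dequeue(q);		/* lock held: __ variant */
		spin_unlock_bh(&q->lock);

		if (skb)
			kfree_skb(skb);
	}
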
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index be7095d6babe..324db62515a2 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -296,7 +296,7 @@ static void esp6_destroy(struct xfrm_state *x)
296 kfree(esp); 296 kfree(esp);
297} 297}
298 298
299static int esp6_init_state(struct xfrm_state *x, void *args) 299static int esp6_init_state(struct xfrm_state *x)
300{ 300{
301 struct esp_data *esp = NULL; 301 struct esp_data *esp = NULL;
302 302
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8e0f569b883e..ff3ec9822e36 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -277,8 +277,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
277{ 277{
278 struct inet6_dev *idev = NULL; 278 struct inet6_dev *idev = NULL;
279 struct ipv6hdr *hdr = skb->nh.ipv6h; 279 struct ipv6hdr *hdr = skb->nh.ipv6h;
280 struct sock *sk = icmpv6_socket->sk; 280 struct sock *sk;
281 struct ipv6_pinfo *np = inet6_sk(sk); 281 struct ipv6_pinfo *np;
282 struct in6_addr *saddr = NULL; 282 struct in6_addr *saddr = NULL;
283 struct dst_entry *dst; 283 struct dst_entry *dst;
284 struct icmp6hdr tmp_hdr; 284 struct icmp6hdr tmp_hdr;
@@ -358,6 +358,9 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
358 if (icmpv6_xmit_lock()) 358 if (icmpv6_xmit_lock())
359 return; 359 return;
360 360
361 sk = icmpv6_socket->sk;
362 np = inet6_sk(sk);
363
361 if (!icmpv6_xrlim_allow(sk, type, &fl)) 364 if (!icmpv6_xrlim_allow(sk, type, &fl))
362 goto out; 365 goto out;
363 366
@@ -423,9 +426,9 @@ out:
423 426
424static void icmpv6_echo_reply(struct sk_buff *skb) 427static void icmpv6_echo_reply(struct sk_buff *skb)
425{ 428{
426 struct sock *sk = icmpv6_socket->sk; 429 struct sock *sk;
427 struct inet6_dev *idev; 430 struct inet6_dev *idev;
428 struct ipv6_pinfo *np = inet6_sk(sk); 431 struct ipv6_pinfo *np;
429 struct in6_addr *saddr = NULL; 432 struct in6_addr *saddr = NULL;
430 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw; 433 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
431 struct icmp6hdr tmp_hdr; 434 struct icmp6hdr tmp_hdr;
@@ -454,6 +457,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
454 if (icmpv6_xmit_lock()) 457 if (icmpv6_xmit_lock())
455 return; 458 return;
456 459
460 sk = icmpv6_socket->sk;
461 np = inet6_sk(sk);
462
457 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 463 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
458 fl.oif = np->mcast_oif; 464 fl.oif = np->mcast_oif;
459 465
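
icmpv6_socket appears to be a per-CPU socket in this era, and icmpv6_xmit_lock() disables bottom halves before trylocking, pinning the caller to one CPU. Loading icmpv6_socket->sk at the top of the function could therefore, under preemption, read a different CPU's socket than the one the lock later protects. The fix is pure ordering, mirrored in both hunks above (fragment only, not a complete function):

	/* Lock first, then resolve this CPU's socket. */
	if (icmpv6_xmit_lock())
		return;

	sk = icmpv6_socket->sk;	/* now pinned to this CPU's socket */
	np = inet6_sk(sk);
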
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index a93f6dc51979..0e5f7499debb 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -535,10 +535,12 @@ release:
535 if (err) 535 if (err)
536 goto done; 536 goto done;
537 537
538 /* Do not check for fault */ 538 if (!freq.flr_label) {
539 if (!freq.flr_label) 539 if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
540 copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, 540 &fl->label, sizeof(fl->label))) {
541 &fl->label, sizeof(fl->label)); 541 /* Intentionally ignore fault. */
542 }
543 }
542 544
543 sfl1->fl = fl; 545 sfl1->fl = fl;
544 sfl1->next = np->ipv6_fl_list; 546 sfl1->next = np->ipv6_fl_list;
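
The flowlabel change keeps the old semantics (a fault while reporting the allocated label back to userspace is deliberately not an error) but checks copy_to_user()'s return value inside an explicit branch instead of silently discarding it behind a comment. The same pattern in isolation, with a hypothetical helper name:

	#include <asm/uaccess.h>

	/* Hypothetical helper: best-effort write-back where a fault
	 * is tolerated by design. */
	static void report_label_best_effort(__u32 __user *dst, __u32 label)
	{
		if (copy_to_user(dst, &label, sizeof(label))) {
			/* Fault intentionally ignored: the label is
			 * installed either way; userspace merely fails
			 * to learn it. */
		}
	}
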
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3b1c9fa184ae..ba3b0c267f75 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -882,6 +882,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
882 t->parms.hop_limit = p->hop_limit; 882 t->parms.hop_limit = p->hop_limit;
883 t->parms.encap_limit = p->encap_limit; 883 t->parms.encap_limit = p->encap_limit;
884 t->parms.flowinfo = p->flowinfo; 884 t->parms.flowinfo = p->flowinfo;
885 t->parms.link = p->link;
885 ip6ip6_tnl_link_config(t); 886 ip6ip6_tnl_link_config(t);
886 return 0; 887 return 0;
887} 888}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 6cde5310cd76..423feb46ccc0 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -234,14 +234,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
234 t->props.mode = 1; 234 t->props.mode = 1;
235 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); 235 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
236 236
237 t->type = xfrm_get_type(IPPROTO_IPV6, t->props.family); 237 if (xfrm_init_state(t))
238 if (t->type == NULL)
239 goto error; 238 goto error;
240 239
241 if (t->type->init_state(t, NULL))
242 goto error;
243
244 t->km.state = XFRM_STATE_VALID;
245 atomic_set(&t->tunnel_users, 1); 240 atomic_set(&t->tunnel_users, 1);
246 241
247out: 242out:
@@ -420,7 +415,7 @@ static void ipcomp6_destroy(struct xfrm_state *x)
420 xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); 415 xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
421} 416}
422 417
423static int ipcomp6_init_state(struct xfrm_state *x, void *args) 418static int ipcomp6_init_state(struct xfrm_state *x)
424{ 419{
425 int err; 420 int err;
426 struct ipcomp_data *ipcd; 421 struct ipcomp_data *ipcd;
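
Here, and again in af_key.c further down, the open-coded three-step bring-up of an xfrm_state (look up the type by protocol and family, run its init_state hook, whose void *args parameter this series drops, and mark the state valid) collapses into a single xfrm_init_state() call. A plausible reading of what that helper centralizes, reconstructed from the code being deleted rather than from its real definition:

	/* Sketch only: assumes xfrm_init_state() simply centralizes
	 * the steps removed above; the real helper may do more. */
	int xfrm_init_state(struct xfrm_state *x)
	{
		x->type = xfrm_get_type(x->id.proto, x->props.family);
		if (x->type == NULL)
			return -ENOPROTOOPT;

		if (x->type->init_state(x))
			return -EINVAL;

		x->km.state = XFRM_STATE_VALID;
		return 0;
	}
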
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
index 2f4c91ddc9a3..5ade5a5d1990 100644
--- a/net/ipv6/ipv6_syms.c
+++ b/net/ipv6/ipv6_syms.c
@@ -37,5 +37,4 @@ EXPORT_SYMBOL(in6_dev_finish_destroy);
37EXPORT_SYMBOL(xfrm6_rcv); 37EXPORT_SYMBOL(xfrm6_rcv);
38#endif 38#endif
39EXPORT_SYMBOL(rt6_lookup); 39EXPORT_SYMBOL(rt6_lookup);
40EXPORT_SYMBOL(fl6_sock_lookup);
41EXPORT_SYMBOL(ipv6_push_nfrag_opts); 40EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 617645bc5ed6..e2b848ec9851 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -434,12 +434,12 @@ csum_copy_err:
434 /* Clear queue. */ 434 /* Clear queue. */
435 if (flags&MSG_PEEK) { 435 if (flags&MSG_PEEK) {
436 int clear = 0; 436 int clear = 0;
437 spin_lock_irq(&sk->sk_receive_queue.lock); 437 spin_lock_bh(&sk->sk_receive_queue.lock);
438 if (skb == skb_peek(&sk->sk_receive_queue)) { 438 if (skb == skb_peek(&sk->sk_receive_queue)) {
439 __skb_unlink(skb, &sk->sk_receive_queue); 439 __skb_unlink(skb, &sk->sk_receive_queue);
440 clear = 1; 440 clear = 1;
441 } 441 }
442 spin_unlock_irq(&sk->sk_receive_queue.lock); 442 spin_unlock_bh(&sk->sk_receive_queue.lock);
443 if (clear) 443 if (clear)
444 kfree_skb(skb); 444 kfree_skb(skb);
445 } 445 }
@@ -971,11 +971,11 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
971 struct sk_buff *skb; 971 struct sk_buff *skb;
972 int amount = 0; 972 int amount = 0;
973 973
974 spin_lock_irq(&sk->sk_receive_queue.lock); 974 spin_lock_bh(&sk->sk_receive_queue.lock);
975 skb = skb_peek(&sk->sk_receive_queue); 975 skb = skb_peek(&sk->sk_receive_queue);
976 if (skb != NULL) 976 if (skb != NULL)
977 amount = skb->tail - skb->h.raw; 977 amount = skb->tail - skb->h.raw;
978 spin_unlock_irq(&sk->sk_receive_queue.lock); 978 spin_unlock_bh(&sk->sk_receive_queue.lock);
979 return put_user(amount, (int __user *)arg); 979 return put_user(amount, (int __user *)arg);
980 } 980 }
981 981
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3bf8a0254f81..1f5b226c3573 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1570,7 +1570,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1570 struct in6_addr *src, 1570 struct in6_addr *src,
1571 int iif, 1571 int iif,
1572 int type, u32 pid, u32 seq, 1572 int type, u32 pid, u32 seq,
1573 struct nlmsghdr *in_nlh, int prefix) 1573 struct nlmsghdr *in_nlh, int prefix,
1574 unsigned int flags)
1574{ 1575{
1575 struct rtmsg *rtm; 1576 struct rtmsg *rtm;
1576 struct nlmsghdr *nlh; 1577 struct nlmsghdr *nlh;
@@ -1588,7 +1589,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1588 pid = in_nlh->nlmsg_pid; 1589 pid = in_nlh->nlmsg_pid;
1589 } 1590 }
1590 1591
1591 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm)); 1592 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1592 rtm = NLMSG_DATA(nlh); 1593 rtm = NLMSG_DATA(nlh);
1593 rtm->rtm_family = AF_INET6; 1594 rtm->rtm_family = AF_INET6;
1594 rtm->rtm_dst_len = rt->rt6i_dst.plen; 1595 rtm->rtm_dst_len = rt->rt6i_dst.plen;
@@ -1674,7 +1675,7 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1674 1675
1675 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 1676 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1676 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 1677 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1677 NULL, prefix); 1678 NULL, prefix, NLM_F_MULTI);
1678} 1679}
1679 1680
1680static int fib6_dump_node(struct fib6_walker_t *w) 1681static int fib6_dump_node(struct fib6_walker_t *w)
@@ -1822,7 +1823,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1822 &fl.fl6_dst, &fl.fl6_src, 1823 &fl.fl6_dst, &fl.fl6_src,
1823 iif, 1824 iif,
1824 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 1825 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825 nlh->nlmsg_seq, nlh, 0); 1826 nlh->nlmsg_seq, nlh, 0, 0);
1826 if (err < 0) { 1827 if (err < 0) {
1827 err = -EMSGSIZE; 1828 err = -EMSGSIZE;
1828 goto out_free; 1829 goto out_free;
@@ -1848,7 +1849,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1848 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); 1849 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1849 return; 1850 return;
1850 } 1851 }
1851 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) { 1852 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0, 0) < 0) {
1852 kfree_skb(skb); 1853 kfree_skb(skb);
1853 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); 1854 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1854 return; 1855 return;
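
rt6_fill_node() gains the same flags parameter: the fib dump path (rt6_dump_route) passes NLM_F_MULTI, while the single RTM_GETROUTE reply and inet6_rt_notify() pass 0. The distinction is what userspace keys off: a dump is a multipart stream terminated by NLMSG_DONE, a get is one message. A consumer-side sketch (process_msg is a hypothetical callback):

	#include <linux/netlink.h>

	/* Walk a netlink reply buffer of len bytes. */
	static void walk_reply(void *buf, int len,
			       void (*process_msg)(struct nlmsghdr *))
	{
		struct nlmsghdr *nlh;

		for (nlh = buf; NLMSG_OK(nlh, len);
		     nlh = NLMSG_NEXT(nlh, len)) {
			if (nlh->nlmsg_type == NLMSG_DONE)
				return;	/* end of a multipart dump */
			process_msg(nlh);
			if (!(nlh->nlmsg_flags & NLM_F_MULTI))
				return;	/* single reply: nothing follows */
		}
	}
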
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0f69e800a0ad..2414937f2a83 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -65,7 +65,7 @@
65#include <linux/seq_file.h> 65#include <linux/seq_file.h>
66 66
67static void tcp_v6_send_reset(struct sk_buff *skb); 67static void tcp_v6_send_reset(struct sk_buff *skb);
68static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req); 68static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 69static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
70 struct sk_buff *skb); 70 struct sk_buff *skb);
71 71
@@ -394,24 +394,26 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
394 return c & (TCP_SYNQ_HSIZE - 1); 394 return c & (TCP_SYNQ_HSIZE - 1);
395} 395}
396 396
397static struct open_request *tcp_v6_search_req(struct tcp_sock *tp, 397static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
398 struct open_request ***prevp, 398 struct request_sock ***prevp,
399 __u16 rport, 399 __u16 rport,
400 struct in6_addr *raddr, 400 struct in6_addr *raddr,
401 struct in6_addr *laddr, 401 struct in6_addr *laddr,
402 int iif) 402 int iif)
403{ 403{
404 struct tcp_listen_opt *lopt = tp->listen_opt; 404 struct listen_sock *lopt = tp->accept_queue.listen_opt;
405 struct open_request *req, **prev; 405 struct request_sock *req, **prev;
406 406
407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; 407 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
408 (req = *prev) != NULL; 408 (req = *prev) != NULL;
409 prev = &req->dl_next) { 409 prev = &req->dl_next) {
410 if (req->rmt_port == rport && 410 const struct tcp6_request_sock *treq = tcp6_rsk(req);
411 req->class->family == AF_INET6 && 411
412 ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) && 412 if (inet_rsk(req)->rmt_port == rport &&
413 ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) && 413 req->rsk_ops->family == AF_INET6 &&
414 (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) { 414 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
415 ipv6_addr_equal(&treq->loc_addr, laddr) &&
416 (!treq->iif || treq->iif == iif)) {
415 BUG_TRAP(req->sk == NULL); 417 BUG_TRAP(req->sk == NULL);
416 *prevp = prev; 418 *prevp = prev;
417 return req; 419 return req;
@@ -906,9 +908,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
906 908
907 icmpv6_err_convert(type, code, &err); 909 icmpv6_err_convert(type, code, &err);
908 910
909 /* Might be for an open_request */ 911 /* Might be for a request_sock */
910 switch (sk->sk_state) { 912 switch (sk->sk_state) {
911 struct open_request *req, **prev; 913 struct request_sock *req, **prev;
912 case TCP_LISTEN: 914 case TCP_LISTEN:
913 if (sock_owned_by_user(sk)) 915 if (sock_owned_by_user(sk))
914 goto out; 916 goto out;
@@ -923,7 +925,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
923 */ 925 */
924 BUG_TRAP(req->sk == NULL); 926 BUG_TRAP(req->sk == NULL);
925 927
926 if (seq != req->snt_isn) { 928 if (seq != tcp_rsk(req)->snt_isn) {
927 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 929 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
928 goto out; 930 goto out;
929 } 931 }
@@ -957,9 +959,10 @@ out:
957} 959}
958 960
959 961
960static int tcp_v6_send_synack(struct sock *sk, struct open_request *req, 962static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
961 struct dst_entry *dst) 963 struct dst_entry *dst)
962{ 964{
965 struct tcp6_request_sock *treq = tcp6_rsk(req);
963 struct ipv6_pinfo *np = inet6_sk(sk); 966 struct ipv6_pinfo *np = inet6_sk(sk);
964 struct sk_buff * skb; 967 struct sk_buff * skb;
965 struct ipv6_txoptions *opt = NULL; 968 struct ipv6_txoptions *opt = NULL;
@@ -969,19 +972,19 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
969 972
970 memset(&fl, 0, sizeof(fl)); 973 memset(&fl, 0, sizeof(fl));
971 fl.proto = IPPROTO_TCP; 974 fl.proto = IPPROTO_TCP;
972 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); 975 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
973 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr); 976 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
974 fl.fl6_flowlabel = 0; 977 fl.fl6_flowlabel = 0;
975 fl.oif = req->af.v6_req.iif; 978 fl.oif = treq->iif;
976 fl.fl_ip_dport = req->rmt_port; 979 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
977 fl.fl_ip_sport = inet_sk(sk)->sport; 980 fl.fl_ip_sport = inet_sk(sk)->sport;
978 981
979 if (dst == NULL) { 982 if (dst == NULL) {
980 opt = np->opt; 983 opt = np->opt;
981 if (opt == NULL && 984 if (opt == NULL &&
982 np->rxopt.bits.srcrt == 2 && 985 np->rxopt.bits.srcrt == 2 &&
983 req->af.v6_req.pktopts) { 986 treq->pktopts) {
984 struct sk_buff *pktopts = req->af.v6_req.pktopts; 987 struct sk_buff *pktopts = treq->pktopts;
985 struct inet6_skb_parm *rxopt = IP6CB(pktopts); 988 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
986 if (rxopt->srcrt) 989 if (rxopt->srcrt)
987 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt)); 990 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
@@ -1008,10 +1011,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
1008 struct tcphdr *th = skb->h.th; 1011 struct tcphdr *th = skb->h.th;
1009 1012
1010 th->check = tcp_v6_check(th, skb->len, 1013 th->check = tcp_v6_check(th, skb->len,
1011 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr, 1014 &treq->loc_addr, &treq->rmt_addr,
1012 csum_partial((char *)th, skb->len, skb->csum)); 1015 csum_partial((char *)th, skb->len, skb->csum));
1013 1016
1014 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); 1017 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1015 err = ip6_xmit(sk, skb, &fl, opt, 0); 1018 err = ip6_xmit(sk, skb, &fl, opt, 0);
1016 if (err == NET_XMIT_CN) 1019 if (err == NET_XMIT_CN)
1017 err = 0; 1020 err = 0;
@@ -1024,17 +1027,18 @@ done:
1024 return err; 1027 return err;
1025} 1028}
1026 1029
1027static void tcp_v6_or_free(struct open_request *req) 1030static void tcp_v6_reqsk_destructor(struct request_sock *req)
1028{ 1031{
1029 if (req->af.v6_req.pktopts) 1032 if (tcp6_rsk(req)->pktopts)
1030 kfree_skb(req->af.v6_req.pktopts); 1033 kfree_skb(tcp6_rsk(req)->pktopts);
1031} 1034}
1032 1035
1033static struct or_calltable or_ipv6 = { 1036static struct request_sock_ops tcp6_request_sock_ops = {
1034 .family = AF_INET6, 1037 .family = AF_INET6,
1038 .obj_size = sizeof(struct tcp6_request_sock),
1035 .rtx_syn_ack = tcp_v6_send_synack, 1039 .rtx_syn_ack = tcp_v6_send_synack,
1036 .send_ack = tcp_v6_or_send_ack, 1040 .send_ack = tcp_v6_reqsk_send_ack,
1037 .destructor = tcp_v6_or_free, 1041 .destructor = tcp_v6_reqsk_destructor,
1038 .send_reset = tcp_v6_send_reset 1042 .send_reset = tcp_v6_send_reset
1039}; 1043};
1040 1044
@@ -1219,15 +1223,15 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1219 tcp_tw_put(tw); 1223 tcp_tw_put(tw);
1220} 1224}
1221 1225
1222static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req) 1226static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1223{ 1227{
1224 tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent); 1228 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1225} 1229}
1226 1230
1227 1231
1228static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) 1232static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1229{ 1233{
1230 struct open_request *req, **prev; 1234 struct request_sock *req, **prev;
1231 struct tcphdr *th = skb->h.th; 1235 struct tcphdr *th = skb->h.th;
1232 struct tcp_sock *tp = tcp_sk(sk); 1236 struct tcp_sock *tp = tcp_sk(sk);
1233 struct sock *nsk; 1237 struct sock *nsk;
@@ -1260,21 +1264,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1260 return sk; 1264 return sk;
1261} 1265}
1262 1266
1263static void tcp_v6_synq_add(struct sock *sk, struct open_request *req) 1267static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1264{ 1268{
1265 struct tcp_sock *tp = tcp_sk(sk); 1269 struct tcp_sock *tp = tcp_sk(sk);
1266 struct tcp_listen_opt *lopt = tp->listen_opt; 1270 struct listen_sock *lopt = tp->accept_queue.listen_opt;
1267 u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd); 1271 u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1268
1269 req->sk = NULL;
1270 req->expires = jiffies + TCP_TIMEOUT_INIT;
1271 req->retrans = 0;
1272 req->dl_next = lopt->syn_table[h];
1273
1274 write_lock(&tp->syn_wait_lock);
1275 lopt->syn_table[h] = req;
1276 write_unlock(&tp->syn_wait_lock);
1277 1272
1273 reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
1278 tcp_synq_added(sk); 1274 tcp_synq_added(sk);
1279} 1275}
1280 1276
@@ -1284,10 +1280,11 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
1284 */ 1280 */
1285static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1281static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1286{ 1282{
1283 struct tcp6_request_sock *treq;
1287 struct ipv6_pinfo *np = inet6_sk(sk); 1284 struct ipv6_pinfo *np = inet6_sk(sk);
1288 struct tcp_options_received tmp_opt; 1285 struct tcp_options_received tmp_opt;
1289 struct tcp_sock *tp = tcp_sk(sk); 1286 struct tcp_sock *tp = tcp_sk(sk);
1290 struct open_request *req = NULL; 1287 struct request_sock *req = NULL;
1291 __u32 isn = TCP_SKB_CB(skb)->when; 1288 __u32 isn = TCP_SKB_CB(skb)->when;
1292 1289
1293 if (skb->protocol == htons(ETH_P_IP)) 1290 if (skb->protocol == htons(ETH_P_IP))
@@ -1308,7 +1305,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1308 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) 1305 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1309 goto drop; 1306 goto drop;
1310 1307
1311 req = tcp_openreq_alloc(); 1308 req = reqsk_alloc(&tcp6_request_sock_ops);
1312 if (req == NULL) 1309 if (req == NULL)
1313 goto drop; 1310 goto drop;
1314 1311
@@ -1321,28 +1318,28 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1321 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 1318 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1322 tcp_openreq_init(req, &tmp_opt, skb); 1319 tcp_openreq_init(req, &tmp_opt, skb);
1323 1320
1324 req->class = &or_ipv6; 1321 treq = tcp6_rsk(req);
1325 ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr); 1322 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1326 ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr); 1323 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1327 TCP_ECN_create_request(req, skb->h.th); 1324 TCP_ECN_create_request(req, skb->h.th);
1328 req->af.v6_req.pktopts = NULL; 1325 treq->pktopts = NULL;
1329 if (ipv6_opt_accepted(sk, skb) || 1326 if (ipv6_opt_accepted(sk, skb) ||
1330 np->rxopt.bits.rxinfo || 1327 np->rxopt.bits.rxinfo ||
1331 np->rxopt.bits.rxhlim) { 1328 np->rxopt.bits.rxhlim) {
1332 atomic_inc(&skb->users); 1329 atomic_inc(&skb->users);
1333 req->af.v6_req.pktopts = skb; 1330 treq->pktopts = skb;
1334 } 1331 }
1335 req->af.v6_req.iif = sk->sk_bound_dev_if; 1332 treq->iif = sk->sk_bound_dev_if;
1336 1333
1337 /* So that link locals have meaning */ 1334 /* So that link locals have meaning */
1338 if (!sk->sk_bound_dev_if && 1335 if (!sk->sk_bound_dev_if &&
1339 ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL) 1336 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1340 req->af.v6_req.iif = tcp_v6_iif(skb); 1337 treq->iif = tcp_v6_iif(skb);
1341 1338
1342 if (isn == 0) 1339 if (isn == 0)
1343 isn = tcp_v6_init_sequence(sk,skb); 1340 isn = tcp_v6_init_sequence(sk,skb);
1344 1341
1345 req->snt_isn = isn; 1342 tcp_rsk(req)->snt_isn = isn;
1346 1343
1347 if (tcp_v6_send_synack(sk, req, NULL)) 1344 if (tcp_v6_send_synack(sk, req, NULL))
1348 goto drop; 1345 goto drop;
@@ -1353,16 +1350,17 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1353 1350
1354drop: 1351drop:
1355 if (req) 1352 if (req)
1356 tcp_openreq_free(req); 1353 reqsk_free(req);
1357 1354
1358 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 1355 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1359 return 0; /* don't send reset */ 1356 return 0; /* don't send reset */
1360} 1357}
1361 1358
1362static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 1359static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1363 struct open_request *req, 1360 struct request_sock *req,
1364 struct dst_entry *dst) 1361 struct dst_entry *dst)
1365{ 1362{
1363 struct tcp6_request_sock *treq = tcp6_rsk(req);
1366 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 1364 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1367 struct tcp6_sock *newtcp6sk; 1365 struct tcp6_sock *newtcp6sk;
1368 struct inet_sock *newinet; 1366 struct inet_sock *newinet;
@@ -1426,10 +1424,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1426 goto out_overflow; 1424 goto out_overflow;
1427 1425
1428 if (np->rxopt.bits.srcrt == 2 && 1426 if (np->rxopt.bits.srcrt == 2 &&
1429 opt == NULL && req->af.v6_req.pktopts) { 1427 opt == NULL && treq->pktopts) {
1430 struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts); 1428 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1431 if (rxopt->srcrt) 1429 if (rxopt->srcrt)
1432 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt)); 1430 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1433 } 1431 }
1434 1432
1435 if (dst == NULL) { 1433 if (dst == NULL) {
@@ -1438,16 +1436,16 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1438 1436
1439 memset(&fl, 0, sizeof(fl)); 1437 memset(&fl, 0, sizeof(fl));
1440 fl.proto = IPPROTO_TCP; 1438 fl.proto = IPPROTO_TCP;
1441 ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr); 1439 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1442 if (opt && opt->srcrt) { 1440 if (opt && opt->srcrt) {
1443 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; 1441 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1444 ipv6_addr_copy(&final, &fl.fl6_dst); 1442 ipv6_addr_copy(&final, &fl.fl6_dst);
1445 ipv6_addr_copy(&fl.fl6_dst, rt0->addr); 1443 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1446 final_p = &final; 1444 final_p = &final;
1447 } 1445 }
1448 ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr); 1446 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1449 fl.oif = sk->sk_bound_dev_if; 1447 fl.oif = sk->sk_bound_dev_if;
1450 fl.fl_ip_dport = req->rmt_port; 1448 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1451 fl.fl_ip_sport = inet_sk(sk)->sport; 1449 fl.fl_ip_sport = inet_sk(sk)->sport;
1452 1450
1453 if (ip6_dst_lookup(sk, &dst, &fl)) 1451 if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1482,10 +1480,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1482 1480
1483 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1481 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1484 1482
1485 ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr); 1483 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1486 ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr); 1484 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1487 ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr); 1485 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1488 newsk->sk_bound_dev_if = req->af.v6_req.iif; 1486 newsk->sk_bound_dev_if = treq->iif;
1489 1487
1490 /* Now IPv6 options... 1488 /* Now IPv6 options...
1491 1489
@@ -1498,11 +1496,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1498 1496
1499 /* Clone pktoptions received with SYN */ 1497 /* Clone pktoptions received with SYN */
1500 newnp->pktoptions = NULL; 1498 newnp->pktoptions = NULL;
1501 if (req->af.v6_req.pktopts) { 1499 if (treq->pktopts != NULL) {
1502 newnp->pktoptions = skb_clone(req->af.v6_req.pktopts, 1500 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1503 GFP_ATOMIC); 1501 kfree_skb(treq->pktopts);
1504 kfree_skb(req->af.v6_req.pktopts); 1502 treq->pktopts = NULL;
1505 req->af.v6_req.pktopts = NULL;
1506 if (newnp->pktoptions) 1503 if (newnp->pktoptions)
1507 skb_set_owner_r(newnp->pktoptions, newsk); 1504 skb_set_owner_r(newnp->pktoptions, newsk);
1508 } 1505 }
@@ -2050,7 +2047,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
2050 2047
2051/* Proc filesystem TCPv6 sock list dumping. */ 2048/* Proc filesystem TCPv6 sock list dumping. */
2052static void get_openreq6(struct seq_file *seq, 2049static void get_openreq6(struct seq_file *seq,
2053 struct sock *sk, struct open_request *req, int i, int uid) 2050 struct sock *sk, struct request_sock *req, int i, int uid)
2054{ 2051{
2055 struct in6_addr *dest, *src; 2052 struct in6_addr *dest, *src;
2056 int ttd = req->expires - jiffies; 2053 int ttd = req->expires - jiffies;
@@ -2058,8 +2055,8 @@ static void get_openreq6(struct seq_file *seq,
2058 if (ttd < 0) 2055 if (ttd < 0)
2059 ttd = 0; 2056 ttd = 0;
2060 2057
2061 src = &req->af.v6_req.loc_addr; 2058 src = &tcp6_rsk(req)->loc_addr;
2062 dest = &req->af.v6_req.rmt_addr; 2059 dest = &tcp6_rsk(req)->rmt_addr;
2063 seq_printf(seq, 2060 seq_printf(seq,
2064 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2061 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2065 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", 2062 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -2069,7 +2066,7 @@ static void get_openreq6(struct seq_file *seq,
2069 ntohs(inet_sk(sk)->sport), 2066 ntohs(inet_sk(sk)->sport),
2070 dest->s6_addr32[0], dest->s6_addr32[1], 2067 dest->s6_addr32[0], dest->s6_addr32[1],
2071 dest->s6_addr32[2], dest->s6_addr32[3], 2068 dest->s6_addr32[2], dest->s6_addr32[3],
2072 ntohs(req->rmt_port), 2069 ntohs(inet_rsk(req)->rmt_port),
2073 TCP_SYN_RECV, 2070 TCP_SYN_RECV,
2074 0,0, /* could print option size, but that is af dependent. */ 2071 0,0, /* could print option size, but that is af dependent. */
2075 1, /* timers active (only the expire timer) */ 2072 1, /* timers active (only the expire timer) */
@@ -2239,6 +2236,7 @@ struct proto tcpv6_prot = {
2239 .sysctl_rmem = sysctl_tcp_rmem, 2236 .sysctl_rmem = sysctl_tcp_rmem,
2240 .max_header = MAX_TCP_HEADER, 2237 .max_header = MAX_TCP_HEADER,
2241 .obj_size = sizeof(struct tcp6_sock), 2238 .obj_size = sizeof(struct tcp6_sock),
2239 .rsk_prot = &tcp6_request_sock_ops,
2242}; 2240};
2243 2241
2244static struct inet6_protocol tcpv6_protocol = { 2242static struct inet6_protocol tcpv6_protocol = {
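
The tcp_ipv6.c hunks are the IPv6 half of the open_request to request_sock conversion (net/core/request_sock.c in the diffstat is the new common code): the old af-dependent union req->af.v6_req becomes a nesting of structs reached through cast accessors, and each protocol publishes request_sock_ops with an obj_size so reqsk_alloc() can size the object generically. The layering implied by the accessors used above; field placement follows this diff, definitions are illustrative rather than verbatim:

	/* Assumed nesting: request_sock < inet_request_sock <
	 * tcp_request_sock < tcp6_request_sock, with the accessors
	 * as plain downcasts. */
	struct inet_request_sock {
		struct request_sock req;
		__u16 rmt_port;			/* inet_rsk(req)->rmt_port */
		/* ... */
	};

	struct tcp_request_sock {
		struct inet_request_sock req;
		__u32 rcv_isn;			/* tcp_rsk(req)->rcv_isn */
		__u32 snt_isn;
	};

	struct tcp6_request_sock {
		struct tcp_request_sock req;
		struct in6_addr loc_addr;	/* tcp6_rsk(req)->loc_addr */
		struct in6_addr rmt_addr;
		struct sk_buff *pktopts;
		int iif;
	};

	static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *sk)
	{
		return (struct tcp6_request_sock *)sk;
	}
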
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e251d0ba4f39..eff050ac7049 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -300,12 +300,12 @@ csum_copy_err:
300 /* Clear queue. */ 300 /* Clear queue. */
301 if (flags&MSG_PEEK) { 301 if (flags&MSG_PEEK) {
302 int clear = 0; 302 int clear = 0;
303 spin_lock_irq(&sk->sk_receive_queue.lock); 303 spin_lock_bh(&sk->sk_receive_queue.lock);
304 if (skb == skb_peek(&sk->sk_receive_queue)) { 304 if (skb == skb_peek(&sk->sk_receive_queue)) {
305 __skb_unlink(skb, &sk->sk_receive_queue); 305 __skb_unlink(skb, &sk->sk_receive_queue);
306 clear = 1; 306 clear = 1;
307 } 307 }
308 spin_unlock_irq(&sk->sk_receive_queue.lock); 308 spin_unlock_bh(&sk->sk_receive_queue.lock);
309 if (clear) 309 if (clear)
310 kfree_skb(skb); 310 kfree_skb(skb);
311 } 311 }
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index ffcadd68b951..60c26c87277e 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -466,7 +466,7 @@ static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
466 return; 466 return;
467} 467}
468 468
469static int xfrm6_tunnel_init_state(struct xfrm_state *x, void *args) 469static int xfrm6_tunnel_init_state(struct xfrm_state *x)
470{ 470{
471 if (!x->props.mode) 471 if (!x->props.mode)
472 return -EINVAL; 472 return -EINVAL;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ce980aa94ed8..4879743b945a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -656,13 +656,18 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
656 sa->sadb_sa_exttype = SADB_EXT_SA; 656 sa->sadb_sa_exttype = SADB_EXT_SA;
657 sa->sadb_sa_spi = x->id.spi; 657 sa->sadb_sa_spi = x->id.spi;
658 sa->sadb_sa_replay = x->props.replay_window; 658 sa->sadb_sa_replay = x->props.replay_window;
659 sa->sadb_sa_state = SADB_SASTATE_DYING; 659 switch (x->km.state) {
660 if (x->km.state == XFRM_STATE_VALID && !x->km.dying) 660 case XFRM_STATE_VALID:
661 sa->sadb_sa_state = SADB_SASTATE_MATURE; 661 sa->sadb_sa_state = x->km.dying ?
662 else if (x->km.state == XFRM_STATE_ACQ) 662 SADB_SASTATE_DYING : SADB_SASTATE_MATURE;
663 break;
664 case XFRM_STATE_ACQ:
663 sa->sadb_sa_state = SADB_SASTATE_LARVAL; 665 sa->sadb_sa_state = SADB_SASTATE_LARVAL;
664 else if (x->km.state == XFRM_STATE_EXPIRED) 666 break;
667 default:
665 sa->sadb_sa_state = SADB_SASTATE_DEAD; 668 sa->sadb_sa_state = SADB_SASTATE_DEAD;
669 break;
670 }
666 sa->sadb_sa_auth = 0; 671 sa->sadb_sa_auth = 0;
667 if (x->aalg) { 672 if (x->aalg) {
668 struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0); 673 struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
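
The switch above replaces an if/else chain whose fallback was SADB_SASTATE_DYING, making the XFRM-to-SADB mapping explicit and total: VALID splits on km.dying, ACQ stays LARVAL, and every other state (not just EXPIRED) now reports DEAD rather than the old default DYING. Condensed into a helper with a hypothetical name:

	/* Equivalent mapping as a helper (hypothetical name). */
	static uint8_t xfrm_to_sadb_state(int km_state, int dying)
	{
		switch (km_state) {
		case XFRM_STATE_VALID:
			return dying ? SADB_SASTATE_DYING
				     : SADB_SASTATE_MATURE;
		case XFRM_STATE_ACQ:
			return SADB_SASTATE_LARVAL;
		default:	/* EXPIRED, DEAD, ... */
			return SADB_SASTATE_DEAD;
		}
	}
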
@@ -685,6 +690,8 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
685 sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN; 690 sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN;
686 if (x->props.flags & XFRM_STATE_DECAP_DSCP) 691 if (x->props.flags & XFRM_STATE_DECAP_DSCP)
687 sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP; 692 sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP;
693 if (x->props.flags & XFRM_STATE_NOPMTUDISC)
694 sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC;
688 695
689 /* hard time */ 696 /* hard time */
690 if (hsc & 2) { 697 if (hsc & 2) {
@@ -969,6 +976,8 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
969 x->props.flags |= XFRM_STATE_NOECN; 976 x->props.flags |= XFRM_STATE_NOECN;
970 if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) 977 if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP)
971 x->props.flags |= XFRM_STATE_DECAP_DSCP; 978 x->props.flags |= XFRM_STATE_DECAP_DSCP;
979 if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC)
980 x->props.flags |= XFRM_STATE_NOPMTUDISC;
972 981
973 lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; 982 lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
974 if (lifetime != NULL) { 983 if (lifetime != NULL) {
@@ -1091,17 +1100,11 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
1091 } 1100 }
1092 } 1101 }
1093 1102
1094 x->type = xfrm_get_type(proto, x->props.family); 1103 err = xfrm_init_state(x);
1095 if (x->type == NULL) { 1104 if (err)
1096 err = -ENOPROTOOPT;
1097 goto out;
1098 }
1099 if (x->type->init_state(x, NULL)) {
1100 err = -EINVAL;
1101 goto out; 1105 goto out;
1102 } 1106
1103 x->km.seq = hdr->sadb_msg_seq; 1107 x->km.seq = hdr->sadb_msg_seq;
1104 x->km.state = XFRM_STATE_VALID;
1105 return x; 1108 return x;
1106 1109
1107out: 1110out:
@@ -1240,13 +1243,78 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
1240 return 0; 1243 return 0;
1241} 1244}
1242 1245
1246static inline int event2poltype(int event)
1247{
1248 switch (event) {
1249 case XFRM_MSG_DELPOLICY:
1250 return SADB_X_SPDDELETE;
1251 case XFRM_MSG_NEWPOLICY:
1252 return SADB_X_SPDADD;
1253 case XFRM_MSG_UPDPOLICY:
1254 return SADB_X_SPDUPDATE;
1255 case XFRM_MSG_POLEXPIRE:
1256 // return SADB_X_SPDEXPIRE;
1257 default:
1258 printk("pfkey: Unknown policy event %d\n", event);
1259 break;
1260 }
1261
1262 return 0;
1263}
1264
1265static inline int event2keytype(int event)
1266{
1267 switch (event) {
1268 case XFRM_MSG_DELSA:
1269 return SADB_DELETE;
1270 case XFRM_MSG_NEWSA:
1271 return SADB_ADD;
1272 case XFRM_MSG_UPDSA:
1273 return SADB_UPDATE;
1274 case XFRM_MSG_EXPIRE:
1275 return SADB_EXPIRE;
1276 default:
1277 printk("pfkey: Unknown SA event %d\n", event);
1278 break;
1279 }
1280
1281 return 0;
1282}
1283
1284/* ADD/UPD/DEL */
1285static int key_notify_sa(struct xfrm_state *x, struct km_event *c)
1286{
1287 struct sk_buff *skb;
1288 struct sadb_msg *hdr;
1289 int hsc = 3;
1290
1291 if (c->event == XFRM_MSG_DELSA)
1292 hsc = 0;
1293
1294 skb = pfkey_xfrm_state2msg(x, 0, hsc);
1295
1296 if (IS_ERR(skb))
1297 return PTR_ERR(skb);
1298
1299 hdr = (struct sadb_msg *) skb->data;
1300 hdr->sadb_msg_version = PF_KEY_V2;
1301 hdr->sadb_msg_type = event2keytype(c->event);
1302 hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
1303 hdr->sadb_msg_errno = 0;
1304 hdr->sadb_msg_reserved = 0;
1305 hdr->sadb_msg_seq = c->seq;
1306 hdr->sadb_msg_pid = c->pid;
1307
1308 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
1309
1310 return 0;
1311}
1243 1312
1244static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1313static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1245{ 1314{
1246 struct sk_buff *out_skb;
1247 struct sadb_msg *out_hdr;
1248 struct xfrm_state *x; 1315 struct xfrm_state *x;
1249 int err; 1316 int err;
1317 struct km_event c;
1250 1318
1251 xfrm_probe_algs(); 1319 xfrm_probe_algs();
1252 1320
@@ -1254,6 +1322,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
1254 if (IS_ERR(x)) 1322 if (IS_ERR(x))
1255 return PTR_ERR(x); 1323 return PTR_ERR(x);
1256 1324
1325 xfrm_state_hold(x);
1257 if (hdr->sadb_msg_type == SADB_ADD) 1326 if (hdr->sadb_msg_type == SADB_ADD)
1258 err = xfrm_state_add(x); 1327 err = xfrm_state_add(x);
1259 else 1328 else
@@ -1262,30 +1331,26 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
1262 if (err < 0) { 1331 if (err < 0) {
1263 x->km.state = XFRM_STATE_DEAD; 1332 x->km.state = XFRM_STATE_DEAD;
1264 xfrm_state_put(x); 1333 xfrm_state_put(x);
1265 return err; 1334 goto out;
1266 } 1335 }
1267 1336
1268 out_skb = pfkey_xfrm_state2msg(x, 0, 3); 1337 if (hdr->sadb_msg_type == SADB_ADD)
1269 if (IS_ERR(out_skb)) 1338 c.event = XFRM_MSG_NEWSA;
1270 return PTR_ERR(out_skb); /* XXX Should we return 0 here ? */ 1339 else
1271 1340 c.event = XFRM_MSG_UPDSA;
1272 out_hdr = (struct sadb_msg *) out_skb->data; 1341 c.seq = hdr->sadb_msg_seq;
1273 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 1342 c.pid = hdr->sadb_msg_pid;
1274 out_hdr->sadb_msg_type = hdr->sadb_msg_type; 1343 km_state_notify(x, &c);
1275 out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); 1344out:
1276 out_hdr->sadb_msg_errno = 0; 1345 xfrm_state_put(x);
1277 out_hdr->sadb_msg_reserved = 0; 1346 return err;
1278 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
1279 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
1280
1281 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
1282
1283 return 0;
1284} 1347}
1285 1348
1286static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1349static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1287{ 1350{
1288 struct xfrm_state *x; 1351 struct xfrm_state *x;
1352 struct km_event c;
1353 int err;
1289 1354
1290 if (!ext_hdrs[SADB_EXT_SA-1] || 1355 if (!ext_hdrs[SADB_EXT_SA-1] ||
1291 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1356 !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
@@ -1301,13 +1366,19 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1301 return -EPERM; 1366 return -EPERM;
1302 } 1367 }
1303 1368
1304 xfrm_state_delete(x); 1369 err = xfrm_state_delete(x);
1305 xfrm_state_put(x); 1370 if (err < 0) {
1371 xfrm_state_put(x);
1372 return err;
1373 }
1306 1374
1307 pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, 1375 c.seq = hdr->sadb_msg_seq;
1308 BROADCAST_ALL, sk); 1376 c.pid = hdr->sadb_msg_pid;
1377 c.event = XFRM_MSG_DELSA;
1378 km_state_notify(x, &c);
1379 xfrm_state_put(x);
1309 1380
1310 return 0; 1381 return err;
1311} 1382}
1312 1383
1313static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1384static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
@@ -1445,28 +1516,42 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg
1445 return 0; 1516 return 0;
1446} 1517}
1447 1518
1519static int key_notify_sa_flush(struct km_event *c)
1520{
1521 struct sk_buff *skb;
1522 struct sadb_msg *hdr;
1523
1524 skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
1525 if (!skb)
1526 return -ENOBUFS;
1527 hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
1528 hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto);
1529 hdr->sadb_msg_seq = c->seq;
1530 hdr->sadb_msg_pid = c->pid;
1531 hdr->sadb_msg_version = PF_KEY_V2;
1532 hdr->sadb_msg_errno = (uint8_t) 0;
1533 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
1534
1535 pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
1536
1537 return 0;
1538}
1539
1448static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1540static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1449{ 1541{
1450 unsigned proto; 1542 unsigned proto;
1451 struct sk_buff *skb_out; 1543 struct km_event c;
1452 struct sadb_msg *hdr_out;
1453 1544
1454 proto = pfkey_satype2proto(hdr->sadb_msg_satype); 1545 proto = pfkey_satype2proto(hdr->sadb_msg_satype);
1455 if (proto == 0) 1546 if (proto == 0)
1456 return -EINVAL; 1547 return -EINVAL;
1457 1548
1458 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
1459 if (!skb_out)
1460 return -ENOBUFS;
1461
1462 xfrm_state_flush(proto); 1549 xfrm_state_flush(proto);
1463 1550 c.data.proto = proto;
1464 hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); 1551 c.seq = hdr->sadb_msg_seq;
1465 pfkey_hdr_dup(hdr_out, hdr); 1552 c.pid = hdr->sadb_msg_pid;
1466 hdr_out->sadb_msg_errno = (uint8_t) 0; 1553 c.event = XFRM_MSG_FLUSHSA;
1467 hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); 1554 km_state_notify(NULL, &c);
1468
1469 pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
1470 1555
1471 return 0; 1556 return 0;
1472} 1557}
@@ -1859,6 +1944,35 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
1859 hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); 1944 hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
1860} 1945}
1861 1946
1947static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
1948{
1949 struct sk_buff *out_skb;
1950 struct sadb_msg *out_hdr;
1951 int err;
1952
1953 out_skb = pfkey_xfrm_policy2msg_prep(xp);
1954 if (IS_ERR(out_skb)) {
1955 err = PTR_ERR(out_skb);
1956 goto out;
1957 }
1958 pfkey_xfrm_policy2msg(out_skb, xp, dir);
1959
1960 out_hdr = (struct sadb_msg *) out_skb->data;
1961 out_hdr->sadb_msg_version = PF_KEY_V2;
1962
1963 if (c->data.byid && c->event == XFRM_MSG_DELPOLICY)
1964 out_hdr->sadb_msg_type = SADB_X_SPDDELETE2;
1965 else
1966 out_hdr->sadb_msg_type = event2poltype(c->event);
1967 out_hdr->sadb_msg_errno = 0;
1968 out_hdr->sadb_msg_seq = c->seq;
1969 out_hdr->sadb_msg_pid = c->pid;
1970 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
1971out:
1972 return 0;
1973
1974}
1975
1862static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 1976static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
1863{ 1977{
1864 int err; 1978 int err;
@@ -1866,8 +1980,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1866 struct sadb_address *sa; 1980 struct sadb_address *sa;
1867 struct sadb_x_policy *pol; 1981 struct sadb_x_policy *pol;
1868 struct xfrm_policy *xp; 1982 struct xfrm_policy *xp;
1869 struct sk_buff *out_skb; 1983 struct km_event c;
1870 struct sadb_msg *out_hdr;
1871 1984
1872 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 1985 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1873 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || 1986 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -1935,31 +2048,23 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
1935 (err = parse_ipsecrequests(xp, pol)) < 0) 2048 (err = parse_ipsecrequests(xp, pol)) < 0)
1936 goto out; 2049 goto out;
1937 2050
1938 out_skb = pfkey_xfrm_policy2msg_prep(xp);
1939 if (IS_ERR(out_skb)) {
1940 err = PTR_ERR(out_skb);
1941 goto out;
1942 }
1943
1944 err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, 2051 err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
1945 hdr->sadb_msg_type != SADB_X_SPDUPDATE); 2052 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
1946 if (err) { 2053 if (err) {
1947 kfree_skb(out_skb); 2054 kfree(xp);
1948 goto out; 2055 return err;
1949 } 2056 }
1950 2057
1951 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1); 2058 if (hdr->sadb_msg_type == SADB_X_SPDUPDATE)
2059 c.event = XFRM_MSG_UPDPOLICY;
2060 else
2061 c.event = XFRM_MSG_NEWPOLICY;
1952 2062
1953 xfrm_pol_put(xp); 2063 c.seq = hdr->sadb_msg_seq;
2064 c.pid = hdr->sadb_msg_pid;
1954 2065
1955 out_hdr = (struct sadb_msg *) out_skb->data; 2066 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
1956 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 2067 xfrm_pol_put(xp);
1957 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
1958 out_hdr->sadb_msg_satype = 0;
1959 out_hdr->sadb_msg_errno = 0;
1960 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
1961 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
1962 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
1963 return 0; 2068 return 0;
1964 2069
1965out: 2070out:
@@ -1973,9 +2078,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
1973 struct sadb_address *sa; 2078 struct sadb_address *sa;
1974 struct sadb_x_policy *pol; 2079 struct sadb_x_policy *pol;
1975 struct xfrm_policy *xp; 2080 struct xfrm_policy *xp;
1976 struct sk_buff *out_skb;
1977 struct sadb_msg *out_hdr;
1978 struct xfrm_selector sel; 2081 struct xfrm_selector sel;
2082 struct km_event c;
1979 2083
1980 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 2084 if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
1981 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || 2085 ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2010,25 +2114,40 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
2010 2114
2011 err = 0; 2115 err = 0;
2012 2116
2117 c.seq = hdr->sadb_msg_seq;
2118 c.pid = hdr->sadb_msg_pid;
2119 c.event = XFRM_MSG_DELPOLICY;
2120 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
2121
2122 xfrm_pol_put(xp);
2123 return err;
2124}
2125
2126static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir)
2127{
2128 int err;
2129 struct sk_buff *out_skb;
2130 struct sadb_msg *out_hdr;
2131 err = 0;
2132
2013 out_skb = pfkey_xfrm_policy2msg_prep(xp); 2133 out_skb = pfkey_xfrm_policy2msg_prep(xp);
2014 if (IS_ERR(out_skb)) { 2134 if (IS_ERR(out_skb)) {
2015 err = PTR_ERR(out_skb); 2135 err = PTR_ERR(out_skb);
2016 goto out; 2136 goto out;
2017 } 2137 }
2018 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1); 2138 pfkey_xfrm_policy2msg(out_skb, xp, dir);
2019 2139
2020 out_hdr = (struct sadb_msg *) out_skb->data; 2140 out_hdr = (struct sadb_msg *) out_skb->data;
2021 out_hdr->sadb_msg_version = hdr->sadb_msg_version; 2141 out_hdr->sadb_msg_version = hdr->sadb_msg_version;
2022 out_hdr->sadb_msg_type = SADB_X_SPDDELETE; 2142 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
2023 out_hdr->sadb_msg_satype = 0; 2143 out_hdr->sadb_msg_satype = 0;
2024 out_hdr->sadb_msg_errno = 0; 2144 out_hdr->sadb_msg_errno = 0;
2025 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; 2145 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
2026 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; 2146 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
2027 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk); 2147 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk);
2028 err = 0; 2148 err = 0;
2029 2149
2030out: 2150out:
2031 xfrm_pol_put(xp);
2032 return err; 2151 return err;
2033} 2152}
2034 2153
@@ -2037,8 +2156,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2037 int err; 2156 int err;
2038 struct sadb_x_policy *pol; 2157 struct sadb_x_policy *pol;
2039 struct xfrm_policy *xp; 2158 struct xfrm_policy *xp;
2040 struct sk_buff *out_skb; 2159 struct km_event c;
2041 struct sadb_msg *out_hdr;
2042 2160
2043 if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) 2161 if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL)
2044 return -EINVAL; 2162 return -EINVAL;
@@ -2050,24 +2168,16 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
2050 2168
2051 err = 0; 2169 err = 0;
2052 2170
2053 out_skb = pfkey_xfrm_policy2msg_prep(xp); 2171 c.seq = hdr->sadb_msg_seq;
2054 if (IS_ERR(out_skb)) { 2172 c.pid = hdr->sadb_msg_pid;
2055 err = PTR_ERR(out_skb); 2173 if (hdr->sadb_msg_type == SADB_X_SPDDELETE2) {
2056 goto out; 2174 c.data.byid = 1;
2175 c.event = XFRM_MSG_DELPOLICY;
2176 km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
2177 } else {
2178 err = key_pol_get_resp(sk, xp, hdr, pol->sadb_x_policy_dir-1);
2057 } 2179 }
2058 pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
2059 2180
2060 out_hdr = (struct sadb_msg *) out_skb->data;
2061 out_hdr->sadb_msg_version = hdr->sadb_msg_version;
2062 out_hdr->sadb_msg_type = hdr->sadb_msg_type;
2063 out_hdr->sadb_msg_satype = 0;
2064 out_hdr->sadb_msg_errno = 0;
2065 out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
2066 out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
2067 pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
2068 err = 0;
2069
2070out:
2071 xfrm_pol_put(xp); 2181 xfrm_pol_put(xp);
2072 return err; 2182 return err;
2073} 2183}
@@ -2102,22 +2212,34 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
2102 return xfrm_policy_walk(dump_sp, &data); 2212 return xfrm_policy_walk(dump_sp, &data);
2103} 2213}
2104 2214
2105static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) 2215static int key_notify_policy_flush(struct km_event *c)
2106{ 2216{
2107 struct sk_buff *skb_out; 2217 struct sk_buff *skb_out;
2108 struct sadb_msg *hdr_out; 2218 struct sadb_msg *hdr;
2109 2219
2110 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); 2220 skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
2111 if (!skb_out) 2221 if (!skb_out)
2112 return -ENOBUFS; 2222 return -ENOBUFS;
2223 hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
2224 hdr->sadb_msg_seq = c->seq;
2225 hdr->sadb_msg_pid = c->pid;
2226 hdr->sadb_msg_version = PF_KEY_V2;
2227 hdr->sadb_msg_errno = (uint8_t) 0;
2228 hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
2229 pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL);
2230 return 0;
2113 2231
2114 xfrm_policy_flush(); 2232}
2115 2233
2116 hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); 2234static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
2117 pfkey_hdr_dup(hdr_out, hdr); 2235{
2118 hdr_out->sadb_msg_errno = (uint8_t) 0; 2236 struct km_event c;
2119 hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); 2237
2120 pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL); 2238 xfrm_policy_flush();
2239 c.event = XFRM_MSG_FLUSHPOLICY;
2240 c.pid = hdr->sadb_msg_pid;
2241 c.seq = hdr->sadb_msg_seq;
2242 km_policy_notify(NULL, 0, &c);
2121 2243
2122 return 0; 2244 return 0;
2123} 2245}
@@ -2317,11 +2439,23 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
2317 } 2439 }
2318} 2440}
2319 2441
2320static int pfkey_send_notify(struct xfrm_state *x, int hard) 2442static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c)
2443{
2444 return 0;
2445}
2446
2447static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
2321{ 2448{
2322 struct sk_buff *out_skb; 2449 struct sk_buff *out_skb;
2323 struct sadb_msg *out_hdr; 2450 struct sadb_msg *out_hdr;
2324 int hsc = (hard ? 2 : 1); 2451 int hard;
2452 int hsc;
2453
2454 hard = c->data.hard;
2455 if (hard)
2456 hsc = 2;
2457 else
2458 hsc = 1;
2325 2459
2326 out_skb = pfkey_xfrm_state2msg(x, 0, hsc); 2460 out_skb = pfkey_xfrm_state2msg(x, 0, hsc);
2327 if (IS_ERR(out_skb)) 2461 if (IS_ERR(out_skb))
@@ -2340,6 +2474,44 @@ static int pfkey_send_notify(struct xfrm_state *x, int hard)
2340 return 0; 2474 return 0;
2341} 2475}
2342 2476
2477static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
2478{
2479 switch (c->event) {
2480 case XFRM_MSG_EXPIRE:
2481 return key_notify_sa_expire(x, c);
2482 case XFRM_MSG_DELSA:
2483 case XFRM_MSG_NEWSA:
2484 case XFRM_MSG_UPDSA:
2485 return key_notify_sa(x, c);
2486 case XFRM_MSG_FLUSHSA:
2487 return key_notify_sa_flush(c);
2488 default:
2489 printk("pfkey: Unknown SA event %d\n", c->event);
2490 break;
2491 }
2492
2493 return 0;
2494}
2495
2496static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
2497{
2498 switch (c->event) {
2499 case XFRM_MSG_POLEXPIRE:
2500 return key_notify_policy_expire(xp, c);
2501 case XFRM_MSG_DELPOLICY:
2502 case XFRM_MSG_NEWPOLICY:
2503 case XFRM_MSG_UPDPOLICY:
2504 return key_notify_policy(xp, dir, c);
2505 case XFRM_MSG_FLUSHPOLICY:
2506 return key_notify_policy_flush(c);
2507 default:
2508 printk("pfkey: Unknown policy event %d\n", c->event);
2509 break;
2510 }
2511
2512 return 0;
2513}
2514
2343static u32 get_acqseq(void) 2515static u32 get_acqseq(void)
2344{ 2516{
2345 u32 res; 2517 u32 res;
@@ -2856,6 +3028,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
2856 .acquire = pfkey_send_acquire, 3028 .acquire = pfkey_send_acquire,
2857 .compile_policy = pfkey_compile_policy, 3029 .compile_policy = pfkey_compile_policy,
2858 .new_mapping = pfkey_send_new_mapping, 3030 .new_mapping = pfkey_send_new_mapping,
3031 .notify_policy = pfkey_send_policy_notify,
2859}; 3032};
2860 3033
2861static void __exit ipsec_pfkey_exit(void) 3034static void __exit ipsec_pfkey_exit(void)
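
Taken together, the af_key.c changes reroute PF_KEY broadcasts through the xfrm key-manager layer: handlers that used to build and broadcast their reply inline now fill a struct km_event and call km_state_notify()/km_policy_notify(), and the events come back out through the xfrm_mgr callbacks pfkey_send_notify() and the newly registered pfkey_send_policy_notify(), which dispatch on c->event. The apparent payoff is that other key managers can observe SA and policy changes made via PF_KEY instead of each interface broadcasting only to its own listeners. The shape of km_event implied by the fields this diff touches (c.event, c.seq, c.pid, c.data.proto, c.data.byid, c.data.hard); layout assumed, not quoted:

	/* Assumed shape of struct km_event, reconstructed from its
	 * uses in this file; the real definition may differ. */
	struct km_event {
		union {
			u32 hard;	/* XFRM_MSG_EXPIRE/POLEXPIRE */
			u32 proto;	/* XFRM_MSG_FLUSHSA */
			u32 byid;	/* SADB_X_SPDDELETE2-style deletes */
		} data;
		u32 seq;		/* echoed sadb_msg_seq */
		u32 pid;		/* echoed sadb_msg_pid */
		int event;		/* XFRM_MSG_* selector */
	};
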
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e41ce458c2a9..70bcd4744d93 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1095,8 +1095,7 @@ static int netlink_dump(struct sock *sk)
1095 return 0; 1095 return 0;
1096 } 1096 }
1097 1097
1098 nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int)); 1098 nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1099 nlh->nlmsg_flags |= NLM_F_MULTI;
1100 memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); 1099 memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
1101 skb_queue_tail(&sk->sk_receive_queue, skb); 1100 skb_queue_tail(&sk->sk_receive_queue, skb);
1102 sk->sk_data_ready(sk, skb->len); 1101 sk->sk_data_ready(sk, skb->len);
@@ -1107,6 +1106,9 @@ static int netlink_dump(struct sock *sk)
1107 1106
1108 netlink_destroy_callback(cb); 1107 netlink_destroy_callback(cb);
1109 return 0; 1108 return 0;
1109
1110nlmsg_failure:
1111 return -ENOBUFS;
1110} 1112}
1111 1113
1112int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1114int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1178,7 +1180,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1178 } 1180 }
1179 1181
1180 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 1182 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1181 NLMSG_ERROR, sizeof(struct nlmsgerr)); 1183 NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
1182 errmsg = NLMSG_DATA(rep); 1184 errmsg = NLMSG_DATA(rep);
1183 errmsg->error = err; 1185 errmsg->error = err;
1184 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); 1186 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
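The switch from `__nlmsg_put()` to `NLMSG_NEW_ANSWER()`/`NLMSG_NEW()` is why `netlink_dump()` now needs the `nlmsg_failure:` label returning -ENOBUFS: the NEW-style macros jump there when the skb has no room instead of returning an error. Reconstructed shape of the idiom, inferred from how it is used in this diff (the authoritative definitions live in include/linux/netlink.h of this series):

        /* Hedged reconstruction: reserve a netlink header carrying flags,
         * or bail out to the caller's local nlmsg_failure label. */
        #define NLMSG_NEW(skb, pid, seq, type, len, flags)              \
        ({      if (skb_tailroom(skb) < (int)NLMSG_SPACE(len))          \
                        goto nlmsg_failure;                             \
                __nlmsg_put(skb, pid, seq, type, len, flags); })

        /* NLMSG_NEW_ANSWER then just pulls pid/seq out of the dump
         * callback's original request, as seen in netlink_dump() above. */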
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b0941186f867..b22c9beb604d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -405,7 +405,7 @@ config NET_EMATCH_STACK
405 ---help--- 405 ---help---
406 Size of the local stack variable used while evaluating the tree of 406 Size of the local stack variable used while evaluating the tree of
407 ematches. Limits the depth of the tree, i.e. the number of 407 ematches. Limits the depth of the tree, i.e. the number of
408 encapsulated precedences. Every level requires 4 bytes of addtional 408 encapsulated precedences. Every level requires 4 bytes of additional
409 stack space. 409 stack space.
410 410
411config NET_EMATCH_CMP 411config NET_EMATCH_CMP
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cafcb084098d..9594206e6035 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -428,15 +428,15 @@ errout:
428 428
429static int 429static int
430tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, 430tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
431 unsigned flags, int event, int bind, int ref) 431 u16 flags, int event, int bind, int ref)
432{ 432{
433 struct tcamsg *t; 433 struct tcamsg *t;
434 struct nlmsghdr *nlh; 434 struct nlmsghdr *nlh;
435 unsigned char *b = skb->tail; 435 unsigned char *b = skb->tail;
436 struct rtattr *x; 436 struct rtattr *x;
437 437
438 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); 438 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
439 nlh->nlmsg_flags = flags; 439
440 t = NLMSG_DATA(nlh); 440 t = NLMSG_DATA(nlh);
441 t->tca_family = AF_UNSPEC; 441 t->tca_family = AF_UNSPEC;
442 442
@@ -669,7 +669,7 @@ err:
669} 669}
670 670
671static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, 671static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
672 unsigned flags) 672 u16 flags)
673{ 673{
674 struct tcamsg *t; 674 struct tcamsg *t;
675 struct nlmsghdr *nlh; 675 struct nlmsghdr *nlh;
@@ -684,8 +684,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
684 684
685 b = (unsigned char *)skb->tail; 685 b = (unsigned char *)skb->tail;
686 686
687 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); 687 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
688 nlh->nlmsg_flags = flags;
689 t = NLMSG_DATA(nlh); 688 t = NLMSG_DATA(nlh);
690 t->tca_family = AF_UNSPEC; 689 t->tca_family = AF_UNSPEC;
691 690
@@ -881,7 +880,7 @@ static int __init tc_action_init(void)
881 link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action; 880 link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
882 } 881 }
883 882
884 printk("TC classifier action (bugs to netdev@oss.sgi.com cc " 883 printk("TC classifier action (bugs to netdev@vger.kernel.org cc "
885 "hadi@cyberus.ca)\n"); 884 "hadi@cyberus.ca)\n");
886 return 0; 885 return 0;
887} 886}
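The `flags` parameter narrows from `unsigned` to `u16` throughout these fill functions because that is the width of the field it ends up in: `nlmsg_flags` in `struct nlmsghdr` is a `__u16`, so a wider parameter type only invited silent truncation. For reference, the header layout these functions populate (unchanged by this patch):

        struct nlmsghdr {
                __u32 nlmsg_len;        /* length including this header */
                __u16 nlmsg_type;       /* message content */
                __u16 nlmsg_flags;      /* e.g. NLM_F_MULTI, NLM_F_ECHO */
                __u32 nlmsg_seq;        /* sequence number */
                __u32 nlmsg_pid;        /* sending port id */
        };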
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 56e66c3fe0fa..1616bf5c9627 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -322,14 +322,13 @@ errout:
322 322
323static int 323static int
324tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, 324tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
325 u32 pid, u32 seq, unsigned flags, int event) 325 u32 pid, u32 seq, u16 flags, int event)
326{ 326{
327 struct tcmsg *tcm; 327 struct tcmsg *tcm;
328 struct nlmsghdr *nlh; 328 struct nlmsghdr *nlh;
329 unsigned char *b = skb->tail; 329 unsigned char *b = skb->tail;
330 330
331 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 331 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
332 nlh->nlmsg_flags = flags;
333 tcm = NLMSG_DATA(nlh); 332 tcm = NLMSG_DATA(nlh);
334 tcm->tcm_family = AF_UNSPEC; 333 tcm->tcm_family = AF_UNSPEC;
335 tcm->tcm_ifindex = tp->q->dev->ifindex; 334 tcm->tcm_ifindex = tp->q->dev->ifindex;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0d2d4415f334..dfb300bb6baa 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -261,6 +261,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
261 rta = (struct rtattr *) b; 261 rta = (struct rtattr *) b;
262 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 262 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
263 263
264 if (f->res.classid)
265 RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
266
264 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || 267 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
265 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) 268 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
266 goto rtattr_failure; 269 goto rtattr_failure;
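The cls_basic hunk fixes a one-way dump: the classid could be configured but was never echoed back, so userspace listing the filter lost it. For context, a hedged sketch of the configuration side that makes the dumped attribute meaningful (the exact surrounding helper is assumed; the pattern follows other 2.6-era classifiers):

        /* Sketch: in the change/set-parms path, record the classid from
         * userspace so basic_dump() above can report it back. */
        if (tb[TCA_BASIC_CLASSID-1]) {
                f->res.classid = *(u32 *) RTA_DATA(tb[TCA_BASIC_CLASSID-1]);
                tcf_bind_filter(tp, &f->res, base);
        }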
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index f1eeaf65cee5..48bb23c2a35a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -32,7 +32,7 @@
32 * +-----------+ +-----------+ 32 * +-----------+ +-----------+
33 * | | 33 * | |
34 * ---> meta_ops[INT][INDEV](...) | 34 * ---> meta_ops[INT][INDEV](...) |
35 * | | 35 * | |
36 * ----------- | 36 * ----------- |
37 * V V 37 * V V
38 * +-----------+ +-----------+ 38 * +-----------+ +-----------+
@@ -70,6 +70,7 @@
70#include <net/dst.h> 70#include <net/dst.h>
71#include <net/route.h> 71#include <net/route.h>
72#include <net/pkt_cls.h> 72#include <net/pkt_cls.h>
73#include <net/sock.h>
73 74
74struct meta_obj 75struct meta_obj
75{ 76{
@@ -284,6 +285,214 @@ META_COLLECTOR(int_rtiif)
284} 285}
285 286
286/************************************************************************** 287/**************************************************************************
288 * Socket Attributes
289 **************************************************************************/
290
291#define SKIP_NONLOCAL(skb) \
292 if (unlikely(skb->sk == NULL)) { \
293 *err = -1; \
294 return; \
295 }
296
297META_COLLECTOR(int_sk_family)
298{
299 SKIP_NONLOCAL(skb);
300 dst->value = skb->sk->sk_family;
301}
302
303META_COLLECTOR(int_sk_state)
304{
305 SKIP_NONLOCAL(skb);
306 dst->value = skb->sk->sk_state;
307}
308
309META_COLLECTOR(int_sk_reuse)
310{
311 SKIP_NONLOCAL(skb);
312 dst->value = skb->sk->sk_reuse;
313}
314
315META_COLLECTOR(int_sk_bound_if)
316{
317 SKIP_NONLOCAL(skb);
318 /* No error if bound_dev_if is 0, legal userspace check */
319 dst->value = skb->sk->sk_bound_dev_if;
320}
321
322META_COLLECTOR(var_sk_bound_if)
323{
324 SKIP_NONLOCAL(skb);
325
326 if (skb->sk->sk_bound_dev_if == 0) {
327 dst->value = (unsigned long) "any";
328 dst->len = 3;
329 } else {
330 struct net_device *dev;
331
332 dev = dev_get_by_index(skb->sk->sk_bound_dev_if);
333 *err = var_dev(dev, dst);
334 if (dev)
335 dev_put(dev);
336 }
337}
338
339META_COLLECTOR(int_sk_refcnt)
340{
341 SKIP_NONLOCAL(skb);
342 dst->value = atomic_read(&skb->sk->sk_refcnt);
343}
344
345META_COLLECTOR(int_sk_rcvbuf)
346{
347 SKIP_NONLOCAL(skb);
348 dst->value = skb->sk->sk_rcvbuf;
349}
350
351META_COLLECTOR(int_sk_shutdown)
352{
353 SKIP_NONLOCAL(skb);
354 dst->value = skb->sk->sk_shutdown;
355}
356
357META_COLLECTOR(int_sk_proto)
358{
359 SKIP_NONLOCAL(skb);
360 dst->value = skb->sk->sk_protocol;
361}
362
363META_COLLECTOR(int_sk_type)
364{
365 SKIP_NONLOCAL(skb);
366 dst->value = skb->sk->sk_type;
367}
368
369META_COLLECTOR(int_sk_rmem_alloc)
370{
371 SKIP_NONLOCAL(skb);
372 dst->value = atomic_read(&skb->sk->sk_rmem_alloc);
373}
374
375META_COLLECTOR(int_sk_wmem_alloc)
376{
377 SKIP_NONLOCAL(skb);
378 dst->value = atomic_read(&skb->sk->sk_wmem_alloc);
379}
380
381META_COLLECTOR(int_sk_omem_alloc)
382{
383 SKIP_NONLOCAL(skb);
384 dst->value = atomic_read(&skb->sk->sk_omem_alloc);
385}
386
387META_COLLECTOR(int_sk_rcv_qlen)
388{
389 SKIP_NONLOCAL(skb);
390 dst->value = skb->sk->sk_receive_queue.qlen;
391}
392
393META_COLLECTOR(int_sk_snd_qlen)
394{
395 SKIP_NONLOCAL(skb);
396 dst->value = skb->sk->sk_write_queue.qlen;
397}
398
399META_COLLECTOR(int_sk_wmem_queued)
400{
401 SKIP_NONLOCAL(skb);
402 dst->value = skb->sk->sk_wmem_queued;
403}
404
405META_COLLECTOR(int_sk_fwd_alloc)
406{
407 SKIP_NONLOCAL(skb);
408 dst->value = skb->sk->sk_forward_alloc;
409}
410
411META_COLLECTOR(int_sk_sndbuf)
412{
413 SKIP_NONLOCAL(skb);
414 dst->value = skb->sk->sk_sndbuf;
415}
416
417META_COLLECTOR(int_sk_alloc)
418{
419 SKIP_NONLOCAL(skb);
420 dst->value = skb->sk->sk_allocation;
421}
422
423META_COLLECTOR(int_sk_route_caps)
424{
425 SKIP_NONLOCAL(skb);
426 dst->value = skb->sk->sk_route_caps;
427}
428
429META_COLLECTOR(int_sk_hashent)
430{
431 SKIP_NONLOCAL(skb);
432 dst->value = skb->sk->sk_hashent;
433}
434
435META_COLLECTOR(int_sk_lingertime)
436{
437 SKIP_NONLOCAL(skb);
438 dst->value = skb->sk->sk_lingertime / HZ;
439}
440
441META_COLLECTOR(int_sk_err_qlen)
442{
443 SKIP_NONLOCAL(skb);
444 dst->value = skb->sk->sk_error_queue.qlen;
445}
446
447META_COLLECTOR(int_sk_ack_bl)
448{
449 SKIP_NONLOCAL(skb);
450 dst->value = skb->sk->sk_ack_backlog;
451}
452
453META_COLLECTOR(int_sk_max_ack_bl)
454{
455 SKIP_NONLOCAL(skb);
456 dst->value = skb->sk->sk_max_ack_backlog;
457}
458
459META_COLLECTOR(int_sk_prio)
460{
461 SKIP_NONLOCAL(skb);
462 dst->value = skb->sk->sk_priority;
463}
464
465META_COLLECTOR(int_sk_rcvlowat)
466{
467 SKIP_NONLOCAL(skb);
468 dst->value = skb->sk->sk_rcvlowat;
469}
470
471META_COLLECTOR(int_sk_rcvtimeo)
472{
473 SKIP_NONLOCAL(skb);
474 dst->value = skb->sk->sk_rcvtimeo / HZ;
475}
476
477META_COLLECTOR(int_sk_sndtimeo)
478{
479 SKIP_NONLOCAL(skb);
480 dst->value = skb->sk->sk_sndtimeo / HZ;
481}
482
483META_COLLECTOR(int_sk_sendmsg_off)
484{
485 SKIP_NONLOCAL(skb);
486 dst->value = skb->sk->sk_sndmsg_off;
487}
488
489META_COLLECTOR(int_sk_write_pend)
490{
491 SKIP_NONLOCAL(skb);
492 dst->value = skb->sk->sk_write_pending;
493}
494
495/**************************************************************************
287 * Meta value collectors assignment table 496 * Meta value collectors assignment table
288 **************************************************************************/ 497 **************************************************************************/
289 498
@@ -293,41 +502,75 @@ struct meta_ops
293 struct meta_value *, struct meta_obj *, int *); 502 struct meta_value *, struct meta_obj *, int *);
294}; 503};
295 504
505#define META_ID(name) TCF_META_ID_##name
506#define META_FUNC(name) { .get = meta_##name }
507
296/* Meta value operations table listing all meta value collectors and 508/* Meta value operations table listing all meta value collectors and
297 * assigns them to a type and meta id. */ 509 * assigns them to a type and meta id. */
298static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { 510static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
299 [TCF_META_TYPE_VAR] = { 511 [TCF_META_TYPE_VAR] = {
300 [TCF_META_ID_DEV] = { .get = meta_var_dev }, 512 [META_ID(DEV)] = META_FUNC(var_dev),
301 [TCF_META_ID_INDEV] = { .get = meta_var_indev }, 513 [META_ID(INDEV)] = META_FUNC(var_indev),
302 [TCF_META_ID_REALDEV] = { .get = meta_var_realdev } 514 [META_ID(REALDEV)] = META_FUNC(var_realdev),
515 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
303 }, 516 },
304 [TCF_META_TYPE_INT] = { 517 [TCF_META_TYPE_INT] = {
305 [TCF_META_ID_RANDOM] = { .get = meta_int_random }, 518 [META_ID(RANDOM)] = META_FUNC(int_random),
306 [TCF_META_ID_LOADAVG_0] = { .get = meta_int_loadavg_0 }, 519 [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0),
307 [TCF_META_ID_LOADAVG_1] = { .get = meta_int_loadavg_1 }, 520 [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1),
308 [TCF_META_ID_LOADAVG_2] = { .get = meta_int_loadavg_2 }, 521 [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2),
309 [TCF_META_ID_DEV] = { .get = meta_int_dev }, 522 [META_ID(DEV)] = META_FUNC(int_dev),
310 [TCF_META_ID_INDEV] = { .get = meta_int_indev }, 523 [META_ID(INDEV)] = META_FUNC(int_indev),
311 [TCF_META_ID_REALDEV] = { .get = meta_int_realdev }, 524 [META_ID(REALDEV)] = META_FUNC(int_realdev),
312 [TCF_META_ID_PRIORITY] = { .get = meta_int_priority }, 525 [META_ID(PRIORITY)] = META_FUNC(int_priority),
313 [TCF_META_ID_PROTOCOL] = { .get = meta_int_protocol }, 526 [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
314 [TCF_META_ID_SECURITY] = { .get = meta_int_security }, 527 [META_ID(SECURITY)] = META_FUNC(int_security),
315 [TCF_META_ID_PKTTYPE] = { .get = meta_int_pkttype }, 528 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
316 [TCF_META_ID_PKTLEN] = { .get = meta_int_pktlen }, 529 [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
317 [TCF_META_ID_DATALEN] = { .get = meta_int_datalen }, 530 [META_ID(DATALEN)] = META_FUNC(int_datalen),
318 [TCF_META_ID_MACLEN] = { .get = meta_int_maclen }, 531 [META_ID(MACLEN)] = META_FUNC(int_maclen),
319#ifdef CONFIG_NETFILTER 532#ifdef CONFIG_NETFILTER
320 [TCF_META_ID_NFMARK] = { .get = meta_int_nfmark }, 533 [META_ID(NFMARK)] = META_FUNC(int_nfmark),
321#endif 534#endif
322 [TCF_META_ID_TCINDEX] = { .get = meta_int_tcindex }, 535 [META_ID(TCINDEX)] = META_FUNC(int_tcindex),
323#ifdef CONFIG_NET_CLS_ACT 536#ifdef CONFIG_NET_CLS_ACT
324 [TCF_META_ID_TCVERDICT] = { .get = meta_int_tcverd }, 537 [META_ID(TCVERDICT)] = META_FUNC(int_tcverd),
325 [TCF_META_ID_TCCLASSID] = { .get = meta_int_tcclassid }, 538 [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid),
326#endif 539#endif
327#ifdef CONFIG_NET_CLS_ROUTE 540#ifdef CONFIG_NET_CLS_ROUTE
328 [TCF_META_ID_RTCLASSID] = { .get = meta_int_rtclassid }, 541 [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid),
329#endif 542#endif
330 [TCF_META_ID_RTIIF] = { .get = meta_int_rtiif } 543 [META_ID(RTIIF)] = META_FUNC(int_rtiif),
544 [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family),
545 [META_ID(SK_STATE)] = META_FUNC(int_sk_state),
546 [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse),
547 [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if),
548 [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt),
549 [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf),
550 [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf),
551 [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown),
552 [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto),
553 [META_ID(SK_TYPE)] = META_FUNC(int_sk_type),
554 [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc),
555 [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc),
556 [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc),
557 [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued),
558 [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen),
559 [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen),
560 [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen),
561 [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc),
562 [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc),
563 [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps),
564 [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent),
565 [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime),
566 [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl),
567 [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl),
568 [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio),
569 [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat),
570 [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo),
571 [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo),
572 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
573 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
331 } 574 }
332}; 575};
333 576
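The META_ID()/META_FUNC() macros are pure notation and change nothing in the table; they only strip the repetitive TCF_META_ID_ and { .get = meta_ } boilerplate. One entry expands as follows:

        /* [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family) expands to: */
        [TCF_META_ID_SK_FAMILY] = { .get = meta_int_sk_family },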
@@ -396,9 +639,9 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
396 /* Let gcc optimize it, the unlikely is not really based on 639 /* Let gcc optimize it, the unlikely is not really based on
397 * some numbers but jump free code for mismatches seems 640 * some numbers but jump free code for mismatches seems
398 * more logical. */ 641 * more logical. */
399 if (unlikely(a == b)) 642 if (unlikely(a->value == b->value))
400 return 0; 643 return 0;
401 else if (a < b) 644 else if (a->value < b->value)
402 return -1; 645 return -1;
403 else 646 else
404 return 1; 647 return 1;
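The meta_int_compare hunk is a genuine bug fix, not a cleanup: the old code compared the `struct meta_obj` pointers `a` and `b`, not the values they carry, so two equal metadata values held in distinct objects never matched. A tiny illustration of the difference:

        /* With the old code, "equal" was pointer identity and therefore
         * always false here, even though both objects hold 42. */
        struct meta_obj x = { .value = 42 }, y = { .value = 42 };
        int broken = (&x == &y);                /* 0: distinct objects */
        int fixed  = (x.value == y.value);      /* 1: equal values */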
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 07977f8f2679..97c1c75d5c78 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -760,15 +760,14 @@ graft:
760} 760}
761 761
762static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, 762static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
763 u32 pid, u32 seq, unsigned flags, int event) 763 u32 pid, u32 seq, u16 flags, int event)
764{ 764{
765 struct tcmsg *tcm; 765 struct tcmsg *tcm;
766 struct nlmsghdr *nlh; 766 struct nlmsghdr *nlh;
767 unsigned char *b = skb->tail; 767 unsigned char *b = skb->tail;
768 struct gnet_dump d; 768 struct gnet_dump d;
769 769
770 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 770 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
771 nlh->nlmsg_flags = flags;
772 tcm = NLMSG_DATA(nlh); 771 tcm = NLMSG_DATA(nlh);
773 tcm->tcm_family = AF_UNSPEC; 772 tcm->tcm_family = AF_UNSPEC;
774 tcm->tcm_ifindex = q->dev->ifindex; 773 tcm->tcm_ifindex = q->dev->ifindex;
@@ -997,7 +996,7 @@ out:
997 996
998static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, 997static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
999 unsigned long cl, 998 unsigned long cl,
1000 u32 pid, u32 seq, unsigned flags, int event) 999 u32 pid, u32 seq, u16 flags, int event)
1001{ 1000{
1002 struct tcmsg *tcm; 1001 struct tcmsg *tcm;
1003 struct nlmsghdr *nlh; 1002 struct nlmsghdr *nlh;
@@ -1005,8 +1004,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1005 struct gnet_dump d; 1004 struct gnet_dump d;
1006 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1005 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1007 1006
1008 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 1007 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1009 nlh->nlmsg_flags = flags;
1010 tcm = NLMSG_DATA(nlh); 1008 tcm = NLMSG_DATA(nlh);
1011 tcm->tcm_family = AF_UNSPEC; 1009 tcm->tcm_family = AF_UNSPEC;
1012 tcm->tcm_ifindex = q->dev->ifindex; 1010 tcm->tcm_ifindex = q->dev->ifindex;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 8a3db9d95bab..13e0e7b3856b 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -18,7 +18,7 @@
18#include <asm/byteorder.h> 18#include <asm/byteorder.h>
19 19
20 20
21#if 1 /* control */ 21#if 0 /* control */
22#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) 22#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
23#else 23#else
24#define DPRINTK(format,args...) 24#define DPRINTK(format,args...)
@@ -31,7 +31,7 @@
31#endif 31#endif
32 32
33 33
34#define PRIV(sch) qdisc_priv(sch) 34#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch))
35 35
36 36
37/* 37/*
@@ -55,145 +55,163 @@
55struct dsmark_qdisc_data { 55struct dsmark_qdisc_data {
56 struct Qdisc *q; 56 struct Qdisc *q;
57 struct tcf_proto *filter_list; 57 struct tcf_proto *filter_list;
58 __u8 *mask; /* "owns" the array */ 58 u8 *mask; /* "owns" the array */
59 __u8 *value; 59 u8 *value;
60 __u16 indices; 60 u16 indices;
61 __u32 default_index; /* index range is 0...0xffff */ 61 u32 default_index; /* index range is 0...0xffff */
62 int set_tc_index; 62 int set_tc_index;
63}; 63};
64 64
65static inline int dsmark_valid_indices(u16 indices)
66{
67 while (indices != 1) {
68 if (indices & 1)
69 return 0;
70 indices >>= 1;
71 }
72
73 return 1;
74}
65 75
66/* ------------------------- Class/flow operations ------------------------- */ 76static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
77{
78 return (index <= p->indices && index > 0);
79}
67 80
81/* ------------------------- Class/flow operations ------------------------- */
68 82
69static int dsmark_graft(struct Qdisc *sch,unsigned long arg, 83static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
70 struct Qdisc *new,struct Qdisc **old) 84 struct Qdisc *new, struct Qdisc **old)
71{ 85{
72 struct dsmark_qdisc_data *p = PRIV(sch); 86 struct dsmark_qdisc_data *p = PRIV(sch);
73 87
74 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, 88 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
75 old); 89 sch, p, new, old);
76 if (!new) 90
77 new = &noop_qdisc; 91 if (new == NULL) {
92 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
93 if (new == NULL)
94 new = &noop_qdisc;
95 }
96
78 sch_tree_lock(sch); 97 sch_tree_lock(sch);
79 *old = xchg(&p->q,new); 98 *old = xchg(&p->q, new);
80 if (*old) 99 qdisc_reset(*old);
81 qdisc_reset(*old);
82 sch->q.qlen = 0; 100 sch->q.qlen = 0;
83 sch_tree_unlock(sch); /* @@@ move up ? */ 101 sch_tree_unlock(sch);
102
84 return 0; 103 return 0;
85} 104}
86 105
87
88static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) 106static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
89{ 107{
90 struct dsmark_qdisc_data *p = PRIV(sch); 108 return PRIV(sch)->q;
91
92 return p->q;
93} 109}
94 110
95 111static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
96static unsigned long dsmark_get(struct Qdisc *sch,u32 classid)
97{ 112{
98 struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch); 113 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
114 sch, PRIV(sch), classid);
99 115
100 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); 116 return TC_H_MIN(classid) + 1;
101 return TC_H_MIN(classid)+1;
102} 117}
103 118
104
105static unsigned long dsmark_bind_filter(struct Qdisc *sch, 119static unsigned long dsmark_bind_filter(struct Qdisc *sch,
106 unsigned long parent, u32 classid) 120 unsigned long parent, u32 classid)
107{ 121{
108 return dsmark_get(sch,classid); 122 return dsmark_get(sch, classid);
109} 123}
110 124
111
112static void dsmark_put(struct Qdisc *sch, unsigned long cl) 125static void dsmark_put(struct Qdisc *sch, unsigned long cl)
113{ 126{
114} 127}
115 128
116
117static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, 129static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
118 struct rtattr **tca, unsigned long *arg) 130 struct rtattr **tca, unsigned long *arg)
119{ 131{
120 struct dsmark_qdisc_data *p = PRIV(sch); 132 struct dsmark_qdisc_data *p = PRIV(sch);
121 struct rtattr *opt = tca[TCA_OPTIONS-1]; 133 struct rtattr *opt = tca[TCA_OPTIONS-1];
122 struct rtattr *tb[TCA_DSMARK_MAX]; 134 struct rtattr *tb[TCA_DSMARK_MAX];
135 int err = -EINVAL;
136 u8 mask = 0;
123 137
124 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," 138 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
125 "arg 0x%lx\n",sch,p,classid,parent,*arg); 139 "arg 0x%lx\n", sch, p, classid, parent, *arg);
126 if (*arg > p->indices) 140
127 return -ENOENT; 141 if (!dsmark_valid_index(p, *arg)) {
128 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt)) 142 err = -ENOENT;
129 return -EINVAL; 143 goto rtattr_failure;
130 if (tb[TCA_DSMARK_MASK-1]) {
131 if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK-1]))
132 return -EINVAL;
133 p->mask[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_MASK-1]);
134 }
135 if (tb[TCA_DSMARK_VALUE-1]) {
136 if (!RTA_PAYLOAD(tb[TCA_DSMARK_VALUE-1]))
137 return -EINVAL;
138 p->value[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_VALUE-1]);
139 } 144 }
140 return 0;
141}
142 145
146 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
147 goto rtattr_failure;
148
149 if (tb[TCA_DSMARK_MASK-1])
150 mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]);
151
152 if (tb[TCA_DSMARK_VALUE-1])
153 p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]);
154
155 if (tb[TCA_DSMARK_MASK-1])
156 p->mask[*arg-1] = mask;
143 157
144static int dsmark_delete(struct Qdisc *sch,unsigned long arg) 158 err = 0;
159
160rtattr_failure:
161 return err;
162}
163
164static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
145{ 165{
146 struct dsmark_qdisc_data *p = PRIV(sch); 166 struct dsmark_qdisc_data *p = PRIV(sch);
147 167
148 if (!arg || arg > p->indices) 168 if (!dsmark_valid_index(p, arg))
149 return -EINVAL; 169 return -EINVAL;
170
150 p->mask[arg-1] = 0xff; 171 p->mask[arg-1] = 0xff;
151 p->value[arg-1] = 0; 172 p->value[arg-1] = 0;
173
152 return 0; 174 return 0;
153} 175}
154 176
155
156static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) 177static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
157{ 178{
158 struct dsmark_qdisc_data *p = PRIV(sch); 179 struct dsmark_qdisc_data *p = PRIV(sch);
159 int i; 180 int i;
160 181
161 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker); 182 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
183
162 if (walker->stop) 184 if (walker->stop)
163 return; 185 return;
186
164 for (i = 0; i < p->indices; i++) { 187 for (i = 0; i < p->indices; i++) {
165 if (p->mask[i] == 0xff && !p->value[i]) 188 if (p->mask[i] == 0xff && !p->value[i])
166 continue; 189 goto ignore;
167 if (walker->count >= walker->skip) { 190 if (walker->count >= walker->skip) {
168 if (walker->fn(sch, i+1, walker) < 0) { 191 if (walker->fn(sch, i+1, walker) < 0) {
169 walker->stop = 1; 192 walker->stop = 1;
170 break; 193 break;
171 } 194 }
172 } 195 }
173 walker->count++; 196ignore:
197 walker->count++;
174 } 198 }
175} 199}
176 200
177
178static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl) 201static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
179{ 202{
180 struct dsmark_qdisc_data *p = PRIV(sch); 203 return &PRIV(sch)->filter_list;
181
182 return &p->filter_list;
183} 204}
184 205
185
186/* --------------------------- Qdisc operations ---------------------------- */ 206/* --------------------------- Qdisc operations ---------------------------- */
187 207
188
189static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) 208static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
190{ 209{
191 struct dsmark_qdisc_data *p = PRIV(sch); 210 struct dsmark_qdisc_data *p = PRIV(sch);
192 struct tcf_result res; 211 int err;
193 int result; 212
194 int ret = NET_XMIT_POLICED; 213 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
195 214
196 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
197 if (p->set_tc_index) { 215 if (p->set_tc_index) {
198 /* FIXME: Safe with non-linear skbs? --RR */ 216 /* FIXME: Safe with non-linear skbs? --RR */
199 switch (skb->protocol) { 217 switch (skb->protocol) {
@@ -210,17 +228,21 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
210 break; 228 break;
211 }; 229 };
212 } 230 }
213 result = TC_POLICE_OK; /* be nice to gcc */ 231
214 if (TC_H_MAJ(skb->priority) == sch->handle) { 232 if (TC_H_MAJ(skb->priority) == sch->handle)
215 skb->tc_index = TC_H_MIN(skb->priority); 233 skb->tc_index = TC_H_MIN(skb->priority);
216 } else { 234 else {
217 result = tc_classify(skb,p->filter_list,&res); 235 struct tcf_result res;
218 D2PRINTK("result %d class 0x%04x\n",result,res.classid); 236 int result = tc_classify(skb, p->filter_list, &res);
237
238 D2PRINTK("result %d class 0x%04x\n", result, res.classid);
239
219 switch (result) { 240 switch (result) {
220#ifdef CONFIG_NET_CLS_POLICE 241#ifdef CONFIG_NET_CLS_POLICE
221 case TC_POLICE_SHOT: 242 case TC_POLICE_SHOT:
222 kfree_skb(skb); 243 kfree_skb(skb);
223 break; 244 sch->qstats.drops++;
245 return NET_XMIT_POLICED;
224#if 0 246#if 0
225 case TC_POLICE_RECLASSIFY: 247 case TC_POLICE_RECLASSIFY:
226 /* FIXME: what to do here ??? */ 248 /* FIXME: what to do here ??? */
@@ -237,43 +259,45 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
237 break; 259 break;
238 }; 260 };
239 } 261 }
240 if (
241#ifdef CONFIG_NET_CLS_POLICE
242 result == TC_POLICE_SHOT ||
243#endif
244 262
245 ((ret = p->q->enqueue(skb,p->q)) != 0)) { 263 err = p->q->enqueue(skb,p->q);
264 if (err != NET_XMIT_SUCCESS) {
246 sch->qstats.drops++; 265 sch->qstats.drops++;
247 return ret; 266 return err;
248 } 267 }
268
249 sch->bstats.bytes += skb->len; 269 sch->bstats.bytes += skb->len;
250 sch->bstats.packets++; 270 sch->bstats.packets++;
251 sch->q.qlen++; 271 sch->q.qlen++;
252 return ret;
253}
254 272
273 return NET_XMIT_SUCCESS;
274}
255 275
256static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) 276static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
257{ 277{
258 struct dsmark_qdisc_data *p = PRIV(sch); 278 struct dsmark_qdisc_data *p = PRIV(sch);
259 struct sk_buff *skb; 279 struct sk_buff *skb;
260 int index; 280 u32 index;
281
282 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
261 283
262 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p);
263 skb = p->q->ops->dequeue(p->q); 284 skb = p->q->ops->dequeue(p->q);
264 if (!skb) 285 if (skb == NULL)
265 return NULL; 286 return NULL;
287
266 sch->q.qlen--; 288 sch->q.qlen--;
267 index = skb->tc_index & (p->indices-1); 289
268 D2PRINTK("index %d->%d\n",skb->tc_index,index); 290 index = skb->tc_index & (p->indices - 1);
291 D2PRINTK("index %d->%d\n", skb->tc_index, index);
292
269 switch (skb->protocol) { 293 switch (skb->protocol) {
270 case __constant_htons(ETH_P_IP): 294 case __constant_htons(ETH_P_IP):
271 ipv4_change_dsfield(skb->nh.iph, 295 ipv4_change_dsfield(skb->nh.iph, p->mask[index],
272 p->mask[index],p->value[index]); 296 p->value[index]);
273 break; 297 break;
274 case __constant_htons(ETH_P_IPV6): 298 case __constant_htons(ETH_P_IPV6):
275 ipv6_change_dsfield(skb->nh.ipv6h, 299 ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
276 p->mask[index],p->value[index]); 300 p->value[index]);
277 break; 301 break;
278 default: 302 default:
279 /* 303 /*
@@ -287,152 +311,162 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
287 htons(skb->protocol)); 311 htons(skb->protocol));
288 break; 312 break;
289 }; 313 };
314
290 return skb; 315 return skb;
291} 316}
292 317
293
294static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch) 318static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
295{ 319{
296 int ret;
297 struct dsmark_qdisc_data *p = PRIV(sch); 320 struct dsmark_qdisc_data *p = PRIV(sch);
321 int err;
298 322
299 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); 323 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
300 if ((ret = p->q->ops->requeue(skb, p->q)) == 0) { 324
301 sch->q.qlen++; 325 err = p->q->ops->requeue(skb, p->q);
302 sch->qstats.requeues++; 326 if (err != NET_XMIT_SUCCESS) {
303 return 0; 327 sch->qstats.drops++;
328 return err;
304 } 329 }
305 sch->qstats.drops++;
306 return ret;
307}
308 330
331 sch->q.qlen++;
332 sch->qstats.requeues++;
333
334 return NET_XMIT_SUCCESS;
335}
309 336
310static unsigned int dsmark_drop(struct Qdisc *sch) 337static unsigned int dsmark_drop(struct Qdisc *sch)
311{ 338{
312 struct dsmark_qdisc_data *p = PRIV(sch); 339 struct dsmark_qdisc_data *p = PRIV(sch);
313 unsigned int len; 340 unsigned int len;
314 341
315 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); 342 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
316 if (!p->q->ops->drop) 343
317 return 0; 344 if (p->q->ops->drop == NULL)
318 if (!(len = p->q->ops->drop(p->q)))
319 return 0; 345 return 0;
320 sch->q.qlen--; 346
347 len = p->q->ops->drop(p->q);
348 if (len)
349 sch->q.qlen--;
350
321 return len; 351 return len;
322} 352}
323 353
324 354static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
325static int dsmark_init(struct Qdisc *sch,struct rtattr *opt)
326{ 355{
327 struct dsmark_qdisc_data *p = PRIV(sch); 356 struct dsmark_qdisc_data *p = PRIV(sch);
328 struct rtattr *tb[TCA_DSMARK_MAX]; 357 struct rtattr *tb[TCA_DSMARK_MAX];
329 __u16 tmp; 358 int err = -EINVAL;
330 359 u32 default_index = NO_DEFAULT_INDEX;
331 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); 360 u16 indices;
332 if (!opt || 361 u8 *mask;
333 rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 || 362
334 !tb[TCA_DSMARK_INDICES-1] || 363 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
335 RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16)) 364
336 return -EINVAL; 365 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0)
337 p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]); 366 goto errout;
338 if (!p->indices) 367
339 return -EINVAL; 368 indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]);
340 for (tmp = p->indices; tmp != 1; tmp >>= 1) { 369 if (!indices || !dsmark_valid_indices(indices))
341 if (tmp & 1) 370 goto errout;
342 return -EINVAL; 371
343 } 372 if (tb[TCA_DSMARK_DEFAULT_INDEX-1])
344 p->default_index = NO_DEFAULT_INDEX; 373 default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
345 if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) { 374
346 if (RTA_PAYLOAD(tb[TCA_DSMARK_DEFAULT_INDEX-1]) < sizeof(__u16)) 375 mask = kmalloc(indices * 2, GFP_KERNEL);
347 return -EINVAL; 376 if (mask == NULL) {
348 p->default_index = 377 err = -ENOMEM;
349 *(__u16 *) RTA_DATA(tb[TCA_DSMARK_DEFAULT_INDEX-1]); 378 goto errout;
350 } 379 }
351 p->set_tc_index = !!tb[TCA_DSMARK_SET_TC_INDEX-1]; 380
352 p->mask = kmalloc(p->indices*2,GFP_KERNEL); 381 p->mask = mask;
353 if (!p->mask) 382 memset(p->mask, 0xff, indices);
354 return -ENOMEM; 383
355 p->value = p->mask+p->indices; 384 p->value = p->mask + indices;
356 memset(p->mask,0xff,p->indices); 385 memset(p->value, 0, indices);
357 memset(p->value,0,p->indices); 386
358 if (!(p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) 387 p->indices = indices;
388 p->default_index = default_index;
389 p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
390
391 p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
392 if (p->q == NULL)
359 p->q = &noop_qdisc; 393 p->q = &noop_qdisc;
360 DPRINTK("dsmark_init: qdisc %p\n",&p->q);
361 return 0;
362}
363 394
395 DPRINTK("dsmark_init: qdisc %p\n", p->q);
396
397 err = 0;
398errout:
399rtattr_failure:
400 return err;
401}
364 402
365static void dsmark_reset(struct Qdisc *sch) 403static void dsmark_reset(struct Qdisc *sch)
366{ 404{
367 struct dsmark_qdisc_data *p = PRIV(sch); 405 struct dsmark_qdisc_data *p = PRIV(sch);
368 406
369 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); 407 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
370 qdisc_reset(p->q); 408 qdisc_reset(p->q);
371 sch->q.qlen = 0; 409 sch->q.qlen = 0;
372} 410}
373 411
374
375static void dsmark_destroy(struct Qdisc *sch) 412static void dsmark_destroy(struct Qdisc *sch)
376{ 413{
377 struct dsmark_qdisc_data *p = PRIV(sch); 414 struct dsmark_qdisc_data *p = PRIV(sch);
378 struct tcf_proto *tp; 415 struct tcf_proto *tp;
379 416
380 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p); 417 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
418
381 while (p->filter_list) { 419 while (p->filter_list) {
382 tp = p->filter_list; 420 tp = p->filter_list;
383 p->filter_list = tp->next; 421 p->filter_list = tp->next;
384 tcf_destroy(tp); 422 tcf_destroy(tp);
385 } 423 }
424
386 qdisc_destroy(p->q); 425 qdisc_destroy(p->q);
387 kfree(p->mask); 426 kfree(p->mask);
388} 427}
389 428
390
391static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, 429static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
392 struct sk_buff *skb, struct tcmsg *tcm) 430 struct sk_buff *skb, struct tcmsg *tcm)
393{ 431{
394 struct dsmark_qdisc_data *p = PRIV(sch); 432 struct dsmark_qdisc_data *p = PRIV(sch);
395 unsigned char *b = skb->tail; 433 struct rtattr *opts = NULL;
396 struct rtattr *rta; 434
435 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
397 436
398 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl); 437 if (!dsmark_valid_index(p, cl))
399 if (!cl || cl > p->indices)
400 return -EINVAL; 438 return -EINVAL;
401 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1); 439
402 rta = (struct rtattr *) b; 440 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
403 RTA_PUT(skb,TCA_OPTIONS,0,NULL); 441
404 RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]); 442 opts = RTA_NEST(skb, TCA_OPTIONS);
405 RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]); 443 RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]);
406 rta->rta_len = skb->tail-b; 444 RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]);
407 return skb->len; 445
446 return RTA_NEST_END(skb, opts);
408 447
409rtattr_failure: 448rtattr_failure:
410 skb_trim(skb,b-skb->data); 449 return RTA_NEST_CANCEL(skb, opts);
411 return -1;
412} 450}
413 451
414static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) 452static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
415{ 453{
416 struct dsmark_qdisc_data *p = PRIV(sch); 454 struct dsmark_qdisc_data *p = PRIV(sch);
417 unsigned char *b = skb->tail; 455 struct rtattr *opts = NULL;
418 struct rtattr *rta;
419 456
420 rta = (struct rtattr *) b; 457 opts = RTA_NEST(skb, TCA_OPTIONS);
421 RTA_PUT(skb,TCA_OPTIONS,0,NULL); 458 RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
422 RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices); 459
423 if (p->default_index != NO_DEFAULT_INDEX) { 460 if (p->default_index != NO_DEFAULT_INDEX)
424 __u16 tmp = p->default_index; 461 RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
425 462
426 RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp);
427 }
428 if (p->set_tc_index) 463 if (p->set_tc_index)
429 RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL); 464 RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
430 rta->rta_len = skb->tail-b; 465
431 return skb->len; 466 return RTA_NEST_END(skb, opts);
432 467
433rtattr_failure: 468rtattr_failure:
434 skb_trim(skb,b-skb->data); 469 return RTA_NEST_CANCEL(skb, opts);
435 return -1;
436} 470}
437 471
438static struct Qdisc_class_ops dsmark_class_ops = { 472static struct Qdisc_class_ops dsmark_class_ops = {
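The rewritten dsmark dump paths above use the RTA_NEST/RTA_PUT_U8/RTA_NEST_END helpers instead of hand-patching `rta_len` from a saved tail pointer. A hedged reconstruction of the nesting idiom as used here (the real definitions belong to include/linux/rtnetlink.h of this series):

        /* Open a zero-length container attribute, emit children, then
         * patch the container's length; RTA_NEST_CANCEL trims on failure. */
        #define RTA_NEST(skb, type)                                     \
        ({      struct rtattr *__start = (struct rtattr *)(skb)->tail;  \
                RTA_PUT(skb, type, 0, NULL);                            \
                __start; })

        #define RTA_NEST_END(skb, start)                                \
        ({      (start)->rta_len = (skb)->tail - (unsigned char *)(start); \
                (skb)->len; })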
@@ -470,10 +504,13 @@ static int __init dsmark_module_init(void)
470{ 504{
471 return register_qdisc(&dsmark_qdisc_ops); 505 return register_qdisc(&dsmark_qdisc_ops);
472} 506}
507
473static void __exit dsmark_module_exit(void) 508static void __exit dsmark_module_exit(void)
474{ 509{
475 unregister_qdisc(&dsmark_qdisc_ops); 510 unregister_qdisc(&dsmark_qdisc_ops);
476} 511}
512
477module_init(dsmark_module_init) 513module_init(dsmark_module_init)
478module_exit(dsmark_module_exit) 514module_exit(dsmark_module_exit)
515
479MODULE_LICENSE("GPL"); 516MODULE_LICENSE("GPL");
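dsmark_valid_indices() accepts only powers of two because dsmark_dequeue() masks with `p->indices - 1`; any other value would alias distinct tc_index values onto the same mask/value slot. Quick illustration of the invariant:

        /* indices must be a power of two so the AND below is a clean
         * modulo: indices = 64 gives mask 0x3f, while indices = 48
         * (0b110000) fails dsmark_valid_indices() because a set bit
         * survives after the trailing zeroes are shifted out. */
        u32 index = skb->tc_index & (p->indices - 1);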
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 4888305c96da..033083bf0e74 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -11,131 +11,38 @@
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h> 14#include <linux/types.h>
18#include <linux/kernel.h> 15#include <linux/kernel.h>
19#include <linux/sched.h>
20#include <linux/string.h>
21#include <linux/mm.h>
22#include <linux/socket.h>
23#include <linux/sockios.h>
24#include <linux/in.h>
25#include <linux/errno.h> 16#include <linux/errno.h>
26#include <linux/interrupt.h>
27#include <linux/if_ether.h>
28#include <linux/inet.h>
29#include <linux/netdevice.h> 17#include <linux/netdevice.h>
30#include <linux/etherdevice.h>
31#include <linux/notifier.h>
32#include <net/ip.h>
33#include <net/route.h>
34#include <linux/skbuff.h> 18#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/pkt_sched.h> 19#include <net/pkt_sched.h>
37 20
38/* 1 band FIFO pseudo-"scheduler" */ 21/* 1 band FIFO pseudo-"scheduler" */
39 22
40struct fifo_sched_data 23struct fifo_sched_data
41{ 24{
42 unsigned limit; 25 u32 limit;
43}; 26};
44 27
45static int 28static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
46bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
47{ 29{
48 struct fifo_sched_data *q = qdisc_priv(sch); 30 struct fifo_sched_data *q = qdisc_priv(sch);
49 31
50 if (sch->qstats.backlog + skb->len <= q->limit) { 32 if (likely(sch->qstats.backlog + skb->len <= q->limit))
51 __skb_queue_tail(&sch->q, skb); 33 return qdisc_enqueue_tail(skb, sch);
52 sch->qstats.backlog += skb->len;
53 sch->bstats.bytes += skb->len;
54 sch->bstats.packets++;
55 return 0;
56 }
57 sch->qstats.drops++;
58#ifdef CONFIG_NET_CLS_POLICE
59 if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
60#endif
61 kfree_skb(skb);
62 return NET_XMIT_DROP;
63}
64
65static int
66bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
67{
68 __skb_queue_head(&sch->q, skb);
69 sch->qstats.backlog += skb->len;
70 sch->qstats.requeues++;
71 return 0;
72}
73
74static struct sk_buff *
75bfifo_dequeue(struct Qdisc* sch)
76{
77 struct sk_buff *skb;
78 34
79 skb = __skb_dequeue(&sch->q); 35 return qdisc_reshape_fail(skb, sch);
80 if (skb)
81 sch->qstats.backlog -= skb->len;
82 return skb;
83} 36}
84 37
85static unsigned int 38static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
86fifo_drop(struct Qdisc* sch)
87{
88 struct sk_buff *skb;
89
90 skb = __skb_dequeue_tail(&sch->q);
91 if (skb) {
92 unsigned int len = skb->len;
93 sch->qstats.backlog -= len;
94 kfree_skb(skb);
95 return len;
96 }
97 return 0;
98}
99
100static void
101fifo_reset(struct Qdisc* sch)
102{
103 skb_queue_purge(&sch->q);
104 sch->qstats.backlog = 0;
105}
106
107static int
108pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
109{ 39{
110 struct fifo_sched_data *q = qdisc_priv(sch); 40 struct fifo_sched_data *q = qdisc_priv(sch);
111 41
112 if (sch->q.qlen < q->limit) { 42 if (likely(skb_queue_len(&sch->q) < q->limit))
113 __skb_queue_tail(&sch->q, skb); 43 return qdisc_enqueue_tail(skb, sch);
114 sch->bstats.bytes += skb->len;
115 sch->bstats.packets++;
116 return 0;
117 }
118 sch->qstats.drops++;
119#ifdef CONFIG_NET_CLS_POLICE
120 if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
121#endif
122 kfree_skb(skb);
123 return NET_XMIT_DROP;
124}
125
126static int
127pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
128{
129 __skb_queue_head(&sch->q, skb);
130 sch->qstats.requeues++;
131 return 0;
132}
133
134 44
135static struct sk_buff * 45 return qdisc_reshape_fail(skb, sch);
136pfifo_dequeue(struct Qdisc* sch)
137{
138 return __skb_dequeue(&sch->q);
139} 46}
140 47
141static int fifo_init(struct Qdisc *sch, struct rtattr *opt) 48static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -143,66 +50,59 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
143 struct fifo_sched_data *q = qdisc_priv(sch); 50 struct fifo_sched_data *q = qdisc_priv(sch);
144 51
145 if (opt == NULL) { 52 if (opt == NULL) {
146 unsigned int limit = sch->dev->tx_queue_len ? : 1; 53 u32 limit = sch->dev->tx_queue_len ? : 1;
147 54
148 if (sch->ops == &bfifo_qdisc_ops) 55 if (sch->ops == &bfifo_qdisc_ops)
149 q->limit = limit*sch->dev->mtu; 56 limit *= sch->dev->mtu;
150 else 57
151 q->limit = limit; 58 q->limit = limit;
152 } else { 59 } else {
153 struct tc_fifo_qopt *ctl = RTA_DATA(opt); 60 struct tc_fifo_qopt *ctl = RTA_DATA(opt);
154 if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) 61
62 if (RTA_PAYLOAD(opt) < sizeof(*ctl))
155 return -EINVAL; 63 return -EINVAL;
64
156 q->limit = ctl->limit; 65 q->limit = ctl->limit;
157 } 66 }
67
158 return 0; 68 return 0;
159} 69}
160 70
161static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) 71static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
162{ 72{
163 struct fifo_sched_data *q = qdisc_priv(sch); 73 struct fifo_sched_data *q = qdisc_priv(sch);
164 unsigned char *b = skb->tail; 74 struct tc_fifo_qopt opt = { .limit = q->limit };
165 struct tc_fifo_qopt opt;
166 75
167 opt.limit = q->limit;
168 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 76 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
169
170 return skb->len; 77 return skb->len;
171 78
172rtattr_failure: 79rtattr_failure:
173 skb_trim(skb, b - skb->data);
174 return -1; 80 return -1;
175} 81}
176 82
177struct Qdisc_ops pfifo_qdisc_ops = { 83struct Qdisc_ops pfifo_qdisc_ops = {
178 .next = NULL,
179 .cl_ops = NULL,
180 .id = "pfifo", 84 .id = "pfifo",
181 .priv_size = sizeof(struct fifo_sched_data), 85 .priv_size = sizeof(struct fifo_sched_data),
182 .enqueue = pfifo_enqueue, 86 .enqueue = pfifo_enqueue,
183 .dequeue = pfifo_dequeue, 87 .dequeue = qdisc_dequeue_head,
184 .requeue = pfifo_requeue, 88 .requeue = qdisc_requeue,
185 .drop = fifo_drop, 89 .drop = qdisc_queue_drop,
186 .init = fifo_init, 90 .init = fifo_init,
187 .reset = fifo_reset, 91 .reset = qdisc_reset_queue,
188 .destroy = NULL,
189 .change = fifo_init, 92 .change = fifo_init,
190 .dump = fifo_dump, 93 .dump = fifo_dump,
191 .owner = THIS_MODULE, 94 .owner = THIS_MODULE,
192}; 95};
193 96
194struct Qdisc_ops bfifo_qdisc_ops = { 97struct Qdisc_ops bfifo_qdisc_ops = {
195 .next = NULL,
196 .cl_ops = NULL,
197 .id = "bfifo", 98 .id = "bfifo",
198 .priv_size = sizeof(struct fifo_sched_data), 99 .priv_size = sizeof(struct fifo_sched_data),
199 .enqueue = bfifo_enqueue, 100 .enqueue = bfifo_enqueue,
200 .dequeue = bfifo_dequeue, 101 .dequeue = qdisc_dequeue_head,
201 .requeue = bfifo_requeue, 102 .requeue = qdisc_requeue,
202 .drop = fifo_drop, 103 .drop = qdisc_queue_drop,
203 .init = fifo_init, 104 .init = fifo_init,
204 .reset = fifo_reset, 105 .reset = qdisc_reset_queue,
205 .destroy = NULL,
206 .change = fifo_init, 106 .change = fifo_init,
207 .dump = fifo_dump, 107 .dump = fifo_dump,
208 .owner = THIS_MODULE, 108 .owner = THIS_MODULE,
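The fifo rewrite delegates all queue bookkeeping to generic helpers introduced alongside this patch. A hedged sketch of what the two helpers used by bfifo/pfifo are expected to do, matching the accounting the deleted open-coded versions performed (the real inlines live in include/net/sch_generic.h of this series):

        /* Sketch of the helpers the new enqueue paths delegate to. */
        static inline int qdisc_enqueue_tail(struct sk_buff *skb,
                                             struct Qdisc *sch)
        {
                __skb_queue_tail(&sch->q, skb);
                sch->qstats.backlog += skb->len;  /* bytes, for bfifo */
                sch->bstats.bytes += skb->len;
                sch->bstats.packets++;
                return NET_XMIT_SUCCESS;
        }

        static inline int qdisc_reshape_fail(struct sk_buff *skb,
                                             struct Qdisc *sch)
        {
                sch->qstats.drops++;
        #ifdef CONFIG_NET_CLS_POLICE
                if (sch->reshape_fail && sch->reshape_fail(skb, sch) == 0)
                        return NET_XMIT_SUCCESS;  /* reclassified */
        #endif
                kfree_skb(skb);
                return NET_XMIT_DROP;
        }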
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 87e48a4e1051..7683b34dc6a9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -243,31 +243,27 @@ static void dev_watchdog_down(struct net_device *dev)
243 cheaper. 243 cheaper.
244 */ 244 */
245 245
246static int 246static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
247noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
248{ 247{
249 kfree_skb(skb); 248 kfree_skb(skb);
250 return NET_XMIT_CN; 249 return NET_XMIT_CN;
251} 250}
252 251
253static struct sk_buff * 252static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
254noop_dequeue(struct Qdisc * qdisc)
255{ 253{
256 return NULL; 254 return NULL;
257} 255}
258 256
259static int 257static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
260noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
261{ 258{
262 if (net_ratelimit()) 259 if (net_ratelimit())
263 printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name); 260 printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
261 skb->dev->name);
264 kfree_skb(skb); 262 kfree_skb(skb);
265 return NET_XMIT_CN; 263 return NET_XMIT_CN;
266} 264}
267 265
268struct Qdisc_ops noop_qdisc_ops = { 266struct Qdisc_ops noop_qdisc_ops = {
269 .next = NULL,
270 .cl_ops = NULL,
271 .id = "noop", 267 .id = "noop",
272 .priv_size = 0, 268 .priv_size = 0,
273 .enqueue = noop_enqueue, 269 .enqueue = noop_enqueue,
@@ -285,8 +281,6 @@ struct Qdisc noop_qdisc = {
285}; 281};
286 282
287static struct Qdisc_ops noqueue_qdisc_ops = { 283static struct Qdisc_ops noqueue_qdisc_ops = {
288 .next = NULL,
289 .cl_ops = NULL,
290 .id = "noqueue", 284 .id = "noqueue",
291 .priv_size = 0, 285 .priv_size = 0,
292 .enqueue = noop_enqueue, 286 .enqueue = noop_enqueue,
@@ -311,97 +305,87 @@ static const u8 prio2band[TC_PRIO_MAX+1] =
311 generic prio+fifo combination. 305 generic prio+fifo combination.
312 */ 306 */
313 307
314static int 308#define PFIFO_FAST_BANDS 3
315pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) 309
310static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
311 struct Qdisc *qdisc)
316{ 312{
317 struct sk_buff_head *list = qdisc_priv(qdisc); 313 struct sk_buff_head *list = qdisc_priv(qdisc);
314 return list + prio2band[skb->priority & TC_PRIO_MAX];
315}
318 316
319 list += prio2band[skb->priority&TC_PRIO_MAX]; 317static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
318{
319 struct sk_buff_head *list = prio2list(skb, qdisc);
320 320
321 if (list->qlen < qdisc->dev->tx_queue_len) { 321 if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
322 __skb_queue_tail(list, skb);
323 qdisc->q.qlen++; 322 qdisc->q.qlen++;
324 qdisc->bstats.bytes += skb->len; 323 return __qdisc_enqueue_tail(skb, qdisc, list);
325 qdisc->bstats.packets++;
326 return 0;
327 } 324 }
328 qdisc->qstats.drops++; 325
329 kfree_skb(skb); 326 return qdisc_drop(skb, qdisc);
330 return NET_XMIT_DROP;
331} 327}
332 328
333static struct sk_buff * 329static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
334pfifo_fast_dequeue(struct Qdisc* qdisc)
335{ 330{
336 int prio; 331 int prio;
337 struct sk_buff_head *list = qdisc_priv(qdisc); 332 struct sk_buff_head *list = qdisc_priv(qdisc);
338 struct sk_buff *skb;
339 333
340 for (prio = 0; prio < 3; prio++, list++) { 334 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++, list++) {
341 skb = __skb_dequeue(list); 335 struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
342 if (skb) { 336 if (skb) {
343 qdisc->q.qlen--; 337 qdisc->q.qlen--;
344 return skb; 338 return skb;
345 } 339 }
346 } 340 }
341
347 return NULL; 342 return NULL;
348} 343}
349 344
350static int 345static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
351pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
352{ 346{
353 struct sk_buff_head *list = qdisc_priv(qdisc);
354
355 list += prio2band[skb->priority&TC_PRIO_MAX];
356
357 __skb_queue_head(list, skb);
358 qdisc->q.qlen++; 347 qdisc->q.qlen++;
359 qdisc->qstats.requeues++; 348 return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
360 return 0;
361} 349}
362 350
363static void 351static void pfifo_fast_reset(struct Qdisc* qdisc)
364pfifo_fast_reset(struct Qdisc* qdisc)
365{ 352{
366 int prio; 353 int prio;
367 struct sk_buff_head *list = qdisc_priv(qdisc); 354 struct sk_buff_head *list = qdisc_priv(qdisc);
368 355
369 for (prio=0; prio < 3; prio++) 356 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
370 skb_queue_purge(list+prio); 357 __qdisc_reset_queue(qdisc, list + prio);
358
359 qdisc->qstats.backlog = 0;
371 qdisc->q.qlen = 0; 360 qdisc->q.qlen = 0;
372} 361}
373 362
374static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) 363static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
375{ 364{
376 unsigned char *b = skb->tail; 365 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
377 struct tc_prio_qopt opt;
378 366
379 opt.bands = 3;
380 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 367 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
381 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 368 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
382 return skb->len; 369 return skb->len;
383 370
384rtattr_failure: 371rtattr_failure:
385 skb_trim(skb, b - skb->data);
386 return -1; 372 return -1;
387} 373}
388 374
389static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) 375static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
390{ 376{
391 int i; 377 int prio;
392 struct sk_buff_head *list = qdisc_priv(qdisc); 378 struct sk_buff_head *list = qdisc_priv(qdisc);
393 379
394 for (i=0; i<3; i++) 380 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
395 skb_queue_head_init(list+i); 381 skb_queue_head_init(list + prio);
396 382
397 return 0; 383 return 0;
398} 384}
399 385
400static struct Qdisc_ops pfifo_fast_ops = { 386static struct Qdisc_ops pfifo_fast_ops = {
401 .next = NULL,
402 .cl_ops = NULL,
403 .id = "pfifo_fast", 387 .id = "pfifo_fast",
404 .priv_size = 3 * sizeof(struct sk_buff_head), 388 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
405 .enqueue = pfifo_fast_enqueue, 389 .enqueue = pfifo_fast_enqueue,
406 .dequeue = pfifo_fast_dequeue, 390 .dequeue = pfifo_fast_dequeue,
407 .requeue = pfifo_fast_requeue, 391 .requeue = pfifo_fast_requeue,
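pfifo_fast keeps PFIFO_FAST_BANDS (three) internal FIFOs in its priv area and maps skb->priority onto one of them through prio2band; the new prio2list() helper centralizes the pointer arithmetic that enqueue and requeue used to duplicate. The selection boils down to:

        /* The priv area is PFIFO_FAST_BANDS consecutive sk_buff_head
         * queues; the TOS-derived priority indexes prio2band to pick
         * band 0, 1 or 2. */
        struct sk_buff_head *list = qdisc_priv(qdisc);
        int band = prio2band[skb->priority & TC_PRIO_MAX];
        struct sk_buff_head *queue = list + band;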
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 663843d97a92..7ae6aa772dab 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -191,10 +191,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
191 asoc->last_cwr_tsn = asoc->ctsn_ack_point; 191 asoc->last_cwr_tsn = asoc->ctsn_ack_point;
192 asoc->unack_data = 0; 192 asoc->unack_data = 0;
193 193
194 SCTP_DEBUG_PRINTK("myctsnap for %s INIT as 0x%x.\n",
195 asoc->ep->debug_name,
196 asoc->ctsn_ack_point);
197
198 /* ADDIP Section 4.1 Asconf Chunk Procedures 194 /* ADDIP Section 4.1 Asconf Chunk Procedures
199 * 195 *
200 * When an endpoint has an ASCONF signaled change to be sent to the 196 * When an endpoint has an ASCONF signaled change to be sent to the
@@ -211,6 +207,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
211 207
212 /* Make an empty list of remote transport addresses. */ 208 /* Make an empty list of remote transport addresses. */
213 INIT_LIST_HEAD(&asoc->peer.transport_addr_list); 209 INIT_LIST_HEAD(&asoc->peer.transport_addr_list);
210 asoc->peer.transport_count = 0;
214 211
215 /* RFC 2960 5.1 Normal Establishment of an Association 212 /* RFC 2960 5.1 Normal Establishment of an Association
216 * 213 *
@@ -288,6 +285,7 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
288 285
289 asoc->base.malloced = 1; 286 asoc->base.malloced = 1;
290 SCTP_DBG_OBJCNT_INC(assoc); 287 SCTP_DBG_OBJCNT_INC(assoc);
288 SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc);
291 289
292 return asoc; 290 return asoc;
293 291
@@ -356,6 +354,8 @@ void sctp_association_free(struct sctp_association *asoc)
356 sctp_transport_free(transport); 354 sctp_transport_free(transport);
357 } 355 }
358 356
357 asoc->peer.transport_count = 0;
358
359 /* Free any cached ASCONF_ACK chunk. */ 359 /* Free any cached ASCONF_ACK chunk. */
360 if (asoc->addip_last_asconf_ack) 360 if (asoc->addip_last_asconf_ack)
361 sctp_chunk_free(asoc->addip_last_asconf_ack); 361 sctp_chunk_free(asoc->addip_last_asconf_ack);
@@ -400,7 +400,7 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
400 /* If the primary path is changing, assume that the 400 /* If the primary path is changing, assume that the
401 * user wants to use this new path. 401 * user wants to use this new path.
402 */ 402 */
403 if (transport->active) 403 if (transport->state != SCTP_INACTIVE)
404 asoc->peer.active_path = transport; 404 asoc->peer.active_path = transport;
405 405
406 /* 406 /*
@@ -428,10 +428,58 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
428 transport->cacc.next_tsn_at_change = asoc->next_tsn; 428 transport->cacc.next_tsn_at_change = asoc->next_tsn;
429} 429}
430 430
431/* Remove a transport from an association. */
432void sctp_assoc_rm_peer(struct sctp_association *asoc,
433 struct sctp_transport *peer)
434{
435 struct list_head *pos;
436 struct sctp_transport *transport;
437
438 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_rm_peer:association %p addr: ",
439 " port: %d\n",
440 asoc,
441 (&peer->ipaddr),
442 peer->ipaddr.v4.sin_port);
443
444 /* If we are to remove the current retran_path, update it
445 * to the next peer before removing this peer from the list.
446 */
447 if (asoc->peer.retran_path == peer)
448 sctp_assoc_update_retran_path(asoc);
449
450 /* Remove this peer from the list. */
451 list_del(&peer->transports);
452
453 /* Get the first transport of asoc. */
454 pos = asoc->peer.transport_addr_list.next;
455 transport = list_entry(pos, struct sctp_transport, transports);
456
457 /* Update any entries that match the peer to be deleted. */
458 if (asoc->peer.primary_path == peer)
459 sctp_assoc_set_primary(asoc, transport);
460 if (asoc->peer.active_path == peer)
461 asoc->peer.active_path = transport;
462 if (asoc->peer.last_data_from == peer)
463 asoc->peer.last_data_from = transport;
464
465 /* If we remove the transport an INIT was last sent to, set it to
466 * NULL. Combined with the update of the retran path above, this
467 * will cause the next INIT to be sent to the next available
468 * transport, maintaining the cycle.
469 */
470 if (asoc->init_last_sent_to == peer)
471 asoc->init_last_sent_to = NULL;
472
473 asoc->peer.transport_count--;
474
475 sctp_transport_free(peer);
476}
477
431/* Add a transport address to an association. */ 478/* Add a transport address to an association. */
432struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, 479struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
433 const union sctp_addr *addr, 480 const union sctp_addr *addr,
434 int gfp) 481 const int gfp,
482 const int peer_state)
435{ 483{
436 struct sctp_transport *peer; 484 struct sctp_transport *peer;
437 struct sctp_sock *sp; 485 struct sctp_sock *sp;
@@ -442,14 +490,25 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
442 /* AF_INET and AF_INET6 share common port field. */ 490 /* AF_INET and AF_INET6 share common port field. */
443 port = addr->v4.sin_port; 491 port = addr->v4.sin_port;
444 492
493 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ",
494 " port: %d state:%s\n",
495 asoc,
496 addr,
497 addr->v4.sin_port,
498 peer_state == SCTP_UNKNOWN ? "UNKNOWN" : "ACTIVE");
499
445 /* Set the port if it has not been set yet. */ 500 /* Set the port if it has not been set yet. */
446 if (0 == asoc->peer.port) 501 if (0 == asoc->peer.port)
447 asoc->peer.port = port; 502 asoc->peer.port = port;
448 503
449 /* Check to see if this is a duplicate. */ 504 /* Check to see if this is a duplicate. */
450 peer = sctp_assoc_lookup_paddr(asoc, addr); 505 peer = sctp_assoc_lookup_paddr(asoc, addr);
451 if (peer) 506 if (peer) {
507 if (peer_state == SCTP_ACTIVE &&
508 peer->state == SCTP_UNKNOWN)
509 peer->state = SCTP_ACTIVE;
452 return peer; 510 return peer;
511 }
453 512
454 peer = sctp_transport_new(addr, gfp); 513 peer = sctp_transport_new(addr, gfp);
455 if (!peer) 514 if (!peer)
@@ -516,8 +575,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
516 /* Set the transport's RTO.initial value */ 575 /* Set the transport's RTO.initial value */
517 peer->rto = asoc->rto_initial; 576 peer->rto = asoc->rto_initial;
518 577
578 /* Set the peer's active state. */
579 peer->state = peer_state;
580
519 /* Attach the remote transport to our asoc. */ 581 /* Attach the remote transport to our asoc. */
520 list_add_tail(&peer->transports, &asoc->peer.transport_addr_list); 582 list_add_tail(&peer->transports, &asoc->peer.transport_addr_list);
583 asoc->peer.transport_count++;
521 584
522 /* If we do not yet have a primary path, set one. */ 585 /* If we do not yet have a primary path, set one. */
523 if (!asoc->peer.primary_path) { 586 if (!asoc->peer.primary_path) {
@@ -525,8 +588,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
525 asoc->peer.retran_path = peer; 588 asoc->peer.retran_path = peer;
526 } 589 }
527 590
528 if (asoc->peer.active_path == asoc->peer.retran_path) 591 if (asoc->peer.active_path == asoc->peer.retran_path) {
529 asoc->peer.retran_path = peer; 592 asoc->peer.retran_path = peer;
593 }
530 594
531 return peer; 595 return peer;
532} 596}
@@ -537,37 +601,16 @@ void sctp_assoc_del_peer(struct sctp_association *asoc,
537{ 601{
538 struct list_head *pos; 602 struct list_head *pos;
539 struct list_head *temp; 603 struct list_head *temp;
540 struct sctp_transport *peer = NULL;
541 struct sctp_transport *transport; 604 struct sctp_transport *transport;
542 605
543 list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { 606 list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
544 transport = list_entry(pos, struct sctp_transport, transports); 607 transport = list_entry(pos, struct sctp_transport, transports);
545 if (sctp_cmp_addr_exact(addr, &transport->ipaddr)) { 608 if (sctp_cmp_addr_exact(addr, &transport->ipaddr)) {
546 peer = transport; 609 /* Do bookkeeping for removing the peer and free it. */
547 list_del(pos); 610 sctp_assoc_rm_peer(asoc, transport);
548 break; 611 break;
549 } 612 }
550 } 613 }
551
552 /* The address we want delete is not in the association. */
553 if (!peer)
554 return;
555
556 /* Get the first transport of asoc. */
557 pos = asoc->peer.transport_addr_list.next;
558 transport = list_entry(pos, struct sctp_transport, transports);
559
560 /* Update any entries that match the peer to be deleted. */
561 if (asoc->peer.primary_path == peer)
562 sctp_assoc_set_primary(asoc, transport);
563 if (asoc->peer.active_path == peer)
564 asoc->peer.active_path = transport;
565 if (asoc->peer.retran_path == peer)
566 asoc->peer.retran_path = transport;
567 if (asoc->peer.last_data_from == peer)
568 asoc->peer.last_data_from = transport;
569
570 sctp_transport_free(peer);
571} 614}
572 615
573/* Lookup a transport by address. */ 616/* Lookup a transport by address. */
@@ -608,12 +651,12 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
608 /* Record the transition on the transport. */ 651 /* Record the transition on the transport. */
609 switch (command) { 652 switch (command) {
610 case SCTP_TRANSPORT_UP: 653 case SCTP_TRANSPORT_UP:
611 transport->active = SCTP_ACTIVE; 654 transport->state = SCTP_ACTIVE;
612 spc_state = SCTP_ADDR_AVAILABLE; 655 spc_state = SCTP_ADDR_AVAILABLE;
613 break; 656 break;
614 657
615 case SCTP_TRANSPORT_DOWN: 658 case SCTP_TRANSPORT_DOWN:
616 transport->active = SCTP_INACTIVE; 659 transport->state = SCTP_INACTIVE;
617 spc_state = SCTP_ADDR_UNREACHABLE; 660 spc_state = SCTP_ADDR_UNREACHABLE;
618 break; 661 break;
619 662
@@ -643,7 +686,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
643 list_for_each(pos, &asoc->peer.transport_addr_list) { 686 list_for_each(pos, &asoc->peer.transport_addr_list) {
644 t = list_entry(pos, struct sctp_transport, transports); 687 t = list_entry(pos, struct sctp_transport, transports);
645 688
646 if (!t->active) 689 if (t->state == SCTP_INACTIVE)
647 continue; 690 continue;
648 if (!first || t->last_time_heard > first->last_time_heard) { 691 if (!first || t->last_time_heard > first->last_time_heard) {
649 second = first; 692 second = first;
@@ -663,7 +706,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
663 * [If the primary is active but not most recent, bump the most 706 * [If the primary is active but not most recent, bump the most
664 * recently used transport.] 707 * recently used transport.]
665 */ 708 */
666 if (asoc->peer.primary_path->active && 709 if (asoc->peer.primary_path->state != SCTP_INACTIVE &&
667 first != asoc->peer.primary_path) { 710 first != asoc->peer.primary_path) {
668 second = first; 711 second = first;
669 first = asoc->peer.primary_path; 712 first = asoc->peer.primary_path;
@@ -958,7 +1001,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
958 transports); 1001 transports);
959 if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr)) 1002 if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr))
960 sctp_assoc_add_peer(asoc, &trans->ipaddr, 1003 sctp_assoc_add_peer(asoc, &trans->ipaddr,
961 GFP_ATOMIC); 1004 GFP_ATOMIC, SCTP_ACTIVE);
962 } 1005 }
963 1006
964 asoc->ctsn_ack_point = asoc->next_tsn - 1; 1007 asoc->ctsn_ack_point = asoc->next_tsn - 1;
@@ -998,7 +1041,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
998 1041
999 /* Try to find an active transport. */ 1042 /* Try to find an active transport. */
1000 1043
1001 if (t->active) { 1044 if (t->state != SCTP_INACTIVE) {
1002 break; 1045 break;
1003 } else { 1046 } else {
1004 /* Keep track of the next transport in case 1047 /* Keep track of the next transport in case
@@ -1019,6 +1062,40 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1019 } 1062 }
1020 1063
1021 asoc->peer.retran_path = t; 1064 asoc->peer.retran_path = t;
1065
1066 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
1067 " %p addr: ",
1068 " port: %d\n",
1069 asoc,
1070 (&t->ipaddr),
1071 t->ipaddr.v4.sin_port);
1072}
1073
1074/* Choose the transport for sending an INIT packet. */
1075struct sctp_transport *sctp_assoc_choose_init_transport(
1076 struct sctp_association *asoc)
1077{
1078 struct sctp_transport *t;
1079
1080 /* If no INIT has been sent yet, use the active path. Otherwise use
1081 * the retran path, updating it first if the last INIT went over it.
1082 */
1083 if (!asoc->init_last_sent_to) {
1084 t = asoc->peer.active_path;
1085 } else {
1086 if (asoc->init_last_sent_to == asoc->peer.retran_path)
1087 sctp_assoc_update_retran_path(asoc);
1088 t = asoc->peer.retran_path;
1089 }
1090
1091 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_choose_init_transport:association"
1092 " %p addr: ",
1093 " port: %d\n",
1094 asoc,
1095 (&t->ipaddr),
1096 t->ipaddr.v4.sin_port);
1097
1098 return t;
1022} 1099}
1023 1100
1024/* Choose the transport for sending a SHUTDOWN packet. */ 1101/* Choose the transport for sending a SHUTDOWN packet. */
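
The running theme of the associola.c hunks above is the replacement of the boolean transport->active with a tri-state transport->state, with every liveness test rewritten as state != SCTP_INACTIVE rather than a check for ACTIVE, so transports still in the unconfirmed UNKNOWN state stay eligible for sending. A standalone sketch of that predicate (the enum values are illustrative, not the kernel's definitions):

#include <stdio.h>

/* Illustrative tri-state mirroring the SCTP_UNKNOWN/SCTP_ACTIVE/
 * SCTP_INACTIVE values this patch introduces.
 */
enum transport_state { T_UNKNOWN, T_ACTIVE, T_INACTIVE };

/* A transport is usable unless it is known to be down; UNKNOWN
 * (peer address not yet confirmed) still qualifies, which is why the
 * patch tests != INACTIVE instead of == ACTIVE.
 */
static int transport_usable(enum transport_state s)
{
        return s != T_INACTIVE;
}

int main(void)
{
        printf("unknown:  %d\n", transport_usable(T_UNKNOWN));   /* 1 */
        printf("active:   %d\n", transport_usable(T_ACTIVE));    /* 1 */
        printf("inactive: %d\n", transport_usable(T_INACTIVE));  /* 0 */
        return 0;
}
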
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 334f61773e6d..2ec0320fac3b 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -134,7 +134,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
134 ep->last_key = ep->current_key = 0; 134 ep->last_key = ep->current_key = 0;
135 ep->key_changed_at = jiffies; 135 ep->key_changed_at = jiffies;
136 136
137 ep->debug_name = "unnamedEndpoint";
138 return ep; 137 return ep;
139} 138}
140 139
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b719a77d66b4..339f7acfdb64 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -178,6 +178,37 @@ int sctp_rcv(struct sk_buff *skb)
178 178
179 asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); 179 asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport);
180 180
181 if (!asoc)
182 ep = __sctp_rcv_lookup_endpoint(&dest);
183
184 /* Retrieve the common input handling substructure. */
185 rcvr = asoc ? &asoc->base : &ep->base;
186 sk = rcvr->sk;
187
188 /*
189 * If a frame arrives on an interface and the receiving socket is
190 * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
191 */
192 if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb)))
193 {
194 sock_put(sk);
195 if (asoc) {
196 sctp_association_put(asoc);
197 asoc = NULL;
198 } else {
199 sctp_endpoint_put(ep);
200 ep = NULL;
201 }
202 sk = sctp_get_ctl_sock();
203 ep = sctp_sk(sk)->ep;
204 sctp_endpoint_hold(ep);
205 sock_hold(sk);
206 rcvr = &ep->base;
207 }
208
209 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
210 goto discard_release;
211
181 /* 212 /*
182 * RFC 2960, 8.4 - Handle "Out of the blue" Packets. 213 * RFC 2960, 8.4 - Handle "Out of the blue" Packets.
183 * An SCTP packet is called an "out of the blue" (OOTB) 214 * An SCTP packet is called an "out of the blue" (OOTB)
@@ -187,22 +218,12 @@ int sctp_rcv(struct sk_buff *skb)
187 * packet belongs. 218 * packet belongs.
188 */ 219 */
189 if (!asoc) { 220 if (!asoc) {
190 ep = __sctp_rcv_lookup_endpoint(&dest);
191 if (sctp_rcv_ootb(skb)) { 221 if (sctp_rcv_ootb(skb)) {
192 SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES); 222 SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES);
193 goto discard_release; 223 goto discard_release;
194 } 224 }
195 } 225 }
196 226
197 /* Retrieve the common input handling substructure. */
198 rcvr = asoc ? &asoc->base : &ep->base;
199 sk = rcvr->sk;
200
201 if ((sk) && (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)) {
202 goto discard_release;
203 }
204
205
206 /* SCTP seems to always need a timestamp right now (FIXME) */ 227 /* SCTP seems to always need a timestamp right now (FIXME) */
207 if (skb->stamp.tv_sec == 0) { 228 if (skb->stamp.tv_sec == 0) {
208 do_gettimeofday(&skb->stamp); 229 do_gettimeofday(&skb->stamp);
@@ -265,13 +286,11 @@ discard_it:
265 286
266discard_release: 287discard_release:
267 /* Release any structures we may be holding. */ 288 /* Release any structures we may be holding. */
268 if (asoc) { 289 sock_put(sk);
269 sock_put(asoc->base.sk); 290 if (asoc)
270 sctp_association_put(asoc); 291 sctp_association_put(asoc);
271 } else { 292 else
272 sock_put(ep->base.sk);
273 sctp_endpoint_put(ep); 293 sctp_endpoint_put(ep);
274 }
275 294
276 goto discard_it; 295 goto discard_it;
277} 296}
@@ -334,7 +353,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
334 353
335 sctp_do_sm(SCTP_EVENT_T_OTHER, 354 sctp_do_sm(SCTP_EVENT_T_OTHER,
336 SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), 355 SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
337 asoc->state, asoc->ep, asoc, NULL, 356 asoc->state, asoc->ep, asoc, t,
338 GFP_ATOMIC); 357 GFP_ATOMIC);
339 358
340} 359}
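
The block added to sctp_rcv() makes SCTP honour SO_BINDTODEVICE: a frame arriving on an interface other than the one the socket is bound to is handed to the control socket and treated as out of the blue. For reference, a userspace sketch of setting that option (the interface name is an assumption, and on Linux the call needs CAP_NET_RAW):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0);  /* any socket type */
        const char ifname[] = "eth0";             /* assumed device name */

        if (fd < 0) {
                perror("socket");
                return 1;
        }
        /* After this succeeds, only packets arriving on ifname reach
         * the socket; the hunk above is what enforces that for SCTP.
         */
        if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                       ifname, strlen(ifname) + 1) < 0)
                perror("setsockopt(SO_BINDTODEVICE)");
        return 0;
}
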
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index c9d9ea064734..c7e42d125b9c 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -812,26 +812,23 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
812 if (addr->sa.sa_family != AF_INET6) 812 if (addr->sa.sa_family != AF_INET6)
813 af = sctp_get_af_specific(addr->sa.sa_family); 813 af = sctp_get_af_specific(addr->sa.sa_family);
814 else { 814 else {
815 struct sock *sk;
816 int type = ipv6_addr_type(&addr->v6.sin6_addr); 815 int type = ipv6_addr_type(&addr->v6.sin6_addr);
817 sk = sctp_opt2sk(opt); 816 struct net_device *dev;
817
818 if (type & IPV6_ADDR_LINKLOCAL) { 818 if (type & IPV6_ADDR_LINKLOCAL) {
819 /* Note: Behavior similar to af_inet6.c: 819 if (!addr->v6.sin6_scope_id)
820 * 1) Overrides previous bound_dev_if 820 return 0;
821 * 2) Destructive even if bind isn't successful. 821 dev = dev_get_by_index(addr->v6.sin6_scope_id);
822 */ 822 if (!dev)
823
824 if (addr->v6.sin6_scope_id)
825 sk->sk_bound_dev_if = addr->v6.sin6_scope_id;
826 if (!sk->sk_bound_dev_if)
827 return 0; 823 return 0;
824 dev_put(dev);
828 } 825 }
829 af = opt->pf->af; 826 af = opt->pf->af;
830 } 827 }
831 return af->available(addr, opt); 828 return af->available(addr, opt);
832} 829}
833 830
834/* Verify that the provided sockaddr looks bindable. Common verification, 831/* Verify that the provided sockaddr looks sendable. Common verification,
835 * has already been taken care of. 832 * has already been taken care of.
836 */ 833 */
837static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) 834static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
@@ -842,19 +839,16 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
842 if (addr->sa.sa_family != AF_INET6) 839 if (addr->sa.sa_family != AF_INET6)
843 af = sctp_get_af_specific(addr->sa.sa_family); 840 af = sctp_get_af_specific(addr->sa.sa_family);
844 else { 841 else {
845 struct sock *sk;
846 int type = ipv6_addr_type(&addr->v6.sin6_addr); 842 int type = ipv6_addr_type(&addr->v6.sin6_addr);
847 sk = sctp_opt2sk(opt); 843 struct net_device *dev;
844
848 if (type & IPV6_ADDR_LINKLOCAL) { 845 if (type & IPV6_ADDR_LINKLOCAL) {
849 /* Note: Behavior similar to af_inet6.c: 846 if (!addr->v6.sin6_scope_id)
850 * 1) Overrides previous bound_dev_if 847 return 0;
851 * 2) Destructive even if bind isn't successful. 848 dev = dev_get_by_index(addr->v6.sin6_scope_id);
852 */ 849 if (!dev)
853
854 if (addr->v6.sin6_scope_id)
855 sk->sk_bound_dev_if = addr->v6.sin6_scope_id;
856 if (!sk->sk_bound_dev_if)
857 return 0; 850 return 0;
851 dev_put(dev);
858 } 852 }
859 af = opt->pf->af; 853 af = opt->pf->af;
860 } 854 }
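
Both verification hooks now validate a link-local address instead of destructively setting sk_bound_dev_if: the sockaddr must carry a scope id, and that id must name a live device. A rough userspace analogue, with if_indextoname() standing in for dev_get_by_index() (the "lo" lookup is only an example):

#include <stdio.h>
#include <netinet/in.h>
#include <net/if.h>

static int v6_addr_ok(const struct sockaddr_in6 *sin6)
{
        char ifname[IF_NAMESIZE];

        if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
                if (!sin6->sin6_scope_id)
                        return 0;       /* link-local without scope id */
                if (!if_indextoname(sin6->sin6_scope_id, ifname))
                        return 0;       /* scope id names no device */
        }
        return 1;
}

int main(void)
{
        struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6 };

        sin6.sin6_addr.s6_addr[0] = 0xfe;       /* fe80::1, link-local */
        sin6.sin6_addr.s6_addr[1] = 0x80;
        sin6.sin6_addr.s6_addr[15] = 1;

        printf("no scope id -> %d\n", v6_addr_ok(&sin6));
        sin6.sin6_scope_id = if_nametoindex("lo");
        printf("scope id of lo -> %d\n", v6_addr_ok(&sin6));
        return 0;
}
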
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 1b2d4adc4ddb..4eb81a1407b7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -682,9 +682,9 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
682 682
683 if (!new_transport) { 683 if (!new_transport) {
684 new_transport = asoc->peer.active_path; 684 new_transport = asoc->peer.active_path;
685 } else if (!new_transport->active) { 685 } else if (new_transport->state == SCTP_INACTIVE) {
686 /* If the chunk is Heartbeat or Heartbeat Ack, 686 /* If the chunk is Heartbeat or Heartbeat Ack,
687 * send it to chunk->transport, even if it's 687 * send it to chunk->transport, even if it's
688 * inactive. 688 * inactive.
689 * 689 *
690 * 3.3.6 Heartbeat Acknowledgement: 690 * 3.3.6 Heartbeat Acknowledgement:
@@ -840,7 +840,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
840 * Otherwise, we want to use the active path. 840 * Otherwise, we want to use the active path.
841 */ 841 */
842 new_transport = chunk->transport; 842 new_transport = chunk->transport;
843 if (!new_transport || !new_transport->active) 843 if (!new_transport ||
844 new_transport->state == SCTP_INACTIVE)
844 new_transport = asoc->peer.active_path; 845 new_transport = asoc->peer.active_path;
845 846
846 /* Change packets if necessary. */ 847 /* Change packets if necessary. */
@@ -1454,7 +1455,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1454 /* Mark the destination transport address as 1455 /* Mark the destination transport address as
1455 * active if it is not so marked. 1456 * active if it is not so marked.
1456 */ 1457 */
1457 if (!transport->active) { 1458 if (transport->state == SCTP_INACTIVE) {
1458 sctp_assoc_control_transport( 1459 sctp_assoc_control_transport(
1459 transport->asoc, 1460 transport->asoc,
1460 transport, 1461 transport,
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index e42fd8c2916b..98d49ec9b74b 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -132,14 +132,25 @@ void sctp_snmp_proc_exit(void)
132static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) 132static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
133{ 133{
134 struct list_head *pos; 134 struct list_head *pos;
135 struct sctp_association *asoc;
135 struct sctp_sockaddr_entry *laddr; 136 struct sctp_sockaddr_entry *laddr;
136 union sctp_addr *addr; 137 struct sctp_transport *peer;
138 union sctp_addr *addr, *primary = NULL;
137 struct sctp_af *af; 139 struct sctp_af *af;
138 140
141 if (epb->type == SCTP_EP_TYPE_ASSOCIATION) {
142 asoc = sctp_assoc(epb);
143 peer = asoc->peer.primary_path;
144 primary = &peer->saddr;
145 }
146
139 list_for_each(pos, &epb->bind_addr.address_list) { 147 list_for_each(pos, &epb->bind_addr.address_list) {
140 laddr = list_entry(pos, struct sctp_sockaddr_entry, list); 148 laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
141 addr = (union sctp_addr *)&laddr->a; 149 addr = (union sctp_addr *)&laddr->a;
142 af = sctp_get_af_specific(addr->sa.sa_family); 150 af = sctp_get_af_specific(addr->sa.sa_family);
151 if (primary && af->cmp_addr(addr, primary)) {
152 seq_printf(seq, "*");
153 }
143 af->seq_dump_addr(seq, addr); 154 af->seq_dump_addr(seq, addr);
144 } 155 }
145} 156}
@@ -149,17 +160,54 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
149{ 160{
150 struct list_head *pos; 161 struct list_head *pos;
151 struct sctp_transport *transport; 162 struct sctp_transport *transport;
152 union sctp_addr *addr; 163 union sctp_addr *addr, *primary;
153 struct sctp_af *af; 164 struct sctp_af *af;
154 165
166 primary = &(assoc->peer.primary_addr);
155 list_for_each(pos, &assoc->peer.transport_addr_list) { 167 list_for_each(pos, &assoc->peer.transport_addr_list) {
156 transport = list_entry(pos, struct sctp_transport, transports); 168 transport = list_entry(pos, struct sctp_transport, transports);
157 addr = (union sctp_addr *)&transport->ipaddr; 169 addr = (union sctp_addr *)&transport->ipaddr;
158 af = sctp_get_af_specific(addr->sa.sa_family); 170 af = sctp_get_af_specific(addr->sa.sa_family);
171 if (af->cmp_addr(addr, primary)) {
172 seq_printf(seq, "*");
173 }
159 af->seq_dump_addr(seq, addr); 174 af->seq_dump_addr(seq, addr);
160 } 175 }
161} 176}
162 177
178static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
179{
180 if (*pos > sctp_ep_hashsize)
181 return NULL;
182
183 if (*pos < 0)
184 *pos = 0;
185
186 if (*pos == 0)
187 seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT UID INODE LADDRS\n");
188
189 ++*pos;
190
191 return (void *)pos;
192}
193
194static void sctp_eps_seq_stop(struct seq_file *seq, void *v)
195{
196 return;
197}
198
199
200static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos)
201{
202 if (*pos > sctp_ep_hashsize)
203 return NULL;
204
205 ++*pos;
206
207 return pos;
208}
209
210
163/* Display sctp endpoints (/proc/net/sctp/eps). */ 211/* Display sctp endpoints (/proc/net/sctp/eps). */
164static int sctp_eps_seq_show(struct seq_file *seq, void *v) 212static int sctp_eps_seq_show(struct seq_file *seq, void *v)
165{ 213{
@@ -167,38 +215,50 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
167 struct sctp_ep_common *epb; 215 struct sctp_ep_common *epb;
168 struct sctp_endpoint *ep; 216 struct sctp_endpoint *ep;
169 struct sock *sk; 217 struct sock *sk;
170 int hash; 218 int hash = *(int *)v;
171 219
172 seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT LADDRS\n"); 220 if (hash > sctp_ep_hashsize)
173 for (hash = 0; hash < sctp_ep_hashsize; hash++) { 221 return -ENOMEM;
174 head = &sctp_ep_hashtable[hash]; 222
175 read_lock(&head->lock); 223 head = &sctp_ep_hashtable[hash-1];
176 for (epb = head->chain; epb; epb = epb->next) { 224 sctp_local_bh_disable();
177 ep = sctp_ep(epb); 225 read_lock(&head->lock);
178 sk = epb->sk; 226 for (epb = head->chain; epb; epb = epb->next) {
179 seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d ", ep, sk, 227 ep = sctp_ep(epb);
180 sctp_sk(sk)->type, sk->sk_state, hash, 228 sk = epb->sk;
181 epb->bind_addr.port); 229 seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
182 sctp_seq_dump_local_addrs(seq, epb); 230 sctp_sk(sk)->type, sk->sk_state, hash-1,
183 seq_printf(seq, "\n"); 231 epb->bind_addr.port,
184 } 232 sock_i_uid(sk), sock_i_ino(sk));
185 read_unlock(&head->lock); 233
234 sctp_seq_dump_local_addrs(seq, epb);
235 seq_printf(seq, "\n");
186 } 236 }
237 read_unlock(&head->lock);
238 sctp_local_bh_enable();
187 239
188 return 0; 240 return 0;
189} 241}
190 242
243static struct seq_operations sctp_eps_ops = {
244 .start = sctp_eps_seq_start,
245 .next = sctp_eps_seq_next,
246 .stop = sctp_eps_seq_stop,
247 .show = sctp_eps_seq_show,
248};
249
250
191/* Initialize the seq file operations for 'eps' object. */ 251/* Initialize the seq file operations for 'eps' object. */
192static int sctp_eps_seq_open(struct inode *inode, struct file *file) 252static int sctp_eps_seq_open(struct inode *inode, struct file *file)
193{ 253{
194 return single_open(file, sctp_eps_seq_show, NULL); 254 return seq_open(file, &sctp_eps_ops);
195} 255}
196 256
197static struct file_operations sctp_eps_seq_fops = { 257static struct file_operations sctp_eps_seq_fops = {
198 .open = sctp_eps_seq_open, 258 .open = sctp_eps_seq_open,
199 .read = seq_read, 259 .read = seq_read,
200 .llseek = seq_lseek, 260 .llseek = seq_lseek,
201 .release = single_release, 261 .release = seq_release,
202}; 262};
203 263
204/* Set up the proc fs entry for 'eps' object. */ 264/* Set up the proc fs entry for 'eps' object. */
@@ -221,6 +281,40 @@ void sctp_eps_proc_exit(void)
221 remove_proc_entry("eps", proc_net_sctp); 281 remove_proc_entry("eps", proc_net_sctp);
222} 282}
223 283
284
285static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
286{
287 if (*pos > sctp_assoc_hashsize)
288 return NULL;
289
290 if (*pos < 0)
291 *pos = 0;
292
293 if (*pos == 0)
294 seq_printf(seq, " ASSOC SOCK STY SST ST HBKT ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
295 "RPORT LADDRS <-> RADDRS\n");
296
297 ++*pos;
298
299 return (void *)pos;
300}
301
302static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
303{
304 return;
305}
306
307
308static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos)
309{
310 if (*pos > sctp_assoc_hashsize)
311 return NULL;
312
313 ++*pos;
314
315 return pos;
316}
317
224/* Display sctp associations (/proc/net/sctp/assocs). */ 318/* Display sctp associations (/proc/net/sctp/assocs). */
225static int sctp_assocs_seq_show(struct seq_file *seq, void *v) 319static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
226{ 320{
@@ -228,43 +322,57 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
228 struct sctp_ep_common *epb; 322 struct sctp_ep_common *epb;
229 struct sctp_association *assoc; 323 struct sctp_association *assoc;
230 struct sock *sk; 324 struct sock *sk;
231 int hash; 325 int hash = *(int *)v;
232 326
233 seq_printf(seq, " ASSOC SOCK STY SST ST HBKT LPORT RPORT " 327 if (hash > sctp_assoc_hashsize)
234 "LADDRS <-> RADDRS\n"); 328 return -ENOMEM;
235 for (hash = 0; hash < sctp_assoc_hashsize; hash++) { 329
236 head = &sctp_assoc_hashtable[hash]; 330 head = &sctp_assoc_hashtable[hash-1];
237 read_lock(&head->lock); 331 sctp_local_bh_disable();
238 for (epb = head->chain; epb; epb = epb->next) { 332 read_lock(&head->lock);
239 assoc = sctp_assoc(epb); 333 for (epb = head->chain; epb; epb = epb->next) {
240 sk = epb->sk; 334 assoc = sctp_assoc(epb);
241 seq_printf(seq, 335 sk = epb->sk;
242 "%8p %8p %-3d %-3d %-2d %-4d %-5d %-5d ", 336 seq_printf(seq,
243 assoc, sk, sctp_sk(sk)->type, sk->sk_state, 337 "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ",
244 assoc->state, hash, epb->bind_addr.port, 338 assoc, sk, sctp_sk(sk)->type, sk->sk_state,
245 assoc->peer.port); 339 assoc->state, hash-1, assoc->assoc_id,
246 sctp_seq_dump_local_addrs(seq, epb); 340 (sk->sk_rcvbuf - assoc->rwnd),
247 seq_printf(seq, "<-> "); 341 assoc->sndbuf_used,
248 sctp_seq_dump_remote_addrs(seq, assoc); 342 sock_i_uid(sk), sock_i_ino(sk),
249 seq_printf(seq, "\n"); 343 epb->bind_addr.port,
250 } 344 assoc->peer.port);
251 read_unlock(&head->lock); 345
346 seq_printf(seq, " ");
347 sctp_seq_dump_local_addrs(seq, epb);
348 seq_printf(seq, "<-> ");
349 sctp_seq_dump_remote_addrs(seq, assoc);
350 seq_printf(seq, "\n");
252 } 351 }
352 read_unlock(&head->lock);
353 sctp_local_bh_enable();
253 354
254 return 0; 355 return 0;
255} 356}
256 357
358static struct seq_operations sctp_assoc_ops = {
359 .start = sctp_assocs_seq_start,
360 .next = sctp_assocs_seq_next,
361 .stop = sctp_assocs_seq_stop,
362 .show = sctp_assocs_seq_show,
363};
364
257/* Initialize the seq file operations for 'assocs' object. */ 365/* Initialize the seq file operations for 'assocs' object. */
258static int sctp_assocs_seq_open(struct inode *inode, struct file *file) 366static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
259{ 367{
260 return single_open(file, sctp_assocs_seq_show, NULL); 368 return seq_open(file, &sctp_assoc_ops);
261} 369}
262 370
263static struct file_operations sctp_assocs_seq_fops = { 371static struct file_operations sctp_assocs_seq_fops = {
264 .open = sctp_assocs_seq_open, 372 .open = sctp_assocs_seq_open,
265 .read = seq_read, 373 .read = seq_read,
266 .llseek = seq_lseek, 374 .llseek = seq_lseek,
267 .release = single_release, 375 .release = seq_release,
268}; 376};
269 377
270/* Set up the proc fs entry for 'assocs' object. */ 378/* Set up the proc fs entry for 'assocs' object. */
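
Both proc files move from single_open(), which renders all hash buckets in one pass, to a real seq_file iterator that hands out one bucket per step. Position 0 is reserved for the header, so every position is biased by one and ->show compensates with hash-1. A self-contained sketch of the same protocol over a plain array (sizes and strings are illustrative):

#include <stdio.h>

#define HASHSIZE 4
static const char *table[HASHSIZE] = { "ep0", "ep1", "ep2", "ep3" };

/* ->start: emit the header at position 0, then bias *pos by one so
 * value 1 means bucket 0 (mirrors sctp_eps_seq_start).
 */
static long *seq_start(long *pos)
{
        if (*pos > HASHSIZE)
                return NULL;
        if (*pos == 0)
                printf(" BUCKET NAME (header)\n");
        ++*pos;
        return pos;
}

static long *seq_next(long *pos)
{
        if (*pos > HASHSIZE)
                return NULL;
        ++*pos;
        return pos;
}

/* ->show: undo the bias; the range check mirrors the guard at the
 * top of sctp_eps_seq_show.
 */
static int seq_show(long v)
{
        if (v > HASHSIZE)
                return -1;
        printf(" %6ld %s\n", v - 1, table[v - 1]);
        return 0;
}

int main(void)
{
        long pos = 0, *v;

        for (v = seq_start(&pos); v; v = seq_next(&pos))
                if (seq_show(*v))
                        break;
        return 0;
}
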
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2e1f9c3556f5..5135e1a25d25 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -378,10 +378,13 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
378{ 378{
379 int ret = inet_addr_type(addr->v4.sin_addr.s_addr); 379 int ret = inet_addr_type(addr->v4.sin_addr.s_addr);
380 380
381 /* FIXME: ip_nonlocal_bind sysctl support. */
382 381
383 if (addr->v4.sin_addr.s_addr != INADDR_ANY && ret != RTN_LOCAL) 382 if (addr->v4.sin_addr.s_addr != INADDR_ANY &&
383 ret != RTN_LOCAL &&
384 !sp->inet.freebind &&
385 !sysctl_ip_nonlocal_bind)
384 return 0; 386 return 0;
387
385 return 1; 388 return 1;
386} 389}
387 390
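
This resolves the old FIXME: binding a non-local IPv4 address is now allowed when either the socket's freebind flag or the ip_nonlocal_bind sysctl says so. The rule restated as a standalone predicate (flag names are illustrative):

#include <stdio.h>

/* 1 = address acceptable for bind, mirroring the rewritten
 * sctp_v4_available() condition.
 */
static int v4_available(int is_any, int is_local,
                        int freebind, int nonlocal_bind)
{
        if (!is_any && !is_local && !freebind && !nonlocal_bind)
                return 0;
        return 1;
}

int main(void)
{
        printf("%d\n", v4_available(0, 0, 0, 0)); /* non-local, no overrides: 0 */
        printf("%d\n", v4_available(0, 0, 1, 0)); /* freebind set:            1 */
        printf("%d\n", v4_available(0, 0, 0, 1)); /* sysctl enabled:          1 */
        return 0;
}
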
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 33ac8bf47b0e..5baed9bb7de5 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1830,7 +1830,7 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
1830 * be a better choice than any of the embedded addresses. 1830 * be a better choice than any of the embedded addresses.
1831 */ 1831 */
1832 if (peer_addr) 1832 if (peer_addr)
1833 if (!sctp_assoc_add_peer(asoc, peer_addr, gfp)) 1833 if (!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE))
1834 goto nomem; 1834 goto nomem;
1835 1835
1836 /* Process the initialization parameters. */ 1836 /* Process the initialization parameters. */
@@ -1841,6 +1841,14 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
1841 goto clean_up; 1841 goto clean_up;
1842 } 1842 }
1843 1843
1844 /* Walk list of transports, removing transports in the UNKNOWN state. */
1845 list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
1846 transport = list_entry(pos, struct sctp_transport, transports);
1847 if (transport->state == SCTP_UNKNOWN) {
1848 sctp_assoc_rm_peer(asoc, transport);
1849 }
1850 }
1851
1844 /* The fixed INIT headers are always in network byte 1852 /* The fixed INIT headers are always in network byte
1845 * order. 1853 * order.
1846 */ 1854 */
@@ -1906,7 +1914,8 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
1906 * stream sequence number shall be set to 0. 1914 * stream sequence number shall be set to 0.
1907 */ 1915 */
1908 1916
1909 /* Allocate storage for the negotiated streams if it is not a temporary * association. 1917 /* Allocate storage for the negotiated streams if it is not a temporary
1918 * association.
1910 */ 1919 */
1911 if (!asoc->temp) { 1920 if (!asoc->temp) {
1912 int assoc_id; 1921 int assoc_id;
@@ -1952,6 +1961,9 @@ clean_up:
1952 list_del_init(pos); 1961 list_del_init(pos);
1953 sctp_transport_free(transport); 1962 sctp_transport_free(transport);
1954 } 1963 }
1964
1965 asoc->peer.transport_count = 0;
1966
1955nomem: 1967nomem:
1956 return 0; 1968 return 0;
1957} 1969}
@@ -1995,7 +2007,7 @@ static int sctp_process_param(struct sctp_association *asoc,
1995 af->from_addr_param(&addr, param.addr, asoc->peer.port, 0); 2007 af->from_addr_param(&addr, param.addr, asoc->peer.port, 0);
1996 scope = sctp_scope(peer_addr); 2008 scope = sctp_scope(peer_addr);
1997 if (sctp_in_scope(&addr, scope)) 2009 if (sctp_in_scope(&addr, scope))
1998 if (!sctp_assoc_add_peer(asoc, &addr, gfp)) 2010 if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_ACTIVE))
1999 return 0; 2011 return 0;
2000 break; 2012 break;
2001 2013
@@ -2396,7 +2408,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc,
2396 * Due to Resource Shortage'. 2408 * Due to Resource Shortage'.
2397 */ 2409 */
2398 2410
2399 peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC); 2411 peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_ACTIVE);
2400 if (!peer) 2412 if (!peer)
2401 return SCTP_ERROR_RSRC_LOW; 2413 return SCTP_ERROR_RSRC_LOW;
2402 2414
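
sctp_process_init() now prunes any transport left in the UNKNOWN state once the INIT parameters have been absorbed, and the walk uses list_for_each_safe() because entries are freed mid-iteration. A minimal sketch of that deletion-safe pattern, using a simplified singly-linked list in place of the kernel's list.h macros:

#include <stdio.h>
#include <stdlib.h>

struct node {
        int state;              /* 0 = UNKNOWN, 1 = ACTIVE (illustrative) */
        struct node *next;
};

int main(void)
{
        struct node *head = NULL, *pos, *temp, **prev;
        int i;

        for (i = 0; i < 6; i++) {       /* build a mixed-state list */
                struct node *n = malloc(sizeof(*n));
                n->state = i & 1;
                n->next = head;
                head = n;
        }

        /* Save the successor before possibly freeing pos; this is
         * exactly the hazard list_for_each_safe() guards against.
         */
        for (prev = &head, pos = head; pos; pos = temp) {
                temp = pos->next;
                if (pos->state == 0) {          /* prune UNKNOWN */
                        *prev = temp;
                        free(pos);
                } else {
                        prev = &pos->next;
                }
        }

        for (pos = head; pos; pos = pos->next)
                printf("kept node, state %d\n", pos->state);
        for (pos = head; pos; pos = temp) {     /* cleanup */
                temp = pos->next;
                free(pos);
        }
        return 0;
}
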
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index f65fa441952f..778639db125a 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -414,11 +414,13 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
414 */ 414 */
415 asoc->overall_error_count++; 415 asoc->overall_error_count++;
416 416
417 if (transport->active && 417 if (transport->state != SCTP_INACTIVE &&
418 (transport->error_count++ >= transport->max_retrans)) { 418 (transport->error_count++ >= transport->max_retrans)) {
419 SCTP_DEBUG_PRINTK("transport_strike: transport " 419 SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p",
420 "IP:%d.%d.%d.%d failed.\n", 420 " transport IP: port:%d failed.\n",
421 NIPQUAD(transport->ipaddr.v4.sin_addr)); 421 asoc,
422 (&transport->ipaddr),
423 transport->ipaddr.v4.sin_port);
422 sctp_assoc_control_transport(asoc, transport, 424 sctp_assoc_control_transport(asoc, transport,
423 SCTP_TRANSPORT_DOWN, 425 SCTP_TRANSPORT_DOWN,
424 SCTP_FAILED_THRESHOLD); 426 SCTP_FAILED_THRESHOLD);
@@ -593,7 +595,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
593 /* Mark the destination transport address as active if it is not so 595 /* Mark the destination transport address as active if it is not so
594 * marked. 596 * marked.
595 */ 597 */
596 if (!t->active) 598 if (t->state == SCTP_INACTIVE)
597 sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, 599 sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP,
598 SCTP_HEARTBEAT_SUCCESS); 600 SCTP_HEARTBEAT_SUCCESS);
599 601
@@ -665,8 +667,11 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
665 667
666 asoc->state = state; 668 asoc->state = state;
667 669
670 SCTP_DEBUG_PRINTK("sctp_cmd_new_state: asoc %p[%s]\n",
671 asoc, sctp_state_tbl[state]);
672
668 if (sctp_style(sk, TCP)) { 673 if (sctp_style(sk, TCP)) {
669 /* Change the sk->sk_state of a TCP-style socket that has 674 /* Change the sk->sk_state of a TCP-style socket that has
670 * successfully completed a connect() call. 675 * successfully completed a connect() call.
671 */ 676 */
672 if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) 677 if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED))
@@ -678,6 +683,16 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
678 sk->sk_shutdown |= RCV_SHUTDOWN; 683 sk->sk_shutdown |= RCV_SHUTDOWN;
679 } 684 }
680 685
686 if (sctp_state(asoc, COOKIE_WAIT)) {
687 /* Reset init timeouts since they may have been
688 * increased due to timer expirations.
689 */
690 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] =
691 asoc->ep->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT];
692 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] =
693 asoc->ep->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE];
694 }
695
681 if (sctp_state(asoc, ESTABLISHED) || 696 if (sctp_state(asoc, ESTABLISHED) ||
682 sctp_state(asoc, CLOSED) || 697 sctp_state(asoc, CLOSED) ||
683 sctp_state(asoc, SHUTDOWN_RECEIVED)) { 698 sctp_state(asoc, SHUTDOWN_RECEIVED)) {
@@ -1120,10 +1135,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1120 * to be executed only during failed attempts of 1135 * to be executed only during failed attempts of
1121 * association establishment. 1136 * association establishment.
1122 */ 1137 */
1123 if ((asoc->peer.retran_path != 1138 if ((asoc->peer.retran_path !=
1124 asoc->peer.primary_path) && 1139 asoc->peer.primary_path) &&
1125 (asoc->counters[SCTP_COUNTER_INIT_ERROR] > 0)) { 1140 (asoc->init_err_counter > 0)) {
1126 sctp_add_cmd_sf(commands, 1141 sctp_add_cmd_sf(commands,
1127 SCTP_CMD_FORCE_PRIM_RETRAN, 1142 SCTP_CMD_FORCE_PRIM_RETRAN,
1128 SCTP_NULL()); 1143 SCTP_NULL());
1129 } 1144 }
@@ -1237,18 +1252,67 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1237 sctp_association_put(asoc); 1252 sctp_association_put(asoc);
1238 break; 1253 break;
1239 1254
1255 case SCTP_CMD_INIT_CHOOSE_TRANSPORT:
1256 chunk = cmd->obj.ptr;
1257 t = sctp_assoc_choose_init_transport(asoc);
1258 asoc->init_last_sent_to = t;
1259 chunk->transport = t;
1260 t->init_sent_count++;
1261 break;
1262
1240 case SCTP_CMD_INIT_RESTART: 1263 case SCTP_CMD_INIT_RESTART:
1241 /* Do the needed accounting and updates 1264 /* Do the needed accounting and updates
1242 * associated with restarting an initialization 1265 * associated with restarting an initialization
1243 * timer. 1266 * timer. Only multiply the timeout by two if
1267 * all transports have been tried at the current
1268 * timeout.
1269 */
1270 t = asoc->init_last_sent_to;
1271 asoc->init_err_counter++;
1272
1273 if (t->init_sent_count > (asoc->init_cycle + 1)) {
1274 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] *= 2;
1275 if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] >
1276 asoc->max_init_timeo) {
1277 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] =
1278 asoc->max_init_timeo;
1279 }
1280 asoc->init_cycle++;
1281 SCTP_DEBUG_PRINTK(
1282 "T1 INIT Timeout adjustment"
1283 " init_err_counter: %d"
1284 " cycle: %d"
1285 " timeout: %d\n",
1286 asoc->init_err_counter,
1287 asoc->init_cycle,
1288 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]);
1289 }
1290
1291 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
1292 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
1293 break;
1294
1295 case SCTP_CMD_COOKIEECHO_RESTART:
1296 /* Do the needed accounting and updates
1297 * associated with restarting an initialization
1298 * timer. Only multiply the timeout by two if
1299 * all transports have been tried at the current
1300 * timeout.
1244 */ 1301 */
1245 asoc->counters[SCTP_COUNTER_INIT_ERROR]++; 1302 asoc->init_err_counter++;
1246 asoc->timeouts[cmd->obj.to] *= 2; 1303
1247 if (asoc->timeouts[cmd->obj.to] > 1304 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] *= 2;
1305 if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] >
1248 asoc->max_init_timeo) { 1306 asoc->max_init_timeo) {
1249 asoc->timeouts[cmd->obj.to] = 1307 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] =
1250 asoc->max_init_timeo; 1308 asoc->max_init_timeo;
1251 } 1309 }
1310 SCTP_DEBUG_PRINTK(
1311 "T1 COOKIE Timeout adjustment"
1312 " init_err_counter: %d"
1313 " timeout: %d\n",
1314 asoc->init_err_counter,
1315 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
1252 1316
1253 /* If we've sent any data bundled with 1317 /* If we've sent any data bundled with
1254 * COOKIE-ECHO we need to resend. 1318 * COOKIE-ECHO we need to resend.
@@ -1261,7 +1325,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1261 1325
1262 sctp_add_cmd_sf(commands, 1326 sctp_add_cmd_sf(commands,
1263 SCTP_CMD_TIMER_RESTART, 1327 SCTP_CMD_TIMER_RESTART,
1264 SCTP_TO(cmd->obj.to)); 1328 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
1265 break; 1329 break;
1266 1330
1267 case SCTP_CMD_INIT_FAILED: 1331 case SCTP_CMD_INIT_FAILED:
@@ -1273,12 +1337,13 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1273 subtype, chunk, cmd->obj.u32); 1337 subtype, chunk, cmd->obj.u32);
1274 break; 1338 break;
1275 1339
1276 case SCTP_CMD_COUNTER_INC: 1340 case SCTP_CMD_INIT_COUNTER_INC:
1277 asoc->counters[cmd->obj.counter]++; 1341 asoc->init_err_counter++;
1278 break; 1342 break;
1279 1343
1280 case SCTP_CMD_COUNTER_RESET: 1344 case SCTP_CMD_INIT_COUNTER_RESET:
1281 asoc->counters[cmd->obj.counter] = 0; 1345 asoc->init_err_counter = 0;
1346 asoc->init_cycle = 0;
1282 break; 1347 break;
1283 1348
1284 case SCTP_CMD_REPORT_DUP: 1349 case SCTP_CMD_REPORT_DUP:
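
The INIT_RESTART rework changes the retry policy: the T1-INIT timeout is doubled only once every transport has been tried at the current value (tracked via init_sent_count and init_cycle) and is always capped at max_init_timeo. A standalone adaptation of that policy, collapsed to one aggregate counter where the kernel counts per transport, with illustrative constants:

#include <stdio.h>

int main(void)
{
        unsigned long timeout = 1000;             /* ms, illustrative */
        const unsigned long max_init_timeo = 8000;
        const int transports = 2;                 /* paths to cycle */
        int sent = 0, cycle = 0, attempt;

        for (attempt = 1; attempt <= 8; attempt++) {
                sent++;                           /* one INIT per expiry */
                /* Back off only after every transport has been tried
                 * at the current timeout, and never past the cap.
                 */
                if (sent > (cycle + 1) * transports) {
                        timeout *= 2;
                        if (timeout > max_init_timeo)
                                timeout = max_init_timeo;
                        cycle++;
                }
                printf("attempt %d: timeout %lu ms (cycle %d)\n",
                       attempt, timeout, cycle);
        }
        return 0;
}
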
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8e01b8f09ac2..058189684c7c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -533,6 +533,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
533 sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT, 533 sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT,
534 SCTP_PEER_INIT(initchunk)); 534 SCTP_PEER_INIT(initchunk));
535 535
536 /* Reset init error count upon receipt of INIT-ACK. */
537 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
538
536 /* 5.1 C) "A" shall stop the T1-init timer and leave 539 /* 5.1 C) "A" shall stop the T1-init timer and leave
537 * COOKIE-WAIT state. "A" shall then ... start the T1-cookie 540 * COOKIE-WAIT state. "A" shall then ... start the T1-cookie
538 * timer, and enter the COOKIE-ECHOED state. 541 * timer, and enter the COOKIE-ECHOED state.
@@ -775,8 +778,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
775 * from the COOKIE-ECHOED state to the COOKIE-WAIT 778 * from the COOKIE-ECHOED state to the COOKIE-WAIT
776 * state is performed. 779 * state is performed.
777 */ 780 */
778 sctp_add_cmd_sf(commands, SCTP_CMD_COUNTER_RESET, 781 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
779 SCTP_COUNTER(SCTP_COUNTER_INIT_ERROR));
780 782
781 /* RFC 2960 5.1 Normal Establishment of an Association 783 /* RFC 2960 5.1 Normal Establishment of an Association
782 * 784 *
@@ -1019,10 +1021,22 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1019 link = sctp_assoc_lookup_paddr(asoc, &from_addr); 1021 link = sctp_assoc_lookup_paddr(asoc, &from_addr);
1020 1022
1021 /* This should never happen, but let's log it if so. */ 1023 /* This should never happen, but let's log it if so. */
1022 if (!link) { 1024 if (unlikely(!link)) {
1023 printk(KERN_WARNING 1025 if (from_addr.sa.sa_family == AF_INET6) {
1024 "%s: Could not find address %d.%d.%d.%d\n", 1026 printk(KERN_WARNING
1025 __FUNCTION__, NIPQUAD(from_addr.v4.sin_addr)); 1027 "%s association %p could not find address "
1028 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
1029 __FUNCTION__,
1030 asoc,
1031 NIP6(from_addr.v6.sin6_addr));
1032 } else {
1033 printk(KERN_WARNING
1034 "%s association %p could not find address "
1035 "%u.%u.%u.%u\n",
1036 __FUNCTION__,
1037 asoc,
1038 NIPQUAD(from_addr.v4.sin_addr.s_addr));
1039 }
1026 return SCTP_DISPOSITION_DISCARD; 1040 return SCTP_DISPOSITION_DISCARD;
1027 } 1041 }
1028 1042
@@ -2095,9 +2109,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
2095 sctp_errhdr_t *err; 2109 sctp_errhdr_t *err;
2096 struct sctp_chunk *reply; 2110 struct sctp_chunk *reply;
2097 struct sctp_bind_addr *bp; 2111 struct sctp_bind_addr *bp;
2098 int attempts; 2112 int attempts = asoc->init_err_counter + 1;
2099
2100 attempts = asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1;
2101 2113
2102 if (attempts >= asoc->max_init_attempts) { 2114 if (attempts >= asoc->max_init_attempts) {
2103 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, 2115 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
@@ -2157,8 +2169,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
2157 /* Cast away the const modifier, as we want to just 2169 /* Cast away the const modifier, as we want to just
2158 * rerun it through as a side effect. 2170 * rerun it through as a side effect.
2159 */ 2171 */
2160 sctp_add_cmd_sf(commands, SCTP_CMD_COUNTER_INC, 2172 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_INC, SCTP_NULL());
2161 SCTP_COUNTER(SCTP_COUNTER_INIT_ERROR));
2162 2173
2163 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 2174 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
2164 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); 2175 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
@@ -2281,8 +2292,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
2281 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) 2292 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
2282 error = ((sctp_errhdr_t *)chunk->skb->data)->cause; 2293 error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
2283 2294
2284 sctp_stop_t1_and_abort(commands, error); 2295 return sctp_stop_t1_and_abort(commands, error, asoc, chunk->transport);
2285 return SCTP_DISPOSITION_ABORT;
2286} 2296}
2287 2297
2288/* 2298/*
@@ -2294,8 +2304,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep
2294 void *arg, 2304 void *arg,
2295 sctp_cmd_seq_t *commands) 2305 sctp_cmd_seq_t *commands)
2296{ 2306{
2297 sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR); 2307 return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR, asoc,
2298 return SCTP_DISPOSITION_ABORT; 2308 (struct sctp_transport *)arg);
2299} 2309}
2300 2310
2301/* 2311/*
@@ -2318,8 +2328,12 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
2318 * 2328 *
2319 * This is common code called by several sctp_sf_*_abort() functions above. 2329 * This is common code called by several sctp_sf_*_abort() functions above.
2320 */ 2330 */
2321void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error) 2331sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
2332 __u16 error,
2333 const struct sctp_association *asoc,
2334 struct sctp_transport *transport)
2322{ 2335{
2336 SCTP_DEBUG_PRINTK("ABORT received (INIT).\n");
2323 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, 2337 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
2324 SCTP_STATE(SCTP_STATE_CLOSED)); 2338 SCTP_STATE(SCTP_STATE_CLOSED));
2325 SCTP_INC_STATS(SCTP_MIB_ABORTEDS); 2339 SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
@@ -2328,6 +2342,7 @@ void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error)
2328 /* CMD_INIT_FAILED will DELETE_TCB. */ 2342 /* CMD_INIT_FAILED will DELETE_TCB. */
2329 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, 2343 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
2330 SCTP_U32(error)); 2344 SCTP_U32(error));
2345 return SCTP_DISPOSITION_ABORT;
2331} 2346}
2332 2347
2333/* 2348/*
@@ -3805,6 +3820,10 @@ sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep,
3805 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, 3820 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC,
3806 SCTP_ASOC((struct sctp_association *) asoc)); 3821 SCTP_ASOC((struct sctp_association *) asoc));
3807 3822
3823 /* Choose transport for INIT. */
3824 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT,
3825 SCTP_CHUNK(repl));
3826
3808 /* After sending the INIT, "A" starts the T1-init timer and 3827 /* After sending the INIT, "A" starts the T1-init timer and
3809 * enters the COOKIE-WAIT state. 3828 * enters the COOKIE-WAIT state.
3810 */ 3829 */
@@ -4589,7 +4608,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
4589} 4608}
4590 4609
4591/* 4610/*
4592 * sctp_sf_t1_timer_expire 4611 * sctp_sf_t1_init_timer_expire
4593 * 4612 *
4594 * Section: 4 Note: 2 4613 * Section: 4 Note: 2
4595 * Verification Tag: 4614 * Verification Tag:
@@ -4603,7 +4622,59 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
4603 * endpoint MUST abort the initialization process and report the 4622 * endpoint MUST abort the initialization process and report the
4604 * error to SCTP user. 4623 * error to SCTP user.
4605 * 4624 *
4606 * 3) If the T1-cookie timer expires, the endpoint MUST retransmit 4625 * Outputs
4626 * (timers, events)
4627 *
4628 */
4629sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
4630 const struct sctp_association *asoc,
4631 const sctp_subtype_t type,
4632 void *arg,
4633 sctp_cmd_seq_t *commands)
4634{
4635 struct sctp_chunk *repl = NULL;
4636 struct sctp_bind_addr *bp;
4637 int attempts = asoc->init_err_counter + 1;
4638
4639 SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
4640
4641 if (attempts < asoc->max_init_attempts) {
4642 bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
4643 repl = sctp_make_init(asoc, bp, GFP_ATOMIC, 0);
4644 if (!repl)
4645 return SCTP_DISPOSITION_NOMEM;
4646
4647 /* Choose transport for INIT. */
4648 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT,
4649 SCTP_CHUNK(repl));
4650
4651 /* Issue a side effect to do the needed accounting. */
4652 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_RESTART,
4653 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
4654
4655 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
4656 } else {
4657 SCTP_DEBUG_PRINTK("Giving up on INIT, attempts: %d"
4658 " max_init_attempts: %d\n",
4659 attempts, asoc->max_init_attempts);
4660 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
4661 SCTP_U32(SCTP_ERROR_NO_ERROR));
4662 return SCTP_DISPOSITION_DELETE_TCB;
4663 }
4664
4665 return SCTP_DISPOSITION_CONSUME;
4666}
4667
4668/*
4669 * sctp_sf_t1_cookie_timer_expire
4670 *
4671 * Section: 4 Note: 2
4672 * Verification Tag:
4673 * Inputs
4674 * (endpoint, asoc)
4675 *
4676 * RFC 2960 Section 4 Notes
4677 * 3) If the T1-cookie timer expires, the endpoint MUST retransmit
4607 * COOKIE ECHO and re-start the T1-cookie timer without changing 4678 * COOKIE ECHO and re-start the T1-cookie timer without changing
4608 * state. This MUST be repeated up to 'Max.Init.Retransmits' times. 4679 * state. This MUST be repeated up to 'Max.Init.Retransmits' times.
4609 * After that, the endpoint MUST abort the initialization process and 4680 * After that, the endpoint MUST abort the initialization process and
@@ -4613,46 +4684,26 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
4613 * (timers, events) 4684 * (timers, events)
4614 * 4685 *
4615 */ 4686 */
4616sctp_disposition_t sctp_sf_t1_timer_expire(const struct sctp_endpoint *ep, 4687sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep,
4617 const struct sctp_association *asoc, 4688 const struct sctp_association *asoc,
4618 const sctp_subtype_t type, 4689 const sctp_subtype_t type,
4619 void *arg, 4690 void *arg,
4620 sctp_cmd_seq_t *commands) 4691 sctp_cmd_seq_t *commands)
4621{ 4692{
4622 struct sctp_chunk *repl; 4693 struct sctp_chunk *repl = NULL;
4623 struct sctp_bind_addr *bp; 4694 int attempts = asoc->init_err_counter + 1;
4624 sctp_event_timeout_t timer = (sctp_event_timeout_t) arg;
4625 int timeout;
4626 int attempts;
4627
4628 timeout = asoc->timeouts[timer];
4629 attempts = asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1;
4630 repl = NULL;
4631 4695
4632 SCTP_DEBUG_PRINTK("Timer T1 expired.\n"); 4696 SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
4633 4697
4634 if (attempts < asoc->max_init_attempts) { 4698 if (attempts < asoc->max_init_attempts) {
4635 switch (timer) { 4699 repl = sctp_make_cookie_echo(asoc, NULL);
4636 case SCTP_EVENT_TIMEOUT_T1_INIT:
4637 bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
4638 repl = sctp_make_init(asoc, bp, GFP_ATOMIC, 0);
4639 break;
4640
4641 case SCTP_EVENT_TIMEOUT_T1_COOKIE:
4642 repl = sctp_make_cookie_echo(asoc, NULL);
4643 break;
4644
4645 default:
4646 BUG();
4647 break;
4648 };
4649
4650 if (!repl) 4700 if (!repl)
4651 goto nomem; 4701 return SCTP_DISPOSITION_NOMEM;
4652 4702
4653 /* Issue a side effect to do the needed accounting. */ 4703 /* Issue a side effect to do the needed accounting. */
4654 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_RESTART, 4704 sctp_add_cmd_sf(commands, SCTP_CMD_COOKIEECHO_RESTART,
4655 SCTP_TO(timer)); 4705 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
4706
4656 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); 4707 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
4657 } else { 4708 } else {
4658 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, 4709 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
@@ -4661,9 +4712,6 @@ sctp_disposition_t sctp_sf_t1_timer_expire(const struct sctp_endpoint *ep,
4661 } 4712 }
4662 4713
4663 return SCTP_DISPOSITION_CONSUME; 4714 return SCTP_DISPOSITION_CONSUME;
4664
4665nomem:
4666 return SCTP_DISPOSITION_NOMEM;
4667} 4715}
4668 4716
4669/* RFC2960 9.2 If the timer expires, the endpoint must re-send the SHUTDOWN 4717/* RFC2960 9.2 If the timer expires, the endpoint must re-send the SHUTDOWN
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 8967846f69e8..75ef10408764 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -783,7 +783,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
783 /* SCTP_STATE_COOKIE_WAIT */ \ 783 /* SCTP_STATE_COOKIE_WAIT */ \
784 {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ 784 {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
785 /* SCTP_STATE_COOKIE_ECHOED */ \ 785 /* SCTP_STATE_COOKIE_ECHOED */ \
786 {.fn = sctp_sf_t1_timer_expire, .name = "sctp_sf_t1_timer_expire"}, \ 786 {.fn = sctp_sf_t1_cookie_timer_expire, \
787 .name = "sctp_sf_t1_cookie_timer_expire"}, \
787 /* SCTP_STATE_ESTABLISHED */ \ 788 /* SCTP_STATE_ESTABLISHED */ \
788 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ 789 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
789 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 790 /* SCTP_STATE_SHUTDOWN_PENDING */ \
@@ -802,7 +803,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
802 /* SCTP_STATE_CLOSED */ \ 803 /* SCTP_STATE_CLOSED */ \
803 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ 804 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
804 /* SCTP_STATE_COOKIE_WAIT */ \ 805 /* SCTP_STATE_COOKIE_WAIT */ \
805 {.fn = sctp_sf_t1_timer_expire, .name = "sctp_sf_t1_timer_expire"}, \ 806 {.fn = sctp_sf_t1_init_timer_expire, \
807 .name = "sctp_sf_t1_init_timer_expire"}, \
806 /* SCTP_STATE_COOKIE_ECHOED */ \ 808 /* SCTP_STATE_COOKIE_ECHOED */ \
807 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ 809 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
808 /* SCTP_STATE_ESTABLISHED */ \ 810 /* SCTP_STATE_ESTABLISHED */ \
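
Each cell of the state table pairs a handler with its printable name, so splitting sctp_sf_t1_timer_expire into _init and _cookie variants amounts to pointing the COOKIE_WAIT and COOKIE_ECHOED cells at different functions. A compact sketch of that dispatch pattern (states and handlers are illustrative):

#include <stdio.h>

typedef void (*handler_fn)(void);

struct sm_entry {
        handler_fn fn;
        const char *name;
};

static void t1_init_expire(void)   { printf("retransmit INIT\n"); }
static void t1_cookie_expire(void) { printf("retransmit COOKIE ECHO\n"); }
static void timer_ignore(void)     { printf("ignore\n"); }

enum state { COOKIE_WAIT, COOKIE_ECHOED, ESTABLISHED, NUM_STATES };

static const struct sm_entry t1_init_table[NUM_STATES] = {
        [COOKIE_WAIT]   = { t1_init_expire, "t1_init_expire" },
        [COOKIE_ECHOED] = { timer_ignore,   "timer_ignore" },
        [ESTABLISHED]   = { timer_ignore,   "timer_ignore" },
};

static const struct sm_entry t1_cookie_table[NUM_STATES] = {
        [COOKIE_WAIT]   = { timer_ignore,     "timer_ignore" },
        [COOKIE_ECHOED] = { t1_cookie_expire, "t1_cookie_expire" },
        [ESTABLISHED]   = { timer_ignore,     "timer_ignore" },
};

int main(void)
{
        enum state s = COOKIE_WAIT;

        printf("dispatch %s: ", t1_init_table[s].name);
        t1_init_table[s].fn();
        printf("dispatch %s: ", t1_cookie_table[s].name);
        t1_cookie_table[s].fn();
        return 0;
}
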
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0b338eca6dc0..aad55dc3792b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -262,18 +262,18 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
262 * sockaddr_in6 [RFC 2553]), 262 * sockaddr_in6 [RFC 2553]),
263 * addr_len - the size of the address structure. 263 * addr_len - the size of the address structure.
264 */ 264 */
265SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) 265SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len)
266{ 266{
267 int retval = 0; 267 int retval = 0;
268 268
269 sctp_lock_sock(sk); 269 sctp_lock_sock(sk);
270 270
271 SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, uaddr: %p, addr_len: %d)\n", 271 SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, addr: %p, addr_len: %d)\n",
272 sk, uaddr, addr_len); 272 sk, addr, addr_len);
273 273
274 /* Disallow binding twice. */ 274 /* Disallow binding twice. */
275 if (!sctp_sk(sk)->ep->base.bind_addr.port) 275 if (!sctp_sk(sk)->ep->base.bind_addr.port)
276 retval = sctp_do_bind(sk, (union sctp_addr *)uaddr, 276 retval = sctp_do_bind(sk, (union sctp_addr *)addr,
277 addr_len); 277 addr_len);
278 else 278 else
279 retval = -EINVAL; 279 retval = -EINVAL;
@@ -318,23 +318,27 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
318 unsigned short snum; 318 unsigned short snum;
319 int ret = 0; 319 int ret = 0;
320 320
321 SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d)\n",
322 sk, addr, len);
323
324 /* Common sockaddr verification. */ 321 /* Common sockaddr verification. */
325 af = sctp_sockaddr_af(sp, addr, len); 322 af = sctp_sockaddr_af(sp, addr, len);
326 if (!af) 323 if (!af) {
324 SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d) EINVAL\n",
325 sk, addr, len);
327 return -EINVAL; 326 return -EINVAL;
327 }
328
329 snum = ntohs(addr->v4.sin_port);
330
331 SCTP_DEBUG_PRINTK_IPADDR("sctp_do_bind(sk: %p, new addr: ",
332 ", port: %d, new port: %d, len: %d)\n",
333 sk,
334 addr,
335 bp->port, snum,
336 len);
328 337
329 /* PF specific bind() address verification. */ 338 /* PF specific bind() address verification. */
330 if (!sp->pf->bind_verify(sp, addr)) 339 if (!sp->pf->bind_verify(sp, addr))
331 return -EADDRNOTAVAIL; 340 return -EADDRNOTAVAIL;
332 341
333 snum= ntohs(addr->v4.sin_port);
334
335 SCTP_DEBUG_PRINTK("sctp_do_bind: port: %d, new port: %d\n",
336 bp->port, snum);
337
338 /* We must either be unbound, or bind to the same port. */ 342 /* We must either be unbound, or bind to the same port. */
339 if (bp->port && (snum != bp->port)) { 343 if (bp->port && (snum != bp->port)) {
340 SCTP_DEBUG_PRINTK("sctp_do_bind:" 344 SCTP_DEBUG_PRINTK("sctp_do_bind:"
@@ -816,7 +820,8 @@ out:
816 * 820 *
817 * Basically do nothing but copying the addresses from user to kernel 821 * Basically do nothing but copying the addresses from user to kernel
818 * land and invoking either sctp_bindx_add() or sctp_bindx_rem() on the sk. 822 * land and invoking either sctp_bindx_add() or sctp_bindx_rem() on the sk.
819 * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() * from userspace. 823 * This is used for tunneling the sctp_bindx() request through sctp_setsockopt()
824 * from userspace.
820 * 825 *
821 * We don't use copy_from_user() for optimization: we first do the 826 * We don't use copy_from_user() for optimization: we first do the
822 * sanity checks (buffer size -fast- and access check-healthy 827 * sanity checks (buffer size -fast- and access check-healthy
@@ -913,6 +918,243 @@ out:
913 return err; 918 return err;
914} 919}
915 920
921/* __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, int addrs_size)
922 *
923 * Common routine for handling connect() and sctp_connectx().
924 * Connect will come in with just a single address.
925 */
926static int __sctp_connect(struct sock *sk,
927 struct sockaddr *kaddrs,
928 int addrs_size)
929{
930 struct sctp_sock *sp;
931 struct sctp_endpoint *ep;
932 struct sctp_association *asoc = NULL;
933 struct sctp_association *asoc2;
934 struct sctp_transport *transport;
935 union sctp_addr to;
936 struct sctp_af *af;
937 sctp_scope_t scope;
938 long timeo;
939 int err = 0;
940 int addrcnt = 0;
941 int walk_size = 0;
942 struct sockaddr *sa_addr;
943 void *addr_buf;
944
945 sp = sctp_sk(sk);
946 ep = sp->ep;
947
948 /* connect() cannot be done on a socket that is already in ESTABLISHED
949 * state - UDP-style peeled off socket or a TCP-style socket that
950 * is already connected.
951 * It cannot be done even on a TCP-style listening socket.
952 */
953 if (sctp_sstate(sk, ESTABLISHED) ||
954 (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
955 err = -EISCONN;
956 goto out_free;
957 }
958
959 /* Walk through the addrs buffer and count the number of addresses. */
960 addr_buf = kaddrs;
961 while (walk_size < addrs_size) {
962 sa_addr = (struct sockaddr *)addr_buf;
963 af = sctp_get_af_specific(sa_addr->sa_family);
964
965 /* If the address family is not supported or if this address
966 * causes the address buffer to overflow return EINVAL.
967 */
968 if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
969 err = -EINVAL;
970 goto out_free;
971 }
972
973 err = sctp_verify_addr(sk, (union sctp_addr *)sa_addr,
974 af->sockaddr_len);
975 if (err)
976 goto out_free;
977
978 memcpy(&to, sa_addr, af->sockaddr_len);
979 to.v4.sin_port = ntohs(to.v4.sin_port);
980
981 /* Check if there already is a matching association on the
982 * endpoint (other than the one created here).
983 */
984 asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport);
985 if (asoc2 && asoc2 != asoc) {
986 if (asoc2->state >= SCTP_STATE_ESTABLISHED)
987 err = -EISCONN;
988 else
989 err = -EALREADY;
990 goto out_free;
991 }
992
993 /* If we could not find a matching association on the endpoint,
994 * make sure that there is no peeled-off association matching
995 * the peer address even on another socket.
996 */
997 if (sctp_endpoint_is_peeled_off(ep, &to)) {
998 err = -EADDRNOTAVAIL;
999 goto out_free;
1000 }
1001
1002 if (!asoc) {
1003 /* If a bind() or sctp_bindx() is not called prior to
1004 * an sctp_connectx() call, the system picks an
1005 * ephemeral port and will choose an address set
1006 * equivalent to binding with a wildcard address.
1007 */
1008 if (!ep->base.bind_addr.port) {
1009 if (sctp_autobind(sk)) {
1010 err = -EAGAIN;
1011 goto out_free;
1012 }
1013 }
1014
1015 scope = sctp_scope(&to);
1016 asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
1017 if (!asoc) {
1018 err = -ENOMEM;
1019 goto out_free;
1020 }
1021 }
1022
1023 /* Prime the peer's transport structures. */
1024 transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL,
1025 SCTP_UNKNOWN);
1026 if (!transport) {
1027 err = -ENOMEM;
1028 goto out_free;
1029 }
1030
1031 addrcnt++;
1032 addr_buf += af->sockaddr_len;
1033 walk_size += af->sockaddr_len;
1034 }
1035
1036 err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL);
1037 if (err < 0) {
1038 goto out_free;
1039 }
1040
1041 err = sctp_primitive_ASSOCIATE(asoc, NULL);
1042 if (err < 0) {
1043 goto out_free;
1044 }
1045
1046 /* Initialize sk's dport and daddr for getpeername() */
1047 inet_sk(sk)->dport = htons(asoc->peer.port);
1048 af = sctp_get_af_specific(to.sa.sa_family);
1049 af->to_sk_daddr(&to, sk);
1050
1051 timeo = sock_sndtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
1052 err = sctp_wait_for_connect(asoc, &timeo);
1053
1054 /* Don't free association on exit. */
1055 asoc = NULL;
1056
1057out_free:
1058
1059 SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p"
1060 " kaddrs: %p err: %d\n",
1061 asoc, kaddrs, err);
1062 if (asoc)
1063 sctp_association_free(asoc);
1064 return err;
1065}
1066
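
The while loop above is the core of the connectx path: the packed address array is traversed by family-specific length, not by array index. A self-contained userspace sketch of the same traversal (names here are illustrative, not kernel API):

    #include <stddef.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    static size_t sa_len_of(int family)
    {
            switch (family) {
            case AF_INET:  return sizeof(struct sockaddr_in);
            case AF_INET6: return sizeof(struct sockaddr_in6);
            default:       return 0;        /* unsupported family */
            }
    }

    /* Returns the number of addresses, or -1 on a malformed buffer. */
    static int count_packed_addrs(const void *buf, size_t size)
    {
            size_t walk = 0;
            int cnt = 0;

            while (walk < size) {
                    const struct sockaddr *sa =
                            (const struct sockaddr *)((const char *)buf + walk);
                    size_t len = sa_len_of(sa->sa_family);

                    /* Reject unknown families and entries that would
                     * overflow the buffer, as the kernel loop does. */
                    if (len == 0 || walk + len > size)
                            return -1;
                    walk += len;
                    cnt++;
            }
            return cnt;
    }
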
1067/* Helper for tunneling sctp_connectx() requests through sctp_setsockopt()
1068 *
1069 * API 8.9
1070 * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt);
1071 *
1072 * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses.
1073 * If the sd is an IPv6 socket, the addresses passed can either be IPv4
1074 * or IPv6 addresses.
1075 *
1076 * A single address may be specified as INADDR_ANY or IN6ADDR_ANY, see
1077 * Section 3.1.2 for this usage.
1078 *
1079 * addrs is a pointer to an array of one or more socket addresses. Each
1080 * address is contained in its appropriate structure (i.e. struct
1081 * sockaddr_in or struct sockaddr_in6) the family of the address type
1082 * must be used to distengish the address length (note that this
1083 * representation is termed a "packed array" of addresses). The caller
1084 * specifies the number of addresses in the array with addrcnt.
1085 *
1086 * On success, sctp_connectx() returns 0. On failure, sctp_connectx() returns
1087 * -1, and sets errno to the appropriate error code.
1088 *
1089 * For SCTP, the port given in each socket address must be the same, or
1090 * sctp_connectx() will fail, setting errno to EINVAL.
1091 *
1092 * An application can use sctp_connectx to initiate an association with
1093 * an endpoint that is multi-homed. Much like sctp_bindx() this call
1094 * allows a caller to specify multiple addresses at which a peer can be
1095 * reached. The way the SCTP stack uses the list of addresses to set up
 1096 * the association is implementation dependent. This function only
1097 * specifies that the stack will try to make use of all the addresses in
1098 * the list when needed.
1099 *
1100 * Note that the list of addresses passed in is only used for setting up
1101 * the association. It does not necessarily equal the set of addresses
1102 * the peer uses for the resulting association. If the caller wants to
1103 * find out the set of peer addresses, it must use sctp_getpaddrs() to
1104 * retrieve them after the association has been set up.
1105 *
 1106 * Basically do nothing but copy the addresses from user to kernel
 1107 * land and invoke __sctp_connect(). This is used for tunneling
 1108 * the sctp_connectx() request through sctp_setsockopt() from userspace.
1109 *
1110 * We don't use copy_from_user() for optimization: we first do the
 1111 * sanity checks (a fast buffer-size check and an access check for a
 1112 * healthy pointer); if all of those succeed, then we can alloc the memory
1113 * (expensive operation) needed to copy the data to kernel. Then we do
1114 * the copying without checking the user space area
1115 * (__copy_from_user()).
1116 *
1117 * On exit there is no need to do sockfd_put(), sys_setsockopt() does
1118 * it.
1119 *
1120 * sk The sk of the socket
1121 * addrs The pointer to the addresses in user land
 1122 * addrs_size Size of the addrs buffer
1123 *
1124 * Returns 0 if ok, <0 errno code on error.
1125 */
1126SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
1127 struct sockaddr __user *addrs,
1128 int addrs_size)
1129{
1130 int err = 0;
1131 struct sockaddr *kaddrs;
1132
1133 SCTP_DEBUG_PRINTK("%s - sk %p addrs %p addrs_size %d\n",
1134 __FUNCTION__, sk, addrs, addrs_size);
1135
1136 if (unlikely(addrs_size <= 0))
1137 return -EINVAL;
1138
1139 /* Check the user passed a healthy pointer. */
1140 if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
1141 return -EFAULT;
1142
1143 /* Alloc space for the address array in kernel memory. */
1144 kaddrs = (struct sockaddr *)kmalloc(addrs_size, GFP_KERNEL);
1145 if (unlikely(!kaddrs))
1146 return -ENOMEM;
1147
1148 if (__copy_from_user(kaddrs, addrs, addrs_size)) {
1149 err = -EFAULT;
1150 } else {
1151 err = __sctp_connect(sk, kaddrs, addrs_size);
1152 }
1153
1154 kfree(kaddrs);
1155 return err;
1156}
1157
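
Since the addresses travel as an opaque setsockopt() payload, the userspace side is just a packed array and a total size. A minimal sketch, assuming the lksctp-tools definition of SCTP_SOCKOPT_CONNECTX (a library wrapper would normally hide this):

    #include <string.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>   /* SCTP_SOCKOPT_CONNECTX (lksctp, assumed) */
    #include <sys/socket.h>

    /* Connect to a peer reachable at two IPv4 addresses, same port. */
    int connectx_two_peers(int sd)
    {
            struct sockaddr_in addrs[2];

            memset(addrs, 0, sizeof(addrs));
            addrs[0].sin_family = AF_INET;
            addrs[0].sin_port = htons(5000);
            inet_pton(AF_INET, "192.0.2.1", &addrs[0].sin_addr);

            addrs[1] = addrs[0];
            inet_pton(AF_INET, "198.51.100.1", &addrs[1].sin_addr);

            /* optlen carries the total size of the packed address array. */
            return setsockopt(sd, IPPROTO_SCTP, SCTP_SOCKOPT_CONNECTX,
                              addrs, sizeof(addrs));
    }

Note that both entries carry the same port, matching the EINVAL rule stated in the API text above.
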
916/* API 3.1.4 close() - UDP Style Syntax 1158/* API 3.1.4 close() - UDP Style Syntax
917 * Applications use close() to perform graceful shutdown (as described in 1159 * Applications use close() to perform graceful shutdown (as described in
918 * Section 10.1 of [SCTP]) on ALL the associations currently represented 1160 * Section 10.1 of [SCTP]) on ALL the associations currently represented
@@ -1095,7 +1337,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1095 sp = sctp_sk(sk); 1337 sp = sctp_sk(sk);
1096 ep = sp->ep; 1338 ep = sp->ep;
1097 1339
1098 SCTP_DEBUG_PRINTK("Using endpoint: %s.\n", ep->debug_name); 1340 SCTP_DEBUG_PRINTK("Using endpoint: %p.\n", ep);
1099 1341
1100 /* We cannot send a message over a TCP-style listening socket. */ 1342 /* We cannot send a message over a TCP-style listening socket. */
1101 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) { 1343 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
@@ -1306,7 +1548,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1306 } 1548 }
1307 1549
1308 /* Prime the peer's transport structures. */ 1550 /* Prime the peer's transport structures. */
1309 transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL); 1551 transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
1310 if (!transport) { 1552 if (!transport) {
1311 err = -ENOMEM; 1553 err = -ENOMEM;
1312 goto out_free; 1554 goto out_free;
@@ -2208,6 +2450,12 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
2208 optlen, SCTP_BINDX_REM_ADDR); 2450 optlen, SCTP_BINDX_REM_ADDR);
2209 break; 2451 break;
2210 2452
2453 case SCTP_SOCKOPT_CONNECTX:
2454 /* 'optlen' is the size of the addresses buffer. */
2455 retval = sctp_setsockopt_connectx(sk, (struct sockaddr __user *)optval,
2456 optlen);
2457 break;
2458
2211 case SCTP_DISABLE_FRAGMENTS: 2459 case SCTP_DISABLE_FRAGMENTS:
2212 retval = sctp_setsockopt_disable_fragments(sk, optval, optlen); 2460 retval = sctp_setsockopt_disable_fragments(sk, optval, optlen);
2213 break; 2461 break;
@@ -2283,112 +2531,29 @@ out_nounlock:
2283 * 2531 *
2284 * len: the size of the address. 2532 * len: the size of the address.
2285 */ 2533 */
2286SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *uaddr, 2534SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr,
2287 int addr_len) 2535 int addr_len)
2288{ 2536{
2289 struct sctp_sock *sp;
2290 struct sctp_endpoint *ep;
2291 struct sctp_association *asoc;
2292 struct sctp_transport *transport;
2293 union sctp_addr to;
2294 struct sctp_af *af;
2295 sctp_scope_t scope;
2296 long timeo;
2297 int err = 0; 2537 int err = 0;
2538 struct sctp_af *af;
2298 2539
2299 sctp_lock_sock(sk); 2540 sctp_lock_sock(sk);
2300 2541
2301 SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d)\n", 2542 SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d\n",
2302 __FUNCTION__, sk, uaddr, addr_len); 2543 __FUNCTION__, sk, addr, addr_len);
2303
2304 sp = sctp_sk(sk);
2305 ep = sp->ep;
2306
2307 /* connect() cannot be done on a socket that is already in ESTABLISHED
2308 * state - UDP-style peeled off socket or a TCP-style socket that
2309 * is already connected.
2310 * It cannot be done even on a TCP-style listening socket.
2311 */
2312 if (sctp_sstate(sk, ESTABLISHED) ||
2313 (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
2314 err = -EISCONN;
2315 goto out_unlock;
2316 }
2317
2318 err = sctp_verify_addr(sk, (union sctp_addr *)uaddr, addr_len);
2319 if (err)
2320 goto out_unlock;
2321 2544
2322 if (addr_len > sizeof(to)) 2545 /* Validate addr_len before calling common connect/connectx routine. */
2323 addr_len = sizeof(to); 2546 af = sctp_get_af_specific(addr->sa_family);
2324 memcpy(&to, uaddr, addr_len); 2547 if (!af || addr_len < af->sockaddr_len) {
2325 to.v4.sin_port = ntohs(to.v4.sin_port); 2548 err = -EINVAL;
2326 2549 } else {
2327 asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport); 2550 /* Pass correct addr len to common routine (so it knows there
2328 if (asoc) { 2551 * is only one address being passed.
2329 if (asoc->state >= SCTP_STATE_ESTABLISHED) 2552 */
2330 err = -EISCONN; 2553 err = __sctp_connect(sk, addr, af->sockaddr_len);
2331 else
2332 err = -EALREADY;
2333 goto out_unlock;
2334 }
2335
2336 /* If we could not find a matching association on the endpoint,
2337 * make sure that there is no peeled-off association matching the
2338 * peer address even on another socket.
2339 */
2340 if (sctp_endpoint_is_peeled_off(ep, &to)) {
2341 err = -EADDRNOTAVAIL;
2342 goto out_unlock;
2343 }
2344
2345 /* If a bind() or sctp_bindx() is not called prior to a connect()
2346 * call, the system picks an ephemeral port and will choose an address
2347 * set equivalent to binding with a wildcard address.
2348 */
2349 if (!ep->base.bind_addr.port) {
2350 if (sctp_autobind(sk)) {
2351 err = -EAGAIN;
2352 goto out_unlock;
2353 }
2354 }
2355
2356 scope = sctp_scope(&to);
2357 asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
2358 if (!asoc) {
2359 err = -ENOMEM;
2360 goto out_unlock;
2361 }
2362
2363 /* Prime the peer's transport structures. */
2364 transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL);
2365 if (!transport) {
2366 sctp_association_free(asoc);
2367 goto out_unlock;
2368 }
2369 err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL);
2370 if (err < 0) {
2371 sctp_association_free(asoc);
2372 goto out_unlock;
2373 }
2374
2375 err = sctp_primitive_ASSOCIATE(asoc, NULL);
2376 if (err < 0) {
2377 sctp_association_free(asoc);
2378 goto out_unlock;
2379 } 2554 }
2380 2555
2381 /* Initialize sk's dport and daddr for getpeername() */
2382 inet_sk(sk)->dport = htons(asoc->peer.port);
2383 af = sctp_get_af_specific(to.sa.sa_family);
2384 af->to_sk_daddr(&to, sk);
2385
2386 timeo = sock_sndtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
2387 err = sctp_wait_for_connect(asoc, &timeo);
2388
2389out_unlock:
2390 sctp_release_sock(sk); 2556 sctp_release_sock(sk);
2391
2392 return err; 2557 return err;
2393} 2558}
2394 2559
@@ -2677,12 +2842,15 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
2677 /* Map ipv4 address into v4-mapped-on-v6 address. */ 2842 /* Map ipv4 address into v4-mapped-on-v6 address. */
2678 sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), 2843 sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
2679 (union sctp_addr *)&status.sstat_primary.spinfo_address); 2844 (union sctp_addr *)&status.sstat_primary.spinfo_address);
2680 status.sstat_primary.spinfo_state = transport->active; 2845 status.sstat_primary.spinfo_state = transport->state;
2681 status.sstat_primary.spinfo_cwnd = transport->cwnd; 2846 status.sstat_primary.spinfo_cwnd = transport->cwnd;
2682 status.sstat_primary.spinfo_srtt = transport->srtt; 2847 status.sstat_primary.spinfo_srtt = transport->srtt;
2683 status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); 2848 status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto);
2684 status.sstat_primary.spinfo_mtu = transport->pmtu; 2849 status.sstat_primary.spinfo_mtu = transport->pmtu;
2685 2850
2851 if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN)
2852 status.sstat_primary.spinfo_state = SCTP_ACTIVE;
2853
2686 if (put_user(len, optlen)) { 2854 if (put_user(len, optlen)) {
2687 retval = -EFAULT; 2855 retval = -EFAULT;
2688 goto out; 2856 goto out;
@@ -2733,12 +2901,15 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
2733 return -EINVAL; 2901 return -EINVAL;
2734 2902
2735 pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc); 2903 pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
2736 pinfo.spinfo_state = transport->active; 2904 pinfo.spinfo_state = transport->state;
2737 pinfo.spinfo_cwnd = transport->cwnd; 2905 pinfo.spinfo_cwnd = transport->cwnd;
2738 pinfo.spinfo_srtt = transport->srtt; 2906 pinfo.spinfo_srtt = transport->srtt;
2739 pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); 2907 pinfo.spinfo_rto = jiffies_to_msecs(transport->rto);
2740 pinfo.spinfo_mtu = transport->pmtu; 2908 pinfo.spinfo_mtu = transport->pmtu;
2741 2909
2910 if (pinfo.spinfo_state == SCTP_UNKNOWN)
2911 pinfo.spinfo_state = SCTP_ACTIVE;
2912
2742 if (put_user(len, optlen)) { 2913 if (put_user(len, optlen)) {
2743 retval = -EFAULT; 2914 retval = -EFAULT;
2744 goto out; 2915 goto out;
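
The SCTP_UNKNOWN-to-SCTP_ACTIVE mapping in these two getsockopt paths keeps the new internal transport state from leaking to applications. A hedged sketch of the userspace query that lands here, assuming the lksctp-tools SCTP_GET_PEER_ADDR_INFO option and struct sctp_paddrinfo layout:

    #include <string.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>   /* struct sctp_paddrinfo (lksctp, assumed) */
    #include <sys/socket.h>

    /* Returns the reported transport state, or -1 on error. */
    int peer_state(int sd, const struct sockaddr_in *peer)
    {
            struct sctp_paddrinfo pinfo;
            socklen_t len = sizeof(pinfo);

            memset(&pinfo, 0, sizeof(pinfo));
            memcpy(&pinfo.spinfo_address, peer, sizeof(*peer));

            if (getsockopt(sd, IPPROTO_SCTP, SCTP_GET_PEER_ADDR_INFO,
                           &pinfo, &len) < 0)
                    return -1;
            /* SCTP_UNKNOWN never escapes; the kernel folds it above. */
            return pinfo.spinfo_state;
    }
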
@@ -3591,7 +3762,8 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
3591 int retval = 0; 3762 int retval = 0;
3592 int len; 3763 int len;
3593 3764
3594 SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p, ...)\n", sk); 3765 SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p... optname: %d)\n",
3766 sk, optname);
3595 3767
3596 /* I can hardly begin to describe how wrong this is. This is 3768 /* I can hardly begin to describe how wrong this is. This is
3597 * so broken as to be worse than useless. The API draft 3769 * so broken as to be worse than useless. The API draft
@@ -4368,15 +4540,11 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
 4368 * However, this function was correct in any case. 8) 4540
4369 */ 4541 */
4370 if (flags & MSG_PEEK) { 4542 if (flags & MSG_PEEK) {
4371 unsigned long cpu_flags; 4543 spin_lock_bh(&sk->sk_receive_queue.lock);
4372
4373 sctp_spin_lock_irqsave(&sk->sk_receive_queue.lock,
4374 cpu_flags);
4375 skb = skb_peek(&sk->sk_receive_queue); 4544 skb = skb_peek(&sk->sk_receive_queue);
4376 if (skb) 4545 if (skb)
4377 atomic_inc(&skb->users); 4546 atomic_inc(&skb->users);
4378 sctp_spin_unlock_irqrestore(&sk->sk_receive_queue.lock, 4547 spin_unlock_bh(&sk->sk_receive_queue.lock);
4379 cpu_flags);
4380 } else { 4548 } else {
4381 skb = skb_dequeue(&sk->sk_receive_queue); 4549 skb = skb_dequeue(&sk->sk_receive_queue);
4382 } 4550 }
@@ -4600,8 +4768,7 @@ out:
4600 return err; 4768 return err;
4601 4769
4602do_error: 4770do_error:
4603 if (asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1 >= 4771 if (asoc->init_err_counter + 1 >= asoc->max_init_attempts)
4604 asoc->max_init_attempts)
4605 err = -ETIMEDOUT; 4772 err = -ETIMEDOUT;
4606 else 4773 else
4607 err = -ECONNREFUSED; 4774 err = -ECONNREFUSED;
@@ -4686,6 +4853,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
4686 struct sctp_endpoint *newep = newsp->ep; 4853 struct sctp_endpoint *newep = newsp->ep;
4687 struct sk_buff *skb, *tmp; 4854 struct sk_buff *skb, *tmp;
4688 struct sctp_ulpevent *event; 4855 struct sctp_ulpevent *event;
4856 int flags = 0;
4689 4857
4690 /* Migrate socket buffer sizes and all the socket level options to the 4858 /* Migrate socket buffer sizes and all the socket level options to the
4691 * new socket. 4859 * new socket.
@@ -4707,6 +4875,17 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
4707 sctp_sk(newsk)->bind_hash = pp; 4875 sctp_sk(newsk)->bind_hash = pp;
4708 inet_sk(newsk)->num = inet_sk(oldsk)->num; 4876 inet_sk(newsk)->num = inet_sk(oldsk)->num;
4709 4877
4878 /* Copy the bind_addr list from the original endpoint to the new
 4879 * endpoint so that we can handle restarts properly.
4880 */
4881 if (assoc->peer.ipv4_address)
4882 flags |= SCTP_ADDR4_PEERSUPP;
4883 if (assoc->peer.ipv6_address)
4884 flags |= SCTP_ADDR6_PEERSUPP;
4885 sctp_bind_addr_copy(&newsp->ep->base.bind_addr,
4886 &oldsp->ep->base.bind_addr,
4887 SCTP_SCOPE_GLOBAL, GFP_KERNEL, flags);
4888
4710 /* Move any messages in the old socket's receive queue that are for the 4889 /* Move any messages in the old socket's receive queue that are for the
4711 * peeled off association to the new socket's receive queue. 4890 * peeled off association to the new socket's receive queue.
4712 */ 4891 */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f30882e1e96a..0ec0fde6e6c5 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -83,7 +83,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
83 peer->last_time_used = jiffies; 83 peer->last_time_used = jiffies;
84 peer->last_time_ecne_reduced = jiffies; 84 peer->last_time_ecne_reduced = jiffies;
85 85
86 peer->active = SCTP_ACTIVE; 86 peer->init_sent_count = 0;
87
88 peer->state = SCTP_ACTIVE;
87 peer->hb_allowed = 0; 89 peer->hb_allowed = 0;
88 90
89 /* Initialize the default path max_retrans. */ 91 /* Initialize the default path max_retrans. */
diff --git a/net/socket.c b/net/socket.c
index cec0cb38b9ce..38729af09461 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -81,6 +81,7 @@
81#include <linux/syscalls.h> 81#include <linux/syscalls.h>
82#include <linux/compat.h> 82#include <linux/compat.h>
83#include <linux/kmod.h> 83#include <linux/kmod.h>
84#include <linux/audit.h>
84 85
85#ifdef CONFIG_NET_RADIO 86#ifdef CONFIG_NET_RADIO
86#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ 87#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
@@ -226,7 +227,7 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
226 return 0; 227 return 0;
227 if(copy_from_user(kaddr,uaddr,ulen)) 228 if(copy_from_user(kaddr,uaddr,ulen))
228 return -EFAULT; 229 return -EFAULT;
229 return 0; 230 return audit_sockaddr(ulen, kaddr);
230} 231}
231 232
232/** 233/**
@@ -1906,7 +1907,11 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1906 /* copy_from_user should be SMP safe. */ 1907 /* copy_from_user should be SMP safe. */
1907 if (copy_from_user(a, args, nargs[call])) 1908 if (copy_from_user(a, args, nargs[call]))
1908 return -EFAULT; 1909 return -EFAULT;
1909 1910
1911 err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
1912 if (err)
1913 return err;
1914
1910 a0=a[0]; 1915 a0=a[0];
1911 a1=a[1]; 1916 a1=a[1];
1912 1917
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d07f5ce31824..d65ed8684fc1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -118,7 +118,6 @@ retry:
118 xfrm_policy_put_afinfo(afinfo); 118 xfrm_policy_put_afinfo(afinfo);
119 return type; 119 return type;
120} 120}
121EXPORT_SYMBOL(xfrm_get_type);
122 121
123int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 122int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
124 unsigned short family) 123 unsigned short family)
@@ -216,8 +215,8 @@ out:
216 215
217expired: 216expired:
218 read_unlock(&xp->lock); 217 read_unlock(&xp->lock);
219 km_policy_expired(xp, dir, 1); 218 if (!xfrm_policy_delete(xp, dir))
220 xfrm_policy_delete(xp, dir); 219 km_policy_expired(xp, dir, 1);
221 xfrm_pol_put(xp); 220 xfrm_pol_put(xp);
222} 221}
223 222
@@ -555,7 +554,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
555 return NULL; 554 return NULL;
556} 555}
557 556
558void xfrm_policy_delete(struct xfrm_policy *pol, int dir) 557int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
559{ 558{
560 write_lock_bh(&xfrm_policy_lock); 559 write_lock_bh(&xfrm_policy_lock);
561 pol = __xfrm_policy_unlink(pol, dir); 560 pol = __xfrm_policy_unlink(pol, dir);
@@ -564,7 +563,9 @@ void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
564 if (dir < XFRM_POLICY_MAX) 563 if (dir < XFRM_POLICY_MAX)
565 atomic_inc(&flow_cache_genid); 564 atomic_inc(&flow_cache_genid);
566 xfrm_policy_kill(pol); 565 xfrm_policy_kill(pol);
566 return 0;
567 } 567 }
568 return -ENOENT;
568} 569}
569 570
570int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) 571int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d11747c2a763..9d206c282cf1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -50,7 +50,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
50 50
51static int xfrm_state_gc_flush_bundles; 51static int xfrm_state_gc_flush_bundles;
52 52
53static void __xfrm_state_delete(struct xfrm_state *x); 53static int __xfrm_state_delete(struct xfrm_state *x);
54 54
55static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); 55static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
56static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); 56static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -154,6 +154,7 @@ static void xfrm_timer_handler(unsigned long data)
154 next = tmo; 154 next = tmo;
155 } 155 }
156 156
157 x->km.dying = warn;
157 if (warn) 158 if (warn)
158 km_state_expired(x, 0); 159 km_state_expired(x, 0);
159resched: 160resched:
@@ -169,9 +170,8 @@ expired:
169 next = 2; 170 next = 2;
170 goto resched; 171 goto resched;
171 } 172 }
172 if (x->id.spi != 0) 173 if (!__xfrm_state_delete(x) && x->id.spi)
173 km_state_expired(x, 1); 174 km_state_expired(x, 1);
174 __xfrm_state_delete(x);
175 175
176out: 176out:
177 spin_unlock(&x->lock); 177 spin_unlock(&x->lock);
@@ -215,8 +215,10 @@ void __xfrm_state_destroy(struct xfrm_state *x)
215} 215}
216EXPORT_SYMBOL(__xfrm_state_destroy); 216EXPORT_SYMBOL(__xfrm_state_destroy);
217 217
218static void __xfrm_state_delete(struct xfrm_state *x) 218static int __xfrm_state_delete(struct xfrm_state *x)
219{ 219{
220 int err = -ESRCH;
221
220 if (x->km.state != XFRM_STATE_DEAD) { 222 if (x->km.state != XFRM_STATE_DEAD) {
221 x->km.state = XFRM_STATE_DEAD; 223 x->km.state = XFRM_STATE_DEAD;
222 spin_lock(&xfrm_state_lock); 224 spin_lock(&xfrm_state_lock);
@@ -245,14 +247,21 @@ static void __xfrm_state_delete(struct xfrm_state *x)
245 * is what we are dropping here. 247 * is what we are dropping here.
246 */ 248 */
247 atomic_dec(&x->refcnt); 249 atomic_dec(&x->refcnt);
250 err = 0;
248 } 251 }
252
253 return err;
249} 254}
250 255
251void xfrm_state_delete(struct xfrm_state *x) 256int xfrm_state_delete(struct xfrm_state *x)
252{ 257{
258 int err;
259
253 spin_lock_bh(&x->lock); 260 spin_lock_bh(&x->lock);
254 __xfrm_state_delete(x); 261 err = __xfrm_state_delete(x);
255 spin_unlock_bh(&x->lock); 262 spin_unlock_bh(&x->lock);
263
264 return err;
256} 265}
257EXPORT_SYMBOL(xfrm_state_delete); 266EXPORT_SYMBOL(xfrm_state_delete);
258 267
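
The reason the delete path now returns int: the expiry timer and a user-initiated delete can race, and only the context that actually performs the delete should emit the expire notification. A minimal userspace analogue of that idempotent-delete contract (illustrative names; pthreads standing in for the kernel spinlock):

    #include <pthread.h>

    /* Illustrative object; not kernel API. */
    struct obj {
            int dead;
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Returns 0 only for the caller that actually performs the delete,
     * -1 for everyone who lost the race (the -ESRCH case above). */
    static int obj_delete(struct obj *o)
    {
            int err = -1;

            pthread_mutex_lock(&table_lock);
            if (!o->dead) {
                    o->dead = 1;        /* unlink would happen here */
                    err = 0;
            }
            pthread_mutex_unlock(&table_lock);
            return err;
    }
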
@@ -557,16 +566,18 @@ int xfrm_state_check_expire(struct xfrm_state *x)
557 566
558 if (x->curlft.bytes >= x->lft.hard_byte_limit || 567 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
559 x->curlft.packets >= x->lft.hard_packet_limit) { 568 x->curlft.packets >= x->lft.hard_packet_limit) {
560 km_state_expired(x, 1); 569 x->km.state = XFRM_STATE_EXPIRED;
561 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ)) 570 if (!mod_timer(&x->timer, jiffies))
562 xfrm_state_hold(x); 571 xfrm_state_hold(x);
563 return -EINVAL; 572 return -EINVAL;
564 } 573 }
565 574
566 if (!x->km.dying && 575 if (!x->km.dying &&
567 (x->curlft.bytes >= x->lft.soft_byte_limit || 576 (x->curlft.bytes >= x->lft.soft_byte_limit ||
568 x->curlft.packets >= x->lft.soft_packet_limit)) 577 x->curlft.packets >= x->lft.soft_packet_limit)) {
578 x->km.dying = 1;
569 km_state_expired(x, 0); 579 km_state_expired(x, 0);
580 }
570 return 0; 581 return 0;
571} 582}
572EXPORT_SYMBOL(xfrm_state_check_expire); 583EXPORT_SYMBOL(xfrm_state_check_expire);
@@ -796,34 +807,56 @@ EXPORT_SYMBOL(xfrm_replay_advance);
796static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); 807static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
797static DEFINE_RWLOCK(xfrm_km_lock); 808static DEFINE_RWLOCK(xfrm_km_lock);
798 809
799static void km_state_expired(struct xfrm_state *x, int hard) 810void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
800{ 811{
801 struct xfrm_mgr *km; 812 struct xfrm_mgr *km;
802 813
803 if (hard) 814 read_lock(&xfrm_km_lock);
804 x->km.state = XFRM_STATE_EXPIRED; 815 list_for_each_entry(km, &xfrm_km_list, list)
805 else 816 if (km->notify_policy)
806 x->km.dying = 1; 817 km->notify_policy(xp, dir, c);
818 read_unlock(&xfrm_km_lock);
819}
807 820
821void km_state_notify(struct xfrm_state *x, struct km_event *c)
822{
823 struct xfrm_mgr *km;
808 read_lock(&xfrm_km_lock); 824 read_lock(&xfrm_km_lock);
809 list_for_each_entry(km, &xfrm_km_list, list) 825 list_for_each_entry(km, &xfrm_km_list, list)
810 km->notify(x, hard); 826 if (km->notify)
827 km->notify(x, c);
811 read_unlock(&xfrm_km_lock); 828 read_unlock(&xfrm_km_lock);
829}
830
831EXPORT_SYMBOL(km_policy_notify);
832EXPORT_SYMBOL(km_state_notify);
833
834static void km_state_expired(struct xfrm_state *x, int hard)
835{
836 struct km_event c;
837
838 c.data.hard = hard;
839 c.event = XFRM_MSG_EXPIRE;
840 km_state_notify(x, &c);
812 841
813 if (hard) 842 if (hard)
814 wake_up(&km_waitq); 843 wake_up(&km_waitq);
815} 844}
816 845
 846/*
 847 * We send to all registered managers regardless of failure;
 848 * we are happy with one success.
 849 */
817static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) 850static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
818{ 851{
819 int err = -EINVAL; 852 int err = -EINVAL, acqret;
820 struct xfrm_mgr *km; 853 struct xfrm_mgr *km;
821 854
822 read_lock(&xfrm_km_lock); 855 read_lock(&xfrm_km_lock);
823 list_for_each_entry(km, &xfrm_km_list, list) { 856 list_for_each_entry(km, &xfrm_km_list, list) {
824 err = km->acquire(x, t, pol, XFRM_POLICY_OUT); 857 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
825 if (!err) 858 if (!acqret)
826 break; 859 err = acqret;
827 } 860 }
828 read_unlock(&xfrm_km_lock); 861 read_unlock(&xfrm_km_lock);
829 return err; 862 return err;
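
km_query() now keeps iterating after a failure and latches any success, so one broken manager cannot mask a working one. A tiny self-contained sketch of that accumulation pattern (illustrative names):

    /* Walk every registered callback; do not break on failure, and
     * remember that at least one succeeded -- mirroring km_query(). */
    static int broadcast_acquire(int (*mgrs[])(void), int n)
    {
            int err = -22;              /* -EINVAL until someone succeeds */
            int i, rc;

            for (i = 0; i < n; i++) {
                    rc = mgrs[i]();     /* keep going even after a failure */
                    if (rc == 0)
                            err = 0;    /* one success is enough */
            }
            return err;
    }
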
@@ -848,13 +881,11 @@ EXPORT_SYMBOL(km_new_mapping);
848 881
849void km_policy_expired(struct xfrm_policy *pol, int dir, int hard) 882void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
850{ 883{
851 struct xfrm_mgr *km; 884 struct km_event c;
852 885
853 read_lock(&xfrm_km_lock); 886 c.data.hard = hard;
854 list_for_each_entry(km, &xfrm_km_list, list) 887 c.event = XFRM_MSG_POLEXPIRE;
855 if (km->notify_policy) 888 km_policy_notify(pol, dir, &c);
856 km->notify_policy(pol, dir, hard);
857 read_unlock(&xfrm_km_lock);
858 889
859 if (hard) 890 if (hard)
860 wake_up(&km_waitq); 891 wake_up(&km_waitq);
@@ -1024,6 +1055,43 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1024} 1055}
1025 1056
1026EXPORT_SYMBOL(xfrm_state_mtu); 1057EXPORT_SYMBOL(xfrm_state_mtu);
1058
1059int xfrm_init_state(struct xfrm_state *x)
1060{
1061 struct xfrm_state_afinfo *afinfo;
1062 int family = x->props.family;
1063 int err;
1064
1065 err = -EAFNOSUPPORT;
1066 afinfo = xfrm_state_get_afinfo(family);
1067 if (!afinfo)
1068 goto error;
1069
1070 err = 0;
1071 if (afinfo->init_flags)
1072 err = afinfo->init_flags(x);
1073
1074 xfrm_state_put_afinfo(afinfo);
1075
1076 if (err)
1077 goto error;
1078
1079 err = -EPROTONOSUPPORT;
1080 x->type = xfrm_get_type(x->id.proto, family);
1081 if (x->type == NULL)
1082 goto error;
1083
1084 err = x->type->init_state(x);
1085 if (err)
1086 goto error;
1087
1088 x->km.state = XFRM_STATE_VALID;
1089
1090error:
1091 return err;
1092}
1093
1094EXPORT_SYMBOL(xfrm_init_state);
1027 1095
1028void __init xfrm_state_init(void) 1096void __init xfrm_state_init(void)
1029{ 1097{
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 97509011c274..ecade4893a13 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -249,17 +249,10 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
249 if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) 249 if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
250 goto error; 250 goto error;
251 251
252 err = -ENOENT; 252 err = xfrm_init_state(x);
253 x->type = xfrm_get_type(x->id.proto, x->props.family);
254 if (x->type == NULL)
255 goto error;
256
257 err = x->type->init_state(x, NULL);
258 if (err) 253 if (err)
259 goto error; 254 goto error;
260 255
261 x->curlft.add_time = (unsigned long) xtime.tv_sec;
262 x->km.state = XFRM_STATE_VALID;
263 x->km.seq = p->seq; 256 x->km.seq = p->seq;
264 257
265 return x; 258 return x;
@@ -277,6 +270,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
277 struct xfrm_usersa_info *p = NLMSG_DATA(nlh); 270 struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
278 struct xfrm_state *x; 271 struct xfrm_state *x;
279 int err; 272 int err;
273 struct km_event c;
280 274
281 err = verify_newsa_info(p, (struct rtattr **) xfrma); 275 err = verify_newsa_info(p, (struct rtattr **) xfrma);
282 if (err) 276 if (err)
@@ -286,6 +280,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
286 if (!x) 280 if (!x)
287 return err; 281 return err;
288 282
283 xfrm_state_hold(x);
289 if (nlh->nlmsg_type == XFRM_MSG_NEWSA) 284 if (nlh->nlmsg_type == XFRM_MSG_NEWSA)
290 err = xfrm_state_add(x); 285 err = xfrm_state_add(x);
291 else 286 else
@@ -294,14 +289,24 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
294 if (err < 0) { 289 if (err < 0) {
295 x->km.state = XFRM_STATE_DEAD; 290 x->km.state = XFRM_STATE_DEAD;
296 xfrm_state_put(x); 291 xfrm_state_put(x);
292 goto out;
297 } 293 }
298 294
295 c.seq = nlh->nlmsg_seq;
296 c.pid = nlh->nlmsg_pid;
297 c.event = nlh->nlmsg_type;
298
299 km_state_notify(x, &c);
300out:
301 xfrm_state_put(x);
299 return err; 302 return err;
300} 303}
301 304
302static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 305static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
303{ 306{
304 struct xfrm_state *x; 307 struct xfrm_state *x;
308 int err;
309 struct km_event c;
305 struct xfrm_usersa_id *p = NLMSG_DATA(nlh); 310 struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
306 311
307 x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); 312 x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
@@ -313,10 +318,19 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
313 return -EPERM; 318 return -EPERM;
314 } 319 }
315 320
316 xfrm_state_delete(x); 321 err = xfrm_state_delete(x);
322 if (err < 0) {
323 xfrm_state_put(x);
324 return err;
325 }
326
327 c.seq = nlh->nlmsg_seq;
328 c.pid = nlh->nlmsg_pid;
329 c.event = nlh->nlmsg_type;
330 km_state_notify(x, &c);
317 xfrm_state_put(x); 331 xfrm_state_put(x);
318 332
319 return 0; 333 return err;
320} 334}
321 335
322static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) 336static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
@@ -681,6 +695,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
681{ 695{
682 struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); 696 struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
683 struct xfrm_policy *xp; 697 struct xfrm_policy *xp;
698 struct km_event c;
684 int err; 699 int err;
685 int excl; 700 int excl;
686 701
@@ -692,6 +707,10 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
692 if (!xp) 707 if (!xp)
693 return err; 708 return err;
694 709
 710 /* shouldn't excl be based on nlh flags??
 711 * Aha! this is anti-netlink really, i.e. more pfkey derived;
 712 * in netlink excl is a flag and you wouldn't need
 713 * a type XFRM_MSG_UPDPOLICY - JHS */
695 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; 714 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
696 err = xfrm_policy_insert(p->dir, xp, excl); 715 err = xfrm_policy_insert(p->dir, xp, excl);
697 if (err) { 716 if (err) {
@@ -699,6 +718,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
699 return err; 718 return err;
700 } 719 }
701 720
721 c.event = nlh->nlmsg_type;
722 c.seq = nlh->nlmsg_seq;
723 c.pid = nlh->nlmsg_pid;
724 km_policy_notify(xp, p->dir, &c);
725
702 xfrm_pol_put(xp); 726 xfrm_pol_put(xp);
703 727
704 return 0; 728 return 0;
@@ -816,6 +840,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
816 struct xfrm_policy *xp; 840 struct xfrm_policy *xp;
817 struct xfrm_userpolicy_id *p; 841 struct xfrm_userpolicy_id *p;
818 int err; 842 int err;
843 struct km_event c;
819 int delete; 844 int delete;
820 845
821 p = NLMSG_DATA(nlh); 846 p = NLMSG_DATA(nlh);
@@ -843,6 +868,12 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
843 NETLINK_CB(skb).pid, 868 NETLINK_CB(skb).pid,
844 MSG_DONTWAIT); 869 MSG_DONTWAIT);
845 } 870 }
871 } else {
872 c.data.byid = p->index;
873 c.event = nlh->nlmsg_type;
874 c.seq = nlh->nlmsg_seq;
875 c.pid = nlh->nlmsg_pid;
876 km_policy_notify(xp, p->dir, &c);
846 } 877 }
847 878
848 xfrm_pol_put(xp); 879 xfrm_pol_put(xp);
@@ -852,15 +883,28 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
852 883
853static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 884static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
854{ 885{
886 struct km_event c;
855 struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); 887 struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
856 888
857 xfrm_state_flush(p->proto); 889 xfrm_state_flush(p->proto);
890 c.data.proto = p->proto;
891 c.event = nlh->nlmsg_type;
892 c.seq = nlh->nlmsg_seq;
893 c.pid = nlh->nlmsg_pid;
894 km_state_notify(NULL, &c);
895
858 return 0; 896 return 0;
859} 897}
860 898
861static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 899static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
862{ 900{
901 struct km_event c;
902
863 xfrm_policy_flush(); 903 xfrm_policy_flush();
904 c.event = nlh->nlmsg_type;
905 c.seq = nlh->nlmsg_seq;
906 c.pid = nlh->nlmsg_pid;
907 km_policy_notify(NULL, 0, &c);
864 return 0; 908 return 0;
865} 909}
866 910
@@ -1069,15 +1113,16 @@ nlmsg_failure:
1069 return -1; 1113 return -1;
1070} 1114}
1071 1115
1072static int xfrm_send_state_notify(struct xfrm_state *x, int hard) 1116static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
1073{ 1117{
1074 struct sk_buff *skb; 1118 struct sk_buff *skb;
1119 int len = NLMSG_LENGTH(sizeof(struct xfrm_user_expire));
1075 1120
1076 skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC); 1121 skb = alloc_skb(len, GFP_ATOMIC);
1077 if (skb == NULL) 1122 if (skb == NULL)
1078 return -ENOMEM; 1123 return -ENOMEM;
1079 1124
1080 if (build_expire(skb, x, hard) < 0) 1125 if (build_expire(skb, x, c->data.hard) < 0)
1081 BUG(); 1126 BUG();
1082 1127
1083 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; 1128 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
@@ -1085,6 +1130,131 @@ static int xfrm_send_state_notify(struct xfrm_state *x, int hard)
1085 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); 1130 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
1086} 1131}
1087 1132
1133static int xfrm_notify_sa_flush(struct km_event *c)
1134{
1135 struct xfrm_usersa_flush *p;
1136 struct nlmsghdr *nlh;
1137 struct sk_buff *skb;
1138 unsigned char *b;
1139 int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
1140
1141 skb = alloc_skb(len, GFP_ATOMIC);
1142 if (skb == NULL)
1143 return -ENOMEM;
1144 b = skb->tail;
1145
1146 nlh = NLMSG_PUT(skb, c->pid, c->seq,
1147 XFRM_MSG_FLUSHSA, sizeof(*p));
1148 nlh->nlmsg_flags = 0;
1149
1150 p = NLMSG_DATA(nlh);
1151 p->proto = c->data.proto;
1152
1153 nlh->nlmsg_len = skb->tail - b;
1154
1155 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
1156
1157nlmsg_failure:
1158 kfree_skb(skb);
1159 return -1;
1160}
1161
 1162static inline int xfrm_sa_len(struct xfrm_state *x)
1163{
1164 int l = 0;
1165 if (x->aalg)
1166 l += RTA_SPACE(sizeof(*x->aalg) + (x->aalg->alg_key_len+7)/8);
1167 if (x->ealg)
1168 l += RTA_SPACE(sizeof(*x->ealg) + (x->ealg->alg_key_len+7)/8);
1169 if (x->calg)
1170 l += RTA_SPACE(sizeof(*x->calg));
1171 if (x->encap)
1172 l += RTA_SPACE(sizeof(*x->encap));
1173
1174 return l;
1175}
1176
1177static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
1178{
1179 struct xfrm_usersa_info *p;
1180 struct xfrm_usersa_id *id;
1181 struct nlmsghdr *nlh;
1182 struct sk_buff *skb;
1183 unsigned char *b;
1184 int len = xfrm_sa_len(x);
1185 int headlen;
1186
1187 headlen = sizeof(*p);
1188 if (c->event == XFRM_MSG_DELSA) {
1189 len += RTA_SPACE(headlen);
1190 headlen = sizeof(*id);
1191 }
1192 len += NLMSG_SPACE(headlen);
1193
1194 skb = alloc_skb(len, GFP_ATOMIC);
1195 if (skb == NULL)
1196 return -ENOMEM;
1197 b = skb->tail;
1198
1199 nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen);
1200 nlh->nlmsg_flags = 0;
1201
1202 p = NLMSG_DATA(nlh);
1203 if (c->event == XFRM_MSG_DELSA) {
1204 id = NLMSG_DATA(nlh);
1205 memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr));
1206 id->spi = x->id.spi;
1207 id->family = x->props.family;
1208 id->proto = x->id.proto;
1209
1210 p = RTA_DATA(__RTA_PUT(skb, XFRMA_SA, sizeof(*p)));
1211 }
1212
1213 copy_to_user_state(x, p);
1214
1215 if (x->aalg)
1216 RTA_PUT(skb, XFRMA_ALG_AUTH,
1217 sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
1218 if (x->ealg)
1219 RTA_PUT(skb, XFRMA_ALG_CRYPT,
1220 sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
1221 if (x->calg)
1222 RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
1223
1224 if (x->encap)
1225 RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
1226
1227 nlh->nlmsg_len = skb->tail - b;
1228
1229 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
1230
1231nlmsg_failure:
1232rtattr_failure:
1233 kfree_skb(skb);
1234 return -1;
1235}
1236
1237static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
1238{
1239
1240 switch (c->event) {
1241 case XFRM_MSG_EXPIRE:
1242 return xfrm_exp_state_notify(x, c);
1243 case XFRM_MSG_DELSA:
1244 case XFRM_MSG_UPDSA:
1245 case XFRM_MSG_NEWSA:
1246 return xfrm_notify_sa(x, c);
1247 case XFRM_MSG_FLUSHSA:
1248 return xfrm_notify_sa_flush(c);
1249 default:
1250 printk("xfrm_user: Unknown SA event %d\n", c->event);
1251 break;
1252 }
1253
1254 return 0;
1255
1256}
1257
1088static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, 1258static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
1089 struct xfrm_tmpl *xt, struct xfrm_policy *xp, 1259 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
1090 int dir) 1260 int dir)
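
These SA notifications are broadcast to a netlink group rather than returned to a single requester. A hedged sketch of a userspace listener for the SA group, assuming NETLINK_XFRM is protocol 6 and treating the XFRMGRP_SA value as an assumption taken from this tree's linux/xfrm.h:

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    #ifndef NETLINK_XFRM
    #define NETLINK_XFRM 6
    #endif
    #define XFRMGRP_SA 4        /* assumed to match this tree's xfrm.h */

    int main(void)
    {
            struct sockaddr_nl snl;
            char buf[4096];
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);

            if (fd < 0)
                    return 1;
            memset(&snl, 0, sizeof(snl));
            snl.nl_family = AF_NETLINK;
            snl.nl_groups = XFRMGRP_SA;     /* subscribe to SA broadcasts */
            if (bind(fd, (struct sockaddr *)&snl, sizeof(snl)) < 0)
                    return 1;

            for (;;) {
                    int n = recv(fd, buf, sizeof(buf), 0);
                    struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

                    if (n <= 0)
                            break;
                    for (; NLMSG_OK(nlh, n); nlh = NLMSG_NEXT(nlh, n))
                            printf("xfrm SA event: type=%u len=%u\n",
                                   nlh->nlmsg_type, nlh->nlmsg_len);
            }
            return 0;
    }
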
@@ -1218,7 +1388,7 @@ nlmsg_failure:
1218 return -1; 1388 return -1;
1219} 1389}
1220 1390
1221static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard) 1391static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1222{ 1392{
1223 struct sk_buff *skb; 1393 struct sk_buff *skb;
1224 size_t len; 1394 size_t len;
@@ -1229,7 +1399,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
1229 if (skb == NULL) 1399 if (skb == NULL)
1230 return -ENOMEM; 1400 return -ENOMEM;
1231 1401
1232 if (build_polexpire(skb, xp, dir, hard) < 0) 1402 if (build_polexpire(skb, xp, dir, c->data.hard) < 0)
1233 BUG(); 1403 BUG();
1234 1404
1235 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; 1405 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
@@ -1237,6 +1407,103 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
1237 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); 1407 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
1238} 1408}
1239 1409
1410static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
1411{
1412 struct xfrm_userpolicy_info *p;
1413 struct xfrm_userpolicy_id *id;
1414 struct nlmsghdr *nlh;
1415 struct sk_buff *skb;
1416 unsigned char *b;
1417 int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
1418 int headlen;
1419
1420 headlen = sizeof(*p);
1421 if (c->event == XFRM_MSG_DELPOLICY) {
1422 len += RTA_SPACE(headlen);
1423 headlen = sizeof(*id);
1424 }
1425 len += NLMSG_SPACE(headlen);
1426
1427 skb = alloc_skb(len, GFP_ATOMIC);
1428 if (skb == NULL)
1429 return -ENOMEM;
1430 b = skb->tail;
1431
1432 nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen);
1433
1434 p = NLMSG_DATA(nlh);
1435 if (c->event == XFRM_MSG_DELPOLICY) {
1436 id = NLMSG_DATA(nlh);
1437 memset(id, 0, sizeof(*id));
1438 id->dir = dir;
1439 if (c->data.byid)
1440 id->index = xp->index;
1441 else
1442 memcpy(&id->sel, &xp->selector, sizeof(id->sel));
1443
1444 p = RTA_DATA(__RTA_PUT(skb, XFRMA_POLICY, sizeof(*p)));
1445 }
1446
1447 nlh->nlmsg_flags = 0;
1448
1449 copy_to_user_policy(xp, p, dir);
1450 if (copy_to_user_tmpl(xp, skb) < 0)
1451 goto nlmsg_failure;
1452
1453 nlh->nlmsg_len = skb->tail - b;
1454
1455 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
1456
1457nlmsg_failure:
1458rtattr_failure:
1459 kfree_skb(skb);
1460 return -1;
1461}
1462
1463static int xfrm_notify_policy_flush(struct km_event *c)
1464{
1465 struct nlmsghdr *nlh;
1466 struct sk_buff *skb;
1467 unsigned char *b;
1468 int len = NLMSG_LENGTH(0);
1469
1470 skb = alloc_skb(len, GFP_ATOMIC);
1471 if (skb == NULL)
1472 return -ENOMEM;
1473 b = skb->tail;
1474
1475
1476 nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
1477
1478 nlh->nlmsg_len = skb->tail - b;
1479
1480 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
1481
1482nlmsg_failure:
1483 kfree_skb(skb);
1484 return -1;
1485}
1486
1487static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1488{
1489
1490 switch (c->event) {
1491 case XFRM_MSG_NEWPOLICY:
1492 case XFRM_MSG_UPDPOLICY:
1493 case XFRM_MSG_DELPOLICY:
1494 return xfrm_notify_policy(xp, dir, c);
1495 case XFRM_MSG_FLUSHPOLICY:
1496 return xfrm_notify_policy_flush(c);
1497 case XFRM_MSG_POLEXPIRE:
1498 return xfrm_exp_policy_notify(xp, dir, c);
1499 default:
1500 printk("xfrm_user: Unknown Policy event %d\n", c->event);
1501 }
1502
1503 return 0;
1504
1505}
1506
1240static struct xfrm_mgr netlink_mgr = { 1507static struct xfrm_mgr netlink_mgr = {
1241 .id = "netlink", 1508 .id = "netlink",
1242 .notify = xfrm_send_state_notify, 1509 .notify = xfrm_send_state_notify,